/* sljitNativePPC_common.c revision 1.1.1.3.4.4 (NetBSD import) */
      1 /*	$NetBSD: sljitNativePPC_common.c,v 1.1.1.3.4.4 2017/12/03 11:38:04 jdolecek Exp $	*/
      2 
      3 /*
      4  *    Stack-less Just-In-Time compiler
      5  *
      6  *    Copyright 2009-2012 Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without modification, are
      9  * permitted provided that the following conditions are met:
     10  *
     11  *   1. Redistributions of source code must retain the above copyright notice, this list of
     12  *      conditions and the following disclaimer.
     13  *
     14  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     15  *      of conditions and the following disclaimer in the documentation and/or other materials
     16  *      provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     21  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     23  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     26  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
     30 {
     31 	return "PowerPC" SLJIT_CPUINFO;
     32 }
     33 
/* Length of an instruction word.
   Both for ppc-32 and ppc-64. */
typedef sljit_u32 sljit_ins;

/* NOTE(review): 32 bit AIX and all 64 bit targets select the "V2" stack
   frame layout; see the link-register slot offsets in sljit_emit_enter /
   sljit_emit_return. Confirm against the target ABI documents. */
#if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define SLJIT_PPC_STACK_FRAME_V2 1
#endif

#ifdef _AIX
#include <sys/cache.h>
#endif

/* On little endian targets calls receive the code entry address directly
   (see the IS_CALL handling in detect_jump_type). */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
#define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
#endif
     50 
     51 #if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
     52 
/* Flushes the data cache and invalidates the instruction cache for the
   instruction words in [from, to), so freshly generated code becomes
   visible to instruction fetch.  Only compiled when SLJIT supplies its
   own cache flush implementation. */
static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
{
#ifdef _AIX
	/* AIX provides a library routine for this. */
	_sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
#	if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
	/* Cache flush for POWER architecture. */
	while (from < to) {
		__asm__ volatile (
			"clf 0, %0\n"
			"dcs\n"
			: : "r"(from)
		);
		from++;
	}
	__asm__ volatile ( "ics" );
#	elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
#	error "Cache flush is not implemented for PowerPC/POWER common mode."
#	else
	/* Cache flush for PowerPC architecture: flush each data cache block,
	   then invalidate the matching instruction cache block. */
	while (from < to) {
		__asm__ volatile (
			"dcbf 0, %0\n"
			"sync\n"
			"icbi 0, %0\n"
			: : "r"(from)
		);
		from++;
	}
	__asm__ volatile ( "isync" );
#	endif
#	ifdef __xlc__
#	warning "This file may fail to compile if -qfuncsect is used"
#	endif
#elif defined(__xlc__)
#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
#else
#error "This platform requires a cache flush implementation."
#endif /* _AIX */
}
     93 
     94 #endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
     95 
/* Temporary registers are mapped after the SLJIT register set.  TMP_ZERO
   is loaded with zero in sljit_emit_enter so it can serve as a constant
   zero source operand. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_ZERO	(SLJIT_NUMBER_OF_REGISTERS + 5)

#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
#define TMP_CALL_REG	(SLJIT_NUMBER_OF_REGISTERS + 6)
#else
#define TMP_CALL_REG	TMP_REG2
#endif

#define TMP_FREG1	(0)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* Maps SLJIT (virtual) register indices to PowerPC general purpose
   register numbers; index 0 is unused. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
	0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
};

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Operand field encoders.  GPR operands are translated through reg_map;
   FPU register numbers (FD..FC) are used directly. */
#define D(d)		(reg_map[d] << 21)
#define S(s)		(reg_map[s] << 21)
#define A(a)		(reg_map[a] << 16)
#define B(b)		(reg_map[b] << 11)
#define C(c)		(reg_map[c] << 6)
#define FD(fd)		((fd) << 21)
#define FS(fs)		((fs) << 21)
#define FA(fa)		((fa) << 16)
#define FB(fb)		((fb) << 11)
#define FC(fc)		((fc) << 6)
#define IMM(imm)	((imm) & 0xffff)
#define CRD(d)		((d) << 21)

/* Instruction bit sections.
   OE and Rc flag (see ALT_SET_FLAGS). */
#define OERC(flags)	(((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
/* Rc flag (see ALT_SET_FLAGS). */
#define RC(flags)	((flags & ALT_SET_FLAGS) >> 10)
/* Primary (HI) and extended (LO) opcode fields. */
#define HI(opcode)	((opcode) << 26)
#define LO(opcode)	((opcode) << 1)

#define ADD		(HI(31) | LO(266))
#define ADDC		(HI(31) | LO(10))
#define ADDE		(HI(31) | LO(138))
#define ADDI		(HI(14))
#define ADDIC		(HI(13))
#define ADDIS		(HI(15))
#define ADDME		(HI(31) | LO(234))
#define AND		(HI(31) | LO(28))
#define ANDI		(HI(28))
#define ANDIS		(HI(29))
#define Bx		(HI(18))
#define BCx		(HI(16))
#define BCCTR		(HI(19) | LO(528) | (3 << 11))
#define BLR		(HI(19) | LO(16) | (0x14 << 21))
#define CNTLZD		(HI(31) | LO(58))
#define CNTLZW		(HI(31) | LO(26))
#define CMP		(HI(31) | LO(0))
#define CMPI		(HI(11))
#define CMPL		(HI(31) | LO(32))
#define CMPLI		(HI(10))
#define CROR		(HI(19) | LO(449))
#define DIVD		(HI(31) | LO(489))
#define DIVDU		(HI(31) | LO(457))
#define DIVW		(HI(31) | LO(491))
#define DIVWU		(HI(31) | LO(459))
#define EXTSB		(HI(31) | LO(954))
#define EXTSH		(HI(31) | LO(922))
#define EXTSW		(HI(31) | LO(986))
#define FABS		(HI(63) | LO(264))
#define FADD		(HI(63) | LO(21))
#define FADDS		(HI(59) | LO(21))
#define FCFID		(HI(63) | LO(846))
#define FCMPU		(HI(63) | LO(0))
#define FCTIDZ		(HI(63) | LO(815))
#define FCTIWZ		(HI(63) | LO(15))
#define FDIV		(HI(63) | LO(18))
#define FDIVS		(HI(59) | LO(18))
#define FMR		(HI(63) | LO(72))
#define FMUL		(HI(63) | LO(25))
#define FMULS		(HI(59) | LO(25))
#define FNEG		(HI(63) | LO(40))
#define FRSP		(HI(63) | LO(12))
#define FSUB		(HI(63) | LO(20))
#define FSUBS		(HI(59) | LO(20))
#define LD		(HI(58) | 0)
#define LWZ		(HI(32))
#define MFCR		(HI(31) | LO(19))
#define MFLR		(HI(31) | LO(339) | 0x80000)
#define MFXER		(HI(31) | LO(339) | 0x10000)
#define MTCTR		(HI(31) | LO(467) | 0x90000)
#define MTLR		(HI(31) | LO(467) | 0x80000)
#define MTXER		(HI(31) | LO(467) | 0x10000)
#define MULHD		(HI(31) | LO(73))
#define MULHDU		(HI(31) | LO(9))
#define MULHW		(HI(31) | LO(75))
#define MULHWU		(HI(31) | LO(11))
#define MULLD		(HI(31) | LO(233))
#define MULLI		(HI(7))
#define MULLW		(HI(31) | LO(235))
#define NEG		(HI(31) | LO(104))
#define NOP		(HI(24))
#define NOR		(HI(31) | LO(124))
#define OR		(HI(31) | LO(444))
#define ORI		(HI(24))
#define ORIS		(HI(25))
#define RLDICL		(HI(30))
#define RLWINM		(HI(21))
#define SLD		(HI(31) | LO(27))
#define SLW		(HI(31) | LO(24))
#define SRAD		(HI(31) | LO(794))
#define SRADI		(HI(31) | LO(413 << 1))
#define SRAW		(HI(31) | LO(792))
#define SRAWI		(HI(31) | LO(824))
#define SRD		(HI(31) | LO(539))
#define SRW		(HI(31) | LO(536))
#define STD		(HI(62) | 0)
#define STDU		(HI(62) | 1)
#define STDUX		(HI(31) | LO(181))
#define STFIWX		(HI(31) | LO(983))
#define STW		(HI(36))
#define STWU		(HI(37))
#define STWUX		(HI(31) | LO(183))
#define SUBF		(HI(31) | LO(40))
#define SUBFC		(HI(31) | LO(8))
#define SUBFE		(HI(31) | LO(136))
#define SUBFIC		(HI(8))
#define XOR		(HI(31) | LO(316))
#define XORI		(HI(26))
#define XORIS		(HI(27))

/* Range limits of signed / unsigned 16 bit immediates. */
#define SIMM_MAX	(0x7fff)
#define SIMM_MIN	(-0x8000)
#define UIMM_MAX	(0xffff)
    231 
    232 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    233 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
    234 {
    235 	sljit_sw* ptrs;
    236 	if (func_ptr)
    237 		*func_ptr = (void*)context;
    238 	ptrs = (sljit_sw*)func;
    239 	context->addr = addr ? addr : ptrs[0];
    240 	context->r2 = ptrs[1];
    241 	context->r11 = ptrs[2];
    242 }
    243 #endif
    244 
    245 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
    246 {
    247 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
    248 	FAIL_IF(!ptr);
    249 	*ptr = ins;
    250 	compiler->size++;
    251 	return SLJIT_SUCCESS;
    252 }
    253 
/* Inspects one jump while code is being copied into the executable
   buffer and decides whether its worst-case (full absolute address load)
   sequence can be shortened.  Returns 1 and records the chosen encoding
   in jump->flags (PATCH_B / PATCH_ABS_B / PATCH_ABS32 / PATCH_ABS48),
   or 0 when the full form must be kept. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_sw extra_jump_flags;

#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* Rewritable jumps and calls keep the full-length sequence. */
	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
		return 0;
#else
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;
#endif

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		/* Label address estimated from its instruction index. */
		target_addr = (sljit_uw)(code + jump->u.label->size);
	}

#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* Calls need the entry address itself; only absolute-load
	   shortening applies. */
	if (jump->flags & IS_CALL)
		goto keep_address;
#endif

	/* Word-aligned displacement from the current output position. */
	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l;

	extra_jump_flags = 0;
	if (jump->flags & IS_COND) {
		/* Conditional branch reaches +/- 32 KB relative, or the
		   first 64 KB absolute. */
		if (diff <= 0x7fff && diff >= -0x8000) {
			jump->flags |= PATCH_B;
			return 1;
		}
		if (target_addr <= 0xffff) {
			jump->flags |= PATCH_B | PATCH_ABS_B;
			return 1;
		}
		/* Otherwise fall back to an unconditional branch guarded
		   by an inverted conditional branch (see the REMOVE_COND
		   handling in sljit_generate_code). */
		extra_jump_flags = REMOVE_COND;

		diff -= sizeof(sljit_ins);
	}

	/* Unconditional branch reaches +/- 32 MB relative, or the first
	   64 MB absolute. */
	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		jump->flags |= PATCH_B | extra_jump_flags;
		return 1;
	}
	if (target_addr <= 0x03ffffff) {
		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
		return 1;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
keep_address:
#endif
	/* 32 / 48 bit absolute addresses need fewer immediate-load
	   instructions than the full 64 bit form. */
	if (target_addr <= 0x7fffffff) {
		jump->flags |= PATCH_ABS32;
		return 1;
	}
	if (target_addr <= 0x7fffffffffffl) {
		jump->flags |= PATCH_ABS48;
		return 1;
	}
#endif

	return 0;
}
    322 
/* Second pass of the compiler: copies the instruction words collected
   in the memory fragments into one executable buffer, records label and
   const addresses, compacts jump sequences (see detect_jump_type),
   patches branch displacements and immediate loads, flushes the
   instruction cache and, on indirect-call ABIs, appends a function
   descriptor.  Returns the entry address, or NULL on allocation
   failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	/* Reserve space for the trailing function descriptor (kept 8 byte
	   aligned on 64 bit by the (size & 0x1) padding word). */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#else
	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#endif
#endif
	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	word_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			*code_ptr = *buf_ptr++;
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
			/* These structures are ordered by their address. */
			if (label && label->size == word_count) {
				/* Just recording the address. */
				label->addr = (sljit_uw)code_ptr;
				label->size = code_ptr - code;
				label = label->next;
			}
			if (jump && jump->addr == word_count) {
				/* Rewind to the first word of the absolute
				   address load emitted for this jump. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
				jump->addr = (sljit_uw)(code_ptr - 3);
#else
				jump->addr = (sljit_uw)(code_ptr - 6);
#endif
				if (detect_jump_type(jump, code_ptr, code)) {
					/* A shorter encoding was chosen: drop the
					   instruction words that are no longer
					   needed. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
					code_ptr[-3] = code_ptr[0];
					code_ptr -= 3;
#else
					if (jump->flags & PATCH_ABS32) {
						code_ptr -= 3;
						code_ptr[-1] = code_ptr[2];
						code_ptr[0] = code_ptr[3];
					}
					else if (jump->flags & PATCH_ABS48) {
						code_ptr--;
						code_ptr[-1] = code_ptr[0];
						code_ptr[0] = code_ptr[1];
						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
						code_ptr[-3] ^= 0x8422;
						/* oris -> ori */
						code_ptr[-2] ^= 0x4000000;
					}
					else {
						code_ptr[-6] = code_ptr[0];
						code_ptr -= 6;
					}
#endif
					if (jump->flags & REMOVE_COND) {
						/* Invert the condition and make it
						   skip the following unconditional
						   branch. */
						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
						code_ptr++;
						jump->addr += sizeof(sljit_ins);
						code_ptr[0] = Bx;
						jump->flags -= IS_COND;
					}
				}
				jump = jump->next;
			}
			if (const_ && const_->addr == word_count) {
				const_->addr = (sljit_uw)code_ptr;
				const_ = const_->next;
			}
			code_ptr ++;
			word_count ++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may be attached to the very end of the code. */
	if (label && label->size == word_count) {
		label->addr = (sljit_uw)code_ptr;
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
#else
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
#endif

	/* All target addresses are known now: patch the branches and the
	   immediate loads. */
	jump = compiler->jumps;
	while (jump) {
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins*)jump->addr;
			if (jump->flags & PATCH_B) {
				if (jump->flags & IS_COND) {
					if (!(jump->flags & PATCH_ABS_B)) {
						addr = addr - jump->addr;
						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
					}
					else {
						SLJIT_ASSERT(addr <= 0xffff);
						/* 0x2 marks an absolute target. */
						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
					}
				}
				else {
					if (!(jump->flags & PATCH_ABS_B)) {
						addr = addr - jump->addr;
						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
					}
					else {
						SLJIT_ASSERT(addr <= 0x03ffffff);
						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
					}
				}
				break;
			}
			/* Set the fields of immediate loads. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
#else
			if (jump->flags & PATCH_ABS32) {
				SLJIT_ASSERT(addr <= 0x7fffffff);
				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
				break;
			}
			if (jump->flags & PATCH_ABS48) {
				SLJIT_ASSERT(addr <= 0x7fffffffffff);
				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
				break;
			}
			/* Full 64 bit immediate load (buf_ptr[2] holds the
			   shift between the halves). */
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
#endif
		} while (0);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
	SLJIT_CACHE_FLUSH(code, code_ptr);

#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* Align the function descriptor to 8 bytes. */
	if (((sljit_sw)code_ptr) & 0x4)
		code_ptr++;
	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
	return code_ptr;
#else
	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
	return code_ptr;
#endif
#else
	return code;
#endif
}
    514 
/* --------------------------------------------------------------------- */
/*  Entry, exit                                                          */
/* --------------------------------------------------------------------- */

/* inp_flags: */

/* Creates an index in data_transfer_insts array. */
#define LOAD_DATA	0x01
#define INDEXED		0x02
#define WRITE_BACK	0x04
#define WORD_DATA	0x00
#define BYTE_DATA	0x08
#define HALF_DATA	0x10
#define INT_DATA	0x18
#define SIGNED_DATA	0x20
/* Separates integer and floating point registers */
#define GPR_REG		0x3f
#define DOUBLE_DATA	0x40

#define MEM_MASK	0x7f

/* Other inp_flags. */

#define ARG_TEST	0x000100
/* Integer operation and set flags -> requires exts on 64 bit systems. */
#define ALT_SIGN_EXT	0x000200
/* This flag affects the RC() and OERC() macros. */
#define ALT_SET_FLAGS	0x000400
#define ALT_KEEP_CACHE	0x000800
#define ALT_FORM1	0x010000
#define ALT_FORM2	0x020000
#define ALT_FORM3	0x040000
#define ALT_FORM4	0x080000
#define ALT_FORM5	0x100000
#define ALT_FORM6	0x200000

/* Source and destination is register. */
#define REG_DEST	0x000001
#define REG1_SOURCE	0x000002
#define REG2_SOURCE	0x000004
/* getput_arg_fast returned true. */
#define FAST_DEST	0x000008
/* Multiple instructions are required. */
#define SLOW_DEST	0x000010
/*
ALT_SIGN_EXT		0x000200
ALT_SET_FLAGS		0x000400
ALT_FORM1		0x010000
...
ALT_FORM6		0x200000 */

/* Word-size specific helpers (immediate loads, operand emission). */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#include "sljitNativePPC_32.c"
#else
#include "sljitNativePPC_64.c"
#endif

/* Stack bookkeeping uses machine-word sized loads/stores. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#define STACK_STORE	STW
#define STACK_LOAD	LWZ
#else
#define STACK_STORE	STD
#define STACK_LOAD	LD
#endif
    579 
/* Emits the function prologue: saves TMP_ZERO and the callee saved
   registers below the stack pointer, stores the link register in the
   caller frame, zeroes TMP_ZERO, moves incoming arguments into saved
   registers and allocates the 16 byte aligned frame with a
   store-with-update instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, offs;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	FAIL_IF(push_inst(compiler, MFLR | D(0)));
	offs = -(sljit_s32)(sizeof(sljit_sw));
	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));

	/* Store the requested saved registers (S0 downwards)... */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		offs -= (sljit_s32)(sizeof(sljit_sw));
		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
	}

	/* ...and the scratch registers from SLJIT_FIRST_SAVED_REG up. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		offs -= (sljit_s32)(sizeof(sljit_sw));
		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
	}

	SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));

	/* Store the link register (fetched into r0 above); its slot in the
	   caller frame depends on the stack frame layout. */
#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
#else
	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
#endif

	/* TMP_ZERO = 0 (addi rD, 0, 0). */
	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
	/* Move the incoming arguments into the saved registers (mr via or). */
	if (args >= 1)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
	if (args >= 2)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
	if (args >= 3)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));

	/* Frame: locals + saved register area + ABI defined offset, rounded
	   up to a multiple of 16 bytes. */
	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
	local_size = (local_size + 15) & ~0xf;
	compiler->local_size = local_size;

	/* Allocate the frame; large frames need the size in a register. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	if (local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, -local_size));
		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}
#else
	if (local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, -local_size));
		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}
#endif

	return SLJIT_SUCCESS;
}
    643 
    644 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
    645 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
    646 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    647 {
    648 	CHECK_ERROR();
    649 	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    650 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    651 
    652 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
    653 	compiler->local_size = (local_size + 15) & ~0xf;
    654 	return SLJIT_SUCCESS;
    655 }
    656 
/* Emits the function epilogue: moves the return value into place,
   releases the stack frame, restores the link register, the saved
   registers and TMP_ZERO (mirroring sljit_emit_enter), then returns
   with blr. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp, offs;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	/* Undo the frame allocation done in sljit_emit_enter. */
	if (compiler->local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}

	/* Reload the saved link register from the caller frame. */
#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
#else
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
#endif

	/* Restore the registers in the reverse order of the prologue. */
	offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
		offs += (sljit_s32)(sizeof(sljit_sw));
	}

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
		offs += (sljit_s32)(sizeof(sljit_sw));
	}

	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
	SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));

	FAIL_IF(push_inst(compiler, MTLR | S(0)));
	FAIL_IF(push_inst(compiler, BLR));

	return SLJIT_SUCCESS;
}
    701 
#undef STACK_STORE
#undef STACK_LOAD

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

/* i/x - immediate/indexed form
   n/w - no write-back / write-back (1 bit)
   s/l - store/load (1 bit)
   u/s - signed/unsigned (1 bit)
   w/b/h/i - word/byte/half/int allowed (2 bit)
   It contains 32 items, but not all are different. */

/* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
#define INT_ALIGNED	0x10000
/* 64-bit only: there is no lwau instruction. */
#define UPDATE_REQ	0x20000

#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
#define ARCH_32_64(a, b)	a
/* Combines an opcode from data_transfer_insts with its destination
   register field (GPR or FPU register, selected by the data type bits
   of flags).  The 64 bit variant also strips the marker bits above. */
#define INST_CODE_AND_DST(inst, flags, reg) \
	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
#else
#define ARCH_32_64(a, b)	b
#define INST_CODE_AND_DST(inst, flags, reg) \
	(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
#endif
    730 
    731 static const sljit_ins data_transfer_insts[64 + 8] = {
    732 
    733 /* -------- Unsigned -------- */
    734 
    735 /* Word. */
    736 
    737 /* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
    738 /* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
    739 /* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
    740 /* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
    741 
    742 /* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
    743 /* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
    744 /* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
    745 /* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
    746 
    747 /* Byte. */
    748 
    749 /* u b n i s */ HI(38) /* stb */,
    750 /* u b n i l */ HI(34) /* lbz */,
    751 /* u b n x s */ HI(31) | LO(215) /* stbx */,
    752 /* u b n x l */ HI(31) | LO(87) /* lbzx */,
    753 
    754 /* u b w i s */ HI(39) /* stbu */,
    755 /* u b w i l */ HI(35) /* lbzu */,
    756 /* u b w x s */ HI(31) | LO(247) /* stbux */,
    757 /* u b w x l */ HI(31) | LO(119) /* lbzux */,
    758 
    759 /* Half. */
    760 
    761 /* u h n i s */ HI(44) /* sth */,
    762 /* u h n i l */ HI(40) /* lhz */,
    763 /* u h n x s */ HI(31) | LO(407) /* sthx */,
    764 /* u h n x l */ HI(31) | LO(279) /* lhzx */,
    765 
    766 /* u h w i s */ HI(45) /* sthu */,
    767 /* u h w i l */ HI(41) /* lhzu */,
    768 /* u h w x s */ HI(31) | LO(439) /* sthux */,
    769 /* u h w x l */ HI(31) | LO(311) /* lhzux */,
    770 
    771 /* Int. */
    772 
    773 /* u i n i s */ HI(36) /* stw */,
    774 /* u i n i l */ HI(32) /* lwz */,
    775 /* u i n x s */ HI(31) | LO(151) /* stwx */,
    776 /* u i n x l */ HI(31) | LO(23) /* lwzx */,
    777 
    778 /* u i w i s */ HI(37) /* stwu */,
    779 /* u i w i l */ HI(33) /* lwzu */,
    780 /* u i w x s */ HI(31) | LO(183) /* stwux */,
    781 /* u i w x l */ HI(31) | LO(55) /* lwzux */,
    782 
    783 /* -------- Signed -------- */
    784 
    785 /* Word. */
    786 
    787 /* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
    788 /* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
    789 /* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
    790 /* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),
    791 
    792 /* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
    793 /* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
    794 /* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
    795 /* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),
    796 
    797 /* Byte. */
    798 
    799 /* s b n i s */ HI(38) /* stb */,
    800 /* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
    801 /* s b n x s */ HI(31) | LO(215) /* stbx */,
    802 /* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,
    803 
    804 /* s b w i s */ HI(39) /* stbu */,
    805 /* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
    806 /* s b w x s */ HI(31) | LO(247) /* stbux */,
    807 /* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,
    808 
    809 /* Half. */
    810 
    811 /* s h n i s */ HI(44) /* sth */,
    812 /* s h n i l */ HI(42) /* lha */,
    813 /* s h n x s */ HI(31) | LO(407) /* sthx */,
    814 /* s h n x l */ HI(31) | LO(343) /* lhax */,
    815 
    816 /* s h w i s */ HI(45) /* sthu */,
    817 /* s h w i l */ HI(43) /* lhau */,
    818 /* s h w x s */ HI(31) | LO(439) /* sthux */,
    819 /* s h w x l */ HI(31) | LO(375) /* lhaux */,
    820 
    821 /* Int. */
    822 
    823 /* s i n i s */ HI(36) /* stw */,
    824 /* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
    825 /* s i n x s */ HI(31) | LO(151) /* stwx */,
    826 /* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),
    827 
    828 /* s i w i s */ HI(37) /* stwu */,
    829 /* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
    830 /* s i w x s */ HI(31) | LO(183) /* stwux */,
    831 /* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),
    832 
    833 /* -------- Double -------- */
    834 
    835 /* d   n i s */ HI(54) /* stfd */,
    836 /* d   n i l */ HI(50) /* lfd */,
    837 /* d   n x s */ HI(31) | LO(727) /* stfdx */,
    838 /* d   n x l */ HI(31) | LO(599) /* lfdx */,
    839 
    840 /* s   n i s */ HI(52) /* stfs */,
    841 /* s   n i l */ HI(48) /* lfs */,
    842 /* s   n x s */ HI(31) | LO(663) /* stfsx */,
    843 /* s   n x l */ HI(31) | LO(535) /* lfsx */,
    844 
    845 };
    846 
    847 #undef ARCH_32_64
    848 
/* Simple cases, (no caching is required). Tries to encode the memory access
   as a single load/store instruction.
   Returns 0 when a single instruction is not enough, 1 when it is and
   ARG_TEST was set (nothing is emitted), and -1 after the instruction has
   actually been emitted (or an error code via FAIL_IF). */
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_ins inst;

	/* Should work when (arg & REG_MASK) == 0. */
	SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0);
	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (arg & OFFS_REG_MASK) {
		/* Indexed form (base + index register); only usable when the
		   index needs no shifting (low two bits of argw are zero). */
		if (argw & 0x3)
			return 0;
		if (inp_flags & ARG_TEST)
			return 1;

		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
		return -1;
	}

	/* Write-back is meaningless without a base register. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
		inp_flags &= ~WRITE_BACK;

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* On 64 bit some d-form accesses (INT_ALIGNED) require a 4-byte
	   aligned displacement, and some update forms need special handling
	   (UPDATE_REQ) -- both fall back to the slow path. */
	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));

	if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
		return 0;
	if (inp_flags & ARG_TEST)
		return 1;
#endif

#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* On 32 bit the only constraint is the signed 16 bit displacement. */
	if (argw > SIMM_MAX || argw < SIMM_MIN)
		return 0;
	if (inp_flags & ARG_TEST)
		return 1;

	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
#endif

	FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
	return -1;
}
    896 
    897 /* See getput_arg below.
    898    Note: can_cache is called only for binary operators. Those operator always
    899    uses word arguments without write back. */
    900 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
    901 {
    902 	sljit_sw high_short, next_high_short;
    903 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    904 	sljit_sw diff;
    905 #endif
    906 
    907 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
    908 
    909 	if (arg & OFFS_REG_MASK)
    910 		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
    911 
    912 	if (next_arg & OFFS_REG_MASK)
    913 		return 0;
    914 
    915 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    916 	high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
    917 	next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
    918 	return high_short == next_high_short;
    919 #else
    920 	if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
    921 		high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
    922 		next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
    923 		if (high_short == next_high_short)
    924 			return 1;
    925 	}
    926 
    927 	diff = argw - next_argw;
    928 	if (!(arg & REG_MASK))
    929 		return diff <= SIMM_MAX && diff >= SIMM_MIN;
    930 
    931 	if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
    932 		return 1;
    933 
    934 	return 0;
    935 #endif
    936 }
    937 
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Some 64 bit d-form accesses (INT_ALIGNED, i.e. the ld/std family) require
   a 4-byte aligned displacement. When the displacement relative to the
   cached value in TMP_REG3 is misaligned, fold the low two bits into
   TMP_REG3 itself so the remaining immediate becomes aligned.
   Uses 'inst' and 'compiler' from the enclosing scope. */
#define ADJUST_CACHED_IMM(imm) \
	if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
		/* Adjust cached value. Fortunately this is really a rare case */ \
		compiler->cache_argw += imm & 0x3; \
		FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
		imm &= ~0x3; \
	}
#endif
    947 
/* Emit the necessary instructions. See can_cache above.
   Handles the memory accesses getput_arg_fast could not encode in a single
   instruction. (next_arg, next_argw) describe the following access (or 0)
   and are only used to decide whether an address part computed here should
   be cached in TMP_REG3 for reuse. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_s32 tmp_r;
	sljit_ins inst;
	sljit_sw high_short, next_high_short;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_sw diff;
#endif

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* A GPR load may use the destination register itself as scratch. */
	tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
	/* Special case for "mov reg, [reg, ... ]". */
	if ((arg & REG_MASK) == tmp_r)
		tmp_r = TMP_REG1;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		argw &= 0x3;
		/* Otherwise getput_arg_fast would capture it. */
		SLJIT_ASSERT(argw);

		if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
			/* The shifted index is already in TMP_REG3. */
			tmp_r = TMP_REG3;
		else {
			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
				/* The next access uses the same shifted index: cache it. */
				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
				compiler->cache_argw = argw;
				tmp_r = TMP_REG3;
			}
			/* Shift the index register left by argw (1..3). */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
#endif
		}
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
	}

	/* Write-back is meaningless without a base register. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
		inp_flags &= ~WRITE_BACK;

	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (argw <= 0x7fff7fffl && argw >= -0x80000000l
			&& (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
#endif

		/* 32 bit offset: addis loads the rounded high 16 bits, then the
		   d-form access supplies the (signed) low 16 bits. */
		arg &= REG_MASK;
		high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
		/* The getput_arg_fast should handle this otherwise. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
#else
		SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
#endif

		if (inp_flags & WRITE_BACK) {
			if (arg == reg) {
				/* Copy reg aside: the base register (== reg) is about
				   to be modified by the addis below. */
				FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg)));
				reg = tmp_r;
			}
			tmp_r = arg;
			FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
		}
		else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
			if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
				next_high_short = (sljit_s32)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
				if (high_short == next_high_short) {
					/* The next access shares the high part: cache it. */
					compiler->cache_arg = SLJIT_MEM | arg;
					compiler->cache_argw = high_short;
					tmp_r = TMP_REG3;
				}
			}
			FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
		}
		else
			/* base + high part already cached in TMP_REG3. */
			tmp_r = TMP_REG3;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	}

	/* Everything else is PPC-64 only. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: reuse the cached immediate when it is within
		   simm16 reach, otherwise load the full value. */
		diff = argw - compiler->cache_argw;
		if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			ADJUST_CACHED_IMM(diff);
			return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
		}

		diff = argw - next_argw;
		if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			SLJIT_ASSERT(inp_flags & LOAD_DATA);

			compiler->cache_arg = SLJIT_IMM;
			compiler->cache_argw = argw;
			tmp_r = TMP_REG3;
		}

		FAIL_IF(load_immediate(compiler, tmp_r, argw));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
	}

	/* Cached base+offset address close enough to argw. */
	diff = argw - compiler->cache_argw;
	if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
		ADJUST_CACHED_IMM(diff);
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
	}

	/* Cached immediate close enough: nudge it and use the indexed form. */
	if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		if (compiler->cache_argw != argw) {
			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
			compiler->cache_argw = argw;
		}
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
		/* Load the full offset and cache it for the next access. */
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;

		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	diff = argw - next_argw;
	if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		/* Cache the fully resolved address (base + offset) in TMP_REG3. */
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
		FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));

		compiler->cache_arg = arg;
		compiler->cache_argw = argw;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
	}

	if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;
		tmp_r = TMP_REG3;
	}
	else
		FAIL_IF(load_immediate(compiler, tmp_r, argw));

	/* Get the indexed version instead of the normal one. */
	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
#endif
}
   1115 
   1116 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
   1117 {
   1118 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
   1119 		return compiler->error;
   1120 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
   1121 }
   1122 
/* Generic operation emitter: resolves the addressing modes of dst, src1 and
   src2 (using TMP_REG1/TMP_REG2 as scratch and TMP_REG3 for address
   caching), emits the operation via emit_single_op, then stores the result
   when the destination is a memory operand. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	sljit_s32 dst_r;
	sljit_s32 src1_r;
	sljit_s32 src2_r;
	sljit_s32 sugg_src2_r = TMP_REG2;
	sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS);

	if (!(input_flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	/* Destination check. */
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* A move with no memory access and no destination is a no-op. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
		dst_r = TMP_REG2;
	}
	else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		/* Moves may load src2 directly into the destination register. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			sugg_src2_r = dst_r;
	}
	else {
		SLJIT_ASSERT(dst & SLJIT_MEM);
		/* ARG_TEST: probe (without emitting) whether the store fits in
		   a single instruction; dst_r == 0 marks a pending slow store. */
		if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
			flags |= FAST_DEST;
			dst_r = TMP_REG2;
		}
		else {
			flags |= SLOW_DEST;
			dst_r = 0;
		}
	}

	/* Source 1. src1_r == 0 marks an address too complex for the fast
	   path; it is resolved below with caching hints. */
	if (FAST_IS_REG(src1)) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	}
	else if (src1 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
		src1_r = TMP_REG1;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
		FAIL_IF(compiler->error);
		src1_r = TMP_REG1;
	}
	else
		src1_r = 0;

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			dst_r = src2_r;
	}
	else if (src2 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
		src2_r = sugg_src2_r;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
		FAIL_IF(compiler->error);
		src2_r = sugg_src2_r;
	}
	else
		src2_r = 0;

	/* src1_r, src2_r and dst_r can be zero (=unprocessed).
	   All arguments are complex addressing modes, and it is a binary operator. */
	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
		/* Pick the load order that lets getput_arg keep a cached address
		   shared with the following access (see can_cache). */
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
		}
		src1_r = TMP_REG1;
		src2_r = TMP_REG2;
	}
	else if (src1_r == 0 && src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
		src1_r = TMP_REG1;
	}
	else if (src1_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		src1_r = TMP_REG1;
	}
	else if (src2_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
		src2_r = sugg_src2_r;
	}

	if (dst_r == 0)
		dst_r = TMP_REG2;

	if (src1_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
		src1_r = TMP_REG1;
	}

	if (src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
		src2_r = sugg_src2_r;
	}

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	/* Store the result when the destination is a memory operand. */
	if (flags & (FAST_DEST | SLOW_DEST)) {
		if (flags & FAST_DEST)
			FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
		else
			FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
	}
	return SLJIT_SUCCESS;
}
   1251 
/* Emits a zero-operand operation: nop/breakpoint, long multiply
   (R1:R0 = R0 * R1) and divide / divmod (R0 = R0 / R1, R1 = R0 % R1). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_s32 int_op = op & SLJIT_I32_OP;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
	case SLJIT_NOP:
		return push_inst(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* Save R0 in TMP_REG1 (mr): the low-part multiply overwrites R0
		   before the high-part multiply reads the original value. */
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
#else
		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
#endif
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Save the dividend (R0) in TMP_REG1; the remainder is computed
		   below as dividend - quotient * divisor. */
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
		FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
#else
		FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
#endif
		/* subf rD,rA,rB computes rB - rA: R1 = TMP_REG1 - R1. */
		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
#else
		return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
#endif
	}

	return SLJIT_SUCCESS;
}
   1298 
/* Emits a typed mov inside sljit_emit_op1: an immediate source is first
   narrowed with type_cast and then emitted as a plain (word) SLJIT_MOV. */
#define EMIT_MOV(type, type_flags, type_cast) \
	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
   1301 
/* Emits a single-source operation (moves, not, neg, clz). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	op = GET_OPCODE(op);
	/* An immediate zero source is replaced by the dedicated zero register. */
	if ((src & SLJIT_IMM) && srcw == 0)
		src = TMP_ZERO;

	/* Clear XER (mtxer from the zero register) before overflow-setting ops. */
	if (op_flags & SLJIT_SET_O)
		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));

	if (op_flags & SLJIT_I32_OP) {
		if (op < SLJIT_NOT) {
			/* Register-to-itself move with no narrowing cast: no-op. */
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
			/* NOTE(review): the signed<->unsigned swaps below appear to
			   pick the cheaper form when only the low 32 bits matter
			   (memory loads use the lwz family, immediates are sign
			   extended) -- confirm against the sljit documentation. */
			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOV_U32;
			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOVU_U32;
			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOV_S32;
			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOVU_S32;
#endif
		}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		else {
			/* Most operations expect sign extended arguments. */
			flags |= INT_DATA | SIGNED_DATA;
			if (src & SLJIT_IMM)
				srcw = (sljit_s32)srcw;
		}
#endif
	}

	switch (op) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* On 32 bit, int-sized moves are plain word moves. */
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
#endif
		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	case SLJIT_MOV_U32:
		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));

	case SLJIT_MOV_S32:
		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
#endif

	case SLJIT_MOV_U8:
		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));

	case SLJIT_MOV_S8:
		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));

	case SLJIT_MOV_U16:
		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));

	case SLJIT_MOV_S16:
		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));

	/* MOVU variants: moves with write-back of the address register. */
	case SLJIT_MOVU:
	case SLJIT_MOVU_P:
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	case SLJIT_MOVU_U32:
	case SLJIT_MOVU_S32:
#endif
		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	case SLJIT_MOVU_U32:
		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA | WRITE_BACK, (sljit_u32));

	case SLJIT_MOVU_S32:
		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s32));
#endif

	case SLJIT_MOVU_U8:
		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, (sljit_u8));

	case SLJIT_MOVU_S8:
		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s8));

	case SLJIT_MOVU_U16:
		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, (sljit_u16));

	case SLJIT_MOVU_S16:
		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s16));

	case SLJIT_NOT:
		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_NEG:
		return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_CLZ:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM1 selects the 32 bit count (cntlzw vs cntlzd). */
		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
#else
		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
#endif
	}

	return SLJIT_SUCCESS;
}
   1421 
   1422 #undef EMIT_MOV
   1423 
/* Immediate classification helpers for sljit_emit_op2: each tests whether
   (src, srcw) is an immediate that fits a particular PPC encoding. */

/* Signed 16 bit immediate (addi/cmpi style simm field). */
#define TEST_SL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)

/* Unsigned 16 bit immediate (ori/cmpli style uimm field). */
#define TEST_UL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Signed 16 bit immediate shifted left by 16 (addis style); on 64 bit the
   value must also fit in 32 bits. */
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#else
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
#endif

/* Unsigned 16 bit immediate shifted left by 16 (oris style). */
#define TEST_UH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000))

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Immediate reachable by an addis + addi pair; the 0x7fff7fff upper bound
   keeps the rounded high part within the signed 32 bit range. */
#define TEST_ADD_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#else
#define TEST_ADD_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Immediate that fits in an unsigned 32 bit value. */
#define TEST_UI_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
#else
#define TEST_UI_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif
   1456 
   1457 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
   1458 	sljit_s32 dst, sljit_sw dstw,
   1459 	sljit_s32 src1, sljit_sw src1w,
   1460 	sljit_s32 src2, sljit_sw src2w)
   1461 {
   1462 	sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0;
   1463 
   1464 	CHECK_ERROR();
   1465 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
   1466 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1467 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1468 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1469 
   1470 	if ((src1 & SLJIT_IMM) && src1w == 0)
   1471 		src1 = TMP_ZERO;
   1472 	if ((src2 & SLJIT_IMM) && src2w == 0)
   1473 		src2 = TMP_ZERO;
   1474 
   1475 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1476 	if (op & SLJIT_I32_OP) {
   1477 		/* Most operations expect sign extended arguments. */
   1478 		flags |= INT_DATA | SIGNED_DATA;
   1479 		if (src1 & SLJIT_IMM)
   1480 			src1w = (sljit_s32)(src1w);
   1481 		if (src2 & SLJIT_IMM)
   1482 			src2w = (sljit_s32)(src2w);
   1483 		if (GET_FLAGS(op))
   1484 			flags |= ALT_SIGN_EXT;
   1485 	}
   1486 #endif
   1487 	if (op & SLJIT_SET_O)
   1488 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
   1489 	if (src2 == TMP_REG2)
   1490 		flags |= ALT_KEEP_CACHE;
   1491 
   1492 	switch (GET_OPCODE(op)) {
   1493 	case SLJIT_ADD:
   1494 		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1495 			if (TEST_SL_IMM(src2, src2w)) {
   1496 				compiler->imm = src2w & 0xffff;
   1497 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1498 			}
   1499 			if (TEST_SL_IMM(src1, src1w)) {
   1500 				compiler->imm = src1w & 0xffff;
   1501 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1502 			}
   1503 			if (TEST_SH_IMM(src2, src2w)) {
   1504 				compiler->imm = (src2w >> 16) & 0xffff;
   1505 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1506 			}
   1507 			if (TEST_SH_IMM(src1, src1w)) {
   1508 				compiler->imm = (src1w >> 16) & 0xffff;
   1509 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1510 			}
   1511 			/* Range between -1 and -32768 is covered above. */
   1512 			if (TEST_ADD_IMM(src2, src2w)) {
   1513 				compiler->imm = src2w & 0xffffffff;
   1514 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1515 			}
   1516 			if (TEST_ADD_IMM(src1, src1w)) {
   1517 				compiler->imm = src1w & 0xffffffff;
   1518 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
   1519 			}
   1520 		}
   1521 		if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) {
   1522 			if (TEST_SL_IMM(src2, src2w)) {
   1523 				compiler->imm = src2w & 0xffff;
   1524 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1525 			}
   1526 			if (TEST_SL_IMM(src1, src1w)) {
   1527 				compiler->imm = src1w & 0xffff;
   1528 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1529 			}
   1530 		}
   1531 		return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
   1532 
   1533 	case SLJIT_ADDC:
   1534 		return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
   1535 
   1536 	case SLJIT_SUB:
   1537 		if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1538 			if (TEST_SL_IMM(src2, -src2w)) {
   1539 				compiler->imm = (-src2w) & 0xffff;
   1540 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1541 			}
   1542 			if (TEST_SL_IMM(src1, src1w)) {
   1543 				compiler->imm = src1w & 0xffff;
   1544 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1545 			}
   1546 			if (TEST_SH_IMM(src2, -src2w)) {
   1547 				compiler->imm = ((-src2w) >> 16) & 0xffff;
   1548 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1549 			}
   1550 			/* Range between -1 and -32768 is covered above. */
   1551 			if (TEST_ADD_IMM(src2, -src2w)) {
   1552 				compiler->imm = -src2w & 0xffffffff;
   1553 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1554 			}
   1555 		}
   1556 		if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) {
   1557 			if (!(op & SLJIT_SET_U)) {
   1558 				/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
   1559 				if (TEST_SL_IMM(src2, src2w)) {
   1560 					compiler->imm = src2w & 0xffff;
   1561 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1562 				}
   1563 				if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) {
   1564 					compiler->imm = src1w & 0xffff;
   1565 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1566 				}
   1567 			}
   1568 			if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) {
   1569 				/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
   1570 				if (TEST_UL_IMM(src2, src2w)) {
   1571 					compiler->imm = src2w & 0xffff;
   1572 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1573 				}
   1574 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
   1575 			}
   1576 			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) {
   1577 				compiler->imm = src2w;
   1578 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1579 			}
   1580 			return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
   1581 		}
   1582 		if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) {
   1583 			if (TEST_SL_IMM(src2, -src2w)) {
   1584 				compiler->imm = (-src2w) & 0xffff;
   1585 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1586 			}
   1587 		}
   1588 		/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
   1589 		return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w);
   1590 
   1591 	case SLJIT_SUBC:
   1592 		return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w);
   1593 
   1594 	case SLJIT_MUL:
   1595 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1596 		if (op & SLJIT_I32_OP)
   1597 			flags |= ALT_FORM2;
   1598 #endif
   1599 		if (!GET_FLAGS(op)) {
   1600 			if (TEST_SL_IMM(src2, src2w)) {
   1601 				compiler->imm = src2w & 0xffff;
   1602 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1603 			}
   1604 			if (TEST_SL_IMM(src1, src1w)) {
   1605 				compiler->imm = src1w & 0xffff;
   1606 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1607 			}
   1608 		}
   1609 		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
   1610 
   1611 	case SLJIT_AND:
   1612 	case SLJIT_OR:
   1613 	case SLJIT_XOR:
   1614 		/* Commutative unsigned operations. */
   1615 		if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
   1616 			if (TEST_UL_IMM(src2, src2w)) {
   1617 				compiler->imm = src2w;
   1618 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1619 			}
   1620 			if (TEST_UL_IMM(src1, src1w)) {
   1621 				compiler->imm = src1w;
   1622 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1623 			}
   1624 			if (TEST_UH_IMM(src2, src2w)) {
   1625 				compiler->imm = (src2w >> 16) & 0xffff;
   1626 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1627 			}
   1628 			if (TEST_UH_IMM(src1, src1w)) {
   1629 				compiler->imm = (src1w >> 16) & 0xffff;
   1630 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1631 			}
   1632 		}
   1633 		if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) {
   1634 			if (TEST_UI_IMM(src2, src2w)) {
   1635 				compiler->imm = src2w;
   1636 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1637 			}
   1638 			if (TEST_UI_IMM(src1, src1w)) {
   1639 				compiler->imm = src1w;
   1640 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1641 			}
   1642 		}
   1643 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1644 
   1645 	case SLJIT_ASHR:
   1646 		if (op & SLJIT_KEEP_FLAGS)
   1647 			flags |= ALT_FORM3;
   1648 		/* Fall through. */
   1649 	case SLJIT_SHL:
   1650 	case SLJIT_LSHR:
   1651 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1652 		if (op & SLJIT_I32_OP)
   1653 			flags |= ALT_FORM2;
   1654 #endif
   1655 		if (src2 & SLJIT_IMM) {
   1656 			compiler->imm = src2w;
   1657 			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1658 		}
   1659 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1660 	}
   1661 
   1662 	return SLJIT_SUCCESS;
   1663 }
   1664 
/* Translate an abstract sljit integer register id to the physical
   PPC register number via the backend's reg_map table. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}
   1670 
/* Translate an abstract sljit float register id to the physical FPR
   number. On PPC the mapping is the identity, so no table is needed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}
   1676 
/* Emit a raw, caller-supplied machine instruction. On PPC every
   instruction is one 32-bit word, so 'instruction' is read as a single
   sljit_ins; 'size' is validated by the check macro but otherwise
   unused here. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_ins*)instruction);
}
   1685 
   1686 /* --------------------------------------------------------------------- */
   1687 /*  Floating point operators                                             */
   1688 /* --------------------------------------------------------------------- */
   1689 
/* Report FPU availability. A build-time override (SLJIT_IS_FPU_AVAILABLE)
   wins; otherwise the FPU is assumed present on PPC. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#else
	/* Available by default. */
	return 1;
#endif
}
   1699 
/* Map the SLJIT_F32_OP bit of 'op' into the memory-access flag word:
   DOUBLE_DATA for 64-bit ops, single-precision flag added for 32-bit ones
   (relies on SLJIT_F32_OP == 0x100, asserted in sljit_emit_fop1). */
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
/* Pick the single- or double-precision opcode depending on SLJIT_F32_OP. */
#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)

/* Offsets (relative to SLJIT_SP) of the scratch slot used to shuttle
   values between integer and float registers through memory. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))

/* On 32-bit PPC the 8-byte slot is addressed as two words; which word
   holds the low half depends on endianness. */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
#endif

#endif /* SLJIT_CONFIG_PPC_64 */
   1717 
/* Convert a double (or float) in 'src' to a signed integer stored to
   'dst'. The FP->int conversion result lands in an FPR (TMP_FREG1) and
   must be moved to its destination through memory, since PPC has no
   direct FPR->GPR move here. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (src & SLJIT_MEM) {
		/* We can ignore the temporary data store on the stack from caching point of view. */
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
		src = TMP_FREG1;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	op = GET_OPCODE(op);
	/* Truncate toward zero: 32-bit result via fctiwz, 64-bit via fctidz. */
	FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (op == SLJIT_CONV_SW_FROM_F64) {
		/* 64-bit integer result: spill the full FPR and reload as a word. */
		if (FAST_IS_REG(dst)) {
			FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
			return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
		}
		return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
	}

#else
	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;
#endif

	/* 32-bit integer result path (both configs): store the low word of
	   the FPR with stfiwx, then (for a register dst) reload it. */
	if (FAST_IS_REG(dst)) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
		return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
	}

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* stfiwx is register+register indexed only, so fold the addressing
	   mode of 'dst' into base (dst & REG_MASK) + index (dstw) registers. */
	if (dst & OFFS_REG_MASK) {
		dstw &= 0x3;
		if (dstw) {
			/* Shift the index register left by dstw (scaled addressing). */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
#endif
			dstw = TMP_REG1;
		}
		else
			dstw = OFFS_REG(dst);
	}
	else {
		if ((dst & REG_MASK) && !dstw) {
			/* Plain [reg] address: use the register as the index, base 0. */
			dstw = dst & REG_MASK;
			dst = 0;
		}
		else {
			/* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */
			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
			dstw = TMP_REG1;
		}
	}

	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
}
   1785 
/* Convert a signed integer 'src' to a double (or float) in 'dst'.
   The 64-bit config can use fcfid directly; the 32-bit config has no
   int->float instruction and builds the value via a bit-pattern trick
   (see the long comment below). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_IMM) {
		/* For a 32-bit source, truncate the immediate before loading. */
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}
	else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
		/* Sign-extend a 32-bit source to the full 64-bit register. */
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
		else
			FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		src = TMP_REG1;
	}

	/* Move the integer into an FPR through the stack scratch slot. */
	if (FAST_IS_REG(src)) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
	}
	else
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));

	FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	if (op & SLJIT_F32_OP)
		/* Round the double result down to single precision in place. */
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#else

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	sljit_s32 invert_sign = 1;

	if (src & SLJIT_IMM) {
		/* The sign flip is folded into the immediate load. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
		src = TMP_REG1;
		invert_sign = 0;
	}
	else if (!FAST_IS_REG(src)) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
		src = TMP_REG1;
	}

	/* First, a special double floating point value is constructed: (2^53 + (input xor (2^31)))
	   The double precision format has exactly 53 bit precision, so the lower 32 bit represents
	   the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
	   to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
	   point value, we need to subtract 2^53 + 2^31 from the constructed value. */
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
	if (invert_sign)
		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
	/* Assemble 0x43300000:input in the scratch slot and load it as a double. */
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
	/* Build the bias constant 0x43300000:80000000 (2^53 + 2^31) the same way. */
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));

	FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	if (op & SLJIT_F32_OP)
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#endif
}
   1863 
/* Compare two float operands with fcmpu; the result lands in CR field 4,
   where the float branch/flag helpers below expect it. Memory operands
   are loaded into the temporary FPRs first (src1 before src2, so the
   second load can reuse the first one's address caching). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
		src2 = TMP_FREG2;
	}

	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
}
   1880 
/* Single-operand float operations: move, negate, absolute value, and
   the float<->double conversions (integer conversions and compares are
   dispatched to the helpers above by the SELECT_FOP1 macro). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	/* F64_FROM_F32 must LOAD with the source (f32) width; flip the bit
	   here and flip it back in the switch below before the store. */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_CONV_F64_FROM_F32:
		op ^= SLJIT_F32_OP;
		if (op & SLJIT_F32_OP) {
			/* Narrowing f64 -> f32: round to single precision. */
			FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
			break;
		}
		/* Widening f32 -> f64 is a plain register move. Fall through. */
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
			else
				/* Destination is memory: store straight from src. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
		break;
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
	return SLJIT_SUCCESS;
}
   1932 
/* Two-operand float arithmetic (add/sub/mul/div). Memory operands are
   loaded into TMP_FREG1/TMP_FREG2; ALT_FORM1/ALT_FORM2 here only mark
   which operands needed the slow (cached) load path, not opcode forms. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;

	/* Try the fast (single-instruction) load first; defer slow loads so
	   their cache hints can be chosen below. */
	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
			FAIL_IF(compiler->error);
			src1 = TMP_FREG1;
		} else
			flags |= ALT_FORM1;
	}

	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
			FAIL_IF(compiler->error);
			src2 = TMP_FREG2;
		} else
			flags |= ALT_FORM2;
	}

	if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
		/* Both need slow loads: order them so the address cache helps
		   either the other source or the upcoming store to dst. */
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & ALT_FORM1)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
	else if (flags & ALT_FORM2)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));

	if (flags & ALT_FORM1)
		src1 = TMP_FREG1;
	if (flags & ALT_FORM2)
		src2 = TMP_FREG2;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
		break;
	}

	if (dst_r == TMP_FREG2)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));

	return SLJIT_SUCCESS;
}
   2010 
   2011 #undef FLOAT_DATA
   2012 #undef SELECT_FOP
   2013 
   2014 /* --------------------------------------------------------------------- */
   2015 /*  Other instructions                                                   */
   2016 /* --------------------------------------------------------------------- */
   2017 
/* Save the return address (link register) of a fast call into 'dst',
   so sljit_emit_fast_return can restore it later. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (FAST_IS_REG(dst))
		return push_inst(compiler, MFLR | D(dst));

	/* Memory. */
	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
}
   2035 
/* Return from a fast call: load the saved return address from 'src'
   into the link register and branch to it. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst(compiler, MTLR | S(src)));
	else {
		/* Bring a memory or immediate source into TMP_REG2 first. */
		if (src & SLJIT_MEM)
			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
		else if (src & SLJIT_IMM)
			FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
	}
	return push_inst(compiler, BLR);
}
   2053 
   2054 /* --------------------------------------------------------------------- */
   2055 /*  Conditional instructions                                             */
   2056 /* --------------------------------------------------------------------- */
   2057 
   2058 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   2059 {
   2060 	struct sljit_label *label;
   2061 
   2062 	CHECK_ERROR_PTR();
   2063 	CHECK_PTR(check_sljit_emit_label(compiler));
   2064 
   2065 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2066 		return compiler->last_label;
   2067 
   2068 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   2069 	PTR_FAIL_IF(!label);
   2070 	set_label(label, compiler);
   2071 	return label;
   2072 }
   2073 
   2074 static sljit_ins get_bo_bi_flags(sljit_s32 type)
   2075 {
   2076 	switch (type) {
   2077 	case SLJIT_EQUAL:
   2078 		return (12 << 21) | (2 << 16);
   2079 
   2080 	case SLJIT_NOT_EQUAL:
   2081 		return (4 << 21) | (2 << 16);
   2082 
   2083 	case SLJIT_LESS:
   2084 	case SLJIT_LESS_F64:
   2085 		return (12 << 21) | ((4 + 0) << 16);
   2086 
   2087 	case SLJIT_GREATER_EQUAL:
   2088 	case SLJIT_GREATER_EQUAL_F64:
   2089 		return (4 << 21) | ((4 + 0) << 16);
   2090 
   2091 	case SLJIT_GREATER:
   2092 	case SLJIT_GREATER_F64:
   2093 		return (12 << 21) | ((4 + 1) << 16);
   2094 
   2095 	case SLJIT_LESS_EQUAL:
   2096 	case SLJIT_LESS_EQUAL_F64:
   2097 		return (4 << 21) | ((4 + 1) << 16);
   2098 
   2099 	case SLJIT_SIG_LESS:
   2100 		return (12 << 21) | (0 << 16);
   2101 
   2102 	case SLJIT_SIG_GREATER_EQUAL:
   2103 		return (4 << 21) | (0 << 16);
   2104 
   2105 	case SLJIT_SIG_GREATER:
   2106 		return (12 << 21) | (1 << 16);
   2107 
   2108 	case SLJIT_SIG_LESS_EQUAL:
   2109 		return (4 << 21) | (1 << 16);
   2110 
   2111 	case SLJIT_OVERFLOW:
   2112 	case SLJIT_MUL_OVERFLOW:
   2113 		return (12 << 21) | (3 << 16);
   2114 
   2115 	case SLJIT_NOT_OVERFLOW:
   2116 	case SLJIT_MUL_NOT_OVERFLOW:
   2117 		return (4 << 21) | (3 << 16);
   2118 
   2119 	case SLJIT_EQUAL_F64:
   2120 		return (12 << 21) | ((4 + 2) << 16);
   2121 
   2122 	case SLJIT_NOT_EQUAL_F64:
   2123 		return (4 << 21) | ((4 + 2) << 16);
   2124 
   2125 	case SLJIT_UNORDERED_F64:
   2126 		return (12 << 21) | ((4 + 3) << 16);
   2127 
   2128 	case SLJIT_ORDERED_F64:
   2129 		return (4 << 21) | ((4 + 3) << 16);
   2130 
   2131 	default:
   2132 		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
   2133 		return (20 << 21);
   2134 	}
   2135 }
   2136 
/* Emit a (possibly conditional) jump with a not-yet-known target: a
   patchable constant load into TMP_CALL_REG followed by mtctr + bcctr.
   The linker pass can later shorten this into a direct branch. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins bo_bi_flags;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	bo_bi_flags = get_bo_bi_flags(type & 0xff);
	if (!bo_bi_flags)
		return NULL;

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* In PPC, we don't need to touch the arguments. */
	if (type < SLJIT_JUMP)
		jump->flags |= IS_COND;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
	if (type >= SLJIT_CALL0)
		jump->flags |= IS_CALL;
#endif

	/* Placeholder address; patched once the target is resolved. */
	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
	jump->addr = compiler->size;
	/* LR update bit set for fast calls so the callee can return. */
	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
	return jump;
}
   2168 
/* Emit an indirect jump/call through a register, memory operand, or
   immediate target, always dispatched via the count register (bcctr). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump = NULL;
	sljit_s32 src_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src)) {
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		/* ABIs that pass the entry address need it in TMP_CALL_REG;
		   OR rx,rx,rx is the canonical register copy. */
		if (type >= SLJIT_CALL0) {
			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
			src_r = TMP_CALL_REG;
		}
		else
			src_r = src;
#else
		src_r = src;
#endif
	} else if (src & SLJIT_IMM) {
		/* Known target: record a jump so the constant can be patched. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF(!jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		if (type >= SLJIT_CALL0)
			jump->flags |= IS_CALL;
#endif
		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
		src_r = TMP_CALL_REG;
	}
	else {
		/* Memory operand: load the target address first. */
		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
		src_r = TMP_CALL_REG;
	}

	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
	if (jump)
		jump->addr = compiler->size;
	/* (20 << 21) = branch always; LR update bit set for fast calls. */
	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
}
   2211 
/* Get a bit from CR, all other bits are zeroed: mfcr copies the whole
   condition register into 'dst', then rlwinm rotates the selected bit
   into position 0 and masks everything else away. */
#define GET_CR_BIT(bit, dst) \
	FAIL_IF(push_inst(compiler, MFCR | D(dst))); \
	FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));

/* Flip bit 0 of 'dst' (turns a 0/1 flag into its complement). */
#define INVERT_BIT(dst) \
	FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
   2219 
/* Materialize a condition-flag value (0 or 1) into 'dst'. For a plain
   move (op < SLJIT_ADD) the bit is extracted straight into dst or
   TMP_REG2; otherwise the bit is combined with 'src' through a regular
   two-operand op (e.g. dst = src OP flag). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_s32 reg, input_flags;
	sljit_s32 flags = GET_ALL_FLAGS(op);
	sljit_sw original_dstw = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);
	/* The extracted bit goes directly into a register dst only for the
	   plain-move case; otherwise TMP_REG2 holds it for the op below. */
	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
		/* Preload the second operand before MFCR clobbers nothing but
		   we still need src in a register for the combining op. */
		ADJUST_LOCAL_OFFSET(src, srcw);
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
#else
		input_flags = WORD_DATA;
#endif
		FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
		src = TMP_REG1;
		srcw = 0;
	}

	/* Extract the CR bit matching 'type'; the "negative" conditions
	   extract the same bit and invert it. Bit numbers mirror those in
	   get_bo_bi_flags (float conditions live at 4 + x). */
	switch (type & 0xff) {
	case SLJIT_EQUAL:
		GET_CR_BIT(2, reg);
		break;

	case SLJIT_NOT_EQUAL:
		GET_CR_BIT(2, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		GET_CR_BIT(4 + 0, reg);
		break;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		GET_CR_BIT(4 + 0, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		GET_CR_BIT(4 + 1, reg);
		break;

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		GET_CR_BIT(4 + 1, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_SIG_LESS:
		GET_CR_BIT(0, reg);
		break;

	case SLJIT_SIG_GREATER_EQUAL:
		GET_CR_BIT(0, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_SIG_GREATER:
		GET_CR_BIT(1, reg);
		break;

	case SLJIT_SIG_LESS_EQUAL:
		GET_CR_BIT(1, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		GET_CR_BIT(3, reg);
		break;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		GET_CR_BIT(3, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_EQUAL_F64:
		GET_CR_BIT(4 + 2, reg);
		break;

	case SLJIT_NOT_EQUAL_F64:
		GET_CR_BIT(4 + 2, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_UNORDERED_F64:
		GET_CR_BIT(4 + 3, reg);
		break;

	case SLJIT_ORDERED_F64:
		GET_CR_BIT(4 + 3, reg);
		INVERT_BIT(reg);
		break;

	default:
		SLJIT_ASSERT_STOP();
		break;
	}

	if (op < SLJIT_ADD) {
		/* Plain move: store the extracted bit if dst is in memory. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		if (op == SLJIT_MOV)
			input_flags = WORD_DATA;
		else {
			op = SLJIT_MOV_U32;
			input_flags = INT_DATA;
		}
#else
		op = SLJIT_MOV;
		input_flags = WORD_DATA;
#endif
		if (reg != TMP_REG2)
			return SLJIT_SUCCESS;
		return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
	}

	/* Combine: dst = src OP flag, re-entering through sljit_emit_op2
	   with checks skipped since the arguments were already validated. */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
}
   2360 
/* Emit a patchable constant load: a full-width immediate sequence whose
   value can be rewritten later via the returned sljit_const record. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;

	/* emit_const always produces the full-length (patchable) sequence. */
	PTR_FAIL_IF(emit_const(compiler, reg, init_value));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
	return const_;
}
   2382