Home | History | Annotate | Line # | Download | only in sljit_src
      1 /*	$NetBSD: sljitNativePPC_common.c,v 1.8 2019/01/20 23:14:16 alnsn Exp $	*/
      2 
      3 /*
      4  *    Stack-less Just-In-Time compiler
      5  *
      6  *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without modification, are
      9  * permitted provided that the following conditions are met:
     10  *
     11  *   1. Redistributions of source code must retain the above copyright notice, this list of
     12  *      conditions and the following disclaimer.
     13  *
     14  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     15  *      of conditions and the following disclaimer in the documentation and/or other materials
     16  *      provided with the distribution.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     21  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     23  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     26  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
     30 {
     31 	return "PowerPC" SLJIT_CPUINFO;
     32 }
     33 
     34 /* Length of an instruction word.
     35    Both for ppc-32 and ppc-64. */
     36 typedef sljit_u32 sljit_ins;
     37 
	/* The "V2" stack frame layout is selected on AIX (32 bit) and on all
	   64 bit targets; it changes the link-register save slot offset used
	   by sljit_emit_enter / sljit_emit_return below. */
     38 #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
     39 	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
     40 #define SLJIT_PPC_STACK_FRAME_V2 1
     41 #endif
     42 
     43 #ifdef _AIX
     44 #include <sys/cache.h>
     45 #endif
     46 
	/* NOTE(review): on little endian targets the entry address is passed
	   to calls directly (presumably the no-function-descriptor ELFv2
	   convention — confirm); see TMP_CALL_REG and detect_jump_type. */
     47 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
     48 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
     49 #endif
     50 
     51 #if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
     52 
/* Makes the generated code in the half-open range [from, to) visible to
   instruction fetch: flushes the modified data cache lines, then invalidates
   the corresponding instruction cache lines.  Each platform variant below
   performs an equivalent sequence. */
     53 static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
     54 {
     55 #ifdef _AIX
	/* AIX libc provides a cache synchronization primitive (sys/cache.h). */
     56 	_sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
     57 #elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
     58 #	if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
     59 	/* Cache flush for POWER architecture. */
	/* One instruction word per iteration: clf flushes the cache line,
	   dcs synchronizes the data cache (POWER mnemonics). */
     60 	while (from < to) {
     61 		__asm__ volatile (
     62 			"clf 0, %0\n"
     63 			"dcs\n"
     64 			: : "r"(from)
     65 		);
     66 		from++;
     67 	}
	/* ics: instruction cache synchronize, discards prefetched words. */
     68 	__asm__ volatile ( "ics" );
     69 #	elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
     70 #	error "Cache flush is not implemented for PowerPC/POWER common mode."
     71 #	else
     72 	/* Cache flush for PowerPC architecture. */
	/* dcbf: flush data cache block; sync: wait for the flush to finish;
	   icbi: invalidate the matching instruction cache block. */
     73 	while (from < to) {
     74 		__asm__ volatile (
     75 			"dcbf 0, %0\n"
     76 			"sync\n"
     77 			"icbi 0, %0\n"
     78 			: : "r"(from)
     79 		);
     80 		from++;
     81 	}
	/* isync: drop instructions prefetched before the invalidation. */
     82 	__asm__ volatile ( "isync" );
     83 #	endif
     84 #	ifdef __xlc__
     85 #	warning "This file may fail to compile if -qfuncsect is used"
     86 #	endif
     87 #elif defined(__xlc__)
     88 #error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
     89 #else
     90 #error "This platform requires a cache flush implementation."
     91 #endif /* _AIX */
     92 }
     93 
     94 #endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
     95 
/* Compiler-internal virtual registers placed after the user-visible ones;
   they are translated through reg_map like any other register.  TMP_ZERO is
   kept equal to zero by the prologue (see sljit_emit_enter). */
     96 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
     97 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
     98 #define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
     99 #define TMP_ZERO	(SLJIT_NUMBER_OF_REGISTERS + 5)
    100 
	/* When the call entry address is passed in a register, a dedicated
	   temporary is reserved for it; otherwise TMP_REG2 is reused. */
    101 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
    102 #define TMP_CALL_REG	(SLJIT_NUMBER_OF_REGISTERS + 6)
    103 #else
    104 #define TMP_CALL_REG	TMP_REG2
    105 #endif
    106 
	/* Temporary floating point registers. */
    107 #define TMP_FREG1	(0)
    108 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
    109 
/* reg_map[i]: hardware GPR backing sljit register index i (index 0 unused).
   NOTE(review): the mapping appears to put volatile GPRs behind scratch
   registers and non-volatile GPRs behind saved registers, with r1 (the
   stack pointer) as the SLJIT_SP entry — confirm against the register
   numbering in sljitLir.h. */
    110 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
    111 	0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
    112 };
    113 
    114 /* --------------------------------------------------------------------- */
    115 /*  Instrucion forms                                                     */
    116 /* --------------------------------------------------------------------- */
    117 #define D(d)		(reg_map[d] << 21)
    118 #define S(s)		(reg_map[s] << 21)
    119 #define A(a)		(reg_map[a] << 16)
    120 #define B(b)		(reg_map[b] << 11)
    121 #define C(c)		(reg_map[c] << 6)
    122 #define FD(fd)		((fd) << 21)
    123 #define FS(fs)		((fs) << 21)
    124 #define FA(fa)		((fa) << 16)
    125 #define FB(fb)		((fb) << 11)
    126 #define FC(fc)		((fc) << 6)
    127 #define IMM(imm)	((imm) & 0xffff)
    128 #define CRD(d)		((d) << 21)
    129 
    130 /* Instruction bit sections.
    131    OE and Rc flag (see ALT_SET_FLAGS). */
    132 #define OERC(flags)	(((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS))
    133 /* Rc flag (see ALT_SET_FLAGS). */
    134 #define RC(flags)	((flags & ALT_SET_FLAGS) >> 10)
    135 #define HI(opcode)	((opcode) << 26)
    136 #define LO(opcode)	((opcode) << 1)
    137 
    138 #define ADD		(HI(31) | LO(266))
    139 #define ADDC		(HI(31) | LO(10))
    140 #define ADDE		(HI(31) | LO(138))
    141 #define ADDI		(HI(14))
    142 #define ADDIC		(HI(13))
    143 #define ADDIS		(HI(15))
    144 #define ADDME		(HI(31) | LO(234))
    145 #define AND		(HI(31) | LO(28))
    146 #define ANDI		(HI(28))
    147 #define ANDIS		(HI(29))
    148 #define Bx		(HI(18))
    149 #define BCx		(HI(16))
    150 #define BCCTR		(HI(19) | LO(528) | (3 << 11))
    151 #define BLR		(HI(19) | LO(16) | (0x14 << 21))
    152 #define CNTLZD		(HI(31) | LO(58))
    153 #define CNTLZW		(HI(31) | LO(26))
    154 #define CMP		(HI(31) | LO(0))
    155 #define CMPI		(HI(11))
    156 #define CMPL		(HI(31) | LO(32))
    157 #define CMPLI		(HI(10))
    158 #define CROR		(HI(19) | LO(449))
    159 #define DIVD		(HI(31) | LO(489))
    160 #define DIVDU		(HI(31) | LO(457))
    161 #define DIVW		(HI(31) | LO(491))
    162 #define DIVWU		(HI(31) | LO(459))
    163 #define EXTSB		(HI(31) | LO(954))
    164 #define EXTSH		(HI(31) | LO(922))
    165 #define EXTSW		(HI(31) | LO(986))
    166 #define FABS		(HI(63) | LO(264))
    167 #define FADD		(HI(63) | LO(21))
    168 #define FADDS		(HI(59) | LO(21))
    169 #define FCFID		(HI(63) | LO(846))
    170 #define FCMPU		(HI(63) | LO(0))
    171 #define FCTIDZ		(HI(63) | LO(815))
    172 #define FCTIWZ		(HI(63) | LO(15))
    173 #define FDIV		(HI(63) | LO(18))
    174 #define FDIVS		(HI(59) | LO(18))
    175 #define FMR		(HI(63) | LO(72))
    176 #define FMUL		(HI(63) | LO(25))
    177 #define FMULS		(HI(59) | LO(25))
    178 #define FNEG		(HI(63) | LO(40))
    179 #define FRSP		(HI(63) | LO(12))
    180 #define FSUB		(HI(63) | LO(20))
    181 #define FSUBS		(HI(59) | LO(20))
    182 #define LD		(HI(58) | 0)
    183 #define LWZ		(HI(32))
    184 #define MFCR		(HI(31) | LO(19))
    185 #define MFLR		(HI(31) | LO(339) | 0x80000)
    186 #define MFXER		(HI(31) | LO(339) | 0x10000)
    187 #define MTCTR		(HI(31) | LO(467) | 0x90000)
    188 #define MTLR		(HI(31) | LO(467) | 0x80000)
    189 #define MTXER		(HI(31) | LO(467) | 0x10000)
    190 #define MULHD		(HI(31) | LO(73))
    191 #define MULHDU		(HI(31) | LO(9))
    192 #define MULHW		(HI(31) | LO(75))
    193 #define MULHWU		(HI(31) | LO(11))
    194 #define MULLD		(HI(31) | LO(233))
    195 #define MULLI		(HI(7))
    196 #define MULLW		(HI(31) | LO(235))
    197 #define NEG		(HI(31) | LO(104))
    198 #define NOP		(HI(24))
    199 #define NOR		(HI(31) | LO(124))
    200 #define OR		(HI(31) | LO(444))
    201 #define ORI		(HI(24))
    202 #define ORIS		(HI(25))
    203 #define RLDICL		(HI(30))
    204 #define RLWINM		(HI(21))
    205 #define SLD		(HI(31) | LO(27))
    206 #define SLW		(HI(31) | LO(24))
    207 #define SRAD		(HI(31) | LO(794))
    208 #define SRADI		(HI(31) | LO(413 << 1))
    209 #define SRAW		(HI(31) | LO(792))
    210 #define SRAWI		(HI(31) | LO(824))
    211 #define SRD		(HI(31) | LO(539))
    212 #define SRW		(HI(31) | LO(536))
    213 #define STD		(HI(62) | 0)
    214 #define STDU		(HI(62) | 1)
    215 #define STDUX		(HI(31) | LO(181))
    216 #define STFIWX		(HI(31) | LO(983))
    217 #define STW		(HI(36))
    218 #define STWU		(HI(37))
    219 #define STWUX		(HI(31) | LO(183))
    220 #define SUBF		(HI(31) | LO(40))
    221 #define SUBFC		(HI(31) | LO(8))
    222 #define SUBFE		(HI(31) | LO(136))
    223 #define SUBFIC		(HI(8))
    224 #define XOR		(HI(31) | LO(316))
    225 #define XORI		(HI(26))
    226 #define XORIS		(HI(27))
    227 
    228 #define SIMM_MAX	(0x7fff)
    229 #define SIMM_MIN	(-0x8000)
    230 #define UIMM_MAX	(0xffff)
    231 
    232 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    233 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
    234 {
    235 	sljit_sw* ptrs;
    236 	if (func_ptr)
    237 		*func_ptr = (void*)context;
    238 	ptrs = (sljit_sw*)func;
    239 	context->addr = addr ? addr : ptrs[0];
    240 	context->r2 = ptrs[1];
    241 	context->r11 = ptrs[2];
    242 }
    243 #endif
    244 
    245 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
    246 {
    247 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
    248 	FAIL_IF(!ptr);
    249 	*ptr = ins;
    250 	compiler->size++;
    251 	return SLJIT_SUCCESS;
    252 }
    253 
/* Decides how the jump ending at code_ptr can be encoded.  Returns nonzero
   when a short (PC-relative or short-absolute) form is possible and records
   the chosen encoding in jump->flags (PATCH_* bits) for the later shrinking
   and patching passes in sljit_generate_code.  Returns 0 when the full
   address-load sequence must be kept. */
    254 static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
    255 {
    256 	sljit_sw diff;
    257 	sljit_uw target_addr;
    258 	sljit_sw extra_jump_flags;
    259 
    260 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* Rewritable jumps (and, on this configuration, calls) must keep the
	   long form so they can be repatched later. */
    261 	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
    262 		return 0;
    263 #else
    264 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
    265 		return 0;
    266 #endif
    267 
    268 	if (jump->flags & JUMP_ADDR)
    269 		target_addr = jump->u.target;
    270 	else {
    271 		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
	/* Label addresses are not final yet; computed from the label size. */
    272 		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
    273 	}
    274 
    275 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    276 	if (jump->flags & IS_CALL)
    277 		goto keep_address;
    278 #endif
    279 
	/* Signed branch distance from the branch site, low two bits cleared. */
    280 	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l;
    281 
    282 	extra_jump_flags = 0;
    283 	if (jump->flags & IS_COND) {
	/* Conditional branch (bc) reaches a signed 16 bit displacement. */
    284 		if (diff <= 0x7fff && diff >= -0x8000) {
    285 			jump->flags |= PATCH_B;
    286 			return 1;
    287 		}
    288 		if (target_addr <= 0xffff) {
    289 			jump->flags |= PATCH_B | PATCH_ABS_B;
    290 			return 1;
    291 		}
	/* Out of bc range: invert the condition and branch over an
	   unconditional b placed one instruction later (REMOVE_COND,
	   handled in sljit_generate_code). */
    292 		extra_jump_flags = REMOVE_COND;
    293 
    294 		diff -= sizeof(sljit_ins);
    295 	}
    296 
	/* Unconditional branch (b) reaches a signed 26 bit displacement. */
    297 	if (diff <= 0x01ffffff && diff >= -0x02000000) {
    298 		jump->flags |= PATCH_B | extra_jump_flags;
    299 		return 1;
    300 	}
    301 
    302 	if (target_addr <= 0x03ffffff) {
    303 		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
    304 		return 1;
    305 	}
    306 
    307 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    308 #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
    309 keep_address:
    310 #endif
	/* 64 bit only: shorter absolute immediate-load sequences. */
    311 	if (target_addr <= 0x7fffffff) {
    312 		jump->flags |= PATCH_ABS32;
    313 		return 1;
    314 	}
    315 
    316 	if (target_addr <= 0x7fffffffffffl) {
    317 		jump->flags |= PATCH_ABS48;
    318 		return 1;
    319 	}
    320 #endif
    321 
    322 	return 0;
    323 }
    324 
/* Second pass of the compiler: copies the buffered instruction words into
   executable memory, resolves label addresses, shortens jump sequences where
   detect_jump_type allows it, patches all jump target fields, and flushes
   the instruction cache.  Returns the executable entry point (the function
   descriptor on indirect-call ABIs), or NULL on failure. */
    325 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
    326 {
    327 	struct sljit_memory_fragment *buf;
    328 	sljit_ins *code;
    329 	sljit_ins *code_ptr;
    330 	sljit_ins *buf_ptr;
    331 	sljit_ins *buf_end;
    332 	sljit_uw word_count;
    333 	sljit_sw executable_offset;
    334 	sljit_uw addr;
    335 
    336 	struct sljit_label *label;
    337 	struct sljit_jump *jump;
    338 	struct sljit_const *const_;
    339 
    340 	CHECK_ERROR_PTR();
    341 	CHECK_PTR(check_sljit_generate_code(compiler));
    342 	reverse_buf(compiler);
    343 
    344 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	/* Reserve room for the function descriptor placed after the code;
	   on 64 bit it must start on a doubleword boundary. */
    345 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    346 	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
    347 #else
    348 	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
    349 #endif
    350 #endif
    351 	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
    352 	PTR_FAIL_WITH_EXEC_IF(code);
    353 	buf = compiler->buf;
    354 
    355 	code_ptr = code;
    356 	word_count = 0;
    357 	executable_offset = SLJIT_EXEC_OFFSET(code);
    358 
    359 	label = compiler->labels;
    360 	jump = compiler->jumps;
    361 	const_ = compiler->consts;
    362 
	/* Copy pass: word_count indexes the source stream; labels, jumps and
	   consts are fixed up when their recorded position is reached. */
    363 	do {
    364 		buf_ptr = (sljit_ins*)buf->memory;
    365 		buf_end = buf_ptr + (buf->used_size >> 2);
    366 		do {
    367 			*code_ptr = *buf_ptr++;
    368 			SLJIT_ASSERT(!label || label->size >= word_count);
    369 			SLJIT_ASSERT(!jump || jump->addr >= word_count);
    370 			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
    371 			/* These structures are ordered by their address. */
    372 			if (label && label->size == word_count) {
    373 				/* Just recording the address. */
    374 				label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
    375 				label->size = code_ptr - code;
    376 				label = label->next;
    377 			}
    378 			if (jump && jump->addr == word_count) {
	/* Rewind to the first word of the address-load sequence
	   (3 words on 32 bit, 6 on 64 bit) before the branch. */
    379 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    380 				jump->addr = (sljit_uw)(code_ptr - 3);
    381 #else
    382 				jump->addr = (sljit_uw)(code_ptr - 6);
    383 #endif
    384 				if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
	/* A shorter form is possible: drop the unneeded words
	   of the load sequence and slide the branch back. */
    385 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    386 					code_ptr[-3] = code_ptr[0];
    387 					code_ptr -= 3;
    388 #else
    389 					if (jump->flags & PATCH_ABS32) {
    390 						code_ptr -= 3;
    391 						code_ptr[-1] = code_ptr[2];
    392 						code_ptr[0] = code_ptr[3];
    393 					}
    394 					else if (jump->flags & PATCH_ABS48) {
    395 						code_ptr--;
    396 						code_ptr[-1] = code_ptr[0];
    397 						code_ptr[0] = code_ptr[1];
    398 						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
    399 						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
    400 						code_ptr[-3] ^= 0x8422;
    401 						/* oris -> ori */
    402 						code_ptr[-2] ^= 0x4000000;
    403 					}
    404 					else {
    405 						code_ptr[-6] = code_ptr[0];
    406 						code_ptr -= 6;
    407 					}
    408 #endif
    409 					if (jump->flags & REMOVE_COND) {
	/* Replace the conditional branch by its inverse
	   (BO bit flipped) skipping an unconditional b. */
    410 						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
    411 						code_ptr++;
    412 						jump->addr += sizeof(sljit_ins);
    413 						code_ptr[0] = Bx;
    414 						jump->flags -= IS_COND;
    415 					}
    416 				}
    417 				jump = jump->next;
    418 			}
    419 			if (const_ && const_->addr == word_count) {
	/* Constants record the address of their immediate load. */
    420 				const_->addr = (sljit_uw)code_ptr;
    421 				const_ = const_->next;
    422 			}
    423 			code_ptr ++;
    424 			word_count ++;
    425 		} while (buf_ptr < buf_end);
    426 
    427 		buf = buf->next;
    428 	} while (buf);
    429 
	/* A label may sit exactly at the end of the code. */
    430 	if (label && label->size == word_count) {
    431 		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
    432 		label->size = code_ptr - code;
    433 		label = label->next;
    434 	}
    435 
    436 	SLJIT_ASSERT(!label);
    437 	SLJIT_ASSERT(!jump);
    438 	SLJIT_ASSERT(!const_);
    439 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    440 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
    441 #else
    442 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
    443 #endif
    444 
	/* Patch pass: label addresses are final now, so write the target
	   address into every jump (branch field or immediate-load words). */
    445 	jump = compiler->jumps;
    446 	while (jump) {
    447 		do {
    448 			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
    449 			buf_ptr = (sljit_ins *)jump->addr;
    450 
    451 			if (jump->flags & PATCH_B) {
    452 				if (jump->flags & IS_COND) {
    453 					if (!(jump->flags & PATCH_ABS_B)) {
    454 						addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
    455 						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
    456 						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
    457 					}
    458 					else {
    459 						SLJIT_ASSERT(addr <= 0xffff);
	/* The 0x2 (AA) bit marks an absolute branch. */
    460 						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
    461 					}
    462 				}
    463 				else {
    464 					if (!(jump->flags & PATCH_ABS_B)) {
    465 						addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
    466 						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
    467 						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
    468 					}
    469 					else {
    470 						SLJIT_ASSERT(addr <= 0x03ffffff);
    471 						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
    472 					}
    473 				}
    474 				break;
    475 			}
    476 
    477 			/* Set the fields of immediate loads. */
    478 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    479 			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
    480 			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
    481 #else
    482 			if (jump->flags & PATCH_ABS32) {
    483 				SLJIT_ASSERT(addr <= 0x7fffffff);
    484 				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
    485 				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
    486 				break;
    487 			}
    488 			if (jump->flags & PATCH_ABS48) {
    489 				SLJIT_ASSERT(addr <= 0x7fffffffffff);
    490 				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
    491 				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
	/* buf_ptr[2] is the rotate instruction of the sequence;
	   it needs no patching. */
    492 				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
    493 				break;
    494 			}
	/* Full 64 bit address: four 16 bit pieces around the rotate. */
    495 			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
    496 			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
    497 			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
    498 			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
    499 #endif
    500 		} while (0);
    501 		jump = jump->next;
    502 	}
    503 
    504 	compiler->error = SLJIT_ERR_COMPILED;
    505 	compiler->executable_offset = executable_offset;
    506 	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);
    507 
    508 	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
    509 
    510 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	/* Build the function descriptor after the code (aligned to a
	   doubleword on 64 bit); callers jump through the descriptor. */
    511 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    512 	if (((sljit_sw)code_ptr) & 0x4)
    513 		code_ptr++;
    514 #endif
    515 	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
    516 #endif
    517 
    518 	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
    519 
    520 	SLJIT_CACHE_FLUSH(code, code_ptr);
    521 
	/* On indirect-call ABIs the descriptor address is the entry point. */
    522 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
    523 	return code_ptr;
    524 #else
    525 	return code;
    526 #endif
    527 }
    528 
    529 /* --------------------------------------------------------------------- */
    530 /*  Entry, exit                                                          */
    531 /* --------------------------------------------------------------------- */
    532 
    533 /* inp_flags: */
    534 
    535 /* Creates an index in data_transfer_insts array. */
    536 #define LOAD_DATA	0x01
    537 #define INDEXED		0x02
    538 #define WRITE_BACK	0x04
    539 #define WORD_DATA	0x00
    540 #define BYTE_DATA	0x08
    541 #define HALF_DATA	0x10
    542 #define INT_DATA	0x18
    543 #define SIGNED_DATA	0x20
    544 /* Separates integer and floating point registers */
    545 #define GPR_REG		0x3f
    546 #define DOUBLE_DATA	0x40
    547 
    548 #define MEM_MASK	0x7f
    549 
    550 /* Other inp_flags. */
    551 
    552 #define ARG_TEST	0x000100
    553 /* Integer opertion and set flags -> requires exts on 64 bit systems. */
    554 #define ALT_SIGN_EXT	0x000200
    555 /* This flag affects the RC() and OERC() macros. */
    556 #define ALT_SET_FLAGS	0x000400
    557 #define ALT_KEEP_CACHE	0x000800
    558 #define ALT_FORM1	0x010000
    559 #define ALT_FORM2	0x020000
    560 #define ALT_FORM3	0x040000
    561 #define ALT_FORM4	0x080000
    562 #define ALT_FORM5	0x100000
    563 #define ALT_FORM6	0x200000
    564 #define ALT_FORM7	0x400000
    565 
    566 /* Source and destination is register. */
    567 #define REG_DEST	0x000001
    568 #define REG1_SOURCE	0x000002
    569 #define REG2_SOURCE	0x000004
    570 /* getput_arg_fast returned true. */
    571 #define FAST_DEST	0x000008
    572 /* Multiple instructions are required. */
    573 #define SLOW_DEST	0x000010
    574 /*
    575 ALT_SIGN_EXT		0x000200
    576 ALT_SET_FLAGS		0x000400
    577 ALT_FORM1		0x010000
    578 ...
    579 ALT_FORM7		0x400000 */
    580 
    581 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    582 #include "sljitNativePPC_32.c"
    583 #else
    584 #include "sljitNativePPC_64.c"
    585 #endif
    586 
    587 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    588 #define STACK_STORE	STW
    589 #define STACK_LOAD	LWZ
    590 #else
    591 #define STACK_STORE	STD
    592 #define STACK_LOAD	LD
    593 #endif
    594 
/* Emits the function prologue: saves LR and the callee-saved registers used
   by the generated function, moves the incoming arguments into S0..S2,
   clears TMP_ZERO, and allocates the 16 byte aligned stack frame with a
   store-with-update so the stack back chain stays valid. */
    595 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
    596 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
    597 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    598 {
    599 	sljit_s32 i, tmp, offs;
    600 
    601 	CHECK_ERROR();
    602 	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    603 	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    604 
	/* Copy LR into r0 so it can be stored like a normal register. */
    605 	FAIL_IF(push_inst(compiler, MFLR | D(0)));
    606 	offs = -(sljit_s32)(sizeof(sljit_sw));
    607 	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
    608 
	/* Store the used saved registers at negative offsets below SP. */
    609 	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
    610 	for (i = SLJIT_S0; i >= tmp; i--) {
    611 		offs -= (sljit_s32)(sizeof(sljit_sw));
    612 		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
    613 	}
    614 
	/* Scratch registers above SLJIT_FIRST_SAVED_REG are backed by
	   callee-saved GPRs and must be preserved as well. */
    615 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
    616 		offs -= (sljit_s32)(sizeof(sljit_sw));
    617 		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
    618 	}
    619 
    620 	SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));
    621 
	/* Store LR (now in r0) into the LR save slot of the caller's frame;
	   its offset differs between the V2 layout (AIX / 64 bit) and the
	   default layout — see SLJIT_PPC_STACK_FRAME_V2 above. */
    622 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
    623 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
    624 #else
    625 	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
    626 #endif
    627 
	/* TMP_ZERO permanently holds 0 (addi TMP_ZERO, 0, 0). */
    628 	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
	/* Move argument registers into saved registers (mr, encoded as or). */
    629 	if (args >= 1)
    630 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
    631 	if (args >= 2)
    632 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
    633 	if (args >= 3)
    634 		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));
    635 
	/* Frame = locals + register save area, rounded up to 16 bytes. */
    636 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
    637 	local_size = (local_size + 15) & ~0xf;
    638 	compiler->local_size = local_size;
    639 
	/* Allocate the frame; the store-with-update writes the back chain.
	   When the size does not fit a 16 bit immediate, build it in r0. */
    640 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    641 	if (local_size <= SIMM_MAX)
    642 		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
    643 	else {
    644 		FAIL_IF(load_immediate(compiler, 0, -local_size));
    645 		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
    646 	}
    647 #else
    648 	if (local_size <= SIMM_MAX)
    649 		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
    650 	else {
    651 		FAIL_IF(load_immediate(compiler, 0, -local_size));
    652 		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
    653 	}
    654 #endif
    655 
    656 	return SLJIT_SUCCESS;
    657 }
    658 
    659 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
    660 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
    661 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    662 {
    663 	CHECK_ERROR();
    664 	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    665 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    666 
    667 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
    668 	compiler->local_size = (local_size + 15) & ~0xf;
    669 	return SLJIT_SUCCESS;
    670 }
    671 
/* Emits the function epilogue: moves the return value if needed, tears down
   the stack frame, restores the saved registers, LR and TMP_ZERO, and
   returns with blr.  Mirrors sljit_emit_enter exactly. */
    672 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
    673 {
    674 	sljit_s32 i, tmp, offs;
    675 
    676 	CHECK_ERROR();
    677 	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
    678 
    679 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
    680 
	/* Pop the frame (reverse of the stwu/stdu in the prologue). */
    681 	if (compiler->local_size <= SIMM_MAX)
    682 		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
    683 	else {
    684 		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
    685 		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
    686 	}
    687 
	/* Reload LR into r0 from the slot used by sljit_emit_enter. */
    688 #if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
    689 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
    690 #else
    691 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
    692 #endif
    693 
    694 	offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);
    695 
	/* Restore the callee-saved scratch registers (lowest offset first). */
    696 	tmp = compiler->scratches;
    697 	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
    698 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
    699 		offs += (sljit_s32)(sizeof(sljit_sw));
    700 	}
    701 
	/* Restore the saved registers. */
    702 	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
    703 	for (i = tmp; i <= SLJIT_S0; i++) {
    704 		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
    705 		offs += (sljit_s32)(sizeof(sljit_sw));
    706 	}
    707 
	/* Restore TMP_ZERO's original value from the topmost slot. */
    708 	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
    709 	SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));
    710 
    711 	FAIL_IF(push_inst(compiler, MTLR | S(0)));
    712 	FAIL_IF(push_inst(compiler, BLR));
    713 
    714 	return SLJIT_SUCCESS;
    715 }
    716 
    717 #undef STACK_STORE
    718 #undef STACK_LOAD
    719 
    720 /* --------------------------------------------------------------------- */
    721 /*  Operators                                                            */
    722 /* --------------------------------------------------------------------- */
    723 
    724 /* i/x - immediate/indexed form
    725    n/w - no write-back / write-back (1 bit)
    726    s/l - store/load (1 bit)
    727    u/s - signed/unsigned (1 bit)
    728    w/b/h/i - word/byte/half/int allowed (2 bit)
    729    It contans 32 items, but not all are different. */
    730 
    731 /* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
    732 #define INT_ALIGNED	0x10000
    733 /* 64-bit only: there is no lwau instruction. */
    734 #define UPDATE_REQ	0x20000
    735 
    736 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    737 #define ARCH_32_64(a, b)	a
    738 #define INST_CODE_AND_DST(inst, flags, reg) \
    739 	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
    740 #else
    741 #define ARCH_32_64(a, b)	b
    742 #define INST_CODE_AND_DST(inst, flags, reg) \
    743 	(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
    744 #endif
    745 
/* Opcode lookup table for memory loads/stores, indexed by the MEM_MASK
   part of the input flags (plus the INDEXED bit).  The five-letter tag
   on every entry decodes its index:
     1st: u = zero-extending (unsigned), s = sign-extending (signed)
     2nd: w = word, b = byte, h = half, i = 32 bit int
     3rd: n = no write-back, w = write-back (update form)
     4th: i = immediate (D-form) offset, x = indexed (X-form) offset
     5th: s = store, l = load
   The final 8 entries are the floating point forms (double, single).
   INT_ALIGNED flags DS-form instructions (ld/std/lwa) whose displacement
   must be a multiple of 4; UPDATE_REQ flags write-back requests that have
   no direct update form (see getput_arg_fast, which rejects both). */
static const sljit_ins data_transfer_insts[64 + 8] = {

/* -------- Unsigned -------- */

/* Word. */

/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),

/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),

/* Byte. */

/* u b n i s */ HI(38) /* stb */,
/* u b n i l */ HI(34) /* lbz */,
/* u b n x s */ HI(31) | LO(215) /* stbx */,
/* u b n x l */ HI(31) | LO(87) /* lbzx */,

/* u b w i s */ HI(39) /* stbu */,
/* u b w i l */ HI(35) /* lbzu */,
/* u b w x s */ HI(31) | LO(247) /* stbux */,
/* u b w x l */ HI(31) | LO(119) /* lbzux */,

/* Half. */

/* u h n i s */ HI(44) /* sth */,
/* u h n i l */ HI(40) /* lhz */,
/* u h n x s */ HI(31) | LO(407) /* sthx */,
/* u h n x l */ HI(31) | LO(279) /* lhzx */,

/* u h w i s */ HI(45) /* sthu */,
/* u h w i l */ HI(41) /* lhzu */,
/* u h w x s */ HI(31) | LO(439) /* sthux */,
/* u h w x l */ HI(31) | LO(311) /* lhzux */,

/* Int. */

/* u i n i s */ HI(36) /* stw */,
/* u i n i l */ HI(32) /* lwz */,
/* u i n x s */ HI(31) | LO(151) /* stwx */,
/* u i n x l */ HI(31) | LO(23) /* lwzx */,

/* u i w i s */ HI(37) /* stwu */,
/* u i w i l */ HI(33) /* lwzu */,
/* u i w x s */ HI(31) | LO(183) /* stwux */,
/* u i w x l */ HI(31) | LO(55) /* lwzux */,

/* -------- Signed -------- */

/* Word. */

/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),

/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),

/* Byte. */

/* s b n i s */ HI(38) /* stb */,
/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
/* s b n x s */ HI(31) | LO(215) /* stbx */,
/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,

/* s b w i s */ HI(39) /* stbu */,
/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
/* s b w x s */ HI(31) | LO(247) /* stbux */,
/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,

/* Half. */

/* s h n i s */ HI(44) /* sth */,
/* s h n i l */ HI(42) /* lha */,
/* s h n x s */ HI(31) | LO(407) /* sthx */,
/* s h n x l */ HI(31) | LO(343) /* lhax */,

/* s h w i s */ HI(45) /* sthu */,
/* s h w i l */ HI(43) /* lhau */,
/* s h w x s */ HI(31) | LO(439) /* sthux */,
/* s h w x l */ HI(31) | LO(375) /* lhaux */,

/* Int. */

/* s i n i s */ HI(36) /* stw */,
/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
/* s i n x s */ HI(31) | LO(151) /* stwx */,
/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),

/* s i w i s */ HI(37) /* stwu */,
/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
/* s i w x s */ HI(31) | LO(183) /* stwux */,
/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),

/* -------- Double -------- */

/* d   n i s */ HI(54) /* stfd */,
/* d   n i l */ HI(50) /* lfd */,
/* d   n x s */ HI(31) | LO(727) /* stfdx */,
/* d   n x l */ HI(31) | LO(599) /* lfdx */,

/* s   n i s */ HI(52) /* stfs */,
/* s   n i l */ HI(48) /* lfs */,
/* s   n x s */ HI(31) | LO(663) /* stfsx */,
/* s   n x l */ HI(31) | LO(535) /* lfsx */,

};
    861 
    862 #undef ARCH_32_64
    863 
    864 /* Simple cases, (no caching is required). */
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	/* Try to emit the memory access with a single instruction.
	   Return values:
	      0 - the access cannot be encoded in one instruction; the
	          caller must fall back to getput_arg;
	      1 - the access could be encoded (returned only when ARG_TEST
	          is set in inp_flags; nothing is emitted);
	     -1 - the instruction was emitted (the caller still has to
	          check compiler->error, see emit_op_mem2). */
	sljit_ins inst;

	/* Should work when (arg & REG_MASK) == 0. */
	SLJIT_ASSERT(A(0) == 0);
	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (arg & OFFS_REG_MASK) {
		/* [base + index] form: the X-form instructions cannot scale
		   the index, so any requested shift (low 2 bits of argw)
		   forces the slow path. */
		if (argw & 0x3)
			return 0;
		if (inp_flags & ARG_TEST)
			return 1;

		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
		return -1;
	}

	/* Write-back is meaningless without a base register. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
		inp_flags &= ~WRITE_BACK;

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));

	/* Reject: displacement outside the signed 16 bit field, DS-form
	   instructions (INT_ALIGNED) with a misaligned displacement, and
	   entries that have no update form (UPDATE_REQ). */
	if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
		return 0;
	if (inp_flags & ARG_TEST)
		return 1;
#endif

#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* On PPC-32 no table entry is DS-form, so only the displacement
	   range has to be checked. */
	if (argw > SIMM_MAX || argw < SIMM_MIN)
		return 0;
	if (inp_flags & ARG_TEST)
		return 1;

	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
#endif

	FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
	return -1;
}
    911 
    912 /* See getput_arg below.
    913    Note: can_cache is called only for binary operators. Those operator always
    914    uses word arguments without write back. */
    915 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
    916 {
    917 	sljit_sw high_short, next_high_short;
    918 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
    919 	sljit_sw diff;
    920 #endif
    921 
    922 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
    923 
    924 	if (arg & OFFS_REG_MASK)
    925 		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
    926 
    927 	if (next_arg & OFFS_REG_MASK)
    928 		return 0;
    929 
    930 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
    931 	high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
    932 	next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
    933 	return high_short == next_high_short;
    934 #else
    935 	if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
    936 		high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
    937 		next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
    938 		if (high_short == next_high_short)
    939 			return 1;
    940 	}
    941 
    942 	diff = argw - next_argw;
    943 	if (!(arg & REG_MASK))
    944 		return diff <= SIMM_MAX && diff >= SIMM_MIN;
    945 
    946 	if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
    947 		return 1;
    948 
    949 	return 0;
    950 #endif
    951 }
    952 
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* DS-form instructions (INT_ALIGNED) require the 16 bit displacement to be
   a multiple of 4. When the displacement relative to the cached address in
   TMP_REG3 is not, nudge TMP_REG3 by the low two bits, record the change in
   compiler->cache_argw, and align the displacement. */
#define ADJUST_CACHED_IMM(imm) \
	if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
		/* Adjust cached value. Fortunately this is really a rare case */ \
		compiler->cache_argw += imm & 0x3; \
		FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
		imm &= ~0x3; \
	}
#endif
    962 
/* Emit the necessary instructions. See can_cache above. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	/* Slow path of a memory access: may emit several instructions.
	   next_arg/next_argw describe the following memory access (0 when
	   unknown); when the two accesses share an address component, that
	   component is computed into TMP_REG3 and remembered in
	   compiler->cache_arg / cache_argw so the next call can reuse it. */
	sljit_s32 tmp_r;
	sljit_ins inst;
	sljit_sw high_short, next_high_short;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_sw diff;
#endif

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* For GPR loads the destination register itself can serve as the
	   address scratch (its previous value is dead); otherwise TMP_REG1. */
	tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
	/* Special case for "mov reg, [reg, ... ]". */
	if ((arg & REG_MASK) == tmp_r)
		tmp_r = TMP_REG1;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		argw &= 0x3;
		/* Otherwise getput_arg_fast would capture it. */
		SLJIT_ASSERT(argw);

		/* The shifted index may already be cached in TMP_REG3. */
		if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
			tmp_r = TMP_REG3;
		else {
			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
				/* The next access uses the same shifted index: cache it. */
				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
				compiler->cache_argw = argw;
				tmp_r = TMP_REG3;
			}
			/* tmp_r = index register shifted left by argw. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
#endif
		}
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
	}

	/* Write-back is meaningless without a base register. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
		inp_flags &= ~WRITE_BACK;

	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (argw <= 0x7fff7fffl && argw >= -0x80000000l
			&& (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
#endif

		/* 32 bit offsets: addis for the upper half (rounded to absorb
		   the sign of the lower half), then the 16 bit displacement. */
		arg &= REG_MASK;
		high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
		/* The getput_arg_fast should handle this otherwise. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
#else
		SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
#endif

		if (inp_flags & WRITE_BACK) {
			/* Add the upper half to the base register in place; the
			   load/store then applies the low 16 bits. */
			tmp_r = arg;
			FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
		}
		else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
			if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
				next_high_short = (sljit_s32)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
				if (high_short == next_high_short) {
					/* The next access shares the upper half: cache
					   base + upper half in TMP_REG3. */
					compiler->cache_arg = SLJIT_MEM | arg;
					compiler->cache_argw = high_short;
					tmp_r = TMP_REG3;
				}
			}
			FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
		}
		else
			/* base + upper half already cached in TMP_REG3. */
			tmp_r = TMP_REG3;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	}

	/* Everything else is PPC-64 only. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: first try to reach it as a displacement
		   from the cached immediate address in TMP_REG3. */
		diff = argw - compiler->cache_argw;
		if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			ADJUST_CACHED_IMM(diff);
			return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
		}

		diff = argw - next_argw;
		if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			SLJIT_ASSERT(inp_flags & LOAD_DATA);

			/* The next access is nearby: cache this address. */
			compiler->cache_arg = SLJIT_IMM;
			compiler->cache_argw = argw;
			tmp_r = TMP_REG3;
		}

		FAIL_IF(load_immediate(compiler, tmp_r, argw));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
	}

	/* Cached base+offset pair within 16 bit reach of this access? */
	diff = argw - compiler->cache_argw;
	if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
		ADJUST_CACHED_IMM(diff);
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
	}

	/* Cached immediate close enough to be reused as an index register. */
	if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		if (compiler->cache_argw != argw) {
			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
			compiler->cache_argw = argw;
		}
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
		/* The same offset follows: load it once into TMP_REG3, cache it,
		   and use the indexed form. */
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;

		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	diff = argw - next_argw;
	if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		/* Same base with a nearby offset follows: cache base + offset. */
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
		FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));

		compiler->cache_arg = arg;
		compiler->cache_argw = argw;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
	}

	if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		/* A nearby plain-offset access follows: cache the offset. */
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;
		tmp_r = TMP_REG3;
	}
	else
		FAIL_IF(load_immediate(compiler, tmp_r, argw));

	/* Get the indexed version instead of the normal one. */
	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
#endif
}
   1126 
   1127 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
   1128 {
   1129 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
   1130 		return compiler->error;
   1131 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
   1132 }
   1133 
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* Central operand-resolution routine: brings both sources into
	   registers (fast path when possible, otherwise getput_arg with
	   cross-operand caching), calls emit_single_op, then writes the
	   result back if the destination is a memory operand. */
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	sljit_s32 dst_r;
	sljit_s32 src1_r;
	sljit_s32 src2_r;
	sljit_s32 sugg_src2_r = TMP_REG2;
	sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_FORM7 | ALT_SIGN_EXT | ALT_SET_FLAGS);

	if (!(input_flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	/* Destination check. */
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* A move with no destination and no memory source is a no-op. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
		dst_r = TMP_REG2;
	}
	else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		/* For moves the source can be loaded straight into dst. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			sugg_src2_r = dst_r;
	}
	else {
		SLJIT_ASSERT(dst & SLJIT_MEM);
		if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
			flags |= FAST_DEST;
			dst_r = TMP_REG2;
		}
		else {
			/* dst_r == 0 marks the store as still unresolved. */
			flags |= SLOW_DEST;
			dst_r = 0;
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1)) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	}
	else if (src1 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
		src1_r = TMP_REG1;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
		FAIL_IF(compiler->error);
		src1_r = TMP_REG1;
	}
	else
		/* src1_r == 0 marks the load as still unresolved. */
		src1_r = 0;

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		/* Register-to-register move: no copy needed, reuse the source. */
		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			dst_r = src2_r;
	}
	else if (src2 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
		src2_r = sugg_src2_r;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
		FAIL_IF(compiler->error);
		src2_r = sugg_src2_r;
	}
	else
		src2_r = 0;

	/* src1_r, src2_r and dst_r can be zero (=unprocessed).
	   All arguments are complex addressing modes, and it is a binary operator. */
	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
		/* Order the two loads so that the second one can share its
		   address computation with the following access (see can_cache). */
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
		}
		src1_r = TMP_REG1;
		src2_r = TMP_REG2;
	}
	else if (src1_r == 0 && src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
		src1_r = TMP_REG1;
	}
	else if (src1_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		src1_r = TMP_REG1;
	}
	else if (src2_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
		src2_r = sugg_src2_r;
	}

	if (dst_r == 0)
		dst_r = TMP_REG2;

	/* Any source still unresolved is loaded with no next-access hint. */
	if (src1_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
		src1_r = TMP_REG1;
	}

	if (src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
		src2_r = sugg_src2_r;
	}

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	/* Store the result when the destination is a memory operand. */
	if (flags & (FAST_DEST | SLOW_DEST)) {
		if (flags & FAST_DEST)
			FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
		else
			FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
	}
	return SLJIT_SUCCESS;
}
   1262 
/* Emit a zero-operand operation. LMUL leaves the low product in R0 and the
   high product in R1; DIVMOD leaves the quotient in R0 and the remainder
   in R1; DIV leaves the quotient in R0. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_s32 int_op = op & SLJIT_I32_OP;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
	case SLJIT_NOP:
		return push_inst(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* Copy R0 to TMP_REG1 first: the low-product instruction
		   overwrites R0, but the high-product instruction still needs
		   the original multiplicand. */
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
#else
		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
#endif
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Save the dividend (R0) in TMP_REG1 for the remainder step. */
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
		FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
#else
		FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
#endif
		/* remainder = dividend - quotient * divisor. */
		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
#else
		return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
#endif
	}

	return SLJIT_SUCCESS;
}
   1309 
/* Emit a typed move through emit_op: immediate sources are truncated to
   the operation's width by type_cast and emitted as a plain SLJIT_MOV;
   non-immediate sources keep the typed opcode. Expands in sljit_emit_op1,
   where src/srcw/dst/dstw/flags are in scope. */
#define EMIT_MOV(type, type_flags, type_cast) \
	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
   1312 
/* Emit a unary operation (moves, NOT, NEG, CLZ): normalizes the operand
   width/sign flags, then dispatches to emit_op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	op = GET_OPCODE(op);
	/* A zero immediate can always come from the dedicated zero register. */
	if ((src & SLJIT_IMM) && srcw == 0)
		src = TMP_ZERO;

	/* Clear XER before operations whose overflow flag will be read. */
	if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op_flags) == SLJIT_NOT_OVERFLOW)
		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));

	if (op_flags & SLJIT_I32_OP) {
		if (op < SLJIT_NOT) {
			/* 32 bit moves. */
			if (FAST_IS_REG(src) && src == dst) {
				/* Same-register move: a no-op unless a width cast
				   is still required. */
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
			/* NOTE(review): these swaps pick the other 32 bit variant
			   for memory loads and immediates; presumably only the low
			   32 bits matter to the consumer in I32 mode — confirm
			   against emit_single_op. */
			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOV_U32;
			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOVU_U32;
			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOV_S32;
			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOVU_S32;
#endif
		}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		else {
			/* Most operations expect sign extended arguments. */
			flags |= INT_DATA | SIGNED_DATA;
			if (src & SLJIT_IMM)
				srcw = (sljit_s32)srcw;
		}
#endif
	}

	switch (op) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* On PPC-32 a 32 bit move is a full word move. */
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
#endif
		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	case SLJIT_MOV_U32:
		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));

	case SLJIT_MOV_S32:
		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
#endif

	case SLJIT_MOV_U8:
		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));

	case SLJIT_MOV_S8:
		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));

	case SLJIT_MOV_U16:
		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));

	case SLJIT_MOV_S16:
		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));

	/* MOVU variants additionally write the computed address back into
	   the base register (WRITE_BACK). */
	case SLJIT_MOVU:
	case SLJIT_MOVU_P:
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	case SLJIT_MOVU_U32:
	case SLJIT_MOVU_S32:
#endif
		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	case SLJIT_MOVU_U32:
		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA | WRITE_BACK, (sljit_u32));

	case SLJIT_MOVU_S32:
		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s32));
#endif

	case SLJIT_MOVU_U8:
		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, (sljit_u8));

	case SLJIT_MOVU_S8:
		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s8));

	case SLJIT_MOVU_U16:
		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, (sljit_u16));

	case SLJIT_MOVU_S16:
		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s16));

	case SLJIT_NOT:
		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_NEG:
		return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_CLZ:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM1 selects the 32 bit count-leading-zeros variant. */
		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
#else
		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
#endif
	}

	return SLJIT_SUCCESS;
}
   1432 
   1433 #undef EMIT_MOV
   1434 
/* Immediate-classification helpers used by sljit_emit_op2: each tests
   whether an immediate operand fits a particular PPC encoding form. */

/* Fits the signed 16 bit immediate field. */
#define TEST_SL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)

/* Fits the unsigned 16 bit immediate field. */
#define TEST_UL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))

/* Signed immediate whose low 16 bits are zero: a single shifted
   (addis-style) immediate suffices. On PPC-64 the value must also fit
   in 32 signed bits. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#else
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
#endif

/* Unsigned immediate whose low 16 bits are zero. */
#define TEST_UH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000))

/* Immediate reachable with an addi + addis pair: any value on PPC-32;
   on PPC-64 it must stay in range after the low half's sign is absorbed. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_ADD_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#else
#define TEST_ADD_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif

/* Immediate that fits in 32 unsigned bits (always true on PPC-32). */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define TEST_UI_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
#else
#define TEST_UI_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif
   1467 
   1468 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
   1469 	sljit_s32 dst, sljit_sw dstw,
   1470 	sljit_s32 src1, sljit_sw src1w,
   1471 	sljit_s32 src2, sljit_sw src2w)
   1472 {
   1473 	sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
   1474 
   1475 	CHECK_ERROR();
   1476 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
   1477 	ADJUST_LOCAL_OFFSET(dst, dstw);
   1478 	ADJUST_LOCAL_OFFSET(src1, src1w);
   1479 	ADJUST_LOCAL_OFFSET(src2, src2w);
   1480 
   1481 	if ((src1 & SLJIT_IMM) && src1w == 0)
   1482 		src1 = TMP_ZERO;
   1483 	if ((src2 & SLJIT_IMM) && src2w == 0)
   1484 		src2 = TMP_ZERO;
   1485 
   1486 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1487 	if (op & SLJIT_I32_OP) {
   1488 		/* Most operations expect sign extended arguments. */
   1489 		flags |= INT_DATA | SIGNED_DATA;
   1490 		if (src1 & SLJIT_IMM)
   1491 			src1w = (sljit_s32)(src1w);
   1492 		if (src2 & SLJIT_IMM)
   1493 			src2w = (sljit_s32)(src2w);
   1494 		if (HAS_FLAGS(op))
   1495 			flags |= ALT_SIGN_EXT;
   1496 	}
   1497 #endif
   1498 	if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW || GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW)
   1499 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
   1500 	if (src2 == TMP_REG2)
   1501 		flags |= ALT_KEEP_CACHE;
   1502 
   1503 	switch (GET_OPCODE(op)) {
   1504 	case SLJIT_ADD:
   1505 		if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1506 			if (TEST_SL_IMM(src2, src2w)) {
   1507 				compiler->imm = src2w & 0xffff;
   1508 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1509 			}
   1510 			if (TEST_SL_IMM(src1, src1w)) {
   1511 				compiler->imm = src1w & 0xffff;
   1512 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1513 			}
   1514 			if (TEST_SH_IMM(src2, src2w)) {
   1515 				compiler->imm = (src2w >> 16) & 0xffff;
   1516 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1517 			}
   1518 			if (TEST_SH_IMM(src1, src1w)) {
   1519 				compiler->imm = (src1w >> 16) & 0xffff;
   1520 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1521 			}
   1522 			/* Range between -1 and -32768 is covered above. */
   1523 			if (TEST_ADD_IMM(src2, src2w)) {
   1524 				compiler->imm = src2w & 0xffffffff;
   1525 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1526 			}
   1527 			if (TEST_ADD_IMM(src1, src1w)) {
   1528 				compiler->imm = src1w & 0xffffffff;
   1529 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
   1530 			}
   1531 		}
   1532 		if (HAS_FLAGS(op)) {
   1533 			if (TEST_SL_IMM(src2, src2w)) {
   1534 				compiler->imm = src2w & 0xffff;
   1535 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1536 			}
   1537 			if (TEST_SL_IMM(src1, src1w)) {
   1538 				compiler->imm = src1w & 0xffff;
   1539 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1540 			}
   1541 		}
   1542 		return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w);
   1543 
   1544 	case SLJIT_ADDC:
   1545 		return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w);
   1546 
   1547 	case SLJIT_SUB:
   1548 		if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL)
   1549 		{
   1550 			if (dst == SLJIT_UNUSED)
   1551 			{
   1552 				if (TEST_UL_IMM(src2, src2w)) {
   1553 					compiler->imm = src2w & 0xffff;
   1554 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1555 				}
   1556 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM5, dst, dstw, src1, src1w, src2, src2w);
   1557 			}
   1558 
   1559 			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1))
   1560 			{
   1561 				compiler->imm = src2w;
   1562 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM6, dst, dstw, src1, src1w, TMP_REG2, 0);
   1563 			}
   1564 			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM7, dst, dstw, src1, src1w, src2, src2w);
   1565 		}
   1566 
   1567 		if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
   1568 			if (TEST_SL_IMM(src2, -src2w)) {
   1569 				compiler->imm = (-src2w) & 0xffff;
   1570 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1571 			}
   1572 			if (TEST_SL_IMM(src1, src1w)) {
   1573 				compiler->imm = src1w & 0xffff;
   1574 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1575 			}
   1576 			if (TEST_SH_IMM(src2, -src2w)) {
   1577 				compiler->imm = ((-src2w) >> 16) & 0xffff;
   1578 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1579 			}
   1580 			/* Range between -1 and -32768 is covered above. */
   1581 			if (TEST_ADD_IMM(src2, -src2w)) {
   1582 				compiler->imm = -src2w & 0xffffffff;
   1583 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
   1584 			}
   1585 		}
   1586 
   1587 		if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)
   1588 				&& GET_FLAG_TYPE(op) == SLJIT_OVERFLOW && GET_FLAG_TYPE(op) == SLJIT_NOT_OVERFLOW) {
   1589 			if (TEST_SL_IMM(src2, src2w)) {
   1590 				compiler->imm = src2w & 0xffff;
   1591 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1592 			}
   1593 			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
   1594 		}
   1595 
   1596 		if (TEST_SL_IMM(src2, -src2w)) {
   1597 			compiler->imm = (-src2w) & 0xffff;
   1598 			return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1599 		}
   1600 		/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
   1601 		return emit_op(compiler, SLJIT_SUB, flags, dst, dstw, src1, src1w, src2, src2w);
   1602 
   1603 	case SLJIT_SUBC:
   1604 		return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w);
   1605 
   1606 	case SLJIT_MUL:
   1607 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1608 		if (op & SLJIT_I32_OP)
   1609 			flags |= ALT_FORM2;
   1610 #endif
   1611 		if (!HAS_FLAGS(op)) {
   1612 			if (TEST_SL_IMM(src2, src2w)) {
   1613 				compiler->imm = src2w & 0xffff;
   1614 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1615 			}
   1616 			if (TEST_SL_IMM(src1, src1w)) {
   1617 				compiler->imm = src1w & 0xffff;
   1618 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1619 			}
   1620 		}
   1621 		else
   1622 			FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
   1623 		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
   1624 
   1625 	case SLJIT_AND:
   1626 	case SLJIT_OR:
   1627 	case SLJIT_XOR:
   1628 		/* Commutative unsigned operations. */
   1629 		if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
   1630 			if (TEST_UL_IMM(src2, src2w)) {
   1631 				compiler->imm = src2w;
   1632 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1633 			}
   1634 			if (TEST_UL_IMM(src1, src1w)) {
   1635 				compiler->imm = src1w;
   1636 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
   1637 			}
   1638 			if (TEST_UH_IMM(src2, src2w)) {
   1639 				compiler->imm = (src2w >> 16) & 0xffff;
   1640 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
   1641 			}
   1642 			if (TEST_UH_IMM(src1, src1w)) {
   1643 				compiler->imm = (src1w >> 16) & 0xffff;
   1644 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
   1645 			}
   1646 		}
   1647 		if (GET_OPCODE(op) != SLJIT_AND && GET_OPCODE(op) != SLJIT_AND) {
   1648 			/* Unlike or and xor, and resets unwanted bits as well. */
   1649 			if (TEST_UI_IMM(src2, src2w)) {
   1650 				compiler->imm = src2w;
   1651 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
   1652 			}
   1653 			if (TEST_UI_IMM(src1, src1w)) {
   1654 				compiler->imm = src1w;
   1655 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
   1656 			}
   1657 		}
   1658 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1659 
   1660 	case SLJIT_SHL:
   1661 	case SLJIT_LSHR:
   1662 	case SLJIT_ASHR:
   1663 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1664 		if (op & SLJIT_I32_OP)
   1665 			flags |= ALT_FORM2;
   1666 #endif
   1667 		if (src2 & SLJIT_IMM) {
   1668 			compiler->imm = src2w;
   1669 			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
   1670 		}
   1671 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
   1672 	}
   1673 
   1674 	return SLJIT_SUCCESS;
   1675 }
   1676 
   1677 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
   1678 {
   1679 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
   1680 	return reg_map[reg];
   1681 }
   1682 
   1683 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
   1684 {
   1685 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
   1686 	return reg;
   1687 }
   1688 
   1689 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
   1690 	void *instruction, sljit_s32 size)
   1691 {
   1692 	CHECK_ERROR();
   1693 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
   1694 
   1695 	return push_inst(compiler, *(sljit_ins*)instruction);
   1696 }
   1697 
   1698 /* --------------------------------------------------------------------- */
   1699 /*  Floating point operators                                             */
   1700 /* --------------------------------------------------------------------- */
   1701 
/* Report whether the FPU may be used. A build can force the answer by
   defining SLJIT_IS_FPU_AVAILABLE; otherwise the FPU is assumed present. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	/* Compile-time override supplied by the embedder. */
	return SLJIT_IS_FPU_AVAILABLE;
#else
	/* Available by default. */
	return 1;
#endif
}
   1711 
   1712 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
   1713 #define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)
   1714 
   1715 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
   1716 #define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
   1717 #else
   1718 #define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))
   1719 
   1720 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
   1721 #define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
   1722 #define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
   1723 #else
   1724 #define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
   1725 #define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
   1726 #endif
   1727 
   1728 #endif /* SLJIT_CONFIG_PPC_64 */
   1729 
/* Convert a floating point value to a signed integer, truncating toward
   zero. The conversion is performed into TMP_FREG1 with fctiwz/fctidz;
   because there is no direct FPR->GPR move here, results destined for a
   register are bounced through the FLOAT_TMP_MEM_OFFSET stack slot. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (src & SLJIT_MEM) {
		/* We can ignore the temporary data store on the stack from caching point of view. */
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
		src = TMP_FREG1;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	op = GET_OPCODE(op);
	/* fctiwz produces a 32 bit result, fctidz a 64 bit result. */
	FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (op == SLJIT_CONV_SW_FROM_F64) {
		if (FAST_IS_REG(dst)) {
			/* Spill the 64 bit result to the stack, then reload it as an integer. */
			FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
			return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
		}
		return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
	}

#else
	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;
#endif

	/* 32 bit result: stfiwx stores only the low word of the FPR. */
	if (FAST_IS_REG(dst)) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
		return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
	}

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* stfiwx only supports register + register addressing, so the
	   destination address must be reduced to a base/index pair; from
	   here on `dstw` is reused to hold the index *register*. */
	if (dst & OFFS_REG_MASK) {
		dstw &= 0x3;
		if (dstw) {
			/* Shift the index register left by the scale before use. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
#endif
			dstw = TMP_REG1;
		}
		else
			dstw = OFFS_REG(dst);
	}
	else {
		if ((dst & REG_MASK) && !dstw) {
			/* Plain [reg] address: use the base as the index, r0 as base. */
			dstw = dst & REG_MASK;
			dst = 0;
		}
		else {
			/* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */
			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
			dstw = TMP_REG1;
		}
	}

	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
}
   1797 
/* Convert a signed integer (register, memory, or immediate operand) to a
   floating point value. PPC64 uses the fcfid instruction; PPC32 has no
   integer-to-float instruction and constructs the result with the classic
   2^52 magic-number trick described inline below. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_IMM) {
		/* For a 32 bit conversion the immediate is sign-extended first. */
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}
	else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
		/* Sign-extend the 32 bit source into TMP_REG1. */
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
		else
			FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		src = TMP_REG1;
	}

	/* No direct GPR->FPR move: pass the integer through the stack slot. */
	if (FAST_IS_REG(src)) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
	}
	else
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));

	FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	/* Round to single precision when a float result was requested. */
	if (op & SLJIT_F32_OP)
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#else

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	/* When set, the sign bit of the input still has to be flipped at
	   runtime; immediates get the xor folded in at compile time. */
	sljit_s32 invert_sign = 1;

	if (src & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
		src = TMP_REG1;
		invert_sign = 0;
	}
	else if (!FAST_IS_REG(src)) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
		src = TMP_REG1;
	}

	/* First, a special double floating point value is constructed: (2^53 + (input xor (2^31)))
	   The double precision format has exactly 53 bit precision, so the lower 32 bit represents
	   the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
	   to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
	   point value, we need to subtract 2^53 + 2^31 from the constructed value. */
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
	if (invert_sign)
		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));

	/* result = (2^53 + shifted input) - (2^53 + 2^31). */
	FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	/* Round to single precision when a float result was requested. */
	if (op & SLJIT_F32_OP)
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#endif
}
   1875 
/* Emit an unordered floating point compare of src1 and src2. Memory
   operands are loaded into the temporary float registers first; the
   second argument pair of the first load is a prefetch hint for the
   address cache. FCMPU writes the CR field selected by CRD(4) —
   presumably CR1, since the 4 + n bit indices used by get_bo_bi_flags
   and sljit_emit_op_flags match it (NOTE(review): confirm against the
   CRD macro definition). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
		src2 = TMP_FREG2;
	}

	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
}
   1892 
/* Emit a single-operand floating point operation (move, negate, abs, or
   a float<->double conversion). For SLJIT_CONV_F64_FROM_F32 the
   SLJIT_F32_OP bit is toggled before computing the data transfer flags so
   that the memory load below uses the *source* format; the switch toggles
   it back before selecting the operation. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	/* Make FLOAT_DATA(op) describe the source format for the load. */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_CONV_F64_FROM_F32:
		/* Restore the original SLJIT_F32_OP bit. */
		op ^= SLJIT_F32_OP;
		if (op & SLJIT_F32_OP) {
			/* double -> float needs an explicit rounding step. */
			FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
			break;
		}
		/* Fall through. */
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
			else
				/* Destination is memory: store directly from src below. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
		break;
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
	return SLJIT_SUCCESS;
}
   1944 
/* Emit a two-operand floating point operation (add/sub/mul/div).
   Memory sources are first tried with the fast (single-instruction)
   addressing path; sources that need the slow path are deferred —
   ALT_FORM1/ALT_FORM2 mark src1/src2 respectively — so that getput_arg
   can pick a load order that best reuses the address cache. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;

	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
			FAIL_IF(compiler->error);
			src1 = TMP_FREG1;
		} else
			flags |= ALT_FORM1;	/* src1 load deferred to the slow path. */
	}

	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
			FAIL_IF(compiler->error);
			src2 = TMP_FREG2;
		} else
			flags |= ALT_FORM2;	/* src2 load deferred to the slow path. */
	}

	/* Both loads deferred: order them so the address cache helps the
	   second load (and, if possible, the final store to dst too). */
	if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & ALT_FORM1)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
	else if (flags & ALT_FORM2)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));

	if (flags & ALT_FORM1)
		src1 = TMP_FREG1;
	if (flags & ALT_FORM2)
		src2 = TMP_FREG2;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
		break;
	}

	/* dst_r == TMP_FREG2 implies dst is a memory operand: store result. */
	if (dst_r == TMP_FREG2)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));

	return SLJIT_SUCCESS;
}
   2022 
   2023 #undef FLOAT_DATA
   2024 #undef SELECT_FOP
   2025 
   2026 /* --------------------------------------------------------------------- */
   2027 /*  Other instructions                                                   */
   2028 /* --------------------------------------------------------------------- */
   2029 
   2030 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
   2031 {
   2032 	CHECK_ERROR();
   2033 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
   2034 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2035 
   2036 	/* For UNUSED dst. Uncommon, but possible. */
   2037 	if (dst == SLJIT_UNUSED)
   2038 		return SLJIT_SUCCESS;
   2039 
   2040 	if (FAST_IS_REG(dst))
   2041 		return push_inst(compiler, MFLR | D(dst));
   2042 
   2043 	/* Memory. */
   2044 	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
   2045 	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
   2046 }
   2047 
   2048 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
   2049 {
   2050 	CHECK_ERROR();
   2051 	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
   2052 	ADJUST_LOCAL_OFFSET(src, srcw);
   2053 
   2054 	if (FAST_IS_REG(src))
   2055 		FAIL_IF(push_inst(compiler, MTLR | S(src)));
   2056 	else {
   2057 		if (src & SLJIT_MEM)
   2058 			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
   2059 		else if (src & SLJIT_IMM)
   2060 			FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
   2061 		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
   2062 	}
   2063 	return push_inst(compiler, BLR);
   2064 }
   2065 
   2066 /* --------------------------------------------------------------------- */
   2067 /*  Conditional instructions                                             */
   2068 /* --------------------------------------------------------------------- */
   2069 
   2070 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   2071 {
   2072 	struct sljit_label *label;
   2073 
   2074 	CHECK_ERROR_PTR();
   2075 	CHECK_PTR(check_sljit_emit_label(compiler));
   2076 
   2077 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2078 		return compiler->last_label;
   2079 
   2080 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   2081 	PTR_FAIL_IF(!label);
   2082 	set_label(label, compiler);
   2083 	return label;
   2084 }
   2085 
/* Translate an sljit condition code into the BO (bits at << 21) and BI
   (bits at << 16) fields of a PowerPC conditional branch instruction.
   BO value 12 branches when the selected CR bit is set, BO value 4 when
   it is clear, and BO value 20 branches unconditionally. BI selects the
   CR bit: 0..3 for the integer compare field (LT, GT, EQ, SO/overflow)
   and 4 + n for the field written by FCMPU in sljit_emit_fop1_cmp. */
static sljit_ins get_bo_bi_flags(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
		return (12 << 21) | (2 << 16);

	case SLJIT_NOT_EQUAL:
		return (4 << 21) | (2 << 16);

	case SLJIT_LESS:
	case SLJIT_SIG_LESS:
		return (12 << 21) | (0 << 16);

	case SLJIT_GREATER_EQUAL:
	case SLJIT_SIG_GREATER_EQUAL:
		return (4 << 21) | (0 << 16);

	case SLJIT_GREATER:
	case SLJIT_SIG_GREATER:
		return (12 << 21) | (1 << 16);

	case SLJIT_LESS_EQUAL:
	case SLJIT_SIG_LESS_EQUAL:
		return (4 << 21) | (1 << 16);

	case SLJIT_LESS_F64:
		return (12 << 21) | ((4 + 0) << 16);

	case SLJIT_GREATER_EQUAL_F64:
		return (4 << 21) | ((4 + 0) << 16);

	case SLJIT_GREATER_F64:
		return (12 << 21) | ((4 + 1) << 16);

	case SLJIT_LESS_EQUAL_F64:
		return (4 << 21) | ((4 + 1) << 16);

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return (12 << 21) | (3 << 16);

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return (4 << 21) | (3 << 16);

	case SLJIT_EQUAL_F64:
		return (12 << 21) | ((4 + 2) << 16);

	case SLJIT_NOT_EQUAL_F64:
		return (4 << 21) | ((4 + 2) << 16);

	case SLJIT_UNORDERED_F64:
		return (12 << 21) | ((4 + 3) << 16);

	case SLJIT_ORDERED_F64:
		return (4 << 21) | ((4 + 3) << 16);

	default:
		/* Unconditional jumps and calls: branch always. */
		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
		return (20 << 21);
	}
}
   2148 
/* Emit a (possibly conditional) jump whose target is patched later.
   A full-width constant load into TMP_CALL_REG plus a bcctr is emitted
   so the target can be rewritten to any address afterwards. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins bo_bi_flags;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	bo_bi_flags = get_bo_bi_flags(type & 0xff);
	/* NOTE(review): get_bo_bi_flags never returns 0 for a checked type,
	   so this guard appears to be dead defensive code. */
	if (!bo_bi_flags)
		return NULL;

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* In PPC, we don't need to touch the arguments. */
	if (type < SLJIT_JUMP)
		jump->flags |= IS_COND;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
	if (type >= SLJIT_CALL0)
		jump->flags |= IS_CALL;
#endif

	/* Placeholder target; rewritten when the jump address is resolved. */
	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
	jump->addr = compiler->size;
	/* Bit 0 (LK) set for calls so the return address is saved in LR. */
	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
	return jump;
}
   2180 
/* Emit an indirect jump or call through a register, memory operand, or
   immediate address. The target ends up in CTR; immediate targets go
   through a rewritable constant load so they can be patched. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump = NULL;
	sljit_s32 src_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src)) {
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		/* Calls must pass the entry address in TMP_CALL_REG on this ABI;
		   OR rA,rS,rS is the canonical PPC register move. */
		if (type >= SLJIT_CALL0) {
			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
			src_r = TMP_CALL_REG;
		}
		else
			src_r = src;
#else
		src_r = src;
#endif
	} else if (src & SLJIT_IMM) {
		/* Record a patchable jump so the constant can be rewritten. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF(!jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		if (type >= SLJIT_CALL0)
			jump->flags |= IS_CALL;
#endif
		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
		src_r = TMP_CALL_REG;
	}
	else {
		/* Memory operand: load the target address first. */
		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
		src_r = TMP_CALL_REG;
	}

	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
	if (jump)
		jump->addr = compiler->size;
	/* BO 20 = branch always; bit 0 (LK) set for calls to save LR. */
	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
}
   2223 
   2224 /* Get a bit from CR, all other bits are zeroed. */
   2225 #define GET_CR_BIT(bit, dst) \
   2226 	FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1)));
   2227 
   2228 #define INVERT_BIT(dst) \
   2229 	FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1));
   2230 
/* Materialize a condition flag as 0/1 in dst, optionally combining it
   with dst via an arithmetic/logical op (op >= SLJIT_ADD). The whole CR
   is read with mfcr, then GET_CR_BIT rotates the relevant bit into the
   low position; inverted conditions additionally xor the result with 1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_s32 reg, input_flags;
	sljit_s32 flags = GET_ALL_FLAGS(op);
	sljit_sw original_dstw = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);
	/* Plain moves may target dst directly; combined ops build the flag
	   value in TMP_REG2 and apply the second operation afterwards. */
	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;
	/* For a combined op with a memory source, preload it into TMP_REG1
	   before mfcr clobbering considerations arise. */
	if (op >= SLJIT_ADD && (src & SLJIT_MEM)) {
		ADJUST_LOCAL_OFFSET(src, srcw);
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
#else
		input_flags = WORD_DATA;
#endif
		FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
		src = TMP_REG1;
		srcw = 0;
	}

	/* Copy the full condition register into a GPR. */
	FAIL_IF(push_inst(compiler, MFCR | D(reg)));

	switch (type & 0xff) {
	case SLJIT_EQUAL:
		GET_CR_BIT(2, reg);
		break;

	case SLJIT_NOT_EQUAL:
		GET_CR_BIT(2, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_LESS:
	case SLJIT_SIG_LESS:
		GET_CR_BIT(0, reg);
		break;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_SIG_GREATER_EQUAL:
		GET_CR_BIT(0, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_GREATER:
	case SLJIT_SIG_GREATER:
		GET_CR_BIT(1, reg);
		break;

	case SLJIT_LESS_EQUAL:
	case SLJIT_SIG_LESS_EQUAL:
		GET_CR_BIT(1, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_LESS_F64:
		GET_CR_BIT(4 + 0, reg);
		break;

	case SLJIT_GREATER_EQUAL_F64:
		GET_CR_BIT(4 + 0, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_GREATER_F64:
		GET_CR_BIT(4 + 1, reg);
		break;

	case SLJIT_LESS_EQUAL_F64:
		GET_CR_BIT(4 + 1, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		GET_CR_BIT(3, reg);
		break;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		GET_CR_BIT(3, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_EQUAL_F64:
		GET_CR_BIT(4 + 2, reg);
		break;

	case SLJIT_NOT_EQUAL_F64:
		GET_CR_BIT(4 + 2, reg);
		INVERT_BIT(reg);
		break;

	case SLJIT_UNORDERED_F64:
		GET_CR_BIT(4 + 3, reg);
		break;

	case SLJIT_ORDERED_F64:
		GET_CR_BIT(4 + 3, reg);
		INVERT_BIT(reg);
		break;

	default:
		SLJIT_UNREACHABLE();
		break;
	}

	if (op < SLJIT_ADD) {
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		if (op == SLJIT_MOV)
			input_flags = WORD_DATA;
		else {
			op = SLJIT_MOV_U32;
			input_flags = INT_DATA;
		}
#else
		op = SLJIT_MOV;
		input_flags = WORD_DATA;
#endif
		/* reg != TMP_REG2 means the flag was built directly in dst. */
		if (reg != TMP_REG2)
			return SLJIT_SUCCESS;
		return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
	}

	/* Combined op: apply `op` to src and the flag value in TMP_REG2. */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0);
}
   2373 
   2374 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
   2375 {
   2376 	struct sljit_const *const_;
   2377 	sljit_s32 reg;
   2378 
   2379 	CHECK_ERROR_PTR();
   2380 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
   2381 	ADJUST_LOCAL_OFFSET(dst, dstw);
   2382 
   2383 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
   2384 	PTR_FAIL_IF(!const_);
   2385 	set_const(const_, compiler);
   2386 
   2387 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;
   2388 
   2389 	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
   2390 
   2391 	if (dst & SLJIT_MEM)
   2392 		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
   2393 	return const_;
   2394 }
   2395