Home | History | Annotate | Line # | Download | only in sljit_src
sljitNativeARM_32.c revision 1.1.1.3
      1 /*
      2  *    Stack-less Just-In-Time compiler
      3  *
      4  *    Copyright Zoltan Herczeg (hzmester (at) freemail.hu). All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without modification, are
      7  * permitted provided that the following conditions are met:
      8  *
      9  *   1. Redistributions of source code must retain the above copyright notice, this list of
     10  *      conditions and the following disclaimer.
     11  *
     12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
     13  *      of conditions and the following disclaimer in the documentation and/or other materials
     14  *      provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
     17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
     19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
     21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     25  */
     26 
     27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
     28 {
     29 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
     30 	return "ARMv7" SLJIT_CPUINFO;
     31 #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
     32 	return "ARMv5" SLJIT_CPUINFO;
     33 #else
     34 #error "Internal error: Unknown ARM architecture"
     35 #endif
     36 }
     37 
/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC		(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Temporary (scratch) floating point registers. */
#define TMP_FREG1	(0)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* In ARM instruction words.
   Cache lines are usually 32 byte aligned. */
#define CONST_POOL_ALIGNMENT	8
/* Sentinel for compiler->cpool_diff: no pending constant pool entry. */
#define CONST_POOL_EMPTY	0xffffffff

/* Rounds ptr up to the next CONST_POOL_ALIGNMENT instruction-word boundary. */
#define ALIGN_INSTRUCTION(ptr) \
	(sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
/* Converts a byte range limit into an instruction-word count, leaving
   headroom for the worst case alignment padding of the pool. */
#define MAX_DIFFERENCE(max_diff) \
	(((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))
     55 
/* Maps SLJIT virtual register numbers to ARM machine register numbers.
   See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 14, 12, 15
};

/* Place a mapped register number into the Rm (bits 0-3), Rd (bits 12-15)
   or Rn (bits 16-19) field of an ARM instruction word. */
#define RM(rm) (reg_map[rm])
#define RD(rd) (reg_map[rd] << 12)
#define RN(rn) (reg_map[rn] << 16)
     64 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* The instruction includes the AL condition.
   INST_NAME - CONDITIONAL remove this flag. */
#define COND_MASK	0xf0000000
#define CONDITIONAL	0xe0000000
/* Marker word for the second pass: low bits hold the pool entry count. */
#define PUSH_POOL	0xff000000

/* DP - Data Processing instruction (use with EMIT_DATA_PROCESS_INS). */
#define ADC_DP		0x5
#define ADD_DP		0x4
#define AND_DP		0x0
#define B		0xea000000
#define BIC_DP		0xe
#define BL		0xeb000000
#define BLX		0xe12fff30
#define BX		0xe12fff10
#define CLZ		0xe16f0f10
#define CMP_DP		0xa
#define BKPT		0xe1200070
#define EOR_DP		0x1
#define MOV_DP		0xd
#define MUL		0xe0000090
#define MVN_DP		0xf
#define NOP		0xe1a00000
#define ORR_DP		0xc
#define PUSH		0xe92d0000
#define POP		0xe8bd0000
#define RSB_DP		0x3
#define RSC_DP		0x7
#define SBC_DP		0x6
#define SMULL		0xe0c00090
#define SUB_DP		0x2
#define UMULL		0xe0800090
#define VABS_F32	0xeeb00ac0
#define VADD_F32	0xee300a00
#define VCMP_F32	0xeeb40a40
#define VCVT_F32_S32	0xeeb80ac0
#define VCVT_F64_F32	0xeeb70ac0
#define VCVT_S32_F32	0xeebd0ac0
#define VDIV_F32	0xee800a00
#define VMOV_F32	0xeeb00a40
#define VMOV		0xee000a10
#define VMRS		0xeef1fa10
#define VMUL_F32	0xee200a00
#define VNEG_F32	0xeeb10a40
#define VSTR_F32	0xed000a00
#define VSUB_F32	0xee300a40

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Arm v7 specific instructions. */
#define MOVW		0xe3000000
#define MOVT		0xe3400000
#define SXTB		0xe6af0070
#define SXTH		0xe6bf0070
#define UXTB		0xe6ef0070
#define UXTH		0xe6ff0070
#endif
    125 
    126 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
    127 
/* Flushes all collected literals into the instruction stream.
   Emits a PUSH_POOL marker word followed by alignment padding and the
   literal values; the second generation pass (sljit_generate_code)
   recognizes the marker and lays out the real pool. */
static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
	/* Pushing the constant pool into the instruction stream. */
	sljit_uw* inst;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_s32 i;

	/* The label could point to the address after the constant pool. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;

	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
	/* Marker word: PUSH_POOL (0xff000000) with the entry count. */
	inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!inst);
	compiler->size++;
	*inst = 0xff000000 | compiler->cpool_fill;

	/* Reserve zeroed words so the pool can be aligned in the second pass. */
	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = 0;
	}

	/* Copy the collected literal values after the padding. */
	cpool_ptr = compiler->cpool;
	cpool_end = cpool_ptr + compiler->cpool_fill;
	while (cpool_ptr < cpool_end) {
		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = *cpool_ptr++;
	}
	/* The pool is empty again. */
	compiler->cpool_diff = CONST_POOL_EMPTY;
	compiler->cpool_fill = 0;
	return SLJIT_SUCCESS;
}
    165 
    166 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
    167 {
    168 	sljit_uw* ptr;
    169 
    170 	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
    171 		FAIL_IF(push_cpool(compiler));
    172 
    173 	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
    174 	FAIL_IF(!ptr);
    175 	compiler->size++;
    176 	*ptr = inst;
    177 	return SLJIT_SUCCESS;
    178 }
    179 
/* Emits a pc-relative load whose literal is placed in the constant
   pool. An existing non-unique pool entry with the same value is
   reused when possible. The low 12 bits of the emitted word hold the
   pool index for now; patch_pc_relative_loads replaces it with the
   real byte offset in the second pass. */
static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
{
	sljit_uw* ptr;
	sljit_uw cpool_index = CPOOL_SIZE;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_u8* cpool_unique_ptr;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));
	else if (compiler->cpool_fill > 0) {
		/* Search for a shareable (non-unique) entry with the same value. */
		cpool_ptr = compiler->cpool;
		cpool_end = cpool_ptr + compiler->cpool_fill;
		cpool_unique_ptr = compiler->cpool_unique;
		do {
			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
				cpool_index = cpool_ptr - compiler->cpool;
				break;
			}
			cpool_ptr++;
			cpool_unique_ptr++;
		} while (cpool_ptr < cpool_end);
	}

	if (cpool_index == CPOOL_SIZE) {
		/* Must allocate a new entry in the literal pool. */
		if (compiler->cpool_fill < CPOOL_SIZE) {
			cpool_index = compiler->cpool_fill;
			compiler->cpool_fill++;
		}
		else {
			/* Pool full: flush it and start a fresh one. */
			FAIL_IF(push_cpool(compiler));
			cpool_index = 0;
			compiler->cpool_fill = 1;
		}
	}

	/* The instruction must have a free 12-bit offset field. */
	SLJIT_ASSERT((inst & 0xfff) == 0);
	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | cpool_index;

	compiler->cpool[cpool_index] = literal;
	compiler->cpool_unique[cpool_index] = 0;
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
    229 
/* Emits a pc-relative load with a literal that always gets its own pool
   entry; marking it unique prevents push_inst_with_literal from sharing
   the slot with equal-valued literals. */
static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
{
	sljit_uw* ptr;
	/* Flush when the pool is out of range or already full. */
	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
		FAIL_IF(push_cpool(compiler));

	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!ptr);
	compiler->size++;
	/* Low 12 bits temporarily hold the pool index (patched later). */
	*ptr = inst | compiler->cpool_fill;

	compiler->cpool[compiler->cpool_fill] = literal;
	compiler->cpool_unique[compiler->cpool_fill] = 1;
	compiler->cpool_fill++;
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
    249 
    250 static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
    251 {
    252 	/* Place for at least two instruction (doesn't matter whether the first has a literal). */
    253 	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
    254 		return push_cpool(compiler);
    255 	return SLJIT_SUCCESS;
    256 }
    257 
/* Emits a BLX to TMP_REG2. Call prepare_blx first: no pool flush may
   occur between the address-loading instruction and this one. */
static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
	/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
	return push_inst(compiler, BLX | RM(TMP_REG2));
}
    264 
/* Rewrites the pc-relative loads in [last_pc_patch, code_ptr) so their
   offsets address the final, compacted constant pool at const_pool.
   Entries not referenced by any load are dropped; const_pool[i] is set
   to the remapped index of entry i (consumed by
   resolve_const_pool_index). Returns the number of literals kept. */
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
	sljit_uw diff;
	sljit_uw ind;
	sljit_uw counter = 0;
	sljit_uw* clear_const_pool = const_pool;
	sljit_uw* clear_const_pool_end = const_pool + cpool_size;

	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
	/* Set unused flag for all literals in the constant pool.
	   I.e.: unused literals can belong to branches, which can be encoded as B or BL.
	   We can "compress" the constant pool by discarding these literals. */
	while (clear_const_pool < clear_const_pool_end)
		*clear_const_pool++ = (sljit_uw)(-1);

	while (last_pc_patch < code_ptr) {
		/* Data transfer instruction with Rn == r15. */
		if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
			/* diff: words between this load and the pool start;
			   ind: pool index stored in the 12-bit offset field. */
			diff = const_pool - last_pc_patch;
			ind = (*last_pc_patch) & 0xfff;

			/* Must be a load instruction with immediate offset. */
			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
			/* First reference to this entry: assign its compacted slot. */
			if ((sljit_s32)const_pool[ind] < 0) {
				const_pool[ind] = counter;
				ind = counter;
				counter++;
			}
			else
				ind = const_pool[ind];

			SLJIT_ASSERT(diff >= 1);
			if (diff >= 2 || ind > 0) {
				/* Positive offset; -2 accounts for the pc being two
				   words ahead of the load instruction. */
				diff = (diff + ind - 2) << 2;
				SLJIT_ASSERT(diff <= 0xfff);
				*last_pc_patch = (*last_pc_patch & ~0xfff) | diff;
			}
			else
				/* Literal is just before pc: clear the add (U) bit and
				   use a -4 byte offset instead. */
				*last_pc_patch = (*last_pc_patch & ~(0xfff | (1 << 23))) | 0x004;
		}
		last_pc_patch++;
	}
	return counter;
}
    309 
/* In some rare occasions we may need future patches. The probability is close to 0 in practice. */
struct future_patch {
	struct future_patch* next;	/* singly linked list of pending patches */
	sljit_s32 index;		/* pool slot whose remap value was saved */
	sljit_s32 value;		/* the saved remap value of that slot */
};
    316 
/* Stores the literal *buf_ptr at its final (compacted) position in the
   constant pool. cpool_start_address[i] holds the remap index computed
   by patch_pc_relative_loads; when writing would overwrite a slot whose
   remap value has not been consumed yet, that value is saved in the
   first_patch list and picked up on a later call. Returns
   SLJIT_ERR_ALLOC_FAILED only when a future_patch allocation fails. */
static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
	sljit_s32 value;
	struct future_patch *curr_patch, *prev_patch;

	SLJIT_UNUSED_ARG(compiler);

	/* Using the values generated by patch_pc_relative_loads. */
	if (!*first_patch)
		value = (sljit_s32)cpool_start_address[cpool_current_index];
	else {
		/* A previously saved remap value may apply to this index. */
		curr_patch = *first_patch;
		prev_patch = NULL;
		while (1) {
			if (!curr_patch) {
				value = (sljit_s32)cpool_start_address[cpool_current_index];
				break;
			}
			if ((sljit_uw)curr_patch->index == cpool_current_index) {
				/* Consume the saved value and unlink the patch. */
				value = curr_patch->value;
				if (prev_patch)
					prev_patch->next = curr_patch->next;
				else
					*first_patch = curr_patch->next;
				SLJIT_FREE(curr_patch, compiler->allocator_data);
				break;
			}
			prev_patch = curr_patch;
			curr_patch = curr_patch->next;
		}
	}

	/* value < 0 means the entry was unused and has been discarded. */
	if (value >= 0) {
		if ((sljit_uw)value > cpool_current_index) {
			/* Destination slot still holds an unconsumed remap value:
			   save it before overwriting. */
			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
			if (!curr_patch) {
				/* Allocation failed: release the whole patch list. */
				while (*first_patch) {
					curr_patch = *first_patch;
					*first_patch = (*first_patch)->next;
					SLJIT_FREE(curr_patch, compiler->allocator_data);
				}
				return SLJIT_ERR_ALLOC_FAILED;
			}
			curr_patch->next = *first_patch;
			curr_patch->index = value;
			curr_patch->value = cpool_start_address[value];
			*first_patch = curr_patch;
		}
		cpool_start_address[value] = *buf_ptr;
	}
	return SLJIT_SUCCESS;
}
    369 
    370 #else
    371 
    372 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
    373 {
    374 	sljit_uw* ptr;
    375 
    376 	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
    377 	FAIL_IF(!ptr);
    378 	compiler->size++;
    379 	*ptr = inst;
    380 	return SLJIT_SUCCESS;
    381 }
    382 
    383 static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
    384 {
    385 	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
    386 	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
    387 }
    388 
    389 #endif
    390 
/* Tries to shorten the jump at code_ptr into a direct B/BL when the
   target is within the 24 bit (+-32 MB) branch range. Returns nonzero
   when a shorter form was written and the caller must discard the now
   unused trailing word(s); rewritable jumps are never shortened. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	if (jump->flags & IS_BL)
		code_ptr--;

	/* code_ptr + 2: the pc reads two instruction words ahead. */
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (jump->flags & IS_BL) {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			/* Keep the condition of the following instruction. */
			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
			jump->flags |= PATCH_B;
			return 1;
		}
	}
	else {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
			jump->flags |= PATCH_B;
		}
	}
#else
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset);
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		/* Replace the preceding two-word sequence (presumably the
		   movw/movt pair, see emit_imm) with a single branch. */
		code_ptr -= 2;
		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
		jump->flags |= PATCH_B;
		return 1;
	}
#endif
	return 0;
}
    447 
/* Redirects a rewritable jump to new_addr after code generation.
   ARMv5: jump_ptr addresses a two-word descriptor { address of the
   patchable instruction, original mov/ldr-to-pc instruction word }.
   When new_addr is in branch range the instruction becomes a direct
   B/BL; otherwise the constant-load form is restored and the literal
   in the pool is overwritten.
   ARMv7: jump_ptr addresses a movw/movt pair that is re-encoded. */
static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_uw *ptr = (sljit_uw *)jump_ptr;
	sljit_uw *inst = (sljit_uw *)ptr[0];
	sljit_uw mov_pc = ptr[1];
	/* bl: the saved instruction does not write pc directly. */
	sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
	sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);

	if (diff <= 0x7fffff && diff >= -0x800000) {
		/* Turn to branch. */
		if (!bl) {
			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
			if (flush_cache) {
				inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 1);
			}
		} else {
			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
			inst[1] = NOP;
			if (flush_cache) {
				inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 2);
			}
		}
	} else {
		/* Get the position of the constant. */
		if (mov_pc & (1 << 23))
			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
		else
			ptr = inst + 1;

		/* Restore the original constant-load form if it was branched. */
		if (*inst != mov_pc) {
			inst[0] = mov_pc;
			if (!bl) {
				if (flush_cache) {
					inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 1);
				}
			} else {
				inst[1] = BLX | RM(TMP_REG1);
				if (flush_cache) {
					inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 2);
				}
			}
		}
		/* Update the literal the load reads from. */
		*ptr = new_addr;
	}
#else
	sljit_uw *inst = (sljit_uw*)jump_ptr;
	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);
	if (flush_cache) {
		inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif
}
    508 
/* Forward declaration: encodes imm as an ARM data-processing immediate
   operand, or returns 0 when imm cannot be encoded (see usage below). */
static sljit_uw get_imm(sljit_uw imm);
    510 
/* Rewrites the value produced by a rewritable constant after code
   generation. ARMv5: tries to replace the pc-relative load with a
   single mov/mvn immediate; when the value is not encodable, the load
   form is restored and the literal is updated. ARMv7: re-encodes the
   movw/movt pair. */
static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_uw *ptr = (sljit_uw*)addr;
	sljit_uw *inst = (sljit_uw*)ptr[0];
	sljit_uw ldr_literal = ptr[1];
	sljit_uw src2;

	/* Try a one-instruction mov immediate (0xe3a00000). */
	src2 = get_imm(new_constant);
	if (src2) {
		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;
		if (flush_cache) {
			inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Try a one-instruction mvn immediate (0xe3e00000). */
	src2 = get_imm(~new_constant);
	if (src2) {
		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;
		if (flush_cache) {
			inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Get the position of the constant. */
	if (ldr_literal & (1 << 23))
		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
	else
		ptr = inst + 1;

	/* Restore the pc-relative load if it had been replaced. */
	if (*inst != ldr_literal) {
		*inst = ldr_literal;
		if (flush_cache) {
			inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
	}
	*ptr = new_constant;
#else
	sljit_uw *inst = (sljit_uw*)addr;
	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);
	if (flush_cache) {
		inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif
}
    563 
/* Second pass of code generation: copies the buffered instruction
   words into executable memory, materializes constant pools (ARMv5),
   shortens jumps where possible, resolves label addresses, and patches
   jump/const targets. Returns the executable code or NULL on error. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_uw *code;
	sljit_uw *code_ptr;
	sljit_uw *buf_ptr;
	sljit_uw *buf_end;
	sljit_uw size;
	sljit_uw word_count;
	sljit_sw executable_offset;
	sljit_sw jump_addr;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_uw cpool_size;
	sljit_uw cpool_skip_alignment;
	sljit_uw cpool_current_index;
	sljit_uw *cpool_start_address;
	sljit_uw *last_pc_patch;
	struct future_patch *first_patch;
#endif

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* Each patchable sequence may need two extra words; the trailing
	   constant pool needs room for its entries plus alignment. */
	size = compiler->size + (compiler->patches << 1);
	if (compiler->cpool_fill > 0)
		size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
#else
	size = compiler->size;
#endif
	code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	cpool_size = 0;
	cpool_skip_alignment = 0;
	cpool_current_index = 0;
	cpool_start_address = NULL;
	first_patch = NULL;
	last_pc_patch = code;
#endif

	code_ptr = code;
	word_count = 0;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	/* A label at offset 0 resolves to the start of the code. */
	if (label && label->size == 0) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
		label = label->next;
	}

	do {
		buf_ptr = (sljit_uw*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			word_count++;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			if (cpool_size > 0) {
				/* Currently consuming the words of a flushed pool. */
				if (cpool_skip_alignment > 0) {
					buf_ptr++;
					cpool_skip_alignment--;
				}
				else {
					if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
						SLJIT_FREE_EXEC(code);
						compiler->error = SLJIT_ERR_ALLOC_FAILED;
						return NULL;
					}
					buf_ptr++;
					if (++cpool_current_index >= cpool_size) {
						SLJIT_ASSERT(!first_patch);
						cpool_size = 0;
						if (label && label->size == word_count) {
							/* Points after the current instruction. */
							label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
							label->size = code_ptr - code;
							label = label->next;
						}
					}
				}
			}
			else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
#endif
				/* Ordinary instruction word: copy it through. */
				*code_ptr = *buf_ptr++;
				/* These structures are ordered by their address. */
				SLJIT_ASSERT(!label || label->size >= word_count);
				SLJIT_ASSERT(!jump || jump->addr >= word_count);
				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
				if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
					if (detect_jump_type(jump, code_ptr, code, executable_offset))
						code_ptr--;
					jump->addr = (sljit_uw)code_ptr;
#else
					jump->addr = (sljit_uw)(code_ptr - 2);
					if (detect_jump_type(jump, code_ptr, code, executable_offset))
						code_ptr -= 2;
#endif
					jump = jump->next;
				}
				if (label && label->size == word_count) {
					/* code_ptr can be affected above. */
					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
					label->size = (code_ptr + 1) - code;
					label = label->next;
				}
				if (const_ && const_->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
					const_->addr = (sljit_uw)code_ptr;
#else
					const_->addr = (sljit_uw)(code_ptr - 1);
#endif
					const_ = const_->next;
				}
				code_ptr++;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			}
			else {
				/* PUSH_POOL marker: lay out the constant pool here.
				   Fortunately, no need to shift. */
				cpool_size = *buf_ptr++ & ~PUSH_POOL;
				SLJIT_ASSERT(cpool_size > 0);
				cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
				cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
				if (cpool_current_index > 0) {
					/* Unconditional branch. */
					*code_ptr = B | (((cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
					code_ptr = cpool_start_address + cpool_current_index;
				}
				cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
				cpool_current_index = 0;
				last_pc_patch = code_ptr;
			}
#endif
		} while (buf_ptr < buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	SLJIT_ASSERT(cpool_size == 0);
	if (compiler->cpool_fill > 0) {
		/* Emit the still pending literals as a trailing pool. */
		cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
		cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
		if (cpool_current_index > 0)
			code_ptr = cpool_start_address + cpool_current_index;

		buf_ptr = compiler->cpool;
		buf_end = buf_ptr + compiler->cpool_fill;
		cpool_current_index = 0;
		while (buf_ptr < buf_end) {
			if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
				SLJIT_FREE_EXEC(code);
				compiler->error = SLJIT_ERR_ALLOC_FAILED;
				return NULL;
			}
			buf_ptr++;
			cpool_current_index++;
		}
		SLJIT_ASSERT(!first_patch);
	}
#endif

	jump = compiler->jumps;
	while (jump) {
		buf_ptr = (sljit_uw *)jump->addr;

		if (jump->flags & PATCH_B) {
			/* Short form: fill in the 24 bit signed word offset. */
			jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
			if (!(jump->flags & JUMP_ADDR)) {
				SLJIT_ASSERT(jump->flags & JUMP_LABEL);
				SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000);
				*buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff;
			}
			else {
				SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000);
				*buf_ptr |= (((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff;
			}
		}
		else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			/* Build the two-word descriptor consumed by
			   inline_set_jump_addr, then set the initial target. */
			jump->addr = (sljit_uw)code_ptr;
			code_ptr[0] = (sljit_uw)buf_ptr;
			code_ptr[1] = *buf_ptr;
			inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
			code_ptr += 2;
#else
			inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
		}
		else {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			/* Store the target address in the literal read by the load. */
			if (jump->flags & IS_BL)
				buf_ptr--;
			if (*buf_ptr & (1 << 23))
				buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
			else
				buf_ptr += 1;
			*buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
#else
			inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
		}
		jump = jump->next;
	}

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	const_ = compiler->consts;
	while (const_) {
		/* Build the descriptor consumed by inline_set_const. */
		buf_ptr = (sljit_uw*)const_->addr;
		const_->addr = (sljit_uw)code_ptr;

		code_ptr[0] = (sljit_uw)buf_ptr;
		code_ptr[1] = *buf_ptr;
		if (*buf_ptr & (1 << 23))
			buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
		else
			buf_ptr += 1;
		/* Set the value again (can be a simple constant). */
		inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
		code_ptr += 2;

		const_ = const_->next;
	}
#endif

	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw);

	code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	return code;
}
    815 
    816 /* --------------------------------------------------------------------- */
    817 /*  Entry, exit                                                          */
    818 /* --------------------------------------------------------------------- */
    819 
/* Creates an index in data_transfer_insts array.
   The low 4 bits of the emit_op inp_flags select the table entry
   (see data_transfer_insts below). */
#define WORD_DATA	0x00
#define BYTE_DATA	0x01
#define HALF_DATA	0x02
#define SIGNED_DATA	0x04
#define LOAD_DATA	0x08

/* emit_op inp_flags.
   WRITE_BACK must be 0x10, the first bit above the table index, since it is
   passed through EMIT_DATA_TRANSFER as a flag (see the compile-time assert
   in emit_op_mem). */
#define WRITE_BACK	0x10
#define ALLOW_IMM	0x20
#define ALLOW_INV_IMM	0x40
#define ALLOW_ANY_IMM	(ALLOW_IMM | ALLOW_INV_IMM)
    833 
/* s/l - store/load (1 bit)
   u/s - signed/unsigned (1 bit)
   w/b/h/N - word/byte/half/NOT allowed (2 bit)
   Storing signed and unsigned values are the same operations.
   Indexed by (LOAD_DATA | SIGNED_DATA | HALF_DATA | BYTE_DATA) bits;
   each entry is the base opcode with condition AL and zero operands. */

static const sljit_uw data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u N */ 0x00000000 /* not allowed */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};
    858 
/* Builds one load/store instruction word.
   type: low 4 bits index data_transfer_insts;
   add:  U bit (1: add offset to base, 0: subtract);
   wb:   the WRITE_BACK flag (0x10); shifting by (21 - 4) places it at the
         W bit (bit 21) of the instruction;
   arg:  the addressing-mode specific offset bits. */
#define EMIT_DATA_TRANSFER(type, add, wb, target_reg, base_reg, arg) \
	(data_transfer_insts[(type) & 0xf] | ((add) << 23) | ((wb) << (21 - 4)) | (reg_map[target_reg] << 12) | (reg_map[base_reg] << 16) | (arg))

/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh
   Type1 entries have bit 26 set (0xe5......); type2 entries (0xe1......)
   do not. */
#define IS_TYPE1_TRANSFER(type) \
	(data_transfer_insts[(type) & 0xf] & 0x04000000)
/* Splits an 8 bit immediate into the imm4H:imm4L fields of a type2
   (halfword/signed byte) transfer and sets bit 22 (immediate form). */
#define TYPE2_TRANSFER_IMM(imm) \
	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))

/* Condition: AL. Builds a data processing instruction;
   set_flags is 0 or SET_FLAGS (the S bit, 1 << 20);
   src2 is either a register operand (RM(...)) or SRC2_IMM | imm12. */
#define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \
	(0xe0000000 | ((opcode) << 21) | (set_flags) | RD(dst) | RN(src1) | (src2))

/* Forward declaration; defined after the immediate helpers below. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);
    877 
    878 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
    879 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
    880 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    881 {
    882 	sljit_s32 size, i, tmp;
    883 	sljit_uw push;
    884 
    885 	CHECK_ERROR();
    886 	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    887 	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    888 
    889 	/* Push saved registers, temporary registers
    890 	   stmdb sp!, {..., lr} */
    891 	push = PUSH | (1 << 14);
    892 
    893 	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
    894 	for (i = SLJIT_S0; i >= tmp; i--)
    895 		push |= 1 << reg_map[i];
    896 
    897 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
    898 		push |= 1 << reg_map[i];
    899 
    900 	FAIL_IF(push_inst(compiler, push));
    901 
    902 	/* Stack must be aligned to 8 bytes: */
    903 	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
    904 	local_size = ((size + local_size + 7) & ~7) - size;
    905 	compiler->local_size = local_size;
    906 	if (local_size > 0)
    907 		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
    908 
    909 	if (args >= 1)
    910 		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S0, SLJIT_UNUSED, RM(SLJIT_R0))));
    911 	if (args >= 2)
    912 		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S1, SLJIT_UNUSED, RM(SLJIT_R1))));
    913 	if (args >= 3)
    914 		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, SLJIT_S2, SLJIT_UNUSED, RM(SLJIT_R2))));
    915 
    916 	return SLJIT_SUCCESS;
    917 }
    918 
    919 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
    920 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
    921 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
    922 {
    923 	sljit_s32 size;
    924 
    925 	CHECK_ERROR();
    926 	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
    927 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
    928 
    929 	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
    930 	compiler->local_size = ((size + local_size + 7) & ~7) - size;
    931 	return SLJIT_SUCCESS;
    932 }
    933 
    934 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
    935 {
    936 	sljit_s32 i, tmp;
    937 	sljit_uw pop;
    938 
    939 	CHECK_ERROR();
    940 	CHECK(check_sljit_emit_return(compiler, op, src, srcw));
    941 
    942 	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
    943 
    944 	if (compiler->local_size > 0)
    945 		FAIL_IF(emit_op(compiler, SLJIT_ADD, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size));
    946 
    947 	/* Push saved registers, temporary registers
    948 	   ldmia sp!, {..., pc} */
    949 	pop = POP | (1 << 15);
    950 
    951 	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
    952 	for (i = SLJIT_S0; i >= tmp; i--)
    953 		pop |= 1 << reg_map[i];
    954 
    955 	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
    956 		pop |= 1 << reg_map[i];
    957 
    958 	return push_inst(compiler, pop);
    959 }
    960 
    961 /* --------------------------------------------------------------------- */
    962 /*  Operators                                                            */
    963 /* --------------------------------------------------------------------- */
    964 
/* emit_single_op flags: */
  /* Arguments are swapped. */
#define ARGS_SWAPPED	0x01
  /* Inverted immediate. */
#define INV_IMM		0x02
  /* Source and destination is register. */
#define MOVE_REG_CONV	0x04
/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS	(1 << 20)
/* dst: reg
   src1: reg
   src2: reg or imm (if allowed)
   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
#define SRC2_IMM	(1 << 25)

/* Emits the mov-with-shifted-operand for SLJIT_SHL / SLJIT_LSHR /
   SLJIT_ASHR and returns.  opcode is the ARM shift type placed at bits
   5-6 (0: lsl, 1: lsr, 2: asr).  compiler->shift_imm holds an immediate
   shift amount, with 0x20 selecting the register-shift form
   (NOTE(review): confirm where shift_imm is set — outside this chunk);
   a zero immediate degenerates to a plain mov. */
#define EMIT_SHIFT_INS_AND_RETURN(opcode) \
	SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \
	if (compiler->shift_imm != 0x20) { \
		SLJIT_ASSERT(src1 == TMP_REG1); \
		SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \
		\
		if (compiler->shift_imm != 0) \
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \
				dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | RM(src2))); \
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); \
	} \
	return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \
		dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1)));
    993 
/* Emits the machine instruction (or short fixed sequence) implementing op.
   Operands have already been placed by emit_op: src1/src2 are registers,
   or src2 may be an encodable immediate (SRC2_IMM set).  flags carry
   ARGS_SWAPPED / INV_IMM / MOVE_REG_CONV / SET_FLAGS as defined above. */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_s32 dst, sljit_s32 src1, sljit_s32 src2)
{
	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (dst != src2) {
			if (src2 & SRC2_IMM) {
				/* INV_IMM means the immediate was encoded inverted: use mvn. */
				return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
					dst, SLJIT_UNUSED, src2));
			}
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(src2)));
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U8:
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			/* No uxtb/sxtb on ARMv5: mask with 0xff for unsigned, or
			   shift left 24 (24 << 7) then asr 24 (0x40) for signed. */
			if (op == SLJIT_MOV_U8)
				return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff));
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | RM(src2))));
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst)));
#else
			return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
#endif
		}
		else if (dst != src2) {
			/* Immediates were already narrowed by the caller. */
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
				dst, SLJIT_UNUSED, src2));
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U16:
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			/* No uxth/sxth on ARMv5: lsl 16 then lsr (0x20) / asr (0x40) 16. */
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | RM(src2))));
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst)));
#else
			return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
#endif
		}
		else if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0,
				dst, SLJIT_UNUSED, src2));
		}
		return SLJIT_SUCCESS;

	case SLJIT_NOT:
		if (src2 & SRC2_IMM) {
			/* An inverted immediate combined with NOT cancels out: plain mov. */
			return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MOV_DP : MVN_DP, flags & SET_FLAGS,
				dst, SLJIT_UNUSED, src2));
		}
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2)));

	case SLJIT_CLZ:
		SLJIT_ASSERT(!(flags & INV_IMM));
		SLJIT_ASSERT(!(src2 & SRC2_IMM));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
		if (flags & SET_FLAGS)
			/* Produce flags with an explicit cmp against #0
			   (SRC2_IMM alone encodes immediate zero). */
			return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, flags & SET_FLAGS, SLJIT_UNUSED, dst, SRC2_IMM));
		return SLJIT_SUCCESS;

	case SLJIT_ADD:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, flags & SET_FLAGS,
			dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_ADDC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADC_DP, flags & SET_FLAGS,
			dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUB:
		SLJIT_ASSERT(!(flags & INV_IMM));
		/* Swapped arguments use reverse subtract instead. */
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SUB_DP : RSB_DP, flags & SET_FLAGS,
			dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUBC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SBC_DP : RSC_DP, flags & SET_FLAGS,
			dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_MUL:
		SLJIT_ASSERT(!(flags & INV_IMM));
		SLJIT_ASSERT(!(src2 & SRC2_IMM));

		if (!HAS_FLAGS(op))
			return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]);

		/* Overflow detection: smull gives the full 64 bit product with
		   the high word in TMP_REG1. */
		FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1]));

		/* cmp TMP_REG1, dst asr #31. */
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG1, RM(dst) | 0xfc0));

	case SLJIT_AND:
		/* Inverted immediate turns and into bic. */
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & INV_IMM) ? AND_DP : BIC_DP, flags & SET_FLAGS,
			dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_OR:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(ORR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_XOR:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(EOR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SHL:
		EMIT_SHIFT_INS_AND_RETURN(0);

	case SLJIT_LSHR:
		EMIT_SHIFT_INS_AND_RETURN(1);

	case SLJIT_ASHR:
		EMIT_SHIFT_INS_AND_RETURN(2);
	}

	SLJIT_UNREACHABLE();
	return SLJIT_SUCCESS;
}

#undef EMIT_SHIFT_INS_AND_RETURN
   1121 
/* Tests whether the immediate can be stored in the 12 bit imm field
   (an 8 bit value combined with a 4 bit rotation count; the hardware
   rotates the value right by twice the count).
   Returns the encoded operand2 (with SRC2_IMM set), or 0 if not possible. */
static sljit_uw get_imm(sljit_uw imm)
{
	sljit_s32 rol;

	/* Small values need no rotation. */
	if (imm <= 0xff)
		return SRC2_IMM | imm;

	/* Normalize so the top byte is non-empty, tracking the rotation
	   performed so far in encoded (2-bit) units. */
	if (!(imm & 0xff000000)) {
		imm <<= 8;
		rol = 8;
	}
	else {
		/* Rotate right by 8 bits (= 4 encoded units compensated below). */
		imm = (imm << 24) | (imm >> 8);
		rol = 0;
	}

	if (!(imm & 0xff000000)) {
		imm <<= 8;
		rol += 4;
	}

	if (!(imm & 0xf0000000)) {
		imm <<= 4;
		rol += 2;
	}

	if (!(imm & 0xc0000000)) {
		imm <<= 2;
		rol += 1;
	}

	/* Encodable only if all set bits now fit in the top byte. */
	if (!(imm & 0x00ffffff))
		return SRC2_IMM | (imm >> 24) | (rol << 8);
	else
		return 0;
}
   1160 
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
/* Tries to build imm in reg with a two instruction sequence: mov+orr when
   positive is non-zero, mvn+bic otherwise (the caller passes ~imm with
   positive == 0).  Emits the pair and returns 1 on success; returns 0
   without emitting anything when imm cannot be split into two rotated
   8 bit chunks. */
static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive)
{
	sljit_uw mask;
	sljit_uw imm1;
	sljit_uw imm2;
	sljit_s32 rol;

	/* Step1: Search a zero byte (8 continuous zero bits). */
	mask = 0xff000000;
	rol = 8;
	while(1) {
		if (!(imm & mask)) {
			/* Rol imm by rol so the zero byte becomes the low byte. */
			imm = (imm << rol) | (imm >> (32 - rol));
			/* Calculate arm rol (encoded rotation units). */
			rol = 4 + (rol >> 1);
			break;
		}
		rol += 2;
		mask >>= 2;
		if (mask & 0x3) {
			/* No aligned zero byte found yet: rol by 8 and scan the
			   other byte positions. */
			imm = (imm << 8) | (imm >> 24);
			mask = 0xff00;
			rol = 24;
			while (1) {
				if (!(imm & mask)) {
					/* Rol imm by rol. */
					imm = (imm << rol) | (imm >> (32 - rol));
					/* Calculate arm rol. */
					rol = (rol >> 1) - 8;
					break;
				}
				rol += 2;
				mask >>= 2;
				if (mask & 0x3)
					/* No 8 bit zero window anywhere: not splittable. */
					return 0;
			}
			break;
		}
	}

	/* The low 8 bit must be zero. */
	SLJIT_ASSERT(!(imm & 0xff));

	/* Step2: split the remaining 24 bits into two rotated 8 bit
	   immediates imm1 (mov/mvn) and imm2 (orr/bic). */
	if (!(imm & 0xff000000)) {
		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
		imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
	}
	else if (imm & 0xc0000000) {
		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xff000000)) {
			imm <<= 8;
			rol += 4;
		}

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}
	else {
		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}

	FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? MOV_DP : MVN_DP, 0, reg, SLJIT_UNUSED, imm1)));
	FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(positive ? ORR_DP : BIC_DP, 0, reg, reg, imm2)));
	return 1;
}
#endif
   1272 
   1273 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
   1274 {
   1275 	sljit_uw tmp;
   1276 
   1277 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
   1278 	if (!(imm & ~0xffff))
   1279 		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
   1280 #endif
   1281 
   1282 	/* Create imm by 1 inst. */
   1283 	tmp = get_imm(imm);
   1284 	if (tmp)
   1285 		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, tmp));
   1286 
   1287 	tmp = get_imm(~imm);
   1288 	if (tmp)
   1289 		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, tmp));
   1290 
   1291 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
   1292 	/* Create imm by 2 inst. */
   1293 	FAIL_IF(generate_int(compiler, reg, imm, 1));
   1294 	FAIL_IF(generate_int(compiler, reg, ~imm, 0));
   1295 
   1296 	/* Load integer. */
   1297 	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm);
   1298 #else
   1299 	return emit_imm(compiler, reg, imm);
   1300 #endif
   1301 }
   1302 
/* Emits a load or store (size/direction selected by flags) between
   register reg and the memory operand [arg, argw].  tmp_reg may be
   clobbered to build addresses whose offset does not fit the instruction's
   immediate field (12 bits for type1 ldr/str, 8 bits for type2). */
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_uw offset_reg, imm;
	sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags);

	SLJIT_ASSERT (arg & SLJIT_MEM);
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg);

	/* EMIT_DATA_TRANSFER shifts the WRITE_BACK flag (0x10) straight into
	   the W bit, so its value must not change. */
	SLJIT_COMPILE_ASSERT(WRITE_BACK == 0x10, optimized_for_emit_data_transfer);

	if ((arg & REG_MASK) == SLJIT_UNUSED) {
		/* Absolute address: load it into tmp_reg and use a zero offset.
		   Write back is not used. */
		FAIL_IF(load_immediate(compiler, tmp_reg, argw));
		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, is_type1_transfer ? 0 : TYPE2_TRANSFER_IMM(0)));
	}

	if (arg & OFFS_REG_MASK) {
		/* Base + (index << argw) form; argw holds a 0..3 shift. */
		offset_reg = OFFS_REG(arg);
		arg &= REG_MASK;
		argw &= 0x3;

		if (argw != 0 && !is_type1_transfer) {
			/* Type2 transfers take no shifted index: precompute the
			   address into tmp_reg. */
			SLJIT_ASSERT(!(flags & WRITE_BACK));

			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_reg, arg, RM(offset_reg) | (argw << 7))));
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
		}

		/* Bit 25: RM is offset. */
		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg,
			RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7)));
	}

	arg &= REG_MASK;

	if (is_type1_transfer) {
		/* Immediate offsets up to 12 bits; fold the upper part of larger
		   offsets into the base with a single add/sub when encodable. */
		if (argw > 0xfff) {
			imm = get_imm(argw & ~0xfff);
			if (imm) {
				offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
				FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm)));
				argw = argw & 0xfff;
				arg = offset_reg;
			}
		}
		else if (argw < -0xfff) {
			imm = get_imm(-argw & ~0xfff);
			if (imm) {
				offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
				FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm)));
				argw = -(-argw & 0xfff);
				arg = offset_reg;
			}
		}

		/* Negative offsets use the subtract (U = 0) form. */
		if (argw >= 0 && argw <= 0xfff) {
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg & REG_MASK, argw));
		}
		if (argw < 0 && argw >= -0xfff) {
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg & REG_MASK, -argw));
		}
	}
	else {
		/* Type2 transfers only carry an 8 bit immediate offset. */
		if (argw > 0xff) {
			imm = get_imm(argw & ~0xff);
			if (imm) {
				offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
				FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm)));
				argw = argw & 0xff;
				arg = offset_reg;
			}
		}
		else if (argw < -0xff) {
			imm = get_imm(-argw & ~0xff);
			if (imm) {
				offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg;
				FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm)));
				argw = -(-argw & 0xff);
				arg = offset_reg;
			}
		}

		if (argw >= 0 && argw <= 0xff) {
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw)));
		}
		if (argw < 0 && argw >= -0xff) {
			argw = -argw;
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw)));
		}
	}

	/* Offset could not be folded/encoded: build it in tmp_reg and use a
	   register offset. */
	FAIL_IF(load_immediate(compiler, tmp_reg, argw));
	return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg,
		RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0)));
}
   1399 
/* Generic operation emitter: brings the operands into registers (or an
   encodable immediate), calls emit_single_op, then stores the result if
   the destination is memory.  inp_flags select the data size and which
   immediate encodings are acceptable (ALLOW_IMM / ALLOW_INV_IMM). */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* src1 is reg or TMP_REG1
	   src2 is reg, TMP_REG2, or imm
	   result goes to TMP_REG2, so put result can use TMP_REG1. */

	/* We prefer registers and simple constants. */
	sljit_s32 dst_reg;
	sljit_s32 src1_reg;
	sljit_s32 src2_reg;
	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;

	/* Destination check. */
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* A move to an unused destination with no memory source emits
		   nothing. */
		if (op <= SLJIT_MOVU_P && !(src2 & SLJIT_MEM))
			return SLJIT_SUCCESS;
	}

	SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));

	src2_reg = 0;

	/* Try to encode one operand as an immediate; also try the inverted
	   or negated value when the opcode allows compensating for it. */
	do {
		if (!(inp_flags & ALLOW_IMM))
			break;

		if (src2 & SLJIT_IMM) {
			src2_reg = get_imm(src2w);
			if (src2_reg)
				break;
			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = get_imm(~src2w);
				if (src2_reg) {
					flags |= INV_IMM;
					break;
				}
			}
			/* add x, -imm  ==  sub x, imm  (and vice versa). */
			if (GET_OPCODE(op) == SLJIT_ADD) {
				src2_reg = get_imm(-src2w);
				if (src2_reg) {
					op = SLJIT_SUB | GET_ALL_FLAGS(op);
					break;
				}
			}
			if (GET_OPCODE(op) == SLJIT_SUB) {
				src2_reg = get_imm(-src2w);
				if (src2_reg) {
					op = SLJIT_ADD | GET_ALL_FLAGS(op);
					break;
				}
			}
		}

		if (src1 & SLJIT_IMM) {
			/* Moving src1's immediate into the src2 slot requires the
			   ARGS_SWAPPED flag so emit_single_op can compensate. */
			src2_reg = get_imm(src1w);
			if (src2_reg) {
				flags |= ARGS_SWAPPED;
				src1 = src2;
				src1w = src2w;
				break;
			}
			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = get_imm(~src1w);
				if (src2_reg) {
					flags |= ARGS_SWAPPED | INV_IMM;
					src1 = src2;
					src1w = src2w;
					break;
				}
			}
			if (GET_OPCODE(op) == SLJIT_ADD) {
				src2_reg = get_imm(-src1w);
				if (src2_reg) {
					/* Note: add is commutative operation. */
					src1 = src2;
					src1w = src2w;
					op = SLJIT_SUB | GET_ALL_FLAGS(op);
					break;
				}
			}
		}
	} while(0);

	/* Source 1. */
	if (FAST_IS_REG(src1))
		src1_reg = src1;
	else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1_reg = TMP_REG1;
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
		src1_reg = TMP_REG1;
	}

	/* Destination. */
	dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;

	if (op <= SLJIT_MOVU_P) {
		if (dst & SLJIT_MEM) {
			/* Stores ignore signedness; drop it so the store opcode is
			   selected from the table. */
			if (inp_flags & BYTE_DATA)
				inp_flags &= ~SIGNED_DATA;

			/* Register-to-memory move: store directly. */
			if (FAST_IS_REG(src2))
				return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2);
		}

		/* Register-to-register move may need a size conversion. */
		if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
			flags |= MOVE_REG_CONV;
	}

	/* Source 2. */
	if (src2_reg == 0) {
		src2_reg = (op <= SLJIT_MOVU_P) ? dst_reg : TMP_REG2;

		if (FAST_IS_REG(src2))
			src2_reg = src2;
		else if (src2 & SLJIT_MEM)
			FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2));
		else
			FAIL_IF(load_immediate(compiler, src2_reg, src2w));
	}

	FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	/* Result was computed into dst_reg (TMP_REG2): store it. */
	return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
}
   1533 
/* EABI run-time division helpers; called by sljit_emit_op0 for the
   SLJIT_DIV* / SLJIT_DIVMOD* opcodes below. */
#ifdef __cplusplus
extern "C" {
#endif

#if defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif
   1548 
/* Emits a zero-operand operation.  Multiplication uses umull/smull
   directly; division is routed through the __aeabi_* helpers, preserving
   any live scratch registers across the call on an 8 byte aligned stack
   area. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_sw saved_reg_list[3];
	sljit_sw saved_reg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		FAIL_IF(push_inst(compiler, BKPT));
		break;
	case SLJIT_NOP:
		FAIL_IF(push_inst(compiler, NOP));
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 64 bit product of R0 * R1: low word to R0, high word to R1. */
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| (reg_map[SLJIT_R1] << 16)
			| (reg_map[SLJIT_R0] << 12)
			| (reg_map[SLJIT_R0] << 8)
			| reg_map[SLJIT_R1]);
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Decide which of r1-r3 must survive the helper call: r2/r3 when
		   they are in use as scratches, and r1 for the DIV (quotient only)
		   variants, since the helper also produces a remainder there. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			/* First register is pushed with a pre-decrement of 8 or 16
			   bytes so the stack stays 8 byte aligned. */
			FAIL_IF(push_inst(compiler, 0xe52d0000 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, 0xe58d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, 0xe58d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
			}
		}

#if defined(__GNUC__)
		/* (op | 0x2) maps DIVMOD_UW/DIV_UW to the unsigned helper and the
		   signed opcodes to the signed one. */
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore the saved registers in reverse order. */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, 0xe59d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
			}
			return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
   1624 
/* Emits a unary operation: full/partial width moves (with optional sign or
   zero extension and, for MOVU, post-update addressing), bitwise NOT,
   arithmetic negation and count-leading-zeros.
   dst/dstw and src/srcw are sljit operand pairs (register, memory or
   immediate).  Returns an sljit error code. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (GET_OPCODE(op)) {
	/* On a 32 bit target every full-width move is the same operation. */
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV_P:
		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);

	/* For the narrow moves an immediate source is truncated/sign-extended
	   here, so emit_op always receives the final value; register and
	   memory sources are extended by emit_op itself. */
	case SLJIT_MOV_U8:
		return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);

	case SLJIT_MOV_S8:
		return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);

	case SLJIT_MOV_U16:
		return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);

	case SLJIT_MOV_S16:
		return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);

	/* MOVU variants additionally write back the updated address
	   (WRITE_BACK) to the base register of the memory operand. */
	case SLJIT_MOVU:
	case SLJIT_MOVU_U32:
	case SLJIT_MOVU_S32:
	case SLJIT_MOVU_P:
		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOVU_U8:
		return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);

	case SLJIT_MOVU_S8:
		return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED_DATA | BYTE_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);

	case SLJIT_MOVU_U16:
		return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);

	case SLJIT_MOVU_S16:
		return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED_DATA | HALF_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);

	case SLJIT_NOT:
		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);

	/* NEG is lowered to "0 - src"; skip_checks suppresses the argument
	   re-validation in the nested sljit_emit_op2 call. */
	case SLJIT_NEG:
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
			|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, SLJIT_SUB | GET_ALL_FLAGS(op), dst, dstw, SLJIT_IMM, 0, src, srcw);

	case SLJIT_CLZ:
		/* No immediate form exists for CLZ. */
		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
	}

	return SLJIT_SUCCESS;
}
   1687 
/* Emits a binary operation (add/sub with or without carry, logical ops,
   multiply and shifts) on sljit operand pairs.  Returns an sljit error
   code. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
	case SLJIT_ADDC:
	case SLJIT_SUB:
	case SLJIT_SUBC:
	case SLJIT_OR:
	case SLJIT_XOR:
		/* These have a direct immediate form (data processing imm). */
		return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
		/* MUL has no immediate form; both operands must be registers. */
		return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
		/* ALLOW_ANY_IMM: an inverted immediate can be used (BIC). */
		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SHL:
	case SLJIT_LSHR:
	case SLJIT_ASHR:
		if (src2 & SLJIT_IMM) {
			/* ARM shift immediates are 5 bits wide. */
			compiler->shift_imm = src2w & 0x1f;
			return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
		}
		else {
			/* 0x20 is out of immediate range; it marks the
			   register-specified shift form for emit_op. */
			compiler->shift_imm = 0x20;
			return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
		}
	}

	return SLJIT_SUCCESS;
}
   1729 
/* Returns the hardware register number assigned to an sljit integer
   register (for use with sljit_emit_op_custom). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}
   1735 
/* Returns the hardware register number assigned to an sljit float
   register.  Float registers map to even hardware indices (reg * 2). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg << 1;
}
   1741 
/* Emits a caller-supplied raw 32 bit instruction word verbatim into the
   generated code stream.  'size' is validated by the check macro. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_uw*)instruction);
}
   1750 
   1751 /* --------------------------------------------------------------------- */
   1752 /*  Floating point operators                                             */
   1753 /* --------------------------------------------------------------------- */
   1754 
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)

/* Cached FPU detection result:
    0 - no fpu
    1 - vfp
   -1 - not probed yet */
static sljit_s32 arm_fpu_type = -1;

/* Lazily initializes arm_fpu_type; runs at most once. */
static void init_compiler(void)
{
	if (arm_fpu_type != -1)
		return;

	/* TODO: Only the OS can help to determine the correct fpu type. */
	arm_fpu_type = 1;
}

/* Returns non-zero when floating point support is usable. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	/* Compile-time override provided by the embedder. */
	return SLJIT_IS_FPU_AVAILABLE;
#else
	if (arm_fpu_type == -1)
		init_compiler();
	return arm_fpu_type;
#endif
}

#else

/* Non-V5 builds assume a VFP unit is present. */
#define arm_fpu_type 1

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void)
{
	/* Always available. */
	return 1;
}

#endif
   1792 
/* Bit 20 of VSTR/VLDR selects load (set) versus store (clear). */
#define FPU_LOAD (1 << 20)
/* Builds a VFP load/store: 'add' selects the U (add-offset) bit,
   'offs' is the word-scaled 8 bit offset field. */
#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
	((inst) | ((add) << 23) | (reg_map[base] << 16) | (freg << 12) | (offs))
/* Builds a VFP data-processing instruction; 'mode' carries the
   single/double precision bit (SLJIT_F32_OP). */
#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
	((opcode) | (mode) | ((dst) << 12) | (src1) | ((src2) << 16))

/* Emits a VFP load or store (direction/precision in 'flags') between
   float register 'reg' and the memory operand arg/argw.  VSTR/VLDR only
   encode a word-aligned offset of at most 0x3fc (imm8 * 4), so larger or
   unaligned offsets are materialized into TMP_REG2 first. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);
	arg &= ~SLJIT_MEM;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* [base + index << shift]: fold the index into TMP_REG2 because
		   VFP transfers have no register-offset addressing mode. */
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7))));
		arg = TMP_REG2;
		argw = 0;
	}

	/* Fast loads and stores. */
	if (arg) {
		/* Offset fits the positive or negative imm8*4 encoding. */
		if (!(argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));

		/* Try to add/subtract the out-of-range part of the offset with a
		   single data-processing immediate, keeping the low bits in the
		   transfer instruction itself. */
		imm = get_imm(argw & ~0x3fc);
		if (imm) {
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, imm)));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2));
		}
		imm = get_imm(-argw & ~0x3fc);
		if (imm) {
			argw = -argw;
			FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG2, arg & REG_MASK, imm)));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2));
		}
	}

	/* Slow path: load the full offset (or absolute address when there is
	   no base register) into TMP_REG2. */
	if (arg) {
		FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(TMP_REG2))));
	}
	else
		FAIL_IF(load_immediate(compiler, TMP_REG2, argw));

	return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0));
}
   1842 
/* Converts a float/double source to a signed word and stores it in dst.
   The SLJIT_F32_OP bit is inverted so that (op & SLJIT_F32_OP) selects
   the matching precision variant of the VFP opcodes used below. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	op ^= SLJIT_F32_OP;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* The converted integer lands in TMP_FREG1 (an s-register). */
	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0)));

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (FAST_IS_REG(dst))
		/* VMOV with bit 20 set: VFP register to core register. */
		return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
   1865 
/* Converts a signed word source (register, memory or immediate) to a
   float/double result in dst.  As above, SLJIT_F32_OP is inverted to
   match the VFP opcode encodings. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	op ^= SLJIT_F32_OP;

	if (FAST_IS_REG(src))
		/* Move the integer into a VFP register first. */
		FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		/* Immediate: materialize it in TMP_REG1, then move it over. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | (TMP_FREG1 << 16)));
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_F32_OP, dst_r, TMP_FREG1, 0)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
   1891 
/* Compares two float/double operands: VCMP sets the FP status flags,
   then VMRS copies them into the integer condition flags so ordinary
   conditional branches can test the result. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	op ^= SLJIT_F32_OP;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_F32_OP, src1, src2, 0)));
	return push_inst(compiler, VMRS);
}
   1911 
/* Emits a unary floating point operation: move, negate, absolute value
   and single<->double conversion.  Comparison and int conversions are
   dispatched by SELECT_FOP1_OPERATION_WITH_CHECKS to the helpers above. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* For CONV_F64_FROM_F32 the flag describes the *source* format, so
	   it is inverted only after the conversion (see below). */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
			else
				/* dst is memory; store straight from src, no copy. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_F32_OP, dst_r, src, 0)));
		/* From here on the flag must describe the result format. */
		op ^= SLJIT_F32_OP;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_F32_OP), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
   1958 
/* Emits a binary floating point operation (add, sub, mul, div).  Note the
   EMIT_FPU_OPERATION argument order: its 'src1' slot fills the low bits
   (Fm) and its 'src2' slot fills bits 16.. (Fn), so passing (dst_r,
   src2, src1) yields dst = src1 OP src2 as required. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	op ^= SLJIT_F32_OP;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_F32_OP, dst_r, src2, src1)));
		break;
	}

	/* Memory destination: spill the result from the temporary register. */
	if (dst_r == TMP_FREG1)
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP), TMP_FREG1, dst, dstw));

	return SLJIT_SUCCESS;
}
   2009 
   2010 #undef FPU_LOAD
   2011 #undef EMIT_FPU_DATA_TRANSFER
   2012 #undef EMIT_FPU_OPERATION
   2013 
   2014 /* --------------------------------------------------------------------- */
   2015 /*  Other instructions                                                   */
   2016 /* --------------------------------------------------------------------- */
   2017 
/* Saves the return address of a fast call into dst.  TMP_REG1 is mapped
   to lr (r14, asserted below), where BL/BLX left the return address. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	SLJIT_ASSERT(reg_map[TMP_REG1] == 14);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	if (FAST_IS_REG(dst))
		return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG1)));

	/* Memory. */
	return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2);
}
   2036 
/* Returns from a fast call: moves the saved return address from src back
   into TMP_REG1 (lr) and branches to it with BX. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	SLJIT_ASSERT(reg_map[TMP_REG1] == 14);

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, 0, RM(src))));
	else if (src & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG2));
	else if (src & SLJIT_IMM)
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));

	return push_inst(compiler, BX | RM(TMP_REG1));
}
   2054 
   2055 /* --------------------------------------------------------------------- */
   2056 /*  Conditional instructions                                             */
   2057 /* --------------------------------------------------------------------- */
   2058 
/* Maps an sljit condition type to the ARM condition field
   (bits 31..28 of every instruction). */
static sljit_uw get_cc(sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_MUL_NOT_OVERFLOW:
	case SLJIT_EQUAL_F64:
		return 0x00000000; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_MUL_OVERFLOW:
	case SLJIT_NOT_EQUAL_F64:
		return 0x10000000; /* NE */

	case SLJIT_LESS:
	case SLJIT_LESS_F64:
		return 0x30000000; /* CC (unsigned lower) */

	case SLJIT_GREATER_EQUAL:
	case SLJIT_GREATER_EQUAL_F64:
		return 0x20000000; /* CS (unsigned higher or same) */

	case SLJIT_GREATER:
	case SLJIT_GREATER_F64:
		return 0x80000000; /* HI (unsigned higher) */

	case SLJIT_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return 0x90000000; /* LS (unsigned lower or same) */

	case SLJIT_SIG_LESS:
		return 0xb0000000; /* LT (signed less) */

	case SLJIT_SIG_GREATER_EQUAL:
		return 0xa0000000; /* GE (signed greater or equal) */

	case SLJIT_SIG_GREATER:
		return 0xc0000000; /* GT (signed greater) */

	case SLJIT_SIG_LESS_EQUAL:
		return 0xd0000000; /* LE (signed less or equal) */

	case SLJIT_OVERFLOW:
	case SLJIT_UNORDERED_F64:
		return 0x60000000; /* VS (overflow / FP unordered) */

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_ORDERED_F64:
		return 0x70000000; /* VC (no overflow / FP ordered) */

	default:
		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
		return 0xe0000000; /* AL (always) */
	}
}
   2113 
   2114 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
   2115 {
   2116 	struct sljit_label *label;
   2117 
   2118 	CHECK_ERROR_PTR();
   2119 	CHECK_PTR(check_sljit_emit_label(compiler));
   2120 
   2121 	if (compiler->last_label && compiler->last_label->size == compiler->size)
   2122 		return compiler->last_label;
   2123 
   2124 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
   2125 	PTR_FAIL_IF(!label);
   2126 	set_label(label, compiler);
   2127 	return label;
   2128 }
   2129 
/* Emits a conditional or unconditional (possibly rewritable) jump whose
   target is filled in later by sljit_set_label / sljit_set_target. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* In ARM, we don't need to touch the arguments. */
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* V5: the target address comes from a literal-pool load.  Plain
	   jumps load straight into pc; calls load into TMP_REG2 and branch
	   via an emulated blx sequence (prepare_blx/emit_blx). */
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(prepare_blx(compiler));
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0,
		type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0));

	if (jump->flags & SLJIT_REWRITABLE_JUMP) {
		/* Rewritable jumps keep their literal; record it as a patch. */
		jump->addr = compiler->size;
		compiler->patches++;
	}

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(emit_blx(compiler));
	}

	if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
		jump->addr = compiler->size;
#else
	/* V7: load the (later patched) target with a movw/movt style
	   immediate sequence, then BX/BLX through TMP_REG2. */
	if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;
	PTR_FAIL_IF(emit_imm(compiler, TMP_REG2, 0));
	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)) & ~COND_MASK) | get_cc(type)));
	jump->addr = compiler->size;
#endif
	return jump;
}
   2170 
/* Emits an indirect jump or call to src: directly via BX/BLX for a
   register, via TMP_REG2 for a memory operand, and through a patchable
   jump record for an immediate target address. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* In ARM, we don't need to touch the arguments. */
	if (!(src & SLJIT_IMM)) {
		if (FAST_IS_REG(src))
			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));

		SLJIT_ASSERT(src & SLJIT_MEM);
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2));
		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2));
	}

	/* Immediate target: create a jump record so the absolute address can
	   be resolved/patched at code generation time. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = srcw;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* V5: literal-pool load (into pc, or into TMP_REG2 for calls with
	   the emulated blx sequence). */
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(prepare_blx(compiler));
	FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0), 0));
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(emit_blx(compiler));
#else
	/* V7: immediate sequence plus BX/BLX. */
	FAIL_IF(emit_imm(compiler, TMP_REG2, 0));
	FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)));
#endif
	jump->addr = compiler->size;
	return SLJIT_SUCCESS;
}
   2207 
/* Materializes the condition 'type' as 0/1 into dst (op < SLJIT_ADD), or
   combines it with dst/src via AND/OR/XOR, using ARM conditional
   execution instead of branches. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw,
	sljit_s32 type)
{
	sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
	sljit_uw cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	op = GET_OPCODE(op);
	cc = get_cc(type & 0xff);
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op < SLJIT_ADD) {
		/* Plain set: unconditional "mov dst, #0" followed by a
		   conditional "mov<cc> dst, #1". */
		FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, SRC2_IMM | 0)));
		FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc));
		return (dst_reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1) : SLJIT_SUCCESS;
	}

	ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP));
	if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) {
		/* In-place OR/XOR with #1: one conditional instruction suffices
		   because OR/XOR with nothing leaves dst unchanged. */
		FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc));
		/* The condition must always be set, even if the ORR/EOR is not executed above. */
		return (flags & SLJIT_SET_Z) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	} else if (src & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	/* Emit both outcomes conditionally: "<ins><cc> dst, src, #1" and
	   "<ins><!cc> dst, src, #0".  Flipping bit 28 (cc ^ 0x10000000)
	   inverts any ARM condition. */
	FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_reg, src, SRC2_IMM | 1) & ~COND_MASK) | cc));
	FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_reg, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));
	if (dst_reg == TMP_REG2)
		FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1));

	return (flags & SLJIT_SET_Z) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_reg))) : SLJIT_SUCCESS;
}
   2256 
/* Emits a runtime-patchable constant load of init_value into dst and
   returns its record, so the value can later be changed with
   sljit_set_const. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 reg;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);

	reg = SLOW_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* V5: pc-relative literal load; the literal itself is the patch
	   target, so it is counted in compiler->patches. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), init_value));
	compiler->patches++;
#else
	/* V7: patchable immediate sequence. */
	PTR_FAIL_IF(emit_imm(compiler, reg, init_value));
#endif
	set_const(const_, compiler);

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1));
	return const_;
}
   2283 
/* Rewrites the target of a previously emitted rewritable jump in already
   generated code (flushes the instruction cache). */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	inline_set_jump_addr(addr, executable_offset, new_target, 1);
}
   2288 
/* Rewrites the value of a constant emitted by sljit_emit_const in
   already generated code (flushes the instruction cache). */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	inline_set_const(addr, executable_offset, new_constant, 1);
}
   2293