/*
 * Copyright (C) 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "ir3.h"

/* The maximum number of nop's we may need to insert between two instructions.
 */
#define MAX_NOPS 6

/* The soft delay for approximating the cost of (ss). On a6xx, it takes the
 * number of delay slots to get a SFU result back (ie.
using nop's instead of
 * (ss) is:
 *
 *    8 - single warp
 *    9 - two warps
 *   10 - four warps
 *
 * and so on. Not quite sure where it tapers out (ie. how many warps share an
 * SFU unit). But 10 seems like a reasonable # to choose:
 */
#define SOFT_SS_NOPS 10

/*
 * Helpers to figure out the necessary delay slots between instructions.  Used
 * both in scheduling pass(es) and the final pass to insert any required nop's
 * so that the shader program is valid.
 *
 * Note that this needs to work both pre and post RA, so we can't assume ssa
 * src iterators work.
 */

/* calculate required # of delay slots between the instruction that
 * assigns a value and the one that consumes
 *
 * @assigner: instruction producing the value
 * @consumer: instruction reading it
 * @n:        which of consumer's sources is being considered
 * @soft:     if true, also charge an approximate cost (SOFT_SS_NOPS) for
 *            SFU results that would really be covered by an (ss) sync flag,
 *            so the scheduler prefers to hide the latency
 */
int
ir3_delayslots(struct ir3_instruction *assigner,
               struct ir3_instruction *consumer, unsigned n, bool soft)
{
   /* generally don't count false dependencies, since this can just be
    * something like a barrier, or SSBO store.
    */
   if (__is_false_dep(consumer, n))
      return 0;

   /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
    * alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
    * handled with sync bits
    */

   /* meta instructions don't correspond to real HW instructions, so they
    * never require delay slots:
    */
   if (is_meta(assigner) || is_meta(consumer))
      return 0;

   if (writes_addr0(assigner) || writes_addr1(assigner))
      return 6;

   /* estimated cost of stalling on (ss), see SOFT_SS_NOPS comment above: */
   if (soft && is_sfu(assigner))
      return SOFT_SS_NOPS;

   /* handled via sync flags: */
   if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
      return 0;

   /* As far as we know, shader outputs don't need any delay. */
   if (consumer->opc == OPC_END || consumer->opc == OPC_CHMASK)
      return 0;

   /* assigner must be alu: */
   if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
       is_mem(consumer) || (assigner->dsts[0]->flags & IR3_REG_SHARED)) {
      return 6;
   } else {
      /* In mergedregs mode, there is an extra penalty when half of
       * a full-reg is read as a half-reg or when a half-reg is read as a
       * full-reg.  (NOTE(review): this comment historically said "2-cycle"
       * but the code charges 3 -- confirm against HW docs.)
       */
      bool mismatched_half = (assigner->dsts[0]->flags & IR3_REG_HALF) !=
                             (consumer->srcs[n]->flags & IR3_REG_HALF);
      unsigned penalty = mismatched_half ? 3 : 0;
      if ((is_mad(consumer->opc) || is_madsh(consumer->opc)) && (n == 2)) {
         /* special case, 3rd src to cat3 not required on first cycle */
         return 1 + penalty;
      } else {
         return 3 + penalty;
      }
   }
}

/* Should this instruction be counted when walking backwards to measure the
 * distance (in issued instruction slots) between two instructions?
 */
static bool
count_instruction(struct ir3_instruction *n)
{
   /* NOTE: don't count branch/jump since we don't know yet if they will
    * be eliminated later in resolve_jumps().. really should do that
    * earlier so we don't have this constraint.
    */
   return is_alu(n) ||
          (is_flow(n) && (n->opc != OPC_JUMP) && (n->opc != OPC_B));
}

/* Measure how many instruction slots separate 'instr' from the end of
 * 'block', clamped to 'maxd'.  Counts (rpt) and nop suffixes of counted
 * instructions, so the result can be subtracted from a required delay.
 */
static unsigned
distance(struct ir3_block *block, struct ir3_instruction *instr, unsigned maxd)
{
   unsigned d = 0;

   /* Note that this relies on incrementally building up the block's
    * instruction list.. but this is how scheduling and nopsched
    * work.
    */
   foreach_instr_rev (n, &block->instr_list) {
      if ((n == instr) || (d >= maxd))
         return MIN2(maxd, d + n->nop);
      if (count_instruction(n))
         d = MIN2(maxd, d + 1 + n->repeat + n->nop);
   }

   return maxd;
}

/* Pre-RA delay required between 'assigner' and 'consumer' for consumer's
 * source 'srcn', after accounting for instructions already scheduled
 * in between.  Meta instructions are "transparent": we recurse through
 * their sources and take the worst case.
 */
static unsigned
delay_calc_srcn_prera(struct ir3_block *block, struct ir3_instruction *assigner,
                      struct ir3_instruction *consumer, unsigned srcn)
{
   unsigned delay = 0;

   /* phis would recurse into predecessor blocks, which we can't handle
    * pre-RA (see ir3_delay_calc_prera comment):
    */
   if (assigner->opc == OPC_META_PHI)
      return 0;

   if (is_meta(assigner)) {
      foreach_src_n (src, n, assigner) {
         unsigned d;

         if (!src->def)
            continue;

         d = delay_calc_srcn_prera(block, src->def->instr, consumer, srcn);
         delay = MAX2(delay, d);
      }
   } else {
      delay = ir3_delayslots(assigner, consumer, srcn, false);
      /* distance() is clamped to 'delay', so this cannot underflow: */
      delay -= distance(block, assigner, delay);
   }

   return delay;
}

/**
 * Calculate delay for instruction before register allocation, using SSA
 * source pointers. This can't handle inter-block dependencies.
 */
unsigned
ir3_delay_calc_prera(struct ir3_block *block, struct ir3_instruction *instr)
{
   unsigned delay = 0;

   /* worst case over all of instr's sources defined in this block: */
   foreach_src_n (src, i, instr) {
      unsigned d = 0;

      if (src->def && src->def->instr->block == block) {
         d = delay_calc_srcn_prera(block, src->def->instr, instr, i);
      }

      delay = MAX2(delay, d);
   }

   return delay;
}

/* Post-RA, we don't have arrays any more, so we have to be a bit careful here
 * and have to handle relative accesses specially.
 */

/* Number of register elements covered by 'reg' (whole array footprint for
 * relative accesses).
 */
static unsigned
post_ra_reg_elems(struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_RELATIV)
      return reg->size;
   return reg_elems(reg);
}

/* First register number touched by 'reg' (array base for relative accesses).
 */
static unsigned
post_ra_reg_num(struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_RELATIV)
      return reg->array.base;
   return reg->num;
}

/* Post-RA delay between assigner's dst 'assigner_n' and consumer's src
 * 'consumer_n', based on physical register overlap.  Returns 0 if the
 * registers don't alias.  Handles (rpt) by figuring out which
 * sub-instruction of assigner first conflicts with which sub-instruction
 * of consumer.
 */
static unsigned
delay_calc_srcn_postra(struct ir3_instruction *assigner,
                       struct ir3_instruction *consumer, unsigned assigner_n,
                       unsigned consumer_n, bool soft, bool mergedregs)
{
   struct ir3_register *src = consumer->srcs[consumer_n];
   struct ir3_register *dst = assigner->dsts[assigner_n];
   bool mismatched_half =
      (src->flags & IR3_REG_HALF) != (dst->flags & IR3_REG_HALF);

   /* In the mergedregs case or when the register is a special register,
    * half-registers do not alias with full registers.
    */
   if ((!mergedregs || is_reg_special(src) || is_reg_special(dst)) &&
       mismatched_half)
      return 0;

   /* Compute the byte ranges covered by src and dst and check overlap: */
   unsigned src_start = post_ra_reg_num(src) * reg_elem_size(src);
   unsigned src_end = src_start + post_ra_reg_elems(src) * reg_elem_size(src);
   unsigned dst_start = post_ra_reg_num(dst) * reg_elem_size(dst);
   unsigned dst_end = dst_start + post_ra_reg_elems(dst) * reg_elem_size(dst);

   if (dst_start >= src_end || src_start >= dst_end)
      return 0;

   unsigned delay = ir3_delayslots(assigner, consumer, consumer_n, soft);

   if (assigner->repeat == 0 && consumer->repeat == 0)
      return delay;

   /* If either side is a relative access, we can't really apply most of the
    * reasoning below because we don't know which component aliases which.
    * Just bail in this case.
    */
   if ((src->flags & IR3_REG_RELATIV) || (dst->flags & IR3_REG_RELATIV))
      return delay;

   /* MOVMSK seems to require that all users wait until the entire
    * instruction is finished, so just bail here.
    */
   if (assigner->opc == OPC_MOVMSK)
      return delay;

   /* TODO: Handle the combination of (rpt) and different component sizes
    * better like below. This complicates things significantly because the
    * components don't line up.
    */
   if (mismatched_half)
      return delay;

   /* If an instruction has a (rpt), then it acts as a sequence of
    * instructions, reading its non-(r) sources at each cycle. First, get the
    * register num for the first instruction where they interfere:
    */

   unsigned first_num = MAX2(src_start, dst_start) / reg_elem_size(dst);

   /* Now, for that first conflicting half/full register, figure out the
    * sub-instruction within assigner/consumer it corresponds to. For (r)
    * sources, this should already return the correct answer of 0. However we
    * have to special-case the multi-mov instructions, where the
    * sub-instructions sometimes come from the src/dst indices instead.
    */
   unsigned first_src_instr;
   if (consumer->opc == OPC_SWZ || consumer->opc == OPC_GAT)
      first_src_instr = consumer_n;
   else
      first_src_instr = first_num - src->num;

   unsigned first_dst_instr;
   if (assigner->opc == OPC_SWZ || assigner->opc == OPC_SCT)
      first_dst_instr = assigner_n;
   else
      first_dst_instr = first_num - dst->num;

   /* The delay we return is relative to the *end* of assigner and the
    * *beginning* of consumer, because it's the number of nops (or other
    * things) needed between them. Any instructions after first_dst_instr
    * subtract from the delay, and so do any instructions before
    * first_src_instr. Calculate an offset to subtract from the non-rpt-aware
    * delay to account for that.
    *
    * Now, a priori, we need to go through this process for every
    * conflicting regnum and take the minimum of the offsets to make sure
    * that the appropriate number of nop's is inserted for every conflicting
    * pair of sub-instructions. However, as we go to the next conflicting
    * regnum (if any), the number of instructions after first_dst_instr
    * decreases by 1 and the number of source instructions before
    * first_src_instr correspondingly increases by 1, so the offset stays the
    * same for all conflicting registers.
    */
   unsigned offset = first_src_instr + (assigner->repeat - first_dst_instr);
   return offset > delay ? 0 : delay - offset;
}

/* Walk backwards from 'start' (or the block end if NULL) looking for
 * assigners that consumer's sources still need to wait on, and return the
 * worst-case remaining delay.  'distance' is how many instruction slots
 * already separate the walk origin from consumer.  If 'pred' is true,
 * recurse into predecessor blocks (needed for exact nop insertion).
 */
static unsigned
delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
                  struct ir3_instruction *consumer, unsigned distance,
                  bool soft, bool pred, bool mergedregs)
{
   unsigned delay = 0;
   /* Search backwards starting at the instruction before start, unless it's
    * NULL then search backwards from the block end.
    */
   struct list_head *start_list =
      start ? start->node.prev : block->instr_list.prev;
   list_for_each_entry_from_rev (struct ir3_instruction, assigner, start_list,
                                 &block->instr_list, node) {
      if (count_instruction(assigner))
         distance += assigner->nop;

      /* Once we are far enough back, nothing can require more delay: */
      if (distance + delay >= (soft ? SOFT_SS_NOPS : MAX_NOPS))
         return delay;

      if (is_meta(assigner))
         continue;

      unsigned new_delay = 0;

      /* worst case over all overlapping (dst, src) pairs: */
      foreach_dst_n (dst, dst_n, assigner) {
         if (dst->wrmask == 0)
            continue;
         foreach_src_n (src, src_n, consumer) {
            if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
               continue;

            unsigned src_delay = delay_calc_srcn_postra(
               assigner, consumer, dst_n, src_n, soft, mergedregs);
            new_delay = MAX2(new_delay, src_delay);
         }
      }

      new_delay = new_delay > distance ? new_delay - distance : 0;
      delay = MAX2(delay, new_delay);

      if (count_instruction(assigner))
         distance += 1 + assigner->repeat;
   }

   /* Note: this allows recursion into "block" if it has already been
    * visited, but *not* recursion into its predecessors. We may have to
    * visit the original block twice, for the loop case where we have to
    * consider definitions in an earlier iteration of the same loop:
    *
    *    while (...) {
    *       mov.u32u32 ..., r0.x
    *       ...
    *       mov.u32u32 r0.x, ...
    *    }
    *
    * However any other recursion would be unnecessary.
    */

   if (pred && block->data != block) {
      /* use block->data as the visited marker to avoid infinite recursion: */
      block->data = block;

      for (unsigned i = 0; i < block->predecessors_count; i++) {
         /* NOTE(review): this local shadows the bool 'pred' parameter, so
          * the recursive call below passes the (non-NULL) block pointer as
          * the bool arg.  That evaluates to true, which matches the intent
          * since we only get here when the parameter was true -- but it is
          * fragile; confirm/rename upstream.
          */
         struct ir3_block *pred = block->predecessors[i];
         unsigned pred_delay = delay_calc_postra(pred, NULL, consumer, distance,
                                                 soft, pred, mergedregs);
         delay = MAX2(delay, pred_delay);
      }

      block->data = NULL;
   }

   return delay;
}

/**
 * Calculate delay for post-RA scheduling based on physical registers but not
 * exact (i.e. don't recurse into predecessors, and make it possible to
 * estimate impact of sync flags).
 *
 * @soft:  If true, add additional delay for situations where they
 *         would not be strictly required because a sync flag would be
 *         used (but scheduler would prefer to schedule some other
 *         instructions first to avoid stalling on sync flag)
 * @mergedregs: True if mergedregs is enabled.
 */
unsigned
ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr,
                      bool soft, bool mergedregs)
{
   return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs);
}

/**
 * Calculate delay for nop insertion. This must exactly match hardware
 * requirements, including recursing into predecessor blocks.
 */
unsigned
ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr,
                     bool mergedregs)
{
   /* soft=false (exact HW requirement), pred=true (recurse into preds): */
   return delay_calc_postra(block, NULL, instr, 0, false, true, mergedregs);
}

/**
 * Remove nop instructions. The scheduler can insert placeholder nop's
 * so that ir3_delay_calc() can account for nop's that won't be needed
 * due to nop's triggered by a previous instruction. However, before
 * legalize, we want to remove these. The legalize pass can insert
 * some nop's if needed to hold (for example) sync flags. These final
 * remaining nops are inserted by legalize after this.
 */
void
ir3_remove_nops(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_NOP) {
            /* _safe iterator allows unlinking during the walk: */
            list_del(&instr->node);
         }
      }
   }
}