196c5ddc4Srjs#include "bifrost_nir.h" 296c5ddc4Srjs 396c5ddc4Srjs#include "nir.h" 496c5ddc4Srjs#include "nir_builder.h" 596c5ddc4Srjs#include "nir_search.h" 696c5ddc4Srjs#include "nir_search_helpers.h" 796c5ddc4Srjs 896c5ddc4Srjs/* What follows is NIR algebraic transform code for the following 6 996c5ddc4Srjs * transforms: 1096c5ddc4Srjs * ('fmul', 'a', 2.0) => ('fadd', 'a', 'a') 1196c5ddc4Srjs * ('fmin', ('fmax', 'a', -1.0), 1.0) => ('fsat_signed_mali', 'a') 1296c5ddc4Srjs * ('fmax', ('fmin', 'a', 1.0), -1.0) => ('fsat_signed_mali', 'a') 1396c5ddc4Srjs * ('fmax', 'a', 0.0) => ('fclamp_pos_mali', 'a') 1496c5ddc4Srjs * ('fabs', ('fddx', 'a')) => ('fabs', ('fddx_must_abs_mali', 'a')) 1596c5ddc4Srjs * ('fabs', ('fddy', 'b')) => ('fabs', ('fddy_must_abs_mali', 'b')) 1696c5ddc4Srjs */ 1796c5ddc4Srjs 1896c5ddc4Srjs 1996c5ddc4Srjs static const nir_search_variable search0_0 = { 2096c5ddc4Srjs { nir_search_value_variable, -1 }, 2196c5ddc4Srjs 0, /* a */ 2296c5ddc4Srjs false, 2396c5ddc4Srjs nir_type_invalid, 2496c5ddc4Srjs NULL, 2596c5ddc4Srjs {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 2696c5ddc4Srjs}; 2796c5ddc4Srjs 2896c5ddc4Srjsstatic const nir_search_constant search0_1 = { 2996c5ddc4Srjs { nir_search_value_constant, -1 }, 3096c5ddc4Srjs nir_type_float, { 0x4000000000000000 /* 2.0 */ }, 3196c5ddc4Srjs}; 3296c5ddc4Srjsstatic const nir_search_expression search0 = { 3396c5ddc4Srjs { nir_search_value_expression, -1 }, 3496c5ddc4Srjs false, false, 3596c5ddc4Srjs 0, 1, 3696c5ddc4Srjs nir_op_fmul, 3796c5ddc4Srjs { &search0_0.value, &search0_1.value }, 3896c5ddc4Srjs NULL, 3996c5ddc4Srjs}; 4096c5ddc4Srjs 4196c5ddc4Srjs /* replace0_0 -> search0_0 in the cache */ 4296c5ddc4Srjs 4396c5ddc4Srjs/* replace0_1 -> search0_0 in the cache */ 4496c5ddc4Srjsstatic const nir_search_expression replace0 = { 4596c5ddc4Srjs { nir_search_value_expression, -1 }, 4696c5ddc4Srjs false, false, 4796c5ddc4Srjs -1, 0, 4896c5ddc4Srjs nir_op_fadd, 4996c5ddc4Srjs { &search0_0.value, &search0_0.value }, 5096c5ddc4Srjs NULL, 5196c5ddc4Srjs}; 5296c5ddc4Srjs 5396c5ddc4Srjs /* search1_0_0 -> search0_0 in the cache */ 5496c5ddc4Srjs 5596c5ddc4Srjsstatic const nir_search_constant search1_0_1 = { 5696c5ddc4Srjs { nir_search_value_constant, -1 }, 5796c5ddc4Srjs nir_type_float, { 0xbff0000000000000 /* -1.0 */ }, 5896c5ddc4Srjs}; 5996c5ddc4Srjsstatic const nir_search_expression search1_0 = { 6096c5ddc4Srjs { nir_search_value_expression, -1 }, 6196c5ddc4Srjs false, false, 6296c5ddc4Srjs 1, 1, 6396c5ddc4Srjs nir_op_fmax, 6496c5ddc4Srjs { &search0_0.value, &search1_0_1.value }, 6596c5ddc4Srjs NULL, 6696c5ddc4Srjs}; 6796c5ddc4Srjs 6896c5ddc4Srjsstatic const nir_search_constant search1_1 = { 6996c5ddc4Srjs { nir_search_value_constant, -1 }, 7096c5ddc4Srjs nir_type_float, { 0x3ff0000000000000 /* 1.0 */ }, 7196c5ddc4Srjs}; 7296c5ddc4Srjsstatic const nir_search_expression search1 = { 7396c5ddc4Srjs { nir_search_value_expression, -1 }, 7496c5ddc4Srjs false, false, 7596c5ddc4Srjs 0, 2, 7696c5ddc4Srjs nir_op_fmin, 7796c5ddc4Srjs { &search1_0.value, &search1_1.value }, 7896c5ddc4Srjs NULL, 7996c5ddc4Srjs}; 8096c5ddc4Srjs 8196c5ddc4Srjs /* replace1_0 -> search0_0 in the cache */ 8296c5ddc4Srjsstatic const nir_search_expression replace1 = { 8396c5ddc4Srjs { nir_search_value_expression, -1 }, 8496c5ddc4Srjs false, false, 8596c5ddc4Srjs -1, 0, 8696c5ddc4Srjs nir_op_fsat_signed_mali, 8796c5ddc4Srjs { &search0_0.value }, 8896c5ddc4Srjs NULL, 8996c5ddc4Srjs}; 9096c5ddc4Srjs 9196c5ddc4Srjs /* search2_0_0 -> search0_0 in the cache */ 9296c5ddc4Srjs 9396c5ddc4Srjs/* search2_0_1 -> search1_1 in the cache */ 9496c5ddc4Srjsstatic const nir_search_expression search2_0 = { 9596c5ddc4Srjs { nir_search_value_expression, -1 }, 9696c5ddc4Srjs false, false, 9796c5ddc4Srjs 1, 1, 9896c5ddc4Srjs nir_op_fmin, 9996c5ddc4Srjs { &search0_0.value, &search1_1.value }, 10096c5ddc4Srjs NULL, 10196c5ddc4Srjs}; 10296c5ddc4Srjs 10396c5ddc4Srjs/* search2_1 -> search1_0_1 in the cache */ 10496c5ddc4Srjsstatic const nir_search_expression search2 = { 10596c5ddc4Srjs { nir_search_value_expression, -1 }, 10696c5ddc4Srjs false, false, 10796c5ddc4Srjs 0, 2, 10896c5ddc4Srjs nir_op_fmax, 10996c5ddc4Srjs { &search2_0.value, &search1_0_1.value }, 11096c5ddc4Srjs NULL, 11196c5ddc4Srjs}; 11296c5ddc4Srjs 11396c5ddc4Srjs /* replace2_0 -> search0_0 in the cache */ 11496c5ddc4Srjs/* replace2 -> replace1 in the cache */ 11596c5ddc4Srjs 11696c5ddc4Srjs /* search3_0 -> search0_0 in the cache */ 11796c5ddc4Srjs 11896c5ddc4Srjsstatic const nir_search_constant search3_1 = { 11996c5ddc4Srjs { nir_search_value_constant, -1 }, 12096c5ddc4Srjs nir_type_float, { 0x0 /* 0.0 */ }, 12196c5ddc4Srjs}; 12296c5ddc4Srjsstatic const nir_search_expression search3 = { 12396c5ddc4Srjs { nir_search_value_expression, -1 }, 12496c5ddc4Srjs false, false, 12596c5ddc4Srjs 0, 1, 12696c5ddc4Srjs nir_op_fmax, 12796c5ddc4Srjs { &search0_0.value, &search3_1.value }, 12896c5ddc4Srjs NULL, 12996c5ddc4Srjs}; 13096c5ddc4Srjs 13196c5ddc4Srjs /* replace3_0 -> search0_0 in the cache */ 13296c5ddc4Srjsstatic const nir_search_expression replace3 = { 13396c5ddc4Srjs { nir_search_value_expression, -1 }, 13496c5ddc4Srjs false, false, 13596c5ddc4Srjs -1, 0, 13696c5ddc4Srjs nir_op_fclamp_pos_mali, 13796c5ddc4Srjs { &search0_0.value }, 13896c5ddc4Srjs NULL, 13996c5ddc4Srjs}; 14096c5ddc4Srjs 14196c5ddc4Srjs /* search4_0_0 -> search0_0 in the cache */ 14296c5ddc4Srjsstatic const nir_search_expression search4_0 = { 14396c5ddc4Srjs { nir_search_value_expression, -1 }, 14496c5ddc4Srjs false, false, 14596c5ddc4Srjs -1, 0, 14696c5ddc4Srjs nir_op_fddx, 14796c5ddc4Srjs { &search0_0.value }, 14896c5ddc4Srjs NULL, 14996c5ddc4Srjs}; 15096c5ddc4Srjsstatic const nir_search_expression search4 = { 15196c5ddc4Srjs { nir_search_value_expression, -1 }, 15296c5ddc4Srjs false, false, 15396c5ddc4Srjs -1, 0, 15496c5ddc4Srjs nir_op_fabs, 15596c5ddc4Srjs { &search4_0.value }, 15696c5ddc4Srjs NULL, 15796c5ddc4Srjs}; 15896c5ddc4Srjs 15996c5ddc4Srjs /* replace4_0_0 -> search0_0 in the cache */ 16096c5ddc4Srjsstatic const nir_search_expression replace4_0 = { 16196c5ddc4Srjs { nir_search_value_expression, -1 }, 16296c5ddc4Srjs false, false, 16396c5ddc4Srjs -1, 0, 16496c5ddc4Srjs nir_op_fddx_must_abs_mali, 16596c5ddc4Srjs { &search0_0.value }, 16696c5ddc4Srjs NULL, 16796c5ddc4Srjs}; 16896c5ddc4Srjsstatic const nir_search_expression replace4 = { 16996c5ddc4Srjs { nir_search_value_expression, -1 }, 17096c5ddc4Srjs false, false, 17196c5ddc4Srjs -1, 0, 17296c5ddc4Srjs nir_op_fabs, 17396c5ddc4Srjs { &replace4_0.value }, 17496c5ddc4Srjs NULL, 17596c5ddc4Srjs}; 17696c5ddc4Srjs 17796c5ddc4Srjs static const nir_search_variable search5_0_0 = { 17896c5ddc4Srjs { nir_search_value_variable, -1 }, 17996c5ddc4Srjs 0, /* b */ 18096c5ddc4Srjs false, 18196c5ddc4Srjs nir_type_invalid, 18296c5ddc4Srjs NULL, 18396c5ddc4Srjs {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, 18496c5ddc4Srjs}; 18596c5ddc4Srjsstatic const nir_search_expression search5_0 = { 18696c5ddc4Srjs { nir_search_value_expression, -1 }, 18796c5ddc4Srjs false, false, 18896c5ddc4Srjs -1, 0, 18996c5ddc4Srjs nir_op_fddy, 19096c5ddc4Srjs { &search5_0_0.value }, 19196c5ddc4Srjs NULL, 19296c5ddc4Srjs}; 19396c5ddc4Srjsstatic const nir_search_expression search5 = { 19496c5ddc4Srjs { nir_search_value_expression, -1 }, 19596c5ddc4Srjs false, false, 19696c5ddc4Srjs -1, 0, 19796c5ddc4Srjs nir_op_fabs, 19896c5ddc4Srjs { &search5_0.value }, 19996c5ddc4Srjs NULL, 20096c5ddc4Srjs}; 20196c5ddc4Srjs 20296c5ddc4Srjs /* replace5_0_0 -> search5_0_0 in the cache */ 20396c5ddc4Srjsstatic const nir_search_expression replace5_0 = { 20496c5ddc4Srjs { nir_search_value_expression, -1 }, 20596c5ddc4Srjs false, false, 20696c5ddc4Srjs -1, 0, 20796c5ddc4Srjs nir_op_fddy_must_abs_mali, 20896c5ddc4Srjs { &search5_0_0.value }, 20996c5ddc4Srjs NULL, 21096c5ddc4Srjs}; 21196c5ddc4Srjsstatic const nir_search_expression replace5 = { 21296c5ddc4Srjs { nir_search_value_expression, -1 }, 21396c5ddc4Srjs false, false, 21496c5ddc4Srjs -1, 0, 21596c5ddc4Srjs nir_op_fabs, 21696c5ddc4Srjs { &replace5_0.value }, 21796c5ddc4Srjs NULL, 21896c5ddc4Srjs}; 21996c5ddc4Srjs 22096c5ddc4Srjs 22196c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state2_xforms[] = { 22296c5ddc4Srjs { &search0, &replace0.value, 0 }, 22396c5ddc4Srjs}; 22496c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state4_xforms[] = { 22596c5ddc4Srjs { &search3, &replace3.value, 0 }, 22696c5ddc4Srjs}; 22796c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state7_xforms[] = { 22896c5ddc4Srjs { &search2, &replace1.value, 0 }, 22996c5ddc4Srjs { &search3, &replace3.value, 0 }, 23096c5ddc4Srjs}; 23196c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state8_xforms[] = { 23296c5ddc4Srjs { &search1, &replace1.value, 0 }, 23396c5ddc4Srjs}; 23496c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state9_xforms[] = { 23596c5ddc4Srjs { &search4, &replace4.value, 0 }, 23696c5ddc4Srjs}; 23796c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state10_xforms[] = { 23896c5ddc4Srjs { &search5, &replace5.value, 0 }, 23996c5ddc4Srjs}; 24096c5ddc4Srjs 24196c5ddc4Srjsstatic const struct per_op_table bifrost_nir_lower_algebraic_late_table[nir_num_search_ops] = { 24296c5ddc4Srjs [nir_op_fmul] = { 24396c5ddc4Srjs .filter = (uint16_t []) { 24496c5ddc4Srjs 0, 24596c5ddc4Srjs 1, 24696c5ddc4Srjs 0, 24796c5ddc4Srjs 0, 24896c5ddc4Srjs 0, 24996c5ddc4Srjs 0, 25096c5ddc4Srjs 0, 25196c5ddc4Srjs 0, 25296c5ddc4Srjs 0, 25396c5ddc4Srjs 0, 25496c5ddc4Srjs 0, 25596c5ddc4Srjs }, 25696c5ddc4Srjs 25796c5ddc4Srjs .num_filtered_states = 2, 25896c5ddc4Srjs .table = (uint16_t []) { 25996c5ddc4Srjs 26096c5ddc4Srjs 0, 26196c5ddc4Srjs 2, 26296c5ddc4Srjs 2, 26396c5ddc4Srjs 2, 26496c5ddc4Srjs }, 26596c5ddc4Srjs }, 26696c5ddc4Srjs [nir_op_fmin] = { 26796c5ddc4Srjs .filter = (uint16_t []) { 26896c5ddc4Srjs 0, 26996c5ddc4Srjs 1, 27096c5ddc4Srjs 0, 27196c5ddc4Srjs 0, 27296c5ddc4Srjs 2, 27396c5ddc4Srjs 0, 27496c5ddc4Srjs 0, 27596c5ddc4Srjs 2, 27696c5ddc4Srjs 0, 27796c5ddc4Srjs 0, 27896c5ddc4Srjs 0, 27996c5ddc4Srjs }, 28096c5ddc4Srjs 28196c5ddc4Srjs .num_filtered_states = 3, 28296c5ddc4Srjs .table = (uint16_t []) { 28396c5ddc4Srjs 28496c5ddc4Srjs 0, 28596c5ddc4Srjs 3, 28696c5ddc4Srjs 0, 28796c5ddc4Srjs 3, 28896c5ddc4Srjs 3, 28996c5ddc4Srjs 8, 29096c5ddc4Srjs 0, 29196c5ddc4Srjs 8, 29296c5ddc4Srjs 0, 29396c5ddc4Srjs }, 29496c5ddc4Srjs }, 29596c5ddc4Srjs [nir_op_fmax] = { 29696c5ddc4Srjs .filter = (uint16_t []) { 29796c5ddc4Srjs 0, 29896c5ddc4Srjs 1, 29996c5ddc4Srjs 0, 30096c5ddc4Srjs 2, 30196c5ddc4Srjs 0, 30296c5ddc4Srjs 0, 30396c5ddc4Srjs 0, 30496c5ddc4Srjs 0, 30596c5ddc4Srjs 2, 30696c5ddc4Srjs 0, 30796c5ddc4Srjs 0, 30896c5ddc4Srjs }, 30996c5ddc4Srjs 31096c5ddc4Srjs .num_filtered_states = 3, 31196c5ddc4Srjs .table = (uint16_t []) { 31296c5ddc4Srjs 31396c5ddc4Srjs 0, 31496c5ddc4Srjs 4, 31596c5ddc4Srjs 0, 31696c5ddc4Srjs 4, 31796c5ddc4Srjs 4, 31896c5ddc4Srjs 7, 31996c5ddc4Srjs 0, 32096c5ddc4Srjs 7, 32196c5ddc4Srjs 0, 32296c5ddc4Srjs }, 32396c5ddc4Srjs }, 32496c5ddc4Srjs [nir_op_fabs] = { 32596c5ddc4Srjs .filter = (uint16_t []) { 32696c5ddc4Srjs 0, 32796c5ddc4Srjs 0, 32896c5ddc4Srjs 0, 32996c5ddc4Srjs 0, 33096c5ddc4Srjs 0, 33196c5ddc4Srjs 1, 33296c5ddc4Srjs 2, 33396c5ddc4Srjs 0, 33496c5ddc4Srjs 0, 33596c5ddc4Srjs 0, 33696c5ddc4Srjs 0, 33796c5ddc4Srjs }, 33896c5ddc4Srjs 33996c5ddc4Srjs .num_filtered_states = 3, 34096c5ddc4Srjs .table = (uint16_t []) { 34196c5ddc4Srjs 34296c5ddc4Srjs 0, 34396c5ddc4Srjs 9, 34496c5ddc4Srjs 10, 34596c5ddc4Srjs }, 34696c5ddc4Srjs }, 34796c5ddc4Srjs [nir_op_fddx] = { 34896c5ddc4Srjs .filter = (uint16_t []) { 34996c5ddc4Srjs 0, 35096c5ddc4Srjs 0, 35196c5ddc4Srjs 0, 35296c5ddc4Srjs 0, 35396c5ddc4Srjs 0, 35496c5ddc4Srjs 0, 35596c5ddc4Srjs 0, 35696c5ddc4Srjs 0, 35796c5ddc4Srjs 0, 35896c5ddc4Srjs 0, 35996c5ddc4Srjs 0, 36096c5ddc4Srjs }, 36196c5ddc4Srjs 36296c5ddc4Srjs .num_filtered_states = 1, 36396c5ddc4Srjs .table = (uint16_t []) { 36496c5ddc4Srjs 36596c5ddc4Srjs 5, 36696c5ddc4Srjs }, 36796c5ddc4Srjs }, 36896c5ddc4Srjs [nir_op_fddy] = { 36996c5ddc4Srjs .filter = (uint16_t []) { 37096c5ddc4Srjs 0, 37196c5ddc4Srjs 0, 37296c5ddc4Srjs 0, 37396c5ddc4Srjs 0, 37496c5ddc4Srjs 0, 37596c5ddc4Srjs 0, 37696c5ddc4Srjs 0, 37796c5ddc4Srjs 0, 37896c5ddc4Srjs 0, 37996c5ddc4Srjs 0, 38096c5ddc4Srjs 0, 38196c5ddc4Srjs }, 38296c5ddc4Srjs 38396c5ddc4Srjs .num_filtered_states = 1, 38496c5ddc4Srjs .table = (uint16_t []) { 38596c5ddc4Srjs 38696c5ddc4Srjs 6, 38796c5ddc4Srjs }, 38896c5ddc4Srjs }, 38996c5ddc4Srjs}; 39096c5ddc4Srjs 39196c5ddc4Srjsconst struct transform *bifrost_nir_lower_algebraic_late_transforms[] = { 39296c5ddc4Srjs NULL, 39396c5ddc4Srjs NULL, 39496c5ddc4Srjs bifrost_nir_lower_algebraic_late_state2_xforms, 39596c5ddc4Srjs NULL, 39696c5ddc4Srjs bifrost_nir_lower_algebraic_late_state4_xforms, 39796c5ddc4Srjs NULL, 39896c5ddc4Srjs NULL, 39996c5ddc4Srjs bifrost_nir_lower_algebraic_late_state7_xforms, 40096c5ddc4Srjs bifrost_nir_lower_algebraic_late_state8_xforms, 40196c5ddc4Srjs bifrost_nir_lower_algebraic_late_state9_xforms, 40296c5ddc4Srjs bifrost_nir_lower_algebraic_late_state10_xforms, 40396c5ddc4Srjs}; 40496c5ddc4Srjs 40596c5ddc4Srjsconst uint16_t bifrost_nir_lower_algebraic_late_transform_counts[] = { 40696c5ddc4Srjs 0, 40796c5ddc4Srjs 0, 40896c5ddc4Srjs (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state2_xforms), 40996c5ddc4Srjs 0, 41096c5ddc4Srjs (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state4_xforms), 41196c5ddc4Srjs 0, 41296c5ddc4Srjs 0, 41396c5ddc4Srjs (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state7_xforms), 41496c5ddc4Srjs (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state8_xforms), 41596c5ddc4Srjs (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state9_xforms), 41696c5ddc4Srjs (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state10_xforms), 41796c5ddc4Srjs}; 41896c5ddc4Srjs 41996c5ddc4Srjsbool 42096c5ddc4Srjsbifrost_nir_lower_algebraic_late(nir_shader *shader) 42196c5ddc4Srjs{ 42296c5ddc4Srjs bool progress = false; 42396c5ddc4Srjs bool condition_flags[1]; 42496c5ddc4Srjs const nir_shader_compiler_options *options = shader->options; 42596c5ddc4Srjs const shader_info *info = &shader->info; 42696c5ddc4Srjs (void) options; 42796c5ddc4Srjs (void) info; 42896c5ddc4Srjs 42996c5ddc4Srjs condition_flags[0] = true; 43096c5ddc4Srjs 43196c5ddc4Srjs nir_foreach_function(function, shader) { 43296c5ddc4Srjs if (function->impl) { 43396c5ddc4Srjs progress |= nir_algebraic_impl(function->impl, condition_flags, 43496c5ddc4Srjs bifrost_nir_lower_algebraic_late_transforms, 43596c5ddc4Srjs bifrost_nir_lower_algebraic_late_transform_counts, 43696c5ddc4Srjs bifrost_nir_lower_algebraic_late_table); 43796c5ddc4Srjs } 43896c5ddc4Srjs } 43996c5ddc4Srjs 44096c5ddc4Srjs return progress; 44196c5ddc4Srjs} 44296c5ddc4Srjs 443