196c5ddc4Srjs#include "bifrost_nir.h"
296c5ddc4Srjs
396c5ddc4Srjs#include "nir.h"
496c5ddc4Srjs#include "nir_builder.h"
596c5ddc4Srjs#include "nir_search.h"
696c5ddc4Srjs#include "nir_search_helpers.h"
796c5ddc4Srjs
/* What follows is NIR algebraic transform code for the following 6
 * transforms:
 *    ('fmul', 'a', 2.0) => ('fadd', 'a', 'a')
 *    ('fmin', ('fmax', 'a', -1.0), 1.0) => ('fsat_signed_mali', 'a')
 *    ('fmax', ('fmin', 'a', 1.0), -1.0) => ('fsat_signed_mali', 'a')
 *    ('fmax', 'a', 0.0) => ('fclamp_pos_mali', 'a')
 *    ('fabs', ('fddx', 'a')) => ('fabs', ('fddx_must_abs_mali', 'a'))
 *    ('fabs', ('fddy', 'b')) => ('fabs', ('fddy_must_abs_mali', 'b'))
 */


1996c5ddc4Srjs   static const nir_search_variable search0_0 = {
2096c5ddc4Srjs   { nir_search_value_variable, -1 },
2196c5ddc4Srjs   0, /* a */
2296c5ddc4Srjs   false,
2396c5ddc4Srjs   nir_type_invalid,
2496c5ddc4Srjs   NULL,
2596c5ddc4Srjs   {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
2696c5ddc4Srjs};
2796c5ddc4Srjs
2896c5ddc4Srjsstatic const nir_search_constant search0_1 = {
2996c5ddc4Srjs   { nir_search_value_constant, -1 },
3096c5ddc4Srjs   nir_type_float, { 0x4000000000000000 /* 2.0 */ },
3196c5ddc4Srjs};
3296c5ddc4Srjsstatic const nir_search_expression search0 = {
3396c5ddc4Srjs   { nir_search_value_expression, -1 },
3496c5ddc4Srjs   false, false,
3596c5ddc4Srjs   0, 1,
3696c5ddc4Srjs   nir_op_fmul,
3796c5ddc4Srjs   { &search0_0.value, &search0_1.value },
3896c5ddc4Srjs   NULL,
3996c5ddc4Srjs};
4096c5ddc4Srjs
4196c5ddc4Srjs   /* replace0_0 -> search0_0 in the cache */
4296c5ddc4Srjs
4396c5ddc4Srjs/* replace0_1 -> search0_0 in the cache */
4496c5ddc4Srjsstatic const nir_search_expression replace0 = {
4596c5ddc4Srjs   { nir_search_value_expression, -1 },
4696c5ddc4Srjs   false, false,
4796c5ddc4Srjs   -1, 0,
4896c5ddc4Srjs   nir_op_fadd,
4996c5ddc4Srjs   { &search0_0.value, &search0_0.value },
5096c5ddc4Srjs   NULL,
5196c5ddc4Srjs};
5296c5ddc4Srjs
5396c5ddc4Srjs   /* search1_0_0 -> search0_0 in the cache */
5496c5ddc4Srjs
5596c5ddc4Srjsstatic const nir_search_constant search1_0_1 = {
5696c5ddc4Srjs   { nir_search_value_constant, -1 },
5796c5ddc4Srjs   nir_type_float, { 0xbff0000000000000 /* -1.0 */ },
5896c5ddc4Srjs};
5996c5ddc4Srjsstatic const nir_search_expression search1_0 = {
6096c5ddc4Srjs   { nir_search_value_expression, -1 },
6196c5ddc4Srjs   false, false,
6296c5ddc4Srjs   1, 1,
6396c5ddc4Srjs   nir_op_fmax,
6496c5ddc4Srjs   { &search0_0.value, &search1_0_1.value },
6596c5ddc4Srjs   NULL,
6696c5ddc4Srjs};
6796c5ddc4Srjs
6896c5ddc4Srjsstatic const nir_search_constant search1_1 = {
6996c5ddc4Srjs   { nir_search_value_constant, -1 },
7096c5ddc4Srjs   nir_type_float, { 0x3ff0000000000000 /* 1.0 */ },
7196c5ddc4Srjs};
7296c5ddc4Srjsstatic const nir_search_expression search1 = {
7396c5ddc4Srjs   { nir_search_value_expression, -1 },
7496c5ddc4Srjs   false, false,
7596c5ddc4Srjs   0, 2,
7696c5ddc4Srjs   nir_op_fmin,
7796c5ddc4Srjs   { &search1_0.value, &search1_1.value },
7896c5ddc4Srjs   NULL,
7996c5ddc4Srjs};
8096c5ddc4Srjs
8196c5ddc4Srjs   /* replace1_0 -> search0_0 in the cache */
8296c5ddc4Srjsstatic const nir_search_expression replace1 = {
8396c5ddc4Srjs   { nir_search_value_expression, -1 },
8496c5ddc4Srjs   false, false,
8596c5ddc4Srjs   -1, 0,
8696c5ddc4Srjs   nir_op_fsat_signed_mali,
8796c5ddc4Srjs   { &search0_0.value },
8896c5ddc4Srjs   NULL,
8996c5ddc4Srjs};
9096c5ddc4Srjs
9196c5ddc4Srjs   /* search2_0_0 -> search0_0 in the cache */
9296c5ddc4Srjs
9396c5ddc4Srjs/* search2_0_1 -> search1_1 in the cache */
9496c5ddc4Srjsstatic const nir_search_expression search2_0 = {
9596c5ddc4Srjs   { nir_search_value_expression, -1 },
9696c5ddc4Srjs   false, false,
9796c5ddc4Srjs   1, 1,
9896c5ddc4Srjs   nir_op_fmin,
9996c5ddc4Srjs   { &search0_0.value, &search1_1.value },
10096c5ddc4Srjs   NULL,
10196c5ddc4Srjs};
10296c5ddc4Srjs
10396c5ddc4Srjs/* search2_1 -> search1_0_1 in the cache */
10496c5ddc4Srjsstatic const nir_search_expression search2 = {
10596c5ddc4Srjs   { nir_search_value_expression, -1 },
10696c5ddc4Srjs   false, false,
10796c5ddc4Srjs   0, 2,
10896c5ddc4Srjs   nir_op_fmax,
10996c5ddc4Srjs   { &search2_0.value, &search1_0_1.value },
11096c5ddc4Srjs   NULL,
11196c5ddc4Srjs};
11296c5ddc4Srjs
11396c5ddc4Srjs   /* replace2_0 -> search0_0 in the cache */
11496c5ddc4Srjs/* replace2 -> replace1 in the cache */
11596c5ddc4Srjs
11696c5ddc4Srjs   /* search3_0 -> search0_0 in the cache */
11796c5ddc4Srjs
11896c5ddc4Srjsstatic const nir_search_constant search3_1 = {
11996c5ddc4Srjs   { nir_search_value_constant, -1 },
12096c5ddc4Srjs   nir_type_float, { 0x0 /* 0.0 */ },
12196c5ddc4Srjs};
12296c5ddc4Srjsstatic const nir_search_expression search3 = {
12396c5ddc4Srjs   { nir_search_value_expression, -1 },
12496c5ddc4Srjs   false, false,
12596c5ddc4Srjs   0, 1,
12696c5ddc4Srjs   nir_op_fmax,
12796c5ddc4Srjs   { &search0_0.value, &search3_1.value },
12896c5ddc4Srjs   NULL,
12996c5ddc4Srjs};
13096c5ddc4Srjs
13196c5ddc4Srjs   /* replace3_0 -> search0_0 in the cache */
13296c5ddc4Srjsstatic const nir_search_expression replace3 = {
13396c5ddc4Srjs   { nir_search_value_expression, -1 },
13496c5ddc4Srjs   false, false,
13596c5ddc4Srjs   -1, 0,
13696c5ddc4Srjs   nir_op_fclamp_pos_mali,
13796c5ddc4Srjs   { &search0_0.value },
13896c5ddc4Srjs   NULL,
13996c5ddc4Srjs};
14096c5ddc4Srjs
14196c5ddc4Srjs   /* search4_0_0 -> search0_0 in the cache */
14296c5ddc4Srjsstatic const nir_search_expression search4_0 = {
14396c5ddc4Srjs   { nir_search_value_expression, -1 },
14496c5ddc4Srjs   false, false,
14596c5ddc4Srjs   -1, 0,
14696c5ddc4Srjs   nir_op_fddx,
14796c5ddc4Srjs   { &search0_0.value },
14896c5ddc4Srjs   NULL,
14996c5ddc4Srjs};
15096c5ddc4Srjsstatic const nir_search_expression search4 = {
15196c5ddc4Srjs   { nir_search_value_expression, -1 },
15296c5ddc4Srjs   false, false,
15396c5ddc4Srjs   -1, 0,
15496c5ddc4Srjs   nir_op_fabs,
15596c5ddc4Srjs   { &search4_0.value },
15696c5ddc4Srjs   NULL,
15796c5ddc4Srjs};
15896c5ddc4Srjs
15996c5ddc4Srjs   /* replace4_0_0 -> search0_0 in the cache */
16096c5ddc4Srjsstatic const nir_search_expression replace4_0 = {
16196c5ddc4Srjs   { nir_search_value_expression, -1 },
16296c5ddc4Srjs   false, false,
16396c5ddc4Srjs   -1, 0,
16496c5ddc4Srjs   nir_op_fddx_must_abs_mali,
16596c5ddc4Srjs   { &search0_0.value },
16696c5ddc4Srjs   NULL,
16796c5ddc4Srjs};
16896c5ddc4Srjsstatic const nir_search_expression replace4 = {
16996c5ddc4Srjs   { nir_search_value_expression, -1 },
17096c5ddc4Srjs   false, false,
17196c5ddc4Srjs   -1, 0,
17296c5ddc4Srjs   nir_op_fabs,
17396c5ddc4Srjs   { &replace4_0.value },
17496c5ddc4Srjs   NULL,
17596c5ddc4Srjs};
17696c5ddc4Srjs
17796c5ddc4Srjs   static const nir_search_variable search5_0_0 = {
17896c5ddc4Srjs   { nir_search_value_variable, -1 },
17996c5ddc4Srjs   0, /* b */
18096c5ddc4Srjs   false,
18196c5ddc4Srjs   nir_type_invalid,
18296c5ddc4Srjs   NULL,
18396c5ddc4Srjs   {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
18496c5ddc4Srjs};
18596c5ddc4Srjsstatic const nir_search_expression search5_0 = {
18696c5ddc4Srjs   { nir_search_value_expression, -1 },
18796c5ddc4Srjs   false, false,
18896c5ddc4Srjs   -1, 0,
18996c5ddc4Srjs   nir_op_fddy,
19096c5ddc4Srjs   { &search5_0_0.value },
19196c5ddc4Srjs   NULL,
19296c5ddc4Srjs};
19396c5ddc4Srjsstatic const nir_search_expression search5 = {
19496c5ddc4Srjs   { nir_search_value_expression, -1 },
19596c5ddc4Srjs   false, false,
19696c5ddc4Srjs   -1, 0,
19796c5ddc4Srjs   nir_op_fabs,
19896c5ddc4Srjs   { &search5_0.value },
19996c5ddc4Srjs   NULL,
20096c5ddc4Srjs};
20196c5ddc4Srjs
20296c5ddc4Srjs   /* replace5_0_0 -> search5_0_0 in the cache */
20396c5ddc4Srjsstatic const nir_search_expression replace5_0 = {
20496c5ddc4Srjs   { nir_search_value_expression, -1 },
20596c5ddc4Srjs   false, false,
20696c5ddc4Srjs   -1, 0,
20796c5ddc4Srjs   nir_op_fddy_must_abs_mali,
20896c5ddc4Srjs   { &search5_0_0.value },
20996c5ddc4Srjs   NULL,
21096c5ddc4Srjs};
21196c5ddc4Srjsstatic const nir_search_expression replace5 = {
21296c5ddc4Srjs   { nir_search_value_expression, -1 },
21396c5ddc4Srjs   false, false,
21496c5ddc4Srjs   -1, 0,
21596c5ddc4Srjs   nir_op_fabs,
21696c5ddc4Srjs   { &replace5_0.value },
21796c5ddc4Srjs   NULL,
21896c5ddc4Srjs};
21996c5ddc4Srjs
22096c5ddc4Srjs
22196c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state2_xforms[] = {
22296c5ddc4Srjs  { &search0, &replace0.value, 0 },
22396c5ddc4Srjs};
22496c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state4_xforms[] = {
22596c5ddc4Srjs  { &search3, &replace3.value, 0 },
22696c5ddc4Srjs};
22796c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state7_xforms[] = {
22896c5ddc4Srjs  { &search2, &replace1.value, 0 },
22996c5ddc4Srjs  { &search3, &replace3.value, 0 },
23096c5ddc4Srjs};
23196c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state8_xforms[] = {
23296c5ddc4Srjs  { &search1, &replace1.value, 0 },
23396c5ddc4Srjs};
23496c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state9_xforms[] = {
23596c5ddc4Srjs  { &search4, &replace4.value, 0 },
23696c5ddc4Srjs};
23796c5ddc4Srjsstatic const struct transform bifrost_nir_lower_algebraic_late_state10_xforms[] = {
23896c5ddc4Srjs  { &search5, &replace5.value, 0 },
23996c5ddc4Srjs};
24096c5ddc4Srjs
24196c5ddc4Srjsstatic const struct per_op_table bifrost_nir_lower_algebraic_late_table[nir_num_search_ops] = {
24296c5ddc4Srjs   [nir_op_fmul] = {
24396c5ddc4Srjs      .filter = (uint16_t []) {
24496c5ddc4Srjs         0,
24596c5ddc4Srjs         1,
24696c5ddc4Srjs         0,
24796c5ddc4Srjs         0,
24896c5ddc4Srjs         0,
24996c5ddc4Srjs         0,
25096c5ddc4Srjs         0,
25196c5ddc4Srjs         0,
25296c5ddc4Srjs         0,
25396c5ddc4Srjs         0,
25496c5ddc4Srjs         0,
25596c5ddc4Srjs      },
25696c5ddc4Srjs
25796c5ddc4Srjs      .num_filtered_states = 2,
25896c5ddc4Srjs      .table = (uint16_t []) {
25996c5ddc4Srjs
26096c5ddc4Srjs         0,
26196c5ddc4Srjs         2,
26296c5ddc4Srjs         2,
26396c5ddc4Srjs         2,
26496c5ddc4Srjs      },
26596c5ddc4Srjs   },
26696c5ddc4Srjs   [nir_op_fmin] = {
26796c5ddc4Srjs      .filter = (uint16_t []) {
26896c5ddc4Srjs         0,
26996c5ddc4Srjs         1,
27096c5ddc4Srjs         0,
27196c5ddc4Srjs         0,
27296c5ddc4Srjs         2,
27396c5ddc4Srjs         0,
27496c5ddc4Srjs         0,
27596c5ddc4Srjs         2,
27696c5ddc4Srjs         0,
27796c5ddc4Srjs         0,
27896c5ddc4Srjs         0,
27996c5ddc4Srjs      },
28096c5ddc4Srjs
28196c5ddc4Srjs      .num_filtered_states = 3,
28296c5ddc4Srjs      .table = (uint16_t []) {
28396c5ddc4Srjs
28496c5ddc4Srjs         0,
28596c5ddc4Srjs         3,
28696c5ddc4Srjs         0,
28796c5ddc4Srjs         3,
28896c5ddc4Srjs         3,
28996c5ddc4Srjs         8,
29096c5ddc4Srjs         0,
29196c5ddc4Srjs         8,
29296c5ddc4Srjs         0,
29396c5ddc4Srjs      },
29496c5ddc4Srjs   },
29596c5ddc4Srjs   [nir_op_fmax] = {
29696c5ddc4Srjs      .filter = (uint16_t []) {
29796c5ddc4Srjs         0,
29896c5ddc4Srjs         1,
29996c5ddc4Srjs         0,
30096c5ddc4Srjs         2,
30196c5ddc4Srjs         0,
30296c5ddc4Srjs         0,
30396c5ddc4Srjs         0,
30496c5ddc4Srjs         0,
30596c5ddc4Srjs         2,
30696c5ddc4Srjs         0,
30796c5ddc4Srjs         0,
30896c5ddc4Srjs      },
30996c5ddc4Srjs
31096c5ddc4Srjs      .num_filtered_states = 3,
31196c5ddc4Srjs      .table = (uint16_t []) {
31296c5ddc4Srjs
31396c5ddc4Srjs         0,
31496c5ddc4Srjs         4,
31596c5ddc4Srjs         0,
31696c5ddc4Srjs         4,
31796c5ddc4Srjs         4,
31896c5ddc4Srjs         7,
31996c5ddc4Srjs         0,
32096c5ddc4Srjs         7,
32196c5ddc4Srjs         0,
32296c5ddc4Srjs      },
32396c5ddc4Srjs   },
32496c5ddc4Srjs   [nir_op_fabs] = {
32596c5ddc4Srjs      .filter = (uint16_t []) {
32696c5ddc4Srjs         0,
32796c5ddc4Srjs         0,
32896c5ddc4Srjs         0,
32996c5ddc4Srjs         0,
33096c5ddc4Srjs         0,
33196c5ddc4Srjs         1,
33296c5ddc4Srjs         2,
33396c5ddc4Srjs         0,
33496c5ddc4Srjs         0,
33596c5ddc4Srjs         0,
33696c5ddc4Srjs         0,
33796c5ddc4Srjs      },
33896c5ddc4Srjs
33996c5ddc4Srjs      .num_filtered_states = 3,
34096c5ddc4Srjs      .table = (uint16_t []) {
34196c5ddc4Srjs
34296c5ddc4Srjs         0,
34396c5ddc4Srjs         9,
34496c5ddc4Srjs         10,
34596c5ddc4Srjs      },
34696c5ddc4Srjs   },
34796c5ddc4Srjs   [nir_op_fddx] = {
34896c5ddc4Srjs      .filter = (uint16_t []) {
34996c5ddc4Srjs         0,
35096c5ddc4Srjs         0,
35196c5ddc4Srjs         0,
35296c5ddc4Srjs         0,
35396c5ddc4Srjs         0,
35496c5ddc4Srjs         0,
35596c5ddc4Srjs         0,
35696c5ddc4Srjs         0,
35796c5ddc4Srjs         0,
35896c5ddc4Srjs         0,
35996c5ddc4Srjs         0,
36096c5ddc4Srjs      },
36196c5ddc4Srjs
36296c5ddc4Srjs      .num_filtered_states = 1,
36396c5ddc4Srjs      .table = (uint16_t []) {
36496c5ddc4Srjs
36596c5ddc4Srjs         5,
36696c5ddc4Srjs      },
36796c5ddc4Srjs   },
36896c5ddc4Srjs   [nir_op_fddy] = {
36996c5ddc4Srjs      .filter = (uint16_t []) {
37096c5ddc4Srjs         0,
37196c5ddc4Srjs         0,
37296c5ddc4Srjs         0,
37396c5ddc4Srjs         0,
37496c5ddc4Srjs         0,
37596c5ddc4Srjs         0,
37696c5ddc4Srjs         0,
37796c5ddc4Srjs         0,
37896c5ddc4Srjs         0,
37996c5ddc4Srjs         0,
38096c5ddc4Srjs         0,
38196c5ddc4Srjs      },
38296c5ddc4Srjs
38396c5ddc4Srjs      .num_filtered_states = 1,
38496c5ddc4Srjs      .table = (uint16_t []) {
38596c5ddc4Srjs
38696c5ddc4Srjs         6,
38796c5ddc4Srjs      },
38896c5ddc4Srjs   },
38996c5ddc4Srjs};
39096c5ddc4Srjs
39196c5ddc4Srjsconst struct transform *bifrost_nir_lower_algebraic_late_transforms[] = {
39296c5ddc4Srjs   NULL,
39396c5ddc4Srjs   NULL,
39496c5ddc4Srjs   bifrost_nir_lower_algebraic_late_state2_xforms,
39596c5ddc4Srjs   NULL,
39696c5ddc4Srjs   bifrost_nir_lower_algebraic_late_state4_xforms,
39796c5ddc4Srjs   NULL,
39896c5ddc4Srjs   NULL,
39996c5ddc4Srjs   bifrost_nir_lower_algebraic_late_state7_xforms,
40096c5ddc4Srjs   bifrost_nir_lower_algebraic_late_state8_xforms,
40196c5ddc4Srjs   bifrost_nir_lower_algebraic_late_state9_xforms,
40296c5ddc4Srjs   bifrost_nir_lower_algebraic_late_state10_xforms,
40396c5ddc4Srjs};
40496c5ddc4Srjs
40596c5ddc4Srjsconst uint16_t bifrost_nir_lower_algebraic_late_transform_counts[] = {
40696c5ddc4Srjs   0,
40796c5ddc4Srjs   0,
40896c5ddc4Srjs   (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state2_xforms),
40996c5ddc4Srjs   0,
41096c5ddc4Srjs   (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state4_xforms),
41196c5ddc4Srjs   0,
41296c5ddc4Srjs   0,
41396c5ddc4Srjs   (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state7_xforms),
41496c5ddc4Srjs   (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state8_xforms),
41596c5ddc4Srjs   (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state9_xforms),
41696c5ddc4Srjs   (uint16_t)ARRAY_SIZE(bifrost_nir_lower_algebraic_late_state10_xforms),
41796c5ddc4Srjs};
41896c5ddc4Srjs
41996c5ddc4Srjsbool
42096c5ddc4Srjsbifrost_nir_lower_algebraic_late(nir_shader *shader)
42196c5ddc4Srjs{
42296c5ddc4Srjs   bool progress = false;
42396c5ddc4Srjs   bool condition_flags[1];
42496c5ddc4Srjs   const nir_shader_compiler_options *options = shader->options;
42596c5ddc4Srjs   const shader_info *info = &shader->info;
42696c5ddc4Srjs   (void) options;
42796c5ddc4Srjs   (void) info;
42896c5ddc4Srjs
42996c5ddc4Srjs   condition_flags[0] = true;
43096c5ddc4Srjs
43196c5ddc4Srjs   nir_foreach_function(function, shader) {
43296c5ddc4Srjs      if (function->impl) {
43396c5ddc4Srjs         progress |= nir_algebraic_impl(function->impl, condition_flags,
43496c5ddc4Srjs                                        bifrost_nir_lower_algebraic_late_transforms,
43596c5ddc4Srjs                                        bifrost_nir_lower_algebraic_late_transform_counts,
43696c5ddc4Srjs                                        bifrost_nir_lower_algebraic_late_table);
43796c5ddc4Srjs      }
43896c5ddc4Srjs   }
43996c5ddc4Srjs
44096c5ddc4Srjs   return progress;
44196c5ddc4Srjs}
44296c5ddc4Srjs
443