17ec681f3Smrg/* 27ec681f3Smrg * Copyright (C) 2019 Collabora, Ltd. 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217ec681f3Smrg * SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg#include "util/u_math.h" 267ec681f3Smrg#include "pan_encoder.h" 277ec681f3Smrg 287ec681f3Smrg/* This file handles attribute descriptors. The 297ec681f3Smrg * bulk of the complexity is from instancing. See mali_job for 307ec681f3Smrg * notes on how this works. But basically, for small vertex 317ec681f3Smrg * counts, we have a lookup table, and for large vertex counts, 327ec681f3Smrg * we look at the high bits as a heuristic. This has to match 337ec681f3Smrg * exactly how the hardware calculates this (which is why the 347ec681f3Smrg * algorithm is so weird) or else instancing will break. */ 357ec681f3Smrg 367ec681f3Smrg/* Given an odd number (of the form 2k + 1), compute k */ 377ec681f3Smrg#define ODD(odd) ((odd - 1) >> 1) 387ec681f3Smrg 397ec681f3Smrgstatic unsigned 407ec681f3Smrgpanfrost_small_padded_vertex_count(unsigned idx) 417ec681f3Smrg{ 427ec681f3Smrg if (idx < 10) 437ec681f3Smrg return idx; 447ec681f3Smrg else 457ec681f3Smrg return (idx + 1) & ~1; 467ec681f3Smrg} 477ec681f3Smrg 487ec681f3Smrgstatic unsigned 497ec681f3Smrgpanfrost_large_padded_vertex_count(uint32_t vertex_count) 507ec681f3Smrg{ 517ec681f3Smrg /* First, we have to find the highest set one */ 527ec681f3Smrg unsigned highest = 32 - __builtin_clz(vertex_count); 537ec681f3Smrg 547ec681f3Smrg /* Using that, we mask out the highest 4-bits */ 557ec681f3Smrg unsigned n = highest - 4; 567ec681f3Smrg unsigned nibble = (vertex_count >> n) & 0xF; 577ec681f3Smrg 587ec681f3Smrg /* Great, we have the nibble. Now we can just try possibilities. Note 597ec681f3Smrg * that we don't care about the bottom most bit in most cases, and we 607ec681f3Smrg * know the top bit must be 1 */ 617ec681f3Smrg 627ec681f3Smrg unsigned middle_two = (nibble >> 1) & 0x3; 637ec681f3Smrg 647ec681f3Smrg switch (middle_two) { 657ec681f3Smrg case 0b00: 667ec681f3Smrg if (!(nibble & 1)) 677ec681f3Smrg return (1 << n) * 9; 687ec681f3Smrg else 697ec681f3Smrg return (1 << (n + 1)) * 5; 707ec681f3Smrg case 0b01: 717ec681f3Smrg return (1 << (n + 2)) * 3; 727ec681f3Smrg case 0b10: 737ec681f3Smrg return (1 << (n + 1)) * 7; 747ec681f3Smrg case 0b11: 757ec681f3Smrg return (1 << (n + 4)); 767ec681f3Smrg default: 777ec681f3Smrg return 0; /* unreachable */ 787ec681f3Smrg } 797ec681f3Smrg} 807ec681f3Smrg 817ec681f3Smrgunsigned 827ec681f3Smrgpanfrost_padded_vertex_count(unsigned vertex_count) 837ec681f3Smrg{ 847ec681f3Smrg if (vertex_count < 20) 857ec681f3Smrg return panfrost_small_padded_vertex_count(vertex_count); 867ec681f3Smrg else 877ec681f3Smrg return panfrost_large_padded_vertex_count(vertex_count); 887ec681f3Smrg} 897ec681f3Smrg 907ec681f3Smrg/* The much, much more irritating case -- instancing is enabled. See 917ec681f3Smrg * panfrost_job.h for notes on how this works */ 927ec681f3Smrg 937ec681f3Smrgunsigned 947ec681f3Smrgpanfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags) 957ec681f3Smrg{ 967ec681f3Smrg /* We have a NPOT divisor. Here's the fun one (multipling by 977ec681f3Smrg * the inverse and shifting) */ 987ec681f3Smrg 997ec681f3Smrg /* floor(log2(d)) */ 1007ec681f3Smrg unsigned shift = util_logbase2(hw_divisor); 1017ec681f3Smrg 1027ec681f3Smrg /* m = ceil(2^(32 + shift) / d) */ 1037ec681f3Smrg uint64_t shift_hi = 32 + shift; 1047ec681f3Smrg uint64_t t = 1ll << shift_hi; 1057ec681f3Smrg double t_f = t; 1067ec681f3Smrg double hw_divisor_d = hw_divisor; 1077ec681f3Smrg double m_f = ceil(t_f / hw_divisor_d); 1087ec681f3Smrg unsigned m = m_f; 1097ec681f3Smrg 1107ec681f3Smrg /* Default case */ 1117ec681f3Smrg uint32_t magic_divisor = m; 1127ec681f3Smrg 1137ec681f3Smrg /* e = 2^(shift + 32) % d */ 1147ec681f3Smrg uint64_t e = t % hw_divisor; 1157ec681f3Smrg 1167ec681f3Smrg /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob 1177ec681f3Smrg * seems to use a different condition */ 1187ec681f3Smrg if (e <= (1ll << shift)) { 1197ec681f3Smrg magic_divisor = m - 1; 1207ec681f3Smrg *extra_flags = 1; 1217ec681f3Smrg } 1227ec681f3Smrg 1237ec681f3Smrg /* Top flag implicitly set */ 1247ec681f3Smrg assert(magic_divisor & (1u << 31)); 1257ec681f3Smrg magic_divisor &= ~(1u << 31); 1267ec681f3Smrg *o_shift = shift; 1277ec681f3Smrg 1287ec681f3Smrg return magic_divisor; 1297ec681f3Smrg} 130