/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
247ec681f3Smrg
257ec681f3Smrg#include "util/u_math.h"
267ec681f3Smrg#include "pan_encoder.h"
277ec681f3Smrg
/* This file handles attribute descriptors. The
 * bulk of the complexity is from instancing. See mali_job for
 * notes on how this works. But basically, for small vertex
 * counts, we have a lookup table, and for large vertex counts,
 * we look at the high bits as a heuristic. This has to match
 * exactly how the hardware calculates this (which is why the
 * algorithm is so weird) or else instancing will break. */
357ec681f3Smrg
/* Given an odd number (of the form 2k + 1), compute k. The argument is
 * parenthesized so that expressions built from low-precedence operators
 * (shifts, bitwise ops, ?:) are evaluated before the subtraction. */
#define ODD(odd) (((odd) - 1) >> 1)
387ec681f3Smrg
/* Padded count for small inputs: values below 10 are returned unchanged,
 * anything else is rounded up to the next even number. */
static unsigned
panfrost_small_padded_vertex_count(unsigned idx)
{
        const unsigned rounded_to_even = (idx + 1) & ~1u;

        return (idx >= 10) ? rounded_to_even : idx;
}
477ec681f3Smrg
/* Pad a large vertex count based on its top four significant bits. With n
 * the number of bits below that nibble, the result is 9, 10, 12, 14 or 16
 * times 2^n.
 *
 * vertex_count must be at least 8 so that a full nibble exists (smaller
 * values would make n underflow). All arithmetic is unsigned, so counts near
 * the top of the 32-bit range wrap instead of overflowing a signed int. */
static unsigned
panfrost_large_padded_vertex_count(uint32_t vertex_count)
{
        assert(vertex_count >= 8);

        /* First, we have to find the highest set one (1-based position) */
        unsigned highest = 32 - __builtin_clz(vertex_count);

        /* Using that, we isolate the highest 4 bits */
        unsigned n = highest - 4;
        unsigned nibble = (vertex_count >> n) & 0xF;

        /* Value of the nibble's least significant bit; n <= 28 here */
        unsigned unit = 1u << n;

        /* The top bit of the nibble is known to be 1, and the bottom bit
         * only matters when the middle two bits are both clear */
        switch ((nibble >> 1) & 0x3) {
        case 0:
                return unit * ((nibble & 1) ? 10u : 9u);
        case 1:
                return unit * 12u;
        case 2:
                return unit * 14u;
        default:
                return unit * 16u;
        }
}
807ec681f3Smrg
/* Non-static entry point: counts below 20 take the small-count path, all
 * others go through the high-bits heuristic. */
unsigned
panfrost_padded_vertex_count(unsigned vertex_count)
{
        return (vertex_count < 20) ?
                panfrost_small_padded_vertex_count(vertex_count) :
                panfrost_large_padded_vertex_count(vertex_count);
}
897ec681f3Smrg
/* The much, much more irritating case -- instancing is enabled. See
 * panfrost_job.h for notes on how this works */
927ec681f3Smrg
/* Compute the multiply-and-shift constants for dividing by a
 * non-power-of-two hw_divisor (multiplying by the inverse and shifting).
 *
 * Returns the magic multiplier with its implicitly-set top bit (bit 31)
 * cleared. *o_shift receives floor(log2(hw_divisor)). *extra_flags is set to
 * 1 when the round-down variant is selected and is left untouched otherwise,
 * so the caller must initialize it beforehand.
 *
 * Everything is computed with 64-bit integers. Going through double drops
 * the fractional part of 2^(32 + shift) / d for large divisors (for example
 * d = 2^31 - 1), which makes ceil() come out one too small. */
unsigned
panfrost_compute_magic_divisor(unsigned hw_divisor, unsigned *o_shift, unsigned *extra_flags)
{
        /* The division and modulo below need a nonzero divisor */
        assert(hw_divisor != 0);

        /* floor(log2(d)) */
        unsigned shift = util_logbase2(hw_divisor);

        /* t = 2^(32 + shift); unsigned so that shift == 31 is well defined */
        uint64_t t = (uint64_t) 1 << (32 + shift);

        /* e = 2^(shift + 32) % d */
        uint64_t e = t % hw_divisor;

        /* m = ceil(2^(32 + shift) / d), computed exactly */
        uint64_t m = (t / hw_divisor) + (e != 0 ? 1 : 0);

        /* Default case */
        uint32_t magic_divisor = (uint32_t) m;

        /* Apply round-down algorithm? e <= 2^shift?. XXX: The blob
         * seems to use a different condition */
        if (e <= ((uint64_t) 1 << shift)) {
                magic_divisor = (uint32_t) (m - 1);
                *extra_flags = 1;
        }

        /* Top flag implicitly set */
        assert(magic_divisor & (1u << 31));
        magic_divisor &= ~(1u << 31);
        *o_shift = shift;

        return magic_divisor;
}
130