101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2018 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#ifndef UTIL_BIGMATH_H
2501e04c3fSmrg#define UTIL_BIGMATH_H
2601e04c3fSmrg
2701e04c3fSmrg#include "macros.h"
2801e04c3fSmrg
2901e04c3fSmrg#include <assert.h>
3001e04c3fSmrg#include <stdint.h>
3101e04c3fSmrg#include <string.h>
3201e04c3fSmrg
3301e04c3fSmrgstatic inline bool
3401e04c3fSmrg_ubm_add_u32arr(uint32_t *dst, unsigned dst_len,
3501e04c3fSmrg                uint32_t *a, unsigned a_len,
3601e04c3fSmrg                uint32_t *b, unsigned b_len)
3701e04c3fSmrg{
3801e04c3fSmrg   uint32_t carry = 0;
3901e04c3fSmrg   for (unsigned i = 0; i < dst_len; i++) {
4001e04c3fSmrg      uint64_t sum = carry;
4101e04c3fSmrg      if (i < a_len)
4201e04c3fSmrg         sum += a[i];
4301e04c3fSmrg      if (i < b_len)
4401e04c3fSmrg         sum += b[i];
4501e04c3fSmrg      dst[i] = sum;
4601e04c3fSmrg      carry = sum >> 32;
4701e04c3fSmrg   }
4801e04c3fSmrg
4901e04c3fSmrg   /* Now compute overflow */
5001e04c3fSmrg
5101e04c3fSmrg   for (unsigned i = dst_len; i < a_len; i++) {
5201e04c3fSmrg      if (a[i])
5301e04c3fSmrg         return true;
5401e04c3fSmrg   }
5501e04c3fSmrg
5601e04c3fSmrg   for (unsigned i = dst_len; i < b_len; i++) {
5701e04c3fSmrg      if (b[i])
5801e04c3fSmrg         return true;
5901e04c3fSmrg   }
6001e04c3fSmrg
6101e04c3fSmrg   return carry;
6201e04c3fSmrg}
6301e04c3fSmrg#define ubm_add_u32arr(dst, a, b) \
6401e04c3fSmrg   _ubm_add_u32arr(dst, ARRAY_SIZE(dst), a, ARRAY_SIZE(a), b, ARRAY_SIZE(b))
6501e04c3fSmrg
6601e04c3fSmrgstatic inline bool
6701e04c3fSmrg_ubm_mul_u32arr(uint32_t *dst, unsigned dst_len,
6801e04c3fSmrg                uint32_t *a, unsigned a_len,
6901e04c3fSmrg                uint32_t *b, unsigned b_len)
7001e04c3fSmrg{
7101e04c3fSmrg   memset(dst, 0, dst_len * sizeof(*dst));
7201e04c3fSmrg
7301e04c3fSmrg   bool overflow = false;
7401e04c3fSmrg
7501e04c3fSmrg   for (unsigned i = 0; i < a_len; i++) {
7601e04c3fSmrg      uint32_t carry = 0;
7701e04c3fSmrg      for (unsigned j = 0; j < b_len; j++) {
7801e04c3fSmrg         /* The maximum values of a[i] and b[i] are UINT32_MAX so the maximum
7901e04c3fSmrg          * value of tmp is UINT32_MAX * UINT32_MAX.  The maximum value that
8001e04c3fSmrg          * will fit in tmp is
8101e04c3fSmrg          *
8201e04c3fSmrg          *    UINT64_MAX = UINT32_MAX << 32 + UINT32_MAX
8301e04c3fSmrg          *               = UINT32_MAX * (UINT32_MAX + 1) + UINT32_MAX
8401e04c3fSmrg          *               = UINT32_MAX * UINT32_MAX + 2 * UINT32_MAX
8501e04c3fSmrg          *
8601e04c3fSmrg          * so we're guaranteed that we can add in two more 32-bit values
8701e04c3fSmrg          * without overflowing tmp.
8801e04c3fSmrg          */
8901e04c3fSmrg         uint64_t tmp = (uint64_t)a[i] * (uint64_t)b[j];
9001e04c3fSmrg         tmp += carry;
9101e04c3fSmrg         if (i + j < dst_len) {
9201e04c3fSmrg            tmp += dst[i + j];
9301e04c3fSmrg            dst[i + j] = tmp;
9401e04c3fSmrg            carry = tmp >> 32;
9501e04c3fSmrg         } else {
9601e04c3fSmrg            /* We're trying to write a value that doesn't fit */
9701e04c3fSmrg            overflow = overflow || tmp > 0;
9801e04c3fSmrg            break;
9901e04c3fSmrg         }
10001e04c3fSmrg      }
10101e04c3fSmrg      if (i + b_len < dst_len)
10201e04c3fSmrg         dst[i + b_len] = carry;
10301e04c3fSmrg      else
10401e04c3fSmrg         overflow = overflow || carry > 0;
10501e04c3fSmrg   }
10601e04c3fSmrg
10701e04c3fSmrg   return overflow;
10801e04c3fSmrg}
10901e04c3fSmrg#define ubm_mul_u32arr(dst, a, b) \
11001e04c3fSmrg   _ubm_mul_u32arr(dst, ARRAY_SIZE(dst), a, ARRAY_SIZE(a), b, ARRAY_SIZE(b))
11101e04c3fSmrg
11201e04c3fSmrg#endif /* UTIL_BIGMATH_H */
113