/* Compile with:
 *
 * glsl_compiler --version 400 --dump-builder int64.glsl > builtin_int64.h
 *
 * Version 4.00+ is required for umulExtended.
 *
 * Throughout this file a 64-bit integer is carried as a two-component
 * vector: .x holds the low 32 bits and .y holds the high 32 bits (the same
 * layout used by packUint2x32 / unpackUint2x32).
 */
#version 400
#extension GL_ARB_gpu_shader_int64: require
#extension GL_ARB_shading_language_420pack: require

/**
 * Multiply two unsigned 64-bit values, keeping the low 64 bits of the
 * product.
 *
 * \param a  first factor (low word in .x, high word in .y)
 * \param b  second factor
 * \return   (a * b) modulo 2^64
 */
uvec2
umul64(uvec2 a, uvec2 b)
{
   uvec2 result;

   /* Full 32x32 -> 64 product of the low words: msb to .y, lsb to .x. */
   umulExtended(a.x, b.x, result.y, result.x);

   /* The cross terms only contribute to the high word; a.y * b.y would land
    * entirely above bit 63 and is therefore dropped.
    */
   result.y += a.x * b.y + a.y * b.x;

   return result;
}

/**
 * Sign of a signed 64-bit value.
 *
 * \param a  value to test (low word in .x, high word in .y)
 * \return   -1, 0 or 1 encoded as a 64-bit integer
 */
ivec2
sign64(ivec2 a)
{
   ivec2 result;

   /* Arithmetic shift: all ones when a is negative, zero otherwise. */
   result.y = a.y >> 31;

   /* Negative: -1 | x is -1.  Otherwise the low word is 1 iff a != 0. */
   result.x = result.y | int((a.x | a.y) != 0);

   return result;
}

/**
 * Unsigned 64-bit division by shift-and-subtract.
 *
 * \param n  numerator
 * \param d  denominator
 * \return   quotient in .xy and remainder in .zw
 */
uvec4
udivmod64(uvec2 n, uvec2 d)
{
   uvec2 quot = uvec2(0U, 0U);
   int log2_denom = findMSB(d.y) + 32;

   /* If the upper 32 bits of denom are non-zero, it is impossible for shifts
    * greater than 32 bits to occur.  If the upper 32 bits of the numerator
    * are zero, it is impossible for (denom << [63, 32]) <= numer unless
    * denom == 0.
    *
    * So the upper quotient word can only be non-zero when the denominator
    * fits in 32 bits; in that case it is obtained by dividing the upper
    * numerator word by d.x, leaving the partial remainder in n.y.
    */
   if (d.y == 0 && n.y >= d.x) {
      log2_denom = findMSB(d.x);

      /* Since the upper 32 bits of denom are zero, log2_denom <= 31.  The
       * log2_denom test skips shift amounts that would push set bits of d.x
       * out of the 32-bit word, which would make the comparison meaningless.
       */
      for (int i = 31; i >= 1; i--) {
         if (log2_denom <= 31 - i && (d.x << i) <= n.y) {
            n.y -= d.x << i;
            quot.y |= 1U << i;
         }
      }

      /* log2_denom is always <= 31, so manually peel the last loop
       * iteration.
       */
      if (d.x <= n.y) {
         n.y -= d.x;
         quot.y |= 1U;
      }
   }

   /* Lower quotient word: same scheme using native 64-bit shifts and
    * compares.  The log2_denom test again rejects shifts that would
    * overflow the 64-bit denominator.
    */
   uint64_t d64 = packUint2x32(d);
   uint64_t n64 = packUint2x32(n);
   for (int i = 31; i >= 1; i--) {
      if (log2_denom <= 63 - i && (d64 << i) <= n64) {
         n64 -= d64 << i;
         quot.x |= 1U << i;
      }
   }

   /* log2_denom is always <= 63, so manually peel the last loop
    * iteration.
    */
   if (d64 <= n64) {
      n64 -= d64;
      quot.x |= 1U;
   }

   /* Whatever is left of the numerator is the remainder. */
   return uvec4(quot, unpackUint2x32(n64));
}

/**
 * Unsigned 64-bit quotient n / d.
 */
uvec2
udiv64(uvec2 n, uvec2 d)
{
   return udivmod64(n, d).xy;
}

/**
 * Signed 64-bit quotient _n / _d, truncated toward zero.
 *
 * The magnitudes are divided as unsigned values and the quotient is negated
 * when the operands have opposite signs.
 */
ivec2
idiv64(ivec2 _n, ivec2 _d)
{
   const bool negate = (_n.y < 0) != (_d.y < 0);
   uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
   uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));

   uvec2 quot = udivmod64(n, d).xy;

   return negate ? unpackInt2x32(-int64_t(packUint2x32(quot))) : ivec2(quot);
}

/**
 * Unsigned 64-bit remainder n % d.
 */
uvec2
umod64(uvec2 n, uvec2 d)
{
   return udivmod64(n, d).zw;
}

/**
 * Signed 64-bit remainder of _n / _d.
 *
 * The remainder of the magnitudes is computed and given the sign of the
 * numerator.  This pairs with the truncating idiv64 so that
 *
 *    _n == _d * idiv64(_n, _d) + imod64(_n, _d)
 *
 * holds for every sign combination (e.g. 7 % -2 == 1 and -7 % -2 == -1).
 */
ivec2
imod64(ivec2 _n, ivec2 _d)
{
   /* Only the numerator's sign matters; the denominator's sign is already
    * accounted for in the (truncated) quotient.
    */
   const bool negate = _n.y < 0;
   uvec2 n = unpackUint2x32(uint64_t(abs(packInt2x32(_n))));
   uvec2 d = unpackUint2x32(uint64_t(abs(packInt2x32(_d))));

   uvec2 rem = udivmod64(n, d).zw;

   return negate ? unpackInt2x32(-int64_t(packUint2x32(rem))) : ivec2(rem);
}