Lines Matching refs:dw

54    uint32_t dw;
116 return ((union __gen_value) { .f = (v) }).dw;
414 uint32_t * restrict dw = (uint32_t * restrict) dst;
416 dw[0] =
420 dw[1] =
425 __gen_combine_address(data, &dw[2], values->Buffer[0], 0);
426 dw[2] = v2_address;
427 dw[3] = v2_address >> 32;
430 __gen_combine_address(data, &dw[4], values->Buffer[1], 0);
431 dw[4] = v4_address;
432 dw[5] = v4_address >> 32;
435 __gen_combine_address(data, &dw[6], values->Buffer[2], 0);
436 dw[6] = v6_address;
437 dw[7] = v6_address >> 32;
440 __gen_combine_address(data, &dw[8], values->Buffer[3], 0);
441 dw[8] = v8_address;
442 dw[9] = v8_address >> 32;
456 uint32_t * restrict dw = (uint32_t * restrict) dst;
458 dw[0] =
473 uint32_t * restrict dw = (uint32_t * restrict) dst;
475 dw[0] =
508 uint32_t * restrict dw = (uint32_t * restrict) dst;
510 dw[0] =
523 dw[1] =
551 uint32_t * restrict dw = (uint32_t * restrict) dst;
553 dw[0] =
576 uint32_t * restrict dw = (uint32_t * restrict) dst;
578 dw[0] =
581 dw[1] =
602 uint32_t * restrict dw = (uint32_t * restrict) dst;
604 dw[0] =
610 dw[1] =
631 uint32_t * restrict dw = (uint32_t * restrict) dst;
633 dw[0] =
636 dw[1] =
639 dw[2] =
642 dw[3] =
647 dw[4] = v4;
648 dw[5] = v4 >> 32;
650 dw[6] = 0;
652 dw[7] = 0;
674 uint32_t * restrict dw = (uint32_t * restrict) dst;
676 dw[0] =
680 dw[1] =
684 dw[2] =
687 dw[3] =
690 dw[4] =
693 dw[5] =
711 uint32_t * restrict dw = (uint32_t * restrict) dst;
713 dw[0] =
729 uint32_t * restrict dw = (uint32_t * restrict) dst;
731 dw[0] =
745 uint32_t * restrict dw = (uint32_t * restrict) dst;
747 dw[0] =
753 dw[1] =
770 uint32_t * restrict dw = (uint32_t * restrict) dst;
772 dw[0] =
778 dw[1] =
797 uint32_t * restrict dw = (uint32_t * restrict) dst;
799 dw[0] =
819 uint32_t * restrict dw = (uint32_t * restrict) dst;
821 dw[0] =
843 uint32_t * restrict dw = (uint32_t * restrict) dst;
848 dw[0] =
869 uint32_t * restrict dw = (uint32_t * restrict) dst;
871 dw[0] =
875 __gen_combine_address(data, &dw[1], values->IndirectPayloadBaseAddress, 0);
876 dw[1] = v1_address;
877 dw[2] = v1_address >> 32;
879 GEN11_MEMORYADDRESSATTRIBUTES_pack(data, &dw[3], &values->IndirectPayloadBaseAddress2);
902 uint32_t * restrict dw = (uint32_t * restrict) dst;
904 dw[0] =
927 uint32_t * restrict dw = (uint32_t * restrict) dst;
929 dw[0] =
946 uint32_t * restrict dw = (uint32_t * restrict) dst;
948 dw[0] =
966 dw[1] =
988 uint32_t * restrict dw = (uint32_t * restrict) dst;
990 dw[0] =
1010 uint32_t * restrict dw = (uint32_t * restrict) dst;
1012 dw[0] =
1031 uint32_t * restrict dw = (uint32_t * restrict) dst;
1033 dw[0] =
1050 uint32_t * restrict dw = (uint32_t * restrict) dst;
1052 dw[0] =
1068 uint32_t * restrict dw = (uint32_t * restrict) dst;
1071 __gen_combine_address(data, &dw[0], values->Address, 0);
1072 dw[0] = v0_address;
1073 dw[1] = v0_address >> 32;
1075 GEN11_MEMORYADDRESSATTRIBUTES_pack(data, &dw[2], &values->MemoryAddressAttributes);
1095 uint32_t * restrict dw = (uint32_t * restrict) dst;
1097 dw[0] =
1103 dw[1] =
1156 uint32_t * restrict dw = (uint32_t * restrict) dst;
1158 dw[0] =
1176 dw[1] =
1183 dw[2] =
1248 uint32_t * restrict dw = (uint32_t * restrict) dst;
1252 dw[0] = v0;
1253 dw[1] = v0 >> 32;
1255 dw[2] =
1264 dw[3] =
1268 dw[4] =
1272 dw[5] =
1276 dw[6] =
1283 dw[7] =
1312 uint32_t * restrict dw = (uint32_t * restrict) dst;
1314 dw[0] =
1320 dw[1] =
1326 dw[2] =
1332 dw[3] =
1359 uint32_t * restrict dw = (uint32_t * restrict) dst;
1361 dw[0] =
1371 dw[1] =
1443 uint32_t * restrict dw = (uint32_t * restrict) dst;
1445 dw[0] =
1464 uint32_t * restrict dw = (uint32_t * restrict) dst;
1466 dw[0] =
1598 uint32_t * restrict dw = (uint32_t * restrict) dst;
1600 dw[0] =
1619 dw[1] =
1624 dw[2] =
1628 dw[3] =
1633 dw[4] =
1642 dw[5] =
1652 dw[6] =
1660 dw[7] =
1670 __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, 0);
1671 dw[8] = v8_address;
1672 dw[9] = v8_address >> 32;
1682 __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, v10);
1683 dw[10] = v10_address;
1684 dw[11] = (v10_address >> 32) | (v10 >> 32);
1691 __gen_combine_address(data, &dw[12], values->ClearValueAddress, v12);
1692 dw[12] = v12_address;
1693 dw[13] = (v12_address >> 32) | (v12 >> 32);
1695 dw[14] =
1698 dw[15] =
1720 uint32_t * restrict dw = (uint32_t * restrict) dst;
1722 dw[0] =
1743 uint32_t * restrict dw = (uint32_t * restrict) dst;
1745 dw[0] =
1749 dw[1] =
1753 dw[2] =
1757 dw[3] =
1787 uint32_t * restrict dw = (uint32_t * restrict) dst;
1789 dw[0] =
1798 dw[1] =
1803 dw[2] =
1808 dw[3] =
1908 uint32_t * restrict dw = (uint32_t * restrict) dst;
1910 dw[0] =
1921 dw[1] =
1930 dw[2] =
1938 dw[3] =
1967 uint32_t * restrict dw = (uint32_t * restrict) dst;
1969 dw[0] =
1977 dw[1] =
1981 dw[2] =
1985 dw[3] =
1989 dw[4] = 0;
1991 dw[5] = 0;
1993 dw[6] = 0;
1995 dw[7] = 0;
2011 uint32_t * restrict dw = (uint32_t * restrict) dst;
2013 dw[0] =
2017 dw[1] =
2040 uint32_t * restrict dw = (uint32_t * restrict) dst;
2042 dw[0] =
2048 dw[1] =
2054 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[2], &values->FilterCoefficients[0]);
2056 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[4], &values->FilterCoefficients[1]);
2058 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[6], &values->FilterCoefficients[2]);
2060 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[8], &values->FilterCoefficients[3]);
2062 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[10], &values->FilterCoefficients[4]);
2064 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[12], &values->FilterCoefficients[5]);
2066 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[14], &values->FilterCoefficients[6]);
2068 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[16], &values->FilterCoefficients[7]);
2070 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[18], &values->FilterCoefficients[8]);
2072 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[20], &values->FilterCoefficients[9]);
2074 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[22], &values->FilterCoefficients[10]);
2076 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[24], &values->FilterCoefficients[11]);
2078 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[26], &values->FilterCoefficients[12]);
2080 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[28], &values->FilterCoefficients[13]);
2082 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[30], &values->FilterCoefficients[14]);
2084 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[32], &values->FilterCoefficients[15]);
2086 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[34], &values->FilterCoefficients[16]);
2088 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[36], &values->FilterCoefficients[17]);
2090 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[38], &values->FilterCoefficients[18]);
2092 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[40], &values->FilterCoefficients[19]);
2094 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[42], &values->FilterCoefficients[20]);
2096 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[44], &values->FilterCoefficients[21]);
2098 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[46], &values->FilterCoefficients[22]);
2100 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[48], &values->FilterCoefficients[23]);
2102 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[50], &values->FilterCoefficients[24]);
2104 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[52], &values->FilterCoefficients[25]);
2106 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[54], &values->FilterCoefficients[26]);
2108 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[56], &values->FilterCoefficients[27]);
2110 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[58], &values->FilterCoefficients[28]);
2112 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[60], &values->FilterCoefficients[29]);
2114 GEN11_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[62], &values->FilterCoefficients[30]);
2143 uint32_t * restrict dw = (uint32_t * restrict) dst;
2145 dw[0] =
2151 dw[1] =
2157 dw[2] =
2163 dw[3] =
2169 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[4], &values->FilterCoefficients[0]);
2171 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[8], &values->FilterCoefficients[1]);
2173 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[12], &values->FilterCoefficients[2]);
2175 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[16], &values->FilterCoefficients[3]);
2177 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[20], &values->FilterCoefficients[4]);
2179 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[24], &values->FilterCoefficients[5]);
2181 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[28], &values->FilterCoefficients[6]);
2183 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[32], &values->FilterCoefficients[7]);
2185 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[36], &values->FilterCoefficients[8]);
2187 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[40], &values->FilterCoefficients[9]);
2189 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[44], &values->FilterCoefficients[10]);
2191 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[48], &values->FilterCoefficients[11]);
2193 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[52], &values->FilterCoefficients[12]);
2195 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[56], &values->FilterCoefficients[13]);
2197 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[60], &values->FilterCoefficients[14]);
2199 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[64], &values->FilterCoefficients[15]);
2201 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[68], &values->FilterCoefficients[16]);
2203 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[72], &values->FilterCoefficients[17]);
2205 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[76], &values->FilterCoefficients[18]);
2207 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[80], &values->FilterCoefficients[19]);
2209 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[84], &values->FilterCoefficients[20]);
2211 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[88], &values->FilterCoefficients[21]);
2213 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[92], &values->FilterCoefficients[22]);
2215 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[96], &values->FilterCoefficients[23]);
2217 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[100], &values->FilterCoefficients[24]);
2219 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[104], &values->FilterCoefficients[25]);
2221 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[108], &values->FilterCoefficients[26]);
2223 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[112], &values->FilterCoefficients[27]);
2225 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[116], &values->FilterCoefficients[28]);
2227 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[120], &values->FilterCoefficients[29]);
2229 GEN11_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[124], &values->FilterCoefficients[30]);
2255 uint32_t * restrict dw = (uint32_t * restrict) dst;
2257 dw[0] =
2262 dw[1] =
2266 dw[2] =
2356 uint32_t * restrict dw = (uint32_t * restrict) dst;
2358 dw[0] =
2365 dw[1] =
2374 dw[2] =
2380 dw[3] =
2387 dw[4] =
2395 dw[5] =
2401 dw[6] =
2404 dw[7] =
2409 dw[8] =
2415 dw[9] =
2421 dw[10] =
2425 dw[11] =
2431 dw[12] =
2437 dw[13] =
2442 dw[14] =
2446 dw[15] =
2452 dw[16] =
2456 dw[17] =
2460 dw[18] =
2464 dw[19] =
2467 dw[20] =
2471 dw[21] =
2475 dw[22] =
2491 uint32_t * restrict dw = (uint32_t * restrict) dst;
2493 dw[0] =
2521 uint32_t * restrict dw = (uint32_t * restrict) dst;
2523 dw[0] =
2526 dw[1] =
2529 dw[2] =
2532 dw[3] =
2535 dw[4] =
2538 dw[5] =
2541 dw[6] = 0;
2543 dw[7] = 0;
2545 dw[8] =
2548 dw[9] =
2551 dw[10] =
2554 dw[11] =
2557 dw[12] =
2560 dw[13] =
2563 dw[14] =
2566 dw[15] =
2595 uint32_t * restrict dw = (uint32_t * restrict) dst;
2597 dw[0] =
2621 uint32_t * restrict dw = (uint32_t * restrict) dst;
2623 dw[0] =
2643 uint32_t * restrict dw = (uint32_t * restrict) dst;
2651 dw[0] =
2661 dw[1] =
2688 uint32_t * restrict dw = (uint32_t * restrict) dst;
2690 dw[0] =
2710 uint32_t * restrict dw = (uint32_t * restrict) dst;
2713 __gen_combine_address(data, &dw[0], values->Address, 0);
2714 dw[0] = v0_address;
2715 dw[1] = v0_address >> 32;
2717 GEN11_VDENC_SURFACE_CONTROL_BITS_pack(data, &dw[2], &values->PictureFields);
2752 uint32_t * restrict dw = (uint32_t * restrict) dst;
2754 dw[0] =
2761 dw[1] =
2770 dw[2] =
2774 dw[3] =
2795 uint32_t * restrict dw = (uint32_t * restrict) dst;
2797 dw[0] =
2805 __gen_combine_address(data, &dw[1], values->BufferStartingAddress, 0);
2806 dw[1] = v1_address;
2807 dw[2] = v1_address >> 32;
2809 dw[3] =
2831 uint32_t * restrict dw = (uint32_t * restrict) dst;
2833 dw[0] =
2840 dw[1] =
2886 uint32_t * restrict dw = (uint32_t * restrict) dst;
2888 dw[0] =
2899 dw[1] =
2904 dw[2] =
2907 dw[3] =
2910 dw[4] =
2913 dw[5] =
2916 dw[6] =
2943 uint32_t * restrict dw = (uint32_t * restrict) dst;
2945 dw[0] =
2952 dw[1] =
2986 uint32_t * restrict dw = (uint32_t * restrict) dst;
2988 dw[0] =
2995 dw[1] =
3001 dw[2] =
3035 uint32_t * restrict dw = (uint32_t * restrict) dst;
3037 dw[0] =
3044 dw[1] =
3076 uint32_t * restrict dw = (uint32_t * restrict) dst;
3078 dw[0] =
3085 dw[1] =
3117 uint32_t * restrict dw = (uint32_t * restrict) dst;
3119 dw[0] =
3126 dw[1] =
3158 uint32_t * restrict dw = (uint32_t * restrict) dst;
3160 dw[0] =
3167 dw[1] =
3199 uint32_t * restrict dw = (uint32_t * restrict) dst;
3201 dw[0] =
3208 dw[1] =
3236 uint32_t * restrict dw = (uint32_t * restrict) dst;
3238 dw[0] =
3245 dw[1] =
3272 uint32_t * restrict dw = (uint32_t * restrict) dst;
3274 dw[0] =
3281 dw[1] =
3308 uint32_t * restrict dw = (uint32_t * restrict) dst;
3310 dw[0] =
3317 dw[1] =
3344 uint32_t * restrict dw = (uint32_t * restrict) dst;
3346 dw[0] =
3353 dw[1] =
3380 uint32_t * restrict dw = (uint32_t * restrict) dst;
3382 dw[0] =
3389 dw[1] =
3420 uint32_t * restrict dw = (uint32_t * restrict) dst;
3422 dw[0] =
3433 __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, v1);
3434 dw[1] = v1_address;
3435 dw[2] = (v1_address >> 32) | (v1 >> 32);
3437 dw[3] =
3465 uint32_t * restrict dw = (uint32_t * restrict) dst;
3467 dw[0] =
3474 dw[1] =
3503 uint32_t * restrict dw = (uint32_t * restrict) dst;
3505 dw[0] =
3512 dw[1] =
3542 uint32_t * restrict dw = (uint32_t * restrict) dst;
3544 dw[0] =
3551 dw[1] =
3554 dw[2] =
3557 dw[3] =
3585 uint32_t * restrict dw = (uint32_t * restrict) dst;
3587 dw[0] =
3594 dw[1] =
3597 dw[2] =
3652 uint32_t * restrict dw = (uint32_t * restrict) dst;
3654 dw[0] =
3661 dw[1] =
3670 dw[2] =
3683 dw[3] =
3714 uint32_t * restrict dw = (uint32_t * restrict) dst;
3716 dw[0] =
3724 GEN11_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3751 uint32_t * restrict dw = (uint32_t * restrict) dst;
3753 dw[0] =
3761 GEN11_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3788 uint32_t * restrict dw = (uint32_t * restrict) dst;
3790 dw[0] =
3798 GEN11_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3826 uint32_t * restrict dw = (uint32_t * restrict) dst;
3828 dw[0] =
3837 GEN11_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3864 uint32_t * restrict dw = (uint32_t * restrict) dst;
3866 dw[0] =
3874 GEN11_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3925 uint32_t * restrict dw = (uint32_t * restrict) dst;
3927 dw[0] =
3934 dw[1] =
3943 __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, 0);
3944 dw[2] = v2_address;
3945 dw[3] = v2_address >> 32;
3947 dw[4] =
3952 dw[5] =
3957 dw[6] =
3961 dw[7] =
3998 uint32_t * restrict dw = (uint32_t * restrict) dst;
4000 dw[0] =
4008 dw[1] =
4012 dw[2] =
4016 dw[3] =
4078 uint32_t * restrict dw = (uint32_t * restrict) dst;
4080 dw[0] =
4089 dw[1] = v1;
4090 dw[2] = v1 >> 32;
4092 dw[3] =
4105 __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, v4);
4106 dw[4] = v4_address;
4107 dw[5] = (v4_address >> 32) | (v4 >> 32);
4109 dw[6] =
4114 dw[7] =
4122 dw[8] =
4130 dw[9] = v9;
4131 dw[10] = v9 >> 32;
4166 uint32_t * restrict dw = (uint32_t * restrict) dst;
4168 dw[0] =
4175 dw[1] =
4180 dw[2] =
4218 uint32_t * restrict dw = (uint32_t * restrict) dst;
4220 dw[0] =
4227 dw[1] =
4232 dw[2] =
4270 uint32_t * restrict dw = (uint32_t * restrict) dst;
4272 dw[0] =
4279 dw[1] =
4284 dw[2] =
4324 uint32_t * restrict dw = (uint32_t * restrict) dst;
4326 dw[0] =
4333 dw[1] =
4339 dw[2] =
4380 uint32_t * restrict dw = (uint32_t * restrict) dst;
4382 dw[0] =
4389 dw[1] =
4395 dw[2] =
4428 uint32_t * restrict dw = (uint32_t * restrict) dst;
4430 dw[0] =
4441 __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, v1);
4442 dw[1] = v1_address;
4443 dw[2] = (v1_address >> 32) | (v1 >> 32);
4445 dw[3] =
4524 uint32_t * restrict dw = (uint32_t * restrict) dst;
4526 dw[0] =
4535 dw[1] = v1;
4536 dw[2] = v1 >> 32;
4538 dw[3] =
4554 __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, v4);
4555 dw[4] = v4_address;
4556 dw[5] = (v4_address >> 32) | (v4 >> 32);
4558 dw[6] =
4567 dw[7] =
4580 dw[8] =
4586 dw[9] =
4623 uint32_t * restrict dw = (uint32_t * restrict) dst;
4625 dw[0] =
4632 dw[1] =
4638 __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, 0);
4639 dw[2] = v2_address;
4640 dw[3] = v2_address >> 32;
4642 dw[4] =
4701 uint32_t * restrict dw = (uint32_t * restrict) dst;
4703 dw[0] =
4710 dw[1] =
4718 dw[2] =
4726 dw[3] = v3;
4727 dw[4] = v3 >> 32;
4732 __gen_combine_address(data, &dw[5], values->ScratchSpaceBasePointer, v5);
4733 dw[5] = v5_address;
4734 dw[6] = (v5_address >> 32) | (v5 >> 32);
4736 dw[7] =
4748 dw[8] = 0;
4780 uint32_t * restrict dw = (uint32_t * restrict) dst;
4782 dw[0] =
4789 dw[1] =
4794 __gen_combine_address(data, &dw[2], values->BufferStartingAddress, 0);
4795 dw[2] = v2_address;
4796 dw[3] = v2_address >> 32;
4798 dw[4] =
4830 uint32_t * restrict dw = (uint32_t * restrict) dst;
4832 dw[0] =
4839 dw[1] =
4845 dw[2] =
4874 uint32_t * restrict dw = (uint32_t * restrict) dst;
4876 dw[0] =
4883 dw[1] =
4915 uint32_t * restrict dw = (uint32_t * restrict) dst;
4917 dw[0] =
4924 dw[1] =
4954 uint32_t * restrict dw = (uint32_t * restrict) dst;
4956 dw[0] =
4963 dw[1] =
4991 uint32_t * restrict dw = (uint32_t * restrict) dst;
4993 dw[0] =
5000 dw[1] =
5003 dw[2] =
5006 dw[3] =
5009 dw[4] =
5012 dw[5] =
5015 dw[6] =
5018 dw[7] =
5021 dw[8] =
5024 dw[9] =
5027 dw[10] =
5030 dw[11] =
5033 dw[12] =
5036 dw[13] =
5039 dw[14] =
5042 dw[15] =
5045 dw[16] =
5048 dw[17] =
5051 dw[18] =
5054 dw[19] =
5057 dw[20] =
5060 dw[21] =
5063 dw[22] =
5066 dw[23] =
5069 dw[24] =
5072 dw[25] =
5075 dw[26] =
5078 dw[27] =
5081 dw[28] =
5084 dw[29] =
5087 dw[30] =
5090 dw[31] =
5093 dw[32] =
5167 uint32_t * restrict dw = (uint32_t * restrict) dst;
5169 dw[0] =
5178 dw[1] = v1;
5179 dw[2] = v1 >> 32;
5181 dw[3] =
5197 __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, v4);
5198 dw[4] = v4_address;
5199 dw[5] = (v4_address >> 32) | (v4 >> 32);
5201 dw[6] =
5211 dw[7] =
5218 dw[8] = v8;
5219 dw[9] = v8 >> 32;
5223 dw[10] = v10;
5224 dw[11] = v10 >> 32;
5258 uint32_t * restrict dw = (uint32_t * restrict) dst;
5260 dw[0] =
5267 dw[1] =
5329 uint32_t * restrict dw = (uint32_t * restrict) dst;
5331 dw[0] =
5338 dw[1] =
5385 uint32_t * restrict dw = (uint32_t * restrict) dst;
5387 dw[0] =
5394 dw[1] =
5423 uint32_t * restrict dw = (uint32_t * restrict) dst;
5425 dw[0] =
5432 dw[1] =
5461 uint32_t * restrict dw = (uint32_t * restrict) dst;
5463 dw[0] =
5470 dw[1] =
5499 uint32_t * restrict dw = (uint32_t * restrict) dst;
5501 dw[0] =
5508 dw[1] =
5537 uint32_t * restrict dw = (uint32_t * restrict) dst;
5539 dw[0] =
5546 dw[1] =
5619 uint32_t * restrict dw = (uint32_t * restrict) dst;
5621 dw[0] =
5628 dw[1] =
5648 dw[2] =
5651 dw[3] =
5654 dw[4] =
5688 uint32_t * restrict dw = (uint32_t * restrict) dst;
5690 dw[0] =
5697 dw[1] =
5701 dw[2] = __gen_combine_address(data, &dw[2], values->GlobalConstantBufferAddress, 0);
5703 dw[3] = __gen_combine_address(data, &dw[3], values->GlobalConstantBufferAddressHigh, 0);
5727 uint32_t * restrict dw = (uint32_t * restrict) dst;
5729 dw[0] =
5759 uint32_t * restrict dw = (uint32_t * restrict) dst;
5761 dw[0] =
5792 uint32_t * restrict dw = (uint32_t * restrict) dst;
5794 dw[0] =
5801 dw[1] =
5828 uint32_t * restrict dw = (uint32_t * restrict) dst;
5830 dw[0] =
5837 dw[1] =
5864 uint32_t * restrict dw = (uint32_t * restrict) dst;
5866 dw[0] =
5873 dw[1] =
5900 uint32_t * restrict dw = (uint32_t * restrict) dst;
5902 dw[0] =
5909 dw[1] =
5936 uint32_t * restrict dw = (uint32_t * restrict) dst;
5938 dw[0] =
5945 dw[1] =
5972 uint32_t * restrict dw = (uint32_t * restrict) dst;
5974 dw[0] =
5981 dw[1] =
6069 uint32_t * restrict dw = (uint32_t * restrict) dst;
6071 dw[0] =
6078 dw[1] =
6088 dw[2] =
6098 dw[3] =
6108 dw[4] =
6118 dw[5] =
6128 dw[6] =
6138 dw[7] =
6148 dw[8] =
6200 uint32_t * restrict dw = (uint32_t * restrict) dst;
6202 dw[0] =
6209 dw[1] =
6223 dw[2] =
6226 dw[3] =
6229 dw[4] =
6247 dw[5] =
6290 uint32_t * restrict dw = (uint32_t * restrict) dst;
6292 dw[0] =
6305 dw[1] =
6315 dw[2] =
6325 dw[3] =
6335 dw[4] =
6345 dw[5] =
6355 dw[6] =
6365 dw[7] =
6375 dw[8] =
6379 dw[9] =
6389 dw[10] =
6423 uint32_t * restrict dw = (uint32_t * restrict) dst;
6425 dw[0] =
6432 dw[1] =
6481 uint32_t * restrict dw = (uint32_t * restrict) dst;
6483 dw[0] =
6490 dw[1] =
6496 dw[2] =
6499 dw[3] =
6542 uint32_t * restrict dw = (uint32_t * restrict) dst;
6544 dw[0] =
6551 dw[1] =
6559 __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, 0);
6560 dw[2] = v2_address;
6561 dw[3] = v2_address >> 32;
6563 dw[4] =
6567 __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, 0);
6568 dw[5] = v5_address;
6569 dw[6] = v5_address >> 32;
6571 dw[7] =
6604 uint32_t * restrict dw = (uint32_t * restrict) dst;
6606 dw[0] =
6613 dw[1] =
6619 dw[2] =
6653 uint32_t * restrict dw = (uint32_t * restrict) dst;
6655 dw[0] =
6662 dw[1] =
6668 __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, 0);
6669 dw[2] = v2_address;
6670 dw[3] = v2_address >> 32;
6672 dw[4] =
6721 uint32_t * restrict dw = (uint32_t * restrict) dst;
6723 dw[0] =
6730 dw[1] =
6738 dw[2] =
6748 dw[3] =
6752 dw[4] =
6797 uint32_t * restrict dw = (uint32_t * restrict) dst;
6799 dw[0] =
6806 dw[1] =
6813 dw[2] =
6816 dw[3] =
6844 uint32_t * restrict dw = (uint32_t * restrict) dst;
6846 dw[0] =
6853 dw[1] =
6883 uint32_t * restrict dw = (uint32_t * restrict) dst;
6885 dw[0] =
6892 dw[1] =
6923 uint32_t * restrict dw = (uint32_t * restrict) dst;
6925 dw[0] =
6932 dw[1] =
6963 uint32_t * restrict dw = (uint32_t * restrict) dst;
6965 dw[0] =
6972 dw[1] =
7003 uint32_t * restrict dw = (uint32_t * restrict) dst;
7005 dw[0] =
7012 dw[1] =
7040 uint32_t * restrict dw = (uint32_t * restrict) dst;
7042 dw[0] =
7072 uint32_t * restrict dw = (uint32_t * restrict) dst;
7074 dw[0] =
7109 uint32_t * restrict dw = (uint32_t * restrict) dst;
7111 dw[0] =
7122 dw[1] =
7180 uint32_t * restrict dw = (uint32_t * restrict) dst;
7182 dw[0] =
7189 dw[1] =
7199 dw[2] =
7209 dw[3] =
7219 dw[4] =
7255 uint32_t * restrict dw = (uint32_t * restrict) dst;
7257 dw[0] =
7264 dw[1] =
7268 dw[2] =
7308 uint32_t * restrict dw = (uint32_t * restrict) dst;
7310 dw[0] =
7317 dw[1] =
7375 uint32_t * restrict dw = (uint32_t * restrict) dst;
7377 dw[0] =
7384 dw[1] =
7394 dw[2] =
7421 uint32_t * restrict dw = (uint32_t * restrict) dst;
7423 dw[0] =
7454 uint32_t * restrict dw = (uint32_t * restrict) dst;
7456 dw[0] =
7463 dw[1] =
7490 uint32_t * restrict dw = (uint32_t * restrict) dst;
7492 dw[0] =
7499 dw[1] =
7526 uint32_t * restrict dw = (uint32_t * restrict) dst;
7528 dw[0] =
7535 dw[1] =
7593 uint32_t * restrict dw = (uint32_t * restrict) dst;
7595 dw[0] =
7604 dw[1] = v1;
7605 dw[2] = v1 >> 32;
7607 dw[3] =
7620 __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, v4);
7621 dw[4] = v4_address;
7622 dw[5] = (v4_address >> 32) | (v4 >> 32);
7624 dw[6] =
7629 dw[7] =
7637 dw[8] =
7707 uint32_t * restrict dw = (uint32_t * restrict) dst;
7709 dw[0] =
7716 dw[1] =
7757 uint32_t * restrict dw = (uint32_t * restrict) dst;
7759 dw[0] =
7766 dw[1] =
7812 uint32_t * restrict dw = (uint32_t * restrict) dst;
7814 dw[0] =
7821 dw[1] =
7837 dw[2] =
7843 dw[3] =
7884 uint32_t * restrict dw = (uint32_t * restrict) dst;
7886 dw[0] =
7893 dw[1] =
7904 dw[2] =
7908 dw[3] =
7912 dw[4] =
7958 uint32_t * restrict dw = (uint32_t * restrict) dst;
7960 dw[0] =
7969 dw[1] =
7972 dw[2] =
7975 dw[3] =
7978 dw[4] =
7984 dw[5] =
7987 dw[6] = 0;
7989 dw[7] =
7992 dw[8] =
7995 dw[9] = 0;
7997 dw[10] =
8000 dw[11] =
8003 dw[12] =
8006 dw[13] =
8009 dw[14] =
8037 uint32_t * restrict dw = (uint32_t * restrict) dst;
8039 dw[0] =
8046 dw[1] = 0;
8048 dw[2] =
8051 dw[3] =
8079 uint32_t * restrict dw = (uint32_t * restrict) dst;
8081 dw[0] =
8088 dw[1] = 0;
8090 dw[2] =
8093 dw[3] =
8140 uint32_t * restrict dw = (uint32_t * restrict) dst;
8142 dw[0] =
8149 dw[1] =
8152 dw[2] =
8161 dw[3] = __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, 0);
8163 dw[4] =
8167 dw[5] =
8201 uint32_t * restrict dw = (uint32_t * restrict) dst;
8203 dw[0] =
8210 dw[1] =
8213 dw[2] =
8217 dw[3] = __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, 0);
8219 dw[4] =
8223 dw[5] =
8226 dw[6] =
8259 uint32_t * restrict dw = (uint32_t * restrict) dst;
8261 dw[0] =
8268 dw[1] =
8271 dw[2] =
8276 dw[3] = 0;
8278 dw[4] =
8281 dw[5] =
8284 dw[6] =
8287 dw[7] =
8290 dw[8] =
8293 dw[9] =
8296 dw[10] =
8299 dw[11] =
8302 dw[12] =
8305 dw[13] =
8308 dw[14] =
8311 dw[15] =
8373 uint32_t * restrict dw = (uint32_t * restrict) dst;
8375 dw[0] =
8382 dw[1] =
8385 dw[2] =
8390 dw[3] =
8393 dw[4] = 0;
8395 dw[5] =
8398 dw[6] =
8404 dw[7] =
8408 dw[8] =
8412 dw[9] =
8416 dw[10] = 0;
8418 dw[11] =
8422 dw[12] =
8426 dw[13] =
8430 dw[14] =
8434 dw[15] =
8438 dw[16] =
8467 uint32_t * restrict dw = (uint32_t * restrict) dst;
8469 dw[0] =
8476 dw[1] =
8514 uint32_t * restrict dw = (uint32_t * restrict) dst;
8516 dw[0] =
8527 __gen_combine_address(data, &dw[1], values->ScratchSpaceBasePointer, v1);
8528 dw[1] = v1_address;
8529 dw[2] = (v1_address >> 32) | (v1 >> 32);
8531 dw[3] =
8536 dw[4] =
8539 dw[5] =
8543 dw[6] = 0;
8545 dw[7] = 0;
8547 dw[8] = 0;
8566 uint32_t * restrict dw = (uint32_t * restrict) dst;
8568 dw[0] =
8592 uint32_t * restrict dw = (uint32_t * restrict) dst;
8594 dw[0] =
8641 uint32_t * restrict dw = (uint32_t * restrict) dst;
8643 dw[0] =
8656 __gen_combine_address(data, &dw[1], values->MemoryAddress, 0);
8657 dw[1] = v1_address;
8658 dw[2] = v1_address >> 32;
8678 uint32_t * restrict dw = (uint32_t * restrict) dst;
8680 dw[0] =
8713 uint32_t * restrict dw = (uint32_t * restrict) dst;
8715 dw[0] =
8725 __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, 0);
8726 dw[1] = v1_address;
8727 dw[2] = v1_address >> 32;
8751 uint32_t * restrict dw = (uint32_t * restrict) dst;
8753 dw[0] =
8762 __gen_combine_address(data, &dw[1], values->PageBaseAddress, v1);
8763 dw[1] = v1_address;
8764 dw[2] = (v1_address >> 32) | (v1 >> 32);
8793 uint32_t * restrict dw = (uint32_t * restrict) dst;
8795 dw[0] =
8803 dw[1] =
8807 __gen_combine_address(data, &dw[2], values->CompareAddress, 0);
8808 dw[2] = v2_address;
8809 dw[3] = v2_address >> 32;
8834 uint32_t * restrict dw = (uint32_t * restrict) dst;
8836 dw[0] =
8844 __gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, 0);
8845 dw[1] = v1_address;
8846 dw[2] = v1_address >> 32;
8849 __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, 0);
8850 dw[3] = v3_address;
8851 dw[4] = v3_address >> 32;
8916 uint32_t * restrict dw = (uint32_t * restrict) dst;
8918 dw[0] =
8925 dw[1] =
8933 dw[2] = __gen_combine_address(data, &dw[2], values->DisplayBufferBaseAddress, v2);
8960 uint32_t * restrict dw = (uint32_t * restrict) dst;
8962 dw[0] =
8967 dw[1] =
8999 uint32_t * restrict dw = (uint32_t * restrict) dst;
9001 dw[0] =
9008 dw[1] =
9011 dw[2] =
9039 uint32_t * restrict dw = (uint32_t * restrict) dst;
9041 dw[0] =
9050 dw[1] =
9054 __gen_combine_address(data, &dw[2], values->MemoryAddress, 0);
9055 dw[2] = v2_address;
9056 dw[3] = v2_address >> 32;
9081 uint32_t * restrict dw = (uint32_t * restrict) dst;
9083 dw[0] =
9090 dw[1] =
9093 dw[2] =
9122 uint32_t * restrict dw = (uint32_t * restrict) dst;
9124 dw[0] =
9130 dw[1] =
9161 uint32_t * restrict dw = (uint32_t * restrict) dst;
9163 dw[0] =
9170 dw[1] =
9193 uint32_t * restrict dw = (uint32_t * restrict) dst;
9195 dw[0] =
9219 uint32_t * restrict dw = (uint32_t * restrict) dst;
9221 dw[0] =
9258 uint32_t * restrict dw = (uint32_t * restrict) dst;
9260 dw[0] =
9284 uint32_t * restrict dw = (uint32_t * restrict) dst;
9286 dw[0] =
9313 uint32_t * restrict dw = (uint32_t * restrict) dst;
9315 dw[0] =
9324 __gen_combine_address(data, &dw[1], values->MemoryAddress, v1);
9325 dw[1] = v1_address;
9326 dw[2] = (v1_address >> 32) | (v1 >> 32);
9328 dw[3] =
9351 uint32_t * restrict dw = (uint32_t * restrict) dst;
9353 dw[0] =
9378 uint32_t * restrict dw = (uint32_t * restrict) dst;
9380 dw[0] =
9407 uint32_t * restrict dw = (uint32_t * restrict) dst;
9409 dw[0] =
9417 __gen_combine_address(data, &dw[1], values->DestinationAddress, v1);
9418 dw[1] = v1_address;
9419 dw[2] = (v1_address >> 32) | (v1 >> 32);
9421 dw[3] =
9460 uint32_t * restrict dw = (uint32_t * restrict) dst;
9462 dw[0] =
9469 dw[1] =
9507 uint32_t * restrict dw = (uint32_t * restrict) dst;
9509 dw[0] =
9518 dw[1] =
9522 __gen_combine_address(data, &dw[2], values->SemaphoreAddress, 0);
9523 dw[2] = v2_address;
9524 dw[3] = v2_address >> 32;
9552 uint32_t * restrict dw = (uint32_t * restrict) dst;
9554 dw[0] =
9566 dw[1] = __gen_combine_address(data, &dw[1], values->LogicalContextAddress, v1);
9592 uint32_t * restrict dw = (uint32_t * restrict) dst;
9594 dw[0] =
9623 uint32_t * restrict dw = (uint32_t * restrict) dst;
9625 dw[0] =
9635 __gen_combine_address(data, &dw[1], values->Address, v1);
9636 dw[1] = v1_address;
9637 dw[2] = (v1_address >> 32) | (v1 >> 32);
9641 dw[3] = v3;
9642 dw[4] = v3 >> 32;
9667 uint32_t * restrict dw = (uint32_t * restrict) dst;
9669 dw[0] =
9675 dw[1] =
9678 dw[2] =
9705 uint32_t * restrict dw = (uint32_t * restrict) dst;
9707 dw[0] =
9715 dw[1] =
9719 __gen_combine_address(data, &dw[2], values->MemoryAddress, 0);
9720 dw[2] = v2_address;
9721 dw[3] = v2_address >> 32;
9741 uint32_t * restrict dw = (uint32_t * restrict) dst;
9743 dw[0] =
9766 uint32_t * restrict dw = (uint32_t * restrict) dst;
9768 dw[0] =
9790 uint32_t * restrict dw = (uint32_t * restrict) dst;
9792 dw[0] =
9831 uint32_t * restrict dw = (uint32_t * restrict) dst;
9833 dw[0] =
9875 uint32_t * restrict dw = (uint32_t * restrict) dst;
9877 dw[0] =
9912 uint32_t * restrict dw = (uint32_t * restrict) dst;
9914 dw[0] =
9980 uint32_t * restrict dw = (uint32_t * restrict) dst;
9982 dw[0] =
9989 dw[1] =
10015 __gen_combine_address(data, &dw[2], values->Address, 0);
10016 dw[2] = v2_address;
10017 dw[3] = v2_address >> 32;
10021 dw[4] = v4;
10022 dw[5] = v4 >> 32;
10079 uint32_t * restrict dw = (uint32_t * restrict) dst;
10081 dw[0] =
10092 __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, v1);
10093 dw[1] = v1_address;
10094 dw[2] = (v1_address >> 32) | (v1 >> 32);
10096 dw[3] =
10103 __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, v4);
10104 dw[4] = v4_address;
10105 dw[5] = (v4_address >> 32) | (v4 >> 32);
10111 __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, v6);
10112 dw[6] = v6_address;
10113 dw[7] = (v6_address >> 32) | (v6 >> 32);
10119 __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, v8);
10120 dw[8] = v8_address;
10121 dw[9] = (v8_address >> 32) | (v8 >> 32);
10127 __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, v10);
10128 dw[10] = v10_address;
10129 dw[11] = (v10_address >> 32) | (v10 >> 32);
10131 dw[12] =
10135 dw[13] =
10139 dw[14] =
10143 dw[15] =
10151 __gen_combine_address(data, &dw[16], values->BindlessSurfaceStateBaseAddress, v16);
10152 dw[16] = v16_address;
10153 dw[17] = (v16_address >> 32) | (v16 >> 32);
10155 dw[18] =
10162 __gen_combine_address(data, &dw[19], values->BindlessSamplerStateBaseAddress, v19);
10163 dw[19] = v19_address;
10164 dw[20] = (v19_address >> 32) | (v19 >> 32);
10166 dw[21] =
10193 uint32_t * restrict dw = (uint32_t * restrict) dst;
10195 dw[0] =
10204 dw[1] = v1;
10205 dw[2] = v1 >> 32;
10222 uint32_t * restrict dw = (uint32_t * restrict) dst;
10224 dw[0] =
10264 uint32_t * restrict dw = (uint32_t * restrict) dst;
10266 dw[0] =
10313 uint32_t * restrict dw = (uint32_t * restrict) dst;
10315 dw[0] =
10350 uint32_t * restrict dw = (uint32_t * restrict) dst;
10352 dw[0] =
10374 uint32_t * restrict dw = (uint32_t * restrict) dst;
10378 dw[0] = v0;
10379 dw[1] = v0 >> 32;
10393 uint32_t * restrict dw = (uint32_t * restrict) dst;
10397 dw[0] = v0;
10398 dw[1] = v0 >> 32;
10413 uint32_t * restrict dw = (uint32_t * restrict) dst;
10415 dw[0] =
10434 uint32_t * restrict dw = (uint32_t * restrict) dst;
10436 dw[0] =
10457 uint32_t * restrict dw = (uint32_t * restrict) dst;
10459 dw[0] =
10479 uint32_t * restrict dw = (uint32_t * restrict) dst;
10483 dw[0] = v0;
10484 dw[1] = v0 >> 32;
10498 uint32_t * restrict dw = (uint32_t * restrict) dst;
10502 dw[0] = v0;
10503 dw[1] = v0 >> 32;
10517 uint32_t * restrict dw = (uint32_t * restrict) dst;
10521 dw[0] = v0;
10522 dw[1] = v0 >> 32;
10536 uint32_t * restrict dw = (uint32_t * restrict) dst;
10540 dw[0] = v0;
10541 dw[1] = v0 >> 32;
10556 uint32_t * restrict dw = (uint32_t * restrict) dst;
10558 dw[0] =
10574 uint32_t * restrict dw = (uint32_t * restrict) dst;
10578 dw[0] = v0;
10579 dw[1] = v0 >> 32;
10593 uint32_t * restrict dw = (uint32_t * restrict) dst;
10597 dw[0] = v0;
10598 dw[1] = v0 >> 32;
10612 uint32_t * restrict dw = (uint32_t * restrict) dst;
10616 dw[0] = v0;
10617 dw[1] = v0 >> 32;
10654 uint32_t * restrict dw = (uint32_t * restrict) dst;
10656 dw[0] =
10700 uint32_t * restrict dw = (uint32_t * restrict) dst;
10702 dw[0] =
10723 uint32_t * restrict dw = (uint32_t * restrict) dst;
10727 dw[0] = v0;
10728 dw[1] = v0 >> 32;
10756 uint32_t * restrict dw = (uint32_t * restrict) dst;
10758 dw[0] =
10806 uint32_t * restrict dw = (uint32_t * restrict) dst;
10808 dw[0] =
10843 uint32_t * restrict dw = (uint32_t * restrict) dst;
10845 dw[0] =
10884 uint32_t * restrict dw = (uint32_t * restrict) dst;
10886 dw[0] =
10925 uint32_t * restrict dw = (uint32_t * restrict) dst;
10927 dw[0] =
10943 uint32_t * restrict dw = (uint32_t * restrict) dst;
10947 dw[0] = v0;
10948 dw[1] = v0 >> 32;
10962 uint32_t * restrict dw = (uint32_t * restrict) dst;
10966 dw[0] = v0;
10967 dw[1] = v0 >> 32;
10981 uint32_t * restrict dw = (uint32_t * restrict) dst;
10985 dw[0] = v0;
10986 dw[1] = v0 >> 32;
11000 uint32_t * restrict dw = (uint32_t * restrict) dst;
11004 dw[0] = v0;
11005 dw[1] = v0 >> 32;
11019 uint32_t * restrict dw = (uint32_t * restrict) dst;
11023 dw[0] = v0;
11024 dw[1] = v0 >> 32;
11038 uint32_t * restrict dw = (uint32_t * restrict) dst;
11042 dw[0] = v0;
11043 dw[1] = v0 >> 32;
11057 uint32_t * restrict dw = (uint32_t * restrict) dst;
11061 dw[0] = v0;
11062 dw[1] = v0 >> 32;
11076 uint32_t * restrict dw = (uint32_t * restrict) dst;
11080 dw[0] = v0;
11081 dw[1] = v0 >> 32;
11095 uint32_t * restrict dw = (uint32_t * restrict) dst;
11097 dw[0] =
11112 uint32_t * restrict dw = (uint32_t * restrict) dst;
11114 dw[0] =
11129 uint32_t * restrict dw = (uint32_t * restrict) dst;
11131 dw[0] =
11146 uint32_t * restrict dw = (uint32_t * restrict) dst;
11148 dw[0] =
11193 uint32_t * restrict dw = (uint32_t * restrict) dst;
11195 dw[0] =
11240 uint32_t * restrict dw = (uint32_t * restrict) dst;
11244 dw[0] = v0;
11245 dw[1] = v0 >> 32;