Lines Matching refs:dw
54 uint32_t dw;
116 return ((union __gen_value) { .f = (v) }).dw;
413 uint32_t * restrict dw = (uint32_t * restrict) dst;
415 dw[0] =
419 dw[1] =
424 __gen_combine_address(data, &dw[2], values->Buffer[0], 0);
425 dw[2] = v2_address;
426 dw[3] = v2_address >> 32;
429 __gen_combine_address(data, &dw[4], values->Buffer[1], 0);
430 dw[4] = v4_address;
431 dw[5] = v4_address >> 32;
434 __gen_combine_address(data, &dw[6], values->Buffer[2], 0);
435 dw[6] = v6_address;
436 dw[7] = v6_address >> 32;
439 __gen_combine_address(data, &dw[8], values->Buffer[3], 0);
440 dw[8] = v8_address;
441 dw[9] = v8_address >> 32;
455 uint32_t * restrict dw = (uint32_t * restrict) dst;
457 dw[0] =
472 uint32_t * restrict dw = (uint32_t * restrict) dst;
474 dw[0] =
507 uint32_t * restrict dw = (uint32_t * restrict) dst;
509 dw[0] =
522 dw[1] =
550 uint32_t * restrict dw = (uint32_t * restrict) dst;
552 dw[0] =
575 uint32_t * restrict dw = (uint32_t * restrict) dst;
577 dw[0] =
580 dw[1] =
601 uint32_t * restrict dw = (uint32_t * restrict) dst;
603 dw[0] =
609 dw[1] =
629 uint32_t * restrict dw = (uint32_t * restrict) dst;
631 dw[0] =
634 dw[1] =
637 dw[2] =
640 dw[3] =
643 dw[4] = 0;
645 dw[5] = 0;
647 dw[6] = 0;
649 dw[7] = 0;
671 uint32_t * restrict dw = (uint32_t * restrict) dst;
673 dw[0] =
677 dw[1] =
681 dw[2] =
684 dw[3] =
687 dw[4] =
690 dw[5] =
708 uint32_t * restrict dw = (uint32_t * restrict) dst;
710 dw[0] =
726 uint32_t * restrict dw = (uint32_t * restrict) dst;
728 dw[0] =
742 uint32_t * restrict dw = (uint32_t * restrict) dst;
744 dw[0] =
750 dw[1] =
767 uint32_t * restrict dw = (uint32_t * restrict) dst;
769 dw[0] =
775 dw[1] =
794 uint32_t * restrict dw = (uint32_t * restrict) dst;
796 dw[0] =
816 uint32_t * restrict dw = (uint32_t * restrict) dst;
818 dw[0] =
840 uint32_t * restrict dw = (uint32_t * restrict) dst;
845 dw[0] =
866 uint32_t * restrict dw = (uint32_t * restrict) dst;
868 dw[0] =
872 __gen_combine_address(data, &dw[1], values->IndirectPayloadBaseAddress, 0);
873 dw[1] = v1_address;
874 dw[2] = v1_address >> 32;
876 GEN10_MEMORYADDRESSATTRIBUTES_pack(data, &dw[3], &values->IndirectPayloadBaseAddress2);
899 uint32_t * restrict dw = (uint32_t * restrict) dst;
901 dw[0] =
924 uint32_t * restrict dw = (uint32_t * restrict) dst;
926 dw[0] =
946 uint32_t * restrict dw = (uint32_t * restrict) dst;
948 dw[0] =
966 uint32_t * restrict dw = (uint32_t * restrict) dst;
968 dw[0] =
984 uint32_t * restrict dw = (uint32_t * restrict) dst;
986 dw[0] =
1002 uint32_t * restrict dw = (uint32_t * restrict) dst;
1005 __gen_combine_address(data, &dw[0], values->Address, 0);
1006 dw[0] = v0_address;
1007 dw[1] = v0_address >> 32;
1009 GEN10_MEMORYADDRESSATTRIBUTES_pack(data, &dw[2], &values->MemoryAddressAttributes);
1029 uint32_t * restrict dw = (uint32_t * restrict) dst;
1031 dw[0] =
1037 dw[1] =
1090 uint32_t * restrict dw = (uint32_t * restrict) dst;
1092 dw[0] =
1110 dw[1] =
1117 dw[2] =
1183 uint32_t * restrict dw = (uint32_t * restrict) dst;
1187 dw[0] = v0;
1188 dw[1] = v0 >> 32;
1190 dw[2] =
1200 dw[3] =
1204 dw[4] =
1208 dw[5] =
1212 dw[6] =
1219 dw[7] =
1248 uint32_t * restrict dw = (uint32_t * restrict) dst;
1250 dw[0] =
1256 dw[1] =
1262 dw[2] =
1268 dw[3] =
1295 uint32_t * restrict dw = (uint32_t * restrict) dst;
1297 dw[0] =
1307 dw[1] =
1379 uint32_t * restrict dw = (uint32_t * restrict) dst;
1381 dw[0] =
1400 uint32_t * restrict dw = (uint32_t * restrict) dst;
1402 dw[0] =
1533 uint32_t * restrict dw = (uint32_t * restrict) dst;
1535 dw[0] =
1554 dw[1] =
1559 dw[2] =
1563 dw[3] =
1568 dw[4] =
1577 dw[5] =
1587 dw[6] =
1595 dw[7] =
1605 __gen_combine_address(data, &dw[8], values->SurfaceBaseAddress, 0);
1606 dw[8] = v8_address;
1607 dw[9] = v8_address >> 32;
1617 __gen_combine_address(data, &dw[10], values->AuxiliarySurfaceBaseAddress, v10);
1618 dw[10] = v10_address;
1619 dw[11] = (v10_address >> 32) | (v10 >> 32);
1625 __gen_combine_address(data, &dw[12], values->ClearValueAddress, v12);
1626 dw[12] = v12_address;
1627 dw[13] = (v12_address >> 32) | (v12 >> 32);
1629 dw[14] =
1632 dw[15] =
1654 uint32_t * restrict dw = (uint32_t * restrict) dst;
1656 dw[0] =
1677 uint32_t * restrict dw = (uint32_t * restrict) dst;
1679 dw[0] =
1683 dw[1] =
1687 dw[2] =
1691 dw[3] =
1721 uint32_t * restrict dw = (uint32_t * restrict) dst;
1723 dw[0] =
1732 dw[1] =
1737 dw[2] =
1742 dw[3] =
1837 uint32_t * restrict dw = (uint32_t * restrict) dst;
1839 dw[0] =
1850 dw[1] =
1859 dw[2] =
1864 dw[3] =
1893 uint32_t * restrict dw = (uint32_t * restrict) dst;
1895 dw[0] =
1903 dw[1] =
1907 dw[2] =
1911 dw[3] =
1915 dw[4] = 0;
1917 dw[5] = 0;
1919 dw[6] = 0;
1921 dw[7] = 0;
1937 uint32_t * restrict dw = (uint32_t * restrict) dst;
1939 dw[0] =
1943 dw[1] =
1966 uint32_t * restrict dw = (uint32_t * restrict) dst;
1968 dw[0] =
1974 dw[1] =
1980 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[2], &values->FilterCoefficients[0]);
1982 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[4], &values->FilterCoefficients[1]);
1984 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[6], &values->FilterCoefficients[2]);
1986 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[8], &values->FilterCoefficients[3]);
1988 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[10], &values->FilterCoefficients[4]);
1990 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[12], &values->FilterCoefficients[5]);
1992 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[14], &values->FilterCoefficients[6]);
1994 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[16], &values->FilterCoefficients[7]);
1996 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[18], &values->FilterCoefficients[8]);
1998 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[20], &values->FilterCoefficients[9]);
2000 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[22], &values->FilterCoefficients[10]);
2002 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[24], &values->FilterCoefficients[11]);
2004 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[26], &values->FilterCoefficients[12]);
2006 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[28], &values->FilterCoefficients[13]);
2008 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[30], &values->FilterCoefficients[14]);
2010 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[32], &values->FilterCoefficients[15]);
2012 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[34], &values->FilterCoefficients[16]);
2014 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[36], &values->FilterCoefficients[17]);
2016 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[38], &values->FilterCoefficients[18]);
2018 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[40], &values->FilterCoefficients[19]);
2020 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[42], &values->FilterCoefficients[20]);
2022 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[44], &values->FilterCoefficients[21]);
2024 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[46], &values->FilterCoefficients[22]);
2026 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[48], &values->FilterCoefficients[23]);
2028 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[50], &values->FilterCoefficients[24]);
2030 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[52], &values->FilterCoefficients[25]);
2032 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[54], &values->FilterCoefficients[26]);
2034 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[56], &values->FilterCoefficients[27]);
2036 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[58], &values->FilterCoefficients[28]);
2038 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[60], &values->FilterCoefficients[29]);
2040 GEN10_CHROMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[62], &values->FilterCoefficients[30]);
2069 uint32_t * restrict dw = (uint32_t * restrict) dst;
2071 dw[0] =
2077 dw[1] =
2083 dw[2] =
2089 dw[3] =
2095 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[4], &values->FilterCoefficients[0]);
2097 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[8], &values->FilterCoefficients[1]);
2099 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[12], &values->FilterCoefficients[2]);
2101 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[16], &values->FilterCoefficients[3]);
2103 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[20], &values->FilterCoefficients[4]);
2105 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[24], &values->FilterCoefficients[5]);
2107 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[28], &values->FilterCoefficients[6]);
2109 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[32], &values->FilterCoefficients[7]);
2111 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[36], &values->FilterCoefficients[8]);
2113 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[40], &values->FilterCoefficients[9]);
2115 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[44], &values->FilterCoefficients[10]);
2117 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[48], &values->FilterCoefficients[11]);
2119 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[52], &values->FilterCoefficients[12]);
2121 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[56], &values->FilterCoefficients[13]);
2123 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[60], &values->FilterCoefficients[14]);
2125 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[64], &values->FilterCoefficients[15]);
2127 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[68], &values->FilterCoefficients[16]);
2129 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[72], &values->FilterCoefficients[17]);
2131 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[76], &values->FilterCoefficients[18]);
2133 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[80], &values->FilterCoefficients[19]);
2135 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[84], &values->FilterCoefficients[20]);
2137 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[88], &values->FilterCoefficients[21]);
2139 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[92], &values->FilterCoefficients[22]);
2141 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[96], &values->FilterCoefficients[23]);
2143 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[100], &values->FilterCoefficients[24]);
2145 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[104], &values->FilterCoefficients[25]);
2147 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[108], &values->FilterCoefficients[26]);
2149 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[112], &values->FilterCoefficients[27]);
2151 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[116], &values->FilterCoefficients[28]);
2153 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[120], &values->FilterCoefficients[29]);
2155 GEN10_LUMA_FILTER_COEFFICIENTS_ARRAY_pack(data, &dw[124], &values->FilterCoefficients[30]);
2172 uint32_t * restrict dw = (uint32_t * restrict) dst;
2174 dw[0] =
2179 dw[1] =
2270 uint32_t * restrict dw = (uint32_t * restrict) dst;
2272 dw[0] =
2279 dw[1] =
2288 dw[2] =
2294 dw[3] =
2301 dw[4] =
2309 dw[5] =
2315 dw[6] =
2318 dw[7] =
2323 dw[8] =
2329 dw[9] =
2335 dw[10] =
2339 dw[11] =
2345 dw[12] =
2351 dw[13] =
2356 dw[14] =
2360 dw[15] =
2366 dw[16] =
2370 dw[17] =
2374 dw[18] =
2378 dw[19] =
2381 dw[20] =
2385 dw[21] =
2389 dw[22] =
2405 uint32_t * restrict dw = (uint32_t * restrict) dst;
2407 dw[0] =
2435 uint32_t * restrict dw = (uint32_t * restrict) dst;
2437 dw[0] =
2440 dw[1] =
2443 dw[2] =
2446 dw[3] =
2449 dw[4] =
2452 dw[5] =
2455 dw[6] = 0;
2457 dw[7] = 0;
2459 dw[8] =
2462 dw[9] =
2465 dw[10] =
2468 dw[11] =
2471 dw[12] =
2474 dw[13] =
2477 dw[14] =
2480 dw[15] =
2509 uint32_t * restrict dw = (uint32_t * restrict) dst;
2511 dw[0] =
2535 uint32_t * restrict dw = (uint32_t * restrict) dst;
2537 dw[0] =
2557 uint32_t * restrict dw = (uint32_t * restrict) dst;
2565 dw[0] =
2575 dw[1] =
2602 uint32_t * restrict dw = (uint32_t * restrict) dst;
2604 dw[0] =
2624 uint32_t * restrict dw = (uint32_t * restrict) dst;
2627 __gen_combine_address(data, &dw[0], values->Address, 0);
2628 dw[0] = v0_address;
2629 dw[1] = v0_address >> 32;
2631 GEN10_VDENC_SURFACE_CONTROL_BITS_pack(data, &dw[2], &values->PictureFields);
2674 uint32_t * restrict dw = (uint32_t * restrict) dst;
2676 dw[0] =
2683 dw[1] =
2692 dw[2] =
2696 dw[3] =
2717 uint32_t * restrict dw = (uint32_t * restrict) dst;
2719 dw[0] =
2727 __gen_combine_address(data, &dw[1], values->BufferStartingAddress, 0);
2728 dw[1] = v1_address;
2729 dw[2] = v1_address >> 32;
2731 dw[3] =
2753 uint32_t * restrict dw = (uint32_t * restrict) dst;
2755 dw[0] =
2762 dw[1] =
2808 uint32_t * restrict dw = (uint32_t * restrict) dst;
2810 dw[0] =
2821 dw[1] =
2826 dw[2] =
2829 dw[3] =
2832 dw[4] =
2835 dw[5] =
2838 dw[6] =
2872 uint32_t * restrict dw = (uint32_t * restrict) dst;
2874 dw[0] =
2881 dw[1] =
2887 dw[2] =
2921 uint32_t * restrict dw = (uint32_t * restrict) dst;
2923 dw[0] =
2930 dw[1] =
2962 uint32_t * restrict dw = (uint32_t * restrict) dst;
2964 dw[0] =
2971 dw[1] =
3003 uint32_t * restrict dw = (uint32_t * restrict) dst;
3005 dw[0] =
3012 dw[1] =
3044 uint32_t * restrict dw = (uint32_t * restrict) dst;
3046 dw[0] =
3053 dw[1] =
3085 uint32_t * restrict dw = (uint32_t * restrict) dst;
3087 dw[0] =
3094 dw[1] =
3122 uint32_t * restrict dw = (uint32_t * restrict) dst;
3124 dw[0] =
3131 dw[1] =
3158 uint32_t * restrict dw = (uint32_t * restrict) dst;
3160 dw[0] =
3167 dw[1] =
3194 uint32_t * restrict dw = (uint32_t * restrict) dst;
3196 dw[0] =
3203 dw[1] =
3230 uint32_t * restrict dw = (uint32_t * restrict) dst;
3232 dw[0] =
3239 dw[1] =
3266 uint32_t * restrict dw = (uint32_t * restrict) dst;
3268 dw[0] =
3275 dw[1] =
3306 uint32_t * restrict dw = (uint32_t * restrict) dst;
3308 dw[0] =
3319 __gen_combine_address(data, &dw[1], values->BindingTablePoolBaseAddress, v1);
3320 dw[1] = v1_address;
3321 dw[2] = (v1_address >> 32) | (v1 >> 32);
3323 dw[3] =
3351 uint32_t * restrict dw = (uint32_t * restrict) dst;
3353 dw[0] =
3360 dw[1] =
3389 uint32_t * restrict dw = (uint32_t * restrict) dst;
3391 dw[0] =
3398 dw[1] =
3428 uint32_t * restrict dw = (uint32_t * restrict) dst;
3430 dw[0] =
3437 dw[1] =
3440 dw[2] =
3443 dw[3] =
3471 uint32_t * restrict dw = (uint32_t * restrict) dst;
3473 dw[0] =
3480 dw[1] =
3483 dw[2] =
3538 uint32_t * restrict dw = (uint32_t * restrict) dst;
3540 dw[0] =
3547 dw[1] =
3556 dw[2] =
3569 dw[3] =
3600 uint32_t * restrict dw = (uint32_t * restrict) dst;
3602 dw[0] =
3610 GEN10_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3637 uint32_t * restrict dw = (uint32_t * restrict) dst;
3639 dw[0] =
3647 GEN10_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3674 uint32_t * restrict dw = (uint32_t * restrict) dst;
3676 dw[0] =
3684 GEN10_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3712 uint32_t * restrict dw = (uint32_t * restrict) dst;
3714 dw[0] =
3723 GEN10_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3750 uint32_t * restrict dw = (uint32_t * restrict) dst;
3752 dw[0] =
3760 GEN10_3DSTATE_CONSTANT_BODY_pack(data, &dw[1], &values->ConstantBody);
3811 uint32_t * restrict dw = (uint32_t * restrict) dst;
3813 dw[0] =
3820 dw[1] =
3829 __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, 0);
3830 dw[2] = v2_address;
3831 dw[3] = v2_address >> 32;
3833 dw[4] =
3838 dw[5] =
3843 dw[6] =
3847 dw[7] =
3884 uint32_t * restrict dw = (uint32_t * restrict) dst;
3886 dw[0] =
3894 dw[1] =
3898 dw[2] =
3902 dw[3] =
3965 uint32_t * restrict dw = (uint32_t * restrict) dst;
3967 dw[0] =
3976 dw[1] = v1;
3977 dw[2] = v1 >> 32;
3979 dw[3] =
3992 __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, v4);
3993 dw[4] = v4_address;
3994 dw[5] = (v4_address >> 32) | (v4 >> 32);
3996 dw[6] =
4001 dw[7] =
4009 dw[8] =
4017 dw[9] = v9;
4018 dw[10] = v9 >> 32;
4053 uint32_t * restrict dw = (uint32_t * restrict) dst;
4055 dw[0] =
4062 dw[1] =
4067 dw[2] =
4105 uint32_t * restrict dw = (uint32_t * restrict) dst;
4107 dw[0] =
4114 dw[1] =
4119 dw[2] =
4157 uint32_t * restrict dw = (uint32_t * restrict) dst;
4159 dw[0] =
4166 dw[1] =
4171 dw[2] =
4211 uint32_t * restrict dw = (uint32_t * restrict) dst;
4213 dw[0] =
4220 dw[1] =
4226 dw[2] =
4267 uint32_t * restrict dw = (uint32_t * restrict) dst;
4269 dw[0] =
4276 dw[1] =
4282 dw[2] =
4315 uint32_t * restrict dw = (uint32_t * restrict) dst;
4317 dw[0] =
4328 __gen_combine_address(data, &dw[1], values->GatherPoolBaseAddress, v1);
4329 dw[1] = v1_address;
4330 dw[2] = (v1_address >> 32) | (v1 >> 32);
4332 dw[3] =
4413 uint32_t * restrict dw = (uint32_t * restrict) dst;
4415 dw[0] =
4424 dw[1] = v1;
4425 dw[2] = v1 >> 32;
4427 dw[3] =
4443 __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, v4);
4444 dw[4] = v4_address;
4445 dw[5] = (v4_address >> 32) | (v4 >> 32);
4447 dw[6] =
4456 dw[7] =
4469 dw[8] =
4475 dw[9] =
4508 uint32_t * restrict dw = (uint32_t * restrict) dst;
4510 dw[0] =
4517 dw[1] =
4522 __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, 0);
4523 dw[2] = v2_address;
4524 dw[3] = v2_address >> 32;
4526 dw[4] =
4586 uint32_t * restrict dw = (uint32_t * restrict) dst;
4588 dw[0] =
4595 dw[1] =
4603 dw[2] =
4611 dw[3] = v3;
4612 dw[4] = v3 >> 32;
4617 __gen_combine_address(data, &dw[5], values->ScratchSpaceBasePointer, v5);
4618 dw[5] = v5_address;
4619 dw[6] = (v5_address >> 32) | (v5 >> 32);
4621 dw[7] =
4633 dw[8] = 0;
4665 uint32_t * restrict dw = (uint32_t * restrict) dst;
4667 dw[0] =
4674 dw[1] =
4679 __gen_combine_address(data, &dw[2], values->BufferStartingAddress, 0);
4680 dw[2] = v2_address;
4681 dw[3] = v2_address >> 32;
4683 dw[4] =
4715 uint32_t * restrict dw = (uint32_t * restrict) dst;
4717 dw[0] =
4724 dw[1] =
4730 dw[2] =
4759 uint32_t * restrict dw = (uint32_t * restrict) dst;
4761 dw[0] =
4768 dw[1] =
4800 uint32_t * restrict dw = (uint32_t * restrict) dst;
4802 dw[0] =
4809 dw[1] =
4839 uint32_t * restrict dw = (uint32_t * restrict) dst;
4841 dw[0] =
4848 dw[1] =
4876 uint32_t * restrict dw = (uint32_t * restrict) dst;
4878 dw[0] =
4885 dw[1] =
4888 dw[2] =
4891 dw[3] =
4894 dw[4] =
4897 dw[5] =
4900 dw[6] =
4903 dw[7] =
4906 dw[8] =
4909 dw[9] =
4912 dw[10] =
4915 dw[11] =
4918 dw[12] =
4921 dw[13] =
4924 dw[14] =
4927 dw[15] =
4930 dw[16] =
4933 dw[17] =
4936 dw[18] =
4939 dw[19] =
4942 dw[20] =
4945 dw[21] =
4948 dw[22] =
4951 dw[23] =
4954 dw[24] =
4957 dw[25] =
4960 dw[26] =
4963 dw[27] =
4966 dw[28] =
4969 dw[29] =
4972 dw[30] =
4975 dw[31] =
4978 dw[32] =
5052 uint32_t * restrict dw = (uint32_t * restrict) dst;
5054 dw[0] =
5063 dw[1] = v1;
5064 dw[2] = v1 >> 32;
5066 dw[3] =
5082 __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, v4);
5083 dw[4] = v4_address;
5084 dw[5] = (v4_address >> 32) | (v4 >> 32);
5086 dw[6] =
5096 dw[7] =
5103 dw[8] = v8;
5104 dw[9] = v8 >> 32;
5108 dw[10] = v10;
5109 dw[11] = v10 >> 32;
5143 uint32_t * restrict dw = (uint32_t * restrict) dst;
5145 dw[0] =
5152 dw[1] =
5214 uint32_t * restrict dw = (uint32_t * restrict) dst;
5216 dw[0] =
5223 dw[1] =
5270 uint32_t * restrict dw = (uint32_t * restrict) dst;
5272 dw[0] =
5279 dw[1] =
5308 uint32_t * restrict dw = (uint32_t * restrict) dst;
5310 dw[0] =
5317 dw[1] =
5346 uint32_t * restrict dw = (uint32_t * restrict) dst;
5348 dw[0] =
5355 dw[1] =
5384 uint32_t * restrict dw = (uint32_t * restrict) dst;
5386 dw[0] =
5393 dw[1] =
5422 uint32_t * restrict dw = (uint32_t * restrict) dst;
5424 dw[0] =
5431 dw[1] =
5504 uint32_t * restrict dw = (uint32_t * restrict) dst;
5506 dw[0] =
5513 dw[1] =
5533 dw[2] =
5536 dw[3] =
5539 dw[4] =
5573 uint32_t * restrict dw = (uint32_t * restrict) dst;
5575 dw[0] =
5582 dw[1] =
5586 dw[2] = __gen_combine_address(data, &dw[2], values->GlobalConstantBufferAddress, 0);
5588 dw[3] = __gen_combine_address(data, &dw[3], values->GlobalConstantBufferAddressHigh, 0);
5612 uint32_t * restrict dw = (uint32_t * restrict) dst;
5614 dw[0] =
5644 uint32_t * restrict dw = (uint32_t * restrict) dst;
5646 dw[0] =
5677 uint32_t * restrict dw = (uint32_t * restrict) dst;
5679 dw[0] =
5686 dw[1] =
5713 uint32_t * restrict dw = (uint32_t * restrict) dst;
5715 dw[0] =
5722 dw[1] =
5749 uint32_t * restrict dw = (uint32_t * restrict) dst;
5751 dw[0] =
5758 dw[1] =
5785 uint32_t * restrict dw = (uint32_t * restrict) dst;
5787 dw[0] =
5794 dw[1] =
5821 uint32_t * restrict dw = (uint32_t * restrict) dst;
5823 dw[0] =
5830 dw[1] =
5857 uint32_t * restrict dw = (uint32_t * restrict) dst;
5859 dw[0] =
5866 dw[1] =
5954 uint32_t * restrict dw = (uint32_t * restrict) dst;
5956 dw[0] =
5963 dw[1] =
5973 dw[2] =
5983 dw[3] =
5993 dw[4] =
6003 dw[5] =
6013 dw[6] =
6023 dw[7] =
6033 dw[8] =
6085 uint32_t * restrict dw = (uint32_t * restrict) dst;
6087 dw[0] =
6094 dw[1] =
6108 dw[2] =
6111 dw[3] =
6114 dw[4] =
6132 dw[5] =
6175 uint32_t * restrict dw = (uint32_t * restrict) dst;
6177 dw[0] =
6190 dw[1] =
6200 dw[2] =
6210 dw[3] =
6220 dw[4] =
6230 dw[5] =
6240 dw[6] =
6250 dw[7] =
6260 dw[8] =
6264 dw[9] =
6274 dw[10] =
6308 uint32_t * restrict dw = (uint32_t * restrict) dst;
6310 dw[0] =
6317 dw[1] =
6366 uint32_t * restrict dw = (uint32_t * restrict) dst;
6368 dw[0] =
6375 dw[1] =
6381 dw[2] =
6384 dw[3] =
6427 uint32_t * restrict dw = (uint32_t * restrict) dst;
6429 dw[0] =
6436 dw[1] =
6444 __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, 0);
6445 dw[2] = v2_address;
6446 dw[3] = v2_address >> 32;
6448 dw[4] =
6452 __gen_combine_address(data, &dw[5], values->StreamOutputBufferOffsetAddress, 0);
6453 dw[5] = v5_address;
6454 dw[6] = v5_address >> 32;
6456 dw[7] =
6489 uint32_t * restrict dw = (uint32_t * restrict) dst;
6491 dw[0] =
6498 dw[1] =
6504 dw[2] =
6538 uint32_t * restrict dw = (uint32_t * restrict) dst;
6540 dw[0] =
6547 dw[1] =
6553 __gen_combine_address(data, &dw[2], values->SurfaceBaseAddress, 0);
6554 dw[2] = v2_address;
6555 dw[3] = v2_address >> 32;
6557 dw[4] =
6606 uint32_t * restrict dw = (uint32_t * restrict) dst;
6608 dw[0] =
6615 dw[1] =
6623 dw[2] =
6633 dw[3] =
6637 dw[4] =
6682 uint32_t * restrict dw = (uint32_t * restrict) dst;
6684 dw[0] =
6691 dw[1] =
6698 dw[2] =
6701 dw[3] =
6729 uint32_t * restrict dw = (uint32_t * restrict) dst;
6731 dw[0] =
6738 dw[1] =
6768 uint32_t * restrict dw = (uint32_t * restrict) dst;
6770 dw[0] =
6777 dw[1] =
6808 uint32_t * restrict dw = (uint32_t * restrict) dst;
6810 dw[0] =
6817 dw[1] =
6848 uint32_t * restrict dw = (uint32_t * restrict) dst;
6850 dw[0] =
6857 dw[1] =
6888 uint32_t * restrict dw = (uint32_t * restrict) dst;
6890 dw[0] =
6897 dw[1] =
6925 uint32_t * restrict dw = (uint32_t * restrict) dst;
6927 dw[0] =
6957 uint32_t * restrict dw = (uint32_t * restrict) dst;
6959 dw[0] =
6994 uint32_t * restrict dw = (uint32_t * restrict) dst;
6996 dw[0] =
7007 dw[1] =
7065 uint32_t * restrict dw = (uint32_t * restrict) dst;
7067 dw[0] =
7074 dw[1] =
7084 dw[2] =
7094 dw[3] =
7104 dw[4] =
7140 uint32_t * restrict dw = (uint32_t * restrict) dst;
7142 dw[0] =
7149 dw[1] =
7153 dw[2] =
7193 uint32_t * restrict dw = (uint32_t * restrict) dst;
7195 dw[0] =
7202 dw[1] =
7260 uint32_t * restrict dw = (uint32_t * restrict) dst;
7262 dw[0] =
7269 dw[1] =
7279 dw[2] =
7306 uint32_t * restrict dw = (uint32_t * restrict) dst;
7308 dw[0] =
7339 uint32_t * restrict dw = (uint32_t * restrict) dst;
7341 dw[0] =
7348 dw[1] =
7375 uint32_t * restrict dw = (uint32_t * restrict) dst;
7377 dw[0] =
7384 dw[1] =
7411 uint32_t * restrict dw = (uint32_t * restrict) dst;
7413 dw[0] =
7420 dw[1] =
7479 uint32_t * restrict dw = (uint32_t * restrict) dst;
7481 dw[0] =
7490 dw[1] = v1;
7491 dw[2] = v1 >> 32;
7493 dw[3] =
7507 __gen_combine_address(data, &dw[4], values->ScratchSpaceBasePointer, v4);
7508 dw[4] = v4_address;
7509 dw[5] = (v4_address >> 32) | (v4 >> 32);
7511 dw[6] =
7516 dw[7] =
7524 dw[8] =
7594 uint32_t * restrict dw = (uint32_t * restrict) dst;
7596 dw[0] =
7603 dw[1] =
7644 uint32_t * restrict dw = (uint32_t * restrict) dst;
7646 dw[0] =
7653 dw[1] =
7699 uint32_t * restrict dw = (uint32_t * restrict) dst;
7701 dw[0] =
7708 dw[1] =
7724 dw[2] =
7730 dw[3] =
7771 uint32_t * restrict dw = (uint32_t * restrict) dst;
7773 dw[0] =
7780 dw[1] =
7791 dw[2] =
7795 dw[3] =
7799 dw[4] =
7845 uint32_t * restrict dw = (uint32_t * restrict) dst;
7847 dw[0] =
7856 dw[1] =
7859 dw[2] =
7862 dw[3] =
7865 dw[4] =
7871 dw[5] =
7874 dw[6] = 0;
7876 dw[7] =
7879 dw[8] =
7882 dw[9] = 0;
7884 dw[10] =
7887 dw[11] =
7890 dw[12] =
7893 dw[13] =
7896 dw[14] =
7924 uint32_t * restrict dw = (uint32_t * restrict) dst;
7926 dw[0] =
7933 dw[1] = 0;
7935 dw[2] =
7938 dw[3] =
7966 uint32_t * restrict dw = (uint32_t * restrict) dst;
7968 dw[0] =
7975 dw[1] = 0;
7977 dw[2] =
7980 dw[3] =
8031 uint32_t * restrict dw = (uint32_t * restrict) dst;
8033 dw[0] =
8040 dw[1] =
8043 dw[2] =
8053 dw[3] = __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, 0);
8055 dw[4] =
8059 dw[5] =
8098 uint32_t * restrict dw = (uint32_t * restrict) dst;
8100 dw[0] =
8107 dw[1] =
8110 dw[2] =
8115 dw[3] = __gen_combine_address(data, &dw[3], values->IndirectDataStartAddress, 0);
8117 dw[4] =
8121 dw[5] =
8125 dw[6] =
8158 uint32_t * restrict dw = (uint32_t * restrict) dst;
8160 dw[0] =
8167 dw[1] =
8170 dw[2] =
8175 dw[3] = 0;
8177 dw[4] =
8180 dw[5] =
8183 dw[6] =
8186 dw[7] =
8189 dw[8] =
8192 dw[9] =
8195 dw[10] =
8198 dw[11] =
8201 dw[12] =
8204 dw[13] =
8207 dw[14] =
8210 dw[15] =
8276 uint32_t * restrict dw = (uint32_t * restrict) dst;
8278 dw[0] =
8285 dw[1] =
8288 dw[2] =
8294 dw[3] =
8297 dw[4] = 0;
8299 dw[5] =
8303 dw[6] =
8309 dw[7] =
8313 dw[8] =
8317 dw[9] =
8321 dw[10] = 0;
8323 dw[11] =
8327 dw[12] =
8331 dw[13] =
8335 dw[14] =
8339 dw[15] =
8343 dw[16] =
8373 uint32_t * restrict dw = (uint32_t * restrict) dst;
8375 dw[0] =
8382 dw[1] =
8454 uint32_t * restrict dw = (uint32_t * restrict) dst;
8456 dw[0] =
8467 __gen_combine_address(data, &dw[1], values->ScratchSpaceBasePointer, v1);
8468 dw[1] = v1_address;
8469 dw[2] = (v1_address >> 32) | (v1 >> 32);
8471 dw[3] =
8478 dw[4] =
8481 dw[5] =
8485 dw[6] =
8491 dw[7] =
8501 dw[8] =
8528 uint32_t * restrict dw = (uint32_t * restrict) dst;
8530 dw[0] =
8554 uint32_t * restrict dw = (uint32_t * restrict) dst;
8556 dw[0] =
8603 uint32_t * restrict dw = (uint32_t * restrict) dst;
8605 dw[0] =
8618 __gen_combine_address(data, &dw[1], values->MemoryAddress, 0);
8619 dw[1] = v1_address;
8620 dw[2] = v1_address >> 32;
8640 uint32_t * restrict dw = (uint32_t * restrict) dst;
8642 dw[0] =
8676 uint32_t * restrict dw = (uint32_t * restrict) dst;
8678 dw[0] =
8689 __gen_combine_address(data, &dw[1], values->BatchBufferStartAddress, 0);
8690 dw[1] = v1_address;
8691 dw[2] = v1_address >> 32;
8715 uint32_t * restrict dw = (uint32_t * restrict) dst;
8717 dw[0] =
8726 __gen_combine_address(data, &dw[1], values->PageBaseAddress, v1);
8727 dw[1] = v1_address;
8728 dw[2] = (v1_address >> 32) | (v1 >> 32);
8757 uint32_t * restrict dw = (uint32_t * restrict) dst;
8759 dw[0] =
8767 dw[1] =
8771 __gen_combine_address(data, &dw[2], values->CompareAddress, 0);
8772 dw[2] = v2_address;
8773 dw[3] = v2_address >> 32;
8798 uint32_t * restrict dw = (uint32_t * restrict) dst;
8800 dw[0] =
8808 __gen_combine_address(data, &dw[1], values->DestinationMemoryAddress, 0);
8809 dw[1] = v1_address;
8810 dw[2] = v1_address >> 32;
8813 __gen_combine_address(data, &dw[3], values->SourceMemoryAddress, 0);
8814 dw[3] = v3_address;
8815 dw[4] = v3_address >> 32;
8860 uint32_t * restrict dw = (uint32_t * restrict) dst;
8862 dw[0] =
8869 dw[1] =
8877 dw[2] = __gen_combine_address(data, &dw[2], values->DisplayBufferBaseAddress, v2);
8901 uint32_t * restrict dw = (uint32_t * restrict) dst;
8903 dw[0] =
8908 dw[1] =
8936 uint32_t * restrict dw = (uint32_t * restrict) dst;
8938 dw[0] =
8944 dw[1] =
8947 dw[2] =
8973 uint32_t * restrict dw = (uint32_t * restrict) dst;
8975 dw[0] =
8982 dw[1] =
8986 __gen_combine_address(data, &dw[2], values->MemoryAddress, 0);
8987 dw[2] = v2_address;
8988 dw[3] = v2_address >> 32;
9011 uint32_t * restrict dw = (uint32_t * restrict) dst;
9013 dw[0] =
9018 dw[1] =
9021 dw[2] =
9049 uint32_t * restrict dw = (uint32_t * restrict) dst;
9051 dw[0] =
9057 dw[1] =
9087 uint32_t * restrict dw = (uint32_t * restrict) dst;
9089 dw[0] =
9096 dw[1] =
9119 uint32_t * restrict dw = (uint32_t * restrict) dst;
9121 dw[0] =
9145 uint32_t * restrict dw = (uint32_t * restrict) dst;
9147 dw[0] =
9184 uint32_t * restrict dw = (uint32_t * restrict) dst;
9186 dw[0] =
9210 uint32_t * restrict dw = (uint32_t * restrict) dst;
9212 dw[0] =
9239 uint32_t * restrict dw = (uint32_t * restrict) dst;
9241 dw[0] =
9250 __gen_combine_address(data, &dw[1], values->MemoryAddress, v1);
9251 dw[1] = v1_address;
9252 dw[2] = (v1_address >> 32) | (v1 >> 32);
9254 dw[3] =
9277 uint32_t * restrict dw = (uint32_t * restrict) dst;
9279 dw[0] =
9304 uint32_t * restrict dw = (uint32_t * restrict) dst;
9306 dw[0] =
9333 uint32_t * restrict dw = (uint32_t * restrict) dst;
9335 dw[0] =
9343 __gen_combine_address(data, &dw[1], values->DestinationAddress, v1);
9344 dw[1] = v1_address;
9345 dw[2] = (v1_address >> 32) | (v1 >> 32);
9347 dw[3] =
9377 uint32_t * restrict dw = (uint32_t * restrict) dst;
9379 dw[0] =
9386 dw[1] =
9424 uint32_t * restrict dw = (uint32_t * restrict) dst;
9426 dw[0] =
9435 dw[1] =
9439 __gen_combine_address(data, &dw[2], values->SemaphoreAddress, 0);
9440 dw[2] = v2_address;
9441 dw[3] = v2_address >> 32;
9469 uint32_t * restrict dw = (uint32_t * restrict) dst;
9471 dw[0] =
9483 dw[1] = __gen_combine_address(data, &dw[1], values->LogicalContextAddress, v1);
9509 uint32_t * restrict dw = (uint32_t * restrict) dst;
9511 dw[0] =
9540 uint32_t * restrict dw = (uint32_t * restrict) dst;
9542 dw[0] =
9552 __gen_combine_address(data, &dw[1], values->Address, v1);
9553 dw[1] = v1_address;
9554 dw[2] = (v1_address >> 32) | (v1 >> 32);
9558 dw[3] = v3;
9559 dw[4] = v3 >> 32;
9584 uint32_t * restrict dw = (uint32_t * restrict) dst;
9586 dw[0] =
9592 dw[1] =
9595 dw[2] =
9621 uint32_t * restrict dw = (uint32_t * restrict) dst;
9623 dw[0] =
9630 dw[1] =
9634 __gen_combine_address(data, &dw[2], values->MemoryAddress, 0);
9635 dw[2] = v2_address;
9636 dw[3] = v2_address >> 32;
9656 uint32_t * restrict dw = (uint32_t * restrict) dst;
9658 dw[0] =
9681 uint32_t * restrict dw = (uint32_t * restrict) dst;
9683 dw[0] =
9708 uint32_t * restrict dw = (uint32_t * restrict) dst;
9710 dw[0] =
9715 dw[1] = __gen_combine_address(data, &dw[1], values->EntryAddress, 0);
9734 uint32_t * restrict dw = (uint32_t * restrict) dst;
9736 dw[0] =
9775 uint32_t * restrict dw = (uint32_t * restrict) dst;
9777 dw[0] =
9827 uint32_t * restrict dw = (uint32_t * restrict) dst;
9829 dw[0] =
9895 uint32_t * restrict dw = (uint32_t * restrict) dst;
9897 dw[0] =
9904 dw[1] =
9930 __gen_combine_address(data, &dw[2], values->Address, 0);
9931 dw[2] = v2_address;
9932 dw[3] = v2_address >> 32;
9936 dw[4] = v4;
9937 dw[5] = v4 >> 32;
9994 uint32_t * restrict dw = (uint32_t * restrict) dst;
9996 dw[0] =
10007 __gen_combine_address(data, &dw[1], values->GeneralStateBaseAddress, v1);
10008 dw[1] = v1_address;
10009 dw[2] = (v1_address >> 32) | (v1 >> 32);
10011 dw[3] =
10018 __gen_combine_address(data, &dw[4], values->SurfaceStateBaseAddress, v4);
10019 dw[4] = v4_address;
10020 dw[5] = (v4_address >> 32) | (v4 >> 32);
10026 __gen_combine_address(data, &dw[6], values->DynamicStateBaseAddress, v6);
10027 dw[6] = v6_address;
10028 dw[7] = (v6_address >> 32) | (v6 >> 32);
10034 __gen_combine_address(data, &dw[8], values->IndirectObjectBaseAddress, v8);
10035 dw[8] = v8_address;
10036 dw[9] = (v8_address >> 32) | (v8 >> 32);
10042 __gen_combine_address(data, &dw[10], values->InstructionBaseAddress, v10);
10043 dw[10] = v10_address;
10044 dw[11] = (v10_address >> 32) | (v10 >> 32);
10046 dw[12] =
10050 dw[13] =
10054 dw[14] =
10058 dw[15] =
10066 __gen_combine_address(data, &dw[16], values->BindlessSurfaceStateBaseAddress, v16);
10067 dw[16] = v16_address;
10068 dw[17] = (v16_address >> 32) | (v16 >> 32);
10070 dw[18] =
10077 __gen_combine_address(data, &dw[19], values->BindlessSamplerStateBaseAddress, v19);
10078 dw[19] = v19_address;
10079 dw[20] = (v19_address >> 32) | (v19 >> 32);
10081 dw[21] =
10108 uint32_t * restrict dw = (uint32_t * restrict) dst;
10110 dw[0] =
10119 dw[1] = v1;
10120 dw[2] = v1 >> 32;
10137 uint32_t * restrict dw = (uint32_t * restrict) dst;
10139 dw[0] =
10179 uint32_t * restrict dw = (uint32_t * restrict) dst;
10181 dw[0] =
10228 uint32_t * restrict dw = (uint32_t * restrict) dst;
10230 dw[0] =
10265 uint32_t * restrict dw = (uint32_t * restrict) dst;
10267 dw[0] =
10289 uint32_t * restrict dw = (uint32_t * restrict) dst;
10293 dw[0] = v0;
10294 dw[1] = v0 >> 32;
10308 uint32_t * restrict dw = (uint32_t * restrict) dst;
10312 dw[0] = v0;
10313 dw[1] = v0 >> 32;
10330 uint32_t * restrict dw = (uint32_t * restrict) dst;
10332 dw[0] =
10353 uint32_t * restrict dw = (uint32_t * restrict) dst;
10355 dw[0] =
10375 uint32_t * restrict dw = (uint32_t * restrict) dst;
10379 dw[0] = v0;
10380 dw[1] = v0 >> 32;
10394 uint32_t * restrict dw = (uint32_t * restrict) dst;
10398 dw[0] = v0;
10399 dw[1] = v0 >> 32;
10413 uint32_t * restrict dw = (uint32_t * restrict) dst;
10417 dw[0] = v0;
10418 dw[1] = v0 >> 32;
10432 uint32_t * restrict dw = (uint32_t * restrict) dst;
10436 dw[0] = v0;
10437 dw[1] = v0 >> 32;
10451 uint32_t * restrict dw = (uint32_t * restrict) dst;
10455 dw[0] = v0;
10456 dw[1] = v0 >> 32;
10470 uint32_t * restrict dw = (uint32_t * restrict) dst;
10474 dw[0] = v0;
10475 dw[1] = v0 >> 32;
10489 uint32_t * restrict dw = (uint32_t * restrict) dst;
10493 dw[0] = v0;
10494 dw[1] = v0 >> 32;
10529 uint32_t * restrict dw = (uint32_t * restrict) dst;
10531 dw[0] =
10571 uint32_t * restrict dw = (uint32_t * restrict) dst;
10573 dw[0] =
10592 uint32_t * restrict dw = (uint32_t * restrict) dst;
10596 dw[0] = v0;
10597 dw[1] = v0 >> 32;
10625 uint32_t * restrict dw = (uint32_t * restrict) dst;
10627 dw[0] =
10675 uint32_t * restrict dw = (uint32_t * restrict) dst;
10677 dw[0] =
10734 uint32_t * restrict dw = (uint32_t * restrict) dst;
10736 dw[0] =
10774 uint32_t * restrict dw = (uint32_t * restrict) dst;
10778 dw[0] = v0;
10779 dw[1] = v0 >> 32;
10793 uint32_t * restrict dw = (uint32_t * restrict) dst;
10797 dw[0] = v0;
10798 dw[1] = v0 >> 32;
10812 uint32_t * restrict dw = (uint32_t * restrict) dst;
10816 dw[0] = v0;
10817 dw[1] = v0 >> 32;
10831 uint32_t * restrict dw = (uint32_t * restrict) dst;
10835 dw[0] = v0;
10836 dw[1] = v0 >> 32;
10850 uint32_t * restrict dw = (uint32_t * restrict) dst;
10854 dw[0] = v0;
10855 dw[1] = v0 >> 32;
10869 uint32_t * restrict dw = (uint32_t * restrict) dst;
10873 dw[0] = v0;
10874 dw[1] = v0 >> 32;
10888 uint32_t * restrict dw = (uint32_t * restrict) dst;
10892 dw[0] = v0;
10893 dw[1] = v0 >> 32;
10907 uint32_t * restrict dw = (uint32_t * restrict) dst;
10911 dw[0] = v0;
10912 dw[1] = v0 >> 32;
10926 uint32_t * restrict dw = (uint32_t * restrict) dst;
10928 dw[0] =
10943 uint32_t * restrict dw = (uint32_t * restrict) dst;
10945 dw[0] =
10960 uint32_t * restrict dw = (uint32_t * restrict) dst;
10962 dw[0] =
10977 uint32_t * restrict dw = (uint32_t * restrict) dst;
10979 dw[0] =
11025 uint32_t * restrict dw = (uint32_t * restrict) dst;
11027 dw[0] =
11075 uint32_t * restrict dw = (uint32_t * restrict) dst;
11077 dw[0] =
11094 uint32_t * restrict dw = (uint32_t * restrict) dst;
11098 dw[0] = v0;
11099 dw[1] = v0 >> 32;