Lines Matching refs:uint
941 uint *out = (uint*)_out;
945 (out+j)[0] = (uint)(i);
953 uint *out = (uint*)_out;
957 (out+j)[0] = (uint)(i);
958 (out+j)[1] = (uint)(i+1);
966 uint *out = (uint*)_out;
970 (out+j)[0] = (uint)(i);
971 (out+j)[1] = (uint)(i+1);
979 uint *out = (uint*)_out;
983 (out+j)[0] = (uint)(i);
984 (out+j)[1] = (uint)(i+1);
986 (out+j)[0] = (uint)(i);
987 (out+j)[1] = (uint)(start);
994 uint *out = (uint*)_out;
998 (out+j)[0] = (uint)(i);
999 (out+j)[1] = (uint)(i+1);
1000 (out+j)[2] = (uint)(i+2);
1008 uint *out = (uint*)_out;
1012 (out+j)[0] = (uint)(i);
1013 (out+j)[1] = (uint)(i+1+(i&1));
1014 (out+j)[2] = (uint)(i+2-(i&1));
1022 uint *out = (uint*)_out;
1026 (out+j)[0] = (uint)(start);
1027 (out+j)[1] = (uint)(i+1);
1028 (out+j)[2] = (uint)(i+2);
1036 uint *out = (uint*)_out;
1040 (out+j+0)[0] = (uint)(i+0);
1041 (out+j+0)[1] = (uint)(i+1);
1042 (out+j+0)[2] = (uint)(i+2);
1043 (out+j+3)[0] = (uint)(i+0);
1044 (out+j+3)[1] = (uint)(i+2);
1045 (out+j+3)[2] = (uint)(i+3);
1053 uint *out = (uint*)_out;
1057 (out+j+0)[0] = (uint)(i+0);
1058 (out+j+0)[1] = (uint)(i+1);
1059 (out+j+0)[2] = (uint)(i+3);
1060 (out+j+3)[0] = (uint)(i+0);
1061 (out+j+3)[1] = (uint)(i+3);
1062 (out+j+3)[2] = (uint)(i+2);
1070 uint *out = (uint*)_out;
1074 (out+j)[0] = (uint)(start);
1075 (out+j)[1] = (uint)(i+1);
1076 (out+j)[2] = (uint)(i+2);
1084 uint *out = (uint*)_out;
1088 (out+j)[0] = (uint)(i+0);
1089 (out+j)[1] = (uint)(i+1);
1090 (out+j)[2] = (uint)(i+2);
1091 (out+j)[3] = (uint)(i+3);
1099 uint *out = (uint*)_out;
1103 (out+j)[0] = (uint)(i+0);
1104 (out+j)[1] = (uint)(i+1);
1105 (out+j)[2] = (uint)(i+2);
1106 (out+j)[3] = (uint)(i+3);
1114 uint *out = (uint*)_out;
1118 (out+j)[0] = (uint)(i+0);
1119 (out+j)[1] = (uint)(i+1);
1120 (out+j)[2] = (uint)(i+2);
1121 (out+j)[3] = (uint)(i+3);
1122 (out+j)[4] = (uint)(i+4);
1123 (out+j)[5] = (uint)(i+5);
1131 uint *out = (uint*)_out;
1137 (out+j)[0] = (uint)(i+0);
1138 (out+j)[1] = (uint)(i+1);
1139 (out+j)[2] = (uint)(i+2);
1140 (out+j)[3] = (uint)(i+3);
1141 (out+j)[4] = (uint)(i+4);
1142 (out+j)[5] = (uint)(i+5);
1145 (out+j)[0] = (uint)(i+2);
1146 (out+j)[1] = (uint)(i-2);
1147 (out+j)[2] = (uint)(i+0);
1148 (out+j)[3] = (uint)(i+3);
1149 (out+j)[4] = (uint)(i+4);
1150 (out+j)[5] = (uint)(i+6);
1159 uint *out = (uint*)_out;
1163 (out+j)[0] = (uint)(i);
1171 uint *out = (uint*)_out;
1175 (out+j)[0] = (uint)(i+1);
1176 (out+j)[1] = (uint)(i);
1184 uint *out = (uint*)_out;
1188 (out+j)[0] = (uint)(i+1);
1189 (out+j)[1] = (uint)(i);
1197 uint *out = (uint*)_out;
1201 (out+j)[0] = (uint)(i+1);
1202 (out+j)[1] = (uint)(i);
1204 (out+j)[0] = (uint)(start);
1205 (out+j)[1] = (uint)(i);
1212 uint *out = (uint*)_out;
1216 (out+j)[0] = (uint)(i+1);
1217 (out+j)[1] = (uint)(i+2);
1218 (out+j)[2] = (uint)(i);
1226 uint *out = (uint*)_out;
1230 (out+j)[0] = (uint)(i+1+(i&1));
1231 (out+j)[1] = (uint)(i+2-(i&1));
1232 (out+j)[2] = (uint)(i);
1240 uint *out = (uint*)_out;
1244 (out+j)[0] = (uint)(i+1);
1245 (out+j)[1] = (uint)(i+2);
1246 (out+j)[2] = (uint)(start);
1254 uint *out = (uint*)_out;
1258 (out+j+0)[0] = (uint)(i+1);
1259 (out+j+0)[1] = (uint)(i+2);
1260 (out+j+0)[2] = (uint)(i+0);
1261 (out+j+3)[0] = (uint)(i+2);
1262 (out+j+3)[1] = (uint)(i+3);
1263 (out+j+3)[2] = (uint)(i+0);
1271 uint *out = (uint*)_out;
1275 (out+j+0)[0] = (uint)(i+1);
1276 (out+j+0)[1] = (uint)(i+3);
1277 (out+j+0)[2] = (uint)(i+0);
1278 (out+j+3)[0] = (uint)(i+3);
1279 (out+j+3)[1] = (uint)(i+2);
1280 (out+j+3)[2] = (uint)(i+0);
1288 uint *out = (uint*)_out;
1292 (out+j)[0] = (uint)(i+1);
1293 (out+j)[1] = (uint)(i+2);
1294 (out+j)[2] = (uint)(start);
1302 uint *out = (uint*)_out;
1306 (out+j)[0] = (uint)(i+3);
1307 (out+j)[1] = (uint)(i+2);
1308 (out+j)[2] = (uint)(i+1);
1309 (out+j)[3] = (uint)(i+0);
1317 uint *out = (uint*)_out;
1321 (out+j)[0] = (uint)(i+3);
1322 (out+j)[1] = (uint)(i+2);
1323 (out+j)[2] = (uint)(i+1);
1324 (out+j)[3] = (uint)(i+0);
1332 uint *out = (uint*)_out;
1336 (out+j)[0] = (uint)(i+4);
1337 (out+j)[1] = (uint)(i+5);
1338 (out+j)[2] = (uint)(i+0);
1339 (out+j)[3] = (uint)(i+1);
1340 (out+j)[4] = (uint)(i+2);
1341 (out+j)[5] = (uint)(i+3);
1349 uint *out = (uint*)_out;
1355 (out+j)[0] = (uint)(i+4);
1356 (out+j)[1] = (uint)(i+5);
1357 (out+j)[2] = (uint)(i+0);
1358 (out+j)[3] = (uint)(i+1);
1359 (out+j)[4] = (uint)(i+2);
1360 (out+j)[5] = (uint)(i+3);
1363 (out+j)[0] = (uint)(i+4);
1364 (out+j)[1] = (uint)(i+6);
1365 (out+j)[2] = (uint)(i+2);
1366 (out+j)[3] = (uint)(i-2);
1367 (out+j)[4] = (uint)(i+0);
1368 (out+j)[5] = (uint)(i+3);
1377 uint *out = (uint*)_out;
1381 (out+j)[0] = (uint)(i);
1389 uint *out = (uint*)_out;
1393 (out+j)[0] = (uint)(i+1);
1394 (out+j)[1] = (uint)(i);
1402 uint *out = (uint*)_out;
1406 (out+j)[0] = (uint)(i+1);
1407 (out+j)[1] = (uint)(i);
1415 uint *out = (uint*)_out;
1419 (out+j)[0] = (uint)(i+1);
1420 (out+j)[1] = (uint)(i);
1422 (out+j)[0] = (uint)(start);
1423 (out+j)[1] = (uint)(i);
1430 uint *out = (uint*)_out;
1434 (out+j)[0] = (uint)(i+2);
1435 (out+j)[1] = (uint)(i);
1436 (out+j)[2] = (uint)(i+1);
1444 uint *out = (uint*)_out;
1448 (out+j)[0] = (uint)(i+2);
1449 (out+j)[1] = (uint)(i+(i&1));
1450 (out+j)[2] = (uint)(i+1-(i&1));
1458 uint *out = (uint*)_out;
1462 (out+j)[0] = (uint)(i+2);
1463 (out+j)[1] = (uint)(start);
1464 (out+j)[2] = (uint)(i+1);
1472 uint *out = (uint*)_out;
1476 (out+j+0)[0] = (uint)(i+3);
1477 (out+j+0)[1] = (uint)(i+0);
1478 (out+j+0)[2] = (uint)(i+1);
1479 (out+j+3)[0] = (uint)(i+3);
1480 (out+j+3)[1] = (uint)(i+1);
1481 (out+j+3)[2] = (uint)(i+2);
1489 uint *out = (uint*)_out;
1493 (out+j+0)[0] = (uint)(i+3);
1494 (out+j+0)[1] = (uint)(i+2);
1495 (out+j+0)[2] = (uint)(i+0);
1496 (out+j+3)[0] = (uint)(i+3);
1497 (out+j+3)[1] = (uint)(i+0);
1498 (out+j+3)[2] = (uint)(i+1);
1506 uint *out = (uint*)_out;
1510 (out+j)[0] = (uint)(start);
1511 (out+j)[1] = (uint)(i+1);
1512 (out+j)[2] = (uint)(i+2);
1520 uint *out = (uint*)_out;
1524 (out+j)[0] = (uint)(i+3);
1525 (out+j)[1] = (uint)(i+2);
1526 (out+j)[2] = (uint)(i+1);
1527 (out+j)[3] = (uint)(i+0);
1535 uint *out = (uint*)_out;
1539 (out+j)[0] = (uint)(i+3);
1540 (out+j)[1] = (uint)(i+2);
1541 (out+j)[2] = (uint)(i+1);
1542 (out+j)[3] = (uint)(i+0);
1550 uint *out = (uint*)_out;
1554 (out+j)[0] = (uint)(i+4);
1555 (out+j)[1] = (uint)(i+5);
1556 (out+j)[2] = (uint)(i+0);
1557 (out+j)[3] = (uint)(i+1);
1558 (out+j)[4] = (uint)(i+2);
1559 (out+j)[5] = (uint)(i+3);
1567 uint *out = (uint*)_out;
1573 (out+j)[0] = (uint)(i+4);
1574 (out+j)[1] = (uint)(i+5);
1575 (out+j)[2] = (uint)(i+0);
1576 (out+j)[3] = (uint)(i+1);
1577 (out+j)[4] = (uint)(i+2);
1578 (out+j)[5] = (uint)(i+3);
1581 (out+j)[0] = (uint)(i+4);
1582 (out+j)[1] = (uint)(i+6);
1583 (out+j)[2] = (uint)(i+2);
1584 (out+j)[3] = (uint)(i-2);
1585 (out+j)[4] = (uint)(i+0);
1586 (out+j)[5] = (uint)(i+3);
1595 uint *out = (uint*)_out;
1599 (out+j)[0] = (uint)(i);
1607 uint *out = (uint*)_out;
1611 (out+j)[0] = (uint)(i);
1612 (out+j)[1] = (uint)(i+1);
1620 uint *out = (uint*)_out;
1624 (out+j)[0] = (uint)(i);
1625 (out+j)[1] = (uint)(i+1);
1633 uint *out = (uint*)_out;
1637 (out+j)[0] = (uint)(i);
1638 (out+j)[1] = (uint)(i+1);
1640 (out+j)[0] = (uint)(i);
1641 (out+j)[1] = (uint)(start);
1648 uint *out = (uint*)_out;
1652 (out+j)[0] = (uint)(i);
1653 (out+j)[1] = (uint)(i+1);
1654 (out+j)[2] = (uint)(i+2);
1662 uint *out = (uint*)_out;
1666 (out+j)[0] = (uint)(i+(i&1));
1667 (out+j)[1] = (uint)(i+1-(i&1));
1668 (out+j)[2] = (uint)(i+2);
1676 uint *out = (uint*)_out;
1680 (out+j)[0] = (uint)(start);
1681 (out+j)[1] = (uint)(i+1);
1682 (out+j)[2] = (uint)(i+2);
1690 uint *out = (uint*)_out;
1694 (out+j+0)[0] = (uint)(i+0);
1695 (out+j+0)[1] = (uint)(i+1);
1696 (out+j+0)[2] = (uint)(i+3);
1697 (out+j+3)[0] = (uint)(i+1);
1698 (out+j+3)[1] = (uint)(i+2);
1699 (out+j+3)[2] = (uint)(i+3);
1707 uint *out = (uint*)_out;
1711 (out+j+0)[0] = (uint)(i+2);
1712 (out+j+0)[1] = (uint)(i+0);
1713 (out+j+0)[2] = (uint)(i+3);
1714 (out+j+3)[0] = (uint)(i+0);
1715 (out+j+3)[1] = (uint)(i+1);
1716 (out+j+3)[2] = (uint)(i+3);
1724 uint *out = (uint*)_out;
1728 (out+j)[0] = (uint)(i+1);
1729 (out+j)[1] = (uint)(i+2);
1730 (out+j)[2] = (uint)(start);
1738 uint *out = (uint*)_out;
1742 (out+j)[0] = (uint)(i+0);
1743 (out+j)[1] = (uint)(i+1);
1744 (out+j)[2] = (uint)(i+2);
1745 (out+j)[3] = (uint)(i+3);
1753 uint *out = (uint*)_out;
1757 (out+j)[0] = (uint)(i+0);
1758 (out+j)[1] = (uint)(i+1);
1759 (out+j)[2] = (uint)(i+2);
1760 (out+j)[3] = (uint)(i+3);
1768 uint *out = (uint*)_out;
1772 (out+j)[0] = (uint)(i+0);
1773 (out+j)[1] = (uint)(i+1);
1774 (out+j)[2] = (uint)(i+2);
1775 (out+j)[3] = (uint)(i+3);
1776 (out+j)[4] = (uint)(i+4);
1777 (out+j)[5] = (uint)(i+5);
1785 uint *out = (uint*)_out;
1791 (out+j)[0] = (uint)(i+0);
1792 (out+j)[1] = (uint)(i+1);
1793 (out+j)[2] = (uint)(i+2);
1794 (out+j)[3] = (uint)(i+3);
1795 (out+j)[4] = (uint)(i+4);
1796 (out+j)[5] = (uint)(i+5);
1799 (out+j)[0] = (uint)(i+2);
1800 (out+j)[1] = (uint)(i-2);
1801 (out+j)[2] = (uint)(i+0);
1802 (out+j)[3] = (uint)(i+3);
1803 (out+j)[4] = (uint)(i+4);
1804 (out+j)[5] = (uint)(i+6);
4305 uint *out = (uint*)_out;
4309 (out+j)[0] = (uint)in[i];
4321 uint *out = (uint*)_out;
4325 (out+j)[0] = (uint)in[i];
4326 (out+j)[1] = (uint)in[i+1];
4338 uint *out = (uint*)_out;
4342 (out+j)[0] = (uint)in[i];
4343 (out+j)[1] = (uint)in[i+1];
4355 uint *out = (uint*)_out;
4359 (out+j)[0] = (uint)in[i];
4360 (out+j)[1] = (uint)in[i+1];
4362 (out+j)[0] = (uint)in[i];
4363 (out+j)[1] = (uint)in[start];
4374 uint *out = (uint*)_out;
4378 (out+j)[0] = (uint)in[i];
4379 (out+j)[1] = (uint)in[i+1];
4380 (out+j)[2] = (uint)in[i+2];
4392 uint *out = (uint*)_out;
4396 (out+j)[0] = (uint)in[i];
4397 (out+j)[1] = (uint)in[i+1+(i&1)];
4398 (out+j)[2] = (uint)in[i+2-(i&1)];
4410 uint *out = (uint*)_out;
4414 (out+j)[0] = (uint)in[start];
4415 (out+j)[1] = (uint)in[i+1];
4416 (out+j)[2] = (uint)in[i+2];
4428 uint *out = (uint*)_out;
4432 (out+j+0)[0] = (uint)in[i+0];
4433 (out+j+0)[1] = (uint)in[i+1];
4434 (out+j+0)[2] = (uint)in[i+2];
4435 (out+j+3)[0] = (uint)in[i+0];
4436 (out+j+3)[1] = (uint)in[i+2];
4437 (out+j+3)[2] = (uint)in[i+3];
4449 uint *out = (uint*)_out;
4453 (out+j+0)[0] = (uint)in[i+0];
4454 (out+j+0)[1] = (uint)in[i+1];
4455 (out+j+0)[2] = (uint)in[i+3];
4456 (out+j+3)[0] = (uint)in[i+0];
4457 (out+j+3)[1] = (uint)in[i+3];
4458 (out+j+3)[2] = (uint)in[i+2];
4470 uint *out = (uint*)_out;
4474 (out+j)[0] = (uint)in[start];
4475 (out+j)[1] = (uint)in[i+1];
4476 (out+j)[2] = (uint)in[i+2];
4488 uint *out = (uint*)_out;
4492 (out+j)[0] = (uint)in[i+0];
4493 (out+j)[1] = (uint)in[i+1];
4494 (out+j)[2] = (uint)in[i+2];
4495 (out+j)[3] = (uint)in[i+3];
4507 uint *out = (uint*)_out;
4511 (out+j)[0] = (uint)in[i+0];
4512 (out+j)[1] = (uint)in[i+1];
4513 (out+j)[2] = (uint)in[i+2];
4514 (out+j)[3] = (uint)in[i+3];
4526 uint *out = (uint*)_out;
4530 (out+j)[0] = (uint)in[i+0];
4531 (out+j)[1] = (uint)in[i+1];
4532 (out+j)[2] = (uint)in[i+2];
4533 (out+j)[3] = (uint)in[i+3];
4534 (out+j)[4] = (uint)in[i+4];
4535 (out+j)[5] = (uint)in[i+5];
4547 uint *out = (uint*)_out;
4553 (out+j)[0] = (uint)in[i+0];
4554 (out+j)[1] = (uint)in[i+1];
4555 (out+j)[2] = (uint)in[i+2];
4556 (out+j)[3] = (uint)in[i+3];
4557 (out+j)[4] = (uint)in[i+4];
4558 (out+j)[5] = (uint)in[i+5];
4561 (out+j)[0] = (uint)in[i+2];
4562 (out+j)[1] = (uint)in[i-2];
4563 (out+j)[2] = (uint)in[i+0];
4564 (out+j)[3] = (uint)in[i+3];
4565 (out+j)[4] = (uint)in[i+4];
4566 (out+j)[5] = (uint)in[i+6];
4579 uint *out = (uint*)_out;
4583 (out+j)[0] = (uint)in[i];
4595 uint *out = (uint*)_out;
4599 (out+j)[0] = (uint)in[i];
4600 (out+j)[1] = (uint)in[i+1];
4612 uint *out = (uint*)_out;
4616 (out+j)[0] = (uint)in[i];
4617 (out+j)[1] = (uint)in[i+1];
4629 uint *out = (uint*)_out;
4633 (out+j)[0] = (uint)in[i];
4634 (out+j)[1] = (uint)in[i+1];
4636 (out+j)[0] = (uint)in[i];
4637 (out+j)[1] = (uint)in[start];
4648 uint *out = (uint*)_out;
4652 (out+j)[0] = (uint)in[i];
4653 (out+j)[1] = (uint)in[i+1];
4654 (out+j)[2] = (uint)in[i+2];
4666 uint *out = (uint*)_out;
4670 (out+j)[0] = (uint)in[i];
4671 (out+j)[1] = (uint)in[i+1+(i&1)];
4672 (out+j)[2] = (uint)in[i+2-(i&1)];
4684 uint *out = (uint*)_out;
4688 (out+j)[0] = (uint)in[start];
4689 (out+j)[1] = (uint)in[i+1];
4690 (out+j)[2] = (uint)in[i+2];
4702 uint *out = (uint*)_out;
4732 (out+j+0)[0] = (uint)in[i+0];
4733 (out+j+0)[1] = (uint)in[i+1];
4734 (out+j+0)[2] = (uint)in[i+2];
4735 (out+j+3)[0] = (uint)in[i+0];
4736 (out+j+3)[1] = (uint)in[i+2];
4737 (out+j+3)[2] = (uint)in[i+3];
4749 uint *out = (uint*)_out;
4779 (out+j+0)[0] = (uint)in[i+0];
4780 (out+j+0)[1] = (uint)in[i+1];
4781 (out+j+0)[2] = (uint)in[i+3];
4782 (out+j+3)[0] = (uint)in[i+0];
4783 (out+j+3)[1] = (uint)in[i+3];
4784 (out+j+3)[2] = (uint)in[i+2];
4796 uint *out = (uint*)_out;
4822 (out+j)[0] = (uint)in[start];
4823 (out+j)[1] = (uint)in[i+1];
4824 (out+j)[2] = (uint)in[i+2];
4836 uint *out = (uint*)_out;
4840 (out+j)[0] = (uint)in[i+0];
4841 (out+j)[1] = (uint)in[i+1];
4842 (out+j)[2] = (uint)in[i+2];
4843 (out+j)[3] = (uint)in[i+3];
4855 uint *out = (uint*)_out;
4859 (out+j)[0] = (uint)in[i+0];
4860 (out+j)[1] = (uint)in[i+1];
4861 (out+j)[2] = (uint)in[i+2];
4862 (out+j)[3] = (uint)in[i+3];
4874 uint *out = (uint*)_out;
4878 (out+j)[0] = (uint)in[i+0];
4879 (out+j)[1] = (uint)in[i+1];
4880 (out+j)[2] = (uint)in[i+2];
4881 (out+j)[3] = (uint)in[i+3];
4882 (out+j)[4] = (uint)in[i+4];
4883 (out+j)[5] = (uint)in[i+5];
4895 uint *out = (uint*)_out;
4901 (out+j)[0] = (uint)in[i+0];
4902 (out+j)[1] = (uint)in[i+1];
4903 (out+j)[2] = (uint)in[i+2];
4904 (out+j)[3] = (uint)in[i+3];
4905 (out+j)[4] = (uint)in[i+4];
4906 (out+j)[5] = (uint)in[i+5];
4909 (out+j)[0] = (uint)in[i+2];
4910 (out+j)[1] = (uint)in[i-2];
4911 (out+j)[2] = (uint)in[i+0];
4912 (out+j)[3] = (uint)in[i+3];
4913 (out+j)[4] = (uint)in[i+4];
4914 (out+j)[5] = (uint)in[i+6];
4927 uint *out = (uint*)_out;
4931 (out+j)[0] = (uint)in[i];
4943 uint *out = (uint*)_out;
4947 (out+j)[0] = (uint)in[i+1];
4948 (out+j)[1] = (uint)in[i];
4960 uint *out = (uint*)_out;
4964 (out+j)[0] = (uint)in[i+1];
4965 (out+j)[1] = (uint)in[i];
4977 uint *out = (uint*)_out;
4981 (out+j)[0] = (uint)in[i+1];
4982 (out+j)[1] = (uint)in[i];
4984 (out+j)[0] = (uint)in[start];
4985 (out+j)[1] = (uint)in[i];
4996 uint *out = (uint*)_out;
5000 (out+j)[0] = (uint)in[i+1];
5001 (out+j)[1] = (uint)in[i+2];
5002 (out+j)[2] = (uint)in[i];
5014 uint *out = (uint*)_out;
5018 (out+j)[0] = (uint)in[i+1+(i&1)];
5019 (out+j)[1] = (uint)in[i+2-(i&1)];
5020 (out+j)[2] = (uint)in[i];
5032 uint *out = (uint*)_out;
5036 (out+j)[0] = (uint)in[i+1];
5037 (out+j)[1] = (uint)in[i+2];
5038 (out+j)[2] = (uint)in[start];
5050 uint *out = (uint*)_out;
5054 (out+j+0)[0] = (uint)in[i+1];
5055 (out+j+0)[1] = (uint)in[i+2];
5056 (out+j+0)[2] = (uint)in[i+0];
5057 (out+j+3)[0] = (uint)in[i+2];
5058 (out+j+3)[1] = (uint)in[i+3];
5059 (out+j+3)[2] = (uint)in[i+0];
5071 uint *out = (uint*)_out;
5075 (out+j+0)[0] = (uint)in[i+1];
5076 (out+j+0)[1] = (uint)in[i+3];
5077 (out+j+0)[2] = (uint)in[i+0];
5078 (out+j+3)[0] = (uint)in[i+3];
5079 (out+j+3)[1] = (uint)in[i+2];
5080 (out+j+3)[2] = (uint)in[i+0];
5092 uint *out = (uint*)_out;
5096 (out+j)[0] = (uint)in[i+1];
5097 (out+j)[1] = (uint)in[i+2];
5098 (out+j)[2] = (uint)in[start];
5110 uint *out = (uint*)_out;
5114 (out+j)[0] = (uint)in[i+3];
5115 (out+j)[1] = (uint)in[i+2];
5116 (out+j)[2] = (uint)in[i+1];
5117 (out+j)[3] = (uint)in[i+0];
5129 uint *out = (uint*)_out;
5133 (out+j)[0] = (uint)in[i+3];
5134 (out+j)[1] = (uint)in[i+2];
5135 (out+j)[2] = (uint)in[i+1];
5136 (out+j)[3] = (uint)in[i+0];
5148 uint *out = (uint*)_out;
5152 (out+j)[0] = (uint)in[i+4];
5153 (out+j)[1] = (uint)in[i+5];
5154 (out+j)[2] = (uint)in[i+0];
5155 (out+j)[3] = (uint)in[i+1];
5156 (out+j)[4] = (uint)in[i+2];
5157 (out+j)[5] = (uint)in[i+3];
5169 uint *out = (uint*)_out;
5175 (out+j)[0] = (uint)in[i+4];
5176 (out+j)[1] = (uint)in[i+5];
5177 (out+j)[2] = (uint)in[i+0];
5178 (out+j)[3] = (uint)in[i+1];
5179 (out+j)[4] = (uint)in[i+2];
5180 (out+j)[5] = (uint)in[i+3];
5183 (out+j)[0] = (uint)in[i+4];
5184 (out+j)[1] = (uint)in[i+6];
5185 (out+j)[2] = (uint)in[i+2];
5186 (out+j)[3] = (uint)in[i-2];
5187 (out+j)[4] = (uint)in[i+0];
5188 (out+j)[5] = (uint)in[i+3];
5201 uint *out = (uint*)_out;
5205 (out+j)[0] = (uint)in[i];
5217 uint *out = (uint*)_out;
5221 (out+j)[0] = (uint)in[i+1];
5222 (out+j)[1] = (uint)in[i];
5234 uint *out = (uint*)_out;
5238 (out+j)[0] = (uint)in[i+1];
5239 (out+j)[1] = (uint)in[i];
5251 uint *out = (uint*)_out;
5255 (out+j)[0] = (uint)in[i+1];
5256 (out+j)[1] = (uint)in[i];
5258 (out+j)[0] = (uint)in[start];
5259 (out+j)[1] = (uint)in[i];
5270 uint *out = (uint*)_out;
5274 (out+j)[0] = (uint)in[i+1];
5275 (out+j)[1] = (uint)in[i+2];
5276 (out+j)[2] = (uint)in[i];
5288 uint *out = (uint*)_out;
5292 (out+j)[0] = (uint)in[i+1+(i&1)];
5293 (out+j)[1] = (uint)in[i+2-(i&1)];
5294 (out+j)[2] = (uint)in[i];
5306 uint *out = (uint*)_out;
5310 (out+j)[0] = (uint)in[i+1];
5311 (out+j)[1] = (uint)in[i+2];
5312 (out+j)[2] = (uint)in[start];
5324 uint *out = (uint*)_out;
5354 (out+j+0)[0] = (uint)in[i+1];
5355 (out+j+0)[1] = (uint)in[i+2];
5356 (out+j+0)[2] = (uint)in[i+0];
5357 (out+j+3)[0] = (uint)in[i+2];
5358 (out+j+3)[1] = (uint)in[i+3];
5359 (out+j+3)[2] = (uint)in[i+0];
5371 uint *out = (uint*)_out;
5401 (out+j+0)[0] = (uint)in[i+1];
5402 (out+j+0)[1] = (uint)in[i+3];
5403 (out+j+0)[2] = (uint)in[i+0];
5404 (out+j+3)[0] = (uint)in[i+3];
5405 (out+j+3)[1] = (uint)in[i+2];
5406 (out+j+3)[2] = (uint)in[i+0];
5418 uint *out = (uint*)_out;
5444 (out+j)[0] = (uint)in[i+1];
5445 (out+j)[1] = (uint)in[i+2];
5446 (out+j)[2] = (uint)in[start];
5458 uint *out = (uint*)_out;
5462 (out+j)[0] = (uint)in[i+3];
5463 (out+j)[1] = (uint)in[i+2];
5464 (out+j)[2] = (uint)in[i+1];
5465 (out+j)[3] = (uint)in[i+0];
5477 uint *out = (uint*)_out;
5481 (out+j)[0] = (uint)in[i+3];
5482 (out+j)[1] = (uint)in[i+2];
5483 (out+j)[2] = (uint)in[i+1];
5484 (out+j)[3] = (uint)in[i+0];
5496 uint *out = (uint*)_out;
5500 (out+j)[0] = (uint)in[i+4];
5501 (out+j)[1] = (uint)in[i+5];
5502 (out+j)[2] = (uint)in[i+0];
5503 (out+j)[3] = (uint)in[i+1];
5504 (out+j)[4] = (uint)in[i+2];
5505 (out+j)[5] = (uint)in[i+3];
5517 uint *out = (uint*)_out;
5523 (out+j)[0] = (uint)in[i+4];
5524 (out+j)[1] = (uint)in[i+5];
5525 (out+j)[2] = (uint)in[i+0];
5526 (out+j)[3] = (uint)in[i+1];
5527 (out+j)[4] = (uint)in[i+2];
5528 (out+j)[5] = (uint)in[i+3];
5531 (out+j)[0] = (uint)in[i+4];
5532 (out+j)[1] = (uint)in[i+6];
5533 (out+j)[2] = (uint)in[i+2];
5534 (out+j)[3] = (uint)in[i-2];
5535 (out+j)[4] = (uint)in[i+0];
5536 (out+j)[5] = (uint)in[i+3];
5549 uint *out = (uint*)_out;
5553 (out+j)[0] = (uint)in[i];
5565 uint *out = (uint*)_out;
5569 (out+j)[0] = (uint)in[i+1];
5570 (out+j)[1] = (uint)in[i];
5582 uint *out = (uint*)_out;
5586 (out+j)[0] = (uint)in[i+1];
5587 (out+j)[1] = (uint)in[i];
5599 uint *out = (uint*)_out;
5603 (out+j)[0] = (uint)in[i+1];
5604 (out+j)[1] = (uint)in[i];
5606 (out+j)[0] = (uint)in[start];
5607 (out+j)[1] = (uint)in[i];
5618 uint *out = (uint*)_out;
5622 (out+j)[0] = (uint)in[i+2];
5623 (out+j)[1] = (uint)in[i];
5624 (out+j)[2] = (uint)in[i+1];
5636 uint *out = (uint*)_out;
5640 (out+j)[0] = (uint)in[i+2];
5641 (out+j)[1] = (uint)in[i+(i&1)];
5642 (out+j)[2] = (uint)in[i+1-(i&1)];
5654 uint *out = (uint*)_out;
5658 (out+j)[0] = (uint)in[i+2];
5659 (out+j)[1] = (uint)in[start];
5660 (out+j)[2] = (uint)in[i+1];
5672 uint *out = (uint*)_out;
5676 (out+j+0)[0] = (uint)in[i+3];
5677 (out+j+0)[1] = (uint)in[i+0];
5678 (out+j+0)[2] = (uint)in[i+1];
5679 (out+j+3)[0] = (uint)in[i+3];
5680 (out+j+3)[1] = (uint)in[i+1];
5681 (out+j+3)[2] = (uint)in[i+2];
5693 uint *out = (uint*)_out;
5697 (out+j+0)[0] = (uint)in[i+3];
5698 (out+j+0)[1] = (uint)in[i+2];
5699 (out+j+0)[2] = (uint)in[i+0];
5700 (out+j+3)[0] = (uint)in[i+3];
5701 (out+j+3)[1] = (uint)in[i+0];
5702 (out+j+3)[2] = (uint)in[i+1];
5714 uint *out = (uint*)_out;
5718 (out+j)[0] = (uint)in[start];
5719 (out+j)[1] = (uint)in[i+1];
5720 (out+j)[2] = (uint)in[i+2];
5732 uint *out = (uint*)_out;
5736 (out+j)[0] = (uint)in[i+3];
5737 (out+j)[1] = (uint)in[i+2];
5738 (out+j)[2] = (uint)in[i+1];
5739 (out+j)[3] = (uint)in[i+0];
5751 uint *out = (uint*)_out;
5755 (out+j)[0] = (uint)in[i+3];
5756 (out+j)[1] = (uint)in[i+2];
5757 (out+j)[2] = (uint)in[i+1];
5758 (out+j)[3] = (uint)in[i+0];
5770 uint *out = (uint*)_out;
5774 (out+j)[0] = (uint)in[i+4];
5775 (out+j)[1] = (uint)in[i+5];
5776 (out+j)[2] = (uint)in[i+0];
5777 (out+j)[3] = (uint)in[i+1];
5778 (out+j)[4] = (uint)in[i+2];
5779 (out+j)[5] = (uint)in[i+3];
5791 uint *out = (uint*)_out;
5797 (out+j)[0] = (uint)in[i+4];
5798 (out+j)[1] = (uint)in[i+5];
5799 (out+j)[2] = (uint)in[i+0];
5800 (out+j)[3] = (uint)in[i+1];
5801 (out+j)[4] = (uint)in[i+2];
5802 (out+j)[5] = (uint)in[i+3];
5805 (out+j)[0] = (uint)in[i+4];
5806 (out+j)[1] = (uint)in[i+6];
5807 (out+j)[2] = (uint)in[i+2];
5808 (out+j)[3] = (uint)in[i-2];
5809 (out+j)[4] = (uint)in[i+0];
5810 (out+j)[5] = (uint)in[i+3];
5823 uint *out = (uint*)_out;
5827 (out+j)[0] = (uint)in[i];
5839 uint *out = (uint*)_out;
5843 (out+j)[0] = (uint)in[i+1];
5844 (out+j)[1] = (uint)in[i];
5856 uint *out = (uint*)_out;
5860 (out+j)[0] = (uint)in[i+1];
5861 (out+j)[1] = (uint)in[i];
5873 uint *out = (uint*)_out;
5877 (out+j)[0] = (uint)in[i+1];
5878 (out+j)[1] = (uint)in[i];
5880 (out+j)[0] = (uint)in[start];
5881 (out+j)[1] = (uint)in[i];
5892 uint *out = (uint*)_out;
5896 (out+j)[0] = (uint)in[i+2];
5897 (out+j)[1] = (uint)in[i];
5898 (out+j)[2] = (uint)in[i+1];
5910 uint *out = (uint*)_out;
5914 (out+j)[0] = (uint)in[i+2];
5915 (out+j)[1] = (uint)in[i+(i&1)];
5916 (out+j)[2] = (uint)in[i+1-(i&1)];
5928 uint *out = (uint*)_out;
5932 (out+j)[0] = (uint)in[i+2];
5933 (out+j)[1] = (uint)in[start];
5934 (out+j)[2] = (uint)in[i+1];
5946 uint *out = (uint*)_out;
5976 (out+j+0)[0] = (uint)in[i+3];
5977 (out+j+0)[1] = (uint)in[i+0];
5978 (out+j+0)[2] = (uint)in[i+1];
5979 (out+j+3)[0] = (uint)in[i+3];
5980 (out+j+3)[1] = (uint)in[i+1];
5981 (out+j+3)[2] = (uint)in[i+2];
5993 uint *out = (uint*)_out;
6023 (out+j+0)[0] = (uint)in[i+3];
6024 (out+j+0)[1] = (uint)in[i+2];
6025 (out+j+0)[2] = (uint)in[i+0];
6026 (out+j+3)[0] = (uint)in[i+3];
6027 (out+j+3)[1] = (uint)in[i+0];
6028 (out+j+3)[2] = (uint)in[i+1];
6040 uint *out = (uint*)_out;
6066 (out+j)[0] = (uint)in[start];
6067 (out+j)[1] = (uint)in[i+1];
6068 (out+j)[2] = (uint)in[i+2];
6080 uint *out = (uint*)_out;
6084 (out+j)[0] = (uint)in[i+3];
6085 (out+j)[1] = (uint)in[i+2];
6086 (out+j)[2] = (uint)in[i+1];
6087 (out+j)[3] = (uint)in[i+0];
6099 uint *out = (uint*)_out;
6103 (out+j)[0] = (uint)in[i+3];
6104 (out+j)[1] = (uint)in[i+2];
6105 (out+j)[2] = (uint)in[i+1];
6106 (out+j)[3] = (uint)in[i+0];
6118 uint *out = (uint*)_out;
6122 (out+j)[0] = (uint)in[i+4];
6123 (out+j)[1] = (uint)in[i+5];
6124 (out+j)[2] = (uint)in[i+0];
6125 (out+j)[3] = (uint)in[i+1];
6126 (out+j)[4] = (uint)in[i+2];
6127 (out+j)[5] = (uint)in[i+3];
6139 uint *out = (uint*)_out;
6145 (out+j)[0] = (uint)in[i+4];
6146 (out+j)[1] = (uint)in[i+5];
6147 (out+j)[2] = (uint)in[i+0];
6148 (out+j)[3] = (uint)in[i+1];
6149 (out+j)[4] = (uint)in[i+2];
6150 (out+j)[5] = (uint)in[i+3];
6153 (out+j)[0] = (uint)in[i+4];
6154 (out+j)[1] = (uint)in[i+6];
6155 (out+j)[2] = (uint)in[i+2];
6156 (out+j)[3] = (uint)in[i-2];
6157 (out+j)[4] = (uint)in[i+0];
6158 (out+j)[5] = (uint)in[i+3];
6171 uint *out = (uint*)_out;
6175 (out+j)[0] = (uint)in[i];
6187 uint *out = (uint*)_out;
6191 (out+j)[0] = (uint)in[i];
6192 (out+j)[1] = (uint)in[i+1];
6204 uint *out = (uint*)_out;
6208 (out+j)[0] = (uint)in[i];
6209 (out+j)[1] = (uint)in[i+1];
6221 uint *out = (uint*)_out;
6225 (out+j)[0] = (uint)in[i];
6226 (out+j)[1] = (uint)in[i+1];
6228 (out+j)[0] = (uint)in[i];
6229 (out+j)[1] = (uint)in[start];
6240 uint *out = (uint*)_out;
6244 (out+j)[0] = (uint)in[i];
6245 (out+j)[1] = (uint)in[i+1];
6246 (out+j)[2] = (uint)in[i+2];
6258 uint *out = (uint*)_out;
6262 (out+j)[0] = (uint)in[i+(i&1)];
6263 (out+j)[1] = (uint)in[i+1-(i&1)];
6264 (out+j)[2] = (uint)in[i+2];
6276 uint *out = (uint*)_out;
6280 (out+j)[0] = (uint)in[start];
6281 (out+j)[1] = (uint)in[i+1];
6282 (out+j)[2] = (uint)in[i+2];
6294 uint *out = (uint*)_out;
6298 (out+j+0)[0] = (uint)in[i+0];
6299 (out+j+0)[1] = (uint)in[i+1];
6300 (out+j+0)[2] = (uint)in[i+3];
6301 (out+j+3)[0] = (uint)in[i+1];
6302 (out+j+3)[1] = (uint)in[i+2];
6303 (out+j+3)[2] = (uint)in[i+3];
6315 uint *out = (uint*)_out;
6319 (out+j+0)[0] = (uint)in[i+2];
6320 (out+j+0)[1] = (uint)in[i+0];
6321 (out+j+0)[2] = (uint)in[i+3];
6322 (out+j+3)[0] = (uint)in[i+0];
6323 (out+j+3)[1] = (uint)in[i+1];
6324 (out+j+3)[2] = (uint)in[i+3];
6336 uint *out = (uint*)_out;
6340 (out+j)[0] = (uint)in[i+1];
6341 (out+j)[1] = (uint)in[i+2];
6342 (out+j)[2] = (uint)in[start];
6354 uint *out = (uint*)_out;
6358 (out+j)[0] = (uint)in[i+0];
6359 (out+j)[1] = (uint)in[i+1];
6360 (out+j)[2] = (uint)in[i+2];
6361 (out+j)[3] = (uint)in[i+3];
6373 uint *out = (uint*)_out;
6377 (out+j)[0] = (uint)in[i+0];
6378 (out+j)[1] = (uint)in[i+1];
6379 (out+j)[2] = (uint)in[i+2];
6380 (out+j)[3] = (uint)in[i+3];
6392 uint *out = (uint*)_out;
6396 (out+j)[0] = (uint)in[i+0];
6397 (out+j)[1] = (uint)in[i+1];
6398 (out+j)[2] = (uint)in[i+2];
6399 (out+j)[3] = (uint)in[i+3];
6400 (out+j)[4] = (uint)in[i+4];
6401 (out+j)[5] = (uint)in[i+5];
6413 uint *out = (uint*)_out;
6419 (out+j)[0] = (uint)in[i+0];
6420 (out+j)[1] = (uint)in[i+1];
6421 (out+j)[2] = (uint)in[i+2];
6422 (out+j)[3] = (uint)in[i+3];
6423 (out+j)[4] = (uint)in[i+4];
6424 (out+j)[5] = (uint)in[i+5];
6427 (out+j)[0] = (uint)in[i+2];
6428 (out+j)[1] = (uint)in[i-2];
6429 (out+j)[2] = (uint)in[i+0];
6430 (out+j)[3] = (uint)in[i+3];
6431 (out+j)[4] = (uint)in[i+4];
6432 (out+j)[5] = (uint)in[i+6];
6445 uint *out = (uint*)_out;
6449 (out+j)[0] = (uint)in[i];
6461 uint *out = (uint*)_out;
6465 (out+j)[0] = (uint)in[i];
6466 (out+j)[1] = (uint)in[i+1];
6478 uint *out = (uint*)_out;
6482 (out+j)[0] = (uint)in[i];
6483 (out+j)[1] = (uint)in[i+1];
6495 uint *out = (uint*)_out;
6499 (out+j)[0] = (uint)in[i];
6500 (out+j)[1] = (uint)in[i+1];
6502 (out+j)[0] = (uint)in[i];
6503 (out+j)[1] = (uint)in[start];
6514 uint *out = (uint*)_out;
6518 (out+j)[0] = (uint)in[i];
6519 (out+j)[1] = (uint)in[i+1];
6520 (out+j)[2] = (uint)in[i+2];
6532 uint *out = (uint*)_out;
6536 (out+j)[0] = (uint)in[i+(i&1)];
6537 (out+j)[1] = (uint)in[i+1-(i&1)];
6538 (out+j)[2] = (uint)in[i+2];
6550 uint *out = (uint*)_out;
6554 (out+j)[0] = (uint)in[start];
6555 (out+j)[1] = (uint)in[i+1];
6556 (out+j)[2] = (uint)in[i+2];
6568 uint *out = (uint*)_out;
6598 (out+j+0)[0] = (uint)in[i+0];
6599 (out+j+0)[1] = (uint)in[i+1];
6600 (out+j+0)[2] = (uint)in[i+3];
6601 (out+j+3)[0] = (uint)in[i+1];
6602 (out+j+3)[1] = (uint)in[i+2];
6603 (out+j+3)[2] = (uint)in[i+3];
6615 uint *out = (uint*)_out;
6645 (out+j+0)[0] = (uint)in[i+2];
6646 (out+j+0)[1] = (uint)in[i+0];
6647 (out+j+0)[2] = (uint)in[i+3];
6648 (out+j+3)[0] = (uint)in[i+0];
6649 (out+j+3)[1] = (uint)in[i+1];
6650 (out+j+3)[2] = (uint)in[i+3];
6662 uint *out = (uint*)_out;
6688 (out+j)[0] = (uint)in[i+1];
6689 (out+j)[1] = (uint)in[i+2];
6690 (out+j)[2] = (uint)in[start];
6702 uint *out = (uint*)_out;
6706 (out+j)[0] = (uint)in[i+0];
6707 (out+j)[1] = (uint)in[i+1];
6708 (out+j)[2] = (uint)in[i+2];
6709 (out+j)[3] = (uint)in[i+3];
6721 uint *out = (uint*)_out;
6725 (out+j)[0] = (uint)in[i+0];
6726 (out+j)[1] = (uint)in[i+1];
6727 (out+j)[2] = (uint)in[i+2];
6728 (out+j)[3] = (uint)in[i+3];
6740 uint *out = (uint*)_out;
6744 (out+j)[0] = (uint)in[i+0];
6745 (out+j)[1] = (uint)in[i+1];
6746 (out+j)[2] = (uint)in[i+2];
6747 (out+j)[3] = (uint)in[i+3];
6748 (out+j)[4] = (uint)in[i+4];
6749 (out+j)[5] = (uint)in[i+5];
6761 uint *out = (uint*)_out;
6767 (out+j)[0] = (uint)in[i+0];
6768 (out+j)[1] = (uint)in[i+1];
6769 (out+j)[2] = (uint)in[i+2];
6770 (out+j)[3] = (uint)in[i+3];
6771 (out+j)[4] = (uint)in[i+4];
6772 (out+j)[5] = (uint)in[i+5];
6775 (out+j)[0] = (uint)in[i+2];
6776 (out+j)[1] = (uint)in[i-2];
6777 (out+j)[2] = (uint)in[i+0];
6778 (out+j)[3] = (uint)in[i+3];
6779 (out+j)[4] = (uint)in[i+4];
6780 (out+j)[5] = (uint)in[i+6];
9281 uint *out = (uint*)_out;
9285 (out+j)[0] = (uint)in[i];
9297 uint *out = (uint*)_out;
9301 (out+j)[0] = (uint)in[i];
9302 (out+j)[1] = (uint)in[i+1];
9314 uint *out = (uint*)_out;
9318 (out+j)[0] = (uint)in[i];
9319 (out+j)[1] = (uint)in[i+1];
9331 uint *out = (uint*)_out;
9335 (out+j)[0] = (uint)in[i];
9336 (out+j)[1] = (uint)in[i+1];
9338 (out+j)[0] = (uint)in[i];
9339 (out+j)[1] = (uint)in[start];
9350 uint *out = (uint*)_out;
9354 (out+j)[0] = (uint)in[i];
9355 (out+j)[1] = (uint)in[i+1];
9356 (out+j)[2] = (uint)in[i+2];
9368 uint *out = (uint*)_out;
9372 (out+j)[0] = (uint)in[i];
9373 (out+j)[1] = (uint)in[i+1+(i&1)];
9374 (out+j)[2] = (uint)in[i+2-(i&1)];
9386 uint *out = (uint*)_out;
9390 (out+j)[0] = (uint)in[start];
9391 (out+j)[1] = (uint)in[i+1];
9392 (out+j)[2] = (uint)in[i+2];
9404 uint *out = (uint*)_out;
9408 (out+j+0)[0] = (uint)in[i+0];
9409 (out+j+0)[1] = (uint)in[i+1];
9410 (out+j+0)[2] = (uint)in[i+2];
9411 (out+j+3)[0] = (uint)in[i+0];
9412 (out+j+3)[1] = (uint)in[i+2];
9413 (out+j+3)[2] = (uint)in[i+3];
9425 uint *out = (uint*)_out;
9429 (out+j+0)[0] = (uint)in[i+0];
9430 (out+j+0)[1] = (uint)in[i+1];
9431 (out+j+0)[2] = (uint)in[i+3];
9432 (out+j+3)[0] = (uint)in[i+0];
9433 (out+j+3)[1] = (uint)in[i+3];
9434 (out+j+3)[2] = (uint)in[i+2];
9446 uint *out = (uint*)_out;
9450 (out+j)[0] = (uint)in[start];
9451 (out+j)[1] = (uint)in[i+1];
9452 (out+j)[2] = (uint)in[i+2];
9464 uint *out = (uint*)_out;
9468 (out+j)[0] = (uint)in[i+0];
9469 (out+j)[1] = (uint)in[i+1];
9470 (out+j)[2] = (uint)in[i+2];
9471 (out+j)[3] = (uint)in[i+3];
9483 uint *out = (uint*)_out;
9487 (out+j)[0] = (uint)in[i+0];
9488 (out+j)[1] = (uint)in[i+1];
9489 (out+j)[2] = (uint)in[i+2];
9490 (out+j)[3] = (uint)in[i+3];
9502 uint *out = (uint*)_out;
9506 (out+j)[0] = (uint)in[i+0];
9507 (out+j)[1] = (uint)in[i+1];
9508 (out+j)[2] = (uint)in[i+2];
9509 (out+j)[3] = (uint)in[i+3];
9510 (out+j)[4] = (uint)in[i+4];
9511 (out+j)[5] = (uint)in[i+5];
9523 uint *out = (uint*)_out;
9529 (out+j)[0] = (uint)in[i+0];
9530 (out+j)[1] = (uint)in[i+1];
9531 (out+j)[2] = (uint)in[i+2];
9532 (out+j)[3] = (uint)in[i+3];
9533 (out+j)[4] = (uint)in[i+4];
9534 (out+j)[5] = (uint)in[i+5];
9537 (out+j)[0] = (uint)in[i+2];
9538 (out+j)[1] = (uint)in[i-2];
9539 (out+j)[2] = (uint)in[i+0];
9540 (out+j)[3] = (uint)in[i+3];
9541 (out+j)[4] = (uint)in[i+4];
9542 (out+j)[5] = (uint)in[i+6];
9555 uint *out = (uint*)_out;
9559 (out+j)[0] = (uint)in[i];
9571 uint *out = (uint*)_out;
9575 (out+j)[0] = (uint)in[i];
9576 (out+j)[1] = (uint)in[i+1];
9588 uint *out = (uint*)_out;
9592 (out+j)[0] = (uint)in[i];
9593 (out+j)[1] = (uint)in[i+1];
9605 uint *out = (uint*)_out;
9609 (out+j)[0] = (uint)in[i];
9610 (out+j)[1] = (uint)in[i+1];
9612 (out+j)[0] = (uint)in[i];
9613 (out+j)[1] = (uint)in[start];
9624 uint *out = (uint*)_out;
9628 (out+j)[0] = (uint)in[i];
9629 (out+j)[1] = (uint)in[i+1];
9630 (out+j)[2] = (uint)in[i+2];
9642 uint *out = (uint*)_out;
9646 (out+j)[0] = (uint)in[i];
9647 (out+j)[1] = (uint)in[i+1+(i&1)];
9648 (out+j)[2] = (uint)in[i+2-(i&1)];
9660 uint *out = (uint*)_out;
9664 (out+j)[0] = (uint)in[start];
9665 (out+j)[1] = (uint)in[i+1];
9666 (out+j)[2] = (uint)in[i+2];
9678 uint *out = (uint*)_out;
9708 (out+j+0)[0] = (uint)in[i+0];
9709 (out+j+0)[1] = (uint)in[i+1];
9710 (out+j+0)[2] = (uint)in[i+2];
9711 (out+j+3)[0] = (uint)in[i+0];
9712 (out+j+3)[1] = (uint)in[i+2];
9713 (out+j+3)[2] = (uint)in[i+3];
9725 uint *out = (uint*)_out;
9755 (out+j+0)[0] = (uint)in[i+0];
9756 (out+j+0)[1] = (uint)in[i+1];
9757 (out+j+0)[2] = (uint)in[i+3];
9758 (out+j+3)[0] = (uint)in[i+0];
9759 (out+j+3)[1] = (uint)in[i+3];
9760 (out+j+3)[2] = (uint)in[i+2];
9772 uint *out = (uint*)_out;
9798 (out+j)[0] = (uint)in[start];
9799 (out+j)[1] = (uint)in[i+1];
9800 (out+j)[2] = (uint)in[i+2];
9812 uint *out = (uint*)_out;
9816 (out+j)[0] = (uint)in[i+0];
9817 (out+j)[1] = (uint)in[i+1];
9818 (out+j)[2] = (uint)in[i+2];
9819 (out+j)[3] = (uint)in[i+3];
9831 uint *out = (uint*)_out;
9835 (out+j)[0] = (uint)in[i+0];
9836 (out+j)[1] = (uint)in[i+1];
9837 (out+j)[2] = (uint)in[i+2];
9838 (out+j)[3] = (uint)in[i+3];
9850 uint *out = (uint*)_out;
9854 (out+j)[0] = (uint)in[i+0];
9855 (out+j)[1] = (uint)in[i+1];
9856 (out+j)[2] = (uint)in[i+2];
9857 (out+j)[3] = (uint)in[i+3];
9858 (out+j)[4] = (uint)in[i+4];
9859 (out+j)[5] = (uint)in[i+5];
9871 uint *out = (uint*)_out;
9877 (out+j)[0] = (uint)in[i+0];
9878 (out+j)[1] = (uint)in[i+1];
9879 (out+j)[2] = (uint)in[i+2];
9880 (out+j)[3] = (uint)in[i+3];
9881 (out+j)[4] = (uint)in[i+4];
9882 (out+j)[5] = (uint)in[i+5];
9885 (out+j)[0] = (uint)in[i+2];
9886 (out+j)[1] = (uint)in[i-2];
9887 (out+j)[2] = (uint)in[i+0];
9888 (out+j)[3] = (uint)in[i+3];
9889 (out+j)[4] = (uint)in[i+4];
9890 (out+j)[5] = (uint)in[i+6];
9903 uint *out = (uint*)_out;
9907 (out+j)[0] = (uint)in[i];
9919 uint *out = (uint*)_out;
9923 (out+j)[0] = (uint)in[i+1];
9924 (out+j)[1] = (uint)in[i];
9936 uint *out = (uint*)_out;
9940 (out+j)[0] = (uint)in[i+1];
9941 (out+j)[1] = (uint)in[i];
9953 uint *out = (uint*)_out;
9957 (out+j)[0] = (uint)in[i+1];
9958 (out+j)[1] = (uint)in[i];
9960 (out+j)[0] = (uint)in[start];
9961 (out+j)[1] = (uint)in[i];
9972 uint *out = (uint*)_out;
9976 (out+j)[0] = (uint)in[i+1];
9977 (out+j)[1] = (uint)in[i+2];
9978 (out+j)[2] = (uint)in[i];
9990 uint *out = (uint*)_out;
9994 (out+j)[0] = (uint)in[i+1+(i&1)];
9995 (out+j)[1] = (uint)in[i+2-(i&1)];
9996 (out+j)[2] = (uint)in[i];
10008 uint *out = (uint*)_out;
10012 (out+j)[0] = (uint)in[i+1];
10013 (out+j)[1] = (uint)in[i+2];
10014 (out+j)[2] = (uint)in[start];
10026 uint *out = (uint*)_out;
10030 (out+j+0)[0] = (uint)in[i+1];
10031 (out+j+0)[1] = (uint)in[i+2];
10032 (out+j+0)[2] = (uint)in[i+0];
10033 (out+j+3)[0] = (uint)in[i+2];
10034 (out+j+3)[1] = (uint)in[i+3];
10035 (out+j+3)[2] = (uint)in[i+0];
10047 uint *out = (uint*)_out;
10051 (out+j+0)[0] = (uint)in[i+1];
10052 (out+j+0)[1] = (uint)in[i+3];
10053 (out+j+0)[2] = (uint)in[i+0];
10054 (out+j+3)[0] = (uint)in[i+3];
10055 (out+j+3)[1] = (uint)in[i+2];
10056 (out+j+3)[2] = (uint)in[i+0];
10068 uint *out = (uint*)_out;
10072 (out+j)[0] = (uint)in[i+1];
10073 (out+j)[1] = (uint)in[i+2];
10074 (out+j)[2] = (uint)in[start];
10086 uint *out = (uint*)_out;
10090 (out+j)[0] = (uint)in[i+3];
10091 (out+j)[1] = (uint)in[i+2];
10092 (out+j)[2] = (uint)in[i+1];
10093 (out+j)[3] = (uint)in[i+0];
10105 uint *out = (uint*)_out;
10109 (out+j)[0] = (uint)in[i+3];
10110 (out+j)[1] = (uint)in[i+2];
10111 (out+j)[2] = (uint)in[i+1];
10112 (out+j)[3] = (uint)in[i+0];
10124 uint *out = (uint*)_out;
10128 (out+j)[0] = (uint)in[i+4];
10129 (out+j)[1] = (uint)in[i+5];
10130 (out+j)[2] = (uint)in[i+0];
10131 (out+j)[3] = (uint)in[i+1];
10132 (out+j)[4] = (uint)in[i+2];
10133 (out+j)[5] = (uint)in[i+3];
10145 uint *out = (uint*)_out;
10151 (out+j)[0] = (uint)in[i+4];
10152 (out+j)[1] = (uint)in[i+5];
10153 (out+j)[2] = (uint)in[i+0];
10154 (out+j)[3] = (uint)in[i+1];
10155 (out+j)[4] = (uint)in[i+2];
10156 (out+j)[5] = (uint)in[i+3];
10159 (out+j)[0] = (uint)in[i+4];
10160 (out+j)[1] = (uint)in[i+6];
10161 (out+j)[2] = (uint)in[i+2];
10162 (out+j)[3] = (uint)in[i-2];
10163 (out+j)[4] = (uint)in[i+0];
10164 (out+j)[5] = (uint)in[i+3];
10177 uint *out = (uint*)_out;
10181 (out+j)[0] = (uint)in[i];
10193 uint *out = (uint*)_out;
10197 (out+j)[0] = (uint)in[i+1];
10198 (out+j)[1] = (uint)in[i];
10210 uint *out = (uint*)_out;
10214 (out+j)[0] = (uint)in[i+1];
10215 (out+j)[1] = (uint)in[i];
10227 uint *out = (uint*)_out;
10231 (out+j)[0] = (uint)in[i+1];
10232 (out+j)[1] = (uint)in[i];
10234 (out+j)[0] = (uint)in[start];
10235 (out+j)[1] = (uint)in[i];
10246 uint *out = (uint*)_out;
10250 (out+j)[0] = (uint)in[i+1];
10251 (out+j)[1] = (uint)in[i+2];
10252 (out+j)[2] = (uint)in[i];
10264 uint *out = (uint*)_out;
10268 (out+j)[0] = (uint)in[i+1+(i&1)];
10269 (out+j)[1] = (uint)in[i+2-(i&1)];
10270 (out+j)[2] = (uint)in[i];
10282 uint *out = (uint*)_out;
10286 (out+j)[0] = (uint)in[i+1];
10287 (out+j)[1] = (uint)in[i+2];
10288 (out+j)[2] = (uint)in[start];
10300 uint *out = (uint*)_out;
10330 (out+j+0)[0] = (uint)in[i+1];
10331 (out+j+0)[1] = (uint)in[i+2];
10332 (out+j+0)[2] = (uint)in[i+0];
10333 (out+j+3)[0] = (uint)in[i+2];
10334 (out+j+3)[1] = (uint)in[i+3];
10335 (out+j+3)[2] = (uint)in[i+0];
10347 uint *out = (uint*)_out;
10377 (out+j+0)[0] = (uint)in[i+1];
10378 (out+j+0)[1] = (uint)in[i+3];
10379 (out+j+0)[2] = (uint)in[i+0];
10380 (out+j+3)[0] = (uint)in[i+3];
10381 (out+j+3)[1] = (uint)in[i+2];
10382 (out+j+3)[2] = (uint)in[i+0];
10394 uint *out = (uint*)_out;
10420 (out+j)[0] = (uint)in[i+1];
10421 (out+j)[1] = (uint)in[i+2];
10422 (out+j)[2] = (uint)in[start];
10434 uint *out = (uint*)_out;
10438 (out+j)[0] = (uint)in[i+3];
10439 (out+j)[1] = (uint)in[i+2];
10440 (out+j)[2] = (uint)in[i+1];
10441 (out+j)[3] = (uint)in[i+0];
10453 uint *out = (uint*)_out;
10457 (out+j)[0] = (uint)in[i+3];
10458 (out+j)[1] = (uint)in[i+2];
10459 (out+j)[2] = (uint)in[i+1];
10460 (out+j)[3] = (uint)in[i+0];
10472 uint *out = (uint*)_out;
10476 (out+j)[0] = (uint)in[i+4];
10477 (out+j)[1] = (uint)in[i+5];
10478 (out+j)[2] = (uint)in[i+0];
10479 (out+j)[3] = (uint)in[i+1];
10480 (out+j)[4] = (uint)in[i+2];
10481 (out+j)[5] = (uint)in[i+3];
10493 uint *out = (uint*)_out;
10499 (out+j)[0] = (uint)in[i+4];
10500 (out+j)[1] = (uint)in[i+5];
10501 (out+j)[2] = (uint)in[i+0];
10502 (out+j)[3] = (uint)in[i+1];
10503 (out+j)[4] = (uint)in[i+2];
10504 (out+j)[5] = (uint)in[i+3];
10507 (out+j)[0] = (uint)in[i+4];
10508 (out+j)[1] = (uint)in[i+6];
10509 (out+j)[2] = (uint)in[i+2];
10510 (out+j)[3] = (uint)in[i-2];
10511 (out+j)[4] = (uint)in[i+0];
10512 (out+j)[5] = (uint)in[i+3];
10525 uint *out = (uint*)_out;
10529 (out+j)[0] = (uint)in[i];
10541 uint *out = (uint*)_out;
10545 (out+j)[0] = (uint)in[i+1];
10546 (out+j)[1] = (uint)in[i];
10558 uint *out = (uint*)_out;
10562 (out+j)[0] = (uint)in[i+1];
10563 (out+j)[1] = (uint)in[i];
10575 uint *out = (uint*)_out;
10579 (out+j)[0] = (uint)in[i+1];
10580 (out+j)[1] = (uint)in[i];
10582 (out+j)[0] = (uint)in[start];
10583 (out+j)[1] = (uint)in[i];
10594 uint *out = (uint*)_out;
10598 (out+j)[0] = (uint)in[i+2];
10599 (out+j)[1] = (uint)in[i];
10600 (out+j)[2] = (uint)in[i+1];
10612 uint *out = (uint*)_out;
10616 (out+j)[0] = (uint)in[i+2];
10617 (out+j)[1] = (uint)in[i+(i&1)];
10618 (out+j)[2] = (uint)in[i+1-(i&1)];
10630 uint *out = (uint*)_out;
10634 (out+j)[0] = (uint)in[i+2];
10635 (out+j)[1] = (uint)in[start];
10636 (out+j)[2] = (uint)in[i+1];
10648 uint *out = (uint*)_out;
10652 (out+j+0)[0] = (uint)in[i+3];
10653 (out+j+0)[1] = (uint)in[i+0];
10654 (out+j+0)[2] = (uint)in[i+1];
10655 (out+j+3)[0] = (uint)in[i+3];
10656 (out+j+3)[1] = (uint)in[i+1];
10657 (out+j+3)[2] = (uint)in[i+2];
10669 uint *out = (uint*)_out;
10673 (out+j+0)[0] = (uint)in[i+3];
10674 (out+j+0)[1] = (uint)in[i+2];
10675 (out+j+0)[2] = (uint)in[i+0];
10676 (out+j+3)[0] = (uint)in[i+3];
10677 (out+j+3)[1] = (uint)in[i+0];
10678 (out+j+3)[2] = (uint)in[i+1];
10690 uint *out = (uint*)_out;
10694 (out+j)[0] = (uint)in[start];
10695 (out+j)[1] = (uint)in[i+1];
10696 (out+j)[2] = (uint)in[i+2];
10708 uint *out = (uint*)_out;
10712 (out+j)[0] = (uint)in[i+3];
10713 (out+j)[1] = (uint)in[i+2];
10714 (out+j)[2] = (uint)in[i+1];
10715 (out+j)[3] = (uint)in[i+0];
10727 uint *out = (uint*)_out;
10731 (out+j)[0] = (uint)in[i+3];
10732 (out+j)[1] = (uint)in[i+2];
10733 (out+j)[2] = (uint)in[i+1];
10734 (out+j)[3] = (uint)in[i+0];
10746 uint *out = (uint*)_out;
10750 (out+j)[0] = (uint)in[i+4];
10751 (out+j)[1] = (uint)in[i+5];
10752 (out+j)[2] = (uint)in[i+0];
10753 (out+j)[3] = (uint)in[i+1];
10754 (out+j)[4] = (uint)in[i+2];
10755 (out+j)[5] = (uint)in[i+3];
10767 uint *out = (uint*)_out;
10773 (out+j)[0] = (uint)in[i+4];
10774 (out+j)[1] = (uint)in[i+5];
10775 (out+j)[2] = (uint)in[i+0];
10776 (out+j)[3] = (uint)in[i+1];
10777 (out+j)[4] = (uint)in[i+2];
10778 (out+j)[5] = (uint)in[i+3];
10781 (out+j)[0] = (uint)in[i+4];
10782 (out+j)[1] = (uint)in[i+6];
10783 (out+j)[2] = (uint)in[i+2];
10784 (out+j)[3] = (uint)in[i-2];
10785 (out+j)[4] = (uint)in[i+0];
10786 (out+j)[5] = (uint)in[i+3];
10799 uint *out = (uint*)_out;
10803 (out+j)[0] = (uint)in[i];
10815 uint *out = (uint*)_out;
10819 (out+j)[0] = (uint)in[i+1];
10820 (out+j)[1] = (uint)in[i];
10832 uint *out = (uint*)_out;
10836 (out+j)[0] = (uint)in[i+1];
10837 (out+j)[1] = (uint)in[i];
10849 uint *out = (uint*)_out;
10853 (out+j)[0] = (uint)in[i+1];
10854 (out+j)[1] = (uint)in[i];
10856 (out+j)[0] = (uint)in[start];
10857 (out+j)[1] = (uint)in[i];
10868 uint *out = (uint*)_out;
10872 (out+j)[0] = (uint)in[i+2];
10873 (out+j)[1] = (uint)in[i];
10874 (out+j)[2] = (uint)in[i+1];
10886 uint *out = (uint*)_out;
10890 (out+j)[0] = (uint)in[i+2];
10891 (out+j)[1] = (uint)in[i+(i&1)];
10892 (out+j)[2] = (uint)in[i+1-(i&1)];
10904 uint *out = (uint*)_out;
10908 (out+j)[0] = (uint)in[i+2];
10909 (out+j)[1] = (uint)in[start];
10910 (out+j)[2] = (uint)in[i+1];
10922 uint *out = (uint*)_out;
10952 (out+j+0)[0] = (uint)in[i+3];
10953 (out+j+0)[1] = (uint)in[i+0];
10954 (out+j+0)[2] = (uint)in[i+1];
10955 (out+j+3)[0] = (uint)in[i+3];
10956 (out+j+3)[1] = (uint)in[i+1];
10957 (out+j+3)[2] = (uint)in[i+2];
10969 uint *out = (uint*)_out;
10999 (out+j+0)[0] = (uint)in[i+3];
11000 (out+j+0)[1] = (uint)in[i+2];
11001 (out+j+0)[2] = (uint)in[i+0];
11002 (out+j+3)[0] = (uint)in[i+3];
11003 (out+j+3)[1] = (uint)in[i+0];
11004 (out+j+3)[2] = (uint)in[i+1];
11016 uint *out = (uint*)_out;
11042 (out+j)[0] = (uint)in[start];
11043 (out+j)[1] = (uint)in[i+1];
11044 (out+j)[2] = (uint)in[i+2];
11056 uint *out = (uint*)_out;
11060 (out+j)[0] = (uint)in[i+3];
11061 (out+j)[1] = (uint)in[i+2];
11062 (out+j)[2] = (uint)in[i+1];
11063 (out+j)[3] = (uint)in[i+0];
11075 uint *out = (uint*)_out;
11079 (out+j)[0] = (uint)in[i+3];
11080 (out+j)[1] = (uint)in[i+2];
11081 (out+j)[2] = (uint)in[i+1];
11082 (out+j)[3] = (uint)in[i+0];
11094 uint *out = (uint*)_out;
11098 (out+j)[0] = (uint)in[i+4];
11099 (out+j)[1] = (uint)in[i+5];
11100 (out+j)[2] = (uint)in[i+0];
11101 (out+j)[3] = (uint)in[i+1];
11102 (out+j)[4] = (uint)in[i+2];
11103 (out+j)[5] = (uint)in[i+3];
11115 uint *out = (uint*)_out;
11121 (out+j)[0] = (uint)in[i+4];
11122 (out+j)[1] = (uint)in[i+5];
11123 (out+j)[2] = (uint)in[i+0];
11124 (out+j)[3] = (uint)in[i+1];
11125 (out+j)[4] = (uint)in[i+2];
11126 (out+j)[5] = (uint)in[i+3];
11129 (out+j)[0] = (uint)in[i+4];
11130 (out+j)[1] = (uint)in[i+6];
11131 (out+j)[2] = (uint)in[i+2];
11132 (out+j)[3] = (uint)in[i-2];
11133 (out+j)[4] = (uint)in[i+0];
11134 (out+j)[5] = (uint)in[i+3];
11147 uint *out = (uint*)_out;
11151 (out+j)[0] = (uint)in[i];
11163 uint *out = (uint*)_out;
11167 (out+j)[0] = (uint)in[i];
11168 (out+j)[1] = (uint)in[i+1];
11180 uint *out = (uint*)_out;
11184 (out+j)[0] = (uint)in[i];
11185 (out+j)[1] = (uint)in[i+1];
11197 uint *out = (uint*)_out;
11201 (out+j)[0] = (uint)in[i];
11202 (out+j)[1] = (uint)in[i+1];
11204 (out+j)[0] = (uint)in[i];
11205 (out+j)[1] = (uint)in[start];
11216 uint *out = (uint*)_out;
11220 (out+j)[0] = (uint)in[i];
11221 (out+j)[1] = (uint)in[i+1];
11222 (out+j)[2] = (uint)in[i+2];
11234 uint *out = (uint*)_out;
11238 (out+j)[0] = (uint)in[i+(i&1)];
11239 (out+j)[1] = (uint)in[i+1-(i&1)];
11240 (out+j)[2] = (uint)in[i+2];
11252 uint *out = (uint*)_out;
11256 (out+j)[0] = (uint)in[start];
11257 (out+j)[1] = (uint)in[i+1];
11258 (out+j)[2] = (uint)in[i+2];
11270 uint *out = (uint*)_out;
11274 (out+j+0)[0] = (uint)in[i+0];
11275 (out+j+0)[1] = (uint)in[i+1];
11276 (out+j+0)[2] = (uint)in[i+3];
11277 (out+j+3)[0] = (uint)in[i+1];
11278 (out+j+3)[1] = (uint)in[i+2];
11279 (out+j+3)[2] = (uint)in[i+3];
11291 uint *out = (uint*)_out;
11295 (out+j+0)[0] = (uint)in[i+2];
11296 (out+j+0)[1] = (uint)in[i+0];
11297 (out+j+0)[2] = (uint)in[i+3];
11298 (out+j+3)[0] = (uint)in[i+0];
11299 (out+j+3)[1] = (uint)in[i+1];
11300 (out+j+3)[2] = (uint)in[i+3];
11312 uint *out = (uint*)_out;
11316 (out+j)[0] = (uint)in[i+1];
11317 (out+j)[1] = (uint)in[i+2];
11318 (out+j)[2] = (uint)in[start];
11330 uint *out = (uint*)_out;
11334 (out+j)[0] = (uint)in[i+0];
11335 (out+j)[1] = (uint)in[i+1];
11336 (out+j)[2] = (uint)in[i+2];
11337 (out+j)[3] = (uint)in[i+3];
11349 uint *out = (uint*)_out;
11353 (out+j)[0] = (uint)in[i+0];
11354 (out+j)[1] = (uint)in[i+1];
11355 (out+j)[2] = (uint)in[i+2];
11356 (out+j)[3] = (uint)in[i+3];
11368 uint *out = (uint*)_out;
11372 (out+j)[0] = (uint)in[i+0];
11373 (out+j)[1] = (uint)in[i+1];
11374 (out+j)[2] = (uint)in[i+2];
11375 (out+j)[3] = (uint)in[i+3];
11376 (out+j)[4] = (uint)in[i+4];
11377 (out+j)[5] = (uint)in[i+5];
11389 uint *out = (uint*)_out;
11395 (out+j)[0] = (uint)in[i+0];
11396 (out+j)[1] = (uint)in[i+1];
11397 (out+j)[2] = (uint)in[i+2];
11398 (out+j)[3] = (uint)in[i+3];
11399 (out+j)[4] = (uint)in[i+4];
11400 (out+j)[5] = (uint)in[i+5];
11403 (out+j)[0] = (uint)in[i+2];
11404 (out+j)[1] = (uint)in[i-2];
11405 (out+j)[2] = (uint)in[i+0];
11406 (out+j)[3] = (uint)in[i+3];
11407 (out+j)[4] = (uint)in[i+4];
11408 (out+j)[5] = (uint)in[i+6];
11421 uint *out = (uint*)_out;
11425 (out+j)[0] = (uint)in[i];
11437 uint *out = (uint*)_out;
11441 (out+j)[0] = (uint)in[i];
11442 (out+j)[1] = (uint)in[i+1];
11454 uint *out = (uint*)_out;
11458 (out+j)[0] = (uint)in[i];
11459 (out+j)[1] = (uint)in[i+1];
11471 uint *out = (uint*)_out;
11475 (out+j)[0] = (uint)in[i];
11476 (out+j)[1] = (uint)in[i+1];
11478 (out+j)[0] = (uint)in[i];
11479 (out+j)[1] = (uint)in[start];
11490 uint *out = (uint*)_out;
11494 (out+j)[0] = (uint)in[i];
11495 (out+j)[1] = (uint)in[i+1];
11496 (out+j)[2] = (uint)in[i+2];
11508 uint *out = (uint*)_out;
11512 (out+j)[0] = (uint)in[i+(i&1)];
11513 (out+j)[1] = (uint)in[i+1-(i&1)];
11514 (out+j)[2] = (uint)in[i+2];
11526 uint *out = (uint*)_out;
11530 (out+j)[0] = (uint)in[start];
11531 (out+j)[1] = (uint)in[i+1];
11532 (out+j)[2] = (uint)in[i+2];
11544 uint *out = (uint*)_out;
11574 (out+j+0)[0] = (uint)in[i+0];
11575 (out+j+0)[1] = (uint)in[i+1];
11576 (out+j+0)[2] = (uint)in[i+3];
11577 (out+j+3)[0] = (uint)in[i+1];
11578 (out+j+3)[1] = (uint)in[i+2];
11579 (out+j+3)[2] = (uint)in[i+3];
11591 uint *out = (uint*)_out;
11621 (out+j+0)[0] = (uint)in[i+2];
11622 (out+j+0)[1] = (uint)in[i+0];
11623 (out+j+0)[2] = (uint)in[i+3];
11624 (out+j+3)[0] = (uint)in[i+0];
11625 (out+j+3)[1] = (uint)in[i+1];
11626 (out+j+3)[2] = (uint)in[i+3];
11638 uint *out = (uint*)_out;
11664 (out+j)[0] = (uint)in[i+1];
11665 (out+j)[1] = (uint)in[i+2];
11666 (out+j)[2] = (uint)in[start];
11678 uint *out = (uint*)_out;
11682 (out+j)[0] = (uint)in[i+0];
11683 (out+j)[1] = (uint)in[i+1];
11684 (out+j)[2] = (uint)in[i+2];
11685 (out+j)[3] = (uint)in[i+3];
11697 uint *out = (uint*)_out;
11701 (out+j)[0] = (uint)in[i+0];
11702 (out+j)[1] = (uint)in[i+1];
11703 (out+j)[2] = (uint)in[i+2];
11704 (out+j)[3] = (uint)in[i+3];
11716 uint *out = (uint*)_out;
11720 (out+j)[0] = (uint)in[i+0];
11721 (out+j)[1] = (uint)in[i+1];
11722 (out+j)[2] = (uint)in[i+2];
11723 (out+j)[3] = (uint)in[i+3];
11724 (out+j)[4] = (uint)in[i+4];
11725 (out+j)[5] = (uint)in[i+5];
11737 uint *out = (uint*)_out;
11743 (out+j)[0] = (uint)in[i+0];
11744 (out+j)[1] = (uint)in[i+1];
11745 (out+j)[2] = (uint)in[i+2];
11746 (out+j)[3] = (uint)in[i+3];
11747 (out+j)[4] = (uint)in[i+4];
11748 (out+j)[5] = (uint)in[i+5];
11751 (out+j)[0] = (uint)in[i+2];
11752 (out+j)[1] = (uint)in[i-2];
11753 (out+j)[2] = (uint)in[i+0];
11754 (out+j)[3] = (uint)in[i+3];
11755 (out+j)[4] = (uint)in[i+4];
11756 (out+j)[5] = (uint)in[i+6];
11768 const uint*in = (const uint*)_in;
11784 const uint*in = (const uint*)_in;
11801 const uint*in = (const uint*)_in;
11818 const uint*in = (const uint*)_in;
11837 const uint*in = (const uint*)_in;
11855 const uint*in = (const uint*)_in;
11873 const uint*in = (const uint*)_in;
11891 const uint*in = (const uint*)_in;
11912 const uint*in = (const uint*)_in;
11933 const uint*in = (const uint*)_in;
11951 const uint*in = (const uint*)_in;
11970 const uint*in = (const uint*)_in;
11989 const uint*in = (const uint*)_in;
12010 const uint*in = (const uint*)_in;
12042 const uint*in = (const uint*)_in;
12058 const uint*in = (const uint*)_in;
12075 const uint*in = (const uint*)_in;
12092 const uint*in = (const uint*)_in;
12111 const uint*in = (const uint*)_in;
12129 const uint*in = (const uint*)_in;
12147 const uint*in = (const uint*)_in;
12165 const uint*in = (const uint*)_in;
12212 const uint*in = (const uint*)_in;
12259 const uint*in = (const uint*)_in;
12299 const uint*in = (const uint*)_in;
12318 const uint*in = (const uint*)_in;
12337 const uint*in = (const uint*)_in;
12358 const uint*in = (const uint*)_in;
12390 const uint*in = (const uint*)_in;
12406 const uint*in = (const uint*)_in;
12423 const uint*in = (const uint*)_in;
12440 const uint*in = (const uint*)_in;
12459 const uint*in = (const uint*)_in;
12477 const uint*in = (const uint*)_in;
12495 const uint*in = (const uint*)_in;
12513 const uint*in = (const uint*)_in;
12534 const uint*in = (const uint*)_in;
12555 const uint*in = (const uint*)_in;
12573 const uint*in = (const uint*)_in;
12592 const uint*in = (const uint*)_in;
12611 const uint*in = (const uint*)_in;
12632 const uint*in = (const uint*)_in;
12664 const uint*in = (const uint*)_in;
12680 const uint*in = (const uint*)_in;
12697 const uint*in = (const uint*)_in;
12714 const uint*in = (const uint*)_in;
12733 const uint*in = (const uint*)_in;
12751 const uint*in = (const uint*)_in;
12769 const uint*in = (const uint*)_in;
12787 const uint*in = (const uint*)_in;
12834 const uint*in = (const uint*)_in;
12881 const uint*in = (const uint*)_in;
12921 const uint*in = (const uint*)_in;
12940 const uint*in = (const uint*)_in;
12959 const uint*in = (const uint*)_in;
12980 const uint*in = (const uint*)_in;
13012 const uint*in = (const uint*)_in;
13028 const uint*in = (const uint*)_in;
13045 const uint*in = (const uint*)_in;
13062 const uint*in = (const uint*)_in;
13081 const uint*in = (const uint*)_in;
13099 const uint*in = (const uint*)_in;
13117 const uint*in = (const uint*)_in;
13135 const uint*in = (const uint*)_in;
13156 const uint*in = (const uint*)_in;
13177 const uint*in = (const uint*)_in;
13195 const uint*in = (const uint*)_in;
13214 const uint*in = (const uint*)_in;
13233 const uint*in = (const uint*)_in;
13254 const uint*in = (const uint*)_in;
13286 const uint*in = (const uint*)_in;
13302 const uint*in = (const uint*)_in;
13319 const uint*in = (const uint*)_in;
13336 const uint*in = (const uint*)_in;
13355 const uint*in = (const uint*)_in;
13373 const uint*in = (const uint*)_in;
13391 const uint*in = (const uint*)_in;
13409 const uint*in = (const uint*)_in;
13456 const uint*in = (const uint*)_in;
13503 const uint*in = (const uint*)_in;
13543 const uint*in = (const uint*)_in;
13562 const uint*in = (const uint*)_in;
13581 const uint*in = (const uint*)_in;
13602 const uint*in = (const uint*)_in;
13634 const uint*in = (const uint*)_in;
13650 const uint*in = (const uint*)_in;
13667 const uint*in = (const uint*)_in;
13684 const uint*in = (const uint*)_in;
13703 const uint*in = (const uint*)_in;
13721 const uint*in = (const uint*)_in;
13739 const uint*in = (const uint*)_in;
13757 const uint*in = (const uint*)_in;
13778 const uint*in = (const uint*)_in;
13799 const uint*in = (const uint*)_in;
13817 const uint*in = (const uint*)_in;
13836 const uint*in = (const uint*)_in;
13855 const uint*in = (const uint*)_in;
13876 const uint*in = (const uint*)_in;
13908 const uint*in = (const uint*)_in;
13924 const uint*in = (const uint*)_in;
13941 const uint*in = (const uint*)_in;
13958 const uint*in = (const uint*)_in;
13977 const uint*in = (const uint*)_in;
13995 const uint*in = (const uint*)_in;
14013 const uint*in = (const uint*)_in;
14031 const uint*in = (const uint*)_in;
14078 const uint*in = (const uint*)_in;
14125 const uint*in = (const uint*)_in;
14165 const uint*in = (const uint*)_in;
14184 const uint*in = (const uint*)_in;
14203 const uint*in = (const uint*)_in;
14224 const uint*in = (const uint*)_in;
14256 const uint*in = (const uint*)_in;
14257 uint *out = (uint*)_out;
14261 (out+j)[0] = (uint)in[i];
14272 const uint*in = (const uint*)_in;
14273 uint *out = (uint*)_out;
14277 (out+j)[0] = (uint)in[i];
14278 (out+j)[1] = (uint)in[i+1];
14289 const uint*in = (const uint*)_in;
14290 uint *out = (uint*)_out;
14294 (out+j)[0] = (uint)in[i];
14295 (out+j)[1] = (uint)in[i+1];
14306 const uint*in = (const uint*)_in;
14307 uint *out = (uint*)_out;
14311 (out+j)[0] = (uint)in[i];
14312 (out+j)[1] = (uint)in[i+1];
14314 (out+j)[0] = (uint)in[i];
14315 (out+j)[1] = (uint)in[start];
14325 const uint*in = (const uint*)_in;
14326 uint *out = (uint*)_out;
14330 (out+j)[0] = (uint)in[i];
14331 (out+j)[1] = (uint)in[i+1];
14332 (out+j)[2] = (uint)in[i+2];
14343 const uint*in = (const uint*)_in;
14344 uint *out = (uint*)_out;
14348 (out+j)[0] = (uint)in[i];
14349 (out+j)[1] = (uint)in[i+1+(i&1)];
14350 (out+j)[2] = (uint)in[i+2-(i&1)];
14361 const uint*in = (const uint*)_in;
14362 uint *out = (uint*)_out;
14366 (out+j)[0] = (uint)in[start];
14367 (out+j)[1] = (uint)in[i+1];
14368 (out+j)[2] = (uint)in[i+2];
14379 const uint*in = (const uint*)_in;
14380 uint *out = (uint*)_out;
14384 (out+j+0)[0] = (uint)in[i+0];
14385 (out+j+0)[1] = (uint)in[i+1];
14386 (out+j+0)[2] = (uint)in[i+2];
14387 (out+j+3)[0] = (uint)in[i+0];
14388 (out+j+3)[1] = (uint)in[i+2];
14389 (out+j+3)[2] = (uint)in[i+3];
14400 const uint*in = (const uint*)_in;
14401 uint *out = (uint*)_out;
14405 (out+j+0)[0] = (uint)in[i+0];
14406 (out+j+0)[1] = (uint)in[i+1];
14407 (out+j+0)[2] = (uint)in[i+3];
14408 (out+j+3)[0] = (uint)in[i+0];
14409 (out+j+3)[1] = (uint)in[i+3];
14410 (out+j+3)[2] = (uint)in[i+2];
14421 const uint*in = (const uint*)_in;
14422 uint *out = (uint*)_out;
14426 (out+j)[0] = (uint)in[start];
14427 (out+j)[1] = (uint)in[i+1];
14428 (out+j)[2] = (uint)in[i+2];
14439 const uint*in = (const uint*)_in;
14440 uint *out = (uint*)_out;
14444 (out+j)[0] = (uint)in[i+0];
14445 (out+j)[1] = (uint)in[i+1];
14446 (out+j)[2] = (uint)in[i+2];
14447 (out+j)[3] = (uint)in[i+3];
14458 const uint*in = (const uint*)_in;
14459 uint *out = (uint*)_out;
14463 (out+j)[0] = (uint)in[i+0];
14464 (out+j)[1] = (uint)in[i+1];
14465 (out+j)[2] = (uint)in[i+2];
14466 (out+j)[3] = (uint)in[i+3];
14477 const uint*in = (const uint*)_in;
14478 uint *out = (uint*)_out;
14482 (out+j)[0] = (uint)in[i+0];
14483 (out+j)[1] = (uint)in[i+1];
14484 (out+j)[2] = (uint)in[i+2];
14485 (out+j)[3] = (uint)in[i+3];
14486 (out+j)[4] = (uint)in[i+4];
14487 (out+j)[5] = (uint)in[i+5];
14498 const uint*in = (const uint*)_in;
14499 uint *out = (uint*)_out;
14505 (out+j)[0] = (uint)in[i+0];
14506 (out+j)[1] = (uint)in[i+1];
14507 (out+j)[2] = (uint)in[i+2];
14508 (out+j)[3] = (uint)in[i+3];
14509 (out+j)[4] = (uint)in[i+4];
14510 (out+j)[5] = (uint)in[i+5];
14513 (out+j)[0] = (uint)in[i+2];
14514 (out+j)[1] = (uint)in[i-2];
14515 (out+j)[2] = (uint)in[i+0];
14516 (out+j)[3] = (uint)in[i+3];
14517 (out+j)[4] = (uint)in[i+4];
14518 (out+j)[5] = (uint)in[i+6];
14530 const uint*in = (const uint*)_in;
14531 uint *out = (uint*)_out;
14535 (out+j)[0] = (uint)in[i];
14546 const uint*in = (const uint*)_in;
14547 uint *out = (uint*)_out;
14551 (out+j)[0] = (uint)in[i];
14552 (out+j)[1] = (uint)in[i+1];
14563 const uint*in = (const uint*)_in;
14564 uint *out = (uint*)_out;
14568 (out+j)[0] = (uint)in[i];
14569 (out+j)[1] = (uint)in[i+1];
14580 const uint*in = (const uint*)_in;
14581 uint *out = (uint*)_out;
14585 (out+j)[0] = (uint)in[i];
14586 (out+j)[1] = (uint)in[i+1];
14588 (out+j)[0] = (uint)in[i];
14589 (out+j)[1] = (uint)in[start];
14599 const uint*in = (const uint*)_in;
14600 uint *out = (uint*)_out;
14604 (out+j)[0] = (uint)in[i];
14605 (out+j)[1] = (uint)in[i+1];
14606 (out+j)[2] = (uint)in[i+2];
14617 const uint*in = (const uint*)_in;
14618 uint *out = (uint*)_out;
14622 (out+j)[0] = (uint)in[i];
14623 (out+j)[1] = (uint)in[i+1+(i&1)];
14624 (out+j)[2] = (uint)in[i+2-(i&1)];
14635 const uint*in = (const uint*)_in;
14636 uint *out = (uint*)_out;
14640 (out+j)[0] = (uint)in[start];
14641 (out+j)[1] = (uint)in[i+1];
14642 (out+j)[2] = (uint)in[i+2];
14653 const uint*in = (const uint*)_in;
14654 uint *out = (uint*)_out;
14684 (out+j+0)[0] = (uint)in[i+0];
14685 (out+j+0)[1] = (uint)in[i+1];
14686 (out+j+0)[2] = (uint)in[i+2];
14687 (out+j+3)[0] = (uint)in[i+0];
14688 (out+j+3)[1] = (uint)in[i+2];
14689 (out+j+3)[2] = (uint)in[i+3];
14700 const uint*in = (const uint*)_in;
14701 uint *out = (uint*)_out;
14731 (out+j+0)[0] = (uint)in[i+0];
14732 (out+j+0)[1] = (uint)in[i+1];
14733 (out+j+0)[2] = (uint)in[i+3];
14734 (out+j+3)[0] = (uint)in[i+0];
14735 (out+j+3)[1] = (uint)in[i+3];
14736 (out+j+3)[2] = (uint)in[i+2];
14747 const uint*in = (const uint*)_in;
14748 uint *out = (uint*)_out;
14774 (out+j)[0] = (uint)in[start];
14775 (out+j)[1] = (uint)in[i+1];
14776 (out+j)[2] = (uint)in[i+2];
14787 const uint*in = (const uint*)_in;
14788 uint *out = (uint*)_out;
14792 (out+j)[0] = (uint)in[i+0];
14793 (out+j)[1] = (uint)in[i+1];
14794 (out+j)[2] = (uint)in[i+2];
14795 (out+j)[3] = (uint)in[i+3];
14806 const uint*in = (const uint*)_in;
14807 uint *out = (uint*)_out;
14811 (out+j)[0] = (uint)in[i+0];
14812 (out+j)[1] = (uint)in[i+1];
14813 (out+j)[2] = (uint)in[i+2];
14814 (out+j)[3] = (uint)in[i+3];
14825 const uint*in = (const uint*)_in;
14826 uint *out = (uint*)_out;
14830 (out+j)[0] = (uint)in[i+0];
14831 (out+j)[1] = (uint)in[i+1];
14832 (out+j)[2] = (uint)in[i+2];
14833 (out+j)[3] = (uint)in[i+3];
14834 (out+j)[4] = (uint)in[i+4];
14835 (out+j)[5] = (uint)in[i+5];
14846 const uint*in = (const uint*)_in;
14847 uint *out = (uint*)_out;
14853 (out+j)[0] = (uint)in[i+0];
14854 (out+j)[1] = (uint)in[i+1];
14855 (out+j)[2] = (uint)in[i+2];
14856 (out+j)[3] = (uint)in[i+3];
14857 (out+j)[4] = (uint)in[i+4];
14858 (out+j)[5] = (uint)in[i+5];
14861 (out+j)[0] = (uint)in[i+2];
14862 (out+j)[1] = (uint)in[i-2];
14863 (out+j)[2] = (uint)in[i+0];
14864 (out+j)[3] = (uint)in[i+3];
14865 (out+j)[4] = (uint)in[i+4];
14866 (out+j)[5] = (uint)in[i+6];
14878 const uint*in = (const uint*)_in;
14879 uint *out = (uint*)_out;
14883 (out+j)[0] = (uint)in[i];
14894 const uint*in = (const uint*)_in;
14895 uint *out = (uint*)_out;
14899 (out+j)[0] = (uint)in[i+1];
14900 (out+j)[1] = (uint)in[i];
14911 const uint*in = (const uint*)_in;
14912 uint *out = (uint*)_out;
14916 (out+j)[0] = (uint)in[i+1];
14917 (out+j)[1] = (uint)in[i];
14928 const uint*in = (const uint*)_in;
14929 uint *out = (uint*)_out;
14933 (out+j)[0] = (uint)in[i+1];
14934 (out+j)[1] = (uint)in[i];
14936 (out+j)[0] = (uint)in[start];
14937 (out+j)[1] = (uint)in[i];
14947 const uint*in = (const uint*)_in;
14948 uint *out = (uint*)_out;
14952 (out+j)[0] = (uint)in[i+1];
14953 (out+j)[1] = (uint)in[i+2];
14954 (out+j)[2] = (uint)in[i];
14965 const uint*in = (const uint*)_in;
14966 uint *out = (uint*)_out;
14970 (out+j)[0] = (uint)in[i+1+(i&1)];
14971 (out+j)[1] = (uint)in[i+2-(i&1)];
14972 (out+j)[2] = (uint)in[i];
14983 const uint*in = (const uint*)_in;
14984 uint *out = (uint*)_out;
14988 (out+j)[0] = (uint)in[i+1];
14989 (out+j)[1] = (uint)in[i+2];
14990 (out+j)[2] = (uint)in[start];
15001 const uint*in = (const uint*)_in;
15002 uint *out = (uint*)_out;
15006 (out+j+0)[0] = (uint)in[i+1];
15007 (out+j+0)[1] = (uint)in[i+2];
15008 (out+j+0)[2] = (uint)in[i+0];
15009 (out+j+3)[0] = (uint)in[i+2];
15010 (out+j+3)[1] = (uint)in[i+3];
15011 (out+j+3)[2] = (uint)in[i+0];
15022 const uint*in = (const uint*)_in;
15023 uint *out = (uint*)_out;
15027 (out+j+0)[0] = (uint)in[i+1];
15028 (out+j+0)[1] = (uint)in[i+3];
15029 (out+j+0)[2] = (uint)in[i+0];
15030 (out+j+3)[0] = (uint)in[i+3];
15031 (out+j+3)[1] = (uint)in[i+2];
15032 (out+j+3)[2] = (uint)in[i+0];
15043 const uint*in = (const uint*)_in;
15044 uint *out = (uint*)_out;
15048 (out+j)[0] = (uint)in[i+1];
15049 (out+j)[1] = (uint)in[i+2];
15050 (out+j)[2] = (uint)in[start];
15061 const uint*in = (const uint*)_in;
15062 uint *out = (uint*)_out;
15066 (out+j)[0] = (uint)in[i+3];
15067 (out+j)[1] = (uint)in[i+2];
15068 (out+j)[2] = (uint)in[i+1];
15069 (out+j)[3] = (uint)in[i+0];
15080 const uint*in = (const uint*)_in;
15081 uint *out = (uint*)_out;
15085 (out+j)[0] = (uint)in[i+3];
15086 (out+j)[1] = (uint)in[i+2];
15087 (out+j)[2] = (uint)in[i+1];
15088 (out+j)[3] = (uint)in[i+0];
15099 const uint*in = (const uint*)_in;
15100 uint *out = (uint*)_out;
15104 (out+j)[0] = (uint)in[i+4];
15105 (out+j)[1] = (uint)in[i+5];
15106 (out+j)[2] = (uint)in[i+0];
15107 (out+j)[3] = (uint)in[i+1];
15108 (out+j)[4] = (uint)in[i+2];
15109 (out+j)[5] = (uint)in[i+3];
15120 const uint*in = (const uint*)_in;
15121 uint *out = (uint*)_out;
15127 (out+j)[0] = (uint)in[i+4];
15128 (out+j)[1] = (uint)in[i+5];
15129 (out+j)[2] = (uint)in[i+0];
15130 (out+j)[3] = (uint)in[i+1];
15131 (out+j)[4] = (uint)in[i+2];
15132 (out+j)[5] = (uint)in[i+3];
15135 (out+j)[0] = (uint)in[i+4];
15136 (out+j)[1] = (uint)in[i+6];
15137 (out+j)[2] = (uint)in[i+2];
15138 (out+j)[3] = (uint)in[i-2];
15139 (out+j)[4] = (uint)in[i+0];
15140 (out+j)[5] = (uint)in[i+3];
15152 const uint*in = (const uint*)_in;
15153 uint *out = (uint*)_out;
15157 (out+j)[0] = (uint)in[i];
15168 const uint*in = (const uint*)_in;
15169 uint *out = (uint*)_out;
15173 (out+j)[0] = (uint)in[i+1];
15174 (out+j)[1] = (uint)in[i];
15185 const uint*in = (const uint*)_in;
15186 uint *out = (uint*)_out;
15190 (out+j)[0] = (uint)in[i+1];
15191 (out+j)[1] = (uint)in[i];
15202 const uint*in = (const uint*)_in;
15203 uint *out = (uint*)_out;
15207 (out+j)[0] = (uint)in[i+1];
15208 (out+j)[1] = (uint)in[i];
15210 (out+j)[0] = (uint)in[start];
15211 (out+j)[1] = (uint)in[i];
15221 const uint*in = (const uint*)_in;
15222 uint *out = (uint*)_out;
15226 (out+j)[0] = (uint)in[i+1];
15227 (out+j)[1] = (uint)in[i+2];
15228 (out+j)[2] = (uint)in[i];
15239 const uint*in = (const uint*)_in;
15240 uint *out = (uint*)_out;
15244 (out+j)[0] = (uint)in[i+1+(i&1)];
15245 (out+j)[1] = (uint)in[i+2-(i&1)];
15246 (out+j)[2] = (uint)in[i];
15257 const uint*in = (const uint*)_in;
15258 uint *out = (uint*)_out;
15262 (out+j)[0] = (uint)in[i+1];
15263 (out+j)[1] = (uint)in[i+2];
15264 (out+j)[2] = (uint)in[start];
15275 const uint*in = (const uint*)_in;
15276 uint *out = (uint*)_out;
15306 (out+j+0)[0] = (uint)in[i+1];
15307 (out+j+0)[1] = (uint)in[i+2];
15308 (out+j+0)[2] = (uint)in[i+0];
15309 (out+j+3)[0] = (uint)in[i+2];
15310 (out+j+3)[1] = (uint)in[i+3];
15311 (out+j+3)[2] = (uint)in[i+0];
15322 const uint*in = (const uint*)_in;
15323 uint *out = (uint*)_out;
15353 (out+j+0)[0] = (uint)in[i+1];
15354 (out+j+0)[1] = (uint)in[i+3];
15355 (out+j+0)[2] = (uint)in[i+0];
15356 (out+j+3)[0] = (uint)in[i+3];
15357 (out+j+3)[1] = (uint)in[i+2];
15358 (out+j+3)[2] = (uint)in[i+0];
15369 const uint*in = (const uint*)_in;
15370 uint *out = (uint*)_out;
15396 (out+j)[0] = (uint)in[i+1];
15397 (out+j)[1] = (uint)in[i+2];
15398 (out+j)[2] = (uint)in[start];
15409 const uint*in = (const uint*)_in;
15410 uint *out = (uint*)_out;
15414 (out+j)[0] = (uint)in[i+3];
15415 (out+j)[1] = (uint)in[i+2];
15416 (out+j)[2] = (uint)in[i+1];
15417 (out+j)[3] = (uint)in[i+0];
15428 const uint*in = (const uint*)_in;
15429 uint *out = (uint*)_out;
15433 (out+j)[0] = (uint)in[i+3];
15434 (out+j)[1] = (uint)in[i+2];
15435 (out+j)[2] = (uint)in[i+1];
15436 (out+j)[3] = (uint)in[i+0];
15447 const uint*in = (const uint*)_in;
15448 uint *out = (uint*)_out;
15452 (out+j)[0] = (uint)in[i+4];
15453 (out+j)[1] = (uint)in[i+5];
15454 (out+j)[2] = (uint)in[i+0];
15455 (out+j)[3] = (uint)in[i+1];
15456 (out+j)[4] = (uint)in[i+2];
15457 (out+j)[5] = (uint)in[i+3];
15468 const uint*in = (const uint*)_in;
15469 uint *out = (uint*)_out;
15475 (out+j)[0] = (uint)in[i+4];
15476 (out+j)[1] = (uint)in[i+5];
15477 (out+j)[2] = (uint)in[i+0];
15478 (out+j)[3] = (uint)in[i+1];
15479 (out+j)[4] = (uint)in[i+2];
15480 (out+j)[5] = (uint)in[i+3];
15483 (out+j)[0] = (uint)in[i+4];
15484 (out+j)[1] = (uint)in[i+6];
15485 (out+j)[2] = (uint)in[i+2];
15486 (out+j)[3] = (uint)in[i-2];
15487 (out+j)[4] = (uint)in[i+0];
15488 (out+j)[5] = (uint)in[i+3];
15500 const uint*in = (const uint*)_in;
15501 uint *out = (uint*)_out;
15505 (out+j)[0] = (uint)in[i];
15516 const uint*in = (const uint*)_in;
15517 uint *out = (uint*)_out;
15521 (out+j)[0] = (uint)in[i+1];
15522 (out+j)[1] = (uint)in[i];
15533 const uint*in = (const uint*)_in;
15534 uint *out = (uint*)_out;
15538 (out+j)[0] = (uint)in[i+1];
15539 (out+j)[1] = (uint)in[i];
15550 const uint*in = (const uint*)_in;
15551 uint *out = (uint*)_out;
15555 (out+j)[0] = (uint)in[i+1];
15556 (out+j)[1] = (uint)in[i];
15558 (out+j)[0] = (uint)in[start];
15559 (out+j)[1] = (uint)in[i];
15569 const uint*in = (const uint*)_in;
15570 uint *out = (uint*)_out;
15574 (out+j)[0] = (uint)in[i+2];
15575 (out+j)[1] = (uint)in[i];
15576 (out+j)[2] = (uint)in[i+1];
15587 const uint*in = (const uint*)_in;
15588 uint *out = (uint*)_out;
15592 (out+j)[0] = (uint)in[i+2];
15593 (out+j)[1] = (uint)in[i+(i&1)];
15594 (out+j)[2] = (uint)in[i+1-(i&1)];
15605 const uint*in = (const uint*)_in;
15606 uint *out = (uint*)_out;
15610 (out+j)[0] = (uint)in[i+2];
15611 (out+j)[1] = (uint)in[start];
15612 (out+j)[2] = (uint)in[i+1];
15623 const uint*in = (const uint*)_in;
15624 uint *out = (uint*)_out;
15628 (out+j+0)[0] = (uint)in[i+3];
15629 (out+j+0)[1] = (uint)in[i+0];
15630 (out+j+0)[2] = (uint)in[i+1];
15631 (out+j+3)[0] = (uint)in[i+3];
15632 (out+j+3)[1] = (uint)in[i+1];
15633 (out+j+3)[2] = (uint)in[i+2];
15644 const uint*in = (const uint*)_in;
15645 uint *out = (uint*)_out;
15649 (out+j+0)[0] = (uint)in[i+3];
15650 (out+j+0)[1] = (uint)in[i+2];
15651 (out+j+0)[2] = (uint)in[i+0];
15652 (out+j+3)[0] = (uint)in[i+3];
15653 (out+j+3)[1] = (uint)in[i+0];
15654 (out+j+3)[2] = (uint)in[i+1];
15665 const uint*in = (const uint*)_in;
15666 uint *out = (uint*)_out;
15670 (out+j)[0] = (uint)in[start];
15671 (out+j)[1] = (uint)in[i+1];
15672 (out+j)[2] = (uint)in[i+2];
15683 const uint*in = (const uint*)_in;
15684 uint *out = (uint*)_out;
15688 (out+j)[0] = (uint)in[i+3];
15689 (out+j)[1] = (uint)in[i+2];
15690 (out+j)[2] = (uint)in[i+1];
15691 (out+j)[3] = (uint)in[i+0];
15702 const uint*in = (const uint*)_in;
15703 uint *out = (uint*)_out;
15707 (out+j)[0] = (uint)in[i+3];
15708 (out+j)[1] = (uint)in[i+2];
15709 (out+j)[2] = (uint)in[i+1];
15710 (out+j)[3] = (uint)in[i+0];
15721 const uint*in = (const uint*)_in;
15722 uint *out = (uint*)_out;
15726 (out+j)[0] = (uint)in[i+4];
15727 (out+j)[1] = (uint)in[i+5];
15728 (out+j)[2] = (uint)in[i+0];
15729 (out+j)[3] = (uint)in[i+1];
15730 (out+j)[4] = (uint)in[i+2];
15731 (out+j)[5] = (uint)in[i+3];
15742 const uint*in = (const uint*)_in;
15743 uint *out = (uint*)_out;
15749 (out+j)[0] = (uint)in[i+4];
15750 (out+j)[1] = (uint)in[i+5];
15751 (out+j)[2] = (uint)in[i+0];
15752 (out+j)[3] = (uint)in[i+1];
15753 (out+j)[4] = (uint)in[i+2];
15754 (out+j)[5] = (uint)in[i+3];
15757 (out+j)[0] = (uint)in[i+4];
15758 (out+j)[1] = (uint)in[i+6];
15759 (out+j)[2] = (uint)in[i+2];
15760 (out+j)[3] = (uint)in[i-2];
15761 (out+j)[4] = (uint)in[i+0];
15762 (out+j)[5] = (uint)in[i+3];
15774 const uint*in = (const uint*)_in;
15775 uint *out = (uint*)_out;
15779 (out+j)[0] = (uint)in[i];
15790 const uint*in = (const uint*)_in;
15791 uint *out = (uint*)_out;
15795 (out+j)[0] = (uint)in[i+1];
15796 (out+j)[1] = (uint)in[i];
15807 const uint*in = (const uint*)_in;
15808 uint *out = (uint*)_out;
15812 (out+j)[0] = (uint)in[i+1];
15813 (out+j)[1] = (uint)in[i];
15824 const uint*in = (const uint*)_in;
15825 uint *out = (uint*)_out;
15829 (out+j)[0] = (uint)in[i+1];
15830 (out+j)[1] = (uint)in[i];
15832 (out+j)[0] = (uint)in[start];
15833 (out+j)[1] = (uint)in[i];
15843 const uint*in = (const uint*)_in;
15844 uint *out = (uint*)_out;
15848 (out+j)[0] = (uint)in[i+2];
15849 (out+j)[1] = (uint)in[i];
15850 (out+j)[2] = (uint)in[i+1];
15861 const uint*in = (const uint*)_in;
15862 uint *out = (uint*)_out;
15866 (out+j)[0] = (uint)in[i+2];
15867 (out+j)[1] = (uint)in[i+(i&1)];
15868 (out+j)[2] = (uint)in[i+1-(i&1)];
15879 const uint*in = (const uint*)_in;
15880 uint *out = (uint*)_out;
15884 (out+j)[0] = (uint)in[i+2];
15885 (out+j)[1] = (uint)in[start];
15886 (out+j)[2] = (uint)in[i+1];
15897 const uint*in = (const uint*)_in;
15898 uint *out = (uint*)_out;
15928 (out+j+0)[0] = (uint)in[i+3];
15929 (out+j+0)[1] = (uint)in[i+0];
15930 (out+j+0)[2] = (uint)in[i+1];
15931 (out+j+3)[0] = (uint)in[i+3];
15932 (out+j+3)[1] = (uint)in[i+1];
15933 (out+j+3)[2] = (uint)in[i+2];
15944 const uint*in = (const uint*)_in;
15945 uint *out = (uint*)_out;
15975 (out+j+0)[0] = (uint)in[i+3];
15976 (out+j+0)[1] = (uint)in[i+2];
15977 (out+j+0)[2] = (uint)in[i+0];
15978 (out+j+3)[0] = (uint)in[i+3];
15979 (out+j+3)[1] = (uint)in[i+0];
15980 (out+j+3)[2] = (uint)in[i+1];
15991 const uint*in = (const uint*)_in;
15992 uint *out = (uint*)_out;
16018 (out+j)[0] = (uint)in[start];
16019 (out+j)[1] = (uint)in[i+1];
16020 (out+j)[2] = (uint)in[i+2];
16031 const uint*in = (const uint*)_in;
16032 uint *out = (uint*)_out;
16036 (out+j)[0] = (uint)in[i+3];
16037 (out+j)[1] = (uint)in[i+2];
16038 (out+j)[2] = (uint)in[i+1];
16039 (out+j)[3] = (uint)in[i+0];
16050 const uint*in = (const uint*)_in;
16051 uint *out = (uint*)_out;
16055 (out+j)[0] = (uint)in[i+3];
16056 (out+j)[1] = (uint)in[i+2];
16057 (out+j)[2] = (uint)in[i+1];
16058 (out+j)[3] = (uint)in[i+0];
16069 const uint*in = (const uint*)_in;
16070 uint *out = (uint*)_out;
16074 (out+j)[0] = (uint)in[i+4];
16075 (out+j)[1] = (uint)in[i+5];
16076 (out+j)[2] = (uint)in[i+0];
16077 (out+j)[3] = (uint)in[i+1];
16078 (out+j)[4] = (uint)in[i+2];
16079 (out+j)[5] = (uint)in[i+3];
16090 const uint*in = (const uint*)_in;
16091 uint *out = (uint*)_out;
16097 (out+j)[0] = (uint)in[i+4];
16098 (out+j)[1] = (uint)in[i+5];
16099 (out+j)[2] = (uint)in[i+0];
16100 (out+j)[3] = (uint)in[i+1];
16101 (out+j)[4] = (uint)in[i+2];
16102 (out+j)[5] = (uint)in[i+3];
16105 (out+j)[0] = (uint)in[i+4];
16106 (out+j)[1] = (uint)in[i+6];
16107 (out+j)[2] = (uint)in[i+2];
16108 (out+j)[3] = (uint)in[i-2];
16109 (out+j)[4] = (uint)in[i+0];
16110 (out+j)[5] = (uint)in[i+3];
16122 const uint*in = (const uint*)_in;
16123 uint *out = (uint*)_out;
16127 (out+j)[0] = (uint)in[i];
16138 const uint*in = (const uint*)_in;
16139 uint *out = (uint*)_out;
16143 (out+j)[0] = (uint)in[i];
16144 (out+j)[1] = (uint)in[i+1];
16155 const uint*in = (const uint*)_in;
16156 uint *out = (uint*)_out;
16160 (out+j)[0] = (uint)in[i];
16161 (out+j)[1] = (uint)in[i+1];
16172 const uint*in = (const uint*)_in;
16173 uint *out = (uint*)_out;
16177 (out+j)[0] = (uint)in[i];
16178 (out+j)[1] = (uint)in[i+1];
16180 (out+j)[0] = (uint)in[i];
16181 (out+j)[1] = (uint)in[start];
16191 const uint*in = (const uint*)_in;
16192 uint *out = (uint*)_out;
16196 (out+j)[0] = (uint)in[i];
16197 (out+j)[1] = (uint)in[i+1];
16198 (out+j)[2] = (uint)in[i+2];
16209 const uint*in = (const uint*)_in;
16210 uint *out = (uint*)_out;
16214 (out+j)[0] = (uint)in[i+(i&1)];
16215 (out+j)[1] = (uint)in[i+1-(i&1)];
16216 (out+j)[2] = (uint)in[i+2];
16227 const uint*in = (const uint*)_in;
16228 uint *out = (uint*)_out;
16232 (out+j)[0] = (uint)in[start];
16233 (out+j)[1] = (uint)in[i+1];
16234 (out+j)[2] = (uint)in[i+2];
16245 const uint*in = (const uint*)_in;
16246 uint *out = (uint*)_out;
16250 (out+j+0)[0] = (uint)in[i+0];
16251 (out+j+0)[1] = (uint)in[i+1];
16252 (out+j+0)[2] = (uint)in[i+3];
16253 (out+j+3)[0] = (uint)in[i+1];
16254 (out+j+3)[1] = (uint)in[i+2];
16255 (out+j+3)[2] = (uint)in[i+3];
16266 const uint*in = (const uint*)_in;
16267 uint *out = (uint*)_out;
16271 (out+j+0)[0] = (uint)in[i+2];
16272 (out+j+0)[1] = (uint)in[i+0];
16273 (out+j+0)[2] = (uint)in[i+3];
16274 (out+j+3)[0] = (uint)in[i+0];
16275 (out+j+3)[1] = (uint)in[i+1];
16276 (out+j+3)[2] = (uint)in[i+3];
16287 const uint*in = (const uint*)_in;
16288 uint *out = (uint*)_out;
16292 (out+j)[0] = (uint)in[i+1];
16293 (out+j)[1] = (uint)in[i+2];
16294 (out+j)[2] = (uint)in[start];
16305 const uint*in = (const uint*)_in;
16306 uint *out = (uint*)_out;
16310 (out+j)[0] = (uint)in[i+0];
16311 (out+j)[1] = (uint)in[i+1];
16312 (out+j)[2] = (uint)in[i+2];
16313 (out+j)[3] = (uint)in[i+3];
16324 const uint*in = (const uint*)_in;
16325 uint *out = (uint*)_out;
16329 (out+j)[0] = (uint)in[i+0];
16330 (out+j)[1] = (uint)in[i+1];
16331 (out+j)[2] = (uint)in[i+2];
16332 (out+j)[3] = (uint)in[i+3];
16343 const uint*in = (const uint*)_in;
16344 uint *out = (uint*)_out;
16348 (out+j)[0] = (uint)in[i+0];
16349 (out+j)[1] = (uint)in[i+1];
16350 (out+j)[2] = (uint)in[i+2];
16351 (out+j)[3] = (uint)in[i+3];
16352 (out+j)[4] = (uint)in[i+4];
16353 (out+j)[5] = (uint)in[i+5];
16364 const uint*in = (const uint*)_in;
16365 uint *out = (uint*)_out;
16371 (out+j)[0] = (uint)in[i+0];
16372 (out+j)[1] = (uint)in[i+1];
16373 (out+j)[2] = (uint)in[i+2];
16374 (out+j)[3] = (uint)in[i+3];
16375 (out+j)[4] = (uint)in[i+4];
16376 (out+j)[5] = (uint)in[i+5];
16379 (out+j)[0] = (uint)in[i+2];
16380 (out+j)[1] = (uint)in[i-2];
16381 (out+j)[2] = (uint)in[i+0];
16382 (out+j)[3] = (uint)in[i+3];
16383 (out+j)[4] = (uint)in[i+4];
16384 (out+j)[5] = (uint)in[i+6];
16396 const uint*in = (const uint*)_in;
16397 uint *out = (uint*)_out;
16401 (out+j)[0] = (uint)in[i];
16412 const uint*in = (const uint*)_in;
16413 uint *out = (uint*)_out;
16417 (out+j)[0] = (uint)in[i];
16418 (out+j)[1] = (uint)in[i+1];
16429 const uint*in = (const uint*)_in;
16430 uint *out = (uint*)_out;
16434 (out+j)[0] = (uint)in[i];
16435 (out+j)[1] = (uint)in[i+1];
16446 const uint*in = (const uint*)_in;
16447 uint *out = (uint*)_out;
16451 (out+j)[0] = (uint)in[i];
16452 (out+j)[1] = (uint)in[i+1];
16454 (out+j)[0] = (uint)in[i];
16455 (out+j)[1] = (uint)in[start];
16465 const uint*in = (const uint*)_in;
16466 uint *out = (uint*)_out;
16470 (out+j)[0] = (uint)in[i];
16471 (out+j)[1] = (uint)in[i+1];
16472 (out+j)[2] = (uint)in[i+2];
16483 const uint*in = (const uint*)_in;
16484 uint *out = (uint*)_out;
16488 (out+j)[0] = (uint)in[i+(i&1)];
16489 (out+j)[1] = (uint)in[i+1-(i&1)];
16490 (out+j)[2] = (uint)in[i+2];
16501 const uint*in = (const uint*)_in;
16502 uint *out = (uint*)_out;
16506 (out+j)[0] = (uint)in[start];
16507 (out+j)[1] = (uint)in[i+1];
16508 (out+j)[2] = (uint)in[i+2];
16519 const uint*in = (const uint*)_in;
16520 uint *out = (uint*)_out;
16550 (out+j+0)[0] = (uint)in[i+0];
16551 (out+j+0)[1] = (uint)in[i+1];
16552 (out+j+0)[2] = (uint)in[i+3];
16553 (out+j+3)[0] = (uint)in[i+1];
16554 (out+j+3)[1] = (uint)in[i+2];
16555 (out+j+3)[2] = (uint)in[i+3];
16566 const uint*in = (const uint*)_in;
16567 uint *out = (uint*)_out;
16597 (out+j+0)[0] = (uint)in[i+2];
16598 (out+j+0)[1] = (uint)in[i+0];
16599 (out+j+0)[2] = (uint)in[i+3];
16600 (out+j+3)[0] = (uint)in[i+0];
16601 (out+j+3)[1] = (uint)in[i+1];
16602 (out+j+3)[2] = (uint)in[i+3];
16613 const uint*in = (const uint*)_in;
16614 uint *out = (uint*)_out;
16640 (out+j)[0] = (uint)in[i+1];
16641 (out+j)[1] = (uint)in[i+2];
16642 (out+j)[2] = (uint)in[start];
16653 const uint*in = (const uint*)_in;
16654 uint *out = (uint*)_out;
16658 (out+j)[0] = (uint)in[i+0];
16659 (out+j)[1] = (uint)in[i+1];
16660 (out+j)[2] = (uint)in[i+2];
16661 (out+j)[3] = (uint)in[i+3];
16672 const uint*in = (const uint*)_in;
16673 uint *out = (uint*)_out;
16677 (out+j)[0] = (uint)in[i+0];
16678 (out+j)[1] = (uint)in[i+1];
16679 (out+j)[2] = (uint)in[i+2];
16680 (out+j)[3] = (uint)in[i+3];
16691 const uint*in = (const uint*)_in;
16692 uint *out = (uint*)_out;
16696 (out+j)[0] = (uint)in[i+0];
16697 (out+j)[1] = (uint)in[i+1];
16698 (out+j)[2] = (uint)in[i+2];
16699 (out+j)[3] = (uint)in[i+3];
16700 (out+j)[4] = (uint)in[i+4];
16701 (out+j)[5] = (uint)in[i+5];
16712 const uint*in = (const uint*)_in;
16713 uint *out = (uint*)_out;
16719 (out+j)[0] = (uint)in[i+0];
16720 (out+j)[1] = (uint)in[i+1];
16721 (out+j)[2] = (uint)in[i+2];
16722 (out+j)[3] = (uint)in[i+3];
16723 (out+j)[4] = (uint)in[i+4];
16724 (out+j)[5] = (uint)in[i+5];
16727 (out+j)[0] = (uint)in[i+2];
16728 (out+j)[1] = (uint)in[i-2];
16729 (out+j)[2] = (uint)in[i+0];
16730 (out+j)[3] = (uint)in[i+3];
16731 (out+j)[4] = (uint)in[i+4];
16732 (out+j)[5] = (uint)in[i+6];