/* GRF allocation:
   g1~g30:   constant buffer
             g1~g2:intra IQ matrix
             g3~g4:non intra IQ matrix
             g5~g20:IDCT table
   g31:      thread payload
   g32:      message descriptor for reading reference data
   g58~g81:  reference data
   g82:      thread payload backup
   g83~g106: IDCT data
*/

// Prediction with both a forward and a backward reference: each reference
// is fetched through library code reached via g21.0, the two results are
// averaged, and control is finally handed to the library code that adds the
// reference and IDCT data.
//
// Registers touched here in addition to the table above:
//   g21.0      base value that the jump offsets below are added to
//   g108~g123  copy of the forward reference while the backward one is read
//   g126.8     receives ip before every library jump except the last one
//   a0.0       set to the GRF byte offset of the motion vector words in g82
//              (written before the Y reads only)
// Library offsets used: 0x50 IDCT, 0x0 reference read (Y),
// 0x10 reference read (UV), 0x40 add reference and IDCT data.

// Disabled block kept from the original. It fills m1..m8 with constants,
// issues write messages and ends the thread with an EOT send.
// NOTE(review): looks like a debug path - confirm before removing it.
/*
mov (8) g82.0<1>UD g31.0<8,8,1>UD {align1};
define(`UV_red',`0xffffffffUD')
define(`UV_white',`0x7f7f7f7fUD')
define(`UV_green',`0x00000000UD')
mov(1) g31.8<1>UD 0x000f000fUD { align1 };
mov(16) m1<1>UD 0xFFFFFFFFUD {align1 compr};
mov(16) m3<1>UD 0xFFFFFFFFUD {align1 compr};
mov(16) m5<1>UD 0xFFFFFFFFUD {align1 compr};
mov(16) m7<1>UD 0xFFFFFFFFUD {align1 compr};
send (16) 0 acc0<1>UW g31<8,8,1>UW write(0, 0, 2, 0) mlen 9 rlen 0 { align1 };
shr (2) g31.0<1>UD g82.0<2,2,1>UD 1UW {align1};
mov(1) g31.8<1>UD 0x00070007UD { align1 };
mov (16) m1<1>UD UV_green {align1 compr};
send (16) 0 acc0<1>UW g31<8,8,1>UW write(2, 0, 2, 0) mlen 3 rlen 0 { align1 };
send (16) 0 acc0<1>UW g31<8,8,1>UW write(1, 0, 2, 0) mlen 3 rlen 0 { align1 };
send (16) 0 acc0<1>UW g0<8,8,1>UW
    thread_spawner(0, 0, 0) mlen 1 rlen 0 { align1 EOT};
*/

// Keep a pristine copy of the thread payload: g31 is modified below.
mov (8) g82.0<1>UD g31.0<8,8,1>UD {align1};
// Record ip in g126.8, then jump. Presumably the library code uses g126.8
// to come back to the instruction after the jump - TODO confirm in the lib.
mov (1) g126.8<1>UD ip {align1};
add (1) ip g21.0<1,1,1>UD 0x50UD {align1};      //jump to the lib to do IDCT

//Y, Forward
// Halve the two forward motion vector words (arithmetic, sign kept).
// NOTE(review): presumably converts half-pel units to whole pixels - confirm.
asr (2) g31.14<1>W g82.14<2,2,1>W 1W {align1};
// g32.0/g32.4 = the two dwords at g31.0 plus the halved vector.
// NOTE(review): g31.0/g31.4 are presumably the block x/y origin - confirm.
add (2) g32.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};
mov (1) g32.16<1>UW 0UW {align1};               //0:forward 1:backward
// 0x0A4E = 82*32 + 14, i.e. the byte offset of g82.14 in the GRF.
mov (1) a0.0<1>UD 0x0A4EUD {align1};            //g82.14,motion vector
mov (1) g126.8<1>UD ip {align1};
add (1) ip g21.0<1,1,1>UD 0x0UD {align1};       //jump to the lib to read reference data

//Save Forward
// Copy g58~g73 to g108~g123 (two registers per compressed mov) so that the
// backward read below can reuse g58~g73.
mov (16) g108.0<1>UD g58.0<16,16,1>UD {align1 compr};
mov (16) g110.0<1>UD g60.0<16,16,1>UD {align1 compr};
mov (16) g112.0<1>UD g62.0<16,16,1>UD {align1 compr};
mov (16) g114.0<1>UD g64.0<16,16,1>UD {align1 compr};
mov (16) g116.0<1>UD g66.0<16,16,1>UD {align1 compr};
mov (16) g118.0<1>UD g68.0<16,16,1>UD {align1 compr};
mov (16) g120.0<1>UD g70.0<16,16,1>UD {align1 compr};
mov (16) g122.0<1>UD g72.0<16,16,1>UD {align1 compr};

//Y, Backward
// Same steps as the forward case, using the vector words at g82.18.
asr (2) g31.14<1>W g82.18<2,2,1>W 1W {align1};
add (2) g32.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};
mov (1) g32.16<1>UW 1UW {align1};               //0:forward 1:backward
// 0x0A52 = 82*32 + 18, i.e. the byte offset of g82.18 in the GRF.
mov (1) a0.0<1>UD 0x0A52UD {align1};            //g82.18,motion vector
mov (1) g126.8<1>UD ip {align1};
add (1) ip g21.0<1,1,1>UD 0x0UD {align1};       //jump to the lib to read reference data

//Average Forward and Backward
// Word-wise avg of the backward data in g58~g73 with the saved forward
// copy in g108~g123; the result replaces g58~g73.
avg (32) g58.0<1>UW g58.0<16,16,1>UW g108.0<16,16,1>UW {align1 compr};
avg (32) g60.0<1>UW g60.0<16,16,1>UW g110.0<16,16,1>UW {align1 compr};
avg (32) g62.0<1>UW g62.0<16,16,1>UW g112.0<16,16,1>UW {align1 compr};
avg (32) g64.0<1>UW g64.0<16,16,1>UW g114.0<16,16,1>UW {align1 compr};
avg (32) g66.0<1>UW g66.0<16,16,1>UW g116.0<16,16,1>UW {align1 compr};
avg (32) g68.0<1>UW g68.0<16,16,1>UW g118.0<16,16,1>UW {align1 compr};
avg (32) g70.0<1>UW g70.0<16,16,1>UW g120.0<16,16,1>UW {align1 compr};
avg (32) g72.0<1>UW g72.0<16,16,1>UW g122.0<16,16,1>UW {align1 compr};

//UV, Forward
// Halve the two dwords at g31.0 in place. This is done once and the halved
// values are reused by the UV backward read further down.
shr (2) g31.0<1>UD g31.0<2,2,1>UD 1UD {align1};
// The vector words are divided by 4 here, against 2 for Y.
asr (2) g31.14<1>W g82.14<2,2,1>W 2W {align1};
add (2) g32.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};
mov (1) g32.16<1>UW 0UW {align1};               //0:forward 1:backward
// NOTE(review): a0.0 is not rewritten before either UV read, unlike the Y
// reads; it is last set to 0x0A52 above. Confirm that the code at offset
// 0x10 does not depend on a0.0.
mov (1) g126.8<1>UD ip {align1};
add (1) ip g21.0<1,1,1>UD 0x10UD {align1};      //jump to the lib to read reference data

//Save UV Forward
// Copy g74~g81 to g108~g115; the Y copy held there is no longer needed
// because the Y average has already been written to g58~g73.
mov (32) g108.0<1>UW g74.0<16,16,1>UW {align1 compr};
mov (32) g110.0<1>UW g76.0<16,16,1>UW {align1 compr};
mov (32) g112.0<1>UW g78.0<16,16,1>UW {align1 compr};
mov (32) g114.0<1>UW g80.0<16,16,1>UW {align1 compr};

//UV, Backward
asr (2) g31.14<1>W g82.18<2,2,1>W 2W {align1};
add (2) g32.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};
mov (1) g32.16<1>UW 1UW {align1};               //0:forward 1:backward
mov (1) g126.8<1>UD ip {align1};
add (1) ip g21.0<1,1,1>UD 0x10UD {align1};      //jump to the lib to read reference data

//Average Forward and Backward
// Word-wise avg of g74~g81 with the saved forward copy in g108~g115; the
// result replaces g74~g81.
avg (32) g74.0<1>UW g74.0<16,16,1>UW g108.0<16,16,1>UW {align1 compr};
avg (32) g76.0<1>UW g76.0<16,16,1>UW g110.0<16,16,1>UW {align1 compr};
avg (32) g78.0<1>UW g78.0<16,16,1>UW g112.0<16,16,1>UW {align1 compr};
avg (32) g80.0<1>UW g80.0<16,16,1>UW g114.0<16,16,1>UW {align1 compr};

// ip is not saved to g126.8 before this last jump.
// NOTE(review): presumably control does not come back to this file - confirm.
add (1) ip g21.0<1,1,1>UD 0x40UD {align1};      //jump to the lib to add the reference and idct data