1/* GRF allocation:
2   g1~g30: constant buffer
3           g1~g2:intra IQ matrix
4           g3~g4:non intra IQ matrix
5           g5~g20:IDCT table
6   g31:    thread payload 
7   g58~g81:reference data
8   g82:    thread payload backup
9   g84~g107:IDCT data
10   g115:   message descriptor for reading reference data   */
11mov (8) g82.0<1>UD g31.0<8,8,1>UD {align1};   // back up the thread payload (g31) into g82; g31 is overwritten further down
12mov (1) g126.8<1>UD ip {align1};   // record the current ip in g126.8 (presumably the return address the library jumps back to - confirm against the lib)
13add (1) ip g21.0<1,1,1>UD 0x50UD {align1};          //jump to the lib (g21.0 + 0x50) to do IQ and IDCT
14
15/* field 0 forward prediction of Y: fill in g115 (read descriptor), jump to the library reader, then copy g32~g39 into the even registers g58~g72 */
16asr (2) g31.14<1>W g82.14<2,2,1>W 1W {align1};   // two motion vector words at g82.14, arithmetically shifted right by 1, stored at g31.14
17add (2) g115.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g115.0/g115.4 = first two payload dwords + shifted vector (presumably block x/y origin + displacement - confirm)
18and (1) g115.4<1>UD g115.4<1,1,1>UD 0xFFFFFFFEUD {align1};   // clear bit 0 of the second dword
19and.nz (1) null g82.31<1,1,1>UB 0x1UW {align1};   // f0 = bit 0 of byte g82.31 is set (presumably the field-select flag for this vector - confirm)
20(f0) add (1) g115.4<1>UD g115.4<1,1,1>UD 1UD {align1};   // if so, set bit 0 of the second dword again
21mov (1) g115.16<1>UW 0UW {align1};           //0:forward 1:backward
22mov (1) a0.0<1>UD 0x0A4EUD {align1};         //g82.14,motion vector (0x0A4E = 82*32 + 14)
23mov (1) g126.8<1>UD ip {align1};   // record ip in g126.8 right before the jump (presumably the library's return address - confirm)
24add (1) ip g21.0<1,1,1>UD 0x20UD {align1};   //jump to the lib (g21.0 + 0x20) to read reference data
25
26mov (8) g58.0<1>UD g32.0<8,8,1>UD {align1};   // copy g32~g39 (presumably what the reader just fetched - confirm) to g58, g60, ... g72;
27mov (8) g60.0<1>UD g33.0<8,8,1>UD {align1};   // the odd registers g59~g73 in between are written by the field 1 section
28mov (8) g62.0<1>UD g34.0<8,8,1>UD {align1};
29mov (8) g64.0<1>UD g35.0<8,8,1>UD {align1};
30mov (8) g66.0<1>UD g36.0<8,8,1>UD {align1};
31mov (8) g68.0<1>UD g37.0<8,8,1>UD {align1};
32mov (8) g70.0<1>UD g38.0<8,8,1>UD {align1};
33mov (8) g72.0<1>UD g39.0<8,8,1>UD {align1};
34
35/* field 1 forward prediction of Y: same sequence as field 0, but with the vector at g82.22, flag bit 0x4, and the odd destination registers g59~g73 */
36asr (2) g31.14<1>W g82.22<2,2,1>W 1W {align1};   // two motion vector words at g82.22, arithmetically shifted right by 1, stored at g31.14
37add (2) g115.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g115.0/g115.4 = first two payload dwords + shifted vector
38and (1) g115.4<1>UD g115.4<1,1,1>UD 0xFFFFFFFEUD {align1};   // clear bit 0 of the second dword
39and.nz (1) null g82.31<1,1,1>UB 0x4UW {align1};   // f0 = bit 2 of byte g82.31 is set (presumably the field-select flag for this vector - confirm)
40(f0) add (1) g115.4<1>UD g115.4<1,1,1>UD 1UD {align1};   // if so, set bit 0 of the second dword again
41mov (1) a0.0<1>UD 0x0A56UD {align1};         //g82.22,motion vector (0x0A56 = 82*32 + 22)
42mov (1) g126.8<1>UD ip {align1};   // record ip in g126.8 right before the jump; g115.16 is not rewritten here, so it still holds 0 (forward)
43add (1) ip g21.0<1,1,1>UD 0x20UD {align1};   //jump to the lib (g21.0 + 0x20) to read reference data
44
45mov (8) g59.0<1>UD g32.0<8,8,1>UD {align1};   // copy g32~g39 to g59, g61, ... g73, i.e. the registers
46mov (8) g61.0<1>UD g33.0<8,8,1>UD {align1};   // interleaved with the field 0 copies in g58, g60, ... g72
47mov (8) g63.0<1>UD g34.0<8,8,1>UD {align1};
48mov (8) g65.0<1>UD g35.0<8,8,1>UD {align1};
49mov (8) g67.0<1>UD g36.0<8,8,1>UD {align1};
50mov (8) g69.0<1>UD g37.0<8,8,1>UD {align1};
51mov (8) g71.0<1>UD g38.0<8,8,1>UD {align1};
52mov (8) g73.0<1>UD g39.0<8,8,1>UD {align1};
53
54/* field 0 forward prediction of UV: halve the payload origin, shift the vector by 2 instead of 1, read through lib entry 0x30, store into g74~g81 */
55shr (2) g31.0<1>UD g31.0<2,2,1>UD 1UD {align1};   // halve the first two payload dwords in place (presumably chroma is half the luma size - confirm); stays in effect for the field 1 UV section too
56
57asr (2) g31.14<1>W g82.14<2,2,1>W 2W {align1};   // two motion vector words at g82.14, arithmetically shifted right by 2, stored at g31.14
58add (2) g115.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g115.0/g115.4 = halved payload dwords + shifted vector
59and (1) g115.4<1>UD g115.4<1,1,1>UD 0xFFFFFFFEUD {align1};   // clear bit 0 of the second dword
60and.nz (1) null g82.31<1,1,1>UB 0x1UW {align1};   // f0 = bit 0 of byte g82.31 is set (same flag bit as the field 0 forward Y section)
61(f0) add (1) g115.4<1>UD g115.4<1,1,1>UD 1UD {align1};   // if so, set bit 0 of the second dword again
62mov (1) a0.0<1>UD 0x0A4EUD {align1};         //g82.14,motion vector (0x0A4E = 82*32 + 14)
63mov (1) g126.8<1>UD ip {align1};   // record ip in g126.8 right before the jump
64add (1) ip g21.0<1,1,1>UD 0x30UD {align1};   //jump to the lib (g21.0 + 0x30) to read reference data; different entry offset than the Y sections (0x20)
65
66mov (16) g74.0<1>UW g32.0<8,8,1>UW {align1 compr};   // 16 words per instruction, written from byte 0 of the destination registers;
67mov (16) g76.0<1>UW g34.0<8,8,1>UW {align1 compr};   // NOTE(review): with compr each instruction presumably covers a register pair
68mov (16) g78.0<1>UW g36.0<8,8,1>UW {align1 compr};   // (g32/g33 -> g74/g75, ...), which matches the step of 2 in the register numbers - confirm
69mov (16) g80.0<1>UW g38.0<8,8,1>UW {align1 compr};
70
71/* field 1 forward prediction of UV: same as field 0 UV, but with the vector at g82.22, flag bit 0x4, and destinations at byte offset 16 of g74~g81 */
72asr (2) g31.14<1>W g82.22<2,2,1>W 2W {align1};   // two motion vector words at g82.22, arithmetically shifted right by 2, stored at g31.14
73add (2) g115.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g115.0/g115.4 = payload dwords (already halved by the field 0 UV section) + shifted vector
74and (1) g115.4<1>UD g115.4<1,1,1>UD 0xFFFFFFFEUD {align1};   // clear bit 0 of the second dword
75and.nz (1) null g82.31<1,1,1>UB 0x4UW {align1};   // f0 = bit 2 of byte g82.31 is set (same flag bit as the field 1 forward Y section)
76(f0) add (1) g115.4<1>UD g115.4<1,1,1>UD 1UD {align1};   // if so, set bit 0 of the second dword again
77mov (1) a0.0<1>UD 0x0A56UD {align1};         //g82.22,motion vector (0x0A56 = 82*32 + 22)
78mov (1) g126.8<1>UD ip {align1};   // record ip in g126.8 right before the jump
79add (1) ip g21.0<1,1,1>UD 0x30UD {align1};   //jump to the lib (g21.0 + 0x30) to read reference data
80
81mov (16) g74.16<1>UW g32.0<8,8,1>UW {align1 compr};   // same sources as the field 0 UV copies, but written starting at byte offset 16
82mov (16) g76.16<1>UW g34.0<8,8,1>UW {align1 compr};   // of each destination register, so the field 0 data at byte offset 0 is kept
83mov (16) g78.16<1>UW g36.0<8,8,1>UW {align1 compr};   // (assuming each compr half writes 16 bytes per register - confirm)
84mov (16) g80.16<1>UW g38.0<8,8,1>UW {align1 compr};
85
86/* field 0 backward prediction of Y: read with g115.16 = 1 and the vector at g82.18, then average g32~g39 into the forward data held in g58, g60, ... g72 */
87mov(2) g31.0<1>UD g82.0<2,2,1>UD {align1};   // restore the first two payload dwords from the g82 backup; the forward UV section halved them in g31
88
89asr (2) g31.14<1>W g82.18<2,2,1>W 1W {align1};   // two motion vector words at g82.18, arithmetically shifted right by 1, stored at g31.14
90add (2) g115.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g115.0/g115.4 = first two payload dwords + shifted vector
91and (1) g115.4<1>UD g115.4<1,1,1>UD 0xFFFFFFFEUD {align1};   // clear bit 0 of the second dword
92and.nz (1) null g82.31<1,1,1>UB 0x2UW {align1};   // f0 = bit 1 of byte g82.31 is set (presumably the field-select flag for this vector - confirm)
93(f0) add (1) g115.4<1>UD g115.4<1,1,1>UD 1UD {align1};   // if so, set bit 0 of the second dword again
94mov (1) g115.16<1>UW 1UW {align1};           //0:forward 1:backward
95mov (1) a0.0<1>UD 0x0A52UD {align1};         //g82.18,motion vector (0x0A52 = 82*32 + 18)
96mov (1) g126.8<1>UD ip {align1};   // record ip in g126.8 right before the jump
97add (1) ip g21.0<1,1,1>UD 0x20UD {align1};   //jump to the lib (g21.0 + 0x20) to read reference data
98
99avg (16) g58.0<1>UW g58.0<16,16,1>UW g32.0<16,16,1>UW {align1};   // word-wise avg of the stored forward data (g58, g60, ... g72)
100avg (16) g60.0<1>UW g60.0<16,16,1>UW g33.0<16,16,1>UW {align1};   // with g32~g39, written back in place
101avg (16) g62.0<1>UW g62.0<16,16,1>UW g34.0<16,16,1>UW {align1};
102avg (16) g64.0<1>UW g64.0<16,16,1>UW g35.0<16,16,1>UW {align1};
103avg (16) g66.0<1>UW g66.0<16,16,1>UW g36.0<16,16,1>UW {align1};
104avg (16) g68.0<1>UW g68.0<16,16,1>UW g37.0<16,16,1>UW {align1};
105avg (16) g70.0<1>UW g70.0<16,16,1>UW g38.0<16,16,1>UW {align1};
106avg (16) g72.0<1>UW g72.0<16,16,1>UW g39.0<16,16,1>UW {align1};
107
108/* field 1 backward prediction of Y: same as field 0 backward Y, but with the vector at g82.26, flag bit 0x8, and the odd registers g59~g73 */
109asr (2) g31.14<1>W g82.26<2,2,1>W 1W {align1};   // two motion vector words at g82.26, arithmetically shifted right by 1, stored at g31.14
110add (2) g115.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g115.0/g115.4 = first two payload dwords + shifted vector
111and (1) g115.4<1>UD g115.4<1,1,1>UD 0xFFFFFFFEUD {align1};   // clear bit 0 of the second dword
112and.nz (1) null g82.31<1,1,1>UB 0x8UW {align1};   // f0 = bit 3 of byte g82.31 is set (presumably the field-select flag for this vector - confirm)
113(f0) add (1) g115.4<1>UD g115.4<1,1,1>UD 1UD {align1};   // if so, set bit 0 of the second dword again
114mov (1) a0.0<1>UD 0x0A5AUD {align1};         //g82.26,motion vector (0x0A5A = 82*32 + 26)
115mov (1) g126.8<1>UD ip {align1};   // record ip in g126.8 right before the jump; g115.16 is not rewritten here, so it still holds 1 (backward)
116add (1) ip g21.0<1,1,1>UD 0x20UD {align1};   //jump to the lib (g21.0 + 0x20) to read reference data
117
118avg (16) g59.0<1>UW g59.0<16,16,1>UW g32.0<16,16,1>UW {align1};   // word-wise avg of the stored forward data (g59, g61, ... g73)
119avg (16) g61.0<1>UW g61.0<16,16,1>UW g33.0<16,16,1>UW {align1};   // with g32~g39, written back in place
120avg (16) g63.0<1>UW g63.0<16,16,1>UW g34.0<16,16,1>UW {align1};
121avg (16) g65.0<1>UW g65.0<16,16,1>UW g35.0<16,16,1>UW {align1};
122avg (16) g67.0<1>UW g67.0<16,16,1>UW g36.0<16,16,1>UW {align1};
123avg (16) g69.0<1>UW g69.0<16,16,1>UW g37.0<16,16,1>UW {align1};
124avg (16) g71.0<1>UW g71.0<16,16,1>UW g38.0<16,16,1>UW {align1};
125avg (16) g73.0<1>UW g73.0<16,16,1>UW g39.0<16,16,1>UW {align1};
126
127/* field 0 backward prediction of UV: halve the payload origin again, shift the vector at g82.18 by 2, read through lib entry 0x30, average into g74~g81 */
128shr (2) g31.0<1>UD g31.0<2,2,1>UD 1UD {align1};   // halve the first two payload dwords in place (the backward Y section restored them to full size)
129
130asr (2) g31.14<1>W g82.18<2,2,1>W 2W {align1};   // two motion vector words at g82.18, arithmetically shifted right by 2, stored at g31.14
131add (2) g115.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g115.0/g115.4 = halved payload dwords + shifted vector
132and (1) g115.4<1>UD g115.4<1,1,1>UD 0xFFFFFFFEUD {align1};   // clear bit 0 of the second dword
133and.nz (1) null g82.31<1,1,1>UB 0x2UW {align1};   // f0 = bit 1 of byte g82.31 is set (same flag bit as the field 0 backward Y section)
134(f0) add (1) g115.4<1>UD g115.4<1,1,1>UD 1UD {align1};   // if so, set bit 0 of the second dword again
135mov (1) a0.0<1>UD 0x0A52UD {align1};         //g82.18,motion vector (0x0A52 = 82*32 + 18)
136mov (1) g126.8<1>UD ip {align1};   // record ip in g126.8 right before the jump
137add (1) ip g21.0<1,1,1>UD 0x30UD {align1};   //jump to the lib (g21.0 + 0x30) to read reference data
138
139avg (16) g74.0<1>UW g74.0<8,8,1>UW g32.0<8,8,1>UW {align1 compr};   // word-wise avg of the forward UV data stored from byte 0 of g74~g81
140avg (16) g76.0<1>UW g76.0<8,8,1>UW g34.0<8,8,1>UW {align1 compr};   // with g32~g39, written back in place (same register layout as the
141avg (16) g78.0<1>UW g78.0<8,8,1>UW g36.0<8,8,1>UW {align1 compr};   // forward field 0 UV copies)
142avg (16) g80.0<1>UW g80.0<8,8,1>UW g38.0<8,8,1>UW {align1 compr};
143
144/* field 1 backward prediction of UV: same as field 0 backward UV, but with the vector at g82.26, flag bit 0x8, and the data at byte offset 16 of g74~g81 */
145asr (2) g31.14<1>W g82.26<2,2,1>W 2W {align1};   // two motion vector words at g82.26, arithmetically shifted right by 2, stored at g31.14
146add (2) g115.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g115.0/g115.4 = payload dwords (already halved by the field 0 backward UV section) + shifted vector
147and (1) g115.4<1>UD g115.4<1,1,1>UD 0xFFFFFFFEUD {align1};   // clear bit 0 of the second dword
148and.nz (1) null g82.31<1,1,1>UB 0x8UW {align1};   // f0 = bit 3 of byte g82.31 is set (same flag bit as the field 1 backward Y section)
149(f0) add (1) g115.4<1>UD g115.4<1,1,1>UD 1UD {align1};   // if so, set bit 0 of the second dword again
150mov (1) a0.0<1>UD 0x0A5AUD {align1};         //g82.26,motion vector (0x0A5A = 82*32 + 26)
151mov (1) g126.8<1>UD ip {align1};   // record ip in g126.8 right before the jump
152add (1) ip g21.0<1,1,1>UD 0x30UD {align1};   //jump to the lib (g21.0 + 0x30) to read reference data
153
154avg (16) g74.16<1>UW g74.16<8,8,1>UW g32.0<8,8,1>UW {align1 compr};   // word-wise avg of the forward UV data stored from byte offset 16 of g74~g81
155avg (16) g76.16<1>UW g76.16<8,8,1>UW g34.0<8,8,1>UW {align1 compr};   // with g32~g39, written back in place (same register layout as the
156avg (16) g78.16<1>UW g78.16<8,8,1>UW g36.0<8,8,1>UW {align1 compr};   // forward field 1 UV copies)
157avg (16) g80.16<1>UW g80.16<8,8,1>UW g38.0<8,8,1>UW {align1 compr};
158
159add (1) ip g21.0<1,1,1>UD 0x40UD {align1};          //jump to the lib (g21.0 + 0x40) to add the reference and idct data; ip is not saved to g126.8 here, unlike the earlier jumps (presumably the lib routine does not return - confirm)
160