1428d7b3dSmrg/* GRF allocation:
2428d7b3dSmrg   g1~g30: constant buffer
3428d7b3dSmrg           g1~g2:intra IQ matrix
4428d7b3dSmrg           g3~g4:non intra IQ matrix
5428d7b3dSmrg           g5~g20:IDCT table
           g21.0:base address; a routine offset is added to it to form ip when jumping into the lib
6428d7b3dSmrg   g31:    thread payload
7428d7b3dSmrg   g32:    message descriptor for reading reference data
8428d7b3dSmrg   g58~g81:reference data
9428d7b3dSmrg   g82:    thread payload backup
10428d7b3dSmrg   g83~g106:IDCT data
   g126.8: current ip, saved before jumping into the lib        */
11428d7b3dSmrg/*
12428d7b3dSmrgmov (8) g82.0<1>UD g31.0<8,8,1>UD {align1};
13428d7b3dSmrgdefine(`UV_red',`0xffffffffUD')
14428d7b3dSmrgdefine(`UV_white',`0x7f7f7f7fUD')
15428d7b3dSmrgdefine(`UV_green',`0x00000000UD')
16428d7b3dSmrgmov(1) g31.8<1>UD 0x000f000fUD  { align1 };
17428d7b3dSmrgmov(16) m1<1>UD 0xFFFFFFFFUD {align1 compr};
18428d7b3dSmrgmov(16) m3<1>UD 0xFFFFFFFFUD {align1 compr};
19428d7b3dSmrgmov(16) m5<1>UD 0xFFFFFFFFUD {align1 compr};
20428d7b3dSmrgmov(16) m7<1>UD 0xFFFFFFFFUD {align1 compr};
21428d7b3dSmrgsend (16) 0 acc0<1>UW g31<8,8,1>UW write(0, 0, 2, 0) mlen 9 rlen 0 { align1 };
22428d7b3dSmrgshr (2) g31.0<1>UD g82.0<2,2,1>UD  1UW {align1};
23428d7b3dSmrgmov(1) g31.8<1>UD 0x00070007UD  { align1 };
24428d7b3dSmrgmov (16) m1<1>UD UV_green {align1 compr};
25428d7b3dSmrgsend (16) 0 acc0<1>UW g31<8,8,1>UW write(2, 0, 2, 0) mlen 3 rlen 0 { align1 };
26428d7b3dSmrgsend (16) 0 acc0<1>UW g31<8,8,1>UW write(1, 0, 2, 0) mlen 3 rlen 0 { align1 };
27428d7b3dSmrgsend (16) 0 acc0<1>UW g0<8,8,1>UW 
28428d7b3dSmrg	thread_spawner(0, 0, 0) mlen 1 rlen 0 { align1 EOT};
29428d7b3dSmrg*/
30428d7b3dSmrg
// Kernel body for a backward-predicted block (g32.16 is set to 1 = backward below).
// Steps, in order:
//   1. back up the thread payload (g31) into g82;
//   2. jump into the library at offset 0x50 to do the IDCT;
//   3. Y:  compute (x', y') from the payload origin and the motion vector >> 1,
//          then jump into the library at offset 0x0 to read reference data;
//   4. UV: halve the origin, use the motion vector >> 2, then jump into the
//          library at offset 0x10 to read reference data;
//   5. jump into the library at offset 0x40 to add the reference and IDCT data.
// Library jumps as written here: the current ip is stored in g126.8, then ip is
// overwritten with g21.0 + <routine offset>.
// NOTE(review): the library presumably resumes this kernel through the value saved
// in g126.8; if so, the distance between each "mov g126.8 ip" and the following
// "add ip" is significant -- confirm against the library source before inserting
// or removing instructions between them.
31428d7b3dSmrgmov (8) g82.0<1>UD g31.0<8,8,1>UD {align1};   // back up the thread payload: g82 = g31
32428d7b3dSmrgmov (1) g126.8<1>UD ip {align1};   // store the current ip in g126.8 before the jump
33428d7b3dSmrgadd (1) ip g21.0<1,1,1>UD 0x50UD {align1};  //jump to the lib (g21.0 + 0x50) to do IDCT
34428d7b3dSmrg 
35428d7b3dSmrg//Y, (x', y') = (x, y) + (motion_vector.x >> 1, motion_vector.y >> 1)
36428d7b3dSmrgasr (2) g31.14<1>W g82.18<2,2,1>W 1W {align1};   // g31.14 = two signed words at g82.18 (motion vector), each >> 1
37428d7b3dSmrgadd (2) g32.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g32.0 = first two dwords of g31 (x, y) + shifted motion vector
38428d7b3dSmrgmov (1) g32.16<1>UW 1UW {align1};           //0:forward 1:backward
39428d7b3dSmrgmov (1) a0.0<1>UD 0x0A52UD {align1};        //g82.18,motion vector (0x0A52 = 82*32 + 18); presumably read by the lib via a0.0 -- TODO confirm
40428d7b3dSmrgmov (1) g126.8<1>UD ip {align1};   // store the current ip in g126.8 before the jump
41428d7b3dSmrgadd (1) ip g21.0<1,1,1>UD 0x0UD {align1};   //jump to the lib (g21.0 + 0x0) to read reference data
42428d7b3dSmrg
43428d7b3dSmrg//UV, (x', y') = (x >> 1, y >> 1) + (motion_vector.x >> 2, motion_vector.y >> 2)
44428d7b3dSmrgshr (2) g31.0<1>UD g31.0<2,2,1>UD 1UD {align1};   // halve the first two dwords of g31 (x, y) in place
45428d7b3dSmrgasr (2) g31.14<1>W g82.18<2,2,1>W 2W {align1};   // g31.14 = motion vector words from the g82 backup, each >> 2
46428d7b3dSmrgadd (2) g32.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g32.0 = halved (x, y) + shifted motion vector
47428d7b3dSmrgmov (1) g126.8<1>UD ip {align1};   // store the current ip in g126.8 before the jump
48428d7b3dSmrgadd (1) ip g21.0<1,1,1>UD 0x10UD {align1};          //jump to the lib (g21.0 + 0x10) to read reference data
49428d7b3dSmrg
// NOTE(review): ip is not stored in g126.8 before this last jump, so the library
// routine at offset 0x40 presumably does not come back here -- confirm.
50428d7b3dSmrgadd (1) ip g21.0<1,1,1>UD 0x40UD {align1};          //jump to the lib (g21.0 + 0x40) to add the reference and idct data
51