1/* GRF allocation:
2   g1~g30: constant buffer
3           g1~g2:intra IQ matrix
4           g3~g4:non intra IQ matrix
5           g5~g20:IDCT table
6   g31:    thread payload 
7   g32:    message descriptor for reading reference data
8   g58~g81:reference data
9   g82:    thread payload backup
10   g83~g106:IDCT data                           */
// Main sequence: back up the thread payload, then jump in turn to four
// routines located at fixed offsets from the address held in g21.0
// (+0x50 IDCT, +0x00 and +0x10 reference reads, +0x40 final add).
// Jump pattern: the current ip is stored in g126.8, then ip is overwritten
// with g21.0 + offset.
// NOTE(review): presumably the called routine resumes this code through the
// ip value saved in g126.8 -- confirm against the library source before
// inserting or reordering instructions between a save and its jump.
11mov (8) g82.0<1>UD g31.0<8,8,1>UD {align1};   // g82 = copy of thread payload g31 (g31 is overwritten below)
12mov (1) g126.8<1>UD ip {align1};              // save current ip in g126.8
13add (1) ip g21.0<1,1,1>UD 0x50UD {align1};   //jump to the lib to do IDCT  
14
15//Y, (x', y') = (x, y) + (motion_vector.x >> 1, motion_vector.y >> 1) 
16asr (2) g31.14<1>W g82.14<2,2,1>W 1W {align1};  // g31.14 (2 words) = backed-up motion vector >> 1 (arithmetic shift)
17add (2) g32.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g32.0 (2 dwords) = g31.0 (2 dwords) + shifted motion vector
18mov (1) g32.16<1>UW 0UW {align1};           //0:forward 1:backward
19mov (1) a0.0<1>UD 0x0A4EUD {align1};        //g82.14,motion vector (0x0A4E = 82*32 + 14)
20mov (1) g126.8<1>UD ip {align1};            // save current ip in g126.8
21add (1) ip g21.0<1,1,1>UD 0x00UD {align1};   //jump to the lib to read reference data  
22
23//UV, (x', y') = (x >> 1, y >> 1) + (motion_vector.x >> 2, motion_vector.y >> 2)
24shr (2) g31.0<1>UD g31.0<2,2,1>UD 1UD {align1};   // halve the two dwords at g31.0 in place (logical shift)
25asr (2) g31.14<1>W g82.14<2,2,1>W 2W {align1};    // re-derive from the g82 backup: motion vector >> 2
26add (2) g32.0<1>UD g31.0<2,2,1>UD g31.14<2,2,1>W {align1};   // g32.0 (2 dwords) = halved g31.0 + shifted motion vector
// g32.16 and a0.0 are not rewritten for this pass; they keep the values set
// for the Y pass unless the routine at +0x00 changed them (not visible here).
27mov (1) g126.8<1>UD ip {align1};            // save current ip in g126.8
28add (1) ip g21.0<1,1,1>UD 0x10UD {align1};          //jump to the lib to read reference data  
29
// NOTE(review): ip is not re-saved into g126.8 before this last jump, so
// g126.8 still holds the value stored before the previous jump. Presumably
// the routine at +0x40 ends the thread and never returns here -- confirm.
30add (1) ip g21.0<1,1,1>UD 0x40UD {align1};          //jump to the lib to add the reference and idct data
31