/*
 * Saturate the 16-bit data held in g33~g56 to unsigned bytes and send it
 * out through three `write` messages (Y, then U, then V), then end the
 * thread.
 *
 * GRF allocation:
 *   g1~g30:    constant buffer
 *     g1~g2:     intra IQ matrix
 *     g3~g4:     non intra IQ matrix
 *     g5~g20:    IDCT tab
 *   g31:       read and write message descriptor
 *   g32~g55:   DCT data
 *   g58~g81:   reference data
 *   g82:       thread payload
 *   g83~g106:  IDCT data
 *
 * NOTE(review): the table above lists g32~g55 as DCT data, but every
 * instruction in this file operates on g33~g56 (g33~g48 for Y, g49~g52
 * for U, g53~g56 for V). Confirm which range is intended.
 */

/*
 * Keep a copy of g31 in g82. g31.0 and g31.8 are overwritten further down,
 * while g82.0 and g82.30 are read back later.
 */
mov (8)       g82.0<1>UD   g31.0<8,8,1>UD     { align1 };

/*
 * In-place word -> unsigned byte conversion with saturation.
 * Each register holds 16 words; the destination stride of 2 puts the 16
 * results at the even byte offsets (0, 2, ..., 30) of the same register.
 */

/* Y: g33~g48 */
mov.sat (16)  g33.0<2>UB   g33.0<16,16,1>W    { align1 };
mov.sat (16)  g34.0<2>UB   g34.0<16,16,1>W    { align1 };
mov.sat (16)  g35.0<2>UB   g35.0<16,16,1>W    { align1 };
mov.sat (16)  g36.0<2>UB   g36.0<16,16,1>W    { align1 };
mov.sat (16)  g37.0<2>UB   g37.0<16,16,1>W    { align1 };
mov.sat (16)  g38.0<2>UB   g38.0<16,16,1>W    { align1 };
mov.sat (16)  g39.0<2>UB   g39.0<16,16,1>W    { align1 };
mov.sat (16)  g40.0<2>UB   g40.0<16,16,1>W    { align1 };
mov.sat (16)  g41.0<2>UB   g41.0<16,16,1>W    { align1 };
mov.sat (16)  g42.0<2>UB   g42.0<16,16,1>W    { align1 };
mov.sat (16)  g43.0<2>UB   g43.0<16,16,1>W    { align1 };
mov.sat (16)  g44.0<2>UB   g44.0<16,16,1>W    { align1 };
mov.sat (16)  g45.0<2>UB   g45.0<16,16,1>W    { align1 };
mov.sat (16)  g46.0<2>UB   g46.0<16,16,1>W    { align1 };
mov.sat (16)  g47.0<2>UB   g47.0<16,16,1>W    { align1 };
mov.sat (16)  g48.0<2>UB   g48.0<16,16,1>W    { align1 };

/* U: g49~g52, V: g53~g56 */
mov.sat (16)  g49.0<2>UB   g49.0<16,16,1>W    { align1 };
mov.sat (16)  g50.0<2>UB   g50.0<16,16,1>W    { align1 };
mov.sat (16)  g51.0<2>UB   g51.0<16,16,1>W    { align1 };
mov.sat (16)  g52.0<2>UB   g52.0<16,16,1>W    { align1 };
mov.sat (16)  g53.0<2>UB   g53.0<16,16,1>W    { align1 };
mov.sat (16)  g54.0<2>UB   g54.0<16,16,1>W    { align1 };
mov.sat (16)  g55.0<2>UB   g55.0<16,16,1>W    { align1 };
mov.sat (16)  g56.0<2>UB   g56.0<16,16,1>W    { align1 };

//Y
/*
 * Third dword of the descriptor in g31.
 * NOTE(review): presumably the block size field (0xF / 0xF, i.e. 16x16)
 * of the write message header -- confirm against the message layout.
 */
mov (1)       g31.8<1>UD   0x00F000FUD        { align1 };

/*
 * Test bit 0 of byte 30 of the saved payload; a non-zero result takes
 * the branch to the field_dct_y packing below.
 */
and.nz (1)    null         g82.30<1,1,1>UB    0x1UW { align1 };
(f0) jmpi field_dct_y;

/*
 * Default packing: registers go out in order, two per message register
 * (m1 = g33|g34, m2 = g35|g36, ..., m8 = g47|g48).
 * The <16,16,2>UB source region reads back only the even bytes that
 * mov.sat produced above.
 */
mov (16)      m1.0<1>UB    g33.0<16,16,2>UB   { align1 };
mov (16)      m1.16<1>UB   g34.0<16,16,2>UB   { align1 };
mov (16)      m2.0<1>UB    g35.0<16,16,2>UB   { align1 };
mov (16)      m2.16<1>UB   g36.0<16,16,2>UB   { align1 };
mov (16)      m3.0<1>UB    g37.0<16,16,2>UB   { align1 };
mov (16)      m3.16<1>UB   g38.0<16,16,2>UB   { align1 };
mov (16)      m4.0<1>UB    g39.0<16,16,2>UB   { align1 };
mov (16)      m4.16<1>UB   g40.0<16,16,2>UB   { align1 };
mov (16)      m5.0<1>UB    g41.0<16,16,2>UB   { align1 };
mov (16)      m5.16<1>UB   g42.0<16,16,2>UB   { align1 };
mov (16)      m6.0<1>UB    g43.0<16,16,2>UB   { align1 };
mov (16)      m6.16<1>UB   g44.0<16,16,2>UB   { align1 };
mov (16)      m7.0<1>UB    g45.0<16,16,2>UB   { align1 };
mov (16)      m7.16<1>UB   g46.0<16,16,2>UB   { align1 };
mov (16)      m8.0<1>UB    g47.0<16,16,2>UB   { align1 };
mov (16)      m8.16<1>UB   g48.0<16,16,2>UB   { align1 };
jmpi write_back_y;

/*
 * Field packing: message register m(1+k) takes g(33+k) in bytes 0..15
 * and g(41+k) in bytes 16..31, for k = 0..7, so the two register groups
 * g33~g40 and g41~g48 end up interleaved.
 */
field_dct_y:
mov (16)      m1.0<1>UB    g33.0<16,16,2>UB   { align1 };
mov (16)      m1.16<1>UB   g41.0<16,16,2>UB   { align1 };
mov (16)      m2.0<1>UB    g34.0<16,16,2>UB   { align1 };
mov (16)      m2.16<1>UB   g42.0<16,16,2>UB   { align1 };
mov (16)      m3.0<1>UB    g35.0<16,16,2>UB   { align1 };
mov (16)      m3.16<1>UB   g43.0<16,16,2>UB   { align1 };
mov (16)      m4.0<1>UB    g36.0<16,16,2>UB   { align1 };
mov (16)      m4.16<1>UB   g44.0<16,16,2>UB   { align1 };
mov (16)      m5.0<1>UB    g37.0<16,16,2>UB   { align1 };
mov (16)      m5.16<1>UB   g45.0<16,16,2>UB   { align1 };
mov (16)      m6.0<1>UB    g38.0<16,16,2>UB   { align1 };
mov (16)      m6.16<1>UB   g46.0<16,16,2>UB   { align1 };
mov (16)      m7.0<1>UB    g39.0<16,16,2>UB   { align1 };
mov (16)      m7.16<1>UB   g47.0<16,16,2>UB   { align1 };
mov (16)      m8.0<1>UB    g40.0<16,16,2>UB   { align1 };
mov (16)      m8.16<1>UB   g48.0<16,16,2>UB   { align1 };

/* Both packings join here: send the Y data (m1~m8) with g31 as source. */
write_back_y:
send (16) 0 acc0<1>UW g31<8,8,1>UW
        write(0, 0, 2, 0) mlen 9 rlen 0 { align1 };

//U
/*
 * Re-program the descriptor for the chroma writes: a new value for the
 * third dword, and the first two dwords taken from the saved payload
 * shifted right by one.
 * NOTE(review): presumably an 8x8 block at half the Y coordinates --
 * confirm against the message layout.
 */
mov (1)       g31.8<1>UD   0x0070007UD        { align1 };
shr (2)       g31.0<1>UD   g82.0<2,2,1>UD     1D { align1 };

mov (16)      m1.0<1>UB    g49.0<16,16,2>UB   { align1 };
mov (16)      m1.16<1>UB   g50.0<16,16,2>UB   { align1 };
mov (16)      m2.0<1>UB    g51.0<16,16,2>UB   { align1 };
mov (16)      m2.16<1>UB   g52.0<16,16,2>UB   { align1 };
send (16) 0 acc0<1>UW g31<8,8,1>UW
        write(1, 0, 2, 0) mlen 3 rlen 0 { align1 };

//V
/* Same descriptor as U (g31 is not modified in between); data is g53~g56. */
mov (16)      m1.0<1>UB    g53.0<16,16,2>UB   { align1 };
mov (16)      m1.16<1>UB   g54.0<16,16,2>UB   { align1 };
mov (16)      m2.0<1>UB    g55.0<16,16,2>UB   { align1 };
mov (16)      m2.16<1>UB   g56.0<16,16,2>UB   { align1 };
send (16) 0 acc0<1>UW g31<8,8,1>UW
        write(2, 0, 2, 0) mlen 3 rlen 0 { align1 };

/* Final message goes to thread_spawner with EOT set, sourced from g0. */
OUT:
send (16) 0 acc0<1>UW g0<8,8,1>UW
        thread_spawner(0, 0, 0) mlen 1 rlen 0 { align1 EOT };
