1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Author:
24 *    Zou Nan hai <nanhai.zou@intel.com>
25 *    Zhang Hua jun <huajun.zhang@intel.com>
26 *    Xing Dong sheng <dongsheng.xing@intel.com>
27 *
28 */
/*
 * Fetch data from both input surfaces, then jump to one of the four add
 * sequences that follow, selected by the low two bits of g2.0.
 *
 * Registers written here:
 *   g2.24      selector (g2.0 & 3), later scaled into the jump distance
 *   g86..g89   4 registers read from input_surface1
 *   g94..g97   4 registers read from input_surface2
 *   g90, g98   1 extra register read from each surface after g2.4 += 8
 *
 * NOTE(review): g2 is used as the one-register message payload of every read;
 * g2.0 / g2.4 / g2.8 look like the X origin, Y origin and block-size dwords of
 * a media block read header -- confirm against the data port message spec.
 */
29and (1) g2.24<1>UD g2.0<1,1,1>UD 0x3UD {align1};
/* First fetch: 4 response registers per surface (rlen 4). */
30send (16) 0 g86.0<1>UW g2<8,8,1>UW read(input_surface1, 2, 0, 2) mlen 1 rlen 4 {align1};
31send (16) 0 g94.0<1>UW g2<8,8,1>UW read(input_surface2, 2, 0, 2) mlen 1 rlen 4 {align1};
/* Second fetch: new g2.8 value, g2.4 advanced by 8, 1 response register per
 * surface (rlen 1) placed directly after the first fetch's registers. */
32mov (1) g2.8<1>UD 0x01001FUD {align1};
33add (1) g2.4<1>UD g2.4<1,1,1>UD 8D {align1};
34send (16) 0 g90.0<1>UW g2<8,8,1>UW read(input_surface1, 2, 0, 2) mlen 1 rlen 1 {align1};
35send (16) 0 g98.0<1>UW g2<8,8,1>UW read(input_surface2, 2, 0, 2) mlen 1 rlen 1 {align1};
/* NOTE(review): presumably puts g2.8 back to the value later code expects --
 * the earlier value of g2.8 is not visible here, confirm. */
36mov (1) g2.8<1>UD 0x007000fUD  {align1};
/* Computed jump: distance = selector * 25.  Each of the first three sequences
 * below is exactly 25 instructions (24 add + 1 jmpi), so the multiplier must
 * be kept in sync with their length.
 * NOTE(review): this relies on jmpi measuring its distance in whole
 * instructions from the following instruction -- confirm for the target GEN. */
37mul(1) g2.24<1>UD g2.24<1,1,1>UD 25UD {align1};
38jmpi g2.24<1,1,1>D;
39
/*
 * Add sequence reached with jump distance 0, i.e. selector (g2.0 & 3) == 0.
 * Sums four bytes per output word, using byte offsets .0 and .1:
 *   g(44+i) = g(86+i).0 + g(86+i).1 + g(87+i).0 + g(87+i).1     i = 0..3
 *   g(48+i) = g(94+i).0 + g(94+i).1 + g(95+i).0 + g(95+i).1     i = 0..3
 * The sums are left unscaled; the code at "out" shifts them.
 *
 * Keep this sequence at exactly 25 instructions (24 add + jmpi): the
 * "mul ... 25UD" / "jmpi g2.24" dispatch above depends on that length.
 */
/* g44..g47: two bytes at .0/.1 of the same register, then .0 of the next. */
40add (16) g44.0<1>UW g86.0<16,8,1>UB g86.1<16,8,1>UB{align1};
41add (16) g45.0<1>UW g87.0<16,8,1>UB g87.1<16,8,1>UB{align1};
42add (16) g46.0<1>UW g88.0<16,8,1>UB g88.1<16,8,1>UB{align1};
43add (16) g47.0<1>UW g89.0<16,8,1>UB g89.1<16,8,1>UB{align1};
44add (16) g44.0<1>UW g44.0<16,16,1>UW g87.0<16,8,1>UB{align1};
45add (16) g45.0<1>UW g45.0<16,16,1>UW g88.0<16,8,1>UB{align1};
46add (16) g46.0<1>UW g46.0<16,16,1>UW g89.0<16,8,1>UB{align1};
47add (16) g47.0<1>UW g47.0<16,16,1>UW g90.0<16,8,1>UB{align1};
48
/* g44..g47: fourth term, .1 of the next register. */
49add (16) g44.0<1>UW g44.0<16,16,1>UW g87.1<16,8,1>UB{align1};
50add (16) g45.0<1>UW g45.0<16,16,1>UW g88.1<16,8,1>UB{align1};
51add (16) g46.0<1>UW g46.0<16,16,1>UW g89.1<16,8,1>UB{align1};
52add (16) g47.0<1>UW g47.0<16,16,1>UW g90.1<16,8,1>UB{align1};
/* g48..g51: same four-term sum on the input_surface2 data (g94..g98); here
 * the first add pairs .0 of this register with .0 of the next. */
53add (16) g48.0<1>UW g94.0<16,8,1>UB g95.0<16,8,1>UB{align1};
54add (16) g49.0<1>UW g95.0<16,8,1>UB g96.0<16,8,1>UB{align1};
55add (16) g50.0<1>UW g96.0<16,8,1>UB g97.0<16,8,1>UB{align1};
56add (16) g51.0<1>UW g97.0<16,8,1>UB g98.0<16,8,1>UB{align1};
57
58add (16) g48.0<1>UW g48.0<16,16,1>UW g94.1<16,8,1>UB{align1};
59add (16) g49.0<1>UW g49.0<16,16,1>UW g95.1<16,8,1>UB{align1};
60add (16) g50.0<1>UW g50.0<16,16,1>UW g96.1<16,8,1>UB{align1};
61add (16) g51.0<1>UW g51.0<16,16,1>UW g97.1<16,8,1>UB{align1};
62add (16) g48.0<1>UW g48.0<16,16,1>UW g95.1<16,8,1>UB{align1};
63add (16) g49.0<1>UW g49.0<16,16,1>UW g96.1<16,8,1>UB{align1};
64add (16) g50.0<1>UW g50.0<16,16,1>UW g97.1<16,8,1>UB{align1};
65add (16) g51.0<1>UW g51.0<16,16,1>UW g98.1<16,8,1>UB{align1};
/* Skip the remaining sequences. */
66jmpi out;
67
/*
 * Add sequence at jump distance 25, i.e. selector (g2.0 & 3) == 1.
 * Same four-byte sum as the preceding sequence, shifted one byte to the
 * right (byte offsets .1 and .2):
 *   g(44+i) = g(86+i).1 + g(86+i).2 + g(87+i).1 + g(87+i).2     i = 0..3
 *   g(48+i) = g(94+i).1 + g(94+i).2 + g(95+i).1 + g(95+i).2     i = 0..3
 *
 * Keep this sequence at exactly 25 instructions (24 add + jmpi): the
 * "mul ... 25UD" / "jmpi g2.24" dispatch above depends on that length.
 */
/* g44..g47 from the input_surface1 data (g86..g90). */
68add (16) g44.0<1>UW g86.1<16,8,1>UB g86.2<16,8,1>UB{align1};
69add (16) g45.0<1>UW g87.1<16,8,1>UB g87.2<16,8,1>UB{align1};
70add (16) g46.0<1>UW g88.1<16,8,1>UB g88.2<16,8,1>UB{align1};
71add (16) g47.0<1>UW g89.1<16,8,1>UB g89.2<16,8,1>UB{align1};
72add (16) g44.0<1>UW g44.0<16,16,1>UW g87.1<16,8,1>UB{align1};
73add (16) g45.0<1>UW g45.0<16,16,1>UW g88.1<16,8,1>UB{align1};
74add (16) g46.0<1>UW g46.0<16,16,1>UW g89.1<16,8,1>UB{align1};
75add (16) g47.0<1>UW g47.0<16,16,1>UW g90.1<16,8,1>UB{align1};
76
77add (16) g44.0<1>UW g44.0<16,16,1>UW g87.2<16,8,1>UB{align1};
78add (16) g45.0<1>UW g45.0<16,16,1>UW g88.2<16,8,1>UB{align1};
79add (16) g46.0<1>UW g46.0<16,16,1>UW g89.2<16,8,1>UB{align1};
80add (16) g47.0<1>UW g47.0<16,16,1>UW g90.2<16,8,1>UB{align1};
/* g48..g51 from the input_surface2 data (g94..g98). */
81add (16) g48.0<1>UW g94.1<16,8,1>UB g95.1<16,8,1>UB{align1};
82add (16) g49.0<1>UW g95.1<16,8,1>UB g96.1<16,8,1>UB{align1};
83add (16) g50.0<1>UW g96.1<16,8,1>UB g97.1<16,8,1>UB{align1};
84add (16) g51.0<1>UW g97.1<16,8,1>UB g98.1<16,8,1>UB{align1};
85
86add (16) g48.0<1>UW g48.0<16,16,1>UW g94.2<16,8,1>UB{align1};
87add (16) g49.0<1>UW g49.0<16,16,1>UW g95.2<16,8,1>UB{align1};
88add (16) g50.0<1>UW g50.0<16,16,1>UW g96.2<16,8,1>UB{align1};
89add (16) g51.0<1>UW g51.0<16,16,1>UW g97.2<16,8,1>UB{align1};
90add (16) g48.0<1>UW g48.0<16,16,1>UW g95.2<16,8,1>UB{align1};
91add (16) g49.0<1>UW g49.0<16,16,1>UW g96.2<16,8,1>UB{align1};
92add (16) g50.0<1>UW g50.0<16,16,1>UW g97.2<16,8,1>UB{align1};
93add (16) g51.0<1>UW g51.0<16,16,1>UW g98.2<16,8,1>UB{align1};
/* Skip the remaining sequences. */
94jmpi out;
95
/*
 * Add sequence at jump distance 50, i.e. selector (g2.0 & 3) == 2.
 * Four-byte sum using byte offsets .2 and .3:
 *   g(44+i) = g(86+i).2 + g(86+i).3 + g(87+i).2 + g(87+i).3     i = 0..3
 *   g(48+i) = g(94+i).2 + g(94+i).3 + g(95+i).2 + g(95+i).3     i = 0..3
 *
 * Keep this sequence at exactly 25 instructions (24 add + jmpi): the
 * "mul ... 25UD" / "jmpi g2.24" dispatch above depends on that length.
 */
/* g44..g47 from the input_surface1 data (g86..g90). */
96add (16) g44.0<1>UW g86.2<16,8,1>UB g86.3<16,8,1>UB{align1};
97add (16) g45.0<1>UW g87.2<16,8,1>UB g87.3<16,8,1>UB{align1};
98add (16) g46.0<1>UW g88.2<16,8,1>UB g88.3<16,8,1>UB{align1};
99add (16) g47.0<1>UW g89.2<16,8,1>UB g89.3<16,8,1>UB{align1};
100add (16) g44.0<1>UW g44.0<16,16,1>UW g87.2<16,8,1>UB{align1};
101add (16) g45.0<1>UW g45.0<16,16,1>UW g88.2<16,8,1>UB{align1};
102add (16) g46.0<1>UW g46.0<16,16,1>UW g89.2<16,8,1>UB{align1};
103add (16) g47.0<1>UW g47.0<16,16,1>UW g90.2<16,8,1>UB{align1};
104
105add (16) g44.0<1>UW g44.0<16,16,1>UW g87.3<16,8,1>UB{align1};
106add (16) g45.0<1>UW g45.0<16,16,1>UW g88.3<16,8,1>UB{align1};
107add (16) g46.0<1>UW g46.0<16,16,1>UW g89.3<16,8,1>UB{align1};
108add (16) g47.0<1>UW g47.0<16,16,1>UW g90.3<16,8,1>UB{align1};
/* g48..g51 from the input_surface2 data (g94..g98). */
109add (16) g48.0<1>UW g94.2<16,8,1>UB g95.2<16,8,1>UB{align1};
110add (16) g49.0<1>UW g95.2<16,8,1>UB g96.2<16,8,1>UB{align1};
111add (16) g50.0<1>UW g96.2<16,8,1>UB g97.2<16,8,1>UB{align1};
112add (16) g51.0<1>UW g97.2<16,8,1>UB g98.2<16,8,1>UB{align1};
113
114add (16) g48.0<1>UW g48.0<16,16,1>UW g94.3<16,8,1>UB{align1};
115add (16) g49.0<1>UW g49.0<16,16,1>UW g95.3<16,8,1>UB{align1};
116add (16) g50.0<1>UW g50.0<16,16,1>UW g96.3<16,8,1>UB{align1};
117add (16) g51.0<1>UW g51.0<16,16,1>UW g97.3<16,8,1>UB{align1};
118add (16) g48.0<1>UW g48.0<16,16,1>UW g95.3<16,8,1>UB{align1};
119add (16) g49.0<1>UW g49.0<16,16,1>UW g96.3<16,8,1>UB{align1};
120add (16) g50.0<1>UW g50.0<16,16,1>UW g97.3<16,8,1>UB{align1};
121add (16) g51.0<1>UW g51.0<16,16,1>UW g98.3<16,8,1>UB{align1};
/* Skip the remaining sequence. */
122jmpi out;
123
/*
 * Add sequence at jump distance 75, i.e. selector (g2.0 & 3) == 3.
 * Four-byte sum using byte offsets .3 and .4:
 *   g(44+i) = g(86+i).3 + g(86+i).4 + g(87+i).3 + g(87+i).4     i = 0..3
 *   g(48+i) = g(94+i).3 + g(94+i).4 + g(95+i).3 + g(95+i).4     i = 0..3
 *
 * This is the last sequence: it has no trailing jmpi and runs straight into
 * the "out" label that follows it.
 */
/* g44..g47 from the input_surface1 data (g86..g90). */
124add (16) g44.0<1>UW g86.3<16,8,1>UB g86.4<16,8,1>UB{align1};
125add (16) g45.0<1>UW g87.3<16,8,1>UB g87.4<16,8,1>UB{align1};
126add (16) g46.0<1>UW g88.3<16,8,1>UB g88.4<16,8,1>UB{align1};
127add (16) g47.0<1>UW g89.3<16,8,1>UB g89.4<16,8,1>UB{align1};
128add (16) g44.0<1>UW g44.0<16,16,1>UW g87.3<16,8,1>UB{align1};
129add (16) g45.0<1>UW g45.0<16,16,1>UW g88.3<16,8,1>UB{align1};
130add (16) g46.0<1>UW g46.0<16,16,1>UW g89.3<16,8,1>UB{align1};
131add (16) g47.0<1>UW g47.0<16,16,1>UW g90.3<16,8,1>UB{align1};
132
133add (16) g44.0<1>UW g44.0<16,16,1>UW g87.4<16,8,1>UB{align1};
134add (16) g45.0<1>UW g45.0<16,16,1>UW g88.4<16,8,1>UB{align1};
135add (16) g46.0<1>UW g46.0<16,16,1>UW g89.4<16,8,1>UB{align1};
136add (16) g47.0<1>UW g47.0<16,16,1>UW g90.4<16,8,1>UB{align1};
/* g48..g51 from the input_surface2 data (g94..g98). */
137add (16) g48.0<1>UW g94.3<16,8,1>UB g95.3<16,8,1>UB{align1};
138add (16) g49.0<1>UW g95.3<16,8,1>UB g96.3<16,8,1>UB{align1};
139add (16) g50.0<1>UW g96.3<16,8,1>UB g97.3<16,8,1>UB{align1};
140add (16) g51.0<1>UW g97.3<16,8,1>UB g98.3<16,8,1>UB{align1};
141
142add (16) g48.0<1>UW g48.0<16,16,1>UW g94.4<16,8,1>UB{align1};
143add (16) g49.0<1>UW g49.0<16,16,1>UW g95.4<16,8,1>UB{align1};
144add (16) g50.0<1>UW g50.0<16,16,1>UW g96.4<16,8,1>UB{align1};
145add (16) g51.0<1>UW g51.0<16,16,1>UW g97.4<16,8,1>UB{align1};
146add (16) g48.0<1>UW g48.0<16,16,1>UW g95.4<16,8,1>UB{align1};
147add (16) g49.0<1>UW g49.0<16,16,1>UW g96.4<16,8,1>UB{align1};
148add (16) g50.0<1>UW g50.0<16,16,1>UW g97.4<16,8,1>UB{align1};
149add (16) g51.0<1>UW g51.0<16,16,1>UW g98.4<16,8,1>UB{align1};
/*
 * Common exit of the four add sequences above.  Each word of g44..g51 holds
 * a sum of four bytes; shift it right by 2 (divide by 4) in place, with the
 * saturate modifier set.  No rounding term is added before the shift in the
 * code visible here, so the division truncates.
 */
150out:
151
/* g44..g47: sums built from the input_surface1 data. */
152shr.sat (16) g44.0<1>UW g44.0<16,16,1>UW 2UW {align1};
153shr.sat (16) g45.0<1>UW g45.0<16,16,1>UW 2UW {align1};
154shr.sat (16) g46.0<1>UW g46.0<16,16,1>UW 2UW {align1};
155shr.sat (16) g47.0<1>UW g47.0<16,16,1>UW 2UW {align1};
/* g48..g51: sums built from the input_surface2 data. */
156shr.sat (16) g48.0<1>UW g48.0<16,16,1>UW 2UW {align1};
157shr.sat (16) g49.0<1>UW g49.0<16,16,1>UW 2UW {align1};
158shr.sat (16) g50.0<1>UW g50.0<16,16,1>UW 2UW {align1};
159shr.sat (16) g51.0<1>UW g51.0<16,16,1>UW 2UW {align1};
160