spu_ovl.S revision 1.1.1.3 1 1.1 skrll /* Overlay manager for SPU.
2 1.1 skrll
3 1.1.1.3 christos Copyright (C) 2006-2016 Free Software Foundation, Inc.
4 1.1 skrll
5 1.1 skrll This file is part of the GNU Binutils.
6 1.1 skrll
7 1.1 skrll This program is free software; you can redistribute it and/or modify
8 1.1 skrll it under the terms of the GNU General Public License as published by
9 1.1 skrll the Free Software Foundation; either version 3 of the License, or
10 1.1 skrll (at your option) any later version.
11 1.1 skrll
12 1.1 skrll This program is distributed in the hope that it will be useful,
13 1.1 skrll but WITHOUT ANY WARRANTY; without even the implied warranty of
14 1.1 skrll MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 1.1 skrll GNU General Public License for more details.
16 1.1 skrll
17 1.1 skrll You should have received a copy of the GNU General Public License
18 1.1 skrll along with this program; if not, write to the Free Software
19 1.1 skrll Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
20 1.1 skrll MA 02110-1301, USA. */
21 1.1 skrll
22 1.1 skrll /* MFC DMA defn's. */
/* MFC_GET_CMD is the command written to $MFC_Cmd for every transfer,
   MFC_MAX_DMA_SIZE caps the size of a single transfer in the copy loop,
   MFC_TAG_ID is the tag written to $MFC_TagId (and used to build the
   tag mask), and MFC_TAG_UPDATE_ALL is the value written to
   $MFC_WrTagUpdate when waiting for the transfers.  */
23 1.1 skrll #define MFC_GET_CMD 0x40
24 1.1 skrll #define MFC_MAX_DMA_SIZE 0x4000
25 1.1 skrll #define MFC_TAG_UPDATE_ALL 2
26 1.1 skrll #define MFC_TAG_ID 0
27 1.1 skrll
28 1.1 skrll /* Register usage. */
/* Every symbolic name below is an alias for one of nine physical
   registers, $71-$79.  Names that share a register therefore share
   storage and must not be live at the same time.

   reserved1-reserved5 ($75-$79) are written by this file without being
   saved.  save1-save3 ($74-$72) are stored at -16/-32/-48($sp) before
   they are written and reloaded from there afterwards.  save4 ($71) is
   only used, and only stored at -64($sp), when OVLY_IRQ_SAVE is
   defined.  */
29 1.1 skrll #define reserved1 $75
30 1.1 skrll #define parm $75
31 1.1 skrll #define tab1 reserved1
32 1.1 skrll #define tab2 reserved1
33 1.1 skrll #define vma reserved1
34 1.1 skrll #define oldvma reserved1
35 1.1 skrll #define newmask reserved1
36 1.1 skrll #define map reserved1
37 1.1 skrll
38 1.1 skrll #define reserved2 $76
39 1.1 skrll #define off1 reserved2
40 1.1 skrll #define off2 reserved2
41 1.1 skrll #define present1 reserved2
42 1.1 skrll #define present2 reserved2
43 1.1 skrll #define sz reserved2
44 1.1 skrll #define cmp reserved2
45 1.1 skrll #define add64 reserved2
46 1.1 skrll #define cgbits reserved2
47 1.1 skrll #define off3 reserved2
48 1.1 skrll #define off4 reserved2
49 1.1 skrll #define addr4 reserved2
50 1.1 skrll #define off5 reserved2
51 1.1 skrll #define tagstat reserved2
52 1.1 skrll
53 1.1 skrll #define reserved3 $77
54 1.1 skrll #define size1 reserved3
55 1.1 skrll #define size2 reserved3
56 1.1 skrll #define rv3 reserved3
57 1.1 skrll #define ealo reserved3
58 1.1 skrll #define cmd reserved3
59 1.1 skrll #define off64 reserved3
60 1.1 skrll #define tab3 reserved3
61 1.1 skrll #define tab4 reserved3
62 1.1 skrll #define tab5 reserved3
63 1.1 skrll
64 1.1 skrll #define reserved4 $78
65 1.1 skrll #define ovl reserved4
66 1.1 skrll #define rv2 reserved4
67 1.1 skrll #define rv5 reserved4
68 1.1 skrll #define cgshuf reserved4
69 1.1 skrll #define newovl reserved4
70 1.1 skrll #define irqtmp1 reserved4
71 1.1 skrll #define irqtmp2 reserved4
72 1.1 skrll
73 1.1 skrll #define reserved5 $79
74 1.1 skrll #define target reserved5
75 1.1 skrll
76 1.1 skrll #define save1 $74
77 1.1 skrll #define rv4 save1
78 1.1 skrll #define rv7 save1
79 1.1 skrll #define tagid save1
80 1.1 skrll #define maxsize save1
81 1.1 skrll #define pbyte save1
82 1.1 skrll #define pbit save1
83 1.1 skrll
84 1.1 skrll #define save2 $73
85 1.1 skrll #define cur save2
86 1.1 skrll #define rv6 save2
87 1.1 skrll #define osize save2
88 1.1 skrll #define zovl save2
89 1.1 skrll #define oldovl save2
90 1.1 skrll #define newvma save2
91 1.1 skrll
92 1.1 skrll #define save3 $72
93 1.1 skrll #define rv1 save3
94 1.1 skrll #define ea64 save3
95 1.1 skrll #define buf3 save3
96 1.1 skrll #define genwi save3
97 1.1 skrll #define newmap save3
98 1.1 skrll #define oldmask save3
99 1.1 skrll
100 1.1 skrll #define save4 $71
101 1.1 skrll #define irq_stat save4
102 1.1 skrll
103 1.1 skrll .text
104 1.1 skrll .align 4
/* shufb control used by __ovly_load when building a new $lr.  Bytes
   0x00-0x03 pick word 0 of the first source, bytes 0x10-0x13 pick
   word 0 of the second source, and the 0x80 bytes produce zeros in
   words 2 and 3.  */
105 1.1 skrll .type __rv_pattern, @object
106 1.1 skrll .size __rv_pattern, 16
107 1.1 skrll __rv_pattern:
108 1.1 skrll .word 0x00010203, 0x10111213, 0x80808080, 0x80808080
109 1.1 skrll
/* shufb control used in __ovly_xfer_loop.  Bytes 0x04-0x07 copy word 1
   of the source into word 0 and the 0x80 bytes zero the remaining
   words; this moves the carry generated by the low word of the 64-bit
   address add into the position addx reads it from.  */
110 1.1 skrll .type __cg_pattern, @object
111 1.1 skrll .size __cg_pattern, 16
112 1.1 skrll __cg_pattern:
113 1.1 skrll .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
114 1.1 skrll
/* One quadword whose first word is the index of the current overlay.
   Written with stqr by __ovly_return and __ovly_load, and read back
   with lqr by __ovly_load and do_load.  */
115 1.1 skrll .type __ovly_current, @object
116 1.1 skrll .size __ovly_current, 16
117 1.1 skrll __ovly_current:
118 1.1 skrll .space 16
119 1.1 skrll
120 1.1 skrll /*
121 1.1 skrll * __ovly_return - stub for returning from overlay functions.
122 1.1 skrll *
123 1.1 skrll * On entry the four slots of $lr are:
124 1.1 skrll * __ovly_return, prev ovl index, caller return addr, undefined.
125 1.1 skrll *
126 1.1 skrll * Load the previous overlay and jump to the caller return address.
127 1.1 skrll * Updates __ovly_current.
 *
 * The _ovly_table entry of the previous overlay is fetched and the low
 * bit of its size word is tested.  If the bit is set we branch straight
 * to the caller return address; if it is clear we branch to do_load
 * with "vma" holding the table entry and "target" the return address.
 * save1-save3 (and save4 under OVLY_IRQ_SAVE) are stored below $sp here
 * because do_load overwrites them and reloads them before it branches
 * to "target".
 *
 * NOTE(review): the trailing "# a,b c" annotations look like issue
 * pipeline, latency and issue cycle, with the commented-out nop/lnop
 * lines marking unused issue slots -- confirm against the SPU ISA
 * documentation before relying on them.
128 1.1 skrll */
129 1.1 skrll .align 4
130 1.1 skrll .global __ovly_return
131 1.1 skrll .type __ovly_return, @function
132 1.1 skrll __ovly_return:
/* Table base is biased by one 16-byte entry, so index 1 selects the
   first entry of _ovly_table.  */
133 1.1 skrll ila tab1, _ovly_table - 16 # 0,2 0
/* Bring slot 1 of $lr (prev ovl index) into slot 0 of ovl.  */
134 1.1 skrll shlqbyi ovl, $lr, 4 # 1,4 0
135 1.1 skrll #nop
/* Bring slot 2 of $lr (caller return addr) into slot 0 of target.  */
136 1.1 skrll shlqbyi target, $lr, 8 # 1,4 1
137 1.1 skrll #nop; lnop
138 1.1 skrll #nop; lnop
/* off1 = index * 16, the byte offset of the table entry.  */
139 1.1 skrll shli off1, ovl, 4 # 0,4 4
140 1.1 skrll #lnop
141 1.1 skrll #nop
142 1.1 skrll hbr ovly_ret9, target # 1,15 5
143 1.1 skrll #nop; lnop
144 1.1 skrll #nop; lnop
145 1.1 skrll #nop
/* vma = whole { vma, size, file_offset, buf } entry.  */
146 1.1 skrll lqx vma, tab1, off1 # 1,6 8
147 1.1 skrll #ifdef OVLY_IRQ_SAVE
148 1.1 skrll nop
149 1.1 skrll stqd save4, -64($sp) # 1,6 9
150 1.1 skrll #else
151 1.1 skrll #nop; lnop
152 1.1 skrll #endif
153 1.1 skrll #nop; lnop
154 1.1 skrll #nop; lnop
155 1.1 skrll #nop; lnop
156 1.1 skrll #nop; lnop
157 1.1 skrll #nop
/* Rotate the size word of the entry into slot 0.  */
158 1.1 skrll rotqbyi size1, vma, 4 # 1,4 14
159 1.1 skrll #nop
160 1.1 skrll stqd save3, -48($sp) # 1,6 15
161 1.1 skrll #nop
162 1.1 skrll stqd save2, -32($sp) # 1,6 16
163 1.1 skrll #nop
164 1.1 skrll stqd save1, -16($sp) # 1,6 17
/* Low bit of size is the "overlay is loaded" flag.  */
165 1.1 skrll andi present1, size1, 1 # 0,2 18
166 1.1 skrll stqr ovl, __ovly_current # 1,6 18
167 1.1 skrll #nop; lnop
168 1.1 skrll #nop
/* Flag clear: go and load the overlay; do_load ends with "bi target".  */
169 1.1 skrll brz present1, do_load # 1,4 20
170 1.1 skrll ovly_ret9:
171 1.1 skrll #nop
172 1.1 skrll bi target # 1,4 21
173 1.1 skrll
174 1.1 skrll /*
175 1.1 skrll * __ovly_load - copy an overlay partition to local store.
176 1.1 skrll *
177 1.1 skrll * On entry $75 points to a word consisting of the overlay index in
178 1.1 skrll * the top 14 bits, and the target address in the bottom 18 bits.
179 1.1 skrll *
180 1.1 skrll * Sets up $lr to return via __ovly_return. If $lr is already set
181 1.1 skrll * to return via __ovly_return, don't change it. In that case we
182 1.1 skrll * have a tail call from one overlay function to another.
183 1.1 skrll * Updates __ovly_current.
 *
 * The $75 entry convention above is what the OVL_STUB_SIZE == 8 path
 * decodes.  The other path never computes "ovl" ($78) or "target"
 * ($79), so they must already be set on entry -- presumably by the
 * 16-byte call stub; confirm against the linker's stub generation.
 *
 * Both paths leave: tab2/off2 addressing the _ovly_table entry, "cur"
 * holding the old __ovly_current, rv1 = address of __ovly_return,
 * rv2 = per-word compare of $lr against rv1, rv3 = __rv_pattern and
 * "vma" = the table entry for the requested overlay.
184 1.1 skrll */
185 1.1 skrll .align 3
186 1.1 skrll .global __ovly_load
187 1.1 skrll .type __ovly_load, @function
188 1.1 skrll __ovly_load:
189 1.1 skrll #if OVL_STUB_SIZE == 8
190 1.1 skrll ########
191 1.1 skrll #nop
/* Fetch the quadword containing the word parm points at ...  */
192 1.1 skrll lqd target, 0(parm) # 1,6 -11
193 1.1 skrll #nop; lnop
194 1.1 skrll #nop; lnop
195 1.1 skrll #nop; lnop
196 1.1 skrll #nop; lnop
197 1.1 skrll #nop; lnop
198 1.1 skrll #nop
/* ... and rotate that word into slot 0.  */
199 1.1 skrll rotqby target, target, parm # 1,4 -5
200 1.1 skrll ila tab2, _ovly_table - 16 # 0,2 -4
201 1.1 skrll stqd save3, -48($sp) # 1,6 -4
202 1.1 skrll #nop
203 1.1 skrll stqd save2, -32($sp) # 1,6 -3
204 1.1 skrll #nop
205 1.1 skrll stqd save1, -16($sp) # 1,6 -2
/* Shift right by 18 to leave the overlay index from the top 14 bits.  */
206 1.1 skrll rotmi ovl, target, -18 # 0,4 -1
207 1.1 skrll hbr ovly_load9, target # 1,15 -1
208 1.1 skrll ila rv1, __ovly_return # 0,2 0
209 1.1 skrll #lnop
210 1.1 skrll #nop; lnop
211 1.1 skrll #nop
212 1.1 skrll lqr cur, __ovly_current # 1,6 2
213 1.1 skrll shli off2, ovl, 4 # 0,4 3
214 1.1 skrll stqr ovl, __ovly_current # 1,6 3
215 1.1 skrll ceq rv2, $lr, rv1 # 0,2 4
216 1.1 skrll lqr rv3, __rv_pattern # 1,6 4
217 1.1 skrll #nop; lnop
218 1.1 skrll #nop; lnop
219 1.1 skrll #nop
220 1.1 skrll lqx vma, tab2, off2 # 1,6 7
221 1.1 skrll ########
222 1.1 skrll #else /* OVL_STUB_SIZE == 16 */
223 1.1 skrll ########
224 1.1 skrll ila tab2, _ovly_table - 16 # 0,2 0
225 1.1 skrll stqd save3, -48($sp) # 1,6 0
226 1.1 skrll ila rv1, __ovly_return # 0,2 1
227 1.1 skrll stqd save2, -32($sp) # 1,6 1
228 1.1 skrll shli off2, ovl, 4 # 0,4 2
229 1.1 skrll lqr cur, __ovly_current # 1,6 2
230 1.1 skrll nop
231 1.1 skrll stqr ovl, __ovly_current # 1,6 3
232 1.1 skrll ceq rv2, $lr, rv1 # 0,2 4
233 1.1 skrll lqr rv3, __rv_pattern # 1,6 4
234 1.1 skrll #nop
235 1.1 skrll hbr ovly_load9, target # 1,15 5
236 1.1 skrll #nop
237 1.1 skrll lqx vma, tab2, off2 # 1,6 6
238 1.1 skrll #nop
239 1.1 skrll stqd save1, -16($sp) # 1,6 7
240 1.1 skrll ########
241 1.1 skrll #endif
242 1.1 skrll
243 1.1 skrll #nop; lnop
244 1.1 skrll #nop; lnop
245 1.1 skrll #nop
/* rv4 = { __ovly_return, old __ovly_current index, 0, 0 }.  */
246 1.1 skrll shufb rv4, rv1, cur, rv3 # 1,4 10
247 1.1 skrll #nop
/* rv5 = all ones if slot 0 of $lr already equals __ovly_return,
   otherwise all zeros.  */
248 1.1 skrll fsmb rv5, rv2 # 1,4 11
249 1.1 skrll #nop
/* rv6 = $lr shifted right by 8 bytes, which moves the caller return
   address from slot 0 to slot 2 and clears slots 0 and 1.  */
250 1.1 skrll rotqmbyi rv6, $lr, -8 # 1,4 12
251 1.1 skrll #nop
/* Rotate the size word of the table entry into slot 0.  */
252 1.1 skrll rotqbyi size2, vma, 4 # 1,4 13
253 1.1 skrll #nop
/* rv1 (save3) is dead from here on; reload the caller's value.  The
   stacked copies are left in place, so do_load can reload them again
   after it has reused the save registers.  */
254 1.1 skrll lqd save3, -48($sp) # 1,6 14
255 1.1 skrll #nop; lnop
/* rv7 = { __ovly_return, old index, caller return addr, ... }, the
   layout __ovly_return expects to find in $lr.  */
256 1.1 skrll or rv7, rv4, rv6 # 0,2 16
257 1.1 skrll lqd save2, -32($sp) # 1,6 16
/* Low bit of size is the "overlay is loaded" flag.  */
258 1.1 skrll andi present2, size2, 1 # 0,2 17
259 1.1 skrll #ifdef OVLY_IRQ_SAVE
260 1.1 skrll stqd save4, -64($sp) # 1,6 17
261 1.1 skrll #else
262 1.1 skrll lnop # 1,0 17
263 1.1 skrll #endif
/* Keep the existing $lr where rv5 is set (tail call), otherwise
   install rv7.  */
264 1.1 skrll selb $lr, rv7, $lr, rv5 # 0,2 18
265 1.1 skrll lqd save1, -16($sp) # 1,6 18
266 1.1 skrll #nop
/* Flag clear: go and load the overlay; do_load ends with "bi target".  */
267 1.1 skrll brz present2, do_load # 1,4 19
268 1.1 skrll ovly_load9:
269 1.1 skrll #nop
270 1.1 skrll bi target # 1,4 20
271 1.1 skrll
272 1.1 skrll /* If we get here, we are about to load a new overlay.
273 1.1 skrll * "vma" contains the relevant entry from _ovly_table[].
274 1.1 skrll * extern struct {
275 1.1 skrll * u32 vma;
276 1.1 skrll * u32 size;
277 1.1 skrll * u32 file_offset;
278 1.1 skrll * u32 buf;
279 1.1 skrll * } _ovly_table[];
 *
 * Reached by branch from __ovly_return and __ovly_load.  Also live on
 * entry: "target" (the address branched to once the load is done) and
 * __ovly_current (already set to the index of the overlay being
 * loaded).  The callers have stored save1-save3 (and save4 under
 * OVLY_IRQ_SAVE) below $sp; they are reloaded from there before the
 * final branch.
 *
 * Steps: DMA the overlay in chunks of at most MFC_MAX_DMA_SIZE, set
 * the low bit of .size in the new overlay's table entry, record the
 * new index in _ovly_buf_table, wait for the DMA, clear the low bit
 * of .size for the overlay that previously occupied the buffer, then
 * branch to "target".
280 1.1 skrll */
281 1.1 skrll .align 3
282 1.1 skrll .global __ovly_load_event
283 1.1 skrll .type __ovly_load_event, @function
284 1.1 skrll __ovly_load_event:
285 1.1 skrll do_load:
286 1.1 skrll #ifdef OVLY_IRQ_SAVE
/* Record the machine status in irq_stat, then use "bid" to continue at
   do_load10 with interrupts disabled.  Bit 0 of irq_stat is tested at
   the end to decide whether to re-enable them.  */
287 1.1 skrll ila irqtmp1, do_load10 # 0,2 -5
288 1.1 skrll rotqbyi sz, vma, 8 # 1,4 -5
289 1.1 skrll #nop
290 1.1 skrll rdch irq_stat, $SPU_RdMachStat # 1,6 -4
291 1.1 skrll #nop
292 1.1 skrll bid irqtmp1 # 1,4 -3
293 1.1 skrll do_load10:
294 1.1 skrll nop
295 1.1 skrll #else
296 1.1 skrll #nop
/* sz slot 0 = file_offset; it is the first increment added to the
   base effective address in the loop below.  */
297 1.1 skrll rotqbyi sz, vma, 8 # 1,4 0
298 1.1 skrll #endif
/* osize slot 0 = size = bytes still to transfer.  */
299 1.1 skrll rotqbyi osize, vma, 4 # 1,4 1
300 1.1 skrll #nop
/* ea64 = base effective address read from _EAR_; treated below as a
   64-bit value with the high word in slot 0 and the low word in
   slot 1.  */
301 1.1 skrll lqa ea64, _EAR_ # 1,6 2
302 1.1 skrll #nop
303 1.1 skrll lqr cgshuf, __cg_pattern # 1,6 3
304 1.1 skrll
305 1.1 skrll /* We could predict the branch at the end of this loop by adding a few
306 1.1 skrll instructions, and there are plenty of free cycles to do so without
307 1.1 skrll impacting loop execution time. However, it doesn't make a great
308 1.1 skrll deal of sense since we need to wait for the dma to complete anyway. */
/* Each iteration advances ea64 by sz (file_offset on the first pass,
   the size of the previous chunk afterwards), then queues one GET of
   min (osize, MFC_MAX_DMA_SIZE) bytes to local store address vma.  */
309 1.1 skrll __ovly_xfer_loop:
310 1.1 skrll #nop
/* off64 = sz moved down one word: zero in slot 0, sz in slot 1.  */
311 1.1 skrll rotqmbyi off64, sz, -4 # 1,4 4
312 1.1 skrll #nop; lnop
313 1.1 skrll #nop; lnop
314 1.1 skrll #nop; lnop
/* 64-bit add ea64 += off64: generate per-word carries, move the carry
   of the low word (slot 1) up to slot 0, then add with carry in.  */
315 1.1 skrll cg cgbits, ea64, off64 # 0,2 8
316 1.1 skrll #lnop
317 1.1 skrll #nop; lnop
318 1.1 skrll #nop
319 1.1 skrll shufb add64, cgbits, cgbits, cgshuf # 1,4 10
320 1.1 skrll #nop; lnop
321 1.1 skrll #nop; lnop
322 1.1 skrll #nop; lnop
323 1.1 skrll addx add64, ea64, off64 # 0,2 14
324 1.1 skrll #lnop
325 1.1 skrll ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
326 1.1 skrll lnop
327 1.1 skrll ori ea64, add64, 0 # 0,2 16
/* ealo slot 0 = low word of the effective address.  */
328 1.1 skrll rotqbyi ealo, add64, 4 # 1,4 16
329 1.1 skrll cgt cmp, osize, maxsize # 0,2 17
330 1.1 skrll wrch $MFC_LSA, vma # 1,6 17
331 1.1 skrll #nop; lnop
/* sz = osize > maxsize ? maxsize : osize.  */
332 1.1 skrll selb sz, osize, maxsize, cmp # 0,2 19
333 1.1 skrll wrch $MFC_EAH, ea64 # 1,6 19
334 1.1 skrll ila tagid, MFC_TAG_ID # 0,2 20
335 1.1 skrll wrch $MFC_EAL, ealo # 1,6 20
336 1.1 skrll ila cmd, MFC_GET_CMD # 0,2 21
337 1.1 skrll wrch $MFC_Size, sz # 1,6 21
/* osize -= sz.  */
338 1.1 skrll sf osize, sz, osize # 0,2 22
339 1.1 skrll wrch $MFC_TagId, tagid # 1,6 22
/* Advance the local store destination past this chunk.  */
340 1.1 skrll a vma, vma, sz # 0,2 23
341 1.1 skrll wrch $MFC_Cmd, cmd # 1,6 23
342 1.1 skrll #nop
343 1.1 skrll brnz osize, __ovly_xfer_loop # 1,4 24
344 1.1 skrll
345 1.1 skrll /* Now update our data structures while waiting for DMA to complete.
346 1.1 skrll Low bit of .size needs to be cleared on the _ovly_table entry
347 1.1 skrll corresponding to the evicted overlay, and set on the entry for the
348 1.1 skrll newly loaded overlay. Note that no overlay may in fact be evicted
349 1.1 skrll as _ovly_buf_table[] starts with all zeros. Don't zap .size entry
350 1.1 skrll for zero index! Also of course update the _ovly_buf_table entry. */
351 1.1 skrll #nop
352 1.1 skrll lqr newovl, __ovly_current # 1,6 25
353 1.1 skrll #nop; lnop
354 1.1 skrll #nop; lnop
355 1.1 skrll #nop; lnop
356 1.1 skrll #nop; lnop
357 1.1 skrll #nop; lnop
358 1.1 skrll shli off3, newovl, 4 # 0,4 31
359 1.1 skrll #lnop
360 1.1 skrll ila tab3, _ovly_table - 16 # 0,2 32
361 1.1 skrll #lnop
362 1.1 skrll #nop
/* pbyte = 0xff in byte 7 only, i.e. the last byte of word 1 (.size).  */
363 1.1 skrll fsmbi pbyte, 0x100 # 1,4 33
364 1.1 skrll #nop; lnop
365 1.1 skrll #nop
/* Re-read the table entry: vma was advanced by the copy loop.  */
366 1.1 skrll lqx vma, tab3, off3 # 1,6 35
367 1.1 skrll #nop; lnop
/* pbit = 1 in the .size word, 0 in every other word.  */
368 1.1 skrll andi pbit, pbyte, 1 # 0,2 37
369 1.1 skrll lnop
370 1.1 skrll #nop; lnop
371 1.1 skrll #nop; lnop
372 1.1 skrll #nop; lnop
/* Set the "loaded" flag in the new overlay's entry.  */
373 1.1 skrll or newvma, vma, pbit # 0,2 41
/* buf3 slot 0 = .buf, the index into _ovly_buf_table.  */
374 1.1 skrll rotqbyi buf3, vma, 12 # 1,4 41
375 1.1 skrll #nop; lnop
376 1.1 skrll #nop
377 1.1 skrll stqx newvma, tab3, off3 # 1,6 43
378 1.1 skrll #nop; lnop
/* _ovly_buf_table entries are one word each and, like overlay
   indices, are addressed through a base biased by one entry.  */
379 1.1 skrll shli off4, buf3, 2 # 1,4 45
380 1.1 skrll #lnop
381 1.1 skrll ila tab4, _ovly_buf_table - 4 # 0,2 46
382 1.1 skrll #lnop
383 1.1 skrll #nop; lnop
384 1.1 skrll #nop; lnop
385 1.1 skrll #nop
/* map = quadword containing the buffer's word; genwi = shufb control
   that inserts a word at that address.  */
386 1.1 skrll lqx map, tab4, off4 # 1,6 49
387 1.1 skrll #nop
388 1.1 skrll cwx genwi, tab4, off4 # 1,4 50
389 1.1 skrll a addr4, tab4, off4 # 0,2 51
390 1.1 skrll #lnop
391 1.1 skrll #nop; lnop
392 1.1 skrll #nop; lnop
393 1.1 skrll #nop; lnop
394 1.1 skrll #nop
/* oldovl slot 0 = index of the overlay previously in this buffer.  */
395 1.1 skrll rotqby oldovl, map, addr4 # 1,4 55
396 1.1 skrll #nop
/* newmap = map with the buffer's word replaced by the new index.  */
397 1.1 skrll shufb newmap, newovl, map, genwi # 0,4 56
398 1.1 skrll #if MFC_TAG_ID < 16
399 1.1 skrll ila newmask, 1 << MFC_TAG_ID # 0,2 57
400 1.1 skrll #else
401 1.1 skrll ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57
402 1.1 skrll #endif
403 1.1 skrll #lnop
404 1.1 skrll #nop; lnop
405 1.1 skrll #nop; lnop
406 1.1 skrll stqd newmap, 0(addr4) # 1,6 60
407 1.1 skrll
408 1.1 skrll /* Save app's tagmask, wait for DMA complete, restore mask. */
409 1.1 skrll ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61
410 1.1 skrll rdch oldmask, $MFC_RdTagMask # 1,6 61
411 1.1 skrll #nop
412 1.1 skrll wrch $MFC_WrTagMask, newmask # 1,6 62
413 1.1 skrll #nop
414 1.1 skrll wrch $MFC_WrTagUpdate, tagstat # 1,6 63
415 1.1 skrll #nop
416 1.1 skrll rdch tagstat, $MFC_RdTagStat # 1,6 64
417 1.1 skrll #nop
418 1.1 skrll sync # 1,4 65
419 1.1 skrll /* Any hint prior to the sync is lost. A hint here allows the branch
420 1.1 skrll to complete 15 cycles after the hint. With no hint the branch will
421 1.1 skrll take 18 or 19 cycles. */
422 1.1 skrll ila tab5, _ovly_table - 16 # 0,2 66
423 1.1 skrll hbr do_load99, target # 1,15 66
424 1.1 skrll shli off5, oldovl, 4 # 0,4 67
425 1.1 skrll wrch $MFC_WrTagMask, oldmask # 1,6 67
/* zovl = all ones in every word if oldovl == 0 (nothing evicted).  */
426 1.1 skrll ceqi zovl, oldovl, 0 # 0,2 68
427 1.1 skrll #lnop
428 1.1 skrll #nop; lnop
429 1.1 skrll #nop
430 1.1 skrll fsm zovl, zovl # 1,4 70
431 1.1 skrll #nop
432 1.1 skrll lqx oldvma, tab5, off5 # 1,6 71
433 1.1 skrll #nop
434 1.1 skrll lqd save3, -48($sp) # 1,6 72
435 1.1 skrll #nop; lnop
/* For index zero pbit becomes 0, so the andc/stqx below write the
   quadword back unchanged.  */
436 1.1 skrll andc pbit, pbit, zovl # 0,2 74
437 1.1 skrll lqd save2, -32($sp) # 1,6 74
438 1.1 skrll #ifdef OVLY_IRQ_SAVE
439 1.1 skrll ila irqtmp2, do_load90 # 0,2 75
440 1.1 skrll #lnop
441 1.1 skrll andi irq_stat, irq_stat, 1 # 0,2 76
442 1.1 skrll #lnop
443 1.1 skrll #else
444 1.1 skrll #nop; lnop
445 1.1 skrll #nop; lnop
446 1.1 skrll #endif
/* Clear the "loaded" flag of the evicted overlay.  */
447 1.1 skrll andc oldvma, oldvma, pbit # 0,2 77
448 1.1 skrll lqd save1, -16($sp) # 1,6 77
449 1.1 skrll nop # 0,0 78
450 1.1 skrll #lnop
451 1.1 skrll #nop
452 1.1 skrll stqx oldvma, tab5, off5 # 1,6 79
453 1.1 skrll #nop
454 1.1 skrll #ifdef OVLY_IRQ_SAVE
/* If bit 0 of the saved machine status was set, branch to do_load90
   with interrupts re-enabled; otherwise fall through to it with
   interrupts still disabled.  */
455 1.1 skrll binze irq_stat, irqtmp2 # 1,4 80
456 1.1 skrll do_load90:
457 1.1 skrll #nop
458 1.1 skrll lqd save4, -64($sp) # 1,6 84
459 1.1 skrll #else
460 1.1 skrll #nop; lnop
461 1.1 skrll #endif
462 1.1 skrll
/* Global label placed after the load has completed and just before the
   branch to the target.
   NOTE(review): presumably a breakpoint hook for debuggers -- confirm.  */
463 1.1 skrll .global _ovly_debug_event
464 1.1 skrll .type _ovly_debug_event, @function
465 1.1 skrll _ovly_debug_event:
466 1.1 skrll nop
467 1.1 skrll /* Branch to target address. */
468 1.1 skrll do_load99:
469 1.1 skrll bi target # 1,4 81/85
470 1.1 skrll
/* The size of __ovly_load is measured up to here, so it covers do_load
   as well.  */
471 1.1 skrll .size __ovly_load, . - __ovly_load
472