Home | History | Annotate | Line # | Download | only in fuc
      1 /* fuc microcode for gf100 PGRAPH/HUB
      2  *
      3  * Copyright 2011 Red Hat Inc.
      4  *
      5  * Permission is hereby granted, free of charge, to any person obtaining a
      6  * copy of this software and associated documentation files (the "Software"),
      7  * to deal in the Software without restriction, including without limitation
      8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      9  * and/or sell copies of the Software, and to permit persons to whom the
     10  * Software is furnished to do so, subject to the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be included in
     13  * all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     21  * OTHER DEALINGS IN THE SOFTWARE.
     22  *
     23  * Authors: Ben Skeggs
     24  */
     25 
#ifdef INCLUDE_DATA
// Data segment of the HUB ucode.  The code section addresses these labels
// as D[$r0 + #label], with $r0 kept at zero.

// bounds of the HUB mmio context list: head is the address of the first
// entry, tail the address just past the last one
hub_mmio_list_head:	.b32 #hub_mmio_list_base
hub_mmio_list_tail:	.b32 #hub_mmio_list_next

// unit counts, stored by init from the value read at 0x409604
gpc_count:		.b32 0
rop_count:		.b32 0
// commands queued by the interrupt handler, consumed by the main loop
cmd_queue:		queue_init

// context pointer of the current channel, stored by ctx_load
ctx_current:		.b32 0

// 256-byte block at the start of the channel's context; ctx_load reads the
// whole block in one transfer and ctx_mmio_exec writes it back
.align 256
chan_data:
chan_mmio_count:	.b32 0
chan_mmio_address:	.b32 0

// 256-byte buffer used as the destination of the xdld transfers in ctx_load
// (channel header) and ctx_mmio_exec (mmio list)
.align 256
xfer_data: 		.skip 256

hub_mmio_list_base:
.b32 0x0417e91c // 0x17e91c, 2
hub_mmio_list_next:
#endif
     48 
     49 #ifdef INCLUDE_CODE
// error - reports an exception to the host
//
// Stores the error code in CC_SCRATCH[5], then writes 1 to INTR_UP_SET.
//
// In:  $r15 error code (see os.h)
// Out: $r15 is overwritten with 1
//
error:
	nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(5), 0, $r15)
	mov $r15 1
	nv_iowr(NV_PGRAPH_FECS_INTR_UP_SET, 0, $r15)
	ret
     59 
// HUB fuc initialisation, executed by triggering ucode start, will
// fall through to main loop after completion.
//
// From "mov $r1 256" onwards $r1 holds the running context size; it is
// carried across every call below until it is written to CC_SCRATCH[1].
//
// Output:
//   CC_SCRATCH[0]:
//	     31:31: set to signal completion
//   CC_SCRATCH[1]:
//	      31:0: total PGRAPH context size
//
init:
	// keep $r0 at zero, the D[$r0 + #label] accesses below rely on it
	clear b32 $r0
	mov $xdbase $r0

	// setup stack: $sp = CAPS[17:9] << 8
	nv_iord($r1, NV_PGRAPH_FECS_CAPS, 0)
	extr $r1 $r1 9:17
	shl b32 $r1 8
	mov $sp $r1

	// enable fifo access
	mov $r2 NV_PGRAPH_FECS_ACCESS_FIFO
	nv_iowr(NV_PGRAPH_FECS_ACCESS, 0, $r2)

	// setup i0 handler, and route all interrupts to it
	mov $r1 #ih
	mov $iv0 $r1

	clear b32 $r2
	nv_iowr(NV_PGRAPH_FECS_INTR_ROUTE, 0, $r2)

	// route HUB_CHSW_PULSE to fuc interrupt 8
	mov $r2 0x2003		// { HUB_CHSW_PULSE, ZERO } -> intr 8
	nv_iowr(NV_PGRAPH_FECS_IROUTE, 0, $r2)

	// not sure what these are, route them because NVIDIA does, and
	// the IRQ handler will signal the host if we ever get one.. we
	// may find out if/why we need to handle these if so..
	//
	mov $r2 0x2004		// { 0x04, ZERO } -> intr 9
	nv_iowr(NV_PGRAPH_FECS_IROUTE, 1, $r2)
	mov $r2 0x200b		// { HUB_FIRMWARE_MTHD, ZERO } -> intr 10
	nv_iowr(NV_PGRAPH_FECS_IROUTE, 2, $r2)
	mov $r2 0x200c		// { 0x0c, ZERO } -> intr 15
	nv_iowr(NV_PGRAPH_FECS_IROUTE, 7, $r2)

	// enable all INTR_UP interrupts ($r3 = 0 - 1 = 0xffffffff)
	sub b32 $r3 $r0 1
	nv_iowr(NV_PGRAPH_FECS_INTR_UP_EN, 0, $r3)

	// enable fifo, ctxsw, 9, fwmthd, 15 interrupts
	// (0x8704 = bits 2, 8, 9, 10 and 15)
	imm32($r2, 0x8704)
	nv_iowr(NV_PGRAPH_FECS_INTR_EN_SET, 0, $r2)

	// fifo level triggered, rest edge
	mov $r2 NV_PGRAPH_FECS_INTR_MODE_FIFO_LEVEL
	nv_iowr(NV_PGRAPH_FECS_INTR_MODE, 0, $r2)

	// enable interrupts
	bset $flags ie0

	// fetch enabled GPC/ROP counts: ROP count is bits 20:16, GPC count is
	// bits 4:0 of the value read
	// NOTE(review): the value is consumed from $r15 although $r14 is the
	// register named in the macro call; presumably nv_rd32 leaves the
	// result in $r15, as call(nv_rd32) does in init_gpc_wait below --
	// confirm against the macro definition.
	nv_rd32($r14, 0x409604)
	extr $r1 $r15 16:20
	st b32 D[$r0 + #rop_count] $r1
	and $r15 0x1f
	st b32 D[$r0 + #gpc_count] $r15

	// set BAR_REQMASK to GPC mask: (1 << gpc_count) - 1
	mov $r1 1
	shl b32 $r1 $r15
	sub b32 $r1 1
	nv_iowr(NV_PGRAPH_FECS_BAR_MASK0, 0, $r1)
	nv_iowr(NV_PGRAPH_FECS_BAR_MASK1, 0, $r1)

	// context size calculation, reserve first 256 bytes for use by fuc
	mov $r1 256

	// same 0x404170 / 86c setup that ctx_xfer performs around a transfer;
	// it is undone again at the end of init
	mov $r15 2
	call(ctx_4170s)
	call(ctx_4170w)
	mov $r15 0x10
	call(ctx_86c)

	// calculate size of mmio context data
	// (the result is taken from $r15 below)
	ld b32 $r14 D[$r0 + #hub_mmio_list_head]
	ld b32 $r15 D[$r0 + #hub_mmio_list_tail]
	call(mmctx_size)

	// set mmctx base addresses now so we don't have to do it later,
	// they don't (currently) ever change
	shr b32 $r4 $r1 8
	nv_iowr(NV_PGRAPH_FECS_MMCTX_SAVE_SWBASE, 0, $r4)
	nv_iowr(NV_PGRAPH_FECS_MMCTX_LOAD_SWBASE, 0, $r4)
	// NOTE(review): $r3 is reloaded from gpc_count before its next use in
	// this routine, so this add looks like it has no effect -- confirm
	// that strand_ctx_init does not read $r3 before touching it.
	add b32 $r3 0x1300
	add b32 $r1 $r15
	shr b32 $r15 2
	nv_iowr(NV_PGRAPH_FECS_MMCTX_LOAD_COUNT, 0, $r15) // wtf??

	// strands, base offset needs to be aligned to 256 bytes
	// (this always advances to the next multiple of 256, a full 256 bytes
	// when $r1 is already aligned)
	shr b32 $r1 8
	add b32 $r1 1
	shl b32 $r1 8
	mov b32 $r15 $r1
	call(strand_ctx_init)
	add b32 $r1 $r15

	// initialise each GPC in sequence by passing in the offset of its
	// context data in GPCn_CC_SCRATCH[1], and starting its FUC (which
	// has previously been uploaded by the host) running.
	//
	// the GPC fuc init sequence will set GPCn_CC_SCRATCH[0] bit 31
	// when it has completed, and return the size of its context data
	// in GPCn_CC_SCRATCH[1]
	//
	// $r3 = GPCs left, $r4 = register base of the current GPC (0x8000
	// apart).  The loop body runs before the count is tested, so this
	// assumes gpc_count is non-zero.
	ld b32 $r3 D[$r0 + #gpc_count]
	imm32($r4, 0x502000)
	init_gpc:
		// setup, and start GPC ucode running
		add b32 $r14 $r4 0x804
		mov b32 $r15 $r1
		call(nv_wr32)			// CC_SCRATCH[1] = ctx offset
		add b32 $r14 $r4 0x10c
		clear b32 $r15
		call(nv_wr32)			// base + 0x10c = 0
		add b32 $r14 $r4 0x104
		call(nv_wr32)			// ENTRY
		add b32 $r14 $r4 0x100
		mov $r15 2			// CTRL_START_TRIGGER
		call(nv_wr32)			// CTRL

		// wait for it to complete, and adjust context size
		add b32 $r14 $r4 0x800
		init_gpc_wait:
			// poll CC_SCRATCH[0] until bit 31 is set
			call(nv_rd32)
			xbit $r15 $r15 31
			bra e #init_gpc_wait
		add b32 $r14 $r4 0x804
		call(nv_rd32)
		add b32 $r1 $r15

		// next!
		add b32 $r4 0x8000
		sub b32 $r3 1
		bra ne #init_gpc

	// undo the 86c / 0x404170 setup done before the size calculation
	mov $r15 0
	call(ctx_86c)
	mov $r15 0
	call(ctx_4170s)

	// save context size, and tell host we're ready
	nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(1), 0, $r1)
	clear b32 $r1
	bset $r1 31
	nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_SET(0), 0, $r1)
    217 
// Main program loop, very simple, sleeps until woken up by the interrupt
// handler, pulls a command from the queue and executes its handler
//
// Commands ($r14 = command, $r15 = data, as left by queue_get):
//   0x4001  context switch requested by the GPU (queued by ih)
//   0x0001  set current channel, $r15 = channel address
//   0x0002  save the current channel's context
// Any other command is reported through error() as E_BAD_COMMAND with the
// command number in bits 31:16.
//
// 0x0001 and 0x0002 finish by setting CC_SCRATCH[0] bit 31; the context
// switch path acks through the CHSW register instead.
//
wait:
	// sleep until we have something to do: arm $p0 first, then sleep on
	// it.  The interrupt handler finishes with "bclr $flags $p0", which
	// is what ends the sleep.
	bset $flags $p0
	sleep $p0
main:
	// $p1 set on return means nothing was fetched, go back to sleep
	mov $r13 #cmd_queue
	call(queue_get)
	bra $p1 #wait

	// context switch, requested by GPU?
	cmpu b32 $r14 0x4001
	bra ne #main_not_ctx_switch
		trace_set(T_AUTO)
		// $r1 = current channel, $r2 = next channel; bit 31 of each
		// is tested below to see whether there is one
		nv_iord($r1, NV_PGRAPH_FECS_CHAN_ADDR, 0)
		nv_iord($r2, NV_PGRAPH_FECS_CHAN_NEXT, 0)

		xbit $r3 $r1 31
		bra e #chsw_no_prev
			xbit $r3 $r2 31
			bra e #chsw_prev_no_next
				// prev and next: save prev, then load next.
				// next is kept on the stack while $r2 carries
				// prev into the save.
				push $r2
				mov b32 $r2 $r1
				trace_set(T_SAVE)
				bclr $flags $p1
				bset $flags $p2
				call(ctx_xfer)
				trace_clr(T_SAVE);
				pop $r2
				trace_set(T_LOAD);
				bset $flags $p1
				call(ctx_xfer)
				trace_clr(T_LOAD);
				bra #chsw_done
			chsw_prev_no_next:
				// prev only: save it, then write the NEXT value
				// that was read above into CHAN_ADDR
				push $r2
				mov b32 $r2 $r1
				bclr $flags $p1
				bclr $flags $p2
				call(ctx_xfer)
				pop $r2
				nv_iowr(NV_PGRAPH_FECS_CHAN_ADDR, 0, $r2)
				bra #chsw_done
		chsw_no_prev:
			// no prev: load next if there is one, otherwise just ack
			xbit $r3 $r2 31
			bra e #chsw_done
				bset $flags $p1
				bclr $flags $p2
				call(ctx_xfer)

		// ack the context switch request
		chsw_done:
		mov $r2 NV_PGRAPH_FECS_CHSW_ACK
		nv_iowr(NV_PGRAPH_FECS_CHSW, 0, $r2)
		trace_clr(T_AUTO)
		bra #main

	// request to set current channel? (*not* a context switch)
	main_not_ctx_switch:
	cmpu b32 $r14 0x0001
	bra ne #main_not_ctx_chan
		mov b32 $r2 $r15
		call(ctx_chan)
		bra #main_done

	// request to store current channel context?
	main_not_ctx_chan:
	cmpu b32 $r14 0x0002
	bra ne #main_not_ctx_save
		// $r2 is not set up here; the save path of ctx_xfer does not
		// call ctx_load, which is the only consumer of $r2
		trace_set(T_SAVE)
		bclr $flags $p1
		bclr $flags $p2
		call(ctx_xfer)
		trace_clr(T_SAVE)
		bra #main_done

	// unknown command: error code = (command << 16) | E_BAD_COMMAND
	main_not_ctx_save:
		shl b32 $r15 $r14 16
		or $r15 E_BAD_COMMAND
		call(error)
		bra #main

	// signal completion by setting CC_SCRATCH[0] bit 31
	main_done:
	clear b32 $r2
	bset $r2 31
	nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_SET(0), 0, $r2)
	bra #main
    307 
// interrupt handler
//
// Saves $r0, $r8-$r11, $r13-$r15 and $flags, handles the FIFO, CHSW and
// FWMTHD interrupts, forwards any other pending bit to the host through
// INTR_UP_SET, acks everything that was pending, and clears $p0 on the way
// out so that the main loop's "sleep $p0" completes.
//
ih:
	push $r0
	push $r8
	mov $r8 $flags
	push $r8
	push $r9
	push $r10
	push $r11
	push $r13
	push $r14
	push $r15
	// NOTE(review): $r0 is saved and re-zeroed here, presumably because
	// the interrupted code may be using it as scratch inside one of the
	// nv_io* macros -- confirm against the macro definitions.
	clear b32 $r0

	// incoming fifo command?
	// ($r10 keeps the pending interrupt bits for the whole handler)
	nv_iord($r10, NV_PGRAPH_FECS_INTR, 0)
	and $r11 $r10 NV_PGRAPH_FECS_INTR_FIFO
	bra e #ih_no_fifo
		// queue incoming fifo command for later processing
		mov $r13 #cmd_queue
		nv_iord($r14, NV_PGRAPH_FECS_FIFO_CMD, 0)
		nv_iord($r15, NV_PGRAPH_FECS_FIFO_DATA, 0)
		call(queue_put)
		// NOTE(review): $r11 is overwritten by the "and" at ih_no_fifo
		// before anything in this handler reads it, so this add looks
		// like it has no effect -- confirm.
		add b32 $r11 0x400
		mov $r14 1
		nv_iowr(NV_PGRAPH_FECS_FIFO_ACK, 0, $r14)

	// context switch request?
	ih_no_fifo:
	and $r11 $r10 NV_PGRAPH_FECS_INTR_CHSW
	bra e #ih_no_ctxsw
		// enqueue a context switch for later processing
		// ($r15 is not set up; main's 0x4001 handler does not read the
		// data word)
		mov $r13 #cmd_queue
		mov $r14 0x4001
		call(queue_put)

	// firmware method?
	ih_no_ctxsw:
	and $r11 $r10 NV_PGRAPH_FECS_INTR_FWMTHD
	bra e #ih_no_fwmthd
		// none we handle; report to host and ack
		// CC_SCRATCH[4] = TRAPPED_DATA_LO, CC_SCRATCH[3] = TRAPPED_ADDR
		nv_rd32($r15, NV_PGRAPH_TRAPPED_DATA_LO)
		nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(4), 0, $r15)
		nv_rd32($r15, NV_PGRAPH_TRAPPED_ADDR)
		nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(3), 0, $r15)
		// CC_SCRATCH[2] = FE_OBJECT_TABLE entry selected by
		// TRAPPED_ADDR bits 18:16 (4 bytes per entry)
		extr $r14 $r15 16:18
		shl b32 $r14 $r14 2
		imm32($r15, NV_PGRAPH_FE_OBJECT_TABLE(0))
		add b32 $r14 $r15
		call(nv_rd32)
		nv_iowr(NV_PGRAPH_FECS_CC_SCRATCH_VAL(2), 0, $r15)
		mov $r15 E_BAD_FWMTHD
		call(error)
		// the ack: write 0x100 to 0x400144
		mov $r11 0x100
		nv_wr32(0x400144, $r11)

	// anything we didn't handle, bring it to the host's attention
	ih_no_fwmthd:
	mov $r11 0x504 // FIFO | CHSW | FWMTHD
	not b32 $r11
	and $r11 $r10 $r11
	bra e #ih_no_other
		nv_iowr(NV_PGRAPH_FECS_INTR_UP_SET, 0, $r11)

	// ack, and wake up main()
	ih_no_other:
	nv_iowr(NV_PGRAPH_FECS_INTR_ACK, 0, $r10)

	pop $r15
	pop $r14
	pop $r13
	pop $r11
	pop $r10
	pop $r9
	pop $r8
	mov $flags $r8
	pop $r8
	pop $r0
	// $p0 is cleared after $flags has been restored, so the restore does
	// not bring the old $p0 back
	bclr $flags $p0
	iret
    388 
    389 #if CHIPSET < GK100
// Not real sure, but, MEM_CMD 7 will hang forever if this isn't done
//
// Writes 1 to 0x404160, then polls the register until bit 4 reads back set.
// Clobbers $r15.
//
ctx_4160s:
	mov $r15 1
	nv_wr32(0x404160, $r15)
	ctx_4160s_wait:
		nv_rd32($r15, 0x404160)
		xbit $r15 $r15 4
		bra e #ctx_4160s_wait
	ret
    399 
// Counterpart of ctx_4160s: writes 0 back to 0x404160.
//
// If this is not done again at the end of a transfer, some things cause
// PGRAPH to hang with STATUS=0x00000007 until the register is cleared..
// fbcon can still function with it set however...
//
// Clobbers $r15.
//
ctx_4160c:
	mov $r15 0
	nv_wr32(0x404160, $r15)
	ret
    407 #endif
    408 
// Purpose of 0x404170 is, again, not really known.
//
// Writes the caller's value to 0x404170 with bit 4 (0x10) forced on;
// ctx_4170w polls that same bit until it reads back clear.
//
// In: $r15 value to set 0x404170 to
//
ctx_4170s:
	bset $r15 4
	nv_wr32(0x404170, $r15)
	ret
    417 
// Waits for a ctx_4170s() call to complete, i.e. spins until bit 4 of
// 0x404170 reads back as zero.
//
// Out: $r15 is zero on return
//
ctx_4170w:
	ctx_4170w_poll:
		nv_rd32($r15, 0x404170)
		xbit $r15 $r15 4
		bra ne #ctx_4170w_poll
	ret
    425 
// Disables various things, waits a bit, and re-enables them..
//
// Not sure how exactly this helps, perhaps "ENABLE" is not such a
// good description for the bits we turn off?  Anyways, without this,
// funny things happen.
//
// Clobbers $r14 and $r15.
//
ctx_redswitch:
	// first write: ENABLE_GPC plus the three POWER bits, with ENABLE_ROP
	// and ENABLE_MAIN left out
	mov $r14 NV_PGRAPH_FECS_RED_SWITCH_ENABLE_GPC
	or  $r14 NV_PGRAPH_FECS_RED_SWITCH_POWER_ROP
	or  $r14 NV_PGRAPH_FECS_RED_SWITCH_POWER_GPC
	or  $r14 NV_PGRAPH_FECS_RED_SWITCH_POWER_MAIN
	nv_iowr(NV_PGRAPH_FECS_RED_SWITCH, 0, $r14)
	// short delay: count $r15 down from 8 to 0
	mov $r15 8
	ctx_redswitch_delay:
		sub b32 $r15 1
		bra ne #ctx_redswitch_delay
	// second write: same value with ENABLE_ROP and ENABLE_MAIN added
	or  $r14 NV_PGRAPH_FECS_RED_SWITCH_ENABLE_ROP
	or  $r14 NV_PGRAPH_FECS_RED_SWITCH_ENABLE_MAIN
	nv_iowr(NV_PGRAPH_FECS_RED_SWITCH, 0, $r14)
	ret
    446 
// Not a clue what this is for, except that unless the value is 0x10, the
// strand context is saved (and presumably restored) incorrectly..
//
// The same value is written to three places: the FECS UNK86C register,
// 0x408a14, and the GPCX_GPCCS UNK86C register.
//
// In: $r15 value to set to (0x00/0x10 are used)
//
ctx_86c:
	nv_iowr(NV_PGRAPH_FECS_UNK86C, 0, $r15)
	nv_wr32(0x408a14, $r15)
	nv_wr32(NV_PGRAPH_GPCX_GPCCS_UNK86C, $r15)
	ret
    457 
// ctx_mem - issue a MEM_CMD and spin until the register reads back zero
//
// In:  $r15 NV_PGRAPH_FECS_MEM_CMD_*
// Out: $r15 is zero on return
//
ctx_mem:
	nv_iowr(NV_PGRAPH_FECS_MEM_CMD, 0, $r15)
	ctx_mem_poll:
		nv_iord($r15, NV_PGRAPH_FECS_MEM_CMD, 0)
		cmpu b32 $r15 0
		bra ne #ctx_mem_poll
	ret
    466 
// ctx_load - loads a channel's ctxctl data, and selects its vm
//
// Makes $r2 the current channel, reads 16 bytes of the channel header to
// obtain the PGRAPH context pointer (stored in ctx_current), then reads the
// first 256 bytes of that context into chan_data.
//
// In: $r2 channel address
//
// Clobbers $r1, $r2, $r10 and $r15 (plus whatever wait_donez and ctx_mem
// use).
//
ctx_load:
	trace_set(T_CHAN)

	// switch to channel, somewhat magic in parts..
	mov $r10 12		// DONE_UNK12
	call(wait_donez)
	clear b32 $r15
	nv_iowr(0x409a24, 0, $r15)
	nv_iowr(NV_PGRAPH_FECS_CHAN_NEXT, 0, $r2)
	nv_iowr(NV_PGRAPH_FECS_MEM_CHAN, 0, $r2)
	mov $r15 NV_PGRAPH_FECS_MEM_CMD_LOAD_CHAN
	call(ctx_mem)
	nv_iowr(NV_PGRAPH_FECS_CHAN_ADDR, 0, $r2)

	// load channel header, fetch PGRAPH context pointer
	// MEM_BASE = ((chan with bit 31 cleared) << 4) + 2
	mov $xtargets $r0
	bclr $r2 31
	shl b32 $r2 4
	add b32 $r2 2

	trace_set(T_LCHAN)
	nv_iowr(NV_PGRAPH_FECS_MEM_BASE, 0, $r2)
	imm32($r2, NV_PGRAPH_FECS_MEM_TARGET_UNK31)
	or  $r2 NV_PGRAPH_FECS_MEM_TARGET_AS_VRAM
	nv_iowr(NV_PGRAPH_FECS_MEM_TARGET, 0, $r2)
	mov $r1 0x10			// chan + 0x0210
	mov $r2 #xfer_data
	sethi $r2 0x00020000		// 16 bytes
	xdld $r1 $r2
	xdwait
	trace_clr(T_LCHAN)

	// update current context
	// ctx_current = (xfer_data[4] << 24) | (xfer_data[0] >> 8)
	ld b32 $r1 D[$r0 + #xfer_data + 4]
	shl b32 $r1 24
	ld b32 $r2 D[$r0 + #xfer_data + 0]
	shr b32 $r2 8
	or $r1 $r2
	st b32 D[$r0 + #ctx_current] $r1

	// set transfer base to start of context, and fetch context header
	// (offset 0 of the context, into chan_data)
	trace_set(T_LCTXH)
	nv_iowr(NV_PGRAPH_FECS_MEM_BASE, 0, $r1)
	mov $r2 NV_PGRAPH_FECS_MEM_TARGET_AS_VM
	nv_iowr(NV_PGRAPH_FECS_MEM_TARGET, 0, $r2)
	mov $r1 #chan_data
	sethi $r1 0x00060000		// 256 bytes
	xdld $r0 $r1
	xdwait
	trace_clr(T_LCTXH)

	trace_clr(T_CHAN)
	ret
    524 
// ctx_chan - handler for HUB_SET_CHAN command, will set a channel as
//            the active channel for ctxctl, but not actually transfer
//            any context data.  intended for use only during initial
//            context construction.
//
// Sequence: ctx_4160s (pre-GK100 only), ctx_load, wait on DONE_UNK12,
// MEM_CMD 5, ctx_4160c (pre-GK100 only).
//
// In: $r2 channel address
//
ctx_chan:
#if CHIPSET < GK100
	call(ctx_4160s)
#endif
	call(ctx_load)
	mov $r10 12			// DONE_UNK12
	call(wait_donez)
	mov $r15 5 // MEM_CMD 5 ???
	call(ctx_mem)
#if CHIPSET < GK100
	call(ctx_4160c)
#endif
	ret
    545 
// Execute per-context state overrides list
//
// Only executed on the first load of a channel.  Might want to look into
// removing this and having the host directly modify the channel's context
// to change this state...  The nouveau DRM already builds this list as
// it's definitely needed for NVIDIA's, so we may as well use it for now
//
// Each list entry is 8 bytes: a register address followed by the value to
// write to it.  The list is pulled into xfer_data 256 bytes at a time.
//
// Input: $r1 mmio list length, in entries.  Must be non-zero: the loop
//        executes an entry before testing the count (ctx_xfer checks this
//        before calling).
//
// Clobbers $r1, $r3, $r4, $r5, $r14 and $r15.
//
ctx_mmio_exec:
	// set transfer base to be the mmio list
	ld b32 $r3 D[$r0 + #chan_mmio_address]
	nv_iowr(NV_PGRAPH_FECS_MEM_BASE, 0, $r3)

	// $r3 = byte offset into the list, $r4 = offset within xfer_data
	clear b32 $r3
	ctx_mmio_loop:
		// fetch next 256 bytes of mmio list if necessary
		and $r4 $r3 0xff
		bra ne #ctx_mmio_pull
			mov $r5 #xfer_data
			sethi $r5 0x00060000	// 256 bytes
			xdld $r3 $r5
			xdwait

		// execute a single list entry
		ctx_mmio_pull:
		ld b32 $r14 D[$r4 + #xfer_data + 0x00]
		ld b32 $r15 D[$r4 + #xfer_data + 0x04]
		call(nv_wr32)

		// next!
		add b32 $r3 8
		sub b32 $r1 1
		bra ne #ctx_mmio_loop

	// set transfer base back to the current context
	ctx_mmio_done:
	ld b32 $r3 D[$r0 + #ctx_current]
	nv_iowr(NV_PGRAPH_FECS_MEM_BASE, 0, $r3)

	// disable the mmio list now, we don't need/want to execute it again
	// (zero chan_mmio_count, then write the chan_data block back to the
	// start of the context)
	st b32 D[$r0 + #chan_mmio_count] $r0
	mov $r1 #chan_data
	sethi $r1 0x00060000		// 256 bytes
	xdst $r0 $r1
	xdwait
	ret
    593 
// Transfer HUB context data between GPU and storage area
//
// In: $r2 channel address (only consumed on the load path, by ctx_load)
//     $p1 clear on save, set on load
//     $p2 set if opposite direction done/will be done, so:
//		on save it means: "a load will follow this save"
//		on load it means: "a save preceded this load"
//
// Steps taken for each ($p1, $p2) combination:
//   save, no load follows:    pre, xfer, MEM_CMD 5, post
//   save, load follows:       pre, xfer, MEM_CMD 5
//   load, no save preceded:   pre, pre_load, xfer, post
//   load, save preceded:      pre_load, xfer, post
//
ctx_xfer:
	// according to mwk, some kind of wait for idle
	mov $r14 4
	nv_iowr(0x409c08, 0, $r14)
	ctx_xfer_idle:
		// spin while bit 13 of 0x409c00 is set
		nv_iord($r14, 0x409c00, 0)
		and $r14 0x2000
		bra ne #ctx_xfer_idle

	// a load that follows a save skips "pre", the save already did it
	bra not $p1 #ctx_xfer_pre
	bra $p2 #ctx_xfer_pre_load
	ctx_xfer_pre:
		mov $r15 0x10
		call(ctx_86c)
#if CHIPSET < GK100
		call(ctx_4160s)
#endif
		bra not $p1 #ctx_xfer_exec

	// load only: redswitch, bracketed by 0x404170 writes, then switch to
	// the channel in $r2
	ctx_xfer_pre_load:
		mov $r15 2
		call(ctx_4170s)
		call(ctx_4170w)
		call(ctx_redswitch)
		clear b32 $r15
		call(ctx_4170s)
		call(ctx_load)

	// fetch context pointer, and initiate xfer on all GPCs
	ctx_xfer_exec:
	ld b32 $r1 D[$r0 + #ctx_current]

	clear b32 $r2
	nv_iowr(NV_PGRAPH_FECS_BAR, 0, $r2)

	// the command word is $p1 | ($p2 << 1)
	nv_wr32(0x41a500, $r1)	// GPC_BCAST_WRCMD_DATA = ctx pointer
	xbit $r15 $flags $p1
	xbit $r2 $flags $p2
	shl b32 $r2 1
	or $r15 $r2
	nv_wr32(0x41a504, $r15)	// GPC_BCAST_WRCMD_CMD = GPC_XFER(type)

	// strands
	// (the command is STRAND_CMD_SAVE + $p1)
	call(strand_pre)
	clear b32 $r2
	nv_iowr(NV_PGRAPH_FECS_STRAND_SELECT, 0x3f, $r2)
	xbit $r2 $flags $p1	// SAVE/LOAD
	add b32 $r2 NV_PGRAPH_FECS_STRAND_CMD_SAVE
	nv_iowr(NV_PGRAPH_FECS_STRAND_CMD, 0x3f, $r2)

	// mmio context
	xbit $r10 $flags $p1	// direction
	or $r10 6		// first, last
	mov $r11 0		// base = 0
	ld b32 $r12 D[$r0 + #hub_mmio_list_head]
	ld b32 $r13 D[$r0 + #hub_mmio_list_tail]
	mov $r14 0		// not multi
	call(mmctx_xfer)

	// wait for GPCs to all complete
	mov $r10 8		// DONE_BAR
	call(wait_doneo)

	// wait for strand xfer to complete
	call(strand_wait)

	// post-op
	// save only: wait on DONE_UNK12 and issue MEM_CMD 5
	bra $p1 #ctx_xfer_post
		mov $r10 12		// DONE_UNK12
		call(wait_donez)
		mov $r15 5 // MEM_CMD 5 ???
		call(ctx_mem)

	// a save that will be followed by a load leaves "post" to that load
	bra $p2 #ctx_xfer_done
	ctx_xfer_post:
		mov $r15 2
		call(ctx_4170s)
		clear b32 $r15
		call(ctx_86c)
		call(strand_post)
		call(ctx_4170w)
		clear b32 $r15
		call(ctx_4170s)

		// on load, run the channel's mmio list if it has one
		bra not $p1 #ctx_xfer_no_post_mmio
		ld b32 $r1 D[$r0 + #chan_mmio_count]
		or $r1 $r1
		bra e #ctx_xfer_no_post_mmio
			call(ctx_mmio_exec)

		ctx_xfer_no_post_mmio:
#if CHIPSET < GK100
		call(ctx_4160c)
#endif

	ctx_xfer_done:
	ret
    699 #endif
    700