/*	$NetBSD: kfd_packet_manager_v9.c,v 1.2 2021/12/18 23:44:59 riastradh Exp $	*/

/*
 * Copyright 2016-2018 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kfd_packet_manager_v9.c,v 1.2 2021/12/18 23:44:59 riastradh Exp $");

#include "kfd_kernel_queue.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_ai.h"
#include "kfd_pm4_opcodes.h"
#include "gc/gc_10_1_0_sh_mask.h"

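/*
 * pm_map_process_v9 - write a MAP_PROCESS packet into @buffer, telling the
 * HW scheduler how to set up VMID state for @qpd's process: PASID, memory
 * aperture registers, GDS/GWS/OAC allocations, trap handler addresses and
 * the VM page table base.
 */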
static int pm_map_process_v9(struct packet_manager *pm,
		uint32_t *buffer, struct qcm_process_device *qpd)
{
	struct pm4_mes_map_process *packet;
	uint64_t vm_page_table_base_addr = qpd->page_table_base;

	packet = (struct pm4_mes_map_process *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_map_process));

	packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
					sizeof(struct pm4_mes_map_process));
	packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
	packet->bitfields2.process_quantum = 1;
	packet->bitfields2.pasid = qpd->pqm->process->pasid;
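	/* The 10-bit GDS size is split across two fields: the low 6 bits
	 * go in gds_size, the high 4 bits in gds_size_hi.
	 */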
	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
	packet->bitfields14.num_gws = qpd->num_gws;
	packet->bitfields14.num_oac = qpd->num_oac;
	packet->bitfields14.sdma_enable = 1;
	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;

	packet->sh_mem_config = qpd->sh_mem_config;
	packet->sh_mem_bases = qpd->sh_mem_bases;
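	/* The SQ_SHADER_TBA/TMA registers take the trap handler base and
	 * trap memory addresses in 256-byte units, hence the shift by 8.
	 */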
	if (qpd->tba_addr) {
		packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
		/* On GFX9, unlike GFX10, bit TRAP_EN of SQ_SHADER_TBA_HI is
		 * not defined, so setting it won't do any harm.
		 */
		packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8)
				| 1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT;

		packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
		packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
	}

	packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
	packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);

	packet->vm_context_page_table_base_addr_lo32 =
			lower_32_bits(vm_page_table_base_addr);
	packet->vm_context_page_table_base_addr_hi32 =
			upper_32_bits(vm_page_table_base_addr);

	return 0;
}

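/*
 * pm_runlist_v9 - write a RUN_LIST packet into @buffer pointing the HW
 * scheduler at the runlist indirect buffer at GPU address @ib,
 * @ib_size_in_dwords long.  @chain selects chained-runlist mode and
 * disables idling of the chained runlist.
 */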
static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
	struct pm4_mes_runlist *packet;

	int concurrent_proc_cnt = 0;
	struct kfd_dev *kfd = pm->dqm->dev;

	/* Determine the number of processes to map together to HW:
	 * it cannot exceed the number of VMIDs available to the
	 * scheduler, and is the smaller of the number of processes in
	 * the runlist and the kfd module parameter hws_max_conc_proc.
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has already been done in
	 * kgd2kfd_device_init().
	 */
	concurrent_proc_cnt = min(pm->dqm->processes_count,
			kfd->max_proc_per_quantum);

	packet = (struct pm4_mes_runlist *)buffer;

	memset(buffer, 0, sizeof(struct pm4_mes_runlist));
	packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
						sizeof(struct pm4_mes_runlist));

	packet->bitfields4.ib_size = ib_size_in_dwords;
	packet->bitfields4.chain = chain ? 1 : 0;
	packet->bitfields4.offload_polling = 0;
	packet->bitfields4.chained_runlist_idle_disable = chain ? 1 : 0;
	packet->bitfields4.valid = 1;
	packet->bitfields4.process_cnt = concurrent_proc_cnt;
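	/* ordinal2 overlays ib_base_lo in the packet layout; the IB address
	 * is dword-aligned, so its reserved low bits are already zero.
	 */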
	packet->ordinal2 = lower_32_bits(ib);
	packet->ib_base_hi = upper_32_bits(ib);

	return 0;
}

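/*
 * pm_set_resources_v9 - write a SET_RESOURCES packet into @buffer, granting
 * the HW scheduler the VMIDs, queues, GDS heap, GWS and OAC resources
 * described by @res.
 */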
static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
				struct scheduling_resources *res)
{
	struct pm4_mes_set_resources *packet;

	packet = (struct pm4_mes_set_resources *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_set_resources));

	packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
					sizeof(struct pm4_mes_set_resources));

	packet->bitfields2.queue_type =
			queue_type__mes_set_resources__hsa_interface_queue_hiq;
	packet->bitfields2.vmid_mask = res->vmid_mask;
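	/* KFD_UNMAP_LATENCY_MS is in milliseconds; the division by 100
	 * converts it to the packet field's apparent 100 ms granularity.
	 */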
	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
	packet->bitfields7.oac_mask = res->oac_mask;
	packet->bitfields8.gds_heap_base = res->gds_heap_base;
	packet->bitfields8.gds_heap_size = res->gds_heap_size;

	packet->gws_mask_lo = lower_32_bits(res->gws_mask);
	packet->gws_mask_hi = upper_32_bits(res->gws_mask);

	packet->queue_mask_lo = lower_32_bits(res->queue_mask);
	packet->queue_mask_hi = upper_32_bits(res->queue_mask);

	return 0;
}

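/*
 * pm_map_queues_v9 - write a MAP_QUEUES packet into @buffer for the single
 * queue @q: engine and queue-type selection, doorbell offset, and the GART
 * addresses of the MQD and the write pointer.
 */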
static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
		struct queue *q, bool is_static)
{
	struct pm4_mes_map_queues *packet;
	bool use_static = is_static;

	packet = (struct pm4_mes_map_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_map_queues));

	packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
					sizeof(struct pm4_mes_map_queues));
	packet->bitfields2.num_queues = 1;
	packet->bitfields2.queue_sel =
		queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;

	packet->bitfields2.engine_sel =
		engine_sel__mes_map_queues__compute_vi;
	packet->bitfields2.gws_control_queue = q->gws ? 1 : 0;
	packet->bitfields2.extended_engine_sel =
		extended_engine_sel__mes_map_queues__legacy_engine_sel;
	packet->bitfields2.queue_type =
		queue_type__mes_map_queues__normal_compute_vi;

	switch (q->properties.type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		if (use_static)
			packet->bitfields2.queue_type =
		queue_type__mes_map_queues__normal_latency_static_queue_vi;
		break;
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.queue_type =
			queue_type__mes_map_queues__debug_interface_queue_vi;
		break;
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		use_static = false; /* no static queues under SDMA */
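		/* SDMA engines 0-1 are addressed through the legacy
		 * engine_sel field; engines 2-7 need extended_engine_sel.
		 */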
		if (q->properties.sdma_engine_id < 2)
			packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
				engine_sel__mes_map_queues__sdma0_vi;
		else {
			packet->bitfields2.extended_engine_sel =
				extended_engine_sel__mes_map_queues__sdma0_to_7_sel;
			packet->bitfields2.engine_sel = q->properties.sdma_engine_id;
		}
		break;
	default:
		WARN(1, "queue type %d", q->properties.type);
		return -EINVAL;
	}
	packet->bitfields3.doorbell_offset =
			q->properties.doorbell_off;

	packet->mqd_addr_lo =
			lower_32_bits(q->gart_mqd_addr);

	packet->mqd_addr_hi =
			upper_32_bits(q->gart_mqd_addr);

	packet->wptr_addr_lo =
			lower_32_bits((uint64_t)q->properties.write_ptr);

	packet->wptr_addr_hi =
			upper_32_bits((uint64_t)q->properties.write_ptr);

	return 0;
}

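/*
 * pm_unmap_queues_v9 - write an UNMAP_QUEUES packet into @buffer that
 * preempts (or, with @reset, forcibly resets) the queues selected by
 * @filter/@filter_param on the engine given by @type and @sdma_engine.
 */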
static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
			enum kfd_queue_type type,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine)
{
	struct pm4_mes_unmap_queues *packet;

	packet = (struct pm4_mes_unmap_queues *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));

	packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
					sizeof(struct pm4_mes_unmap_queues));
	switch (type) {
	case KFD_QUEUE_TYPE_COMPUTE:
	case KFD_QUEUE_TYPE_DIQ:
		packet->bitfields2.extended_engine_sel =
			extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
		packet->bitfields2.engine_sel =
			engine_sel__mes_unmap_queues__compute;
		break;
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
		if (sdma_engine < 2) {
			packet->bitfields2.extended_engine_sel =
				extended_engine_sel__mes_unmap_queues__legacy_engine_sel;
			packet->bitfields2.engine_sel =
				engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
		} else {
			packet->bitfields2.extended_engine_sel =
				extended_engine_sel__mes_unmap_queues__sdma0_to_7_sel;
			packet->bitfields2.engine_sel = sdma_engine;
		}
		break;
	default:
		WARN(1, "queue type %d", type);
		return -EINVAL;
	}

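	/* A reset kills the selected queues outright; a preempt removes
	 * them from the hardware gracefully.
	 */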
	if (reset)
		packet->bitfields2.action =
			action__mes_unmap_queues__reset_queues;
	else
		packet->bitfields2.action =
			action__mes_unmap_queues__preempt_queues;

	switch (filter) {
	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
		packet->bitfields2.num_queues = 1;
		packet->bitfields3b.doorbell_offset0 = filter_param;
		break;
	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
		packet->bitfields3a.pasid = filter_param;
		break;
	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__unmap_all_queues;
		break;
	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
		/* in this case, we do not preempt static queues */
		packet->bitfields2.queue_sel =
			queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
		break;
	default:
		WARN(1, "filter %d", filter);
		return -EINVAL;
	}

	return 0;
}

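/*
 * pm_query_status_v9 - write a QUERY_STATUS packet into @buffer asking the
 * CP to write @fence_value to @fence_address once preceding packets have
 * been processed, so the driver can fence on runlist completion.
 */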
static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
			uint64_t fence_address, uint32_t fence_value)
{
	struct pm4_mes_query_status *packet;

	packet = (struct pm4_mes_query_status *)buffer;
	memset(buffer, 0, sizeof(struct pm4_mes_query_status));

	packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
					sizeof(struct pm4_mes_query_status));

	packet->bitfields2.context_id = 0;
	packet->bitfields2.interrupt_sel =
			interrupt_sel__mes_query_status__completion_status;
	packet->bitfields2.command =
			command__mes_query_status__fence_only_after_write_ack;

	packet->addr_hi = upper_32_bits((uint64_t)fence_address);
	packet->addr_lo = lower_32_bits((uint64_t)fence_address);
	packet->data_hi = upper_32_bits((uint64_t)fence_value);
	packet->data_lo = lower_32_bits((uint64_t)fence_value);

	return 0;
}

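/*
 * Packet writers and packet sizes used by the packet manager on v9 ASICs.
 * No RELEASE_MEM packet is needed here, so release_mem is left NULL with a
 * size of 0.
 */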
const struct packet_manager_funcs kfd_v9_pm_funcs = {
	.map_process		= pm_map_process_v9,
	.runlist		= pm_runlist_v9,
	.set_resources		= pm_set_resources_v9,
	.map_queues		= pm_map_queues_v9,
	.unmap_queues		= pm_unmap_queues_v9,
	.query_status		= pm_query_status_v9,
	.release_mem		= NULL,
	.map_process_size	= sizeof(struct pm4_mes_map_process),
	.runlist_size		= sizeof(struct pm4_mes_runlist),
	.set_resources_size	= sizeof(struct pm4_mes_set_resources),
	.map_queues_size	= sizeof(struct pm4_mes_map_queues),
	.unmap_queues_size	= sizeof(struct pm4_mes_unmap_queues),
	.query_status_size	= sizeof(struct pm4_mes_query_status),
	.release_mem_size	= 0,
};
    332