Home | History | Annotate | Line # | Download | only in amdkfd
      1 /*	$NetBSD: kfd_priv.h,v 1.3 2021/12/18 23:44:59 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright 2014 Advanced Micro Devices, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
     20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
     21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
     22  * OTHER DEALINGS IN THE SOFTWARE.
     23  */
     24 
     25 #ifndef KFD_PRIV_H_INCLUDED
     26 #define KFD_PRIV_H_INCLUDED
     27 
     28 #include <linux/hashtable.h>
     29 #include <linux/mmu_notifier.h>
     30 #include <linux/mutex.h>
     31 #include <linux/types.h>
     32 #include <linux/atomic.h>
     33 #include <linux/workqueue.h>
     34 #include <linux/spinlock.h>
     35 #include <linux/kfd_ioctl.h>
     36 #include <linux/idr.h>
     37 #include <linux/kfifo.h>
     38 #include <linux/seq_file.h>
     39 #include <linux/kref.h>
     40 #include <linux/sysfs.h>
     41 #include <linux/device_cgroup.h>
     42 #include <drm/drm_file.h>
     43 #include <drm/drm_drv.h>
     44 #include <drm/drm_device.h>
     45 #include <kgd_kfd_interface.h>
     46 
     47 #include "amd_shared.h"
     48 
/*
 * Upper bound for a single ring entry.
 * NOTE(review): the unit (dwords vs. bytes) is not stated here - confirm.
 */
#define KFD_MAX_RING_ENTRY_SIZE		8

/* Permission bits for KFD sysfs files: read-only for user/group/other */
#define KFD_SYSFS_FILE_MODE		0444
     52 
/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16

/*
 * Use upper bits of mmap offset to store KFD driver specific information.
 * BITS[63:62] - Encode MMAP type
 * BITS[61:46] - Encode gpu_id, identifying the GPU the offset belongs to
 * BITS[45:0]  - MMAP offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
 *  defines are w.r.t. PAGE_SIZE
 */
#define KFD_MMAP_TYPE_SHIFT	62
#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_MMIO	(0x0ULL << KFD_MMAP_TYPE_SHIFT)

#define KFD_MMAP_GPU_ID_SHIFT 46
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				<< KFD_MMAP_GPU_ID_SHIFT)

/*
 * Place gpu_id into BITS[61:46]; bits of gpu_id above
 * KFD_GPU_ID_HASH_WIDTH are masked off. The argument is parenthesized so
 * that the cast applies to the whole expression passed in.
 */
#define KFD_MMAP_GPU_ID(gpu_id) \
	((((uint64_t)(gpu_id)) << KFD_MMAP_GPU_ID_SHIFT) & KFD_MMAP_GPU_ID_MASK)

/*
 * Extract the gpu_id field from an encoded offset. The argument is
 * parenthesized so that expressions such as (type | id) are masked as a
 * whole instead of only their last operand.
 */
#define KFD_MMAP_GET_GPU_ID(offset) \
	(((offset) & KFD_MMAP_GPU_ID_MASK) >> KFD_MMAP_GPU_ID_SHIFT)
     78 
/*
 * Fixed HIQ hqd slot for Kaveri.
 *
 * With the cp scheduler the HIQ has to be assigned to a fixed hqd slot,
 * either manually or via the amdgpu driver. On Kaveri only the queues of
 * the first ME take part in cp scheduling, so the HIQ slot is placed in
 * the second ME.
 */
#define KFD_CIK_HIQ_PIPE	4
#define KFD_CIK_HIQ_QUEUE	0
     88 
/*
 * Macro for allocating structures.
 *
 * Evaluates to a zeroed kzalloc(..., GFP_KERNEL) allocation sized for the
 * object that ptr_to_struct points to, cast to the type of ptr_to_struct.
 * The macro does not assign the result; the caller must store it and
 * check it before use.
 *
 * The argument is parenthesized inside sizeof so that an expression such
 * as (base + 1) is dereferenced as a whole.
 */
#define kfd_alloc_struct(ptr_to_struct)	\
	((typeof(ptr_to_struct)) kzalloc(sizeof(*(ptr_to_struct)), GFP_KERNEL))
     92 
#define KFD_MAX_NUM_OF_PROCESSES		512
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS	1024

/*
 * Per-process TBA+TMA buffer: two pages.
 *
 * Page 0 is the TBA and holds the CWSR ISA code. Page 1 is the TMA, used
 * for daisy chaining a user-mode trap handler.
 */
#define KFD_CWSR_TBA_TMA_SIZE	(PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET	PAGE_SIZE

/* Every process may use its full per-process queue budget */
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \
	(KFD_MAX_NUM_OF_PROCESSES * KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)

#define KFD_KERNEL_QUEUE_SIZE	2048

#define KFD_UNMAP_LATENCY_MS	(4000)

/*
 * 512 = 0x200
 * Doorbell index distance between SDMA RLC (2*i) and (2*i+1) of the same
 * SDMA engine on SOC15, where SDMA doorbells are 8 bytes wide.
 * A distance of 512 8-byte doorbells (i.e. one page) makes the SDMA RLC
 * (2*i+1) doorbells (in terms of the lower 12 address bits) lie exactly in
 * the OFFSET and SIZE set in registers like BIF_SDMA0_DOORBELL_RANGE.
 */
#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
    122 
    123 
/*
 * Kernel module parameters. All of them are only declared here; the
 * definitions live outside this header.
 */

/* Maximum number of supported queues per device */
extern int max_num_of_queues_per_device;

/* Scheduling policy */
extern int sched_policy;

/* Maximum process number per HW scheduler */
extern int hws_max_conc_proc;

/* NOTE(review): undocumented here; by name it toggles CWSR - confirm */
extern int cwsr_enable;

/* Whether to send sigterm to the HSA process on an unhandled exception */
extern int send_sigterm;

/*
 * This kernel module parameter is used to simulate a large bar machine on
 * non-large bar enabled machines.
 */
extern int debug_largebar;

/*
 * Ignore the CRAT table during KFD initialization; can be used to work
 * around broken CRAT tables on some AMD systems.
 */
extern int ignore_crat;

/* Set sh_mem_config.retry_disable on Vega10 */
extern int amdgpu_noretry;

/* Halt if HWS hang is detected */
extern int halt_if_hws_hang;

/* Whether MEC FW supports GWS barriers */
extern bool hws_gws_support;

/* Queue preemption timeout in ms */
extern int queue_preemption_timeout_ms;
    179 
/* Cache coherency policy selector */
enum cache_policy {
	cache_policy_coherent = 0,
	cache_policy_noncoherent = 1
};

/* True when the ASIC enumerator is CHIP_VEGA10 or any later value */
#define KFD_IS_SOC15(chip)	((chip) >= CHIP_VEGA10)
    186 
/*
 * Pair of interrupt callbacks for one class of events.
 * NOTE(review): by name, interrupt_isr runs in interrupt context and
 * interrupt_wq from the interrupt work queue - confirm against callers.
 */
struct kfd_event_interrupt_class {
	/* Receives the raw ring entry plus output slots for a patched copy */
	bool (*interrupt_isr)(struct kfd_dev *dev,
			      const uint32_t *ih_ring_entry,
			      uint32_t *patched_ihre,
			      bool *patched_flag);

	/* Receives the ring entry only */
	void (*interrupt_wq)(struct kfd_dev *dev,
			     const uint32_t *ih_ring_entry);
};
    194 
    195 struct kfd_device_info {
    196 	enum amd_asic_type asic_family;
    197 	const char *asic_name;
    198 	const struct kfd_event_interrupt_class *event_interrupt_class;
    199 	unsigned int max_pasid_bits;
    200 	unsigned int max_no_of_hqd;
    201 	unsigned int doorbell_size;
    202 	size_t ih_ring_entry_size;
    203 	uint8_t num_of_watch_points;
    204 	uint16_t mqd_size_aligned;
    205 	bool supports_cwsr;
    206 	bool needs_iommu_device;
    207 	bool needs_pci_atomics;
    208 	unsigned int num_sdma_engines;
    209 	unsigned int num_xgmi_sdma_engines;
    210 	unsigned int num_sdma_queues_per_engine;
    211 };
    212 
/*
 * One memory object, visible through both a GPU address and a CPU pointer.
 * NOTE(review): range_start/range_end look like chunk indices of the GTT
 * sub-allocator - confirm.
 */
struct kfd_mem_obj {
	uint32_t range_start;
	uint32_t range_end;
	uint64_t gpu_addr;	/* address as seen by the GPU */
	uint32_t *cpu_ptr;	/* address as seen by the CPU */
	void *gtt_mem;
};
    220 
/* Range of VMIDs that belongs to KFD */
struct kfd_vmid_info {
	uint32_t first_vmid_kfd;	/* first VMID of the range */
	uint32_t last_vmid_kfd;		/* last VMID of the range */
	uint32_t vmid_num_kfd;		/* number of VMIDs */
};
    226 
    227 struct kfd_dev {
    228 	struct kgd_dev *kgd;
    229 
    230 	const struct kfd_device_info *device_info;
    231 	struct pci_dev *pdev;
    232 	struct drm_device *ddev;
    233 
    234 	unsigned int id;		/* topology stub index */
    235 
    236 	phys_addr_t doorbell_base;	/* Start of actual doorbells used by
    237 					 * KFD. It is aligned for mapping
    238 					 * into user mode
    239 					 */
    240 	size_t doorbell_base_dw_offset;	/* Offset from the start of the PCI
    241 					 * doorbell BAR to the first KFD
    242 					 * doorbell in dwords. GFX reserves
    243 					 * the segment before this offset.
    244 					 */
    245 	u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
    246 					   * page used by kernel queue
    247 					   */
    248 
    249 	struct kgd2kfd_shared_resources shared_resources;
    250 	struct kfd_vmid_info vm_info;
    251 
    252 	const struct kfd2kgd_calls *kfd2kgd;
    253 	struct mutex doorbell_mutex;
    254 	DECLARE_BITMAP(doorbell_available_index,
    255 			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
    256 
    257 	void *gtt_mem;
    258 	uint64_t gtt_start_gpu_addr;
    259 	void *gtt_start_cpu_ptr;
    260 	void *gtt_sa_bitmap;
    261 	struct mutex gtt_sa_lock;
    262 	unsigned int gtt_sa_chunk_size;
    263 	unsigned int gtt_sa_num_of_chunks;
    264 
    265 	/* Interrupts */
    266 	struct kfifo ih_fifo;
    267 	struct workqueue_struct *ih_wq;
    268 	struct work_struct interrupt_work;
    269 	spinlock_t interrupt_lock;
    270 
    271 	/* QCM Device instance */
    272 	struct device_queue_manager *dqm;
    273 
    274 	bool init_complete;
    275 	/*
    276 	 * Interrupts of interest to KFD are copied
    277 	 * from the HW ring into a SW ring.
    278 	 */
    279 	bool interrupts_active;
    280 
    281 	/* Debug manager */
    282 	struct kfd_dbgmgr *dbgmgr;
    283 
    284 	/* Firmware versions */
    285 	uint16_t mec_fw_version;
    286 	uint16_t sdma_fw_version;
    287 
    288 	/* Maximum process number mapped to HW scheduler */
    289 	unsigned int max_proc_per_quantum;
    290 
    291 	/* CWSR */
    292 	bool cwsr_enabled;
    293 	const void *cwsr_isa;
    294 	unsigned int cwsr_isa_size;
    295 
    296 	/* xGMI */
    297 	uint64_t hive_id;
    298 
    299 	bool pci_atomic_requested;
    300 
    301 	/* SRAM ECC flag */
    302 	atomic_t sram_ecc_flag;
    303 
    304 	/* Compute Profile ref. count */
    305 	atomic_t compute_profile;
    306 
    307 	/* Global GWS resource shared b/t processes*/
    308 	void *gws;
    309 };
    310 
/* Memory pool selector; note that numbering starts at 1, not 0 */
enum kfd_mempool {
	KFD_MEMPOOL_SYSTEM_CACHEABLE	= 1,
	KFD_MEMPOOL_SYSTEM_WRITECOMBINE	= 2,
	KFD_MEMPOOL_FRAMEBUFFER		= 3,
};
    316 
/*
 * Character device interface: set-up, tear-down and an accessor that
 * returns a struct device pointer.
 */
int		kfd_chardev_init(void);
void		kfd_chardev_exit(void);
struct device  *kfd_chardev(void);
    321 
/**
 * enum kfd_unmap_queues_filter - selects which queues get preempted
 *
 * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue.
 * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
 *	running queues list.
 * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Not described in this header.
 *	NOTE(review): by name, preempts the dynamic queues only - confirm.
 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to a
 *	specific process.
 */
enum kfd_unmap_queues_filter {
	KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE	= 0,
	KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES	= 1,
	KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES	= 2,
	KFD_UNMAP_QUEUES_FILTER_BY_PASID	= 3
};
    340 
/**
 * enum kfd_queue_type - kind of queue
 *
 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
 * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type.
 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
 * @KFD_QUEUE_TYPE_SDMA_XGMI: Not described in this header.
 *	NOTE(review): by name, an SDMA queue on an xGMI SDMA engine - confirm.
 */
enum kfd_queue_type {
	KFD_QUEUE_TYPE_COMPUTE		= 0,
	KFD_QUEUE_TYPE_SDMA		= 1,
	KFD_QUEUE_TYPE_HIQ		= 2,
	KFD_QUEUE_TYPE_DIQ		= 3,
	KFD_QUEUE_TYPE_SDMA_XGMI	= 4
};
    359 
/* Packet format of a queue */
enum kfd_queue_format {
	KFD_QUEUE_FORMAT_PM4 = 0,
	KFD_QUEUE_FORMAT_AQL = 1
};

/* Inclusive bounds of the queue priority range */
enum KFD_QUEUE_PRIORITY {
	KFD_QUEUE_PRIORITY_MINIMUM = 0,
	KFD_QUEUE_PRIORITY_MAXIMUM = 15
};
    369 
    370 /**
    371  * struct queue_properties
    372  *
    373  * @type: The queue type.
    374  *
    375  * @queue_id: Queue identifier.
    376  *
    377  * @queue_address: Queue ring buffer address.
    378  *
    379  * @queue_size: Queue ring buffer size.
    380  *
    381  * @priority: Defines the queue priority relative to other queues in the
    382  * process.
    383  * This is just an indication and HW scheduling may override the priority as
    384  * necessary while keeping the relative prioritization.
    385  * the priority granularity is from 0 to f which f is the highest priority.
    386  * currently all queues are initialized with the highest priority.
    387  *
    388  * @queue_percent: This field is partially implemented and currently a zero in
    389  * this field defines that the queue is non active.
    390  *
    391  * @read_ptr: User space address which points to the number of dwords the
    392  * cp read from the ring buffer. This field updates automatically by the H/W.
    393  *
    394  * @write_ptr: Defines the number of dwords written to the ring buffer.
    395  *
    396  * @doorbell_ptr: This field aim is to notify the H/W of new packet written to
    397  * the queue ring buffer. This field should be similar to write_ptr and the
    398  * user should update this field after he updated the write_ptr.
    399  *
    400  * @doorbell_off: The doorbell offset in the doorbell pci-bar.
    401  *
    402  * @is_interop: Defines if this is a interop queue. Interop queue means that
    403  * the queue can access both graphics and compute resources.
    404  *
    405  * @is_evicted: Defines if the queue is evicted. Only active queues
    406  * are evicted, rendering them inactive.
    407  *
    408  * @is_active: Defines if the queue is active or not. @is_active and
    409  * @is_evicted are protected by the DQM lock.
    410  *
    411  * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
    412  * of the queue.
    413  *
    414  * This structure represents the queue properties for each queue no matter if
    415  * it's user mode or kernel mode queue.
    416  *
    417  */
    418 struct queue_properties {
    419 	enum kfd_queue_type type;
    420 	enum kfd_queue_format format;
    421 	unsigned int queue_id;
    422 	uint64_t queue_address;
    423 	uint64_t  queue_size;
    424 	uint32_t priority;
    425 	uint32_t queue_percent;
    426 	uint32_t *read_ptr;
    427 	uint32_t *write_ptr;
    428 	void __iomem *doorbell_ptr;
    429 	uint32_t doorbell_off;
    430 	bool is_interop;
    431 	bool is_evicted;
    432 	bool is_active;
    433 	/* Not relevant for user mode queues in cp scheduling */
    434 	unsigned int vmid;
    435 	/* Relevant only for sdma queues*/
    436 	uint32_t sdma_engine_id;
    437 	uint32_t sdma_queue_id;
    438 	uint32_t sdma_vm_addr;
    439 	/* Relevant only for VI */
    440 	uint64_t eop_ring_buffer_address;
    441 	uint32_t eop_ring_buffer_size;
    442 	uint64_t ctx_save_restore_area_address;
    443 	uint32_t ctx_save_restore_area_size;
    444 	uint32_t ctl_stack_size;
    445 	uint64_t tba_addr;
    446 	uint64_t tma_addr;
    447 	/* Relevant for CU */
    448 	uint32_t cu_mask_count; /* Must be a multiple of 32 */
    449 	uint32_t *cu_mask;
    450 };
    451 
/*
 * A queue counts as active when it has a non-empty ring buffer at a
 * non-zero address, a non-zero queue_percent, and is not evicted.
 * q is a struct (not a pointer) and is evaluated several times.
 */
#define QUEUE_IS_ACTIVE(q)				\
	((q).queue_size > 0 && (q).queue_address != 0 &&	\
	 (q).queue_percent > 0 && !(q).is_evicted)
    456 
    457 /**
    458  * struct queue
    459  *
    460  * @list: Queue linked list.
    461  *
    462  * @mqd: The queue MQD.
    463  *
    464  * @mqd_mem_obj: The MQD local gpu memory object.
    465  *
    466  * @gart_mqd_addr: The MQD gart mc address.
    467  *
    468  * @properties: The queue properties.
    469  *
    470  * @mec: Used only in no cp scheduling mode and identifies to micro engine id
    471  *	 that the queue should be execute on.
    472  *
    473  * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
    474  *	  id.
    475  *
    476  * @queue: Used only in no cp scheduliong mode and identifies the queue's slot.
    477  *
    478  * @process: The kfd process that created this queue.
    479  *
    480  * @device: The kfd device that created this queue.
    481  *
    482  * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL
    483  * otherwise.
    484  *
    485  * This structure represents user mode compute queues.
    486  * It contains all the necessary data to handle such queues.
    487  *
    488  */
    489 
    490 struct queue {
    491 	struct list_head list;
    492 	void *mqd;
    493 	struct kfd_mem_obj *mqd_mem_obj;
    494 	uint64_t gart_mqd_addr;
    495 	struct queue_properties properties;
    496 
    497 	uint32_t mec;
    498 	uint32_t pipe;
    499 	uint32_t queue;
    500 
    501 	unsigned int sdma_id;
    502 	unsigned int doorbell_id;
    503 
    504 	struct kfd_process	*process;
    505 	struct kfd_dev		*device;
    506 	void *gws;
    507 };
    508 
/*
 * MQD manager types; see the description in kfd_mqd_manager.h.
 * KFD_MQD_TYPE_MAX is the number of real types, not a type itself.
 */
enum KFD_MQD_TYPE {
	KFD_MQD_TYPE_HIQ  = 0,	/* for hiq */
	KFD_MQD_TYPE_CP   = 1,	/* for cp queues and diq */
	KFD_MQD_TYPE_SDMA = 2,	/* for sdma queues */
	KFD_MQD_TYPE_DIQ  = 3,	/* for diq */
	KFD_MQD_TYPE_MAX  = 4
};

/* Pipe priority levels, lowest first */
enum KFD_PIPE_PRIORITY {
	KFD_PIPE_PRIORITY_CS_LOW    = 0,
	KFD_PIPE_PRIORITY_CS_MEDIUM = 1,
	KFD_PIPE_PRIORITY_CS_HIGH   = 2
};
    525 
    526 struct scheduling_resources {
    527 	unsigned int vmid_mask;
    528 	enum kfd_queue_type type;
    529 	uint64_t queue_mask;
    530 	uint64_t gws_mask;
    531 	uint32_t oac_mask;
    532 	uint32_t gds_heap_base;
    533 	uint32_t gds_heap_size;
    534 };
    535 
    536 struct process_queue_manager {
    537 	/* data */
    538 	struct kfd_process	*process;
    539 	struct list_head	queues;
    540 	unsigned long		*queue_slot_bitmap;
    541 };
    542 
    543 struct qcm_process_device {
    544 	/* The Device Queue Manager that owns this data */
    545 	struct device_queue_manager *dqm;
    546 	struct process_queue_manager *pqm;
    547 	/* Queues list */
    548 	struct list_head queues_list;
    549 	struct list_head priv_queue_list;
    550 
    551 	unsigned int queue_count;
    552 	unsigned int vmid;
    553 	bool is_debug;
    554 	unsigned int evicted; /* eviction counter, 0=active */
    555 
    556 	/* This flag tells if we should reset all wavefronts on
    557 	 * process termination
    558 	 */
    559 	bool reset_wavefronts;
    560 
    561 	/*
    562 	 * All the memory management data should be here too
    563 	 */
    564 	uint64_t gds_context_area;
    565 	/* Contains page table flags such as AMDGPU_PTE_VALID since gfx9 */
    566 	uint64_t page_table_base;
    567 	uint32_t sh_mem_config;
    568 	uint32_t sh_mem_bases;
    569 	uint32_t sh_mem_ape1_base;
    570 	uint32_t sh_mem_ape1_limit;
    571 	uint32_t gds_size;
    572 	uint32_t num_gws;
    573 	uint32_t num_oac;
    574 	uint32_t sh_hidden_private_base;
    575 
    576 	/* CWSR memory */
    577 	void *cwsr_kaddr;
    578 	uint64_t cwsr_base;
    579 	uint64_t tba_addr;
    580 	uint64_t tma_addr;
    581 
    582 	/* IB memory */
    583 	uint64_t ib_base;
    584 	void *ib_kaddr;
    585 
    586 	/* doorbell resources per process per device */
    587 	unsigned long *doorbell_bitmap;
    588 };
    589 
/*
 * KFD Memory Eviction timing, all values approximate and in milliseconds.
 */

/* Wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS		100

/* Back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS	100

/* Time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS		10
    598 
/*
 * 8 byte handle containing GPU ID in the most significant 4 bytes and
 * idr_handle in the least significant 4 bytes.
 *
 * Every macro argument is parenthesized so that compound expressions
 * (e.g. "a & b", "a | b", "a + b") are combined or split as a whole
 * rather than by operator precedence at the expansion site.
 *
 * MAKE_HANDLE adds idr_handle without masking it, so the caller must pass
 * a value that fits in 32 bits.
 */
#define MAKE_HANDLE(gpu_id, idr_handle) \
	(((uint64_t)(gpu_id) << 32) + (idr_handle))
#define GET_GPU_ID(handle) ((handle) >> 32)
#define GET_IDR_HANDLE(handle) ((handle) & 0xFFFFFFFF)
    606 
/* Binding state of a process/device pair */
enum kfd_pdd_bound {
	PDD_UNBOUND		= 0,
	PDD_BOUND		= 1,
	PDD_BOUND_SUSPENDED	= 2,
};
    612 
    613 /* Data that is per-process-per device. */
    614 struct kfd_process_device {
    615 	/*
    616 	 * List of all per-device data for a process.
    617 	 * Starts from kfd_process.per_device_data.
    618 	 */
    619 	struct list_head per_device_list;
    620 
    621 	/* The device that owns this data. */
    622 	struct kfd_dev *dev;
    623 
    624 	/* The process that owns this kfd_process_device. */
    625 	struct kfd_process *process;
    626 
    627 	/* per-process-per device QCM data structure */
    628 	struct qcm_process_device qpd;
    629 
    630 	/*Apertures*/
    631 	uint64_t lds_base;
    632 	uint64_t lds_limit;
    633 	uint64_t gpuvm_base;
    634 	uint64_t gpuvm_limit;
    635 	uint64_t scratch_base;
    636 	uint64_t scratch_limit;
    637 
    638 	/* VM context for GPUVM allocations */
    639 	struct file *drm_file;
    640 	void *vm;
    641 
    642 	/* GPUVM allocations storage */
    643 	struct idr alloc_idr;
    644 
    645 	/* Flag used to tell the pdd has dequeued from the dqm.
    646 	 * This is used to prevent dev->dqm->ops.process_termination() from
    647 	 * being called twice when it is already called in IOMMU callback
    648 	 * function.
    649 	 */
    650 	bool already_dequeued;
    651 
    652 	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
    653 	enum kfd_pdd_bound bound;
    654 };
    655 
    656 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
    657 
    658 /* Process data */
    659 struct kfd_process {
    660 	/*
    661 	 * kfd_process are stored in an mm_struct*->kfd_process*
    662 	 * hash table (kfd_processes in kfd_process.c)
    663 	 */
    664 	struct hlist_node kfd_processes;
    665 
    666 	/*
    667 	 * Opaque pointer to mm_struct. We don't hold a reference to
    668 	 * it so it should never be dereferenced from here. This is
    669 	 * only used for looking up processes by their mm.
    670 	 */
    671 	void *mm;
    672 
    673 	struct kref ref;
    674 	struct work_struct release_work;
    675 
    676 	struct mutex mutex;
    677 
    678 	/*
    679 	 * In any process, the thread that started main() is the lead
    680 	 * thread and outlives the rest.
    681 	 * It is here because amd_iommu_bind_pasid wants a task_struct.
    682 	 * It can also be used for safely getting a reference to the
    683 	 * mm_struct of the process.
    684 	 */
    685 	struct task_struct *lead_thread;
    686 
    687 	/* We want to receive a notification when the mm_struct is destroyed */
    688 	struct mmu_notifier mmu_notifier;
    689 
    690 	uint16_t pasid;
    691 	unsigned int doorbell_index;
    692 
    693 	/*
    694 	 * List of kfd_process_device structures,
    695 	 * one for each device the process is using.
    696 	 */
    697 	struct list_head per_device_data;
    698 
    699 	struct process_queue_manager pqm;
    700 
    701 	/*Is the user space process 32 bit?*/
    702 	bool is_32bit_user_mode;
    703 
    704 	/* Event-related data */
    705 	struct mutex event_mutex;
    706 	/* Event ID allocator and lookup */
    707 	struct idr event_idr;
    708 	/* Event page */
    709 	struct kfd_signal_page *signal_page;
    710 	size_t signal_mapped_size;
    711 	size_t signal_event_count;
    712 	bool signal_event_limit_reached;
    713 
    714 	/* Information used for memory eviction */
    715 	void *kgd_process_info;
    716 	/* Eviction fence that is attached to all the BOs of this process. The
    717 	 * fence will be triggered during eviction and new one will be created
    718 	 * during restore
    719 	 */
    720 	struct dma_fence *ef;
    721 
    722 	/* Work items for evicting and restoring BOs */
    723 	struct delayed_work eviction_work;
    724 	struct delayed_work restore_work;
    725 	/* seqno of the last scheduled eviction */
    726 	unsigned int last_eviction_seqno;
    727 	/* Approx. the last timestamp (in jiffies) when the process was
    728 	 * restored after an eviction
    729 	 */
    730 	unsigned long last_restore_timestamp;
    731 
    732 	/* Kobj for our procfs */
    733 	struct kobject *kobj;
    734 	struct attribute attr_pasid;
    735 };
    736 
    737 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
    738 extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
    739 extern struct srcu_struct kfd_processes_srcu;
    740 
/**
 * amdkfd_ioctl_t - ioctl handler function type
 *
 * @filep: pointer to file structure.
 * @p: amdkfd process pointer.
 * @data: pointer to arg that was copied from user.
 */
typedef int amdkfd_ioctl_t(struct file *filep,
			   struct kfd_process *p,
			   void *data);

/* One ioctl table entry: command numbers, flags, handler and name */
struct amdkfd_ioctl_desc {
	unsigned int	cmd;
	int		flags;
	amdkfd_ioctl_t	*func;
	unsigned int	cmd_drv;
	const char	*name;
};
bool kfd_dev_is_large_bar(struct kfd_dev *dev);

/*
 * Process work queue, process creation/lookup/release, and queue
 * eviction/restore entry points.
 *
 * kfd_get_process() now names its parameter, like every other prototype
 * in this header; the type is unchanged.
 */
int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *thread);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
    771 
/* Per-process-per-device data: VM set-up, binding, lookup and creation */
int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file);
struct kfd_process_device *
kfd_bind_process_to_device(struct kfd_dev *dev, struct kfd_process *p);
struct kfd_process_device *
kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p);
struct kfd_process_device *
kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p);

/* mmap handler for the reserved memory mapping type */
int kfd_reserved_mem_mmap(struct kfd_dev *dev,
			  struct kfd_process *process,
			  struct vm_area_struct *vma);
    783 
/* KFD process API for creating and translating handles */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					 void *mem);
void *kfd_process_device_translate_handle(struct kfd_process_device *p,
					  int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					  int handle);

/* Process device data iterator */
struct kfd_process_device *
kfd_get_first_process_device_data(struct kfd_process *p);
struct kfd_process_device *
kfd_get_next_process_device_data(struct kfd_process *p,
				 struct kfd_process_device *pdd);
bool kfd_has_process_device_data(struct kfd_process *p);
    799 
/* PASIDs: allocator set-up/tear-down, limit handling, alloc and free */
int		kfd_pasid_init(void);
void		kfd_pasid_exit(void);
bool		kfd_set_pasid_limit(unsigned int new_limit);
unsigned int	kfd_get_pasid_limit(void);
unsigned int	kfd_pasid_alloc(void);
void		kfd_pasid_free(unsigned int pasid);
    807 
    808 /* Doorbells */
    809 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
    810 int kfd_doorbell_init(struct kfd_dev *kfd);
    811 void kfd_doorbell_fini(struct kfd_dev *kfd);
    812 int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
    813 		      struct vm_area_struct *vma);
    814 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
    815 					unsigned int *doorbell_off);
    816 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
    817 u32 read_kernel_doorbell(u32 __iomem *db);
    818 void write_kernel_doorbell(void __iomem *db, u32 value);
    819 void write_kernel_doorbell64(void __iomem *db, u64 value);
    820 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
    821 					struct kfd_process *process,
    822 					unsigned int doorbell_id);
    823 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
    824 					struct kfd_process *process);
    825 int kfd_alloc_process_doorbells(struct kfd_process *process);
    826 void kfd_free_process_doorbells(struct kfd_process *process);
    827 
/*
 * GTT Sub-Allocator: kfd_gtt_sa_allocate() hands back the object through
 * *mem_obj, kfd_gtt_sa_free() takes it again.
 */
int kfd_gtt_sa_allocate(struct kfd_dev *kfd,
			unsigned int size,
			struct kfd_mem_obj **mem_obj);
int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);

extern struct device *kfd_device;

/* KFD's procfs */
void kfd_procfs_init(void);
void kfd_procfs_shutdown(void);
    840 
/* Topology: life cycle and device registration */
int kfd_topology_init(void);
void kfd_topology_shutdown(void);
int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);

/* Topology: lookups by proximity domain, gpu_id, PCI device or kgd */
struct kfd_topology_device *
kfd_topology_device_by_proximity_domain(uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);

/* Topology: enumeration by index and NUMA node translation */
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);
    854 
/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev,
			 const uint32_t *ih_ring_entry,
			 uint32_t *patched_ihre,
			 bool *flag);

/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);
    865 
/* Queue Context Management: queue objects */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);

/* Queue Context Management: one MQD manager constructor per ASIC family */
struct mqd_manager *
mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
struct mqd_manager *
mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
struct mqd_manager *
mqd_manager_init_vi(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
struct mqd_manager *
mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
struct mqd_manager *
mqd_manager_init_v9(enum KFD_MQD_TYPE type, struct kfd_dev *dev);
struct mqd_manager *
mqd_manager_init_v10(enum KFD_MQD_TYPE type, struct kfd_dev *dev);

/* Queue Context Management: device queue manager and kernel queues */
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
				       enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq, bool hanging);
int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);
    890 
    891 /* Process Queue Manager */
        /*
         * List node pairing a queue pointer with a kernel_queue pointer.
         * NOTE(review): presumably only one of @q / @kq is set per node (user
         * queue vs. kernel queue) -- confirm against the pqm implementation.
         */
    892 struct process_queue_node {
    893 	struct queue *q;
    894 	struct kernel_queue *kq;
    895 	struct list_head process_queue_list;	/* list linkage for this node */
    896 };
    897 
        /*
         * Dequeue helpers: the first acts on one kfd_process_device, the second
         * on a whole kfd_process. Neither reports a status.
         */
    898 void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
    899 void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
        /*
         * Process queue manager (pqm) lifecycle and per-queue operations. Queues
         * are addressed by an unsigned int @qid.
         * NOTE(review): int results are presumably 0 / negative errno -- confirm.
         */
    900 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
    901 void pqm_uninit(struct process_queue_manager *pqm);
        /*
         * @qid and @p_doorbell_offset_in_process are pointers (presumably outputs
         * for the new queue id and its doorbell offset -- confirm). @properties
         * is non-const, so the callee may modify it.
         */
    902 int pqm_create_queue(struct process_queue_manager *pqm,
    903 			    struct kfd_dev *dev,
    904 			    struct file *f,
    905 			    struct queue_properties *properties,
    906 			    unsigned int *qid,
    907 			    uint32_t *p_doorbell_offset_in_process);
    908 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
    909 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
    910 			struct queue_properties *p);
    911 int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
    912 			struct queue_properties *p);
        /* @gws is an opaque void pointer; its concrete type is not visible here. */
    913 int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
    914 			void *gws);
    915 struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
    916 						unsigned int qid);
        /*
         * @ctl_stack is annotated __user: it is a user-space address and must not
         * be dereferenced directly by the implementation. The two u32 pointers
         * are presumably outputs for the sizes actually used -- confirm.
         */
    917 int pqm_get_wave_state(struct process_queue_manager *pqm,
    918 		       unsigned int qid,
    919 		       void __user *ctl_stack,
    920 		       u32 *ctl_stack_used_size,
    921 		       u32 *save_area_used_size);
    922 
        /*
         * Takes the address of a fence word, a fence value and a timeout whose
         * name gives its unit as milliseconds; returns an int status.
         * NOTE(review): presumably waits until *fence_addr equals @fence_value or
         * the timeout expires -- confirm the return codes in the implementation.
         */
    923 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
    924 			      unsigned int fence_value,
    925 			      unsigned int timeout_ms);
    926 
    927 /* Packet Manager */
    928 
        /*
         * Fence marker values.
         * NOTE(review): presumably a fence word is set to KFD_FENCE_INIT before a
         * request is issued and reads KFD_FENCE_COMPLETED once it has finished --
         * confirm at the use sites.
         */
    929 #define KFD_FENCE_COMPLETED (100)
    930 #define KFD_FENCE_INIT   (10)
    931 
        /*
         * Per-instance packet manager state. struct packet_manager_funcs is only
         * referenced through a pointer here, so it may be defined later in the
         * file.
         * NOTE(review): which members @lock protects is not stated in this
         * header -- confirm in the implementation.
         */
    932 struct packet_manager {
    933 	struct device_queue_manager *dqm;
    934 	struct kernel_queue *priv_queue;
    935 	struct mutex lock;
    936 	bool allocated;
    937 	struct kfd_mem_obj *ib_buffer_obj;
    938 	unsigned int ib_size_bytes;	/* size in bytes, per the member name */
    939 	bool is_over_subscription;
    940 
        	/* ASIC-specific packet builders; const, so the table is read-only here */
    941 	const struct packet_manager_funcs *pmf;
    942 };
    943 
        /*
         * Table of ASIC-specific PM4 packet builders plus one size field per
         * builder. Every callback takes a uint32_t *buffer (presumably where the
         * packet is written -- confirm) and returns an int status.
         */
    944 struct packet_manager_funcs {
    945 	/* Support ASIC-specific packet formats for PM4 packets */
    946 	int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
    947 			struct qcm_process_device *qpd);
    948 	int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
    949 			uint64_t ib, size_t ib_size_in_dwords, bool chain);
    950 	int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
    951 			struct scheduling_resources *res);
    952 	int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
    953 			struct queue *q, bool is_static);
    954 	int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
    955 			enum kfd_queue_type type,
    956 			enum kfd_unmap_queues_filter mode,
    957 			uint32_t filter_param, bool reset,
    958 			unsigned int sdma_engine);
    959 	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
    960 			uint64_t fence_address,	uint32_t fence_value);
        	/* Unlike the other callbacks, release_mem receives no packet_manager. */
    961 	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
    962 
    963 	/* Packet sizes */
        	/* NOTE(review): unit (bytes vs. dwords) is not stated here -- confirm. */
    964 	int map_process_size;
    965 	int runlist_size;
    966 	int set_resources_size;
    967 	int map_queues_size;
    968 	int unmap_queues_size;
    969 	int query_status_size;
    970 	int release_mem_size;
    971 };
    972 
        /* Two read-only callback tables, named for the VI and v9 packet formats. */
    973 extern const struct packet_manager_funcs kfd_vi_pm_funcs;
    974 extern const struct packet_manager_funcs kfd_v9_pm_funcs;
    975 
        /*
         * Packet manager lifecycle. pm_init() ties @pm to a device queue manager
         * and returns an int status.
         * @hanging on pm_uninit(): NOTE(review) presumably marks the hardware as
         * hung during teardown -- confirm in the implementation.
         */
    976 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
    977 void pm_uninit(struct packet_manager *pm, bool hanging);
        /*
         * pm_send_* entry points. Their parameters mirror the corresponding
         * packet_manager_funcs callbacks minus the buffer argument, except
         * pm_send_runlist(), which takes a list head instead.
         */
    978 int pm_send_set_resources(struct packet_manager *pm,
    979 				struct scheduling_resources *res);
    980 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
    981 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
    982 				uint32_t fence_value);
    983 
    984 int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
    985 			enum kfd_unmap_queues_filter mode,
    986 			uint32_t filter_param, bool reset,
    987 			unsigned int sdma_engine);
    988 
        /* NOTE(review): presumably releases the buffer tracked by ib_buffer_obj -- confirm. */
    989 void pm_release_ib(struct packet_manager *pm);
    990 
    991 /* Following PM funcs can be shared among VI and AI */
        /*
         * Builds an unsigned int header from an opcode and a packet size.
         * NOTE(review): the unit of @packet_size is not visible here -- confirm.
         */
    992 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
    993 
        /* Returns a 64-bit count for @kfd; what is counted is not visible here. */
    994 uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
    995 
    996 /* Events */
        /* Read-only per-ASIC class tables; defined outside this header. */
    997 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
    998 extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
    999 
   1000 extern const struct kfd_device_global_init_class device_global_init_class_cik;
   1001 
        /* Per-process event setup/teardown; neither reports a status. */
   1002 void kfd_event_init_process(struct kfd_process *p);
   1003 void kfd_event_free_process(struct kfd_process *p);
   1004 int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
        /*
         * @data is annotated __user (a user-space address). @user_timeout_ms is
         * in milliseconds per its name. @wait_result is a pointer (presumably the
         * output for the wait outcome -- confirm).
         */
   1005 int kfd_wait_on_events(struct kfd_process *p,
   1006 		       uint32_t num_events, void __user *data,
   1007 		       bool all, uint32_t user_timeout_ms,
   1008 		       uint32_t *wait_result);
        /*
         * Signalling entry points. They return void, so delivery failures are not
         * reported to the caller. The target is identified by @pasid.
         */
   1009 void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
   1010 				uint32_t valid_id_bits);
   1011 void kfd_signal_iommu_event(struct kfd_dev *dev,
   1012 		unsigned int pasid, unsigned long address,
   1013 		bool is_write_requested, bool is_execute_requested);
   1014 void kfd_signal_hw_exception_event(unsigned int pasid);
        /* Per-event operations, addressed by a 32-bit @event_id within process @p. */
   1015 int kfd_set_event(struct kfd_process *p, uint32_t event_id);
   1016 int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
   1017 int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
   1018 		       uint64_t size);
        /*
         * The four trailing pointer parameters (@event_id, @event_trigger_data,
         * @event_page_offset, @event_slot_index) are presumably outputs that
         * describe the created event -- confirm.
         */
   1019 int kfd_event_create(struct file *devkfd, struct kfd_process *p,
   1020 		     uint32_t event_type, bool auto_reset, uint32_t node_id,
   1021 		     uint32_t *event_id, uint32_t *event_trigger_data,
   1022 		     uint64_t *event_page_offset, uint32_t *event_slot_index);
   1023 int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);
   1024 
   1025 void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
   1026 				struct kfd_vm_fault_info *info);
   1027 
        /* Device-wide reset notification; takes only the device, returns nothing. */
   1028 void kfd_signal_reset_event(struct kfd_dev *dev);
   1029 
        /* TLB flush scoped to one kfd_process_device. */
   1030 void kfd_flush_tlb(struct kfd_process_device *pdd);
   1031 
        /* Takes a device and a process; returns an int status. */
   1032 int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
   1033 
        /*
         * Takes no arguments, so it reports a global rather than per-device
         * state. NOTE(review): what sets the lock is not visible here.
         */
   1034 bool kfd_is_locked(void);
   1035 
   1036 /* Compute profile */
        /*
         * Increment/decrement pair acting on one device.
         * NOTE(review): presumably every inc must be balanced by a dec --
         * confirm in the implementation.
         */
   1037 void kfd_inc_compute_active(struct kfd_dev *dev);
   1038 void kfd_dec_compute_active(struct kfd_dev *dev);
   1039 
   1040 /* Cgroup Support */
   1041 /* Check with device cgroup if @kfd device is accessible */
   1042 static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
   1043 {
   1044 #if defined(CONFIG_CGROUP_DEVICE)
   1045 	struct drm_device *ddev = kfd->ddev;
   1046 
   1047 	return devcgroup_check_permission(DEVCG_DEV_CHAR, ddev->driver->major,
   1048 					  ddev->render->index,
   1049 					  DEVCG_ACC_WRITE | DEVCG_ACC_READ);
   1050 #else
   1051 	return 0;
   1052 #endif
   1053 }
   1054 
   1055 /* Debugfs */
        /*
         * With CONFIG_DEBUG_FS the full debugfs interface is declared. Without
         * it, only kfd_debugfs_init()/kfd_debugfs_fini() get empty inline stubs.
         * Every other function in this section is then undeclared, so callers
         * must guard their use with CONFIG_DEBUG_FS themselves.
         */
   1056 #if defined(CONFIG_DEBUG_FS)
   1057 
   1058 void kfd_debugfs_init(void);
   1059 void kfd_debugfs_fini(void);
        /* seq_file show callbacks: (struct seq_file *, void *) returning int. */
   1060 int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
   1061 int pqm_debugfs_mqds(struct seq_file *m, void *data);
   1062 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
   1063 int dqm_debugfs_hqds(struct seq_file *m, void *data);
   1064 int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
   1065 int pm_debugfs_runlist(struct seq_file *m, void *data);
   1066 
        /*
         * NOTE(review): the *_hang_hws names suggest these deliberately hang the
         * hardware scheduler for testing -- confirm before calling.
         */
   1067 int kfd_debugfs_hang_hws(struct kfd_dev *dev);
   1068 int pm_debugfs_hang_hws(struct packet_manager *pm);
   1069 int dqm_debugfs_execute_queues(struct device_queue_manager *dqm);
   1070 
   1071 #else
   1072 
        /* No-op stubs so init/fini can be called unconditionally. */
   1073 static inline void kfd_debugfs_init(void) {}
   1074 static inline void kfd_debugfs_fini(void) {}
   1075 
   1076 #endif
   1077 
   1078 #endif
   1079