1 1.4 riastrad /* $NetBSD: i915_memcpy.c,v 1.4 2021/12/19 11:33:49 riastradh Exp $ */ 2 1.1 riastrad 3 1.1 riastrad /* 4 1.1 riastrad * Copyright 2016 Intel Corporation 5 1.1 riastrad * 6 1.1 riastrad * Permission is hereby granted, free of charge, to any person obtaining a 7 1.1 riastrad * copy of this software and associated documentation files (the "Software"), 8 1.1 riastrad * to deal in the Software without restriction, including without limitation 9 1.1 riastrad * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 1.1 riastrad * and/or sell copies of the Software, and to permit persons to whom the 11 1.1 riastrad * Software is furnished to do so, subject to the following conditions: 12 1.1 riastrad * 13 1.1 riastrad * The above copyright notice and this permission notice (including the next 14 1.1 riastrad * paragraph) shall be included in all copies or substantial portions of the 15 1.1 riastrad * Software. 16 1.1 riastrad * 17 1.1 riastrad * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 1.1 riastrad * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 1.1 riastrad * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 1.1 riastrad * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 1.1 riastrad * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 1.1 riastrad * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 23 1.1 riastrad * IN THE SOFTWARE. 24 1.1 riastrad * 25 1.1 riastrad */ 26 1.1 riastrad 27 1.1 riastrad #include <sys/cdefs.h> 28 1.4 riastrad __KERNEL_RCSID(0, "$NetBSD: i915_memcpy.c,v 1.4 2021/12/19 11:33:49 riastradh Exp $"); 29 1.1 riastrad 30 1.1 riastrad #include <linux/kernel.h> 31 1.1 riastrad #include <asm/fpu/api.h> 32 1.1 riastrad 33 1.1 riastrad #include "i915_memcpy.h" 34 1.1 riastrad 35 1.4 riastrad #include <linux/nbsd-namespace.h> 36 1.4 riastrad 37 1.1 riastrad #if IS_ENABLED(CONFIG_DRM_I915_DEBUG) 38 1.1 riastrad #define CI_BUG_ON(expr) BUG_ON(expr) 39 1.1 riastrad #else 40 1.1 riastrad #define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) 41 1.1 riastrad #endif 42 1.1 riastrad 43 1.1 riastrad static DEFINE_STATIC_KEY_FALSE(has_movntdqa); 44 1.1 riastrad 45 1.1 riastrad #ifdef CONFIG_AS_MOVNTDQA 46 1.1 riastrad static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) 47 1.1 riastrad { 48 1.1 riastrad kernel_fpu_begin(); 49 1.1 riastrad 50 1.1 riastrad while (len >= 4) { 51 1.1 riastrad asm("movntdqa (%0), %%xmm0\n" 52 1.1 riastrad "movntdqa 16(%0), %%xmm1\n" 53 1.1 riastrad "movntdqa 32(%0), %%xmm2\n" 54 1.1 riastrad "movntdqa 48(%0), %%xmm3\n" 55 1.1 riastrad "movaps %%xmm0, (%1)\n" 56 1.1 riastrad "movaps %%xmm1, 16(%1)\n" 57 1.1 riastrad "movaps %%xmm2, 32(%1)\n" 58 1.1 riastrad "movaps %%xmm3, 48(%1)\n" 59 1.1 riastrad :: "r" (src), "r" (dst) : "memory"); 60 1.1 riastrad src += 64; 61 1.1 riastrad dst += 64; 62 1.1 riastrad len -= 4; 63 1.1 riastrad } 64 1.1 riastrad while (len--) { 65 1.1 riastrad asm("movntdqa (%0), %%xmm0\n" 66 1.1 riastrad "movaps %%xmm0, (%1)\n" 67 1.1 riastrad :: "r" (src), "r" (dst) : "memory"); 68 1.1 riastrad src += 16; 69 1.1 riastrad dst += 16; 70 1.1 riastrad } 71 1.1 riastrad 72 1.1 riastrad kernel_fpu_end(); 73 1.1 riastrad } 74 1.1 riastrad 75 1.1 riastrad static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len) 76 1.1 riastrad { 77 1.1 riastrad kernel_fpu_begin(); 78 1.1 riastrad 79 1.1 riastrad while (len >= 4) { 80 1.1 riastrad asm("movntdqa (%0), %%xmm0\n" 81 1.1 riastrad "movntdqa 16(%0), %%xmm1\n" 82 1.1 riastrad "movntdqa 32(%0), %%xmm2\n" 83 1.1 riastrad "movntdqa 48(%0), %%xmm3\n" 84 1.1 riastrad "movups %%xmm0, (%1)\n" 85 1.1 riastrad "movups %%xmm1, 16(%1)\n" 86 1.1 riastrad "movups %%xmm2, 32(%1)\n" 87 1.1 riastrad "movups %%xmm3, 48(%1)\n" 88 1.1 riastrad :: "r" (src), "r" (dst) : "memory"); 89 1.1 riastrad src += 64; 90 1.1 riastrad dst += 64; 91 1.1 riastrad len -= 4; 92 1.1 riastrad } 93 1.1 riastrad while (len--) { 94 1.1 riastrad asm("movntdqa (%0), %%xmm0\n" 95 1.1 riastrad "movups %%xmm0, (%1)\n" 96 1.1 riastrad :: "r" (src), "r" (dst) : "memory"); 97 1.1 riastrad src += 16; 98 1.1 riastrad dst += 16; 99 1.1 riastrad } 100 1.1 riastrad 101 1.1 riastrad kernel_fpu_end(); 102 1.1 riastrad } 103 1.1 riastrad #else 104 1.1 riastrad static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) {} 105 1.1 riastrad static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len) {} 106 1.1 riastrad #endif 107 1.1 riastrad 108 1.1 riastrad /** 109 1.1 riastrad * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC 110 1.1 riastrad * @dst: destination pointer 111 1.1 riastrad * @src: source pointer 112 1.1 riastrad * @len: how many bytes to copy 113 1.1 riastrad * 114 1.1 riastrad * i915_memcpy_from_wc copies @len bytes from @src to @dst using 115 1.1 riastrad * non-temporal instructions where available. Note that all arguments 116 1.1 riastrad * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple 117 1.1 riastrad * of 16. 118 1.1 riastrad * 119 1.1 riastrad * To test whether accelerated reads from WC are supported, use 120 1.1 riastrad * i915_memcpy_from_wc(NULL, NULL, 0); 121 1.1 riastrad * 122 1.1 riastrad * Returns true if the copy was successful, false if the preconditions 123 1.1 riastrad * are not met. 124 1.1 riastrad */ 125 1.1 riastrad bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) 126 1.1 riastrad { 127 1.1 riastrad if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15)) 128 1.1 riastrad return false; 129 1.1 riastrad 130 1.1 riastrad if (static_branch_likely(&has_movntdqa)) { 131 1.1 riastrad if (likely(len)) 132 1.1 riastrad __memcpy_ntdqa(dst, src, len >> 4); 133 1.1 riastrad return true; 134 1.1 riastrad } 135 1.1 riastrad 136 1.1 riastrad return false; 137 1.1 riastrad } 138 1.1 riastrad 139 1.1 riastrad /** 140 1.1 riastrad * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC 141 1.1 riastrad * @dst: destination pointer 142 1.1 riastrad * @src: source pointer 143 1.1 riastrad * @len: how many bytes to copy 144 1.1 riastrad * 145 1.1 riastrad * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from 146 1.1 riastrad * @src to @dst using * non-temporal instructions where available, but 147 1.1 riastrad * accepts that its arguments may not be aligned, but are valid for the 148 1.1 riastrad * potential 16-byte read past the end. 149 1.1 riastrad */ 150 1.1 riastrad void i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len) 151 1.1 riastrad { 152 1.1 riastrad unsigned long addr; 153 1.1 riastrad 154 1.1 riastrad CI_BUG_ON(!i915_has_memcpy_from_wc()); 155 1.1 riastrad 156 1.1 riastrad addr = (unsigned long)src; 157 1.1 riastrad if (!IS_ALIGNED(addr, 16)) { 158 1.1 riastrad unsigned long x = min(ALIGN(addr, 16) - addr, len); 159 1.1 riastrad 160 1.1 riastrad memcpy(dst, src, x); 161 1.1 riastrad 162 1.1 riastrad len -= x; 163 1.1 riastrad dst += x; 164 1.1 riastrad src += x; 165 1.1 riastrad } 166 1.1 riastrad 167 1.1 riastrad if (likely(len)) 168 1.1 riastrad __memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16)); 169 1.1 riastrad } 170 1.1 riastrad 171 1.1 riastrad void i915_memcpy_init_early(struct drm_i915_private *dev_priv) 172 1.1 riastrad { 173 1.3 riastrad #ifdef CONFIG_AS_MOVNTDQA 174 1.1 riastrad /* 175 1.1 riastrad * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions 176 1.1 riastrad * emulation. So don't enable movntdqa in hypervisor guest. 177 1.1 riastrad */ 178 1.1 riastrad if (static_cpu_has(X86_FEATURE_XMM4_1) && 179 1.1 riastrad !boot_cpu_has(X86_FEATURE_HYPERVISOR)) 180 1.1 riastrad static_branch_enable(&has_movntdqa); 181 1.3 riastrad #endif 182 1.1 riastrad } 183