1 1.17 gutterid /* $NetBSD: octeon_rnm.c,v 1.17 2025/01/30 02:15:50 gutteridge Exp $ */ 2 1.1 hikaru 3 1.1 hikaru /* 4 1.1 hikaru * Copyright (c) 2007 Internet Initiative Japan, Inc. 5 1.1 hikaru * All rights reserved. 6 1.1 hikaru * 7 1.1 hikaru * Redistribution and use in source and binary forms, with or without 8 1.1 hikaru * modification, are permitted provided that the following conditions 9 1.1 hikaru * are met: 10 1.1 hikaru * 1. Redistributions of source code must retain the above copyright 11 1.1 hikaru * notice, this list of conditions and the following disclaimer. 12 1.1 hikaru * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 hikaru * notice, this list of conditions and the following disclaimer in the 14 1.1 hikaru * documentation and/or other materials provided with the distribution. 15 1.1 hikaru * 16 1.1 hikaru * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 17 1.1 hikaru * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 1.1 hikaru * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 1.1 hikaru * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 20 1.1 hikaru * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 1.1 hikaru * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 1.1 hikaru * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 1.1 hikaru * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 1.1 hikaru * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 1.1 hikaru * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 1.1 hikaru * SUCH DAMAGE. 27 1.1 hikaru */ 28 1.1 hikaru 29 1.5 riastrad /* 30 1.5 riastrad * Cavium Octeon Random Number Generator / Random Number Memory `RNM' 31 1.5 riastrad * 32 1.5 riastrad * The RNM unit consists of: 33 1.5 riastrad * 34 1.5 riastrad * 1. 128 ring oscillators 35 1.5 riastrad * 2. an LFSR/SHA-1 conditioner 36 1.5 riastrad * 3. a 512-byte FIFO 37 1.5 riastrad * 38 1.5 riastrad * When the unit is enabled, there are three modes of operation: 39 1.5 riastrad * 40 1.5 riastrad * (a) deterministic: the ring oscillators are disabled and the 41 1.5 riastrad * LFSR/SHA-1 conditioner operates on fixed inputs to give 42 1.5 riastrad * reproducible results for testing, 43 1.5 riastrad * 44 1.5 riastrad * (b) conditioned entropy: the ring oscillators are enabled and 45 1.5 riastrad * samples from them are fed through the LFSR/SHA-1 46 1.5 riastrad * conditioner before being put into the FIFO, and 47 1.5 riastrad * 48 1.5 riastrad * (c) raw entropy: the ring oscillators are enabled, and a group 49 1.5 riastrad * of eight of them selected at any one time is sampled and 50 1.5 riastrad * fed into the FIFO. 51 1.5 riastrad * 52 1.5 riastrad * Details: 53 1.5 riastrad * 54 1.5 riastrad * - The FIFO is refilled whenever we read out of it, either with 55 1.5 riastrad * a load address or an IOBDMA operation. 56 1.5 riastrad * 57 1.5 riastrad * - The conditioner takes 81 cycles to produce a 64-bit block of 58 1.5 riastrad * output in the FIFO whether in deterministic or conditioned 59 1.5 riastrad * entropy mode, each block consisting of the first 64 bits of a 60 1.5 riastrad * SHA-1 hash. 61 1.5 riastrad * 62 1.5 riastrad * - A group of eight ring oscillators take 8 cycles to produce a 63 1.5 riastrad * 64-bit block of output in the FIFO in raw entropy mode, each 64 1.5 riastrad * block consisting of eight consecutive samples from each RO in 65 1.5 riastrad * parallel. 66 1.5 riastrad * 67 1.5 riastrad * The first sample of each RO always seems to be zero. Further, 68 1.5 riastrad * consecutive samples from a single ring oscillator are not 69 1.5 riastrad * independent, so naive debiasing like a von Neumann extractor 70 1.6 riastrad * falls flat on its face. And parallel ring oscillators powered 71 1.6 riastrad * by the same source may not be independent either, if they end 72 1.6 riastrad * up locked. 73 1.6 riastrad * 74 1.6 riastrad * We read out one FIFO's worth of raw samples from groups of 8 75 1.6 riastrad * ring oscillators at a time, of 128 total, by going through them 76 1.6 riastrad * round robin. We take 32 consecutive samples from each ring 77 1.6 riastrad * oscillator in a group of 8 in parallel before we count one bit 78 1.6 riastrad * of entropy. To get 256 bits of entropy, we read 4Kbit of data 79 1.6 riastrad * from each of two 8-RO groups. 80 1.6 riastrad * 81 1.6 riastrad * We could use the on-board LFSR/SHA-1 conditioner like the Linux 82 1.6 riastrad * driver written by Cavium does, but it's not clear how many RO 83 1.6 riastrad * samples go into the conditioner, and our entropy pool is a 84 1.6 riastrad * perfectly good conditioner itself, so it seems there is little 85 1.6 riastrad * advantage -- other than expedience -- to using the LFSR/SHA-1 86 1.6 riastrad * conditioner. All the manual says is that it samples 125 of the 87 1.6 riastrad * 128 ROs. But the Cavium SHA-1 CPU instruction is advertised to 88 1.6 riastrad * have a latency of 100 cycles, so it seems implausible that much 89 1.6 riastrad * more than one sample from each RO could be squeezed in there. 90 1.6 riastrad * 91 1.6 riastrad * The hardware exposes only 64 bits of each SHA-1 hash, and the 92 1.6 riastrad * Linux driver uses 32 bits of that -- which, if treated as full 93 1.6 riastrad * entropy, would mean an assessment of 3.9 bits of RO samples to 94 1.6 riastrad * get 1 bit of entropy, whereas we take 256 bits of RO samples to 95 1.6 riastrad * get one bit of entropy, so this seems reasonably conservative. 96 1.5 riastrad * 97 1.5 riastrad * Reference: Cavium Networks OCTEON Plus CN50XX Hardware Reference 98 1.5 riastrad * Manual, CN50XX-HM-0.99E PRELIMINARY, July 2008. 99 1.5 riastrad */ 100 1.5 riastrad 101 1.1 hikaru #include <sys/cdefs.h> 102 1.17 gutterid __KERNEL_RCSID(0, "$NetBSD: octeon_rnm.c,v 1.17 2025/01/30 02:15:50 gutteridge Exp $"); 103 1.1 hikaru 104 1.1 hikaru #include <sys/param.h> 105 1.1 hikaru #include <sys/device.h> 106 1.1 hikaru #include <sys/kernel.h> 107 1.1 hikaru #include <sys/rndsource.h> 108 1.4 simonb #include <sys/systm.h> 109 1.1 hikaru 110 1.1 hikaru #include <mips/locore.h> 111 1.12 simonb #include <mips/cavium/octeonreg.h> 112 1.12 simonb #include <mips/cavium/octeonvar.h> 113 1.1 hikaru #include <mips/cavium/include/iobusvar.h> 114 1.1 hikaru #include <mips/cavium/dev/octeon_rnmreg.h> 115 1.1 hikaru #include <mips/cavium/dev/octeon_corereg.h> 116 1.1 hikaru 117 1.1 hikaru #include <sys/bus.h> 118 1.1 hikaru 119 1.9 simonb //#define OCTRNM_DEBUG 120 1.5 riastrad 121 1.5 riastrad #define ENT_DELAY_CLOCK 8 /* cycles for each 64-bit RO sample batch */ 122 1.16 riastrad #define LFSR_DELAY_CLOCK 81 /* cycles to fill LFSR buffer */ 123 1.16 riastrad #define SHA1_DELAY_CLOCK 81 /* cycles to compute SHA-1 output */ 124 1.5 riastrad #define NROGROUPS 16 125 1.5 riastrad #define RNG_FIFO_WORDS (512/sizeof(uint64_t)) 126 1.1 hikaru 127 1.9 simonb struct octrnm_softc { 128 1.7 simonb uint64_t sc_sample[RNG_FIFO_WORDS]; 129 1.1 hikaru bus_space_tag_t sc_bust; 130 1.1 hikaru bus_space_handle_t sc_regh; 131 1.1 hikaru krndsource_t sc_rndsrc; /* /dev/random source */ 132 1.5 riastrad unsigned sc_rogroup; 133 1.1 hikaru }; 134 1.1 hikaru 135 1.9 simonb static int octrnm_match(device_t, struct cfdata *, void *); 136 1.9 simonb static void octrnm_attach(device_t, device_t, void *); 137 1.9 simonb static void octrnm_rng(size_t, void *); 138 1.9 simonb static void octrnm_reset(struct octrnm_softc *); 139 1.9 simonb static void octrnm_conditioned_deterministic(struct octrnm_softc *); 140 1.9 simonb static void octrnm_conditioned_entropy(struct octrnm_softc *); 141 1.9 simonb static void octrnm_raw_entropy(struct octrnm_softc *, unsigned); 142 1.9 simonb static uint64_t octrnm_load(struct octrnm_softc *); 143 1.9 simonb static void octrnm_iobdma(struct octrnm_softc *, uint64_t *, unsigned); 144 1.9 simonb static void octrnm_delay(uint32_t); 145 1.1 hikaru 146 1.9 simonb CFATTACH_DECL_NEW(octrnm, sizeof(struct octrnm_softc), 147 1.9 simonb octrnm_match, octrnm_attach, NULL, NULL); 148 1.1 hikaru 149 1.1 hikaru static int 150 1.9 simonb octrnm_match(device_t parent, struct cfdata *cf, void *aux) 151 1.1 hikaru { 152 1.1 hikaru struct iobus_attach_args *aa = aux; 153 1.1 hikaru 154 1.1 hikaru if (strcmp(cf->cf_name, aa->aa_name) != 0) 155 1.5 riastrad return 0; 156 1.1 hikaru if (cf->cf_unit != aa->aa_unitno) 157 1.5 riastrad return 0; 158 1.5 riastrad return 1; 159 1.1 hikaru } 160 1.1 hikaru 161 1.1 hikaru static void 162 1.9 simonb octrnm_attach(device_t parent, device_t self, void *aux) 163 1.1 hikaru { 164 1.9 simonb struct octrnm_softc *sc = device_private(self); 165 1.1 hikaru struct iobus_attach_args *aa = aux; 166 1.5 riastrad uint64_t bist_status, sample, expected = UINT64_C(0xd654ff35fadf866b); 167 1.1 hikaru 168 1.1 hikaru aprint_normal("\n"); 169 1.1 hikaru 170 1.5 riastrad /* Map the device registers, all two of them. */ 171 1.1 hikaru sc->sc_bust = aa->aa_bust; 172 1.3 simonb if (bus_space_map(aa->aa_bust, aa->aa_unit->addr, RNM_SIZE, 173 1.3 simonb 0, &sc->sc_regh) != 0) { 174 1.3 simonb aprint_error_dev(self, "unable to map device\n"); 175 1.3 simonb return; 176 1.3 simonb } 177 1.3 simonb 178 1.5 riastrad /* Verify that the built-in self-test succeeded. */ 179 1.3 simonb bist_status = bus_space_read_8(sc->sc_bust, sc->sc_regh, 180 1.3 simonb RNM_BIST_STATUS_OFFSET); 181 1.3 simonb if (bist_status) { 182 1.3 simonb aprint_error_dev(self, "RNG built in self test failed: %#lx\n", 183 1.3 simonb bist_status); 184 1.3 simonb return; 185 1.3 simonb } 186 1.1 hikaru 187 1.4 simonb /* 188 1.5 riastrad * Reset the core, enable the RNG engine without entropy, wait 189 1.5 riastrad * 81 cycles for it to produce a single sample, and draw the 190 1.5 riastrad * deterministic sample to test. 191 1.4 simonb * 192 1.5 riastrad * XXX Verify that the output matches the SHA-1 computation 193 1.5 riastrad * described by the data sheet, not just a known answer. 194 1.4 simonb */ 195 1.9 simonb octrnm_reset(sc); 196 1.9 simonb octrnm_conditioned_deterministic(sc); 197 1.16 riastrad octrnm_delay(LFSR_DELAY_CLOCK + SHA1_DELAY_CLOCK); 198 1.9 simonb sample = octrnm_load(sc); 199 1.5 riastrad if (sample != expected) 200 1.5 riastrad aprint_error_dev(self, "self-test: read %016"PRIx64"," 201 1.5 riastrad " expected %016"PRIx64, sample, expected); 202 1.4 simonb 203 1.4 simonb /* 204 1.5 riastrad * Reset the core again to clear the FIFO, and enable the RNG 205 1.5 riastrad * engine with entropy exposed directly. Start from the first 206 1.5 riastrad * group of ring oscillators; as we gather samples we will 207 1.5 riastrad * rotate through the rest of them. 208 1.4 simonb */ 209 1.9 simonb octrnm_reset(sc); 210 1.5 riastrad sc->sc_rogroup = 0; 211 1.9 simonb octrnm_raw_entropy(sc, sc->sc_rogroup); 212 1.9 simonb octrnm_delay(ENT_DELAY_CLOCK*RNG_FIFO_WORDS); 213 1.4 simonb 214 1.5 riastrad /* Attach the rndsource. */ 215 1.9 simonb rndsource_setcb(&sc->sc_rndsrc, octrnm_rng, sc); 216 1.4 simonb rnd_attach_source(&sc->sc_rndsrc, device_xname(self), RND_TYPE_RNG, 217 1.4 simonb RND_FLAG_DEFAULT | RND_FLAG_HASCB); 218 1.1 hikaru } 219 1.1 hikaru 220 1.1 hikaru static void 221 1.9 simonb octrnm_rng(size_t nbytes, void *vsc) 222 1.1 hikaru { 223 1.6 riastrad const unsigned BPB = 256; /* bits of data per bit of entropy */ 224 1.9 simonb struct octrnm_softc *sc = vsc; 225 1.7 simonb uint64_t *samplepos; 226 1.5 riastrad size_t needed = NBBY*nbytes; 227 1.5 riastrad unsigned i; 228 1.1 hikaru 229 1.5 riastrad /* Sample the ring oscillators round-robin. */ 230 1.5 riastrad while (needed) { 231 1.5 riastrad /* 232 1.5 riastrad * Switch to the next RO group once we drain the FIFO. 233 1.5 riastrad * By the time rnd_add_data is done, we will have 234 1.5 riastrad * processed all 512 bytes of the FIFO. We assume it 235 1.5 riastrad * takes at least one cycle per byte (realistically, 236 1.5 riastrad * more like ~80cpb to draw from the FIFO and then 237 1.5 riastrad * process it with rnd_add_data), so there is no need 238 1.5 riastrad * for any other delays. 239 1.5 riastrad */ 240 1.5 riastrad sc->sc_rogroup++; 241 1.5 riastrad sc->sc_rogroup %= NROGROUPS; 242 1.9 simonb octrnm_raw_entropy(sc, sc->sc_rogroup); 243 1.5 riastrad 244 1.1 hikaru /* 245 1.17 gutterid * Gather a quarter of the FIFO at a time -- we are limited 246 1.7 simonb * to 128 bytes because of limits on the CVMSEG buffer. 247 1.1 hikaru */ 248 1.7 simonb CTASSERT(sizeof sc->sc_sample == 512); 249 1.7 simonb CTASSERT(__arraycount(sc->sc_sample) == RNG_FIFO_WORDS); 250 1.7 simonb for (samplepos = sc->sc_sample, i = 0; i < 4; i++) { 251 1.9 simonb octrnm_iobdma(sc, samplepos, RNG_FIFO_WORDS / 4); 252 1.7 simonb samplepos += RNG_FIFO_WORDS / 4; 253 1.7 simonb } 254 1.9 simonb #ifdef OCTRNM_DEBUG 255 1.7 simonb hexdump(printf, "rnm", sc->sc_sample, sizeof sc->sc_sample); 256 1.5 riastrad #endif 257 1.7 simonb rnd_add_data_sync(&sc->sc_rndsrc, sc->sc_sample, 258 1.7 simonb sizeof sc->sc_sample, NBBY*sizeof(sc->sc_sample)/BPB); 259 1.7 simonb needed -= MIN(needed, MAX(1, NBBY*sizeof(sc->sc_sample)/BPB)); 260 1.5 riastrad 261 1.15 riastrad /* Now's a good time to yield. */ 262 1.15 riastrad preempt_point(); 263 1.1 hikaru } 264 1.5 riastrad 265 1.5 riastrad /* Zero the sample. */ 266 1.7 simonb explicit_memset(sc->sc_sample, 0, sizeof sc->sc_sample); 267 1.1 hikaru } 268 1.1 hikaru 269 1.5 riastrad /* 270 1.9 simonb * octrnm_reset(sc) 271 1.5 riastrad * 272 1.5 riastrad * Reset the RNM unit, disabling it and clearing the FIFO. 273 1.5 riastrad */ 274 1.5 riastrad static void 275 1.9 simonb octrnm_reset(struct octrnm_softc *sc) 276 1.5 riastrad { 277 1.5 riastrad 278 1.5 riastrad bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 279 1.5 riastrad RNM_CTL_STATUS_RNG_RST|RNM_CTL_STATUS_RNM_RST); 280 1.5 riastrad } 281 1.5 riastrad 282 1.5 riastrad /* 283 1.9 simonb * octrnm_conditioned_deterministic(sc) 284 1.5 riastrad * 285 1.5 riastrad * Switch the RNM unit into the deterministic LFSR/SHA-1 mode with 286 1.5 riastrad * no entropy, for the next data loaded into the FIFO. 287 1.5 riastrad */ 288 1.5 riastrad static void 289 1.9 simonb octrnm_conditioned_deterministic(struct octrnm_softc *sc) 290 1.5 riastrad { 291 1.5 riastrad 292 1.5 riastrad bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 293 1.5 riastrad RNM_CTL_STATUS_RNG_EN); 294 1.5 riastrad } 295 1.5 riastrad 296 1.5 riastrad /* 297 1.9 simonb * octrnm_conditioned_entropy(sc) 298 1.5 riastrad * 299 1.5 riastrad * Switch the RNM unit to generate ring oscillator samples 300 1.5 riastrad * conditioned with an LFSR/SHA-1, for the next data loaded into 301 1.5 riastrad * the FIFO. 302 1.5 riastrad */ 303 1.5 riastrad static void __unused 304 1.9 simonb octrnm_conditioned_entropy(struct octrnm_softc *sc) 305 1.5 riastrad { 306 1.5 riastrad 307 1.5 riastrad bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 308 1.5 riastrad RNM_CTL_STATUS_RNG_EN|RNM_CTL_STATUS_ENT_EN); 309 1.5 riastrad } 310 1.5 riastrad 311 1.5 riastrad /* 312 1.9 simonb * octrnm_raw_entropy(sc, rogroup) 313 1.5 riastrad * 314 1.5 riastrad * Switch the RNM unit to generate raw ring oscillator samples 315 1.17 gutterid * from the specified group of eight ring oscillators. 316 1.5 riastrad */ 317 1.5 riastrad static void 318 1.9 simonb octrnm_raw_entropy(struct octrnm_softc *sc, unsigned rogroup) 319 1.5 riastrad { 320 1.5 riastrad uint64_t ctl = 0; 321 1.5 riastrad 322 1.5 riastrad ctl |= RNM_CTL_STATUS_RNG_EN; /* enable FIFO */ 323 1.5 riastrad ctl |= RNM_CTL_STATUS_ENT_EN; /* enable entropy source */ 324 1.5 riastrad ctl |= RNM_CTL_STATUS_EXP_ENT; /* expose entropy without LFSR/SHA-1 */ 325 1.5 riastrad ctl |= __SHIFTIN(rogroup, RNM_CTL_STATUS_ENT_SEL_MASK); 326 1.5 riastrad 327 1.5 riastrad bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET, 328 1.5 riastrad ctl); 329 1.5 riastrad } 330 1.5 riastrad 331 1.5 riastrad /* 332 1.9 simonb * octrnm_load(sc) 333 1.5 riastrad * 334 1.5 riastrad * Load a single 64-bit word out of the FIFO. 335 1.5 riastrad */ 336 1.4 simonb static uint64_t 337 1.9 simonb octrnm_load(struct octrnm_softc *sc) 338 1.1 hikaru { 339 1.12 simonb uint64_t addr = OCTEON_ADDR_IO_DID(RNM_MAJOR_DID, RNM_SUB_DID); 340 1.1 hikaru 341 1.1 hikaru return octeon_xkphys_read_8(addr); 342 1.1 hikaru } 343 1.5 riastrad 344 1.5 riastrad /* 345 1.9 simonb * octrnm_iobdma(sc, buf, nwords) 346 1.5 riastrad * 347 1.5 riastrad * Load nwords, at most 32, out of the FIFO into buf. 348 1.5 riastrad */ 349 1.5 riastrad static void 350 1.9 simonb octrnm_iobdma(struct octrnm_softc *sc, uint64_t *buf, unsigned nwords) 351 1.5 riastrad { 352 1.12 simonb /* ``scraddr'' part is index in 64-bit words, not address */ 353 1.5 riastrad size_t scraddr = OCTEON_CVMSEG_OFFSET(csm_rnm); 354 1.12 simonb uint64_t iobdma = IOBDMA_CREATE(RNM_MAJOR_DID, RNM_SUB_DID, 355 1.12 simonb scraddr / sizeof(uint64_t), nwords, 0); 356 1.5 riastrad 357 1.10 simonb KASSERT(nwords < 128); /* iobdma address restriction */ 358 1.11 simonb KASSERT(nwords <= CVMSEG_LM_RNM_SIZE); /* size of CVMSEG LM buffer */ 359 1.5 riastrad 360 1.5 riastrad octeon_iobdma_write_8(iobdma); 361 1.5 riastrad OCTEON_SYNCIOBDMA; 362 1.17 gutterid for (; nwords-- > 0; scraddr += 8) 363 1.5 riastrad *buf++ = octeon_cvmseg_read_8(scraddr); 364 1.5 riastrad } 365 1.5 riastrad 366 1.5 riastrad /* 367 1.9 simonb * octrnm_delay(ncycles) 368 1.5 riastrad * 369 1.5 riastrad * Wait ncycles, at most UINT32_MAX/2 so we behave reasonably even 370 1.5 riastrad * if the cycle counter rolls over. 371 1.5 riastrad */ 372 1.5 riastrad static void 373 1.9 simonb octrnm_delay(uint32_t ncycles) 374 1.5 riastrad { 375 1.5 riastrad uint32_t deadline = mips3_cp0_count_read() + ncycles; 376 1.5 riastrad 377 1.5 riastrad KASSERT(ncycles <= UINT32_MAX/2); 378 1.5 riastrad 379 1.5 riastrad while ((deadline - mips3_cp0_count_read()) < ncycles) 380 1.5 riastrad continue; 381 1.5 riastrad } 382