Home | History | Annotate | Line # | Download | only in dev
octeon_rnm.c revision 1.5
      1 /*	$NetBSD: octeon_rnm.c,v 1.5 2020/05/13 21:09:02 riastradh Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2007 Internet Initiative Japan, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  * SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * Cavium Octeon Random Number Generator / Random Number Memory `RNM'
     31  *
     32  *	The RNM unit consists of:
     33  *
     34  *	1. 128 ring oscillators
     35  *	2. an LFSR/SHA-1 conditioner
     36  *	3. a 512-byte FIFO
     37  *
     38  *	When the unit is enabled, there are three modes of operation:
     39  *
     40  *	(a) deterministic: the ring oscillators are disabled and the
     41  *	    LFSR/SHA-1 conditioner operates on fixed inputs to give
     42  *	    reproducible results for testing,
     43  *
     44  *	(b) conditioned entropy: the ring oscillators are enabled and
     45  *	    samples from them are fed through the LFSR/SHA-1
     46  *	    conditioner before being put into the FIFO, and
     47  *
     48  *	(c) raw entropy: the ring oscillators are enabled, and a group
     49  *	    of eight of them selected at any one time is sampled and
     50  *	    fed into the FIFO.
     51  *
     52  *	Details:
     53  *
     54  *	- The FIFO is refilled whenever we read out of it, either with
     55  *	  a load address or an IOBDMA operation.
     56  *
     57  *	- The conditioner takes 81 cycles to produce a 64-bit block of
     58  *	  output in the FIFO whether in deterministic or conditioned
     59  *	  entropy mode, each block consisting of the first 64 bits of a
     60  *	  SHA-1 hash.
     61  *
     62  *	- A group of eight ring oscillators take 8 cycles to produce a
     63  *	  64-bit block of output in the FIFO in raw entropy mode, each
     64  *	  block consisting of eight consecutive samples from each RO in
     65  *	  parallel.
     66  *
     67  *	The first sample of each RO always seems to be zero.  Further,
     68  *	consecutive samples from a single ring oscillator are not
     69  *	independent, so naive debiasing like a von Neumann extractor
     70  *	falls flat on its face.
     71  *
     72  *	We read out one FIFO's worth of raw samples from all 128 ring
     73  *	oscillators by going through them round-robin, and without a
     74  *	more detailed assessment of the jitter on the physical devices,
     75  *	we assume it takes a couple thousand samples of ring
     76  *	oscillators (one bit per sample) to reach one bit of entropy,
     77  *	so we read out 64 KB to get about 256 bits of entropy.
     78  *
     79  *	We could use the on-board LFSR/SHA-1 conditioner, but it's not
     80  *	clear how many RO samples go into the conditioner, and our
     81  *	entropy pool is a perfectly good conditioner itself, so it
     82  *	seems there is little advantage -- other than expedience -- to
     83  *	using the LFSR/SHA-1 conditioner.
     84  *
     85  * Reference: Cavium Networks OCTEON Plus CN50XX Hardware Reference
     86  * Manual, CN50XX-HM-0.99E PRELIMINARY, July 2008.
     87  */
     88 
     89 #include <sys/cdefs.h>
     90 __KERNEL_RCSID(0, "$NetBSD: octeon_rnm.c,v 1.5 2020/05/13 21:09:02 riastradh Exp $");
     91 
     92 #include <sys/param.h>
     93 #include <sys/device.h>
     94 #include <sys/kernel.h>
     95 #include <sys/rndsource.h>
     96 #include <sys/systm.h>
     97 
     98 #include <mips/locore.h>
     99 #include <mips/cavium/include/iobusvar.h>
    100 #include <mips/cavium/dev/octeon_rnmreg.h>
    101 #include <mips/cavium/dev/octeon_corereg.h>
    102 #include <mips/cavium/octeonvar.h>
    103 
    104 #include <sys/bus.h>
    105 
/* Define to hexdump each batch of raw samples as it is gathered.  */
//#define	OCTEON_RNM_DEBUG

#define	ENT_DELAY_CLOCK 8	/* cycles for each 64-bit RO sample batch */
#define	RNG_DELAY_CLOCK 81	/* cycles for each SHA-1 output */
/* Number of ring oscillator groups cycled through round-robin.  */
#define	NROGROUPS	16
/* Size of the 512-byte FIFO, counted in 64-bit words.  */
#define	RNG_FIFO_WORDS	(512/sizeof(uint64_t))
    112 
/*
 * Per-device driver state for the RNM unit.
 */
struct octeon_rnm_softc {
	bus_space_tag_t		sc_bust;	/* bus space tag from iobus */
	bus_space_handle_t	sc_regh;	/* mapped RNM registers */
	kmutex_t		sc_lock;	/* serializes FIFO access */
	krndsource_t		sc_rndsrc;	/* /dev/random source */
	unsigned		sc_rogroup;	/* current RO group, mod NROGROUPS */
};
    120 
/* Autoconfiguration entry points.  */
static int octeon_rnm_match(device_t, struct cfdata *, void *);
static void octeon_rnm_attach(device_t, device_t, void *);

/* Entropy source callback registered with rndsource_setcb.  */
static void octeon_rnm_rng(size_t, void *);

/* Mode selection: each of these writes the RNM control/status register.  */
static void octeon_rnm_reset(struct octeon_rnm_softc *);
static void octeon_rnm_conditioned_deterministic(struct octeon_rnm_softc *);
static void octeon_rnm_conditioned_entropy(struct octeon_rnm_softc *);
static void octeon_rnm_raw_entropy(struct octeon_rnm_softc *, unsigned);

/* FIFO readout: a single 64-bit load, or an IOBDMA burst of words.  */
static uint64_t octeon_rnm_load(struct octeon_rnm_softc *);
static void octeon_rnm_iobdma(struct octeon_rnm_softc *, uint64_t *, unsigned);

/* Busy-wait on the CP0 cycle counter.  */
static void octeon_rnm_delay(uint32_t);

/* No detach or activate hooks are provided.  */
CFATTACH_DECL_NEW(octeon_rnm, sizeof(struct octeon_rnm_softc),
    octeon_rnm_match, octeon_rnm_attach, NULL, NULL);
    134 
    135 static int
    136 octeon_rnm_match(device_t parent, struct cfdata *cf, void *aux)
    137 {
    138 	struct iobus_attach_args *aa = aux;
    139 
    140 	if (strcmp(cf->cf_name, aa->aa_name) != 0)
    141 		return 0;
    142 	if (cf->cf_unit != aa->aa_unitno)
    143 		return 0;
    144 	return 1;
    145 }
    146 
    147 static void
    148 octeon_rnm_attach(device_t parent, device_t self, void *aux)
    149 {
    150 	struct octeon_rnm_softc *sc = device_private(self);
    151 	struct iobus_attach_args *aa = aux;
    152 	uint64_t bist_status, sample, expected = UINT64_C(0xd654ff35fadf866b);
    153 
    154 	aprint_normal("\n");
    155 
    156 	/* Map the device registers, all two of them.  */
    157 	sc->sc_bust = aa->aa_bust;
    158 	if (bus_space_map(aa->aa_bust, aa->aa_unit->addr, RNM_SIZE,
    159 	    0, &sc->sc_regh) != 0) {
    160 		aprint_error_dev(self, "unable to map device\n");
    161 		return;
    162 	}
    163 
    164 	/* Verify that the built-in self-test succeeded.  */
    165 	bist_status = bus_space_read_8(sc->sc_bust, sc->sc_regh,
    166 	    RNM_BIST_STATUS_OFFSET);
    167 	if (bist_status) {
    168 		aprint_error_dev(self, "RNG built in self test failed: %#lx\n",
    169 		    bist_status);
    170 		return;
    171 	}
    172 
    173 	/* Create a mutex to serialize access to the FIFO.  */
    174 	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_VM);
    175 
    176 	/*
    177 	 * Reset the core, enable the RNG engine without entropy, wait
    178 	 * 81 cycles for it to produce a single sample, and draw the
    179 	 * deterministic sample to test.
    180 	 *
    181 	 * XXX Verify that the output matches the SHA-1 computation
    182 	 * described by the data sheet, not just a known answer.
    183 	 */
    184 	octeon_rnm_reset(sc);
    185 	octeon_rnm_conditioned_deterministic(sc);
    186 	octeon_rnm_delay(RNG_DELAY_CLOCK*1);
    187 	sample = octeon_rnm_load(sc);
    188 	if (sample != expected)
    189 		aprint_error_dev(self, "self-test: read %016"PRIx64","
    190 		    " expected %016"PRIx64, sample, expected);
    191 
    192 	/*
    193 	 * Reset the core again to clear the FIFO, and enable the RNG
    194 	 * engine with entropy exposed directly.  Start from the first
    195 	 * group of ring oscillators; as we gather samples we will
    196 	 * rotate through the rest of them.
    197 	 */
    198 	octeon_rnm_reset(sc);
    199 	sc->sc_rogroup = 0;
    200 	octeon_rnm_raw_entropy(sc, sc->sc_rogroup);
    201 	octeon_rnm_delay(ENT_DELAY_CLOCK*RNG_FIFO_WORDS);
    202 
    203 	/* Attach the rndsource.  */
    204 	rndsource_setcb(&sc->sc_rndsrc, octeon_rnm_rng, sc);
    205 	rnd_attach_source(&sc->sc_rndsrc, device_xname(self), RND_TYPE_RNG,
    206 	    RND_FLAG_DEFAULT | RND_FLAG_HASCB);
    207 }
    208 
    209 static void
    210 octeon_rnm_rng(size_t nbytes, void *vsc)
    211 {
    212 	/* Assume we need 2048 RO samples to get one bit of entropy.  */
    213 	const unsigned BPB = 2048;
    214 	uint64_t sample[32];
    215 	struct octeon_rnm_softc *sc = vsc;
    216 	size_t needed = NBBY*nbytes;
    217 	unsigned i;
    218 
    219 	/* Sample the ring oscillators round-robin.  */
    220 	mutex_enter(&sc->sc_lock);
    221 	while (needed) {
    222 		/*
    223 		 * Switch to the next RO group once we drain the FIFO.
    224 		 * By the time rnd_add_data is done, we will have
    225 		 * processed all 512 bytes of the FIFO.  We assume it
    226 		 * takes at least one cycle per byte (realistically,
    227 		 * more like ~80cpb to draw from the FIFO and then
    228 		 * process it with rnd_add_data), so there is no need
    229 		 * for any other delays.
    230 		 */
    231 		sc->sc_rogroup++;
    232 		sc->sc_rogroup %= NROGROUPS;
    233 		octeon_rnm_raw_entropy(sc, sc->sc_rogroup);
    234 
    235 		/*
    236 		 * Gather half the FIFO at a time -- we are limited to
    237 		 * 256 bytes because of limits on the CVMSEG buffer.
    238 		 */
    239 		CTASSERT(sizeof sample == 256);
    240 		CTASSERT(2*__arraycount(sample) == RNG_FIFO_WORDS);
    241 		for (i = 0; i < 2; i++) {
    242 			octeon_rnm_iobdma(sc, sample, __arraycount(sample));
    243 #ifdef OCTEON_RNM_DEBUG
    244 			hexdump(printf, "rnm", sample, sizeof sample);
    245 #endif
    246 			rnd_add_data_sync(&sc->sc_rndsrc, sample,
    247 			    sizeof sample, NBBY*sizeof(sample)/BPB);
    248 			needed -= MIN(needed, MAX(1, NBBY*sizeof(sample)/BPB));
    249 		}
    250 
    251 		/* Yield if requested.  */
    252 		if (__predict_false(curcpu()->ci_schedstate.spc_flags &
    253 			SPCF_SHOULDYIELD)) {
    254 			mutex_exit(&sc->sc_lock);
    255 			preempt();
    256 			mutex_enter(&sc->sc_lock);
    257 		}
    258 	}
    259 	mutex_exit(&sc->sc_lock);
    260 
    261 	/* Zero the sample.  */
    262 	explicit_memset(sample, 0, sizeof sample);
    263 }
    264 
    265 /*
    266  * octeon_rnm_reset(sc)
    267  *
    268  *	Reset the RNM unit, disabling it and clearing the FIFO.
    269  */
    270 static void
    271 octeon_rnm_reset(struct octeon_rnm_softc *sc)
    272 {
    273 
    274 	bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
    275 	    RNM_CTL_STATUS_RNG_RST|RNM_CTL_STATUS_RNM_RST);
    276 }
    277 
    278 /*
    279  * octeon_rnm_conditioned_deterministic(sc)
    280  *
    281  *	Switch the RNM unit into the deterministic LFSR/SHA-1 mode with
    282  *	no entropy, for the next data loaded into the FIFO.
    283  */
    284 static void
    285 octeon_rnm_conditioned_deterministic(struct octeon_rnm_softc *sc)
    286 {
    287 
    288 	bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
    289 	    RNM_CTL_STATUS_RNG_EN);
    290 }
    291 
    292 /*
    293  * octeon_rnm_conditioned_entropy(sc)
    294  *
    295  *	Switch the RNM unit to generate ring oscillator samples
    296  *	conditioned with an LFSR/SHA-1, for the next data loaded into
    297  *	the FIFO.
    298  */
    299 static void __unused
    300 octeon_rnm_conditioned_entropy(struct octeon_rnm_softc *sc)
    301 {
    302 
    303 	bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
    304 	    RNM_CTL_STATUS_RNG_EN|RNM_CTL_STATUS_ENT_EN);
    305 }
    306 
    307 /*
    308  * octeon_rnm_raw_entropy(sc, rogroup)
    309  *
    310  *	Switch the RNM unit to generate raw ring oscillator samples
    311  *	from the specified group of eight ring oscillator.
    312  */
    313 static void
    314 octeon_rnm_raw_entropy(struct octeon_rnm_softc *sc, unsigned rogroup)
    315 {
    316 	uint64_t ctl = 0;
    317 
    318 	ctl |= RNM_CTL_STATUS_RNG_EN;	/* enable FIFO */
    319 	ctl |= RNM_CTL_STATUS_ENT_EN;	/* enable entropy source */
    320 	ctl |= RNM_CTL_STATUS_EXP_ENT;	/* expose entropy without LFSR/SHA-1 */
    321 	ctl |= __SHIFTIN(rogroup, RNM_CTL_STATUS_ENT_SEL_MASK);
    322 
    323 	bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
    324 	    ctl);
    325 }
    326 
    327 /*
    328  * octeon_rnm_load(sc)
    329  *
    330  *	Load a single 64-bit word out of the FIFO.
    331  */
    332 static uint64_t
    333 octeon_rnm_load(struct octeon_rnm_softc *sc)
    334 {
    335 	uint64_t addr =
    336 	    RNM_OPERATION_BASE_IO_BIT |
    337 	    __BITS64_SET(RNM_OPERATION_BASE_MAJOR_DID, 0x08) |
    338 	    __BITS64_SET(RNM_OPERATION_BASE_SUB_DID, 0x00);
    339 
    340 	return octeon_xkphys_read_8(addr);
    341 }
    342 
    343 /*
    344  * octeon_rnm_iobdma(sc, buf, nwords)
    345  *
    346  *	Load nwords, at most 32, out of the FIFO into buf.
    347  */
    348 static void
    349 octeon_rnm_iobdma(struct octeon_rnm_softc *sc, uint64_t *buf, unsigned nwords)
    350 {
    351 	size_t scraddr = OCTEON_CVMSEG_OFFSET(csm_rnm);
    352 	uint64_t iobdma =
    353 	    __SHIFTIN(scraddr/sizeof(uint64_t), IOBDMA_SCRADDR) |
    354 	    __SHIFTIN(nwords, IOBDMA_LEN) |
    355 	    __SHIFTIN(RNM_IOBDMA_MAJORDID, IOBDMA_MAJORDID) |
    356 	    __SHIFTIN(RNM_IOBDMA_SUBDID, IOBDMA_SUBDID);
    357 
    358 	KASSERT(nwords < 256);	/* iobdma address restriction */
    359 	KASSERT(nwords <= 32);	/* octeon_cvmseg_map limitation */
    360 
    361 	octeon_iobdma_write_8(iobdma);
    362 	OCTEON_SYNCIOBDMA;
    363 	for (; nwords --> 0; scraddr += 8)
    364 		*buf++ = octeon_cvmseg_read_8(scraddr);
    365 }
    366 
/*
 * octeon_rnm_delay(ncycles)
 *
 *	Busy-wait until at least ncycles ticks of the CP0 count
 *	register have elapsed.  ncycles must be at most UINT32_MAX/2 so
 *	we behave reasonably even if the cycle counter rolls over.
 */
static void
octeon_rnm_delay(uint32_t ncycles)
{
	uint32_t start;

	KASSERT(ncycles <= UINT32_MAX/2);

	/*
	 * Measure elapsed cycles from a recorded starting point.  The
	 * unsigned subtraction is correct across counter rollover, and
	 * unlike a deadline-relative test it cannot fall through
	 * immediately when the counter has not yet advanced on the
	 * first comparison.
	 */
	start = mips3_cp0_count_read();
	while ((uint32_t)(mips3_cp0_count_read() - start) < ncycles)
		continue;
}
    383