octeon_rnm.c revision 1.5 1 /* $NetBSD: octeon_rnm.c,v 1.5 2020/05/13 21:09:02 riastradh Exp $ */
2
3 /*
4 * Copyright (c) 2007 Internet Initiative Japan, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * Cavium Octeon Random Number Generator / Random Number Memory `RNM'
31 *
32 * The RNM unit consists of:
33 *
34 * 1. 128 ring oscillators
35 * 2. an LFSR/SHA-1 conditioner
36 * 3. a 512-byte FIFO
37 *
38 * When the unit is enabled, there are three modes of operation:
39 *
40 * (a) deterministic: the ring oscillators are disabled and the
41 * LFSR/SHA-1 conditioner operates on fixed inputs to give
42 * reproducible results for testing,
43 *
44 * (b) conditioned entropy: the ring oscillators are enabled and
45 * samples from them are fed through the LFSR/SHA-1
46 * conditioner before being put into the FIFO, and
47 *
48 * (c) raw entropy: the ring oscillators are enabled, and a group
49 * of eight of them selected at any one time is sampled and
50 * fed into the FIFO.
51 *
52 * Details:
53 *
54 * - The FIFO is refilled whenever we read out of it, either with
55 * a load address or an IOBDMA operation.
56 *
57 * - The conditioner takes 81 cycles to produce a 64-bit block of
58 * output in the FIFO whether in deterministic or conditioned
59 * entropy mode, each block consisting of the first 64 bits of a
60 * SHA-1 hash.
61 *
62 * - A group of eight ring oscillators take 8 cycles to produce a
63 * 64-bit block of output in the FIFO in raw entropy mode, each
64 * block consisting of eight consecutive samples from each RO in
65 * parallel.
66 *
67 * The first sample of each RO always seems to be zero. Further,
68 * consecutive samples from a single ring oscillator are not
69 * independent, so naive debiasing like a von Neumann extractor
70 * falls flat on its face.
71 *
72 * We read out one FIFO's worth of raw samples from all 128 ring
73 * oscillators by going through them round-robin, and without a
74 * more detailed assessment of the jitter on the physical devices,
75 * we assume it takes a couple thousand samples of ring
76 * oscillators (one bit per sample) to reach one bit of entropy,
77 * so we read out 64 KB to get about 256 bits of entropy.
78 *
79 * We could use the on-board LFSR/SHA-1 conditioner, but it's not
80 * clear how many RO samples go into the conditioner, and our
81 * entropy pool is a perfectly good conditioner itself, so it
82 * seems there is little advantage -- other than expedience -- to
83 * using the LFSR/SHA-1 conditioner.
84 *
85 * Reference: Cavium Networks OCTEON Plus CN50XX Hardware Reference
86 * Manual, CN50XX-HM-0.99E PRELIMINARY, July 2008.
87 */
88
89 #include <sys/cdefs.h>
90 __KERNEL_RCSID(0, "$NetBSD: octeon_rnm.c,v 1.5 2020/05/13 21:09:02 riastradh Exp $");
91
92 #include <sys/param.h>
93 #include <sys/device.h>
94 #include <sys/kernel.h>
95 #include <sys/rndsource.h>
96 #include <sys/systm.h>
97
98 #include <mips/locore.h>
99 #include <mips/cavium/include/iobusvar.h>
100 #include <mips/cavium/dev/octeon_rnmreg.h>
101 #include <mips/cavium/dev/octeon_corereg.h>
102 #include <mips/cavium/octeonvar.h>
103
104 #include <sys/bus.h>
105
106 //#define OCTEON_RNM_DEBUG
107
108 #define ENT_DELAY_CLOCK 8 /* cycles for each 64-bit RO sample batch */
109 #define RNG_DELAY_CLOCK 81 /* cycles for each SHA-1 output */
110 #define NROGROUPS 16
111 #define RNG_FIFO_WORDS (512/sizeof(uint64_t))
112
113 struct octeon_rnm_softc {
114 bus_space_tag_t sc_bust;
115 bus_space_handle_t sc_regh;
116 kmutex_t sc_lock;
117 krndsource_t sc_rndsrc; /* /dev/random source */
118 unsigned sc_rogroup;
119 };
120
121 static int octeon_rnm_match(device_t, struct cfdata *, void *);
122 static void octeon_rnm_attach(device_t, device_t, void *);
123 static void octeon_rnm_rng(size_t, void *);
124 static void octeon_rnm_reset(struct octeon_rnm_softc *);
125 static void octeon_rnm_conditioned_deterministic(struct octeon_rnm_softc *);
126 static void octeon_rnm_conditioned_entropy(struct octeon_rnm_softc *);
127 static void octeon_rnm_raw_entropy(struct octeon_rnm_softc *, unsigned);
128 static uint64_t octeon_rnm_load(struct octeon_rnm_softc *);
129 static void octeon_rnm_iobdma(struct octeon_rnm_softc *, uint64_t *, unsigned);
130 static void octeon_rnm_delay(uint32_t);
131
132 CFATTACH_DECL_NEW(octeon_rnm, sizeof(struct octeon_rnm_softc),
133 octeon_rnm_match, octeon_rnm_attach, NULL, NULL);
134
135 static int
136 octeon_rnm_match(device_t parent, struct cfdata *cf, void *aux)
137 {
138 struct iobus_attach_args *aa = aux;
139
140 if (strcmp(cf->cf_name, aa->aa_name) != 0)
141 return 0;
142 if (cf->cf_unit != aa->aa_unitno)
143 return 0;
144 return 1;
145 }
146
147 static void
148 octeon_rnm_attach(device_t parent, device_t self, void *aux)
149 {
150 struct octeon_rnm_softc *sc = device_private(self);
151 struct iobus_attach_args *aa = aux;
152 uint64_t bist_status, sample, expected = UINT64_C(0xd654ff35fadf866b);
153
154 aprint_normal("\n");
155
156 /* Map the device registers, all two of them. */
157 sc->sc_bust = aa->aa_bust;
158 if (bus_space_map(aa->aa_bust, aa->aa_unit->addr, RNM_SIZE,
159 0, &sc->sc_regh) != 0) {
160 aprint_error_dev(self, "unable to map device\n");
161 return;
162 }
163
164 /* Verify that the built-in self-test succeeded. */
165 bist_status = bus_space_read_8(sc->sc_bust, sc->sc_regh,
166 RNM_BIST_STATUS_OFFSET);
167 if (bist_status) {
168 aprint_error_dev(self, "RNG built in self test failed: %#lx\n",
169 bist_status);
170 return;
171 }
172
173 /* Create a mutex to serialize access to the FIFO. */
174 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_VM);
175
176 /*
177 * Reset the core, enable the RNG engine without entropy, wait
178 * 81 cycles for it to produce a single sample, and draw the
179 * deterministic sample to test.
180 *
181 * XXX Verify that the output matches the SHA-1 computation
182 * described by the data sheet, not just a known answer.
183 */
184 octeon_rnm_reset(sc);
185 octeon_rnm_conditioned_deterministic(sc);
186 octeon_rnm_delay(RNG_DELAY_CLOCK*1);
187 sample = octeon_rnm_load(sc);
188 if (sample != expected)
189 aprint_error_dev(self, "self-test: read %016"PRIx64","
190 " expected %016"PRIx64, sample, expected);
191
192 /*
193 * Reset the core again to clear the FIFO, and enable the RNG
194 * engine with entropy exposed directly. Start from the first
195 * group of ring oscillators; as we gather samples we will
196 * rotate through the rest of them.
197 */
198 octeon_rnm_reset(sc);
199 sc->sc_rogroup = 0;
200 octeon_rnm_raw_entropy(sc, sc->sc_rogroup);
201 octeon_rnm_delay(ENT_DELAY_CLOCK*RNG_FIFO_WORDS);
202
203 /* Attach the rndsource. */
204 rndsource_setcb(&sc->sc_rndsrc, octeon_rnm_rng, sc);
205 rnd_attach_source(&sc->sc_rndsrc, device_xname(self), RND_TYPE_RNG,
206 RND_FLAG_DEFAULT | RND_FLAG_HASCB);
207 }
208
209 static void
210 octeon_rnm_rng(size_t nbytes, void *vsc)
211 {
212 /* Assume we need 2048 RO samples to get one bit of entropy. */
213 const unsigned BPB = 2048;
214 uint64_t sample[32];
215 struct octeon_rnm_softc *sc = vsc;
216 size_t needed = NBBY*nbytes;
217 unsigned i;
218
219 /* Sample the ring oscillators round-robin. */
220 mutex_enter(&sc->sc_lock);
221 while (needed) {
222 /*
223 * Switch to the next RO group once we drain the FIFO.
224 * By the time rnd_add_data is done, we will have
225 * processed all 512 bytes of the FIFO. We assume it
226 * takes at least one cycle per byte (realistically,
227 * more like ~80cpb to draw from the FIFO and then
228 * process it with rnd_add_data), so there is no need
229 * for any other delays.
230 */
231 sc->sc_rogroup++;
232 sc->sc_rogroup %= NROGROUPS;
233 octeon_rnm_raw_entropy(sc, sc->sc_rogroup);
234
235 /*
236 * Gather half the FIFO at a time -- we are limited to
237 * 256 bytes because of limits on the CVMSEG buffer.
238 */
239 CTASSERT(sizeof sample == 256);
240 CTASSERT(2*__arraycount(sample) == RNG_FIFO_WORDS);
241 for (i = 0; i < 2; i++) {
242 octeon_rnm_iobdma(sc, sample, __arraycount(sample));
243 #ifdef OCTEON_RNM_DEBUG
244 hexdump(printf, "rnm", sample, sizeof sample);
245 #endif
246 rnd_add_data_sync(&sc->sc_rndsrc, sample,
247 sizeof sample, NBBY*sizeof(sample)/BPB);
248 needed -= MIN(needed, MAX(1, NBBY*sizeof(sample)/BPB));
249 }
250
251 /* Yield if requested. */
252 if (__predict_false(curcpu()->ci_schedstate.spc_flags &
253 SPCF_SHOULDYIELD)) {
254 mutex_exit(&sc->sc_lock);
255 preempt();
256 mutex_enter(&sc->sc_lock);
257 }
258 }
259 mutex_exit(&sc->sc_lock);
260
261 /* Zero the sample. */
262 explicit_memset(sample, 0, sizeof sample);
263 }
264
265 /*
266 * octeon_rnm_reset(sc)
267 *
268 * Reset the RNM unit, disabling it and clearing the FIFO.
269 */
270 static void
271 octeon_rnm_reset(struct octeon_rnm_softc *sc)
272 {
273
274 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
275 RNM_CTL_STATUS_RNG_RST|RNM_CTL_STATUS_RNM_RST);
276 }
277
278 /*
279 * octeon_rnm_conditioned_deterministic(sc)
280 *
281 * Switch the RNM unit into the deterministic LFSR/SHA-1 mode with
282 * no entropy, for the next data loaded into the FIFO.
283 */
284 static void
285 octeon_rnm_conditioned_deterministic(struct octeon_rnm_softc *sc)
286 {
287
288 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
289 RNM_CTL_STATUS_RNG_EN);
290 }
291
292 /*
293 * octeon_rnm_conditioned_entropy(sc)
294 *
295 * Switch the RNM unit to generate ring oscillator samples
296 * conditioned with an LFSR/SHA-1, for the next data loaded into
297 * the FIFO.
298 */
299 static void __unused
300 octeon_rnm_conditioned_entropy(struct octeon_rnm_softc *sc)
301 {
302
303 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
304 RNM_CTL_STATUS_RNG_EN|RNM_CTL_STATUS_ENT_EN);
305 }
306
307 /*
308 * octeon_rnm_raw_entropy(sc, rogroup)
309 *
310 * Switch the RNM unit to generate raw ring oscillator samples
311 * from the specified group of eight ring oscillator.
312 */
313 static void
314 octeon_rnm_raw_entropy(struct octeon_rnm_softc *sc, unsigned rogroup)
315 {
316 uint64_t ctl = 0;
317
318 ctl |= RNM_CTL_STATUS_RNG_EN; /* enable FIFO */
319 ctl |= RNM_CTL_STATUS_ENT_EN; /* enable entropy source */
320 ctl |= RNM_CTL_STATUS_EXP_ENT; /* expose entropy without LFSR/SHA-1 */
321 ctl |= __SHIFTIN(rogroup, RNM_CTL_STATUS_ENT_SEL_MASK);
322
323 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
324 ctl);
325 }
326
327 /*
328 * octeon_rnm_load(sc)
329 *
330 * Load a single 64-bit word out of the FIFO.
331 */
332 static uint64_t
333 octeon_rnm_load(struct octeon_rnm_softc *sc)
334 {
335 uint64_t addr =
336 RNM_OPERATION_BASE_IO_BIT |
337 __BITS64_SET(RNM_OPERATION_BASE_MAJOR_DID, 0x08) |
338 __BITS64_SET(RNM_OPERATION_BASE_SUB_DID, 0x00);
339
340 return octeon_xkphys_read_8(addr);
341 }
342
343 /*
344 * octeon_rnm_iobdma(sc, buf, nwords)
345 *
346 * Load nwords, at most 32, out of the FIFO into buf.
347 */
348 static void
349 octeon_rnm_iobdma(struct octeon_rnm_softc *sc, uint64_t *buf, unsigned nwords)
350 {
351 size_t scraddr = OCTEON_CVMSEG_OFFSET(csm_rnm);
352 uint64_t iobdma =
353 __SHIFTIN(scraddr/sizeof(uint64_t), IOBDMA_SCRADDR) |
354 __SHIFTIN(nwords, IOBDMA_LEN) |
355 __SHIFTIN(RNM_IOBDMA_MAJORDID, IOBDMA_MAJORDID) |
356 __SHIFTIN(RNM_IOBDMA_SUBDID, IOBDMA_SUBDID);
357
358 KASSERT(nwords < 256); /* iobdma address restriction */
359 KASSERT(nwords <= 32); /* octeon_cvmseg_map limitation */
360
361 octeon_iobdma_write_8(iobdma);
362 OCTEON_SYNCIOBDMA;
363 for (; nwords --> 0; scraddr += 8)
364 *buf++ = octeon_cvmseg_read_8(scraddr);
365 }
366
/*
 * octeon_rnm_delay(ncycles)
 *
 *	Busy-wait until the counter read by mips3_cp0_count_read() has
 *	passed the value it had on entry plus ncycles.  ncycles must be
 *	at most UINT32_MAX/2 so we behave reasonably even if the 32-bit
 *	counter rolls over.
 */
static void
octeon_rnm_delay(uint32_t ncycles)
{
	uint32_t deadline = mips3_cp0_count_read() + ncycles;

	KASSERT(ncycles <= UINT32_MAX/2);

	/*
	 * In unsigned arithmetic, deadline - now starts out at ncycles,
	 * counts down to zero as the counter approaches the deadline,
	 * and wraps around to a huge value once the counter has passed
	 * it.  Compare with <=, not <: if the counter has not ticked
	 * since the deadline was computed, the difference is exactly
	 * ncycles, and we must keep waiting rather than return at once.
	 */
	while ((deadline - mips3_cp0_count_read()) <= ncycles)
		continue;
}
383