octeon_rnm.c revision 1.9 1 /* $NetBSD: octeon_rnm.c,v 1.9 2020/05/31 06:27:06 simonb Exp $ */
2
3 /*
4 * Copyright (c) 2007 Internet Initiative Japan, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * Cavium Octeon Random Number Generator / Random Number Memory `RNM'
31 *
32 * The RNM unit consists of:
33 *
34 * 1. 128 ring oscillators
35 * 2. an LFSR/SHA-1 conditioner
36 * 3. a 512-byte FIFO
37 *
38 * When the unit is enabled, there are three modes of operation:
39 *
40 * (a) deterministic: the ring oscillators are disabled and the
41 * LFSR/SHA-1 conditioner operates on fixed inputs to give
42 * reproducible results for testing,
43 *
44 * (b) conditioned entropy: the ring oscillators are enabled and
45 * samples from them are fed through the LFSR/SHA-1
46 * conditioner before being put into the FIFO, and
47 *
48 * (c) raw entropy: the ring oscillators are enabled, and a group
49 * of eight of them selected at any one time is sampled and
50 * fed into the FIFO.
51 *
52 * Details:
53 *
54 * - The FIFO is refilled whenever we read out of it, either with
55 * a load address or an IOBDMA operation.
56 *
57 * - The conditioner takes 81 cycles to produce a 64-bit block of
58 * output in the FIFO whether in deterministic or conditioned
59 * entropy mode, each block consisting of the first 64 bits of a
60 * SHA-1 hash.
61 *
62 * - A group of eight ring oscillators take 8 cycles to produce a
63 * 64-bit block of output in the FIFO in raw entropy mode, each
64 * block consisting of eight consecutive samples from each RO in
65 * parallel.
66 *
67 * The first sample of each RO always seems to be zero. Further,
68 * consecutive samples from a single ring oscillator are not
69 * independent, so naive debiasing like a von Neumann extractor
70 * falls flat on its face. And parallel ring oscillators powered
71 * by the same source may not be independent either, if they end
72 * up locked.
73 *
74 * We read out one FIFO's worth of raw samples from groups of 8
75 * ring oscillators at a time, of 128 total, by going through them
76 * round robin. We take 32 consecutive samples from each ring
77 * oscillator in a group of 8 in parallel before we count one bit
78 * of entropy. To get 256 bits of entropy, we read 4Kbit of data
79 * from each of the sixteen 8-RO groups.
80 *
81 * We could use the on-board LFSR/SHA-1 conditioner like the Linux
82 * driver written by Cavium does, but it's not clear how many RO
83 * samples go into the conditioner, and our entropy pool is a
84 * perfectly good conditioner itself, so it seems there is little
85 * advantage -- other than expedience -- to using the LFSR/SHA-1
86 * conditioner. All the manual says is that it samples 125 of the
87 * 128 ROs. But the Cavium SHA-1 CPU instruction is advertised to
88 * have a latency of 100 cycles, so it seems implausible that much
89 * more than one sample from each RO could be squeezed in there.
90 *
91 * The hardware exposes only 64 bits of each SHA-1 hash, and the
92 * Linux driver uses 32 bits of that -- which, if treated as full
93 * entropy, would mean an assessment of 3.9 bits of RO samples to
94 * get 1 bit of entropy, whereas we take 256 bits of RO samples to
95 * get one bit of entropy, so this seems reasonably conservative.
96 *
97 * Reference: Cavium Networks OCTEON Plus CN50XX Hardware Reference
98 * Manual, CN50XX-HM-0.99E PRELIMINARY, July 2008.
99 */
100
101 #include <sys/cdefs.h>
102 __KERNEL_RCSID(0, "$NetBSD: octeon_rnm.c,v 1.9 2020/05/31 06:27:06 simonb Exp $");
103
104 #include <sys/param.h>
105 #include <sys/device.h>
106 #include <sys/kernel.h>
107 #include <sys/rndsource.h>
108 #include <sys/systm.h>
109
110 #include <mips/locore.h>
111 #include <mips/cavium/include/iobusvar.h>
112 #include <mips/cavium/dev/octeon_rnmreg.h>
113 #include <mips/cavium/dev/octeon_corereg.h>
114 #include <mips/cavium/octeonvar.h>
115
116 #include <sys/bus.h>
117
118 //#define OCTRNM_DEBUG
119
/* Timing and geometry of the RNM unit. */
#define	ENT_DELAY_CLOCK	8	/* cycles per 64-bit batch of raw RO samples */
#define	RNG_DELAY_CLOCK	81	/* cycles per 64-bit SHA-1 conditioner output */
#define	NROGROUPS	16	/* 128 ring oscillators in groups of eight */
#define	RNG_FIFO_WORDS	(512 / sizeof(uint64_t)) /* 512-byte FIFO, in words */
124
125 struct octrnm_softc {
126 uint64_t sc_sample[RNG_FIFO_WORDS];
127 bus_space_tag_t sc_bust;
128 bus_space_handle_t sc_regh;
129 kmutex_t sc_lock;
130 krndsource_t sc_rndsrc; /* /dev/random source */
131 unsigned sc_rogroup;
132 };
133
134 static int octrnm_match(device_t, struct cfdata *, void *);
135 static void octrnm_attach(device_t, device_t, void *);
136 static void octrnm_rng(size_t, void *);
137 static void octrnm_reset(struct octrnm_softc *);
138 static void octrnm_conditioned_deterministic(struct octrnm_softc *);
139 static void octrnm_conditioned_entropy(struct octrnm_softc *);
140 static void octrnm_raw_entropy(struct octrnm_softc *, unsigned);
141 static uint64_t octrnm_load(struct octrnm_softc *);
142 static void octrnm_iobdma(struct octrnm_softc *, uint64_t *, unsigned);
143 static void octrnm_delay(uint32_t);
144
145 CFATTACH_DECL_NEW(octrnm, sizeof(struct octrnm_softc),
146 octrnm_match, octrnm_attach, NULL, NULL);
147
148 static int
149 octrnm_match(device_t parent, struct cfdata *cf, void *aux)
150 {
151 struct iobus_attach_args *aa = aux;
152
153 if (strcmp(cf->cf_name, aa->aa_name) != 0)
154 return 0;
155 if (cf->cf_unit != aa->aa_unitno)
156 return 0;
157 return 1;
158 }
159
160 static void
161 octrnm_attach(device_t parent, device_t self, void *aux)
162 {
163 struct octrnm_softc *sc = device_private(self);
164 struct iobus_attach_args *aa = aux;
165 uint64_t bist_status, sample, expected = UINT64_C(0xd654ff35fadf866b);
166
167 aprint_normal("\n");
168
169 /* Map the device registers, all two of them. */
170 sc->sc_bust = aa->aa_bust;
171 if (bus_space_map(aa->aa_bust, aa->aa_unit->addr, RNM_SIZE,
172 0, &sc->sc_regh) != 0) {
173 aprint_error_dev(self, "unable to map device\n");
174 return;
175 }
176
177 /* Verify that the built-in self-test succeeded. */
178 bist_status = bus_space_read_8(sc->sc_bust, sc->sc_regh,
179 RNM_BIST_STATUS_OFFSET);
180 if (bist_status) {
181 aprint_error_dev(self, "RNG built in self test failed: %#lx\n",
182 bist_status);
183 return;
184 }
185
186 /* Create a mutex to serialize access to the FIFO. */
187 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_VM);
188
189 /*
190 * Reset the core, enable the RNG engine without entropy, wait
191 * 81 cycles for it to produce a single sample, and draw the
192 * deterministic sample to test.
193 *
194 * XXX Verify that the output matches the SHA-1 computation
195 * described by the data sheet, not just a known answer.
196 */
197 octrnm_reset(sc);
198 octrnm_conditioned_deterministic(sc);
199 octrnm_delay(RNG_DELAY_CLOCK*1);
200 sample = octrnm_load(sc);
201 if (sample != expected)
202 aprint_error_dev(self, "self-test: read %016"PRIx64","
203 " expected %016"PRIx64, sample, expected);
204
205 /*
206 * Reset the core again to clear the FIFO, and enable the RNG
207 * engine with entropy exposed directly. Start from the first
208 * group of ring oscillators; as we gather samples we will
209 * rotate through the rest of them.
210 */
211 octrnm_reset(sc);
212 sc->sc_rogroup = 0;
213 octrnm_raw_entropy(sc, sc->sc_rogroup);
214 octrnm_delay(ENT_DELAY_CLOCK*RNG_FIFO_WORDS);
215
216 /* Attach the rndsource. */
217 rndsource_setcb(&sc->sc_rndsrc, octrnm_rng, sc);
218 rnd_attach_source(&sc->sc_rndsrc, device_xname(self), RND_TYPE_RNG,
219 RND_FLAG_DEFAULT | RND_FLAG_HASCB);
220 }
221
222 static void
223 octrnm_rng(size_t nbytes, void *vsc)
224 {
225 const unsigned BPB = 256; /* bits of data per bit of entropy */
226 struct octrnm_softc *sc = vsc;
227 uint64_t *samplepos;
228 size_t needed = NBBY*nbytes;
229 unsigned i;
230
231 /* Sample the ring oscillators round-robin. */
232 mutex_enter(&sc->sc_lock);
233 while (needed) {
234 /*
235 * Switch to the next RO group once we drain the FIFO.
236 * By the time rnd_add_data is done, we will have
237 * processed all 512 bytes of the FIFO. We assume it
238 * takes at least one cycle per byte (realistically,
239 * more like ~80cpb to draw from the FIFO and then
240 * process it with rnd_add_data), so there is no need
241 * for any other delays.
242 */
243 sc->sc_rogroup++;
244 sc->sc_rogroup %= NROGROUPS;
245 octrnm_raw_entropy(sc, sc->sc_rogroup);
246
247 /*
248 * Gather quarter the FIFO at a time -- we are limited
249 * to 128 bytes because of limits on the CVMSEG buffer.
250 */
251 CTASSERT(sizeof sc->sc_sample == 512);
252 CTASSERT(__arraycount(sc->sc_sample) == RNG_FIFO_WORDS);
253 for (samplepos = sc->sc_sample, i = 0; i < 4; i++) {
254 octrnm_iobdma(sc, samplepos, RNG_FIFO_WORDS / 4);
255 samplepos += RNG_FIFO_WORDS / 4;
256 }
257 #ifdef OCTRNM_DEBUG
258 hexdump(printf, "rnm", sc->sc_sample, sizeof sc->sc_sample);
259 #endif
260 rnd_add_data_sync(&sc->sc_rndsrc, sc->sc_sample,
261 sizeof sc->sc_sample, NBBY*sizeof(sc->sc_sample)/BPB);
262 needed -= MIN(needed, MAX(1, NBBY*sizeof(sc->sc_sample)/BPB));
263
264 /* Yield if requested. */
265 if (__predict_false(curcpu()->ci_schedstate.spc_flags &
266 SPCF_SHOULDYIELD)) {
267 mutex_exit(&sc->sc_lock);
268 preempt();
269 mutex_enter(&sc->sc_lock);
270 }
271 }
272 mutex_exit(&sc->sc_lock);
273
274 /* Zero the sample. */
275 explicit_memset(sc->sc_sample, 0, sizeof sc->sc_sample);
276 }
277
278 /*
279 * octrnm_reset(sc)
280 *
281 * Reset the RNM unit, disabling it and clearing the FIFO.
282 */
283 static void
284 octrnm_reset(struct octrnm_softc *sc)
285 {
286
287 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
288 RNM_CTL_STATUS_RNG_RST|RNM_CTL_STATUS_RNM_RST);
289 }
290
291 /*
292 * octrnm_conditioned_deterministic(sc)
293 *
294 * Switch the RNM unit into the deterministic LFSR/SHA-1 mode with
295 * no entropy, for the next data loaded into the FIFO.
296 */
297 static void
298 octrnm_conditioned_deterministic(struct octrnm_softc *sc)
299 {
300
301 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
302 RNM_CTL_STATUS_RNG_EN);
303 }
304
305 /*
306 * octrnm_conditioned_entropy(sc)
307 *
308 * Switch the RNM unit to generate ring oscillator samples
309 * conditioned with an LFSR/SHA-1, for the next data loaded into
310 * the FIFO.
311 */
312 static void __unused
313 octrnm_conditioned_entropy(struct octrnm_softc *sc)
314 {
315
316 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
317 RNM_CTL_STATUS_RNG_EN|RNM_CTL_STATUS_ENT_EN);
318 }
319
320 /*
321 * octrnm_raw_entropy(sc, rogroup)
322 *
323 * Switch the RNM unit to generate raw ring oscillator samples
324 * from the specified group of eight ring oscillator.
325 */
326 static void
327 octrnm_raw_entropy(struct octrnm_softc *sc, unsigned rogroup)
328 {
329 uint64_t ctl = 0;
330
331 ctl |= RNM_CTL_STATUS_RNG_EN; /* enable FIFO */
332 ctl |= RNM_CTL_STATUS_ENT_EN; /* enable entropy source */
333 ctl |= RNM_CTL_STATUS_EXP_ENT; /* expose entropy without LFSR/SHA-1 */
334 ctl |= __SHIFTIN(rogroup, RNM_CTL_STATUS_ENT_SEL_MASK);
335
336 bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
337 ctl);
338 }
339
340 /*
341 * octrnm_load(sc)
342 *
343 * Load a single 64-bit word out of the FIFO.
344 */
345 static uint64_t
346 octrnm_load(struct octrnm_softc *sc)
347 {
348 uint64_t addr =
349 RNM_OPERATION_BASE_IO_BIT |
350 __BITS64_SET(RNM_OPERATION_BASE_MAJOR_DID, 0x08) |
351 __BITS64_SET(RNM_OPERATION_BASE_SUB_DID, 0x00);
352
353 return octeon_xkphys_read_8(addr);
354 }
355
356 /*
357 * octrnm_iobdma(sc, buf, nwords)
358 *
359 * Load nwords, at most 32, out of the FIFO into buf.
360 */
361 static void
362 octrnm_iobdma(struct octrnm_softc *sc, uint64_t *buf, unsigned nwords)
363 {
364 size_t scraddr = OCTEON_CVMSEG_OFFSET(csm_rnm);
365 uint64_t iobdma =
366 __SHIFTIN(scraddr/sizeof(uint64_t), IOBDMA_SCRADDR) |
367 __SHIFTIN(nwords, IOBDMA_LEN) |
368 __SHIFTIN(RNM_IOBDMA_MAJORDID, IOBDMA_MAJORDID) |
369 __SHIFTIN(RNM_IOBDMA_SUBDID, IOBDMA_SUBDID);
370
371 KASSERT(nwords < 128); /* iobdma address restriction */
372 KASSERT(nwords <= 32); /* octeon_cvmseg_map limitation */
373
374 octeon_iobdma_write_8(iobdma);
375 OCTEON_SYNCIOBDMA;
376 for (; nwords --> 0; scraddr += 8)
377 *buf++ = octeon_cvmseg_read_8(scraddr);
378 }
379
/*
 * octrnm_delay(ncycles)
 *
 *	Busy-wait until more than ncycles ticks of the counter read by
 *	mips3_cp0_count_read have elapsed.  ncycles must be at most
 *	UINT32_MAX/2 so we behave reasonably even if the counter rolls
 *	over.
 */
static void
octrnm_delay(uint32_t ncycles)
{
	const uint32_t start = mips3_cp0_count_read();

	KASSERT(ncycles <= UINT32_MAX/2);

	/*
	 * Measure elapsed ticks rather than ticks remaining until a
	 * deadline.  With a deadline, a first re-read that returns the
	 * same value as the starting read leaves exactly ncycles
	 * remaining, which a `remaining < ncycles' test treats as
	 * already expired, so the delay would be skipped entirely.
	 * The unsigned subtraction is well-defined across rollover.
	 */
	while ((uint32_t)(mips3_cp0_count_read() - start) <= ncycles)
		continue;
}
396