octeon_rnm.c revision 1.15 1 1.15 riastrad /* $NetBSD: octeon_rnm.c,v 1.15 2022/03/19 11:55:03 riastradh Exp $ */
2 1.1 hikaru
3 1.1 hikaru /*
4 1.1 hikaru * Copyright (c) 2007 Internet Initiative Japan, Inc.
5 1.1 hikaru * All rights reserved.
6 1.1 hikaru *
7 1.1 hikaru * Redistribution and use in source and binary forms, with or without
8 1.1 hikaru * modification, are permitted provided that the following conditions
9 1.1 hikaru * are met:
10 1.1 hikaru * 1. Redistributions of source code must retain the above copyright
11 1.1 hikaru * notice, this list of conditions and the following disclaimer.
12 1.1 hikaru * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 hikaru * notice, this list of conditions and the following disclaimer in the
14 1.1 hikaru * documentation and/or other materials provided with the distribution.
15 1.1 hikaru *
16 1.1 hikaru * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
17 1.1 hikaru * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 1.1 hikaru * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 1.1 hikaru * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
20 1.1 hikaru * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 1.1 hikaru * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 1.1 hikaru * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 1.1 hikaru * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 1.1 hikaru * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 1.1 hikaru * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 1.1 hikaru * SUCH DAMAGE.
27 1.1 hikaru */
28 1.1 hikaru
29 1.5 riastrad /*
30 1.5 riastrad * Cavium Octeon Random Number Generator / Random Number Memory `RNM'
31 1.5 riastrad *
32 1.5 riastrad * The RNM unit consists of:
33 1.5 riastrad *
34 1.5 riastrad * 1. 128 ring oscillators
35 1.5 riastrad * 2. an LFSR/SHA-1 conditioner
36 1.5 riastrad * 3. a 512-byte FIFO
37 1.5 riastrad *
38 1.5 riastrad * When the unit is enabled, there are three modes of operation:
39 1.5 riastrad *
40 1.5 riastrad * (a) deterministic: the ring oscillators are disabled and the
41 1.5 riastrad * LFSR/SHA-1 conditioner operates on fixed inputs to give
42 1.5 riastrad * reproducible results for testing,
43 1.5 riastrad *
44 1.5 riastrad * (b) conditioned entropy: the ring oscillators are enabled and
45 1.5 riastrad * samples from them are fed through the LFSR/SHA-1
46 1.5 riastrad * conditioner before being put into the FIFO, and
47 1.5 riastrad *
48 1.5 riastrad * (c) raw entropy: the ring oscillators are enabled, and a group
49 1.5 riastrad * of eight of them selected at any one time is sampled and
50 1.5 riastrad * fed into the FIFO.
51 1.5 riastrad *
52 1.5 riastrad * Details:
53 1.5 riastrad *
54 1.5 riastrad * - The FIFO is refilled whenever we read out of it, either with
55 1.5 riastrad * a load address or an IOBDMA operation.
56 1.5 riastrad *
57 1.5 riastrad * - The conditioner takes 81 cycles to produce a 64-bit block of
58 1.5 riastrad * output in the FIFO whether in deterministic or conditioned
59 1.5 riastrad * entropy mode, each block consisting of the first 64 bits of a
60 1.5 riastrad * SHA-1 hash.
61 1.5 riastrad *
62 1.5 riastrad * - A group of eight ring oscillators take 8 cycles to produce a
63 1.5 riastrad * 64-bit block of output in the FIFO in raw entropy mode, each
64 1.5 riastrad * block consisting of eight consecutive samples from each RO in
65 1.5 riastrad * parallel.
66 1.5 riastrad *
67 1.5 riastrad * The first sample of each RO always seems to be zero. Further,
68 1.5 riastrad * consecutive samples from a single ring oscillator are not
69 1.5 riastrad * independent, so naive debiasing like a von Neumann extractor
70 1.6 riastrad * falls flat on its face. And parallel ring oscillators powered
71 1.6 riastrad * by the same source may not be independent either, if they end
72 1.6 riastrad * up locked.
73 1.6 riastrad *
74 1.6 riastrad * We read out one FIFO's worth of raw samples from groups of 8
75 1.6 riastrad * ring oscillators at a time, of 128 total, by going through them
76 1.6 riastrad * round robin. We take 32 consecutive samples from each ring
77 1.6 riastrad * oscillator in a group of 8 in parallel before we count one bit
 *	of entropy.  Each 4Kbit FIFO read therefore counts as 16 bits
 *	of entropy, so to get 256 bits of entropy, we read 4Kbit of
 *	data from each of sixteen 8-RO groups.
80 1.6 riastrad *
81 1.6 riastrad * We could use the on-board LFSR/SHA-1 conditioner like the Linux
82 1.6 riastrad * driver written by Cavium does, but it's not clear how many RO
83 1.6 riastrad * samples go into the conditioner, and our entropy pool is a
84 1.6 riastrad * perfectly good conditioner itself, so it seems there is little
85 1.6 riastrad * advantage -- other than expedience -- to using the LFSR/SHA-1
86 1.6 riastrad * conditioner. All the manual says is that it samples 125 of the
87 1.6 riastrad * 128 ROs. But the Cavium SHA-1 CPU instruction is advertised to
88 1.6 riastrad * have a latency of 100 cycles, so it seems implausible that much
89 1.6 riastrad * more than one sample from each RO could be squeezed in there.
90 1.6 riastrad *
91 1.6 riastrad * The hardware exposes only 64 bits of each SHA-1 hash, and the
92 1.6 riastrad * Linux driver uses 32 bits of that -- which, if treated as full
93 1.6 riastrad * entropy, would mean an assessment of 3.9 bits of RO samples to
94 1.6 riastrad * get 1 bit of entropy, whereas we take 256 bits of RO samples to
95 1.6 riastrad * get one bit of entropy, so this seems reasonably conservative.
96 1.5 riastrad *
97 1.5 riastrad * Reference: Cavium Networks OCTEON Plus CN50XX Hardware Reference
98 1.5 riastrad * Manual, CN50XX-HM-0.99E PRELIMINARY, July 2008.
99 1.5 riastrad */
100 1.5 riastrad
101 1.1 hikaru #include <sys/cdefs.h>
102 1.15 riastrad __KERNEL_RCSID(0, "$NetBSD: octeon_rnm.c,v 1.15 2022/03/19 11:55:03 riastradh Exp $");
103 1.1 hikaru
104 1.1 hikaru #include <sys/param.h>
105 1.1 hikaru #include <sys/device.h>
106 1.1 hikaru #include <sys/kernel.h>
107 1.1 hikaru #include <sys/rndsource.h>
108 1.4 simonb #include <sys/systm.h>
109 1.1 hikaru
110 1.1 hikaru #include <mips/locore.h>
111 1.12 simonb #include <mips/cavium/octeonreg.h>
112 1.12 simonb #include <mips/cavium/octeonvar.h>
113 1.1 hikaru #include <mips/cavium/include/iobusvar.h>
114 1.1 hikaru #include <mips/cavium/dev/octeon_rnmreg.h>
115 1.1 hikaru #include <mips/cavium/dev/octeon_corereg.h>
116 1.1 hikaru
117 1.1 hikaru #include <sys/bus.h>
118 1.1 hikaru
/*
 * Define OCTRNM_DEBUG to hexdump every batch of raw samples before it
 * is fed to the entropy pool.
 */
/* #define	OCTRNM_DEBUG */

/* Cycles the ring oscillators need per 64-bit batch of raw samples. */
#define	ENT_DELAY_CLOCK		8
/* Cycles the LFSR/SHA-1 conditioner needs per 64-bit output word. */
#define	RNG_DELAY_CLOCK		81
/* Number of ring oscillator groups that can be selected. */
#define	NROGROUPS		16
/* Capacity of the 512-byte FIFO, counted in 64-bit words. */
#define	RNG_FIFO_WORDS		(512 / sizeof(uint64_t))
125 1.1 hikaru
126 1.9 simonb struct octrnm_softc {
127 1.7 simonb uint64_t sc_sample[RNG_FIFO_WORDS];
128 1.1 hikaru bus_space_tag_t sc_bust;
129 1.1 hikaru bus_space_handle_t sc_regh;
130 1.1 hikaru krndsource_t sc_rndsrc; /* /dev/random source */
131 1.5 riastrad unsigned sc_rogroup;
132 1.1 hikaru };
133 1.1 hikaru
134 1.9 simonb static int octrnm_match(device_t, struct cfdata *, void *);
135 1.9 simonb static void octrnm_attach(device_t, device_t, void *);
136 1.9 simonb static void octrnm_rng(size_t, void *);
137 1.9 simonb static void octrnm_reset(struct octrnm_softc *);
138 1.9 simonb static void octrnm_conditioned_deterministic(struct octrnm_softc *);
139 1.9 simonb static void octrnm_conditioned_entropy(struct octrnm_softc *);
140 1.9 simonb static void octrnm_raw_entropy(struct octrnm_softc *, unsigned);
141 1.9 simonb static uint64_t octrnm_load(struct octrnm_softc *);
142 1.9 simonb static void octrnm_iobdma(struct octrnm_softc *, uint64_t *, unsigned);
143 1.9 simonb static void octrnm_delay(uint32_t);
144 1.1 hikaru
145 1.9 simonb CFATTACH_DECL_NEW(octrnm, sizeof(struct octrnm_softc),
146 1.9 simonb octrnm_match, octrnm_attach, NULL, NULL);
147 1.1 hikaru
148 1.1 hikaru static int
149 1.9 simonb octrnm_match(device_t parent, struct cfdata *cf, void *aux)
150 1.1 hikaru {
151 1.1 hikaru struct iobus_attach_args *aa = aux;
152 1.1 hikaru
153 1.1 hikaru if (strcmp(cf->cf_name, aa->aa_name) != 0)
154 1.5 riastrad return 0;
155 1.1 hikaru if (cf->cf_unit != aa->aa_unitno)
156 1.5 riastrad return 0;
157 1.5 riastrad return 1;
158 1.1 hikaru }
159 1.1 hikaru
160 1.1 hikaru static void
161 1.9 simonb octrnm_attach(device_t parent, device_t self, void *aux)
162 1.1 hikaru {
163 1.9 simonb struct octrnm_softc *sc = device_private(self);
164 1.1 hikaru struct iobus_attach_args *aa = aux;
165 1.5 riastrad uint64_t bist_status, sample, expected = UINT64_C(0xd654ff35fadf866b);
166 1.1 hikaru
167 1.1 hikaru aprint_normal("\n");
168 1.1 hikaru
169 1.5 riastrad /* Map the device registers, all two of them. */
170 1.1 hikaru sc->sc_bust = aa->aa_bust;
171 1.3 simonb if (bus_space_map(aa->aa_bust, aa->aa_unit->addr, RNM_SIZE,
172 1.3 simonb 0, &sc->sc_regh) != 0) {
173 1.3 simonb aprint_error_dev(self, "unable to map device\n");
174 1.3 simonb return;
175 1.3 simonb }
176 1.3 simonb
177 1.5 riastrad /* Verify that the built-in self-test succeeded. */
178 1.3 simonb bist_status = bus_space_read_8(sc->sc_bust, sc->sc_regh,
179 1.3 simonb RNM_BIST_STATUS_OFFSET);
180 1.3 simonb if (bist_status) {
181 1.3 simonb aprint_error_dev(self, "RNG built in self test failed: %#lx\n",
182 1.3 simonb bist_status);
183 1.3 simonb return;
184 1.3 simonb }
185 1.1 hikaru
186 1.4 simonb /*
187 1.5 riastrad * Reset the core, enable the RNG engine without entropy, wait
188 1.5 riastrad * 81 cycles for it to produce a single sample, and draw the
189 1.5 riastrad * deterministic sample to test.
190 1.4 simonb *
191 1.5 riastrad * XXX Verify that the output matches the SHA-1 computation
192 1.5 riastrad * described by the data sheet, not just a known answer.
193 1.4 simonb */
194 1.9 simonb octrnm_reset(sc);
195 1.9 simonb octrnm_conditioned_deterministic(sc);
196 1.9 simonb octrnm_delay(RNG_DELAY_CLOCK*1);
197 1.9 simonb sample = octrnm_load(sc);
198 1.5 riastrad if (sample != expected)
199 1.5 riastrad aprint_error_dev(self, "self-test: read %016"PRIx64","
200 1.5 riastrad " expected %016"PRIx64, sample, expected);
201 1.4 simonb
202 1.4 simonb /*
203 1.5 riastrad * Reset the core again to clear the FIFO, and enable the RNG
204 1.5 riastrad * engine with entropy exposed directly. Start from the first
205 1.5 riastrad * group of ring oscillators; as we gather samples we will
206 1.5 riastrad * rotate through the rest of them.
207 1.4 simonb */
208 1.9 simonb octrnm_reset(sc);
209 1.5 riastrad sc->sc_rogroup = 0;
210 1.9 simonb octrnm_raw_entropy(sc, sc->sc_rogroup);
211 1.9 simonb octrnm_delay(ENT_DELAY_CLOCK*RNG_FIFO_WORDS);
212 1.4 simonb
213 1.5 riastrad /* Attach the rndsource. */
214 1.9 simonb rndsource_setcb(&sc->sc_rndsrc, octrnm_rng, sc);
215 1.4 simonb rnd_attach_source(&sc->sc_rndsrc, device_xname(self), RND_TYPE_RNG,
216 1.4 simonb RND_FLAG_DEFAULT | RND_FLAG_HASCB);
217 1.1 hikaru }
218 1.1 hikaru
219 1.1 hikaru static void
220 1.9 simonb octrnm_rng(size_t nbytes, void *vsc)
221 1.1 hikaru {
222 1.6 riastrad const unsigned BPB = 256; /* bits of data per bit of entropy */
223 1.9 simonb struct octrnm_softc *sc = vsc;
224 1.7 simonb uint64_t *samplepos;
225 1.5 riastrad size_t needed = NBBY*nbytes;
226 1.5 riastrad unsigned i;
227 1.1 hikaru
228 1.5 riastrad /* Sample the ring oscillators round-robin. */
229 1.5 riastrad while (needed) {
230 1.5 riastrad /*
231 1.5 riastrad * Switch to the next RO group once we drain the FIFO.
232 1.5 riastrad * By the time rnd_add_data is done, we will have
233 1.5 riastrad * processed all 512 bytes of the FIFO. We assume it
234 1.5 riastrad * takes at least one cycle per byte (realistically,
235 1.5 riastrad * more like ~80cpb to draw from the FIFO and then
236 1.5 riastrad * process it with rnd_add_data), so there is no need
237 1.5 riastrad * for any other delays.
238 1.5 riastrad */
239 1.5 riastrad sc->sc_rogroup++;
240 1.5 riastrad sc->sc_rogroup %= NROGROUPS;
241 1.9 simonb octrnm_raw_entropy(sc, sc->sc_rogroup);
242 1.5 riastrad
243 1.1 hikaru /*
244 1.7 simonb * Gather quarter the FIFO at a time -- we are limited
245 1.7 simonb * to 128 bytes because of limits on the CVMSEG buffer.
246 1.1 hikaru */
247 1.7 simonb CTASSERT(sizeof sc->sc_sample == 512);
248 1.7 simonb CTASSERT(__arraycount(sc->sc_sample) == RNG_FIFO_WORDS);
249 1.7 simonb for (samplepos = sc->sc_sample, i = 0; i < 4; i++) {
250 1.9 simonb octrnm_iobdma(sc, samplepos, RNG_FIFO_WORDS / 4);
251 1.7 simonb samplepos += RNG_FIFO_WORDS / 4;
252 1.7 simonb }
253 1.9 simonb #ifdef OCTRNM_DEBUG
254 1.7 simonb hexdump(printf, "rnm", sc->sc_sample, sizeof sc->sc_sample);
255 1.5 riastrad #endif
256 1.7 simonb rnd_add_data_sync(&sc->sc_rndsrc, sc->sc_sample,
257 1.7 simonb sizeof sc->sc_sample, NBBY*sizeof(sc->sc_sample)/BPB);
258 1.7 simonb needed -= MIN(needed, MAX(1, NBBY*sizeof(sc->sc_sample)/BPB));
259 1.5 riastrad
260 1.15 riastrad /* Now's a good time to yield. */
261 1.15 riastrad preempt_point();
262 1.1 hikaru }
263 1.5 riastrad
264 1.5 riastrad /* Zero the sample. */
265 1.7 simonb explicit_memset(sc->sc_sample, 0, sizeof sc->sc_sample);
266 1.1 hikaru }
267 1.1 hikaru
268 1.5 riastrad /*
269 1.9 simonb * octrnm_reset(sc)
270 1.5 riastrad *
271 1.5 riastrad * Reset the RNM unit, disabling it and clearing the FIFO.
272 1.5 riastrad */
273 1.5 riastrad static void
274 1.9 simonb octrnm_reset(struct octrnm_softc *sc)
275 1.5 riastrad {
276 1.5 riastrad
277 1.5 riastrad bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
278 1.5 riastrad RNM_CTL_STATUS_RNG_RST|RNM_CTL_STATUS_RNM_RST);
279 1.5 riastrad }
280 1.5 riastrad
281 1.5 riastrad /*
282 1.9 simonb * octrnm_conditioned_deterministic(sc)
283 1.5 riastrad *
284 1.5 riastrad * Switch the RNM unit into the deterministic LFSR/SHA-1 mode with
285 1.5 riastrad * no entropy, for the next data loaded into the FIFO.
286 1.5 riastrad */
287 1.5 riastrad static void
288 1.9 simonb octrnm_conditioned_deterministic(struct octrnm_softc *sc)
289 1.5 riastrad {
290 1.5 riastrad
291 1.5 riastrad bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
292 1.5 riastrad RNM_CTL_STATUS_RNG_EN);
293 1.5 riastrad }
294 1.5 riastrad
295 1.5 riastrad /*
296 1.9 simonb * octrnm_conditioned_entropy(sc)
297 1.5 riastrad *
298 1.5 riastrad * Switch the RNM unit to generate ring oscillator samples
299 1.5 riastrad * conditioned with an LFSR/SHA-1, for the next data loaded into
300 1.5 riastrad * the FIFO.
301 1.5 riastrad */
302 1.5 riastrad static void __unused
303 1.9 simonb octrnm_conditioned_entropy(struct octrnm_softc *sc)
304 1.5 riastrad {
305 1.5 riastrad
306 1.5 riastrad bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
307 1.5 riastrad RNM_CTL_STATUS_RNG_EN|RNM_CTL_STATUS_ENT_EN);
308 1.5 riastrad }
309 1.5 riastrad
310 1.5 riastrad /*
311 1.9 simonb * octrnm_raw_entropy(sc, rogroup)
312 1.5 riastrad *
313 1.5 riastrad * Switch the RNM unit to generate raw ring oscillator samples
314 1.5 riastrad * from the specified group of eight ring oscillator.
315 1.5 riastrad */
316 1.5 riastrad static void
317 1.9 simonb octrnm_raw_entropy(struct octrnm_softc *sc, unsigned rogroup)
318 1.5 riastrad {
319 1.5 riastrad uint64_t ctl = 0;
320 1.5 riastrad
321 1.5 riastrad ctl |= RNM_CTL_STATUS_RNG_EN; /* enable FIFO */
322 1.5 riastrad ctl |= RNM_CTL_STATUS_ENT_EN; /* enable entropy source */
323 1.5 riastrad ctl |= RNM_CTL_STATUS_EXP_ENT; /* expose entropy without LFSR/SHA-1 */
324 1.5 riastrad ctl |= __SHIFTIN(rogroup, RNM_CTL_STATUS_ENT_SEL_MASK);
325 1.5 riastrad
326 1.5 riastrad bus_space_write_8(sc->sc_bust, sc->sc_regh, RNM_CTL_STATUS_OFFSET,
327 1.5 riastrad ctl);
328 1.5 riastrad }
329 1.5 riastrad
330 1.5 riastrad /*
331 1.9 simonb * octrnm_load(sc)
332 1.5 riastrad *
333 1.5 riastrad * Load a single 64-bit word out of the FIFO.
334 1.5 riastrad */
335 1.4 simonb static uint64_t
336 1.9 simonb octrnm_load(struct octrnm_softc *sc)
337 1.1 hikaru {
338 1.12 simonb uint64_t addr = OCTEON_ADDR_IO_DID(RNM_MAJOR_DID, RNM_SUB_DID);
339 1.1 hikaru
340 1.1 hikaru return octeon_xkphys_read_8(addr);
341 1.1 hikaru }
342 1.5 riastrad
343 1.5 riastrad /*
344 1.9 simonb * octrnm_iobdma(sc, buf, nwords)
345 1.5 riastrad *
346 1.5 riastrad * Load nwords, at most 32, out of the FIFO into buf.
347 1.5 riastrad */
348 1.5 riastrad static void
349 1.9 simonb octrnm_iobdma(struct octrnm_softc *sc, uint64_t *buf, unsigned nwords)
350 1.5 riastrad {
351 1.12 simonb /* ``scraddr'' part is index in 64-bit words, not address */
352 1.5 riastrad size_t scraddr = OCTEON_CVMSEG_OFFSET(csm_rnm);
353 1.12 simonb uint64_t iobdma = IOBDMA_CREATE(RNM_MAJOR_DID, RNM_SUB_DID,
354 1.12 simonb scraddr / sizeof(uint64_t), nwords, 0);
355 1.5 riastrad
356 1.10 simonb KASSERT(nwords < 128); /* iobdma address restriction */
357 1.11 simonb KASSERT(nwords <= CVMSEG_LM_RNM_SIZE); /* size of CVMSEG LM buffer */
358 1.5 riastrad
359 1.5 riastrad octeon_iobdma_write_8(iobdma);
360 1.5 riastrad OCTEON_SYNCIOBDMA;
361 1.5 riastrad for (; nwords --> 0; scraddr += 8)
362 1.5 riastrad *buf++ = octeon_cvmseg_read_8(scraddr);
363 1.5 riastrad }
364 1.5 riastrad
/*
 * octrnm_delay(ncycles)
 *
 *	Busy-wait until the CP0 count register has advanced by at
 *	least ncycles.  ncycles must be at most UINT32_MAX/2.
 *
 *	The wait is measured as elapsed ticks since the first read
 *	(now - start), computed in 32-bit unsigned arithmetic, so it
 *	stays correct when the counter rolls over.  It also cannot
 *	terminate early if two back-to-back reads of the counter
 *	return the same value.
 */
static void
octrnm_delay(uint32_t ncycles)
{
	const uint32_t start = mips3_cp0_count_read();

	KASSERT(ncycles <= UINT32_MAX/2);

	while ((uint32_t)(mips3_cp0_count_read() - start) < ncycles)
		continue;
}
381