trap.c revision 1.17
1/*	$NetBSD: trap.c,v 1.17 1999/10/26 00:20:39 itohy Exp $	*/
2
3/*
4 * This file was taken from mvme68k/mvme68k/trap.c
5 * should probably be re-synced when needed.
6 * Darrin B. Jewell <jewell@mit.edu> Tue Aug  3 10:53:12 UTC 1999
7 * original cvs id: NetBSD: trap.c,v 1.32 1999/08/03 10:52:06 dbj Exp
8 */
9
10/*
11 * Copyright (c) 1988 University of Utah.
12 * Copyright (c) 1982, 1986, 1990, 1993
13 *	The Regents of the University of California.  All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * the Systems Programming Group of the University of Utah Computer
17 * Science Department.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 * 3. All advertising materials mentioning features or use of this software
28 *    must display the following acknowledgement:
29 *	This product includes software developed by the University of
30 *	California, Berkeley and its contributors.
31 * 4. Neither the name of the University nor the names of its contributors
32 *    may be used to endorse or promote products derived from this software
33 *    without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
46 *
47 * from: Utah $Hdr: trap.c 1.37 92/12/20$
48 *
49 *	@(#)trap.c	8.5 (Berkeley) 1/4/94
50 */
51
52#include "opt_ddb.h"
53#include "opt_execfmt.h"
54#include "opt_ktrace.h"
55#include "opt_compat_netbsd.h"
56#include "opt_compat_sunos.h"
57#include "opt_compat_hpux.h"
58#include "opt_compat_linux.h"
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/proc.h>
63#include <sys/acct.h>
64#include <sys/kernel.h>
65#include <sys/signalvar.h>
66#include <sys/resourcevar.h>
67#include <sys/syscall.h>
68#include <sys/syslog.h>
69#include <sys/user.h>
70#ifdef KTRACE
71#include <sys/ktrace.h>
72#endif
73
74#ifdef DEBUG
75#include <dev/cons.h>
76#endif
77
78#include <machine/db_machdep.h>
79#include <machine/psl.h>
80#include <machine/trap.h>
81#include <machine/cpu.h>
82#include <machine/reg.h>
83
84#include <vm/vm.h>
85#include <vm/pmap.h>
86
87#include <m68k/cacheops.h>
88
89#include <uvm/uvm_extern.h>
90
91#ifdef COMPAT_HPUX
92#include <compat/hpux/hpux.h>
93#endif
94
95#ifdef COMPAT_SUNOS
96#include <compat/sunos/sunos_syscall.h>
97extern struct emul emul_sunos;
98#endif
99
100#ifdef COMPAT_LINUX
101#ifdef EXEC_AOUT
102extern struct emul emul_linux_aout;
103#endif
104#ifdef EXEC_ELF32
105extern struct emul emul_linux_elf32;
106#endif
107#endif
108
/*
 * Entry points defined in this file.  writeback() returns 0 on success
 * or a signal number (SIGSEGV) if a pending 68040 writeback could not
 * be completed.  trap() and syscall() receive the exception frame by
 * value.
 */
int	writeback __P((struct frame *fp, int docachepush));
void	trap __P((int type, u_int code, u_int v, struct frame frame));
void	syscall __P((register_t code, struct frame frame));

#ifdef DEBUG
/* Debug-only pretty printers for the 68040 SSW and writeback slots. */
void	dumpssw __P((u_short));
void	dumpwb __P((int, u_short, u_int, u_int));
#endif

/* Common return-to-user-mode work shared by trap() and syscall(). */
static inline void userret __P((struct proc *p, struct frame *fp,
	    u_quad_t oticks, u_int faultaddr, int fromtrap));

/* Cleared by trap() when it starts handling a user AST (T_ASTFLT|T_USER). */
int	astpending;
122
/*
 * Human-readable names for trap types.  trap() indexes this table with
 * the trap type (T_USER bit stripped) to build its panic message.
 */
char	*trap_type[] = {
	"Bus error",			/* index  0 */
	"Address error",		/* index  1 */
	"Illegal instruction",		/* index  2 */
	"Zero divide",			/* index  3 */
	"CHK instruction",		/* index  4 */
	"TRAPV instruction",		/* index  5 */
	"Privilege violation",		/* index  6 */
	"Trace trap",			/* index  7 */
	"MMU fault",			/* index  8 */
	"SSIR trap",			/* index  9 */
	"Format error",			/* index 10 */
	"68881 exception",		/* index 11 */
	"Coprocessor violation",	/* index 12 */
	"Async system trap"		/* index 13 */
};

/* Number of entries in trap_type[]; larger type values have no name. */
int	trap_types = sizeof(trap_type) / sizeof(trap_type[0]);
140
141/*
142 * Size of various exception stack frames (minus the standard 8 bytes)
143 */
144short	exframesize[] = {
145	FMT0SIZE,	/* type 0 - normal (68020/030/040/060) */
146	FMT1SIZE,	/* type 1 - throwaway (68020/030/040) */
147	FMT2SIZE,	/* type 2 - normal 6-word (68020/030/040/060) */
148	FMT3SIZE,	/* type 3 - FP post-instruction (68040/060) */
149	FMT4SIZE,	/* type 4 - access error/fp disabled (68060) */
150	-1, -1,		/* type 5-6 - undefined */
151	FMT7SIZE,	/* type 7 - access error (68040) */
152	58,		/* type 8 - bus fault (68010) */
153	FMT9SIZE,	/* type 9 - coprocessor mid-instruction (68020/030) */
154	FMTASIZE,	/* type A - short bus fault (68020/030) */
155	FMTBSIZE,	/* type B - long bus fault (68020/030) */
156	-1, -1, -1, -1	/* type C-F - undefined */
157};
158
/*
 * Fault classification helpers.  Each takes the fault status word `c'
 * that locore passed to trap() (or a writeback status word) and decodes
 * it according to the CPU the kernel is running on:
 *
 *	KDFAULT(c)  - true if the fault was a supervisor (kernel) data access
 *	WRFAULT(c)  - true if the faulting access was a write
 *
 * The per-CPU variants compile to constant 0 when support for that CPU
 * is not configured, so the combined tests cost nothing for it.
 */
#ifdef M68060
/* 68060: `c' is tested against the FSLW_* bits. */
#define	KDFAULT_060(c)	(cputype == CPU_68060 && ((c) & FSLW_TM_SV))
#define	WRFAULT_060(c)	(cputype == CPU_68060 && ((c) & FSLW_RW_W))
#else
#define	KDFAULT_060(c)	0
#define	WRFAULT_060(c)	0
#endif

#ifdef M68040
/* 68040: `c' is tested against the SSW4_* bits; a clear RW bit is a write. */
#define	KDFAULT_040(c)	(cputype == CPU_68040 && \
			 ((c) & SSW4_TMMASK) == SSW4_TMKD)
#define	WRFAULT_040(c)	(cputype == CPU_68040 && \
			 ((c) & SSW4_RW) == 0)
#else
#define	KDFAULT_040(c)	0
#define	WRFAULT_040(c)	0
#endif

#if defined(M68030) || defined(M68020)
/* 68020/030: only data faults (SSW_DF set) are considered. */
#define	KDFAULT_OTH(c)	(cputype <= CPU_68030 && \
			 ((c) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
#define	WRFAULT_OTH(c)	(cputype <= CPU_68030 && \
			 ((c) & (SSW_DF|SSW_RW)) == SSW_DF)
#else
#define	KDFAULT_OTH(c)	0
#define	WRFAULT_OTH(c)	0
#endif

/* CPU-independent front ends used by trap() and writeback(). */
#define	KDFAULT(c)	(KDFAULT_060(c) || KDFAULT_040(c) || KDFAULT_OTH(c))
#define	WRFAULT(c)	(WRFAULT_060(c) || WRFAULT_040(c) || WRFAULT_OTH(c))
189
#ifdef DEBUG
/*
 * MMU fault debugging knobs.  mmudebug is a mask of the MDB_* bits
 * below; mmupid selects a single process (by pid) whose faults and
 * writebacks are always traced, -1 meaning none.
 */
int mmudebug = 0;
int mmupid = -1;
#define MDB_FOLLOW	1	/* not tested anywhere in this file */
#define MDB_WBFOLLOW	2	/* trace page faults and writebacks */
#define MDB_WBFAILED	4	/* report writebacks that fail */
#define MDB_ISPID(p)	((p) == mmupid)
#endif
198
199
/*
 * Software interrupt dispatch tables.  Each bit number in `ssir' has a
 * handler and an argument; trap() calls sir_routines[bit](sir_args[bit])
 * for every pending bit.  next_sir is the next slot allocate_sir() will
 * hand out.
 */
#define NSIR	32
void (*sir_routines[NSIR])(void *);
void *sir_args[NSIR];
int next_sir;
204
/*
 * trap and syscall both need the following work done before returning
 * to user mode.
 *
 *	p		process returning to user mode
 *	fp		its exception frame
 *	oticks		value of p->p_sticks on kernel entry, used to
 *			charge system time when profiling
 *	faultaddr	fault address, passed to trapsignal() if a 68040
 *			writeback fails
 *	fromtrap	non-zero when called from trap(); passed to
 *			writeback() as its `docachepush' argument
 */
static inline void
userret(p, fp, oticks, faultaddr, fromtrap)
	struct proc *p;
	struct frame *fp;
	u_quad_t oticks;
	u_int faultaddr;
	int fromtrap;
{
	int sig, s;
#ifdef M68040
	int beenhere = 0;	/* set once a writeback has already failed */

again:
#endif
	/* take pending signals */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);
	p->p_priority = p->p_usrpri;
	if (want_resched) {
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we put ourselves on the run queue
		 * but before we mi_switch()'ed, we might not be on the queue
		 * indicated by our priority.
		 */
		s = splstatclock();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		splx(s);
		/* signals may have arrived while we were switched out */
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}

	/*
	 * If profiling, charge system time to the trapped pc.
	 */
	if (p->p_flag & P_PROFIL) {
		extern int psratio;

		addupc_task(p, fp->f_pc,
			    (int)(p->p_sticks - oticks) * psratio);
	}
#ifdef M68040
	/*
	 * Deal with user mode writebacks (from trap, or from sigreturn).
	 * If any writeback fails, go back and attempt signal delivery,
	 * unless we have already been here and attempted the writeback
	 * (e.g. bad address with user ignoring SIGSEGV).  In that case
	 * we just return to the user without successfully completing
	 * the writebacks.  Maybe we should just drop the sucker?
	 */
	if (cputype == CPU_68040 && fp->f_format == FMT7) {
		if (beenhere) {
#ifdef DEBUG
			if (mmudebug & MDB_WBFAILED)
				printf(fromtrap ?
		"pid %d(%s): writeback aborted, pc=%x, fa=%x\n" :
		"pid %d(%s): writeback aborted in sigreturn, pc=%x\n",
				    p->p_pid, p->p_comm, fp->f_pc, faultaddr);
#endif
		} else if ((sig = writeback(fp, fromtrap))) {
			beenhere = 1;
			/* restart the profiling interval for the retry */
			oticks = p->p_sticks;
			trapsignal(p, sig, faultaddr);
			goto again;
		}
	}
#endif
	curpriority = p->p_priority;
}
282
283/*
284 * Trap is called from locore to handle most types of processor traps,
285 * including events such as simulated software interrupts/AST's.
286 * System calls are broken out for efficiency.
287 */
288/*ARGSUSED*/
289void
290trap(type, code, v, frame)
291	int type;
292	unsigned code;
293	unsigned v;
294	struct frame frame;
295{
296	extern char fubail[], subail[];
297	struct proc *p;
298	int i, s;
299	u_int ucode;
300	u_quad_t sticks = 0 /* XXX initialiser works around compiler bug */;
301	int bit;
302
303	uvmexp.traps++;
304	p = curproc;
305	ucode = 0;
306
307	/* I have verified that this DOES happen! -gwr */
308	if (p == NULL)
309		p = &proc0;
310#ifdef DIAGNOSTIC
311	if (p->p_addr == NULL)
312		panic("trap: no pcb");
313#endif
314
315	if (USERMODE(frame.f_sr)) {
316		type |= T_USER;
317		sticks = p->p_sticks;
318		p->p_md.md_regs = frame.f_regs;
319	}
320	switch (type) {
321
322	default:
323	dopanic:
324		printf("trap type %d, code = 0x%x, v = 0x%x\n", type, code, v);
325		printf("%s program counter = 0x%x\n",
326		    (type & T_USER) ? "user" : "kernel", frame.f_pc);
327		/*
328		 * Let the kernel debugger see the trap frame that
329		 * caused us to panic.  This is a convenience so
330		 * one can see registers at the point of failure.
331		 */
332		s = splhigh();
333#ifdef KGDB
334		/* If connected, step or cont returns 1 */
335		if (kgdb_trap(type, &frame))
336			goto kgdb_cont;
337#endif
338#ifdef DDB
339		(void)kdb_trap(type, (db_regs_t *)&frame);
340#endif
341#ifdef KGDB
342	kgdb_cont:
343#endif
344		splx(s);
345		if (panicstr) {
346			printf("trap during panic!\n");
347#ifdef DEBUG
348			/* XXX should be a machine-dependent hook */
349			printf("(press a key)\n"); (void)cngetc();
350#endif
351		}
352		regdump((struct trapframe *)&frame, 128);
353		type &= ~T_USER;
354		if ((u_int)type < trap_types)
355			panic(trap_type[type]);
356		panic("trap");
357
358	case T_BUSERR:		/* kernel bus error */
359		if (p->p_addr->u_pcb.pcb_onfault == 0)
360			goto dopanic;
361		/* FALLTHROUGH */
362
363	copyfault:
364		/*
365		 * If we have arranged to catch this fault in any of the
366		 * copy to/from user space routines, set PC to return to
367		 * indicated location and set flag informing buserror code
368		 * that it may need to clean up stack frame.
369		 */
370		frame.f_stackadj = exframesize[frame.f_format];
371		frame.f_format = frame.f_vector = 0;
372		frame.f_pc = (int) p->p_addr->u_pcb.pcb_onfault;
373		return;
374
375	case T_BUSERR|T_USER:	/* bus error */
376	case T_ADDRERR|T_USER:	/* address error */
377		ucode = v;
378		i = SIGBUS;
379		break;
380
381	case T_COPERR:		/* kernel coprocessor violation */
382	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
383	case T_FMTERR:		/* ...just in case... */
384	/*
385	 * The user has most likely trashed the RTE or FP state info
386	 * in the stack frame of a signal handler.
387	 */
388		printf("pid %d: kernel %s exception\n", p->p_pid,
389		       type==T_COPERR ? "coprocessor" : "format");
390		type |= T_USER;
391		p->p_sigacts->ps_sigact[SIGILL].sa_handler = SIG_DFL;
392		sigdelset(&p->p_sigignore, SIGILL);
393		sigdelset(&p->p_sigcatch, SIGILL);
394		sigdelset(&p->p_sigmask, SIGILL);
395		i = SIGILL;
396		ucode = frame.f_format;	/* XXX was ILL_RESAD_FAULT */
397		break;
398
399	case T_COPERR|T_USER:	/* user coprocessor violation */
400	/* What is a proper response here? */
401		ucode = 0;
402		i = SIGFPE;
403		break;
404
405	case T_FPERR|T_USER:	/* 68881 exceptions */
406	/*
407	 * We pass along the 68881 status register which locore stashed
408	 * in code for us.  Note that there is a possibility that the
409	 * bit pattern of this register will conflict with one of the
410	 * FPE_* codes defined in signal.h.  Fortunately for us, the
411	 * only such codes we use are all in the range 1-7 and the low
412	 * 3 bits of the status register are defined as 0 so there is
413	 * no clash.
414	 */
415		ucode = code;
416		i = SIGFPE;
417		break;
418
419#ifdef M68040
420	case T_FPEMULI|T_USER:	/* unimplemented FP instuction */
421	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
422		/* XXX need to FSAVE */
423		printf("pid %d(%s): unimplemented FP %s at %x (EA %x)\n",
424		       p->p_pid, p->p_comm,
425		       frame.f_format == 2 ? "instruction" : "data type",
426		       frame.f_pc, frame.f_fmt2.f_iaddr);
427		/* XXX need to FRESTORE */
428		i = SIGFPE;
429		break;
430#endif
431
432	case T_ILLINST|T_USER:	/* illegal instruction fault */
433#ifdef COMPAT_HPUX
434		if (p->p_emul == &emul_hpux) {
435			ucode = HPUX_ILL_ILLINST_TRAP;
436			i = SIGILL;
437			break;
438		}
439		/* fall through */
440#endif
441	case T_PRIVINST|T_USER:	/* privileged instruction fault */
442#ifdef COMPAT_HPUX
443		if (p->p_emul == &emul_hpux)
444			ucode = HPUX_ILL_PRIV_TRAP;
445		else
446#endif
447		ucode = frame.f_format;	/* XXX was ILL_PRIVIN_FAULT */
448		i = SIGILL;
449		break;
450
451	case T_ZERODIV|T_USER:	/* Divide by zero */
452#ifdef COMPAT_HPUX
453		if (p->p_emul == &emul_hpux)
454			ucode = HPUX_FPE_INTDIV_TRAP;
455		else
456#endif
457		ucode = frame.f_format;	/* XXX was FPE_INTDIV_TRAP */
458		i = SIGFPE;
459		break;
460
461	case T_CHKINST|T_USER:	/* CHK instruction trap */
462#ifdef COMPAT_HPUX
463		if (p->p_emul == &emul_hpux) {
464			/* handled differently under hp-ux */
465			i = SIGILL;
466			ucode = HPUX_ILL_CHK_TRAP;
467			break;
468		}
469#endif
470		ucode = frame.f_format;	/* XXX was FPE_SUBRNG_TRAP */
471		i = SIGFPE;
472		break;
473
474	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
475#ifdef COMPAT_HPUX
476		if (p->p_emul == &emul_hpux) {
477			/* handled differently under hp-ux */
478			i = SIGILL;
479			ucode = HPUX_ILL_TRAPV_TRAP;
480			break;
481		}
482#endif
483		ucode = frame.f_format;	/* XXX was FPE_INTOVF_TRAP */
484		i = SIGFPE;
485		break;
486
487	/*
488	 * XXX: Trace traps are a nightmare.
489	 *
490	 *	HP-UX uses trap #1 for breakpoints,
491	 *	NetBSD/m68k uses trap #2,
492	 *	SUN 3.x uses trap #15,
493	 *	DDB and KGDB uses trap #15 (for kernel breakpoints;
494	 *	handled elsewhere).
495	 *
496	 * NetBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
497	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
498	 * supported yet.
499	 *
500	 * XXX: We should never get kernel-mode T_TRAP15
501	 * XXX: because locore.s now gives them special treatment.
502	 */
503	case T_TRAP15:		/* kernel breakpoint */
504#ifdef DEBUG
505		printf("unexpected kernel trace trap, type = %d\n", type);
506		printf("program counter = 0x%x\n", frame.f_pc);
507#endif
508		frame.f_sr &= ~PSL_T;
509		return;
510
511	case T_TRACE|T_USER:	/* user trace trap */
512#ifdef COMPAT_SUNOS
513		/*
514		 * SunOS uses Trap #2 for a "CPU cache flush".
515		 * Just flush the on-chip caches and return.
516		 */
517		if (p->p_emul == &emul_sunos) {
518			ICIA();
519			DCIU();
520			return;
521		}
522#endif
523		/* FALLTHROUGH */
524	case T_TRACE:		/* tracing a trap instruction */
525	case T_TRAP15|T_USER:	/* SUN user trace trap */
526		frame.f_sr &= ~PSL_T;
527		i = SIGTRAP;
528		break;
529
530	case T_ASTFLT:		/* system async trap, cannot happen */
531		goto dopanic;
532
533	case T_ASTFLT|T_USER:	/* user async trap */
534		astpending = 0;
535		/*
536		 * We check for software interrupts first.  This is because
537		 * they are at a higher level than ASTs, and on a VAX would
538		 * interrupt the AST.  We assume that if we are processing
539		 * an AST that we must be at IPL0 so we don't bother to
540		 * check.  Note that we ensure that we are at least at SIR
541		 * IPL while processing the SIR.
542		 */
543		spl1();
544		/* fall into... */
545
546	case T_SSIR:		/* software interrupt */
547	case T_SSIR|T_USER:
548		while ((bit = ffs(ssir))) {
549			--bit;
550			ssir &= ~(1 << bit);
551			uvmexp.softs++;
552			if (sir_routines[bit])
553				sir_routines[bit](sir_args[bit]);
554		}
555		/*
556		 * If this was not an AST trap, we are all done.
557		 */
558		if (type != (T_ASTFLT|T_USER)) {
559			uvmexp.traps--;
560			return;
561		}
562		spl0();
563		if (p->p_flag & P_OWEUPC) {
564			p->p_flag &= ~P_OWEUPC;
565			ADDUPROF(p);
566		}
567		goto out;
568
569	case T_MMUFLT:		/* kernel mode page fault */
570		/*
571		 * If we were doing profiling ticks or other user mode
572		 * stuff from interrupt code, Just Say No.
573		 */
574		if (p->p_addr->u_pcb.pcb_onfault == fubail ||
575		    p->p_addr->u_pcb.pcb_onfault == subail)
576			goto copyfault;
577		/* fall into ... */
578
579	case T_MMUFLT|T_USER:	/* page fault */
580	    {
581		vaddr_t va;
582		struct vmspace *vm = p->p_vmspace;
583		vm_map_t map;
584		int rv;
585		vm_prot_t ftype;
586		extern vm_map_t kernel_map;
587
588#ifdef DEBUG
589		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
590		printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n",
591		       p->p_pid, code, v, frame.f_pc, frame.f_sr);
592#endif
593		/*
594		 * It is only a kernel address space fault iff:
595		 * 	1. (type & T_USER) == 0  and
596		 * 	2. pcb_onfault not set or
597		 *	3. pcb_onfault set but supervisor space data fault
598		 * The last can occur during an exec() copyin where the
599		 * argument space is lazy-allocated.
600		 */
601		if ((type & T_USER) == 0 &&
602		    ((p->p_addr->u_pcb.pcb_onfault == 0) || KDFAULT(code)))
603			map = kernel_map;
604		else
605			map = vm ? &vm->vm_map : kernel_map;
606
607		if (WRFAULT(code))
608			ftype = VM_PROT_READ | VM_PROT_WRITE;
609		else
610			ftype = VM_PROT_READ;
611
612		va = trunc_page((vaddr_t)v);
613
614		if (map == kernel_map && va == 0) {
615			printf("trap: bad kernel %s access at 0x%x\n",
616			    (ftype & VM_PROT_WRITE) ? "read/write" :
617			    "read", v);
618			goto dopanic;
619		}
620
621#ifdef COMPAT_HPUX
622		if (ISHPMMADDR(va)) {
623			int pmap_mapmulti __P((pmap_t, vaddr_t));
624			vaddr_t bva;
625
626			rv = pmap_mapmulti(map->pmap, va);
627			if (rv != KERN_SUCCESS) {
628				bva = HPMMBASEADDR(va);
629				rv = uvm_fault(map, bva, 0, ftype);
630				if (rv == KERN_SUCCESS)
631					(void) pmap_mapmulti(map->pmap, va);
632			}
633		} else
634#endif
635		rv = uvm_fault(map, va, 0, ftype);
636#ifdef DEBUG
637		if (rv && MDB_ISPID(p->p_pid))
638			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
639			    map, va, ftype, rv);
640#endif
641		/*
642		 * If this was a stack access we keep track of the maximum
643		 * accessed stack size.  Also, if vm_fault gets a protection
644		 * failure it is due to accessing the stack region outside
645		 * the current limit and we need to reflect that as an access
646		 * error.
647		 */
648		if ((vm != NULL && (caddr_t)va >= vm->vm_maxsaddr)
649		    && map != kernel_map) {
650			if (rv == KERN_SUCCESS) {
651				unsigned nss;
652
653				nss = clrnd(btoc(USRSTACK-(unsigned)va));
654				if (nss > vm->vm_ssize)
655					vm->vm_ssize = nss;
656			} else if (rv == KERN_PROTECTION_FAILURE)
657				rv = KERN_INVALID_ADDRESS;
658		}
659		if (rv == KERN_SUCCESS) {
660			if (type == T_MMUFLT) {
661#ifdef M68040
662				if (cputype == CPU_68040)
663					(void) writeback(&frame, 1);
664#endif
665				return;
666			}
667			goto out;
668		}
669		if (type == T_MMUFLT) {
670			if (p->p_addr->u_pcb.pcb_onfault)
671				goto copyfault;
672			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
673			    map, va, ftype, rv);
674			printf("  type %x, code [mmu,,ssw]: %x\n",
675			       type, code);
676			goto dopanic;
677		}
678		ucode = v;
679		if (rv == KERN_RESOURCE_SHORTAGE) {
680			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
681			       p->p_pid, p->p_comm,
682			       p->p_cred && p->p_ucred ?
683			       p->p_ucred->cr_uid : -1);
684			i = SIGKILL;
685		} else {
686			i = SIGSEGV;
687		}
688		break;
689	    }
690	}
691	trapsignal(p, i, ucode);
692	if ((type & T_USER) == 0)
693		return;
694out:
695	userret(p, &frame, sticks, v, 1);
696}
697
698#ifdef M68040
#ifdef DEBUG
/* Counters maintained by writeback() when DEBUG is configured. */
struct writebackstats {
	int calls;		/* invocations of writeback() */
	int cpushes;		/* dcache push faults seen */
	int move16s;		/* MOVE16 faults seen */
	int wb1s, wb2s, wb3s;	/* valid writebacks seen, per slot */
	int wbsize[4];		/* writebacks by size code (index as f7sz[]) */
} wbstats;

/*
 * Decoding tables for the 68040 status words, indexed by the SZ, TT
 * and TM fields respectively (see dumpssw() and dumpwb()).
 */
char *f7sz[] = { "longword", "byte", "word", "line" };
char *f7tt[] = { "normal", "MOVE16", "AFC", "ACK" };
char *f7tm[] = { "d-push", "u-data", "u-code", "M-data",
		 "M-code", "k-data", "k-code", "RES" };
/* printf format shared by all "writeback failed" reports. */
char wberrstr[] =
    "WARNING: pid %d(%s) writeback [%s] failed, pc=%x fa=%x wba=%x wbd=%x\n";
#endif
715
716int
717writeback(fp, docachepush)
718	struct frame *fp;
719	int docachepush;
720{
721	struct fmt7 *f = &fp->f_fmt7;
722	struct proc *p = curproc;
723	int err = 0;
724	u_int fa;
725	caddr_t oonfault = p->p_addr->u_pcb.pcb_onfault;
726	paddr_t pa;
727
728#ifdef DEBUG
729	if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
730		printf(" pid=%d, fa=%x,", p->p_pid, f->f_fa);
731		dumpssw(f->f_ssw);
732	}
733	wbstats.calls++;
734#endif
735	/*
736	 * Deal with special cases first.
737	 */
738	if ((f->f_ssw & SSW4_TMMASK) == SSW4_TMDCP) {
739		/*
740		 * Dcache push fault.
741		 * Line-align the address and write out the push data to
742		 * the indicated physical address.
743		 */
744#ifdef DEBUG
745		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
746			printf(" pushing %s to PA %x, data %x",
747			       f7sz[(f->f_ssw & SSW4_SZMASK) >> 5],
748			       f->f_fa, f->f_pd0);
749			if ((f->f_ssw & SSW4_SZMASK) == SSW4_SZLN)
750				printf("/%x/%x/%x",
751				       f->f_pd1, f->f_pd2, f->f_pd3);
752			printf("\n");
753		}
754		if (f->f_wb1s & SSW4_WBSV)
755			panic("writeback: cache push with WB1S valid");
756		wbstats.cpushes++;
757#endif
758		/*
759		 * XXX there are security problems if we attempt to do a
760		 * cache push after a signal handler has been called.
761		 */
762		if (docachepush) {
763			pmap_enter(pmap_kernel(), (vaddr_t)vmmap,
764			    trunc_page(f->f_fa), VM_PROT_WRITE, TRUE,
765			    VM_PROT_WRITE);
766			fa = (u_int)&vmmap[(f->f_fa & PGOFSET) & ~0xF];
767			bcopy((caddr_t)&f->f_pd0, (caddr_t)fa, 16);
768			(void) pmap_extract(pmap_kernel(), (vaddr_t)fa, &pa);
769			DCFL(pa);
770			pmap_remove(pmap_kernel(), (vaddr_t)vmmap,
771				    (vaddr_t)&vmmap[NBPG]);
772		} else
773			printf("WARNING: pid %d(%s) uid %d: CPUSH not done\n",
774			       p->p_pid, p->p_comm, p->p_ucred->cr_uid);
775	} else if ((f->f_ssw & (SSW4_RW|SSW4_TTMASK)) == SSW4_TTM16) {
776		/*
777		 * MOVE16 fault.
778		 * Line-align the address and write out the push data to
779		 * the indicated virtual address.
780		 */
781#ifdef DEBUG
782		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
783			printf(" MOVE16 to VA %x(%x), data %x/%x/%x/%x\n",
784			       f->f_fa, f->f_fa & ~0xF, f->f_pd0, f->f_pd1,
785			       f->f_pd2, f->f_pd3);
786		if (f->f_wb1s & SSW4_WBSV)
787			panic("writeback: MOVE16 with WB1S valid");
788		wbstats.move16s++;
789#endif
790		if (KDFAULT(f->f_wb1s))
791			bcopy((caddr_t)&f->f_pd0, (caddr_t)(f->f_fa & ~0xF), 16);
792		else
793			err = suline((caddr_t)(f->f_fa & ~0xF), (caddr_t)&f->f_pd0);
794		if (err) {
795			fa = f->f_fa & ~0xF;
796#ifdef DEBUG
797			if (mmudebug & MDB_WBFAILED)
798				printf(wberrstr, p->p_pid, p->p_comm,
799				       "MOVE16", fp->f_pc, f->f_fa,
800				       f->f_fa & ~0xF, f->f_pd0);
801#endif
802		}
803	} else if (f->f_wb1s & SSW4_WBSV) {
804		/*
805		 * Writeback #1.
806		 * Position the "memory-aligned" data and write it out.
807		 */
808		u_int wb1d = f->f_wb1d;
809		int off;
810
811#ifdef DEBUG
812		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
813			dumpwb(1, f->f_wb1s, f->f_wb1a, f->f_wb1d);
814		wbstats.wb1s++;
815		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
816#endif
817		off = (f->f_wb1a & 3) * 8;
818		switch (f->f_wb1s & SSW4_SZMASK) {
819		case SSW4_SZLW:
820			if (off)
821				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
822			if (KDFAULT(f->f_wb1s))
823				*(long *)f->f_wb1a = wb1d;
824			else
825				err = suword((caddr_t)f->f_wb1a, wb1d);
826			break;
827		case SSW4_SZB:
828			off = 24 - off;
829			if (off)
830				wb1d >>= off;
831			if (KDFAULT(f->f_wb1s))
832				*(char *)f->f_wb1a = wb1d;
833			else
834				err = subyte((caddr_t)f->f_wb1a, wb1d);
835			break;
836		case SSW4_SZW:
837			off = (off + 16) % 32;
838			if (off)
839				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
840			if (KDFAULT(f->f_wb1s))
841				*(short *)f->f_wb1a = wb1d;
842			else
843				err = susword((caddr_t)f->f_wb1a, wb1d);
844			break;
845		}
846		if (err) {
847			fa = f->f_wb1a;
848#ifdef DEBUG
849			if (mmudebug & MDB_WBFAILED)
850				printf(wberrstr, p->p_pid, p->p_comm,
851				       "#1", fp->f_pc, f->f_fa,
852				       f->f_wb1a, f->f_wb1d);
853#endif
854		}
855	}
856	/*
857	 * Deal with the "normal" writebacks.
858	 *
859	 * XXX writeback2 is known to reflect a LINE size writeback after
860	 * a MOVE16 was already dealt with above.  Ignore it.
861	 */
862	if (err == 0 && (f->f_wb2s & SSW4_WBSV) &&
863	    (f->f_wb2s & SSW4_SZMASK) != SSW4_SZLN) {
864#ifdef DEBUG
865		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
866			dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
867		wbstats.wb2s++;
868		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
869#endif
870		switch (f->f_wb2s & SSW4_SZMASK) {
871		case SSW4_SZLW:
872			if (KDFAULT(f->f_wb2s))
873				*(long *)f->f_wb2a = f->f_wb2d;
874			else
875				err = suword((caddr_t)f->f_wb2a, f->f_wb2d);
876			break;
877		case SSW4_SZB:
878			if (KDFAULT(f->f_wb2s))
879				*(char *)f->f_wb2a = f->f_wb2d;
880			else
881				err = subyte((caddr_t)f->f_wb2a, f->f_wb2d);
882			break;
883		case SSW4_SZW:
884			if (KDFAULT(f->f_wb2s))
885				*(short *)f->f_wb2a = f->f_wb2d;
886			else
887				err = susword((caddr_t)f->f_wb2a, f->f_wb2d);
888			break;
889		}
890		if (err) {
891			fa = f->f_wb2a;
892#ifdef DEBUG
893			if (mmudebug & MDB_WBFAILED) {
894				printf(wberrstr, p->p_pid, p->p_comm,
895				       "#2", fp->f_pc, f->f_fa,
896				       f->f_wb2a, f->f_wb2d);
897				dumpssw(f->f_ssw);
898				dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
899			}
900#endif
901		}
902	}
903	if (err == 0 && (f->f_wb3s & SSW4_WBSV)) {
904#ifdef DEBUG
905		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
906			dumpwb(3, f->f_wb3s, f->f_wb3a, f->f_wb3d);
907		wbstats.wb3s++;
908		wbstats.wbsize[(f->f_wb3s&SSW4_SZMASK)>>5]++;
909#endif
910		switch (f->f_wb3s & SSW4_SZMASK) {
911		case SSW4_SZLW:
912			if (KDFAULT(f->f_wb3s))
913				*(long *)f->f_wb3a = f->f_wb3d;
914			else
915				err = suword((caddr_t)f->f_wb3a, f->f_wb3d);
916			break;
917		case SSW4_SZB:
918			if (KDFAULT(f->f_wb3s))
919				*(char *)f->f_wb3a = f->f_wb3d;
920			else
921				err = subyte((caddr_t)f->f_wb3a, f->f_wb3d);
922			break;
923		case SSW4_SZW:
924			if (KDFAULT(f->f_wb3s))
925				*(short *)f->f_wb3a = f->f_wb3d;
926			else
927				err = susword((caddr_t)f->f_wb3a, f->f_wb3d);
928			break;
929#ifdef DEBUG
930		case SSW4_SZLN:
931			panic("writeback: wb3s indicates LINE write");
932#endif
933		}
934		if (err) {
935			fa = f->f_wb3a;
936#ifdef DEBUG
937			if (mmudebug & MDB_WBFAILED)
938				printf(wberrstr, p->p_pid, p->p_comm,
939				       "#3", fp->f_pc, f->f_fa,
940				       f->f_wb3a, f->f_wb3d);
941#endif
942		}
943	}
944	p->p_addr->u_pcb.pcb_onfault = oonfault;
945	if (err)
946		err = SIGSEGV;
947	return (err);
948}
949
950#ifdef DEBUG
951void
952dumpssw(ssw)
953	u_short ssw;
954{
955	printf(" SSW: %x: ", ssw);
956	if (ssw & SSW4_CP)
957		printf("CP,");
958	if (ssw & SSW4_CU)
959		printf("CU,");
960	if (ssw & SSW4_CT)
961		printf("CT,");
962	if (ssw & SSW4_CM)
963		printf("CM,");
964	if (ssw & SSW4_MA)
965		printf("MA,");
966	if (ssw & SSW4_ATC)
967		printf("ATC,");
968	if (ssw & SSW4_LK)
969		printf("LK,");
970	if (ssw & SSW4_RW)
971		printf("RW,");
972	printf(" SZ=%s, TT=%s, TM=%s\n",
973	       f7sz[(ssw & SSW4_SZMASK) >> 5],
974	       f7tt[(ssw & SSW4_TTMASK) >> 3],
975	       f7tm[ssw & SSW4_TMMASK]);
976}
977
978void
979dumpwb(num, s, a, d)
980	int num;
981	u_short s;
982	u_int a, d;
983{
984	struct proc *p = curproc;
985	paddr_t pa;
986
987	printf(" writeback #%d: VA %x, data %x, SZ=%s, TT=%s, TM=%s\n",
988	       num, a, d, f7sz[(s & SSW4_SZMASK) >> 5],
989	       f7tt[(s & SSW4_TTMASK) >> 3], f7tm[s & SSW4_TMMASK]);
990	printf("               PA ");
991	if (pmap_extract(p->p_vmspace->vm_map.pmap, (vaddr_t)a, &pa) == FALSE)
992		printf("<invalid address>");
993	else
994		printf("%lx, current value %lx", pa, fuword((caddr_t)a));
995	printf("\n");
996}
997#endif
998#endif
999
/*
 * Process a system call.
 *
 *	code	system call number as supplied by locore (may be
 *		replaced below for SunOS and for the indirect
 *		syscall/__syscall entries)
 *	frame	exception frame, passed by value; the result registers,
 *		carry bit, pc and sp are updated in place
 *
 * Panics if entered from kernel mode.
 */
void
syscall(code, frame)
	register_t code;
	struct frame frame;
{
	caddr_t params;
	struct sysent *callp;
	struct proc *p;
	int error, opc, nsys;
	size_t argsize;
	register_t args[8], rval[2];
	u_quad_t sticks;

	uvmexp.syscalls++;
	if (!USERMODE(frame.f_sr))
		panic("syscall");
	p = curproc;
	sticks = p->p_sticks;
	p->p_md.md_regs = frame.f_regs;
	/* remember the entry pc so ERESTART can back up to the trap */
	opc = frame.f_pc;

	nsys = p->p_emul->e_nsysent;
	callp = p->p_emul->e_sysent;

#ifdef COMPAT_SUNOS
	if (p->p_emul == &emul_sunos) {
		/*
		 * SunOS passes the syscall-number on the stack, whereas
		 * BSD passes it in D0. So, we have to get the real "code"
		 * from the stack, and clean up the stack, as SunOS glue
		 * code assumes the kernel pops the syscall argument the
		 * glue pushed on the stack. Sigh...
		 */
		code = fuword((caddr_t)frame.f_regs[SP]);

		/*
		 * XXX
		 * Don't do this for sunos_sigreturn, as there's no stored pc
		 * on the stack to skip, the argument follows the syscall
		 * number without a gap.
		 */
		if (code != SUNOS_SYS_sigreturn) {
			frame.f_regs[SP] += sizeof (int);
			/*
			 * remember that we adjusted the SP,
			 * might have to undo this if the system call
			 * returns ERESTART.
			 */
			p->p_md.md_flags |= MDP_STACKADJ;
		} else
			p->p_md.md_flags &= ~MDP_STACKADJ;
	}
#endif

	/* arguments start one int above the user stack pointer */
	params = (caddr_t)frame.f_regs[SP] + sizeof(int);

	switch (code) {
	case SYS_syscall:
		/*
		 * Code is first argument, followed by actual args.
		 */
		code = fuword(params);
		params += sizeof(int);
		/*
		 * XXX sigreturn requires special stack manipulation
		 * that is only done if entered via the sigreturn
		 * trap.  Cannot allow it here so make sure we fail.
		 */
		switch (code) {
#ifdef COMPAT_13
		case SYS_compat_13_sigreturn13:
#endif
		case SYS___sigreturn14:
			/* out-of-range code selects the "illegal" entry below */
			code = nsys;
			break;
		}
		break;
	case SYS___syscall:
		/*
		 * Like syscall, but code is a quad, so as to maintain
		 * quad alignment for the rest of the arguments.
		 */
		if (callp != sysent)
			break;
		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
		params += sizeof(quad_t);
		break;
	default:
		break;
	}
	if (code < 0 || code >= nsys)
		callp += p->p_emul->e_nosys;		/* illegal */
	else
		callp += code;
	argsize = callp->sy_argsize;
#ifdef COMPAT_LINUX
	if (0
# ifdef EXEC_AOUT
	    || p->p_emul == &emul_linux_aout
# endif
# ifdef EXEC_ELF32
	    || p->p_emul == &emul_linux_elf32
# endif
	     ) {
		/*
		 * Linux passes the args in d1-d5
		 */
		switch (argsize) {
		case 20:
			args[4] = frame.f_regs[D5];
			/* FALLTHROUGH */
		case 16:
			args[3] = frame.f_regs[D4];
			/* FALLTHROUGH */
		case 12:
			args[2] = frame.f_regs[D3];
			/* FALLTHROUGH */
		case 8:
			args[1] = frame.f_regs[D2];
			/* FALLTHROUGH */
		case 4:
			args[0] = frame.f_regs[D1];
			/* FALLTHROUGH */
		case 0:
			error = 0;
			break;
		default:
#ifdef DEBUG
			panic("linux syscall %d weird argsize %d",
				code, argsize);
#else
			error = EINVAL;
#endif
			break;
		}
	} else
#endif
	if (argsize)
		error = copyin(params, (caddr_t)args, argsize);
	else
		error = 0;
	/*
	 * NOTE(review): the tracing hooks below run before `error' is
	 * checked, so after a failed copyin they are handed whatever
	 * args[] happens to contain.
	 */
#ifdef SYSCALL_DEBUG
	scdebug_call(p, code, args);
#endif
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL))
		ktrsyscall(p->p_tracep, code, argsize, args);
#endif
	if (error)
		goto bad;
	rval[0] = 0;
	/* default second return value leaves D1 unchanged */
	rval[1] = frame.f_regs[D1];
	error = (*callp->sy_call)(p, args, rval);
	switch (error) {
	case 0:
		frame.f_regs[D0] = rval[0];
		frame.f_regs[D1] = rval[1];
		frame.f_sr &= ~PSL_C;	/* carry bit */
		break;
	case ERESTART:
		/*
		 * We always enter through a `trap' instruction, which is 2
		 * bytes, so adjust the pc by that amount.
		 */
		frame.f_pc = opc - 2;
		break;
	case EJUSTRETURN:
		/* nothing to do */
		break;
	default:
	bad:
		/* translate to the emulation's errno values, if it has a table */
		if (p->p_emul->e_errno)
			error = p->p_emul->e_errno[error];
		frame.f_regs[D0] = error;
		frame.f_sr |= PSL_C;	/* carry bit */
		break;
	}

#ifdef SYSCALL_DEBUG
	scdebug_ret(p, code, error, rval);
#endif
#ifdef COMPAT_SUNOS
	/* need new p-value for this */
	if (error == ERESTART && (p->p_md.md_flags & MDP_STACKADJ))
		frame.f_regs[SP] -= sizeof (int);
#endif
	userret(p, &frame, sticks, (u_int)0, 0);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, code, error, rval[0]);
#endif
}
1190
1191void
1192child_return(arg)
1193	void *arg;
1194{
1195	struct proc *p = arg;
1196	/* See cpu_fork() */
1197	struct frame *f = (struct frame *)p->p_md.md_regs;
1198
1199	f->f_regs[D0] = 0;
1200	f->f_sr &= ~PSL_C;
1201	f->f_format = FMT0;
1202
1203	userret(p, f, 0, (u_int)0, 0);
1204#ifdef KTRACE
1205	if (KTRPOINT(p, KTR_SYSRET))
1206		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1207#endif
1208}
1209
1210/*
1211 * Allocation routines for software interrupts.
1212 */
1213u_long
1214allocate_sir(proc, arg)
1215	void (*proc)(void *);
1216	void *arg;
1217{
1218	int bit;
1219
1220	if( next_sir >= NSIR )
1221		panic("allocate_sir: none left");
1222	bit = next_sir++;
1223	sir_routines[bit] = proc;
1224	sir_args[bit] = arg;
1225	return (1 << bit);
1226}
1227
1228void
1229init_sir()
1230{
1231	extern void netintr(void);
1232
1233	sir_routines[0] = (void (*)(void *))netintr;
1234	sir_routines[1] = (void (*)(void *))softclock;
1235	next_sir = 2;
1236}
1237