trap.c revision 1.20
1/*	$NetBSD: trap.c,v 1.20 2000/05/24 16:48:41 thorpej Exp $	*/
2
3/*
4 * This file was taken from mvme68k/mvme68k/trap.c
5 * should probably be re-synced when needed.
6 * Darrin B. Jewell <jewell@mit.edu> Tue Aug  3 10:53:12 UTC 1999
7 * original cvs id: NetBSD: trap.c,v 1.32 1999/08/03 10:52:06 dbj Exp
8 */
9
10/*
11 * Copyright (c) 1988 University of Utah.
12 * Copyright (c) 1982, 1986, 1990, 1993
13 *	The Regents of the University of California.  All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * the Systems Programming Group of the University of Utah Computer
17 * Science Department.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 * 3. All advertising materials mentioning features or use of this software
28 *    must display the following acknowledgement:
29 *	This product includes software developed by the University of
30 *	California, Berkeley and its contributors.
31 * 4. Neither the name of the University nor the names of its contributors
32 *    may be used to endorse or promote products derived from this software
33 *    without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
46 *
47 * from: Utah $Hdr: trap.c 1.37 92/12/20$
48 *
49 *	@(#)trap.c	8.5 (Berkeley) 1/4/94
50 */
51
52#include "opt_ddb.h"
53#include "opt_execfmt.h"
54#include "opt_ktrace.h"
55#include "opt_compat_netbsd.h"
56#include "opt_compat_sunos.h"
57#include "opt_compat_hpux.h"
58#include "opt_compat_linux.h"
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/proc.h>
63#include <sys/acct.h>
64#include <sys/kernel.h>
65#include <sys/signalvar.h>
66#include <sys/resourcevar.h>
67#include <sys/syscall.h>
68#include <sys/syslog.h>
69#include <sys/user.h>
70#ifdef KTRACE
71#include <sys/ktrace.h>
72#endif
73
74#ifdef DEBUG
75#include <dev/cons.h>
76#endif
77
78#include <machine/db_machdep.h>
79#include <machine/psl.h>
80#include <machine/trap.h>
81#include <machine/cpu.h>
82#include <machine/reg.h>
83
84#include <vm/vm.h>
85#include <vm/pmap.h>
86
87#include <m68k/cacheops.h>
88
89#include <uvm/uvm_extern.h>
90
91#ifdef COMPAT_HPUX
92#include <compat/hpux/hpux.h>
93#endif
94
95#ifdef COMPAT_SUNOS
96#include <compat/sunos/sunos_syscall.h>
97extern struct emul emul_sunos;
98#endif
99
100#ifdef COMPAT_LINUX
101#ifdef EXEC_AOUT
102extern struct emul emul_linux_aout;
103#endif
104#ifdef EXEC_ELF32
105extern struct emul emul_linux_elf32;
106#endif
107#endif
108
/*
 * Forward declarations for the routines defined in this file.
 * trap() and syscall() take the exception frame by value.
 */
int	writeback __P((struct frame *fp, int docachepush));
void	trap __P((int type, u_int code, u_int v, struct frame frame));
void	syscall __P((register_t code, struct frame frame));

#ifdef DEBUG
/* Debug-only pretty printers for the 68040 SSW and writeback slots. */
void	dumpssw __P((u_short));
void	dumpwb __P((int, u_short, u_int, u_int));
#endif

/* Common return-to-user-mode work shared by trap() and syscall(). */
static inline void userret __P((struct proc *p, struct frame *fp,
	    u_quad_t oticks, u_int faultaddr, int fromtrap));

/* AST request flag; trap() clears it when it services T_ASTFLT|T_USER. */
int	astpending;
122
/*
 * Printable names of the trap types, indexed by trap number with the
 * T_USER bit masked off.  trap() uses the matching entry as its panic
 * message when a trap cannot be handled.
 */
char	*trap_type[] = {
	"Bus error",			/*  0 */
	"Address error",		/*  1 */
	"Illegal instruction",		/*  2 */
	"Zero divide",			/*  3 */
	"CHK instruction",		/*  4 */
	"TRAPV instruction",		/*  5 */
	"Privilege violation",		/*  6 */
	"Trace trap",			/*  7 */
	"MMU fault",			/*  8 */
	"SSIR trap",			/*  9 */
	"Format error",			/* 10 */
	"68881 exception",		/* 11 */
	"Coprocessor violation",	/* 12 */
	"Async system trap"		/* 13 */
};
/* Number of entries in trap_type[]. */
int	trap_types = sizeof(trap_type) / sizeof(trap_type[0]);
140
141/*
142 * Size of various exception stack frames (minus the standard 8 bytes)
143 */
144short	exframesize[] = {
145	FMT0SIZE,	/* type 0 - normal (68020/030/040/060) */
146	FMT1SIZE,	/* type 1 - throwaway (68020/030/040) */
147	FMT2SIZE,	/* type 2 - normal 6-word (68020/030/040/060) */
148	FMT3SIZE,	/* type 3 - FP post-instruction (68040/060) */
149	FMT4SIZE,	/* type 4 - access error/fp disabled (68060) */
150	-1, -1,		/* type 5-6 - undefined */
151	FMT7SIZE,	/* type 7 - access error (68040) */
152	58,		/* type 8 - bus fault (68010) */
153	FMT9SIZE,	/* type 9 - coprocessor mid-instruction (68020/030) */
154	FMTASIZE,	/* type A - short bus fault (68020/030) */
155	FMTBSIZE,	/* type B - long bus fault (68020/030) */
156	-1, -1, -1, -1	/* type C-F - undefined */
157};
158
/*
 * Fault-code classification.  For each CPU family configured into the
 * kernel there is a pair of predicates on the trap `code':
 *	KDFAULT_xxx	fault was a kernel (supervisor) data access
 *	WRFAULT_xxx	fault was caused by a write
 * A family that is not configured contributes a constant 0, and each
 * predicate also checks cputype so only the running CPU's test applies.
 */

/* 68060 */
#if defined(M68060)
#define	KDFAULT_060(code)	(cputype == CPU_68060 && ((code) & FSLW_TM_SV))
#define	WRFAULT_060(code)	(cputype == CPU_68060 && ((code) & FSLW_RW_W))
#else
#define	KDFAULT_060(code)	0
#define	WRFAULT_060(code)	0
#endif

/* 68040: a cleared RW bit in the SSW means the access was a write */
#if defined(M68040)
#define	KDFAULT_040(code)	(cputype == CPU_68040 && \
				 ((code) & SSW4_TMMASK) == SSW4_TMKD)
#define	WRFAULT_040(code)	(cputype == CPU_68040 && \
				 !((code) & SSW4_RW))
#else
#define	KDFAULT_040(code)	0
#define	WRFAULT_040(code)	0
#endif

/* 68020/68030: only data faults (SSW_DF) are classified */
#if defined(M68020) || defined(M68030)
#define	KDFAULT_OTH(code)	(cputype <= CPU_68030 && \
				 ((code) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
#define	WRFAULT_OTH(code)	(cputype <= CPU_68030 && \
				 ((code) & (SSW_DF|SSW_RW)) == SSW_DF)
#else
#define	KDFAULT_OTH(code)	0
#define	WRFAULT_OTH(code)	0
#endif

/* CPU-independent front ends used by trap() and writeback(). */
#define	KDFAULT(code)	\
	(KDFAULT_060(code) || KDFAULT_040(code) || KDFAULT_OTH(code))
#define	WRFAULT(code)	\
	(WRFAULT_060(code) || WRFAULT_040(code) || WRFAULT_OTH(code))
189
#ifdef DEBUG
/*
 * MMU fault debugging knobs.
 *	mmudebug	bit mask of the MDB_* flags below
 *	mmupid		pid whose faults are always traced (-1 by default)
 */
int mmudebug = 0;
int mmupid = -1;
#define MDB_FOLLOW	1	/* not referenced in this file */
#define MDB_WBFOLLOW	2	/* trace page faults and 68040 writebacks */
#define MDB_WBFAILED	4	/* report writebacks that failed */
#define MDB_ISPID(p)	((p) == mmupid)	/* true if pid `p' is being traced */
#endif


/*
 * Software interrupt (SIR) dispatch tables.  trap() runs
 * sir_routines[bit](sir_args[bit]) for every bit set in ssir.
 * next_sir is the next slot handed out by allocate_sir().
 */
#define NSIR	32
void (*sir_routines[NSIR])(void *);
void *sir_args[NSIR];
int next_sir;
204
205/*
206 * trap and syscall both need the following work done before returning
207 * to user mode.
208 */
209static inline void
210userret(p, fp, oticks, faultaddr, fromtrap)
211	struct proc *p;
212	struct frame *fp;
213	u_quad_t oticks;
214	u_int faultaddr;
215	int fromtrap;
216{
217	int sig;
218#ifdef M68040
219	int beenhere = 0;
220
221again:
222#endif
223	/* take pending signals */
224	while ((sig = CURSIG(p)) != 0)
225		postsig(sig);
226	p->p_priority = p->p_usrpri;
227	if (want_resched) {
228		/*
229		 * We are being preempted.
230		 */
231		preempt(NULL);
232		while ((sig = CURSIG(p)) != 0)
233			postsig(sig);
234	}
235
236	/*
237	 * If profiling, charge system time to the trapped pc.
238	 */
239	if (p->p_flag & P_PROFIL) {
240		extern int psratio;
241
242		addupc_task(p, fp->f_pc,
243			    (int)(p->p_sticks - oticks) * psratio);
244	}
245#ifdef M68040
246	/*
247	 * Deal with user mode writebacks (from trap, or from sigreturn).
248	 * If any writeback fails, go back and attempt signal delivery.
249	 * unless we have already been here and attempted the writeback
250	 * (e.g. bad address with user ignoring SIGSEGV).  In that case
251	 * we just return to the user without sucessfully completing
252	 * the writebacks.  Maybe we should just drop the sucker?
253	 */
254	if (cputype == CPU_68040 && fp->f_format == FMT7) {
255		if (beenhere) {
256#ifdef DEBUG
257			if (mmudebug & MDB_WBFAILED)
258				printf(fromtrap ?
259		"pid %d(%s): writeback aborted, pc=%x, fa=%x\n" :
260		"pid %d(%s): writeback aborted in sigreturn, pc=%x\n",
261				    p->p_pid, p->p_comm, fp->f_pc, faultaddr);
262#endif
263		} else if ((sig = writeback(fp, fromtrap))) {
264			beenhere = 1;
265			oticks = p->p_sticks;
266			trapsignal(p, sig, faultaddr);
267			goto again;
268		}
269	}
270#endif
271	curpriority = p->p_priority;
272}
273
274/*
275 * Trap is called from locore to handle most types of processor traps,
276 * including events such as simulated software interrupts/AST's.
277 * System calls are broken out for efficiency.
278 */
279/*ARGSUSED*/
280void
281trap(type, code, v, frame)
282	int type;
283	unsigned code;
284	unsigned v;
285	struct frame frame;
286{
287	extern char fubail[], subail[];
288	struct proc *p;
289	int i, s;
290	u_int ucode;
291	u_quad_t sticks = 0 /* XXX initialiser works around compiler bug */;
292	int bit;
293
294	uvmexp.traps++;
295	p = curproc;
296	ucode = 0;
297
298	/* I have verified that this DOES happen! -gwr */
299	if (p == NULL)
300		p = &proc0;
301#ifdef DIAGNOSTIC
302	if (p->p_addr == NULL)
303		panic("trap: no pcb");
304#endif
305
306	if (USERMODE(frame.f_sr)) {
307		type |= T_USER;
308		sticks = p->p_sticks;
309		p->p_md.md_regs = frame.f_regs;
310	}
311	switch (type) {
312
313	default:
314	dopanic:
315		printf("trap type %d, code = 0x%x, v = 0x%x\n", type, code, v);
316		printf("%s program counter = 0x%x\n",
317		    (type & T_USER) ? "user" : "kernel", frame.f_pc);
318		/*
319		 * Let the kernel debugger see the trap frame that
320		 * caused us to panic.  This is a convenience so
321		 * one can see registers at the point of failure.
322		 */
323		s = splhigh();
324#ifdef KGDB
325		/* If connected, step or cont returns 1 */
326		if (kgdb_trap(type, &frame))
327			goto kgdb_cont;
328#endif
329#ifdef DDB
330		(void)kdb_trap(type, (db_regs_t *)&frame);
331#endif
332#ifdef KGDB
333	kgdb_cont:
334#endif
335		splx(s);
336		if (panicstr) {
337			printf("trap during panic!\n");
338#ifdef DEBUG
339			/* XXX should be a machine-dependent hook */
340			printf("(press a key)\n"); (void)cngetc();
341#endif
342		}
343		regdump((struct trapframe *)&frame, 128);
344		type &= ~T_USER;
345		if ((u_int)type < trap_types)
346			panic(trap_type[type]);
347		panic("trap");
348
349	case T_BUSERR:		/* kernel bus error */
350		if (p->p_addr->u_pcb.pcb_onfault == 0)
351			goto dopanic;
352		/* FALLTHROUGH */
353
354	copyfault:
355		/*
356		 * If we have arranged to catch this fault in any of the
357		 * copy to/from user space routines, set PC to return to
358		 * indicated location and set flag informing buserror code
359		 * that it may need to clean up stack frame.
360		 */
361		frame.f_stackadj = exframesize[frame.f_format];
362		frame.f_format = frame.f_vector = 0;
363		frame.f_pc = (int) p->p_addr->u_pcb.pcb_onfault;
364		return;
365
366	case T_BUSERR|T_USER:	/* bus error */
367	case T_ADDRERR|T_USER:	/* address error */
368		ucode = v;
369		i = SIGBUS;
370		break;
371
372	case T_COPERR:		/* kernel coprocessor violation */
373	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
374	case T_FMTERR:		/* ...just in case... */
375	/*
376	 * The user has most likely trashed the RTE or FP state info
377	 * in the stack frame of a signal handler.
378	 */
379		printf("pid %d: kernel %s exception\n", p->p_pid,
380		       type==T_COPERR ? "coprocessor" : "format");
381		type |= T_USER;
382		p->p_sigacts->ps_sigact[SIGILL].sa_handler = SIG_DFL;
383		sigdelset(&p->p_sigignore, SIGILL);
384		sigdelset(&p->p_sigcatch, SIGILL);
385		sigdelset(&p->p_sigmask, SIGILL);
386		i = SIGILL;
387		ucode = frame.f_format;	/* XXX was ILL_RESAD_FAULT */
388		break;
389
390	case T_COPERR|T_USER:	/* user coprocessor violation */
391	/* What is a proper response here? */
392		ucode = 0;
393		i = SIGFPE;
394		break;
395
396	case T_FPERR|T_USER:	/* 68881 exceptions */
397	/*
398	 * We pass along the 68881 status register which locore stashed
399	 * in code for us.  Note that there is a possibility that the
400	 * bit pattern of this register will conflict with one of the
401	 * FPE_* codes defined in signal.h.  Fortunately for us, the
402	 * only such codes we use are all in the range 1-7 and the low
403	 * 3 bits of the status register are defined as 0 so there is
404	 * no clash.
405	 */
406		ucode = code;
407		i = SIGFPE;
408		break;
409
410#ifdef M68040
411	case T_FPEMULI|T_USER:	/* unimplemented FP instuction */
412	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
413		/* XXX need to FSAVE */
414		printf("pid %d(%s): unimplemented FP %s at %x (EA %x)\n",
415		       p->p_pid, p->p_comm,
416		       frame.f_format == 2 ? "instruction" : "data type",
417		       frame.f_pc, frame.f_fmt2.f_iaddr);
418		/* XXX need to FRESTORE */
419		i = SIGFPE;
420		break;
421#endif
422
423	case T_ILLINST|T_USER:	/* illegal instruction fault */
424#ifdef COMPAT_HPUX
425		if (p->p_emul == &emul_hpux) {
426			ucode = HPUX_ILL_ILLINST_TRAP;
427			i = SIGILL;
428			break;
429		}
430		/* fall through */
431#endif
432	case T_PRIVINST|T_USER:	/* privileged instruction fault */
433#ifdef COMPAT_HPUX
434		if (p->p_emul == &emul_hpux)
435			ucode = HPUX_ILL_PRIV_TRAP;
436		else
437#endif
438		ucode = frame.f_format;	/* XXX was ILL_PRIVIN_FAULT */
439		i = SIGILL;
440		break;
441
442	case T_ZERODIV|T_USER:	/* Divide by zero */
443#ifdef COMPAT_HPUX
444		if (p->p_emul == &emul_hpux)
445			ucode = HPUX_FPE_INTDIV_TRAP;
446		else
447#endif
448		ucode = frame.f_format;	/* XXX was FPE_INTDIV_TRAP */
449		i = SIGFPE;
450		break;
451
452	case T_CHKINST|T_USER:	/* CHK instruction trap */
453#ifdef COMPAT_HPUX
454		if (p->p_emul == &emul_hpux) {
455			/* handled differently under hp-ux */
456			i = SIGILL;
457			ucode = HPUX_ILL_CHK_TRAP;
458			break;
459		}
460#endif
461		ucode = frame.f_format;	/* XXX was FPE_SUBRNG_TRAP */
462		i = SIGFPE;
463		break;
464
465	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
466#ifdef COMPAT_HPUX
467		if (p->p_emul == &emul_hpux) {
468			/* handled differently under hp-ux */
469			i = SIGILL;
470			ucode = HPUX_ILL_TRAPV_TRAP;
471			break;
472		}
473#endif
474		ucode = frame.f_format;	/* XXX was FPE_INTOVF_TRAP */
475		i = SIGFPE;
476		break;
477
478	/*
479	 * XXX: Trace traps are a nightmare.
480	 *
481	 *	HP-UX uses trap #1 for breakpoints,
482	 *	NetBSD/m68k uses trap #2,
483	 *	SUN 3.x uses trap #15,
484	 *	DDB and KGDB uses trap #15 (for kernel breakpoints;
485	 *	handled elsewhere).
486	 *
487	 * NetBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
488	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
489	 * supported yet.
490	 *
491	 * XXX: We should never get kernel-mode T_TRAP15
492	 * XXX: because locore.s now gives them special treatment.
493	 */
494	case T_TRAP15:		/* kernel breakpoint */
495#ifdef DEBUG
496		printf("unexpected kernel trace trap, type = %d\n", type);
497		printf("program counter = 0x%x\n", frame.f_pc);
498#endif
499		frame.f_sr &= ~PSL_T;
500		return;
501
502	case T_TRACE|T_USER:	/* user trace trap */
503#ifdef COMPAT_SUNOS
504		/*
505		 * SunOS uses Trap #2 for a "CPU cache flush".
506		 * Just flush the on-chip caches and return.
507		 */
508		if (p->p_emul == &emul_sunos) {
509			ICIA();
510			DCIU();
511			return;
512		}
513#endif
514		/* FALLTHROUGH */
515	case T_TRACE:		/* tracing a trap instruction */
516	case T_TRAP15|T_USER:	/* SUN user trace trap */
517		frame.f_sr &= ~PSL_T;
518		i = SIGTRAP;
519		break;
520
521	case T_ASTFLT:		/* system async trap, cannot happen */
522		goto dopanic;
523
524	case T_ASTFLT|T_USER:	/* user async trap */
525		astpending = 0;
526		/*
527		 * We check for software interrupts first.  This is because
528		 * they are at a higher level than ASTs, and on a VAX would
529		 * interrupt the AST.  We assume that if we are processing
530		 * an AST that we must be at IPL0 so we don't bother to
531		 * check.  Note that we ensure that we are at least at SIR
532		 * IPL while processing the SIR.
533		 */
534		spl1();
535		/* fall into... */
536
537	case T_SSIR:		/* software interrupt */
538	case T_SSIR|T_USER:
539		while ((bit = ffs(ssir))) {
540			--bit;
541			ssir &= ~(1 << bit);
542			uvmexp.softs++;
543			if (sir_routines[bit])
544				sir_routines[bit](sir_args[bit]);
545		}
546		/*
547		 * If this was not an AST trap, we are all done.
548		 */
549		if (type != (T_ASTFLT|T_USER)) {
550			uvmexp.traps--;
551			return;
552		}
553		spl0();
554		if (p->p_flag & P_OWEUPC) {
555			p->p_flag &= ~P_OWEUPC;
556			ADDUPROF(p);
557		}
558		goto out;
559
560	case T_MMUFLT:		/* kernel mode page fault */
561		/*
562		 * If we were doing profiling ticks or other user mode
563		 * stuff from interrupt code, Just Say No.
564		 */
565		if (p->p_addr->u_pcb.pcb_onfault == fubail ||
566		    p->p_addr->u_pcb.pcb_onfault == subail)
567			goto copyfault;
568		/* fall into ... */
569
570	case T_MMUFLT|T_USER:	/* page fault */
571	    {
572		vaddr_t va;
573		struct vmspace *vm = p->p_vmspace;
574		vm_map_t map;
575		int rv;
576		vm_prot_t ftype;
577		extern vm_map_t kernel_map;
578
579#ifdef DEBUG
580		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
581		printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n",
582		       p->p_pid, code, v, frame.f_pc, frame.f_sr);
583#endif
584		/*
585		 * It is only a kernel address space fault iff:
586		 * 	1. (type & T_USER) == 0  and
587		 * 	2. pcb_onfault not set or
588		 *	3. pcb_onfault set but supervisor space data fault
589		 * The last can occur during an exec() copyin where the
590		 * argument space is lazy-allocated.
591		 */
592		if ((type & T_USER) == 0 &&
593		    ((p->p_addr->u_pcb.pcb_onfault == 0) || KDFAULT(code)))
594			map = kernel_map;
595		else
596			map = vm ? &vm->vm_map : kernel_map;
597
598		if (WRFAULT(code))
599			ftype = VM_PROT_READ | VM_PROT_WRITE;
600		else
601			ftype = VM_PROT_READ;
602
603		va = trunc_page((vaddr_t)v);
604
605		if (map == kernel_map && va == 0) {
606			printf("trap: bad kernel %s access at 0x%x\n",
607			    (ftype & VM_PROT_WRITE) ? "read/write" :
608			    "read", v);
609			goto dopanic;
610		}
611
612#ifdef COMPAT_HPUX
613		if (ISHPMMADDR(va)) {
614			int pmap_mapmulti __P((pmap_t, vaddr_t));
615			vaddr_t bva;
616
617			rv = pmap_mapmulti(map->pmap, va);
618			if (rv != KERN_SUCCESS) {
619				bva = HPMMBASEADDR(va);
620				rv = uvm_fault(map, bva, 0, ftype);
621				if (rv == KERN_SUCCESS)
622					(void) pmap_mapmulti(map->pmap, va);
623			}
624		} else
625#endif
626		rv = uvm_fault(map, va, 0, ftype);
627#ifdef DEBUG
628		if (rv && MDB_ISPID(p->p_pid))
629			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
630			    map, va, ftype, rv);
631#endif
632		/*
633		 * If this was a stack access we keep track of the maximum
634		 * accessed stack size.  Also, if vm_fault gets a protection
635		 * failure it is due to accessing the stack region outside
636		 * the current limit and we need to reflect that as an access
637		 * error.
638		 */
639		if ((vm != NULL && (caddr_t)va >= vm->vm_maxsaddr)
640		    && map != kernel_map) {
641			if (rv == KERN_SUCCESS) {
642				unsigned nss;
643
644				nss = btoc(USRSTACK-(unsigned)va);
645				if (nss > vm->vm_ssize)
646					vm->vm_ssize = nss;
647			} else if (rv == KERN_PROTECTION_FAILURE)
648				rv = KERN_INVALID_ADDRESS;
649		}
650		if (rv == KERN_SUCCESS) {
651			if (type == T_MMUFLT) {
652#ifdef M68040
653				if (cputype == CPU_68040)
654					(void) writeback(&frame, 1);
655#endif
656				return;
657			}
658			goto out;
659		}
660		if (type == T_MMUFLT) {
661			if (p->p_addr->u_pcb.pcb_onfault)
662				goto copyfault;
663			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
664			    map, va, ftype, rv);
665			printf("  type %x, code [mmu,,ssw]: %x\n",
666			       type, code);
667			goto dopanic;
668		}
669		ucode = v;
670		if (rv == KERN_RESOURCE_SHORTAGE) {
671			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
672			       p->p_pid, p->p_comm,
673			       p->p_cred && p->p_ucred ?
674			       p->p_ucred->cr_uid : -1);
675			i = SIGKILL;
676		} else {
677			i = SIGSEGV;
678		}
679		break;
680	    }
681	}
682	trapsignal(p, i, ucode);
683	if ((type & T_USER) == 0)
684		return;
685out:
686	userret(p, &frame, sticks, v, 1);
687}
688
689#ifdef M68040
690#ifdef DEBUG
/* Debug-only counters maintained by writeback(). */
struct writebackstats {
	int calls;		/* invocations of writeback() */
	int cpushes;		/* dcache push faults seen */
	int move16s;		/* MOVE16 faults seen */
	int wb1s, wb2s, wb3s;	/* writeback slots 1-3 that were valid */
	int wbsize[4];		/* histogram by size code, see f7sz[] */
} wbstats;

/* Decode tables for SSW / writeback status fields (debug printing). */
char *f7sz[] = { "longword", "byte", "word", "line" };	/* (x & SSW4_SZMASK) >> 5 */
char *f7tt[] = { "normal", "MOVE16", "AFC", "ACK" };	/* (x & SSW4_TTMASK) >> 3 */
char *f7tm[] = { "d-push", "u-data", "u-code", "M-data",
		 "M-code", "k-data", "k-code", "RES" };	/* x & SSW4_TMMASK */
/* printf format shared by all "writeback failed" reports. */
char wberrstr[] =
    "WARNING: pid %d(%s) writeback [%s] failed, pc=%x fa=%x wba=%x wbd=%x\n";
705#endif
706
707int
708writeback(fp, docachepush)
709	struct frame *fp;
710	int docachepush;
711{
712	struct fmt7 *f = &fp->f_fmt7;
713	struct proc *p = curproc;
714	int err = 0;
715	u_int fa;
716	caddr_t oonfault = p->p_addr->u_pcb.pcb_onfault;
717	paddr_t pa;
718
719#ifdef DEBUG
720	if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
721		printf(" pid=%d, fa=%x,", p->p_pid, f->f_fa);
722		dumpssw(f->f_ssw);
723	}
724	wbstats.calls++;
725#endif
726	/*
727	 * Deal with special cases first.
728	 */
729	if ((f->f_ssw & SSW4_TMMASK) == SSW4_TMDCP) {
730		/*
731		 * Dcache push fault.
732		 * Line-align the address and write out the push data to
733		 * the indicated physical address.
734		 */
735#ifdef DEBUG
736		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
737			printf(" pushing %s to PA %x, data %x",
738			       f7sz[(f->f_ssw & SSW4_SZMASK) >> 5],
739			       f->f_fa, f->f_pd0);
740			if ((f->f_ssw & SSW4_SZMASK) == SSW4_SZLN)
741				printf("/%x/%x/%x",
742				       f->f_pd1, f->f_pd2, f->f_pd3);
743			printf("\n");
744		}
745		if (f->f_wb1s & SSW4_WBSV)
746			panic("writeback: cache push with WB1S valid");
747		wbstats.cpushes++;
748#endif
749		/*
750		 * XXX there are security problems if we attempt to do a
751		 * cache push after a signal handler has been called.
752		 */
753		if (docachepush) {
754			pmap_enter(pmap_kernel(), (vaddr_t)vmmap,
755			    trunc_page(f->f_fa), VM_PROT_WRITE,
756			    VM_PROT_WRITE|PMAP_WIRED);
757			fa = (u_int)&vmmap[(f->f_fa & PGOFSET) & ~0xF];
758			bcopy((caddr_t)&f->f_pd0, (caddr_t)fa, 16);
759			(void) pmap_extract(pmap_kernel(), (vaddr_t)fa, &pa);
760			DCFL(pa);
761			pmap_remove(pmap_kernel(), (vaddr_t)vmmap,
762				    (vaddr_t)&vmmap[NBPG]);
763		} else
764			printf("WARNING: pid %d(%s) uid %d: CPUSH not done\n",
765			       p->p_pid, p->p_comm, p->p_ucred->cr_uid);
766	} else if ((f->f_ssw & (SSW4_RW|SSW4_TTMASK)) == SSW4_TTM16) {
767		/*
768		 * MOVE16 fault.
769		 * Line-align the address and write out the push data to
770		 * the indicated virtual address.
771		 */
772#ifdef DEBUG
773		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
774			printf(" MOVE16 to VA %x(%x), data %x/%x/%x/%x\n",
775			       f->f_fa, f->f_fa & ~0xF, f->f_pd0, f->f_pd1,
776			       f->f_pd2, f->f_pd3);
777		if (f->f_wb1s & SSW4_WBSV)
778			panic("writeback: MOVE16 with WB1S valid");
779		wbstats.move16s++;
780#endif
781		if (KDFAULT(f->f_wb1s))
782			bcopy((caddr_t)&f->f_pd0, (caddr_t)(f->f_fa & ~0xF), 16);
783		else
784			err = suline((caddr_t)(f->f_fa & ~0xF), (caddr_t)&f->f_pd0);
785		if (err) {
786			fa = f->f_fa & ~0xF;
787#ifdef DEBUG
788			if (mmudebug & MDB_WBFAILED)
789				printf(wberrstr, p->p_pid, p->p_comm,
790				       "MOVE16", fp->f_pc, f->f_fa,
791				       f->f_fa & ~0xF, f->f_pd0);
792#endif
793		}
794	} else if (f->f_wb1s & SSW4_WBSV) {
795		/*
796		 * Writeback #1.
797		 * Position the "memory-aligned" data and write it out.
798		 */
799		u_int wb1d = f->f_wb1d;
800		int off;
801
802#ifdef DEBUG
803		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
804			dumpwb(1, f->f_wb1s, f->f_wb1a, f->f_wb1d);
805		wbstats.wb1s++;
806		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
807#endif
808		off = (f->f_wb1a & 3) * 8;
809		switch (f->f_wb1s & SSW4_SZMASK) {
810		case SSW4_SZLW:
811			if (off)
812				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
813			if (KDFAULT(f->f_wb1s))
814				*(long *)f->f_wb1a = wb1d;
815			else
816				err = suword((caddr_t)f->f_wb1a, wb1d);
817			break;
818		case SSW4_SZB:
819			off = 24 - off;
820			if (off)
821				wb1d >>= off;
822			if (KDFAULT(f->f_wb1s))
823				*(char *)f->f_wb1a = wb1d;
824			else
825				err = subyte((caddr_t)f->f_wb1a, wb1d);
826			break;
827		case SSW4_SZW:
828			off = (off + 16) % 32;
829			if (off)
830				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
831			if (KDFAULT(f->f_wb1s))
832				*(short *)f->f_wb1a = wb1d;
833			else
834				err = susword((caddr_t)f->f_wb1a, wb1d);
835			break;
836		}
837		if (err) {
838			fa = f->f_wb1a;
839#ifdef DEBUG
840			if (mmudebug & MDB_WBFAILED)
841				printf(wberrstr, p->p_pid, p->p_comm,
842				       "#1", fp->f_pc, f->f_fa,
843				       f->f_wb1a, f->f_wb1d);
844#endif
845		}
846	}
847	/*
848	 * Deal with the "normal" writebacks.
849	 *
850	 * XXX writeback2 is known to reflect a LINE size writeback after
851	 * a MOVE16 was already dealt with above.  Ignore it.
852	 */
853	if (err == 0 && (f->f_wb2s & SSW4_WBSV) &&
854	    (f->f_wb2s & SSW4_SZMASK) != SSW4_SZLN) {
855#ifdef DEBUG
856		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
857			dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
858		wbstats.wb2s++;
859		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
860#endif
861		switch (f->f_wb2s & SSW4_SZMASK) {
862		case SSW4_SZLW:
863			if (KDFAULT(f->f_wb2s))
864				*(long *)f->f_wb2a = f->f_wb2d;
865			else
866				err = suword((caddr_t)f->f_wb2a, f->f_wb2d);
867			break;
868		case SSW4_SZB:
869			if (KDFAULT(f->f_wb2s))
870				*(char *)f->f_wb2a = f->f_wb2d;
871			else
872				err = subyte((caddr_t)f->f_wb2a, f->f_wb2d);
873			break;
874		case SSW4_SZW:
875			if (KDFAULT(f->f_wb2s))
876				*(short *)f->f_wb2a = f->f_wb2d;
877			else
878				err = susword((caddr_t)f->f_wb2a, f->f_wb2d);
879			break;
880		}
881		if (err) {
882			fa = f->f_wb2a;
883#ifdef DEBUG
884			if (mmudebug & MDB_WBFAILED) {
885				printf(wberrstr, p->p_pid, p->p_comm,
886				       "#2", fp->f_pc, f->f_fa,
887				       f->f_wb2a, f->f_wb2d);
888				dumpssw(f->f_ssw);
889				dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
890			}
891#endif
892		}
893	}
894	if (err == 0 && (f->f_wb3s & SSW4_WBSV)) {
895#ifdef DEBUG
896		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
897			dumpwb(3, f->f_wb3s, f->f_wb3a, f->f_wb3d);
898		wbstats.wb3s++;
899		wbstats.wbsize[(f->f_wb3s&SSW4_SZMASK)>>5]++;
900#endif
901		switch (f->f_wb3s & SSW4_SZMASK) {
902		case SSW4_SZLW:
903			if (KDFAULT(f->f_wb3s))
904				*(long *)f->f_wb3a = f->f_wb3d;
905			else
906				err = suword((caddr_t)f->f_wb3a, f->f_wb3d);
907			break;
908		case SSW4_SZB:
909			if (KDFAULT(f->f_wb3s))
910				*(char *)f->f_wb3a = f->f_wb3d;
911			else
912				err = subyte((caddr_t)f->f_wb3a, f->f_wb3d);
913			break;
914		case SSW4_SZW:
915			if (KDFAULT(f->f_wb3s))
916				*(short *)f->f_wb3a = f->f_wb3d;
917			else
918				err = susword((caddr_t)f->f_wb3a, f->f_wb3d);
919			break;
920#ifdef DEBUG
921		case SSW4_SZLN:
922			panic("writeback: wb3s indicates LINE write");
923#endif
924		}
925		if (err) {
926			fa = f->f_wb3a;
927#ifdef DEBUG
928			if (mmudebug & MDB_WBFAILED)
929				printf(wberrstr, p->p_pid, p->p_comm,
930				       "#3", fp->f_pc, f->f_fa,
931				       f->f_wb3a, f->f_wb3d);
932#endif
933		}
934	}
935	p->p_addr->u_pcb.pcb_onfault = oonfault;
936	if (err)
937		err = SIGSEGV;
938	return (err);
939}
940
941#ifdef DEBUG
942void
943dumpssw(ssw)
944	u_short ssw;
945{
946	printf(" SSW: %x: ", ssw);
947	if (ssw & SSW4_CP)
948		printf("CP,");
949	if (ssw & SSW4_CU)
950		printf("CU,");
951	if (ssw & SSW4_CT)
952		printf("CT,");
953	if (ssw & SSW4_CM)
954		printf("CM,");
955	if (ssw & SSW4_MA)
956		printf("MA,");
957	if (ssw & SSW4_ATC)
958		printf("ATC,");
959	if (ssw & SSW4_LK)
960		printf("LK,");
961	if (ssw & SSW4_RW)
962		printf("RW,");
963	printf(" SZ=%s, TT=%s, TM=%s\n",
964	       f7sz[(ssw & SSW4_SZMASK) >> 5],
965	       f7tt[(ssw & SSW4_TTMASK) >> 3],
966	       f7tm[ssw & SSW4_TMMASK]);
967}
968
969void
970dumpwb(num, s, a, d)
971	int num;
972	u_short s;
973	u_int a, d;
974{
975	struct proc *p = curproc;
976	paddr_t pa;
977
978	printf(" writeback #%d: VA %x, data %x, SZ=%s, TT=%s, TM=%s\n",
979	       num, a, d, f7sz[(s & SSW4_SZMASK) >> 5],
980	       f7tt[(s & SSW4_TTMASK) >> 3], f7tm[s & SSW4_TMMASK]);
981	printf("               PA ");
982	if (pmap_extract(p->p_vmspace->vm_map.pmap, (vaddr_t)a, &pa) == FALSE)
983		printf("<invalid address>");
984	else
985		printf("%lx, current value %lx", pa, fuword((caddr_t)a));
986	printf("\n");
987}
988#endif
989#endif
990
991/*
992 * Process a system call.
993 */
994void
995syscall(code, frame)
996	register_t code;
997	struct frame frame;
998{
999	caddr_t params;
1000	struct sysent *callp;
1001	struct proc *p;
1002	int error, opc, nsys;
1003	size_t argsize;
1004	register_t args[8], rval[2];
1005	u_quad_t sticks;
1006
1007	uvmexp.syscalls++;
1008	if (!USERMODE(frame.f_sr))
1009		panic("syscall");
1010	p = curproc;
1011	sticks = p->p_sticks;
1012	p->p_md.md_regs = frame.f_regs;
1013	opc = frame.f_pc;
1014
1015	nsys = p->p_emul->e_nsysent;
1016	callp = p->p_emul->e_sysent;
1017
1018#ifdef COMPAT_SUNOS
1019	if (p->p_emul == &emul_sunos) {
1020		/*
1021		 * SunOS passes the syscall-number on the stack, whereas
1022		 * BSD passes it in D0. So, we have to get the real "code"
1023		 * from the stack, and clean up the stack, as SunOS glue
1024		 * code assumes the kernel pops the syscall argument the
1025		 * glue pushed on the stack. Sigh...
1026		 */
1027		code = fuword((caddr_t)frame.f_regs[SP]);
1028
1029		/*
1030		 * XXX
1031		 * Don't do this for sunos_sigreturn, as there's no stored pc
1032		 * on the stack to skip, the argument follows the syscall
1033		 * number without a gap.
1034		 */
1035		if (code != SUNOS_SYS_sigreturn) {
1036			frame.f_regs[SP] += sizeof (int);
1037			/*
1038			 * remember that we adjusted the SP,
1039			 * might have to undo this if the system call
1040			 * returns ERESTART.
1041			 */
1042			p->p_md.md_flags |= MDP_STACKADJ;
1043		} else
1044			p->p_md.md_flags &= ~MDP_STACKADJ;
1045	}
1046#endif
1047
1048	params = (caddr_t)frame.f_regs[SP] + sizeof(int);
1049
1050	switch (code) {
1051	case SYS_syscall:
1052		/*
1053		 * Code is first argument, followed by actual args.
1054		 */
1055		code = fuword(params);
1056		params += sizeof(int);
1057		/*
1058		 * XXX sigreturn requires special stack manipulation
1059		 * that is only done if entered via the sigreturn
1060		 * trap.  Cannot allow it here so make sure we fail.
1061		 */
1062		switch (code) {
1063#ifdef COMPAT_13
1064		case SYS_compat_13_sigreturn13:
1065#endif
1066		case SYS___sigreturn14:
1067			code = nsys;
1068			break;
1069		}
1070		break;
1071	case SYS___syscall:
1072		/*
1073		 * Like syscall, but code is a quad, so as to maintain
1074		 * quad alignment for the rest of the arguments.
1075		 */
1076		if (callp != sysent)
1077			break;
1078		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
1079		params += sizeof(quad_t);
1080		break;
1081	default:
1082		break;
1083	}
1084	if (code < 0 || code >= nsys)
1085		callp += p->p_emul->e_nosys;		/* illegal */
1086	else
1087		callp += code;
1088	argsize = callp->sy_argsize;
1089#ifdef COMPAT_LINUX
1090	if (0
1091# ifdef EXEC_AOUT
1092	    || p->p_emul == &emul_linux_aout
1093# endif
1094# ifdef EXEC_ELF32
1095	    || p->p_emul == &emul_linux_elf32
1096# endif
1097	     ) {
1098		/*
1099		 * Linux passes the args in d1-d5
1100		 */
1101		switch (argsize) {
1102		case 20:
1103			args[4] = frame.f_regs[D5];
1104		case 16:
1105			args[3] = frame.f_regs[D4];
1106		case 12:
1107			args[2] = frame.f_regs[D3];
1108		case 8:
1109			args[1] = frame.f_regs[D2];
1110		case 4:
1111			args[0] = frame.f_regs[D1];
1112		case 0:
1113			error = 0;
1114			break;
1115		default:
1116#ifdef DEBUG
1117			panic("linux syscall %d weird argsize %d",
1118				code, argsize);
1119#else
1120			error = EINVAL;
1121#endif
1122			break;
1123		}
1124	} else
1125#endif
1126	if (argsize)
1127		error = copyin(params, (caddr_t)args, argsize);
1128	else
1129		error = 0;
1130#ifdef SYSCALL_DEBUG
1131	scdebug_call(p, code, args);
1132#endif
1133#ifdef KTRACE
1134	if (KTRPOINT(p, KTR_SYSCALL))
1135		ktrsyscall(p->p_tracep, code, argsize, args);
1136#endif
1137	if (error)
1138		goto bad;
1139	rval[0] = 0;
1140	rval[1] = frame.f_regs[D1];
1141	error = (*callp->sy_call)(p, args, rval);
1142	switch (error) {
1143	case 0:
1144		frame.f_regs[D0] = rval[0];
1145		frame.f_regs[D1] = rval[1];
1146		frame.f_sr &= ~PSL_C;	/* carry bit */
1147		break;
1148	case ERESTART:
1149		/*
1150		 * We always enter through a `trap' instruction, which is 2
1151		 * bytes, so adjust the pc by that amount.
1152		 */
1153		frame.f_pc = opc - 2;
1154		break;
1155	case EJUSTRETURN:
1156		/* nothing to do */
1157		break;
1158	default:
1159	bad:
1160		if (p->p_emul->e_errno)
1161			error = p->p_emul->e_errno[error];
1162		frame.f_regs[D0] = error;
1163		frame.f_sr |= PSL_C;	/* carry bit */
1164		break;
1165	}
1166
1167#ifdef SYSCALL_DEBUG
1168	scdebug_ret(p, code, error, rval);
1169#endif
1170#ifdef COMPAT_SUNOS
1171	/* need new p-value for this */
1172	if (error == ERESTART && (p->p_md.md_flags & MDP_STACKADJ))
1173		frame.f_regs[SP] -= sizeof (int);
1174#endif
1175	userret(p, &frame, sticks, (u_int)0, 0);
1176#ifdef KTRACE
1177	if (KTRPOINT(p, KTR_SYSRET))
1178		ktrsysret(p->p_tracep, code, error, rval[0]);
1179#endif
1180}
1181
1182void
1183child_return(arg)
1184	void *arg;
1185{
1186	struct proc *p = arg;
1187	/* See cpu_fork() */
1188	struct frame *f = (struct frame *)p->p_md.md_regs;
1189
1190	f->f_regs[D0] = 0;
1191	f->f_sr &= ~PSL_C;
1192	f->f_format = FMT0;
1193
1194	userret(p, f, 0, (u_int)0, 0);
1195#ifdef KTRACE
1196	if (KTRPOINT(p, KTR_SYSRET))
1197		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1198#endif
1199}
1200
1201/*
1202 * Allocation routines for software interrupts.
1203 */
1204u_long
1205allocate_sir(proc, arg)
1206	void (*proc)(void *);
1207	void *arg;
1208{
1209	int bit;
1210
1211	if( next_sir >= NSIR )
1212		panic("allocate_sir: none left");
1213	bit = next_sir++;
1214	sir_routines[bit] = proc;
1215	sir_args[bit] = arg;
1216	return (1 << bit);
1217}
1218
1219void
1220init_sir()
1221{
1222	extern void netintr(void);
1223
1224	sir_routines[0] = (void (*)(void *))netintr;
1225	sir_routines[1] = (void (*)(void *))softclock;
1226	next_sir = 2;
1227}
1228