trap.c revision 1.16
1/*	$NetBSD: trap.c,v 1.16 1999/08/04 07:18:21 dbj Exp $	*/
2
3/*
4 * This file was taken from mvme68k/mvme68k/trap.c
5 * should probably be re-synced when needed.
6 * Darrin B. Jewell <jewell@mit.edu> Tue Aug  3 10:53:12 UTC 1999
7 * original cvs id: NetBSD: trap.c,v 1.32 1999/08/03 10:52:06 dbj Exp
8 */
9
10/*
11 * Copyright (c) 1988 University of Utah.
12 * Copyright (c) 1982, 1986, 1990, 1993
13 *	The Regents of the University of California.  All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * the Systems Programming Group of the University of Utah Computer
17 * Science Department.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 * 3. All advertising materials mentioning features or use of this software
28 *    must display the following acknowledgement:
29 *	This product includes software developed by the University of
30 *	California, Berkeley and its contributors.
31 * 4. Neither the name of the University nor the names of its contributors
32 *    may be used to endorse or promote products derived from this software
33 *    without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
46 *
47 * from: Utah $Hdr: trap.c 1.37 92/12/20$
48 *
49 *	@(#)trap.c	8.5 (Berkeley) 1/4/94
50 */
51
52#include "opt_ddb.h"
53#include "opt_execfmt.h"
54#include "opt_ktrace.h"
55#include "opt_compat_netbsd.h"
56#include "opt_compat_sunos.h"
57#include "opt_compat_hpux.h"
58#include "opt_compat_linux.h"
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/proc.h>
63#include <sys/acct.h>
64#include <sys/kernel.h>
65#include <sys/signalvar.h>
66#include <sys/resourcevar.h>
67#include <sys/syscall.h>
68#include <sys/syslog.h>
69#include <sys/user.h>
70#ifdef KTRACE
71#include <sys/ktrace.h>
72#endif
73
74#ifdef DEBUG
75#include <dev/cons.h>
76#endif
77
78#include <machine/db_machdep.h>
79#include <machine/psl.h>
80#include <machine/trap.h>
81#include <machine/cpu.h>
82#include <machine/reg.h>
83
84#include <vm/vm.h>
85#include <vm/pmap.h>
86
87#include <m68k/cacheops.h>
88
89#include <uvm/uvm_extern.h>
90
91#ifdef COMPAT_HPUX
92#include <compat/hpux/hpux.h>
93#endif
94
95#ifdef COMPAT_SUNOS
96#include <compat/sunos/sunos_syscall.h>
97extern struct emul emul_sunos;
98#endif
99
100#ifdef COMPAT_LINUX
101#ifdef EXEC_AOUT
102extern struct emul emul_linux_aout;
103#endif
104#ifdef EXEC_ELF32
105extern struct emul emul_linux_elf32;
106#endif
107#endif
108
109int	writeback __P((struct frame *fp, int docachepush));
110void	trap __P((int type, u_int code, u_int v, struct frame frame));
111void	syscall __P((register_t code, struct frame frame));
112
113#ifdef DEBUG
114void	dumpssw __P((u_short));
115void	dumpwb __P((int, u_short, u_int, u_int));
116#endif
117
118static inline void userret __P((struct proc *p, struct frame *fp,
119	    u_quad_t oticks, u_int faultaddr, int fromtrap));
120
/* AST-pending flag; trap() clears it when it handles T_ASTFLT|T_USER. */
int	astpending;
122
/*
 * Human-readable trap names, indexed by trap type with T_USER masked off.
 * trap() uses the matching entry as its panic message when the type is
 * within range (see trap_types below).
 */
char	*trap_type[] = {
	"Bus error",
	"Address error",
	"Illegal instruction",
	"Zero divide",
	"CHK instruction",
	"TRAPV instruction",
	"Privilege violation",
	"Trace trap",
	"MMU fault",
	"SSIR trap",
	"Format error",
	"68881 exception",
	"Coprocessor violation",
	"Async system trap"
};
/* Number of entries in trap_type[]. */
int	trap_types = sizeof trap_type / sizeof trap_type[0];
140
/*
 * Size of various exception stack frames (minus the standard 8 bytes).
 * Indexed by the frame format number (frame.f_format); trap() stores the
 * looked-up value in f_stackadj on the copyfault path.  Formats marked
 * undefined hold -1.
 * NOTE(review): type 8 uses a bare 58 rather than a FMT8SIZE-style
 * constant -- confirm whether a named constant exists.
 */
short	exframesize[] = {
	FMT0SIZE,	/* type 0 - normal (68020/030/040/060) */
	FMT1SIZE,	/* type 1 - throwaway (68020/030/040) */
	FMT2SIZE,	/* type 2 - normal 6-word (68020/030/040/060) */
	FMT3SIZE,	/* type 3 - FP post-instruction (68040/060) */
	FMT4SIZE,	/* type 4 - access error/fp disabled (68060) */
	-1, -1,		/* type 5-6 - undefined */
	FMT7SIZE,	/* type 7 - access error (68040) */
	58,		/* type 8 - bus fault (68010) */
	FMT9SIZE,	/* type 9 - coprocessor mid-instruction (68020/030) */
	FMTASIZE,	/* type A - short bus fault (68020/030) */
	FMTBSIZE,	/* type B - long bus fault (68020/030) */
	-1, -1, -1, -1	/* type C-F - undefined */
};
158
/*
 * Fault-status decoding helpers.
 *
 * KDFAULT(c) is true when status word `c' describes a kernel (supervisor)
 * data space fault; WRFAULT(c) is true when it describes a write fault.
 * Each CPU family has its own status word layout, so there is one variant
 * per family, selected at run time by cputype.  Variants for CPU types
 * that are not configured into the kernel collapse to constant 0.
 */
#ifdef M68060
#define	KDFAULT_060(c)	(cputype == CPU_68060 && ((c) & FSLW_TM_SV))
#define	WRFAULT_060(c)	(cputype == CPU_68060 && ((c) & FSLW_RW_W))
#else
#define	KDFAULT_060(c)	0
#define	WRFAULT_060(c)	0
#endif

#ifdef M68040
#define	KDFAULT_040(c)	(cputype == CPU_68040 && \
			 ((c) & SSW4_TMMASK) == SSW4_TMKD)
#define	WRFAULT_040(c)	(cputype == CPU_68040 && \
			 ((c) & SSW4_RW) == 0)
#else
#define	KDFAULT_040(c)	0
#define	WRFAULT_040(c)	0
#endif

#if defined(M68030) || defined(M68020)
#define	KDFAULT_OTH(c)	(cputype <= CPU_68030 && \
			 ((c) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
#define	WRFAULT_OTH(c)	(cputype <= CPU_68030 && \
			 ((c) & (SSW_DF|SSW_RW)) == SSW_DF)
#else
#define	KDFAULT_OTH(c)	0
#define	WRFAULT_OTH(c)	0
#endif

/* CPU-independent forms: true if any configured variant matches. */
#define	KDFAULT(c)	(KDFAULT_060(c) || KDFAULT_040(c) || KDFAULT_OTH(c))
#define	WRFAULT(c)	(WRFAULT_060(c) || WRFAULT_040(c) || WRFAULT_OTH(c))
189
#ifdef DEBUG
/*
 * MMU fault debugging knobs.  mmudebug is a mask of the MDB_* bits below;
 * mmupid selects one process (by pid) whose faults and writebacks are
 * traced regardless of mmudebug.
 */
int mmudebug = 0;
int mmupid = -1;
#define MDB_FOLLOW	1	/* not referenced in this part of the file */
#define MDB_WBFOLLOW	2	/* trace MMU faults and 68040 writebacks */
#define MDB_WBFAILED	4	/* report writebacks that failed */
#define MDB_ISPID(p)	((p) == mmupid)
#endif
198
199
/*
 * Software interrupt dispatch tables.  Slot N holds the handler and its
 * argument for bit N of ssir; trap() calls the handler for each set bit.
 * next_sir is the next unallocated slot (see allocate_sir/init_sir).
 */
#define NSIR	32
void (*sir_routines[NSIR])(void *);
void *sir_args[NSIR];
int next_sir;
204
/*
 * trap and syscall both need the following work done before returning
 * to user mode.
 *
 * p         - the process returning to user mode
 * fp        - its exception frame
 * oticks    - value of p->p_sticks sampled on kernel entry, used to
 *             compute the system time charged to the profiler
 * faultaddr - fault address handed to trapsignal() if a 68040 writeback
 *             fails (callers other than trap pass 0)
 * fromtrap  - nonzero when called from trap(); forwarded to writeback()
 *             as its docachepush argument and used to pick the debug
 *             message
 */
static inline void
userret(p, fp, oticks, faultaddr, fromtrap)
	struct proc *p;
	struct frame *fp;
	u_quad_t oticks;
	u_int faultaddr;
	int fromtrap;
{
	int sig, s;
#ifdef M68040
	int beenhere = 0;	/* set once a writeback has been attempted */

again:
#endif
	/* take pending signals */
	while ((sig = CURSIG(p)) != 0)
		postsig(sig);
	p->p_priority = p->p_usrpri;
	if (want_resched) {
		/*
		 * Since we are curproc, clock will normally just change
		 * our priority without moving us from one queue to another
		 * (since the running process is not on a queue.)
		 * If that happened after we put ourselves on the run queue
		 * but before we mi_switch()'ed, we might not be on the queue
		 * indicated by our priority.
		 */
		s = splstatclock();
		setrunqueue(p);
		p->p_stats->p_ru.ru_nivcsw++;
		mi_switch();
		splx(s);
		/* signals may have arrived while we were switched out */
		while ((sig = CURSIG(p)) != 0)
			postsig(sig);
	}

	/*
	 * If profiling, charge system time to the trapped pc.
	 */
	if (p->p_flag & P_PROFIL) {
		extern int psratio;

		addupc_task(p, fp->f_pc,
			    (int)(p->p_sticks - oticks) * psratio);
	}
#ifdef M68040
	/*
	 * Deal with user mode writebacks (from trap, or from sigreturn).
	 * If any writeback fails, go back and attempt signal delivery,
	 * unless we have already been here and attempted the writeback
	 * (e.g. bad address with user ignoring SIGSEGV).  In that case
	 * we just return to the user without successfully completing
	 * the writebacks.  Maybe we should just drop the sucker?
	 */
	if (cputype == CPU_68040 && fp->f_format == FMT7) {
		if (beenhere) {
#ifdef DEBUG
			/*
			 * The sigreturn format has no fa=%x conversion, so
			 * the trailing faultaddr argument is simply unused.
			 */
			if (mmudebug & MDB_WBFAILED)
				printf(fromtrap ?
		"pid %d(%s): writeback aborted, pc=%x, fa=%x\n" :
		"pid %d(%s): writeback aborted in sigreturn, pc=%x\n",
				    p->p_pid, p->p_comm, fp->f_pc, faultaddr);
#endif
		} else if ((sig = writeback(fp, fromtrap))) {
			beenhere = 1;
			/* restart the profiling interval for the retry */
			oticks = p->p_sticks;
			trapsignal(p, sig, faultaddr);
			goto again;
		}
	}
#endif
	curpriority = p->p_priority;
}
282
283/*
284 * Trap is called from locore to handle most types of processor traps,
285 * including events such as simulated software interrupts/AST's.
286 * System calls are broken out for efficiency.
287 */
288/*ARGSUSED*/
289void
290trap(type, code, v, frame)
291	int type;
292	unsigned code;
293	unsigned v;
294	struct frame frame;
295{
296	extern char fubail[], subail[];
297	struct proc *p;
298	int i, s;
299	u_int ucode;
300	u_quad_t sticks = 0 /* XXX initialiser works around compiler bug */;
301	int bit;
302
303	uvmexp.traps++;
304	p = curproc;
305	ucode = 0;
306
307	/* I have verified that this DOES happen! -gwr */
308	if (p == NULL)
309		p = &proc0;
310#ifdef DIAGNOSTIC
311	if (p->p_addr == NULL)
312		panic("trap: no pcb");
313#endif
314
315	if (USERMODE(frame.f_sr)) {
316		type |= T_USER;
317		sticks = p->p_sticks;
318		p->p_md.md_regs = frame.f_regs;
319	}
320	switch (type) {
321
322	default:
323	dopanic:
324		printf("trap type %d, code = 0x%x, v = 0x%x\n", type, code, v);
325		printf("%s program counter = 0x%x\n",
326		    (type & T_USER) ? "user" : "kernel", frame.f_pc);
327		/*
328		 * Let the kernel debugger see the trap frame that
329		 * caused us to panic.  This is a convenience so
330		 * one can see registers at the point of failure.
331		 */
332		s = splhigh();
333#ifdef KGDB
334		/* If connected, step or cont returns 1 */
335		if (kgdb_trap(type, &frame))
336			goto kgdb_cont;
337#endif
338#ifdef DDB
339		(void)kdb_trap(type, (db_regs_t *)&frame);
340#endif
341#ifdef KGDB
342	kgdb_cont:
343#endif
344		splx(s);
345		if (panicstr) {
346			printf("trap during panic!\n");
347#ifdef DEBUG
348			/* XXX should be a machine-dependent hook */
349			printf("(press a key)\n"); (void)cngetc();
350#endif
351		}
352		regdump((struct trapframe *)&frame, 128);
353		type &= ~T_USER;
354		if ((u_int)type < trap_types)
355			panic(trap_type[type]);
356		panic("trap");
357
358	case T_BUSERR:		/* kernel bus error */
359		if (p->p_addr->u_pcb.pcb_onfault == 0)
360			goto dopanic;
361		/* FALLTHROUGH */
362
363	copyfault:
364		/*
365		 * If we have arranged to catch this fault in any of the
366		 * copy to/from user space routines, set PC to return to
367		 * indicated location and set flag informing buserror code
368		 * that it may need to clean up stack frame.
369		 */
370		frame.f_stackadj = exframesize[frame.f_format];
371		frame.f_format = frame.f_vector = 0;
372		frame.f_pc = (int) p->p_addr->u_pcb.pcb_onfault;
373		return;
374
375	case T_BUSERR|T_USER:	/* bus error */
376	case T_ADDRERR|T_USER:	/* address error */
377		ucode = v;
378		i = SIGBUS;
379		break;
380
381	case T_COPERR:		/* kernel coprocessor violation */
382	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
383	case T_FMTERR:		/* ...just in case... */
384	/*
385	 * The user has most likely trashed the RTE or FP state info
386	 * in the stack frame of a signal handler.
387	 */
388		printf("pid %d: kernel %s exception\n", p->p_pid,
389		       type==T_COPERR ? "coprocessor" : "format");
390		type |= T_USER;
391		p->p_sigacts->ps_sigact[SIGILL].sa_handler = SIG_DFL;
392		sigdelset(&p->p_sigignore, SIGILL);
393		sigdelset(&p->p_sigcatch, SIGILL);
394		sigdelset(&p->p_sigmask, SIGILL);
395		i = SIGILL;
396		ucode = frame.f_format;	/* XXX was ILL_RESAD_FAULT */
397		break;
398
399	case T_COPERR|T_USER:	/* user coprocessor violation */
400	/* What is a proper response here? */
401		ucode = 0;
402		i = SIGFPE;
403		break;
404
405	case T_FPERR|T_USER:	/* 68881 exceptions */
406	/*
407	 * We pass along the 68881 status register which locore stashed
408	 * in code for us.  Note that there is a possibility that the
409	 * bit pattern of this register will conflict with one of the
410	 * FPE_* codes defined in signal.h.  Fortunately for us, the
411	 * only such codes we use are all in the range 1-7 and the low
412	 * 3 bits of the status register are defined as 0 so there is
413	 * no clash.
414	 */
415		ucode = code;
416		i = SIGFPE;
417		break;
418
419#ifdef M68040
420	case T_FPEMULI|T_USER:	/* unimplemented FP instuction */
421	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
422		/* XXX need to FSAVE */
423		printf("pid %d(%s): unimplemented FP %s at %x (EA %x)\n",
424		       p->p_pid, p->p_comm,
425		       frame.f_format == 2 ? "instruction" : "data type",
426		       frame.f_pc, frame.f_fmt2.f_iaddr);
427		/* XXX need to FRESTORE */
428		i = SIGFPE;
429		break;
430#endif
431
432	case T_ILLINST|T_USER:	/* illegal instruction fault */
433#ifdef COMPAT_HPUX
434		if (p->p_emul == &emul_hpux) {
435			ucode = HPUX_ILL_ILLINST_TRAP;
436			i = SIGILL;
437			break;
438		}
439		/* fall through */
440#endif
441	case T_PRIVINST|T_USER:	/* privileged instruction fault */
442#ifdef COMPAT_HPUX
443		if (p->p_emul == &emul_hpux)
444			ucode = HPUX_ILL_PRIV_TRAP;
445		else
446#endif
447		ucode = frame.f_format;	/* XXX was ILL_PRIVIN_FAULT */
448		i = SIGILL;
449		break;
450
451	case T_ZERODIV|T_USER:	/* Divide by zero */
452#ifdef COMPAT_HPUX
453		if (p->p_emul == &emul_hpux)
454			ucode = HPUX_FPE_INTDIV_TRAP;
455		else
456#endif
457		ucode = frame.f_format;	/* XXX was FPE_INTDIV_TRAP */
458		i = SIGFPE;
459		break;
460
461	case T_CHKINST|T_USER:	/* CHK instruction trap */
462#ifdef COMPAT_HPUX
463		if (p->p_emul == &emul_hpux) {
464			/* handled differently under hp-ux */
465			i = SIGILL;
466			ucode = HPUX_ILL_CHK_TRAP;
467			break;
468		}
469#endif
470		ucode = frame.f_format;	/* XXX was FPE_SUBRNG_TRAP */
471		i = SIGFPE;
472		break;
473
474	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
475#ifdef COMPAT_HPUX
476		if (p->p_emul == &emul_hpux) {
477			/* handled differently under hp-ux */
478			i = SIGILL;
479			ucode = HPUX_ILL_TRAPV_TRAP;
480			break;
481		}
482#endif
483		ucode = frame.f_format;	/* XXX was FPE_INTOVF_TRAP */
484		i = SIGFPE;
485		break;
486
487	/*
488	 * XXX: Trace traps are a nightmare.
489	 *
490	 *	HP-UX uses trap #1 for breakpoints,
491	 *	NetBSD/m68k uses trap #2,
492	 *	SUN 3.x uses trap #15,
493	 *	DDB and KGDB uses trap #15 (for kernel breakpoints;
494	 *	handled elsewhere).
495	 *
496	 * NetBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
497	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
498	 * supported yet.
499	 *
500	 * XXX: We should never get kernel-mode T_TRACE or T_TRAP15
501	 * XXX: because locore.s now gives them special treatment.
502	 */
503	case T_TRACE:		/* kernel trace trap */
504	case T_TRAP15:		/* kernel breakpoint */
505#ifdef DEBUG
506		printf("unexpected kernel trace trap, type = %d\n", type);
507		printf("program counter = 0x%x\n", frame.f_pc);
508#endif
509		frame.f_sr &= ~PSL_T;
510		return;
511
512	case T_TRACE|T_USER:	/* user trace trap */
513	case T_TRAP15|T_USER:	/* SUN user trace trap */
514#ifdef COMPAT_SUNOS
515		/*
516		 * SunOS uses Trap #2 for a "CPU cache flush".
517		 * Just flush the on-chip caches and return.
518		 */
519		if (p->p_emul == &emul_sunos) {
520			ICIA();
521			DCIU();
522			return;
523		}
524#endif
525		frame.f_sr &= ~PSL_T;
526		i = SIGTRAP;
527		break;
528
529	case T_ASTFLT:		/* system async trap, cannot happen */
530		goto dopanic;
531
532	case T_ASTFLT|T_USER:	/* user async trap */
533		astpending = 0;
534		/*
535		 * We check for software interrupts first.  This is because
536		 * they are at a higher level than ASTs, and on a VAX would
537		 * interrupt the AST.  We assume that if we are processing
538		 * an AST that we must be at IPL0 so we don't bother to
539		 * check.  Note that we ensure that we are at least at SIR
540		 * IPL while processing the SIR.
541		 */
542		spl1();
543		/* fall into... */
544
545	case T_SSIR:		/* software interrupt */
546	case T_SSIR|T_USER:
547		while ((bit = ffs(ssir))) {
548			--bit;
549			ssir &= ~(1 << bit);
550			uvmexp.softs++;
551			if (sir_routines[bit])
552				sir_routines[bit](sir_args[bit]);
553		}
554		/*
555		 * If this was not an AST trap, we are all done.
556		 */
557		if (type != (T_ASTFLT|T_USER)) {
558			uvmexp.traps--;
559			return;
560		}
561		spl0();
562		if (p->p_flag & P_OWEUPC) {
563			p->p_flag &= ~P_OWEUPC;
564			ADDUPROF(p);
565		}
566		goto out;
567
568	case T_MMUFLT:		/* kernel mode page fault */
569		/*
570		 * If we were doing profiling ticks or other user mode
571		 * stuff from interrupt code, Just Say No.
572		 */
573		if (p->p_addr->u_pcb.pcb_onfault == fubail ||
574		    p->p_addr->u_pcb.pcb_onfault == subail)
575			goto copyfault;
576		/* fall into ... */
577
578	case T_MMUFLT|T_USER:	/* page fault */
579	    {
580		vaddr_t va;
581		struct vmspace *vm = p->p_vmspace;
582		vm_map_t map;
583		int rv;
584		vm_prot_t ftype;
585		extern vm_map_t kernel_map;
586
587#ifdef DEBUG
588		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
589		printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n",
590		       p->p_pid, code, v, frame.f_pc, frame.f_sr);
591#endif
592		/*
593		 * It is only a kernel address space fault iff:
594		 * 	1. (type & T_USER) == 0  and
595		 * 	2. pcb_onfault not set or
596		 *	3. pcb_onfault set but supervisor space data fault
597		 * The last can occur during an exec() copyin where the
598		 * argument space is lazy-allocated.
599		 */
600		if ((type & T_USER) == 0 &&
601		    ((p->p_addr->u_pcb.pcb_onfault == 0) || KDFAULT(code)))
602			map = kernel_map;
603		else
604			map = vm ? &vm->vm_map : kernel_map;
605
606		if (WRFAULT(code))
607			ftype = VM_PROT_READ | VM_PROT_WRITE;
608		else
609			ftype = VM_PROT_READ;
610
611		va = trunc_page((vaddr_t)v);
612
613		if (map == kernel_map && va == 0) {
614			printf("trap: bad kernel %s access at 0x%x\n",
615			    (ftype & VM_PROT_WRITE) ? "read/write" :
616			    "read", v);
617			goto dopanic;
618		}
619
620#ifdef COMPAT_HPUX
621		if (ISHPMMADDR(va)) {
622			int pmap_mapmulti __P((pmap_t, vaddr_t));
623			vaddr_t bva;
624
625			rv = pmap_mapmulti(map->pmap, va);
626			if (rv != KERN_SUCCESS) {
627				bva = HPMMBASEADDR(va);
628				rv = uvm_fault(map, bva, 0, ftype);
629				if (rv == KERN_SUCCESS)
630					(void) pmap_mapmulti(map->pmap, va);
631			}
632		} else
633#endif
634		rv = uvm_fault(map, va, 0, ftype);
635#ifdef DEBUG
636		if (rv && MDB_ISPID(p->p_pid))
637			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
638			    map, va, ftype, rv);
639#endif
640		/*
641		 * If this was a stack access we keep track of the maximum
642		 * accessed stack size.  Also, if vm_fault gets a protection
643		 * failure it is due to accessing the stack region outside
644		 * the current limit and we need to reflect that as an access
645		 * error.
646		 */
647		if ((vm != NULL && (caddr_t)va >= vm->vm_maxsaddr)
648		    && map != kernel_map) {
649			if (rv == KERN_SUCCESS) {
650				unsigned nss;
651
652				nss = clrnd(btoc(USRSTACK-(unsigned)va));
653				if (nss > vm->vm_ssize)
654					vm->vm_ssize = nss;
655			} else if (rv == KERN_PROTECTION_FAILURE)
656				rv = KERN_INVALID_ADDRESS;
657		}
658		if (rv == KERN_SUCCESS) {
659			if (type == T_MMUFLT) {
660#ifdef M68040
661				if (cputype == CPU_68040)
662					(void) writeback(&frame, 1);
663#endif
664				return;
665			}
666			goto out;
667		}
668		if (type == T_MMUFLT) {
669			if (p->p_addr->u_pcb.pcb_onfault)
670				goto copyfault;
671			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
672			    map, va, ftype, rv);
673			printf("  type %x, code [mmu,,ssw]: %x\n",
674			       type, code);
675			goto dopanic;
676		}
677		ucode = v;
678		if (rv == KERN_RESOURCE_SHORTAGE) {
679			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
680			       p->p_pid, p->p_comm,
681			       p->p_cred && p->p_ucred ?
682			       p->p_ucred->cr_uid : -1);
683			i = SIGKILL;
684		} else {
685			i = SIGSEGV;
686		}
687		break;
688	    }
689	}
690	trapsignal(p, i, ucode);
691	if ((type & T_USER) == 0)
692		return;
693out:
694	userret(p, &frame, sticks, v, 1);
695}
696
697#ifdef M68040
698#ifdef DEBUG
/* Counters updated by writeback() when DEBUG is defined. */
struct writebackstats {
	int calls;		/* writeback() invocations */
	int cpushes;		/* dcache push faults */
	int move16s;		/* MOVE16 faults */
	int wb1s, wb2s, wb3s;	/* writebacks #1, #2 and #3 processed */
	int wbsize[4];		/* writebacks by size code (index as f7sz) */
} wbstats;

/*
 * Names for the size (SZ), transfer type (TT) and transfer modifier (TM)
 * fields of a 68040 status word, indexed by the extracted field value.
 */
char *f7sz[] = { "longword", "byte", "word", "line" };
char *f7tt[] = { "normal", "MOVE16", "AFC", "ACK" };
char *f7tm[] = { "d-push", "u-data", "u-code", "M-data",
		 "M-code", "k-data", "k-code", "RES" };
/* Format used to report a failed writeback. */
char wberrstr[] =
    "WARNING: pid %d(%s) writeback [%s] failed, pc=%x fa=%x wba=%x wbd=%x\n";
713#endif
714
715int
716writeback(fp, docachepush)
717	struct frame *fp;
718	int docachepush;
719{
720	struct fmt7 *f = &fp->f_fmt7;
721	struct proc *p = curproc;
722	int err = 0;
723	u_int fa;
724	caddr_t oonfault = p->p_addr->u_pcb.pcb_onfault;
725	paddr_t pa;
726
727#ifdef DEBUG
728	if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
729		printf(" pid=%d, fa=%x,", p->p_pid, f->f_fa);
730		dumpssw(f->f_ssw);
731	}
732	wbstats.calls++;
733#endif
734	/*
735	 * Deal with special cases first.
736	 */
737	if ((f->f_ssw & SSW4_TMMASK) == SSW4_TMDCP) {
738		/*
739		 * Dcache push fault.
740		 * Line-align the address and write out the push data to
741		 * the indicated physical address.
742		 */
743#ifdef DEBUG
744		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
745			printf(" pushing %s to PA %x, data %x",
746			       f7sz[(f->f_ssw & SSW4_SZMASK) >> 5],
747			       f->f_fa, f->f_pd0);
748			if ((f->f_ssw & SSW4_SZMASK) == SSW4_SZLN)
749				printf("/%x/%x/%x",
750				       f->f_pd1, f->f_pd2, f->f_pd3);
751			printf("\n");
752		}
753		if (f->f_wb1s & SSW4_WBSV)
754			panic("writeback: cache push with WB1S valid");
755		wbstats.cpushes++;
756#endif
757		/*
758		 * XXX there are security problems if we attempt to do a
759		 * cache push after a signal handler has been called.
760		 */
761		if (docachepush) {
762			pmap_enter(pmap_kernel(), (vaddr_t)vmmap,
763			    trunc_page(f->f_fa), VM_PROT_WRITE, TRUE,
764			    VM_PROT_WRITE);
765			fa = (u_int)&vmmap[(f->f_fa & PGOFSET) & ~0xF];
766			bcopy((caddr_t)&f->f_pd0, (caddr_t)fa, 16);
767			(void) pmap_extract(pmap_kernel(), (vaddr_t)fa, &pa);
768			DCFL(pa);
769			pmap_remove(pmap_kernel(), (vaddr_t)vmmap,
770				    (vaddr_t)&vmmap[NBPG]);
771		} else
772			printf("WARNING: pid %d(%s) uid %d: CPUSH not done\n",
773			       p->p_pid, p->p_comm, p->p_ucred->cr_uid);
774	} else if ((f->f_ssw & (SSW4_RW|SSW4_TTMASK)) == SSW4_TTM16) {
775		/*
776		 * MOVE16 fault.
777		 * Line-align the address and write out the push data to
778		 * the indicated virtual address.
779		 */
780#ifdef DEBUG
781		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
782			printf(" MOVE16 to VA %x(%x), data %x/%x/%x/%x\n",
783			       f->f_fa, f->f_fa & ~0xF, f->f_pd0, f->f_pd1,
784			       f->f_pd2, f->f_pd3);
785		if (f->f_wb1s & SSW4_WBSV)
786			panic("writeback: MOVE16 with WB1S valid");
787		wbstats.move16s++;
788#endif
789		if (KDFAULT(f->f_wb1s))
790			bcopy((caddr_t)&f->f_pd0, (caddr_t)(f->f_fa & ~0xF), 16);
791		else
792			err = suline((caddr_t)(f->f_fa & ~0xF), (caddr_t)&f->f_pd0);
793		if (err) {
794			fa = f->f_fa & ~0xF;
795#ifdef DEBUG
796			if (mmudebug & MDB_WBFAILED)
797				printf(wberrstr, p->p_pid, p->p_comm,
798				       "MOVE16", fp->f_pc, f->f_fa,
799				       f->f_fa & ~0xF, f->f_pd0);
800#endif
801		}
802	} else if (f->f_wb1s & SSW4_WBSV) {
803		/*
804		 * Writeback #1.
805		 * Position the "memory-aligned" data and write it out.
806		 */
807		u_int wb1d = f->f_wb1d;
808		int off;
809
810#ifdef DEBUG
811		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
812			dumpwb(1, f->f_wb1s, f->f_wb1a, f->f_wb1d);
813		wbstats.wb1s++;
814		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
815#endif
816		off = (f->f_wb1a & 3) * 8;
817		switch (f->f_wb1s & SSW4_SZMASK) {
818		case SSW4_SZLW:
819			if (off)
820				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
821			if (KDFAULT(f->f_wb1s))
822				*(long *)f->f_wb1a = wb1d;
823			else
824				err = suword((caddr_t)f->f_wb1a, wb1d);
825			break;
826		case SSW4_SZB:
827			off = 24 - off;
828			if (off)
829				wb1d >>= off;
830			if (KDFAULT(f->f_wb1s))
831				*(char *)f->f_wb1a = wb1d;
832			else
833				err = subyte((caddr_t)f->f_wb1a, wb1d);
834			break;
835		case SSW4_SZW:
836			off = (off + 16) % 32;
837			if (off)
838				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
839			if (KDFAULT(f->f_wb1s))
840				*(short *)f->f_wb1a = wb1d;
841			else
842				err = susword((caddr_t)f->f_wb1a, wb1d);
843			break;
844		}
845		if (err) {
846			fa = f->f_wb1a;
847#ifdef DEBUG
848			if (mmudebug & MDB_WBFAILED)
849				printf(wberrstr, p->p_pid, p->p_comm,
850				       "#1", fp->f_pc, f->f_fa,
851				       f->f_wb1a, f->f_wb1d);
852#endif
853		}
854	}
855	/*
856	 * Deal with the "normal" writebacks.
857	 *
858	 * XXX writeback2 is known to reflect a LINE size writeback after
859	 * a MOVE16 was already dealt with above.  Ignore it.
860	 */
861	if (err == 0 && (f->f_wb2s & SSW4_WBSV) &&
862	    (f->f_wb2s & SSW4_SZMASK) != SSW4_SZLN) {
863#ifdef DEBUG
864		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
865			dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
866		wbstats.wb2s++;
867		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
868#endif
869		switch (f->f_wb2s & SSW4_SZMASK) {
870		case SSW4_SZLW:
871			if (KDFAULT(f->f_wb2s))
872				*(long *)f->f_wb2a = f->f_wb2d;
873			else
874				err = suword((caddr_t)f->f_wb2a, f->f_wb2d);
875			break;
876		case SSW4_SZB:
877			if (KDFAULT(f->f_wb2s))
878				*(char *)f->f_wb2a = f->f_wb2d;
879			else
880				err = subyte((caddr_t)f->f_wb2a, f->f_wb2d);
881			break;
882		case SSW4_SZW:
883			if (KDFAULT(f->f_wb2s))
884				*(short *)f->f_wb2a = f->f_wb2d;
885			else
886				err = susword((caddr_t)f->f_wb2a, f->f_wb2d);
887			break;
888		}
889		if (err) {
890			fa = f->f_wb2a;
891#ifdef DEBUG
892			if (mmudebug & MDB_WBFAILED) {
893				printf(wberrstr, p->p_pid, p->p_comm,
894				       "#2", fp->f_pc, f->f_fa,
895				       f->f_wb2a, f->f_wb2d);
896				dumpssw(f->f_ssw);
897				dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
898			}
899#endif
900		}
901	}
902	if (err == 0 && (f->f_wb3s & SSW4_WBSV)) {
903#ifdef DEBUG
904		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
905			dumpwb(3, f->f_wb3s, f->f_wb3a, f->f_wb3d);
906		wbstats.wb3s++;
907		wbstats.wbsize[(f->f_wb3s&SSW4_SZMASK)>>5]++;
908#endif
909		switch (f->f_wb3s & SSW4_SZMASK) {
910		case SSW4_SZLW:
911			if (KDFAULT(f->f_wb3s))
912				*(long *)f->f_wb3a = f->f_wb3d;
913			else
914				err = suword((caddr_t)f->f_wb3a, f->f_wb3d);
915			break;
916		case SSW4_SZB:
917			if (KDFAULT(f->f_wb3s))
918				*(char *)f->f_wb3a = f->f_wb3d;
919			else
920				err = subyte((caddr_t)f->f_wb3a, f->f_wb3d);
921			break;
922		case SSW4_SZW:
923			if (KDFAULT(f->f_wb3s))
924				*(short *)f->f_wb3a = f->f_wb3d;
925			else
926				err = susword((caddr_t)f->f_wb3a, f->f_wb3d);
927			break;
928#ifdef DEBUG
929		case SSW4_SZLN:
930			panic("writeback: wb3s indicates LINE write");
931#endif
932		}
933		if (err) {
934			fa = f->f_wb3a;
935#ifdef DEBUG
936			if (mmudebug & MDB_WBFAILED)
937				printf(wberrstr, p->p_pid, p->p_comm,
938				       "#3", fp->f_pc, f->f_fa,
939				       f->f_wb3a, f->f_wb3d);
940#endif
941		}
942	}
943	p->p_addr->u_pcb.pcb_onfault = oonfault;
944	if (err)
945		err = SIGSEGV;
946	return (err);
947}
948
949#ifdef DEBUG
950void
951dumpssw(ssw)
952	u_short ssw;
953{
954	printf(" SSW: %x: ", ssw);
955	if (ssw & SSW4_CP)
956		printf("CP,");
957	if (ssw & SSW4_CU)
958		printf("CU,");
959	if (ssw & SSW4_CT)
960		printf("CT,");
961	if (ssw & SSW4_CM)
962		printf("CM,");
963	if (ssw & SSW4_MA)
964		printf("MA,");
965	if (ssw & SSW4_ATC)
966		printf("ATC,");
967	if (ssw & SSW4_LK)
968		printf("LK,");
969	if (ssw & SSW4_RW)
970		printf("RW,");
971	printf(" SZ=%s, TT=%s, TM=%s\n",
972	       f7sz[(ssw & SSW4_SZMASK) >> 5],
973	       f7tt[(ssw & SSW4_TTMASK) >> 3],
974	       f7tm[ssw & SSW4_TMMASK]);
975}
976
977void
978dumpwb(num, s, a, d)
979	int num;
980	u_short s;
981	u_int a, d;
982{
983	struct proc *p = curproc;
984	paddr_t pa;
985
986	printf(" writeback #%d: VA %x, data %x, SZ=%s, TT=%s, TM=%s\n",
987	       num, a, d, f7sz[(s & SSW4_SZMASK) >> 5],
988	       f7tt[(s & SSW4_TTMASK) >> 3], f7tm[s & SSW4_TMMASK]);
989	printf("               PA ");
990	if (pmap_extract(p->p_vmspace->vm_map.pmap, (vaddr_t)a, &pa) == FALSE)
991		printf("<invalid address>");
992	else
993		printf("%lx, current value %lx", pa, fuword((caddr_t)a));
994	printf("\n");
995}
996#endif
997#endif
998
999/*
1000 * Process a system call.
1001 */
1002void
1003syscall(code, frame)
1004	register_t code;
1005	struct frame frame;
1006{
1007	caddr_t params;
1008	struct sysent *callp;
1009	struct proc *p;
1010	int error, opc, nsys;
1011	size_t argsize;
1012	register_t args[8], rval[2];
1013	u_quad_t sticks;
1014
1015	uvmexp.syscalls++;
1016	if (!USERMODE(frame.f_sr))
1017		panic("syscall");
1018	p = curproc;
1019	sticks = p->p_sticks;
1020	p->p_md.md_regs = frame.f_regs;
1021	opc = frame.f_pc;
1022
1023	nsys = p->p_emul->e_nsysent;
1024	callp = p->p_emul->e_sysent;
1025
1026#ifdef COMPAT_SUNOS
1027	if (p->p_emul == &emul_sunos) {
1028		/*
1029		 * SunOS passes the syscall-number on the stack, whereas
1030		 * BSD passes it in D0. So, we have to get the real "code"
1031		 * from the stack, and clean up the stack, as SunOS glue
1032		 * code assumes the kernel pops the syscall argument the
1033		 * glue pushed on the stack. Sigh...
1034		 */
1035		code = fuword((caddr_t)frame.f_regs[SP]);
1036
1037		/*
1038		 * XXX
1039		 * Don't do this for sunos_sigreturn, as there's no stored pc
1040		 * on the stack to skip, the argument follows the syscall
1041		 * number without a gap.
1042		 */
1043		if (code != SUNOS_SYS_sigreturn) {
1044			frame.f_regs[SP] += sizeof (int);
1045			/*
1046			 * remember that we adjusted the SP,
1047			 * might have to undo this if the system call
1048			 * returns ERESTART.
1049			 */
1050			p->p_md.md_flags |= MDP_STACKADJ;
1051		} else
1052			p->p_md.md_flags &= ~MDP_STACKADJ;
1053	}
1054#endif
1055
1056	params = (caddr_t)frame.f_regs[SP] + sizeof(int);
1057
1058	switch (code) {
1059	case SYS_syscall:
1060		/*
1061		 * Code is first argument, followed by actual args.
1062		 */
1063		code = fuword(params);
1064		params += sizeof(int);
1065		/*
1066		 * XXX sigreturn requires special stack manipulation
1067		 * that is only done if entered via the sigreturn
1068		 * trap.  Cannot allow it here so make sure we fail.
1069		 */
1070		switch (code) {
1071#ifdef COMPAT_13
1072		case SYS_compat_13_sigreturn13:
1073#endif
1074		case SYS___sigreturn14:
1075			code = nsys;
1076			break;
1077		}
1078		break;
1079	case SYS___syscall:
1080		/*
1081		 * Like syscall, but code is a quad, so as to maintain
1082		 * quad alignment for the rest of the arguments.
1083		 */
1084		if (callp != sysent)
1085			break;
1086		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
1087		params += sizeof(quad_t);
1088		break;
1089	default:
1090		break;
1091	}
1092	if (code < 0 || code >= nsys)
1093		callp += p->p_emul->e_nosys;		/* illegal */
1094	else
1095		callp += code;
1096	argsize = callp->sy_argsize;
1097#ifdef COMPAT_LINUX
1098	if (0
1099# ifdef EXEC_AOUT
1100	    || p->p_emul == &emul_linux_aout
1101# endif
1102# ifdef EXEC_ELF32
1103	    || p->p_emul == &emul_linux_elf32
1104# endif
1105	     ) {
1106		/*
1107		 * Linux passes the args in d1-d5
1108		 */
1109		switch (argsize) {
1110		case 20:
1111			args[4] = frame.f_regs[D5];
1112		case 16:
1113			args[3] = frame.f_regs[D4];
1114		case 12:
1115			args[2] = frame.f_regs[D3];
1116		case 8:
1117			args[1] = frame.f_regs[D2];
1118		case 4:
1119			args[0] = frame.f_regs[D1];
1120		case 0:
1121			error = 0;
1122			break;
1123		default:
1124#ifdef DEBUG
1125			panic("linux syscall %d weird argsize %d",
1126				code, argsize);
1127#else
1128			error = EINVAL;
1129#endif
1130			break;
1131		}
1132	} else
1133#endif
1134	if (argsize)
1135		error = copyin(params, (caddr_t)args, argsize);
1136	else
1137		error = 0;
1138#ifdef SYSCALL_DEBUG
1139	scdebug_call(p, code, args);
1140#endif
1141#ifdef KTRACE
1142	if (KTRPOINT(p, KTR_SYSCALL))
1143		ktrsyscall(p->p_tracep, code, argsize, args);
1144#endif
1145	if (error)
1146		goto bad;
1147	rval[0] = 0;
1148	rval[1] = frame.f_regs[D1];
1149	error = (*callp->sy_call)(p, args, rval);
1150	switch (error) {
1151	case 0:
1152		frame.f_regs[D0] = rval[0];
1153		frame.f_regs[D1] = rval[1];
1154		frame.f_sr &= ~PSL_C;	/* carry bit */
1155		break;
1156	case ERESTART:
1157		/*
1158		 * We always enter through a `trap' instruction, which is 2
1159		 * bytes, so adjust the pc by that amount.
1160		 */
1161		frame.f_pc = opc - 2;
1162		break;
1163	case EJUSTRETURN:
1164		/* nothing to do */
1165		break;
1166	default:
1167	bad:
1168		if (p->p_emul->e_errno)
1169			error = p->p_emul->e_errno[error];
1170		frame.f_regs[D0] = error;
1171		frame.f_sr |= PSL_C;	/* carry bit */
1172		break;
1173	}
1174
1175#ifdef SYSCALL_DEBUG
1176	scdebug_ret(p, code, error, rval);
1177#endif
1178#ifdef COMPAT_SUNOS
1179	/* need new p-value for this */
1180	if (error == ERESTART && (p->p_md.md_flags & MDP_STACKADJ))
1181		frame.f_regs[SP] -= sizeof (int);
1182#endif
1183	userret(p, &frame, sticks, (u_int)0, 0);
1184#ifdef KTRACE
1185	if (KTRPOINT(p, KTR_SYSRET))
1186		ktrsysret(p->p_tracep, code, error, rval[0]);
1187#endif
1188}
1189
1190void
1191child_return(arg)
1192	void *arg;
1193{
1194	struct proc *p = arg;
1195	/* See cpu_fork() */
1196	struct frame *f = (struct frame *)p->p_md.md_regs;
1197
1198	f->f_regs[D0] = 0;
1199	f->f_sr &= ~PSL_C;
1200	f->f_format = FMT0;
1201
1202	userret(p, f, 0, (u_int)0, 0);
1203#ifdef KTRACE
1204	if (KTRPOINT(p, KTR_SYSRET))
1205		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1206#endif
1207}
1208
1209/*
1210 * Allocation routines for software interrupts.
1211 */
1212u_long
1213allocate_sir(proc, arg)
1214	void (*proc)(void *);
1215	void *arg;
1216{
1217	int bit;
1218
1219	if( next_sir >= NSIR )
1220		panic("allocate_sir: none left");
1221	bit = next_sir++;
1222	sir_routines[bit] = proc;
1223	sir_args[bit] = arg;
1224	return (1 << bit);
1225}
1226
1227void
1228init_sir()
1229{
1230	extern void netintr(void);
1231
1232	sir_routines[0] = (void (*)(void *))netintr;
1233	sir_routines[1] = (void (*)(void *))softclock;
1234	next_sir = 2;
1235}
1236