trap.c revision 1.13
/*	$NetBSD: trap.c,v 1.13 1999/03/26 23:41:32 mycroft Exp $ */

/*
 * This file was taken from mvme68k/mvme68k/trap.c and
 * should probably be re-synced when needed.
 * Darrin B. Jewell <jewell@mit.edu>  Tue Nov 10 05:07:16 1998
 * original cvs id: NetBSD: trap.c,v 1.24 1998/10/01 02:53:54 thorpej Exp
 */
9
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1982, 1986, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: trap.c 1.37 92/12/20$
 *
 *	@(#)trap.c	8.5 (Berkeley) 1/4/94
 */
51
52#include "opt_ddb.h"
53#include "opt_execfmt.h"
54#include "opt_ktrace.h"
55#include "opt_compat_netbsd.h"
56#include "opt_compat_sunos.h"
57#include "opt_compat_hpux.h"
58#include "opt_compat_linux.h"
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/proc.h>
63#include <sys/acct.h>
64#include <sys/kernel.h>
65#include <sys/signalvar.h>
66#include <sys/resourcevar.h>
67#include <sys/syscall.h>
68#include <sys/syslog.h>
69#include <sys/user.h>
70#ifdef KTRACE
71#include <sys/ktrace.h>
72#endif
73
74#include <machine/psl.h>
75#include <machine/trap.h>
76#include <machine/cpu.h>
77#include <machine/reg.h>
78
79#include <vm/vm.h>
80#include <vm/pmap.h>
81
82#include <uvm/uvm_extern.h>
83
84#ifdef COMPAT_HPUX
85#include <compat/hpux/hpux.h>
86#endif
87
88#ifdef COMPAT_SUNOS
89#include <compat/sunos/sunos_syscall.h>
90extern struct emul emul_sunos;
91#endif
92
93#ifdef COMPAT_LINUX
94#ifdef EXEC_AOUT
95extern struct emul emul_linux_aout;
96#endif
97#ifdef EXEC_ELF32
98extern struct emul emul_linux_elf32;
99#endif
100#endif
101
102#include <m68k/cacheops.h>
103
/* AST-pending flag; trap() clears it when it handles T_ASTFLT|T_USER. */
int	astpending;

/*
 * Printable names for the trap types.  trap() indexes this table with
 * the trap type (T_USER bit masked off) to build its panic message.
 */
char	*trap_type[] = {
	"Bus error",
	"Address error",
	"Illegal instruction",
	"Zero divide",
	"CHK instruction",
	"TRAPV instruction",
	"Privilege violation",
	"Trace trap",
	"MMU fault",
	"SSIR trap",
	"Format error",
	"68881 exception",
	"Coprocessor violation",
	"Async system trap"
};
/* Number of entries in trap_type[]; used as the bound before indexing. */
int	trap_types = sizeof trap_type / sizeof trap_type[0];
123
124/*
125 * Size of various exception stack frames (minus the standard 8 bytes)
126 */
127short	exframesize[] = {
128	FMT0SIZE,	/* type 0 - normal (68020/030/040) */
129	FMT1SIZE,	/* type 1 - throwaway (68020/030/040) */
130	FMT2SIZE,	/* type 2 - normal 6-word (68020/030/040) */
131	FMT3SIZE,	/* type 3 - FP post-instruction (68040) */
132	-1, -1, -1,	/* type 4-6 - undefined */
133	FMT7SIZE,	/* type 7 - access error (68040) */
134	58,		/* type 8 - bus fault (68010) */
135	FMT9SIZE,	/* type 9 - coprocessor mid-instruction (68020/030) */
136	FMTASIZE,	/* type A - short bus fault (68020/030) */
137	FMTBSIZE,	/* type B - long bus fault (68020/030) */
138	-1, -1, -1, -1	/* type C-F - undefined */
139};
140
/*
 * Fault classification from the special status word `c'.
 *
 *	KDFAULT(c)	true for a supervisor (kernel) data space fault
 *	WRFAULT(c)	true for a fault caused by a write access
 *
 * With M68040 configured the test is selected at run time from mmutype,
 * because the 68040 SSW layout (SSW4_*) differs from the older one
 * (SSW_*).  The non-68040 arm of KDFAULT masks with the full function
 * code field (SSW_FCMASK) so that it agrees with the definition used
 * when M68040 is not configured.
 */
#ifdef M68040
#define KDFAULT(c)    (mmutype == MMU_68040 ? \
			    ((c) & SSW4_TMMASK) == SSW4_TMKD : \
			    ((c) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
#define WRFAULT(c)    (mmutype == MMU_68040 ? \
			    ((c) & SSW4_RW) == 0 : \
			    ((c) & (SSW_DF|SSW_RW)) == SSW_DF)
#else
#define KDFAULT(c)	(((c) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
#define WRFAULT(c)	(((c) & (SSW_DF|SSW_RW)) == SSW_DF)
#endif
152
#ifdef DEBUG
/*
 * MMU fault debugging knobs (DEBUG kernels only).
 *
 *	mmudebug	bit mask of the MDB_* flags below
 *	mmupid		pid whose faults are always traced; -1 by default
 */
int mmudebug = 0;
int mmupid = -1;
#define MDB_FOLLOW	1
#define MDB_WBFOLLOW	2
#define MDB_WBFAILED	4
/* Fully parenthesized so the test survives !, && and similar contexts. */
#define MDB_ISPID(p)	((p) == mmupid)
#endif
161
/*
 * Software interrupt dispatch table.  Slot `bit' holds the handler and
 * the argument passed to it when trap() finds that bit set in ssir.
 * next_sir is the next free slot handed out by allocate_sir().
 */
#define NSIR	32
void (*sir_routines[NSIR])();
void *sir_args[NSIR];
int next_sir;
166
167/*
168 * trap and syscall both need the following work done before returning
169 * to user mode.
170 */
171static inline void
172userret(p, fp, oticks, faultaddr, fromtrap)
173	struct proc *p;
174	struct frame *fp;
175	u_quad_t oticks;
176	u_int faultaddr;
177	int fromtrap;
178{
179	int sig, s;
180#ifdef M68040
181	int beenhere = 0;
182
183again:
184#endif
185	/* take pending signals */
186	while ((sig = CURSIG(p)) != 0)
187		postsig(sig);
188	p->p_priority = p->p_usrpri;
189	if (want_resched) {
190		/*
191		 * Since we are curproc, clock will normally just change
192		 * our priority without moving us from one queue to another
193		 * (since the running process is not on a queue.)
194		 * If that happened after we put ourselves on the run queue
195		 * but before we mi_switch()'ed, we might not be on the queue
196		 * indicated by our priority.
197		 */
198		s = splstatclock();
199		setrunqueue(p);
200		p->p_stats->p_ru.ru_nivcsw++;
201		mi_switch();
202		splx(s);
203		while ((sig = CURSIG(p)) != 0)
204			postsig(sig);
205	}
206
207	/*
208	 * If profiling, charge system time to the trapped pc.
209	 */
210	if (p->p_flag & P_PROFIL) {
211		extern int psratio;
212
213		addupc_task(p, fp->f_pc,
214			    (int)(p->p_sticks - oticks) * psratio);
215	}
216#ifdef M68040
217	/*
218	 * Deal with user mode writebacks (from trap, or from sigreturn).
219	 * If any writeback fails, go back and attempt signal delivery.
220	 * unless we have already been here and attempted the writeback
221	 * (e.g. bad address with user ignoring SIGSEGV).  In that case
222	 * we just return to the user without sucessfully completing
223	 * the writebacks.  Maybe we should just drop the sucker?
224	 */
225	if (mmutype == MMU_68040 && fp->f_format == FMT7) {
226		if (beenhere) {
227#ifdef DEBUG
228			if (mmudebug & MDB_WBFAILED)
229				printf(fromtrap ?
230		"pid %d(%s): writeback aborted, pc=%x, fa=%x\n" :
231		"pid %d(%s): writeback aborted in sigreturn, pc=%x\n",
232				    p->p_pid, p->p_comm, fp->f_pc, faultaddr);
233#endif
234		} else if (sig = writeback(fp, fromtrap)) {
235			beenhere = 1;
236			oticks = p->p_sticks;
237			trapsignal(p, sig, faultaddr);
238			goto again;
239		}
240	}
241#endif
242	curpriority = p->p_priority;
243}
244
245/*
246 * Trap is called from locore to handle most types of processor traps,
247 * including events such as simulated software interrupts/AST's.
248 * System calls are broken out for efficiency.
249 */
250/*ARGSUSED*/
251trap(type, code, v, frame)
252	int type;
253	unsigned code;
254	unsigned v;
255	struct frame frame;
256{
257	extern char fubail[], subail[];
258#ifdef DDB
259	extern char trap0[], trap1[], trap2[], trap12[], trap15[], illinst[];
260#endif
261	struct proc *p;
262	int i;
263	u_int ucode;
264	u_quad_t sticks;
265#ifdef COMPAT_HPUX
266	extern struct emul emul_hpux;
267#endif
268	int bit;
269
270	uvmexp.traps++;
271	p = curproc;
272	ucode = 0;
273	if (USERMODE(frame.f_sr)) {
274		type |= T_USER;
275		sticks = p->p_sticks;
276		p->p_md.md_regs = frame.f_regs;
277	}
278	switch (type) {
279
280	default:
281dopanic:
282		printf("trap type %d, code = %x, v = %x\n", type, code, v);
283#ifdef DDB
284		if (kdb_trap(type, &frame))
285			return;
286#endif
287		regdump((struct trapframe *)&frame, 128);
288		type &= ~T_USER;
289		if ((unsigned)type < trap_types)
290			panic(trap_type[type]);
291		panic("trap");
292
293	case T_BUSERR:		/* kernel bus error */
294		if (!p->p_addr->u_pcb.pcb_onfault)
295			goto dopanic;
296		/*
297		 * If we have arranged to catch this fault in any of the
298		 * copy to/from user space routines, set PC to return to
299		 * indicated location and set flag informing buserror code
300		 * that it may need to clean up stack frame.
301		 */
302copyfault:
303		frame.f_stackadj = exframesize[frame.f_format];
304		frame.f_format = frame.f_vector = 0;
305		frame.f_pc = (int) p->p_addr->u_pcb.pcb_onfault;
306		return;
307
308	case T_BUSERR|T_USER:	/* bus error */
309	case T_ADDRERR|T_USER:	/* address error */
310		ucode = v;
311		i = SIGBUS;
312		break;
313
314	case T_COPERR:		/* kernel coprocessor violation */
315	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
316	case T_FMTERR:		/* ...just in case... */
317	/*
318	 * The user has most likely trashed the RTE or FP state info
319	 * in the stack frame of a signal handler.
320	 */
321		printf("pid %d: kernel %s exception\n", p->p_pid,
322		       type==T_COPERR ? "coprocessor" : "format");
323		type |= T_USER;
324		p->p_sigacts->ps_sigact[SIGILL].sa_handler = SIG_DFL;
325		sigdelset(&p->p_sigignore, SIGILL);
326		sigdelset(&p->p_sigcatch, SIGILL);
327		sigdelset(&p->p_sigmask, SIGILL);
328		i = SIGILL;
329		ucode = frame.f_format;	/* XXX was ILL_RESAD_FAULT */
330		break;
331
332	case T_COPERR|T_USER:	/* user coprocessor violation */
333	/* What is a proper response here? */
334		ucode = 0;
335		i = SIGFPE;
336		break;
337
338	case T_FPERR|T_USER:	/* 68881 exceptions */
339	/*
340	 * We pass along the 68881 status register which locore stashed
341	 * in code for us.  Note that there is a possibility that the
342	 * bit pattern of this register will conflict with one of the
343	 * FPE_* codes defined in signal.h.  Fortunately for us, the
344	 * only such codes we use are all in the range 1-7 and the low
345	 * 3 bits of the status register are defined as 0 so there is
346	 * no clash.
347	 */
348		ucode = code;
349		i = SIGFPE;
350		break;
351
352#ifdef M68040
353	case T_FPEMULI|T_USER:	/* unimplemented FP instuction */
354	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
355		/* XXX need to FSAVE */
356		printf("pid %d(%s): unimplemented FP %s at %x (EA %x)\n",
357		       p->p_pid, p->p_comm,
358		       frame.f_format == 2 ? "instruction" : "data type",
359		       frame.f_pc, frame.f_fmt2.f_iaddr);
360		/* XXX need to FRESTORE */
361		i = SIGFPE;
362		break;
363#endif
364
365	case T_ILLINST|T_USER:	/* illegal instruction fault */
366#ifdef COMPAT_HPUX
367		if (p->p_emul == &emul_hpux) {
368			ucode = HPUX_ILL_ILLINST_TRAP;
369			i = SIGILL;
370			break;
371		}
372		/* fall through */
373#endif
374	case T_PRIVINST|T_USER:	/* privileged instruction fault */
375#ifdef COMPAT_HPUX
376		if (p->p_emul == &emul_hpux)
377			ucode = HPUX_ILL_PRIV_TRAP;
378		else
379#endif
380		ucode = frame.f_format;	/* XXX was ILL_PRIVIN_FAULT */
381		i = SIGILL;
382		break;
383
384	case T_ZERODIV|T_USER:	/* Divide by zero */
385#ifdef COMPAT_HPUX
386		if (p->p_emul == &emul_hpux)
387			ucode = HPUX_FPE_INTDIV_TRAP;
388		else
389#endif
390		ucode = frame.f_format;	/* XXX was FPE_INTDIV_TRAP */
391		i = SIGFPE;
392		break;
393
394	case T_CHKINST|T_USER:	/* CHK instruction trap */
395#ifdef COMPAT_HPUX
396		if (p->p_emul == &emul_hpux) {
397			/* handled differently under hp-ux */
398			i = SIGILL;
399			ucode = HPUX_ILL_CHK_TRAP;
400			break;
401		}
402#endif
403		ucode = frame.f_format;	/* XXX was FPE_SUBRNG_TRAP */
404		i = SIGFPE;
405		break;
406
407	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
408#ifdef COMPAT_HPUX
409		if (p->p_emul == &emul_hpux) {
410			/* handled differently under hp-ux */
411			i = SIGILL;
412			ucode = HPUX_ILL_TRAPV_TRAP;
413			break;
414		}
415#endif
416		ucode = frame.f_format;	/* XXX was FPE_INTOVF_TRAP */
417		i = SIGFPE;
418		break;
419
420	/*
421	 * XXX: Trace traps are a nightmare.
422	 *
423	 *	HP-UX uses trap #1 for breakpoints,
424	 *	HPBSD uses trap #2,
425	 *	SUN 3.x uses trap #15,
426	 *	KGDB uses trap #15 (for kernel breakpoints; handled elsewhere).
427	 *
428	 * HPBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
429	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
430	 * supported yet.
431	 */
432	case T_TRACE:		/* kernel trace trap */
433	case T_TRAP15:		/* SUN trace trap */
434#ifdef DDB
435		if (type == T_TRAP15 ||
436		    ((caddr_t)frame.f_pc != trap0 &&
437		     (caddr_t)frame.f_pc != trap1 &&
438		     (caddr_t)frame.f_pc != trap2 &&
439		     (caddr_t)frame.f_pc != trap12 &&
440		     (caddr_t)frame.f_pc != trap15 &&
441		     (caddr_t)frame.f_pc != illinst)) {
442			if (kdb_trap(type, &frame))
443				return;
444		}
445#endif
446		frame.f_sr &= ~PSL_T;
447		i = SIGTRAP;
448		break;
449
450	case T_TRACE|T_USER:	/* user trace trap */
451	case T_TRAP15|T_USER:	/* SUN user trace trap */
452#ifdef COMPAT_SUNOS
453		/*
454		 * SunOS uses Trap #2 for a "CPU cache flush".
455		 * Just flush the on-chip caches and return.
456		 */
457		if (p->p_emul == &emul_sunos) {
458			ICIA();
459			DCIU();
460			return;
461		}
462#endif COMPAT_SUNOS
463		frame.f_sr &= ~PSL_T;
464		i = SIGTRAP;
465		break;
466
467	case T_ASTFLT:		/* system async trap, cannot happen */
468		goto dopanic;
469
470	case T_ASTFLT|T_USER:	/* user async trap */
471		astpending = 0;
472		/*
473		 * We check for software interrupts first.  This is because
474		 * they are at a higher level than ASTs, and on a VAX would
475		 * interrupt the AST.  We assume that if we are processing
476		 * an AST that we must be at IPL0 so we don't bother to
477		 * check.  Note that we ensure that we are at least at SIR
478		 * IPL while processing the SIR.
479		 */
480		spl1();
481		/* fall into... */
482
483	case T_SSIR:		/* software interrupt */
484	case T_SSIR|T_USER:
485		while (bit = ffs(ssir)) {
486			--bit;
487			ssir &= ~(1 << bit);
488			uvmexp.softs++;
489			if (sir_routines[bit])
490				sir_routines[bit](sir_args[bit]);
491		}
492
493		/*
494		 * If this was not an AST trap, we are all done.
495		 */
496		if (type != (T_ASTFLT|T_USER)) {
497			uvmexp.traps++;
498			return;
499		}
500		spl0();
501		if (p->p_flag & P_OWEUPC) {
502			p->p_flag &= ~P_OWEUPC;
503			ADDUPROF(p);
504		}
505		goto out;
506
507	case T_MMUFLT:		/* kernel mode page fault */
508		/*
509		 * If we were doing profiling ticks or other user mode
510		 * stuff from interrupt code, Just Say No.
511		 */
512		if (p->p_addr->u_pcb.pcb_onfault == fubail ||
513		    p->p_addr->u_pcb.pcb_onfault == subail)
514			goto copyfault;
515		/* fall into ... */
516
517	case T_MMUFLT|T_USER:	/* page fault */
518	    {
519		vaddr_t va;
520		struct vmspace *vm = p->p_vmspace;
521		vm_map_t map;
522		int rv;
523		vm_prot_t ftype;
524		extern vm_map_t kernel_map;
525
526#ifdef DEBUG
527		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
528		printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n",
529		       p->p_pid, code, v, frame.f_pc, frame.f_sr);
530#endif
531		/*
532		 * It is only a kernel address space fault iff:
533		 * 	1. (type & T_USER) == 0  and
534		 * 	2. pcb_onfault not set or
535		 *	3. pcb_onfault set but supervisor space data fault
536		 * The last can occur during an exec() copyin where the
537		 * argument space is lazy-allocated.
538		 */
539		if (type == T_MMUFLT &&
540		    (!p->p_addr->u_pcb.pcb_onfault || KDFAULT(code)))
541			map = kernel_map;
542		else
543			map = &vm->vm_map;
544		if (WRFAULT(code))
545			ftype = VM_PROT_READ | VM_PROT_WRITE;
546		else
547			ftype = VM_PROT_READ;
548		va = trunc_page((vaddr_t)v);
549#ifdef DEBUG
550		if (map == kernel_map && va == 0) {
551			printf("trap: bad kernel access at %x\n", v);
552			goto dopanic;
553		}
554#endif
555#ifdef COMPAT_HPUX
556		if (ISHPMMADDR(va)) {
557			vaddr_t bva;
558
559			rv = pmap_mapmulti(map->pmap, va);
560			if (rv != KERN_SUCCESS) {
561				bva = HPMMBASEADDR(va);
562				rv = uvm_fault(map, bva, 0, ftype);
563				if (rv == KERN_SUCCESS)
564					(void) pmap_mapmulti(map->pmap, va);
565			}
566		} else
567#endif
568		rv = uvm_fault(map, va, 0, ftype);
569#ifdef DEBUG
570		if (rv && MDB_ISPID(p->p_pid))
571			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
572			       map, va, ftype, rv);
573#endif
574		/*
575		 * If this was a stack access we keep track of the maximum
576		 * accessed stack size.  Also, if vm_fault gets a protection
577		 * failure it is due to accessing the stack region outside
578		 * the current limit and we need to reflect that as an access
579		 * error.
580		 */
581		if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map) {
582			if (rv == KERN_SUCCESS) {
583				unsigned nss;
584
585				nss = clrnd(btoc(USRSTACK-(unsigned)va));
586				if (nss > vm->vm_ssize)
587					vm->vm_ssize = nss;
588			} else if (rv == KERN_PROTECTION_FAILURE)
589				rv = KERN_INVALID_ADDRESS;
590		}
591		if (rv == KERN_SUCCESS) {
592			if (type == T_MMUFLT) {
593#if defined(M68040)
594				if (mmutype == MMU_68040)
595					(void) writeback(&frame, 1);
596#endif
597				return;
598			}
599			goto out;
600		}
601		if (type == T_MMUFLT) {
602			if (p->p_addr->u_pcb.pcb_onfault)
603				goto copyfault;
604			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
605			       map, va, ftype, rv);
606			printf("  type %x, code [mmu,,ssw]: %x\n",
607			       type, code);
608			goto dopanic;
609		}
610		ucode = v;
611		if (rv == KERN_RESOURCE_SHORTAGE) {
612			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
613			       p->p_pid, p->p_comm,
614			       p->p_cred && p->p_ucred ?
615			       p->p_ucred->cr_uid : -1);
616			i = SIGKILL;
617		} else {
618			i = SIGSEGV;
619		}
620		break;
621	    }
622	}
623	trapsignal(p, i, ucode);
624	if ((type & T_USER) == 0)
625		return;
626out:
627	userret(p, &frame, sticks, v, 1);
628}
629
630#ifdef M68040
#ifdef DEBUG
/* Event counters maintained by writeback() (DEBUG kernels only). */
struct writebackstats {
	int calls;		/* writeback() invocations */
	int cpushes;		/* dcache push faults seen */
	int move16s;		/* MOVE16 faults seen */
	int wb1s, wb2s, wb3s;	/* writeback #1/#2/#3 slots processed */
	int wbsize[4];		/* writebacks counted by size field */
} wbstats;

/*
 * Decode tables for the debug printfs: f7sz is indexed by the SZ field,
 * f7tt by the TT field and f7tm by the TM field of a status word.
 */
char *f7sz[] = { "longword", "byte", "word", "line" };
char *f7tt[] = { "normal", "MOVE16", "AFC", "ACK" };
char *f7tm[] = { "d-push", "u-data", "u-code", "M-data",
		 "M-code", "k-data", "k-code", "RES" };
/* printf format shared by all "writeback failed" messages */
char wberrstr[] =
	"WARNING: pid %d(%s) writeback [%s] failed, pc=%x fa=%x wba=%x wbd=%x\n";
#endif
647
648writeback(fp, docachepush)
649	struct frame *fp;
650	int docachepush;
651{
652	struct fmt7 *f = &fp->f_fmt7;
653	struct proc *p = curproc;
654	int err = 0;
655	u_int fa;
656	caddr_t oonfault = p->p_addr->u_pcb.pcb_onfault;
657
658#ifdef DEBUG
659	if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
660		printf(" pid=%d, fa=%x,", p->p_pid, f->f_fa);
661		dumpssw(f->f_ssw);
662	}
663	wbstats.calls++;
664#endif
665	/*
666	 * Deal with special cases first.
667	 */
668	if ((f->f_ssw & SSW4_TMMASK) == SSW4_TMDCP) {
669		/*
670		 * Dcache push fault.
671		 * Line-align the address and write out the push data to
672		 * the indicated physical address.
673		 */
674#ifdef DEBUG
675		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
676			printf(" pushing %s to PA %x, data %x",
677			       f7sz[(f->f_ssw & SSW4_SZMASK) >> 5],
678			       f->f_fa, f->f_pd0);
679			if ((f->f_ssw & SSW4_SZMASK) == SSW4_SZLN)
680				printf("/%x/%x/%x",
681				       f->f_pd1, f->f_pd2, f->f_pd3);
682			printf("\n");
683		}
684		if (f->f_wb1s & SSW4_WBSV)
685			panic("writeback: cache push with WB1S valid");
686		wbstats.cpushes++;
687#endif
688		/*
689		 * XXX there are security problems if we attempt to do a
690		 * cache push after a signal handler has been called.
691		 */
692		if (docachepush) {
693			pmap_enter(pmap_kernel(), (vaddr_t)vmmap,
694			    trunc_page(f->f_fa), VM_PROT_WRITE, TRUE,
695			    VM_PROT_WRITE);
696			fa = (u_int)&vmmap[(f->f_fa & PGOFSET) & ~0xF];
697			bcopy((caddr_t)&f->f_pd0, (caddr_t)fa, 16);
698			DCFL(pmap_extract(pmap_kernel(), (vaddr_t)fa));
699			pmap_remove(pmap_kernel(), (vaddr_t)vmmap,
700				    (vaddr_t)&vmmap[NBPG]);
701		} else
702			printf("WARNING: pid %d(%s) uid %d: CPUSH not done\n",
703			       p->p_pid, p->p_comm, p->p_ucred->cr_uid);
704	} else if ((f->f_ssw & (SSW4_RW|SSW4_TTMASK)) == SSW4_TTM16) {
705		/*
706		 * MOVE16 fault.
707		 * Line-align the address and write out the push data to
708		 * the indicated virtual address.
709		 */
710#ifdef DEBUG
711		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
712			printf(" MOVE16 to VA %x(%x), data %x/%x/%x/%x\n",
713			       f->f_fa, f->f_fa & ~0xF, f->f_pd0, f->f_pd1,
714			       f->f_pd2, f->f_pd3);
715		if (f->f_wb1s & SSW4_WBSV)
716			panic("writeback: MOVE16 with WB1S valid");
717		wbstats.move16s++;
718#endif
719		if (KDFAULT(f->f_wb1s))
720			bcopy((caddr_t)&f->f_pd0, (caddr_t)(f->f_fa & ~0xF), 16);
721		else
722			err = suline((caddr_t)(f->f_fa & ~0xF), (caddr_t)&f->f_pd0);
723		if (err) {
724			fa = f->f_fa & ~0xF;
725#ifdef DEBUG
726			if (mmudebug & MDB_WBFAILED)
727				printf(wberrstr, p->p_pid, p->p_comm,
728				       "MOVE16", fp->f_pc, f->f_fa,
729				       f->f_fa & ~0xF, f->f_pd0);
730#endif
731		}
732	} else if (f->f_wb1s & SSW4_WBSV) {
733		/*
734		 * Writeback #1.
735		 * Position the "memory-aligned" data and write it out.
736		 */
737		u_int wb1d = f->f_wb1d;
738		int off;
739
740#ifdef DEBUG
741		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
742			dumpwb(1, f->f_wb1s, f->f_wb1a, f->f_wb1d);
743		wbstats.wb1s++;
744		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
745#endif
746		off = (f->f_wb1a & 3) * 8;
747		switch (f->f_wb1s & SSW4_SZMASK) {
748		case SSW4_SZLW:
749			if (off)
750				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
751			if (KDFAULT(f->f_wb1s))
752				*(long *)f->f_wb1a = wb1d;
753			else
754				err = suword((caddr_t)f->f_wb1a, wb1d);
755			break;
756		case SSW4_SZB:
757			off = 24 - off;
758			if (off)
759				wb1d >>= off;
760			if (KDFAULT(f->f_wb1s))
761				*(char *)f->f_wb1a = wb1d;
762			else
763				err = subyte((caddr_t)f->f_wb1a, wb1d);
764			break;
765		case SSW4_SZW:
766			off = (off + 16) % 32;
767			if (off)
768				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
769			if (KDFAULT(f->f_wb1s))
770				*(short *)f->f_wb1a = wb1d;
771			else
772				err = susword((caddr_t)f->f_wb1a, wb1d);
773			break;
774		}
775		if (err) {
776			fa = f->f_wb1a;
777#ifdef DEBUG
778			if (mmudebug & MDB_WBFAILED)
779				printf(wberrstr, p->p_pid, p->p_comm,
780				       "#1", fp->f_pc, f->f_fa,
781				       f->f_wb1a, f->f_wb1d);
782#endif
783		}
784	}
785	/*
786	 * Deal with the "normal" writebacks.
787	 *
788	 * XXX writeback2 is known to reflect a LINE size writeback after
789	 * a MOVE16 was already dealt with above.  Ignore it.
790	 */
791	if (err == 0 && (f->f_wb2s & SSW4_WBSV) &&
792	    (f->f_wb2s & SSW4_SZMASK) != SSW4_SZLN) {
793#ifdef DEBUG
794		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
795			dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
796		wbstats.wb2s++;
797		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
798#endif
799		switch (f->f_wb2s & SSW4_SZMASK) {
800		case SSW4_SZLW:
801			if (KDFAULT(f->f_wb2s))
802				*(long *)f->f_wb2a = f->f_wb2d;
803			else
804				err = suword((caddr_t)f->f_wb2a, f->f_wb2d);
805			break;
806		case SSW4_SZB:
807			if (KDFAULT(f->f_wb2s))
808				*(char *)f->f_wb2a = f->f_wb2d;
809			else
810				err = subyte((caddr_t)f->f_wb2a, f->f_wb2d);
811			break;
812		case SSW4_SZW:
813			if (KDFAULT(f->f_wb2s))
814				*(short *)f->f_wb2a = f->f_wb2d;
815			else
816				err = susword((caddr_t)f->f_wb2a, f->f_wb2d);
817			break;
818		}
819		if (err) {
820			fa = f->f_wb2a;
821#ifdef DEBUG
822			if (mmudebug & MDB_WBFAILED) {
823				printf(wberrstr, p->p_pid, p->p_comm,
824				       "#2", fp->f_pc, f->f_fa,
825				       f->f_wb2a, f->f_wb2d);
826				dumpssw(f->f_ssw);
827				dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
828			}
829#endif
830		}
831	}
832	if (err == 0 && (f->f_wb3s & SSW4_WBSV)) {
833#ifdef DEBUG
834		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
835			dumpwb(3, f->f_wb3s, f->f_wb3a, f->f_wb3d);
836		wbstats.wb3s++;
837		wbstats.wbsize[(f->f_wb3s&SSW4_SZMASK)>>5]++;
838#endif
839		switch (f->f_wb3s & SSW4_SZMASK) {
840		case SSW4_SZLW:
841			if (KDFAULT(f->f_wb3s))
842				*(long *)f->f_wb3a = f->f_wb3d;
843			else
844				err = suword((caddr_t)f->f_wb3a, f->f_wb3d);
845			break;
846		case SSW4_SZB:
847			if (KDFAULT(f->f_wb3s))
848				*(char *)f->f_wb3a = f->f_wb3d;
849			else
850				err = subyte((caddr_t)f->f_wb3a, f->f_wb3d);
851			break;
852		case SSW4_SZW:
853			if (KDFAULT(f->f_wb3s))
854				*(short *)f->f_wb3a = f->f_wb3d;
855			else
856				err = susword((caddr_t)f->f_wb3a, f->f_wb3d);
857			break;
858#ifdef DEBUG
859		case SSW4_SZLN:
860			panic("writeback: wb3s indicates LINE write");
861#endif
862		}
863		if (err) {
864			fa = f->f_wb3a;
865#ifdef DEBUG
866			if (mmudebug & MDB_WBFAILED)
867				printf(wberrstr, p->p_pid, p->p_comm,
868				       "#3", fp->f_pc, f->f_fa,
869				       f->f_wb3a, f->f_wb3d);
870#endif
871		}
872	}
873	p->p_addr->u_pcb.pcb_onfault = oonfault;
874	if (err)
875		err = SIGSEGV;
876	return(err);
877}
878
879#ifdef DEBUG
880dumpssw(ssw)
881	u_short ssw;
882{
883	printf(" SSW: %x: ", ssw);
884	if (ssw & SSW4_CP)
885		printf("CP,");
886	if (ssw & SSW4_CU)
887		printf("CU,");
888	if (ssw & SSW4_CT)
889		printf("CT,");
890	if (ssw & SSW4_CM)
891		printf("CM,");
892	if (ssw & SSW4_MA)
893		printf("MA,");
894	if (ssw & SSW4_ATC)
895		printf("ATC,");
896	if (ssw & SSW4_LK)
897		printf("LK,");
898	if (ssw & SSW4_RW)
899		printf("RW,");
900	printf(" SZ=%s, TT=%s, TM=%s\n",
901	       f7sz[(ssw & SSW4_SZMASK) >> 5],
902	       f7tt[(ssw & SSW4_TTMASK) >> 3],
903	       f7tm[ssw & SSW4_TMMASK]);
904}
905
906dumpwb(num, s, a, d)
907	int num;
908	u_short s;
909	u_int a, d;
910{
911	struct proc *p = curproc;
912	paddr_t pa;
913
914	printf(" writeback #%d: VA %x, data %x, SZ=%s, TT=%s, TM=%s\n",
915	       num, a, d, f7sz[(s & SSW4_SZMASK) >> 5],
916	       f7tt[(s & SSW4_TTMASK) >> 3], f7tm[s & SSW4_TMMASK]);
917	printf("	       PA ");
918	pa = pmap_extract(p->p_vmspace->vm_map.pmap, (vaddr_t)a);
919	if (pa == 0)
920		printf("<invalid address>");
921	else
922		printf("%x, current value %x", pa, fuword((caddr_t)a));
923	printf("\n");
924}
925#endif
926#endif
927
928/*
929 * Process a system call.
930 */
931syscall(code, frame)
932	int code;
933	struct frame frame;
934{
935	caddr_t params;
936	struct sysent *callp;
937	struct proc *p;
938	int error, opc, nsys;
939	size_t argsize;
940	int args[8], rval[2];
941	u_quad_t sticks;
942
943	uvmexp.syscalls++;
944	if (!USERMODE(frame.f_sr))
945		panic("syscall");
946	p = curproc;
947	sticks = p->p_sticks;
948	p->p_md.md_regs = frame.f_regs;
949	opc = frame.f_pc;
950
951	nsys = p->p_emul->e_nsysent;
952	callp = p->p_emul->e_sysent;
953
954#ifdef COMPAT_SUNOS
955	if (p->p_emul == &emul_sunos) {
956		/*
957		 * SunOS passes the syscall-number on the stack, whereas
958		 * BSD passes it in D0. So, we have to get the real "code"
959		 * from the stack, and clean up the stack, as SunOS glue
960		 * code assumes the kernel pops the syscall argument the
961		 * glue pushed on the stack. Sigh...
962		 */
963		code = fuword((caddr_t)frame.f_regs[SP]);
964
965		/*
966		 * XXX
967		 * Don't do this for sunos_sigreturn, as there's no stored pc
968		 * on the stack to skip, the argument follows the syscall
969		 * number without a gap.
970		 */
971		if (code != SUNOS_SYS_sigreturn) {
972			frame.f_regs[SP] += sizeof (int);
973			/*
974			 * remember that we adjusted the SP,
975			 * might have to undo this if the system call
976			 * returns ERESTART.
977			 */
978			p->p_md.md_flags |= MDP_STACKADJ;
979		} else
980			p->p_md.md_flags &= ~MDP_STACKADJ;
981	}
982#endif
983
984	params = (caddr_t)frame.f_regs[SP] + sizeof(int);
985
986	switch (code) {
987	case SYS_syscall:
988		/*
989		 * Code is first argument, followed by actual args.
990		 */
991		code = fuword(params);
992		params += sizeof(int);
993		/*
994		 * XXX sigreturn requires special stack manipulation
995		 * that is only done if entered via the sigreturn
996		 * trap.  Cannot allow it here so make sure we fail.
997		 */
998		switch (code) {
999#ifdef COMPAT_13
1000		case SYS_compat_13_sigreturn13:
1001#endif
1002		case SYS___sigreturn14:
1003			code = nsys;
1004			break;
1005		}
1006		break;
1007	case SYS___syscall:
1008		/*
1009		 * Like syscall, but code is a quad, so as to maintain
1010		 * quad alignment for the rest of the arguments.
1011		 */
1012		if (callp != sysent)
1013			break;
1014		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
1015		params += sizeof(quad_t);
1016		break;
1017	default:
1018		break;
1019	}
1020	if (code < 0 || code >= nsys)
1021		callp += p->p_emul->e_nosys;		/* illegal */
1022	else
1023		callp += code;
1024	argsize = callp->sy_argsize;
1025#ifdef COMPAT_LINUX
1026	if (0
1027# ifdef EXEC_AOUT
1028	    || p->p_emul == &emul_linux_aout
1029# endif
1030# ifdef EXEC_ELF32
1031	    || p->p_emul == &emul_linux_elf32
1032# endif
1033	     ) {
1034		/*
1035		 * Linux passes the args in d1-d5
1036		 */
1037		switch (argsize) {
1038		case 20:
1039			args[4] = frame.f_regs[D5];
1040		case 16:
1041			args[3] = frame.f_regs[D4];
1042		case 12:
1043			args[2] = frame.f_regs[D3];
1044		case 8:
1045			args[1] = frame.f_regs[D2];
1046		case 4:
1047			args[0] = frame.f_regs[D1];
1048		case 0:
1049			error = 0;
1050			break;
1051		default:
1052#ifdef DEBUG
1053			panic("linux syscall %d weird argsize %d",
1054				code, argsize);
1055#else
1056			error = EINVAL;
1057#endif
1058			break;
1059		}
1060	} else
1061#endif
1062	if (argsize)
1063		error = copyin(params, (caddr_t)args, argsize);
1064	else
1065		error = 0;
1066#ifdef SYSCALL_DEBUG
1067	scdebug_call(p, code, args);
1068#endif
1069#ifdef KTRACE
1070	if (KTRPOINT(p, KTR_SYSCALL))
1071		ktrsyscall(p->p_tracep, code, argsize, args);
1072#endif
1073	if (error)
1074		goto bad;
1075	rval[0] = 0;
1076	rval[1] = frame.f_regs[D1];
1077	error = (*callp->sy_call)(p, args, rval);
1078	switch (error) {
1079	case 0:
1080		frame.f_regs[D0] = rval[0];
1081		frame.f_regs[D1] = rval[1];
1082		frame.f_sr &= ~PSL_C;	/* carry bit */
1083		break;
1084	case ERESTART:
1085		/*
1086		 * We always enter through a `trap' instruction, which is 2
1087		 * bytes, so adjust the pc by that amount.
1088		 */
1089		frame.f_pc = opc - 2;
1090		break;
1091	case EJUSTRETURN:
1092		/* nothing to do */
1093		break;
1094	default:
1095	bad:
1096		if (p->p_emul->e_errno)
1097			error = p->p_emul->e_errno[error];
1098		frame.f_regs[D0] = error;
1099		frame.f_sr |= PSL_C;	/* carry bit */
1100		break;
1101	}
1102
1103#ifdef SYSCALL_DEBUG
1104	scdebug_ret(p, code, error, rval);
1105#endif
1106#ifdef COMPAT_SUNOS
1107	/* need new p-value for this */
1108	if (error == ERESTART && (p->p_md.md_flags & MDP_STACKADJ))
1109		frame.f_regs[SP] -= sizeof (int);
1110#endif
1111	userret(p, &frame, sticks, (u_int)0, 0);
1112#ifdef KTRACE
1113	if (KTRPOINT(p, KTR_SYSRET))
1114		ktrsysret(p->p_tracep, code, error, rval[0]);
1115#endif
1116}
1117
1118void
1119child_return(arg)
1120	void *arg;
1121{
1122	struct proc *p = arg;
1123	/* See cpu_fork() */
1124	struct frame *f = (struct frame *)p->p_md.md_regs;
1125
1126	f->f_regs[D0] = 0;
1127	f->f_sr &= ~PSL_C;
1128	f->f_format = FMT0;
1129
1130	userret(p, f, p->p_sticks, (u_int)0, 0);
1131#ifdef KTRACE
1132	if (KTRPOINT(p, KTR_SYSRET))
1133		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1134#endif
1135}
1136
1137/*
1138 * Allocation routines for software interrupts.
1139 */
1140u_long
1141allocate_sir(proc, arg)
1142	void (*proc)();
1143	void *arg;
1144{
1145	int bit;
1146
1147	if( next_sir >= NSIR )
1148		panic("allocate_sir: none left");
1149	bit = next_sir++;
1150	sir_routines[bit] = proc;
1151	sir_args[bit] = arg;
1152	return (1 << bit);
1153}
1154
1155void
1156init_sir()
1157{
1158	extern void netintr();
1159
1160	sir_routines[0] = netintr;
1161	sir_routines[1] = softclock;
1162	next_sir = 2;
1163}
1164