trap.c revision 1.7
1/*	$NetBSD: trap.c,v 1.7 1998/11/10 22:45:45 dbj Exp $ */
2
3/*
 * This file was taken from mvme68k/mvme68k/trap.c and
 * should probably be re-synced when needed.
6 * Darrin B. Jewell <jewell@mit.edu>  Tue Nov 10 05:07:16 1998
7 * original cvs id: NetBSD: trap.c,v 1.24 1998/10/01 02:53:54 thorpej Exp
8 */
9
10/*
11 * Copyright (c) 1988 University of Utah.
12 * Copyright (c) 1982, 1986, 1990, 1993
13 *	The Regents of the University of California.  All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * the Systems Programming Group of the University of Utah Computer
17 * Science Department.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 * 3. All advertising materials mentioning features or use of this software
28 *    must display the following acknowledgement:
29 *	This product includes software developed by the University of
30 *	California, Berkeley and its contributors.
31 * 4. Neither the name of the University nor the names of its contributors
32 *    may be used to endorse or promote products derived from this software
33 *    without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
46 *
47 * from: Utah $Hdr: trap.c 1.37 92/12/20$
48 *
49 *	@(#)trap.c	8.5 (Berkeley) 1/4/94
50 */
51
52#include "opt_ddb.h"
53#include "opt_ktrace.h"
54#include "opt_uvm.h"
55#include "opt_compat_netbsd.h"
56#include "opt_compat_sunos.h"
57#include "opt_compat_hpux.h"
58
59#include <sys/param.h>
60#include <sys/systm.h>
61#include <sys/proc.h>
62#include <sys/acct.h>
63#include <sys/kernel.h>
64#include <sys/signalvar.h>
65#include <sys/resourcevar.h>
66#include <sys/syscall.h>
67#include <sys/syslog.h>
68#include <sys/user.h>
69#ifdef KTRACE
70#include <sys/ktrace.h>
71#endif
72
73#include <machine/psl.h>
74#include <machine/trap.h>
75#include <machine/cpu.h>
76#include <machine/reg.h>
77
78#include <vm/vm.h>
79#include <vm/pmap.h>
80
81#if defined(UVM)
82#include <uvm/uvm_extern.h>
83#endif
84
85#ifdef COMPAT_HPUX
86#include <compat/hpux/hpux.h>
87#endif
88
89#ifdef COMPAT_SUNOS
90#include <compat/sunos/sunos_syscall.h>
91extern struct emul emul_sunos;
92#endif
93
94#include <m68k/cacheops.h>
95
96int	astpending;
97
98char	*trap_type[] = {
99	"Bus error",
100	"Address error",
101	"Illegal instruction",
102	"Zero divide",
103	"CHK instruction",
104	"TRAPV instruction",
105	"Privilege violation",
106	"Trace trap",
107	"MMU fault",
108	"SSIR trap",
109	"Format error",
110	"68881 exception",
111	"Coprocessor violation",
112	"Async system trap"
113};
114int	trap_types = sizeof trap_type / sizeof trap_type[0];
115
116/*
117 * Size of various exception stack frames (minus the standard 8 bytes)
118 */
119short	exframesize[] = {
120	FMT0SIZE,	/* type 0 - normal (68020/030/040) */
121	FMT1SIZE,	/* type 1 - throwaway (68020/030/040) */
122	FMT2SIZE,	/* type 2 - normal 6-word (68020/030/040) */
123	FMT3SIZE,	/* type 3 - FP post-instruction (68040) */
124	-1, -1, -1,	/* type 4-6 - undefined */
125	FMT7SIZE,	/* type 7 - access error (68040) */
126	58,		/* type 8 - bus fault (68010) */
127	FMT9SIZE,	/* type 9 - coprocessor mid-instruction (68020/030) */
128	FMTASIZE,	/* type A - short bus fault (68020/030) */
129	FMTBSIZE,	/* type B - long bus fault (68020/030) */
130	-1, -1, -1, -1	/* type C-F - undefined */
131};
132
133#ifdef M68040
134#define KDFAULT(c)    (mmutype == MMU_68040 ? \
135			    ((c) & SSW4_TMMASK) == SSW4_TMKD : \
136			    ((c) & (SSW_DF|FC_SUPERD)) == (SSW_DF|FC_SUPERD))
137#define WRFAULT(c)    (mmutype == MMU_68040 ? \
138			    ((c) & SSW4_RW) == 0 : \
139			    ((c) & (SSW_DF|SSW_RW)) == SSW_DF)
140#else
141#define KDFAULT(c)	(((c) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
142#define WRFAULT(c)	(((c) & (SSW_DF|SSW_RW)) == SSW_DF)
143#endif
144
145#ifdef DEBUG
146int mmudebug = 0;
147int mmupid = -1;
148#define MDB_FOLLOW	1
149#define MDB_WBFOLLOW	2
150#define MDB_WBFAILED	4
151#define MDB_ISPID(p)	(p) == mmupid
152#endif
153
154#define NSIR	32
155void (*sir_routines[NSIR])();
156void *sir_args[NSIR];
157int next_sir;
158
159/*
160 * trap and syscall both need the following work done before returning
161 * to user mode.
162 */
163static inline void
164userret(p, fp, oticks, faultaddr, fromtrap)
165	struct proc *p;
166	struct frame *fp;
167	u_quad_t oticks;
168	u_int faultaddr;
169	int fromtrap;
170{
171	int sig, s;
172#ifdef M68040
173	int beenhere = 0;
174
175again:
176#endif
177	/* take pending signals */
178	while ((sig = CURSIG(p)) != 0)
179		postsig(sig);
180	p->p_priority = p->p_usrpri;
181	if (want_resched) {
182		/*
183		 * Since we are curproc, clock will normally just change
184		 * our priority without moving us from one queue to another
185		 * (since the running process is not on a queue.)
186		 * If that happened after we put ourselves on the run queue
187		 * but before we mi_switch()'ed, we might not be on the queue
188		 * indicated by our priority.
189		 */
190		s = splstatclock();
191		setrunqueue(p);
192		p->p_stats->p_ru.ru_nivcsw++;
193		mi_switch();
194		splx(s);
195		while ((sig = CURSIG(p)) != 0)
196			postsig(sig);
197	}
198
199	/*
200	 * If profiling, charge system time to the trapped pc.
201	 */
202	if (p->p_flag & P_PROFIL) {
203		extern int psratio;
204
205		addupc_task(p, fp->f_pc,
206			    (int)(p->p_sticks - oticks) * psratio);
207	}
208#ifdef M68040
209	/*
210	 * Deal with user mode writebacks (from trap, or from sigreturn).
211	 * If any writeback fails, go back and attempt signal delivery.
212	 * unless we have already been here and attempted the writeback
213	 * (e.g. bad address with user ignoring SIGSEGV).  In that case
214	 * we just return to the user without sucessfully completing
215	 * the writebacks.  Maybe we should just drop the sucker?
216	 */
217	if (mmutype == MMU_68040 && fp->f_format == FMT7) {
218		if (beenhere) {
219#ifdef DEBUG
220			if (mmudebug & MDB_WBFAILED)
221				printf(fromtrap ?
222		"pid %d(%s): writeback aborted, pc=%x, fa=%x\n" :
223		"pid %d(%s): writeback aborted in sigreturn, pc=%x\n",
224				    p->p_pid, p->p_comm, fp->f_pc, faultaddr);
225#endif
226		} else if (sig = writeback(fp, fromtrap)) {
227			beenhere = 1;
228			oticks = p->p_sticks;
229			trapsignal(p, sig, faultaddr);
230			goto again;
231		}
232	}
233#endif
234	curpriority = p->p_priority;
235}
236
237/*
238 * Trap is called from locore to handle most types of processor traps,
239 * including events such as simulated software interrupts/AST's.
240 * System calls are broken out for efficiency.
241 */
242/*ARGSUSED*/
243trap(type, code, v, frame)
244	int type;
245	unsigned code;
246	unsigned v;
247	struct frame frame;
248{
249	extern char fubail[], subail[];
250#ifdef DDB
251	extern char trap0[], trap1[], trap2[], trap12[], trap15[], illinst[];
252#endif
253	struct proc *p;
254	int i;
255	u_int ucode;
256	u_quad_t sticks;
257#ifdef COMPAT_HPUX
258	extern struct emul emul_hpux;
259#endif
260	int bit;
261
262#if defined(UVM)
263	uvmexp.traps++;
264#else
265	cnt.v_trap++;
266#endif
267	p = curproc;
268	ucode = 0;
269	if (USERMODE(frame.f_sr)) {
270		type |= T_USER;
271		sticks = p->p_sticks;
272		p->p_md.md_regs = frame.f_regs;
273	}
274	switch (type) {
275
276	default:
277dopanic:
278		printf("trap type %d, code = %x, v = %x\n", type, code, v);
279#ifdef DDB
280		if (kdb_trap(type, &frame))
281			return;
282#endif
283		regdump((struct trapframe *)&frame, 128);
284		type &= ~T_USER;
285		if ((unsigned)type < trap_types)
286			panic(trap_type[type]);
287		panic("trap");
288
289	case T_BUSERR:		/* kernel bus error */
290		if (!p->p_addr->u_pcb.pcb_onfault)
291			goto dopanic;
292		/*
293		 * If we have arranged to catch this fault in any of the
294		 * copy to/from user space routines, set PC to return to
295		 * indicated location and set flag informing buserror code
296		 * that it may need to clean up stack frame.
297		 */
298copyfault:
299		frame.f_stackadj = exframesize[frame.f_format];
300		frame.f_format = frame.f_vector = 0;
301		frame.f_pc = (int) p->p_addr->u_pcb.pcb_onfault;
302		return;
303
304	case T_BUSERR|T_USER:	/* bus error */
305	case T_ADDRERR|T_USER:	/* address error */
306		ucode = v;
307		i = SIGBUS;
308		break;
309
310	case T_COPERR:		/* kernel coprocessor violation */
311	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
312	case T_FMTERR:		/* ...just in case... */
313	/*
314	 * The user has most likely trashed the RTE or FP state info
315	 * in the stack frame of a signal handler.
316	 */
317		printf("pid %d: kernel %s exception\n", p->p_pid,
318		       type==T_COPERR ? "coprocessor" : "format");
319		type |= T_USER;
320		p->p_sigacts->ps_sigact[SIGILL].sa_handler = SIG_DFL;
321		sigdelset(&p->p_sigignore, SIGILL);
322		sigdelset(&p->p_sigcatch, SIGILL);
323		sigdelset(&p->p_sigmask, SIGILL);
324		i = SIGILL;
325		ucode = frame.f_format;	/* XXX was ILL_RESAD_FAULT */
326		break;
327
328	case T_COPERR|T_USER:	/* user coprocessor violation */
329	/* What is a proper response here? */
330		ucode = 0;
331		i = SIGFPE;
332		break;
333
334	case T_FPERR|T_USER:	/* 68881 exceptions */
335	/*
336	 * We pass along the 68881 status register which locore stashed
337	 * in code for us.  Note that there is a possibility that the
338	 * bit pattern of this register will conflict with one of the
339	 * FPE_* codes defined in signal.h.  Fortunately for us, the
340	 * only such codes we use are all in the range 1-7 and the low
341	 * 3 bits of the status register are defined as 0 so there is
342	 * no clash.
343	 */
344		ucode = code;
345		i = SIGFPE;
346		break;
347
348#ifdef M68040
349	case T_FPEMULI|T_USER:	/* unimplemented FP instuction */
350	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
351		/* XXX need to FSAVE */
352		printf("pid %d(%s): unimplemented FP %s at %x (EA %x)\n",
353		       p->p_pid, p->p_comm,
354		       frame.f_format == 2 ? "instruction" : "data type",
355		       frame.f_pc, frame.f_fmt2.f_iaddr);
356		/* XXX need to FRESTORE */
357		i = SIGFPE;
358		break;
359#endif
360
361	case T_ILLINST|T_USER:	/* illegal instruction fault */
362#ifdef COMPAT_HPUX
363		if (p->p_emul == &emul_hpux) {
364			ucode = HPUX_ILL_ILLINST_TRAP;
365			i = SIGILL;
366			break;
367		}
368		/* fall through */
369#endif
370	case T_PRIVINST|T_USER:	/* privileged instruction fault */
371#ifdef COMPAT_HPUX
372		if (p->p_emul == &emul_hpux)
373			ucode = HPUX_ILL_PRIV_TRAP;
374		else
375#endif
376		ucode = frame.f_format;	/* XXX was ILL_PRIVIN_FAULT */
377		i = SIGILL;
378		break;
379
380	case T_ZERODIV|T_USER:	/* Divide by zero */
381#ifdef COMPAT_HPUX
382		if (p->p_emul == &emul_hpux)
383			ucode = HPUX_FPE_INTDIV_TRAP;
384		else
385#endif
386		ucode = frame.f_format;	/* XXX was FPE_INTDIV_TRAP */
387		i = SIGFPE;
388		break;
389
390	case T_CHKINST|T_USER:	/* CHK instruction trap */
391#ifdef COMPAT_HPUX
392		if (p->p_emul == &emul_hpux) {
393			/* handled differently under hp-ux */
394			i = SIGILL;
395			ucode = HPUX_ILL_CHK_TRAP;
396			break;
397		}
398#endif
399		ucode = frame.f_format;	/* XXX was FPE_SUBRNG_TRAP */
400		i = SIGFPE;
401		break;
402
403	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
404#ifdef COMPAT_HPUX
405		if (p->p_emul == &emul_hpux) {
406			/* handled differently under hp-ux */
407			i = SIGILL;
408			ucode = HPUX_ILL_TRAPV_TRAP;
409			break;
410		}
411#endif
412		ucode = frame.f_format;	/* XXX was FPE_INTOVF_TRAP */
413		i = SIGFPE;
414		break;
415
416	/*
417	 * XXX: Trace traps are a nightmare.
418	 *
419	 *	HP-UX uses trap #1 for breakpoints,
420	 *	HPBSD uses trap #2,
421	 *	SUN 3.x uses trap #15,
422	 *	KGDB uses trap #15 (for kernel breakpoints; handled elsewhere).
423	 *
424	 * HPBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
425	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
426	 * supported yet.
427	 */
428	case T_TRACE:		/* kernel trace trap */
429	case T_TRAP15:		/* SUN trace trap */
430#ifdef DDB
431		if (type == T_TRAP15 ||
432		    ((caddr_t)frame.f_pc != trap0 &&
433		     (caddr_t)frame.f_pc != trap1 &&
434		     (caddr_t)frame.f_pc != trap2 &&
435		     (caddr_t)frame.f_pc != trap12 &&
436		     (caddr_t)frame.f_pc != trap15 &&
437		     (caddr_t)frame.f_pc != illinst)) {
438			if (kdb_trap(type, &frame))
439				return;
440		}
441#endif
442		frame.f_sr &= ~PSL_T;
443		i = SIGTRAP;
444		break;
445
446	case T_TRACE|T_USER:	/* user trace trap */
447	case T_TRAP15|T_USER:	/* SUN user trace trap */
448#ifdef COMPAT_SUNOS
449		/*
450		 * SunOS uses Trap #2 for a "CPU cache flush".
451		 * Just flush the on-chip caches and return.
452		 */
453		if (p->p_emul == &emul_sunos) {
454			ICIA();
455			DCIU();
456			return;
457		}
458#endif COMPAT_SUNOS
459		frame.f_sr &= ~PSL_T;
460		i = SIGTRAP;
461		break;
462
463	case T_ASTFLT:		/* system async trap, cannot happen */
464		goto dopanic;
465
466	case T_ASTFLT|T_USER:	/* user async trap */
467		astpending = 0;
468		/*
469		 * We check for software interrupts first.  This is because
470		 * they are at a higher level than ASTs, and on a VAX would
471		 * interrupt the AST.  We assume that if we are processing
472		 * an AST that we must be at IPL0 so we don't bother to
473		 * check.  Note that we ensure that we are at least at SIR
474		 * IPL while processing the SIR.
475		 */
476		spl1();
477		/* fall into... */
478
479	case T_SSIR:		/* software interrupt */
480	case T_SSIR|T_USER:
481		while (bit = ffs(ssir)) {
482			--bit;
483			ssir &= ~(1 << bit);
484#if defined(UVM)
485			uvmexp.softs++;
486#else
487			cnt.v_soft++;
488#endif
489			if (sir_routines[bit])
490				sir_routines[bit](sir_args[bit]);
491		}
492
493		/*
494		 * If this was not an AST trap, we are all done.
495		 */
496		if (type != (T_ASTFLT|T_USER)) {
497#if defined(UVM)
498			uvmexp.traps++;
499#else
500			cnt.v_trap--;
501#endif
502			return;
503		}
504		spl0();
505		if (p->p_flag & P_OWEUPC) {
506			p->p_flag &= ~P_OWEUPC;
507			ADDUPROF(p);
508		}
509		goto out;
510
511	case T_MMUFLT:		/* kernel mode page fault */
512		/*
513		 * If we were doing profiling ticks or other user mode
514		 * stuff from interrupt code, Just Say No.
515		 */
516		if (p->p_addr->u_pcb.pcb_onfault == fubail ||
517		    p->p_addr->u_pcb.pcb_onfault == subail)
518			goto copyfault;
519		/* fall into ... */
520
521	case T_MMUFLT|T_USER:	/* page fault */
522	    {
523		vaddr_t va;
524		struct vmspace *vm = p->p_vmspace;
525		vm_map_t map;
526		int rv;
527		vm_prot_t ftype;
528		extern vm_map_t kernel_map;
529
530#ifdef DEBUG
531		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
532		printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n",
533		       p->p_pid, code, v, frame.f_pc, frame.f_sr);
534#endif
535		/*
536		 * It is only a kernel address space fault iff:
537		 * 	1. (type & T_USER) == 0  and
538		 * 	2. pcb_onfault not set or
539		 *	3. pcb_onfault set but supervisor space data fault
540		 * The last can occur during an exec() copyin where the
541		 * argument space is lazy-allocated.
542		 */
543		if (type == T_MMUFLT &&
544		    (!p->p_addr->u_pcb.pcb_onfault || KDFAULT(code)))
545			map = kernel_map;
546		else
547			map = &vm->vm_map;
548		if (WRFAULT(code))
549			ftype = VM_PROT_READ | VM_PROT_WRITE;
550		else
551			ftype = VM_PROT_READ;
552		va = trunc_page((vaddr_t)v);
553#ifdef DEBUG
554		if (map == kernel_map && va == 0) {
555			printf("trap: bad kernel access at %x\n", v);
556			goto dopanic;
557		}
558#endif
559#ifdef COMPAT_HPUX
560		if (ISHPMMADDR(va)) {
561			vaddr_t bva;
562
563			rv = pmap_mapmulti(map->pmap, va);
564			if (rv != KERN_SUCCESS) {
565				bva = HPMMBASEADDR(va);
566#if defined(UVM)
567				rv = uvm_fault(map, bva, 0, ftype);
568#else
569				rv = vm_fault(map, bva, ftype, FALSE);
570#endif
571				if (rv == KERN_SUCCESS)
572					(void) pmap_mapmulti(map->pmap, va);
573			}
574		} else
575#endif
576#if defined(UVM)
577		rv = uvm_fault(map, va, 0, ftype);
578#ifdef DEBUG
579		if (rv && MDB_ISPID(p->p_pid))
580			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
581			       map, va, ftype, rv);
582#endif
583#else /* ! UVM */
584		rv = vm_fault(map, va, ftype, FALSE);
585#ifdef DEBUG
586		if (rv && MDB_ISPID(p->p_pid))
587			printf("vm_fault(%x, %x, %x, 0) -> %x\n",
588			       map, va, ftype, rv);
589#endif
590#endif /* UVM */
591		/*
592		 * If this was a stack access we keep track of the maximum
593		 * accessed stack size.  Also, if vm_fault gets a protection
594		 * failure it is due to accessing the stack region outside
595		 * the current limit and we need to reflect that as an access
596		 * error.
597		 */
598		if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map) {
599			if (rv == KERN_SUCCESS) {
600				unsigned nss;
601
602				nss = clrnd(btoc(USRSTACK-(unsigned)va));
603				if (nss > vm->vm_ssize)
604					vm->vm_ssize = nss;
605			} else if (rv == KERN_PROTECTION_FAILURE)
606				rv = KERN_INVALID_ADDRESS;
607		}
608		if (rv == KERN_SUCCESS) {
609			if (type == T_MMUFLT) {
610#if defined(M68040)
611				if (mmutype == MMU_68040)
612					(void) writeback(&frame, 1);
613#endif
614				return;
615			}
616			goto out;
617		}
618		if (type == T_MMUFLT) {
619			if (p->p_addr->u_pcb.pcb_onfault)
620				goto copyfault;
621#if defined(UVM)
622			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
623			       map, va, ftype, rv);
624#else
625			printf("vm_fault(%x, %x, %x, 0) -> %x\n",
626			       map, va, ftype, rv);
627#endif
628			printf("  type %x, code [mmu,,ssw]: %x\n",
629			       type, code);
630			goto dopanic;
631		}
632		ucode = v;
633		i = SIGSEGV;
634		break;
635	    }
636	}
637	trapsignal(p, i, ucode);
638	if ((type & T_USER) == 0)
639		return;
640out:
641	userret(p, &frame, sticks, v, 1);
642}
643
644#ifdef M68040
645#ifdef DEBUG
646struct writebackstats {
647	int calls;
648	int cpushes;
649	int move16s;
650	int wb1s, wb2s, wb3s;
651	int wbsize[4];
652} wbstats;
653
654char *f7sz[] = { "longword", "byte", "word", "line" };
655char *f7tt[] = { "normal", "MOVE16", "AFC", "ACK" };
656char *f7tm[] = { "d-push", "u-data", "u-code", "M-data",
657		 "M-code", "k-data", "k-code", "RES" };
658char wberrstr[] =
659	"WARNING: pid %d(%s) writeback [%s] failed, pc=%x fa=%x wba=%x wbd=%x\n";
660#endif
661
662writeback(fp, docachepush)
663	struct frame *fp;
664	int docachepush;
665{
666	struct fmt7 *f = &fp->f_fmt7;
667	struct proc *p = curproc;
668	int err = 0;
669	u_int fa;
670	caddr_t oonfault = p->p_addr->u_pcb.pcb_onfault;
671
672#ifdef DEBUG
673	if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
674		printf(" pid=%d, fa=%x,", p->p_pid, f->f_fa);
675		dumpssw(f->f_ssw);
676	}
677	wbstats.calls++;
678#endif
679	/*
680	 * Deal with special cases first.
681	 */
682	if ((f->f_ssw & SSW4_TMMASK) == SSW4_TMDCP) {
683		/*
684		 * Dcache push fault.
685		 * Line-align the address and write out the push data to
686		 * the indicated physical address.
687		 */
688#ifdef DEBUG
689		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
690			printf(" pushing %s to PA %x, data %x",
691			       f7sz[(f->f_ssw & SSW4_SZMASK) >> 5],
692			       f->f_fa, f->f_pd0);
693			if ((f->f_ssw & SSW4_SZMASK) == SSW4_SZLN)
694				printf("/%x/%x/%x",
695				       f->f_pd1, f->f_pd2, f->f_pd3);
696			printf("\n");
697		}
698		if (f->f_wb1s & SSW4_WBSV)
699			panic("writeback: cache push with WB1S valid");
700		wbstats.cpushes++;
701#endif
702		/*
703		 * XXX there are security problems if we attempt to do a
704		 * cache push after a signal handler has been called.
705		 */
706		if (docachepush) {
707			pmap_enter(pmap_kernel(), (vaddr_t)vmmap,
708				   trunc_page(f->f_fa), VM_PROT_WRITE, TRUE);
709			fa = (u_int)&vmmap[(f->f_fa & PGOFSET) & ~0xF];
710			bcopy((caddr_t)&f->f_pd0, (caddr_t)fa, 16);
711			DCFL(pmap_extract(pmap_kernel(), (vaddr_t)fa));
712			pmap_remove(pmap_kernel(), (vaddr_t)vmmap,
713				    (vaddr_t)&vmmap[NBPG]);
714		} else
715			printf("WARNING: pid %d(%s) uid %d: CPUSH not done\n",
716			       p->p_pid, p->p_comm, p->p_ucred->cr_uid);
717	} else if ((f->f_ssw & (SSW4_RW|SSW4_TTMASK)) == SSW4_TTM16) {
718		/*
719		 * MOVE16 fault.
720		 * Line-align the address and write out the push data to
721		 * the indicated virtual address.
722		 */
723#ifdef DEBUG
724		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
725			printf(" MOVE16 to VA %x(%x), data %x/%x/%x/%x\n",
726			       f->f_fa, f->f_fa & ~0xF, f->f_pd0, f->f_pd1,
727			       f->f_pd2, f->f_pd3);
728		if (f->f_wb1s & SSW4_WBSV)
729			panic("writeback: MOVE16 with WB1S valid");
730		wbstats.move16s++;
731#endif
732		if (KDFAULT(f->f_wb1s))
733			bcopy((caddr_t)&f->f_pd0, (caddr_t)(f->f_fa & ~0xF), 16);
734		else
735			err = suline((caddr_t)(f->f_fa & ~0xF), (caddr_t)&f->f_pd0);
736		if (err) {
737			fa = f->f_fa & ~0xF;
738#ifdef DEBUG
739			if (mmudebug & MDB_WBFAILED)
740				printf(wberrstr, p->p_pid, p->p_comm,
741				       "MOVE16", fp->f_pc, f->f_fa,
742				       f->f_fa & ~0xF, f->f_pd0);
743#endif
744		}
745	} else if (f->f_wb1s & SSW4_WBSV) {
746		/*
747		 * Writeback #1.
748		 * Position the "memory-aligned" data and write it out.
749		 */
750		u_int wb1d = f->f_wb1d;
751		int off;
752
753#ifdef DEBUG
754		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
755			dumpwb(1, f->f_wb1s, f->f_wb1a, f->f_wb1d);
756		wbstats.wb1s++;
757		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
758#endif
759		off = (f->f_wb1a & 3) * 8;
760		switch (f->f_wb1s & SSW4_SZMASK) {
761		case SSW4_SZLW:
762			if (off)
763				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
764			if (KDFAULT(f->f_wb1s))
765				*(long *)f->f_wb1a = wb1d;
766			else
767				err = suword((caddr_t)f->f_wb1a, wb1d);
768			break;
769		case SSW4_SZB:
770			off = 24 - off;
771			if (off)
772				wb1d >>= off;
773			if (KDFAULT(f->f_wb1s))
774				*(char *)f->f_wb1a = wb1d;
775			else
776				err = subyte((caddr_t)f->f_wb1a, wb1d);
777			break;
778		case SSW4_SZW:
779			off = (off + 16) % 32;
780			if (off)
781				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
782			if (KDFAULT(f->f_wb1s))
783				*(short *)f->f_wb1a = wb1d;
784			else
785				err = susword((caddr_t)f->f_wb1a, wb1d);
786			break;
787		}
788		if (err) {
789			fa = f->f_wb1a;
790#ifdef DEBUG
791			if (mmudebug & MDB_WBFAILED)
792				printf(wberrstr, p->p_pid, p->p_comm,
793				       "#1", fp->f_pc, f->f_fa,
794				       f->f_wb1a, f->f_wb1d);
795#endif
796		}
797	}
798	/*
799	 * Deal with the "normal" writebacks.
800	 *
801	 * XXX writeback2 is known to reflect a LINE size writeback after
802	 * a MOVE16 was already dealt with above.  Ignore it.
803	 */
804	if (err == 0 && (f->f_wb2s & SSW4_WBSV) &&
805	    (f->f_wb2s & SSW4_SZMASK) != SSW4_SZLN) {
806#ifdef DEBUG
807		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
808			dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
809		wbstats.wb2s++;
810		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
811#endif
812		switch (f->f_wb2s & SSW4_SZMASK) {
813		case SSW4_SZLW:
814			if (KDFAULT(f->f_wb2s))
815				*(long *)f->f_wb2a = f->f_wb2d;
816			else
817				err = suword((caddr_t)f->f_wb2a, f->f_wb2d);
818			break;
819		case SSW4_SZB:
820			if (KDFAULT(f->f_wb2s))
821				*(char *)f->f_wb2a = f->f_wb2d;
822			else
823				err = subyte((caddr_t)f->f_wb2a, f->f_wb2d);
824			break;
825		case SSW4_SZW:
826			if (KDFAULT(f->f_wb2s))
827				*(short *)f->f_wb2a = f->f_wb2d;
828			else
829				err = susword((caddr_t)f->f_wb2a, f->f_wb2d);
830			break;
831		}
832		if (err) {
833			fa = f->f_wb2a;
834#ifdef DEBUG
835			if (mmudebug & MDB_WBFAILED) {
836				printf(wberrstr, p->p_pid, p->p_comm,
837				       "#2", fp->f_pc, f->f_fa,
838				       f->f_wb2a, f->f_wb2d);
839				dumpssw(f->f_ssw);
840				dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
841			}
842#endif
843		}
844	}
845	if (err == 0 && (f->f_wb3s & SSW4_WBSV)) {
846#ifdef DEBUG
847		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
848			dumpwb(3, f->f_wb3s, f->f_wb3a, f->f_wb3d);
849		wbstats.wb3s++;
850		wbstats.wbsize[(f->f_wb3s&SSW4_SZMASK)>>5]++;
851#endif
852		switch (f->f_wb3s & SSW4_SZMASK) {
853		case SSW4_SZLW:
854			if (KDFAULT(f->f_wb3s))
855				*(long *)f->f_wb3a = f->f_wb3d;
856			else
857				err = suword((caddr_t)f->f_wb3a, f->f_wb3d);
858			break;
859		case SSW4_SZB:
860			if (KDFAULT(f->f_wb3s))
861				*(char *)f->f_wb3a = f->f_wb3d;
862			else
863				err = subyte((caddr_t)f->f_wb3a, f->f_wb3d);
864			break;
865		case SSW4_SZW:
866			if (KDFAULT(f->f_wb3s))
867				*(short *)f->f_wb3a = f->f_wb3d;
868			else
869				err = susword((caddr_t)f->f_wb3a, f->f_wb3d);
870			break;
871#ifdef DEBUG
872		case SSW4_SZLN:
873			panic("writeback: wb3s indicates LINE write");
874#endif
875		}
876		if (err) {
877			fa = f->f_wb3a;
878#ifdef DEBUG
879			if (mmudebug & MDB_WBFAILED)
880				printf(wberrstr, p->p_pid, p->p_comm,
881				       "#3", fp->f_pc, f->f_fa,
882				       f->f_wb3a, f->f_wb3d);
883#endif
884		}
885	}
886	p->p_addr->u_pcb.pcb_onfault = oonfault;
887	if (err)
888		err = SIGSEGV;
889	return(err);
890}
891
892#ifdef DEBUG
893dumpssw(ssw)
894	u_short ssw;
895{
896	printf(" SSW: %x: ", ssw);
897	if (ssw & SSW4_CP)
898		printf("CP,");
899	if (ssw & SSW4_CU)
900		printf("CU,");
901	if (ssw & SSW4_CT)
902		printf("CT,");
903	if (ssw & SSW4_CM)
904		printf("CM,");
905	if (ssw & SSW4_MA)
906		printf("MA,");
907	if (ssw & SSW4_ATC)
908		printf("ATC,");
909	if (ssw & SSW4_LK)
910		printf("LK,");
911	if (ssw & SSW4_RW)
912		printf("RW,");
913	printf(" SZ=%s, TT=%s, TM=%s\n",
914	       f7sz[(ssw & SSW4_SZMASK) >> 5],
915	       f7tt[(ssw & SSW4_TTMASK) >> 3],
916	       f7tm[ssw & SSW4_TMMASK]);
917}
918
919dumpwb(num, s, a, d)
920	int num;
921	u_short s;
922	u_int a, d;
923{
924	struct proc *p = curproc;
925	paddr_t pa;
926
927	printf(" writeback #%d: VA %x, data %x, SZ=%s, TT=%s, TM=%s\n",
928	       num, a, d, f7sz[(s & SSW4_SZMASK) >> 5],
929	       f7tt[(s & SSW4_TTMASK) >> 3], f7tm[s & SSW4_TMMASK]);
930	printf("	       PA ");
931	pa = pmap_extract(p->p_vmspace->vm_map.pmap, (vaddr_t)a);
932	if (pa == 0)
933		printf("<invalid address>");
934	else
935		printf("%x, current value %x", pa, fuword((caddr_t)a));
936	printf("\n");
937}
938#endif
939#endif
940
941/*
942 * Process a system call.
943 */
944syscall(code, frame)
945	int code;
946	struct frame frame;
947{
948	caddr_t params;
949	struct sysent *callp;
950	struct proc *p;
951	int error, opc, nsys;
952	size_t argsize;
953	int args[8], rval[2];
954	u_quad_t sticks;
955
956#if defined(UVM)
957	uvmexp.syscalls++;
958#else
959	cnt.v_syscall++;
960#endif
961	if (!USERMODE(frame.f_sr))
962		panic("syscall");
963	p = curproc;
964	sticks = p->p_sticks;
965	p->p_md.md_regs = frame.f_regs;
966	opc = frame.f_pc;
967
968	nsys = p->p_emul->e_nsysent;
969	callp = p->p_emul->e_sysent;
970
971#ifdef COMPAT_SUNOS
972	if (p->p_emul == &emul_sunos) {
973		/*
974		 * SunOS passes the syscall-number on the stack, whereas
975		 * BSD passes it in D0. So, we have to get the real "code"
976		 * from the stack, and clean up the stack, as SunOS glue
977		 * code assumes the kernel pops the syscall argument the
978		 * glue pushed on the stack. Sigh...
979		 */
980		code = fuword((caddr_t)frame.f_regs[SP]);
981
982		/*
983		 * XXX
984		 * Don't do this for sunos_sigreturn, as there's no stored pc
985		 * on the stack to skip, the argument follows the syscall
986		 * number without a gap.
987		 */
988		if (code != SUNOS_SYS_sigreturn) {
989			frame.f_regs[SP] += sizeof (int);
990			/*
991			 * remember that we adjusted the SP,
992			 * might have to undo this if the system call
993			 * returns ERESTART.
994			 */
995			p->p_md.md_flags |= MDP_STACKADJ;
996		} else
997			p->p_md.md_flags &= ~MDP_STACKADJ;
998	}
999#endif
1000
1001	params = (caddr_t)frame.f_regs[SP] + sizeof(int);
1002
1003	switch (code) {
1004	case SYS_syscall:
1005		/*
1006		 * Code is first argument, followed by actual args.
1007		 */
1008		code = fuword(params);
1009		params += sizeof(int);
1010		/*
1011		 * XXX sigreturn requires special stack manipulation
1012		 * that is only done if entered via the sigreturn
1013		 * trap.  Cannot allow it here so make sure we fail.
1014		 */
1015		switch (code) {
1016#ifdef COMPAT_13
1017		case SYS_compat_13_sigreturn13:
1018#endif
1019		case SYS___sigreturn14:
1020			code = nsys;
1021			break;
1022		}
1023		break;
1024	case SYS___syscall:
1025		/*
1026		 * Like syscall, but code is a quad, so as to maintain
1027		 * quad alignment for the rest of the arguments.
1028		 */
1029		if (callp != sysent)
1030			break;
1031		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
1032		params += sizeof(quad_t);
1033		break;
1034	default:
1035		break;
1036	}
1037	if (code < 0 || code >= nsys)
1038		callp += p->p_emul->e_nosys;		/* illegal */
1039	else
1040		callp += code;
1041	argsize = callp->sy_argsize;
1042	if (argsize)
1043		error = copyin(params, (caddr_t)args, argsize);
1044	else
1045		error = 0;
1046#ifdef SYSCALL_DEBUG
1047	scdebug_call(p, code, args);
1048#endif
1049#ifdef KTRACE
1050	if (KTRPOINT(p, KTR_SYSCALL))
1051		ktrsyscall(p->p_tracep, code, argsize, args);
1052#endif
1053	if (error)
1054		goto bad;
1055	rval[0] = 0;
1056	rval[1] = frame.f_regs[D1];
1057	error = (*callp->sy_call)(p, args, rval);
1058	switch (error) {
1059	case 0:
1060		frame.f_regs[D0] = rval[0];
1061		frame.f_regs[D1] = rval[1];
1062		frame.f_sr &= ~PSL_C;	/* carry bit */
1063		break;
1064	case ERESTART:
1065		/*
1066		 * We always enter through a `trap' instruction, which is 2
1067		 * bytes, so adjust the pc by that amount.
1068		 */
1069		frame.f_pc = opc - 2;
1070		break;
1071	case EJUSTRETURN:
1072		/* nothing to do */
1073		break;
1074	default:
1075	bad:
1076		if (p->p_emul->e_errno)
1077			error = p->p_emul->e_errno[error];
1078		frame.f_regs[D0] = error;
1079		frame.f_sr |= PSL_C;	/* carry bit */
1080		break;
1081	}
1082
1083#ifdef SYSCALL_DEBUG
1084	scdebug_ret(p, code, error, rval);
1085#endif
1086#ifdef COMPAT_SUNOS
1087	/* need new p-value for this */
1088	if (error == ERESTART && (p->p_md.md_flags & MDP_STACKADJ))
1089		frame.f_regs[SP] -= sizeof (int);
1090#endif
1091	userret(p, &frame, sticks, (u_int)0, 0);
1092#ifdef KTRACE
1093	if (KTRPOINT(p, KTR_SYSRET))
1094		ktrsysret(p->p_tracep, code, error, rval[0]);
1095#endif
1096}
1097
1098void
1099child_return(p, frame)
1100	struct proc *p;
1101	struct frame frame;
1102{
1103
1104	frame.f_regs[D0] = 0;
1105	frame.f_sr &= ~PSL_C;
1106	frame.f_format = FMT0;
1107
1108	userret(p, &frame, p->p_sticks, (u_int)0, 0);
1109#ifdef KTRACE
1110	if (KTRPOINT(p, KTR_SYSRET))
1111		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1112#endif
1113}
1114
1115/*
1116 * Allocation routines for software interrupts.
1117 */
1118u_long
1119allocate_sir(proc, arg)
1120	void (*proc)();
1121	void *arg;
1122{
1123	int bit;
1124
1125	if( next_sir >= NSIR )
1126		panic("allocate_sir: none left");
1127	bit = next_sir++;
1128	sir_routines[bit] = proc;
1129	sir_args[bit] = arg;
1130	return (1 << bit);
1131}
1132
1133void
1134init_sir()
1135{
1136	extern void netintr();
1137
1138	sir_routines[0] = netintr;
1139	sir_routines[1] = softclock;
1140	next_sir = 2;
1141}
1142