trap.c revision 1.14
1/*	$NetBSD: trap.c,v 1.14 1999/03/27 02:59:41 dbj Exp $ */
2
3/*
4 * This file was taken from mvme68k/mvme68k/trap.c
5 * should probably be re-synced when needed.
6 * Darrin B. Jewell <jewell@mit.edu>  Tue Nov 10 05:07:16 1998
7 * original cvs id: NetBSD: trap.c,v 1.24 1998/10/01 02:53:54 thorpej Exp
8 */
9
10/*
11 * Copyright (c) 1988 University of Utah.
12 * Copyright (c) 1982, 1986, 1990, 1993
13 *	The Regents of the University of California.  All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * the Systems Programming Group of the University of Utah Computer
17 * Science Department.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 * 3. All advertising materials mentioning features or use of this software
28 *    must display the following acknowledgement:
29 *	This product includes software developed by the University of
30 *	California, Berkeley and its contributors.
31 * 4. Neither the name of the University nor the names of its contributors
32 *    may be used to endorse or promote products derived from this software
33 *    without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
46 *
47 * from: Utah $Hdr: trap.c 1.37 92/12/20$
48 *
49 *	@(#)trap.c	8.5 (Berkeley) 1/4/94
50 */
51
52#include "opt_ddb.h"
53#include "opt_execfmt.h"
54#include "opt_ktrace.h"
55#include "opt_compat_netbsd.h"
56#include "opt_compat_sunos.h"
57#include "opt_compat_hpux.h"
58#include "opt_compat_linux.h"
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/proc.h>
63#include <sys/acct.h>
64#include <sys/kernel.h>
65#include <sys/signalvar.h>
66#include <sys/resourcevar.h>
67#include <sys/syscall.h>
68#include <sys/syslog.h>
69#include <sys/user.h>
70#ifdef KTRACE
71#include <sys/ktrace.h>
72#endif
73#ifdef KGDB
74#include <sys/kgdb.h>
75#endif
76
77#include <machine/psl.h>
78#include <machine/trap.h>
79#include <machine/cpu.h>
80#include <machine/reg.h>
81
82#include <vm/vm.h>
83#include <vm/pmap.h>
84
85#include <uvm/uvm_extern.h>
86
87#ifdef COMPAT_HPUX
88#include <compat/hpux/hpux.h>
89#endif
90
91#ifdef COMPAT_SUNOS
92#include <compat/sunos/sunos_syscall.h>
93extern struct emul emul_sunos;
94#endif
95
96#ifdef COMPAT_LINUX
97#ifdef EXEC_AOUT
98extern struct emul emul_linux_aout;
99#endif
100#ifdef EXEC_ELF32
101extern struct emul emul_linux_elf32;
102#endif
103#endif
104
105#include <m68k/cacheops.h>
106
/*
 * AST-pending flag.  trap() clears it when it handles a user-mode
 * AST trap; it is set outside this file.
 */
int	astpending;

/*
 * Human-readable trap names, indexed by trap type number with T_USER
 * masked off.  trap() hands the matching entry to panic() when it takes
 * a trap it cannot handle.
 */
char	*trap_type[] = {
	"Bus error",
	"Address error",
	"Illegal instruction",
	"Zero divide",
	"CHK instruction",
	"TRAPV instruction",
	"Privilege violation",
	"Trace trap",
	"MMU fault",
	"SSIR trap",
	"Format error",
	"68881 exception",
	"Coprocessor violation",
	"Async system trap"
};

/* Number of names in trap_type[]; bounds the lookup done by trap(). */
int	trap_types = sizeof(trap_type) / sizeof(trap_type[0]);
126
127/*
128 * Size of various exception stack frames (minus the standard 8 bytes)
129 */
130short	exframesize[] = {
131	FMT0SIZE,	/* type 0 - normal (68020/030/040) */
132	FMT1SIZE,	/* type 1 - throwaway (68020/030/040) */
133	FMT2SIZE,	/* type 2 - normal 6-word (68020/030/040) */
134	FMT3SIZE,	/* type 3 - FP post-instruction (68040) */
135	-1, -1, -1,	/* type 4-6 - undefined */
136	FMT7SIZE,	/* type 7 - access error (68040) */
137	58,		/* type 8 - bus fault (68010) */
138	FMT9SIZE,	/* type 9 - coprocessor mid-instruction (68020/030) */
139	FMTASIZE,	/* type A - short bus fault (68020/030) */
140	FMTBSIZE,	/* type B - long bus fault (68020/030) */
141	-1, -1, -1, -1	/* type C-F - undefined */
142};
143
/*
 * Fault classification helpers.  `c' is the status word describing the
 * faulting access (the SSW, or a 68040 writeback status word).
 *
 *	KDFAULT(c)	true for a supervisor (kernel) data access
 *	WRFAULT(c)	true for a write access
 *
 * When 68040 support is configured the test is chosen at run time from
 * mmutype.  The pre-68040 KDFAULT test compares the whole function code
 * field (SSW_FCMASK) against FC_SUPERD, so it gives the same answer
 * whether or not M68040 is configured and does not match other function
 * codes that merely contain the FC_SUPERD bits.
 */
#ifdef M68040
#define KDFAULT(c)    (mmutype == MMU_68040 ? \
			    ((c) & SSW4_TMMASK) == SSW4_TMKD : \
			    ((c) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
#define WRFAULT(c)    (mmutype == MMU_68040 ? \
			    ((c) & SSW4_RW) == 0 : \
			    ((c) & (SSW_DF|SSW_RW)) == SSW_DF)
#else
#define KDFAULT(c)	(((c) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
#define WRFAULT(c)	(((c) & (SSW_DF|SSW_RW)) == SSW_DF)
#endif
155
156#ifdef DEBUG
int mmudebug = 0;	/* bitmask of MDB_* flags enabling debug output */
int mmupid = -1;	/* pid matched by MDB_ISPID() */
#define MDB_FOLLOW	1
#define MDB_WBFOLLOW	2	/* trace MMU faults and writebacks */
#define MDB_WBFAILED	4	/* report failed or aborted writebacks */
/* Fully parenthesized so the comparison binds correctly in any context. */
#define MDB_ISPID(p)	((p) == mmupid)
163#endif
164
/*
 * Software interrupt dispatch state.  Bit N of the pending mask is
 * served by calling sir_routines[N] with sir_args[N]; next_sir is the
 * first slot allocate_sir() has not yet handed out.
 */
#define NSIR	32

void	(*sir_routines[NSIR])();
void	*sir_args[NSIR];
int	next_sir;
169
170/*
171 * trap and syscall both need the following work done before returning
172 * to user mode.
173 */
174static inline void
175userret(p, fp, oticks, faultaddr, fromtrap)
176	struct proc *p;
177	struct frame *fp;
178	u_quad_t oticks;
179	u_int faultaddr;
180	int fromtrap;
181{
182	int sig, s;
183#ifdef M68040
184	int beenhere = 0;
185
186again:
187#endif
188	/* take pending signals */
189	while ((sig = CURSIG(p)) != 0)
190		postsig(sig);
191	p->p_priority = p->p_usrpri;
192	if (want_resched) {
193		/*
194		 * Since we are curproc, clock will normally just change
195		 * our priority without moving us from one queue to another
196		 * (since the running process is not on a queue.)
197		 * If that happened after we put ourselves on the run queue
198		 * but before we mi_switch()'ed, we might not be on the queue
199		 * indicated by our priority.
200		 */
201		s = splstatclock();
202		setrunqueue(p);
203		p->p_stats->p_ru.ru_nivcsw++;
204		mi_switch();
205		splx(s);
206		while ((sig = CURSIG(p)) != 0)
207			postsig(sig);
208	}
209
210	/*
211	 * If profiling, charge system time to the trapped pc.
212	 */
213	if (p->p_flag & P_PROFIL) {
214		extern int psratio;
215
216		addupc_task(p, fp->f_pc,
217			    (int)(p->p_sticks - oticks) * psratio);
218	}
219#ifdef M68040
220	/*
221	 * Deal with user mode writebacks (from trap, or from sigreturn).
222	 * If any writeback fails, go back and attempt signal delivery.
223	 * unless we have already been here and attempted the writeback
224	 * (e.g. bad address with user ignoring SIGSEGV).  In that case
225	 * we just return to the user without sucessfully completing
226	 * the writebacks.  Maybe we should just drop the sucker?
227	 */
228	if (mmutype == MMU_68040 && fp->f_format == FMT7) {
229		if (beenhere) {
230#ifdef DEBUG
231			if (mmudebug & MDB_WBFAILED)
232				printf(fromtrap ?
233		"pid %d(%s): writeback aborted, pc=%x, fa=%x\n" :
234		"pid %d(%s): writeback aborted in sigreturn, pc=%x\n",
235				    p->p_pid, p->p_comm, fp->f_pc, faultaddr);
236#endif
237		} else if (sig = writeback(fp, fromtrap)) {
238			beenhere = 1;
239			oticks = p->p_sticks;
240			trapsignal(p, sig, faultaddr);
241			goto again;
242		}
243	}
244#endif
245	curpriority = p->p_priority;
246}
247
248/*
249 * Trap is called from locore to handle most types of processor traps,
250 * including events such as simulated software interrupts/AST's.
251 * System calls are broken out for efficiency.
252 */
253/*ARGSUSED*/
254trap(type, code, v, frame)
255	int type;
256	unsigned code;
257	unsigned v;
258	struct frame frame;
259{
260	extern char fubail[], subail[];
261#ifdef DDB
262	extern char trap0[], trap1[], trap2[], trap12[], trap15[], illinst[];
263#endif
264	struct proc *p;
265	register int i, tmp;
266	u_int ucode;
267	u_quad_t sticks;
268#ifdef COMPAT_HPUX
269	extern struct emul emul_hpux;
270#endif
271	int bit;
272
273	uvmexp.traps++;
274	p = curproc;
275	ucode = 0;
276	if (USERMODE(frame.f_sr)) {
277		type |= T_USER;
278		sticks = p->p_sticks;
279		p->p_md.md_regs = frame.f_regs;
280	}
281	switch (type) {
282
283	default:
284	dopanic:
285		printf("trap type %d, code = %x, v = %x\n", type, code, v);
286		/*
287		 * Let the kernel debugger see the trap frame that
288		 * caused us to panic.  This is a convenience so
289		 * one can see registers at the point of failure.
290		 */
291		tmp = splhigh();
292#ifdef KGDB
293		/* If connected, step or cont returns 1 */
294		if (kgdb_trap(type, (struct trapframe *)&frame))
295			goto kgdb_cont;
296#endif
297#ifdef	DDB
298		if (kdb_trap(type, &frame))
299			return;
300#endif
301#ifdef KGDB
302	kgdb_cont:
303#endif
304		splx(tmp);
305		if (panicstr) {
306			/*
307			 * Note: panic is smart enough to do:
308			 *   boot(RB_AUTOBOOT | RB_NOSYNC, NULL)
309			 * if we call it again.
310			 */
311			panic("trap during panic!");
312		}
313
314		regdump((struct trapframe *)&frame, 128);
315		type &= ~T_USER;
316		if ((unsigned)type < trap_types)
317			panic(trap_type[type]);
318		panic("trap");
319
320	case T_BUSERR:		/* kernel bus error */
321		if (!p->p_addr->u_pcb.pcb_onfault)
322			goto dopanic;
323		/*
324		 * If we have arranged to catch this fault in any of the
325		 * copy to/from user space routines, set PC to return to
326		 * indicated location and set flag informing buserror code
327		 * that it may need to clean up stack frame.
328		 */
329copyfault:
330		frame.f_stackadj = exframesize[frame.f_format];
331		frame.f_format = frame.f_vector = 0;
332		frame.f_pc = (int) p->p_addr->u_pcb.pcb_onfault;
333		return;
334
335	case T_BUSERR|T_USER:	/* bus error */
336	case T_ADDRERR|T_USER:	/* address error */
337		ucode = v;
338		i = SIGBUS;
339		break;
340
341	case T_COPERR:		/* kernel coprocessor violation */
342	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
343	case T_FMTERR:		/* ...just in case... */
344	/*
345	 * The user has most likely trashed the RTE or FP state info
346	 * in the stack frame of a signal handler.
347	 */
348		printf("pid %d: kernel %s exception\n", p->p_pid,
349		       type==T_COPERR ? "coprocessor" : "format");
350		type |= T_USER;
351		p->p_sigacts->ps_sigact[SIGILL].sa_handler = SIG_DFL;
352		sigdelset(&p->p_sigignore, SIGILL);
353		sigdelset(&p->p_sigcatch, SIGILL);
354		sigdelset(&p->p_sigmask, SIGILL);
355		i = SIGILL;
356		ucode = frame.f_format;	/* XXX was ILL_RESAD_FAULT */
357		break;
358
359	case T_COPERR|T_USER:	/* user coprocessor violation */
360	/* What is a proper response here? */
361		ucode = 0;
362		i = SIGFPE;
363		break;
364
365	case T_FPERR|T_USER:	/* 68881 exceptions */
366	/*
367	 * We pass along the 68881 status register which locore stashed
368	 * in code for us.  Note that there is a possibility that the
369	 * bit pattern of this register will conflict with one of the
370	 * FPE_* codes defined in signal.h.  Fortunately for us, the
371	 * only such codes we use are all in the range 1-7 and the low
372	 * 3 bits of the status register are defined as 0 so there is
373	 * no clash.
374	 */
375		ucode = code;
376		i = SIGFPE;
377		break;
378
379#ifdef M68040
380	case T_FPEMULI|T_USER:	/* unimplemented FP instuction */
381	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
382		/* XXX need to FSAVE */
383		printf("pid %d(%s): unimplemented FP %s at %x (EA %x)\n",
384		       p->p_pid, p->p_comm,
385		       frame.f_format == 2 ? "instruction" : "data type",
386		       frame.f_pc, frame.f_fmt2.f_iaddr);
387		/* XXX need to FRESTORE */
388		i = SIGFPE;
389		break;
390#endif
391
392	case T_ILLINST|T_USER:	/* illegal instruction fault */
393#ifdef COMPAT_HPUX
394		if (p->p_emul == &emul_hpux) {
395			ucode = HPUX_ILL_ILLINST_TRAP;
396			i = SIGILL;
397			break;
398		}
399		/* fall through */
400#endif
401	case T_PRIVINST|T_USER:	/* privileged instruction fault */
402#ifdef COMPAT_HPUX
403		if (p->p_emul == &emul_hpux)
404			ucode = HPUX_ILL_PRIV_TRAP;
405		else
406#endif
407		ucode = frame.f_format;	/* XXX was ILL_PRIVIN_FAULT */
408		i = SIGILL;
409		break;
410
411	case T_ZERODIV|T_USER:	/* Divide by zero */
412#ifdef COMPAT_HPUX
413		if (p->p_emul == &emul_hpux)
414			ucode = HPUX_FPE_INTDIV_TRAP;
415		else
416#endif
417		ucode = frame.f_format;	/* XXX was FPE_INTDIV_TRAP */
418		i = SIGFPE;
419		break;
420
421	case T_CHKINST|T_USER:	/* CHK instruction trap */
422#ifdef COMPAT_HPUX
423		if (p->p_emul == &emul_hpux) {
424			/* handled differently under hp-ux */
425			i = SIGILL;
426			ucode = HPUX_ILL_CHK_TRAP;
427			break;
428		}
429#endif
430		ucode = frame.f_format;	/* XXX was FPE_SUBRNG_TRAP */
431		i = SIGFPE;
432		break;
433
434	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
435#ifdef COMPAT_HPUX
436		if (p->p_emul == &emul_hpux) {
437			/* handled differently under hp-ux */
438			i = SIGILL;
439			ucode = HPUX_ILL_TRAPV_TRAP;
440			break;
441		}
442#endif
443		ucode = frame.f_format;	/* XXX was FPE_INTOVF_TRAP */
444		i = SIGFPE;
445		break;
446
447	/*
448	 * XXX: Trace traps are a nightmare.
449	 *
450	 *	HP-UX uses trap #1 for breakpoints,
451	 *	HPBSD uses trap #2,
452	 *	SUN 3.x uses trap #15,
453	 *	KGDB uses trap #15 (for kernel breakpoints; handled elsewhere).
454	 *
455	 * HPBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
456	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
457	 * supported yet.
458	 */
459	case T_TRACE:		/* kernel trace trap */
460	case T_TRAP15:		/* SUN trace trap */
461#ifdef DDB
462		if (type == T_TRAP15 ||
463		    ((caddr_t)frame.f_pc != trap0 &&
464		     (caddr_t)frame.f_pc != trap1 &&
465		     (caddr_t)frame.f_pc != trap2 &&
466		     (caddr_t)frame.f_pc != trap12 &&
467		     (caddr_t)frame.f_pc != trap15 &&
468		     (caddr_t)frame.f_pc != illinst)) {
469			if (kdb_trap(type, &frame))
470				return;
471		}
472#endif
473		frame.f_sr &= ~PSL_T;
474		i = SIGTRAP;
475		break;
476
477	case T_TRACE|T_USER:	/* user trace trap */
478	case T_TRAP15|T_USER:	/* SUN user trace trap */
479#ifdef COMPAT_SUNOS
480		/*
481		 * SunOS uses Trap #2 for a "CPU cache flush".
482		 * Just flush the on-chip caches and return.
483		 */
484		if (p->p_emul == &emul_sunos) {
485			ICIA();
486			DCIU();
487			return;
488		}
489#endif COMPAT_SUNOS
490		frame.f_sr &= ~PSL_T;
491		i = SIGTRAP;
492		break;
493
494	case T_ASTFLT:		/* system async trap, cannot happen */
495		goto dopanic;
496
497	case T_ASTFLT|T_USER:	/* user async trap */
498		astpending = 0;
499		/*
500		 * We check for software interrupts first.  This is because
501		 * they are at a higher level than ASTs, and on a VAX would
502		 * interrupt the AST.  We assume that if we are processing
503		 * an AST that we must be at IPL0 so we don't bother to
504		 * check.  Note that we ensure that we are at least at SIR
505		 * IPL while processing the SIR.
506		 */
507		spl1();
508		/* fall into... */
509
510	case T_SSIR:		/* software interrupt */
511	case T_SSIR|T_USER:
512		while (bit = ffs(ssir)) {
513			--bit;
514			ssir &= ~(1 << bit);
515			uvmexp.softs++;
516			if (sir_routines[bit])
517				sir_routines[bit](sir_args[bit]);
518		}
519
520		/*
521		 * If this was not an AST trap, we are all done.
522		 */
523		if (type != (T_ASTFLT|T_USER)) {
524			uvmexp.traps++;
525			return;
526		}
527		spl0();
528		if (p->p_flag & P_OWEUPC) {
529			p->p_flag &= ~P_OWEUPC;
530			ADDUPROF(p);
531		}
532		goto out;
533
534	case T_MMUFLT:		/* kernel mode page fault */
535#if 0
536#ifdef	DDB
537		if (db_recover != 0)
538			goto dopanic;
539#endif
540#ifdef	KGDB
541		if (kgdb_recover != 0)
542			goto dopanic;
543#endif
544#endif
545		/*
546		 * If we were doing profiling ticks or other user mode
547		 * stuff from interrupt code, Just Say No.
548		 */
549		if (p->p_addr->u_pcb.pcb_onfault == fubail ||
550		    p->p_addr->u_pcb.pcb_onfault == subail)
551			goto copyfault;
552		/* fall into ... */
553
554	case T_MMUFLT|T_USER:	/* page fault */
555	    {
556		vaddr_t va;
557		struct vmspace *vm = p->p_vmspace;
558		vm_map_t map;
559		int rv;
560		vm_prot_t ftype;
561		extern vm_map_t kernel_map;
562
563#ifdef DEBUG
564		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
565		printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n",
566		       p->p_pid, code, v, frame.f_pc, frame.f_sr);
567#endif
568		/*
569		 * It is only a kernel address space fault iff:
570		 * 	1. (type & T_USER) == 0  and
571		 * 	2. pcb_onfault not set or
572		 *	3. pcb_onfault set but supervisor space data fault
573		 * The last can occur during an exec() copyin where the
574		 * argument space is lazy-allocated.
575		 */
576		if (type == T_MMUFLT &&
577		    (!p->p_addr->u_pcb.pcb_onfault || KDFAULT(code)))
578			map = kernel_map;
579		else
580			map = &vm->vm_map;
581		if (WRFAULT(code))
582			ftype = VM_PROT_READ | VM_PROT_WRITE;
583		else
584			ftype = VM_PROT_READ;
585		va = trunc_page((vaddr_t)v);
586#ifdef DEBUG
587		if (map == kernel_map && va == 0) {
588			printf("trap: bad kernel access at %x\n", v);
589			goto dopanic;
590		}
591#endif
592#ifdef COMPAT_HPUX
593		if (ISHPMMADDR(va)) {
594			vaddr_t bva;
595
596			rv = pmap_mapmulti(map->pmap, va);
597			if (rv != KERN_SUCCESS) {
598				bva = HPMMBASEADDR(va);
599				rv = uvm_fault(map, bva, 0, ftype);
600				if (rv == KERN_SUCCESS)
601					(void) pmap_mapmulti(map->pmap, va);
602			}
603		} else
604#endif
605		rv = uvm_fault(map, va, 0, ftype);
606#ifdef DEBUG
607		if (rv && MDB_ISPID(p->p_pid))
608			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
609			       map, va, ftype, rv);
610#endif
611		/*
612		 * If this was a stack access we keep track of the maximum
613		 * accessed stack size.  Also, if vm_fault gets a protection
614		 * failure it is due to accessing the stack region outside
615		 * the current limit and we need to reflect that as an access
616		 * error.
617		 */
618		if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map) {
619			if (rv == KERN_SUCCESS) {
620				unsigned nss;
621
622				nss = clrnd(btoc(USRSTACK-(unsigned)va));
623				if (nss > vm->vm_ssize)
624					vm->vm_ssize = nss;
625			} else if (rv == KERN_PROTECTION_FAILURE)
626				rv = KERN_INVALID_ADDRESS;
627		}
628		if (rv == KERN_SUCCESS) {
629			if (type == T_MMUFLT) {
630#if defined(M68040)
631				if (mmutype == MMU_68040)
632					(void) writeback(&frame, 1);
633#endif
634				return;
635			}
636			goto out;
637		}
638		if (type == T_MMUFLT) {
639			if (p->p_addr->u_pcb.pcb_onfault)
640				goto copyfault;
641			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
642			       map, va, ftype, rv);
643			printf("  type %x, code [mmu,,ssw]: %x\n",
644			       type, code);
645			goto dopanic;
646		}
647		ucode = v;
648		if (rv == KERN_RESOURCE_SHORTAGE) {
649			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
650			       p->p_pid, p->p_comm,
651			       p->p_cred && p->p_ucred ?
652			       p->p_ucred->cr_uid : -1);
653			i = SIGKILL;
654		} else {
655			i = SIGSEGV;
656		}
657		break;
658	    }
659	}
660	trapsignal(p, i, ucode);
661	if ((type & T_USER) == 0)
662		return;
663out:
664	userret(p, &frame, sticks, v, 1);
665}
666
667#ifdef M68040
668#ifdef DEBUG
/* Event counters maintained by writeback() in DEBUG kernels. */
struct writebackstats {
	int calls;		/* calls to writeback() */
	int cpushes;		/* dcache push faults */
	int move16s;		/* MOVE16 faults */
	int wb1s;		/* writeback #1 performed */
	int wb2s;		/* writeback #2 performed */
	int wb3s;		/* writeback #3 performed */
	int wbsize[4];		/* writebacks by size code (see f7sz[]) */
} wbstats;

/*
 * Names used by dumpssw()/dumpwb() to decode the size (SZ), transfer
 * type (TT) and transfer modifier (TM) fields of a status word.
 */
char *f7sz[] = {
	"longword", "byte", "word", "line"
};
char *f7tt[] = {
	"normal", "MOVE16", "AFC", "ACK"
};
char *f7tm[] = {
	"d-push", "u-data", "u-code", "M-data",
	"M-code", "k-data", "k-code", "RES"
};

/* printf format shared by the writeback failure reports. */
char wberrstr[] =
	"WARNING: pid %d(%s) writeback [%s] failed, pc=%x fa=%x wba=%x wbd=%x\n";
683#endif
684
685writeback(fp, docachepush)
686	struct frame *fp;
687	int docachepush;
688{
689	struct fmt7 *f = &fp->f_fmt7;
690	struct proc *p = curproc;
691	int err = 0;
692	u_int fa;
693	caddr_t oonfault = p->p_addr->u_pcb.pcb_onfault;
694
695#ifdef DEBUG
696	if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
697		printf(" pid=%d, fa=%x,", p->p_pid, f->f_fa);
698		dumpssw(f->f_ssw);
699	}
700	wbstats.calls++;
701#endif
702	/*
703	 * Deal with special cases first.
704	 */
705	if ((f->f_ssw & SSW4_TMMASK) == SSW4_TMDCP) {
706		/*
707		 * Dcache push fault.
708		 * Line-align the address and write out the push data to
709		 * the indicated physical address.
710		 */
711#ifdef DEBUG
712		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
713			printf(" pushing %s to PA %x, data %x",
714			       f7sz[(f->f_ssw & SSW4_SZMASK) >> 5],
715			       f->f_fa, f->f_pd0);
716			if ((f->f_ssw & SSW4_SZMASK) == SSW4_SZLN)
717				printf("/%x/%x/%x",
718				       f->f_pd1, f->f_pd2, f->f_pd3);
719			printf("\n");
720		}
721		if (f->f_wb1s & SSW4_WBSV)
722			panic("writeback: cache push with WB1S valid");
723		wbstats.cpushes++;
724#endif
725		/*
726		 * XXX there are security problems if we attempt to do a
727		 * cache push after a signal handler has been called.
728		 */
729		if (docachepush) {
730			pmap_enter(pmap_kernel(), (vaddr_t)vmmap,
731			    trunc_page(f->f_fa), VM_PROT_WRITE, TRUE,
732			    VM_PROT_WRITE);
733			fa = (u_int)&vmmap[(f->f_fa & PGOFSET) & ~0xF];
734			bcopy((caddr_t)&f->f_pd0, (caddr_t)fa, 16);
735			DCFL(pmap_extract(pmap_kernel(), (vaddr_t)fa));
736			pmap_remove(pmap_kernel(), (vaddr_t)vmmap,
737				    (vaddr_t)&vmmap[NBPG]);
738		} else
739			printf("WARNING: pid %d(%s) uid %d: CPUSH not done\n",
740			       p->p_pid, p->p_comm, p->p_ucred->cr_uid);
741	} else if ((f->f_ssw & (SSW4_RW|SSW4_TTMASK)) == SSW4_TTM16) {
742		/*
743		 * MOVE16 fault.
744		 * Line-align the address and write out the push data to
745		 * the indicated virtual address.
746		 */
747#ifdef DEBUG
748		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
749			printf(" MOVE16 to VA %x(%x), data %x/%x/%x/%x\n",
750			       f->f_fa, f->f_fa & ~0xF, f->f_pd0, f->f_pd1,
751			       f->f_pd2, f->f_pd3);
752		if (f->f_wb1s & SSW4_WBSV)
753			panic("writeback: MOVE16 with WB1S valid");
754		wbstats.move16s++;
755#endif
756		if (KDFAULT(f->f_wb1s))
757			bcopy((caddr_t)&f->f_pd0, (caddr_t)(f->f_fa & ~0xF), 16);
758		else
759			err = suline((caddr_t)(f->f_fa & ~0xF), (caddr_t)&f->f_pd0);
760		if (err) {
761			fa = f->f_fa & ~0xF;
762#ifdef DEBUG
763			if (mmudebug & MDB_WBFAILED)
764				printf(wberrstr, p->p_pid, p->p_comm,
765				       "MOVE16", fp->f_pc, f->f_fa,
766				       f->f_fa & ~0xF, f->f_pd0);
767#endif
768		}
769	} else if (f->f_wb1s & SSW4_WBSV) {
770		/*
771		 * Writeback #1.
772		 * Position the "memory-aligned" data and write it out.
773		 */
774		u_int wb1d = f->f_wb1d;
775		int off;
776
777#ifdef DEBUG
778		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
779			dumpwb(1, f->f_wb1s, f->f_wb1a, f->f_wb1d);
780		wbstats.wb1s++;
781		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
782#endif
783		off = (f->f_wb1a & 3) * 8;
784		switch (f->f_wb1s & SSW4_SZMASK) {
785		case SSW4_SZLW:
786			if (off)
787				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
788			if (KDFAULT(f->f_wb1s))
789				*(long *)f->f_wb1a = wb1d;
790			else
791				err = suword((caddr_t)f->f_wb1a, wb1d);
792			break;
793		case SSW4_SZB:
794			off = 24 - off;
795			if (off)
796				wb1d >>= off;
797			if (KDFAULT(f->f_wb1s))
798				*(char *)f->f_wb1a = wb1d;
799			else
800				err = subyte((caddr_t)f->f_wb1a, wb1d);
801			break;
802		case SSW4_SZW:
803			off = (off + 16) % 32;
804			if (off)
805				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
806			if (KDFAULT(f->f_wb1s))
807				*(short *)f->f_wb1a = wb1d;
808			else
809				err = susword((caddr_t)f->f_wb1a, wb1d);
810			break;
811		}
812		if (err) {
813			fa = f->f_wb1a;
814#ifdef DEBUG
815			if (mmudebug & MDB_WBFAILED)
816				printf(wberrstr, p->p_pid, p->p_comm,
817				       "#1", fp->f_pc, f->f_fa,
818				       f->f_wb1a, f->f_wb1d);
819#endif
820		}
821	}
822	/*
823	 * Deal with the "normal" writebacks.
824	 *
825	 * XXX writeback2 is known to reflect a LINE size writeback after
826	 * a MOVE16 was already dealt with above.  Ignore it.
827	 */
828	if (err == 0 && (f->f_wb2s & SSW4_WBSV) &&
829	    (f->f_wb2s & SSW4_SZMASK) != SSW4_SZLN) {
830#ifdef DEBUG
831		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
832			dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
833		wbstats.wb2s++;
834		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
835#endif
836		switch (f->f_wb2s & SSW4_SZMASK) {
837		case SSW4_SZLW:
838			if (KDFAULT(f->f_wb2s))
839				*(long *)f->f_wb2a = f->f_wb2d;
840			else
841				err = suword((caddr_t)f->f_wb2a, f->f_wb2d);
842			break;
843		case SSW4_SZB:
844			if (KDFAULT(f->f_wb2s))
845				*(char *)f->f_wb2a = f->f_wb2d;
846			else
847				err = subyte((caddr_t)f->f_wb2a, f->f_wb2d);
848			break;
849		case SSW4_SZW:
850			if (KDFAULT(f->f_wb2s))
851				*(short *)f->f_wb2a = f->f_wb2d;
852			else
853				err = susword((caddr_t)f->f_wb2a, f->f_wb2d);
854			break;
855		}
856		if (err) {
857			fa = f->f_wb2a;
858#ifdef DEBUG
859			if (mmudebug & MDB_WBFAILED) {
860				printf(wberrstr, p->p_pid, p->p_comm,
861				       "#2", fp->f_pc, f->f_fa,
862				       f->f_wb2a, f->f_wb2d);
863				dumpssw(f->f_ssw);
864				dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
865			}
866#endif
867		}
868	}
869	if (err == 0 && (f->f_wb3s & SSW4_WBSV)) {
870#ifdef DEBUG
871		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
872			dumpwb(3, f->f_wb3s, f->f_wb3a, f->f_wb3d);
873		wbstats.wb3s++;
874		wbstats.wbsize[(f->f_wb3s&SSW4_SZMASK)>>5]++;
875#endif
876		switch (f->f_wb3s & SSW4_SZMASK) {
877		case SSW4_SZLW:
878			if (KDFAULT(f->f_wb3s))
879				*(long *)f->f_wb3a = f->f_wb3d;
880			else
881				err = suword((caddr_t)f->f_wb3a, f->f_wb3d);
882			break;
883		case SSW4_SZB:
884			if (KDFAULT(f->f_wb3s))
885				*(char *)f->f_wb3a = f->f_wb3d;
886			else
887				err = subyte((caddr_t)f->f_wb3a, f->f_wb3d);
888			break;
889		case SSW4_SZW:
890			if (KDFAULT(f->f_wb3s))
891				*(short *)f->f_wb3a = f->f_wb3d;
892			else
893				err = susword((caddr_t)f->f_wb3a, f->f_wb3d);
894			break;
895#ifdef DEBUG
896		case SSW4_SZLN:
897			panic("writeback: wb3s indicates LINE write");
898#endif
899		}
900		if (err) {
901			fa = f->f_wb3a;
902#ifdef DEBUG
903			if (mmudebug & MDB_WBFAILED)
904				printf(wberrstr, p->p_pid, p->p_comm,
905				       "#3", fp->f_pc, f->f_fa,
906				       f->f_wb3a, f->f_wb3d);
907#endif
908		}
909	}
910	p->p_addr->u_pcb.pcb_onfault = oonfault;
911	if (err)
912		err = SIGSEGV;
913	return(err);
914}
915
916#ifdef DEBUG
917dumpssw(ssw)
918	u_short ssw;
919{
920	printf(" SSW: %x: ", ssw);
921	if (ssw & SSW4_CP)
922		printf("CP,");
923	if (ssw & SSW4_CU)
924		printf("CU,");
925	if (ssw & SSW4_CT)
926		printf("CT,");
927	if (ssw & SSW4_CM)
928		printf("CM,");
929	if (ssw & SSW4_MA)
930		printf("MA,");
931	if (ssw & SSW4_ATC)
932		printf("ATC,");
933	if (ssw & SSW4_LK)
934		printf("LK,");
935	if (ssw & SSW4_RW)
936		printf("RW,");
937	printf(" SZ=%s, TT=%s, TM=%s\n",
938	       f7sz[(ssw & SSW4_SZMASK) >> 5],
939	       f7tt[(ssw & SSW4_TTMASK) >> 3],
940	       f7tm[ssw & SSW4_TMMASK]);
941}
942
943dumpwb(num, s, a, d)
944	int num;
945	u_short s;
946	u_int a, d;
947{
948	struct proc *p = curproc;
949	paddr_t pa;
950
951	printf(" writeback #%d: VA %x, data %x, SZ=%s, TT=%s, TM=%s\n",
952	       num, a, d, f7sz[(s & SSW4_SZMASK) >> 5],
953	       f7tt[(s & SSW4_TTMASK) >> 3], f7tm[s & SSW4_TMMASK]);
954	printf("	       PA ");
955	pa = pmap_extract(p->p_vmspace->vm_map.pmap, (vaddr_t)a);
956	if (pa == 0)
957		printf("<invalid address>");
958	else
959		printf("%x, current value %x", pa, fuword((caddr_t)a));
960	printf("\n");
961}
962#endif
963#endif
964
965/*
966 * Process a system call.
967 */
968syscall(code, frame)
969	int code;
970	struct frame frame;
971{
972	caddr_t params;
973	struct sysent *callp;
974	struct proc *p;
975	int error, opc, nsys;
976	size_t argsize;
977	int args[8], rval[2];
978	u_quad_t sticks;
979
980	uvmexp.syscalls++;
981	if (!USERMODE(frame.f_sr))
982		panic("syscall");
983	p = curproc;
984	sticks = p->p_sticks;
985	p->p_md.md_regs = frame.f_regs;
986	opc = frame.f_pc;
987
988	nsys = p->p_emul->e_nsysent;
989	callp = p->p_emul->e_sysent;
990
991#ifdef COMPAT_SUNOS
992	if (p->p_emul == &emul_sunos) {
993		/*
994		 * SunOS passes the syscall-number on the stack, whereas
995		 * BSD passes it in D0. So, we have to get the real "code"
996		 * from the stack, and clean up the stack, as SunOS glue
997		 * code assumes the kernel pops the syscall argument the
998		 * glue pushed on the stack. Sigh...
999		 */
1000		code = fuword((caddr_t)frame.f_regs[SP]);
1001
1002		/*
1003		 * XXX
1004		 * Don't do this for sunos_sigreturn, as there's no stored pc
1005		 * on the stack to skip, the argument follows the syscall
1006		 * number without a gap.
1007		 */
1008		if (code != SUNOS_SYS_sigreturn) {
1009			frame.f_regs[SP] += sizeof (int);
1010			/*
1011			 * remember that we adjusted the SP,
1012			 * might have to undo this if the system call
1013			 * returns ERESTART.
1014			 */
1015			p->p_md.md_flags |= MDP_STACKADJ;
1016		} else
1017			p->p_md.md_flags &= ~MDP_STACKADJ;
1018	}
1019#endif
1020
1021	params = (caddr_t)frame.f_regs[SP] + sizeof(int);
1022
1023	switch (code) {
1024	case SYS_syscall:
1025		/*
1026		 * Code is first argument, followed by actual args.
1027		 */
1028		code = fuword(params);
1029		params += sizeof(int);
1030		/*
1031		 * XXX sigreturn requires special stack manipulation
1032		 * that is only done if entered via the sigreturn
1033		 * trap.  Cannot allow it here so make sure we fail.
1034		 */
1035		switch (code) {
1036#ifdef COMPAT_13
1037		case SYS_compat_13_sigreturn13:
1038#endif
1039		case SYS___sigreturn14:
1040			code = nsys;
1041			break;
1042		}
1043		break;
1044	case SYS___syscall:
1045		/*
1046		 * Like syscall, but code is a quad, so as to maintain
1047		 * quad alignment for the rest of the arguments.
1048		 */
1049		if (callp != sysent)
1050			break;
1051		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
1052		params += sizeof(quad_t);
1053		break;
1054	default:
1055		break;
1056	}
1057	if (code < 0 || code >= nsys)
1058		callp += p->p_emul->e_nosys;		/* illegal */
1059	else
1060		callp += code;
1061	argsize = callp->sy_argsize;
1062#ifdef COMPAT_LINUX
1063	if (0
1064# ifdef EXEC_AOUT
1065	    || p->p_emul == &emul_linux_aout
1066# endif
1067# ifdef EXEC_ELF32
1068	    || p->p_emul == &emul_linux_elf32
1069# endif
1070	     ) {
1071		/*
1072		 * Linux passes the args in d1-d5
1073		 */
1074		switch (argsize) {
1075		case 20:
1076			args[4] = frame.f_regs[D5];
1077		case 16:
1078			args[3] = frame.f_regs[D4];
1079		case 12:
1080			args[2] = frame.f_regs[D3];
1081		case 8:
1082			args[1] = frame.f_regs[D2];
1083		case 4:
1084			args[0] = frame.f_regs[D1];
1085		case 0:
1086			error = 0;
1087			break;
1088		default:
1089#ifdef DEBUG
1090			panic("linux syscall %d weird argsize %d",
1091				code, argsize);
1092#else
1093			error = EINVAL;
1094#endif
1095			break;
1096		}
1097	} else
1098#endif
1099	if (argsize)
1100		error = copyin(params, (caddr_t)args, argsize);
1101	else
1102		error = 0;
1103#ifdef SYSCALL_DEBUG
1104	scdebug_call(p, code, args);
1105#endif
1106#ifdef KTRACE
1107	if (KTRPOINT(p, KTR_SYSCALL))
1108		ktrsyscall(p->p_tracep, code, argsize, args);
1109#endif
1110	if (error)
1111		goto bad;
1112	rval[0] = 0;
1113	rval[1] = frame.f_regs[D1];
1114	error = (*callp->sy_call)(p, args, rval);
1115	switch (error) {
1116	case 0:
1117		frame.f_regs[D0] = rval[0];
1118		frame.f_regs[D1] = rval[1];
1119		frame.f_sr &= ~PSL_C;	/* carry bit */
1120		break;
1121	case ERESTART:
1122		/*
1123		 * We always enter through a `trap' instruction, which is 2
1124		 * bytes, so adjust the pc by that amount.
1125		 */
1126		frame.f_pc = opc - 2;
1127		break;
1128	case EJUSTRETURN:
1129		/* nothing to do */
1130		break;
1131	default:
1132	bad:
1133		if (p->p_emul->e_errno)
1134			error = p->p_emul->e_errno[error];
1135		frame.f_regs[D0] = error;
1136		frame.f_sr |= PSL_C;	/* carry bit */
1137		break;
1138	}
1139
1140#ifdef SYSCALL_DEBUG
1141	scdebug_ret(p, code, error, rval);
1142#endif
1143#ifdef COMPAT_SUNOS
1144	/* need new p-value for this */
1145	if (error == ERESTART && (p->p_md.md_flags & MDP_STACKADJ))
1146		frame.f_regs[SP] -= sizeof (int);
1147#endif
1148	userret(p, &frame, sticks, (u_int)0, 0);
1149#ifdef KTRACE
1150	if (KTRPOINT(p, KTR_SYSRET))
1151		ktrsysret(p->p_tracep, code, error, rval[0]);
1152#endif
1153}
1154
1155void
1156child_return(arg)
1157	void *arg;
1158{
1159	struct proc *p = arg;
1160	/* See cpu_fork() */
1161	struct frame *f = (struct frame *)p->p_md.md_regs;
1162
1163	f->f_regs[D0] = 0;
1164	f->f_sr &= ~PSL_C;
1165	f->f_format = FMT0;
1166
1167	userret(p, f, p->p_sticks, (u_int)0, 0);
1168#ifdef KTRACE
1169	if (KTRPOINT(p, KTR_SYSRET))
1170		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1171#endif
1172}
1173
1174/*
1175 * Allocation routines for software interrupts.
1176 */
1177u_long
1178allocate_sir(proc, arg)
1179	void (*proc)();
1180	void *arg;
1181{
1182	int bit;
1183
1184	if( next_sir >= NSIR )
1185		panic("allocate_sir: none left");
1186	bit = next_sir++;
1187	sir_routines[bit] = proc;
1188	sir_args[bit] = arg;
1189	return (1 << bit);
1190}
1191
1192void
1193init_sir()
1194{
1195	extern void netintr();
1196
1197	sir_routines[0] = netintr;
1198	sir_routines[1] = softclock;
1199	next_sir = 2;
1200}
1201