trap.c revision 1.15
1/*	$NetBSD: trap.c,v 1.15 1999/07/08 18:08:57 thorpej Exp $ */
2
3/*
 * This file was taken from mvme68k/mvme68k/trap.c and
 * should probably be re-synced with it when needed.
6 * Darrin B. Jewell <jewell@mit.edu>  Tue Nov 10 05:07:16 1998
7 * original cvs id: NetBSD: trap.c,v 1.24 1998/10/01 02:53:54 thorpej Exp
8 */
9
10/*
11 * Copyright (c) 1988 University of Utah.
12 * Copyright (c) 1982, 1986, 1990, 1993
13 *	The Regents of the University of California.  All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * the Systems Programming Group of the University of Utah Computer
17 * Science Department.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 * 3. All advertising materials mentioning features or use of this software
28 *    must display the following acknowledgement:
29 *	This product includes software developed by the University of
30 *	California, Berkeley and its contributors.
31 * 4. Neither the name of the University nor the names of its contributors
32 *    may be used to endorse or promote products derived from this software
33 *    without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
46 *
47 * from: Utah $Hdr: trap.c 1.37 92/12/20$
48 *
49 *	@(#)trap.c	8.5 (Berkeley) 1/4/94
50 */
51
52#include "opt_ddb.h"
53#include "opt_execfmt.h"
54#include "opt_ktrace.h"
55#include "opt_compat_netbsd.h"
56#include "opt_compat_sunos.h"
57#include "opt_compat_hpux.h"
58#include "opt_compat_linux.h"
59
60#include <sys/param.h>
61#include <sys/systm.h>
62#include <sys/proc.h>
63#include <sys/acct.h>
64#include <sys/kernel.h>
65#include <sys/signalvar.h>
66#include <sys/resourcevar.h>
67#include <sys/syscall.h>
68#include <sys/syslog.h>
69#include <sys/user.h>
70#ifdef KTRACE
71#include <sys/ktrace.h>
72#endif
73#ifdef KGDB
74#include <sys/kgdb.h>
75#endif
76
77#include <machine/psl.h>
78#include <machine/trap.h>
79#include <machine/cpu.h>
80#include <machine/reg.h>
81
82#include <vm/vm.h>
83#include <vm/pmap.h>
84
85#include <uvm/uvm_extern.h>
86
87#ifdef COMPAT_HPUX
88#include <compat/hpux/hpux.h>
89#endif
90
91#ifdef COMPAT_SUNOS
92#include <compat/sunos/sunos_syscall.h>
93extern struct emul emul_sunos;
94#endif
95
96#ifdef COMPAT_LINUX
97#ifdef EXEC_AOUT
98extern struct emul emul_linux_aout;
99#endif
100#ifdef EXEC_ELF32
101extern struct emul emul_linux_elf32;
102#endif
103#endif
104
105#include <m68k/cacheops.h>
106
/* AST-pending flag; trap() clears it when it handles T_ASTFLT|T_USER. */
int	astpending;

/*
 * Human-readable trap names.  trap() indexes this table with the trap
 * type (T_USER bit stripped) to build its panic message.
 */
char	*trap_type[] = {
	"Bus error",
	"Address error",
	"Illegal instruction",
	"Zero divide",
	"CHK instruction",
	"TRAPV instruction",
	"Privilege violation",
	"Trace trap",
	"MMU fault",
	"SSIR trap",
	"Format error",
	"68881 exception",
	"Coprocessor violation",
	"Async system trap"
};

/* Number of entries in trap_type[]. */
int	trap_types = sizeof(trap_type) / sizeof(trap_type[0]);
126
127/*
128 * Size of various exception stack frames (minus the standard 8 bytes)
129 */
130short	exframesize[] = {
131	FMT0SIZE,	/* type 0 - normal (68020/030/040) */
132	FMT1SIZE,	/* type 1 - throwaway (68020/030/040) */
133	FMT2SIZE,	/* type 2 - normal 6-word (68020/030/040) */
134	FMT3SIZE,	/* type 3 - FP post-instruction (68040) */
135	-1, -1, -1,	/* type 4-6 - undefined */
136	FMT7SIZE,	/* type 7 - access error (68040) */
137	58,		/* type 8 - bus fault (68010) */
138	FMT9SIZE,	/* type 9 - coprocessor mid-instruction (68020/030) */
139	FMTASIZE,	/* type A - short bus fault (68020/030) */
140	FMTBSIZE,	/* type B - long bus fault (68020/030) */
141	-1, -1, -1, -1	/* type C-F - undefined */
142};
143
/*
 * Fault classification helpers; `c' is the special status word that
 * locore passes to trap() as `code' (or a writeback status word).
 *
 *	KDFAULT(c)	true for a supervisor (kernel) data space fault
 *	WRFAULT(c)	true when the faulting access was a write
 *
 * When M68040 is configured the test is chosen at run time from
 * mmutype, since the 68040 SSW layout differs from the 68020/030 one.
 * The 68020/030 arm compares the whole function code field
 * (SSW_FCMASK) so that it agrees with the non-68040 definition below.
 */
#ifdef M68040
#define KDFAULT(c)	(mmutype == MMU_68040 ? \
			    (((c) & SSW4_TMMASK) == SSW4_TMKD) : \
			    (((c) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD)))
#define WRFAULT(c)	(mmutype == MMU_68040 ? \
			    (((c) & SSW4_RW) == 0) : \
			    (((c) & (SSW_DF|SSW_RW)) == SSW_DF))
#else
#define KDFAULT(c)	(((c) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
#define WRFAULT(c)	(((c) & (SSW_DF|SSW_RW)) == SSW_DF)
#endif
155
156#ifdef DEBUG
/*
 * MMU fault debugging knobs (DEBUG kernels only).
 *
 * mmudebug is a bit mask of the MDB_* flags below; mmupid selects a
 * single process id whose faults are traced (-1 by default).
 */
int mmudebug = 0;
int mmupid = -1;
#define MDB_FOLLOW	1
#define MDB_WBFOLLOW	2
#define MDB_WBFAILED	4
/*
 * True when pid `p' is the one selected by mmupid.  The expansion is
 * fully parenthesized so it is safe next to !, && and other operators.
 */
#define MDB_ISPID(p)	((p) == mmupid)
163#endif
164
/*
 * Software interrupt dispatch state.  Slot N holds the handler and its
 * argument for software interrupt bit N; next_sir is the next slot
 * that allocate_sir() will hand out.
 */
#define NSIR	32

int next_sir;
void *sir_args[NSIR];
void (*sir_routines[NSIR])();
169
170/*
171 * trap and syscall both need the following work done before returning
172 * to user mode.
173 */
174static inline void
175userret(p, fp, oticks, faultaddr, fromtrap)
176	struct proc *p;
177	struct frame *fp;
178	u_quad_t oticks;
179	u_int faultaddr;
180	int fromtrap;
181{
182	int sig, s;
183#ifdef M68040
184	int beenhere = 0;
185
186again:
187#endif
188	/* take pending signals */
189	while ((sig = CURSIG(p)) != 0)
190		postsig(sig);
191	p->p_priority = p->p_usrpri;
192	if (want_resched) {
193		/*
194		 * Since we are curproc, clock will normally just change
195		 * our priority without moving us from one queue to another
196		 * (since the running process is not on a queue.)
197		 * If that happened after we put ourselves on the run queue
198		 * but before we mi_switch()'ed, we might not be on the queue
199		 * indicated by our priority.
200		 */
201		s = splstatclock();
202		setrunqueue(p);
203		p->p_stats->p_ru.ru_nivcsw++;
204		mi_switch();
205		splx(s);
206		while ((sig = CURSIG(p)) != 0)
207			postsig(sig);
208	}
209
210	/*
211	 * If profiling, charge system time to the trapped pc.
212	 */
213	if (p->p_flag & P_PROFIL) {
214		extern int psratio;
215
216		addupc_task(p, fp->f_pc,
217			    (int)(p->p_sticks - oticks) * psratio);
218	}
219#ifdef M68040
220	/*
221	 * Deal with user mode writebacks (from trap, or from sigreturn).
222	 * If any writeback fails, go back and attempt signal delivery.
223	 * unless we have already been here and attempted the writeback
224	 * (e.g. bad address with user ignoring SIGSEGV).  In that case
225	 * we just return to the user without sucessfully completing
226	 * the writebacks.  Maybe we should just drop the sucker?
227	 */
228	if (mmutype == MMU_68040 && fp->f_format == FMT7) {
229		if (beenhere) {
230#ifdef DEBUG
231			if (mmudebug & MDB_WBFAILED)
232				printf(fromtrap ?
233		"pid %d(%s): writeback aborted, pc=%x, fa=%x\n" :
234		"pid %d(%s): writeback aborted in sigreturn, pc=%x\n",
235				    p->p_pid, p->p_comm, fp->f_pc, faultaddr);
236#endif
237		} else if (sig = writeback(fp, fromtrap)) {
238			beenhere = 1;
239			oticks = p->p_sticks;
240			trapsignal(p, sig, faultaddr);
241			goto again;
242		}
243	}
244#endif
245	curpriority = p->p_priority;
246}
247
248/*
249 * Trap is called from locore to handle most types of processor traps,
250 * including events such as simulated software interrupts/AST's.
251 * System calls are broken out for efficiency.
252 */
253/*ARGSUSED*/
254trap(type, code, v, frame)
255	int type;
256	unsigned code;
257	unsigned v;
258	struct frame frame;
259{
260	extern char fubail[], subail[];
261#ifdef DDB
262	extern char trap0[], trap1[], trap2[], trap12[], trap15[], illinst[];
263#endif
264	struct proc *p;
265	register int i, tmp;
266	u_int ucode;
267	u_quad_t sticks;
268#ifdef COMPAT_HPUX
269	extern struct emul emul_hpux;
270#endif
271	int bit;
272
273	uvmexp.traps++;
274	p = curproc;
275	ucode = 0;
276	if (USERMODE(frame.f_sr)) {
277		type |= T_USER;
278		sticks = p->p_sticks;
279		p->p_md.md_regs = frame.f_regs;
280	}
281	switch (type) {
282
283	default:
284	dopanic:
285		printf("trap type %d, code = %x, v = %x\n", type, code, v);
286		/*
287		 * Let the kernel debugger see the trap frame that
288		 * caused us to panic.  This is a convenience so
289		 * one can see registers at the point of failure.
290		 */
291		tmp = splhigh();
292#ifdef KGDB
293		/* If connected, step or cont returns 1 */
294		if (kgdb_trap(type, (struct trapframe *)&frame))
295			goto kgdb_cont;
296#endif
297#ifdef	DDB
298		if (kdb_trap(type, &frame))
299			return;
300#endif
301#ifdef KGDB
302	kgdb_cont:
303#endif
304		splx(tmp);
305		if (panicstr) {
306			/*
307			 * Note: panic is smart enough to do:
308			 *   boot(RB_AUTOBOOT | RB_NOSYNC, NULL)
309			 * if we call it again.
310			 */
311			panic("trap during panic!");
312		}
313
314		regdump((struct trapframe *)&frame, 128);
315		type &= ~T_USER;
316		if ((unsigned)type < trap_types)
317			panic(trap_type[type]);
318		panic("trap");
319
320	case T_BUSERR:		/* kernel bus error */
321		if (!p->p_addr->u_pcb.pcb_onfault)
322			goto dopanic;
323		/*
324		 * If we have arranged to catch this fault in any of the
325		 * copy to/from user space routines, set PC to return to
326		 * indicated location and set flag informing buserror code
327		 * that it may need to clean up stack frame.
328		 */
329copyfault:
330		frame.f_stackadj = exframesize[frame.f_format];
331		frame.f_format = frame.f_vector = 0;
332		frame.f_pc = (int) p->p_addr->u_pcb.pcb_onfault;
333		return;
334
335	case T_BUSERR|T_USER:	/* bus error */
336	case T_ADDRERR|T_USER:	/* address error */
337		ucode = v;
338		i = SIGBUS;
339		break;
340
341	case T_COPERR:		/* kernel coprocessor violation */
342	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
343	case T_FMTERR:		/* ...just in case... */
344	/*
345	 * The user has most likely trashed the RTE or FP state info
346	 * in the stack frame of a signal handler.
347	 */
348		printf("pid %d: kernel %s exception\n", p->p_pid,
349		       type==T_COPERR ? "coprocessor" : "format");
350		type |= T_USER;
351		p->p_sigacts->ps_sigact[SIGILL].sa_handler = SIG_DFL;
352		sigdelset(&p->p_sigignore, SIGILL);
353		sigdelset(&p->p_sigcatch, SIGILL);
354		sigdelset(&p->p_sigmask, SIGILL);
355		i = SIGILL;
356		ucode = frame.f_format;	/* XXX was ILL_RESAD_FAULT */
357		break;
358
359	case T_COPERR|T_USER:	/* user coprocessor violation */
360	/* What is a proper response here? */
361		ucode = 0;
362		i = SIGFPE;
363		break;
364
365	case T_FPERR|T_USER:	/* 68881 exceptions */
366	/*
367	 * We pass along the 68881 status register which locore stashed
368	 * in code for us.  Note that there is a possibility that the
369	 * bit pattern of this register will conflict with one of the
370	 * FPE_* codes defined in signal.h.  Fortunately for us, the
371	 * only such codes we use are all in the range 1-7 and the low
372	 * 3 bits of the status register are defined as 0 so there is
373	 * no clash.
374	 */
375		ucode = code;
376		i = SIGFPE;
377		break;
378
379#ifdef M68040
380	case T_FPEMULI|T_USER:	/* unimplemented FP instuction */
381	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
382		/* XXX need to FSAVE */
383		printf("pid %d(%s): unimplemented FP %s at %x (EA %x)\n",
384		       p->p_pid, p->p_comm,
385		       frame.f_format == 2 ? "instruction" : "data type",
386		       frame.f_pc, frame.f_fmt2.f_iaddr);
387		/* XXX need to FRESTORE */
388		i = SIGFPE;
389		break;
390#endif
391
392	case T_ILLINST|T_USER:	/* illegal instruction fault */
393#ifdef COMPAT_HPUX
394		if (p->p_emul == &emul_hpux) {
395			ucode = HPUX_ILL_ILLINST_TRAP;
396			i = SIGILL;
397			break;
398		}
399		/* fall through */
400#endif
401	case T_PRIVINST|T_USER:	/* privileged instruction fault */
402#ifdef COMPAT_HPUX
403		if (p->p_emul == &emul_hpux)
404			ucode = HPUX_ILL_PRIV_TRAP;
405		else
406#endif
407		ucode = frame.f_format;	/* XXX was ILL_PRIVIN_FAULT */
408		i = SIGILL;
409		break;
410
411	case T_ZERODIV|T_USER:	/* Divide by zero */
412#ifdef COMPAT_HPUX
413		if (p->p_emul == &emul_hpux)
414			ucode = HPUX_FPE_INTDIV_TRAP;
415		else
416#endif
417		ucode = frame.f_format;	/* XXX was FPE_INTDIV_TRAP */
418		i = SIGFPE;
419		break;
420
421	case T_CHKINST|T_USER:	/* CHK instruction trap */
422#ifdef COMPAT_HPUX
423		if (p->p_emul == &emul_hpux) {
424			/* handled differently under hp-ux */
425			i = SIGILL;
426			ucode = HPUX_ILL_CHK_TRAP;
427			break;
428		}
429#endif
430		ucode = frame.f_format;	/* XXX was FPE_SUBRNG_TRAP */
431		i = SIGFPE;
432		break;
433
434	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
435#ifdef COMPAT_HPUX
436		if (p->p_emul == &emul_hpux) {
437			/* handled differently under hp-ux */
438			i = SIGILL;
439			ucode = HPUX_ILL_TRAPV_TRAP;
440			break;
441		}
442#endif
443		ucode = frame.f_format;	/* XXX was FPE_INTOVF_TRAP */
444		i = SIGFPE;
445		break;
446
447	/*
448	 * XXX: Trace traps are a nightmare.
449	 *
450	 *	HP-UX uses trap #1 for breakpoints,
451	 *	HPBSD uses trap #2,
452	 *	SUN 3.x uses trap #15,
453	 *	KGDB uses trap #15 (for kernel breakpoints; handled elsewhere).
454	 *
455	 * HPBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
456	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
457	 * supported yet.
458	 */
459	case T_TRACE:		/* kernel trace trap */
460	case T_TRAP15:		/* SUN trace trap */
461#ifdef DDB
462		if (type == T_TRAP15 ||
463		    ((caddr_t)frame.f_pc != trap0 &&
464		     (caddr_t)frame.f_pc != trap1 &&
465		     (caddr_t)frame.f_pc != trap2 &&
466		     (caddr_t)frame.f_pc != trap12 &&
467		     (caddr_t)frame.f_pc != trap15 &&
468		     (caddr_t)frame.f_pc != illinst)) {
469			if (kdb_trap(type, &frame))
470				return;
471		}
472#endif
473		frame.f_sr &= ~PSL_T;
474		i = SIGTRAP;
475		break;
476
477	case T_TRACE|T_USER:	/* user trace trap */
478	case T_TRAP15|T_USER:	/* SUN user trace trap */
479#ifdef COMPAT_SUNOS
480		/*
481		 * SunOS uses Trap #2 for a "CPU cache flush".
482		 * Just flush the on-chip caches and return.
483		 */
484		if (p->p_emul == &emul_sunos) {
485			ICIA();
486			DCIU();
487			return;
488		}
489#endif COMPAT_SUNOS
490		frame.f_sr &= ~PSL_T;
491		i = SIGTRAP;
492		break;
493
494	case T_ASTFLT:		/* system async trap, cannot happen */
495		goto dopanic;
496
497	case T_ASTFLT|T_USER:	/* user async trap */
498		astpending = 0;
499		/*
500		 * We check for software interrupts first.  This is because
501		 * they are at a higher level than ASTs, and on a VAX would
502		 * interrupt the AST.  We assume that if we are processing
503		 * an AST that we must be at IPL0 so we don't bother to
504		 * check.  Note that we ensure that we are at least at SIR
505		 * IPL while processing the SIR.
506		 */
507		spl1();
508		/* fall into... */
509
510	case T_SSIR:		/* software interrupt */
511	case T_SSIR|T_USER:
512		while (bit = ffs(ssir)) {
513			--bit;
514			ssir &= ~(1 << bit);
515			uvmexp.softs++;
516			if (sir_routines[bit])
517				sir_routines[bit](sir_args[bit]);
518		}
519
520		/*
521		 * If this was not an AST trap, we are all done.
522		 */
523		if (type != (T_ASTFLT|T_USER)) {
524			uvmexp.traps++;
525			return;
526		}
527		spl0();
528		if (p->p_flag & P_OWEUPC) {
529			p->p_flag &= ~P_OWEUPC;
530			ADDUPROF(p);
531		}
532		goto out;
533
534	case T_MMUFLT:		/* kernel mode page fault */
535#if 0
536#ifdef	DDB
537		if (db_recover != 0)
538			goto dopanic;
539#endif
540#ifdef	KGDB
541		if (kgdb_recover != 0)
542			goto dopanic;
543#endif
544#endif
545		/*
546		 * If we were doing profiling ticks or other user mode
547		 * stuff from interrupt code, Just Say No.
548		 */
549		if (p->p_addr->u_pcb.pcb_onfault == fubail ||
550		    p->p_addr->u_pcb.pcb_onfault == subail)
551			goto copyfault;
552		/* fall into ... */
553
554	case T_MMUFLT|T_USER:	/* page fault */
555	    {
556		vaddr_t va;
557		struct vmspace *vm = p->p_vmspace;
558		vm_map_t map;
559		int rv;
560		vm_prot_t ftype;
561		extern vm_map_t kernel_map;
562
563#ifdef DEBUG
564		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
565		printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n",
566		       p->p_pid, code, v, frame.f_pc, frame.f_sr);
567#endif
568		/*
569		 * It is only a kernel address space fault iff:
570		 * 	1. (type & T_USER) == 0  and
571		 * 	2. pcb_onfault not set or
572		 *	3. pcb_onfault set but supervisor space data fault
573		 * The last can occur during an exec() copyin where the
574		 * argument space is lazy-allocated.
575		 */
576		if (type == T_MMUFLT &&
577		    (!p->p_addr->u_pcb.pcb_onfault || KDFAULT(code)))
578			map = kernel_map;
579		else
580			map = &vm->vm_map;
581		if (WRFAULT(code))
582			ftype = VM_PROT_READ | VM_PROT_WRITE;
583		else
584			ftype = VM_PROT_READ;
585		va = trunc_page((vaddr_t)v);
586#ifdef DEBUG
587		if (map == kernel_map && va == 0) {
588			printf("trap: bad kernel access at %x\n", v);
589			goto dopanic;
590		}
591#endif
592#ifdef COMPAT_HPUX
593		if (ISHPMMADDR(va)) {
594			vaddr_t bva;
595
596			rv = pmap_mapmulti(map->pmap, va);
597			if (rv != KERN_SUCCESS) {
598				bva = HPMMBASEADDR(va);
599				rv = uvm_fault(map, bva, 0, ftype);
600				if (rv == KERN_SUCCESS)
601					(void) pmap_mapmulti(map->pmap, va);
602			}
603		} else
604#endif
605		rv = uvm_fault(map, va, 0, ftype);
606#ifdef DEBUG
607		if (rv && MDB_ISPID(p->p_pid))
608			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
609			       map, va, ftype, rv);
610#endif
611		/*
612		 * If this was a stack access we keep track of the maximum
613		 * accessed stack size.  Also, if vm_fault gets a protection
614		 * failure it is due to accessing the stack region outside
615		 * the current limit and we need to reflect that as an access
616		 * error.
617		 */
618		if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map) {
619			if (rv == KERN_SUCCESS) {
620				unsigned nss;
621
622				nss = clrnd(btoc(USRSTACK-(unsigned)va));
623				if (nss > vm->vm_ssize)
624					vm->vm_ssize = nss;
625			} else if (rv == KERN_PROTECTION_FAILURE)
626				rv = KERN_INVALID_ADDRESS;
627		}
628		if (rv == KERN_SUCCESS) {
629			if (type == T_MMUFLT) {
630#if defined(M68040)
631				if (mmutype == MMU_68040)
632					(void) writeback(&frame, 1);
633#endif
634				return;
635			}
636			goto out;
637		}
638		if (type == T_MMUFLT) {
639			if (p->p_addr->u_pcb.pcb_onfault)
640				goto copyfault;
641			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
642			       map, va, ftype, rv);
643			printf("  type %x, code [mmu,,ssw]: %x\n",
644			       type, code);
645			goto dopanic;
646		}
647		ucode = v;
648		if (rv == KERN_RESOURCE_SHORTAGE) {
649			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
650			       p->p_pid, p->p_comm,
651			       p->p_cred && p->p_ucred ?
652			       p->p_ucred->cr_uid : -1);
653			i = SIGKILL;
654		} else {
655			i = SIGSEGV;
656		}
657		break;
658	    }
659	}
660	trapsignal(p, i, ucode);
661	if ((type & T_USER) == 0)
662		return;
663out:
664	userret(p, &frame, sticks, v, 1);
665}
666
667#ifdef M68040
668#ifdef DEBUG
/* Counters kept by writeback() on DEBUG kernels. */
struct writebackstats {
	int calls;		/* invocations of writeback() */
	int cpushes;		/* dcache push faults */
	int move16s;		/* MOVE16 faults */
	int wb1s;		/* writeback #1 performed */
	int wb2s;		/* writeback #2 performed */
	int wb3s;		/* writeback #3 performed */
	int wbsize[4];		/* writebacks by size field (see f7sz) */
} wbstats;

/* Names for the SIZE, TT and TM fields of a 68040 status word. */
char *f7sz[] = {
	"longword", "byte", "word", "line"
};
char *f7tt[] = {
	"normal", "MOVE16", "AFC", "ACK"
};
char *f7tm[] = {
	"d-push", "u-data", "u-code", "M-data",
	"M-code", "k-data", "k-code", "RES"
};

/* Shared format for the "writeback failed" diagnostics. */
char wberrstr[] =
	"WARNING: pid %d(%s) writeback [%s] failed, pc=%x fa=%x wba=%x wbd=%x\n";
683#endif
684
685writeback(fp, docachepush)
686	struct frame *fp;
687	int docachepush;
688{
689	struct fmt7 *f = &fp->f_fmt7;
690	struct proc *p = curproc;
691	int err = 0;
692	u_int fa;
693	caddr_t oonfault = p->p_addr->u_pcb.pcb_onfault;
694	paddr_t pa;
695
696#ifdef DEBUG
697	if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
698		printf(" pid=%d, fa=%x,", p->p_pid, f->f_fa);
699		dumpssw(f->f_ssw);
700	}
701	wbstats.calls++;
702#endif
703	/*
704	 * Deal with special cases first.
705	 */
706	if ((f->f_ssw & SSW4_TMMASK) == SSW4_TMDCP) {
707		/*
708		 * Dcache push fault.
709		 * Line-align the address and write out the push data to
710		 * the indicated physical address.
711		 */
712#ifdef DEBUG
713		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
714			printf(" pushing %s to PA %x, data %x",
715			       f7sz[(f->f_ssw & SSW4_SZMASK) >> 5],
716			       f->f_fa, f->f_pd0);
717			if ((f->f_ssw & SSW4_SZMASK) == SSW4_SZLN)
718				printf("/%x/%x/%x",
719				       f->f_pd1, f->f_pd2, f->f_pd3);
720			printf("\n");
721		}
722		if (f->f_wb1s & SSW4_WBSV)
723			panic("writeback: cache push with WB1S valid");
724		wbstats.cpushes++;
725#endif
726		/*
727		 * XXX there are security problems if we attempt to do a
728		 * cache push after a signal handler has been called.
729		 */
730		if (docachepush) {
731			pmap_enter(pmap_kernel(), (vaddr_t)vmmap,
732			    trunc_page(f->f_fa), VM_PROT_WRITE, TRUE,
733			    VM_PROT_WRITE);
734			fa = (u_int)&vmmap[(f->f_fa & PGOFSET) & ~0xF];
735			bcopy((caddr_t)&f->f_pd0, (caddr_t)fa, 16);
736			(void) pmap_extract(pmap_kernel(), (vaddr_t)fa, &pa);
737			DCFL(pa);
738			pmap_remove(pmap_kernel(), (vaddr_t)vmmap,
739				    (vaddr_t)&vmmap[NBPG]);
740		} else
741			printf("WARNING: pid %d(%s) uid %d: CPUSH not done\n",
742			       p->p_pid, p->p_comm, p->p_ucred->cr_uid);
743	} else if ((f->f_ssw & (SSW4_RW|SSW4_TTMASK)) == SSW4_TTM16) {
744		/*
745		 * MOVE16 fault.
746		 * Line-align the address and write out the push data to
747		 * the indicated virtual address.
748		 */
749#ifdef DEBUG
750		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
751			printf(" MOVE16 to VA %x(%x), data %x/%x/%x/%x\n",
752			       f->f_fa, f->f_fa & ~0xF, f->f_pd0, f->f_pd1,
753			       f->f_pd2, f->f_pd3);
754		if (f->f_wb1s & SSW4_WBSV)
755			panic("writeback: MOVE16 with WB1S valid");
756		wbstats.move16s++;
757#endif
758		if (KDFAULT(f->f_wb1s))
759			bcopy((caddr_t)&f->f_pd0, (caddr_t)(f->f_fa & ~0xF), 16);
760		else
761			err = suline((caddr_t)(f->f_fa & ~0xF), (caddr_t)&f->f_pd0);
762		if (err) {
763			fa = f->f_fa & ~0xF;
764#ifdef DEBUG
765			if (mmudebug & MDB_WBFAILED)
766				printf(wberrstr, p->p_pid, p->p_comm,
767				       "MOVE16", fp->f_pc, f->f_fa,
768				       f->f_fa & ~0xF, f->f_pd0);
769#endif
770		}
771	} else if (f->f_wb1s & SSW4_WBSV) {
772		/*
773		 * Writeback #1.
774		 * Position the "memory-aligned" data and write it out.
775		 */
776		u_int wb1d = f->f_wb1d;
777		int off;
778
779#ifdef DEBUG
780		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
781			dumpwb(1, f->f_wb1s, f->f_wb1a, f->f_wb1d);
782		wbstats.wb1s++;
783		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
784#endif
785		off = (f->f_wb1a & 3) * 8;
786		switch (f->f_wb1s & SSW4_SZMASK) {
787		case SSW4_SZLW:
788			if (off)
789				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
790			if (KDFAULT(f->f_wb1s))
791				*(long *)f->f_wb1a = wb1d;
792			else
793				err = suword((caddr_t)f->f_wb1a, wb1d);
794			break;
795		case SSW4_SZB:
796			off = 24 - off;
797			if (off)
798				wb1d >>= off;
799			if (KDFAULT(f->f_wb1s))
800				*(char *)f->f_wb1a = wb1d;
801			else
802				err = subyte((caddr_t)f->f_wb1a, wb1d);
803			break;
804		case SSW4_SZW:
805			off = (off + 16) % 32;
806			if (off)
807				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
808			if (KDFAULT(f->f_wb1s))
809				*(short *)f->f_wb1a = wb1d;
810			else
811				err = susword((caddr_t)f->f_wb1a, wb1d);
812			break;
813		}
814		if (err) {
815			fa = f->f_wb1a;
816#ifdef DEBUG
817			if (mmudebug & MDB_WBFAILED)
818				printf(wberrstr, p->p_pid, p->p_comm,
819				       "#1", fp->f_pc, f->f_fa,
820				       f->f_wb1a, f->f_wb1d);
821#endif
822		}
823	}
824	/*
825	 * Deal with the "normal" writebacks.
826	 *
827	 * XXX writeback2 is known to reflect a LINE size writeback after
828	 * a MOVE16 was already dealt with above.  Ignore it.
829	 */
830	if (err == 0 && (f->f_wb2s & SSW4_WBSV) &&
831	    (f->f_wb2s & SSW4_SZMASK) != SSW4_SZLN) {
832#ifdef DEBUG
833		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
834			dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
835		wbstats.wb2s++;
836		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
837#endif
838		switch (f->f_wb2s & SSW4_SZMASK) {
839		case SSW4_SZLW:
840			if (KDFAULT(f->f_wb2s))
841				*(long *)f->f_wb2a = f->f_wb2d;
842			else
843				err = suword((caddr_t)f->f_wb2a, f->f_wb2d);
844			break;
845		case SSW4_SZB:
846			if (KDFAULT(f->f_wb2s))
847				*(char *)f->f_wb2a = f->f_wb2d;
848			else
849				err = subyte((caddr_t)f->f_wb2a, f->f_wb2d);
850			break;
851		case SSW4_SZW:
852			if (KDFAULT(f->f_wb2s))
853				*(short *)f->f_wb2a = f->f_wb2d;
854			else
855				err = susword((caddr_t)f->f_wb2a, f->f_wb2d);
856			break;
857		}
858		if (err) {
859			fa = f->f_wb2a;
860#ifdef DEBUG
861			if (mmudebug & MDB_WBFAILED) {
862				printf(wberrstr, p->p_pid, p->p_comm,
863				       "#2", fp->f_pc, f->f_fa,
864				       f->f_wb2a, f->f_wb2d);
865				dumpssw(f->f_ssw);
866				dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
867			}
868#endif
869		}
870	}
871	if (err == 0 && (f->f_wb3s & SSW4_WBSV)) {
872#ifdef DEBUG
873		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
874			dumpwb(3, f->f_wb3s, f->f_wb3a, f->f_wb3d);
875		wbstats.wb3s++;
876		wbstats.wbsize[(f->f_wb3s&SSW4_SZMASK)>>5]++;
877#endif
878		switch (f->f_wb3s & SSW4_SZMASK) {
879		case SSW4_SZLW:
880			if (KDFAULT(f->f_wb3s))
881				*(long *)f->f_wb3a = f->f_wb3d;
882			else
883				err = suword((caddr_t)f->f_wb3a, f->f_wb3d);
884			break;
885		case SSW4_SZB:
886			if (KDFAULT(f->f_wb3s))
887				*(char *)f->f_wb3a = f->f_wb3d;
888			else
889				err = subyte((caddr_t)f->f_wb3a, f->f_wb3d);
890			break;
891		case SSW4_SZW:
892			if (KDFAULT(f->f_wb3s))
893				*(short *)f->f_wb3a = f->f_wb3d;
894			else
895				err = susword((caddr_t)f->f_wb3a, f->f_wb3d);
896			break;
897#ifdef DEBUG
898		case SSW4_SZLN:
899			panic("writeback: wb3s indicates LINE write");
900#endif
901		}
902		if (err) {
903			fa = f->f_wb3a;
904#ifdef DEBUG
905			if (mmudebug & MDB_WBFAILED)
906				printf(wberrstr, p->p_pid, p->p_comm,
907				       "#3", fp->f_pc, f->f_fa,
908				       f->f_wb3a, f->f_wb3d);
909#endif
910		}
911	}
912	p->p_addr->u_pcb.pcb_onfault = oonfault;
913	if (err)
914		err = SIGSEGV;
915	return(err);
916}
917
918#ifdef DEBUG
919dumpssw(ssw)
920	u_short ssw;
921{
922	printf(" SSW: %x: ", ssw);
923	if (ssw & SSW4_CP)
924		printf("CP,");
925	if (ssw & SSW4_CU)
926		printf("CU,");
927	if (ssw & SSW4_CT)
928		printf("CT,");
929	if (ssw & SSW4_CM)
930		printf("CM,");
931	if (ssw & SSW4_MA)
932		printf("MA,");
933	if (ssw & SSW4_ATC)
934		printf("ATC,");
935	if (ssw & SSW4_LK)
936		printf("LK,");
937	if (ssw & SSW4_RW)
938		printf("RW,");
939	printf(" SZ=%s, TT=%s, TM=%s\n",
940	       f7sz[(ssw & SSW4_SZMASK) >> 5],
941	       f7tt[(ssw & SSW4_TTMASK) >> 3],
942	       f7tm[ssw & SSW4_TMMASK]);
943}
944
945dumpwb(num, s, a, d)
946	int num;
947	u_short s;
948	u_int a, d;
949{
950	struct proc *p = curproc;
951	paddr_t pa;
952
953	printf(" writeback #%d: VA %x, data %x, SZ=%s, TT=%s, TM=%s\n",
954	       num, a, d, f7sz[(s & SSW4_SZMASK) >> 5],
955	       f7tt[(s & SSW4_TTMASK) >> 3], f7tm[s & SSW4_TMMASK]);
956	printf("	       PA ");
957	if (pmap_extract(p->p_vmspace->vm_map.pmap, (vaddr_t)a, &pa) == FALSE)
958		printf("<invalid address>");
959	else
960		printf("%x, current value %x", pa, fuword((caddr_t)a));
961	printf("\n");
962}
963#endif
964#endif
965
966/*
967 * Process a system call.
968 */
969syscall(code, frame)
970	int code;
971	struct frame frame;
972{
973	caddr_t params;
974	struct sysent *callp;
975	struct proc *p;
976	int error, opc, nsys;
977	size_t argsize;
978	int args[8], rval[2];
979	u_quad_t sticks;
980
981	uvmexp.syscalls++;
982	if (!USERMODE(frame.f_sr))
983		panic("syscall");
984	p = curproc;
985	sticks = p->p_sticks;
986	p->p_md.md_regs = frame.f_regs;
987	opc = frame.f_pc;
988
989	nsys = p->p_emul->e_nsysent;
990	callp = p->p_emul->e_sysent;
991
992#ifdef COMPAT_SUNOS
993	if (p->p_emul == &emul_sunos) {
994		/*
995		 * SunOS passes the syscall-number on the stack, whereas
996		 * BSD passes it in D0. So, we have to get the real "code"
997		 * from the stack, and clean up the stack, as SunOS glue
998		 * code assumes the kernel pops the syscall argument the
999		 * glue pushed on the stack. Sigh...
1000		 */
1001		code = fuword((caddr_t)frame.f_regs[SP]);
1002
1003		/*
1004		 * XXX
1005		 * Don't do this for sunos_sigreturn, as there's no stored pc
1006		 * on the stack to skip, the argument follows the syscall
1007		 * number without a gap.
1008		 */
1009		if (code != SUNOS_SYS_sigreturn) {
1010			frame.f_regs[SP] += sizeof (int);
1011			/*
1012			 * remember that we adjusted the SP,
1013			 * might have to undo this if the system call
1014			 * returns ERESTART.
1015			 */
1016			p->p_md.md_flags |= MDP_STACKADJ;
1017		} else
1018			p->p_md.md_flags &= ~MDP_STACKADJ;
1019	}
1020#endif
1021
1022	params = (caddr_t)frame.f_regs[SP] + sizeof(int);
1023
1024	switch (code) {
1025	case SYS_syscall:
1026		/*
1027		 * Code is first argument, followed by actual args.
1028		 */
1029		code = fuword(params);
1030		params += sizeof(int);
1031		/*
1032		 * XXX sigreturn requires special stack manipulation
1033		 * that is only done if entered via the sigreturn
1034		 * trap.  Cannot allow it here so make sure we fail.
1035		 */
1036		switch (code) {
1037#ifdef COMPAT_13
1038		case SYS_compat_13_sigreturn13:
1039#endif
1040		case SYS___sigreturn14:
1041			code = nsys;
1042			break;
1043		}
1044		break;
1045	case SYS___syscall:
1046		/*
1047		 * Like syscall, but code is a quad, so as to maintain
1048		 * quad alignment for the rest of the arguments.
1049		 */
1050		if (callp != sysent)
1051			break;
1052		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
1053		params += sizeof(quad_t);
1054		break;
1055	default:
1056		break;
1057	}
1058	if (code < 0 || code >= nsys)
1059		callp += p->p_emul->e_nosys;		/* illegal */
1060	else
1061		callp += code;
1062	argsize = callp->sy_argsize;
1063#ifdef COMPAT_LINUX
1064	if (0
1065# ifdef EXEC_AOUT
1066	    || p->p_emul == &emul_linux_aout
1067# endif
1068# ifdef EXEC_ELF32
1069	    || p->p_emul == &emul_linux_elf32
1070# endif
1071	     ) {
1072		/*
1073		 * Linux passes the args in d1-d5
1074		 */
1075		switch (argsize) {
1076		case 20:
1077			args[4] = frame.f_regs[D5];
1078		case 16:
1079			args[3] = frame.f_regs[D4];
1080		case 12:
1081			args[2] = frame.f_regs[D3];
1082		case 8:
1083			args[1] = frame.f_regs[D2];
1084		case 4:
1085			args[0] = frame.f_regs[D1];
1086		case 0:
1087			error = 0;
1088			break;
1089		default:
1090#ifdef DEBUG
1091			panic("linux syscall %d weird argsize %d",
1092				code, argsize);
1093#else
1094			error = EINVAL;
1095#endif
1096			break;
1097		}
1098	} else
1099#endif
1100	if (argsize)
1101		error = copyin(params, (caddr_t)args, argsize);
1102	else
1103		error = 0;
1104#ifdef SYSCALL_DEBUG
1105	scdebug_call(p, code, args);
1106#endif
1107#ifdef KTRACE
1108	if (KTRPOINT(p, KTR_SYSCALL))
1109		ktrsyscall(p->p_tracep, code, argsize, args);
1110#endif
1111	if (error)
1112		goto bad;
1113	rval[0] = 0;
1114	rval[1] = frame.f_regs[D1];
1115	error = (*callp->sy_call)(p, args, rval);
1116	switch (error) {
1117	case 0:
1118		frame.f_regs[D0] = rval[0];
1119		frame.f_regs[D1] = rval[1];
1120		frame.f_sr &= ~PSL_C;	/* carry bit */
1121		break;
1122	case ERESTART:
1123		/*
1124		 * We always enter through a `trap' instruction, which is 2
1125		 * bytes, so adjust the pc by that amount.
1126		 */
1127		frame.f_pc = opc - 2;
1128		break;
1129	case EJUSTRETURN:
1130		/* nothing to do */
1131		break;
1132	default:
1133	bad:
1134		if (p->p_emul->e_errno)
1135			error = p->p_emul->e_errno[error];
1136		frame.f_regs[D0] = error;
1137		frame.f_sr |= PSL_C;	/* carry bit */
1138		break;
1139	}
1140
1141#ifdef SYSCALL_DEBUG
1142	scdebug_ret(p, code, error, rval);
1143#endif
1144#ifdef COMPAT_SUNOS
1145	/* need new p-value for this */
1146	if (error == ERESTART && (p->p_md.md_flags & MDP_STACKADJ))
1147		frame.f_regs[SP] -= sizeof (int);
1148#endif
1149	userret(p, &frame, sticks, (u_int)0, 0);
1150#ifdef KTRACE
1151	if (KTRPOINT(p, KTR_SYSRET))
1152		ktrsysret(p->p_tracep, code, error, rval[0]);
1153#endif
1154}
1155
1156void
1157child_return(arg)
1158	void *arg;
1159{
1160	struct proc *p = arg;
1161	/* See cpu_fork() */
1162	struct frame *f = (struct frame *)p->p_md.md_regs;
1163
1164	f->f_regs[D0] = 0;
1165	f->f_sr &= ~PSL_C;
1166	f->f_format = FMT0;
1167
1168	userret(p, f, p->p_sticks, (u_int)0, 0);
1169#ifdef KTRACE
1170	if (KTRPOINT(p, KTR_SYSRET))
1171		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1172#endif
1173}
1174
1175/*
1176 * Allocation routines for software interrupts.
1177 */
1178u_long
1179allocate_sir(proc, arg)
1180	void (*proc)();
1181	void *arg;
1182{
1183	int bit;
1184
1185	if( next_sir >= NSIR )
1186		panic("allocate_sir: none left");
1187	bit = next_sir++;
1188	sir_routines[bit] = proc;
1189	sir_args[bit] = arg;
1190	return (1 << bit);
1191}
1192
1193void
1194init_sir()
1195{
1196	extern void netintr();
1197
1198	sir_routines[0] = netintr;
1199	sir_routines[1] = softclock;
1200	next_sir = 2;
1201}
1202