trap.c revision 1.11
1/*	$NetBSD: trap.c,v 1.11 1999/03/18 04:56:03 chs Exp $ */
2
3/*
4 * This file was taken from mvme68k/mvme68k/trap.c
5 * should probably be re-synced when needed.
6 * Darrin B. Jewell <jewell@mit.edu>  Tue Nov 10 05:07:16 1998
7 * original cvs id: NetBSD: trap.c,v 1.24 1998/10/01 02:53:54 thorpej Exp
8 */
9
10/*
11 * Copyright (c) 1988 University of Utah.
12 * Copyright (c) 1982, 1986, 1990, 1993
13 *	The Regents of the University of California.  All rights reserved.
14 *
15 * This code is derived from software contributed to Berkeley by
16 * the Systems Programming Group of the University of Utah Computer
17 * Science Department.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 * 3. All advertising materials mentioning features or use of this software
28 *    must display the following acknowledgement:
29 *	This product includes software developed by the University of
30 *	California, Berkeley and its contributors.
31 * 4. Neither the name of the University nor the names of its contributors
32 *    may be used to endorse or promote products derived from this software
33 *    without specific prior written permission.
34 *
35 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
36 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
39 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
40 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
41 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
42 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
43 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
44 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45 * SUCH DAMAGE.
46 *
47 * from: Utah $Hdr: trap.c 1.37 92/12/20$
48 *
49 *	@(#)trap.c	8.5 (Berkeley) 1/4/94
50 */
51
52#include "opt_ddb.h"
53#include "opt_execfmt.h"
54#include "opt_ktrace.h"
55#include "opt_uvm.h"
56#include "opt_compat_netbsd.h"
57#include "opt_compat_sunos.h"
58#include "opt_compat_hpux.h"
59#include "opt_compat_linux.h"
60
61#include <sys/param.h>
62#include <sys/systm.h>
63#include <sys/proc.h>
64#include <sys/acct.h>
65#include <sys/kernel.h>
66#include <sys/signalvar.h>
67#include <sys/resourcevar.h>
68#include <sys/syscall.h>
69#include <sys/syslog.h>
70#include <sys/user.h>
71#ifdef KTRACE
72#include <sys/ktrace.h>
73#endif
74
75#include <machine/psl.h>
76#include <machine/trap.h>
77#include <machine/cpu.h>
78#include <machine/reg.h>
79
80#include <vm/vm.h>
81#include <vm/pmap.h>
82
83#if defined(UVM)
84#include <uvm/uvm_extern.h>
85#endif
86
87#ifdef COMPAT_HPUX
88#include <compat/hpux/hpux.h>
89#endif
90
91#ifdef COMPAT_SUNOS
92#include <compat/sunos/sunos_syscall.h>
93extern struct emul emul_sunos;
94#endif
95
96#ifdef COMPAT_LINUX
97#ifdef EXEC_AOUT
98extern struct emul emul_linux_aout;
99#endif
100#ifdef EXEC_ELF32
101extern struct emul emul_linux_elf32;
102#endif
103#endif
104
105#include <m68k/cacheops.h>
106
/* AST-pending flag; trap() clears it when it services T_ASTFLT|T_USER. */
int	astpending;

/*
 * Human-readable names for the low-numbered trap types.  trap() uses
 * an entry as the panic message after masking off T_USER, so the order
 * here must follow the T_* numbering.
 */
char	*trap_type[] = {
	"Bus error",			/* 0 */
	"Address error",		/* 1 */
	"Illegal instruction",		/* 2 */
	"Zero divide",			/* 3 */
	"CHK instruction",		/* 4 */
	"TRAPV instruction",		/* 5 */
	"Privilege violation",		/* 6 */
	"Trace trap",			/* 7 */
	"MMU fault",			/* 8 */
	"SSIR trap",			/* 9 */
	"Format error",			/* 10 */
	"68881 exception",		/* 11 */
	"Coprocessor violation",	/* 12 */
	"Async system trap"		/* 13 */
};

/* Number of entries in trap_type[]. */
int	trap_types = sizeof(trap_type) / sizeof(trap_type[0]);
126
127/*
128 * Size of various exception stack frames (minus the standard 8 bytes)
129 */
130short	exframesize[] = {
131	FMT0SIZE,	/* type 0 - normal (68020/030/040) */
132	FMT1SIZE,	/* type 1 - throwaway (68020/030/040) */
133	FMT2SIZE,	/* type 2 - normal 6-word (68020/030/040) */
134	FMT3SIZE,	/* type 3 - FP post-instruction (68040) */
135	-1, -1, -1,	/* type 4-6 - undefined */
136	FMT7SIZE,	/* type 7 - access error (68040) */
137	58,		/* type 8 - bus fault (68010) */
138	FMT9SIZE,	/* type 9 - coprocessor mid-instruction (68020/030) */
139	FMTASIZE,	/* type A - short bus fault (68020/030) */
140	FMTBSIZE,	/* type B - long bus fault (68020/030) */
141	-1, -1, -1, -1	/* type C-F - undefined */
142};
143
/*
 * Fault classification helpers; the argument is a special status word.
 *	KDFAULT(ssw)	true for a supervisor (kernel) data space fault
 *	WRFAULT(ssw)	true when the faulting access was a write
 * Kernels built with M68040 pick the 68040 or the 68020/030 encoding
 * at run time from mmutype.
 */
#ifdef M68040
#define KDFAULT(ssw) \
	(mmutype == MMU_68040 \
	    ? ((ssw) & SSW4_TMMASK) == SSW4_TMKD \
	    : ((ssw) & (SSW_DF|FC_SUPERD)) == (SSW_DF|FC_SUPERD))
#define WRFAULT(ssw) \
	(mmutype == MMU_68040 \
	    ? ((ssw) & SSW4_RW) == 0 \
	    : ((ssw) & (SSW_DF|SSW_RW)) == SSW_DF)
#else /* !M68040 */
#define KDFAULT(ssw) \
	(((ssw) & (SSW_DF|SSW_FCMASK)) == (SSW_DF|FC_SUPERD))
#define WRFAULT(ssw) \
	(((ssw) & (SSW_DF|SSW_RW)) == SSW_DF)
#endif /* M68040 */
155
#ifdef DEBUG
/*
 * MMU fault debugging knobs.
 *	mmudebug	bit mask of the MDB_* flags below
 *	mmupid		pid to trace regardless of mmudebug (-1 by default)
 */
int mmudebug = 0;
int mmupid = -1;
#define MDB_FOLLOW	1
#define MDB_WBFOLLOW	2
#define MDB_WBFAILED	4
/*
 * True when pid `p' is the one selected by mmupid.  The expansion is
 * fully parenthesized so it composes safely with !, &&, + and friends.
 */
#define MDB_ISPID(p)	((p) == mmupid)
#endif
164
/*
 * Software interrupt dispatch tables.  trap() scans the pending bits
 * with ffs() and calls sir_routines[bit](sir_args[bit]) for each one;
 * allocate_sir() hands out slots, with next_sir being the first free one.
 */
#define NSIR	32

void	(*sir_routines[NSIR])();	/* handler for each SIR bit */
void	*sir_args[NSIR];		/* argument passed to that handler */
int	next_sir;			/* next unallocated slot */
169
170/*
171 * trap and syscall both need the following work done before returning
172 * to user mode.
173 */
174static inline void
175userret(p, fp, oticks, faultaddr, fromtrap)
176	struct proc *p;
177	struct frame *fp;
178	u_quad_t oticks;
179	u_int faultaddr;
180	int fromtrap;
181{
182	int sig, s;
183#ifdef M68040
184	int beenhere = 0;
185
186again:
187#endif
188	/* take pending signals */
189	while ((sig = CURSIG(p)) != 0)
190		postsig(sig);
191	p->p_priority = p->p_usrpri;
192	if (want_resched) {
193		/*
194		 * Since we are curproc, clock will normally just change
195		 * our priority without moving us from one queue to another
196		 * (since the running process is not on a queue.)
197		 * If that happened after we put ourselves on the run queue
198		 * but before we mi_switch()'ed, we might not be on the queue
199		 * indicated by our priority.
200		 */
201		s = splstatclock();
202		setrunqueue(p);
203		p->p_stats->p_ru.ru_nivcsw++;
204		mi_switch();
205		splx(s);
206		while ((sig = CURSIG(p)) != 0)
207			postsig(sig);
208	}
209
210	/*
211	 * If profiling, charge system time to the trapped pc.
212	 */
213	if (p->p_flag & P_PROFIL) {
214		extern int psratio;
215
216		addupc_task(p, fp->f_pc,
217			    (int)(p->p_sticks - oticks) * psratio);
218	}
219#ifdef M68040
220	/*
221	 * Deal with user mode writebacks (from trap, or from sigreturn).
222	 * If any writeback fails, go back and attempt signal delivery.
223	 * unless we have already been here and attempted the writeback
224	 * (e.g. bad address with user ignoring SIGSEGV).  In that case
225	 * we just return to the user without sucessfully completing
226	 * the writebacks.  Maybe we should just drop the sucker?
227	 */
228	if (mmutype == MMU_68040 && fp->f_format == FMT7) {
229		if (beenhere) {
230#ifdef DEBUG
231			if (mmudebug & MDB_WBFAILED)
232				printf(fromtrap ?
233		"pid %d(%s): writeback aborted, pc=%x, fa=%x\n" :
234		"pid %d(%s): writeback aborted in sigreturn, pc=%x\n",
235				    p->p_pid, p->p_comm, fp->f_pc, faultaddr);
236#endif
237		} else if (sig = writeback(fp, fromtrap)) {
238			beenhere = 1;
239			oticks = p->p_sticks;
240			trapsignal(p, sig, faultaddr);
241			goto again;
242		}
243	}
244#endif
245	curpriority = p->p_priority;
246}
247
248/*
249 * Trap is called from locore to handle most types of processor traps,
250 * including events such as simulated software interrupts/AST's.
251 * System calls are broken out for efficiency.
252 */
253/*ARGSUSED*/
254trap(type, code, v, frame)
255	int type;
256	unsigned code;
257	unsigned v;
258	struct frame frame;
259{
260	extern char fubail[], subail[];
261#ifdef DDB
262	extern char trap0[], trap1[], trap2[], trap12[], trap15[], illinst[];
263#endif
264	struct proc *p;
265	int i;
266	u_int ucode;
267	u_quad_t sticks;
268#ifdef COMPAT_HPUX
269	extern struct emul emul_hpux;
270#endif
271	int bit;
272
273#if defined(UVM)
274	uvmexp.traps++;
275#else
276	cnt.v_trap++;
277#endif
278	p = curproc;
279	ucode = 0;
280	if (USERMODE(frame.f_sr)) {
281		type |= T_USER;
282		sticks = p->p_sticks;
283		p->p_md.md_regs = frame.f_regs;
284	}
285	switch (type) {
286
287	default:
288dopanic:
289		printf("trap type %d, code = %x, v = %x\n", type, code, v);
290#ifdef DDB
291		if (kdb_trap(type, &frame))
292			return;
293#endif
294		regdump((struct trapframe *)&frame, 128);
295		type &= ~T_USER;
296		if ((unsigned)type < trap_types)
297			panic(trap_type[type]);
298		panic("trap");
299
300	case T_BUSERR:		/* kernel bus error */
301		if (!p->p_addr->u_pcb.pcb_onfault)
302			goto dopanic;
303		/*
304		 * If we have arranged to catch this fault in any of the
305		 * copy to/from user space routines, set PC to return to
306		 * indicated location and set flag informing buserror code
307		 * that it may need to clean up stack frame.
308		 */
309copyfault:
310		frame.f_stackadj = exframesize[frame.f_format];
311		frame.f_format = frame.f_vector = 0;
312		frame.f_pc = (int) p->p_addr->u_pcb.pcb_onfault;
313		return;
314
315	case T_BUSERR|T_USER:	/* bus error */
316	case T_ADDRERR|T_USER:	/* address error */
317		ucode = v;
318		i = SIGBUS;
319		break;
320
321	case T_COPERR:		/* kernel coprocessor violation */
322	case T_FMTERR|T_USER:	/* do all RTE errors come in as T_USER? */
323	case T_FMTERR:		/* ...just in case... */
324	/*
325	 * The user has most likely trashed the RTE or FP state info
326	 * in the stack frame of a signal handler.
327	 */
328		printf("pid %d: kernel %s exception\n", p->p_pid,
329		       type==T_COPERR ? "coprocessor" : "format");
330		type |= T_USER;
331		p->p_sigacts->ps_sigact[SIGILL].sa_handler = SIG_DFL;
332		sigdelset(&p->p_sigignore, SIGILL);
333		sigdelset(&p->p_sigcatch, SIGILL);
334		sigdelset(&p->p_sigmask, SIGILL);
335		i = SIGILL;
336		ucode = frame.f_format;	/* XXX was ILL_RESAD_FAULT */
337		break;
338
339	case T_COPERR|T_USER:	/* user coprocessor violation */
340	/* What is a proper response here? */
341		ucode = 0;
342		i = SIGFPE;
343		break;
344
345	case T_FPERR|T_USER:	/* 68881 exceptions */
346	/*
347	 * We pass along the 68881 status register which locore stashed
348	 * in code for us.  Note that there is a possibility that the
349	 * bit pattern of this register will conflict with one of the
350	 * FPE_* codes defined in signal.h.  Fortunately for us, the
351	 * only such codes we use are all in the range 1-7 and the low
352	 * 3 bits of the status register are defined as 0 so there is
353	 * no clash.
354	 */
355		ucode = code;
356		i = SIGFPE;
357		break;
358
359#ifdef M68040
360	case T_FPEMULI|T_USER:	/* unimplemented FP instuction */
361	case T_FPEMULD|T_USER:	/* unimplemented FP data type */
362		/* XXX need to FSAVE */
363		printf("pid %d(%s): unimplemented FP %s at %x (EA %x)\n",
364		       p->p_pid, p->p_comm,
365		       frame.f_format == 2 ? "instruction" : "data type",
366		       frame.f_pc, frame.f_fmt2.f_iaddr);
367		/* XXX need to FRESTORE */
368		i = SIGFPE;
369		break;
370#endif
371
372	case T_ILLINST|T_USER:	/* illegal instruction fault */
373#ifdef COMPAT_HPUX
374		if (p->p_emul == &emul_hpux) {
375			ucode = HPUX_ILL_ILLINST_TRAP;
376			i = SIGILL;
377			break;
378		}
379		/* fall through */
380#endif
381	case T_PRIVINST|T_USER:	/* privileged instruction fault */
382#ifdef COMPAT_HPUX
383		if (p->p_emul == &emul_hpux)
384			ucode = HPUX_ILL_PRIV_TRAP;
385		else
386#endif
387		ucode = frame.f_format;	/* XXX was ILL_PRIVIN_FAULT */
388		i = SIGILL;
389		break;
390
391	case T_ZERODIV|T_USER:	/* Divide by zero */
392#ifdef COMPAT_HPUX
393		if (p->p_emul == &emul_hpux)
394			ucode = HPUX_FPE_INTDIV_TRAP;
395		else
396#endif
397		ucode = frame.f_format;	/* XXX was FPE_INTDIV_TRAP */
398		i = SIGFPE;
399		break;
400
401	case T_CHKINST|T_USER:	/* CHK instruction trap */
402#ifdef COMPAT_HPUX
403		if (p->p_emul == &emul_hpux) {
404			/* handled differently under hp-ux */
405			i = SIGILL;
406			ucode = HPUX_ILL_CHK_TRAP;
407			break;
408		}
409#endif
410		ucode = frame.f_format;	/* XXX was FPE_SUBRNG_TRAP */
411		i = SIGFPE;
412		break;
413
414	case T_TRAPVINST|T_USER:	/* TRAPV instruction trap */
415#ifdef COMPAT_HPUX
416		if (p->p_emul == &emul_hpux) {
417			/* handled differently under hp-ux */
418			i = SIGILL;
419			ucode = HPUX_ILL_TRAPV_TRAP;
420			break;
421		}
422#endif
423		ucode = frame.f_format;	/* XXX was FPE_INTOVF_TRAP */
424		i = SIGFPE;
425		break;
426
427	/*
428	 * XXX: Trace traps are a nightmare.
429	 *
430	 *	HP-UX uses trap #1 for breakpoints,
431	 *	HPBSD uses trap #2,
432	 *	SUN 3.x uses trap #15,
433	 *	KGDB uses trap #15 (for kernel breakpoints; handled elsewhere).
434	 *
435	 * HPBSD and HP-UX traps both get mapped by locore.s into T_TRACE.
436	 * SUN 3.x traps get passed through as T_TRAP15 and are not really
437	 * supported yet.
438	 */
439	case T_TRACE:		/* kernel trace trap */
440	case T_TRAP15:		/* SUN trace trap */
441#ifdef DDB
442		if (type == T_TRAP15 ||
443		    ((caddr_t)frame.f_pc != trap0 &&
444		     (caddr_t)frame.f_pc != trap1 &&
445		     (caddr_t)frame.f_pc != trap2 &&
446		     (caddr_t)frame.f_pc != trap12 &&
447		     (caddr_t)frame.f_pc != trap15 &&
448		     (caddr_t)frame.f_pc != illinst)) {
449			if (kdb_trap(type, &frame))
450				return;
451		}
452#endif
453		frame.f_sr &= ~PSL_T;
454		i = SIGTRAP;
455		break;
456
457	case T_TRACE|T_USER:	/* user trace trap */
458	case T_TRAP15|T_USER:	/* SUN user trace trap */
459#ifdef COMPAT_SUNOS
460		/*
461		 * SunOS uses Trap #2 for a "CPU cache flush".
462		 * Just flush the on-chip caches and return.
463		 */
464		if (p->p_emul == &emul_sunos) {
465			ICIA();
466			DCIU();
467			return;
468		}
469#endif COMPAT_SUNOS
470		frame.f_sr &= ~PSL_T;
471		i = SIGTRAP;
472		break;
473
474	case T_ASTFLT:		/* system async trap, cannot happen */
475		goto dopanic;
476
477	case T_ASTFLT|T_USER:	/* user async trap */
478		astpending = 0;
479		/*
480		 * We check for software interrupts first.  This is because
481		 * they are at a higher level than ASTs, and on a VAX would
482		 * interrupt the AST.  We assume that if we are processing
483		 * an AST that we must be at IPL0 so we don't bother to
484		 * check.  Note that we ensure that we are at least at SIR
485		 * IPL while processing the SIR.
486		 */
487		spl1();
488		/* fall into... */
489
490	case T_SSIR:		/* software interrupt */
491	case T_SSIR|T_USER:
492		while (bit = ffs(ssir)) {
493			--bit;
494			ssir &= ~(1 << bit);
495#if defined(UVM)
496			uvmexp.softs++;
497#else
498			cnt.v_soft++;
499#endif
500			if (sir_routines[bit])
501				sir_routines[bit](sir_args[bit]);
502		}
503
504		/*
505		 * If this was not an AST trap, we are all done.
506		 */
507		if (type != (T_ASTFLT|T_USER)) {
508#if defined(UVM)
509			uvmexp.traps++;
510#else
511			cnt.v_trap--;
512#endif
513			return;
514		}
515		spl0();
516		if (p->p_flag & P_OWEUPC) {
517			p->p_flag &= ~P_OWEUPC;
518			ADDUPROF(p);
519		}
520		goto out;
521
522	case T_MMUFLT:		/* kernel mode page fault */
523		/*
524		 * If we were doing profiling ticks or other user mode
525		 * stuff from interrupt code, Just Say No.
526		 */
527		if (p->p_addr->u_pcb.pcb_onfault == fubail ||
528		    p->p_addr->u_pcb.pcb_onfault == subail)
529			goto copyfault;
530		/* fall into ... */
531
532	case T_MMUFLT|T_USER:	/* page fault */
533	    {
534		vaddr_t va;
535		struct vmspace *vm = p->p_vmspace;
536		vm_map_t map;
537		int rv;
538		vm_prot_t ftype;
539		extern vm_map_t kernel_map;
540
541#ifdef DEBUG
542		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
543		printf("trap: T_MMUFLT pid=%d, code=%x, v=%x, pc=%x, sr=%x\n",
544		       p->p_pid, code, v, frame.f_pc, frame.f_sr);
545#endif
546		/*
547		 * It is only a kernel address space fault iff:
548		 * 	1. (type & T_USER) == 0  and
549		 * 	2. pcb_onfault not set or
550		 *	3. pcb_onfault set but supervisor space data fault
551		 * The last can occur during an exec() copyin where the
552		 * argument space is lazy-allocated.
553		 */
554		if (type == T_MMUFLT &&
555		    (!p->p_addr->u_pcb.pcb_onfault || KDFAULT(code)))
556			map = kernel_map;
557		else
558			map = &vm->vm_map;
559		if (WRFAULT(code))
560			ftype = VM_PROT_READ | VM_PROT_WRITE;
561		else
562			ftype = VM_PROT_READ;
563		va = trunc_page((vaddr_t)v);
564#ifdef DEBUG
565		if (map == kernel_map && va == 0) {
566			printf("trap: bad kernel access at %x\n", v);
567			goto dopanic;
568		}
569#endif
570#ifdef COMPAT_HPUX
571		if (ISHPMMADDR(va)) {
572			vaddr_t bva;
573
574			rv = pmap_mapmulti(map->pmap, va);
575			if (rv != KERN_SUCCESS) {
576				bva = HPMMBASEADDR(va);
577#if defined(UVM)
578				rv = uvm_fault(map, bva, 0, ftype);
579#else
580				rv = vm_fault(map, bva, ftype, FALSE);
581#endif
582				if (rv == KERN_SUCCESS)
583					(void) pmap_mapmulti(map->pmap, va);
584			}
585		} else
586#endif
587#if defined(UVM)
588		rv = uvm_fault(map, va, 0, ftype);
589#ifdef DEBUG
590		if (rv && MDB_ISPID(p->p_pid))
591			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
592			       map, va, ftype, rv);
593#endif
594#else /* ! UVM */
595		rv = vm_fault(map, va, ftype, FALSE);
596#ifdef DEBUG
597		if (rv && MDB_ISPID(p->p_pid))
598			printf("vm_fault(%x, %x, %x, 0) -> %x\n",
599			       map, va, ftype, rv);
600#endif
601#endif /* UVM */
602		/*
603		 * If this was a stack access we keep track of the maximum
604		 * accessed stack size.  Also, if vm_fault gets a protection
605		 * failure it is due to accessing the stack region outside
606		 * the current limit and we need to reflect that as an access
607		 * error.
608		 */
609		if ((caddr_t)va >= vm->vm_maxsaddr && map != kernel_map) {
610			if (rv == KERN_SUCCESS) {
611				unsigned nss;
612
613				nss = clrnd(btoc(USRSTACK-(unsigned)va));
614				if (nss > vm->vm_ssize)
615					vm->vm_ssize = nss;
616			} else if (rv == KERN_PROTECTION_FAILURE)
617				rv = KERN_INVALID_ADDRESS;
618		}
619		if (rv == KERN_SUCCESS) {
620			if (type == T_MMUFLT) {
621#if defined(M68040)
622				if (mmutype == MMU_68040)
623					(void) writeback(&frame, 1);
624#endif
625				return;
626			}
627			goto out;
628		}
629		if (type == T_MMUFLT) {
630			if (p->p_addr->u_pcb.pcb_onfault)
631				goto copyfault;
632#if defined(UVM)
633			printf("uvm_fault(%p, 0x%lx, 0, 0x%x) -> 0x%x\n",
634			       map, va, ftype, rv);
635#else
636			printf("vm_fault(%x, %x, %x, 0) -> %x\n",
637			       map, va, ftype, rv);
638#endif
639			printf("  type %x, code [mmu,,ssw]: %x\n",
640			       type, code);
641			goto dopanic;
642		}
643		ucode = v;
644		if (rv == KERN_RESOURCE_SHORTAGE) {
645			printf("UVM: pid %d (%s), uid %d killed: out of swap\n",
646			       p->p_pid, p->p_comm,
647			       p->p_cred && p->p_ucred ?
648			       p->p_ucred->cr_uid : -1);
649			i = SIGKILL;
650		} else {
651			i = SIGSEGV;
652		}
653		break;
654	    }
655	}
656	trapsignal(p, i, ucode);
657	if ((type & T_USER) == 0)
658		return;
659out:
660	userret(p, &frame, sticks, v, 1);
661}
662
663#ifdef M68040
#ifdef DEBUG
/* Counters maintained by writeback() in DEBUG kernels. */
struct writebackstats {
	int calls;	/* invocations of writeback() */
	int cpushes;	/* Dcache push faults */
	int move16s;	/* MOVE16 faults */
	int wb1s;	/* writeback #1 performed */
	int wb2s;	/* writeback #2 performed */
	int wb3s;	/* writeback #3 performed */
	int wbsize[4];	/* writebacks per size code (SZ field >> 5) */
} wbstats;

/* Printable names for the SZ, TT and TM fields of a 68040 status word. */
char *f7sz[] = {
	"longword", "byte", "word", "line"
};
char *f7tt[] = {
	"normal", "MOVE16", "AFC", "ACK"
};
char *f7tm[] = {
	"d-push", "u-data", "u-code", "M-data",
	"M-code", "k-data", "k-code", "RES"
};

/* Shared format for the "writeback failed" diagnostics. */
char wberrstr[] =
	"WARNING: pid %d(%s) writeback [%s] failed, pc=%x fa=%x wba=%x wbd=%x\n";
#endif
680
681writeback(fp, docachepush)
682	struct frame *fp;
683	int docachepush;
684{
685	struct fmt7 *f = &fp->f_fmt7;
686	struct proc *p = curproc;
687	int err = 0;
688	u_int fa;
689	caddr_t oonfault = p->p_addr->u_pcb.pcb_onfault;
690
691#ifdef DEBUG
692	if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
693		printf(" pid=%d, fa=%x,", p->p_pid, f->f_fa);
694		dumpssw(f->f_ssw);
695	}
696	wbstats.calls++;
697#endif
698	/*
699	 * Deal with special cases first.
700	 */
701	if ((f->f_ssw & SSW4_TMMASK) == SSW4_TMDCP) {
702		/*
703		 * Dcache push fault.
704		 * Line-align the address and write out the push data to
705		 * the indicated physical address.
706		 */
707#ifdef DEBUG
708		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid)) {
709			printf(" pushing %s to PA %x, data %x",
710			       f7sz[(f->f_ssw & SSW4_SZMASK) >> 5],
711			       f->f_fa, f->f_pd0);
712			if ((f->f_ssw & SSW4_SZMASK) == SSW4_SZLN)
713				printf("/%x/%x/%x",
714				       f->f_pd1, f->f_pd2, f->f_pd3);
715			printf("\n");
716		}
717		if (f->f_wb1s & SSW4_WBSV)
718			panic("writeback: cache push with WB1S valid");
719		wbstats.cpushes++;
720#endif
721		/*
722		 * XXX there are security problems if we attempt to do a
723		 * cache push after a signal handler has been called.
724		 */
725		if (docachepush) {
726			pmap_enter(pmap_kernel(), (vaddr_t)vmmap,
727				   trunc_page(f->f_fa), VM_PROT_WRITE, TRUE);
728			fa = (u_int)&vmmap[(f->f_fa & PGOFSET) & ~0xF];
729			bcopy((caddr_t)&f->f_pd0, (caddr_t)fa, 16);
730			DCFL(pmap_extract(pmap_kernel(), (vaddr_t)fa));
731			pmap_remove(pmap_kernel(), (vaddr_t)vmmap,
732				    (vaddr_t)&vmmap[NBPG]);
733		} else
734			printf("WARNING: pid %d(%s) uid %d: CPUSH not done\n",
735			       p->p_pid, p->p_comm, p->p_ucred->cr_uid);
736	} else if ((f->f_ssw & (SSW4_RW|SSW4_TTMASK)) == SSW4_TTM16) {
737		/*
738		 * MOVE16 fault.
739		 * Line-align the address and write out the push data to
740		 * the indicated virtual address.
741		 */
742#ifdef DEBUG
743		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
744			printf(" MOVE16 to VA %x(%x), data %x/%x/%x/%x\n",
745			       f->f_fa, f->f_fa & ~0xF, f->f_pd0, f->f_pd1,
746			       f->f_pd2, f->f_pd3);
747		if (f->f_wb1s & SSW4_WBSV)
748			panic("writeback: MOVE16 with WB1S valid");
749		wbstats.move16s++;
750#endif
751		if (KDFAULT(f->f_wb1s))
752			bcopy((caddr_t)&f->f_pd0, (caddr_t)(f->f_fa & ~0xF), 16);
753		else
754			err = suline((caddr_t)(f->f_fa & ~0xF), (caddr_t)&f->f_pd0);
755		if (err) {
756			fa = f->f_fa & ~0xF;
757#ifdef DEBUG
758			if (mmudebug & MDB_WBFAILED)
759				printf(wberrstr, p->p_pid, p->p_comm,
760				       "MOVE16", fp->f_pc, f->f_fa,
761				       f->f_fa & ~0xF, f->f_pd0);
762#endif
763		}
764	} else if (f->f_wb1s & SSW4_WBSV) {
765		/*
766		 * Writeback #1.
767		 * Position the "memory-aligned" data and write it out.
768		 */
769		u_int wb1d = f->f_wb1d;
770		int off;
771
772#ifdef DEBUG
773		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
774			dumpwb(1, f->f_wb1s, f->f_wb1a, f->f_wb1d);
775		wbstats.wb1s++;
776		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
777#endif
778		off = (f->f_wb1a & 3) * 8;
779		switch (f->f_wb1s & SSW4_SZMASK) {
780		case SSW4_SZLW:
781			if (off)
782				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
783			if (KDFAULT(f->f_wb1s))
784				*(long *)f->f_wb1a = wb1d;
785			else
786				err = suword((caddr_t)f->f_wb1a, wb1d);
787			break;
788		case SSW4_SZB:
789			off = 24 - off;
790			if (off)
791				wb1d >>= off;
792			if (KDFAULT(f->f_wb1s))
793				*(char *)f->f_wb1a = wb1d;
794			else
795				err = subyte((caddr_t)f->f_wb1a, wb1d);
796			break;
797		case SSW4_SZW:
798			off = (off + 16) % 32;
799			if (off)
800				wb1d = (wb1d >> (32 - off)) | (wb1d << off);
801			if (KDFAULT(f->f_wb1s))
802				*(short *)f->f_wb1a = wb1d;
803			else
804				err = susword((caddr_t)f->f_wb1a, wb1d);
805			break;
806		}
807		if (err) {
808			fa = f->f_wb1a;
809#ifdef DEBUG
810			if (mmudebug & MDB_WBFAILED)
811				printf(wberrstr, p->p_pid, p->p_comm,
812				       "#1", fp->f_pc, f->f_fa,
813				       f->f_wb1a, f->f_wb1d);
814#endif
815		}
816	}
817	/*
818	 * Deal with the "normal" writebacks.
819	 *
820	 * XXX writeback2 is known to reflect a LINE size writeback after
821	 * a MOVE16 was already dealt with above.  Ignore it.
822	 */
823	if (err == 0 && (f->f_wb2s & SSW4_WBSV) &&
824	    (f->f_wb2s & SSW4_SZMASK) != SSW4_SZLN) {
825#ifdef DEBUG
826		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
827			dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
828		wbstats.wb2s++;
829		wbstats.wbsize[(f->f_wb2s&SSW4_SZMASK)>>5]++;
830#endif
831		switch (f->f_wb2s & SSW4_SZMASK) {
832		case SSW4_SZLW:
833			if (KDFAULT(f->f_wb2s))
834				*(long *)f->f_wb2a = f->f_wb2d;
835			else
836				err = suword((caddr_t)f->f_wb2a, f->f_wb2d);
837			break;
838		case SSW4_SZB:
839			if (KDFAULT(f->f_wb2s))
840				*(char *)f->f_wb2a = f->f_wb2d;
841			else
842				err = subyte((caddr_t)f->f_wb2a, f->f_wb2d);
843			break;
844		case SSW4_SZW:
845			if (KDFAULT(f->f_wb2s))
846				*(short *)f->f_wb2a = f->f_wb2d;
847			else
848				err = susword((caddr_t)f->f_wb2a, f->f_wb2d);
849			break;
850		}
851		if (err) {
852			fa = f->f_wb2a;
853#ifdef DEBUG
854			if (mmudebug & MDB_WBFAILED) {
855				printf(wberrstr, p->p_pid, p->p_comm,
856				       "#2", fp->f_pc, f->f_fa,
857				       f->f_wb2a, f->f_wb2d);
858				dumpssw(f->f_ssw);
859				dumpwb(2, f->f_wb2s, f->f_wb2a, f->f_wb2d);
860			}
861#endif
862		}
863	}
864	if (err == 0 && (f->f_wb3s & SSW4_WBSV)) {
865#ifdef DEBUG
866		if ((mmudebug & MDB_WBFOLLOW) || MDB_ISPID(p->p_pid))
867			dumpwb(3, f->f_wb3s, f->f_wb3a, f->f_wb3d);
868		wbstats.wb3s++;
869		wbstats.wbsize[(f->f_wb3s&SSW4_SZMASK)>>5]++;
870#endif
871		switch (f->f_wb3s & SSW4_SZMASK) {
872		case SSW4_SZLW:
873			if (KDFAULT(f->f_wb3s))
874				*(long *)f->f_wb3a = f->f_wb3d;
875			else
876				err = suword((caddr_t)f->f_wb3a, f->f_wb3d);
877			break;
878		case SSW4_SZB:
879			if (KDFAULT(f->f_wb3s))
880				*(char *)f->f_wb3a = f->f_wb3d;
881			else
882				err = subyte((caddr_t)f->f_wb3a, f->f_wb3d);
883			break;
884		case SSW4_SZW:
885			if (KDFAULT(f->f_wb3s))
886				*(short *)f->f_wb3a = f->f_wb3d;
887			else
888				err = susword((caddr_t)f->f_wb3a, f->f_wb3d);
889			break;
890#ifdef DEBUG
891		case SSW4_SZLN:
892			panic("writeback: wb3s indicates LINE write");
893#endif
894		}
895		if (err) {
896			fa = f->f_wb3a;
897#ifdef DEBUG
898			if (mmudebug & MDB_WBFAILED)
899				printf(wberrstr, p->p_pid, p->p_comm,
900				       "#3", fp->f_pc, f->f_fa,
901				       f->f_wb3a, f->f_wb3d);
902#endif
903		}
904	}
905	p->p_addr->u_pcb.pcb_onfault = oonfault;
906	if (err)
907		err = SIGSEGV;
908	return(err);
909}
910
911#ifdef DEBUG
912dumpssw(ssw)
913	u_short ssw;
914{
915	printf(" SSW: %x: ", ssw);
916	if (ssw & SSW4_CP)
917		printf("CP,");
918	if (ssw & SSW4_CU)
919		printf("CU,");
920	if (ssw & SSW4_CT)
921		printf("CT,");
922	if (ssw & SSW4_CM)
923		printf("CM,");
924	if (ssw & SSW4_MA)
925		printf("MA,");
926	if (ssw & SSW4_ATC)
927		printf("ATC,");
928	if (ssw & SSW4_LK)
929		printf("LK,");
930	if (ssw & SSW4_RW)
931		printf("RW,");
932	printf(" SZ=%s, TT=%s, TM=%s\n",
933	       f7sz[(ssw & SSW4_SZMASK) >> 5],
934	       f7tt[(ssw & SSW4_TTMASK) >> 3],
935	       f7tm[ssw & SSW4_TMMASK]);
936}
937
938dumpwb(num, s, a, d)
939	int num;
940	u_short s;
941	u_int a, d;
942{
943	struct proc *p = curproc;
944	paddr_t pa;
945
946	printf(" writeback #%d: VA %x, data %x, SZ=%s, TT=%s, TM=%s\n",
947	       num, a, d, f7sz[(s & SSW4_SZMASK) >> 5],
948	       f7tt[(s & SSW4_TTMASK) >> 3], f7tm[s & SSW4_TMMASK]);
949	printf("	       PA ");
950	pa = pmap_extract(p->p_vmspace->vm_map.pmap, (vaddr_t)a);
951	if (pa == 0)
952		printf("<invalid address>");
953	else
954		printf("%x, current value %x", pa, fuword((caddr_t)a));
955	printf("\n");
956}
957#endif
958#endif
959
960/*
961 * Process a system call.
962 */
963syscall(code, frame)
964	int code;
965	struct frame frame;
966{
967	caddr_t params;
968	struct sysent *callp;
969	struct proc *p;
970	int error, opc, nsys;
971	size_t argsize;
972	int args[8], rval[2];
973	u_quad_t sticks;
974
975#if defined(UVM)
976	uvmexp.syscalls++;
977#else
978	cnt.v_syscall++;
979#endif
980	if (!USERMODE(frame.f_sr))
981		panic("syscall");
982	p = curproc;
983	sticks = p->p_sticks;
984	p->p_md.md_regs = frame.f_regs;
985	opc = frame.f_pc;
986
987	nsys = p->p_emul->e_nsysent;
988	callp = p->p_emul->e_sysent;
989
990#ifdef COMPAT_SUNOS
991	if (p->p_emul == &emul_sunos) {
992		/*
993		 * SunOS passes the syscall-number on the stack, whereas
994		 * BSD passes it in D0. So, we have to get the real "code"
995		 * from the stack, and clean up the stack, as SunOS glue
996		 * code assumes the kernel pops the syscall argument the
997		 * glue pushed on the stack. Sigh...
998		 */
999		code = fuword((caddr_t)frame.f_regs[SP]);
1000
1001		/*
1002		 * XXX
1003		 * Don't do this for sunos_sigreturn, as there's no stored pc
1004		 * on the stack to skip, the argument follows the syscall
1005		 * number without a gap.
1006		 */
1007		if (code != SUNOS_SYS_sigreturn) {
1008			frame.f_regs[SP] += sizeof (int);
1009			/*
1010			 * remember that we adjusted the SP,
1011			 * might have to undo this if the system call
1012			 * returns ERESTART.
1013			 */
1014			p->p_md.md_flags |= MDP_STACKADJ;
1015		} else
1016			p->p_md.md_flags &= ~MDP_STACKADJ;
1017	}
1018#endif
1019
1020	params = (caddr_t)frame.f_regs[SP] + sizeof(int);
1021
1022	switch (code) {
1023	case SYS_syscall:
1024		/*
1025		 * Code is first argument, followed by actual args.
1026		 */
1027		code = fuword(params);
1028		params += sizeof(int);
1029		/*
1030		 * XXX sigreturn requires special stack manipulation
1031		 * that is only done if entered via the sigreturn
1032		 * trap.  Cannot allow it here so make sure we fail.
1033		 */
1034		switch (code) {
1035#ifdef COMPAT_13
1036		case SYS_compat_13_sigreturn13:
1037#endif
1038		case SYS___sigreturn14:
1039			code = nsys;
1040			break;
1041		}
1042		break;
1043	case SYS___syscall:
1044		/*
1045		 * Like syscall, but code is a quad, so as to maintain
1046		 * quad alignment for the rest of the arguments.
1047		 */
1048		if (callp != sysent)
1049			break;
1050		code = fuword(params + _QUAD_LOWWORD * sizeof(int));
1051		params += sizeof(quad_t);
1052		break;
1053	default:
1054		break;
1055	}
1056	if (code < 0 || code >= nsys)
1057		callp += p->p_emul->e_nosys;		/* illegal */
1058	else
1059		callp += code;
1060	argsize = callp->sy_argsize;
1061#ifdef COMPAT_LINUX
1062	if (0
1063# ifdef EXEC_AOUT
1064	    || p->p_emul == &emul_linux_aout
1065# endif
1066# ifdef EXEC_ELF32
1067	    || p->p_emul == &emul_linux_elf32
1068# endif
1069	     ) {
1070		/*
1071		 * Linux passes the args in d1-d5
1072		 */
1073		switch (argsize) {
1074		case 20:
1075			args[4] = frame.f_regs[D5];
1076		case 16:
1077			args[3] = frame.f_regs[D4];
1078		case 12:
1079			args[2] = frame.f_regs[D3];
1080		case 8:
1081			args[1] = frame.f_regs[D2];
1082		case 4:
1083			args[0] = frame.f_regs[D1];
1084		case 0:
1085			error = 0;
1086			break;
1087		default:
1088#ifdef DEBUG
1089			panic("linux syscall %d weird argsize %d",
1090				code, argsize);
1091#else
1092			error = EINVAL;
1093#endif
1094			break;
1095		}
1096	} else
1097#endif
1098	if (argsize)
1099		error = copyin(params, (caddr_t)args, argsize);
1100	else
1101		error = 0;
1102#ifdef SYSCALL_DEBUG
1103	scdebug_call(p, code, args);
1104#endif
1105#ifdef KTRACE
1106	if (KTRPOINT(p, KTR_SYSCALL))
1107		ktrsyscall(p->p_tracep, code, argsize, args);
1108#endif
1109	if (error)
1110		goto bad;
1111	rval[0] = 0;
1112	rval[1] = frame.f_regs[D1];
1113	error = (*callp->sy_call)(p, args, rval);
1114	switch (error) {
1115	case 0:
1116		frame.f_regs[D0] = rval[0];
1117		frame.f_regs[D1] = rval[1];
1118		frame.f_sr &= ~PSL_C;	/* carry bit */
1119		break;
1120	case ERESTART:
1121		/*
1122		 * We always enter through a `trap' instruction, which is 2
1123		 * bytes, so adjust the pc by that amount.
1124		 */
1125		frame.f_pc = opc - 2;
1126		break;
1127	case EJUSTRETURN:
1128		/* nothing to do */
1129		break;
1130	default:
1131	bad:
1132		if (p->p_emul->e_errno)
1133			error = p->p_emul->e_errno[error];
1134		frame.f_regs[D0] = error;
1135		frame.f_sr |= PSL_C;	/* carry bit */
1136		break;
1137	}
1138
1139#ifdef SYSCALL_DEBUG
1140	scdebug_ret(p, code, error, rval);
1141#endif
1142#ifdef COMPAT_SUNOS
1143	/* need new p-value for this */
1144	if (error == ERESTART && (p->p_md.md_flags & MDP_STACKADJ))
1145		frame.f_regs[SP] -= sizeof (int);
1146#endif
1147	userret(p, &frame, sticks, (u_int)0, 0);
1148#ifdef KTRACE
1149	if (KTRPOINT(p, KTR_SYSRET))
1150		ktrsysret(p->p_tracep, code, error, rval[0]);
1151#endif
1152}
1153
1154void
1155child_return(arg)
1156	void *arg;
1157{
1158	struct proc *p = arg;
1159	/* See cpu_fork() */
1160	struct frame *f = (struct frame *)p->p_md.md_regs;
1161
1162	f->f_regs[D0] = 0;
1163	f->f_sr &= ~PSL_C;
1164	f->f_format = FMT0;
1165
1166	userret(p, f, p->p_sticks, (u_int)0, 0);
1167#ifdef KTRACE
1168	if (KTRPOINT(p, KTR_SYSRET))
1169		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
1170#endif
1171}
1172
1173/*
1174 * Allocation routines for software interrupts.
1175 */
1176u_long
1177allocate_sir(proc, arg)
1178	void (*proc)();
1179	void *arg;
1180{
1181	int bit;
1182
1183	if( next_sir >= NSIR )
1184		panic("allocate_sir: none left");
1185	bit = next_sir++;
1186	sir_routines[bit] = proc;
1187	sir_args[bit] = arg;
1188	return (1 << bit);
1189}
1190
1191void
1192init_sir()
1193{
1194	extern void netintr();
1195
1196	sir_routines[0] = netintr;
1197	sir_routines[1] = softclock;
1198	next_sir = 2;
1199}
1200