/*
 *  Copyright (C) 1994  Linus Torvalds
 *
 *  29 dec 2001 - Fixed oopses caused by unchecked access to the vm86
 *                stack - Manfred Spraul <manfred@colorfullife.com>
 *
 *  22 mar 2002 - Manfred detected the stackfaults, but didn't handle
 *                them correctly. Now the emulation will be in a
 *                consistent state after stackfaults - Kasper Dupont
 *                <kasperd@daimi.au.dk>
 *
 *  22 mar 2002 - Added missing clear_IF in set_vflags_* - Kasper Dupont
 *                <kasperd@daimi.au.dk>
 *
 *  ?? ??? 2002 - Fixed premature returns from handle_vm86_fault
 *                caused by Kasper Dupont's changes - Stas Sergeev
 *
 *   4 apr 2002 - Fixed CHECK_IF_IN_TRAP broken by Stas' changes.
 *                Kasper Dupont <kasperd@daimi.au.dk>
 *
 *   9 apr 2002 - Changed syntax of macros in handle_vm86_fault.
 *                Kasper Dupont <kasperd@daimi.au.dk>
 *
 *   9 apr 2002 - Changed stack access macros to jump to a label
 *                instead of returning to userspace. This simplifies
 *                do_int, and is needed by handle_vm86_fault. Kasper
 *                Dupont <kasperd@daimi.au.dk>
 *
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/syscalls.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
#include <linux/audit.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/security.h>

#include <linux/uaccess.h>
#include <asm/io.h>
#include <asm/tlbflush.h>
#include <asm/irq.h>
#include <asm/traps.h>
#include <asm/vm86.h>

/*
 * Known problems:
 *
 * Interrupt handling is not guaranteed:
 * - a real x86 will disable all interrupts for one instruction
 *   after a "mov ss,xx" to make stack handling atomic even without
 *   the 'lss' instruction. We can't guarantee this in v86 mode,
 *   as the next instruction might result in a page fault or similar.
 * - a real x86 will have interrupts disabled for one instruction
 *   past the 'sti' that enables them. We don't bother with all the
 *   details yet.
 *
 * Let's hope these problems do not actually matter for anything.
 */


/*
 * 8- and 16-bit register defines..
 */
#define AL(regs)        (((unsigned char *)&((regs)->pt.ax))[0])
#define AH(regs)        (((unsigned char *)&((regs)->pt.ax))[1])
#define IP(regs)        (*(unsigned short *)&((regs)->pt.ip))
#define SP(regs)        (*(unsigned short *)&((regs)->pt.sp))

/*
 * virtual flags (16 and 32-bit versions)
 */
#define VFLAGS  (*(unsigned short *)&(current->thread.vm86->veflags))
#define VEFLAGS (current->thread.vm86->veflags)

#define set_flags(X, new, mask) \
((X) = ((X) & ~(mask)) | ((new) & (mask)))

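/*
 * EFLAGS masks used below (standard x86 EFLAGS bit layout):
 * SAFE_MASK (0xDD5) covers the bits a vm86 task may set directly
 * (CF, PF, AF, ZF, SF, TF, DF and OF); RETURN_MASK (0xDFF) additionally
 * includes the fixed/reserved low bits.  IF, IOPL and NT are never
 * taken directly from the vm86 task's image and are handled through
 * the virtual flags (VEFLAGS) instead.
 */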
#define SAFE_MASK       (0xDD5)
#define RETURN_MASK     (0xDFF)

void save_v86_state(struct kernel_vm86_regs *regs, int retval)
{
        struct tss_struct *tss;
        struct task_struct *tsk = current;
        struct vm86plus_struct __user *user;
        struct vm86 *vm86 = current->thread.vm86;
        long err = 0;

        /*
         * This gets called from entry.S with interrupts disabled, but
         * from process context. Enable interrupts here, before trying
         * to access user space.
         */
        local_irq_enable();

        if (!vm86 || !vm86->user_vm86) {
                pr_alert("no user_vm86: BAD\n");
                do_exit(SIGSEGV);
        }
        set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
        user = vm86->user_vm86;

        if (!access_ok(VERIFY_WRITE, user, vm86->vm86plus.is_vm86pus ?
                       sizeof(struct vm86plus_struct) :
                       sizeof(struct vm86_struct))) {
                pr_alert("could not access userspace vm86 info\n");
                do_exit(SIGSEGV);
        }

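        /*
         * Copy the final vm86 register state back to the userspace
         * vm86 structure.  put_user_ex() does not return an error;
         * faults are collected by the surrounding put_user_try/
         * put_user_catch block and checked once via 'err' below.
         */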
        put_user_try {
                put_user_ex(regs->pt.bx, &user->regs.ebx);
                put_user_ex(regs->pt.cx, &user->regs.ecx);
                put_user_ex(regs->pt.dx, &user->regs.edx);
                put_user_ex(regs->pt.si, &user->regs.esi);
                put_user_ex(regs->pt.di, &user->regs.edi);
                put_user_ex(regs->pt.bp, &user->regs.ebp);
                put_user_ex(regs->pt.ax, &user->regs.eax);
                put_user_ex(regs->pt.ip, &user->regs.eip);
                put_user_ex(regs->pt.cs, &user->regs.cs);
                put_user_ex(regs->pt.flags, &user->regs.eflags);
                put_user_ex(regs->pt.sp, &user->regs.esp);
                put_user_ex(regs->pt.ss, &user->regs.ss);
                put_user_ex(regs->es, &user->regs.es);
                put_user_ex(regs->ds, &user->regs.ds);
                put_user_ex(regs->fs, &user->regs.fs);
                put_user_ex(regs->gs, &user->regs.gs);

                put_user_ex(vm86->screen_bitmap, &user->screen_bitmap);
        } put_user_catch(err);
        if (err) {
                pr_alert("could not access userspace vm86 info\n");
                do_exit(SIGSEGV);
        }

        tss = &per_cpu(cpu_tss, get_cpu());
        tsk->thread.sp0 = vm86->saved_sp0;
        tsk->thread.sysenter_cs = __KERNEL_CS;
        load_sp0(tss, &tsk->thread);
        vm86->saved_sp0 = 0;
        put_cpu();

        memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));

        lazy_load_gs(vm86->regs32.gs);

        regs->pt.ax = retval;
}

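/*
 * Write-protect the 32 PTEs covering the legacy VGA window at
 * 0xA0000-0xBFFFF so that vm86 writes to screen memory fault and can
 * be tracked; used when the task asked for VM86_SCREEN_BITMAP.
 */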
static void mark_screen_rdonly(struct mm_struct *mm)
{
        struct vm_area_struct *vma;
        spinlock_t *ptl;
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
        int i;

        down_write(&mm->mmap_sem);
        pgd = pgd_offset(mm, 0xA0000);
        if (pgd_none_or_clear_bad(pgd))
                goto out;
        pud = pud_offset(pgd, 0xA0000);
        if (pud_none_or_clear_bad(pud))
                goto out;
        pmd = pmd_offset(pud, 0xA0000);

        if (pmd_trans_huge(*pmd)) {
                vma = find_vma(mm, 0xA0000);
                split_huge_pmd(vma, pmd, 0xA0000);
        }
        if (pmd_none_or_clear_bad(pmd))
                goto out;
        pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl);
        for (i = 0; i < 32; i++) {
                if (pte_present(*pte))
                        set_pte(pte, pte_wrprotect(*pte));
                pte++;
        }
        pte_unmap_unlock(pte, ptl);
out:
        up_write(&mm->mmap_sem);
        flush_tlb();
}



static int do_vm86_irq_handling(int subfunction, int irqnumber);
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus);

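/*
 * Two entry points into vm86 mode: the legacy vm86old() syscall takes
 * a bare struct vm86_struct, while vm86() multiplexes the IRQ-passing
 * subfunctions with the extended vm86plus entry (VM86_ENTER).
 */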
SYSCALL_DEFINE1(vm86old, struct vm86_struct __user *, user_vm86)
{
        return do_sys_vm86((struct vm86plus_struct __user *) user_vm86, false);
}


SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
{
        switch (cmd) {
        case VM86_REQUEST_IRQ:
        case VM86_FREE_IRQ:
        case VM86_GET_IRQ_BITS:
        case VM86_GET_AND_RESET_IRQ:
                return do_vm86_irq_handling(cmd, (int)arg);
        case VM86_PLUS_INSTALL_CHECK:
                /*
                 * NOTE: on old vm86 stuff this will return the error
                 *  from access_ok(), because the subfunction is
                 *  interpreted as (invalid) address to vm86_struct.
                 *  So the installation check works.
                 */
                return 0;
        }

        /* we come here only for functions VM86_ENTER, VM86_ENTER_NO_BYPASS */
        return do_sys_vm86((struct vm86plus_struct __user *) arg, true);
}


static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
{
        struct tss_struct *tss;
        struct task_struct *tsk = current;
        struct vm86 *vm86 = tsk->thread.vm86;
        struct kernel_vm86_regs vm86regs;
        struct pt_regs *regs = current_pt_regs();
        unsigned long err = 0;

        err = security_mmap_addr(0);
        if (err) {
                /*
                 * vm86 cannot virtualize the address space, so vm86 users
                 * need to manage the low 1MB themselves using mmap.  Given
                 * that BIOS places important data in the first page, vm86
                 * is essentially useless if mmap_min_addr != 0.  DOSEMU,
                 * for example, won't even bother trying to use vm86 if it
                 * can't map a page at virtual address 0.
                 *
                 * To reduce the available kernel attack surface, simply
                 * disallow vm86(old) for users who cannot mmap at va 0.
                 *
                 * The implementation of security_mmap_addr will allow
                 * suitably privileged users to map va 0 even if
                 * vm.mmap_min_addr is set above 0, and we want this
                 * behavior for vm86 as well, as it ensures that legacy
                 * tools like vbetool will not fail just because of
                 * vm.mmap_min_addr.
                 */
                pr_info_once("Denied a call to vm86(old) from %s[%d] (uid: %d).  Set the vm.mmap_min_addr sysctl to 0 and/or adjust LSM mmap_min_addr policy to enable vm86 if you are using a vm86-based DOS emulator.\n",
                             current->comm, task_pid_nr(current),
                             from_kuid_munged(&init_user_ns, current_uid()));
                return -EPERM;
        }

        if (!vm86) {
                if (!(vm86 = kzalloc(sizeof(*vm86), GFP_KERNEL)))
                        return -ENOMEM;
                tsk->thread.vm86 = vm86;
        }
        if (vm86->saved_sp0)
                return -EPERM;

        if (!access_ok(VERIFY_READ, user_vm86, plus ?
                       sizeof(struct vm86_struct) :
                       sizeof(struct vm86plus_struct)))
                return -EFAULT;

        memset(&vm86regs, 0, sizeof(vm86regs));
        get_user_try {
                unsigned short seg;
                get_user_ex(vm86regs.pt.bx, &user_vm86->regs.ebx);
                get_user_ex(vm86regs.pt.cx, &user_vm86->regs.ecx);
                get_user_ex(vm86regs.pt.dx, &user_vm86->regs.edx);
                get_user_ex(vm86regs.pt.si, &user_vm86->regs.esi);
                get_user_ex(vm86regs.pt.di, &user_vm86->regs.edi);
                get_user_ex(vm86regs.pt.bp, &user_vm86->regs.ebp);
                get_user_ex(vm86regs.pt.ax, &user_vm86->regs.eax);
                get_user_ex(vm86regs.pt.ip, &user_vm86->regs.eip);
                get_user_ex(seg, &user_vm86->regs.cs);
                vm86regs.pt.cs = seg;
                get_user_ex(vm86regs.pt.flags, &user_vm86->regs.eflags);
                get_user_ex(vm86regs.pt.sp, &user_vm86->regs.esp);
                get_user_ex(seg, &user_vm86->regs.ss);
                vm86regs.pt.ss = seg;
                get_user_ex(vm86regs.es, &user_vm86->regs.es);
                get_user_ex(vm86regs.ds, &user_vm86->regs.ds);
                get_user_ex(vm86regs.fs, &user_vm86->regs.fs);
                get_user_ex(vm86regs.gs, &user_vm86->regs.gs);

                get_user_ex(vm86->flags, &user_vm86->flags);
                get_user_ex(vm86->screen_bitmap, &user_vm86->screen_bitmap);
                get_user_ex(vm86->cpu_type, &user_vm86->cpu_type);
        } get_user_catch(err);
        if (err)
                return err;

        if (copy_from_user(&vm86->int_revectored,
                           &user_vm86->int_revectored,
                           sizeof(struct revectored_struct)))
                return -EFAULT;
        if (copy_from_user(&vm86->int21_revectored,
                           &user_vm86->int21_revectored,
                           sizeof(struct revectored_struct)))
                return -EFAULT;
        if (plus) {
                if (copy_from_user(&vm86->vm86plus, &user_vm86->vm86plus,
                                   sizeof(struct vm86plus_info_struct)))
                        return -EFAULT;
                vm86->vm86plus.is_vm86pus = 1;
        } else
                memset(&vm86->vm86plus, 0,
                       sizeof(struct vm86plus_info_struct));

        memcpy(&vm86->regs32, regs, sizeof(struct pt_regs));
        vm86->user_vm86 = user_vm86;

/*
 * The flags register is also special: we cannot trust that the user
 * has set it up safely, so this makes sure interrupt etc flags are
 * inherited from protected mode.
 */
        VEFLAGS = vm86regs.pt.flags;
        vm86regs.pt.flags &= SAFE_MASK;
        vm86regs.pt.flags |= regs->flags & ~SAFE_MASK;
        vm86regs.pt.flags |= X86_VM_MASK;

        vm86regs.pt.orig_ax = regs->orig_ax;

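        /*
         * veflags_mask selects which extended EFLAGS bits the emulated
         * CPU type passes through to the vm86 task: none for a 286,
         * NT/IOPL for a 386, plus AC for a 486, plus ID for anything
         * newer.
         */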
        switch (vm86->cpu_type) {
        case CPU_286:
                vm86->veflags_mask = 0;
                break;
        case CPU_386:
                vm86->veflags_mask = X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        case CPU_486:
                vm86->veflags_mask = X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        default:
                vm86->veflags_mask = X86_EFLAGS_ID | X86_EFLAGS_AC | X86_EFLAGS_NT | X86_EFLAGS_IOPL;
                break;
        }

/*
 * Save old state
 */
        vm86->saved_sp0 = tsk->thread.sp0;
        lazy_save_gs(vm86->regs32.gs);

        tss = &per_cpu(cpu_tss, get_cpu());
        /* make room for real-mode segments */
        tsk->thread.sp0 += 16;

        if (static_cpu_has(X86_FEATURE_SEP))
                tsk->thread.sysenter_cs = 0;

        load_sp0(tss, &tsk->thread);
        put_cpu();

        if (vm86->flags & VM86_SCREEN_BITMAP)
                mark_screen_rdonly(tsk->mm);

        memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs));
        force_iret();
        return regs->ax;
}

static inline void set_IF(struct kernel_vm86_regs *regs)
{
        VEFLAGS |= X86_EFLAGS_VIF;
}

static inline void clear_IF(struct kernel_vm86_regs *regs)
{
        VEFLAGS &= ~X86_EFLAGS_VIF;
}

static inline void clear_TF(struct kernel_vm86_regs *regs)
{
        regs->pt.flags &= ~X86_EFLAGS_TF;
}

static inline void clear_AC(struct kernel_vm86_regs *regs)
{
        regs->pt.flags &= ~X86_EFLAGS_AC;
}

/*
 * It is correct to call set_IF(regs) from the set_vflags_*
 * functions. However someone forgot to call clear_IF(regs)
 * in the opposite case.
 * After the instruction sequence CLI PUSHF STI POPF you should
 * end up with interrupts disabled, but you ended up with
 * interrupts enabled.
 *  ( I was testing my own changes, but the only bug I
 *    could find was in a function I had not changed. )
 * [KD]
 */

static inline void set_vflags_long(unsigned long flags, struct kernel_vm86_regs *regs)
{
        set_flags(VEFLAGS, flags, current->thread.vm86->veflags_mask);
        set_flags(regs->pt.flags, flags, SAFE_MASK);
        if (flags & X86_EFLAGS_IF)
                set_IF(regs);
        else
                clear_IF(regs);
}

static inline void set_vflags_short(unsigned short flags, struct kernel_vm86_regs *regs)
{
        set_flags(VFLAGS, flags, current->thread.vm86->veflags_mask);
        set_flags(regs->pt.flags, flags, SAFE_MASK);
        if (flags & X86_EFLAGS_IF)
                set_IF(regs);
        else
                clear_IF(regs);
}

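/*
 * Build the EFLAGS image seen by the vm86 task: the real arithmetic
 * flags (RETURN_MASK), IF reflecting the virtual interrupt flag,
 * IOPL forced to 3, and whatever extended bits veflags_mask exposes.
 */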
static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
{
        unsigned long flags = regs->pt.flags & RETURN_MASK;

        if (VEFLAGS & X86_EFLAGS_VIF)
                flags |= X86_EFLAGS_IF;
        flags |= X86_EFLAGS_IOPL;
        return flags | (VEFLAGS & current->thread.vm86->veflags_mask);
}

static inline int is_revectored(int nr, struct revectored_struct *bitmap)
{
        return test_bit(nr, bitmap->__map);
}

#define val_byte(val, n) (((__u8 *)&val)[n])

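/*
 * Emulated real-mode stack and instruction-stream accesses: push/pop
 * 8/16/32-bit values one byte at a time with put_user()/get_user(),
 * adjusting the 16-bit offset and jumping to err_label on a fault.
 */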
#define pushb(base, ptr, val, err_label) \
        do { \
                __u8 __val = val; \
                ptr--; \
                if (put_user(__val, base + ptr) < 0) \
                        goto err_label; \
        } while (0)

#define pushw(base, ptr, val, err_label) \
        do { \
                __u16 __val = val; \
                ptr--; \
                if (put_user(val_byte(__val, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 0), base + ptr) < 0) \
                        goto err_label; \
        } while (0)

#define pushl(base, ptr, val, err_label) \
        do { \
                __u32 __val = val; \
                ptr--; \
                if (put_user(val_byte(__val, 3), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 2), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr--; \
                if (put_user(val_byte(__val, 0), base + ptr) < 0) \
                        goto err_label; \
        } while (0)

#define popb(base, ptr, err_label) \
        ({ \
                __u8 __res; \
                if (get_user(__res, base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                __res; \
        })

#define popw(base, ptr, err_label) \
        ({ \
                __u16 __res; \
                if (get_user(val_byte(__res, 0), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                __res; \
        })

#define popl(base, ptr, err_label) \
        ({ \
                __u32 __res; \
                if (get_user(val_byte(__res, 0), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 1), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 2), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                if (get_user(val_byte(__res, 3), base + ptr) < 0) \
                        goto err_label; \
                ptr++; \
                __res; \
        })

/* There are so many possible reasons for this function to return
 * VM86_INTx, so adding another doesn't bother me. We can expect
 * userspace programs to be able to handle it. (Getting a problem
 * in userspace is always better than an Oops anyway.) [KD]
 */
static void do_int(struct kernel_vm86_regs *regs, int i,
    unsigned char __user *ssp, unsigned short sp)
{
        unsigned long __user *intr_ptr;
        unsigned long segoffs;
        struct vm86 *vm86 = current->thread.vm86;

        if (regs->pt.cs == BIOSSEG)
                goto cannot_handle;
        if (is_revectored(i, &vm86->int_revectored))
                goto cannot_handle;
        if (i == 0x21 && is_revectored(AH(regs), &vm86->int21_revectored))
                goto cannot_handle;
        intr_ptr = (unsigned long __user *) (i << 2);
        if (get_user(segoffs, intr_ptr))
                goto cannot_handle;
        if ((segoffs >> 16) == BIOSSEG)
                goto cannot_handle;
        pushw(ssp, sp, get_vflags(regs), cannot_handle);
        pushw(ssp, sp, regs->pt.cs, cannot_handle);
        pushw(ssp, sp, IP(regs), cannot_handle);
        regs->pt.cs = segoffs >> 16;
        SP(regs) -= 6;
        IP(regs) = segoffs & 0xffff;
        clear_TF(regs);
        clear_IF(regs);
        clear_AC(regs);
        return;

cannot_handle:
        save_v86_state(regs, VM86_INTx + (i << 8));
}

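/*
 * Traps raised while in vm86 mode: for a vm86plus monitor, #DB and #BP
 * are reported back as VM86_TRAP and all other traps are reflected
 * through the real-mode interrupt vector; plain vm86 turns #DB into a
 * SIGTRAP and leaves everything else to the caller.
 */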
int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
{
        struct vm86 *vm86 = current->thread.vm86;

        if (vm86->vm86plus.is_vm86pus) {
                if ((trapno == 3) || (trapno == 1)) {
                        save_v86_state(regs, VM86_TRAP + (trapno << 8));
                        return 0;
                }
                do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
                return 0;
        }
        if (trapno != 1)
                return 1; /* we let this be handled by the calling routine */
        current->thread.trap_nr = trapno;
        current->thread.error_code = error_code;
        force_sig(SIGTRAP, current);
        return 0;
}

void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code)
{
        unsigned char opcode;
        unsigned char __user *csp;
        unsigned char __user *ssp;
        unsigned short ip, sp, orig_flags;
        int data32, pref_done;
        struct vm86plus_info_struct *vmpi = &current->thread.vm86->vm86plus;

#define CHECK_IF_IN_TRAP \
        if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \
                newflags |= X86_EFLAGS_TF

        orig_flags = *(unsigned short *)&regs->pt.flags;

        csp = (unsigned char __user *) (regs->pt.cs << 4);
        ssp = (unsigned char __user *) (regs->pt.ss << 4);
        sp = SP(regs);
        ip = IP(regs);

        data32 = 0;
        pref_done = 0;
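        /*
         * Consume any instruction prefixes in front of the faulting
         * opcode; only the 0x66 operand-size prefix changes the
         * emulation below (16- vs 32-bit pushf/popf/iret).
         */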
        do {
                switch (opcode = popb(csp, ip, simulate_sigsegv)) {
                case 0x66:      /* 32-bit data */     data32 = 1; break;
                case 0x67:      /* 32-bit address */  break;
                case 0x2e:      /* CS */              break;
                case 0x3e:      /* DS */              break;
                case 0x26:      /* ES */              break;
                case 0x36:      /* SS */              break;
                case 0x65:      /* GS */              break;
                case 0x64:      /* FS */              break;
                case 0xf2:      /* repnz */           break;
                case 0xf3:      /* rep */             break;
                default: pref_done = 1;
                }
        } while (!pref_done);

        switch (opcode) {

        /* pushf */
        case 0x9c:
                if (data32) {
                        pushl(ssp, sp, get_vflags(regs), simulate_sigsegv);
                        SP(regs) -= 4;
                } else {
                        pushw(ssp, sp, get_vflags(regs), simulate_sigsegv);
                        SP(regs) -= 2;
                }
                IP(regs) = ip;
                goto vm86_fault_return;

        /* popf */
        case 0x9d:
                {
                unsigned long newflags;
                if (data32) {
                        newflags = popl(ssp, sp, simulate_sigsegv);
                        SP(regs) += 4;
                } else {
                        newflags = popw(ssp, sp, simulate_sigsegv);
                        SP(regs) += 2;
                }
                IP(regs) = ip;
                CHECK_IF_IN_TRAP;
                if (data32)
                        set_vflags_long(newflags, regs);
                else
                        set_vflags_short(newflags, regs);

                goto check_vip;
                }

        /* int xx */
        case 0xcd: {
                int intno = popb(csp, ip, simulate_sigsegv);
                IP(regs) = ip;
                if (vmpi->vm86dbg_active) {
                        if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) {
                                save_v86_state(regs, VM86_INTx + (intno << 8));
                                return;
                        }
                }
                do_int(regs, intno, ssp, sp);
                return;
        }

        /* iret */
        case 0xcf:
                {
                unsigned long newip;
                unsigned long newcs;
                unsigned long newflags;
                if (data32) {
                        newip = popl(ssp, sp, simulate_sigsegv);
                        newcs = popl(ssp, sp, simulate_sigsegv);
                        newflags = popl(ssp, sp, simulate_sigsegv);
                        SP(regs) += 12;
                } else {
                        newip = popw(ssp, sp, simulate_sigsegv);
                        newcs = popw(ssp, sp, simulate_sigsegv);
                        newflags = popw(ssp, sp, simulate_sigsegv);
                        SP(regs) += 6;
                }
                IP(regs) = newip;
                regs->pt.cs = newcs;
                CHECK_IF_IN_TRAP;
                if (data32) {
                        set_vflags_long(newflags, regs);
                } else {
                        set_vflags_short(newflags, regs);
                }
                goto check_vip;
                }

        /* cli */
        case 0xfa:
                IP(regs) = ip;
                clear_IF(regs);
                goto vm86_fault_return;

        /* sti */
        /*
         * Damn. This is incorrect: the 'sti' instruction should actually
         * enable interrupts after the /next/ instruction. Not good.
         *
         * Probably needs some horsing around with the TF flag. Aiee..
         */
        case 0xfb:
                IP(regs) = ip;
                set_IF(regs);
                goto check_vip;

        default:
                save_v86_state(regs, VM86_UNKNOWN);
        }

        return;

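/*
 * Paths that may have just re-enabled virtual interrupts end up here:
 * if the monitor marked an interrupt as pending (VIP), return to it
 * with VM86_STI so the pending interrupt can be delivered.
 */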
check_vip:
        if (VEFLAGS & X86_EFLAGS_VIP) {
                save_v86_state(regs, VM86_STI);
                return;
        }

vm86_fault_return:
        if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) {
                save_v86_state(regs, VM86_PICRETURN);
                return;
        }
        if (orig_flags & X86_EFLAGS_TF)
                handle_vm86_trap(regs, 0, X86_TRAP_DB);
        return;

simulate_sigsegv:
        /* FIXME: After a long discussion with Stas we finally
         *        agreed that this is wrong. Here we should
         *        really send a SIGSEGV to the user program.
         *        But how do we create the correct context? We
         *        are inside a general protection fault handler
         *        and have just returned from a page fault handler.
         *        The correct context for the signal handler
         *        should be a mixture of the two, but how do we
         *        get the information? [KD]
         */
        save_v86_state(regs, VM86_UNKNOWN);
}

/* ---------------- vm86 special IRQ passing stuff ----------------- */

#define VM86_IRQNAME            "vm86irq"

static struct vm86_irqs {
        struct task_struct *tsk;
        int sig;
} vm86_irqs[16];

static DEFINE_SPINLOCK(irqbits_lock);
static int irqbits;

#define ALLOWED_SIGS (1 /* 0 = don't send a signal */ \
        | (1 << SIGUSR1) | (1 << SIGUSR2) | (1 << SIGIO)  | (1 << SIGURG) \
        | (1 << SIGUNUSED))

static irqreturn_t irq_handler(int intno, void *dev_id)
{
        int irq_bit;
        unsigned long flags;

        spin_lock_irqsave(&irqbits_lock, flags);
        irq_bit = 1 << intno;
        if ((irqbits & irq_bit) || !vm86_irqs[intno].tsk)
                goto out;
        irqbits |= irq_bit;
        if (vm86_irqs[intno].sig)
                send_sig(vm86_irqs[intno].sig, vm86_irqs[intno].tsk, 1);
        /*
         * IRQ will be re-enabled when user asks for the irq (whether
         * polling or as a result of the signal)
         */
        disable_irq_nosync(intno);
        spin_unlock_irqrestore(&irqbits_lock, flags);
        return IRQ_HANDLED;

out:
        spin_unlock_irqrestore(&irqbits_lock, flags);
        return IRQ_NONE;
}

static inline void free_vm86_irq(int irqnumber)
{
        unsigned long flags;

        free_irq(irqnumber, NULL);
        vm86_irqs[irqnumber].tsk = NULL;

        spin_lock_irqsave(&irqbits_lock, flags);
        irqbits &= ~(1 << irqnumber);
        spin_unlock_irqrestore(&irqbits_lock, flags);
}

void release_vm86_irqs(struct task_struct *task)
{
        int i;
        for (i = FIRST_VM86_IRQ; i <= LAST_VM86_IRQ; i++)
            if (vm86_irqs[i].tsk == task)
                free_vm86_irq(i);
}

static inline int get_and_reset_irq(int irqnumber)
{
        int bit;
        unsigned long flags;
        int ret = 0;

        if (invalid_vm86_irq(irqnumber)) return 0;
        if (vm86_irqs[irqnumber].tsk != current) return 0;
        spin_lock_irqsave(&irqbits_lock, flags);
        bit = irqbits & (1 << irqnumber);
        irqbits &= ~bit;
        if (bit) {
                enable_irq(irqnumber);
                ret = 1;
        }

        spin_unlock_irqrestore(&irqbits_lock, flags);
        return ret;
}


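/*
 * Dispatcher for the vm86() IRQ subfunctions: request an IRQ/signal
 * pairing (needs CAP_SYS_ADMIN), read or consume the pending-IRQ bits,
 * or free a previously requested IRQ.
 */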
static int do_vm86_irq_handling(int subfunction, int irqnumber)
{
        int ret;
        switch (subfunction) {
                case VM86_GET_AND_RESET_IRQ: {
                        return get_and_reset_irq(irqnumber);
                }
                case VM86_GET_IRQ_BITS: {
                        return irqbits;
                }
                case VM86_REQUEST_IRQ: {
                        int sig = irqnumber >> 8;
                        int irq = irqnumber & 255;
                        if (!capable(CAP_SYS_ADMIN)) return -EPERM;
                        if (!((1 << sig) & ALLOWED_SIGS)) return -EPERM;
                        if (invalid_vm86_irq(irq)) return -EPERM;
                        if (vm86_irqs[irq].tsk) return -EPERM;
                        ret = request_irq(irq, &irq_handler, 0, VM86_IRQNAME, NULL);
                        if (ret) return ret;
                        vm86_irqs[irq].sig = sig;
                        vm86_irqs[irq].tsk = current;
                        return irq;
                }
                case VM86_FREE_IRQ: {
                        if (invalid_vm86_irq(irqnumber)) return -EPERM;
                        if (!vm86_irqs[irqnumber].tsk) return 0;
                        if (vm86_irqs[irqnumber].tsk != current) return -EPERM;
                        free_vm86_irq(irqnumber);
                        return 0;
                }
        }
        return -EINVAL;
}