]> git.karo-electronics.de Git - karo-tx-linux.git/blob - tools/kvm/kvm.c
Merge branch 'linus'
[karo-tx-linux.git] / tools / kvm / kvm.c
1 #include "kvm/kvm.h"
2 #include "kvm/read-write.h"
3 #include "kvm/util.h"
4 #include "kvm/strbuf.h"
5 #include "kvm/mutex.h"
6 #include "kvm/kvm-cpu.h"
7 #include "kvm/kvm-ipc.h"
8
9 #include <linux/kvm.h>
10 #include <linux/err.h>
11
12 #include <sys/un.h>
13 #include <sys/stat.h>
14 #include <sys/types.h>
15 #include <sys/socket.h>
16 #include <sys/ioctl.h>
17 #include <sys/mman.h>
18 #include <stdbool.h>
19 #include <limits.h>
20 #include <signal.h>
21 #include <stdarg.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <unistd.h>
25 #include <stdio.h>
26 #include <fcntl.h>
27 #include <time.h>
28 #include <sys/eventfd.h>
29 #include <asm/unistd.h>
30 #include <dirent.h>
31
32 #define DEFINE_KVM_EXIT_REASON(reason) [reason] = #reason
33
34 const char *kvm_exit_reasons[] = {
35         DEFINE_KVM_EXIT_REASON(KVM_EXIT_UNKNOWN),
36         DEFINE_KVM_EXIT_REASON(KVM_EXIT_EXCEPTION),
37         DEFINE_KVM_EXIT_REASON(KVM_EXIT_IO),
38         DEFINE_KVM_EXIT_REASON(KVM_EXIT_HYPERCALL),
39         DEFINE_KVM_EXIT_REASON(KVM_EXIT_DEBUG),
40         DEFINE_KVM_EXIT_REASON(KVM_EXIT_HLT),
41         DEFINE_KVM_EXIT_REASON(KVM_EXIT_MMIO),
42         DEFINE_KVM_EXIT_REASON(KVM_EXIT_IRQ_WINDOW_OPEN),
43         DEFINE_KVM_EXIT_REASON(KVM_EXIT_SHUTDOWN),
44         DEFINE_KVM_EXIT_REASON(KVM_EXIT_FAIL_ENTRY),
45         DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTR),
46         DEFINE_KVM_EXIT_REASON(KVM_EXIT_SET_TPR),
47         DEFINE_KVM_EXIT_REASON(KVM_EXIT_TPR_ACCESS),
48         DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_SIEIC),
49         DEFINE_KVM_EXIT_REASON(KVM_EXIT_S390_RESET),
50         DEFINE_KVM_EXIT_REASON(KVM_EXIT_DCR),
51         DEFINE_KVM_EXIT_REASON(KVM_EXIT_NMI),
52         DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR),
53 #ifdef CONFIG_PPC64
54         DEFINE_KVM_EXIT_REASON(KVM_EXIT_PAPR_HCALL),
55 #endif
56 };
57
58 static int pause_event;
59 static DEFINE_MUTEX(pause_lock);
60 extern struct kvm_ext kvm_req_ext[];
61
62 static char kvm_dir[PATH_MAX];
63
64 static int set_dir(const char *fmt, va_list args)
65 {
66         char tmp[PATH_MAX];
67
68         vsnprintf(tmp, sizeof(tmp), fmt, args);
69
70         mkdir(tmp, 0777);
71
72         if (!realpath(tmp, kvm_dir))
73                 return -errno;
74
75         strcat(kvm_dir, "/");
76
77         return 0;
78 }
79
/*
 * Set the KVM working directory from a printf-style format.
 *
 * The interface is void, so a failure of set_dir() cannot be returned to
 * the caller; it is reported as a warning instead of being dropped.
 */
void kvm__set_dir(const char *fmt, ...)
{
	va_list args;
	int ret;

	va_start(args, fmt);
	ret = set_dir(fmt, args);
	va_end(args);

	/* set_dir() reports failures as a negative errno value. */
	if (ret < 0)
		pr_warning("Failed to set KVM directory: %s", strerror(-ret));
}
88
89 const char *kvm__get_dir(void)
90 {
91         return kvm_dir;
92 }
93
94 bool kvm__supports_extension(struct kvm *kvm, unsigned int extension)
95 {
96         int ret;
97
98         ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, extension);
99         if (ret < 0)
100                 return false;
101
102         return ret;
103 }
104
105 static int kvm__check_extensions(struct kvm *kvm)
106 {
107         int i;
108
109         for (i = 0; ; i++) {
110                 if (!kvm_req_ext[i].name)
111                         break;
112                 if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) {
113                         pr_err("Unsuppored KVM extension detected: %s",
114                                 kvm_req_ext[i].name);
115                         return -i;
116                 }
117         }
118
119         return 0;
120 }
121
122 struct kvm *kvm__new(void)
123 {
124         struct kvm *kvm = calloc(1, sizeof(*kvm));
125         if (!kvm)
126                 return ERR_PTR(-ENOMEM);
127
128         kvm->sys_fd = -1;
129         kvm->vm_fd = -1;
130
131         return kvm;
132 }
133
/*
 * Tear down a struct kvm: hand it to kvm__arch_delete_ram() and then
 * free the structure itself. Always returns 0.
 *
 * @kvm must not be used after this call.
 */
int kvm__exit(struct kvm *kvm)
{
	kvm__arch_delete_ram(kvm);

	free(kvm);
	return 0;
}
core_exit(kvm__exit);
142
143 /*
144  * Note: KVM_SET_USER_MEMORY_REGION assumes that we don't pass overlapping
145  * memory regions to it. Therefore, be careful if you use this function for
146  * registering memory regions for emulating hardware.
147  */
148 int kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size, void *userspace_addr)
149 {
150         struct kvm_userspace_memory_region mem;
151         int ret;
152
153         mem = (struct kvm_userspace_memory_region) {
154                 .slot                   = kvm->mem_slots++,
155                 .guest_phys_addr        = guest_phys,
156                 .memory_size            = size,
157                 .userspace_addr         = (unsigned long)userspace_addr,
158         };
159
160         ret = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &mem);
161         if (ret < 0)
162                 return -errno;
163
164         return 0;
165 }
166
167 int kvm__recommended_cpus(struct kvm *kvm)
168 {
169         int ret;
170
171         ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_NR_VCPUS);
172         if (ret <= 0)
173                 /*
174                  * api.txt states that if KVM_CAP_NR_VCPUS does not exist,
175                  * assume 4.
176                  */
177                 return 4;
178
179         return ret;
180 }
181
182 /*
183  * The following hack should be removed once 'x86: Raise the hard
184  * VCPU count limit' makes it's way into the mainline.
185  */
186 #ifndef KVM_CAP_MAX_VCPUS
187 #define KVM_CAP_MAX_VCPUS 66
188 #endif
189
190 int kvm__max_cpus(struct kvm *kvm)
191 {
192         int ret;
193
194         ret = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_MAX_VCPUS);
195         if (ret <= 0)
196                 ret = kvm__recommended_cpus(kvm);
197
198         return ret;
199 }
200
201 int kvm__init(struct kvm *kvm)
202 {
203         int ret;
204
205         if (!kvm__arch_cpu_supports_vm()) {
206                 pr_err("Your CPU does not support hardware virtualization");
207                 ret = -ENOSYS;
208                 goto err;
209         }
210
211         kvm->sys_fd = open(kvm->cfg.dev, O_RDWR);
212         if (kvm->sys_fd < 0) {
213                 if (errno == ENOENT)
214                         pr_err("'%s' not found. Please make sure your kernel has CONFIG_KVM "
215                                "enabled and that the KVM modules are loaded.", kvm->cfg.dev);
216                 else if (errno == ENODEV)
217                         pr_err("'%s' KVM driver not available.\n  # (If the KVM "
218                                "module is loaded then 'dmesg' may offer further clues "
219                                "about the failure.)", kvm->cfg.dev);
220                 else
221                         pr_err("Could not open %s: ", kvm->cfg.dev);
222
223                 ret = -errno;
224                 goto err_free;
225         }
226
227         ret = ioctl(kvm->sys_fd, KVM_GET_API_VERSION, 0);
228         if (ret != KVM_API_VERSION) {
229                 pr_err("KVM_API_VERSION ioctl");
230                 ret = -errno;
231                 goto err_sys_fd;
232         }
233
234         kvm->vm_fd = ioctl(kvm->sys_fd, KVM_CREATE_VM, 0);
235         if (kvm->vm_fd < 0) {
236                 ret = kvm->vm_fd;
237                 goto err_sys_fd;
238         }
239
240         if (kvm__check_extensions(kvm)) {
241                 pr_err("A required KVM extention is not supported by OS");
242                 ret = -ENOSYS;
243                 goto err_vm_fd;
244         }
245
246         kvm__arch_init(kvm, kvm->cfg.hugetlbfs_path, kvm->cfg.ram_size);
247
248         kvm__init_ram(kvm);
249
250         if (!kvm->cfg.firmware_filename) {
251                 if (!kvm__load_kernel(kvm, kvm->cfg.kernel_filename,
252                                 kvm->cfg.initrd_filename, kvm->cfg.real_cmdline, kvm->cfg.vidmode))
253                         die("unable to load kernel %s", kvm->cfg.kernel_filename);
254         }
255
256         if (kvm->cfg.firmware_filename) {
257                 if (!kvm__load_firmware(kvm, kvm->cfg.firmware_filename))
258                         die("unable to load firmware image %s: %s", kvm->cfg.firmware_filename, strerror(errno));
259         } else {
260                 ret = kvm__arch_setup_firmware(kvm);
261                 if (ret < 0)
262                         die("kvm__arch_setup_firmware() failed with error %d\n", ret);
263         }
264
265         return 0;
266
267 err_vm_fd:
268         close(kvm->vm_fd);
269 err_sys_fd:
270         close(kvm->sys_fd);
271 err_free:
272         free(kvm);
273 err:
274         return ret;
275 }
276 core_init(kvm__init);
277
/* gzip member header magic, RFC 1952 */
#define GZIP_ID1		0x1f
#define GZIP_ID2		0x8b
/* leading characters of a cpio archive header */
#define CPIO_MAGIC		"0707"

/*
 * Check whether @fd looks like an initrd: either gzip-compressed or a
 * plain cpio archive.
 *
 * Reads the first four bytes and rewinds @fd to offset 0 afterwards.
 * Returns false when the read fails or the file is shorter than four
 * bytes; a short read is rejected so that no uninitialised byte of the
 * buffer is ever compared. Dies if the rewind fails.
 */
static bool initrd_check(int fd)
{
	unsigned char id[4];
	ssize_t nr;

	nr = read_in_full(fd, id, sizeof(id));
	if (nr < 0)
		return false;

	if (lseek(fd, 0, SEEK_SET) < 0)
		die_perror("lseek");

	/* Fewer than four bytes cannot hold either magic in full. */
	if ((size_t)nr < sizeof(id))
		return false;

	return (id[0] == GZIP_ID1 && id[1] == GZIP_ID2) ||
		!memcmp(id, CPIO_MAGIC, sizeof(id));
}
296
297 bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
298                 const char *initrd_filename, const char *kernel_cmdline, u16 vidmode)
299 {
300         bool ret;
301         int fd_kernel = -1, fd_initrd = -1;
302
303         fd_kernel = open(kernel_filename, O_RDONLY);
304         if (fd_kernel < 0)
305                 die("Unable to open kernel %s", kernel_filename);
306
307         if (initrd_filename) {
308                 fd_initrd = open(initrd_filename, O_RDONLY);
309                 if (fd_initrd < 0)
310                         die("Unable to open initrd %s", initrd_filename);
311
312                 if (!initrd_check(fd_initrd))
313                         die("%s is not an initrd", initrd_filename);
314         }
315
316         ret = load_bzimage(kvm, fd_kernel, fd_initrd, kernel_cmdline, vidmode);
317
318         if (ret)
319                 goto found_kernel;
320
321         pr_warning("%s is not a bzImage. Trying to load it as a flat binary...", kernel_filename);
322
323         ret = load_flat_binary(kvm, fd_kernel, fd_initrd, kernel_cmdline);
324
325         if (ret)
326                 goto found_kernel;
327
328         if (initrd_filename)
329                 close(fd_initrd);
330         close(fd_kernel);
331
332         die("%s is not a valid bzImage or flat binary", kernel_filename);
333
334 found_kernel:
335         if (initrd_filename)
336                 close(fd_initrd);
337         close(fd_kernel);
338
339         return ret;
340 }
341
342 #define TIMER_INTERVAL_NS 1000000       /* 1 msec */
343
344 /*
345  * This function sets up a timer that's used to inject interrupts from the
346  * userspace hypervisor into the guest at periodical intervals. Please note
347  * that clock interrupt, for example, is not handled here.
348  */
349 int kvm_timer__init(struct kvm *kvm)
350 {
351         struct itimerspec its;
352         struct sigevent sev;
353         int r;
354
355         memset(&sev, 0, sizeof(struct sigevent));
356         sev.sigev_value.sival_int       = 0;
357         sev.sigev_notify                = SIGEV_THREAD_ID;
358         sev.sigev_signo                 = SIGALRM;
359         sev.sigev_value.sival_ptr       = kvm;
360         sev._sigev_un._tid              = syscall(__NR_gettid);
361
362         r = timer_create(CLOCK_REALTIME, &sev, &kvm->timerid);
363         if (r < 0)
364                 return r;
365
366         its.it_value.tv_sec             = TIMER_INTERVAL_NS / 1000000000;
367         its.it_value.tv_nsec            = TIMER_INTERVAL_NS % 1000000000;
368         its.it_interval.tv_sec          = its.it_value.tv_sec;
369         its.it_interval.tv_nsec         = its.it_value.tv_nsec;
370
371         r = timer_settime(kvm->timerid, 0, &its, NULL);
372         if (r < 0) {
373                 timer_delete(kvm->timerid);
374                 return r;
375         }
376
377         return 0;
378 }
379 firmware_init(kvm_timer__init);
380
381 int kvm_timer__exit(struct kvm *kvm)
382 {
383         if (kvm->timerid)
384                 if (timer_delete(kvm->timerid) < 0)
385                         die("timer_delete()");
386
387         kvm->timerid = 0;
388
389         return 0;
390 }
391 firmware_exit(kvm_timer__exit);
392
/*
 * Print guest memory as rows of eight hex bytes.
 *
 * @addr: guest address to start at
 * @size: number of bytes to dump; rounded down to a multiple of 8
 *
 * Output stops at the first row that is not entirely accepted by
 * host_ptr_in_ram(). Both the first and the last byte of each row are
 * checked, so a row starting inside RAM but running past it is never read.
 */
void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size)
{
	unsigned char *p;
	unsigned long n;

	size &= ~7UL;	/* whole 8-byte rows only */
	if (!size)
		return;

	p = guest_flat_to_host(kvm, addr);

	for (n = 0; n < size; n += 8) {
		if (!host_ptr_in_ram(kvm, p + n) ||
		    !host_ptr_in_ram(kvm, p + n + 7))
			break;

		printf("  0x%08lx: %02x %02x %02x %02x  %02x %02x %02x %02x\n",
			addr + n, p[n + 0], p[n + 1], p[n + 2], p[n + 3],
				  p[n + 4], p[n + 5], p[n + 6], p[n + 7]);
	}
}
413
414 void kvm__pause(struct kvm *kvm)
415 {
416         int i, paused_vcpus = 0;
417
418         /* Check if the guest is running */
419         if (!kvm->cpus[0] || kvm->cpus[0]->thread == 0)
420                 return;
421
422         mutex_lock(&pause_lock);
423
424         pause_event = eventfd(0, 0);
425         if (pause_event < 0)
426                 die("Failed creating pause notification event");
427         for (i = 0; i < kvm->nrcpus; i++)
428                 pthread_kill(kvm->cpus[i]->thread, SIGKVMPAUSE);
429
430         while (paused_vcpus < kvm->nrcpus) {
431                 u64 cur_read;
432
433                 if (read(pause_event, &cur_read, sizeof(cur_read)) < 0)
434                         die("Failed reading pause event");
435                 paused_vcpus += cur_read;
436         }
437         close(pause_event);
438 }
439
440 void kvm__continue(struct kvm *kvm)
441 {
442         /* Check if the guest is running */
443         if (!kvm->cpus[0] || kvm->cpus[0]->thread == 0)
444                 return;
445
446         mutex_unlock(&pause_lock);
447 }
448
449 void kvm__notify_paused(void)
450 {
451         u64 p = 1;
452
453         if (write(pause_event, &p, sizeof(p)) < 0)
454                 die("Failed notifying of paused VCPU.");
455
456         mutex_lock(&pause_lock);
457         mutex_unlock(&pause_lock);
458 }