/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2013, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Maintained by:       Xavier Deguillard <xdeguillard@vmware.com>
 *                      Philip Moltmann <moltmann@vmware.com>
 */

/*
 * This is the VMware physical memory management driver for Linux. The
 * driver acts like a "balloon" that can be inflated to reclaim physical
 * pages by reserving them in the guest and invalidating them in the
 * monitor, freeing up the underlying machine pages so they can be
 * allocated to other guests. The balloon can also be deflated to allow
 * the guest to use more physical memory. Higher-level policies can
 * control the sizes of balloons in VMs in order to manage physical
 * memory resources.
 */
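
/*
 * In outline (a summary derived from the code below, not part of the
 * protocol): each one-second work cycle runs
 *
 *   vmballoon_work()
 *     -> vmballoon_send_get_target()   - ask the host for the target size
 *     -> vmballoon_inflate()           - alloc_page() + LOCK hypercalls
 *        or vmballoon_deflate()        - UNLOCK hypercalls + __free_page()
 *
 * Inflating hands guest pages over to the hypervisor; deflating reclaims
 * them for the guest.
 */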

//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.3.3.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Various constants controlling the rate of inflating/deflating the
 * balloon, measured in pages.
 */

/*
 * Rate of allocating memory when there is no memory pressure
 * (driver performs non-sleeping allocations).
 */
#define VMW_BALLOON_NOSLEEP_ALLOC_MAX   16384U

/*
 * Rates of memory allocation when the guest experiences memory pressure
 * (driver performs sleeping allocations).
 */
#define VMW_BALLOON_RATE_ALLOC_MIN      512U
#define VMW_BALLOON_RATE_ALLOC_MAX      2048U
#define VMW_BALLOON_RATE_ALLOC_INC      16U

/*
 * Rates for releasing pages while deflating the balloon.
 */
#define VMW_BALLOON_RATE_FREE_MIN       512U
#define VMW_BALLOON_RATE_FREE_MAX       16384U
#define VMW_BALLOON_RATE_FREE_INC       16U

/*
 * When the guest is under memory pressure, use a reduced page allocation
 * rate for the next several cycles.
 */
#define VMW_BALLOON_SLOW_CYCLES         4

/*
 * Use __GFP_HIGHMEM to allow pages from the HIGHMEM zone. We don't
 * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use
 * __GFP_NOWARN to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP          (__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep.  This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP         (GFP_HIGHUSER)

/* Maximum number of page allocations without yielding the processor */
#define VMW_BALLOON_YIELD_THRESHOLD     1024

/* Maximum number of refused pages we accumulate during an inflation cycle */
#define VMW_BALLOON_MAX_REFUSED         16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT             0x5670
#define VMW_BALLOON_HV_MAGIC            0x456c6d6f
#define VMW_BALLOON_GUEST_ID            1       /* Linux */

enum vmwballoon_capabilities {
        /*
         * Bit 0 is reserved and not associated with any capability.
         */
        VMW_BALLOON_BASIC_CMDS          = (1 << 1),
        VMW_BALLOON_BATCHED_CMDS        = (1 << 2)
};

#define VMW_BALLOON_CAPABILITIES        (VMW_BALLOON_BASIC_CMDS \
                                        | VMW_BALLOON_BATCHED_CMDS)

/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available,
 *
 * VMW_BALLOON_BASIC_CMDS:
 *      LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *      BATCHED_LOCK and BATCHED_UNLOCK commands.
 */
#define VMW_BALLOON_CMD_START           0
#define VMW_BALLOON_CMD_GET_TARGET      1
#define VMW_BALLOON_CMD_LOCK            2
#define VMW_BALLOON_CMD_UNLOCK          3
#define VMW_BALLOON_CMD_GUEST_ID        4
#define VMW_BALLOON_CMD_BATCHED_LOCK    6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK  7

/* error codes */
#define VMW_BALLOON_SUCCESS                     0
#define VMW_BALLOON_FAILURE                     -1
#define VMW_BALLOON_ERROR_CMD_INVALID           1
#define VMW_BALLOON_ERROR_PPN_INVALID           2
#define VMW_BALLOON_ERROR_PPN_LOCKED            3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED          4
#define VMW_BALLOON_ERROR_PPN_PINNED            5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED         6
#define VMW_BALLOON_ERROR_RESET                 7
#define VMW_BALLOON_ERROR_BUSY                  8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES   (0x03000000)

/* Batch page description */

/*
 * Layout of a page in the batch page:
 *
 * +-------------+----------+--------+
 * |             |          |        |
 * | Page number | Reserved | Status |
 * |             |          |        |
 * +-------------+----------+--------+
 * 64  PAGE_SHIFT          6         0
 *
 * For now only 4K pages are supported, but we can easily support large pages
 * by using bits in the reserved field.
 *
 * The reserved field should be set to 0.
 */
#define VMW_BALLOON_BATCH_MAX_PAGES     (PAGE_SIZE / sizeof(u64))
#define VMW_BALLOON_BATCH_STATUS_MASK   ((1UL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK     (~((1UL << PAGE_SHIFT) - 1))
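
/*
 * Worked example with illustrative values: a 4K page with PPN 0x12345
 * that the hypervisor locked successfully would appear in the batch
 * page as
 *
 *   entry  = 0x12345UL << PAGE_SHIFT;                - set by the guest
 *   status = entry & VMW_BALLOON_BATCH_STATUS_MASK;  - VMW_BALLOON_SUCCESS
 *   pa     = entry & VMW_BALLOON_BATCH_PAGE_MASK;    - 0x12345000
 *
 * The guest fills in the physical addresses; the hypervisor reports
 * per-page status in the low bits of each entry.
 */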

struct vmballoon_batch_page {
        u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
};

static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
{
        return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
}

static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
                                int idx)
{
        return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
}

static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
                                u64 pa)
{
        batch->pages[idx] = pa;
}

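/*
 * The backdoor hypercall is an "inl" port read with a magic value in %eax.
 * Register usage, as encoded by the asm constraints below:
 *
 *   inputs:  %eax = VMW_BALLOON_HV_MAGIC, %ecx = command,
 *            %edx = VMW_BALLOON_HV_PORT, %ebx = arg1, %esi = arg2
 *   outputs: %eax = status, %ebx = result
 *            (for the START command the result is returned in %ecx)
 */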
#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)             \
({                                                              \
        unsigned long __status, __dummy1, __dummy2, __dummy3;   \
        __asm__ __volatile__ ("inl %%dx" :                      \
                "=a"(__status),                                 \
                "=c"(__dummy1),                                 \
                "=d"(__dummy2),                                 \
                "=b"(result),                                   \
                "=S" (__dummy3) :                               \
                "0"(VMW_BALLOON_HV_MAGIC),                      \
                "1"(VMW_BALLOON_CMD_##cmd),                     \
                "2"(VMW_BALLOON_HV_PORT),                       \
                "3"(arg1),                                      \
                "4" (arg2) :                                    \
                "memory");                                      \
        if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)     \
                result = __dummy1;                              \
        result &= -1UL;                                         \
        __status & -1UL;                                        \
})

#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
        unsigned int timer;

        /* allocation statistics */
        unsigned int alloc;
        unsigned int alloc_fail;
        unsigned int sleep_alloc;
        unsigned int sleep_alloc_fail;
        unsigned int refused_alloc;
        unsigned int refused_free;
        unsigned int free;

        /* monitor operations */
        unsigned int lock;
        unsigned int lock_fail;
        unsigned int unlock;
        unsigned int unlock_fail;
        unsigned int target;
        unsigned int target_fail;
        unsigned int start;
        unsigned int start_fail;
        unsigned int guest_type;
        unsigned int guest_type_fail;
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

struct vmballoon;

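/*
 * The single-page and batched protocol variants are abstracted behind
 * this ops vtable: add_page() stages a freshly allocated page for the
 * next hypervisor operation, lock() hands the staged page(s) over to
 * the host, and unlock() asks the host to give them back to the guest.
 */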
struct vmballoon_ops {
        void (*add_page)(struct vmballoon *b, int idx, struct page *p);
        int (*lock)(struct vmballoon *b, unsigned int num_pages,
                                                unsigned int *target);
        int (*unlock)(struct vmballoon *b, unsigned int num_pages,
                                                unsigned int *target);
};

struct vmballoon {

        /* list of reserved physical pages */
        struct list_head pages;

        /* transient list of non-balloonable pages */
        struct list_head refused_pages;
        unsigned int n_refused_pages;

        /* balloon size in pages */
        unsigned int size;
        unsigned int target;

        /* reset flag */
        bool reset_required;

        /* adjustment rates (pages per second) */
        unsigned int rate_alloc;
        unsigned int rate_free;

        /* slowdown page allocations for next few cycles */
        unsigned int slow_allocation_cycles;

        unsigned long capabilities;

        struct vmballoon_batch_page *batch_page;
        unsigned int batch_max_pages;
        struct page *page;

        const struct vmballoon_ops *ops;

#ifdef CONFIG_DEBUG_FS
        /* statistics */
        struct vmballoon_stats stats;

        /* debugfs file exporting statistics */
        struct dentry *dbg_entry;
#endif

        struct sysinfo sysinfo;

        struct delayed_work dwork;
};

static struct vmballoon balloon;

/*
 * Send the "start" command to the host, communicating the supported
 * version of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
        unsigned long status, capabilities, dummy = 0;

        STATS_INC(b->stats.start);

        status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);

        switch (status) {
        case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
                b->capabilities = capabilities;
                return true;
        case VMW_BALLOON_SUCCESS:
                b->capabilities = VMW_BALLOON_BASIC_CMDS;
                return true;
        }

        pr_debug("%s - failed, hv returns %ld\n", __func__, status);
        STATS_INC(b->stats.start_fail);
        return false;
}

static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
{
        switch (status) {
        case VMW_BALLOON_SUCCESS:
                return true;

        case VMW_BALLOON_ERROR_RESET:
                b->reset_required = true;
                /* fall through */

        default:
                return false;
        }
}

/*
 * Communicate the guest type to the host so that it can adjust the
 * ballooning algorithm to the one most appropriate for the guest. This
 * command is normally issued after sending the "start" command and is
 * part of the standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
        unsigned long status, dummy = 0;

        status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
                                dummy);

        STATS_INC(b->stats.guest_type);

        if (vmballoon_check_status(b, status))
                return true;

        pr_debug("%s - failed, hv returns %ld\n", __func__, status);
        STATS_INC(b->stats.guest_type_fail);
        return false;
}

/*
 * Retrieve the desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
{
        unsigned long status;
        unsigned long target;
        unsigned long limit;
        unsigned long dummy = 0;
        u32 limit32;

        /*
         * si_meminfo() is cheap. Moreover, we want to provide dynamic
         * max balloon size later. So let us call si_meminfo() every
         * iteration.
         */
        si_meminfo(&b->sysinfo);
        limit = b->sysinfo.totalram;

        /* Ensure limit fits in 32 bits */
        limit32 = (u32)limit;
        if (limit != limit32)
                return false;

        /* update stats */
        STATS_INC(b->stats.target);

        status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
        if (vmballoon_check_status(b, status)) {
                *new_target = target;
                return true;
        }

        pr_debug("%s - failed, hv returns %ld\n", __func__, status);
        STATS_INC(b->stats.target_fail);
        return false;
}

/*
 * Notify the host about an allocated page so that the host can use it
 * without fear that the guest will need it. The host may reject some
 * pages; we need to check the return value and possibly submit a
 * different page.
 */
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
                                unsigned int *hv_status, unsigned int *target)
{
        unsigned long status, dummy = 0;
        u32 pfn32;

        pfn32 = (u32)pfn;
        if (pfn32 != pfn)
                return -1;

        STATS_INC(b->stats.lock);

        *hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
        if (vmballoon_check_status(b, status))
                return 0;

        pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
        STATS_INC(b->stats.lock_fail);
        return 1;
}

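/*
 * Batched counterpart of vmballoon_send_lock_page(): pfn refers to the
 * batch page, which lists the PPNs of up to num_pages pages to lock in
 * a single hypercall.
 */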
static int vmballoon_send_batched_lock(struct vmballoon *b,
                                unsigned int num_pages, unsigned int *target)
{
        unsigned long status;
        unsigned long pfn = page_to_pfn(b->page);

        STATS_INC(b->stats.lock);

        status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages, *target);
        if (vmballoon_check_status(b, status))
                return 0;

        pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
        STATS_INC(b->stats.lock_fail);
        return 1;
}

/*
 * Notify the host that the guest intends to release the given page back
 * into the pool of pages available to the guest.
 */
static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
                                                        unsigned int *target)
{
        unsigned long status, dummy = 0;
        u32 pfn32;

        pfn32 = (u32)pfn;
        if (pfn32 != pfn)
                return false;

        STATS_INC(b->stats.unlock);

        status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
        if (vmballoon_check_status(b, status))
                return true;

        pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
        STATS_INC(b->stats.unlock_fail);
        return false;
}

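/*
 * Batched counterpart of vmballoon_send_unlock_page(): the batch page
 * lists the PPNs of up to num_pages pages to release back to the guest
 * in a single hypercall.
 */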
static bool vmballoon_send_batched_unlock(struct vmballoon *b,
                                unsigned int num_pages, unsigned int *target)
{
        unsigned long status;
        unsigned long pfn = page_to_pfn(b->page);

        STATS_INC(b->stats.unlock);

        status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages, *target);
        if (vmballoon_check_status(b, status))
                return true;

        pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
        STATS_INC(b->stats.unlock_fail);
        return false;
}

/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when the host decides to "reset" the balloon for one reason or
 * another. Unlike a normal "deflate", we do not (shall not) notify the
 * host of the pages being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
        struct page *page, *next;
        unsigned int count = 0;

        list_for_each_entry_safe(page, next, &b->pages, lru) {
                list_del(&page->lru);
                __free_page(page);
                STATS_INC(b->stats.free);
                b->size--;

                if (++count >= b->rate_free) {
                        count = 0;
                        cond_resched();
                }
        }

        if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
                if (b->batch_page)
                        vunmap(b->batch_page);

                if (b->page)
                        __free_page(b->page);
        }
}

/*
 * Notify the host of a ballooned page. If the host rejects the page,
 * put it on the list of refused pages; those pages are then released
 * at the end of the inflation cycle.
 */
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
                                                        unsigned int *target)
{
        int locked, hv_status;
        struct page *page = b->page;

        locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
                                                                target);
        if (locked > 0) {
                STATS_INC(b->stats.refused_alloc);

                if (hv_status == VMW_BALLOON_ERROR_RESET ||
                                hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
                        __free_page(page);
                        return -EIO;
                }

                /*
                 * Place page on the list of non-balloonable pages
                 * and retry allocation, unless we already accumulated
                 * too many of them, in which case take a breather.
                 */
                if (b->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
                        b->n_refused_pages++;
                        list_add(&page->lru, &b->refused_pages);
                } else {
                        __free_page(page);
                }
                return -EIO;
        }

        /* track allocated page */
        list_add(&page->lru, &b->pages);

        /* update balloon size */
        b->size++;

        return 0;
}

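/*
 * Batched version of vmballoon_lock_page(). If the hypercall itself
 * fails, free every page in the batch; otherwise walk the per-entry
 * status codes and sort each page onto the balloon list, the refused
 * list, or back to the free pool.
 */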
static int vmballoon_lock_batched_page(struct vmballoon *b,
                                unsigned int num_pages, unsigned int *target)
{
        int locked, i;

        locked = vmballoon_send_batched_lock(b, num_pages, target);
        if (locked > 0) {
                for (i = 0; i < num_pages; i++) {
                        u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
                        struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

                        __free_page(p);
                }

                return -EIO;
        }

        for (i = 0; i < num_pages; i++) {
                u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
                struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

                locked = vmballoon_batch_get_status(b->batch_page, i);

                switch (locked) {
                case VMW_BALLOON_SUCCESS:
                        list_add(&p->lru, &b->pages);
                        b->size++;
                        break;
                case VMW_BALLOON_ERROR_PPN_PINNED:
                case VMW_BALLOON_ERROR_PPN_INVALID:
                        if (b->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
                                list_add(&p->lru, &b->refused_pages);
                                b->n_refused_pages++;
                                break;
                        }
                        /* Fallthrough */
                case VMW_BALLOON_ERROR_RESET:
                case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
                        __free_page(p);
                        break;
                default:
                        /* This should never happen */
                        WARN_ON_ONCE(true);
                }
        }

        return 0;
}

/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
                                                        unsigned int *target)
{
        struct page *page = b->page;

        if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
                list_add(&page->lru, &b->pages);
                return -EIO;
        }

        /* deallocate page */
        __free_page(page);
        STATS_INC(b->stats.free);

        /* update balloon size */
        b->size--;

        return 0;
}

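/*
 * Batched version of vmballoon_unlock_page(). Pages whose entries report
 * success are freed back to the guest; any page the hypervisor failed to
 * unlock remains owned by the balloon.
 */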
static int vmballoon_unlock_batched_page(struct vmballoon *b,
                                unsigned int num_pages, unsigned int *target)
{
        int locked, i, ret = 0;
        bool hv_success;

        hv_success = vmballoon_send_batched_unlock(b, num_pages, target);
        if (!hv_success)
                ret = -EIO;

        for (i = 0; i < num_pages; i++) {
                u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
                struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

                locked = vmballoon_batch_get_status(b->batch_page, i);
                if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
                        /*
                         * That page wasn't successfully unlocked by the
                         * hypervisor, re-add it to the list of pages owned by
                         * the balloon driver.
                         */
                        list_add(&p->lru, &b->pages);
                } else {
                        /* deallocate page */
                        __free_page(p);
                        STATS_INC(b->stats.free);

                        /* update balloon size */
                        b->size--;
                }
        }

        return ret;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b)
{
        struct page *page, *next;

        list_for_each_entry_safe(page, next, &b->refused_pages, lru) {
                list_del(&page->lru);
                __free_page(page);
                STATS_INC(b->stats.refused_free);
        }

        b->n_refused_pages = 0;
}

static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
        b->page = p;
}

static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
                                struct page *p)
{
        vmballoon_batch_set_pa(b->batch_page, idx,
                        (u64)page_to_pfn(p) << PAGE_SHIFT);
}

/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
        unsigned int rate;
        unsigned int allocations = 0;
        unsigned int num_pages = 0;
        int error = 0;
        gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;

        pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

        /*
         * First try NOSLEEP page allocations to inflate balloon.
         *
         * If we do not throttle nosleep allocations, we can drain all
         * free pages in the guest quickly (if the balloon target is high).
         * As a side-effect, draining free pages helps to inform (force)
         * the guest to start swapping if the balloon target is not met yet,
         * which is a desired behavior. However, the balloon driver can
         * consume all available CPU cycles if too many pages are allocated
         * in a second. Therefore, we throttle nosleep allocations even when
         * the guest is not under memory pressure. OTOH, if we have already
         * predicted that the guest is under memory pressure, then we
         * slow down page allocations considerably.
         */

        /*
         * Start with the no-sleep allocation rate, which may be higher
         * than the sleeping allocation rate.
         */
        rate = b->slow_allocation_cycles ?
                        b->rate_alloc : VMW_BALLOON_NOSLEEP_ALLOC_MAX;

        pr_debug("%s - goal: %d, no-sleep rate: %d, sleep rate: %d\n",
                 __func__, b->target - b->size, rate, b->rate_alloc);

        while (b->size < b->target && num_pages < b->target - b->size) {
                struct page *page;

                if (flags == VMW_PAGE_ALLOC_NOSLEEP)
                        STATS_INC(b->stats.alloc);
                else
                        STATS_INC(b->stats.sleep_alloc);

                page = alloc_page(flags);
                if (!page) {
                        if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
                                /*
                                 * CANSLEEP page allocation failed, so the
                                 * guest is under severe memory pressure.
                                 * Quickly decrease the allocation rate.
                                 */
                                b->rate_alloc = max(b->rate_alloc / 2,
                                                    VMW_BALLOON_RATE_ALLOC_MIN);
                                STATS_INC(b->stats.sleep_alloc_fail);
                                break;
                        }
                        STATS_INC(b->stats.alloc_fail);

                        /*
                         * NOSLEEP page allocation failed, so the guest is
                         * under memory pressure. Let us slow down page
                         * allocations for the next few cycles so that the
                         * guest gets out of memory pressure. Also, if we
                         * already allocated b->rate_alloc pages, let's pause,
                         * otherwise switch to sleeping allocations.
                         */
                        b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;

                        if (allocations >= b->rate_alloc)
                                break;

                        flags = VMW_PAGE_ALLOC_CANSLEEP;
                        /* Lower rate for sleeping allocations. */
                        rate = b->rate_alloc;
                        continue;
                }

                b->ops->add_page(b, num_pages++, page);
                if (num_pages == b->batch_max_pages) {
                        error = b->ops->lock(b, num_pages, &b->target);
                        num_pages = 0;
                        if (error)
                                break;
                }

                if (++allocations > VMW_BALLOON_YIELD_THRESHOLD) {
                        cond_resched();
                        allocations = 0;
                }

                if (allocations >= rate) {
                        /* We allocated enough pages, let's take a break. */
                        break;
                }
        }

        if (num_pages > 0)
                b->ops->lock(b, num_pages, &b->target);

        /*
         * If we reached our goal without failures, try increasing the
         * allocation rate.
         */
        if (error == 0 && allocations >= b->rate_alloc) {
                unsigned int mult = allocations / b->rate_alloc;

                b->rate_alloc =
                        min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
                            VMW_BALLOON_RATE_ALLOC_MAX);
        }

        vmballoon_release_refused_pages(b);
}

/*
 * Decrease the size of the balloon, allowing the guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
        struct page *page, *next;
        unsigned int i = 0;
        unsigned int num_pages = 0;
        int error = 0;   /* initialized: read at the end even if no unlock was issued */

        pr_debug("%s - size: %d, target %d, rate: %d\n", __func__, b->size,
                                                b->target, b->rate_free);

        /* free pages to reach target */
        list_for_each_entry_safe(page, next, &b->pages, lru) {
                list_del(&page->lru);
                b->ops->add_page(b, num_pages++, page);

                if (num_pages == b->batch_max_pages) {
                        error = b->ops->unlock(b, num_pages, &b->target);
                        num_pages = 0;
                        if (error) {
                                /* quickly decrease rate in case of error */
                                b->rate_free = max(b->rate_free / 2,
                                                VMW_BALLOON_RATE_FREE_MIN);
                                return;
                        }
                }

                if (++i >= b->size - b->target)
                        break;
        }

        if (num_pages > 0)
                b->ops->unlock(b, num_pages, &b->target);

        /* slowly increase rate if there were no errors */
        if (error == 0)
                b->rate_free = min(b->rate_free + VMW_BALLOON_RATE_FREE_INC,
                                   VMW_BALLOON_RATE_FREE_MAX);
}

static const struct vmballoon_ops vmballoon_basic_ops = {
        .add_page = vmballoon_add_page,
        .lock = vmballoon_lock_page,
        .unlock = vmballoon_unlock_page
};

static const struct vmballoon_ops vmballoon_batched_ops = {
        .add_page = vmballoon_add_batched_page,
        .lock = vmballoon_lock_batched_page,
        .unlock = vmballoon_unlock_batched_page
};

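/*
 * Allocate the page used to pass batches of PPNs to the hypervisor. The
 * page may come from highmem (VMW_PAGE_ALLOC_NOSLEEP includes
 * __GFP_HIGHMEM), so vmap() it to obtain a permanent kernel virtual
 * address for b->batch_page.
 */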
static bool vmballoon_init_batching(struct vmballoon *b)
{
        b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
        if (!b->page)
                return false;

        b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
        if (!b->batch_page) {
                __free_page(b->page);
                return false;
        }

        return true;
}

/*
 * Perform the standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting the protocol. This operation normally
 * happens when the host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
        /* free all pages, skipping monitor unlock */
        vmballoon_pop(b);

        if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
                return;

        if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
                b->ops = &vmballoon_batched_ops;
                b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
                if (!vmballoon_init_batching(b)) {
                        /*
                         * We failed to initialize batching; inform the monitor
                         * about it by sending a null capability.
                         *
                         * The guest will retry in one second.
                         */
                        vmballoon_send_start(b, 0);
                        return;
                }
        } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
                b->ops = &vmballoon_basic_ops;
                b->batch_max_pages = 1;
        }

        b->reset_required = false;
        if (!vmballoon_send_guest_id(b))
                pr_err("failed to send guest ID to the host\n");
}

/*
 * Balloon work function: reset the protocol, if needed, get the new size
 * and adjust the balloon as needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
        struct delayed_work *dwork = to_delayed_work(work);
        struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
        unsigned int target;

        STATS_INC(b->stats.timer);

        if (b->reset_required)
                vmballoon_reset(b);

        if (b->slow_allocation_cycles > 0)
                b->slow_allocation_cycles--;

        if (vmballoon_send_get_target(b, &target)) {
                /* update target, adjust size */
                b->target = target;

                if (b->size < target)
                        vmballoon_inflate(b);
                else if (b->size > target)
                        vmballoon_deflate(b);
        }

        /*
         * We are using a freezable workqueue so that balloon operations are
         * stopped while the system transitions to/from sleep/hibernation.
         */
        queue_delayed_work(system_freezable_wq,
                           dwork, round_jiffies_relative(HZ));
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
        struct vmballoon *b = f->private;
        struct vmballoon_stats *stats = &b->stats;

        /* format capabilities info */
        seq_printf(f,
                   "balloon capabilities:   %#4x\n"
                   "used capabilities:      %#4lx\n",
                   VMW_BALLOON_CAPABILITIES, b->capabilities);

        /* format size info */
        seq_printf(f,
                   "target:             %8d pages\n"
                   "current:            %8d pages\n",
                   b->target, b->size);

        /* format rate info */
        seq_printf(f,
                   "rateNoSleepAlloc:   %8d pages/sec\n"
                   "rateSleepAlloc:     %8d pages/sec\n"
                   "rateFree:           %8d pages/sec\n",
                   VMW_BALLOON_NOSLEEP_ALLOC_MAX,
                   b->rate_alloc, b->rate_free);

        seq_printf(f,
                   "\n"
                   "timer:              %8u\n"
                   "start:              %8u (%4u failed)\n"
                   "guestType:          %8u (%4u failed)\n"
                   "lock:               %8u (%4u failed)\n"
                   "unlock:             %8u (%4u failed)\n"
                   "target:             %8u (%4u failed)\n"
                   "primNoSleepAlloc:   %8u (%4u failed)\n"
                   "primCanSleepAlloc:  %8u (%4u failed)\n"
                   "primFree:           %8u\n"
                   "errAlloc:           %8u\n"
                   "errFree:            %8u\n",
                   stats->timer,
                   stats->start, stats->start_fail,
                   stats->guest_type, stats->guest_type_fail,
                   stats->lock,  stats->lock_fail,
                   stats->unlock, stats->unlock_fail,
                   stats->target, stats->target_fail,
                   stats->alloc, stats->alloc_fail,
                   stats->sleep_alloc, stats->sleep_alloc_fail,
                   stats->free,
                   stats->refused_alloc, stats->refused_free);

        return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
        return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
        .owner          = THIS_MODULE,
        .open           = vmballoon_debug_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
        int error;

        b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
                                           &vmballoon_debug_fops);
        if (IS_ERR(b->dbg_entry)) {
                error = PTR_ERR(b->dbg_entry);
                pr_err("failed to create debugfs entry, error: %d\n", error);
                return error;
        }

        return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
        debugfs_remove(b->dbg_entry);
}

#else

static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
        return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif  /* CONFIG_DEBUG_FS */

static int __init vmballoon_init(void)
{
        int error;

        /*
         * Check if we are running on VMware's hypervisor and bail out
         * if we are not.
         */
        if (x86_hyper != &x86_hyper_vmware)
                return -ENODEV;

        INIT_LIST_HEAD(&balloon.pages);
        INIT_LIST_HEAD(&balloon.refused_pages);

        /* initialize rates */
        balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;
        balloon.rate_free = VMW_BALLOON_RATE_FREE_MAX;

        INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

        /*
         * Start balloon.
         */
        if (!vmballoon_send_start(&balloon, VMW_BALLOON_CAPABILITIES)) {
                pr_err("failed to send start command to the host\n");
                return -EIO;
        }

        if ((balloon.capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
                balloon.ops = &vmballoon_batched_ops;
                balloon.batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
                if (!vmballoon_init_batching(&balloon)) {
                        pr_err("failed to init batching\n");
                        return -EIO;
                }
        } else if ((balloon.capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
                balloon.ops = &vmballoon_basic_ops;
                balloon.batch_max_pages = 1;
        }

        if (!vmballoon_send_guest_id(&balloon)) {
                pr_err("failed to send guest ID to the host\n");
                return -EIO;
        }

        error = vmballoon_debugfs_init(&balloon);
        if (error)
                return error;

        queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

        return 0;
}
module_init(vmballoon_init);

static void __exit vmballoon_exit(void)
{
        cancel_delayed_work_sync(&balloon.dwork);

        vmballoon_debugfs_exit(&balloon);

        /*
         * Deallocate all reserved memory, and reset connection with monitor.
         * Reset connection before deallocating memory to avoid potential for
         * additional spurious resets from guest touching deallocated pages.
         */
        vmballoon_send_start(&balloon, VMW_BALLOON_CAPABILITIES);
        vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);