/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2013, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Maintained by:	Xavier Deguillard <xdeguillard@vmware.com>
 *			Philip Moltmann <moltmann@vmware.com>
 */

/*
 * This is the VMware physical memory management driver for Linux. The
 * driver acts like a "balloon" that can be inflated to reclaim physical
 * pages by reserving them in the guest and invalidating them in the
 * monitor, freeing up the underlying machine pages so they can be
 * allocated to other guests. The balloon can also be deflated to allow
 * the guest to use more physical memory. Higher level policies can
 * control the sizes of balloons in VMs in order to manage physical
 * memory resources.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_VERSION("1.4.0.0-k");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");

/*
 * Various constants controlling rate of inflating/deflating balloon,
 * measured in pages.
 */

/*
 * Rates of memory allocation when guest experiences memory pressure
 * (driver performs sleeping allocations).
 */
#define VMW_BALLOON_RATE_ALLOC_MIN	512U
#define VMW_BALLOON_RATE_ALLOC_MAX	2048U
#define VMW_BALLOON_RATE_ALLOC_INC	16U

/*
 * When guest is under memory pressure, use a reduced page allocation
 * rate for next several cycles.
 */
#define VMW_BALLOON_SLOW_CYCLES		4

/*
 * Use __GFP_HIGHMEM to allow pages from HIGHMEM zone. We don't
 * allow wait (__GFP_WAIT) for NOSLEEP page allocations. Use
 * __GFP_NOWARN to suppress page allocation failure warnings.
 */
#define VMW_PAGE_ALLOC_NOSLEEP		(__GFP_HIGHMEM|__GFP_NOWARN)

/*
 * Use GFP_HIGHUSER when executing in a separate kernel thread
 * context and allocation can sleep. This is less stressful to
 * the guest memory system, since it allows the thread to block
 * while memory is reclaimed, and won't take pages from emergency
 * low-memory pools.
 */
#define VMW_PAGE_ALLOC_CANSLEEP		(GFP_HIGHUSER)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED		16

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT		0x5670
#define VMW_BALLOON_HV_MAGIC		0x456c6d6f
#define VMW_BALLOON_GUEST_ID		1	/* Linux */

enum vmwballoon_capabilities {
	/*
	 * Bit 0 is reserved and not associated to any capability.
	 */
	VMW_BALLOON_BASIC_CMDS		= (1 << 1),
	VMW_BALLOON_BATCHED_CMDS	= (1 << 2),
	VMW_BALLOON_BATCHED_2M_CMDS	= (1 << 3),
};

#define VMW_BALLOON_CAPABILITIES	(VMW_BALLOON_BASIC_CMDS \
					| VMW_BALLOON_BATCHED_CMDS \
					| VMW_BALLOON_BATCHED_2M_CMDS)

#define VMW_BALLOON_2M_SHIFT		(9)
#define VMW_BALLOON_NUM_PAGE_SIZES	(2)

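/*
 * With 4k base pages, VMW_BALLOON_2M_SHIFT means a 2m page spans
 * 1 << 9 == 512 small pages; vmballoon_page_size() below reports
 * balloon chunk sizes in these 4k-page units.
 */
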
/*
 * Backdoor commands availability:
 *
 * START, GET_TARGET and GUEST_ID are always available,
 *
 * VMW_BALLOON_BASIC_CMDS:
 *	LOCK and UNLOCK commands,
 * VMW_BALLOON_BATCHED_CMDS:
 *	BATCHED_LOCK and BATCHED_UNLOCK commands.
 * VMW_BALLOON_BATCHED_2M_CMDS:
 *	BATCHED_2M_LOCK and BATCHED_2M_UNLOCK commands.
 */
#define VMW_BALLOON_CMD_START			0
#define VMW_BALLOON_CMD_GET_TARGET		1
#define VMW_BALLOON_CMD_LOCK			2
#define VMW_BALLOON_CMD_UNLOCK			3
#define VMW_BALLOON_CMD_GUEST_ID		4
#define VMW_BALLOON_CMD_BATCHED_LOCK		6
#define VMW_BALLOON_CMD_BATCHED_UNLOCK		7
#define VMW_BALLOON_CMD_BATCHED_2M_LOCK		8
#define VMW_BALLOON_CMD_BATCHED_2M_UNLOCK	9

/* error codes */
#define VMW_BALLOON_SUCCESS			0
#define VMW_BALLOON_FAILURE			-1
#define VMW_BALLOON_ERROR_CMD_INVALID		1
#define VMW_BALLOON_ERROR_PPN_INVALID		2
#define VMW_BALLOON_ERROR_PPN_LOCKED		3
#define VMW_BALLOON_ERROR_PPN_UNLOCKED		4
#define VMW_BALLOON_ERROR_PPN_PINNED		5
#define VMW_BALLOON_ERROR_PPN_NOTNEEDED		6
#define VMW_BALLOON_ERROR_RESET			7
#define VMW_BALLOON_ERROR_BUSY			8

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES	(0x03000000)

/* Batch page description */

/*
 * Layout of a page in the batch page:
 *
 * +-------------+----------+--------+
 * |             |          |        |
 * | Page number | Reserved | Status |
 * |             |          |        |
 * +-------------+----------+--------+
 * 64  PAGE_SHIFT            6       0
 *
 * The reserved field should be set to 0.
 */
#define VMW_BALLOON_BATCH_MAX_PAGES	(PAGE_SIZE / sizeof(u64))
#define VMW_BALLOON_BATCH_STATUS_MASK	((1UL << 5) - 1)
#define VMW_BALLOON_BATCH_PAGE_MASK	(~((1UL << PAGE_SHIFT) - 1))

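/*
 * For example (illustrative values): a 4k page with PFN 0x12345 is
 * submitted as the 64-bit entry (0x12345 << PAGE_SHIFT), i.e. the page
 * number in the high bits with the reserved and status bits clear; the
 * host writes its result code into the low status bits, where
 * vmballoon_batch_get_status() reads it back.
 */
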
struct vmballoon_batch_page {
	u64 pages[VMW_BALLOON_BATCH_MAX_PAGES];
};

static u64 vmballoon_batch_get_pa(struct vmballoon_batch_page *batch, int idx)
{
	return batch->pages[idx] & VMW_BALLOON_BATCH_PAGE_MASK;
}

static int vmballoon_batch_get_status(struct vmballoon_batch_page *batch,
				int idx)
{
	return (int)(batch->pages[idx] & VMW_BALLOON_BATCH_STATUS_MASK);
}

static void vmballoon_batch_set_pa(struct vmballoon_batch_page *batch, int idx,
				u64 pa)
{
	batch->pages[idx] = pa;
}

#define VMWARE_BALLOON_CMD(cmd, arg1, arg2, result)		\
({								\
	unsigned long __status, __dummy1, __dummy2, __dummy3;	\
	__asm__ __volatile__ ("inl %%dx" :			\
		"=a"(__status),					\
		"=c"(__dummy1),					\
		"=d"(__dummy2),					\
		"=b"(result),					\
		"=S"(__dummy3) :				\
		"0"(VMW_BALLOON_HV_MAGIC),			\
		"1"(VMW_BALLOON_CMD_##cmd),			\
		"2"(VMW_BALLOON_HV_PORT),			\
		"3"(arg1),					\
		"4"(arg2) :					\
		"memory");					\
	if (VMW_BALLOON_CMD_##cmd == VMW_BALLOON_CMD_START)	\
		result = __dummy1;				\
	__status;						\
})

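/*
 * Illustrative use (mirrors vmballoon_send_get_target() below): the
 * macro loads %eax/%ecx/%edx/%ebx/%esi with the magic number, command,
 * port and arguments, issues "inl" on the backdoor port, and evaluates
 * to the status returned in %eax:
 *
 *	unsigned long status, target, dummy = 0;
 *	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
 */
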
#ifdef CONFIG_DEBUG_FS
struct vmballoon_stats {
	unsigned int timer;

	/* allocation statistics */
	unsigned int alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int alloc_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int sleep_alloc;
	unsigned int sleep_alloc_fail;
	unsigned int refused_alloc[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int refused_free[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int free[VMW_BALLOON_NUM_PAGE_SIZES];

	/* monitor operations */
	unsigned int lock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int lock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int unlock_fail[VMW_BALLOON_NUM_PAGE_SIZES];
	unsigned int target;
	unsigned int target_fail;
	unsigned int start;
	unsigned int start_fail;
	unsigned int guest_type;
	unsigned int guest_type_fail;
};

#define STATS_INC(stat) (stat)++
#else
#define STATS_INC(stat)
#endif

struct vmballoon;

struct vmballoon_ops {
	void (*add_page)(struct vmballoon *b, int idx, struct page *p);
	int (*lock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
	int (*unlock)(struct vmballoon *b, unsigned int num_pages,
			bool is_2m_pages, unsigned int *target);
};

struct vmballoon_page_size {
	/* list of reserved physical pages */
	struct list_head pages;

	/* transient list of non-balloonable pages */
	struct list_head refused_pages;
	unsigned int n_refused_pages;
};

struct vmballoon {
	struct vmballoon_page_size page_sizes[VMW_BALLOON_NUM_PAGE_SIZES];

	/* supported page sizes. 1 == 4k pages only, 2 == 4k and 2m pages */
	unsigned supported_page_sizes;

	/* balloon size in pages */
	unsigned int size;
	unsigned int target;

	/* reset flag */
	bool reset_required;

	/* adjustment rates (pages per second) */
	unsigned int rate_alloc;

	/* slowdown page allocations for next few cycles */
	unsigned int slow_allocation_cycles;

	unsigned long capabilities;

	struct vmballoon_batch_page *batch_page;
	unsigned int batch_max_pages;
	struct page *page;

	const struct vmballoon_ops *ops;

#ifdef CONFIG_DEBUG_FS
	/* statistics */
	struct vmballoon_stats stats;

	/* debugfs file exporting statistics */
	struct dentry *dbg_entry;
#endif

	struct sysinfo sysinfo;

	struct delayed_work dwork;
};

static struct vmballoon balloon;

/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
static bool vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
	unsigned long status, capabilities, dummy = 0;
	bool success;

	STATS_INC(b->stats.start);

	status = VMWARE_BALLOON_CMD(START, req_caps, dummy, capabilities);

	switch (status) {
	case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
		b->capabilities = capabilities;
		success = true;
		break;
	case VMW_BALLOON_SUCCESS:
		b->capabilities = VMW_BALLOON_BASIC_CMDS;
		success = true;
		break;
	default:
		success = false;
	}

	if (b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS)
		b->supported_page_sizes = 2;
	else
		b->supported_page_sizes = 1;

	if (!success) {
		pr_debug("%s - failed, hv returns %ld\n", __func__, status);
		STATS_INC(b->stats.start_fail);
	}
	return success;
}

static bool vmballoon_check_status(struct vmballoon *b, unsigned long status)
{
	switch (status) {
	case VMW_BALLOON_SUCCESS:
		return true;
	case VMW_BALLOON_ERROR_RESET:
		b->reset_required = true;
		/* fall through */
	default:
		return false;
	}
}

/*
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 */
static bool vmballoon_send_guest_id(struct vmballoon *b)
{
	unsigned long status, dummy = 0;

	status = VMWARE_BALLOON_CMD(GUEST_ID, VMW_BALLOON_GUEST_ID, dummy,
				dummy);

	STATS_INC(b->stats.guest_type);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.guest_type_fail);
	return false;
}

static u16 vmballoon_page_size(bool is_2m_page)
{
	if (is_2m_page)
		return 1 << VMW_BALLOON_2M_SHIFT;

	return 1;
}

/*
 * Retrieve desired balloon size from the host.
 */
static bool vmballoon_send_get_target(struct vmballoon *b, u32 *new_target)
{
	unsigned long status;
	unsigned long target;
	unsigned long limit;
	unsigned long dummy = 0;
	u32 limit32;

	/*
	 * si_meminfo() is cheap. Moreover, we want to provide dynamic
	 * max balloon size later. So let us call si_meminfo() every
	 * iteration.
	 */
	si_meminfo(&b->sysinfo);
	limit = b->sysinfo.totalram;

	/* Ensure limit fits in 32-bits */
	limit32 = (u32)limit;
	if (limit != limit32)
		return false;

	/* update stats */
	STATS_INC(b->stats.target);

	status = VMWARE_BALLOON_CMD(GET_TARGET, limit, dummy, target);
	if (vmballoon_check_status(b, status)) {
		*new_target = target;
		return true;
	}

	pr_debug("%s - failed, hv returns %ld\n", __func__, status);
	STATS_INC(b->stats.target_fail);
	return false;
}

/*
 * Notify the host about allocated page so that host can use it without
 * fear that guest will need it. Host may reject some pages, we need to
 * check the return value and maybe submit a different page.
 */
static int vmballoon_send_lock_page(struct vmballoon *b, unsigned long pfn,
				unsigned int *hv_status, unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return -1;

	STATS_INC(b->stats.lock[false]);

	*hv_status = status = VMWARE_BALLOON_CMD(LOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[false]);
	return 1;
}

static int vmballoon_send_batched_lock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = page_to_pfn(b->page);

	STATS_INC(b->stats.lock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_LOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_LOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return 0;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.lock_fail[is_2m_pages]);
	return 1;
}

/*
 * Notify the host that guest intends to release given page back into
 * the pool of available (to the guest) pages.
 */
static bool vmballoon_send_unlock_page(struct vmballoon *b, unsigned long pfn,
						unsigned int *target)
{
	unsigned long status, dummy = 0;
	u32 pfn32;

	pfn32 = (u32)pfn;
	if (pfn32 != pfn)
		return false;

	STATS_INC(b->stats.unlock[false]);

	status = VMWARE_BALLOON_CMD(UNLOCK, pfn, dummy, *target);
	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[false]);
	return false;
}

static bool vmballoon_send_batched_unlock(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	unsigned long status;
	unsigned long pfn = page_to_pfn(b->page);

	STATS_INC(b->stats.unlock[is_2m_pages]);

	if (is_2m_pages)
		status = VMWARE_BALLOON_CMD(BATCHED_2M_UNLOCK, pfn, num_pages,
				*target);
	else
		status = VMWARE_BALLOON_CMD(BATCHED_UNLOCK, pfn, num_pages,
				*target);

	if (vmballoon_check_status(b, status))
		return true;

	pr_debug("%s - batch ppn %lx, hv returns %ld\n", __func__, pfn, status);
	STATS_INC(b->stats.unlock_fail[is_2m_pages]);
	return false;
}

static struct page *vmballoon_alloc_page(gfp_t flags, bool is_2m_page)
{
	if (is_2m_page)
		return alloc_pages(flags, VMW_BALLOON_2M_SHIFT);

	return alloc_page(flags);
}

static void vmballoon_free_page(struct page *page, bool is_2m_page)
{
	if (is_2m_page)
		__free_pages(page, VMW_BALLOON_2M_SHIFT);
	else
		__free_page(page);
}

/*
 * Quickly release all pages allocated for the balloon. This function is
 * called when host decides to "reset" balloon for one reason or another.
 * Unlike normal "deflate" we do not (shall not) notify host of the pages
 * that are released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
	struct page *page, *next;
	unsigned is_2m_pages;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];
		u16 size_per_page = vmballoon_page_size(is_2m_pages);

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			list_del(&page->lru);
			vmballoon_free_page(page, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);
			b->size -= size_per_page;
			cond_resched();
		}
	}

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		if (b->batch_page)
			vunmap(b->batch_page);

		if (b->page)
			__free_page(b->page);
	}
}

/*
 * Notify the host of a ballooned page. If host rejects the page put it on the
 * refuse list, those refused pages are then released at the end of the
 * inflation cycle.
 */
static int vmballoon_lock_page(struct vmballoon *b, unsigned int num_pages,
				bool is_2m_pages, unsigned int *target)
{
	int locked, hv_status;
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	locked = vmballoon_send_lock_page(b, page_to_pfn(page), &hv_status,
								target);
	if (locked > 0) {
		STATS_INC(b->stats.refused_alloc[false]);

		if (hv_status == VMW_BALLOON_ERROR_RESET ||
				hv_status == VMW_BALLOON_ERROR_PPN_NOTNEEDED) {
			vmballoon_free_page(page, false);
			return -EIO;
		}

		/*
		 * Place page on the list of non-balloonable pages
		 * and retry allocation, unless we already accumulated
		 * too many of them, in which case take a breather.
		 */
		if (page_size->n_refused_pages < VMW_BALLOON_MAX_REFUSED) {
			page_size->n_refused_pages++;
			list_add(&page->lru, &page_size->refused_pages);
		} else {
			vmballoon_free_page(page, false);
		}
		return -EIO;
	}

	/* track allocated page */
	list_add(&page->lru, &page_size->pages);

	/* update balloon size */
	b->size++;

	return 0;
}

static int vmballoon_lock_batched_page(struct vmballoon *b,
		unsigned int num_pages, bool is_2m_pages, unsigned int *target)
{
	int locked, i;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	locked = vmballoon_send_batched_lock(b, num_pages, is_2m_pages,
			target);
	if (locked > 0) {
		for (i = 0; i < num_pages; i++) {
			u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
			struct page *p = pfn_to_page(pa >> PAGE_SHIFT);

			vmballoon_free_page(p, is_2m_pages);
		}

		return -EIO;
	}

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);

		switch (locked) {
		case VMW_BALLOON_SUCCESS:
			list_add(&p->lru, &page_size->pages);
			b->size += size_per_page;
			break;
		case VMW_BALLOON_ERROR_PPN_PINNED:
		case VMW_BALLOON_ERROR_PPN_INVALID:
			if (page_size->n_refused_pages
					< VMW_BALLOON_MAX_REFUSED) {
				list_add(&p->lru, &page_size->refused_pages);
				page_size->n_refused_pages++;
				break;
			}
			/* Fallthrough */
		case VMW_BALLOON_ERROR_RESET:
		case VMW_BALLOON_ERROR_PPN_NOTNEEDED:
			vmballoon_free_page(p, is_2m_pages);
			break;
		default:
			/* This should never happen */
			break;
		}
	}

	return 0;
}

/*
 * Release the page allocated for the balloon. Note that we first notify
 * the host so it can make sure the page will be available for the guest
 * to use, if needed.
 */
static int vmballoon_unlock_page(struct vmballoon *b, unsigned int num_pages,
		bool is_2m_pages, unsigned int *target)
{
	struct page *page = b->page;
	struct vmballoon_page_size *page_size = &b->page_sizes[false];

	/* is_2m_pages can never happen as 2m pages support implies batching */

	if (!vmballoon_send_unlock_page(b, page_to_pfn(page), target)) {
		list_add(&page->lru, &page_size->pages);
		return -EIO;
	}

	/* deallocate page */
	vmballoon_free_page(page, false);
	STATS_INC(b->stats.free[false]);

	/* update balloon size */
	b->size--;

	return 0;
}

static int vmballoon_unlock_batched_page(struct vmballoon *b,
				unsigned int num_pages, bool is_2m_pages,
				unsigned int *target)
{
	int locked, i, ret = 0;
	bool hv_success;
	u16 size_per_page = vmballoon_page_size(is_2m_pages);

	hv_success = vmballoon_send_batched_unlock(b, num_pages, is_2m_pages,
			target);
	if (!hv_success)
		ret = -EIO;

	for (i = 0; i < num_pages; i++) {
		u64 pa = vmballoon_batch_get_pa(b->batch_page, i);
		struct page *p = pfn_to_page(pa >> PAGE_SHIFT);
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		locked = vmballoon_batch_get_status(b->batch_page, i);
		if (!hv_success || locked != VMW_BALLOON_SUCCESS) {
			/*
			 * That page wasn't successfully unlocked by the
			 * hypervisor, re-add it to the list of pages owned by
			 * the balloon driver.
			 */
			list_add(&p->lru, &page_size->pages);
		} else {
			/* deallocate page */
			vmballoon_free_page(p, is_2m_pages);
			STATS_INC(b->stats.free[is_2m_pages]);

			/* update balloon size */
			b->size -= size_per_page;
		}
	}

	return ret;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
		bool is_2m_pages)
{
	struct page *page, *next;
	struct vmballoon_page_size *page_size =
			&b->page_sizes[is_2m_pages];

	list_for_each_entry_safe(page, next, &page_size->refused_pages, lru) {
		list_del(&page->lru);
		vmballoon_free_page(page, is_2m_pages);
		STATS_INC(b->stats.refused_free[is_2m_pages]);
	}

	page_size->n_refused_pages = 0;
}

static void vmballoon_add_page(struct vmballoon *b, int idx, struct page *p)
{
	b->page = p;
}

static void vmballoon_add_batched_page(struct vmballoon *b, int idx,
				struct page *p)
{
	vmballoon_batch_set_pa(b->batch_page, idx,
			(u64)page_to_pfn(p) << PAGE_SHIFT);
}

/*
 * Inflate the balloon towards its target size. Note that we try to limit
 * the rate of allocation to make sure we are not choking the rest of the
 * system.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
	unsigned rate;
	unsigned int allocations = 0;
	unsigned int num_pages = 0;
	int error = 0;
	gfp_t flags = VMW_PAGE_ALLOC_NOSLEEP;
	bool is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/*
	 * First try NOSLEEP page allocations to inflate balloon.
	 *
	 * If we do not throttle nosleep allocations, we can drain all
	 * free pages in the guest quickly (if the balloon target is high).
	 * As a side-effect, draining free pages helps to inform (force)
	 * the guest to start swapping if balloon target is not met yet,
	 * which is a desired behavior. However, balloon driver can consume
	 * all available CPU cycles if too many pages are allocated in a
	 * second. Therefore, we throttle nosleep allocations even when
	 * the guest is not under memory pressure. OTOH, if we have already
	 * predicted that the guest is under memory pressure, then we
	 * slowdown page allocations considerably.
	 */

	/*
	 * Start with no sleep allocation rate which may be higher
	 * than sleeping allocation rate.
	 */
	if (b->slow_allocation_cycles) {
		rate = b->rate_alloc;
		is_2m_pages = false;
	} else {
		rate = UINT_MAX;
		is_2m_pages =
			b->supported_page_sizes == VMW_BALLOON_NUM_PAGE_SIZES;
	}

	pr_debug("%s - goal: %d, no-sleep rate: %u, sleep rate: %d\n",
		 __func__, b->target - b->size, rate, b->rate_alloc);

	while (!b->reset_required &&
		b->size + num_pages * vmballoon_page_size(is_2m_pages)
		< b->target) {
		struct page *page;

		if (flags == VMW_PAGE_ALLOC_NOSLEEP)
			STATS_INC(b->stats.alloc[is_2m_pages]);
		else
			STATS_INC(b->stats.sleep_alloc);

		page = vmballoon_alloc_page(flags, is_2m_pages);
		if (!page) {
			STATS_INC(b->stats.alloc_fail[is_2m_pages]);

			if (is_2m_pages) {
				b->ops->lock(b, num_pages, true, &b->target);

				/*
				 * ignore errors from locking as we now switch
				 * to 4k pages and we might get different
				 * errors.
				 */
				num_pages = 0;
				is_2m_pages = false;
				continue;
			}

			if (flags == VMW_PAGE_ALLOC_CANSLEEP) {
				/*
				 * CANSLEEP page allocation failed, so guest
				 * is under severe memory pressure. Quickly
				 * decrease allocation rate.
				 */
				b->rate_alloc = max(b->rate_alloc / 2,
						VMW_BALLOON_RATE_ALLOC_MIN);
				STATS_INC(b->stats.sleep_alloc_fail);
				break;
			}

			/*
			 * NOSLEEP page allocation failed, so the guest is
			 * under memory pressure. Let us slow down page
			 * allocations for next few cycles so that the guest
			 * gets out of memory pressure. Also, if we already
			 * allocated b->rate_alloc pages, let's pause,
			 * otherwise switch to sleeping allocations.
			 */
			b->slow_allocation_cycles = VMW_BALLOON_SLOW_CYCLES;

			if (allocations >= b->rate_alloc)
				break;

			flags = VMW_PAGE_ALLOC_CANSLEEP;
			/* Lower rate for sleeping allocations. */
			rate = b->rate_alloc;
			continue;
		}

		allocations++;
		b->ops->add_page(b, num_pages++, page);
		if (num_pages == b->batch_max_pages) {
			error = b->ops->lock(b, num_pages, is_2m_pages,
					&b->target);
			num_pages = 0;
			if (error)
				break;
		}

		cond_resched();

		if (allocations >= rate) {
			/* We allocated enough pages, let's take a break. */
			break;
		}
	}

	if (num_pages > 0)
		b->ops->lock(b, num_pages, is_2m_pages, &b->target);

	/*
	 * We reached our goal without failures so try increasing
	 * allocation rate.
	 */
	if (error == 0 && allocations >= b->rate_alloc) {
		unsigned int mult = allocations / b->rate_alloc;

		b->rate_alloc =
			min(b->rate_alloc + mult * VMW_BALLOON_RATE_ALLOC_INC,
			    VMW_BALLOON_RATE_ALLOC_MAX);
	}
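
	/*
	 * Worked example (illustrative numbers): with rate_alloc == 512
	 * and allocations == 2048, mult is 4, so the rate grows by
	 * 4 * VMW_BALLOON_RATE_ALLOC_INC == 64 pages/sec, capped at
	 * VMW_BALLOON_RATE_ALLOC_MAX.
	 */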

	vmballoon_release_refused_pages(b, true);
	vmballoon_release_refused_pages(b, false);
}

/*
 * Decrease the size of the balloon allowing guest to use more memory.
 */
static void vmballoon_deflate(struct vmballoon *b)
{
	unsigned is_2m_pages;

	pr_debug("%s - size: %d, target %d\n", __func__, b->size, b->target);

	/* free pages to reach target */
	for (is_2m_pages = 0; is_2m_pages < b->supported_page_sizes;
			is_2m_pages++) {
		struct page *page, *next;
		unsigned int num_pages = 0;
		struct vmballoon_page_size *page_size =
				&b->page_sizes[is_2m_pages];

		list_for_each_entry_safe(page, next, &page_size->pages, lru) {
			if (b->reset_required ||
				(b->target <= b->size &&
					b->size - num_pages
					* vmballoon_page_size(is_2m_pages)
				< b->target + vmballoon_page_size(true)))
				break;

			list_del(&page->lru);
			b->ops->add_page(b, num_pages++, page);

			if (num_pages == b->batch_max_pages) {
				int error;

				error = b->ops->unlock(b, num_pages,
						is_2m_pages, &b->target);
				num_pages = 0;
				if (error)
					return;
			}

			cond_resched();
		}

		if (num_pages > 0)
			b->ops->unlock(b, num_pages, is_2m_pages, &b->target);
	}
}

static const struct vmballoon_ops vmballoon_basic_ops = {
	.add_page = vmballoon_add_page,
	.lock = vmballoon_lock_page,
	.unlock = vmballoon_unlock_page
};

static const struct vmballoon_ops vmballoon_batched_ops = {
	.add_page = vmballoon_add_batched_page,
	.lock = vmballoon_lock_batched_page,
	.unlock = vmballoon_unlock_batched_page
};

static bool vmballoon_init_batching(struct vmballoon *b)
{
	b->page = alloc_page(VMW_PAGE_ALLOC_NOSLEEP);
	if (!b->page)
		return false;

	b->batch_page = vmap(&b->page, 1, VM_MAP, PAGE_KERNEL);
	if (!b->batch_page) {
		__free_page(b->page);
		return false;
	}

	return true;
}

/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
	/* free all pages, skipping monitor unlock */
	vmballoon_pop(b);

	if (!vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
		return;

	if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		b->ops = &vmballoon_batched_ops;
		b->batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
		if (!vmballoon_init_batching(b)) {
			/*
			 * We failed to initialize batching, inform the monitor
			 * about it by sending a null capability.
			 *
			 * The guest will retry in one second.
			 */
			vmballoon_send_start(b, 0);
			return;
		}
	} else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		b->ops = &vmballoon_basic_ops;
		b->batch_max_pages = 1;
	}

	b->reset_required = false;
	if (!vmballoon_send_guest_id(b))
		pr_err("failed to send guest ID to the host\n");
}

/*
 * Balloon work function: reset protocol, if needed, get the new size and
 * adjust balloon as needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
	unsigned int target;

	STATS_INC(b->stats.timer);

	if (b->reset_required)
		vmballoon_reset(b);

	if (b->slow_allocation_cycles > 0)
		b->slow_allocation_cycles--;

	if (vmballoon_send_get_target(b, &target)) {
		/* update target, adjust size */
		b->target = target;

		if (b->size < target)
			vmballoon_inflate(b);
		else if (target == 0 ||
				b->size > target + vmballoon_page_size(true))
			vmballoon_deflate(b);
	}

	/*
	 * We are using a freezable workqueue so that balloon operations are
	 * stopped while the system transitions to/from sleep/hibernation.
	 */
	queue_delayed_work(system_freezable_wq,
			   dwork, round_jiffies_relative(HZ));
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	struct vmballoon_stats *stats = &b->stats;

	/* format capabilities info */
	seq_printf(f,
		   "balloon capabilities:   %#4x\n"
		   "used capabilities:      %#4lx\n",
		   VMW_BALLOON_CAPABILITIES, b->capabilities);

	/* format size info */
	seq_printf(f,
		   "target:             %8d pages\n"
		   "current:            %8d pages\n",
		   b->target, b->size);

	/* format rate info */
	seq_printf(f,
		   "rateSleepAlloc:     %8d pages/sec\n",
		   b->rate_alloc);

	seq_printf(f,
		   "\n"
		   "timer:              %8u\n"
		   "start:              %8u (%4u failed)\n"
		   "guestType:          %8u (%4u failed)\n"
		   "2m-lock:            %8u (%4u failed)\n"
		   "lock:               %8u (%4u failed)\n"
		   "2m-unlock:          %8u (%4u failed)\n"
		   "unlock:             %8u (%4u failed)\n"
		   "target:             %8u (%4u failed)\n"
		   "prim2mAlloc:        %8u (%4u failed)\n"
		   "primNoSleepAlloc:   %8u (%4u failed)\n"
		   "primCanSleepAlloc:  %8u (%4u failed)\n"
		   "prim2mFree:         %8u\n"
		   "primFree:           %8u\n"
		   "err2mAlloc:         %8u\n"
		   "errAlloc:           %8u\n"
		   "err2mFree:          %8u\n"
		   "errFree:            %8u\n",
		   stats->timer,
		   stats->start, stats->start_fail,
		   stats->guest_type, stats->guest_type_fail,
		   stats->lock[true], stats->lock_fail[true],
		   stats->lock[false], stats->lock_fail[false],
		   stats->unlock[true], stats->unlock_fail[true],
		   stats->unlock[false], stats->unlock_fail[false],
		   stats->target, stats->target_fail,
		   stats->alloc[true], stats->alloc_fail[true],
		   stats->alloc[false], stats->alloc_fail[false],
		   stats->sleep_alloc, stats->sleep_alloc_fail,
		   stats->free[true],
		   stats->free[false],
		   stats->refused_alloc[true], stats->refused_alloc[false],
		   stats->refused_free[true], stats->refused_free[false]);

	return 0;
}

static int vmballoon_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, vmballoon_debug_show, inode->i_private);
}

static const struct file_operations vmballoon_debug_fops = {
	.owner		= THIS_MODULE,
	.open		= vmballoon_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static int __init vmballoon_debugfs_init(struct vmballoon *b)
{
	int error;

	b->dbg_entry = debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
					   &vmballoon_debug_fops);
	if (IS_ERR(b->dbg_entry)) {
		error = PTR_ERR(b->dbg_entry);
		pr_err("failed to create debugfs entry, error: %d\n", error);
		return error;
	}

	return 0;
}

static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	debugfs_remove(b->dbg_entry);
}

#else

static inline int vmballoon_debugfs_init(struct vmballoon *b)
{
	return 0;
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */

static int __init vmballoon_init(void)
{
	int error;
	unsigned is_2m_pages;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper != &x86_hyper_vmware)
		return -ENODEV;

	for (is_2m_pages = 0; is_2m_pages < VMW_BALLOON_NUM_PAGE_SIZES;
			is_2m_pages++) {
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].pages);
		INIT_LIST_HEAD(&balloon.page_sizes[is_2m_pages].refused_pages);
	}

	/* initialize rates */
	balloon.rate_alloc = VMW_BALLOON_RATE_ALLOC_MAX;

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	/*
	 * Start balloon.
	 */
	if (!vmballoon_send_start(&balloon, VMW_BALLOON_CAPABILITIES)) {
		pr_err("failed to send start command to the host\n");
		return -EIO;
	}

	if ((balloon.capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
		balloon.ops = &vmballoon_batched_ops;
		balloon.batch_max_pages = VMW_BALLOON_BATCH_MAX_PAGES;
		if (!vmballoon_init_batching(&balloon)) {
			pr_err("failed to init batching\n");
			return -EIO;
		}
	} else if ((balloon.capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
		balloon.ops = &vmballoon_basic_ops;
		balloon.batch_max_pages = 1;
	}

	if (!vmballoon_send_guest_id(&balloon)) {
		pr_err("failed to send guest ID to the host\n");
		return -EIO;
	}

	error = vmballoon_debugfs_init(&balloon);
	if (error)
		return error;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	return 0;
}
module_init(vmballoon_init);

static void __exit vmballoon_exit(void)
{
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, VMW_BALLOON_CAPABILITIES);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);