/*
 * ramster.c
 *
 * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
 *
 * RAMster implements peer-to-peer transcendent memory, allowing a "cluster" of
 * kernels to dynamically pool their RAM so that a RAM-hungry workload on one
 * machine can temporarily and transparently utilize RAM on another machine
 * which is presumably idle or running a non-RAM-hungry workload.
 *
 * RAMster combines a clustering and messaging foundation based on the ocfs2
 * cluster layer with the in-kernel compression implementation of zcache, and
 * adds code to glue them together.  When a page is "put" to RAMster, it is
 * compressed and stored locally.  Periodically, a thread will "remotify" these
 * pages by sending them via messages to a remote machine.  When the page is
 * later needed as indicated by a page fault, a "get" is issued.  If the data
 * is local, it is uncompressed and the fault is resolved.  If the data is
 * remote, a message is sent to fetch the data and the faulting thread sleeps;
 * when the data arrives, the thread awakens, the data is decompressed and
 * the fault is resolved.
 *
 * As of V5, clusters up to eight nodes are supported; each node can remotify
 * pages to one specified node, so clusters can be configured as clients to
 * a "memory server".  Some simple policy is in place that will need to be
 * refined over time.  Larger clusters and fault-resistant protocols can also
 * be added over time.
 */

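/*
 * Illustrative sketch only, not part of the driver: a tiny user-space
 * simulation of the put/remotify/get flow described above, with two
 * "nodes" modeled as structs in one process and compression stubbed
 * out.  All names below (toy_put, toy_remotify, toy_get, struct
 * toy_node) are invented for illustration; the block is wrapped in
 * #if 0 so it cannot affect the kernel build.
 */
#if 0
#include <stdio.h>
#include <string.h>

#define NPAGES	4
#define PAGESZ	16

struct toy_node {
	char page[NPAGES][PAGESZ];	/* "compressed" local store */
	int remote[NPAGES];		/* -1 = local, else remote slot */
};

static struct toy_node client, server;

/* "put": compress (stubbed) and store locally on the client */
static void toy_put(int idx, const char *data)
{
	strncpy(client.page[idx], data, PAGESZ - 1);
	client.remote[idx] = -1;
}

/* "remotify": push a local page to the memory server, drop local copy */
static void toy_remotify(int idx)
{
	memcpy(server.page[idx], client.page[idx], PAGESZ);
	memset(client.page[idx], 0, PAGESZ);
	client.remote[idx] = idx;
}

/* "get": resolve locally if present, else fetch from the server */
static void toy_get(int idx, char *out)
{
	if (client.remote[idx] < 0)
		memcpy(out, client.page[idx], PAGESZ);
	else
		memcpy(out, server.page[client.remote[idx]], PAGESZ);
}

int main(void)
{
	char buf[PAGESZ];

	toy_put(0, "swap page 0");
	toy_remotify(0);	/* the periodic worker would do this */
	toy_get(0, buf);	/* a page fault would trigger the fetch */
	printf("%s\n", buf);	/* prints "swap page 0" */
	return 0;
}
#endif
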
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/lzo.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/frontswap.h>
#include "../tmem.h"
#include "../zcache.h"
#include "../zbud.h"
#include "ramster.h"
#include "ramster_nodemanager.h"
#include "tcp.h"
#include "debug.h"

#define RAMSTER_TESTING

#ifndef CONFIG_SYSFS
#error "ramster needs sysfs to define cluster nodes to use"
#endif

static bool use_cleancache __read_mostly;
static bool use_frontswap __read_mostly;
static bool use_frontswap_exclusive_gets __read_mostly;

/* These must be sysfs not debugfs as they are checked/used by userland!! */
static unsigned long ramster_interface_revision __read_mostly =
	R2NM_API_VERSION; /* interface revision must match userspace! */
static unsigned long ramster_pers_remotify_enable __read_mostly;
static unsigned long ramster_eph_remotify_enable __read_mostly;
static atomic_t ramster_remote_pers_pages = ATOMIC_INIT(0);
#define MANUAL_NODES 8
static bool ramster_nodes_manual_up[MANUAL_NODES] __read_mostly;
static int ramster_remote_target_nodenum __read_mostly = -1;

/* Used by this code. */
long ramster_flnodes;
ssize_t ramster_foreign_eph_pages;
ssize_t ramster_foreign_pers_pages;
/* FIXME frontswap selfshrinking knobs in debugfs? */

static LIST_HEAD(ramster_rem_op_list);
static DEFINE_SPINLOCK(ramster_rem_op_list_lock);
static DEFINE_PER_CPU(struct ramster_preload, ramster_preloads);

static DEFINE_PER_CPU(unsigned char *, ramster_remoteputmem1);
static DEFINE_PER_CPU(unsigned char *, ramster_remoteputmem2);

static struct kmem_cache *ramster_flnode_cache __read_mostly;

static struct flushlist_node *ramster_flnode_alloc(struct tmem_pool *pool)
{
	struct flushlist_node *flnode = NULL;
	struct ramster_preload *kp;

	kp = &__get_cpu_var(ramster_preloads);
	flnode = kp->flnode;
	BUG_ON(flnode == NULL);
	kp->flnode = NULL;
	inc_ramster_flnodes();
	return flnode;
}

/* the "flush list" asynchronously collects pages to remotely flush */
#define FLUSH_ENTIRE_OBJECT ((uint32_t)-1)
static void ramster_flnode_free(struct flushlist_node *flnode,
				struct tmem_pool *pool)
{
	dec_ramster_flnodes();
	BUG_ON(ramster_flnodes < 0);
	kmem_cache_free(ramster_flnode_cache, flnode);
}

int ramster_do_preload_flnode(struct tmem_pool *pool)
{
	struct ramster_preload *kp;
	struct flushlist_node *flnode;
	int ret = -ENOMEM;

	BUG_ON(!irqs_disabled());
	if (unlikely(ramster_flnode_cache == NULL))
		BUG();
	kp = &__get_cpu_var(ramster_preloads);
	flnode = kmem_cache_alloc(ramster_flnode_cache, GFP_ATOMIC);
	if (unlikely(flnode == NULL) && kp->flnode == NULL)
		BUG(); /* FIXME handle more gracefully, but how??? */
	else if (kp->flnode == NULL)
		kp->flnode = flnode;
	else
		kmem_cache_free(ramster_flnode_cache, flnode);
	ret = 0;
	return ret;
}

/*
 * Called by the message handler after a (still compressed) page has been
 * fetched from the remote machine in response to an "is_remote" tmem_get
 * or persistent tmem_localify.  For a tmem_get, "extra" is the address of
 * the page that is to be filled to successfully resolve the tmem_get; for
 * a (persistent) tmem_localify, "extra" is NULL (as the data is placed only
 * in the local zcache).  "data" points to "size" bytes of (compressed) data
 * passed in the message.  In the case of a persistent remote get, if
 * pre-allocation was successful (see ramster_repatriate_preload), the page
 * is placed into both local zcache and at "extra".
 */
int ramster_localify(int pool_id, struct tmem_oid *oidp, uint32_t index,
			char *data, unsigned int size, void *extra)
{
	int ret = -ENOENT;
	unsigned long flags;
	struct tmem_pool *pool;
	bool eph, delete = false;
	void *pampd, *saved_hb;
	struct tmem_obj *obj;

	pool = zcache_get_pool_by_id(LOCAL_CLIENT, pool_id);
	if (unlikely(pool == NULL))
		/* pool doesn't exist anymore */
		goto out;
	eph = is_ephemeral(pool);
	local_irq_save(flags); /* FIXME: maybe only disable softirqs? */
	pampd = tmem_localify_get_pampd(pool, oidp, index, &obj, &saved_hb);
	if (pampd == NULL) {
		/* hmmm... must have been a flush while waiting */
#ifdef RAMSTER_TESTING
		pr_err("UNTESTED pampd==NULL in ramster_localify\n");
#endif
		if (eph)
			ramster_remote_eph_pages_unsucc_get++;
		else
			ramster_remote_pers_pages_unsucc_get++;
		obj = NULL;
		goto finish;
	} else if (unlikely(!pampd_is_remote(pampd))) {
		/* hmmm... must have been a dup put while waiting */
#ifdef RAMSTER_TESTING
		pr_err("UNTESTED dup while waiting in ramster_localify\n");
#endif
		if (eph)
			ramster_remote_eph_pages_unsucc_get++;
		else
			ramster_remote_pers_pages_unsucc_get++;
		obj = NULL;
		pampd = NULL;
		ret = -EEXIST;
		goto finish;
	} else if (size == 0) {
		/* no remote data, delete the local is_remote pampd */
		pampd = NULL;
		if (eph)
			ramster_remote_eph_pages_unsucc_get++;
		else
			BUG();
		delete = true;
		goto finish;
	}
	if (pampd_is_intransit(pampd)) {
		/*
		 * a pampd is marked intransit if it is remote and space has
		 * been allocated for it locally (note, only happens for
		 * persistent pages, in which case the remote copy is freed)
		 */
		BUG_ON(eph);
		pampd = pampd_mask_intransit_and_remote(pampd);
		zbud_copy_to_zbud(pampd, data, size);
	} else {
		/*
		 * setting pampd to NULL tells tmem_localify_finish to leave
		 * pampd alone... meaning it is left pointing to the
		 * remote copy of the data
		 */
		pampd = NULL;
		obj = NULL;
	}
	/*
	 * but in all cases, we decompress direct-to-memory to complete
	 * the remotify and return success
	 */
	BUG_ON(extra == NULL);
	zcache_decompress_to_page(data, size, (struct page *)extra);
	if (eph)
		ramster_remote_eph_pages_succ_get++;
	else
		ramster_remote_pers_pages_succ_get++;
	ret = 0;
finish:
	tmem_localify_finish(obj, index, pampd, saved_hb, delete);
	zcache_put_pool(pool);
	local_irq_restore(flags);
out:
	return ret;
}

void ramster_pampd_new_obj(struct tmem_obj *obj)
{
	obj->extra = NULL;
}

void ramster_pampd_free_obj(struct tmem_pool *pool, struct tmem_obj *obj,
			    bool pool_destroy)
{
	struct flushlist_node *flnode;

	BUG_ON(preemptible());
	if (obj->extra == NULL)
		return;
	if (pool_destroy && is_ephemeral(pool))
		/* FIXME don't bother with remote eph data for now */
		return;
	BUG_ON(!pampd_is_remote(obj->extra));
	flnode = ramster_flnode_alloc(pool);
	flnode->xh.client_id = pampd_remote_node(obj->extra);
	flnode->xh.pool_id = pool->pool_id;
	flnode->xh.oid = obj->oid;
	flnode->xh.index = FLUSH_ENTIRE_OBJECT;
	flnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_OBJ;
	spin_lock(&ramster_rem_op_list_lock);
	list_add(&flnode->rem_op.list, &ramster_rem_op_list);
	spin_unlock(&ramster_rem_op_list_lock);
}

/*
 * Called on a remote persistent tmem_get to attempt to preallocate
 * local storage for the data contained in the remote persistent page.
 * If successfully preallocated, returns the pampd, marked as remote and
 * in_transit.  Else returns NULL.  Note that the appropriate tmem data
 * structure must be locked.
 */
void *ramster_pampd_repatriate_preload(void *pampd, struct tmem_pool *pool,
					struct tmem_oid *oidp, uint32_t index,
					bool *intransit)
{
	int clen = pampd_remote_size(pampd), c;
	void *ret_pampd = NULL;
	unsigned long flags;
	struct tmem_handle th;

	BUG_ON(!pampd_is_remote(pampd));
	BUG_ON(is_ephemeral(pool));
	if (use_frontswap_exclusive_gets)
		/* don't need local storage */
		goto out;
	if (pampd_is_intransit(pampd)) {
		/*
		 * to avoid multiple allocations (and maybe a memory leak)
		 * don't preallocate if already in the process of being
		 * repatriated
		 */
		*intransit = true;
		goto out;
	}
	*intransit = false;
	local_irq_save(flags);
	th.client_id = pampd_remote_node(pampd);
	th.pool_id = pool->pool_id;
	th.oid = *oidp;
	th.index = index;
	ret_pampd = zcache_pampd_create(NULL, clen, true, false, &th);
	if (ret_pampd != NULL) {
		/*
		 * a pampd is marked intransit if it is remote and space has
		 * been allocated for it locally (note, only happens for
		 * persistent pages, in which case the remote copy is freed)
		 */
		ret_pampd = pampd_mark_intransit(ret_pampd);
		c = atomic_dec_return(&ramster_remote_pers_pages);
		WARN_ON_ONCE(c < 0);
	} else {
		ramster_pers_pages_remote_nomem++;
	}
	local_irq_restore(flags);
out:
	return ret_pampd;
}

/*
 * Called on a remote tmem_get to invoke a message to fetch the page.
 * Might sleep so no tmem locks can be held.  "extra" is passed
 * all the way through the round-trip messaging to ramster_localify.
 */
int ramster_pampd_repatriate(void *fake_pampd, void *real_pampd,
			     struct tmem_pool *pool,
			     struct tmem_oid *oid, uint32_t index,
			     bool free, void *extra)
{
	struct tmem_xhandle xh;
	int ret;

	if (pampd_is_intransit(real_pampd))
		/* have local space pre-reserved, so free remote copy */
		free = true;
	xh = tmem_xhandle_fill(LOCAL_CLIENT, pool, oid, index);
	/* unreliable request/response for now */
	ret = r2net_remote_async_get(&xh, free,
					pampd_remote_node(fake_pampd),
					pampd_remote_size(fake_pampd),
					pampd_remote_cksum(fake_pampd),
					extra);
	return ret;
}

bool ramster_pampd_is_remote(void *pampd)
{
	return pampd_is_remote(pampd);
}

int ramster_pampd_replace_in_obj(void *new_pampd, struct tmem_obj *obj)
{
	int ret = -1;

	if (new_pampd != NULL) {
		if (obj->extra == NULL)
			obj->extra = new_pampd;
		/* enforce that all remote pages in an object reside
		 * in the same node! */
		else if (pampd_remote_node(new_pampd) !=
				pampd_remote_node((void *)(obj->extra)))
			BUG();
		ret = 0;
	}
	return ret;
}

void *ramster_pampd_free(void *pampd, struct tmem_pool *pool,
			 struct tmem_oid *oid, uint32_t index, bool acct)
{
	bool eph = is_ephemeral(pool);
	void *local_pampd = NULL;
	int c;

	BUG_ON(preemptible());
	BUG_ON(!pampd_is_remote(pampd));
	WARN_ON(acct == false);
	if (oid == NULL) {
		/*
		 * a NULL oid means to ignore this pampd free
		 * as the remote freeing will be handled elsewhere
		 */
		/* FIXME remote flush optional but probably good idea */
	} else if (pampd_is_intransit(pampd)) {
		/* did a pers remote get_and_free, so just free local */
		local_pampd = pampd_mask_intransit_and_remote(pampd);
	} else {
		struct flushlist_node *flnode =
			ramster_flnode_alloc(pool);

		flnode->xh.client_id = pampd_remote_node(pampd);
		flnode->xh.pool_id = pool->pool_id;
		flnode->xh.oid = *oid;
		flnode->xh.index = index;
		flnode->rem_op.op = RAMSTER_REMOTIFY_FLUSH_PAGE;
		spin_lock(&ramster_rem_op_list_lock);
		list_add(&flnode->rem_op.list, &ramster_rem_op_list);
		spin_unlock(&ramster_rem_op_list_lock);
		c = atomic_dec_return(&ramster_remote_pers_pages);
		WARN_ON_ONCE(c < 0);
	}
	return local_pampd;
}

void ramster_count_foreign_pages(bool eph, int count)
{
	BUG_ON(count != 1 && count != -1);
	if (eph) {
		if (count > 0) {
			inc_ramster_foreign_eph_pages();
		} else {
			dec_ramster_foreign_eph_pages();
			WARN_ON_ONCE(ramster_foreign_eph_pages < 0);
		}
	} else {
		if (count > 0) {
			inc_ramster_foreign_pers_pages();
		} else {
			dec_ramster_foreign_pers_pages();
			WARN_ON_ONCE(ramster_foreign_pers_pages < 0);
		}
	}
}

/*
 * For now, just push over a few pages every few seconds to
 * ensure that it basically works
 */
static struct workqueue_struct *ramster_remotify_workqueue;
static void ramster_remotify_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(ramster_remotify_worker,
		ramster_remotify_process);

static void ramster_remotify_queue_delayed_work(unsigned long delay)
{
	if (!queue_delayed_work(ramster_remotify_workqueue,
				&ramster_remotify_worker, delay))
		pr_err("ramster_remotify: bad workqueue\n");
}

static void ramster_remote_flush_page(struct flushlist_node *flnode)
{
	struct tmem_xhandle *xh;
	int remotenode, ret;

	preempt_disable();
	xh = &flnode->xh;
	remotenode = flnode->xh.client_id;
	ret = r2net_remote_flush(xh, remotenode);
	if (ret >= 0)
		ramster_remote_pages_flushed++;
	else
		ramster_remote_page_flushes_failed++;
	preempt_enable_no_resched();
	ramster_flnode_free(flnode, NULL);
}

static void ramster_remote_flush_object(struct flushlist_node *flnode)
{
	struct tmem_xhandle *xh;
	int remotenode, ret;

	preempt_disable();
	xh = &flnode->xh;
	remotenode = flnode->xh.client_id;
	ret = r2net_remote_flush_object(xh, remotenode);
	if (ret >= 0)
		ramster_remote_objects_flushed++;
	else
		ramster_remote_object_flushes_failed++;
	preempt_enable_no_resched();
	ramster_flnode_free(flnode, NULL);
}

int ramster_remotify_pageframe(bool eph)
{
	struct tmem_xhandle xh;
	unsigned int size;
	int remotenode, ret, zbuds;
	struct tmem_pool *pool;
	unsigned long flags;
	unsigned char cksum;
	char *p;
	int i, j;
	unsigned char *tmpmem[2];
	struct tmem_handle th[2];
	unsigned int zsize[2];

	tmpmem[0] = __get_cpu_var(ramster_remoteputmem1);
	tmpmem[1] = __get_cpu_var(ramster_remoteputmem2);
	local_bh_disable();
	zbuds = zbud_make_zombie_lru(&th[0], &tmpmem[0], &zsize[0], eph);
	/* now OK to release lock set in caller */
	local_bh_enable();
	if (zbuds == 0)
		goto out;
	BUG_ON(zbuds > 2);
	for (i = 0; i < zbuds; i++) {
		xh.client_id = th[i].client_id;
		xh.pool_id = th[i].pool_id;
		xh.oid = th[i].oid;
		xh.index = th[i].index;
		size = zsize[i];
		BUG_ON(size == 0 || size > zbud_max_buddy_size());
		for (p = tmpmem[i], cksum = 0, j = 0; j < size; j++)
			cksum += *p++;
		ret = r2net_remote_put(&xh, tmpmem[i], size, eph, &remotenode);
		if (ret != 0) {
		/*
		 * This is some form of a memory leak... if the remote put
		 * fails, there will never be another attempt to remotify
		 * this page.  But since we've dropped the zv pointer,
		 * the page may have been freed or the data replaced
		 * so we can't just "put it back" in the remote op list.
		 * Even if we could, not sure where to put it in the list
		 * because there may be flushes that must be strictly
		 * ordered vs the put.  So leave this as a FIXME for now.
		 * But count them so we know if it becomes a problem.
		 */
			if (eph)
				ramster_eph_pages_remote_failed++;
			else
				ramster_pers_pages_remote_failed++;
			break;
		} else {
			if (!eph)
				atomic_inc(&ramster_remote_pers_pages);
		}
		if (eph)
			ramster_eph_pages_remoted++;
		else
			ramster_pers_pages_remoted++;
		/*
		 * data was successfully remoted so change the local version to
		 * point to the remote node where it landed
		 */
		pool = zcache_get_pool_by_id(LOCAL_CLIENT, xh.pool_id);
		local_irq_save(flags);
		(void)tmem_replace(pool, &xh.oid, xh.index,
				pampd_make_remote(remotenode, size, cksum));
		local_irq_restore(flags);
		zcache_put_pool(pool);
	}
out:
	return zbuds;
}

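/*
 * Illustrative sketch only, not part of the driver: the remotify path
 * above guards each page with a one-byte additive checksum (cksum +=
 * *p++ over the compressed payload) that travels to the remote node
 * via pampd_make_remote().  A minimal user-space demonstration of
 * that same accumulation, wrapped in #if 0 so it cannot affect the
 * kernel build:
 */
#if 0
#include <stdio.h>

static unsigned char toy_cksum(const unsigned char *p, unsigned int size)
{
	unsigned char cksum = 0;
	unsigned int j;

	for (j = 0; j < size; j++)
		cksum += *p++;	/* same loop as ramster_remotify_pageframe */
	return cksum;
}

int main(void)
{
	unsigned char buf[4] = { 0x10, 0x20, 0x30, 0x40 };

	/* 0x10 + 0x20 + 0x30 + 0x40 = 0xa0 */
	printf("cksum=0x%02x\n", toy_cksum(buf, sizeof(buf)));
	return 0;
}
#endif
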
static void zcache_do_remotify_flushes(void)
{
	struct ramster_remotify_hdr *rem_op;
	union remotify_list_node *u;

	while (1) {
		spin_lock(&ramster_rem_op_list_lock);
		if (list_empty(&ramster_rem_op_list)) {
			spin_unlock(&ramster_rem_op_list_lock);
			goto out;
		}
		rem_op = list_first_entry(&ramster_rem_op_list,
				struct ramster_remotify_hdr, list);
		list_del_init(&rem_op->list);
		spin_unlock(&ramster_rem_op_list_lock);
		u = (union remotify_list_node *)rem_op;
		switch (rem_op->op) {
		case RAMSTER_REMOTIFY_FLUSH_PAGE:
			ramster_remote_flush_page((struct flushlist_node *)u);
			break;
		case RAMSTER_REMOTIFY_FLUSH_OBJ:
			ramster_remote_flush_object((struct flushlist_node *)u);
			break;
		default:
			BUG();
		}
	}
out:
	return;
}

static void ramster_remotify_process(struct work_struct *work)
{
	static bool remotify_in_progress;
	int i;

	BUG_ON(irqs_disabled());
	if (remotify_in_progress)
		goto requeue;
	if (ramster_remote_target_nodenum == -1)
		goto requeue;
	remotify_in_progress = true;
	if (use_cleancache && ramster_eph_remotify_enable) {
		for (i = 0; i < 100; i++) {
			zcache_do_remotify_flushes();
			(void)ramster_remotify_pageframe(true);
		}
	}
	if (use_frontswap && ramster_pers_remotify_enable) {
		for (i = 0; i < 100; i++) {
			zcache_do_remotify_flushes();
			(void)ramster_remotify_pageframe(false);
		}
	}
	remotify_in_progress = false;
requeue:
	ramster_remotify_queue_delayed_work(HZ);
}

void __init ramster_remotify_init(void)
{
	unsigned long n = 60UL;
	ramster_remotify_workqueue =
		create_singlethread_workqueue("ramster_remotify");
	ramster_remotify_queue_delayed_work(n * HZ);
}

static ssize_t ramster_manual_node_up_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	int i;
	char *p = buf;

	for (i = 0; i < MANUAL_NODES; i++)
		if (ramster_nodes_manual_up[i])
			p += sprintf(p, "%d ", i);
	p += sprintf(p, "\n");
	return p - buf;
}

static ssize_t ramster_manual_node_up_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	int err;
	unsigned long node_num;

	err = kstrtoul(buf, 10, &node_num);
	if (err) {
		pr_err("ramster: bad strtoul?\n");
		return -EINVAL;
	}
	if (node_num >= MANUAL_NODES) {
		pr_err("ramster: bad node_num=%lu?\n", node_num);
		return -EINVAL;
	}
	if (ramster_nodes_manual_up[node_num]) {
		pr_err("ramster: node %d already up, ignoring\n",
							(int)node_num);
	} else {
		ramster_nodes_manual_up[node_num] = true;
		r2net_hb_node_up_manual((int)node_num);
	}
	return count;
}

static struct kobj_attribute ramster_manual_node_up_attr = {
	.attr = { .name = "manual_node_up", .mode = 0644 },
	.show = ramster_manual_node_up_show,
	.store = ramster_manual_node_up_store,
};

static ssize_t ramster_remote_target_nodenum_show(struct kobject *kobj,
				struct kobj_attribute *attr, char *buf)
{
	if (ramster_remote_target_nodenum == -1)
		return sprintf(buf, "unset\n");
	else
		return sprintf(buf, "%d\n", ramster_remote_target_nodenum);
}

static ssize_t ramster_remote_target_nodenum_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	int err;
	unsigned long node_num;

	err = kstrtoul(buf, 10, &node_num);
	if (err) {
		pr_err("ramster: bad strtoul?\n");
		return -EINVAL;
	} else if (node_num == -1UL) {
		pr_err("ramster: disabling all remotification, "
			"data may still reside on remote nodes however\n");
		return -EINVAL;
	} else if (node_num >= MANUAL_NODES) {
		pr_err("ramster: bad node_num=%lu?\n", node_num);
		return -EINVAL;
	} else if (!ramster_nodes_manual_up[node_num]) {
		pr_err("ramster: node %d not up, ignoring setting "
			"of remotification target\n", (int)node_num);
	} else if (r2net_remote_target_node_set((int)node_num) >= 0) {
		pr_info("ramster: node %d set as remotification target\n",
				(int)node_num);
		ramster_remote_target_nodenum = (int)node_num;
	} else {
		pr_err("ramster: bad num to node node_num=%d?\n",
				(int)node_num);
		return -EINVAL;
	}
	return count;
}

static struct kobj_attribute ramster_remote_target_nodenum_attr = {
	.attr = { .name = "remote_target_nodenum", .mode = 0644 },
	.show = ramster_remote_target_nodenum_show,
	.store = ramster_remote_target_nodenum_store,
};

#define RAMSTER_SYSFS_RO(_name) \
	static ssize_t ramster_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%lu\n", ramster_##_name); \
	} \
	static struct kobj_attribute ramster_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = ramster_##_name##_show, \
	}

#define RAMSTER_SYSFS_RW(_name) \
	static ssize_t ramster_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%lu\n", ramster_##_name); \
	} \
	static ssize_t ramster_##_name##_store(struct kobject *kobj, \
		struct kobj_attribute *attr, const char *buf, size_t count) \
	{ \
		int err; \
		unsigned long enable; \
		err = kstrtoul(buf, 10, &enable); \
		if (err) \
			return -EINVAL; \
		ramster_##_name = enable; \
		return count; \
	} \
	static struct kobj_attribute ramster_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0644 }, \
		.show = ramster_##_name##_show, \
		.store = ramster_##_name##_store, \
	}

#define RAMSTER_SYSFS_RO_ATOMIC(_name) \
	static ssize_t ramster_##_name##_show(struct kobject *kobj, \
				struct kobj_attribute *attr, char *buf) \
	{ \
		return sprintf(buf, "%d\n", atomic_read(&ramster_##_name)); \
	} \
	static struct kobj_attribute ramster_##_name##_attr = { \
		.attr = { .name = __stringify(_name), .mode = 0444 }, \
		.show = ramster_##_name##_show, \
	}

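/*
 * For reference: the RAMSTER_SYSFS_RO(interface_revision) use below
 * expands to roughly the following (reformatted), i.e. one read-only
 * sysfs attribute backed by the like-named global:
 *
 *	static ssize_t ramster_interface_revision_show(struct kobject *kobj,
 *				struct kobj_attribute *attr, char *buf)
 *	{
 *		return sprintf(buf, "%lu\n", ramster_interface_revision);
 *	}
 *	static struct kobj_attribute ramster_interface_revision_attr = {
 *		.attr = { .name = "interface_revision", .mode = 0444 },
 *		.show = ramster_interface_revision_show,
 *	};
 */
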
RAMSTER_SYSFS_RO(interface_revision);
RAMSTER_SYSFS_RO_ATOMIC(remote_pers_pages);
RAMSTER_SYSFS_RW(pers_remotify_enable);
RAMSTER_SYSFS_RW(eph_remotify_enable);

static struct attribute *ramster_attrs[] = {
	&ramster_interface_revision_attr.attr,
	&ramster_remote_pers_pages_attr.attr,
	&ramster_manual_node_up_attr.attr,
	&ramster_remote_target_nodenum_attr.attr,
	&ramster_pers_remotify_enable_attr.attr,
	&ramster_eph_remotify_enable_attr.attr,
	NULL,
};

static struct attribute_group ramster_attr_group = {
	.attrs = ramster_attrs,
	.name = "ramster",
};

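/*
 * Illustrative sketch only, not part of the driver: with the group
 * above registered on mm_kobj under the name "ramster", a userland
 * helper could bring up node 1 and select it as the remotification
 * target roughly as follows (assuming the attributes appear under
 * /sys/kernel/mm/ramster; wrapped in #if 0 so it cannot affect the
 * kernel build):
 */
#if 0
#include <stdio.h>

static int sysfs_write(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	/* declare node 1 up, then point remotification at it */
	sysfs_write("/sys/kernel/mm/ramster/manual_node_up", "1");
	sysfs_write("/sys/kernel/mm/ramster/remote_target_nodenum", "1");
	return 0;
}
#endif
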
/*
 * frontswap selfshrinking
 */

/* In HZ, controls frequency of worker invocation. */
static unsigned int selfshrink_interval __read_mostly = 5;
/* Enable/disable with sysfs. */
static bool frontswap_selfshrinking __read_mostly;

static void selfshrink_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(selfshrink_worker, selfshrink_process);

/* Enable/disable with kernel boot option. */
static bool use_frontswap_selfshrink __initdata = true;

/*
 * The default values for the following parameters were deemed reasonable
 * by experimentation, may be workload-dependent, and can all be
 * adjusted via sysfs.
 */

/* Control rate for frontswap shrinking. Higher hysteresis is slower. */
static unsigned int frontswap_hysteresis __read_mostly = 20;

/*
 * Number of selfshrink worker invocations to wait before observing that
 * frontswap selfshrinking should commence. Note that selfshrinking does
 * not use a separate worker thread.
 */
static unsigned int frontswap_inertia __read_mostly = 3;

/* Countdown to next invocation of frontswap_shrink() */
static unsigned long frontswap_inertia_counter;

/*
 * Invoked by the selfshrink worker thread, uses current number of pages
 * in frontswap (frontswap_curr_pages()), previous status, and control
 * values (hysteresis and inertia) to determine if frontswap should be
 * shrunk and what the new frontswap size should be.  Note that
 * frontswap_shrink is essentially a partial swapoff that immediately
 * transfers pages from the "swap device" (frontswap) back into kernel
 * RAM; despite the name, frontswap "shrinking" is very different from
 * the "shrinker" interface used by the kernel MM subsystem to reclaim
 * memory.
 */
static void frontswap_selfshrink(void)
{
	static unsigned long cur_frontswap_pages;
	static unsigned long last_frontswap_pages;
	static unsigned long tgt_frontswap_pages;

	last_frontswap_pages = cur_frontswap_pages;
	cur_frontswap_pages = frontswap_curr_pages();
	if (!cur_frontswap_pages ||
			(cur_frontswap_pages > last_frontswap_pages)) {
		frontswap_inertia_counter = frontswap_inertia;
		return;
	}
	if (frontswap_inertia_counter && --frontswap_inertia_counter)
		return;
	if (cur_frontswap_pages <= frontswap_hysteresis)
		tgt_frontswap_pages = 0;
	else
		tgt_frontswap_pages = cur_frontswap_pages -
			(cur_frontswap_pages / frontswap_hysteresis);
	frontswap_shrink(tgt_frontswap_pages);
}

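/*
 * Illustrative sketch only, not part of the driver: a quick user-space
 * check of the shrink-target arithmetic above.  With the default
 * hysteresis of 20, a steady 1000-page frontswap shrinks toward
 * 1000 - 1000/20 = 950 pages per invocation; at or below 20 pages the
 * target drops to zero.  Wrapped in #if 0 so it cannot affect the
 * kernel build.
 */
#if 0
#include <stdio.h>

static unsigned long toy_shrink_target(unsigned long cur,
				       unsigned int hysteresis)
{
	if (cur <= hysteresis)
		return 0;
	return cur - (cur / hysteresis);	/* same formula as above */
}

int main(void)
{
	printf("%lu\n", toy_shrink_target(1000, 20));	/* prints 950 */
	printf("%lu\n", toy_shrink_target(15, 20));	/* prints 0 */
	return 0;
}
#endif
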
static int __init ramster_nofrontswap_selfshrink_setup(char *s)
{
	use_frontswap_selfshrink = false;
	return 1;
}

__setup("noselfshrink", ramster_nofrontswap_selfshrink_setup);

static void selfshrink_process(struct work_struct *work)
{
	if (frontswap_selfshrinking && frontswap_enabled) {
		frontswap_selfshrink();
		schedule_delayed_work(&selfshrink_worker,
			selfshrink_interval * HZ);
	}
}

void ramster_cpu_up(int cpu)
{
	unsigned char *p1 = kzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT);
	unsigned char *p2 = kzalloc(PAGE_SIZE, GFP_KERNEL | __GFP_REPEAT);

	BUG_ON(!p1 || !p2);
	per_cpu(ramster_remoteputmem1, cpu) = p1;
	per_cpu(ramster_remoteputmem2, cpu) = p2;
}

void ramster_cpu_down(int cpu)
{
	struct ramster_preload *kp;

	kfree(per_cpu(ramster_remoteputmem1, cpu));
	per_cpu(ramster_remoteputmem1, cpu) = NULL;
	kfree(per_cpu(ramster_remoteputmem2, cpu));
	per_cpu(ramster_remoteputmem2, cpu) = NULL;
	kp = &per_cpu(ramster_preloads, cpu);
	if (kp->flnode) {
		kmem_cache_free(ramster_flnode_cache, kp->flnode);
		kp->flnode = NULL;
	}
}

void ramster_register_pamops(struct tmem_pamops *pamops)
{
	pamops->free_obj = ramster_pampd_free_obj;
	pamops->new_obj = ramster_pampd_new_obj;
	pamops->replace_in_obj = ramster_pampd_replace_in_obj;
	pamops->is_remote = ramster_pampd_is_remote;
	pamops->repatriate = ramster_pampd_repatriate;
	pamops->repatriate_preload = ramster_pampd_repatriate_preload;
}

void __init ramster_init(bool cleancache, bool frontswap,
				bool frontswap_exclusive_gets)
{
	int ret = 0;

	if (cleancache)
		use_cleancache = true;
	if (frontswap)
		use_frontswap = true;
	if (frontswap_exclusive_gets)
		use_frontswap_exclusive_gets = true;
	ramster_debugfs_init();
	ret = sysfs_create_group(mm_kobj, &ramster_attr_group);
	if (ret)
		pr_err("ramster: can't create sysfs for ramster\n");
	(void)r2net_register_handlers();
	INIT_LIST_HEAD(&ramster_rem_op_list);
	ramster_flnode_cache = kmem_cache_create("ramster_flnode",
				sizeof(struct flushlist_node), 0, 0, NULL);
	frontswap_selfshrinking = use_frontswap_selfshrink;
	if (frontswap_selfshrinking) {
		pr_info("ramster: Initializing frontswap selfshrink driver.\n");
		schedule_delayed_work(&selfshrink_worker,
					selfshrink_interval * HZ);
	}
	ramster_remotify_init();
}