2 * Compressed RAM based swap device
4 * Copyright (C) 2008, 2009, 2010 Nitin Gupta
6 * This code is released using a dual license strategy: BSD/GPL
7 * You can choose the licence that better fits your requirements.
9 * Released under the terms of 3-clause BSD License
10 * Released under the terms of GNU General Public License Version 2.0
12 * Project home: http://compcache.googlecode.com
15 #define KMSG_COMPONENT "ramzswap"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bitops.h>
21 #include <linux/blkdev.h>
22 #include <linux/buffer_head.h>
23 #include <linux/device.h>
24 #include <linux/genhd.h>
25 #include <linux/highmem.h>
26 #include <linux/slab.h>
27 #include <linux/lzo.h>
28 #include <linux/string.h>
29 #include <linux/swap.h>
30 #include <linux/swapops.h>
31 #include <linux/vmalloc.h>
33 #include "ramzswap_drv.h"
/* Block major number; assigned from register_blkdev() in ramzswap_init(). */
36 static int ramzswap_major;
/* Array of per-device state, allocated in ramzswap_init() with num_devices entries. */
37 static struct ramzswap *devices;
39 /* Module params (documentation at end) */
/* Number of devices to create; registered as a module parameter at the end of the file. */
40 static unsigned int num_devices;
42 static int rzs_test_flag(struct ramzswap *rzs, u32 index,
43 enum rzs_pageflags flag)
45 return rzs->table[index].flags & BIT(flag);
48 static void rzs_set_flag(struct ramzswap *rzs, u32 index,
49 enum rzs_pageflags flag)
51 rzs->table[index].flags |= BIT(flag);
54 static void rzs_clear_flag(struct ramzswap *rzs, u32 index,
55 enum rzs_pageflags flag)
57 rzs->table[index].flags &= ~BIT(flag);
/*
 * Scan the page-sized buffer at @ptr one unsigned long at a time.
 * ramzswap_write() treats a non-zero return as "this page is all zeroes".
 * NOTE(review): the loop body and the return statements are not visible in
 * this listing (embedded line numbers jump from 67 to 75) - confirm the
 * exact return values against the full source.
 */
60 static int page_zero_filled(void *ptr)
65 page = (unsigned long *)ptr;
/* PAGE_SIZE / sizeof(*page) is the number of words in one page. */
67 for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
/*
 * Settle the final value of rzs->disksize (in bytes).
 * @rzs: device whose disksize is being finalized.
 * @totalram_bytes: total RAM size in bytes, used for the default size and
 *                  for the "too large" warning.
 *
 * The visible code falls back to default_disksize_perc_ram percent of RAM,
 * warns when the size exceeds twice the RAM size, and finally truncates the
 * size to a page boundary.
 *
 * NOTE(review): the second message labels its last value "kB" but passes
 * rzs->disksize unshifted, while totalram_bytes is shifted by 10. Elsewhere
 * in this file disksize is handled as bytes (e.g. "disksize_kb << 10"), so
 * the printed "Size you selected" looks 1024x too large - confirm and fix
 * with "rzs->disksize >> 10".
 * NOTE(review): the conditions guarding the default branch and the pr_*()
 * call lines are not visible in this listing.
 */
75 static void ramzswap_set_disksize(struct ramzswap *rzs, size_t totalram_bytes)
79 "disk size not provided. You can use disksize_kb module "
80 "param to specify size.\nUsing default: (%u%% of RAM).\n",
81 default_disksize_perc_ram
/* Divides by 100 before multiplying; presumably to keep the product from overflowing size_t. */
83 rzs->disksize = default_disksize_perc_ram *
84 (totalram_bytes / 100);
/* Sizes above 2x RAM are only warned about, not rejected ("Continuing anyway"). */
87 if (rzs->disksize > 2 * (totalram_bytes)) {
89 "There is little point creating a ramzswap of greater than "
90 "twice the size of memory since we expect a 2:1 compression "
91 "ratio. Note that ramzswap uses about 0.1%% of the size of "
92 "the swap device when not in use so a huge ramzswap is "
94 "\tMemory Size: %zu kB\n"
95 "\tSize you selected: %zu kB\n"
96 "Continuing anyway ...\n",
97 totalram_bytes >> 10, rzs->disksize
/* Drop any sub-page remainder so the disk is a whole number of pages. */
101 rzs->disksize &= PAGE_MASK;
105 * Swap header (1st page of swap device) contains information
106 * about a swap file/partition. Prepare such a header for the
107 * given ramzswap device so that swapon can identify it as a
/*
 * Fill in swap header @s for device @rzs.
 * Visible fields: last_page is the index of the final page of the disk
 * (disksize in pages, minus one), no bad pages are reported, and the
 * 10-character "SWAPSPACE2" signature is copied without a terminating NUL.
 * NOTE(review): embedded line 112 is not visible in this listing, so another
 * header field may be set there.
 */
110 static void setup_swap_header(struct ramzswap *rzs, union swap_header *s)
113 s->info.last_page = (rzs->disksize >> PAGE_SHIFT) - 1;
114 s->info.nr_badpages = 0;
115 memcpy(s->magic.magic, "SWAPSPACE2", 10);
/*
 * Fill the ioctl statistics structure @s from the state of @rzs.
 * disksize is always reported; every other field is filled in only when the
 * driver is built with CONFIG_RAMZSWAP_STATS.
 */
118 static void ramzswap_ioctl_get_stats(struct ramzswap *rzs,
119 struct ramzswap_ioctl_stats *s)
121 s->disksize = rzs->disksize;
123 #if defined(CONFIG_RAMZSWAP_STATS)
125 struct ramzswap_stats *rs = &rzs->stats;
126 size_t succ_writes, mem_used;
127 unsigned int good_compress_perc = 0, no_compress_perc = 0;
/* Memory in use: bytes held by the xvmalloc pool plus the pages counted in pages_expand. */
129 mem_used = xv_get_total_size_bytes(rzs->mem_pool)
130 + (rs->pages_expand << PAGE_SHIFT);
131 succ_writes = rzs_stat64_read(rzs, &rs->num_writes) -
132 rzs_stat64_read(rzs, &rs->failed_writes);
/*
 * Both percentages stay 0 unless there was a successful write and at least
 * one page is stored.
 * NOTE(review): the divisor lines of the two expressions below are not
 * visible in this listing.
 */
134 if (succ_writes && rs->pages_stored) {
135 good_compress_perc = rs->good_compress * 100
137 no_compress_perc = rs->pages_expand * 100
/* 64-bit counters are read through rzs_stat64_read(); the rest are copied directly. */
141 s->num_reads = rzs_stat64_read(rzs, &rs->num_reads);
142 s->num_writes = rzs_stat64_read(rzs, &rs->num_writes);
143 s->failed_reads = rzs_stat64_read(rzs, &rs->failed_reads);
144 s->failed_writes = rzs_stat64_read(rzs, &rs->failed_writes);
145 s->invalid_io = rzs_stat64_read(rzs, &rs->invalid_io);
146 s->notify_free = rzs_stat64_read(rzs, &rs->notify_free);
147 s->pages_zero = rs->pages_zero;
149 s->good_compress_pct = good_compress_perc;
150 s->pages_expand_pct = no_compress_perc;
152 s->pages_stored = rs->pages_stored;
153 s->pages_used = mem_used >> PAGE_SHIFT;
154 s->orig_data_size = rs->pages_stored << PAGE_SHIFT;
155 s->compr_data_size = rs->compr_size;
156 s->mem_used_total = mem_used;
158 #endif /* CONFIG_RAMZSWAP_STATS */
/*
 * Release whatever is stored for table entry @index of @rzs and update the
 * statistics to match.
 * Three cases are visible: an entry with no backing page (only the RZS_ZERO
 * flag and pages_zero counter are touched), an entry flagged
 * RZS_UNCOMPRESSED, and a compressed object living in the xvmalloc pool.
 * NOTE(review): several lines (early return, the freeing of the uncompressed
 * page, labels) are not visible in this listing.
 */
161 static void ramzswap_free_page(struct ramzswap *rzs, size_t index)
166 struct page *page = rzs->table[index].page;
167 u32 offset = rzs->table[index].offset;
169 if (unlikely(!page)) {
171 * No memory is allocated for zero filled pages.
172 * Simply clear zero page flag.
174 if (rzs_test_flag(rzs, index, RZS_ZERO)) {
175 rzs_clear_flag(rzs, index, RZS_ZERO);
176 rzs_stat_dec(&rzs->stats.pages_zero);
181 if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED))) {
184 rzs_clear_flag(rzs, index, RZS_UNCOMPRESSED);
185 rzs_stat_dec(&rzs->stats.pages_expand);
/*
 * Compressed object: map it just long enough to read its stored size
 * (excluding the zobj_header), then hand it back to the pool.
 */
189 obj = kmap_atomic(page, KM_USER0) + offset;
190 clen = xv_get_object_size(obj) - sizeof(struct zobj_header);
191 kunmap_atomic(obj, KM_USER0);
193 xv_free(rzs->mem_pool, page, offset);
/* Mirrors the "clen <= PAGE_SIZE / 2" accounting done in ramzswap_write(). */
194 if (clen <= PAGE_SIZE / 2)
195 rzs_stat_dec(&rzs->stats.good_compress);
198 rzs->stats.compr_size -= clen;
199 rzs_stat_dec(&rzs->stats.pages_stored);
/* Mark the table slot as empty. */
201 rzs->table[index].page = NULL;
202 rzs->table[index].offset = 0;
/*
 * Satisfy a read of a slot flagged RZS_ZERO: fill the first page of @bio
 * with zeroes through a temporary atomic mapping, flush the data cache for
 * that page and mark the bio up to date.
 * NOTE(review): the bio completion call and the return statement are not
 * visible in this listing.
 */
205 static int handle_zero_page(struct bio *bio)
208 struct page *page = bio->bi_io_vec[0].bv_page;
210 user_mem = kmap_atomic(page, KM_USER0);
211 memset(user_mem, 0, PAGE_SIZE);
212 kunmap_atomic(user_mem, KM_USER0);
214 flush_dcache_page(page);
216 set_bit(BIO_UPTODATE, &bio->bi_flags);
/*
 * Satisfy a read of a slot that was stored uncompressed: copy one full page
 * from the stored page (at the recorded offset) into the first page of @bio.
 * The table index is the bio's start sector converted to a page number.
 * NOTE(review): the bio completion call and the return statement are not
 * visible in this listing.
 */
221 static int handle_uncompressed_page(struct ramzswap *rzs, struct bio *bio)
225 unsigned char *user_mem, *cmem;
227 page = bio->bi_io_vec[0].bv_page;
228 index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
/* Destination and source use different atomic kmap slots (KM_USER0 / KM_USER1). */
230 user_mem = kmap_atomic(page, KM_USER0);
231 cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
232 rzs->table[index].offset;
234 memcpy(user_mem, cmem, PAGE_SIZE);
235 kunmap_atomic(user_mem, KM_USER0);
236 kunmap_atomic(cmem, KM_USER1);
238 flush_dcache_page(page);
240 set_bit(BIO_UPTODATE, &bio->bi_flags);
246 * Called when request page is not present in ramzswap.
247 * This is an attempt to read before any previous write
248 * to this location - this happens due to readahead when
249 * swap device is read from user-space (e.g. during swapon)
/*
 * Handle a read of a slot that has never been written: log the request at
 * debug level and mark @bio up to date without touching its data page.
 * NOTE(review): the bio completion call and the return statement are not
 * visible in this listing.
 */
251 static int handle_ramzswap_fault(struct ramzswap *rzs, struct bio *bio)
253 pr_debug("Read before write on swap device: "
254 "sector=%lu, size=%u, offset=%u\n",
255 (ulong)(bio->bi_sector), bio->bi_size,
256 bio->bi_io_vec[0].bv_offset);
258 /* Do nothing. Just return success */
259 set_bit(BIO_UPTODATE, &bio->bi_flags);
/*
 * Serve a one-page read request @bio from device @rzs.
 * Dispatch order visible here: zero-filled slot, slot with no stored page
 * (read before write), slot stored uncompressed, and finally a compressed
 * object that is decompressed with LZO into the bio's page.
 * A decompression error is logged and counted in failed_reads.
 * NOTE(review): the last decompress arguments, the error/exit paths and the
 * return statements are not visible in this listing.
 */
264 static int ramzswap_read(struct ramzswap *rzs, struct bio *bio)
270 struct zobj_header *zheader;
271 unsigned char *user_mem, *cmem;
273 rzs_stat64_inc(rzs, &rzs->stats.num_reads);
275 page = bio->bi_io_vec[0].bv_page;
/* Table index = start sector converted to a page number. */
276 index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
278 if (rzs_test_flag(rzs, index, RZS_ZERO))
279 return handle_zero_page(bio);
281 /* Requested page is not present in compressed area */
282 if (!rzs->table[index].page)
283 return handle_ramzswap_fault(rzs, bio);
285 /* Page is stored uncompressed since it's incompressible */
286 if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
287 return handle_uncompressed_page(rzs, bio);
/*
 * Compressed object: the payload starts after the zobj_header, and its
 * length is the stored object size minus that header.
 */
289 user_mem = kmap_atomic(page, KM_USER0);
292 cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
293 rzs->table[index].offset;
295 ret = lzo1x_decompress_safe(
296 cmem + sizeof(*zheader),
297 xv_get_object_size(cmem) - sizeof(*zheader),
300 kunmap_atomic(user_mem, KM_USER0);
301 kunmap_atomic(cmem, KM_USER1);
303 /* should NEVER happen */
304 if (unlikely(ret != LZO_E_OK)) {
305 pr_err("Decompression failed! err=%d, page=%u\n",
307 rzs_stat64_inc(rzs, &rzs->stats.failed_reads);
311 flush_dcache_page(page);
313 set_bit(BIO_UPTODATE, &bio->bi_flags);
/*
 * Store the one-page write request @bio in device @rzs.
 * Visible flow: free whatever the slot held before, detect zero-filled
 * pages (only a flag is recorded), otherwise LZO-compress the page into the
 * per-device scratch buffer. Results larger than max_zpage_size are stored
 * as a whole uncompressed page; everything else is stored in the xvmalloc
 * pool behind a zobj_header carrying the table index.
 * Failures of compression or allocation are logged and counted in
 * failed_writes.
 * NOTE(review): gotos, labels, the clen/offset assignments of the
 * uncompressed path and all return statements are not visible in this
 * listing.
 */
322 static int ramzswap_write(struct ramzswap *rzs, struct bio *bio)
327 struct zobj_header *zheader;
328 struct page *page, *page_store;
329 unsigned char *user_mem, *cmem, *src;
331 rzs_stat64_inc(rzs, &rzs->stats.num_writes);
333 page = bio->bi_io_vec[0].bv_page;
334 index = bio->bi_sector >> SECTORS_PER_PAGE_SHIFT;
/* Compressor output goes to the per-device scratch buffer. */
336 src = rzs->compress_buffer;
339 * System swaps to same sector again when the stored page
340 * is no longer referenced by any process. So, it's now safe
341 * to free the memory that was allocated for this page.
343 if (rzs->table[index].page || rzs_test_flag(rzs, index, RZS_ZERO))
344 ramzswap_free_page(rzs, index);
/*
 * NOTE(review): the free above runs before rzs->lock is taken, and the
 * RZS_ZERO flag/stat update below runs after it is dropped - confirm that
 * table and stats updates outside the lock are intended.
 */
346 mutex_lock(&rzs->lock);
348 user_mem = kmap_atomic(page, KM_USER0);
349 if (page_zero_filled(user_mem)) {
350 kunmap_atomic(user_mem, KM_USER0);
351 mutex_unlock(&rzs->lock);
352 rzs_stat_inc(&rzs->stats.pages_zero);
353 rzs_set_flag(rzs, index, RZS_ZERO);
355 set_bit(BIO_UPTODATE, &bio->bi_flags);
360 ret = lzo1x_1_compress(user_mem, PAGE_SIZE, src, &clen,
361 rzs->compress_workmem);
363 kunmap_atomic(user_mem, KM_USER0);
365 if (unlikely(ret != LZO_E_OK)) {
366 mutex_unlock(&rzs->lock);
367 pr_err("Compression failed! err=%d\n", ret);
368 rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
373 * Page is incompressible. Store it as-is (uncompressed)
374 * since we do not want to return too many swap write
375 * errors which has side effect of hanging the system.
377 if (unlikely(clen > max_zpage_size)) {
379 page_store = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
380 if (unlikely(!page_store)) {
381 mutex_unlock(&rzs->lock);
382 pr_info("Error allocating memory for incompressible "
383 "page: %u\n", index);
384 rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
/*
 * Record the slot as uncompressed; the copy source becomes the original
 * page, re-mapped here and unmapped after the memcpy below.
 */
389 rzs_set_flag(rzs, index, RZS_UNCOMPRESSED);
390 rzs_stat_inc(&rzs->stats.pages_expand);
391 rzs->table[index].page = page_store;
392 src = kmap_atomic(page, KM_USER0);
/* Compressed path: reserve room for the payload plus its zobj_header. */
396 if (xv_malloc(rzs->mem_pool, clen + sizeof(*zheader),
397 &rzs->table[index].page, &offset,
398 GFP_NOIO | __GFP_HIGHMEM)) {
399 mutex_unlock(&rzs->lock);
400 pr_info("Error allocating memory for compressed "
401 "page: %u, size=%zu\n", index, clen);
402 rzs_stat64_inc(rzs, &rzs->stats.failed_writes);
407 rzs->table[index].offset = offset;
409 cmem = kmap_atomic(rzs->table[index].page, KM_USER1) +
410 rzs->table[index].offset;
413 /* Back-reference needed for memory defragmentation */
414 if (!rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)) {
415 zheader = (struct zobj_header *)cmem;
416 zheader->table_idx = index;
417 cmem += sizeof(*zheader);
/* src is either the scratch buffer or the re-mapped original page. */
421 memcpy(cmem, src, clen);
423 kunmap_atomic(cmem, KM_USER1);
424 if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
425 kunmap_atomic(src, KM_USER0);
/* Account for the newly stored page. */
428 rzs->stats.compr_size += clen;
429 rzs_stat_inc(&rzs->stats.pages_stored);
430 if (clen <= PAGE_SIZE / 2)
431 rzs_stat_inc(&rzs->stats.good_compress);
433 mutex_unlock(&rzs->lock);
435 set_bit(BIO_UPTODATE, &bio->bi_flags);
445 * Check if request is within bounds and page aligned.
/*
 * Validate @bio against device @rzs. The visible conditions flag a request
 * when its start sector is at or beyond the disk size, is not aligned to a
 * page worth of sectors, or when the bio is not exactly one segment of
 * PAGE_SIZE bytes starting at offset 0 within its page.
 * NOTE(review): the return statements are not visible in this listing;
 * ramzswap_make_request() treats a zero return as "invalid".
 */
447 static inline int valid_swap_request(struct ramzswap *rzs, struct bio *bio)
450 (bio->bi_sector >= (rzs->disksize >> SECTOR_SHIFT)) ||
451 (bio->bi_sector & (SECTORS_PER_PAGE - 1)) ||
452 (bio->bi_vcnt != 1) ||
453 (bio->bi_size != PAGE_SIZE) ||
454 (bio->bi_io_vec[0].bv_offset != 0))) {
459 /* swap request is valid */
464 * Handler function for all ramzswap I/O requests.
/*
 * make_request entry point installed on the device queue by create_device().
 * The device is taken from queue->queuedata. Requests are special-cased
 * while the device is not initialized, counted as invalid_io when they fail
 * valid_swap_request(), and otherwise dispatched by data direction to
 * ramzswap_read() or ramzswap_write().
 * NOTE(review): the error handling for the first two cases, the case labels
 * and the return statement are not visible in this listing.
 */
466 static int ramzswap_make_request(struct request_queue *queue, struct bio *bio)
469 struct ramzswap *rzs = queue->queuedata;
471 if (unlikely(!rzs->init_done)) {
476 if (!valid_swap_request(rzs, bio)) {
477 rzs_stat64_inc(rzs, &rzs->stats.invalid_io);
482 switch (bio_data_dir(bio)) {
484 ret = ramzswap_read(rzs, bio);
488 ret = ramzswap_write(rzs, bio);
/*
 * Tear down the runtime state of @rzs: release the compressor work memory
 * and scratch buffer, walk every table slot to free stored pages, destroy
 * the xvmalloc pool and zero the statistics.
 * Also used as the failure path of ramzswap_ioctl_init_device().
 * NOTE(review): the statement that stops new I/O, the per-slot skip/free
 * logic for uncompressed pages, and the release of the table itself are not
 * visible in this listing.
 */
495 static void reset_device(struct ramzswap *rzs)
499 /* Do not accept any new I/O request */
502 /* Free various per-device buffers */
503 kfree(rzs->compress_workmem);
/* Order 1 matches the __get_free_pages(..., 1) allocation in the init path. */
504 free_pages((unsigned long)rzs->compress_buffer, 1);
506 rzs->compress_workmem = NULL;
507 rzs->compress_buffer = NULL;
509 /* Free all pages that are still in this ramzswap device */
510 for (index = 0; index < rzs->disksize >> PAGE_SHIFT; index++) {
514 page = rzs->table[index].page;
515 offset = rzs->table[index].offset;
520 if (unlikely(rzs_test_flag(rzs, index, RZS_UNCOMPRESSED)))
523 xv_free(rzs->mem_pool, page, offset);
529 xv_destroy_pool(rzs->mem_pool);
530 rzs->mem_pool = NULL;
533 memset(&rzs->stats, 0, sizeof(rzs->stats));
/*
 * Bring device @rzs into service.
 * Visible steps: refuse a second initialization, finalize the disk size,
 * allocate the LZO work memory and a two-page scratch buffer, allocate and
 * zero the page table, store a swap header in slot 0 as an uncompressed
 * page, publish the capacity, flag the queue non-rotational and create the
 * xvmalloc pool. Every failure logs a message.
 * NOTE(review): the error codes, the goto/label cleanup path, the statement
 * that marks the device initialized and the return statements are not
 * visible in this listing.
 */
538 static int ramzswap_ioctl_init_device(struct ramzswap *rzs)
543 union swap_header *swap_header;
545 if (rzs->init_done) {
546 pr_info("Device already initialized!\n");
550 ramzswap_set_disksize(rzs, totalram_pages << PAGE_SHIFT);
552 rzs->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
553 if (!rzs->compress_workmem) {
554 pr_err("Error allocating compressor working memory!\n");
/*
 * NOTE(review): this call and the alloc_page() below pass only the
 * __GFP_ZERO modifier, with no base mask such as the GFP_KERNEL used by
 * the kzalloc() above - confirm whether GFP_KERNEL | __GFP_ZERO was meant.
 */
559 rzs->compress_buffer = (void *)__get_free_pages(__GFP_ZERO, 1);
560 if (!rzs->compress_buffer) {
561 pr_err("Error allocating compressor buffer space\n");
/* One table entry per page of disk. */
566 num_pages = rzs->disksize >> PAGE_SHIFT;
567 rzs->table = vmalloc(num_pages * sizeof(*rzs->table));
569 pr_err("Error allocating ramzswap address table\n");
570 /* To prevent accessing table entries during cleanup */
575 memset(rzs->table, 0, num_pages * sizeof(*rzs->table));
577 page = alloc_page(__GFP_ZERO);
579 pr_err("Error allocating swap header page\n");
/* Slot 0 holds the swap header and is served by the uncompressed read path. */
583 rzs->table[0].page = page;
584 rzs_set_flag(rzs, 0, RZS_UNCOMPRESSED);
/* NOTE(review): the matching kunmap() is not visible in this listing. */
586 swap_header = kmap(page);
587 setup_swap_header(rzs, swap_header);
590 set_capacity(rzs->disk, rzs->disksize >> SECTOR_SHIFT);
592 /* ramzswap devices sort of resemble non-rotational disks */
593 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, rzs->disk->queue);
595 rzs->mem_pool = xv_create_pool();
596 if (!rzs->mem_pool) {
597 pr_err("Error creating memory pool\n");
604 pr_debug("Initialization done!\n");
610 pr_err("Initialization failed: err=%d\n", ret);
/*
 * Reset handler called from ramzswap_ioctl() for device @rzs.
 * NOTE(review): the body of this function is not visible in this listing;
 * presumably it wraps reset_device() - confirm against the full source.
 */
614 static int ramzswap_ioctl_reset_device(struct ramzswap *rzs)
/*
 * ioctl entry point for a ramzswap block device; the device is taken from
 * bdev->bd_disk->private_data.
 * Commands visible here:
 *   RZSIO_SET_DISKSIZE_KB - checks init_done, copies a size in kB from user
 *                           space and stores it in bytes in rzs->disksize.
 *   RZSIO_GET_STATS       - checks init_done, then copies a zero-initialized,
 *                           freshly filled stats structure to user space.
 * The listing also shows calls into the init and reset helpers (the reset
 * path first checks bdev->bd_holders) and a log message for unknown commands.
 * NOTE(review): the switch statement, the init/reset case labels, the error
 * codes, the kfree of the stats buffer and the return statement are not
 * visible in this listing.
 */
622 static int ramzswap_ioctl(struct block_device *bdev, fmode_t mode,
623 unsigned int cmd, unsigned long arg)
628 struct ramzswap *rzs = bdev->bd_disk->private_data;
631 case RZSIO_SET_DISKSIZE_KB:
632 if (rzs->init_done) {
636 if (copy_from_user(&disksize_kb, (void *)arg,
/* kB -> bytes. */
641 rzs->disksize = disksize_kb << 10;
642 pr_info("Disk size set to %zu kB\n", disksize_kb);
645 case RZSIO_GET_STATS:
647 struct ramzswap_ioctl_stats *stats;
648 if (!rzs->init_done) {
652 stats = kzalloc(sizeof(*stats), GFP_KERNEL);
657 ramzswap_ioctl_get_stats(rzs, stats);
658 if (copy_to_user((void *)arg, stats, sizeof(*stats))) {
667 ret = ramzswap_ioctl_init_device(rzs);
671 /* Do not reset an active device! */
672 if (bdev->bd_holders) {
677 /* Make sure all pending I/O is finished */
681 ret = ramzswap_ioctl_reset_device(rzs);
685 pr_info("Invalid ioctl %u\n", cmd);
/*
 * Block device operations assigned to each disk in create_device().
 * Only the ioctl handler and the owning module are set.
 */
693 static struct block_device_operations ramzswap_devops = {
694 .ioctl = ramzswap_ioctl,
695 .owner = THIS_MODULE,
/*
 * Set up the block-layer objects for device @rzs with minor @device_id:
 * initialize its locks, allocate a request queue routed to
 * ramzswap_make_request(), allocate a single-minor gendisk named
 * "ramzswap<device_id>" and configure it with zero capacity and
 * page-sized physical/logical blocks.
 * If the gendisk allocation fails, the queue allocated earlier is released.
 * NOTE(review): the NULL checks, error codes, the call that registers the
 * disk and the return statement are not visible in this listing.
 */
698 static int create_device(struct ramzswap *rzs, int device_id)
702 mutex_init(&rzs->lock);
703 spin_lock_init(&rzs->stat64_lock);
705 rzs->queue = blk_alloc_queue(GFP_KERNEL);
707 pr_err("Error allocating disk queue for device %d\n",
713 blk_queue_make_request(rzs->queue, ramzswap_make_request);
/* Lets ramzswap_make_request() find the device from the queue. */
714 rzs->queue->queuedata = rzs;
716 /* gendisk structure */
717 rzs->disk = alloc_disk(1);
719 blk_cleanup_queue(rzs->queue);
720 pr_warning("Error allocating disk structure for device %d\n",
726 rzs->disk->major = ramzswap_major;
727 rzs->disk->first_minor = device_id;
728 rzs->disk->fops = &ramzswap_devops;
729 rzs->disk->queue = rzs->queue;
/* Lets ramzswap_ioctl() find the device from the gendisk. */
730 rzs->disk->private_data = rzs;
/* NOTE(review): 16 is a hard-coded bound; presumably the size of disk_name - confirm. */
731 snprintf(rzs->disk->disk_name, 16, "ramzswap%d", device_id);
733 /* Actual capacity set using RZSIO_SET_DISKSIZE_KB ioctl */
734 set_capacity(rzs->disk, 0);
736 blk_queue_physical_block_size(rzs->disk->queue, PAGE_SIZE);
737 blk_queue_logical_block_size(rzs->disk->queue, PAGE_SIZE);
/*
 * Undo create_device() for @rzs: remove the gendisk and release the
 * request queue.
 * NOTE(review): any NULL guards and the release of the gendisk reference
 * are not visible in this listing.
 */
747 static void destroy_device(struct ramzswap *rzs)
750 del_gendisk(rzs->disk);
755 blk_cleanup_queue(rzs->queue);
/*
 * Module entry point.
 * Warns about a num_devices value above max_num_devices, registers the
 * "ramzswap" block major (0 requests a dynamically assigned number),
 * falls back to a single device when num_devices was not given, allocates
 * the zeroed device array and creates each device in turn.
 * On failure the visible cleanup destroys the devices created so far and
 * unregisters the major.
 * NOTE(review): the error codes, the NULL check on the array, the cleanup
 * labels/loop and the return statements are not visible in this listing.
 */
758 static int __init ramzswap_init(void)
762 if (num_devices > max_num_devices) {
763 pr_warning("Invalid value for num_devices: %u\n",
769 ramzswap_major = register_blkdev(0, "ramzswap");
770 if (ramzswap_major <= 0) {
771 pr_warning("Unable to get major number\n");
777 pr_info("num_devices not specified. Using default: 1\n");
781 /* Allocate the device array and initialize each one */
782 pr_info("Creating %u devices ...\n", num_devices);
783 devices = kzalloc(num_devices * sizeof(struct ramzswap), GFP_KERNEL);
789 for (dev_id = 0; dev_id < num_devices; dev_id++) {
790 ret = create_device(&devices[dev_id], dev_id);
/* Pre-decrement steps back to the most recently created device. */
799 destroy_device(&devices[--dev_id]);
801 unregister_blkdev(ramzswap_major, "ramzswap");
/*
 * Module exit point: iterate over all num_devices devices, then unregister
 * the "ramzswap" block major.
 * NOTE(review): the per-device teardown inside the loop and the release of
 * the device array are not visible in this listing.
 */
806 static void __exit ramzswap_exit(void)
809 struct ramzswap *rzs;
811 for (i = 0; i < num_devices; i++) {
819 unregister_blkdev(ramzswap_major, "ramzswap");
822 pr_debug("Cleanup done!\n");
/*
 * num_devices can be given at module load time; the permission argument of
 * 0 means the parameter is not exposed through sysfs.
 */
825 module_param(num_devices, uint, 0);
826 MODULE_PARM_DESC(num_devices, "Number of ramzswap devices");
/* Entry and exit points of the module. */
828 module_init(ramzswap_init);
829 module_exit(ramzswap_exit);
831 MODULE_LICENSE("Dual BSD/GPL");
832 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
833 MODULE_DESCRIPTION("Compressed RAM Based Swap Device");