#include <linux/kernel.h>
#include <linux/types.h>
+
+/*
+ * Write @count bytes from @buf at @offset and flush the written data
+ * to disk with fdatasync().
+ *
+ * Returns -1 if the write fails, otherwise fdatasync()'s return
+ * value (0 on success, -1 on error).
+ */
+static inline int qcow_pwrite_sync(int fd,
+	void *buf, size_t count, off_t offset)
+{
+	if (pwrite_in_full(fd, buf, count, offset) < 0)
+		return -1;
+
+	return fdatasync(fd);
+}
+
static int l2_table_insert(struct rb_root *root, struct qcow_l2_table *new)
{
struct rb_node **link = &(root->rb_node), *parent = NULL;
size = 1 << header->l2_bits;
- if (pwrite_in_full(q->fd, c->table, size * sizeof(u64), c->offset) < 0)
+ if (qcow_pwrite_sync(q->fd, c->table,
+ size * sizeof(u64), c->offset) < 0)
return -1;
c->dirty = 0;
*/
lru = list_first_entry(&l1t->lru_list, struct qcow_l2_table, list);
- if (qcow_l2_cache_write(q, lru) < 0)
- goto error;
-
/* Remove the node from the cache */
rb_erase(&lru->node, r);
list_del_init(&lru->list);
return total;
}
-static inline u64 file_size(int fd)
-{
- struct stat st;
-
- if (fstat(fd, &st) < 0)
- return 0;
-
- return st.st_size;
-}
-
-static inline int qcow_pwrite_sync(int fd, void *buf, size_t count, off_t offset)
-{
- if (pwrite_in_full(fd, buf, count, offset) < 0)
- return -1;
-
- return fdatasync(fd);
-}
-
-/* Writes a level 2 table at the end of the file. */
-static u64 qcow_write_l2_table(struct qcow *q, u64 *table)
-{
- struct qcow_header *header = q->header;
- u64 clust_sz;
- u64 f_sz;
- u64 off;
- u64 sz;
-
- f_sz = file_size(q->fd);
- if (!f_sz)
- return 0;
-
- sz = 1 << header->l2_bits;
- clust_sz = 1 << header->cluster_bits;
- off = ALIGN(f_sz, clust_sz);
-
- if (pwrite_in_full(q->fd, table, sz * sizeof(u64), off) < 0)
- return 0;
-
- return off;
-}
-
static void refcount_table_free_cache(struct qcow_refcount_table *rft)
{
struct rb_root *r = &rft->root;
if (!rfb->dirty)
return 0;
- if (pwrite_in_full(q->fd, rfb->entries, rfb->size * sizeof(u16), rfb->offset) < 0)
+ if (qcow_pwrite_sync(q->fd, rfb->entries,
+ rfb->size * sizeof(u16), rfb->offset) < 0)
return -1;
rfb->dirty = 0;
if (rft->nr_cached == MAX_CACHE_NODES) {
lru = list_first_entry(&rft->lru_list, struct qcow_refcount_block, list);
- if (write_refcount_block(q, lru) < 0)
- goto error;
-
rb_erase(&lru->node, r);
list_del_init(&lru->list);
rft->nr_cached--;
rfb_offset = be64_to_cpu(rft->rf_table[rft_idx]);
+ if (!rfb_offset) {
+ pr_warning("Don't support to grow refcount table");
+ return NULL;
+ }
+
rfb = refcount_block_search(q, rfb_offset);
if (rfb)
return rfb;
return NULL;
}
+/*
+ * Look up the refcount of cluster @clust_idx through the refcount
+ * block cache.
+ *
+ * Returns the refcount on success. On error it returns -1, which in
+ * the u16 return type appears as (u16)-1 (0xffff) -- callers must
+ * compare against (u16)-1, not test "< 0".
+ */
+static u16 qcow_get_refcount(struct qcow *q, u64 clust_idx)
+{
+	struct qcow_refcount_block *rfb = NULL;
+	struct qcow_header *header = q->header;
+	u64 rfb_idx;
+
+	rfb = qcow_read_refcount_block(q, clust_idx);
+	if (!rfb) {
+		pr_warning("Error while reading refcount table");
+		return -1;
+	}
+
+	/* index of this cluster's entry within its refcount block */
+	rfb_idx = clust_idx & (((1ULL <<
+		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
+
+	if (rfb_idx >= rfb->size) {
+		pr_warning("L1: refcount block index out of bounds");
+		return -1;
+	}
+
+	return be16_to_cpu(rfb->entries[rfb_idx]);
+}
+
+/*
+ * Add @append to the refcount of cluster @clust_idx and write the
+ * updated refcount block back to disk. Passing (u16)-1 decrements
+ * the refcount via u16 wrap-around.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int update_cluster_refcount(struct qcow *q, u64 clust_idx, u16 append)
+{
+	struct qcow_refcount_block *rfb = NULL;
+	struct qcow_header *header = q->header;
+	u16 refcount;
+	u64 rfb_idx;
+
+	rfb = qcow_read_refcount_block(q, clust_idx);
+	if (!rfb) {
+		pr_warning("error while reading refcount table");
+		return -1;
+	}
+
+	rfb_idx = clust_idx & (((1ULL <<
+		(header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
+	if (rfb_idx >= rfb->size) {
+		pr_warning("refcount block index out of bounds");
+		return -1;
+	}
+
+	refcount = be16_to_cpu(rfb->entries[rfb_idx]) + append;
+	rfb->entries[rfb_idx] = cpu_to_be16(refcount);
+	rfb->dirty = 1;
+
+	/* write refcount block */
+	if (write_refcount_block(q, rfb) < 0) {
+		pr_warning("error while writing refcount block");
+		return -1;
+	}
+
+	/* update free_clust_idx since refcount becomes zero */
+	if (!refcount && clust_idx < q->free_clust_idx)
+		q->free_clust_idx = clust_idx;
+
+	return 0;
+}
+
+/*
+ * Drop one reference on every cluster touched by the byte range
+ * [@clust_start, @clust_start + @size). The decrement is done by
+ * passing (u16)-1 as the append value, which wraps to -1 in u16
+ * arithmetic inside update_cluster_refcount().
+ */
+static void qcow_free_clusters(struct qcow *q, u64 clust_start, u64 size)
+{
+	struct qcow_header *header = q->header;
+	u64 start, end, offset;
+
+	/* round the range outwards to whole cluster boundaries */
+	start = clust_start & ~(q->cluster_size - 1);
+	end = (clust_start + size - 1) & ~(q->cluster_size - 1);
+	for (offset = start; offset <= end; offset += q->cluster_size)
+		update_cluster_refcount(q, offset >> header->cluster_bits, -1);
+}
+
/*
- * QCOW file might grow during a write operation. Not only data but metadata is
- * also written at the end of the file. Therefore it is necessary to ensure
- * every write is committed to disk. Hence we use uses qcow_pwrite_sync() to
- * synchronize the in-core state of QCOW image to disk.
- *
- * We also try to restore the image to a consistent state if the metdata
- * operation fails. The two metadat operations are: level 1 and level 2 table
- * update. If either of them fails the image is truncated to a consistent state.
+ * Allocate clusters according to the requested size. Scan forward
+ * from free_clust_idx (initialized to zero) until a run of free
+ * clusters large enough for the size is found; free_clust_idx
+ * records the last scanned position.
*/
-static ssize_t qcow_write_cluster(struct qcow *q, u64 offset, void *buf, u32 src_len)
+/*
+ * Returns the file offset of the first cluster of the newly
+ * allocated run, or (u64)-1 on failure.
+ */
+static u64 qcow_alloc_clusters(struct qcow *q, u64 size)
+{
+	struct qcow_header *header = q->header;
+	u16 clust_refcount;
+	u64 clust_idx = 0, i;
+	u64 clust_num;
+
+	clust_num = (size + (q->cluster_size - 1)) >> header->cluster_bits;
+
+again:
+	for (i = 0; i < clust_num; i++) {
+		/* u64 index: a u32 would truncate on very large images */
+		clust_idx = q->free_clust_idx++;
+		clust_refcount = qcow_get_refcount(q, clust_idx);
+		/*
+		 * qcow_get_refcount() returns u16; its error value -1
+		 * arrives as (u16)-1, so a "< 0" test can never fire.
+		 */
+		if (clust_refcount == (u16)-1)
+			return (u64)-1;
+		else if (clust_refcount > 0)
+			goto again;
+	}
+
+	for (i = 0; i < clust_num; i++)
+		if (update_cluster_refcount(q,
+			q->free_clust_idx - clust_num + i, 1))
+			return (u64)-1;
+
+	return (q->free_clust_idx - clust_num) << header->cluster_bits;
+}
+
+/*
+ * Write the in-core L1 table to its fixed location in the image
+ * (header->l1_table_offset) and sync it to disk.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int qcow_write_l1_table(struct qcow *q)
+{
+	struct qcow_l1_table *l1t = &q->table;
+	struct qcow_header *header = q->header;
+
+	if (qcow_pwrite_sync(q->fd, l1t->l1_table,
+		l1t->table_size * sizeof(u64),
+		header->l1_table_offset) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Get the L2 table covering @offset. If the table has already been
+ * copied (QCOW2_OFLAG_COPIED), read it directly. Otherwise allocate
+ * a new cluster and copy the existing table, if any, into it.
+ */
+static int get_cluster_table(struct qcow *q, u64 offset,
+ struct qcow_l2_table **result_l2t, u64 *result_l2_index)
{
struct qcow_header *header = q->header;
struct qcow_l1_table *l1t = &q->table;
struct qcow_l2_table *l2t;
- u64 clust_start;
- u64 clust_flags;
- u64 l2t_offset;
- u64 clust_off;
- u64 l2t_size;
- u64 clust_sz;
u64 l1t_idx;
+ u64 l2t_offset;
u64 l2t_idx;
- u64 f_sz;
- u64 len;
+ u64 l2t_size;
+ u64 l2t_new_offset;
- l2t = NULL;
- l2t_size = 1 << header->l2_bits;
- clust_sz = 1 << header->cluster_bits;
+ l2t_size = 1 << header->l2_bits;
l1t_idx = get_l1_index(q, offset);
if (l1t_idx >= l1t->table_size)
if (l2t_idx >= l2t_size)
return -1;
- clust_off = get_cluster_offset(q, offset);
- if (clust_off >= clust_sz)
- return -1;
-
- len = clust_sz - clust_off;
- if (len > src_len)
- len = src_len;
-
- mutex_lock(&q->mutex);
-
l2t_offset = be64_to_cpu(l1t->l1_table[l1t_idx]);
- if (l2t_offset & QCOW2_OFLAG_COMPRESSED) {
- pr_warning("compressed clusters are not supported");
- goto error;
- }
- if (!(l2t_offset & QCOW2_OFLAG_COPIED)) {
- pr_warning("L2 copy-on-write clusters are not supported");
- goto error;
- }
-
- l2t_offset &= QCOW2_OFFSET_MASK;
- if (l2t_offset) {
- /* read and cache l2 table */
+ if (l2t_offset & QCOW2_OFLAG_COPIED) {
+ l2t_offset &= ~QCOW2_OFLAG_COPIED;
l2t = qcow_read_l2_table(q, l2t_offset);
if (!l2t)
goto error;
} else {
- l2t = new_cache_table(q, l2t_offset);
- if (!l2t)
+ l2t_new_offset = qcow_alloc_clusters(q, l2t_size*sizeof(u64));
+ if (l2t_new_offset < 0)
goto error;
- /* Capture the state of the consistent QCOW image */
- f_sz = file_size(q->fd);
- if (!f_sz)
- goto free_cache;
+ l2t = new_cache_table(q, l2t_new_offset);
+ if (!l2t)
+ goto free_cluster;
+
+ if (l2t_offset) {
+ l2t = qcow_read_l2_table(q, l2t_offset);
+ if (!l2t)
+ goto free_cache;
+ } else
+ memset(l2t->table, 0x00, l2t_size * sizeof(u64));
- /* Write the l2 table of 0's at the end of the file */
- l2t_offset = qcow_write_l2_table(q, l2t->table);
- if (!l2t_offset)
+ /* write l2 table */
+ l2t->dirty = 1;
+ if (qcow_l2_cache_write(q, l2t) < 0)
goto free_cache;
- if (cache_table(q, l2t) < 0) {
- if (ftruncate(q->fd, f_sz) < 0)
- goto free_cache;
+ /* cache l2 table */
+ if (cache_table(q, l2t))
+ goto free_cache;
+		/* update the l1 table */
+		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_new_offset
+			| QCOW2_OFLAG_COPIED);
+		if (qcow_write_l1_table(q)) {
+			pr_warning("Update l1 table error");
+			goto free_cache;
		}
-		/* Update the in-core entry */
-		l1t->l1_table[l1t_idx] = cpu_to_be64(l2t_offset);
+		/* free the old L2 table cluster, but only if one existed:
+		 * l2t_offset == 0 means there was no table, and freeing
+		 * would decrement the refcount of cluster 0 (the header). */
+		if (l2t_offset)
+			qcow_free_clusters(q, l2t_offset, q->cluster_size);
	}
- /* Capture the state of the consistent QCOW image */
- f_sz = file_size(q->fd);
- if (!f_sz)
- goto error;
+ *result_l2t = l2t;
+ *result_l2_index = l2t_idx;
- clust_start = be64_to_cpu(l2t->table[l2t_idx]);
+ return 0;
- clust_flags = clust_start & QCOW2_OFLAGS_MASK;
- if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
- pr_warning("compressed clusters are not supported");
- goto error;
- }
+free_cache:
+ free(l2t);
- clust_start &= QCOW2_OFFSET_MASK;
- if (!clust_start) {
- clust_start = ALIGN(f_sz, clust_sz);
- l2t->table[l2t_idx] = cpu_to_be64(clust_start | QCOW2_OFLAG_COPIED);
- l2t->dirty = 1;
- }
+free_cluster:
+ qcow_free_clusters(q, l2t_new_offset, q->cluster_size);
- if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
- struct qcow_refcount_block *rfb = NULL;
- u16 clust_refcount;
- u64 clust_idx;
- u64 rfb_idx;
+error:
+ return -1;
+}
- clust_idx = (clust_start & QCOW2_OFFSET_MASK)
- >> (header->cluster_bits);
+/*
+ * If the cluster has been copied, write data directly. If not,
+ * read the original data and write it to the new cluster with
+ * modification.
+ */
+static ssize_t qcow_write_cluster(struct qcow *q, u64 offset,
+ void *buf, u32 src_len)
+{
+ struct qcow_l2_table *l2t;
+ u64 clust_new_start;
+ u64 clust_start;
+ u64 clust_flags;
+ u64 clust_off;
+ u64 l2t_idx;
+ u64 len;
- rfb = qcow_read_refcount_block(q, clust_idx);
- if (!rfb) {
- pr_warning("L1: error while reading refcount table");
- goto error;
- }
+ l2t = NULL;
- rfb_idx = clust_idx & (((1ULL << (header->cluster_bits - QCOW_REFCOUNT_BLOCK_SHIFT)) - 1));
- if (rfb_idx >= rfb->size) {
- pr_warning("L1: refcount block index out of bounds");
+ clust_off = get_cluster_offset(q, offset);
+ if (clust_off >= q->cluster_size)
+ return -1;
+
+ len = q->cluster_size - clust_off;
+ if (len > src_len)
+ len = src_len;
+
+ mutex_lock(&q->mutex);
+
+ if (get_cluster_table(q, offset, &l2t, &l2t_idx)) {
+ pr_warning("Get l2 table error");
+ goto error;
+ }
+
+ clust_start = be64_to_cpu(l2t->table[l2t_idx]);
+ clust_flags = clust_start & QCOW2_OFLAGS_MASK;
+
+ clust_start &= QCOW2_OFFSET_MASK;
+	if (!(clust_flags & QCOW2_OFLAG_COPIED)) {
+		clust_new_start = qcow_alloc_clusters(q, q->cluster_size);
+		/* qcow_alloc_clusters() returns (u64)-1 on failure; an
+		 * unsigned value is never < 0, so test the sentinel. */
+		if (clust_new_start == (u64)-1) {
+			pr_warning("Cluster alloc error");
+			goto error;
		}
- clust_refcount = be16_to_cpu(rfb->entries[rfb_idx]);
- if (!clust_refcount) {
- clust_refcount = 1;
- rfb->entries[rfb_idx] = cpu_to_be16(clust_refcount);
- rfb->dirty = 1;
- }
+ offset &= ~(q->cluster_size - 1);
+
+ /* if clust_start is not zero, read the original data*/
+ if (clust_start) {
+ mutex_unlock(&q->mutex);
+ if (qcow2_read_cluster(q, offset, q->copy_buff,
+ q->cluster_size) < 0) {
+ pr_warning("Read copy cluster error");
+ qcow_free_clusters(q, clust_new_start,
+ q->cluster_size);
+ return -1;
+ }
+ mutex_lock(&q->mutex);
+ } else
+ memset(q->copy_buff, 0x00, q->cluster_size);
+
+ memcpy(q->copy_buff + clust_off, buf, len);
+
+ /* Write actual data */
+ if (pwrite_in_full(q->fd, q->copy_buff, q->cluster_size,
+ clust_new_start) < 0)
+ goto free_cluster;
+
+ /* update l2 table*/
+ l2t->table[l2t_idx] = cpu_to_be64(clust_new_start
+ | QCOW2_OFLAG_COPIED);
+ l2t->dirty = 1;
+
+ if (qcow_l2_cache_write(q, l2t))
+ goto free_cluster;
+
+ /* free old cluster*/
+ if (clust_flags & QCOW2_OFLAG_COMPRESSED) {
+ int size;
+ size = ((clust_start >> q->csize_shift) &
+ q->csize_mask) + 1;
+ size *= 512;
+ clust_start &= q->cluster_offset_mask;
+ clust_start &= ~511;
+
+ qcow_free_clusters(q, clust_start, size);
+ } else if (clust_start)
+ qcow_free_clusters(q, clust_start, q->cluster_size);
- if (clust_refcount > 1) {
- pr_warning("L1 copy-on-write clusters are not supported");
+ } else {
+ /* Write actual data */
+ if (pwrite_in_full(q->fd, buf, len,
+ clust_start + clust_off) < 0)
goto error;
- }
}
-
mutex_unlock(&q->mutex);
-
- /* Write actual data */
- if (pwrite_in_full(q->fd, buf, len, clust_start + clust_off) < 0)
- return -1;
-
return len;
-free_cache:
- free(l2t);
+free_cluster:
+ qcow_free_clusters(q, clust_new_start, q->cluster_size);
+
error:
mutex_unlock(&q->mutex);
return -1;
{
struct qcow *q = disk->priv;
struct qcow_refcount_table *rft;
- struct qcow_header *header;
struct list_head *pos, *n;
struct qcow_l1_table *l1t;
- header = q->header;
l1t = &q->table;
rft = &q->refcount_table;
goto error_unlock;
}
- if (fdatasync(disk->fd) < 0)
- goto error_unlock;
-
list_for_each_safe(pos, n, &l1t->lru_list) {
struct qcow_l2_table *c = list_entry(pos, struct qcow_l2_table, list);
goto error_unlock;
}
- if (fdatasync(disk->fd) < 0)
- goto error_unlock;
-
-	if (pwrite_in_full(disk->fd, l1t->l1_table, l1t->table_size * sizeof(u64), header->l1_table_offset) < 0)
+	/* must actually call the function: the bare name decays to a
+	 * function pointer and "ptr < 0" is always false, silently
+	 * skipping the L1 table flush on error. */
+	if (qcow_write_l1_table(q) < 0)
		goto error_unlock;
mutex_unlock(&q->mutex);
refcount_table_free_cache(&q->refcount_table);
l1_table_free_cache(&q->table);
+ free(q->copy_buff);
free(q->cluster_data);
free(q->cluster_cache);
free(q->refcount_table.rf_table);
q->cluster_offset_mask = (1LL << q->csize_shift) - 1;
q->cluster_size = 1 << q->header->cluster_bits;
+ q->copy_buff = malloc(q->cluster_size);
+ if (!q->copy_buff) {
+ pr_warning("copy buff malloc error");
+ goto free_header;
+ }
+
q->cluster_data = malloc(q->cluster_size);
if (!q->cluster_data) {
- pr_warning("cluster data malloc error!");
- goto free_header;
+ pr_warning("cluster data malloc error");
+ goto free_copy_buff;
}
q->cluster_cache = malloc(q->cluster_size);
if (!q->cluster_cache) {
- pr_warning("cluster cache malloc error!");
+ pr_warning("cluster cache malloc error");
goto free_cluster_data;
}
free_cluster_data:
if (q->cluster_data)
free(q->cluster_data);
+free_copy_buff:
+ if (q->copy_buff)
+ free(q->copy_buff);
free_header:
if (q->header)
free(q->header);
q->version = QCOW1_VERSION;
q->cluster_size = 1 << q->header->cluster_bits;
q->cluster_offset_mask = (1LL << (63 - q->header->cluster_bits)) - 1;
+ q->free_clust_idx = 0;
q->cluster_data = malloc(q->cluster_size);
if (!q->cluster_data) {
- pr_warning("cluster data malloc error!");
+ pr_warning("cluster data malloc error");
goto free_header;
}
q->cluster_cache = malloc(q->cluster_size);
if (!q->cluster_cache) {
- pr_warning("cluster cache malloc error!");
+ pr_warning("cluster cache malloc error");
goto free_cluster_data;
}