1 /* CacheFiles path walking and related routines
3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
4 * Written by David Howells (dhowells@redhat.com)
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public Licence
8 * as published by the Free Software Foundation; either version
9 * 2 of the Licence, or (at your option) any later version.
12 #include <linux/module.h>
13 #include <linux/sched.h>
14 #include <linux/file.h>
16 #include <linux/fsnotify.h>
17 #include <linux/quotaops.h>
18 #include <linux/xattr.h>
19 #include <linux/mount.h>
20 #include <linux/namei.h>
21 #include <linux/security.h>
22 #include <linux/slab.h>
/* Size in bytes of the scratch key buffer: cachefiles_printk_object()
 * allocates this much, and __cachefiles_printk_object() passes it to the
 * cookie's get_key() op as the buffer size argument. */
25 #define CACHEFILES_KEYBUF_SIZE 512
28 * dump debugging info about an object
/*
 * Log the state of one object with pr_err(), starting every line with
 * prefix: debug id, state name, flags, work_busy() status, events and event
 * mask, the n_ops/n_in_progress/n_exclusive counters, the parent pointer and
 * then the cookie's parent, netfs_data and flags.
 *
 * If keybuf is non-NULL and the cookie has a definition, the cookie's
 * get_key() op is asked to fill keybuf (size CACHEFILES_KEYBUF_SIZE) and the
 * returned number of bytes is printed in hex.
 */
31 void __cachefiles_printk_object(struct cachefiles_object *object,
35 struct fscache_cookie *cookie;
36 unsigned keylen, loop;
38 pr_err("%sobject: OBJ%x\n", prefix, object->fscache.debug_id);
39 pr_err("%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n",
40 prefix, object->fscache.state->name,
41 object->fscache.flags, work_busy(&object->fscache.work),
42 object->fscache.events, object->fscache.event_mask);
43 pr_err("%sops=%u inp=%u exc=%u\n",
44 prefix, object->fscache.n_ops, object->fscache.n_in_progress,
45 object->fscache.n_exclusive);
46 pr_err("%sparent=%p\n",
47 prefix, object->fscache.parent);
/* the cookie is sampled, dumped and asked for its key under the object's
 * spinlock */
49 spin_lock(&object->fscache.lock);
50 cookie = object->fscache.cookie;
52 pr_err("%scookie=%p [pr=%p nd=%p fl=%lx]\n",
54 object->fscache.cookie,
55 object->fscache.cookie->parent,
56 object->fscache.cookie->netfs_data,
57 object->fscache.cookie->flags);
/* the key can only be fetched if the caller supplied a buffer and the cookie
 * has a definition to provide get_key() */
58 if (keybuf && cookie->def)
59 keylen = cookie->def->get_key(cookie->netfs_data, keybuf,
60 CACHEFILES_KEYBUF_SIZE);
64 pr_err("%scookie=NULL\n", prefix);
67 spin_unlock(&object->fscache.lock);
/* print the key length, then append each key byte as two hex digits with
 * pr_cont() */
70 pr_err("%skey=[%u] '", prefix, keylen);
71 for (loop = 0; loop < keylen; loop++)
72 pr_cont("%02x", keybuf[loop]);
78 * dump debugging info about a pair of objects
/*
 * Dump object and then xobject through __cachefiles_printk_object(); the
 * lines of the second dump are prefixed with "x". One scratch buffer of
 * CACHEFILES_KEYBUF_SIZE bytes, allocated with GFP_NOIO, is handed to both
 * dumps for the key.
 *
 * NOTE(review): callers in this file pass xobject == NULL - confirm the
 * second dump is guarded against that.
 * NOTE(review): cachefiles_mark_object_buried() and
 * cachefiles_mark_object_active() call this with cache->active_lock
 * write-held - confirm a GFP_NOIO allocation is acceptable in that context.
 */
80 static noinline void cachefiles_printk_object(struct cachefiles_object *object,
81 struct cachefiles_object *xobject)
85 keybuf = kmalloc(CACHEFILES_KEYBUF_SIZE, GFP_NOIO);
87 __cachefiles_printk_object(object, "", keybuf);
89 __cachefiles_printk_object(xobject, "x", keybuf);
94 * mark the owner of a dentry, if there is one, to indicate that that dentry
95 * has been preemptively deleted
96 * - the caller must hold the i_mutex on the dentry's parent as required to
97 * call vfs_unlink(), vfs_rmdir() or vfs_rename()
/*
 * Search cache's tree of active objects for the one whose dentry is the
 * given dentry and, if there is one, set CACHEFILES_OBJECT_BURIED on it.
 * An error is logged if that object is still live or if the flag was already
 * set. The search and the marking are done with cache->active_lock
 * write-held.
 */
99 static void cachefiles_mark_object_buried(struct cachefiles_cache *cache,
100 struct dentry *dentry)
102 struct cachefiles_object *object;
106 dentry->d_name.len, dentry->d_name.len, dentry->d_name.name);
108 write_lock(&cache->active_lock);
/* descend the tree, which is ordered by the dentry pointer value */
110 p = cache->active_nodes.rb_node;
112 object = rb_entry(p, struct cachefiles_object, active_node);
113 if (object->dentry > dentry)
115 else if (object->dentry < dentry)
/* no active object refers to this dentry */
121 write_unlock(&cache->active_lock);
122 _leave(" [no owner]");
125 /* found the object that has this dentry */
127 kdebug("preemptive burial: OBJ%x [%s] %p",
128 object->fscache.debug_id,
129 object->fscache.state->name,
/* a live object must not have its backing dentry removed from under it, and
 * an object should only be marked buried once */
132 if (fscache_object_is_live(&object->fscache)) {
134 pr_err("Error: Can't preemptively bury live object\n");
135 cachefiles_printk_object(object, NULL);
136 } else if (test_and_set_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
137 pr_err("Error: Object already preemptively buried\n");
140 write_unlock(&cache->active_lock);
141 _leave(" [owner marked]");
145 * record the fact that an object is now active
/*
 * Set CACHEFILES_OBJECT_ACTIVE on object and insert it into cache's tree of
 * active objects, which is keyed by dentry pointer. Runs with
 * cache->active_lock write-held while the tree is examined and modified.
 *
 * If another object (xobject) already occupies the slot for the same dentry,
 * that is an error if the old object is still live; otherwise a reference is
 * taken on it and we wait - for up to 60 seconds - for its ACTIVE bit to
 * clear. The wait is abandoned, ACTIVE is cleared on object again and the
 * " = -ETIMEDOUT" exit is taken if the old object has work pending, if
 * fscache_object_sleep_till_congested() asks for a requeue while the old
 * object is still active, or if the wait runs out of time.
 */
147 static int cachefiles_mark_object_active(struct cachefiles_cache *cache,
148 struct cachefiles_object *object)
150 struct cachefiles_object *xobject;
151 struct rb_node **_p, *_parent = NULL;
152 struct dentry *dentry;
154 _enter(",%p", object);
157 write_lock(&cache->active_lock);
/* an object must only be made active once */
159 if (test_and_set_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) {
160 pr_err("Error: Object already active\n");
161 cachefiles_printk_object(object, NULL);
/* find the insertion point; an equal dentry pointer means the slot is
 * already taken by another object */
165 dentry = object->dentry;
166 _p = &cache->active_nodes.rb_node;
169 xobject = rb_entry(_parent,
170 struct cachefiles_object, active_node);
172 ASSERT(xobject != object);
174 if (xobject->dentry > dentry)
175 _p = &(*_p)->rb_left;
176 else if (xobject->dentry < dentry)
177 _p = &(*_p)->rb_right;
179 goto wait_for_old_object;
182 rb_link_node(&object->active_node, _parent, _p);
183 rb_insert_color(&object->active_node, &cache->active_nodes);
185 write_unlock(&cache->active_lock);
189 /* an old object from a previous incarnation is hogging the slot - we
190 * need to wait for it to be destroyed */
/* It is the object already in the tree (xobject) that has to be on its way
 * out for the wait below to make sense, so that is the one whose liveness is
 * checked; testing the new object would ignore xobject's state entirely. */
192 if (fscache_object_is_live(&xobject->fscache)) {
194 pr_err("Error: Unexpected object collision\n");
195 cachefiles_printk_object(object, xobject);
/* pin the old object so it can still be examined after the lock is dropped */
198 atomic_inc(&xobject->usage);
199 write_unlock(&cache->active_lock);
201 if (test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) {
202 wait_queue_head_t *wq;
204 signed long timeout = 60 * HZ;
208 /* if the object we're waiting for is queued for processing,
209 * then just put ourselves on the queue behind it */
210 if (work_pending(&xobject->fscache.work)) {
211 _debug("queue OBJ%x behind OBJ%x immediately",
212 object->fscache.debug_id,
213 xobject->fscache.debug_id);
217 /* otherwise we sleep until either the object we're waiting for
218 * is done, or the fscache_object is congested */
219 wq = bit_waitqueue(&xobject->flags, CACHEFILES_OBJECT_ACTIVE);
223 prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
224 if (!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags))
227 requeue = fscache_object_sleep_till_congested(&timeout);
228 } while (timeout > 0 && !requeue);
229 finish_wait(wq, &wait);
232 test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags)) {
233 _debug("queue OBJ%x behind OBJ%x after wait",
234 object->fscache.debug_id,
235 xobject->fscache.debug_id);
241 pr_err("Error: Overlong wait for old active object to go away\n");
242 cachefiles_printk_object(object, xobject);
/* the old object has gone inactive: drop our reference on it */
247 ASSERT(!test_bit(CACHEFILES_OBJECT_ACTIVE, &xobject->flags));
249 cache->cache.ops->put_object(&xobject->fscache);
/* giving up for now: undo the ACTIVE marking and drop the reference taken on
 * the old object */
253 clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
254 cache->cache.ops->put_object(&xobject->fscache);
255 _leave(" = -ETIMEDOUT");
260 * delete an object representation from the cache
261 * - file backed objects are unlinked
262 * - directory backed objects are stuffed into the graveyard for userspace to
264 * - unlocks the directory mutex
/*
 * Remove the backing dentry rep, which lives in directory dir, from the
 * cache. A non-directory is unlinked with vfs_unlink(). A directory is
 * renamed into cache->graveyard under a 16-hex-digit name made from the
 * current time in seconds and cache->gravecounter. Both operations are
 * preceded by the matching security_path_*() check, and
 * cachefiles_mark_object_buried() is invoked for rep after the unlink or
 * rename attempt.
 *
 * dir's i_mutex is unlocked in here on both paths; the rename path then
 * takes both directories with lock_rename() and releases them with
 * unlock_rename() on every exit shown.
 */
266 static int cachefiles_bury_object(struct cachefiles_cache *cache,
271 struct dentry *grave, *trap;
272 struct path path, path_to_graveyard;
/* room for two "%08x" fields plus the terminating NUL */
273 char nbuffer[8 + 8 + 1];
276 _enter(",'%*.*s','%*.*s'",
277 dir->d_name.len, dir->d_name.len, dir->d_name.name,
278 rep->d_name.len, rep->d_name.len, rep->d_name.name);
280 _debug("remove %p from %p", rep, dir);
282 /* non-directories can just be unlinked */
283 if (!S_ISDIR(rep->d_inode->i_mode)) {
284 _debug("unlink stale object");
286 path.mnt = cache->mnt;
288 ret = security_path_unlink(&path, rep);
290 cachefiles_io_error(cache, "Unlink security error");
292 ret = vfs_unlink(dir->d_inode, rep, NULL);
295 cachefiles_mark_object_buried(cache, rep);
298 mutex_unlock(&dir->d_inode->i_mutex);
301 cachefiles_io_error(cache, "Unlink failed");
303 _leave(" = %d", ret);
307 /* directories have to be moved to the graveyard */
308 _debug("move stale object to graveyard");
309 mutex_unlock(&dir->d_inode->i_mutex);
312 /* first step is to make up a grave dentry in the graveyard */
313 sprintf(nbuffer, "%08x%08x",
314 (uint32_t) get_seconds(),
315 (uint32_t) atomic_inc_return(&cache->gravecounter));
317 /* do the multiway lock magic */
318 trap = lock_rename(cache->graveyard, dir);
320 /* do some checks before getting the grave dentry */
321 if (rep->d_parent != dir) {
322 /* the entry was probably culled when we dropped the parent dir
324 unlock_rename(cache->graveyard, dir);
325 _leave(" = 0 [culled?]");
329 if (!S_ISDIR(cache->graveyard->d_inode->i_mode)) {
330 unlock_rename(cache->graveyard, dir);
331 cachefiles_io_error(cache, "Graveyard no longer a directory");
336 unlock_rename(cache->graveyard, dir);
337 cachefiles_io_error(cache, "May not make directory loop");
341 if (d_mountpoint(rep)) {
342 unlock_rename(cache->graveyard, dir);
343 cachefiles_io_error(cache, "Mountpoint in cache");
347 grave = lookup_one_len(nbuffer, cache->graveyard, strlen(nbuffer));
349 unlock_rename(cache->graveyard, dir);
351 if (PTR_ERR(grave) == -ENOMEM) {
352 _leave(" = -ENOMEM");
356 cachefiles_io_error(cache, "Lookup error %ld",
361 if (grave->d_inode) {
362 unlock_rename(cache->graveyard, dir);
369 if (d_mountpoint(grave)) {
370 unlock_rename(cache->graveyard, dir);
372 cachefiles_io_error(cache, "Mountpoint in graveyard");
376 /* target should not be an ancestor of source */
378 unlock_rename(cache->graveyard, dir);
380 cachefiles_io_error(cache, "May not make directory loop");
384 /* attempt the rename */
385 path.mnt = cache->mnt;
387 path_to_graveyard.mnt = cache->mnt;
388 path_to_graveyard.dentry = cache->graveyard;
389 ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0);
391 cachefiles_io_error(cache, "Rename security error %d", ret);
393 ret = vfs_rename(dir->d_inode, rep,
394 cache->graveyard->d_inode, grave, NULL, 0);
395 if (ret != 0 && ret != -ENOMEM)
396 cachefiles_io_error(cache,
397 "Rename failed with error %d", ret);
400 cachefiles_mark_object_buried(cache, rep);
403 unlock_rename(cache->graveyard, dir);
410 * delete an object representation from the cache
/*
 * Delete the backing file or directory of object from cache.
 *
 * The parent directory is pinned with dget_parent() and its i_mutex taken.
 * Nothing is removed if the object is already flagged
 * CACHEFILES_OBJECT_BURIED, or if the dentry no longer has that directory as
 * its parent; in both of those cases the mutex is dropped here. Otherwise
 * the dentry is handed to cachefiles_bury_object().
 */
412 int cachefiles_delete_object(struct cachefiles_cache *cache,
413 struct cachefiles_object *object)
418 _enter(",OBJ%x{%p}", object->fscache.debug_id, object->dentry);
420 ASSERT(object->dentry);
421 ASSERT(object->dentry->d_inode);
422 ASSERT(object->dentry->d_parent);
424 dir = dget_parent(object->dentry);
426 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
428 if (test_bit(CACHEFILES_OBJECT_BURIED, &object->flags)) {
429 /* object allocation for the same key preemptively deleted this
430 * object's file so that it could create its own file */
431 _debug("object preemptively buried");
432 mutex_unlock(&dir->d_inode->i_mutex);
435 /* we need to check that our parent is _still_ our parent - it
436 * may have been renamed */
437 if (dir == object->dentry->d_parent) {
438 ret = cachefiles_bury_object(cache, dir,
439 object->dentry, false);
441 /* it got moved, presumably by cachefilesd culling it,
442 * so it's no longer in the key path and we can ignore
444 mutex_unlock(&dir->d_inode->i_mutex);
450 _leave(" = %d", ret);
455 * walk from the parent object to the child object through the backing
456 * filesystem, creating directories as we go
/*
 * Walk the backing filesystem from parent's dentry towards the dentry for
 * object, looking up one key element per directory level under the current
 * directory's i_mutex and creating whatever is missing: index objects and
 * intermediate levels become directories (vfs_mkdir()), other objects become
 * regular files (vfs_create()). Space is checked with cachefiles_has_space()
 * and the security_path_*() hook is consulted before each creation.
 *
 * An existing terminal object is validated with
 * cachefiles_check_object_xattr(); if that reports -ESTALE the object is
 * buried and the lookup is redone. The object is then marked active, has its
 * xattr set where it was newly constructed, and is reported with
 * fscache_obtained_object(). The error exits undo the active marking and
 * drop the object's dentry.
 */
458 int cachefiles_walk_to_object(struct cachefiles_object *parent,
459 struct cachefiles_object *object,
461 struct cachefiles_xattr *auxdata)
463 struct cachefiles_cache *cache;
464 struct dentry *dir, *next = NULL;
470 _enter("OBJ%x{%p},OBJ%x,%s,",
471 parent->fscache.debug_id, parent->dentry,
472 object->fscache.debug_id, key);
474 cache = container_of(parent->fscache.cache,
475 struct cachefiles_cache, cache);
476 path.mnt = cache->mnt;
478 ASSERT(parent->dentry);
479 ASSERT(parent->dentry->d_inode);
/* the walk can only start from a directory */
481 if (!(S_ISDIR(parent->dentry->d_inode->i_mode))) {
482 // TODO: convert file to dir
483 _leave("looking up in none directory");
487 dir = dget(parent->dentry);
490 /* attempt to transit the first directory component */
494 /* key ends in a double NUL */
495 key = key + nlen + 1;
500 /* search the current directory for the element name */
501 _debug("lookup '%s'", name);
503 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
506 next = lookup_one_len(name, dir, nlen);
507 cachefiles_hist(cachefiles_lookup_histogram, start);
511 _debug("next -> %p %s", next, next->d_inode ? "positive" : "negative");
/* a negative dentry means the object does not exist on disk yet */
514 object->new = !next->d_inode;
516 /* if this element of the path doesn't exist, then the lookup phase
517 * failed, and we can release any readers in the certain knowledge that
518 * there's nothing for them to actually read */
520 fscache_object_lookup_negative(&object->fscache);
522 /* we need to create the object if it's negative */
523 if (key || object->type == FSCACHE_COOKIE_TYPE_INDEX) {
524 /* index objects and intervening tree levels must be subdirs */
525 if (!next->d_inode) {
526 ret = cachefiles_has_space(cache, 1, 0);
531 ret = security_path_mkdir(&path, next, 0);
535 ret = vfs_mkdir(dir->d_inode, next, 0);
536 cachefiles_hist(cachefiles_mkdir_histogram, start);
540 ASSERT(next->d_inode);
542 _debug("mkdir -> %p{%p{ino=%lu}}",
543 next, next->d_inode, next->d_inode->i_ino);
545 } else if (!S_ISDIR(next->d_inode->i_mode)) {
/* newline-terminated so the message is emitted as a complete log line */
546 pr_err("inode %lu is not a directory\n",
547 next->d_inode->i_ino);
553 /* non-index objects start out life as files */
554 if (!next->d_inode) {
555 ret = cachefiles_has_space(cache, 1, 0);
560 ret = security_path_mknod(&path, next, S_IFREG, 0);
564 ret = vfs_create(dir->d_inode, next, S_IFREG, true);
565 cachefiles_hist(cachefiles_create_histogram, start);
569 ASSERT(next->d_inode);
571 _debug("create -> %p{%p{ino=%lu}}",
572 next, next->d_inode, next->d_inode->i_ino);
574 } else if (!S_ISDIR(next->d_inode->i_mode) &&
575 !S_ISREG(next->d_inode->i_mode)
/* newline-terminated so the message is emitted as a complete log line */
577 pr_err("inode %lu is not a file or directory\n",
578 next->d_inode->i_ino);
584 /* process the next component */
587 mutex_unlock(&dir->d_inode->i_mutex);
594 /* we've found the object we were looking for */
595 object->dentry = next;
597 /* if we've found that the terminal object exists, then we need to
598 * check its attributes and delete it if it's out of date */
600 _debug("validate '%*.*s'",
601 next->d_name.len, next->d_name.len, next->d_name.name);
603 ret = cachefiles_check_object_xattr(object, auxdata);
604 if (ret == -ESTALE) {
605 /* delete the object (the deleter drops the directory
607 object->dentry = NULL;
609 ret = cachefiles_bury_object(cache, dir, next, true);
616 _debug("redo lookup");
621 /* note that we're now using this object */
622 ret = cachefiles_mark_object_active(cache, object);
624 mutex_unlock(&dir->d_inode->i_mutex);
628 if (ret == -ETIMEDOUT)
629 goto mark_active_timed_out;
631 _debug("=== OBTAINED_OBJECT ===");
634 /* attach data to a newly constructed terminal object */
635 ret = cachefiles_set_object_xattr(object, auxdata);
639 /* always update the atime on an object we've just looked up
640 * (this is used to keep track of culling, and atimes are only
641 * updated by read, write and readdir but not lookup or
647 /* open a file interface onto a data file */
648 if (object->type != FSCACHE_COOKIE_TYPE_INDEX) {
649 if (S_ISREG(object->dentry->d_inode->i_mode)) {
650 const struct address_space_operations *aops;
653 aops = object->dentry->d_inode->i_mapping->a_ops;
657 object->backer = object->dentry;
659 BUG(); // TODO: open file in data-class subdir
664 fscache_obtained_object(&object->fscache);
666 _leave(" = 0 [%lu]", object->dentry->d_inode->i_ino);
670 _debug("create error %d", ret);
672 cachefiles_io_error(cache, "Create/mkdir failed");
675 mark_active_timed_out:
676 _debug("mark active timed out");
680 _debug("check error %d", ret);
/* undo cachefiles_mark_object_active(): take the object out of the tree,
 * clear ACTIVE and wake anyone waiting on that bit */
681 write_lock(&cache->active_lock);
682 rb_erase(&object->active_node, &cache->active_nodes);
683 clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
684 wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
685 write_unlock(&cache->active_lock);
687 dput(object->dentry);
688 object->dentry = NULL;
692 _debug("delete error %d", ret);
696 _debug("lookup error %ld", PTR_ERR(next));
699 cachefiles_io_error(cache, "Lookup failed");
702 mutex_unlock(&dir->d_inode->i_mutex);
707 _leave(" = error %d", -ret);
/*
 * Look up the entry dirname in dir under dir's i_mutex and, if it does not
 * exist yet, create it as a directory with mode 0700 (after checking for
 * space with cachefiles_has_space() and consulting security_path_mkdir()).
 * The result must be a directory whose inode operations provide setxattr,
 * getxattr, lookup, mkdir, create, rename, rmdir and unlink.
 *
 * Failures of the lookup and the mkdir are logged; running out of memory in
 * the lookup returns ERR_PTR(-ENOMEM).
 */
714 struct dentry *cachefiles_get_directory(struct cachefiles_cache *cache,
718 struct dentry *subdir;
723 _enter(",,%s", dirname);
725 /* search the current directory for the element name */
726 mutex_lock(&dir->d_inode->i_mutex);
729 subdir = lookup_one_len(dirname, dir, strlen(dirname));
730 cachefiles_hist(cachefiles_lookup_histogram, start);
731 if (IS_ERR(subdir)) {
732 if (PTR_ERR(subdir) == -ENOMEM)
737 _debug("subdir -> %p %s",
738 subdir, subdir->d_inode ? "positive" : "negative");
740 /* we need to create the subdir if it doesn't exist yet */
741 if (!subdir->d_inode) {
742 ret = cachefiles_has_space(cache, 1, 0);
746 _debug("attempt mkdir");
748 path.mnt = cache->mnt;
750 ret = security_path_mkdir(&path, subdir, 0700);
753 ret = vfs_mkdir(dir->d_inode, subdir, 0700);
757 ASSERT(subdir->d_inode);
759 _debug("mkdir -> %p{%p{ino=%lu}}",
762 subdir->d_inode->i_ino);
765 mutex_unlock(&dir->d_inode->i_mutex);
767 /* we need to make sure the subdir is a directory */
768 ASSERT(subdir->d_inode);
770 if (!S_ISDIR(subdir->d_inode->i_mode)) {
/* newline-terminated so the message is emitted as a complete log line */
771 pr_err("%s is not a directory\n", dirname);
/* the backing filesystem has to offer every inode operation listed here */
777 if (!subdir->d_inode->i_op->setxattr ||
778 !subdir->d_inode->i_op->getxattr ||
779 !subdir->d_inode->i_op->lookup ||
780 !subdir->d_inode->i_op->mkdir ||
781 !subdir->d_inode->i_op->create ||
782 !subdir->d_inode->i_op->rename ||
783 !subdir->d_inode->i_op->rmdir ||
784 !subdir->d_inode->i_op->unlink)
787 _leave(" = [%lu]", subdir->d_inode->i_ino);
792 _leave(" = %d [check]", ret);
796 mutex_unlock(&dir->d_inode->i_mutex);
798 pr_err("mkdir %s failed with error %d\n", dirname, ret);
802 mutex_unlock(&dir->d_inode->i_mutex);
803 ret = PTR_ERR(subdir);
804 pr_err("Lookup %s failed with error %d\n", dirname, ret);
808 mutex_unlock(&dir->d_inode->i_mutex);
809 _leave(" = -ENOMEM");
810 return ERR_PTR(-ENOMEM);
814 * find out if an object is in use or not
815 * - if it finds the object and it's not in use:
816 * - returns a pointer to the object and a reference on it
817 * - returns with the directory locked
/*
 * Look up filename in dir (taking dir's i_mutex) and search cache's tree of
 * active objects, under the read side of cache->active_lock, for an object
 * whose dentry is the one found.
 *
 * Outcomes visible here:
 *  - negative dentry: dir is unlocked, ERR_PTR(-ENOENT) is returned;
 *  - an active object uses the dentry: dir is unlocked, ERR_PTR(-EBUSY);
 *  - the lookup itself fails with -ENOENT: dir is unlocked, ERR_PTR(-ESTALE);
 *    other lookup errors are reported via cachefiles_io_error() or pr_err();
 *  - not in use: only active_lock is released, so dir's i_mutex is still
 *    held for the caller to drop.
 */
819 static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache,
823 struct cachefiles_object *object;
825 struct dentry *victim;
829 //_enter(",%*.*s/,%s",
830 // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
832 /* look up the victim */
833 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
836 victim = lookup_one_len(filename, dir, strlen(filename));
837 cachefiles_hist(cachefiles_lookup_histogram, start);
841 //_debug("victim -> %p %s",
842 // victim, victim->d_inode ? "positive" : "negative");
844 /* if the object is no longer there then we probably retired the object
845 * at the netfs's request whilst the cull was in progress
847 if (!victim->d_inode) {
848 mutex_unlock(&dir->d_inode->i_mutex);
850 _leave(" = -ENOENT [absent]");
851 return ERR_PTR(-ENOENT);
854 /* check to see if we're using this object */
855 read_lock(&cache->active_lock);
857 _n = cache->active_nodes.rb_node;
860 object = rb_entry(_n, struct cachefiles_object, active_node);
862 if (object->dentry > victim)
864 else if (object->dentry < victim)
870 read_unlock(&cache->active_lock);
872 //_leave(" = %p", victim);
876 read_unlock(&cache->active_lock);
877 mutex_unlock(&dir->d_inode->i_mutex);
879 //_leave(" = -EBUSY [in use]");
880 return ERR_PTR(-EBUSY);
883 mutex_unlock(&dir->d_inode->i_mutex);
884 ret = PTR_ERR(victim);
885 if (ret == -ENOENT) {
886 /* file or dir now absent - probably retired by netfs */
887 _leave(" = -ESTALE [absent]");
888 return ERR_PTR(-ESTALE);
892 cachefiles_io_error(cache, "Lookup failed");
893 } else if (ret != -ENOMEM) {
/* newline-terminated so the message is emitted as a complete log line */
894 pr_err("Internal error: %d\n", ret);
898 _leave(" = %d", ret);
903 * cull an object if it's not in use
904 * - called only by cache manager daemon
/*
 * Cull the object called filename in dir if nothing is using it.
 *
 * cachefiles_check_active() does the lookup and the in-use check; its error,
 * if any, is returned as is. Otherwise the victim has its xattr removed with
 * cachefiles_remove_object_xattr() to mark it stale and is then passed to
 * cachefiles_bury_object(), which drops dir's i_mutex. The error exit drops
 * the mutex itself and reports an -ENOENT failure as " = -ESTALE [absent]".
 */
906 int cachefiles_cull(struct cachefiles_cache *cache, struct dentry *dir,
909 struct dentry *victim;
913 dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
915 victim = cachefiles_check_active(cache, dir, filename);
917 return PTR_ERR(victim);
919 _debug("victim -> %p %s",
920 victim, victim->d_inode ? "positive" : "negative");
922 /* okay... the victim is not being used so we can cull it
923 * - start by marking it as stale
925 _debug("victim is cullable");
927 ret = cachefiles_remove_object_xattr(cache, victim);
931 /* actually remove the victim (drops the dir mutex) */
934 ret = cachefiles_bury_object(cache, dir, victim, false);
943 mutex_unlock(&dir->d_inode->i_mutex);
946 if (ret == -ENOENT) {
947 /* file or dir now absent - probably retired by netfs */
948 _leave(" = -ESTALE [absent]");
952 if (ret != -ENOMEM) {
/* newline-terminated so the message is emitted as a complete log line */
953 pr_err("Internal error: %d\n", ret);
957 _leave(" = %d", ret);
962 * find out if an object is in use or not
963 * - called only by cache manager daemon
964 * - returns -EBUSY or 0 to indicate whether an object is in use or not
966 int cachefiles_check_in_use(struct cachefiles_cache *cache, struct dentry *dir,
969 struct dentry *victim;
971 //_enter(",%*.*s/,%s",
972 // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename);
974 victim = cachefiles_check_active(cache, dir, filename);
976 return PTR_ERR(victim);
978 mutex_unlock(&dir->d_inode->i_mutex);