#include "audit.h"
#include <linux/fsnotify_backend.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/kthread.h>
#include <linux/slab.h>
struct audit_tree;
struct audit_chunk;

struct audit_tree {
	atomic_t count;
	int goner;
	struct audit_chunk *root;
	struct list_head chunks;
	struct list_head rules;
	struct list_head list;
	struct list_head same_root;
	struct rcu_head head;
	char pathname[0];
};

struct audit_chunk {
	struct list_head hash;
	struct fsnotify_mark mark;
	struct list_head trees;		/* with root here */
	int dead;
	int count;
	atomic_long_t refs;
	struct rcu_head head;
	struct node {
		struct list_head list;
		struct audit_tree *owner;
		unsigned index;		/* index; upper bit indicates 'will prune' */
	} owners[0];
};
static LIST_HEAD(tree_list);
static LIST_HEAD(prune_list);
static struct task_struct *prune_thread;
/*
 * One struct chunk is attached to each inode of interest.
 * We replace struct chunk on tagging/untagging.
 * Rules have pointer to struct audit_tree.
 * Rules have struct list_head rlist forming a list of rules over
 * the same tree.
 * References to struct chunk are collected at audit_inode{,_child}()
 * time and used in AUDIT_TREE rule matching.
 * These references are dropped at the same time we are calling
 * audit_free_names(), etc.
 *
 * Cyclic lists galore:
 * tree.chunks anchors chunk.owners[].list		hash_lock
 * tree.rules anchors rule.rlist			audit_filter_mutex
 * chunk.trees anchors tree.same_root			hash_lock
 * chunk.hash is a hash with middle bits of watch.inode as
 * a hash function.					RCU, hash_lock
 *
 * tree is refcounted; one reference for "some rules on rules_list refer to
 * it", one for each chunk with pointer to it.
 *
 * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
 * of watch contributes 1 to .refs).
 *
 * node.index allows to get from node.list to containing chunk.
 * MSB of that sucker is stolen to mark taggings that we might have to
 * revert - several operations have very unpleasant cleanup logic and
 * that makes a difference.  Some.
 */
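/*
 * Illustrative example of the refcounting rules above (not from the
 * original source): a tree that still has rules referring to it and is
 * attached to three chunks holds count == 1 + 3 == 4.  Untagging all
 * three chunks and removing the last rule drops count to zero and frees
 * the tree via kfree_rcu().
 */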
static struct fsnotify_group *audit_tree_group;

static struct audit_tree *alloc_tree(const char *s)
{
	struct audit_tree *tree;

	tree = kmalloc(sizeof(struct audit_tree) + strlen(s) + 1, GFP_KERNEL);
	if (tree) {
		atomic_set(&tree->count, 1);
		tree->goner = 0;
		INIT_LIST_HEAD(&tree->chunks);
		INIT_LIST_HEAD(&tree->rules);
		INIT_LIST_HEAD(&tree->list);
		INIT_LIST_HEAD(&tree->same_root);
		tree->root = NULL;
		strcpy(tree->pathname, s);
	}
	return tree;
}
static inline void get_tree(struct audit_tree *tree)
{
	atomic_inc(&tree->count);
}

static inline void put_tree(struct audit_tree *tree)
{
	if (atomic_dec_and_test(&tree->count))
		kfree_rcu(tree, head);
}

/* to avoid bringing the entire thing in audit.h */
const char *audit_tree_path(struct audit_tree *tree)
{
	return tree->pathname;
}
static void free_chunk(struct audit_chunk *chunk)
{
	int i;

	for (i = 0; i < chunk->count; i++) {
		if (chunk->owners[i].owner)
			put_tree(chunk->owners[i].owner);
	}
	kfree(chunk);
}

void audit_put_chunk(struct audit_chunk *chunk)
{
	if (atomic_long_dec_and_test(&chunk->refs))
		free_chunk(chunk);
}

static void __put_chunk(struct rcu_head *rcu)
{
	struct audit_chunk *chunk = container_of(rcu, struct audit_chunk, head);
	audit_put_chunk(chunk);
}

static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
{
	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
	call_rcu(&chunk->head, __put_chunk);
}
static struct audit_chunk *alloc_chunk(int count)
{
	struct audit_chunk *chunk;
	size_t size;
	int i;

	size = offsetof(struct audit_chunk, owners) + count * sizeof(struct node);
	chunk = kzalloc(size, GFP_KERNEL);
	if (!chunk)
		return NULL;

	INIT_LIST_HEAD(&chunk->hash);
	INIT_LIST_HEAD(&chunk->trees);
	chunk->count = count;
	atomic_long_set(&chunk->refs, 1);
	for (i = 0; i < count; i++) {
		INIT_LIST_HEAD(&chunk->owners[i].list);
		chunk->owners[i].index = i;
	}
	fsnotify_init_mark(&chunk->mark, audit_tree_destroy_watch);
	chunk->mark.mask = FS_IN_IGNORED;
	return chunk;
}
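/*
 * Added note on the initial reference: the .refs == 1 set above is the
 * contribution of the embedded fsnotify mark ("non-zero refcount of
 * watch contributes 1 to .refs"); it is dropped through
 * audit_tree_destroy_watch() -> __put_chunk() once the mark dies.
 */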
enum {HASH_SIZE = 128};
static struct list_head chunk_hash_heads[HASH_SIZE];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(hash_lock);
/* Function to return search key in our hash from inode. */
static unsigned long inode_to_key(const struct inode *inode)
{
	return (unsigned long)inode;
}

/*
 * Function to return search key in our hash from chunk. Key 0 is special and
 * should never be present in the hash.
 */
static unsigned long chunk_to_key(struct audit_chunk *chunk)
{
	return (unsigned long)chunk->mark.inode;
}

static inline struct list_head *chunk_hash(unsigned long key)
{
	unsigned long n = key / L1_CACHE_BYTES;
	return chunk_hash_heads + n % HASH_SIZE;
}
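/*
 * Worked example, assuming L1_CACHE_BYTES == 64 and HASH_SIZE == 128:
 * an inode at the made-up address 0xffff880012345600 gives
 *
 *	key    = 0xffff880012345600;
 *	bucket = (key / 64) % 128;	// == 88
 *
 * Dividing by the cache line size discards the low bits, which are
 * mostly constant for slab-allocated inodes; the "middle bits" mentioned
 * in the comment at the top then select the bucket.
 */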
/* hash_lock & entry->lock is held by caller */
static void insert_hash(struct audit_chunk *chunk)
{
	unsigned long key = chunk_to_key(chunk);
	struct list_head *list;

	if (!(chunk->mark.flags & FSNOTIFY_MARK_FLAG_ATTACHED))
		return;
	list = chunk_hash(key);
	list_add_rcu(&chunk->hash, list);
}
/* called under rcu_read_lock */
struct audit_chunk *audit_tree_lookup(const struct inode *inode)
{
	unsigned long key = inode_to_key(inode);
	struct list_head *list = chunk_hash(key);
	struct audit_chunk *p;

	list_for_each_entry_rcu(p, list, hash) {
		if (chunk_to_key(p) == key) {
			atomic_long_inc(&p->refs);
			return p;
		}
	}
	return NULL;
}
bool audit_tree_match(struct audit_chunk *chunk, struct audit_tree *tree)
{
	int n;
	for (n = 0; n < chunk->count; n++)
		if (chunk->owners[n].owner == tree)
			return true;
	return false;
}
/* tagging and untagging inodes with trees */
static struct audit_chunk *find_chunk(struct node *p)
{
	int index = p->index & ~(1U<<31);
	p -= index;
	return container_of(p, struct audit_chunk, owners[0]);
}
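/*
 * Walk-through (illustrative): for p == &chunk->owners[2], the low bits
 * of p->index hold 2 (the MSB may carry the "will prune" tag).  Masking
 * the MSB leaves index == 2, "p -= index" steps back to &owners[0], and
 * container_of() recovers the enclosing struct audit_chunk.
 */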
static void untag_chunk(struct node *p)
{
	struct audit_chunk *chunk = find_chunk(p);
	struct fsnotify_mark *entry = &chunk->mark;
	struct audit_chunk *new = NULL;
	struct audit_tree *owner;
	int size = chunk->count - 1;
	int i, j;

	fsnotify_get_mark(entry);

	spin_unlock(&hash_lock);

	if (size)
		new = alloc_chunk(size);

	mutex_lock(&entry->group->mark_mutex);
	spin_lock(&entry->lock);
	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		spin_unlock(&entry->lock);
		mutex_unlock(&entry->group->mark_mutex);
		if (new)
			fsnotify_put_mark(&new->mark);
		goto out;
	}

	owner = p->owner;

	if (!size) {
		chunk->dead = 1;
		spin_lock(&hash_lock);
		list_del_init(&chunk->trees);
		if (owner->root == chunk)
			owner->root = NULL;
		list_del_init(&p->list);
		list_del_rcu(&chunk->hash);
		spin_unlock(&hash_lock);
		spin_unlock(&entry->lock);
		mutex_unlock(&entry->group->mark_mutex);
		fsnotify_destroy_mark(entry, audit_tree_group);
		goto out;
	}

	if (!new)
		goto Fallback;

	if (fsnotify_add_mark_locked(&new->mark, entry->group, entry->inode,
				     NULL, 1)) {
		fsnotify_put_mark(&new->mark);
		goto Fallback;
	}

	chunk->dead = 1;
	spin_lock(&hash_lock);
	list_replace_init(&chunk->trees, &new->trees);
	if (owner->root == chunk) {
		list_del_init(&owner->same_root);
		owner->root = NULL;
	}

	for (i = j = 0; j <= size; i++, j++) {
		struct audit_tree *s;
		if (&chunk->owners[j] == p) {
			list_del_init(&p->list);
			i--;
			continue;
		}
		s = chunk->owners[j].owner;
		new->owners[i].owner = s;
		new->owners[i].index = chunk->owners[j].index - j + i;
		if (!s) /* result of earlier fallback */
			continue;
		get_tree(s);
		list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
	}

	list_replace_rcu(&chunk->hash, &new->hash);
	list_for_each_entry(owner, &new->trees, same_root)
		owner->root = new;
	spin_unlock(&hash_lock);
	spin_unlock(&entry->lock);
	mutex_unlock(&entry->group->mark_mutex);
	fsnotify_destroy_mark(entry, audit_tree_group);
	fsnotify_put_mark(&new->mark);	/* drop initial reference */
	goto out;

Fallback:
	// do the best we can
	spin_lock(&hash_lock);
	if (owner->root == chunk) {
		list_del_init(&owner->same_root);
		owner->root = NULL;
	}
	list_del_init(&p->list);
	p->owner = NULL;
	put_tree(owner);
	spin_unlock(&hash_lock);
	spin_unlock(&entry->lock);
	mutex_unlock(&entry->group->mark_mutex);
out:
	fsnotify_put_mark(entry);
	spin_lock(&hash_lock);
}
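/*
 * Note that untag_chunk() never edits a live chunk in place.  Following
 * the "we replace struct chunk on tagging/untagging" rule above, it
 * publishes a freshly built (count - 1)-sized copy with
 * list_replace_rcu(), so RCU readers in audit_tree_lookup() always see
 * either the old chunk or the new one, never a half-updated mix.
 */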
static int create_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *entry;
	struct audit_chunk *chunk = alloc_chunk(1);
	if (!chunk)
		return -ENOMEM;

	entry = &chunk->mark;
	if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
		fsnotify_put_mark(entry);
		return -ENOSPC;
	}

	spin_lock(&entry->lock);
	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		spin_unlock(&entry->lock);
		fsnotify_destroy_mark(entry, audit_tree_group);
		fsnotify_put_mark(entry);
		return 0;
	}
	chunk->owners[0].index = (1U << 31);
	chunk->owners[0].owner = tree;
	get_tree(tree);
	list_add(&chunk->owners[0].list, &tree->chunks);
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	insert_hash(chunk);
	spin_unlock(&hash_lock);
	spin_unlock(&entry->lock);
	fsnotify_put_mark(entry);	/* drop initial reference */
	return 0;
}
/* the first tagged inode becomes root of tree */
static int tag_chunk(struct inode *inode, struct audit_tree *tree)
{
	struct fsnotify_mark *old_entry, *chunk_entry;
	struct audit_tree *owner;
	struct audit_chunk *chunk, *old;
	struct node *p;
	int n;

	old_entry = fsnotify_find_inode_mark(audit_tree_group, inode);
	if (!old_entry)
		return create_chunk(inode, tree);

	old = container_of(old_entry, struct audit_chunk, mark);

	/* are we already there? */
	spin_lock(&hash_lock);
	for (n = 0; n < old->count; n++) {
		if (old->owners[n].owner == tree) {
			spin_unlock(&hash_lock);
			fsnotify_put_mark(old_entry);
			return 0;
		}
	}
	spin_unlock(&hash_lock);

	chunk = alloc_chunk(old->count + 1);
	if (!chunk) {
		fsnotify_put_mark(old_entry);
		return -ENOMEM;
	}

	chunk_entry = &chunk->mark;

	mutex_lock(&old_entry->group->mark_mutex);
	spin_lock(&old_entry->lock);
	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
		/* old_entry is being shot, let's just lie */
		spin_unlock(&old_entry->lock);
		mutex_unlock(&old_entry->group->mark_mutex);
		fsnotify_put_mark(old_entry);
		fsnotify_put_mark(&chunk->mark);
		return -ENOENT;
	}

	if (fsnotify_add_mark_locked(chunk_entry, old_entry->group,
				     old_entry->inode, NULL, 1)) {
		spin_unlock(&old_entry->lock);
		mutex_unlock(&old_entry->group->mark_mutex);
		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return -ENOSPC;
	}

	/*
	 * Even though we hold old_entry->lock, this is safe since
	 * chunk_entry->lock could NEVER have been grabbed before.
	 */
	spin_lock(&chunk_entry->lock);
	spin_lock(&hash_lock);

	/* we now hold old_entry->lock, chunk_entry->lock, and hash_lock */
	if (tree->goner) {
		spin_unlock(&hash_lock);
		chunk->dead = 1;
		spin_unlock(&chunk_entry->lock);
		spin_unlock(&old_entry->lock);
		mutex_unlock(&old_entry->group->mark_mutex);

		fsnotify_destroy_mark(chunk_entry, audit_tree_group);

		fsnotify_put_mark(chunk_entry);
		fsnotify_put_mark(old_entry);
		return 0;
	}
	list_replace_init(&old->trees, &chunk->trees);
	for (n = 0, p = chunk->owners; n < old->count; n++, p++) {
		struct audit_tree *s = old->owners[n].owner;
		p->owner = s;
		p->index = old->owners[n].index;
		if (!s) /* result of fallback in untag */
			continue;
		get_tree(s);
		list_replace_init(&old->owners[n].list, &p->list);
	}
	p->index = (chunk->count - 1) | (1U<<31);
	p->owner = tree;
	get_tree(tree);
	list_add(&p->list, &tree->chunks);
	list_replace_rcu(&old->hash, &chunk->hash);
	list_for_each_entry(owner, &chunk->trees, same_root)
		owner->root = chunk;
	old->dead = 1;
	if (!tree->root) {
		tree->root = chunk;
		list_add(&tree->same_root, &chunk->trees);
	}
	spin_unlock(&hash_lock);
	spin_unlock(&chunk_entry->lock);
	spin_unlock(&old_entry->lock);
	mutex_unlock(&old_entry->group->mark_mutex);
	fsnotify_destroy_mark(old_entry, audit_tree_group);
	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
	fsnotify_put_mark(old_entry);	/* pair to fsnotify_find_inode_mark */
	return 0;
}
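/*
 * Lock ordering note (added): tag_chunk() and untag_chunk() both nest
 * group->mark_mutex -> mark->lock -> hash_lock.  Taking
 * chunk_entry->lock after old_entry->lock is safe here because the new
 * mark was only just created, so no other lock order involving it can
 * exist yet - which is what the one-line comment above is getting at.
 */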
static void audit_tree_log_remove_rule(struct audit_krule *rule)
{
	struct audit_buffer *ab;

	ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
	if (unlikely(!ab))
		return;
	audit_log_format(ab, "op=remove_rule");
	audit_log_format(ab, " dir=");
	audit_log_untrustedstring(ab, rule->tree->pathname);
	audit_log_key(ab, rule->filterkey);
	audit_log_format(ab, " list=%d res=1", rule->listnr);
	audit_log_end(ab);
}
static void kill_rules(struct audit_tree *tree)
{
	struct audit_krule *rule, *next;
	struct audit_entry *entry;

	list_for_each_entry_safe(rule, next, &tree->rules, rlist) {
		entry = container_of(rule, struct audit_entry, rule);

		list_del_init(&rule->rlist);
		if (rule->tree) {
			/* not a half-baked one */
			audit_tree_log_remove_rule(rule);
			if (entry->rule.exe)
				audit_remove_mark(entry->rule.exe);
			rule->tree = NULL;
			list_del_rcu(&entry->list);
			list_del(&entry->rule.list);
			call_rcu(&entry->rcu, audit_free_rule_rcu);
		}
	}
}
/*
 * finish killing struct audit_tree
 */
static void prune_one(struct audit_tree *victim)
{
	spin_lock(&hash_lock);
	while (!list_empty(&victim->chunks)) {
		struct node *p;

		p = list_entry(victim->chunks.next, struct node, list);

		untag_chunk(p);
	}
	spin_unlock(&hash_lock);
	put_tree(victim);
}
/* trim the uncommitted chunks from tree */

static void trim_marked(struct audit_tree *tree)
{
	struct list_head *p, *q;
	spin_lock(&hash_lock);
	if (tree->goner) {
		spin_unlock(&hash_lock);
		return;
	}
	/* reorder */
	for (p = tree->chunks.next; p != &tree->chunks; p = q) {
		struct node *node = list_entry(p, struct node, list);
		q = p->next;
		if (node->index & (1U<<31)) {
			list_del_init(p);
			list_add(p, &tree->chunks);
		}
	}

	while (!list_empty(&tree->chunks)) {
		struct node *node;

		node = list_entry(tree->chunks.next, struct node, list);

		/* have we run out of marked? */
		if (!(node->index & (1U<<31)))
			break;

		untag_chunk(node);
	}
	if (!tree->root && !tree->goner) {
		tree->goner = 1;
		spin_unlock(&hash_lock);
		mutex_lock(&audit_filter_mutex);
		kill_rules(tree);
		list_del_init(&tree->list);
		mutex_unlock(&audit_filter_mutex);
		prune_one(tree);
	} else {
		spin_unlock(&hash_lock);
	}
}
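/*
 * How the (1U << 31) tags are used: freshly created taggings carry the
 * MSB in node->index (see create_chunk() and tag_chunk()); callers clear
 * it on success or call trim_marked() to revert.  The reorder pass above
 * moves all marked nodes to the front of tree->chunks so the untag loop
 * can stop at the first unmarked node.
 */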
static void audit_schedule_prune(void);

/* called with audit_filter_mutex */
int audit_remove_tree_rule(struct audit_krule *rule)
{
	struct audit_tree *tree;
	tree = rule->tree;
	if (tree) {
		spin_lock(&hash_lock);
		list_del_init(&rule->rlist);
		if (list_empty(&tree->rules) && !tree->goner) {
			tree->root = NULL;
			list_del_init(&tree->same_root);
			tree->goner = 1;
			list_move(&tree->list, &prune_list);
			rule->tree = NULL;
			spin_unlock(&hash_lock);
			audit_schedule_prune();
			return 1;
		}
		rule->tree = NULL;
		spin_unlock(&hash_lock);
		return 1;
	}
	return 0;
}
static int compare_root(struct vfsmount *mnt, void *arg)
{
	return inode_to_key(d_backing_inode(mnt->mnt_root)) ==
	       (unsigned long)arg;
}
void audit_trim_trees(void)
{
	struct list_head cursor;

	mutex_lock(&audit_filter_mutex);
	list_add(&cursor, &tree_list);
	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		struct path path;
		struct vfsmount *root_mnt;
		struct node *node;
		int err;

		tree = container_of(cursor.next, struct audit_tree, list);
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path);
		if (err)
			goto skip_it;

		root_mnt = collect_mounts(&path);
		path_put(&path);
		if (IS_ERR(root_mnt))
			goto skip_it;

		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list) {
			struct audit_chunk *chunk = find_chunk(node);
			/* this could be NULL if the watch is dying elsewhere... */
			node->index |= 1U<<31;
			if (iterate_mounts(compare_root,
					   (void *)chunk_to_key(chunk),
					   root_mnt))
				node->index &= ~(1U<<31);
		}
		spin_unlock(&hash_lock);
		trim_marked(tree);
		drop_collected_mounts(root_mnt);
skip_it:
		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
}
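/*
 * The on-stack "cursor" element above is spliced into tree_list so that
 * audit_filter_mutex can be dropped while each tree is examined; when
 * the mutex is retaken, iteration resumes from the cursor even if
 * neighbouring entries were added or removed in the meantime.  The same
 * trick is used by audit_tag_tree() below.
 */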
int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op)
{
	if (pathname[0] != '/' ||
	    rule->listnr != AUDIT_FILTER_EXIT ||
	    op != Audit_equal ||
	    rule->inode_f || rule->watch || rule->tree)
		return -EINVAL;
	rule->tree = alloc_tree(pathname);
	if (!rule->tree)
		return -ENOMEM;
	return 0;
}
void audit_put_tree(struct audit_tree *tree)
{
	put_tree(tree);
}

static int tag_mount(struct vfsmount *mnt, void *arg)
{
	return tag_chunk(d_backing_inode(mnt->mnt_root), arg);
}
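/*
 * tag_mount() merely adapts tag_chunk() to the callback signature that
 * iterate_mounts() expects; it is used both by audit_add_tree_rule()
 * and by audit_tag_tree().
 */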
/*
 * That gets run when evict_chunk() ends up needing to kill audit_tree.
 * Runs from a separate thread.
 */
static int prune_tree_thread(void *unused)
{
	for (;;) {
		if (list_empty(&prune_list)) {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule();
		}

		mutex_lock(&audit_cmd_mutex);
		mutex_lock(&audit_filter_mutex);

		while (!list_empty(&prune_list)) {
			struct audit_tree *victim;

			victim = list_entry(prune_list.next,
					struct audit_tree, list);
			list_del_init(&victim->list);

			mutex_unlock(&audit_filter_mutex);

			prune_one(victim);

			mutex_lock(&audit_filter_mutex);
		}

		mutex_unlock(&audit_filter_mutex);
		mutex_unlock(&audit_cmd_mutex);
	}
	return 0;
}
static int audit_launch_prune(void)
{
	if (prune_thread)
		return 0;
	prune_thread = kthread_run(prune_tree_thread, NULL,
				   "audit_prune_tree");
	if (IS_ERR(prune_thread)) {
		pr_err("cannot start thread audit_prune_tree\n");
		prune_thread = NULL;
		return -ENOMEM;
	}
	return 0;
}
/* called with audit_filter_mutex */
int audit_add_tree_rule(struct audit_krule *rule)
{
	struct audit_tree *seed = rule->tree, *tree;
	struct path path;
	struct vfsmount *mnt;
	int err;

	rule->tree = NULL;
	list_for_each_entry(tree, &tree_list, list) {
		if (!strcmp(seed->pathname, tree->pathname)) {
			put_tree(seed);
			rule->tree = tree;
			list_add(&rule->rlist, &tree->rules);
			return 0;
		}
	}
	tree = seed;
	list_add(&tree->list, &tree_list);
	list_add(&rule->rlist, &tree->rules);
	/* do not set rule->tree yet */
	mutex_unlock(&audit_filter_mutex);

	if (unlikely(!prune_thread)) {
		err = audit_launch_prune();
		if (err)
			goto Err;
	}

	err = kern_path(tree->pathname, 0, &path);
	if (err)
		goto Err;
	mnt = collect_mounts(&path);
	path_put(&path);
	if (IS_ERR(mnt)) {
		err = PTR_ERR(mnt);
		goto Err;
	}

	get_tree(tree);
	err = iterate_mounts(tag_mount, tree, mnt);
	drop_collected_mounts(mnt);

	if (!err) {
		struct node *node;
		spin_lock(&hash_lock);
		list_for_each_entry(node, &tree->chunks, list)
			node->index &= ~(1U<<31);
		spin_unlock(&hash_lock);
	} else {
		trim_marked(tree);
		goto Err;
	}

	mutex_lock(&audit_filter_mutex);
	if (list_empty(&rule->rlist)) {
		put_tree(tree);
		return -ENOENT;
	}
	rule->tree = tree;
	put_tree(tree);

	return 0;
Err:
	mutex_lock(&audit_filter_mutex);
	list_del_init(&tree->list);
	list_del_init(&tree->rules);
	put_tree(tree);
	return err;
}
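/*
 * Why the list_empty(&rule->rlist) recheck above is needed:
 * audit_filter_mutex is dropped while the mounts are tagged, so a
 * concurrent rule removal may have detached the rule in the meantime;
 * in that case the function returns -ENOENT instead of resurrecting a
 * tree that is already being killed.
 */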
int audit_tag_tree(char *old, char *new)
{
	struct list_head cursor, barrier;
	int failed = 0;
	struct path path1, path2;
	struct vfsmount *tagged;
	int err;

	err = kern_path(new, 0, &path2);
	if (err)
		return err;
	tagged = collect_mounts(&path2);
	path_put(&path2);
	if (IS_ERR(tagged))
		return PTR_ERR(tagged);

	err = kern_path(old, 0, &path1);
	if (err) {
		drop_collected_mounts(tagged);
		return err;
	}

	mutex_lock(&audit_filter_mutex);
	list_add(&barrier, &tree_list);
	list_add(&cursor, &barrier);

	while (cursor.next != &tree_list) {
		struct audit_tree *tree;
		int good_one = 0;

		tree = container_of(cursor.next, struct audit_tree, list);
		get_tree(tree);
		list_del(&cursor);
		list_add(&cursor, &tree->list);
		mutex_unlock(&audit_filter_mutex);

		err = kern_path(tree->pathname, 0, &path2);
		if (!err) {
			good_one = path_is_under(&path1, &path2);
			path_put(&path2);
		}

		if (!good_one) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			continue;
		}

		failed = iterate_mounts(tag_mount, tree, tagged);
		if (failed) {
			put_tree(tree);
			mutex_lock(&audit_filter_mutex);
			break;
		}

		mutex_lock(&audit_filter_mutex);
		spin_lock(&hash_lock);
		if (!tree->goner) {
			list_del(&tree->list);
			list_add(&tree->list, &tree_list);
		}
		spin_unlock(&hash_lock);
		put_tree(tree);
	}

	while (barrier.prev != &tree_list) {
		struct audit_tree *tree;

		tree = container_of(barrier.prev, struct audit_tree, list);
		get_tree(tree);
		list_del(&tree->list);
		list_add(&tree->list, &barrier);
		mutex_unlock(&audit_filter_mutex);

		if (!failed) {
			struct node *node;
			spin_lock(&hash_lock);
			list_for_each_entry(node, &tree->chunks, list)
				node->index &= ~(1U<<31);
			spin_unlock(&hash_lock);
		} else {
			trim_marked(tree);
		}

		put_tree(tree);
		mutex_lock(&audit_filter_mutex);
	}
	list_del(&barrier);
	list_del(&cursor);
	mutex_unlock(&audit_filter_mutex);
	path_put(&path1);
	drop_collected_mounts(tagged);
	return failed;
}
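/*
 * The "barrier" element separates the trees this call has already
 * visited (re-queued in front of it) from the rest of tree_list.  The
 * second loop above walks exactly that range and either commits the new
 * taggings (clears the MSB marks) or reverts them with trim_marked(),
 * depending on whether any mount failed to tag.
 */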
static void audit_schedule_prune(void)
{
	wake_up_process(prune_thread);
}
/*
 * ... and that one is done if evict_chunk() decides to delay until the end
 * of syscall.  Runs synchronously.
 */
void audit_kill_trees(struct list_head *list)
{
	mutex_lock(&audit_cmd_mutex);
	mutex_lock(&audit_filter_mutex);

	while (!list_empty(list)) {
		struct audit_tree *victim;

		victim = list_entry(list->next, struct audit_tree, list);
		kill_rules(victim);
		list_del_init(&victim->list);

		mutex_unlock(&audit_filter_mutex);

		prune_one(victim);

		mutex_lock(&audit_filter_mutex);
	}

	mutex_unlock(&audit_filter_mutex);
	mutex_unlock(&audit_cmd_mutex);
}
/*
 *  Here comes the stuff asynchronous to auditctl operations
 */

static void evict_chunk(struct audit_chunk *chunk)
{
	struct audit_tree *owner;
	struct list_head *postponed = audit_killed_trees();
	int need_prune = 0;
	int n;

	if (chunk->dead)
		return;

	chunk->dead = 1;
	mutex_lock(&audit_filter_mutex);
	spin_lock(&hash_lock);
	while (!list_empty(&chunk->trees)) {
		owner = list_entry(chunk->trees.next,
				   struct audit_tree, same_root);
		owner->goner = 1;
		owner->root = NULL;
		list_del_init(&owner->same_root);
		spin_unlock(&hash_lock);
		if (!postponed) {
			kill_rules(owner);
			list_move(&owner->list, &prune_list);
			need_prune = 1;
		} else {
			list_move(&owner->list, postponed);
		}
		spin_lock(&hash_lock);
	}
	list_del_rcu(&chunk->hash);
	for (n = 0; n < chunk->count; n++)
		list_del_init(&chunk->owners[n].list);
	spin_unlock(&hash_lock);
	mutex_unlock(&audit_filter_mutex);
	if (need_prune)
		audit_schedule_prune();
}
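/*
 * Two disposal paths: if the current context supplies a killed-trees
 * list (audit_killed_trees() returned non-NULL), victims are queued
 * there and reaped synchronously by audit_kill_trees() at the end of
 * the syscall; otherwise they go to prune_list and the prune thread is
 * woken via audit_schedule_prune().
 */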
static int audit_tree_handle_event(struct fsnotify_group *group,
				   struct inode *to_tell,
				   struct fsnotify_mark *inode_mark,
				   struct fsnotify_mark *vfsmount_mark,
				   u32 mask, const void *data, int data_type,
				   const unsigned char *file_name, u32 cookie)
{
	return 0;
}
static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
{
	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);

	evict_chunk(chunk);

	/*
	 * We are guaranteed to have at least one reference to the mark from
	 * either the inode or the caller of fsnotify_destroy_mark().
	 */
	BUG_ON(atomic_read(&entry->refcnt) < 1);
}
static const struct fsnotify_ops audit_tree_ops = {
	.handle_event = audit_tree_handle_event,
	.freeing_mark = audit_tree_freeing_mark,
};
static int __init audit_tree_init(void)
{
	int i;

	audit_tree_group = fsnotify_alloc_group(&audit_tree_ops);
	if (IS_ERR(audit_tree_group))
		audit_panic("cannot initialize fsnotify group for rectree watches");

	for (i = 0; i < HASH_SIZE; i++)
		INIT_LIST_HEAD(&chunk_hash_heads[i]);

	return 0;
}
__initcall(audit_tree_init);