]> git.karo-electronics.de Git - mv-sheeva.git/blobdiff - kernel/trace/trace_events_filter.c
trace, filters: Initialize the match variable in process_ops() properly
[mv-sheeva.git] / kernel / trace / trace_events_filter.c
index 36d40104b17f6d53cdc89b45841cff2679551c56..8008ddcfbf20add080624f6714a5b38424b3789e 100644 (file)
@@ -123,9 +123,13 @@ struct filter_parse_state {
        } operand;
 };
 
+struct pred_stack {
+       struct filter_pred      **preds;
+       int                     index;
+};
+
 #define DEFINE_COMPARISON_PRED(type)                                   \
-static int filter_pred_##type(struct filter_pred *pred, void *event,   \
-                             int val1, int val2)                       \
+static int filter_pred_##type(struct filter_pred *pred, void *event)   \
 {                                                                      \
        type *addr = (type *)(event + pred->offset);                    \
        type val = (type)pred->val;                                     \
@@ -152,8 +156,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event,        \
 }
 
 #define DEFINE_EQUALITY_PRED(size)                                     \
-static int filter_pred_##size(struct filter_pred *pred, void *event,   \
-                             int val1, int val2)                       \
+static int filter_pred_##size(struct filter_pred *pred, void *event)   \
 {                                                                      \
        u##size *addr = (u##size *)(event + pred->offset);              \
        u##size val = (u##size)pred->val;                               \
@@ -178,23 +181,8 @@ DEFINE_EQUALITY_PRED(32);
 DEFINE_EQUALITY_PRED(16);
 DEFINE_EQUALITY_PRED(8);
 
-static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
-                          void *event __attribute((unused)),
-                          int val1, int val2)
-{
-       return val1 && val2;
-}
-
-static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
-                         void *event __attribute((unused)),
-                         int val1, int val2)
-{
-       return val1 || val2;
-}
-
 /* Filter predicate for fixed sized arrays of characters */
-static int filter_pred_string(struct filter_pred *pred, void *event,
-                             int val1, int val2)
+static int filter_pred_string(struct filter_pred *pred, void *event)
 {
        char *addr = (char *)(event + pred->offset);
        int cmp, match;
@@ -207,8 +195,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
 }
 
 /* Filter predicate for char * pointers */
-static int filter_pred_pchar(struct filter_pred *pred, void *event,
-                            int val1, int val2)
+static int filter_pred_pchar(struct filter_pred *pred, void *event)
 {
        char **addr = (char **)(event + pred->offset);
        int cmp, match;
@@ -231,8 +218,7 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
  * and add it to the address of the entry, and at last we have
  * the address of the string.
  */
-static int filter_pred_strloc(struct filter_pred *pred, void *event,
-                             int val1, int val2)
+static int filter_pred_strloc(struct filter_pred *pred, void *event)
 {
        u32 str_item = *(u32 *)(event + pred->offset);
        int str_loc = str_item & 0xffff;
@@ -247,8 +233,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
        return match;
 }
 
-static int filter_pred_none(struct filter_pred *pred, void *event,
-                           int val1, int val2)
+static int filter_pred_none(struct filter_pred *pred, void *event)
 {
        return 0;
 }
@@ -377,32 +362,147 @@ static void filter_build_regex(struct filter_pred *pred)
        pred->not ^= not;
 }
 
+enum move_type {
+       MOVE_DOWN,
+       MOVE_UP_FROM_LEFT,
+       MOVE_UP_FROM_RIGHT
+};
+
+static struct filter_pred *
+get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
+               int index, enum move_type *move)
+{
+       if (pred->parent & FILTER_PRED_IS_RIGHT)
+               *move = MOVE_UP_FROM_RIGHT;
+       else
+               *move = MOVE_UP_FROM_LEFT;
+       pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
+
+       return pred;
+}
+
+/*
+ * A series of AND or ORs where found together. Instead of
+ * climbing up and down the tree branches, an array of the
+ * ops were made in order of checks. We can just move across
+ * the array and short circuit if needed.
+ */
+static int process_ops(struct filter_pred *preds,
+                      struct filter_pred *op, void *rec)
+{
+       struct filter_pred *pred;
+       int match = 0;
+       int type;
+       int i;
+
+       /*
+        * Micro-optimization: We set type to true if op
+        * is an OR and false otherwise (AND). Then we
+        * just need to test if the match is equal to
+        * the type, and if it is, we can short circuit the
+        * rest of the checks:
+        *
+        * if ((match && op->op == OP_OR) ||
+        *     (!match && op->op == OP_AND))
+        *        return match;
+        */
+       type = op->op == OP_OR;
+
+       for (i = 0; i < op->val; i++) {
+               pred = &preds[op->ops[i]];
+               match = pred->fn(pred, rec);
+               if (!!match == type)
+                       return match;
+       }
+       return match;
+}
+
 /* return 1 if event matches, 0 otherwise (discard) */
 int filter_match_preds(struct event_filter *filter, void *rec)
 {
-       int match, top = 0, val1 = 0, val2 = 0;
-       int stack[MAX_FILTER_PRED];
+       int match = -1;
+       enum move_type move = MOVE_DOWN;
+       struct filter_pred *preds;
        struct filter_pred *pred;
-       int i;
+       struct filter_pred *root;
+       int n_preds;
+       int done = 0;
+
+       /* no filter is considered a match */
+       if (!filter)
+               return 1;
+
+       n_preds = filter->n_preds;
+
+       if (!n_preds)
+               return 1;
+
+       /*
+        * n_preds, root and filter->preds are protect with preemption disabled.
+        */
+       preds = rcu_dereference_sched(filter->preds);
+       root = rcu_dereference_sched(filter->root);
+       if (!root)
+               return 1;
+
+       pred = root;
 
-       for (i = 0; i < filter->n_preds; i++) {
-               pred = filter->preds[i];
-               if (!pred->pop_n) {
-                       match = pred->fn(pred, rec, val1, val2);
-                       stack[top++] = match;
+       /* match is currently meaningless */
+       match = -1;
+
+       do {
+               switch (move) {
+               case MOVE_DOWN:
+                       /* only AND and OR have children */
+                       if (pred->left != FILTER_PRED_INVALID) {
+                               /* If ops is set, then it was folded. */
+                               if (!pred->ops) {
+                                       /* keep going to down the left side */
+                                       pred = &preds[pred->left];
+                                       continue;
+                               }
+                               /* We can treat folded ops as a leaf node */
+                               match = process_ops(preds, pred, rec);
+                       } else
+                               match = pred->fn(pred, rec);
+                       /* If this pred is the only pred */
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               case MOVE_UP_FROM_LEFT:
+                       /*
+                        * Check for short circuits.
+                        *
+                        * Optimization: !!match == (pred->op == OP_OR)
+                        *   is the same as:
+                        * if ((match && pred->op == OP_OR) ||
+                        *     (!match && pred->op == OP_AND))
+                        */
+                       if (!!match == (pred->op == OP_OR)) {
+                               if (pred == root)
+                                       break;
+                               pred = get_pred_parent(pred, preds,
+                                                      pred->parent, &move);
+                               continue;
+                       }
+                       /* now go down the right side of the tree. */
+                       pred = &preds[pred->right];
+                       move = MOVE_DOWN;
+                       continue;
+               case MOVE_UP_FROM_RIGHT:
+                       /* We finished this equation. */
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
                        continue;
                }
-               if (pred->pop_n > top) {
-                       WARN_ON_ONCE(1);
-                       return 0;
-               }
-               val1 = stack[--top];
-               val2 = stack[--top];
-               match = pred->fn(pred, rec, val1, val2);
-               stack[top++] = match;
-       }
+               done = 1;
+       } while (!done);
 
-       return stack[--top];
+       return match;
 }
 EXPORT_SYMBOL_GPL(filter_match_preds);
 
@@ -414,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos)
 
 static void remove_filter_string(struct event_filter *filter)
 {
+       if (!filter)
+               return;
+
        kfree(filter->filter_string);
        filter->filter_string = NULL;
 }
@@ -473,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps,
 
 void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 {
-       struct event_filter *filter = call->filter;
+       struct event_filter *filter;
 
        mutex_lock(&event_mutex);
+       filter = call->filter;
        if (filter && filter->filter_string)
                trace_seq_printf(s, "%s\n", filter->filter_string);
        else
@@ -486,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 void print_subsystem_event_filter(struct event_subsystem *system,
                                  struct trace_seq *s)
 {
-       struct event_filter *filter = system->filter;
+       struct event_filter *filter;
 
        mutex_lock(&event_mutex);
+       filter = system->filter;
        if (filter && filter->filter_string)
                trace_seq_printf(s, "%s\n", filter->filter_string);
        else
@@ -539,10 +644,58 @@ static void filter_clear_pred(struct filter_pred *pred)
        pred->regex.len = 0;
 }
 
-static int filter_set_pred(struct filter_pred *dest,
+static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
+{
+       stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
+       if (!stack->preds)
+               return -ENOMEM;
+       stack->index = n_preds;
+       return 0;
+}
+
+static void __free_pred_stack(struct pred_stack *stack)
+{
+       kfree(stack->preds);
+       stack->index = 0;
+}
+
+static int __push_pred_stack(struct pred_stack *stack,
+                            struct filter_pred *pred)
+{
+       int index = stack->index;
+
+       if (WARN_ON(index == 0))
+               return -ENOSPC;
+
+       stack->preds[--index] = pred;
+       stack->index = index;
+       return 0;
+}
+
+static struct filter_pred *
+__pop_pred_stack(struct pred_stack *stack)
+{
+       struct filter_pred *pred;
+       int index = stack->index;
+
+       pred = stack->preds[index++];
+       if (!pred)
+               return NULL;
+
+       stack->index = index;
+       return pred;
+}
+
+static int filter_set_pred(struct event_filter *filter,
+                          int idx,
+                          struct pred_stack *stack,
                           struct filter_pred *src,
                           filter_pred_fn_t fn)
 {
+       struct filter_pred *dest = &filter->preds[idx];
+       struct filter_pred *left;
+       struct filter_pred *right;
+
        *dest = *src;
        if (src->field_name) {
                dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
@@ -550,116 +703,140 @@ static int filter_set_pred(struct filter_pred *dest,
                        return -ENOMEM;
        }
        dest->fn = fn;
+       dest->index = idx;
 
-       return 0;
+       if (dest->op == OP_OR || dest->op == OP_AND) {
+               right = __pop_pred_stack(stack);
+               left = __pop_pred_stack(stack);
+               if (!left || !right)
+                       return -EINVAL;
+               /*
+                * If both children can be folded
+                * and they are the same op as this op or a leaf,
+                * then this op can be folded.
+                */
+               if (left->index & FILTER_PRED_FOLD &&
+                   (left->op == dest->op ||
+                    left->left == FILTER_PRED_INVALID) &&
+                   right->index & FILTER_PRED_FOLD &&
+                   (right->op == dest->op ||
+                    right->left == FILTER_PRED_INVALID))
+                       dest->index |= FILTER_PRED_FOLD;
+
+               dest->left = left->index & ~FILTER_PRED_FOLD;
+               dest->right = right->index & ~FILTER_PRED_FOLD;
+               left->parent = dest->index & ~FILTER_PRED_FOLD;
+               right->parent = dest->index | FILTER_PRED_IS_RIGHT;
+       } else {
+               /*
+                * Make dest->left invalid to be used as a quick
+                * way to know this is a leaf node.
+                */
+               dest->left = FILTER_PRED_INVALID;
+
+               /* All leafs allow folding the parent ops. */
+               dest->index |= FILTER_PRED_FOLD;
+       }
+
+       return __push_pred_stack(stack, dest);
 }
 
-static void filter_disable_preds(struct ftrace_event_call *call)
+static void __free_preds(struct event_filter *filter)
 {
-       struct event_filter *filter = call->filter;
        int i;
 
-       call->flags &= ~TRACE_EVENT_FL_FILTERED;
+       if (filter->preds) {
+               for (i = 0; i < filter->a_preds; i++)
+                       kfree(filter->preds[i].field_name);
+               kfree(filter->preds);
+               filter->preds = NULL;
+       }
+       filter->a_preds = 0;
        filter->n_preds = 0;
-
-       for (i = 0; i < MAX_FILTER_PRED; i++)
-               filter->preds[i]->fn = filter_pred_none;
 }
 
-static void __free_preds(struct event_filter *filter)
+static void filter_disable(struct ftrace_event_call *call)
 {
-       int i;
+       call->flags &= ~TRACE_EVENT_FL_FILTERED;
+}
 
+static void __free_filter(struct event_filter *filter)
+{
        if (!filter)
                return;
 
-       for (i = 0; i < MAX_FILTER_PRED; i++) {
-               if (filter->preds[i])
-                       filter_free_pred(filter->preds[i]);
-       }
-       kfree(filter->preds);
+       __free_preds(filter);
        kfree(filter->filter_string);
        kfree(filter);
 }
 
+/*
+ * Called when destroying the ftrace_event_call.
+ * The call is being freed, so we do not need to worry about
+ * the call being currently used. This is for module code removing
+ * the tracepoints from within it.
+ */
 void destroy_preds(struct ftrace_event_call *call)
 {
-       __free_preds(call->filter);
+       __free_filter(call->filter);
        call->filter = NULL;
-       call->flags &= ~TRACE_EVENT_FL_FILTERED;
 }
 
-static struct event_filter *__alloc_preds(void)
+static struct event_filter *__alloc_filter(void)
 {
        struct event_filter *filter;
+
+       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+       return filter;
+}
+
+static int __alloc_preds(struct event_filter *filter, int n_preds)
+{
        struct filter_pred *pred;
        int i;
 
-       filter = kzalloc(sizeof(*filter), GFP_KERNEL);
-       if (!filter)
-               return ERR_PTR(-ENOMEM);
+       if (filter->preds)
+               __free_preds(filter);
 
-       filter->n_preds = 0;
+       filter->preds =
+               kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
 
-       filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
        if (!filter->preds)
-               goto oom;
+               return -ENOMEM;
 
-       for (i = 0; i < MAX_FILTER_PRED; i++) {
-               pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-               if (!pred)
-                       goto oom;
+       filter->a_preds = n_preds;
+       filter->n_preds = 0;
+
+       for (i = 0; i < n_preds; i++) {
+               pred = &filter->preds[i];
                pred->fn = filter_pred_none;
-               filter->preds[i] = pred;
        }
 
-       return filter;
-
-oom:
-       __free_preds(filter);
-       return ERR_PTR(-ENOMEM);
-}
-
-static int init_preds(struct ftrace_event_call *call)
-{
-       if (call->filter)
-               return 0;
-
-       call->flags &= ~TRACE_EVENT_FL_FILTERED;
-       call->filter = __alloc_preds();
-       if (IS_ERR(call->filter))
-               return PTR_ERR(call->filter);
-
        return 0;
 }
 
-static int init_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
 {
        struct ftrace_event_call *call;
-       int err;
 
        list_for_each_entry(call, &ftrace_events, list) {
                if (strcmp(call->class->system, system->name) != 0)
                        continue;
 
-               err = init_preds(call);
-               if (err)
-                       return err;
+               filter_disable(call);
+               remove_filter_string(call->filter);
        }
-
-       return 0;
 }
 
-static void filter_free_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_filters(struct event_subsystem *system)
 {
        struct ftrace_event_call *call;
 
        list_for_each_entry(call, &ftrace_events, list) {
                if (strcmp(call->class->system, system->name) != 0)
                        continue;
-
-               filter_disable_preds(call);
-               remove_filter_string(call->filter);
+               __free_filter(call->filter);
+               call->filter = NULL;
        }
 }
 
@@ -667,18 +844,19 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
                              struct ftrace_event_call *call,
                              struct event_filter *filter,
                              struct filter_pred *pred,
+                             struct pred_stack *stack,
                              filter_pred_fn_t fn)
 {
        int idx, err;
 
-       if (filter->n_preds == MAX_FILTER_PRED) {
+       if (WARN_ON(filter->n_preds == filter->a_preds)) {
                parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
                return -ENOSPC;
        }
 
        idx = filter->n_preds;
-       filter_clear_pred(filter->preds[idx]);
-       err = filter_set_pred(filter->preds[idx], pred, fn);
+       filter_clear_pred(&filter->preds[idx]);
+       err = filter_set_pred(filter, idx, stack, pred, fn);
        if (err)
                return err;
 
@@ -763,6 +941,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
                           struct ftrace_event_call *call,
                           struct event_filter *filter,
                           struct filter_pred *pred,
+                          struct pred_stack *stack,
                           bool dry_run)
 {
        struct ftrace_event_field *field;
@@ -770,17 +949,12 @@ static int filter_add_pred(struct filter_parse_state *ps,
        unsigned long long val;
        int ret;
 
-       pred->fn = filter_pred_none;
+       fn = pred->fn = filter_pred_none;
 
-       if (pred->op == OP_AND) {
-               pred->pop_n = 2;
-               fn = filter_pred_and;
+       if (pred->op == OP_AND)
                goto add_pred_fn;
-       } else if (pred->op == OP_OR) {
-               pred->pop_n = 2;
-               fn = filter_pred_or;
+       else if (pred->op == OP_OR)
                goto add_pred_fn;
-       }
 
        field = find_event_field(call, pred->field_name);
        if (!field) {
@@ -829,7 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
 
 add_pred_fn:
        if (!dry_run)
-               return filter_add_pred_fn(ps, call, filter, pred, fn);
+               return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
        return 0;
 }
 
@@ -1187,6 +1361,234 @@ static int check_preds(struct filter_parse_state *ps)
        return 0;
 }
 
+static int count_preds(struct filter_parse_state *ps)
+{
+       struct postfix_elt *elt;
+       int n_preds = 0;
+
+       list_for_each_entry(elt, &ps->postfix, list) {
+               if (elt->op == OP_NONE)
+                       continue;
+               n_preds++;
+       }
+
+       return n_preds;
+}
+
+/*
+ * The tree is walked at filtering of an event. If the tree is not correctly
+ * built, it may cause an infinite loop. Check here that the tree does
+ * indeed terminate.
+ */
+static int check_pred_tree(struct event_filter *filter,
+                          struct filter_pred *root)
+{
+       struct filter_pred *preds;
+       struct filter_pred *pred;
+       enum move_type move = MOVE_DOWN;
+       int count = 0;
+       int done = 0;
+       int max;
+
+       /*
+        * The max that we can hit a node is three times.
+        * Once going down, once coming up from left, and
+        * once coming up from right. This is more than enough
+        * since leafs are only hit a single time.
+        */
+       max = 3 * filter->n_preds;
+
+       preds = filter->preds;
+       if  (!preds)
+               return -EINVAL;
+       pred = root;
+
+       do {
+               if (WARN_ON(count++ > max))
+                       return -EINVAL;
+
+               switch (move) {
+               case MOVE_DOWN:
+                       if (pred->left != FILTER_PRED_INVALID) {
+                               pred = &preds[pred->left];
+                               continue;
+                       }
+                       /* A leaf at the root is just a leaf in the tree */
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               case MOVE_UP_FROM_LEFT:
+                       pred = &preds[pred->right];
+                       move = MOVE_DOWN;
+                       continue;
+               case MOVE_UP_FROM_RIGHT:
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               }
+               done = 1;
+       } while (!done);
+
+       /* We are fine. */
+       return 0;
+}
+
+static int count_leafs(struct filter_pred *preds, struct filter_pred *root)
+{
+       struct filter_pred *pred;
+       enum move_type move = MOVE_DOWN;
+       int count = 0;
+       int done = 0;
+
+       pred = root;
+
+       do {
+               switch (move) {
+               case MOVE_DOWN:
+                       if (pred->left != FILTER_PRED_INVALID) {
+                               pred = &preds[pred->left];
+                               continue;
+                       }
+                       /* A leaf at the root is just a leaf in the tree */
+                       if (pred == root)
+                               return 1;
+                       count++;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               case MOVE_UP_FROM_LEFT:
+                       pred = &preds[pred->right];
+                       move = MOVE_DOWN;
+                       continue;
+               case MOVE_UP_FROM_RIGHT:
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               }
+               done = 1;
+       } while (!done);
+
+       return count;
+}
+
+static int fold_pred(struct filter_pred *preds, struct filter_pred *root)
+{
+       struct filter_pred *pred;
+       enum move_type move = MOVE_DOWN;
+       int count = 0;
+       int children;
+       int done = 0;
+
+       /* No need to keep the fold flag */
+       root->index &= ~FILTER_PRED_FOLD;
+
+       /* If the root is a leaf then do nothing */
+       if (root->left == FILTER_PRED_INVALID)
+               return 0;
+
+       /* count the children */
+       children = count_leafs(preds, &preds[root->left]);
+       children += count_leafs(preds, &preds[root->right]);
+
+       root->ops = kzalloc(sizeof(*root->ops) * children, GFP_KERNEL);
+       if (!root->ops)
+               return -ENOMEM;
+
+       root->val = children;
+
+       pred = root;
+       do {
+               switch (move) {
+               case MOVE_DOWN:
+                       if (pred->left != FILTER_PRED_INVALID) {
+                               pred = &preds[pred->left];
+                               continue;
+                       }
+                       if (WARN_ON(count == children))
+                               return -EINVAL;
+                       pred->index &= ~FILTER_PRED_FOLD;
+                       root->ops[count++] = pred->index;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               case MOVE_UP_FROM_LEFT:
+                       pred = &preds[pred->right];
+                       move = MOVE_DOWN;
+                       continue;
+               case MOVE_UP_FROM_RIGHT:
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               }
+               done = 1;
+       } while (!done);
+
+       return 0;
+}
+
+/*
+ * To optimize the processing of the ops, if we have several "ors" or
+ * "ands" together, we can put them in an array and process them all
+ * together speeding up the filter logic.
+ */
+static int fold_pred_tree(struct event_filter *filter,
+                          struct filter_pred *root)
+{
+       struct filter_pred *preds;
+       struct filter_pred *pred;
+       enum move_type move = MOVE_DOWN;
+       int done = 0;
+       int err;
+
+       preds = filter->preds;
+       if  (!preds)
+               return -EINVAL;
+       pred = root;
+
+       do {
+               switch (move) {
+               case MOVE_DOWN:
+                       if (pred->index & FILTER_PRED_FOLD) {
+                               err = fold_pred(preds, pred);
+                               if (err)
+                                       return err;
+                               /* Folded nodes are like leafs */
+                       } else if (pred->left != FILTER_PRED_INVALID) {
+                               pred = &preds[pred->left];
+                               continue;
+                       }
+
+                       /* A leaf at the root is just a leaf in the tree */
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               case MOVE_UP_FROM_LEFT:
+                       pred = &preds[pred->right];
+                       move = MOVE_DOWN;
+                       continue;
+               case MOVE_UP_FROM_RIGHT:
+                       if (pred == root)
+                               break;
+                       pred = get_pred_parent(pred, preds,
+                                              pred->parent, &move);
+                       continue;
+               }
+               done = 1;
+       } while (!done);
+
+       return 0;
+}
+
 static int replace_preds(struct ftrace_event_call *call,
                         struct event_filter *filter,
                         struct filter_parse_state *ps,
@@ -1195,14 +1597,32 @@ static int replace_preds(struct ftrace_event_call *call,
 {
        char *operand1 = NULL, *operand2 = NULL;
        struct filter_pred *pred;
+       struct filter_pred *root;
        struct postfix_elt *elt;
+       struct pred_stack stack = { }; /* init to NULL */
        int err;
        int n_preds = 0;
 
+       n_preds = count_preds(ps);
+       if (n_preds >= MAX_FILTER_PRED) {
+               parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
+               return -ENOSPC;
+       }
+
        err = check_preds(ps);
        if (err)
                return err;
 
+       if (!dry_run) {
+               err = __alloc_pred_stack(&stack, n_preds);
+               if (err)
+                       return err;
+               err = __alloc_preds(filter, n_preds);
+               if (err)
+                       goto fail;
+       }
+
+       n_preds = 0;
        list_for_each_entry(elt, &ps->postfix, list) {
                if (elt->op == OP_NONE) {
                        if (!operand1)
@@ -1211,14 +1631,16 @@ static int replace_preds(struct ftrace_event_call *call,
                                operand2 = elt->operand;
                        else {
                                parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
-                               return -EINVAL;
+                               err = -EINVAL;
+                               goto fail;
                        }
                        continue;
                }
 
-               if (n_preds++ == MAX_FILTER_PRED) {
+               if (WARN_ON(n_preds++ == MAX_FILTER_PRED)) {
                        parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
-                       return -ENOSPC;
+                       err = -ENOSPC;
+                       goto fail;
                }
 
                if (elt->op == OP_AND || elt->op == OP_OR) {
@@ -1228,76 +1650,181 @@ static int replace_preds(struct ftrace_event_call *call,
 
                if (!operand1 || !operand2) {
                        parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
-                       return -EINVAL;
+                       err = -EINVAL;
+                       goto fail;
                }
 
                pred = create_pred(elt->op, operand1, operand2);
 add_pred:
-               if (!pred)
-                       return -ENOMEM;
-               err = filter_add_pred(ps, call, filter, pred, dry_run);
+               if (!pred) {
+                       err = -ENOMEM;
+                       goto fail;
+               }
+               err = filter_add_pred(ps, call, filter, pred, &stack, dry_run);
                filter_free_pred(pred);
                if (err)
-                       return err;
+                       goto fail;
 
                operand1 = operand2 = NULL;
        }
 
-       return 0;
+       if (!dry_run) {
+               /* We should have one item left on the stack */
+               pred = __pop_pred_stack(&stack);
+               if (!pred)
+                       return -EINVAL;
+               /* This item is where we start from in matching */
+               root = pred;
+               /* Make sure the stack is empty */
+               pred = __pop_pred_stack(&stack);
+               if (WARN_ON(pred)) {
+                       err = -EINVAL;
+                       filter->root = NULL;
+                       goto fail;
+               }
+               err = check_pred_tree(filter, root);
+               if (err)
+                       goto fail;
+
+               /* Optimize the tree */
+               err = fold_pred_tree(filter, root);
+               if (err)
+                       goto fail;
+
+               /* We don't set root until we know it works */
+               barrier();
+               filter->root = root;
+       }
+
+       err = 0;
+fail:
+       __free_pred_stack(&stack);
+       return err;
 }
 
+struct filter_list {
+       struct list_head        list;
+       struct event_filter     *filter;
+};
+
 static int replace_system_preds(struct event_subsystem *system,
                                struct filter_parse_state *ps,
                                char *filter_string)
 {
        struct ftrace_event_call *call;
+       struct filter_list *filter_item;
+       struct filter_list *tmp;
+       LIST_HEAD(filter_list);
        bool fail = true;
        int err;
 
        list_for_each_entry(call, &ftrace_events, list) {
-               struct event_filter *filter = call->filter;
 
                if (strcmp(call->class->system, system->name) != 0)
                        continue;
 
-               /* try to see if the filter can be applied */
-               err = replace_preds(call, filter, ps, filter_string, true);
+               /*
+                * Try to see if the filter can be applied
+                *  (filter arg is ignored on dry_run)
+                */
+               err = replace_preds(call, NULL, ps, filter_string, true);
                if (err)
+                       goto fail;
+       }
+
+       list_for_each_entry(call, &ftrace_events, list) {
+               struct event_filter *filter;
+
+               if (strcmp(call->class->system, system->name) != 0)
                        continue;
 
-               /* really apply the filter */
-               filter_disable_preds(call);
-               err = replace_preds(call, filter, ps, filter_string, false);
+               filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
+               if (!filter_item)
+                       goto fail_mem;
+
+               list_add_tail(&filter_item->list, &filter_list);
+
+               filter_item->filter = __alloc_filter();
+               if (!filter_item->filter)
+                       goto fail_mem;
+               filter = filter_item->filter;
+
+               /* Can only fail on no memory */
+               err = replace_filter_string(filter, filter_string);
                if (err)
-                       filter_disable_preds(call);
-               else {
+                       goto fail_mem;
+
+               err = replace_preds(call, filter, ps, filter_string, false);
+               if (err) {
+                       filter_disable(call);
+                       parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+                       append_filter_err(ps, filter);
+               } else
                        call->flags |= TRACE_EVENT_FL_FILTERED;
-                       replace_filter_string(filter, filter_string);
-               }
+               /*
+                * Regardless of if this returned an error, we still
+                * replace the filter for the call.
+                */
+               filter = call->filter;
+               call->filter = filter_item->filter;
+               filter_item->filter = filter;
+
                fail = false;
        }
 
-       if (fail) {
-               parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
-               return -EINVAL;
+       if (fail)
+               goto fail;
+
+       /*
+        * The calls can still be using the old filters.
+        * Do a synchronize_sched() to ensure all calls are
+        * done with them before we free them.
+        */
+       synchronize_sched();
+       list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
+               __free_filter(filter_item->filter);
+               list_del(&filter_item->list);
+               kfree(filter_item);
        }
        return 0;
+ fail:
+       /* No call succeeded */
+       list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
+               list_del(&filter_item->list);
+               kfree(filter_item);
+       }
+       parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+       return -EINVAL;
+ fail_mem:
+       /* If any call succeeded, we still need to sync */
+       if (!fail)
+               synchronize_sched();
+       list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
+               __free_filter(filter_item->filter);
+               list_del(&filter_item->list);
+               kfree(filter_item);
+       }
+       return -ENOMEM;
 }
 
 int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
 {
-       int err;
        struct filter_parse_state *ps;
+       struct event_filter *filter;
+       struct event_filter *tmp;
+       int err = 0;
 
        mutex_lock(&event_mutex);
 
-       err = init_preds(call);
-       if (err)
-               goto out_unlock;
-
        if (!strcmp(strstrip(filter_string), "0")) {
-               filter_disable_preds(call);
-               remove_filter_string(call->filter);
+               filter_disable(call);
+               filter = call->filter;
+               if (!filter)
+                       goto out_unlock;
+               call->filter = NULL;
+               /* Make sure the filter is not being used */
+               synchronize_sched();
+               __free_filter(filter);
                goto out_unlock;
        }
 
@@ -1306,22 +1833,41 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
        if (!ps)
                goto out_unlock;
 
-       filter_disable_preds(call);
-       replace_filter_string(call->filter, filter_string);
+       filter = __alloc_filter();
+       if (!filter) {
+               kfree(ps);
+               goto out_unlock;
+       }
+
+       replace_filter_string(filter, filter_string);
 
        parse_init(ps, filter_ops, filter_string);
        err = filter_parse(ps);
        if (err) {
-               append_filter_err(ps, call->filter);
+               append_filter_err(ps, filter);
                goto out;
        }
 
-       err = replace_preds(call, call->filter, ps, filter_string, false);
-       if (err)
-               append_filter_err(ps, call->filter);
-       else
+       err = replace_preds(call, filter, ps, filter_string, false);
+       if (err) {
+               filter_disable(call);
+               append_filter_err(ps, filter);
+       } else
                call->flags |= TRACE_EVENT_FL_FILTERED;
 out:
+       /*
+        * Always swap the call filter with the new filter
+        * even if there was an error. If there was an error
+        * in the filter, we disable the filter and show the error
+        * string
+        */
+       tmp = call->filter;
+       call->filter = filter;
+       if (tmp) {
+               /* Make sure the call is done with the filter */
+               synchronize_sched();
+               __free_filter(tmp);
+       }
        filter_opstack_clear(ps);
        postfix_clear(ps);
        kfree(ps);
@@ -1334,18 +1880,21 @@ out_unlock:
 int apply_subsystem_event_filter(struct event_subsystem *system,
                                 char *filter_string)
 {
-       int err;
        struct filter_parse_state *ps;
+       struct event_filter *filter;
+       int err = 0;
 
        mutex_lock(&event_mutex);
 
-       err = init_subsystem_preds(system);
-       if (err)
-               goto out_unlock;
-
        if (!strcmp(strstrip(filter_string), "0")) {
                filter_free_subsystem_preds(system);
                remove_filter_string(system->filter);
+               filter = system->filter;
+               system->filter = NULL;
+               /* Ensure all filters are no longer used */
+               synchronize_sched();
+               filter_free_subsystem_filters(system);
+               __free_filter(filter);
                goto out_unlock;
        }
 
@@ -1354,7 +1903,17 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
        if (!ps)
                goto out_unlock;
 
-       replace_filter_string(system->filter, filter_string);
+       filter = __alloc_filter();
+       if (!filter)
+               goto out;
+
+       replace_filter_string(filter, filter_string);
+       /*
+        * No event actually uses the system filter
+        * we can free it without synchronize_sched().
+        */
+       __free_filter(system->filter);
+       system->filter = filter;
 
        parse_init(ps, filter_ops, filter_string);
        err = filter_parse(ps);
@@ -1384,7 +1943,7 @@ void ftrace_profile_free_filter(struct perf_event *event)
        struct event_filter *filter = event->filter;
 
        event->filter = NULL;
-       __free_preds(filter);
+       __free_filter(filter);
 }
 
 int ftrace_profile_set_filter(struct perf_event *event, int event_id,
@@ -1410,8 +1969,8 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
        if (event->filter)
                goto out_unlock;
 
-       filter = __alloc_preds();
-       if (IS_ERR(filter)) {
+       filter = __alloc_filter();
+       if (!filter) {
                err = PTR_ERR(filter);
                goto out_unlock;
        }
@@ -1419,7 +1978,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
        err = -ENOMEM;
        ps = kzalloc(sizeof(*ps), GFP_KERNEL);
        if (!ps)
-               goto free_preds;
+               goto free_filter;
 
        parse_init(ps, filter_ops, filter_str);
        err = filter_parse(ps);
@@ -1435,9 +1994,9 @@ free_ps:
        postfix_clear(ps);
        kfree(ps);
 
-free_preds:
+free_filter:
        if (err)
-               __free_preds(filter);
+               __free_filter(filter);
 
 out_unlock:
        mutex_unlock(&event_mutex);