/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>
#include <linux/version.h>
int sysctl_unprivileged_bpf_disabled __read_mostly;

static LIST_HEAD(bpf_map_types);
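/* look up the implementation registered for attr->map_type and have it
 * allocate and initialize the map itself
 */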
static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
{
	struct bpf_map_type_list *tl;
	struct bpf_map *map;

	list_for_each_entry(tl, &bpf_map_types, list_node) {
		if (tl->type == attr->map_type) {
			map = tl->ops->map_alloc(attr);
			if (IS_ERR(map))
				return map;
			map->ops = tl->ops;
			map->map_type = attr->map_type;
			return map;
		}
	}
	return ERR_PTR(-EINVAL);
}
/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	list_add(&tl->list_node, &bpf_map_types);
}
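/* a map implementation registers itself from an initcall; a minimal sketch
 * along the lines of kernel/bpf/arraymap.c:
 *
 *	static struct bpf_map_type_list array_type __read_mostly = {
 *		.ops = &array_ops,
 *		.type = BPF_MAP_TYPE_ARRAY,
 *	};
 *
 *	static int __init register_array_map(void)
 *	{
 *		bpf_register_map_type(&array_type);
 *		return 0;
 *	}
 *	late_initcall(register_array_map);
 */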
static int bpf_map_charge_memlock(struct bpf_map *map)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(map->pages, &user->locked_vm);

	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(map->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	map->user = user;
	return 0;
}
static void bpf_map_uncharge_memlock(struct bpf_map *map)
{
	struct user_struct *user = map->user;

	atomic_long_sub(map->pages, &user->locked_vm);
	free_uid(user);
}
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	bpf_map_uncharge_memlock(map);
	/* implementation dependent freeing */
	map->ops->map_free(map);
}
static void bpf_map_put_uref(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->usercnt)) {
		/* a prog array holds references to programs which may in
		 * turn hold this map; clear it once the last user reference
		 * is gone to break the reference cycle
		 */
		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
			bpf_fd_array_map_clear(map);
	}
}
/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}
void bpf_map_put_with_uref(struct bpf_map *map)
{
	bpf_map_put_uref(map);
	bpf_map_put(map);
}
static int bpf_map_release(struct inode *inode, struct file *filp)
{
	bpf_map_put_with_uref(filp->private_data);
	return 0;
}
#ifdef CONFIG_PROC_FS
static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
{
	const struct bpf_map *map = filp->private_data;

	seq_printf(m,
		   "map_type:\t%u\n"
		   "key_size:\t%u\n"
		   "value_size:\t%u\n"
		   "max_entries:\t%u\n",
		   map->map_type,
		   map->key_size,
		   map->value_size,
		   map->max_entries);
}
#endif
static const struct file_operations bpf_map_fops = {
#ifdef CONFIG_PROC_FS
	.show_fdinfo = bpf_map_show_fdinfo,
#endif
	.release = bpf_map_release,
};
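/* allocate an anonymous fd whose file ops route close() to
 * bpf_map_release() above; the fd owns the refcnt/usercnt pair taken
 * in map_create()
 */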
int bpf_map_new_fd(struct bpf_map *map)
{
	return anon_inode_getfd("bpf-map", &bpf_map_fops, map,
				O_RDWR | O_CLOEXEC);
}
/* helper macro to check that unused fields of 'union bpf_attr' are zero */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
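/* e.g. CHECK_ATTR(BPF_MAP_CREATE) expands to a scan of every byte past
 * 'max_entries' (that command's last field):
 *
 *	memchr_inv((void *) &attr->max_entries + sizeof(attr->max_entries),
 *		   0, sizeof(*attr) - offsetof(union bpf_attr, max_entries) -
 *		   sizeof(attr->max_entries)) != NULL
 *
 * which is true iff some field this command does not use is non-zero
 */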
#define BPF_MAP_CREATE_LAST_FIELD max_entries
/* called via syscall */
static int map_create(union bpf_attr *attr)
{
	struct bpf_map *map;
	int err;

	err = CHECK_ATTR(BPF_MAP_CREATE);
	if (err)
		return -EINVAL;

	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
	map = find_and_alloc_map(attr);
	if (IS_ERR(map))
		return PTR_ERR(map);

	atomic_set(&map->refcnt, 1);
	atomic_set(&map->usercnt, 1);

	err = bpf_map_charge_memlock(map);
	if (err)
		goto free_map_nouncharge;

	err = bpf_map_new_fd(map);
	if (err < 0)
		/* failed to allocate fd */
		goto free_map;

	return err;

free_map:
	bpf_map_uncharge_memlock(map);
free_map_nouncharge:
	map->ops->map_free(map);
	return err;
}
/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *__bpf_map_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}
void bpf_map_inc(struct bpf_map *map, bool uref)
{
	atomic_inc(&map->refcnt);
	if (uref)
		atomic_inc(&map->usercnt);
}
struct bpf_map *bpf_map_get_with_uref(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_map *map;

	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return map;

	bpf_map_inc(map, true);
	fdput(f);

	return map;
}
/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	return (void __user *) (unsigned long) val;
}
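/* user space performs the opposite cast when filling bpf_attr, typically:
 *
 *	#define ptr_to_u64(ptr) ((__u64) (unsigned long) (ptr))
 */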
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
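/* for per-CPU maps a lookup produces the value of every possible CPU:
 * num_possible_cpus() slots, each rounded up to 8 bytes, so user space
 * must supply a buffer of round_up(value_size, 8) * num_possible_cpus()
 * bytes
 */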
static int map_lookup_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value, *ptr;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_copy(map, key, value);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_copy(map, key, value);
	} else {
		rcu_read_lock();
		ptr = map->ops->map_lookup_elem(map, key);
		if (ptr)
			memcpy(value, ptr, value_size);
		rcu_read_unlock();
		err = ptr ? 0 : -ENOENT;
	}

	if (err)
		goto free_value;

	err = -EFAULT;
	if (copy_to_user(uvalue, value, value_size) != 0)
		goto free_value;

	err = 0;

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags
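/* attr->flags selects the update mode: BPF_ANY creates or overwrites,
 * BPF_NOEXIST only creates, BPF_EXIST only overwrites an existing element
 */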
static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *value;
	u32 value_size;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
		value_size = round_up(map->value_size, 8) * num_possible_cpus();
	else
		value_size = map->value_size;

	err = -ENOMEM;
	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		goto free_key;

	err = -EFAULT;
	if (copy_from_user(value, uvalue, value_size) != 0)
		goto free_value;

	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) {
		err = bpf_percpu_hash_update(map, key, value, attr->flags);
	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
		err = bpf_percpu_array_update(map, key, value, attr->flags);
	} else {
		rcu_read_lock();
		err = map->ops->map_update_elem(map, key, value, attr->flags);
		rcu_read_unlock();
	}

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
#define BPF_MAP_DELETE_ELEM_LAST_FIELD key

static int map_delete_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	struct fd f;
	void *key;
	int err;

	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_delete_elem(map, key);
	rcu_read_unlock();

free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct bpf_map *map;
	void *key, *next_key;
	struct fd f;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	f = fdget(ufd);
	map = __bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
static LIST_HEAD(bpf_prog_types);

static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *tl;

	list_for_each_entry(tl, &bpf_prog_types, list_node) {
		if (tl->type == type) {
			prog->aux->ops = tl->ops;
			prog->type = type;
			return 0;
		}
	}

	return -EINVAL;
}

void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
	list_add(&tl->list_node, &bpf_prog_types);
}
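/* program types are registered the same way as map types; e.g.
 * net/core/filter.c hooks up socket filters roughly like:
 *
 *	static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 *		.ops = &sk_filter_ops,
 *		.type = BPF_PROG_TYPE_SOCKET_FILTER,
 *	};
 *	...
 *	bpf_register_prog_type(&sk_filter_type);
 */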
/* fixup insn->imm field of bpf_call instructions:
 * if (insn->imm == BPF_FUNC_map_lookup_elem)
 *      insn->imm = bpf_map_lookup_elem - __bpf_call_base;
 * else if (insn->imm == BPF_FUNC_map_update_elem)
 *      insn->imm = bpf_map_update_elem - __bpf_call_base;
 * else ...
 *
 * this function is called after eBPF program passed verification
 */
static void fixup_bpf_calls(struct bpf_prog *prog)
{
	const struct bpf_func_proto *fn;
	int i;

	for (i = 0; i < prog->len; i++) {
		struct bpf_insn *insn = &prog->insnsi[i];

		if (insn->code == (BPF_JMP | BPF_CALL)) {
			/* we reach here when program has bpf_call instructions
			 * and it passed bpf_check(), means that
			 * ops->get_func_proto must have been supplied, check it
			 */
			BUG_ON(!prog->aux->ops->get_func_proto);

			if (insn->imm == BPF_FUNC_get_route_realm)
				prog->dst_needed = 1;
			if (insn->imm == BPF_FUNC_get_prandom_u32)
				bpf_user_rnd_init_once();
			if (insn->imm == BPF_FUNC_tail_call) {
				/* mark bpf_tail_call as different opcode
				 * to avoid conditional branch in
				 * interpreter for every normal call
				 * and to prevent accidental JITing by
				 * JIT compiler that doesn't support
				 * bpf_tail_call yet
				 */
				insn->imm = 0;
				insn->code |= BPF_X;
				continue;
			}

			fn = prog->aux->ops->get_func_proto(insn->imm);
			/* all functions that have prototype and verifier allowed
			 * programs to call them, must be real in-kernel functions
			 */
			BUG_ON(!fn->func);
			insn->imm = fn->func - __bpf_call_base;
		}
	}
}
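/* after this fixup the interpreter can issue a helper call simply as
 * (__bpf_call_base + insn->imm)(BPF_R1, ..., BPF_R5), so a 32-bit imm is
 * enough to address any helper
 */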
/* drop refcnt on maps used by eBPF program and free auxiliary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
	int i;

	for (i = 0; i < aux->used_map_cnt; i++)
		bpf_map_put(aux->used_maps[i]);

	kfree(aux->used_maps);
}
static int bpf_prog_charge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = get_current_user();
	unsigned long memlock_limit;

	memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;

	atomic_long_add(prog->pages, &user->locked_vm);
	if (atomic_long_read(&user->locked_vm) > memlock_limit) {
		atomic_long_sub(prog->pages, &user->locked_vm);
		free_uid(user);
		return -EPERM;
	}
	prog->aux->user = user;
	return 0;
}

static void bpf_prog_uncharge_memlock(struct bpf_prog *prog)
{
	struct user_struct *user = prog->aux->user;

	atomic_long_sub(prog->pages, &user->locked_vm);
	free_uid(user);
}
static void __prog_put_common(struct rcu_head *rcu)
{
	struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu);

	free_used_maps(aux);
	bpf_prog_uncharge_memlock(aux->prog);
	bpf_prog_free(aux->prog);
}
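/* a program detached from an event may still be running on another CPU
 * under rcu_read_lock(), so the final free has to wait for a grace period
 */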
/* version of bpf_prog_put() that is called after a grace period */
void bpf_prog_put_rcu(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		call_rcu(&prog->aux->rcu, __prog_put_common);
}

void bpf_prog_put(struct bpf_prog *prog)
{
	if (atomic_dec_and_test(&prog->aux->refcnt))
		__prog_put_common(&prog->aux->rcu);
}
EXPORT_SYMBOL_GPL(bpf_prog_put);
static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	struct bpf_prog *prog = filp->private_data;

	bpf_prog_put_rcu(prog);
	return 0;
}

static const struct file_operations bpf_prog_fops = {
	.release = bpf_prog_release,
};

int bpf_prog_new_fd(struct bpf_prog *prog)
{
	return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog,
				O_RDWR | O_CLOEXEC);
}
static struct bpf_prog *__bpf_prog_get(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);
	if (f.file->f_op != &bpf_prog_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	return f.file->private_data;
}
/* called by sockets/tracing/seccomp before attaching program to an event
 * pairs with bpf_prog_put()
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog;

	prog = __bpf_prog_get(f);
	if (IS_ERR(prog))
		return prog;

	atomic_inc(&prog->aux->refcnt);
	fdput(f);

	return prog;
}
EXPORT_SYMBOL_GPL(bpf_prog_get);
/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD kern_version

static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	if (attr->insn_cnt >= BPF_MAXINSNS)
		return -EINVAL;

	if (type == BPF_PROG_TYPE_KPROBE &&
	    attr->kern_version != LINUX_VERSION_CODE)
		return -EINVAL;

	if (type != BPF_PROG_TYPE_SOCKET_FILTER && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	err = bpf_prog_charge_memlock(prog);
	if (err)
		goto free_prog_nouncharge;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = 0;

	atomic_set(&prog->aux->refcnt, 1);
	prog->gpl_compatible = is_gpl ? 1 : 0;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	err = bpf_check(&prog, attr);
	if (err < 0)
		goto free_used_maps;

	/* fixup BPF_CALL->imm field */
	fixup_bpf_calls(prog);

	/* eBPF program is ready to be JITed */
	err = bpf_prog_select_runtime(prog);
	if (err < 0)
		goto free_used_maps;

	err = bpf_prog_new_fd(prog);
	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_uncharge_memlock(prog);
free_prog_nouncharge:
	bpf_prog_free(prog);
	return err;
}
#define BPF_OBJ_LAST_FIELD bpf_fd

static int bpf_obj_pin(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ))
		return -EINVAL;

	return bpf_obj_pin_user(attr->bpf_fd, u64_to_ptr(attr->pathname));
}

static int bpf_obj_get(const union bpf_attr *attr)
{
	if (CHECK_ATTR(BPF_OBJ) || attr->bpf_fd != 0)
		return -EINVAL;

	return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}
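/* pinning gives an object a name in the bpf filesystem so that it can
 * outlive the creating process; a minimal user space sketch (the path is
 * an example):
 *
 *	attr.pathname = ptr_to_u64("/sys/fs/bpf/my_map");
 *	attr.bpf_fd   = map_fd;
 *	bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
 *
 * after which any process may recover an fd via BPF_OBJ_GET
 */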
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	if (!capable(CAP_SYS_ADMIN) && sysctl_unprivileged_bpf_disabled)
		return -EPERM;

	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we don't know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	case BPF_PROG_LOAD:
		err = bpf_prog_load(&attr);
		break;
	case BPF_OBJ_PIN:
		err = bpf_obj_pin(&attr);
		break;
	case BPF_OBJ_GET:
		err = bpf_obj_get(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}
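/* there is no glibc wrapper for bpf(2); a minimal user space sketch
 * (error handling omitted):
 *
 *	union bpf_attr attr = {};
 *
 *	attr.map_type    = BPF_MAP_TYPE_HASH;
 *	attr.key_size    = sizeof(__u32);
 *	attr.value_size  = sizeof(__u64);
 *	attr.max_entries = 1024;
 *
 *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 */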