2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 Comparing to general packet classification problem,
14 RSVP needs only several relatively simple rules:
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
40 NOTE 2. Two "port id"'s seem to be redundant, rfc2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 F.e. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
65 Well, as a result, despite its simplicity, we get a pretty
66 powerful classification engine. */
/* NOTE(review): the enclosing struct declarations are not visible in this
 * excerpt; the member notes below are based on how the fields are used by
 * the functions further down in the file.
 */
/* Session table; indexed with the hash_dst() value in rsvp_classify(). */
73 struct rsvp_session __rcu *ht[256];
/* Next session in the same hash chain. */
78 struct rsvp_session __rcu *next;
/* Destination address the session matches (RSVP_DST_LEN 32-bit words). */
79 __be32 dst[RSVP_DST_LEN];
/* Destination port id: mask/key compared at dpi.offset from the transport
 * header pointer in rsvp_classify().
 */
80 struct tc_rsvp_gpi dpi;
83 /* 16 (src,sport) hash slots, and one wildcard source slot */
84 struct rsvp_filter __rcu *ht[16 + 1];
/* Next filter in the same slot. */
90 struct rsvp_filter __rcu *next;
/* Source address the filter matches (RSVP_DST_LEN 32-bit words). */
91 __be32 src[RSVP_DST_LEN];
/* Source port id: mask/key compared at spi.offset from the transport
 * header pointer in rsvp_classify().
 */
92 struct tc_rsvp_gpi spi;
/* Classification result; res.classid doubles as the tunnel id when the
 * filter's tunnelhdr is non-zero (see rsvp_classify()).
 */
95 struct tcf_result res;
/* Back-pointer to the owning session; read by rsvp_delete(). */
99 struct rsvp_session *sess;
/* Hash a destination address, protocol and tunnel id into a session-table
 * index. The result is masked with 0xFF, i.e. it lies in 0..255.
 * NOTE(review): any mixing steps between the seed and the return are not
 * visible in this excerpt.
 */
103 static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
/* Seed with the last 32-bit word of the address. */
105 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
109 return (h ^ protocol ^ tunnelid) & 0xFF;
/* Hash a source address into a filter-slot index.
 * NOTE(review): the mixing steps and the return statement are not visible
 * in this excerpt; presumably the result selects one of the 16 non-wildcard
 * slots of a session's filter table - confirm.
 */
112 static inline unsigned int hash_src(__be32 *src)
/* Seed with the last 32-bit word of the address. */
114 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
/* Helper macro for rsvp_classify(): calls tcf_exts_exec() on the matched
 * filter's exts, using the skb, f and res variables of the expansion site.
 * NOTE(review): how the result r is handled is on lines not visible in
 * this excerpt.
 */
122 #define RSVP_APPLY_RESULT() \
124 int r = tcf_exts_exec(skb, &f->exts, res); \
/*
 * Classify a packet. The destination address, protocol and current tunnel
 * id are hashed into the session table; within a matching session the
 * source-hash slot h2 is scanned first and then the wildcard slot (index 16).
 * All list walks use rcu_dereference_bh().
 *
 * NOTE(review): several lines of this function (declarations of some
 * locals, branch bodies and the return statements) are not visible in this
 * excerpt, so the comments below describe only what is shown.
 */
131 static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
132 struct tcf_result *res)
134 struct rsvp_head *head = rcu_dereference_bh(tp->root);
135 struct rsvp_session *s;
136 struct rsvp_filter *f;
/* RSVP_DST_LEN == 4 selects the IPv6 header layout. */
142 #if RSVP_DST_LEN == 4
143 struct ipv6hdr *nhptr;
/* Require sizeof(*nhptr) bytes of network header before reading it. */
145 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
147 nhptr = ipv6_hdr(skb);
151 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
159 #if RSVP_DST_LEN == 4
160 src = &nhptr->saddr.s6_addr32[0];
161 dst = &nhptr->daddr.s6_addr32[0];
162 protocol = nhptr->nexthdr;
/* Transport data is taken to start right after the fixed IPv6 header. */
163 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
/* Other variant: header with protocol/ihl fields; ihl counts 32-bit words. */
167 protocol = nhptr->protocol;
168 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
/* Fragments are special-cased; the branch body is not visible here. */
169 if (ip_is_fragment(nhptr))
173 h1 = hash_dst(dst, protocol, tunnelid);
/* Walk the session chain selected by h1. */
176 for (s = rcu_dereference_bh(head->ht[h1]); s;
177 s = rcu_dereference_bh(s->next)) {
/* Session match: last dst word, protocol, DPI, (IPv6 only) the remaining
 * dst words, and the tunnel id.
 */
178 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
179 protocol == s->protocol &&
/* NOTE(review): reads 32 bits at xprt + dpi.offset; no bounds check against
 * the packet length is visible in this excerpt - confirm the offset is
 * validated when the filter is installed.
 */
181 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
182 #if RSVP_DST_LEN == 4
183 dst[0] == s->dst[0] &&
184 dst[1] == s->dst[1] &&
185 dst[2] == s->dst[2] &&
187 tunnelid == s->tunnelid) {
/* Scan the source-hash slot of the matching session. */
189 for (f = rcu_dereference_bh(s->ht[h2]); f;
190 f = rcu_dereference_bh(f->next)) {
/* Filter match: last src word and masked SPI compare.
 * NOTE(review): same unchecked 32-bit read as for the DPI, at
 * xprt + spi.offset - confirm.
 */
191 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
192 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
193 #if RSVP_DST_LEN == 4
195 src[0] == f->src[0] &&
196 src[1] == f->src[1] &&
/* tunnelhdr distinguishes ordinary flows (0) from tunnel sessions; the
 * body of this branch is not visible here.
 */
204 if (f->tunnelhdr == 0)
/* Tunnel case: res.classid is taken as the tunnel id and nhptr is moved so
 * that nhptr + sizeof(*nhptr) == xprt + tunnelhdr.
 * NOTE(review): no check that the new position lies inside the packet is
 * visible in this excerpt - confirm.
 */
207 tunnelid = f->res.classid;
208 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
213 /* And wildcard bucket... */
214 for (f = rcu_dereference_bh(s->ht[16]); f;
215 f = rcu_dereference_bh(f->next)) {
/*
 * Replace the filter whose handle is h with the new node n. The low byte
 * of h selects the session chain, the next byte the filter slot. When the
 * old node is found, n inherits its next pointer and is published in its
 * place with rcu_assign_pointer(). Uses rtnl_dereference() throughout.
 *
 * NOTE(review): the inner loop has no termination condition, so pins is
 * dereferenced without a NULL check in the visible lines - confirm the
 * handle is guaranteed to be found before the end of the first slot walked.
 */
226 static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
228 struct rsvp_head *head = rtnl_dereference(tp->root);
229 struct rsvp_session *s;
/* ins tracks the link that points at pins, so it can be redirected to n. */
230 struct rsvp_filter __rcu **ins;
231 struct rsvp_filter *pins;
232 unsigned int h1 = h & 0xFF;
233 unsigned int h2 = (h >> 8) & 0xFF;
235 for (s = rtnl_dereference(head->ht[h1]); s;
236 s = rtnl_dereference(s->next)) {
237 for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
238 ins = &pins->next, pins = rtnl_dereference(*ins)) {
239 if (pins->handle == h) {
/* Set up n->next first, then publish n in place of the old node. */
240 RCU_INIT_POINTER(n->next, pins->next);
241 rcu_assign_pointer(*ins, n);
247 /* Something went wrong if we are trying to replace a non-existent
248 * node. Might as well halt instead of silently failing.
/*
 * Look up a filter by handle. The low byte of the handle selects the
 * session chain and the next byte the filter slot; the matching filter is
 * returned as an unsigned long.
 *
 * NOTE(review): h2 can be as large as 255 while a session has 17 slots; a
 * range check is not visible in this excerpt - confirm one exists on the
 * missing lines. The not-found return is also not visible (gen_handle()
 * treats 0 as "no such filter").
 */
253 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
255 struct rsvp_head *head = rtnl_dereference(tp->root);
256 struct rsvp_session *s;
257 struct rsvp_filter *f;
258 unsigned int h1 = handle & 0xFF;
259 unsigned int h2 = (handle >> 8) & 0xFF;
264 for (s = rtnl_dereference(head->ht[h1]); s;
265 s = rtnl_dereference(s->next)) {
266 for (f = rtnl_dereference(s->ht[h2]); f;
267 f = rtnl_dereference(f->next)) {
268 if (f->handle == handle)
269 return (unsigned long)f;
/*
 * Allocate a zero-filled rsvp_head and publish it as tp->root.
 * NOTE(review): the allocation-failure check and the return value are on
 * lines not visible in this excerpt.
 */
275 static int rsvp_init(struct tcf_proto *tp)
277 struct rsvp_head *data;
279 data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
281 rcu_assign_pointer(tp->root, data);
/*
 * RCU callback queued by rsvp_delete_filter(): recovers the filter from its
 * embedded rcu head and destroys its exts.
 * NOTE(review): the release of the filter memory itself is not visible in
 * this excerpt.
 */
287 static void rsvp_delete_filter_rcu(struct rcu_head *head)
289 struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
291 tcf_exts_destroy(&f->exts);
/*
 * Tear down a filter that has already been unlinked: unbind its result
 * with tcf_unbind_filter(), then hand the remaining cleanup to
 * rsvp_delete_filter_rcu() through call_rcu().
 */
295 static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
297 tcf_unbind_filter(tp, &f->res);
298 /* all classifiers are required to call tcf_exts_destroy() after rcu
299 * grace period, since converted-to-rcu actions are relying on that
300 * in cleanup() callback
302 call_rcu(&f->rcu, rsvp_delete_filter_rcu);
/*
 * Destroy the classifier instance: clear tp->root, then unlink every
 * session from each of the 256 buckets and every filter from each of the
 * session's 17 slots, passing each filter to rsvp_delete_filter(). The head
 * itself is released with kfree_rcu().
 *
 * NOTE(review): the release of each session object is on lines not visible
 * in this excerpt.
 */
305 static void rsvp_destroy(struct tcf_proto *tp)
307 struct rsvp_head *data = rtnl_dereference(tp->root);
/* Detach the head from tp before dismantling it. */
313 RCU_INIT_POINTER(tp->root, NULL);
315 for (h1 = 0; h1 < 256; h1++) {
316 struct rsvp_session *s;
/* Pop sessions off the front of the chain until it is empty. */
318 while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
319 RCU_INIT_POINTER(data->ht[h1], s->next);
/* Slots 0..15 are the source-hash slots, slot 16 the wildcard slot. */
321 for (h2 = 0; h2 <= 16; h2++) {
322 struct rsvp_filter *f;
324 while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
325 rcu_assign_pointer(s->ht[h2], f->next);
326 rsvp_delete_filter(tp, f);
332 kfree_rcu(data, rcu);
/*
 * Delete the filter passed in arg. The filter is unlinked from the slot of
 * its session selected by bits 8..15 of its handle and handed to
 * rsvp_delete_filter(). The session's 17 slots are then inspected and, per
 * the comment below, an empty session is unlinked from the bucket selected
 * by the low byte of the handle.
 *
 * NOTE(review): the comparisons inside the search loops, the return values
 * and the assignment to *last are on lines not visible in this excerpt; the
 * final loop over head->ht presumably determines whether any session
 * remains - confirm.
 */
335 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
337 struct rsvp_head *head = rtnl_dereference(tp->root);
338 struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
339 struct rsvp_filter __rcu **fp;
340 unsigned int h = f->handle;
341 struct rsvp_session __rcu **sp;
342 struct rsvp_session *nsp, *s = f->sess;
/* fp follows the link that points at the current filter in the slot. */
345 fp = &s->ht[(h >> 8) & 0xFF];
346 for (nfp = rtnl_dereference(*fp); nfp;
347 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
/* Bypass f in the chain, then schedule its teardown. */
349 RCU_INIT_POINTER(*fp, f->next);
350 rsvp_delete_filter(tp, f);
354 for (i = 0; i <= 16; i++)
358 /* OK, session has no flows */
359 sp = &head->ht[h & 0xFF];
360 for (nsp = rtnl_dereference(*sp); nsp;
361 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
363 RCU_INIT_POINTER(*sp, s->next);
375 for (h1 = 0; h1 < 256; h1++) {
376 if (rcu_access_pointer(head->ht[h1])) {
/*
 * Generate an unused filter handle. The per-head hgenerator is advanced in
 * steps of 0x10000 (restarting at 0x10000 when it wraps to 0) and OR-ed
 * with salt; a candidate is checked with rsvp_get(), where 0 means no
 * filter has that handle.
 *
 * NOTE(review): the retry loop and the return statements are on lines not
 * visible in this excerpt; rsvp_change() treats a 0 result as failure.
 */
385 static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
387 struct rsvp_head *data = rtnl_dereference(tp->root);
393 if ((data->hgenerator += 0x10000) == 0)
394 data->hgenerator = 0x10000;
395 h = data->hgenerator|salt;
396 if (rsvp_get(tp, h) == 0)
/*
 * Test the bit for the current tgenerator value in the tunnel-id bitmap
 * data->tmap (32 ids per word: n is the word index, b the bit mask).
 *
 * NOTE(review): what happens after the test (presumably setting the bit,
 * going by the name) and the return values are on lines not visible in
 * this excerpt. The constant 1 is a signed int, so the shift reaches the
 * sign bit when the low five bits are 31; 1U would avoid that.
 */
402 static int tunnel_bts(struct rsvp_head *data)
404 int n = data->tgenerator >> 5;
405 u32 b = 1 << (data->tgenerator & 0x1F);
407 if (data->tmap[n] & b)
/*
 * Rebuild the tunnel-id bitmap by walking every filter of every session and
 * loading data->tgenerator with the classid of filters selected by the
 * tunnelhdr test.
 *
 * NOTE(review): the statement guarded by the tunnelhdr test and whatever
 * follows the tgenerator assignment are on lines not visible in this
 * excerpt. In the visible lines the local tmap is only zeroed before being
 * copied over data->tmap - confirm it is populated on the missing lines,
 * otherwise the final memcpy() simply clears the map.
 */
413 static void tunnel_recycle(struct rsvp_head *data)
415 struct rsvp_session __rcu **sht = data->ht;
419 memset(tmap, 0, sizeof(tmap));
421 for (h1 = 0; h1 < 256; h1++) {
422 struct rsvp_session *s;
423 for (s = rtnl_dereference(sht[h1]); s;
424 s = rtnl_dereference(s->next)) {
/* Slots 0..15 plus the wildcard slot 16. */
425 for (h2 = 0; h2 <= 16; h2++) {
426 struct rsvp_filter *f;
428 for (f = rtnl_dereference(s->ht[h2]); f;
429 f = rtnl_dereference(f->next)) {
430 if (f->tunnelhdr == 0)
432 data->tgenerator = f->res.classid;
439 memcpy(data->tmap, tmap, sizeof(tmap));
/*
 * Allocate a tunnel id. Makes up to two passes; each pass advances
 * data->tgenerator up to 255 times (skipping 0 on wrap) and returns the
 * first value for which tunnel_bts() is non-zero. tunnel_recycle() is
 * called between passes to refresh the bitmap.
 *
 * NOTE(review): the failure return is on a line not visible in this
 * excerpt; rsvp_change() treats a 0 result as failure.
 */
442 static u32 gen_tunnel(struct rsvp_head *data)
446 for (k = 0; k < 2; k++) {
447 for (i = 255; i > 0; i--) {
448 if (++data->tgenerator == 0)
449 data->tgenerator = 1;
450 if (tunnel_bts(data))
451 return data->tgenerator;
453 tunnel_recycle(data);
/*
 * Netlink attribute policy used by rsvp_change() when parsing the nested
 * TCA_OPTIONS attribute.
 *
 * NOTE(review): for NLA_BINARY the .len field is presumably an upper bound
 * on the payload, not an exact size - confirm that shorter DST/SRC
 * attributes cannot reach the fixed-size copies in rsvp_change().
 */
458 static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
459 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
460 [TCA_RSVP_DST] = { .type = NLA_BINARY,
461 .len = RSVP_DST_LEN * sizeof(u32) },
462 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
463 .len = RSVP_DST_LEN * sizeof(u32) },
/* No .type given, only a length. */
464 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
/*
 * Create a new filter or update an existing one from netlink attributes.
 *
 * Existing filter (*arg non-zero): the node is duplicated with kmemdup(),
 * the copy optionally gets a new classid and the validated exts, and it is
 * swapped into the list by rsvp_replace().
 *
 * New filter: a filter is allocated and filled from TCA_RSVP_SRC, _PINFO
 * and _CLASSID; a handle is generated from the session hash h1 and the
 * filter slot h2; the filter is then linked into a matching session, or
 * into a freshly allocated session if none matches.
 *
 * NOTE(review): many lines of this function (some parameters and locals,
 * branch bodies, labels and return statements) are not visible in this
 * excerpt, so the comments below describe only what is shown.
 */
467 static int rsvp_change(struct net *net, struct sk_buff *in_skb,
468 struct tcf_proto *tp, unsigned long base,
471 unsigned long *arg, bool ovr)
473 struct rsvp_head *data = rtnl_dereference(tp->root);
474 struct rsvp_filter *f, *nfp;
475 struct rsvp_filter __rcu **fp;
476 struct rsvp_session *nsp, *s;
477 struct rsvp_session __rcu **sp;
/* Stays NULL unless TCA_RSVP_PINFO is supplied. */
478 struct tc_rsvp_pinfo *pinfo = NULL;
479 struct nlattr *opt = tca[TCA_OPTIONS];
480 struct nlattr *tb[TCA_RSVP_MAX + 1];
/* Presumably the no-options case (guard not visible): only acceptable
 * when no handle was requested.
 */
487 return handle ? -EINVAL : 0;
489 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
/* e holds the validated exts until tcf_exts_change() moves them into the
 * filter.
 */
493 err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
496 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
500 f = (struct rsvp_filter *)*arg;
502 /* Node exists: adjust only classid */
503 struct rsvp_filter *n;
/* A non-zero handle must match the existing filter's handle. */
505 if (f->handle != handle && handle)
/* Work on a copy; the original stays linked until rsvp_replace(). */
508 n = kmemdup(f, sizeof(*f), GFP_KERNEL);
514 err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
520 if (tb[TCA_RSVP_CLASSID]) {
521 n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
522 tcf_bind_filter(tp, &n->res, base);
525 tcf_exts_change(tp, &n->exts, &e);
526 rsvp_replace(tp, n, handle);
530 /* Now more serious part... */
/* A destination is tested for first; the branch body is not visible. */
534 if (tb[TCA_RSVP_DST] == NULL)
538 f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
542 err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
/* With a source address the filter goes into the hash_src() slot.
 * NOTE(review): copies sizeof(f->src) bytes from the attribute payload;
 * confirm the attribute cannot be shorter than that.
 */
546 if (tb[TCA_RSVP_SRC]) {
547 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
548 h2 = hash_src(f->src);
550 if (tb[TCA_RSVP_PINFO]) {
551 pinfo = nla_data(tb[TCA_RSVP_PINFO]);
553 f->tunnelhdr = pinfo->tunnelhdr;
555 if (tb[TCA_RSVP_CLASSID])
556 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
558 dst = nla_data(tb[TCA_RSVP_DST]);
/* pinfo may be NULL here, hence protocol and tunnel id default to 0. */
559 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
/* Handle layout: low byte = session hash, next byte = filter slot. */
562 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
/* classid range check and automatic allocation through gen_tunnel() when
 * classid is 0; presumably applies to tunnel filters only (the enclosing
 * condition is not visible) - confirm.
 */
567 if (f->res.classid > 255)
571 if (f->res.classid == 0 &&
572 (f->res.classid = gen_tunnel(data)) == 0)
/* Look for an existing session with the same dst, protocol, DPI and
 * tunnel id.
 */
576 for (sp = &data->ht[h1];
577 (s = rtnl_dereference(*sp)) != NULL;
579 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
580 pinfo && pinfo->protocol == s->protocol &&
581 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
582 #if RSVP_DST_LEN == 4
583 dst[0] == s->dst[0] &&
584 dst[1] == s->dst[1] &&
585 dst[2] == s->dst[2] &&
587 pinfo->tunnelid == s->tunnelid) {
590 /* OK, we found appropriate session */
/* Only non-tunnel filters are bound to a class. */
595 if (f->tunnelhdr == 0)
596 tcf_bind_filter(tp, &f->res, base);
598 tcf_exts_change(tp, &f->exts, &e);
/* Find the insertion point in the slot by comparing SPI masks; f is then
 * linked in front of nfp. The statement guarded by the mask test is not
 * visible here.
 */
601 for (nfp = rtnl_dereference(*fp); nfp;
602 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
603 __u32 mask = nfp->spi.mask & f->spi.mask;
605 if (mask != f->spi.mask)
/* Set f->next before publishing f. */
608 RCU_INIT_POINTER(f->next, nfp);
609 rcu_assign_pointer(*fp, f);
611 *arg = (unsigned long)f;
616 /* No session found. Create new one. */
619 s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
622 memcpy(s->dst, dst, sizeof(s->dst));
/* NOTE(review): pinfo is dereferenced here although it is optional above;
 * a NULL guard is not visible in this excerpt - confirm one exists.
 */
626 s->protocol = pinfo->protocol;
627 s->tunnelid = pinfo->tunnelid;
/* Same ordering scheme for sessions, keyed on the DPI mask. */
630 for (nsp = rtnl_dereference(*sp); nsp;
631 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
632 if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
635 RCU_INIT_POINTER(s->next, nsp);
636 rcu_assign_pointer(*sp, s);
/* Error unwinding: release the filter's exts and the temporary exts. */
641 tcf_exts_destroy(&f->exts);
644 tcf_exts_destroy(&e);
/*
 * Iterate over every filter (all 256 session buckets, all 17 slots of each
 * session) on behalf of a tcf_walker. Entries are counted against
 * arg->skip, and arg->fn is called with the filter cast to unsigned long; a
 * negative result from arg->fn is special-cased.
 *
 * NOTE(review): the bodies of both inner branches and the counter update
 * are on lines not visible in this excerpt.
 */
648 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
650 struct rsvp_head *head = rtnl_dereference(tp->root);
656 for (h = 0; h < 256; h++) {
657 struct rsvp_session *s;
659 for (s = rtnl_dereference(head->ht[h]); s;
660 s = rtnl_dereference(s->next)) {
661 for (h1 = 0; h1 <= 16; h1++) {
662 struct rsvp_filter *f;
664 for (f = rtnl_dereference(s->ht[h1]); f;
665 f = rtnl_dereference(f->next)) {
666 if (arg->count < arg->skip) {
670 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
/*
 * Dump one filter into a netlink message. The handle is stored in
 * t->tcm_handle; a nested TCA_OPTIONS attribute receives the session's
 * destination, a tc_rsvp_pinfo, the classid (if non-zero), the source
 * address (unless the filter's slot is 16, the wildcard slot) and the exts.
 * Exts statistics are appended after the nest is closed. On failure the
 * nest is cancelled.
 *
 * NOTE(review): the assignment of s, some pinfo member assignments and the
 * return statements are on lines not visible in this excerpt.
 */
681 static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
682 struct sk_buff *skb, struct tcmsg *t)
684 struct rsvp_filter *f = (struct rsvp_filter *)fh;
685 struct rsvp_session *s;
/* NOTE(review): this stack object is emitted in full below; confirm every
 * member, including any padding, is initialised on the lines not shown.
 */
687 struct tc_rsvp_pinfo pinfo;
693 t->tcm_handle = f->handle;
695 nest = nla_nest_start(skb, TCA_OPTIONS);
697 goto nla_put_failure;
699 if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
700 goto nla_put_failure;
703 pinfo.protocol = s->protocol;
704 pinfo.tunnelid = s->tunnelid;
705 pinfo.tunnelhdr = f->tunnelhdr;
707 if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
708 goto nla_put_failure;
709 if (f->res.classid &&
710 nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
711 goto nla_put_failure;
/* Bits 8..15 of the handle hold the filter slot; slot 16 is the wildcard
 * slot, which carries no source address.
 */
712 if (((f->handle >> 8) & 0xFF) != 16 &&
713 nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
714 goto nla_put_failure;
716 if (tcf_exts_dump(skb, &f->exts) < 0)
717 goto nla_put_failure;
719 nla_nest_end(skb, nest);
721 if (tcf_exts_dump_stats(skb, &f->exts) < 0)
722 goto nla_put_failure;
/* Failure path: drop the partially built nest. */
726 nla_nest_cancel(skb, nest);
/*
 * Classifier operations table registered by init_rsvp(). RSVP_OPS is not
 * defined in this file; presumably the file including this template
 * supplies it.
 * NOTE(review): further members are on lines not visible in this excerpt.
 */
730 static struct tcf_proto_ops RSVP_OPS __read_mostly = {
732 .classify = rsvp_classify,
734 .destroy = rsvp_destroy,
736 .change = rsvp_change,
737 .delete = rsvp_delete,
740 .owner = THIS_MODULE,
/* Module init: register the classifier ops and return the registration
 * result.
 */
743 static int __init init_rsvp(void)
745 return register_tcf_proto_ops(&RSVP_OPS);
/* Module exit: unregister the classifier ops. */
748 static void __exit exit_rsvp(void)
750 unregister_tcf_proto_ops(&RSVP_OPS);
/* Hook init_rsvp()/exit_rsvp() up as the module's entry and exit points. */
753 module_init(init_rsvp)
754 module_exit(exit_rsvp)