2 * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 Comparing to general packet classification problem,
14 RSVP needs only several relatively simple rules:
16 * (dst, protocol) are always specified,
17 so that we are able to hash them.
18 * src may be exact, or may be wildcard, so that
19 we can keep a hash table plus one wildcard entry.
20 * source port (or flow label) is important only if src is given.
24 We use a two level hash table: The top level is keyed by
25 destination address and protocol ID, every bucket contains a list
26 of "rsvp sessions", identified by destination address, protocol and
27 DPI(="Destination Port ID"): triple (key, mask, offset).
29 Every bucket has a smaller hash table keyed by source address
30 (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31 Every bucket is again a list of "RSVP flows", selected by
32 source address and SPI(="Source Port ID" here rather than
33 "security parameter index"): triple (key, mask, offset).
36 NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37 and all fragmented packets go to the best-effort traffic class.
40 NOTE 2. Two "port id"'s seem to be redundant, rfc2207 requires
41 only one "Generalized Port Identifier". So that for classic
42 ah, esp (and udp,tcp) both *pi should coincide or one of them
45 At first sight, this redundancy is just a waste of CPU
46 resources. But DPI and SPI add the possibility to assign different
47 priorities to GPIs. Look also at note 4 about tunnels below.
50 NOTE 3. One complication is the case of tunneled packets.
51 We implement it as follows: if the first lookup
52 matches a special session with "tunnelhdr" value not zero,
53 flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54 In this case, we pull tunnelhdr bytes and restart lookup
55 with tunnel ID added to the list of keys. Simple and stupid 8)8)
56 It's enough for PIMREG and IPIP.
59 NOTE 4. Two GPIs make it possible to parse even GRE packets.
60 F.e. DPI can select ETH_P_IP (and necessary flags to make
61 tunnelhdr correct) in GRE protocol field and SPI matches
62 GRE key. Is it not nice? 8)8)
65 Well, as a result, despite its simplicity, we get a pretty
66 powerful classification engine. */
/* Top-level session hash table: 256 buckets, matching the 0..255 range returned by hash_dst(). */
73 struct rsvp_session __rcu *ht[256];
/* Next session in the same top-level hash chain. */
78 struct rsvp_session __rcu *next;
/* Destination address matched by this session, as RSVP_DST_LEN 32-bit words. */
79 __be32 dst[RSVP_DST_LEN];
/* Destination port ID: (key, mask, offset) tested against the transport header in rsvp_classify(). */
80 struct tc_rsvp_gpi dpi;
83 /* 16 (src,sport) hash slots, and one wildcard source slot */
84 struct rsvp_filter __rcu *ht[16 + 1];
/* Next filter in the same per-session bucket. */
90 struct rsvp_filter __rcu *next;
/* Source address compared against the packet source in rsvp_classify(). */
91 __be32 src[RSVP_DST_LEN];
/* Source port ID: (key, mask, offset) tested against the transport header in rsvp_classify(). */
92 struct tc_rsvp_gpi spi;
/* Classification result; res.classid is reused as the tunnel ID when tunnelhdr is non-zero. */
95 struct tcf_result res;
/* Session this filter is linked under; rsvp_delete() uses it to find the bucket to unlink from. */
99 struct rsvp_session *sess;
/*
 * Hash a destination address together with the protocol and tunnel ID into
 * a top-level bucket index in the range 0..255.  The hash starts from the
 * last 32-bit word of the address only.
 */
103 static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
105 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
/* Fold in protocol and tunnel ID, then keep the low 8 bits as the index. */
109 return (h ^ protocol ^ tunnelid) & 0xFF;
/*
 * Hash a source address, starting from its last 32-bit word.
 * NOTE(review): presumably yields one of the 16 non-wildcard per-session
 * slots; the reduction and return are not shown here - confirm.
 */
112 static inline unsigned int hash_src(__be32 *src)
114 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
/*
 * Run the extensions of the matched filter f through tcf_exts_exec() and
 * keep the outcome in r.  Relies on skb, f and res being in scope at the
 * expansion site.
 */
122 #define RSVP_APPLY_RESULT() \
124 int r = tcf_exts_exec(skb, &f->exts, res); \
/*
 * Classify one packet.
 *
 * The session is looked up in head->ht[] by destination address, protocol,
 * tunnel ID and the DPI (key, mask, offset) triple.  Within the session the
 * flow is looked up first in the hashed source bucket s->ht[h2], matching
 * source address and SPI, and then in the wildcard bucket s->ht[16].
 * All list walks use rcu_dereference_bh().
 */
131 static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp,
132 struct tcf_result *res)
134 struct rsvp_head *head = rcu_dereference_bh(tp->root);
135 struct rsvp_session *s;
136 struct rsvp_filter *f;
/* RSVP_DST_LEN == 4 selects the IPv6 header layout; the other branch reads IPv4 fields. */
142 #if RSVP_DST_LEN == 4
143 struct ipv6hdr *nhptr;
/* Make sure the fixed network header is available before reading it. */
145 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
147 nhptr = ipv6_hdr(skb);
151 if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
157 #if RSVP_DST_LEN == 4
158 src = &nhptr->saddr.s6_addr32[0];
159 dst = &nhptr->daddr.s6_addr32[0];
160 protocol = nhptr->nexthdr;
/* Transport header is taken to start right after the fixed IPv6 header. */
161 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
165 protocol = nhptr->protocol;
/* IPv4: header length is ihl 32-bit words. */
166 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
167 if (ip_is_fragment(nhptr))
171 h1 = hash_dst(dst, protocol, tunnelid);
174 for (s = rcu_dereference_bh(head->ht[h1]); s;
175 s = rcu_dereference_bh(s->next)) {
/* The last address word is compared first; the remaining words only exist for RSVP_DST_LEN == 4. */
176 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
177 protocol == s->protocol &&
/*
 * NOTE(review): this and the SPI test below load 32 bits from
 * xprt + offset, where offset comes from the configured session or filter.
 * No bounds check against the pulled packet data is visible in these
 * lines - confirm one exists.
 */
179 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
180 #if RSVP_DST_LEN == 4
181 dst[0] == s->dst[0] &&
182 dst[1] == s->dst[1] &&
183 dst[2] == s->dst[2] &&
185 tunnelid == s->tunnelid) {
/* Session matched: search its hashed source bucket. */
187 for (f = rcu_dereference_bh(s->ht[h2]); f;
188 f = rcu_dereference_bh(f->next)) {
189 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
190 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
191 #if RSVP_DST_LEN == 4
193 src[0] == f->src[0] &&
194 src[1] == f->src[1] &&
/*
 * A non-zero tunnelhdr marks a tunnel filter: its classid is taken as the
 * tunnel ID and nhptr is moved so that the inner header is parsed next.
 */
202 if (f->tunnelhdr == 0)
205 tunnelid = f->res.classid;
206 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
211 /* And wildcard bucket... */
212 for (f = rcu_dereference_bh(s->ht[16]); f;
213 f = rcu_dereference_bh(f->next)) {
/*
 * Swap the filter whose handle is h for the new filter n.
 *
 * The low 8 bits of h select the session bucket and bits 8..15 select the
 * per-session filter bucket.  When the old filter is found, n takes over its
 * next pointer and is then published in its place with rcu_assign_pointer().
 */
224 static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
226 struct rsvp_head *head = rtnl_dereference(tp->root);
227 struct rsvp_session *s;
228 struct rsvp_filter __rcu **ins;
229 struct rsvp_filter *pins;
230 unsigned int h1 = h & 0xFF;
231 unsigned int h2 = (h >> 8) & 0xFF;
233 for (s = rtnl_dereference(head->ht[h1]); s;
234 s = rtnl_dereference(s->next)) {
/*
 * NOTE(review): this inner loop has an empty condition, so pins is
 * dereferenced even if the bucket is empty or the handle is not in this
 * session (for example when several sessions share bucket h1).  Confirm
 * whether a NULL test on pins is needed here.
 */
235 for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
236 ins = &pins->next, pins = rtnl_dereference(*ins)) {
237 if (pins->handle == h) {
/* Link n to the successor first, then make n visible to readers. */
238 RCU_INIT_POINTER(n->next, pins->next);
239 rcu_assign_pointer(*ins, n);
245 /* Something went wrong if we are trying to replace a non-existent
246 * node. Might as well halt instead of silently failing.
/*
 * Find a filter by handle.
 *
 * The low 8 bits of the handle select the session bucket and bits 8..15
 * select the per-session filter bucket.  On a match the filter pointer is
 * returned cast to unsigned long.
 */
251 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
253 struct rsvp_head *head = rtnl_dereference(tp->root);
254 struct rsvp_session *s;
255 struct rsvp_filter *f;
256 unsigned int h1 = handle & 0xFF;
257 unsigned int h2 = (handle >> 8) & 0xFF;
/* Every session in bucket h1 is searched, since the handle does not identify the session. */
262 for (s = rtnl_dereference(head->ht[h1]); s;
263 s = rtnl_dereference(s->next)) {
264 for (f = rtnl_dereference(s->ht[h2]); f;
265 f = rtnl_dereference(f->next)) {
266 if (f->handle == handle)
267 return (unsigned long)f;
/* Allocate a zeroed rsvp_head and publish it as tp->root. */
273 static int rsvp_init(struct tcf_proto *tp)
275 struct rsvp_head *data;
277 data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
279 rcu_assign_pointer(tp->root, data);
/*
 * RCU callback queued by rsvp_delete_filter(): recover the filter from its
 * embedded rcu_head and destroy its extensions.
 */
285 static void rsvp_delete_filter_rcu(struct rcu_head *head)
287 struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
289 tcf_exts_destroy(&f->exts);
/*
 * Release a filter that has already been unlinked: unbind its result from
 * the class, then defer the remaining teardown to rsvp_delete_filter_rcu()
 * through call_rcu().
 */
293 static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
295 tcf_unbind_filter(tp, &f->res);
296 /* all classifiers are required to call tcf_exts_destroy() after rcu
297 * grace period, since converted-to-rcu actions are relying on that
298 * in cleanup() callback
300 call_rcu(&f->rcu, rsvp_delete_filter_rcu);
/*
 * Tear down the whole classifier instance: unlink every session from every
 * top-level bucket, delete every filter in each of its 17 buckets, and free
 * the head with kfree_rcu().
 */
303 static void rsvp_destroy(struct tcf_proto *tp)
305 struct rsvp_head *data = rtnl_dereference(tp->root);
311 for (h1 = 0; h1 < 256; h1++) {
312 struct rsvp_session *s;
/* Pop sessions off the head of the chain until the bucket is empty. */
314 while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
315 RCU_INIT_POINTER(data->ht[h1], s->next);
/* h2 runs to 16 inclusive so the wildcard bucket is drained as well. */
317 for (h2 = 0; h2 <= 16; h2++) {
318 struct rsvp_filter *f;
320 while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
321 rcu_assign_pointer(s->ht[h2], f->next);
322 rsvp_delete_filter(tp, f);
328 kfree_rcu(data, rcu);
/*
 * Delete the filter passed in arg.
 *
 * The filter is unlinked from its session bucket (bits 8..15 of the handle)
 * and handed to rsvp_delete_filter().  The session buckets are then scanned;
 * once the session has no flows left it is unlinked from head->ht[].
 * NOTE(review): the final scan over head->ht[] presumably computes *last;
 * the assignment itself is not shown here - confirm.
 */
331 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last)
333 struct rsvp_head *head = rtnl_dereference(tp->root);
334 struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg;
335 struct rsvp_filter __rcu **fp;
336 unsigned int h = f->handle;
337 struct rsvp_session __rcu **sp;
338 struct rsvp_session *nsp, *s = f->sess;
/* fp trails the walk so the predecessor link can be rewritten in place. */
341 fp = &s->ht[(h >> 8) & 0xFF];
342 for (nfp = rtnl_dereference(*fp); nfp;
343 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
345 RCU_INIT_POINTER(*fp, f->next);
346 rsvp_delete_filter(tp, f);
/* Check all 17 buckets of the session, wildcard included. */
350 for (i = 0; i <= 16; i++)
354 /* OK, session has no flows */
355 sp = &head->ht[h & 0xFF];
356 for (nsp = rtnl_dereference(*sp); nsp;
357 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
359 RCU_INIT_POINTER(*sp, s->next);
/* Look for any top-level bucket that still holds a session. */
371 for (h1 = 0; h1 < 256; h1++) {
372 if (rcu_access_pointer(head->ht[h1])) {
/*
 * Generate a filter handle that is not yet in use.
 *
 * The upper 16 bits come from the hgenerator counter and the low bits from
 * salt, which the caller builds from the bucket indices.  A candidate is
 * free when rsvp_get() returns 0 for it.
 */
381 static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
383 struct rsvp_head *data = rtnl_dereference(tp->root);
/* Step the counter by 0x10000; on wrap to 0 restart at 0x10000 so the upper half is never zero. */
389 if ((data->hgenerator += 0x10000) == 0)
390 data->hgenerator = 0x10000;
391 h = data->hgenerator|salt;
392 if (rsvp_get(tp, h) == 0)
/*
 * Test the bit for the current tgenerator value in the tunnel ID bitmap:
 * the word index is tgenerator >> 5 and the bit is tgenerator & 0x1F.
 * NOTE(review): the name suggests bit-test-and-set; the set and the return
 * values are not shown here - confirm.
 */
398 static int tunnel_bts(struct rsvp_head *data)
400 int n = data->tgenerator >> 5;
401 u32 b = 1 << (data->tgenerator & 0x1F);
403 if (data->tmap[n] & b)
/*
 * Recompute the tunnel ID bitmap: walk every filter of every session, look
 * at those with a non-zero tunnelhdr, and finally copy the local tmap into
 * data->tmap.
 */
409 static void tunnel_recycle(struct rsvp_head *data)
411 struct rsvp_session __rcu **sht = data->ht;
415 memset(tmap, 0, sizeof(tmap));
417 for (h1 = 0; h1 < 256; h1++) {
418 struct rsvp_session *s;
419 for (s = rtnl_dereference(sht[h1]); s;
420 s = rtnl_dereference(s->next)) {
421 for (h2 = 0; h2 <= 16; h2++) {
422 struct rsvp_filter *f;
424 for (f = rtnl_dereference(s->ht[h2]); f;
425 f = rtnl_dereference(f->next)) {
/* Filters with tunnelhdr == 0 are tested separately; presumably they are skipped - confirm. */
426 if (f->tunnelhdr == 0)
/* For tunnel filters the classid holds the tunnel ID (see rsvp_classify()). */
428 data->tgenerator = f->res.classid;
435 memcpy(data->tmap, tmap, sizeof(tmap));
/*
 * Allocate a tunnel ID.
 *
 * Up to two passes are made.  Each pass tries up to 255 successive
 * tgenerator values, skipping 0 on wrap-around, and returns the first one
 * for which tunnel_bts() reports success.  tunnel_recycle() is called after
 * an unsuccessful pass.  rsvp_change() treats a return of 0 as failure.
 */
438 static u32 gen_tunnel(struct rsvp_head *data)
442 for (k = 0; k < 2; k++) {
443 for (i = 255; i > 0; i--) {
444 if (++data->tgenerator == 0)
445 data->tgenerator = 1;
446 if (tunnel_bts(data))
447 return data->tgenerator;
449 tunnel_recycle(data);
454 static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
455 [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
456 [TCA_RSVP_DST] = { .type = NLA_BINARY,
457 .len = RSVP_DST_LEN * sizeof(u32) },
458 [TCA_RSVP_SRC] = { .type = NLA_BINARY,
459 .len = RSVP_DST_LEN * sizeof(u32) },
460 [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
/*
 * Create a new filter or update an existing one from netlink attributes.
 *
 * Existing filter (*arg set): the filter is duplicated with kmemdup(), its
 * extensions are re-initialised, the classid is updated if supplied, and
 * the copy is swapped in through rsvp_replace().
 *
 * New filter: a filter is allocated and filled from TCA_RSVP_SRC,
 * TCA_RSVP_PINFO and TCA_RSVP_CLASSID; the bucket indices are computed with
 * hash_dst() and hash_src(); a handle is generated; and the filter is then
 * linked into a matching session, or into a newly created session if none
 * matches.
 */
463 static int rsvp_change(struct net *net, struct sk_buff *in_skb,
464 struct tcf_proto *tp, unsigned long base,
467 unsigned long *arg, bool ovr)
469 struct rsvp_head *data = rtnl_dereference(tp->root);
470 struct rsvp_filter *f, *nfp;
471 struct rsvp_filter __rcu **fp;
472 struct rsvp_session *nsp, *s;
473 struct rsvp_session __rcu **sp;
474 struct tc_rsvp_pinfo *pinfo = NULL;
475 struct nlattr *opt = tca[TCA_OPTIONS];
476 struct nlattr *tb[TCA_RSVP_MAX + 1];
483 return handle ? -EINVAL : 0;
485 err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy, NULL);
/* e is a temporary extension set; it is validated first and moved into the filter later. */
489 err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
492 err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr);
496 f = (struct rsvp_filter *)*arg;
498 /* Node exists: adjust only classid */
499 struct rsvp_filter *n;
/* A non-zero handle must agree with the handle of the existing filter. */
501 if (f->handle != handle && handle)
/* Work on a copy so the live filter is not modified in place. */
504 n = kmemdup(f, sizeof(*f), GFP_KERNEL);
510 err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
516 if (tb[TCA_RSVP_CLASSID]) {
517 n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
518 tcf_bind_filter(tp, &n->res, base);
521 tcf_exts_change(tp, &n->exts, &e);
522 rsvp_replace(tp, n, handle);
526 /* Now more serious part... */
/* A destination address is tested for here; it is dereferenced below to hash and match sessions. */
530 if (tb[TCA_RSVP_DST] == NULL)
534 f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
538 err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
542 if (tb[TCA_RSVP_SRC]) {
/* Copies sizeof(f->src) bytes from the attribute payload. */
543 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
544 h2 = hash_src(f->src);
546 if (tb[TCA_RSVP_PINFO]) {
547 pinfo = nla_data(tb[TCA_RSVP_PINFO]);
549 f->tunnelhdr = pinfo->tunnelhdr;
551 if (tb[TCA_RSVP_CLASSID])
552 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
554 dst = nla_data(tb[TCA_RSVP_DST]);
/* Without PINFO, protocol and tunnel ID both hash as 0. */
555 h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
/* The salt packs h1 into bits 0..7 and h2 into bits 8..15, matching the decoding in rsvp_get(). */
558 if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
/* The classid is checked against 255, the largest tunnel ID. */
563 if (f->res.classid > 255)
/* No classid supplied: allocate a tunnel ID; 0 from gen_tunnel() means none is available. */
567 if (f->res.classid == 0 &&
568 (f->res.classid = gen_tunnel(data)) == 0)
/* Look for an existing session with the same dst, protocol, DPI and tunnel ID. */
572 for (sp = &data->ht[h1];
573 (s = rtnl_dereference(*sp)) != NULL;
575 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
576 pinfo && pinfo->protocol == s->protocol &&
577 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
578 #if RSVP_DST_LEN == 4
579 dst[0] == s->dst[0] &&
580 dst[1] == s->dst[1] &&
581 dst[2] == s->dst[2] &&
583 pinfo->tunnelid == s->tunnelid) {
586 /* OK, we found appropriate session */
/* Only non-tunnel filters are bound to a class; for tunnel filters classid is the tunnel ID. */
591 if (f->tunnelhdr == 0)
592 tcf_bind_filter(tp, &f->res, base);
594 tcf_exts_change(tp, &f->exts, &e);
/*
 * Pick the insertion point by comparing SPI masks.
 * NOTE(review): presumably the walk stops at the first entry whose mask
 * does not cover f->spi.mask; the break is not shown here - confirm.
 */
597 for (nfp = rtnl_dereference(*fp); nfp;
598 fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
599 __u32 mask = nfp->spi.mask & f->spi.mask;
601 if (mask != f->spi.mask)
/* Set the successor before publishing f to readers. */
604 RCU_INIT_POINTER(f->next, nfp);
605 rcu_assign_pointer(*fp, f);
607 *arg = (unsigned long)f;
612 /* No session found. Create new one. */
615 s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
618 memcpy(s->dst, dst, sizeof(s->dst));
622 s->protocol = pinfo->protocol;
623 s->tunnelid = pinfo->tunnelid;
/* Sessions are positioned by DPI mask, in the same manner as filters by SPI mask above. */
626 for (nsp = rtnl_dereference(*sp); nsp;
627 sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
628 if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
631 RCU_INIT_POINTER(s->next, nsp);
632 rcu_assign_pointer(*sp, s);
/* Cleanup calls: the extensions of the filter and the temporary set e are destroyed. */
637 tcf_exts_destroy(&f->exts);
640 tcf_exts_destroy(&e);
/*
 * Visit every filter of every session.  Filters are counted against
 * arg->skip, and arg->fn() is invoked with the filter pointer cast to
 * unsigned long; a negative return from arg->fn() is treated specially.
 */
644 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
646 struct rsvp_head *head = rtnl_dereference(tp->root);
652 for (h = 0; h < 256; h++) {
653 struct rsvp_session *s;
655 for (s = rtnl_dereference(head->ht[h]); s;
656 s = rtnl_dereference(s->next)) {
/* h1 runs to 16 inclusive so the wildcard bucket is walked too. */
657 for (h1 = 0; h1 <= 16; h1++) {
658 struct rsvp_filter *f;
660 for (f = rtnl_dereference(s->ht[h1]); f;
661 f = rtnl_dereference(f->next)) {
/* Entries below the skip threshold get separate handling; presumably they are passed over - confirm. */
662 if (arg->count < arg->skip) {
666 if (arg->fn(tp, (unsigned long)f, arg) < 0) {
/*
 * Dump one filter (fh) into a netlink message.
 *
 * Emits, nested under TCA_OPTIONS: the session destination, a
 * tc_rsvp_pinfo built from session and filter fields, the classid when
 * non-zero, the source address unless the filter sits in the wildcard
 * bucket, and the extensions.  Extension statistics follow after the nest
 * is closed.  Any failed put jumps to nla_put_failure, where the nest is
 * cancelled.
 */
677 static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
678 struct sk_buff *skb, struct tcmsg *t)
680 struct rsvp_filter *f = (struct rsvp_filter *)fh;
681 struct rsvp_session *s;
/* NOTE(review): pinfo lives on the stack and is copied out below - confirm every field and any padding is initialised. */
683 struct tc_rsvp_pinfo pinfo;
689 t->tcm_handle = f->handle;
691 nest = nla_nest_start(skb, TCA_OPTIONS);
693 goto nla_put_failure;
695 if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
696 goto nla_put_failure;
699 pinfo.protocol = s->protocol;
700 pinfo.tunnelid = s->tunnelid;
701 pinfo.tunnelhdr = f->tunnelhdr;
703 if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
704 goto nla_put_failure;
705 if (f->res.classid &&
706 nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
707 goto nla_put_failure;
/* Bits 8..15 of the handle give the filter bucket; 16 is the wildcard slot, for which no source is emitted. */
708 if (((f->handle >> 8) & 0xFF) != 16 &&
709 nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
710 goto nla_put_failure;
712 if (tcf_exts_dump(skb, &f->exts) < 0)
713 goto nla_put_failure;
715 nla_nest_end(skb, nest);
717 if (tcf_exts_dump_stats(skb, &f->exts) < 0)
718 goto nla_put_failure;
722 nla_nest_cancel(skb, nest);
/*
 * Classifier operations table, registered by init_rsvp() and unregistered
 * by exit_rsvp().  RSVP_OPS is a macro name supplied by the file that
 * includes this template.
 */
726 static struct tcf_proto_ops RSVP_OPS __read_mostly = {
728 .classify = rsvp_classify,
730 .destroy = rsvp_destroy,
732 .change = rsvp_change,
733 .delete = rsvp_delete,
736 .owner = THIS_MODULE,
/* Module entry point: register the classifier operations and return the registration result. */
739 static int __init init_rsvp(void)
741 return register_tcf_proto_ops(&RSVP_OPS);
/* Module exit point: unregister the classifier operations. */
744 static void __exit exit_rsvp(void)
746 unregister_tcf_proto_ops(&RSVP_OPS);
/* Hook init_rsvp() and exit_rsvp() up as the module load and unload handlers. */
749 module_init(init_rsvp)
750 module_exit(exit_rsvp)