X-Git-Url: https://git.karo-electronics.de/?a=blobdiff_plain;f=net%2Fipv4%2Fudp.c;h=db313d96488489de2e8c0d2100176e0ab893e171;hb=b7b5f487ab39bc10ed0694af35651a03d9cb97ff;hp=21901d2c53242c3ab937e94ac3bf019747bcbb73;hpb=6516c65573fde5e421c6c92c4b180bbe2245b23b;p=karo-tx-linux.git diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 21901d2c5324..db313d964884 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -114,14 +114,33 @@ DEFINE_RWLOCK(udp_hash_lock); static int udp_port_rover; -static inline int __udp_lib_lport_inuse(__u16 num, struct hlist_head udptable[]) +/* + * Note about this hash function : + * Typical use is probably daddr = 0, only dport is going to vary hash + */ +static inline unsigned int hash_port_and_addr(__u16 port, __be32 addr) +{ + addr ^= addr >> 16; + addr ^= addr >> 8; + return port ^ addr; +} + +static inline int __udp_lib_port_inuse(unsigned int hash, int port, + __be32 daddr, struct hlist_head udptable[]) { struct sock *sk; struct hlist_node *node; + struct inet_sock *inet; - sk_for_each(sk, node, &udptable[num & (UDP_HTABLE_SIZE - 1)]) - if (sk->sk_hash == num) + sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { + if (sk->sk_hash != hash) + continue; + inet = inet_sk(sk); + if (inet->num != port) + continue; + if (inet->rcv_saddr == daddr) return 1; + } return 0; } @@ -142,6 +161,7 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, struct hlist_node *node; struct hlist_head *head; struct sock *sk2; + unsigned int hash; int error = 1; write_lock_bh(&udp_hash_lock); @@ -156,7 +176,9 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { int size; - head = &udptable[result & (UDP_HTABLE_SIZE - 1)]; + hash = hash_port_and_addr(result, + inet_sk(sk)->rcv_saddr); + head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + @@ -181,7 +203,10 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & (UDP_HTABLE_SIZE - 1)); - if (! __udp_lib_lport_inuse(result, udptable)) + hash = hash_port_and_addr(result, + inet_sk(sk)->rcv_saddr); + if (! __udp_lib_port_inuse(hash, result, + inet_sk(sk)->rcv_saddr, udptable)) break; } if (i >= (1 << 16) / UDP_HTABLE_SIZE) @@ -189,11 +214,13 @@ int __udp_lib_get_port(struct sock *sk, unsigned short snum, gotit: *port_rover = snum = result; } else { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; + hash = hash_port_and_addr(snum, inet_sk(sk)->rcv_saddr); + head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; sk_for_each(sk2, node, head) - if (sk2->sk_hash == snum && + if (sk2->sk_hash == hash && sk2 != sk && + inet_sk(sk2)->num == snum && (!sk2->sk_reuse || !sk->sk_reuse) && (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && @@ -201,9 +228,9 @@ gotit: goto fail; } inet_sk(sk)->num = snum; - sk->sk_hash = snum; + sk->sk_hash = hash; if (sk_unhashed(sk)) { - head = &udptable[snum & (UDP_HTABLE_SIZE - 1)]; + head = &udptable[hash & (UDP_HTABLE_SIZE - 1)]; sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } @@ -213,13 +240,13 @@ fail: return error; } -__inline__ int udp_get_port(struct sock *sk, unsigned short snum, +int udp_get_port(struct sock *sk, unsigned short snum, int (*scmp)(const struct sock *, const struct sock *)) { return __udp_lib_get_port(sk, snum, udp_hash, &udp_port_rover, scmp); } -inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -242,63 +269,77 @@ static struct sock *__udp4_lib_lookup(__be32 saddr, __be16 sport, { struct sock *sk, *result = NULL; struct hlist_node *node; - unsigned short hnum = ntohs(dport); - int badness = -1; + unsigned int hash, hashwild; + int score, best = -1, hport = ntohs(dport); + + hash = hash_port_and_addr(hport, daddr); + hashwild = hash_port_and_addr(hport, 0); read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + +lookup: + + sk_for_each(sk, node, &udptable[hash & (UDP_HTABLE_SIZE - 1)]) { struct inet_sock *inet = inet_sk(sk); - if (sk->sk_hash == hnum && !ipv6_only_sock(sk)) { - int score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } + if (sk->sk_hash != hash || ipv6_only_sock(sk) || + inet->num != hport) + continue; + + score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + continue; + score+=2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + continue; + score+=2; + } + if (inet->dport) { + if (inet->dport != sport) + continue; + score+=2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score+=2; + } + if (score == 9) { + result = sk; + goto found; + } else if (score > best) { + result = sk; + best = score; } } + + if (hash != hashwild) { + hash = hashwild; + goto lookup; + } +found: if (result) sock_hold(result); read_unlock(&udp_hash_lock); return result; } -static inline struct sock *udp_v4_mcast_next(struct sock *sk, - __be16 loc_port, __be32 loc_addr, +static inline struct sock *udp_v4_mcast_next(struct sock *sk, unsigned int hnum, + int hport, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif) { struct hlist_node *node; struct sock *s = sk; - unsigned short hnum = ntohs(loc_port); sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); if (s->sk_hash != hnum || + inet->num != hport || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || @@ -330,8 +371,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) struct inet_sock *inet; struct iphdr *iph = (struct iphdr*)skb->data; struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); - int type = skb->h.icmph->type; - int code = skb->h.icmph->code; + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; struct sock *sk; int harderr; int err; @@ -391,7 +432,7 @@ out: sock_put(sk); } -__inline__ void udp_err(struct sk_buff *skb, u32 info) +void udp_err(struct sk_buff *skb, u32 info) { return __udp4_lib_err(skb, info, udp_hash); } @@ -420,13 +461,14 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, int len ) { unsigned int offset; - struct udphdr *uh = skb->h.uh; + struct udphdr *uh = udp_hdr(skb); __wsum csum = 0; if (skb_queue_len(&sk->sk_write_queue) == 1) { /* * Only one fragment on the socket. */ + skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); } else { @@ -435,7 +477,7 @@ static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, * fragments on the socket so that all csums of sk_buffs * should be together */ - offset = skb->h.raw - skb->data; + offset = skb_transport_offset(skb); skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); skb->ip_summed = CHECKSUM_NONE; @@ -470,7 +512,7 @@ static int udp_push_pending_frames(struct sock *sk) /* * Create a UDP header */ - uh = skb->h.uh; + uh = udp_hdr(skb); uh->source = fl->fl_ip_sport; uh->dest = fl->fl_ip_dport; uh->len = htons(up->len); @@ -847,7 +889,7 @@ try_again: goto csum_copy_err; } - if (skb->ip_summed == CHECKSUM_UNNECESSARY) + if (skb_csum_unnecessary(skb)) err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov, copied ); else { @@ -866,8 +908,8 @@ try_again: if (sin) { sin->sin_family = AF_INET; - sin->sin_port = skb->h.uh->source; - sin->sin_addr.s_addr = skb->nh.iph->saddr; + sin->sin_port = udp_hdr(skb)->source; + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); } if (inet->cmsg_flags) @@ -949,7 +991,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) return 1; /* Now we can get the pointers */ - uh = skb->h.uh; + uh = udp_hdr(skb); udpdata = (__u8 *)uh + sizeof(struct udphdr); udpdata32 = (__be32 *)udpdata; @@ -990,7 +1032,7 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) return 0; /* Now we can update and verify the packet length... */ - iph = skb->nh.iph; + iph = ip_hdr(skb); iphlen = iph->ihl << 2; iph->tot_len = htons(ntohs(iph->tot_len) - len); if (skb->len < iphlen + len) { @@ -1002,7 +1044,8 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) * transport header to point to ESP. Keep UDP on the stack * for later. */ - skb->h.raw = skb_pull(skb, len); + __skb_pull(skb, len); + skb_reset_transport_header(skb); /* modify the protocol (it's ESP!) */ iph->protocol = IPPROTO_ESP; @@ -1127,29 +1170,45 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, __be32 saddr, __be32 daddr, struct hlist_head udptable[]) { - struct sock *sk; + struct sock *sk, *skw, *sknext; int dif; + int hport = ntohs(uh->dest); + unsigned int hash = hash_port_and_addr(hport, daddr); + unsigned int hashwild = hash_port_and_addr(hport, 0); - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); - if (sk) { - struct sock *sknext = NULL; + read_lock(&udp_hash_lock); + + sk = sk_head(&udptable[hash & (UDP_HTABLE_SIZE - 1)]); + skw = sk_head(&udptable[hashwild & (UDP_HTABLE_SIZE - 1)]); + + sk = udp_v4_mcast_next(sk, hash, hport, daddr, uh->source, saddr, dif); + if (!sk) { + hash = hashwild; + sk = udp_v4_mcast_next(skw, hash, hport, daddr, uh->source, + saddr, dif); + } + if (sk) { do { struct sk_buff *skb1 = skb; - - sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, - uh->source, saddr, dif); + sknext = udp_v4_mcast_next(sk_next(sk), hash, hport, + daddr, uh->source, saddr, dif); + if (!sknext && hash != hashwild) { + hash = hashwild; + sknext = udp_v4_mcast_next(skw, hash, hport, + daddr, uh->source, saddr, dif); + } if (sknext) skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) { int ret = udp_queue_rcv_skb(sk, skb1); if (ret > 0) - /* we should probably re-process instead - * of dropping packets here. */ + /* + * we should probably re-process + * instead of dropping packets here. + */ kfree_skb(skb1); } sk = sknext; @@ -1168,6 +1227,7 @@ static int __udp4_lib_mcast_deliver(struct sk_buff *skb, static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) { + const struct iphdr *iph; int err; UDP_SKB_CB(skb)->partial_cov = 0; @@ -1179,16 +1239,16 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, return err; } + iph = ip_hdr(skb); if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, - skb->len, proto, skb->csum)) + if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + proto, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->csum = csum_tcpudp_nofold(skb->nh.iph->saddr, - skb->nh.iph->daddr, + if (!skb_csum_unnecessary(skb)) + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, skb->len, proto, 0); /* Probably, we should checksum udp header (it should be in cache * in any case) and data in tiny packets (< rx copybreak). @@ -1205,11 +1265,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], int proto) { struct sock *sk; - struct udphdr *uh = skb->h.uh; + struct udphdr *uh = udp_hdr(skb); unsigned short ulen; struct rtable *rt = (struct rtable*)skb->dst; - __be32 saddr = skb->nh.iph->saddr; - __be32 daddr = skb->nh.iph->daddr; + __be32 saddr = ip_hdr(skb)->saddr; + __be32 daddr = ip_hdr(skb)->daddr; /* * Validate the packet. @@ -1225,7 +1285,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], /* UDP validates ulen. */ if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) goto short_packet; - uh = skb->h.uh; + uh = udp_hdr(skb); } if (udp4_csum_init(skb, uh, proto)) @@ -1235,7 +1295,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); sk = __udp4_lib_lookup(saddr, uh->source, daddr, uh->dest, - skb->dev->ifindex, udptable ); + skb->dev->ifindex, udptable); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); @@ -1296,7 +1356,7 @@ drop: return 0; } -__inline__ int udp_rcv(struct sk_buff *skb) +int udp_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); }