Linux Kernel 3.7.1
ip_fragment.c
1 /*
2  * INET An implementation of the TCP/IP protocol suite for the LINUX
3  * operating system. INET is implemented using the BSD Socket
4  * interface as the means of communication with the user level.
5  *
6  * The IP fragmentation functionality.
7  *
8  * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG>
9  * Alan Cox <alan@lxorguk.ukuu.org.uk>
10  *
11  * Fixes:
12  * Alan Cox : Split from ip.c , see ip_input.c for history.
13  * David S. Miller : Begin massive cleanup...
14  * Andi Kleen : Add sysctls.
15  * xxxx : Overlapfrag bug.
16  * Ultima : ip_expire() kernel panic.
17  * Bill Hawes : Frag accounting and evictor fixes.
18  * John McDonald : 0 length frag bug.
19  * Alexey Kuznetsov: SMP races, threading, cleanup.
20  * Patrick McHardy : LRU queue of frag heads for evictor.
21  */
22 
23 #define pr_fmt(fmt) "IPv4: " fmt
24 
25 #include <linux/compiler.h>
26 #include <linux/module.h>
27 #include <linux/types.h>
28 #include <linux/mm.h>
29 #include <linux/jiffies.h>
30 #include <linux/skbuff.h>
31 #include <linux/list.h>
32 #include <linux/ip.h>
33 #include <linux/icmp.h>
34 #include <linux/netdevice.h>
35 #include <linux/jhash.h>
36 #include <linux/random.h>
37 #include <linux/slab.h>
38 #include <net/route.h>
39 #include <net/dst.h>
40 #include <net/sock.h>
41 #include <net/ip.h>
42 #include <net/icmp.h>
43 #include <net/checksum.h>
44 #include <net/inetpeer.h>
45 #include <net/inet_frag.h>
46 #include <linux/tcp.h>
47 #include <linux/udp.h>
48 #include <linux/inet.h>
49 #include <linux/netfilter_ipv4.h>
50 #include <net/inet_ecn.h>
51 
52 /* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
53  * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
54  * as well. Or notify me, at least. --ANK
55  */
56 
57 static int sysctl_ipfrag_max_dist __read_mostly = 64;
58 
59 struct ipfrag_skb_cb
60 {
61  struct inet_skb_parm h;
62  int offset;
63 };
64 
65 #define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb))
66 
67 /* Describe an entry in the "incomplete datagrams" queue. */
68 struct ipq {
69  struct inet_frag_queue q;
70 
71  u32 user;
72  __be32 saddr;
73  __be32 daddr;
74  __be16 id;
75  u8 protocol;
76  u8 ecn; /* RFC3168 support */
77  int iif;
78  unsigned int rid;
79  struct inet_peer *peer;
80 };
81 
82 /* RFC 3168 support :
83  * We want to check ECN values of all fragments, to detect invalid combinations.
84  * In ipq->ecn, we store the OR value of each ip4_frag_ecn() fragment value.
85  */
86 #define IPFRAG_ECN_NOT_ECT 0x01 /* one frag had ECN_NOT_ECT */
87 #define IPFRAG_ECN_ECT_1 0x02 /* one frag had ECN_ECT_1 */
88 #define IPFRAG_ECN_ECT_0 0x04 /* one frag had ECN_ECT_0 */
89 #define IPFRAG_ECN_CE 0x08 /* one frag had ECN_CE */
90 
91 static inline u8 ip4_frag_ecn(u8 tos)
92 {
93  return 1 << (tos & INET_ECN_MASK);
94 }
95 
96 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
97  * Value : 0xff if frame should be dropped.
98  * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
99  */
100 static const u8 ip4_frag_ecn_table[16] = {
101  /* at least one fragment had CE, and others ECT_0 or ECT_1 */
102  [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
103  [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
104  [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
105 
106  /* invalid combinations : drop frame */
107  [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
108  [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
109  [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
110  [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
111  [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
112  [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
113  [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
114 };
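ip4_frag_ecn() above folds the two ECN bits of the TOS byte into a one-hot flag, so ORing the per-fragment values into qp->ecn records every codepoint seen on the datagram; ip4_frag_ecn_table then maps that OR either to the CE mark to fold into the reassembled header or to 0xff to drop the frame. A minimal user-space sketch of the same mapping (the constants mirror include/net/inet_ecn.h; the program is illustrative only, not kernel code):

#include <stdio.h>

#define ECN_MASK 0x03                        /* INET_ECN_MASK */
#define frag_ecn_bit(tos) (1u << ((tos) & ECN_MASK))

int main(void)
{
	/* Two fragments: one ECT(0) (codepoint 2), one CE (codepoint 3). */
	unsigned char frag_tos[] = { 0x02, 0x03 };
	unsigned int seen = 0;

	for (unsigned int i = 0; i < sizeof(frag_tos); i++)
		seen |= frag_ecn_bit(frag_tos[i]);

	/* seen == 0x04 | 0x08, i.e. IPFRAG_ECN_ECT_0 | IPFRAG_ECN_CE:
	 * the table above marks the reassembled packet with CE. */
	printf("OR of fragment ECN bits: 0x%02x\n", seen);
	return 0;
}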
115 
116 static struct inet_frags ip4_frags;
117 
118 int ip_frag_nqueues(struct net *net)
119 {
120  return net->ipv4.frags.nqueues;
121 }
122 
123 int ip_frag_mem(struct net *net)
124 {
125  return atomic_read(&net->ipv4.frags.mem);
126 }
127 
128 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
129  struct net_device *dev);
130 
131 struct ip4_create_arg {
132  struct iphdr *iph;
133  u32 user;
134 };
135 
136 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
137 {
138  return jhash_3words((__force u32)id << 16 | prot,
139  (__force u32)saddr, (__force u32)daddr,
140  ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
141 }
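ipqhashfn() builds the lookup key from the 16-bit IP ID packed together with the protocol, plus the source and destination addresses, runs it through the kernel's Jenkins hash (jhash_3words(), salted with the periodically re-keyed ip4_frags.rnd) and masks the result to one of INETFRAGS_HASHSZ buckets (a power of two). A rough user-space sketch of that bucket selection; mix() below is a placeholder and deliberately not the Jenkins hash, and HASHSZ merely stands in for INETFRAGS_HASHSZ:

#include <stdint.h>
#include <stdio.h>

#define HASHSZ 64	/* stand-in for INETFRAGS_HASHSZ, a power of two */

/* Placeholder mixer, NOT the kernel's jhash_3words(). */
static uint32_t mix(uint32_t a, uint32_t b, uint32_t c, uint32_t salt)
{
	return (a * 2654435761u) ^ (b * 40503u) ^ (c + salt);
}

static unsigned int bucket(uint16_t id, uint32_t saddr, uint32_t daddr,
			   uint8_t prot, uint32_t salt)
{
	/* Same key packing as ipqhashfn(): (id << 16) | protocol. */
	return mix(((uint32_t)id << 16) | prot, saddr, daddr, salt) & (HASHSZ - 1);
}

int main(void)
{
	printf("bucket = %u\n", bucket(0x1234, 0x0a000001, 0x0a000002, 17, 42));
	return 0;
}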
142 
143 static unsigned int ip4_hashfn(struct inet_frag_queue *q)
144 {
145  struct ipq *ipq;
146 
147  ipq = container_of(q, struct ipq, q);
148  return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
149 }
150 
151 static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
152 {
153  struct ipq *qp;
154  struct ip4_create_arg *arg = a;
155 
156  qp = container_of(q, struct ipq, q);
157  return qp->id == arg->iph->id &&
158  qp->saddr == arg->iph->saddr &&
159  qp->daddr == arg->iph->daddr &&
160  qp->protocol == arg->iph->protocol &&
161  qp->user == arg->user;
162 }
163 
164 /* Memory Tracking Functions. */
165 static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
166 {
167  atomic_sub(skb->truesize, &nf->mem);
168  kfree_skb(skb);
169 }
170 
171 static void ip4_frag_init(struct inet_frag_queue *q, void *a)
172 {
173  struct ipq *qp = container_of(q, struct ipq, q);
174  struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
175  frags);
176  struct net *net = container_of(ipv4, struct net, ipv4);
177 
178  struct ip4_create_arg *arg = a;
179 
180  qp->protocol = arg->iph->protocol;
181  qp->id = arg->iph->id;
182  qp->ecn = ip4_frag_ecn(arg->iph->tos);
183  qp->saddr = arg->iph->saddr;
184  qp->daddr = arg->iph->daddr;
185  qp->user = arg->user;
186  qp->peer = sysctl_ipfrag_max_dist ?
187  inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL;
188 }
189 
190 static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
191 {
192  struct ipq *qp;
193 
194  qp = container_of(q, struct ipq, q);
195  if (qp->peer)
196  inet_putpeer(qp->peer);
197 }
198 
199 
200 /* Destruction primitives. */
201 
202 static __inline__ void ipq_put(struct ipq *ipq)
203 {
204  inet_frag_put(&ipq->q, &ip4_frags);
205 }
206 
207 /* Kill ipq entry. It is not destroyed immediately,
208  * because the caller (and possibly others) still holds a reference.
209  */
210 static void ipq_kill(struct ipq *ipq)
211 {
212  inet_frag_kill(&ipq->q, &ip4_frags);
213 }
214 
215 /* Memory limiting on fragments. Evictor trashes the oldest
216  * fragment queue until we are back under the threshold.
217  */
218 static void ip_evictor(struct net *net)
219 {
220  int evicted;
221 
222  evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false);
223  if (evicted)
224  IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted);
225 }
226 
227 /*
228  * Oops, a fragment queue timed out. Kill it and send an ICMP reply.
229  */
230 static void ip_expire(unsigned long arg)
231 {
232  struct ipq *qp;
233  struct net *net;
234 
235  qp = container_of((struct inet_frag_queue *) arg, struct ipq, q);
236  net = container_of(qp->q.net, struct net, ipv4.frags);
237 
238  spin_lock(&qp->q.lock);
239 
240  if (qp->q.last_in & INET_FRAG_COMPLETE)
241  goto out;
242 
243  ipq_kill(qp);
244 
245  IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT);
246  IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
247 
248  if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) {
249  struct sk_buff *head = qp->q.fragments;
250  const struct iphdr *iph;
251  int err;
252 
253  rcu_read_lock();
254  head->dev = dev_get_by_index_rcu(net, qp->iif);
255  if (!head->dev)
256  goto out_rcu_unlock;
257 
258  /* skb dst is stale, drop it, and perform route lookup again */
259  skb_dst_drop(head);
260  iph = ip_hdr(head);
261  err = ip_route_input_noref(head, iph->daddr, iph->saddr,
262  iph->tos, head->dev);
263  if (err)
264  goto out_rcu_unlock;
265 
266  /*
267  * Only an end host needs to send an ICMP
268  * "Fragment Reassembly Timeout" message, per RFC792.
269  */
270  if (qp->user == IP_DEFRAG_AF_PACKET ||
271  (qp->user == IP_DEFRAG_CONNTRACK_IN &&
272  skb_rtable(head)->rt_type != RTN_LOCAL))
273  goto out_rcu_unlock;
274 
275 
276  /* Send an ICMP "Fragment Reassembly Timeout" message. */
277  icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
278 out_rcu_unlock:
279  rcu_read_unlock();
280  }
281 out:
282  spin_unlock(&qp->q.lock);
283  ipq_put(qp);
284 }
285 
286 /* Find the correct entry in the "incomplete datagrams" queue for
287  * this IP datagram, and create a new one if nothing is found.
288  */
289 static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
290 {
291  struct inet_frag_queue *q;
292  struct ip4_create_arg arg;
293  unsigned int hash;
294 
295  arg.iph = iph;
296  arg.user = user;
297 
298  read_lock(&ip4_frags.lock);
299  hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
300 
301  q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
302  if (q == NULL)
303  goto out_nomem;
304 
305  return container_of(q, struct ipq, q);
306 
307 out_nomem:
308  LIMIT_NETDEBUG(KERN_ERR pr_fmt("ip_frag_create: no memory left !\n"));
309  return NULL;
310 }
311 
312 /* Is the fragment too far ahead to be part of ipq? */
313 static inline int ip_frag_too_far(struct ipq *qp)
314 {
315  struct inet_peer *peer = qp->peer;
316  unsigned int max = sysctl_ipfrag_max_dist;
317  unsigned int start, end;
318 
319  int rc;
320 
321  if (!peer || !max)
322  return 0;
323 
324  start = qp->rid;
325  end = atomic_inc_return(&peer->rid);
326  qp->rid = end;
327 
328  rc = qp->q.fragments && (end - start) > max;
329 
330  if (rc) {
331  struct net *net;
332 
333  net = container_of(qp->q.net, struct net, ipv4.frags);
334  IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
335  }
336 
337  return rc;
338 }
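peer->rid is a per-source counter bumped for every fragment seen from that peer, and qp->rid remembers its value from the last fragment this queue received. Because the subtraction is done on unsigned 32-bit values, (end - start) still gives the number of intervening fragments even if the counter wrapped, and the queue is declared too far behind once more than sysctl_ipfrag_max_dist fragments from the same source arrived in between. A small sketch of that wrap-safe distance test, with values picked only for illustration:

#include <stdio.h>

int main(void)
{
	unsigned int max   = 64;		/* sysctl_ipfrag_max_dist default */
	unsigned int start = 0xfffffff0u;	/* rid recorded before the wrap */
	unsigned int end   = 0x00000031u;	/* rid after the counter wrapped */

	/* 0x31 - 0xfffffff0 wraps to 0x41 == 65 intervening fragments. */
	printf("distance=%u too_far=%d\n", end - start, (end - start) > max);
	return 0;
}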
339 
340 static int ip_frag_reinit(struct ipq *qp)
341 {
342  struct sk_buff *fp;
343 
344  if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
345  atomic_inc(&qp->q.refcnt);
346  return -ETIMEDOUT;
347  }
348 
349  fp = qp->q.fragments;
350  do {
351  struct sk_buff *xp = fp->next;
352  frag_kfree_skb(qp->q.net, fp);
353  fp = xp;
354  } while (fp);
355 
356  qp->q.last_in = 0;
357  qp->q.len = 0;
358  qp->q.meat = 0;
359  qp->q.fragments = NULL;
360  qp->q.fragments_tail = NULL;
361  qp->iif = 0;
362  qp->ecn = 0;
363 
364  return 0;
365 }
366 
367 /* Add new segment to existing queue. */
368 static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
369 {
370  struct sk_buff *prev, *next;
371  struct net_device *dev;
372  int flags, offset;
373  int ihl, end;
374  int err = -ENOENT;
375  u8 ecn;
376 
377  if (qp->q.last_in & INET_FRAG_COMPLETE)
378  goto err;
379 
380  if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
381  unlikely(ip_frag_too_far(qp)) &&
382  unlikely(err = ip_frag_reinit(qp))) {
383  ipq_kill(qp);
384  goto err;
385  }
386 
387  ecn = ip4_frag_ecn(ip_hdr(skb)->tos);
388  offset = ntohs(ip_hdr(skb)->frag_off);
389  flags = offset & ~IP_OFFSET;
390  offset &= IP_OFFSET;
391  offset <<= 3; /* offset is in 8-byte chunks */
392  ihl = ip_hdrlen(skb);
393 
394  /* Determine the position of this fragment. */
395  end = offset + skb->len - ihl;
396  err = -EINVAL;
397 
398  /* Is this the final fragment? */
399  if ((flags & IP_MF) == 0) {
400  /* If we already have some bits beyond end
401  * or have different end, the segment is corrupted.
402  */
403  if (end < qp->q.len ||
404  ((qp->q.last_in & INET_FRAG_LAST_IN) && end != qp->q.len))
405  goto err;
406  qp->q.last_in |= INET_FRAG_LAST_IN;
407  qp->q.len = end;
408  } else {
409  if (end&7) {
410  end &= ~7;
411  if (skb->ip_summed != CHECKSUM_UNNECESSARY)
412  skb->ip_summed = CHECKSUM_NONE;
413  }
414  if (end > qp->q.len) {
415  /* Some bits beyond end -> corruption. */
416  if (qp->q.last_in & INET_FRAG_LAST_IN)
417  goto err;
418  qp->q.len = end;
419  }
420  }
421  if (end == offset)
422  goto err;
423 
424  err = -ENOMEM;
425  if (pskb_pull(skb, ihl) == NULL)
426  goto err;
427 
428  err = pskb_trim_rcsum(skb, end - offset);
429  if (err)
430  goto err;
431 
432  /* Find out which fragments are in front and at the back of us
433  * in the chain of fragments so far. We must know where to put
434  * this fragment, right?
435  */
436  prev = qp->q.fragments_tail;
437  if (!prev || FRAG_CB(prev)->offset < offset) {
438  next = NULL;
439  goto found;
440  }
441  prev = NULL;
442  for (next = qp->q.fragments; next != NULL; next = next->next) {
443  if (FRAG_CB(next)->offset >= offset)
444  break; /* bingo! */
445  prev = next;
446  }
447 
448 found:
449  /* We found where to put this one. Check for overlap with
450  * preceding fragment, and, if needed, align things so that
451  * any overlaps are eliminated.
452  */
453  if (prev) {
454  int i = (FRAG_CB(prev)->offset + prev->len) - offset;
455 
456  if (i > 0) {
457  offset += i;
458  err = -EINVAL;
459  if (end <= offset)
460  goto err;
461  err = -ENOMEM;
462  if (!pskb_pull(skb, i))
463  goto err;
464  if (skb->ip_summed != CHECKSUM_UNNECESSARY)
465  skb->ip_summed = CHECKSUM_NONE;
466  }
467  }
468 
469  err = -ENOMEM;
470 
471  while (next && FRAG_CB(next)->offset < end) {
472  int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
473 
474  if (i < next->len) {
475  /* Eat head of the next overlapped fragment
476  * and leave the loop. The next ones cannot overlap.
477  */
478  if (!pskb_pull(next, i))
479  goto err;
480  FRAG_CB(next)->offset += i;
481  qp->q.meat -= i;
482  if (next->ip_summed != CHECKSUM_UNNECESSARY)
483  next->ip_summed = CHECKSUM_NONE;
484  break;
485  } else {
486  struct sk_buff *free_it = next;
487 
488  /* Old fragment is completely overridden by the
489  * new one; drop it.
490  */
491  next = next->next;
492 
493  if (prev)
494  prev->next = next;
495  else
496  qp->q.fragments = next;
497 
498  qp->q.meat -= free_it->len;
499  frag_kfree_skb(qp->q.net, free_it);
500  }
501  }
502 
503  FRAG_CB(skb)->offset = offset;
504 
505  /* Insert this fragment in the chain of fragments. */
506  skb->next = next;
507  if (!next)
508  qp->q.fragments_tail = skb;
509  if (prev)
510  prev->next = skb;
511  else
512  qp->q.fragments = skb;
513 
514  dev = skb->dev;
515  if (dev) {
516  qp->iif = dev->ifindex;
517  skb->dev = NULL;
518  }
519  qp->q.stamp = skb->tstamp;
520  qp->q.meat += skb->len;
521  qp->ecn |= ecn;
522  atomic_add(skb->truesize, &qp->q.net->mem);
523  if (offset == 0)
524  qp->q.last_in |= INET_FRAG_FIRST_IN;
525 
526  if (ip_hdr(skb)->frag_off & htons(IP_DF) &&
527  skb->len + ihl > qp->q.max_size)
528  qp->q.max_size = skb->len + ihl;
529 
530  if (qp->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
531  qp->q.meat == qp->q.len)
532  return ip_frag_reasm(qp, prev, dev);
533 
534  write_lock(&ip4_frags.lock);
535  list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
536  write_unlock(&ip4_frags.lock);
537  return -EINPROGRESS;
538 
539 err:
540  kfree_skb(skb);
541  return err;
542 }
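For reference, the decoding at the top of ip_frag_queue() splits the 16-bit frag_off field into three flag bits and a 13-bit offset counted in 8-byte units (IP_OFFSET is 0x1fff, IP_MF is 0x2000). A worked example with an arbitrary sample value:

#include <stdio.h>

#define IP_OFFSET 0x1fff	/* 13-bit fragment offset, in 8-byte units */
#define IP_MF     0x2000	/* "more fragments" flag */

int main(void)
{
	unsigned int frag_off = 0x2039;	/* sample value in host byte order */
	unsigned int flags  = frag_off & ~IP_OFFSET;
	unsigned int offset = (frag_off & IP_OFFSET) << 3;

	/* flags == 0x2000 (more fragments follow), offset == 456 bytes */
	printf("MF=%d offset=%u bytes\n", !!(flags & IP_MF), offset);
	return 0;
}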
543 
544 
545 /* Build a new IP datagram from all its fragments. */
546 
547 static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
548  struct net_device *dev)
549 {
550  struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
551  struct iphdr *iph;
552  struct sk_buff *fp, *head = qp->q.fragments;
553  int len;
554  int ihlen;
555  int err;
556  int sum_truesize;
557  u8 ecn;
558 
559  ipq_kill(qp);
560 
561  ecn = ip4_frag_ecn_table[qp->ecn];
562  if (unlikely(ecn == 0xff)) {
563  err = -EINVAL;
564  goto out_fail;
565  }
566  /* Make the one we just received the head. */
567  if (prev) {
568  head = prev->next;
569  fp = skb_clone(head, GFP_ATOMIC);
570  if (!fp)
571  goto out_nomem;
572 
573  fp->next = head->next;
574  if (!fp->next)
575  qp->q.fragments_tail = fp;
576  prev->next = fp;
577 
578  skb_morph(head, qp->q.fragments);
579  head->next = qp->q.fragments->next;
580 
581  consume_skb(qp->q.fragments);
582  qp->q.fragments = head;
583  }
584 
585  WARN_ON(head == NULL);
586  WARN_ON(FRAG_CB(head)->offset != 0);
587 
588  /* Allocate a new buffer for the datagram. */
589  ihlen = ip_hdrlen(head);
590  len = ihlen + qp->q.len;
591 
592  err = -E2BIG;
593  if (len > 65535)
594  goto out_oversize;
595 
596  /* Head of list must not be cloned. */
597  if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
598  goto out_nomem;
599 
600  /* If the first fragment is fragmented itself, we split
601  * it to two chunks: the first with data and paged part
602  * and the second, holding only fragments. */
603  if (skb_has_frag_list(head)) {
604  struct sk_buff *clone;
605  int i, plen = 0;
606 
607  if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL)
608  goto out_nomem;
609  clone->next = head->next;
610  head->next = clone;
611  skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
612  skb_frag_list_init(head);
613  for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
614  plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
615  clone->len = clone->data_len = head->data_len - plen;
616  head->data_len -= clone->len;
617  head->len -= clone->len;
618  clone->csum = 0;
619  clone->ip_summed = head->ip_summed;
620  atomic_add(clone->truesize, &qp->q.net->mem);
621  }
622 
623  skb_push(head, head->data - skb_network_header(head));
624 
625  sum_truesize = head->truesize;
626  for (fp = head->next; fp;) {
627  bool headstolen;
628  int delta;
629  struct sk_buff *next = fp->next;
630 
631  sum_truesize += fp->truesize;
632  if (head->ip_summed != fp->ip_summed)
633  head->ip_summed = CHECKSUM_NONE;
634  else if (head->ip_summed == CHECKSUM_COMPLETE)
635  head->csum = csum_add(head->csum, fp->csum);
636 
637  if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
638  kfree_skb_partial(fp, headstolen);
639  } else {
640  if (!skb_shinfo(head)->frag_list)
641  skb_shinfo(head)->frag_list = fp;
642  head->data_len += fp->len;
643  head->len += fp->len;
644  head->truesize += fp->truesize;
645  }
646  fp = next;
647  }
648  atomic_sub(sum_truesize, &qp->q.net->mem);
649 
650  head->next = NULL;
651  head->dev = dev;
652  head->tstamp = qp->q.stamp;
653  IPCB(head)->frag_max_size = qp->q.max_size;
654 
655  iph = ip_hdr(head);
656  /* max_size != 0 implies at least one fragment had IP_DF set */
657  iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0;
658  iph->tot_len = htons(len);
659  iph->tos |= ecn;
660  IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
661  qp->q.fragments = NULL;
662  qp->q.fragments_tail = NULL;
663  return 0;
664 
665 out_nomem:
666  LIMIT_NETDEBUG(KERN_ERR pr_fmt("queue_glue: no memory for gluing queue %p\n"),
667  qp);
668  err = -ENOMEM;
669  goto out_fail;
670 out_oversize:
671  net_info_ratelimited("Oversized IP packet from %pI4\n", &qp->saddr);
672 out_fail:
673  IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
674  return err;
675 }
676 
677 /* Process an incoming IP datagram fragment. */
678 int ip_defrag(struct sk_buff *skb, u32 user)
679 {
680  struct ipq *qp;
681  struct net *net;
682 
683  net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
684  IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
685 
686  /* Start by cleaning up the memory. */
687  ip_evictor(net);
688 
689  /* Lookup (or create) queue header */
690  if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) {
691  int ret;
692 
693  spin_lock(&qp->q.lock);
694 
695  ret = ip_frag_queue(qp, skb);
696 
697  spin_unlock(&qp->q.lock);
698  ipq_put(qp);
699  return ret;
700  }
701 
702  IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS);
703  kfree_skb(skb);
704  return -ENOMEM;
705 }
706 EXPORT_SYMBOL(ip_defrag);
707 
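The return value matters to callers: a nonzero return (typically -EINPROGRESS, or an error) means ip_defrag() has queued or freed the skb and the caller must not touch it again, while 0 means the very skb that was passed in now carries the complete reassembled datagram. A hedged sketch modelled on the local-delivery caller in ip_input.c; example_deliver() itself is a hypothetical wrapper, not kernel code:

/* Hypothetical caller, shaped after ip_local_deliver(). */
static int example_deliver(struct sk_buff *skb)
{
	if (ip_is_fragment(ip_hdr(skb))) {
		/* Nonzero: the skb was queued or dropped by ip_defrag();
		 * reassembly is still in progress (or failed), so stop. */
		if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
			return 0;
	}
	/* Zero return: this same skb now holds the whole datagram. */
	return 1;	/* continue processing the reassembled packet */
}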
708 struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
709 {
710  struct iphdr iph;
711  u32 len;
712 
713  if (skb->protocol != htons(ETH_P_IP))
714  return skb;
715 
716  if (skb_copy_bits(skb, 0, &iph, sizeof(iph)) < 0)
717  return skb;
718 
719  if (iph.ihl < 5 || iph.version != 4)
720  return skb;
721 
722  len = ntohs(iph.tot_len);
723  if (skb->len < len || len < (iph.ihl * 4))
724  return skb;
725 
726  if (ip_is_fragment(&iph)) {
727  skb = skb_share_check(skb, GFP_ATOMIC);
728  if (skb) {
729  if (!pskb_may_pull(skb, iph.ihl*4))
730  return skb;
731  if (pskb_trim_rcsum(skb, len))
732  return skb;
733  memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
734  if (ip_defrag(skb, user))
735  return NULL;
736  skb->rxhash = 0;
737  }
738  }
739  return skb;
740 }
741 EXPORT_SYMBOL(ip_check_defrag);
742 
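ip_check_defrag() is the defragmentation entry point for callers that may hold a shared skb (it was added for the AF_PACKET fanout defrag path): it only acts on IPv4 fragments, unshares the buffer before modifying it, and returns NULL while the fragment has been absorbed into a reassembly queue, otherwise an skb that is either untouched or a freshly reassembled datagram with its rxhash cleared. A hedged sketch of such a caller; example_fanout_defrag() is hypothetical, the real user in this release is the PACKET_FANOUT_FLAG_DEFRAG handling in af_packet.c:

/* Hypothetical wrapper, not kernel code. */
static struct sk_buff *example_fanout_defrag(struct sk_buff *skb)
{
	skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
	if (!skb)
		return NULL;	/* fragment queued; nothing deliverable yet */

	/* skb is either untouched (not an IPv4 fragment) or a complete,
	 * reassembled datagram. */
	return skb;
}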
743 #ifdef CONFIG_SYSCTL
744 static int zero;
745 
746 static struct ctl_table ip4_frags_ns_ctl_table[] = {
747  {
748  .procname = "ipfrag_high_thresh",
749  .data = &init_net.ipv4.frags.high_thresh,
750  .maxlen = sizeof(int),
751  .mode = 0644,
752  .proc_handler = proc_dointvec
753  },
754  {
755  .procname = "ipfrag_low_thresh",
756  .data = &init_net.ipv4.frags.low_thresh,
757  .maxlen = sizeof(int),
758  .mode = 0644,
759  .proc_handler = proc_dointvec
760  },
761  {
762  .procname = "ipfrag_time",
763  .data = &init_net.ipv4.frags.timeout,
764  .maxlen = sizeof(int),
765  .mode = 0644,
766  .proc_handler = proc_dointvec_jiffies,
767  },
768  { }
769 };
770 
771 static struct ctl_table ip4_frags_ctl_table[] = {
772  {
773  .procname = "ipfrag_secret_interval",
774  .data = &ip4_frags.secret_interval,
775  .maxlen = sizeof(int),
776  .mode = 0644,
777  .proc_handler = proc_dointvec_jiffies,
778  },
779  {
780  .procname = "ipfrag_max_dist",
781  .data = &sysctl_ipfrag_max_dist,
782  .maxlen = sizeof(int),
783  .mode = 0644,
784  .proc_handler = proc_dointvec_minmax,
785  .extra1 = &zero
786  },
787  { }
788 };
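Once registered, the per-namespace entries above appear as /proc/sys/net/ipv4/ipfrag_high_thresh, ipfrag_low_thresh and ipfrag_time, and the global table adds ipfrag_secret_interval and ipfrag_max_dist; reading or tuning them needs only ordinary file I/O. A minimal user-space sketch that reads the high-water mark:

#include <stdio.h>

int main(void)
{
	/* Path created by ip4_frags_ns_ctl_register() below. */
	FILE *f = fopen("/proc/sys/net/ipv4/ipfrag_high_thresh", "r");
	long thresh;

	if (!f) {
		perror("ipfrag_high_thresh");
		return 1;
	}
	if (fscanf(f, "%ld", &thresh) != 1) {
		fclose(f);
		return 1;
	}
	fclose(f);
	printf("fragment memory high threshold: %ld bytes\n", thresh);
	return 0;
}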
789 
790 static int __net_init ip4_frags_ns_ctl_register(struct net *net)
791 {
792  struct ctl_table *table;
793  struct ctl_table_header *hdr;
794 
795  table = ip4_frags_ns_ctl_table;
796  if (!net_eq(net, &init_net)) {
797  table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL);
798  if (table == NULL)
799  goto err_alloc;
800 
801  table[0].data = &net->ipv4.frags.high_thresh;
802  table[1].data = &net->ipv4.frags.low_thresh;
803  table[2].data = &net->ipv4.frags.timeout;
804  }
805 
806  hdr = register_net_sysctl(net, "net/ipv4", table);
807  if (hdr == NULL)
808  goto err_reg;
809 
810  net->ipv4.frags_hdr = hdr;
811  return 0;
812 
813 err_reg:
814  if (!net_eq(net, &init_net))
815  kfree(table);
816 err_alloc:
817  return -ENOMEM;
818 }
819 
820 static void __net_exit ip4_frags_ns_ctl_unregister(struct net *net)
821 {
822  struct ctl_table *table;
823 
824  table = net->ipv4.frags_hdr->ctl_table_arg;
825  unregister_net_sysctl_table(net->ipv4.frags_hdr);
826  kfree(table);
827 }
828 
829 static void ip4_frags_ctl_register(void)
830 {
831  register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
832 }
833 #else
834 static inline int ip4_frags_ns_ctl_register(struct net *net)
835 {
836  return 0;
837 }
838 
839 static inline void ip4_frags_ns_ctl_unregister(struct net *net)
840 {
841 }
842 
843 static inline void ip4_frags_ctl_register(void)
844 {
845 }
846 #endif
847 
848 static int __net_init ipv4_frags_init_net(struct net *net)
849 {
850  /*
851  * Fragment cache limits. We will commit 256K at one time. Should we
852  * cross that limit we will prune down to 192K. This should cope with
853  * even the most extreme cases without allowing an attacker to
854  * measurably harm machine performance.
855  */
856  net->ipv4.frags.high_thresh = 256 * 1024;
857  net->ipv4.frags.low_thresh = 192 * 1024;
858  /*
859  * Important NOTE! Fragment queue must be destroyed before MSL expires.
860  * RFC791 is wrong proposing to prolongate timer each fragment arrival
861  * by TTL.
862  */
863  net->ipv4.frags.timeout = IP_FRAG_TIME;
864 
865  inet_frags_init_net(&net->ipv4.frags);
866 
867  return ip4_frags_ns_ctl_register(net);
868 }
869 
870 static void __net_exit ipv4_frags_exit_net(struct net *net)
871 {
872  ip4_frags_ns_ctl_unregister(net);
873  inet_frags_exit_net(&net->ipv4.frags, &ip4_frags);
874 }
875 
876 static struct pernet_operations ip4_frags_ops = {
877  .init = ipv4_frags_init_net,
878  .exit = ipv4_frags_exit_net,
879 };
880 
881 void __init ipfrag_init(void)
882 {
883  ip4_frags_ctl_register();
884  register_pernet_subsys(&ip4_frags_ops);
885  ip4_frags.hashfn = ip4_hashfn;
886  ip4_frags.constructor = ip4_frag_init;
887  ip4_frags.destructor = ip4_frag_free;
888  ip4_frags.skb_free = NULL;
889  ip4_frags.qsize = sizeof(struct ipq);
890  ip4_frags.match = ip4_frag_match;
891  ip4_frags.frag_expire = ip_expire;
892  ip4_frags.secret_interval = 10 * 60 * HZ;
893  inet_frags_init(&ip4_frags);
894 }