Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
ip_vs_ctl.c
Go to the documentation of this file.
1 /*
2  * IPVS An implementation of the IP virtual server support for the
3  * LINUX operating system. IPVS is now implemented as a module
4  * over the NetFilter framework. IPVS can be used to build a
5  * high-performance and highly available server based on a
6  * cluster of servers.
7  *
8  * Authors: Wensong Zhang <[email protected]>
9  * Peter Kese <[email protected]>
10  * Julian Anastasov <[email protected]>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public License
14  * as published by the Free Software Foundation; either version
15  * 2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20 
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23 
24 #include <linux/module.h>
25 #include <linux/init.h>
26 #include <linux/types.h>
27 #include <linux/capability.h>
28 #include <linux/fs.h>
29 #include <linux/sysctl.h>
30 #include <linux/proc_fs.h>
31 #include <linux/workqueue.h>
32 #include <linux/swap.h>
33 #include <linux/seq_file.h>
34 #include <linux/slab.h>
35 
36 #include <linux/netfilter.h>
37 #include <linux/netfilter_ipv4.h>
38 #include <linux/mutex.h>
39 
40 #include <net/net_namespace.h>
41 #include <linux/nsproxy.h>
42 #include <net/ip.h>
43 #ifdef CONFIG_IP_VS_IPV6
44 #include <net/ipv6.h>
45 #include <net/ip6_route.h>
46 #endif
47 #include <net/route.h>
48 #include <net/sock.h>
49 #include <net/genetlink.h>
50 
51 #include <asm/uaccess.h>
52 
53 #include <net/ip_vs.h>
54 
55 /* Mutex (not a spinlock) for IPVS sockopts, because [gs]etsockopt may sleep. */
56 static DEFINE_MUTEX(__ip_vs_mutex);
57 
58 /* rwlock for the virtual service hash tables */
59 static DEFINE_RWLOCK(__ip_vs_svc_lock);
60 
61 /* sysctl variables */
62 
#ifdef CONFIG_IP_VS_DEBUG
/* IPVS debug level; file-local, implicitly zero-initialised. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of the IPVS debug level variable. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
71 
72 
73 /* Forward declarations */
74 static void __ip_vs_del_service(struct ip_vs_service *svc);
75 
76 
#ifdef CONFIG_IP_VS_IPV6
/*
 * Report whether the IPv6 address @addr is local in namespace @net.
 *
 * The address is routed with ip6_route_output(); it counts as local when
 * the lookup reports no error and the route's device has IFF_LOOPBACK set.
 * Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 */
static bool __ip_vs_addr_is_local_v6(struct net *net,
				     const struct in6_addr *addr)
{
	struct flowi6 fl6 = {
		.daddr = *addr,
	};
	struct dst_entry *dst = ip6_route_output(net, NULL, &fl6);
	bool is_local = false;

	if (!dst->error && dst->dev)
		is_local = (dst->dev->flags & IFF_LOOPBACK) != 0;

	/* drop the reference taken by the route lookup */
	dst_release(dst);
	return is_local;
}
#endif
94 
95 #ifdef CONFIG_SYSCTL
96 /*
97  * update_defense_level is called from keventd and from sysctl,
98  * so it needs to protect itself from softirqs
99  */
100 static void update_defense_level(struct netns_ipvs *ipvs)
101 {
102  struct sysinfo i;
103  static int old_secure_tcp = 0;
104  int availmem;
105  int nomem;
106  int to_change = -1;
107 
108  /* we only count free and buffered memory (in pages) */
109  si_meminfo(&i);
110  availmem = i.freeram + i.bufferram;
111  /* however in linux 2.5 the i.bufferram is total page cache size,
112  we need adjust it */
113  /* si_swapinfo(&i); */
114  /* availmem = availmem - (i.totalswap - i.freeswap); */
115 
116  nomem = (availmem < ipvs->sysctl_amemthresh);
117 
119 
120  /* drop_entry */
121  spin_lock(&ipvs->dropentry_lock);
122  switch (ipvs->sysctl_drop_entry) {
123  case 0:
124  atomic_set(&ipvs->dropentry, 0);
125  break;
126  case 1:
127  if (nomem) {
128  atomic_set(&ipvs->dropentry, 1);
129  ipvs->sysctl_drop_entry = 2;
130  } else {
131  atomic_set(&ipvs->dropentry, 0);
132  }
133  break;
134  case 2:
135  if (nomem) {
136  atomic_set(&ipvs->dropentry, 1);
137  } else {
138  atomic_set(&ipvs->dropentry, 0);
139  ipvs->sysctl_drop_entry = 1;
140  };
141  break;
142  case 3:
143  atomic_set(&ipvs->dropentry, 1);
144  break;
145  }
146  spin_unlock(&ipvs->dropentry_lock);
147 
148  /* drop_packet */
149  spin_lock(&ipvs->droppacket_lock);
150  switch (ipvs->sysctl_drop_packet) {
151  case 0:
152  ipvs->drop_rate = 0;
153  break;
154  case 1:
155  if (nomem) {
156  ipvs->drop_rate = ipvs->drop_counter
157  = ipvs->sysctl_amemthresh /
158  (ipvs->sysctl_amemthresh-availmem);
159  ipvs->sysctl_drop_packet = 2;
160  } else {
161  ipvs->drop_rate = 0;
162  }
163  break;
164  case 2:
165  if (nomem) {
166  ipvs->drop_rate = ipvs->drop_counter
167  = ipvs->sysctl_amemthresh /
168  (ipvs->sysctl_amemthresh-availmem);
169  } else {
170  ipvs->drop_rate = 0;
171  ipvs->sysctl_drop_packet = 1;
172  }
173  break;
174  case 3:
175  ipvs->drop_rate = ipvs->sysctl_am_droprate;
176  break;
177  }
178  spin_unlock(&ipvs->droppacket_lock);
179 
180  /* secure_tcp */
181  spin_lock(&ipvs->securetcp_lock);
182  switch (ipvs->sysctl_secure_tcp) {
183  case 0:
184  if (old_secure_tcp >= 2)
185  to_change = 0;
186  break;
187  case 1:
188  if (nomem) {
189  if (old_secure_tcp < 2)
190  to_change = 1;
191  ipvs->sysctl_secure_tcp = 2;
192  } else {
193  if (old_secure_tcp >= 2)
194  to_change = 0;
195  }
196  break;
197  case 2:
198  if (nomem) {
199  if (old_secure_tcp < 2)
200  to_change = 1;
201  } else {
202  if (old_secure_tcp >= 2)
203  to_change = 0;
204  ipvs->sysctl_secure_tcp = 1;
205  }
206  break;
207  case 3:
208  if (old_secure_tcp < 2)
209  to_change = 1;
210  break;
211  }
212  old_secure_tcp = ipvs->sysctl_secure_tcp;
213  if (to_change >= 0)
215  ipvs->sysctl_secure_tcp > 1);
216  spin_unlock(&ipvs->securetcp_lock);
217 
218  local_bh_enable();
219 }
220 
221 
222 /*
223  * Timer for checking the defense
224  */
225 #define DEFENSE_TIMER_PERIOD 1*HZ
226 
227 static void defense_work_handler(struct work_struct *work)
228 {
229  struct netns_ipvs *ipvs =
230  container_of(work, struct netns_ipvs, defense_work.work);
231 
232  update_defense_level(ipvs);
233  if (atomic_read(&ipvs->dropentry))
235  schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
236 }
237 #endif
238 
239 int
241 {
242  return try_module_get(THIS_MODULE);
243 }
244 
245 void
247 {
248  module_put(THIS_MODULE);
249 }
250 
251 
252 /*
253  * Hash tables for virtual service lookups
254  */
255 #define IP_VS_SVC_TAB_BITS 8
256 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
257 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
258 
259 /* the service table hashed by <netns, protocol, addr, port> */
260 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
261 /* the service table hashed by <netns, fwmark> */
262 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
263 
264 
265 /*
266  * Returns hash value for virtual service
267  */
268 static inline unsigned int
269 ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
270  const union nf_inet_addr *addr, __be16 port)
271 {
272  register unsigned int porth = ntohs(port);
273  __be32 addr_fold = addr->ip;
274 
275 #ifdef CONFIG_IP_VS_IPV6
276  if (af == AF_INET6)
277  addr_fold = addr->ip6[0]^addr->ip6[1]^
278  addr->ip6[2]^addr->ip6[3];
279 #endif
280  addr_fold ^= ((size_t)net>>8);
281 
282  return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
284 }
285 
286 /*
287  * Returns hash value of fwmark for virtual service lookup
288  */
289 static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
290 {
291  return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
292 }
293 
294 /*
295  * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
296  * or in the ip_vs_svc_fwm_table by fwmark.
297  * Should be called with locked tables.
298  */
299 static int ip_vs_svc_hash(struct ip_vs_service *svc)
300 {
301  unsigned int hash;
302 
303  if (svc->flags & IP_VS_SVC_F_HASHED) {
304  pr_err("%s(): request for already hashed, called from %pF\n",
305  __func__, __builtin_return_address(0));
306  return 0;
307  }
308 
309  if (svc->fwmark == 0) {
310  /*
311  * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
312  */
313  hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
314  &svc->addr, svc->port);
315  list_add(&svc->s_list, &ip_vs_svc_table[hash]);
316  } else {
317  /*
318  * Hash it by fwmark in svc_fwm_table
319  */
320  hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
321  list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
322  }
323 
324  svc->flags |= IP_VS_SVC_F_HASHED;
325  /* increase its refcnt because it is referenced by the svc table */
326  atomic_inc(&svc->refcnt);
327  return 1;
328 }
329 
330 
331 /*
332  * Unhashes a service from svc_table / svc_fwm_table.
333  * Should be called with locked tables.
334  */
335 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
336 {
337  if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
338  pr_err("%s(): request for unhash flagged, called from %pF\n",
339  __func__, __builtin_return_address(0));
340  return 0;
341  }
342 
343  if (svc->fwmark == 0) {
344  /* Remove it from the svc_table table */
345  list_del(&svc->s_list);
346  } else {
347  /* Remove it from the svc_fwm_table table */
348  list_del(&svc->f_list);
349  }
350 
351  svc->flags &= ~IP_VS_SVC_F_HASHED;
352  atomic_dec(&svc->refcnt);
353  return 1;
354 }
355 
356 
357 /*
358  * Get service by {netns, proto,addr,port} in the service table.
359  */
360 static inline struct ip_vs_service *
361 __ip_vs_service_find(struct net *net, int af, __u16 protocol,
362  const union nf_inet_addr *vaddr, __be16 vport)
363 {
364  unsigned int hash;
365  struct ip_vs_service *svc;
366 
367  /* Check for "full" addressed entries */
368  hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
369 
370  list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
371  if ((svc->af == af)
372  && ip_vs_addr_equal(af, &svc->addr, vaddr)
373  && (svc->port == vport)
374  && (svc->protocol == protocol)
375  && net_eq(svc->net, net)) {
376  /* HIT */
377  return svc;
378  }
379  }
380 
381  return NULL;
382 }
383 
384 
385 /*
386  * Get service by {fwmark} in the service table.
387  */
388 static inline struct ip_vs_service *
389 __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
390 {
391  unsigned int hash;
392  struct ip_vs_service *svc;
393 
394  /* Check for fwmark addressed entries */
395  hash = ip_vs_svc_fwm_hashkey(net, fwmark);
396 
397  list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
398  if (svc->fwmark == fwmark && svc->af == af
399  && net_eq(svc->net, net)) {
400  /* HIT */
401  return svc;
402  }
403  }
404 
405  return NULL;
406 }
407 
408 struct ip_vs_service *
409 ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
410  const union nf_inet_addr *vaddr, __be16 vport)
411 {
412  struct ip_vs_service *svc;
413  struct netns_ipvs *ipvs = net_ipvs(net);
414 
415  read_lock(&__ip_vs_svc_lock);
416 
417  /*
418  * Check the table hashed by fwmark first
419  */
420  if (fwmark) {
421  svc = __ip_vs_svc_fwm_find(net, af, fwmark);
422  if (svc)
423  goto out;
424  }
425 
426  /*
427  * Check the table hashed by <protocol,addr,port>
428  * for "full" addressed entries
429  */
430  svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
431 
432  if (svc == NULL
433  && protocol == IPPROTO_TCP
434  && atomic_read(&ipvs->ftpsvc_counter)
435  && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
436  /*
437  * Check if ftp service entry exists, the packet
438  * might belong to FTP data connections.
439  */
440  svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
441  }
442 
443  if (svc == NULL
444  && atomic_read(&ipvs->nullsvc_counter)) {
445  /*
446  * Check if the catch-all port (port zero) exists
447  */
448  svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
449  }
450 
451  out:
452  if (svc)
453  atomic_inc(&svc->usecnt);
454  read_unlock(&__ip_vs_svc_lock);
455 
456  IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
457  fwmark, ip_vs_proto_name(protocol),
458  IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
459  svc ? "hit" : "not hit");
460 
461  return svc;
462 }
463 
464 
465 static inline void
466 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
467 {
468  atomic_inc(&svc->refcnt);
469  dest->svc = svc;
470 }
471 
472 static void
473 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
474 {
475  struct ip_vs_service *svc = dest->svc;
476 
477  dest->svc = NULL;
478  if (atomic_dec_and_test(&svc->refcnt)) {
479  IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
480  svc->fwmark,
481  IP_VS_DBG_ADDR(svc->af, &svc->addr),
482  ntohs(svc->port), atomic_read(&svc->usecnt));
483  free_percpu(svc->stats.cpustats);
484  kfree(svc);
485  }
486 }
487 
488 
489 /*
490  * Returns hash value for real service
491  */
492 static inline unsigned int ip_vs_rs_hashkey(int af,
493  const union nf_inet_addr *addr,
494  __be16 port)
495 {
496  register unsigned int porth = ntohs(port);
497  __be32 addr_fold = addr->ip;
498 
499 #ifdef CONFIG_IP_VS_IPV6
500  if (af == AF_INET6)
501  addr_fold = addr->ip6[0]^addr->ip6[1]^
502  addr->ip6[2]^addr->ip6[3];
503 #endif
504 
505  return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
506  & IP_VS_RTAB_MASK;
507 }
508 
509 /*
510  * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
511  * should be called with locked tables.
512  */
513 static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
514 {
515  unsigned int hash;
516 
517  if (!list_empty(&dest->d_list)) {
518  return 0;
519  }
520 
521  /*
522  * Hash by proto,addr,port,
523  * which are the parameters of the real service.
524  */
525  hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
526 
527  list_add(&dest->d_list, &ipvs->rs_table[hash]);
528 
529  return 1;
530 }
531 
532 /*
533  * UNhashes ip_vs_dest from rs_table.
534  * should be called with locked tables.
535  */
536 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
537 {
538  /*
539  * Remove it from the rs_table table.
540  */
541  if (!list_empty(&dest->d_list)) {
542  list_del_init(&dest->d_list);
543  }
544 
545  return 1;
546 }
547 
548 /*
549  * Lookup real service by <proto,addr,port> in the real service table.
550  */
551 struct ip_vs_dest *
552 ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
553  const union nf_inet_addr *daddr,
554  __be16 dport)
555 {
556  struct netns_ipvs *ipvs = net_ipvs(net);
557  unsigned int hash;
558  struct ip_vs_dest *dest;
559 
560  /*
561  * Check for "full" addressed entries
562  * Return the first found entry
563  */
564  hash = ip_vs_rs_hashkey(af, daddr, dport);
565 
566  read_lock(&ipvs->rs_lock);
567  list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
568  if ((dest->af == af)
569  && ip_vs_addr_equal(af, &dest->addr, daddr)
570  && (dest->port == dport)
571  && ((dest->protocol == protocol) ||
572  dest->vfwmark)) {
573  /* HIT */
574  read_unlock(&ipvs->rs_lock);
575  return dest;
576  }
577  }
578  read_unlock(&ipvs->rs_lock);
579 
580  return NULL;
581 }
582 
583 /*
584  * Lookup destination by {addr,port} in the given service
585  */
586 static struct ip_vs_dest *
587 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
588  __be16 dport)
589 {
590  struct ip_vs_dest *dest;
591 
592  /*
593  * Find the destination for the given service
594  */
595  list_for_each_entry(dest, &svc->destinations, n_list) {
596  if ((dest->af == svc->af)
597  && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
598  && (dest->port == dport)) {
599  /* HIT */
600  return dest;
601  }
602  }
603 
604  return NULL;
605 }
606 
607 /*
608  * Find destination by {daddr,dport,vaddr,protocol}
609  * Cretaed to be used in ip_vs_process_message() in
610  * the backup synchronization daemon. It finds the
611  * destination to be bound to the received connection
612  * on the backup.
613  *
614  * ip_vs_lookup_real_service() looked promissing, but
615  * seems not working as expected.
616  */
617 struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
618  const union nf_inet_addr *daddr,
619  __be16 dport,
620  const union nf_inet_addr *vaddr,
621  __be16 vport, __u16 protocol, __u32 fwmark,
622  __u32 flags)
623 {
624  struct ip_vs_dest *dest;
625  struct ip_vs_service *svc;
626  __be16 port = dport;
627 
628  svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
629  if (!svc)
630  return NULL;
631  if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
632  port = 0;
633  dest = ip_vs_lookup_dest(svc, daddr, port);
634  if (!dest)
635  dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);
636  if (dest)
637  atomic_inc(&dest->refcnt);
638  ip_vs_service_put(svc);
639  return dest;
640 }
641 
642 /*
643  * Lookup dest by {svc,addr,port} in the destination trash.
644  * The destination trash is used to hold the destinations that are removed
645  * from the service table but are still referenced by some conn entries.
646  * The reason to add the destination trash is when the dest is temporary
647  * down (either by administrator or by monitor program), the dest can be
648  * picked back from the trash, the remaining connections to the dest can
649  * continue, and the counting information of the dest is also useful for
650  * scheduling.
651  */
652 static struct ip_vs_dest *
653 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
654  __be16 dport)
655 {
656  struct ip_vs_dest *dest, *nxt;
657  struct netns_ipvs *ipvs = net_ipvs(svc->net);
658 
659  /*
660  * Find the destination in trash
661  */
662  list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
663  IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
664  "dest->refcnt=%d\n",
665  dest->vfwmark,
666  IP_VS_DBG_ADDR(svc->af, &dest->addr),
667  ntohs(dest->port),
668  atomic_read(&dest->refcnt));
669  if (dest->af == svc->af &&
670  ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
671  dest->port == dport &&
672  dest->vfwmark == svc->fwmark &&
673  dest->protocol == svc->protocol &&
674  (svc->fwmark ||
675  (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
676  dest->vport == svc->port))) {
677  /* HIT */
678  return dest;
679  }
680 
681  /*
682  * Try to purge the destination from trash if not referenced
683  */
684  if (atomic_read(&dest->refcnt) == 1) {
685  IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
686  "from trash\n",
687  dest->vfwmark,
688  IP_VS_DBG_ADDR(svc->af, &dest->addr),
689  ntohs(dest->port));
690  list_del(&dest->n_list);
691  ip_vs_dst_reset(dest);
692  __ip_vs_unbind_svc(dest);
693  free_percpu(dest->stats.cpustats);
694  kfree(dest);
695  }
696  }
697 
698  return NULL;
699 }
700 
701 
702 /*
703  * Clean up all the destinations in the trash
704  * Called by the ip_vs_control_cleanup()
705  *
706  * When the ip_vs_control_clearup is activated by ipvs module exit,
707  * the service tables must have been flushed and all the connections
708  * are expired, and the refcnt of each destination in the trash must
709  * be 1, so we simply release them here.
710  */
711 static void ip_vs_trash_cleanup(struct net *net)
712 {
713  struct ip_vs_dest *dest, *nxt;
714  struct netns_ipvs *ipvs = net_ipvs(net);
715 
716  list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, n_list) {
717  list_del(&dest->n_list);
718  ip_vs_dst_reset(dest);
719  __ip_vs_unbind_svc(dest);
720  free_percpu(dest->stats.cpustats);
721  kfree(dest);
722  }
723 }
724 
725 static void
726 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
727 {
728 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
729 
730  spin_lock_bh(&src->lock);
731 
733  IP_VS_SHOW_STATS_COUNTER(inpkts);
734  IP_VS_SHOW_STATS_COUNTER(outpkts);
735  IP_VS_SHOW_STATS_COUNTER(inbytes);
736  IP_VS_SHOW_STATS_COUNTER(outbytes);
737 
738  ip_vs_read_estimator(dst, src);
739 
740  spin_unlock_bh(&src->lock);
741 }
742 
743 static void
744 ip_vs_zero_stats(struct ip_vs_stats *stats)
745 {
746  spin_lock_bh(&stats->lock);
747 
748  /* get current counters as zero point, rates are zeroed */
749 
750 #define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
751 
753  IP_VS_ZERO_STATS_COUNTER(inpkts);
754  IP_VS_ZERO_STATS_COUNTER(outpkts);
755  IP_VS_ZERO_STATS_COUNTER(inbytes);
756  IP_VS_ZERO_STATS_COUNTER(outbytes);
757 
758  ip_vs_zero_estimator(stats);
759 
760  spin_unlock_bh(&stats->lock);
761 }
762 
763 /*
764  * Update a destination in the given service
765  */
766 static void
767 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
768  struct ip_vs_dest_user_kern *udest, int add)
769 {
770  struct netns_ipvs *ipvs = net_ipvs(svc->net);
771  int conn_flags;
772 
773  /* set the weight and the flags */
774  atomic_set(&dest->weight, udest->weight);
775  conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
776  conn_flags |= IP_VS_CONN_F_INACTIVE;
777 
778  /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
779  if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
780  conn_flags |= IP_VS_CONN_F_NOOUTPUT;
781  } else {
782  /*
783  * Put the real service in rs_table if not present.
784  * For now only for NAT!
785  */
786  write_lock_bh(&ipvs->rs_lock);
787  ip_vs_rs_hash(ipvs, dest);
788  write_unlock_bh(&ipvs->rs_lock);
789  }
790  atomic_set(&dest->conn_flags, conn_flags);
791 
792  /* bind the service */
793  if (!dest->svc) {
794  __ip_vs_bind_svc(dest, svc);
795  } else {
796  if (dest->svc != svc) {
797  __ip_vs_unbind_svc(dest);
798  ip_vs_zero_stats(&dest->stats);
799  __ip_vs_bind_svc(dest, svc);
800  }
801  }
802 
803  /* set the dest status flags */
804  dest->flags |= IP_VS_DEST_F_AVAILABLE;
805 
806  if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
807  dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
808  dest->u_threshold = udest->u_threshold;
809  dest->l_threshold = udest->l_threshold;
810 
811  spin_lock_bh(&dest->dst_lock);
812  ip_vs_dst_reset(dest);
813  spin_unlock_bh(&dest->dst_lock);
814 
815  if (add)
816  ip_vs_start_estimator(svc->net, &dest->stats);
817 
818  write_lock_bh(&__ip_vs_svc_lock);
819 
820  /* Wait until all other svc users go away */
821  IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
822 
823  if (add) {
824  list_add(&dest->n_list, &svc->destinations);
825  svc->num_dests++;
826  }
827 
828  /* call the update_service, because server weight may be changed */
829  if (svc->scheduler->update_service)
830  svc->scheduler->update_service(svc);
831 
832  write_unlock_bh(&__ip_vs_svc_lock);
833 }
834 
835 
836 /*
837  * Create a destination for the given service
838  */
839 static int
840 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
841  struct ip_vs_dest **dest_p)
842 {
843  struct ip_vs_dest *dest;
844  unsigned int atype;
845 
846  EnterFunction(2);
847 
848 #ifdef CONFIG_IP_VS_IPV6
849  if (svc->af == AF_INET6) {
850  atype = ipv6_addr_type(&udest->addr.in6);
851  if ((!(atype & IPV6_ADDR_UNICAST) ||
852  atype & IPV6_ADDR_LINKLOCAL) &&
853  !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
854  return -EINVAL;
855  } else
856 #endif
857  {
858  atype = inet_addr_type(svc->net, udest->addr.ip);
859  if (atype != RTN_LOCAL && atype != RTN_UNICAST)
860  return -EINVAL;
861  }
862 
863  dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
864  if (dest == NULL)
865  return -ENOMEM;
866 
867  dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
868  if (!dest->stats.cpustats)
869  goto err_alloc;
870 
871  dest->af = svc->af;
872  dest->protocol = svc->protocol;
873  dest->vaddr = svc->addr;
874  dest->vport = svc->port;
875  dest->vfwmark = svc->fwmark;
876  ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
877  dest->port = udest->port;
878 
879  atomic_set(&dest->activeconns, 0);
880  atomic_set(&dest->inactconns, 0);
881  atomic_set(&dest->persistconns, 0);
882  atomic_set(&dest->refcnt, 1);
883 
884  INIT_LIST_HEAD(&dest->d_list);
885  spin_lock_init(&dest->dst_lock);
886  spin_lock_init(&dest->stats.lock);
887  __ip_vs_update_dest(svc, dest, udest, 1);
888 
889  *dest_p = dest;
890 
891  LeaveFunction(2);
892  return 0;
893 
894 err_alloc:
895  kfree(dest);
896  return -ENOMEM;
897 }
898 
899 
900 /*
901  * Add a destination into an existing service
902  */
903 static int
904 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
905 {
906  struct ip_vs_dest *dest;
907  union nf_inet_addr daddr;
908  __be16 dport = udest->port;
909  int ret;
910 
911  EnterFunction(2);
912 
913  if (udest->weight < 0) {
914  pr_err("%s(): server weight less than zero\n", __func__);
915  return -ERANGE;
916  }
917 
918  if (udest->l_threshold > udest->u_threshold) {
919  pr_err("%s(): lower threshold is higher than upper threshold\n",
920  __func__);
921  return -ERANGE;
922  }
923 
924  ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
925 
926  /*
927  * Check if the dest already exists in the list
928  */
929  dest = ip_vs_lookup_dest(svc, &daddr, dport);
930 
931  if (dest != NULL) {
932  IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
933  return -EEXIST;
934  }
935 
936  /*
937  * Check if the dest already exists in the trash and
938  * is from the same service
939  */
940  dest = ip_vs_trash_get_dest(svc, &daddr, dport);
941 
942  if (dest != NULL) {
943  IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
944  "dest->refcnt=%d, service %u/%s:%u\n",
945  IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
946  atomic_read(&dest->refcnt),
947  dest->vfwmark,
948  IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
949  ntohs(dest->vport));
950 
951  /*
952  * Get the destination from the trash
953  */
954  list_del(&dest->n_list);
955 
956  __ip_vs_update_dest(svc, dest, udest, 1);
957  ret = 0;
958  } else {
959  /*
960  * Allocate and initialize the dest structure
961  */
962  ret = ip_vs_new_dest(svc, udest, &dest);
963  }
964  LeaveFunction(2);
965 
966  return ret;
967 }
968 
969 
970 /*
971  * Edit a destination in the given service
972  */
973 static int
974 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
975 {
976  struct ip_vs_dest *dest;
977  union nf_inet_addr daddr;
978  __be16 dport = udest->port;
979 
980  EnterFunction(2);
981 
982  if (udest->weight < 0) {
983  pr_err("%s(): server weight less than zero\n", __func__);
984  return -ERANGE;
985  }
986 
987  if (udest->l_threshold > udest->u_threshold) {
988  pr_err("%s(): lower threshold is higher than upper threshold\n",
989  __func__);
990  return -ERANGE;
991  }
992 
993  ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
994 
995  /*
996  * Lookup the destination list
997  */
998  dest = ip_vs_lookup_dest(svc, &daddr, dport);
999 
1000  if (dest == NULL) {
1001  IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
1002  return -ENOENT;
1003  }
1004 
1005  __ip_vs_update_dest(svc, dest, udest, 0);
1006  LeaveFunction(2);
1007 
1008  return 0;
1009 }
1010 
1011 
1012 /*
1013  * Delete a destination (must be already unlinked from the service)
1014  */
1015 static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
1016 {
1017  struct netns_ipvs *ipvs = net_ipvs(net);
1018 
1019  ip_vs_stop_estimator(net, &dest->stats);
1020 
1021  /*
1022  * Remove it from the d-linked list with the real services.
1023  */
1024  write_lock_bh(&ipvs->rs_lock);
1025  ip_vs_rs_unhash(dest);
1026  write_unlock_bh(&ipvs->rs_lock);
1027 
1028  /*
1029  * Decrease the refcnt of the dest, and free the dest
1030  * if nobody refers to it (refcnt=0). Otherwise, throw
1031  * the destination into the trash.
1032  */
1033  if (atomic_dec_and_test(&dest->refcnt)) {
1034  IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1035  dest->vfwmark,
1036  IP_VS_DBG_ADDR(dest->af, &dest->addr),
1037  ntohs(dest->port));
1038  ip_vs_dst_reset(dest);
1039  /* simply decrease svc->refcnt here, let the caller check
1040  and release the service if nobody refers to it.
1041  Only user context can release destination and service,
1042  and only one user context can update virtual service at a
1043  time, so the operation here is OK */
1044  atomic_dec(&dest->svc->refcnt);
1045  free_percpu(dest->stats.cpustats);
1046  kfree(dest);
1047  } else {
1048  IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1049  "dest->refcnt=%d\n",
1050  IP_VS_DBG_ADDR(dest->af, &dest->addr),
1051  ntohs(dest->port),
1052  atomic_read(&dest->refcnt));
1053  list_add(&dest->n_list, &ipvs->dest_trash);
1054  atomic_inc(&dest->refcnt);
1055  }
1056 }
1057 
1058 
1059 /*
1060  * Unlink a destination from the given service
1061  */
1062 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1063  struct ip_vs_dest *dest,
1064  int svcupd)
1065 {
1066  dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1067 
1068  /*
1069  * Remove it from the d-linked destination list.
1070  */
1071  list_del(&dest->n_list);
1072  svc->num_dests--;
1073 
1074  /*
1075  * Call the update_service function of its scheduler
1076  */
1077  if (svcupd && svc->scheduler->update_service)
1078  svc->scheduler->update_service(svc);
1079 }
1080 
1081 
1082 /*
1083  * Delete a destination server in the given service
1084  */
1085 static int
1086 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1087 {
1088  struct ip_vs_dest *dest;
1089  __be16 dport = udest->port;
1090 
1091  EnterFunction(2);
1092 
1093  dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1094 
1095  if (dest == NULL) {
1096  IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1097  return -ENOENT;
1098  }
1099 
1100  write_lock_bh(&__ip_vs_svc_lock);
1101 
1102  /*
1103  * Wait until all other svc users go away.
1104  */
1105  IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1106 
1107  /*
1108  * Unlink dest from the service
1109  */
1110  __ip_vs_unlink_dest(svc, dest, 1);
1111 
1112  write_unlock_bh(&__ip_vs_svc_lock);
1113 
1114  /*
1115  * Delete the destination
1116  */
1117  __ip_vs_del_dest(svc->net, dest);
1118 
1119  LeaveFunction(2);
1120 
1121  return 0;
1122 }
1123 
1124 
1125 /*
1126  * Add a service into the service hash table
1127  */
1128 static int
1129 ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
1130  struct ip_vs_service **svc_p)
1131 {
1132  int ret = 0;
1133  struct ip_vs_scheduler *sched = NULL;
1134  struct ip_vs_pe *pe = NULL;
1135  struct ip_vs_service *svc = NULL;
1136  struct netns_ipvs *ipvs = net_ipvs(net);
1137 
1138  /* increase the module use count */
1140 
1141  /* Lookup the scheduler by 'u->sched_name' */
1142  sched = ip_vs_scheduler_get(u->sched_name);
1143  if (sched == NULL) {
1144  pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1145  ret = -ENOENT;
1146  goto out_err;
1147  }
1148 
1149  if (u->pe_name && *u->pe_name) {
1150  pe = ip_vs_pe_getbyname(u->pe_name);
1151  if (pe == NULL) {
1152  pr_info("persistence engine module ip_vs_pe_%s "
1153  "not found\n", u->pe_name);
1154  ret = -ENOENT;
1155  goto out_err;
1156  }
1157  }
1158 
1159 #ifdef CONFIG_IP_VS_IPV6
1160  if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1161  ret = -EINVAL;
1162  goto out_err;
1163  }
1164 #endif
1165 
1166  svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1167  if (svc == NULL) {
1168  IP_VS_DBG(1, "%s(): no memory\n", __func__);
1169  ret = -ENOMEM;
1170  goto out_err;
1171  }
1172  svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
1173  if (!svc->stats.cpustats) {
1174  ret = -ENOMEM;
1175  goto out_err;
1176  }
1177 
1178  /* I'm the first user of the service */
1179  atomic_set(&svc->usecnt, 0);
1180  atomic_set(&svc->refcnt, 0);
1181 
1182  svc->af = u->af;
1183  svc->protocol = u->protocol;
1184  ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1185  svc->port = u->port;
1186  svc->fwmark = u->fwmark;
1187  svc->flags = u->flags;
1188  svc->timeout = u->timeout * HZ;
1189  svc->netmask = u->netmask;
1190  svc->net = net;
1191 
1192  INIT_LIST_HEAD(&svc->destinations);
1193  rwlock_init(&svc->sched_lock);
1194  spin_lock_init(&svc->stats.lock);
1195 
1196  /* Bind the scheduler */
1197  ret = ip_vs_bind_scheduler(svc, sched);
1198  if (ret)
1199  goto out_err;
1200  sched = NULL;
1201 
1202  /* Bind the ct retriever */
1203  ip_vs_bind_pe(svc, pe);
1204  pe = NULL;
1205 
1206  /* Update the virtual service counters */
1207  if (svc->port == FTPPORT)
1208  atomic_inc(&ipvs->ftpsvc_counter);
1209  else if (svc->port == 0)
1210  atomic_inc(&ipvs->nullsvc_counter);
1211 
1212  ip_vs_start_estimator(net, &svc->stats);
1213 
1214  /* Count only IPv4 services for old get/setsockopt interface */
1215  if (svc->af == AF_INET)
1216  ipvs->num_services++;
1217 
1218  /* Hash the service into the service table */
1219  write_lock_bh(&__ip_vs_svc_lock);
1220  ip_vs_svc_hash(svc);
1221  write_unlock_bh(&__ip_vs_svc_lock);
1222 
1223  *svc_p = svc;
1224  /* Now there is a service - full throttle */
1225  ipvs->enable = 1;
1226  return 0;
1227 
1228 
1229  out_err:
1230  if (svc != NULL) {
1232  if (svc->inc) {
1233  local_bh_disable();
1234  ip_vs_app_inc_put(svc->inc);
1235  local_bh_enable();
1236  }
1237  if (svc->stats.cpustats)
1238  free_percpu(svc->stats.cpustats);
1239  kfree(svc);
1240  }
1241  ip_vs_scheduler_put(sched);
1242  ip_vs_pe_put(pe);
1243 
1244  /* decrease the module use count */
1246 
1247  return ret;
1248 }
1249 
1250 
1251 /*
1252  * Edit a service and bind it with a new scheduler
1253  */
1254 static int
1255 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1256 {
1257  struct ip_vs_scheduler *sched, *old_sched;
1258  struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1259  int ret = 0;
1260 
1261  /*
1262  * Lookup the scheduler, by 'u->sched_name'
1263  */
1264  sched = ip_vs_scheduler_get(u->sched_name);
1265  if (sched == NULL) {
1266  pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1267  return -ENOENT;
1268  }
1269  old_sched = sched;
1270 
1271  if (u->pe_name && *u->pe_name) {
1272  pe = ip_vs_pe_getbyname(u->pe_name);
1273  if (pe == NULL) {
1274  pr_info("persistence engine module ip_vs_pe_%s "
1275  "not found\n", u->pe_name);
1276  ret = -ENOENT;
1277  goto out;
1278  }
1279  old_pe = pe;
1280  }
1281 
1282 #ifdef CONFIG_IP_VS_IPV6
1283  if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1284  ret = -EINVAL;
1285  goto out;
1286  }
1287 #endif
1288 
1289  write_lock_bh(&__ip_vs_svc_lock);
1290 
1291  /*
1292  * Wait until all other svc users go away.
1293  */
1294  IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1295 
1296  /*
1297  * Set the flags and timeout value
1298  */
1299  svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1300  svc->timeout = u->timeout * HZ;
1301  svc->netmask = u->netmask;
1302 
1303  old_sched = svc->scheduler;
1304  if (sched != old_sched) {
1305  /*
1306  * Unbind the old scheduler
1307  */
1308  if ((ret = ip_vs_unbind_scheduler(svc))) {
1309  old_sched = sched;
1310  goto out_unlock;
1311  }
1312 
1313  /*
1314  * Bind the new scheduler
1315  */
1316  if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1317  /*
1318  * If ip_vs_bind_scheduler fails, restore the old
1319  * scheduler.
1320  * The main reason of failure is out of memory.
1321  *
1322  * The question is if the old scheduler can be
1323  * restored all the time. TODO: if it cannot be
1324  * restored some time, we must delete the service,
1325  * otherwise the system may crash.
1326  */
1327  ip_vs_bind_scheduler(svc, old_sched);
1328  old_sched = sched;
1329  goto out_unlock;
1330  }
1331  }
1332 
1333  old_pe = svc->pe;
1334  if (pe != old_pe) {
1335  ip_vs_unbind_pe(svc);
1336  ip_vs_bind_pe(svc, pe);
1337  }
1338 
1339 out_unlock:
1340  write_unlock_bh(&__ip_vs_svc_lock);
1341 out:
1342  ip_vs_scheduler_put(old_sched);
1343  ip_vs_pe_put(old_pe);
1344  return ret;
1345 }
1346 
1347 
1348 /*
1349  * Delete a service from the service list
1350  * - The service must be unlinked, unlocked and not referenced!
1351  * - We are called under _bh lock
1352  */
1353 static void __ip_vs_del_service(struct ip_vs_service *svc)
1354 {
1355  struct ip_vs_dest *dest, *nxt;
1356  struct ip_vs_scheduler *old_sched;
1357  struct ip_vs_pe *old_pe;
1358  struct netns_ipvs *ipvs = net_ipvs(svc->net);
1359 
1360  pr_info("%s: enter\n", __func__);
1361 
1362  /* Count only IPv4 services for old get/setsockopt interface */
1363  if (svc->af == AF_INET)
1364  ipvs->num_services--;
1365 
1366  ip_vs_stop_estimator(svc->net, &svc->stats);
1367 
1368  /* Unbind scheduler */
1369  old_sched = svc->scheduler;
1371  ip_vs_scheduler_put(old_sched);
1372 
1373  /* Unbind persistence engine */
1374  old_pe = svc->pe;
1375  ip_vs_unbind_pe(svc);
1376  ip_vs_pe_put(old_pe);
1377 
1378  /* Unbind app inc */
1379  if (svc->inc) {
1380  ip_vs_app_inc_put(svc->inc);
1381  svc->inc = NULL;
1382  }
1383 
1384  /*
1385  * Unlink the whole destination list
1386  */
1387  list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1388  __ip_vs_unlink_dest(svc, dest, 0);
1389  __ip_vs_del_dest(svc->net, dest);
1390  }
1391 
1392  /*
1393  * Update the virtual service counters
1394  */
1395  if (svc->port == FTPPORT)
1396  atomic_dec(&ipvs->ftpsvc_counter);
1397  else if (svc->port == 0)
1398  atomic_dec(&ipvs->nullsvc_counter);
1399 
1400  /*
1401  * Free the service if nobody refers to it
1402  */
1403  if (atomic_read(&svc->refcnt) == 0) {
1404  IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1405  svc->fwmark,
1406  IP_VS_DBG_ADDR(svc->af, &svc->addr),
1407  ntohs(svc->port), atomic_read(&svc->usecnt));
1408  free_percpu(svc->stats.cpustats);
1409  kfree(svc);
1410  }
1411 
1412  /* decrease the module use count */
1414 }
1415 
1416 /*
1417  * Unlink a service from list and try to delete it if its refcnt reached 0
1418  */
1419 static void ip_vs_unlink_service(struct ip_vs_service *svc)
1420 {
1421  /*
1422  * Unhash it from the service table
1423  */
1424  write_lock_bh(&__ip_vs_svc_lock);
1425 
1426  ip_vs_svc_unhash(svc);
1427 
1428  /*
1429  * Wait until all the svc users go away.
1430  */
1431  IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1432 
1433  __ip_vs_del_service(svc);
1434 
1435  write_unlock_bh(&__ip_vs_svc_lock);
1436 }
1437 
1438 /*
1439  * Delete a service from the service list
1440  */
1441 static int ip_vs_del_service(struct ip_vs_service *svc)
1442 {
1443  if (svc == NULL)
1444  return -EEXIST;
1445  ip_vs_unlink_service(svc);
1446 
1447  return 0;
1448 }
1449 
1450 
1451 /*
1452  * Flush all the virtual services
1453  */
1454 static int ip_vs_flush(struct net *net)
1455 {
1456  int idx;
1457  struct ip_vs_service *svc, *nxt;
1458 
1459  /*
1460  * Flush the service table hashed by <netns,protocol,addr,port>
1461  */
1462  for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1463  list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
1464  s_list) {
1465  if (net_eq(svc->net, net))
1466  ip_vs_unlink_service(svc);
1467  }
1468  }
1469 
1470  /*
1471  * Flush the service table hashed by fwmark
1472  */
1473  for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474  list_for_each_entry_safe(svc, nxt,
1475  &ip_vs_svc_fwm_table[idx], f_list) {
1476  if (net_eq(svc->net, net))
1477  ip_vs_unlink_service(svc);
1478  }
1479  }
1480 
1481  return 0;
1482 }
1483 
1484 /*
1485  * Delete service by {netns} in the service table.
1486  * Called by __ip_vs_cleanup()
1487  */
1488 void ip_vs_service_net_cleanup(struct net *net)
1489 {
1490  EnterFunction(2);
1491  /* Check for "full" addressed entries */
1492  mutex_lock(&__ip_vs_mutex);
1493  ip_vs_flush(net);
1494  mutex_unlock(&__ip_vs_mutex);
1495  LeaveFunction(2);
1496 }
1497 /*
1498  * Release dst hold by dst_cache
1499  */
1500 static inline void
1501 __ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev)
1502 {
1503  spin_lock_bh(&dest->dst_lock);
1504  if (dest->dst_cache && dest->dst_cache->dev == dev) {
1505  IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n",
1506  dev->name,
1507  IP_VS_DBG_ADDR(dest->af, &dest->addr),
1508  ntohs(dest->port),
1509  atomic_read(&dest->refcnt));
1510  ip_vs_dst_reset(dest);
1511  }
1512  spin_unlock_bh(&dest->dst_lock);
1513 
1514 }
1515 /*
1516  * Netdev event receiver
1517  * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to
1518  * a device that is "unregister" it must be released.
1519  */
1520 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
1521  void *ptr)
1522 {
1523  struct net_device *dev = ptr;
1524  struct net *net = dev_net(dev);
1525  struct netns_ipvs *ipvs = net_ipvs(net);
1526  struct ip_vs_service *svc;
1527  struct ip_vs_dest *dest;
1528  unsigned int idx;
1529 
1530  if (event != NETDEV_UNREGISTER || !ipvs)
1531  return NOTIFY_DONE;
1532  IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name);
1533  EnterFunction(2);
1534  mutex_lock(&__ip_vs_mutex);
1535  for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1536  list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1537  if (net_eq(svc->net, net)) {
1538  list_for_each_entry(dest, &svc->destinations,
1539  n_list) {
1540  __ip_vs_dev_reset(dest, dev);
1541  }
1542  }
1543  }
1544 
1545  list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1546  if (net_eq(svc->net, net)) {
1547  list_for_each_entry(dest, &svc->destinations,
1548  n_list) {
1549  __ip_vs_dev_reset(dest, dev);
1550  }
1551  }
1552 
1553  }
1554  }
1555 
1556  list_for_each_entry(dest, &ipvs->dest_trash, n_list) {
1557  __ip_vs_dev_reset(dest, dev);
1558  }
1559  mutex_unlock(&__ip_vs_mutex);
1560  LeaveFunction(2);
1561  return NOTIFY_DONE;
1562 }
1563 
1564 /*
1565  * Zero counters in a service or all services
1566  */
1567 static int ip_vs_zero_service(struct ip_vs_service *svc)
1568 {
1569  struct ip_vs_dest *dest;
1570 
1571  write_lock_bh(&__ip_vs_svc_lock);
1572  list_for_each_entry(dest, &svc->destinations, n_list) {
1573  ip_vs_zero_stats(&dest->stats);
1574  }
1575  ip_vs_zero_stats(&svc->stats);
1576  write_unlock_bh(&__ip_vs_svc_lock);
1577  return 0;
1578 }
1579 
1580 static int ip_vs_zero_all(struct net *net)
1581 {
1582  int idx;
1583  struct ip_vs_service *svc;
1584 
1585  for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1586  list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1587  if (net_eq(svc->net, net))
1588  ip_vs_zero_service(svc);
1589  }
1590  }
1591 
1592  for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1593  list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1594  if (net_eq(svc->net, net))
1595  ip_vs_zero_service(svc);
1596  }
1597  }
1598 
1599  ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
1600  return 0;
1601 }
1602 
1603 #ifdef CONFIG_SYSCTL
1604 
/* Bounds referenced through .extra1/.extra2 by the sysctl table below */
static int zero;
static int three = 3;
1607 
1608 static int
1609 proc_do_defense_mode(ctl_table *table, int write,
1610  void __user *buffer, size_t *lenp, loff_t *ppos)
1611 {
1612  struct net *net = current->nsproxy->net_ns;
1613  int *valp = table->data;
1614  int val = *valp;
1615  int rc;
1616 
1617  rc = proc_dointvec(table, write, buffer, lenp, ppos);
1618  if (write && (*valp != val)) {
1619  if ((*valp < 0) || (*valp > 3)) {
1620  /* Restore the correct value */
1621  *valp = val;
1622  } else {
1623  update_defense_level(net_ipvs(net));
1624  }
1625  }
1626  return rc;
1627 }
1628 
1629 static int
1630 proc_do_sync_threshold(ctl_table *table, int write,
1631  void __user *buffer, size_t *lenp, loff_t *ppos)
1632 {
1633  int *valp = table->data;
1634  int val[2];
1635  int rc;
1636 
1637  /* backup the value first */
1638  memcpy(val, valp, sizeof(val));
1639 
1640  rc = proc_dointvec(table, write, buffer, lenp, ppos);
1641  if (write && (valp[0] < 0 || valp[1] < 0 ||
1642  (valp[0] >= valp[1] && valp[1]))) {
1643  /* Restore the correct value */
1644  memcpy(valp, val, sizeof(val));
1645  }
1646  return rc;
1647 }
1648 
1649 static int
1650 proc_do_sync_mode(ctl_table *table, int write,
1651  void __user *buffer, size_t *lenp, loff_t *ppos)
1652 {
1653  int *valp = table->data;
1654  int val = *valp;
1655  int rc;
1656 
1657  rc = proc_dointvec(table, write, buffer, lenp, ppos);
1658  if (write && (*valp != val)) {
1659  if ((*valp < 0) || (*valp > 1)) {
1660  /* Restore the correct value */
1661  *valp = val;
1662  }
1663  }
1664  return rc;
1665 }
1666 
1667 static int
1668 proc_do_sync_ports(ctl_table *table, int write,
1669  void __user *buffer, size_t *lenp, loff_t *ppos)
1670 {
1671  int *valp = table->data;
1672  int val = *valp;
1673  int rc;
1674 
1675  rc = proc_dointvec(table, write, buffer, lenp, ppos);
1676  if (write && (*valp != val)) {
1677  if (*valp < 1 || !is_power_of_2(*valp)) {
1678  /* Restore the correct value */
1679  *valp = val;
1680  }
1681  }
1682  return rc;
1683 }
1684 
1685 /*
1686  * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1687  * Do not change order or insert new entries without
1688  * align with netns init in ip_vs_control_net_init()
1689  */
1690 
1691 static struct ctl_table vs_vars[] = {
1692  {
1693  .procname = "amemthresh",
1694  .maxlen = sizeof(int),
1695  .mode = 0644,
1697  },
1698  {
1699  .procname = "am_droprate",
1700  .maxlen = sizeof(int),
1701  .mode = 0644,
1703  },
1704  {
1705  .procname = "drop_entry",
1706  .maxlen = sizeof(int),
1707  .mode = 0644,
1708  .proc_handler = proc_do_defense_mode,
1709  },
1710  {
1711  .procname = "drop_packet",
1712  .maxlen = sizeof(int),
1713  .mode = 0644,
1714  .proc_handler = proc_do_defense_mode,
1715  },
1716 #ifdef CONFIG_IP_VS_NFCT
1717  {
1718  .procname = "conntrack",
1719  .maxlen = sizeof(int),
1720  .mode = 0644,
1722  },
1723 #endif
1724  {
1725  .procname = "secure_tcp",
1726  .maxlen = sizeof(int),
1727  .mode = 0644,
1728  .proc_handler = proc_do_defense_mode,
1729  },
1730  {
1731  .procname = "snat_reroute",
1732  .maxlen = sizeof(int),
1733  .mode = 0644,
1735  },
1736  {
1737  .procname = "sync_version",
1738  .maxlen = sizeof(int),
1739  .mode = 0644,
1740  .proc_handler = &proc_do_sync_mode,
1741  },
1742  {
1743  .procname = "sync_ports",
1744  .maxlen = sizeof(int),
1745  .mode = 0644,
1746  .proc_handler = &proc_do_sync_ports,
1747  },
1748  {
1749  .procname = "sync_qlen_max",
1750  .maxlen = sizeof(int),
1751  .mode = 0644,
1753  },
1754  {
1755  .procname = "sync_sock_size",
1756  .maxlen = sizeof(int),
1757  .mode = 0644,
1759  },
1760  {
1761  .procname = "cache_bypass",
1762  .maxlen = sizeof(int),
1763  .mode = 0644,
1765  },
1766  {
1767  .procname = "expire_nodest_conn",
1768  .maxlen = sizeof(int),
1769  .mode = 0644,
1771  },
1772  {
1773  .procname = "expire_quiescent_template",
1774  .maxlen = sizeof(int),
1775  .mode = 0644,
1777  },
1778  {
1779  .procname = "sync_threshold",
1780  .maxlen =
1781  sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
1782  .mode = 0644,
1783  .proc_handler = proc_do_sync_threshold,
1784  },
1785  {
1786  .procname = "sync_refresh_period",
1787  .maxlen = sizeof(int),
1788  .mode = 0644,
1790  },
1791  {
1792  .procname = "sync_retries",
1793  .maxlen = sizeof(int),
1794  .mode = 0644,
1796  .extra1 = &zero,
1797  .extra2 = &three,
1798  },
1799  {
1800  .procname = "nat_icmp_send",
1801  .maxlen = sizeof(int),
1802  .mode = 0644,
1804  },
1805  {
1806  .procname = "pmtu_disc",
1807  .maxlen = sizeof(int),
1808  .mode = 0644,
1810  },
1811 #ifdef CONFIG_IP_VS_DEBUG
1812  {
1813  .procname = "debug_level",
1814  .data = &sysctl_ip_vs_debug_level,
1815  .maxlen = sizeof(int),
1816  .mode = 0644,
1818  },
1819 #endif
1820 #if 0
1821  {
1822  .procname = "timeout_established",
1823  .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1824  .maxlen = sizeof(int),
1825  .mode = 0644,
1827  },
1828  {
1829  .procname = "timeout_synsent",
1830  .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1831  .maxlen = sizeof(int),
1832  .mode = 0644,
1834  },
1835  {
1836  .procname = "timeout_synrecv",
1837  .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1838  .maxlen = sizeof(int),
1839  .mode = 0644,
1841  },
1842  {
1843  .procname = "timeout_finwait",
1844  .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1845  .maxlen = sizeof(int),
1846  .mode = 0644,
1848  },
1849  {
1850  .procname = "timeout_timewait",
1851  .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1852  .maxlen = sizeof(int),
1853  .mode = 0644,
1855  },
1856  {
1857  .procname = "timeout_close",
1858  .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1859  .maxlen = sizeof(int),
1860  .mode = 0644,
1862  },
1863  {
1864  .procname = "timeout_closewait",
1865  .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1866  .maxlen = sizeof(int),
1867  .mode = 0644,
1869  },
1870  {
1871  .procname = "timeout_lastack",
1872  .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1873  .maxlen = sizeof(int),
1874  .mode = 0644,
1876  },
1877  {
1878  .procname = "timeout_listen",
1879  .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1880  .maxlen = sizeof(int),
1881  .mode = 0644,
1883  },
1884  {
1885  .procname = "timeout_synack",
1886  .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1887  .maxlen = sizeof(int),
1888  .mode = 0644,
1890  },
1891  {
1892  .procname = "timeout_udp",
1893  .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1894  .maxlen = sizeof(int),
1895  .mode = 0644,
1897  },
1898  {
1899  .procname = "timeout_icmp",
1900  .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1901  .maxlen = sizeof(int),
1902  .mode = 0644,
1904  },
1905 #endif
1906  { }
1907 };
1908 
1909 #endif
1910 
1911 #ifdef CONFIG_PROC_FS
1912 
1913 struct ip_vs_iter {
1914  struct seq_net_private p; /* Do not move this, netns depends upon it*/
1915  struct list_head *table;
1916  int bucket;
1917 };
1918 
1919 /*
1920  * Write the contents of the VS rule table to a PROCfs file.
1921  * (It is kept just for backward compatibility)
1922  */
1923 static inline const char *ip_vs_fwd_name(unsigned int flags)
1924 {
1925  switch (flags & IP_VS_CONN_F_FWD_MASK) {
1927  return "Local";
1928  case IP_VS_CONN_F_TUNNEL:
1929  return "Tunnel";
1930  case IP_VS_CONN_F_DROUTE:
1931  return "Route";
1932  default:
1933  return "Masq";
1934  }
1935 }
1936 
1937 
1938 /* Get the Nth entry in the two lists */
1939 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1940 {
1941  struct net *net = seq_file_net(seq);
1942  struct ip_vs_iter *iter = seq->private;
1943  int idx;
1944  struct ip_vs_service *svc;
1945 
1946  /* look in hash by protocol */
1947  for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1948  list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1949  if (net_eq(svc->net, net) && pos-- == 0) {
1950  iter->table = ip_vs_svc_table;
1951  iter->bucket = idx;
1952  return svc;
1953  }
1954  }
1955  }
1956 
1957  /* keep looking in fwmark */
1958  for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1959  list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1960  if (net_eq(svc->net, net) && pos-- == 0) {
1961  iter->table = ip_vs_svc_fwm_table;
1962  iter->bucket = idx;
1963  return svc;
1964  }
1965  }
1966  }
1967 
1968  return NULL;
1969 }
1970 
1971 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1972 __acquires(__ip_vs_svc_lock)
1973 {
1974 
1975  read_lock_bh(&__ip_vs_svc_lock);
1976  return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1977 }
1978 
1979 
1980 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1981 {
1982  struct list_head *e;
1983  struct ip_vs_iter *iter;
1984  struct ip_vs_service *svc;
1985 
1986  ++*pos;
1987  if (v == SEQ_START_TOKEN)
1988  return ip_vs_info_array(seq,0);
1989 
1990  svc = v;
1991  iter = seq->private;
1992 
1993  if (iter->table == ip_vs_svc_table) {
1994  /* next service in table hashed by protocol */
1995  if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1996  return list_entry(e, struct ip_vs_service, s_list);
1997 
1998 
1999  while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2000  list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
2001  s_list) {
2002  return svc;
2003  }
2004  }
2005 
2006  iter->table = ip_vs_svc_fwm_table;
2007  iter->bucket = -1;
2008  goto scan_fwmark;
2009  }
2010 
2011  /* next service in hashed by fwmark */
2012  if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
2013  return list_entry(e, struct ip_vs_service, f_list);
2014 
2015  scan_fwmark:
2016  while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
2017  list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
2018  f_list)
2019  return svc;
2020  }
2021 
2022  return NULL;
2023 }
2024 
2025 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
2026 __releases(__ip_vs_svc_lock)
2027 {
2028  read_unlock_bh(&__ip_vs_svc_lock);
2029 }
2030 
2031 
2032 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
2033 {
2034  if (v == SEQ_START_TOKEN) {
2035  seq_printf(seq,
2036  "IP Virtual Server version %d.%d.%d (size=%d)\n",
2037  NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2038  seq_puts(seq,
2039  "Prot LocalAddress:Port Scheduler Flags\n");
2040  seq_puts(seq,
2041  " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
2042  } else {
2043  const struct ip_vs_service *svc = v;
2044  const struct ip_vs_iter *iter = seq->private;
2045  const struct ip_vs_dest *dest;
2046 
2047  if (iter->table == ip_vs_svc_table) {
2048 #ifdef CONFIG_IP_VS_IPV6
2049  if (svc->af == AF_INET6)
2050  seq_printf(seq, "%s [%pI6]:%04X %s ",
2051  ip_vs_proto_name(svc->protocol),
2052  &svc->addr.in6,
2053  ntohs(svc->port),
2054  svc->scheduler->name);
2055  else
2056 #endif
2057  seq_printf(seq, "%s %08X:%04X %s %s ",
2058  ip_vs_proto_name(svc->protocol),
2059  ntohl(svc->addr.ip),
2060  ntohs(svc->port),
2061  svc->scheduler->name,
2062  (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2063  } else {
2064  seq_printf(seq, "FWM %08X %s %s",
2065  svc->fwmark, svc->scheduler->name,
2066  (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
2067  }
2068 
2069  if (svc->flags & IP_VS_SVC_F_PERSISTENT)
2070  seq_printf(seq, "persistent %d %08X\n",
2071  svc->timeout,
2072  ntohl(svc->netmask));
2073  else
2074  seq_putc(seq, '\n');
2075 
2076  list_for_each_entry(dest, &svc->destinations, n_list) {
2077 #ifdef CONFIG_IP_VS_IPV6
2078  if (dest->af == AF_INET6)
2079  seq_printf(seq,
2080  " -> [%pI6]:%04X"
2081  " %-7s %-6d %-10d %-10d\n",
2082  &dest->addr.in6,
2083  ntohs(dest->port),
2084  ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2085  atomic_read(&dest->weight),
2086  atomic_read(&dest->activeconns),
2087  atomic_read(&dest->inactconns));
2088  else
2089 #endif
2090  seq_printf(seq,
2091  " -> %08X:%04X "
2092  "%-7s %-6d %-10d %-10d\n",
2093  ntohl(dest->addr.ip),
2094  ntohs(dest->port),
2095  ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
2096  atomic_read(&dest->weight),
2097  atomic_read(&dest->activeconns),
2098  atomic_read(&dest->inactconns));
2099 
2100  }
2101  }
2102  return 0;
2103 }
2104 
2105 static const struct seq_operations ip_vs_info_seq_ops = {
2106  .start = ip_vs_info_seq_start,
2107  .next = ip_vs_info_seq_next,
2108  .stop = ip_vs_info_seq_stop,
2109  .show = ip_vs_info_seq_show,
2110 };
2111 
2112 static int ip_vs_info_open(struct inode *inode, struct file *file)
2113 {
2114  return seq_open_net(inode, file, &ip_vs_info_seq_ops,
2115  sizeof(struct ip_vs_iter));
2116 }
2117 
2118 static const struct file_operations ip_vs_info_fops = {
2119  .owner = THIS_MODULE,
2120  .open = ip_vs_info_open,
2121  .read = seq_read,
2122  .llseek = seq_lseek,
2123  .release = seq_release_net,
2124 };
2125 
2126 static int ip_vs_stats_show(struct seq_file *seq, void *v)
2127 {
2128  struct net *net = seq_file_single_net(seq);
2129  struct ip_vs_stats_user show;
2130 
2131 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2132  seq_puts(seq,
2133  " Total Incoming Outgoing Incoming Outgoing\n");
2134  seq_printf(seq,
2135  " Conns Packets Packets Bytes Bytes\n");
2136 
2137  ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
2138  seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
2139  show.inpkts, show.outpkts,
2140  (unsigned long long) show.inbytes,
2141  (unsigned long long) show.outbytes);
2142 
2143 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2144  seq_puts(seq,
2145  " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2146  seq_printf(seq, "%8X %8X %8X %16X %16X\n",
2147  show.cps, show.inpps, show.outpps,
2148  show.inbps, show.outbps);
2149 
2150  return 0;
2151 }
2152 
2153 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
2154 {
2155  return single_open_net(inode, file, ip_vs_stats_show);
2156 }
2157 
2158 static const struct file_operations ip_vs_stats_fops = {
2159  .owner = THIS_MODULE,
2160  .open = ip_vs_stats_seq_open,
2161  .read = seq_read,
2162  .llseek = seq_lseek,
2163  .release = single_release_net,
2164 };
2165 
2166 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
2167 {
2168  struct net *net = seq_file_single_net(seq);
2169  struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
2170  struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
2171  struct ip_vs_stats_user rates;
2172  int i;
2173 
2174 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2175  seq_puts(seq,
2176  " Total Incoming Outgoing Incoming Outgoing\n");
2177  seq_printf(seq,
2178  "CPU Conns Packets Packets Bytes Bytes\n");
2179 
2181  struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
2182  unsigned int start;
2183  __u64 inbytes, outbytes;
2184 
2185  do {
2186  start = u64_stats_fetch_begin_bh(&u->syncp);
2187  inbytes = u->ustats.inbytes;
2188  outbytes = u->ustats.outbytes;
2189  } while (u64_stats_fetch_retry_bh(&u->syncp, start));
2190 
2191  seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
2192  i, u->ustats.conns, u->ustats.inpkts,
2193  u->ustats.outpkts, (__u64)inbytes,
2194  (__u64)outbytes);
2195  }
2196 
2197  spin_lock_bh(&tot_stats->lock);
2198 
2199  seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
2200  tot_stats->ustats.conns, tot_stats->ustats.inpkts,
2201  tot_stats->ustats.outpkts,
2202  (unsigned long long) tot_stats->ustats.inbytes,
2203  (unsigned long long) tot_stats->ustats.outbytes);
2204 
2205  ip_vs_read_estimator(&rates, tot_stats);
2206 
2207  spin_unlock_bh(&tot_stats->lock);
2208 
2209 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
2210  seq_puts(seq,
2211  " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
2212  seq_printf(seq, " %8X %8X %8X %16X %16X\n",
2213  rates.cps,
2214  rates.inpps,
2215  rates.outpps,
2216  rates.inbps,
2217  rates.outbps);
2218 
2219  return 0;
2220 }
2221 
2222 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file)
2223 {
2224  return single_open_net(inode, file, ip_vs_stats_percpu_show);
2225 }
2226 
2227 static const struct file_operations ip_vs_stats_percpu_fops = {
2228  .owner = THIS_MODULE,
2229  .open = ip_vs_stats_percpu_seq_open,
2230  .read = seq_read,
2231  .llseek = seq_lseek,
2232  .release = single_release_net,
2233 };
2234 #endif
2235 
2236 /*
2237  * Set timeout values for tcp tcpfin udp in the timeout_table.
2238  */
2239 static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
2240 {
2241 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2242  struct ip_vs_proto_data *pd;
2243 #endif
2244 
2245  IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2246  u->tcp_timeout,
2247  u->tcp_fin_timeout,
2248  u->udp_timeout);
2249 
2250 #ifdef CONFIG_IP_VS_PROTO_TCP
2251  if (u->tcp_timeout) {
2252  pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2254  = u->tcp_timeout * HZ;
2255  }
2256 
2257  if (u->tcp_fin_timeout) {
2258  pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2260  = u->tcp_fin_timeout * HZ;
2261  }
2262 #endif
2263 
2264 #ifdef CONFIG_IP_VS_PROTO_UDP
2265  if (u->udp_timeout) {
2266  pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2268  = u->udp_timeout * HZ;
2269  }
2270 #endif
2271  return 0;
2272 }
2273 
2274 
/* Zero-based index of a set-sockopt command; argument is parenthesised so
 * that expressions can be passed safely */
#define SET_CMDID(cmd)		((cmd) - IP_VS_BASE_CTL)
/* Expected argument sizes for the set-sockopt commands */
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
/* Size of the on-stack argument buffer used by do_ip_vs_set_ctl() */
#define MAX_ARG_LEN		SVCDEST_ARG_LEN
2282 
2283 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2295 };
2296 
2297 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2298  struct ip_vs_service_user *usvc_compat)
2299 {
2300  memset(usvc, 0, sizeof(*usvc));
2301 
2302  usvc->af = AF_INET;
2303  usvc->protocol = usvc_compat->protocol;
2304  usvc->addr.ip = usvc_compat->addr;
2305  usvc->port = usvc_compat->port;
2306  usvc->fwmark = usvc_compat->fwmark;
2307 
2308  /* Deep copy of sched_name is not needed here */
2309  usvc->sched_name = usvc_compat->sched_name;
2310 
2311  usvc->flags = usvc_compat->flags;
2312  usvc->timeout = usvc_compat->timeout;
2313  usvc->netmask = usvc_compat->netmask;
2314 }
2315 
2316 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2317  struct ip_vs_dest_user *udest_compat)
2318 {
2319  memset(udest, 0, sizeof(*udest));
2320 
2321  udest->addr.ip = udest_compat->addr;
2322  udest->port = udest_compat->port;
2323  udest->conn_flags = udest_compat->conn_flags;
2324  udest->weight = udest_compat->weight;
2325  udest->u_threshold = udest_compat->u_threshold;
2326  udest->l_threshold = udest_compat->l_threshold;
2327 }
2328 
2329 static int
2330 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2331 {
2332  struct net *net = sock_net(sk);
2333  int ret;
2334  unsigned char arg[MAX_ARG_LEN];
2335  struct ip_vs_service_user *usvc_compat;
2336  struct ip_vs_service_user_kern usvc;
2337  struct ip_vs_service *svc;
2338  struct ip_vs_dest_user *udest_compat;
2339  struct ip_vs_dest_user_kern udest;
2340  struct netns_ipvs *ipvs = net_ipvs(net);
2341 
2342  if (!capable(CAP_NET_ADMIN))
2343  return -EPERM;
2344 
2346  return -EINVAL;
2347  if (len < 0 || len > MAX_ARG_LEN)
2348  return -EINVAL;
2349  if (len != set_arglen[SET_CMDID(cmd)]) {
2350  pr_err("set_ctl: len %u != %u\n",
2351  len, set_arglen[SET_CMDID(cmd)]);
2352  return -EINVAL;
2353  }
2354 
2355  if (copy_from_user(arg, user, len) != 0)
2356  return -EFAULT;
2357 
2358  /* increase the module use count */
2360 
2361  /* Handle daemons since they have another lock */
2362  if (cmd == IP_VS_SO_SET_STARTDAEMON ||
2363  cmd == IP_VS_SO_SET_STOPDAEMON) {
2364  struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2365 
2366  if (mutex_lock_interruptible(&ipvs->sync_mutex)) {
2367  ret = -ERESTARTSYS;
2368  goto out_dec;
2369  }
2370  if (cmd == IP_VS_SO_SET_STARTDAEMON)
2371  ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
2372  dm->syncid);
2373  else
2374  ret = stop_sync_thread(net, dm->state);
2375  mutex_unlock(&ipvs->sync_mutex);
2376  goto out_dec;
2377  }
2378 
2379  if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2380  ret = -ERESTARTSYS;
2381  goto out_dec;
2382  }
2383 
2384  if (cmd == IP_VS_SO_SET_FLUSH) {
2385  /* Flush the virtual service */
2386  ret = ip_vs_flush(net);
2387  goto out_unlock;
2388  } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2389  /* Set timeout values for (tcp tcpfin udp) */
2390  ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
2391  goto out_unlock;
2392  }
2393 
2394  usvc_compat = (struct ip_vs_service_user *)arg;
2395  udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2396 
2397  /* We only use the new structs internally, so copy userspace compat
2398  * structs to extended internal versions */
2399  ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2400  ip_vs_copy_udest_compat(&udest, udest_compat);
2401 
2402  if (cmd == IP_VS_SO_SET_ZERO) {
2403  /* if no service address is set, zero counters in all */
2404  if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2405  ret = ip_vs_zero_all(net);
2406  goto out_unlock;
2407  }
2408  }
2409 
2410  /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2411  if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2412  usvc.protocol != IPPROTO_SCTP) {
2413  pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2414  usvc.protocol, &usvc.addr.ip,
2415  ntohs(usvc.port), usvc.sched_name);
2416  ret = -EFAULT;
2417  goto out_unlock;
2418  }
2419 
2420  /* Lookup the exact service by <protocol, addr, port> or fwmark */
2421  if (usvc.fwmark == 0)
2422  svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
2423  &usvc.addr, usvc.port);
2424  else
2425  svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
2426 
2427  if (cmd != IP_VS_SO_SET_ADD
2428  && (svc == NULL || svc->protocol != usvc.protocol)) {
2429  ret = -ESRCH;
2430  goto out_unlock;
2431  }
2432 
2433  switch (cmd) {
2434  case IP_VS_SO_SET_ADD:
2435  if (svc != NULL)
2436  ret = -EEXIST;
2437  else
2438  ret = ip_vs_add_service(net, &usvc, &svc);
2439  break;
2440  case IP_VS_SO_SET_EDIT:
2441  ret = ip_vs_edit_service(svc, &usvc);
2442  break;
2443  case IP_VS_SO_SET_DEL:
2444  ret = ip_vs_del_service(svc);
2445  if (!ret)
2446  goto out_unlock;
2447  break;
2448  case IP_VS_SO_SET_ZERO:
2449  ret = ip_vs_zero_service(svc);
2450  break;
2451  case IP_VS_SO_SET_ADDDEST:
2452  ret = ip_vs_add_dest(svc, &udest);
2453  break;
2454  case IP_VS_SO_SET_EDITDEST:
2455  ret = ip_vs_edit_dest(svc, &udest);
2456  break;
2457  case IP_VS_SO_SET_DELDEST:
2458  ret = ip_vs_del_dest(svc, &udest);
2459  break;
2460  default:
2461  ret = -EINVAL;
2462  }
2463 
2464  out_unlock:
2465  mutex_unlock(&__ip_vs_mutex);
2466  out_dec:
2467  /* decrease the module use count */
2469 
2470  return ret;
2471 }
2472 
2473 
2474 static void
2475 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2476 {
2477  dst->protocol = src->protocol;
2478  dst->addr = src->addr.ip;
2479  dst->port = src->port;
2480  dst->fwmark = src->fwmark;
2481  strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2482  dst->flags = src->flags;
2483  dst->timeout = src->timeout / HZ;
2484  dst->netmask = src->netmask;
2485  dst->num_dests = src->num_dests;
2486  ip_vs_copy_stats(&dst->stats, &src->stats);
2487 }
2488 
2489 static inline int
2490 __ip_vs_get_service_entries(struct net *net,
2491  const struct ip_vs_get_services *get,
2492  struct ip_vs_get_services __user *uptr)
2493 {
2494  int idx, count=0;
2495  struct ip_vs_service *svc;
2496  struct ip_vs_service_entry entry;
2497  int ret = 0;
2498 
2499  for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2500  list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2501  /* Only expose IPv4 entries to old interface */
2502  if (svc->af != AF_INET || !net_eq(svc->net, net))
2503  continue;
2504 
2505  if (count >= get->num_services)
2506  goto out;
2507  memset(&entry, 0, sizeof(entry));
2508  ip_vs_copy_service(&entry, svc);
2509  if (copy_to_user(&uptr->entrytable[count],
2510  &entry, sizeof(entry))) {
2511  ret = -EFAULT;
2512  goto out;
2513  }
2514  count++;
2515  }
2516  }
2517 
2518  for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2519  list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2520  /* Only expose IPv4 entries to old interface */
2521  if (svc->af != AF_INET || !net_eq(svc->net, net))
2522  continue;
2523 
2524  if (count >= get->num_services)
2525  goto out;
2526  memset(&entry, 0, sizeof(entry));
2527  ip_vs_copy_service(&entry, svc);
2528  if (copy_to_user(&uptr->entrytable[count],
2529  &entry, sizeof(entry))) {
2530  ret = -EFAULT;
2531  goto out;
2532  }
2533  count++;
2534  }
2535  }
2536 out:
2537  return ret;
2538 }
2539 
2540 static inline int
2541 __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
2542  struct ip_vs_get_dests __user *uptr)
2543 {
2544  struct ip_vs_service *svc;
2545  union nf_inet_addr addr = { .ip = get->addr };
2546  int ret = 0;
2547 
2548  if (get->fwmark)
2549  svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
2550  else
2551  svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
2552  get->port);
2553 
2554  if (svc) {
2555  int count = 0;
2556  struct ip_vs_dest *dest;
2557  struct ip_vs_dest_entry entry;
2558 
2559  list_for_each_entry(dest, &svc->destinations, n_list) {
2560  if (count >= get->num_dests)
2561  break;
2562 
2563  entry.addr = dest->addr.ip;
2564  entry.port = dest->port;
2565  entry.conn_flags = atomic_read(&dest->conn_flags);
2566  entry.weight = atomic_read(&dest->weight);
2567  entry.u_threshold = dest->u_threshold;
2568  entry.l_threshold = dest->l_threshold;
2569  entry.activeconns = atomic_read(&dest->activeconns);
2570  entry.inactconns = atomic_read(&dest->inactconns);
2571  entry.persistconns = atomic_read(&dest->persistconns);
2572  ip_vs_copy_stats(&entry.stats, &dest->stats);
2573  if (copy_to_user(&uptr->entrytable[count],
2574  &entry, sizeof(entry))) {
2575  ret = -EFAULT;
2576  break;
2577  }
2578  count++;
2579  }
2580  } else
2581  ret = -ESRCH;
2582  return ret;
2583 }
2584 
2585 static inline void
2586 __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
2587 {
2588 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
2589  struct ip_vs_proto_data *pd;
2590 #endif
2591 
2592  memset(u, 0, sizeof (*u));
2593 
2594 #ifdef CONFIG_IP_VS_PROTO_TCP
2595  pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
2598 #endif
2599 #ifdef CONFIG_IP_VS_PROTO_UDP
2600  pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
2601  u->udp_timeout =
2603 #endif
2604 }
2605 
2606 
2607 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2608 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2609 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2610 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2611 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2612 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2613 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2614 
2615 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2623 };
2624 
2625 static int
2626 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2627 {
2628  unsigned char arg[128];
2629  int ret = 0;
2630  unsigned int copylen;
2631  struct net *net = sock_net(sk);
2632  struct netns_ipvs *ipvs = net_ipvs(net);
2633 
2634  BUG_ON(!net);
2635  if (!capable(CAP_NET_ADMIN))
2636  return -EPERM;
2637 
2639  return -EINVAL;
2640 
2641  if (*len < get_arglen[GET_CMDID(cmd)]) {
2642  pr_err("get_ctl: len %u < %u\n",
2643  *len, get_arglen[GET_CMDID(cmd)]);
2644  return -EINVAL;
2645  }
2646 
2647  copylen = get_arglen[GET_CMDID(cmd)];
2648  if (copylen > 128)
2649  return -EINVAL;
2650 
2651  if (copy_from_user(arg, user, copylen) != 0)
2652  return -EFAULT;
2653  /*
2654  * Handle daemons first since it has its own locking
2655  */
2656  if (cmd == IP_VS_SO_GET_DAEMON) {
2657  struct ip_vs_daemon_user d[2];
2658 
2659  memset(&d, 0, sizeof(d));
2661  return -ERESTARTSYS;
2662 
2663  if (ipvs->sync_state & IP_VS_STATE_MASTER) {
2664  d[0].state = IP_VS_STATE_MASTER;
2665  strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
2666  sizeof(d[0].mcast_ifn));
2667  d[0].syncid = ipvs->master_syncid;
2668  }
2669  if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
2670  d[1].state = IP_VS_STATE_BACKUP;
2671  strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
2672  sizeof(d[1].mcast_ifn));
2673  d[1].syncid = ipvs->backup_syncid;
2674  }
2675  if (copy_to_user(user, &d, sizeof(d)) != 0)
2676  ret = -EFAULT;
2677  mutex_unlock(&ipvs->sync_mutex);
2678  return ret;
2679  }
2680 
2681  if (mutex_lock_interruptible(&__ip_vs_mutex))
2682  return -ERESTARTSYS;
2683 
2684  switch (cmd) {
2685  case IP_VS_SO_GET_VERSION:
2686  {
2687  char buf[64];
2688 
2689  sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2690  NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2691  if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2692  ret = -EFAULT;
2693  goto out;
2694  }
2695  *len = strlen(buf)+1;
2696  }
2697  break;
2698 
2699  case IP_VS_SO_GET_INFO:
2700  {
2701  struct ip_vs_getinfo info;
2702  info.version = IP_VS_VERSION_CODE;
2703  info.size = ip_vs_conn_tab_size;
2704  info.num_services = ipvs->num_services;
2705  if (copy_to_user(user, &info, sizeof(info)) != 0)
2706  ret = -EFAULT;
2707  }
2708  break;
2709 
2710  case IP_VS_SO_GET_SERVICES:
2711  {
2712  struct ip_vs_get_services *get;
2713  int size;
2714 
2715  get = (struct ip_vs_get_services *)arg;
2716  size = sizeof(*get) +
2717  sizeof(struct ip_vs_service_entry) * get->num_services;
2718  if (*len != size) {
2719  pr_err("length: %u != %u\n", *len, size);
2720  ret = -EINVAL;
2721  goto out;
2722  }
2723  ret = __ip_vs_get_service_entries(net, get, user);
2724  }
2725  break;
2726 
2727  case IP_VS_SO_GET_SERVICE:
2728  {
2729  struct ip_vs_service_entry *entry;
2730  struct ip_vs_service *svc;
2731  union nf_inet_addr addr;
2732 
2733  entry = (struct ip_vs_service_entry *)arg;
2734  addr.ip = entry->addr;
2735  if (entry->fwmark)
2736  svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
2737  else
2738  svc = __ip_vs_service_find(net, AF_INET,
2739  entry->protocol, &addr,
2740  entry->port);
2741  if (svc) {
2742  ip_vs_copy_service(entry, svc);
2743  if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2744  ret = -EFAULT;
2745  } else
2746  ret = -ESRCH;
2747  }
2748  break;
2749 
2750  case IP_VS_SO_GET_DESTS:
2751  {
2752  struct ip_vs_get_dests *get;
2753  int size;
2754 
2755  get = (struct ip_vs_get_dests *)arg;
2756  size = sizeof(*get) +
2757  sizeof(struct ip_vs_dest_entry) * get->num_dests;
2758  if (*len != size) {
2759  pr_err("length: %u != %u\n", *len, size);
2760  ret = -EINVAL;
2761  goto out;
2762  }
2763  ret = __ip_vs_get_dest_entries(net, get, user);
2764  }
2765  break;
2766 
2767  case IP_VS_SO_GET_TIMEOUT:
2768  {
2769  struct ip_vs_timeout_user t;
2770 
2771  __ip_vs_get_timeouts(net, &t);
2772  if (copy_to_user(user, &t, sizeof(t)) != 0)
2773  ret = -EFAULT;
2774  }
2775  break;
2776 
2777  default:
2778  ret = -EINVAL;
2779  }
2780 
2781 out:
2782  mutex_unlock(&__ip_vs_mutex);
2783  return ret;
2784 }
2785 
2786 
2787 static struct nf_sockopt_ops ip_vs_sockopts = {
2788  .pf = PF_INET,
2789  .set_optmin = IP_VS_BASE_CTL,
2790  .set_optmax = IP_VS_SO_SET_MAX+1,
2791  .set = do_ip_vs_set_ctl,
2792  .get_optmin = IP_VS_BASE_CTL,
2793  .get_optmax = IP_VS_SO_GET_MAX+1,
2794  .get = do_ip_vs_get_ctl,
2795  .owner = THIS_MODULE,
2796 };
2797 
2798 /*
2799  * Generic Netlink interface
2800  */
2801 
2802 /* IPVS genetlink family */
2803 static struct genl_family ip_vs_genl_family = {
2804  .id = GENL_ID_GENERATE,
2805  .hdrsize = 0,
2806  .name = IPVS_GENL_NAME,
2807  .version = IPVS_GENL_VERSION,
2808  .maxattr = IPVS_CMD_MAX,
2809  .netnsok = true, /* Make ipvsadm to work on netns */
2810 };
2811 
2812 /* Policy used for first-level command attributes */
2813 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2814  [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2815  [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2816  [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2817  [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2818  [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2819  [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2820 };
2821 
2822 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2823 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2824  [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2826  .len = IP_VS_IFNAME_MAXLEN },
2827  [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2828 };
2829 
2830 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2831 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2832  [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2833  [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2834  [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2835  .len = sizeof(union nf_inet_addr) },
2836  [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2837  [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2839  .len = IP_VS_SCHEDNAME_MAXLEN },
2840  [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2841  .len = IP_VS_PENAME_MAXLEN },
2842  [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2843  .len = sizeof(struct ip_vs_flags) },
2844  [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2845  [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2846  [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2847 };
2848 
2849 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2850 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2851  [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2852  .len = sizeof(union nf_inet_addr) },
2853  [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2854  [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2855  [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2856  [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2857  [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2858  [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2859  [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2860  [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2861  [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2862 };
2863 
2864 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2865  struct ip_vs_stats *stats)
2866 {
2867  struct ip_vs_stats_user ustats;
2868  struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2869  if (!nl_stats)
2870  return -EMSGSIZE;
2871 
2872  ip_vs_copy_stats(&ustats, stats);
2873 
2874  if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
2875  nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
2876  nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
2877  nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
2878  nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
2879  nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
2880  nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
2881  nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
2882  nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
2883  nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
2884  goto nla_put_failure;
2885  nla_nest_end(skb, nl_stats);
2886 
2887  return 0;
2888 
2889 nla_put_failure:
2890  nla_nest_cancel(skb, nl_stats);
2891  return -EMSGSIZE;
2892 }
2893 
2894 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2895  struct ip_vs_service *svc)
2896 {
2897  struct nlattr *nl_service;
2898  struct ip_vs_flags flags = { .flags = svc->flags,
2899  .mask = ~0 };
2900 
2901  nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2902  if (!nl_service)
2903  return -EMSGSIZE;
2904 
2905  if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af))
2906  goto nla_put_failure;
2907  if (svc->fwmark) {
2908  if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark))
2909  goto nla_put_failure;
2910  } else {
2911  if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) ||
2912  nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) ||
2913  nla_put_u16(skb, IPVS_SVC_ATTR_PORT, svc->port))
2914  goto nla_put_failure;
2915  }
2916 
2917  if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name) ||
2918  (svc->pe &&
2919  nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name)) ||
2920  nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
2921  nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
2922  nla_put_u32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
2923  goto nla_put_failure;
2924  if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2925  goto nla_put_failure;
2926 
2927  nla_nest_end(skb, nl_service);
2928 
2929  return 0;
2930 
2931 nla_put_failure:
2932  nla_nest_cancel(skb, nl_service);
2933  return -EMSGSIZE;
2934 }
2935 
2936 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2937  struct ip_vs_service *svc,
2938  struct netlink_callback *cb)
2939 {
2940  void *hdr;
2941 
2942  hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
2943  &ip_vs_genl_family, NLM_F_MULTI,
2945  if (!hdr)
2946  return -EMSGSIZE;
2947 
2948  if (ip_vs_genl_fill_service(skb, svc) < 0)
2949  goto nla_put_failure;
2950 
2951  return genlmsg_end(skb, hdr);
2952 
2953 nla_put_failure:
2954  genlmsg_cancel(skb, hdr);
2955  return -EMSGSIZE;
2956 }
2957 
2958 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2959  struct netlink_callback *cb)
2960 {
2961  int idx = 0, i;
2962  int start = cb->args[0];
2963  struct ip_vs_service *svc;
2964  struct net *net = skb_sknet(skb);
2965 
2966  mutex_lock(&__ip_vs_mutex);
2967  for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2968  list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2969  if (++idx <= start || !net_eq(svc->net, net))
2970  continue;
2971  if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2972  idx--;
2973  goto nla_put_failure;
2974  }
2975  }
2976  }
2977 
2978  for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2979  list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2980  if (++idx <= start || !net_eq(svc->net, net))
2981  continue;
2982  if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2983  idx--;
2984  goto nla_put_failure;
2985  }
2986  }
2987  }
2988 
2989 nla_put_failure:
2990  mutex_unlock(&__ip_vs_mutex);
2991  cb->args[0] = idx;
2992 
2993  return skb->len;
2994 }
2995 
2996 static int ip_vs_genl_parse_service(struct net *net,
2997  struct ip_vs_service_user_kern *usvc,
2998  struct nlattr *nla, int full_entry,
2999  struct ip_vs_service **ret_svc)
3000 {
3001  struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
3002  struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
3003  struct ip_vs_service *svc;
3004 
3005  /* Parse mandatory identifying service fields first */
3006  if (nla == NULL ||
3007  nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
3008  return -EINVAL;
3009 
3010  nla_af = attrs[IPVS_SVC_ATTR_AF];
3011  nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
3012  nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
3013  nla_port = attrs[IPVS_SVC_ATTR_PORT];
3014  nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
3015 
3016  if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
3017  return -EINVAL;
3018 
3019  memset(usvc, 0, sizeof(*usvc));
3020 
3021  usvc->af = nla_get_u16(nla_af);
3022 #ifdef CONFIG_IP_VS_IPV6
3023  if (usvc->af != AF_INET && usvc->af != AF_INET6)
3024 #else
3025  if (usvc->af != AF_INET)
3026 #endif
3027  return -EAFNOSUPPORT;
3028 
3029  if (nla_fwmark) {
3030  usvc->protocol = IPPROTO_TCP;
3031  usvc->fwmark = nla_get_u32(nla_fwmark);
3032  } else {
3033  usvc->protocol = nla_get_u16(nla_protocol);
3034  nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
3035  usvc->port = nla_get_u16(nla_port);
3036  usvc->fwmark = 0;
3037  }
3038 
3039  if (usvc->fwmark)
3040  svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
3041  else
3042  svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
3043  &usvc->addr, usvc->port);
3044  *ret_svc = svc;
3045 
3046  /* If a full entry was requested, check for the additional fields */
3047  if (full_entry) {
3048  struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
3049  *nla_netmask;
3050  struct ip_vs_flags flags;
3051 
3052  nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
3053  nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
3054  nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
3055  nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
3056  nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
3057 
3058  if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
3059  return -EINVAL;
3060 
3061  nla_memcpy(&flags, nla_flags, sizeof(flags));
3062 
3063  /* prefill flags from service if it already exists */
3064  if (svc)
3065  usvc->flags = svc->flags;
3066 
3067  /* set new flags from userland */
3068  usvc->flags = (usvc->flags & ~flags.mask) |
3069  (flags.flags & flags.mask);
3070  usvc->sched_name = nla_data(nla_sched);
3071  usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
3072  usvc->timeout = nla_get_u32(nla_timeout);
3073  usvc->netmask = nla_get_u32(nla_netmask);
3074  }
3075 
3076  return 0;
3077 }
3078 
3079 static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
3080  struct nlattr *nla)
3081 {
3082  struct ip_vs_service_user_kern usvc;
3083  struct ip_vs_service *svc;
3084  int ret;
3085 
3086  ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
3087  return ret ? ERR_PTR(ret) : svc;
3088 }
3089 
3090 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
3091 {
3092  struct nlattr *nl_dest;
3093 
3094  nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
3095  if (!nl_dest)
3096  return -EMSGSIZE;
3097 
3098  if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) ||
3099  nla_put_u16(skb, IPVS_DEST_ATTR_PORT, dest->port) ||
3100  nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD,
3101  (atomic_read(&dest->conn_flags) &
3102  IP_VS_CONN_F_FWD_MASK)) ||
3103  nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
3104  atomic_read(&dest->weight)) ||
3105  nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
3106  nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
3107  nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
3108  atomic_read(&dest->activeconns)) ||
3109  nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS,
3110  atomic_read(&dest->inactconns)) ||
3111  nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
3112  atomic_read(&dest->persistconns)))
3113  goto nla_put_failure;
3114  if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
3115  goto nla_put_failure;
3116 
3117  nla_nest_end(skb, nl_dest);
3118 
3119  return 0;
3120 
3121 nla_put_failure:
3122  nla_nest_cancel(skb, nl_dest);
3123  return -EMSGSIZE;
3124 }
3125 
3126 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
3127  struct netlink_callback *cb)
3128 {
3129  void *hdr;
3130 
3131  hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3132  &ip_vs_genl_family, NLM_F_MULTI,
3134  if (!hdr)
3135  return -EMSGSIZE;
3136 
3137  if (ip_vs_genl_fill_dest(skb, dest) < 0)
3138  goto nla_put_failure;
3139 
3140  return genlmsg_end(skb, hdr);
3141 
3142 nla_put_failure:
3143  genlmsg_cancel(skb, hdr);
3144  return -EMSGSIZE;
3145 }
3146 
3147 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
3148  struct netlink_callback *cb)
3149 {
3150  int idx = 0;
3151  int start = cb->args[0];
3152  struct ip_vs_service *svc;
3153  struct ip_vs_dest *dest;
3154  struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
3155  struct net *net = skb_sknet(skb);
3156 
3157  mutex_lock(&__ip_vs_mutex);
3158 
3159  /* Try to find the service for which to dump destinations */
3160  if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
3161  IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
3162  goto out_err;
3163 
3164 
3165  svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
3166  if (IS_ERR(svc) || svc == NULL)
3167  goto out_err;
3168 
3169  /* Dump the destinations */
3170  list_for_each_entry(dest, &svc->destinations, n_list) {
3171  if (++idx <= start)
3172  continue;
3173  if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
3174  idx--;
3175  goto nla_put_failure;
3176  }
3177  }
3178 
3179 nla_put_failure:
3180  cb->args[0] = idx;
3181 
3182 out_err:
3183  mutex_unlock(&__ip_vs_mutex);
3184 
3185  return skb->len;
3186 }
3187 
3188 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
3189  struct nlattr *nla, int full_entry)
3190 {
3191  struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
3192  struct nlattr *nla_addr, *nla_port;
3193 
3194  /* Parse mandatory identifying destination fields first */
3195  if (nla == NULL ||
3196  nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
3197  return -EINVAL;
3198 
3199  nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
3200  nla_port = attrs[IPVS_DEST_ATTR_PORT];
3201 
3202  if (!(nla_addr && nla_port))
3203  return -EINVAL;
3204 
3205  memset(udest, 0, sizeof(*udest));
3206 
3207  nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
3208  udest->port = nla_get_u16(nla_port);
3209 
3210  /* If a full entry was requested, check for the additional fields */
3211  if (full_entry) {
3212  struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
3213  *nla_l_thresh;
3214 
3215  nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
3216  nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
3217  nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
3218  nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
3219 
3220  if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
3221  return -EINVAL;
3222 
3223  udest->conn_flags = nla_get_u32(nla_fwd)
3225  udest->weight = nla_get_u32(nla_weight);
3226  udest->u_threshold = nla_get_u32(nla_u_thresh);
3227  udest->l_threshold = nla_get_u32(nla_l_thresh);
3228  }
3229 
3230  return 0;
3231 }
3232 
3233 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
3234  const char *mcast_ifn, __be32 syncid)
3235 {
3236  struct nlattr *nl_daemon;
3237 
3238  nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
3239  if (!nl_daemon)
3240  return -EMSGSIZE;
3241 
3242  if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) ||
3243  nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn) ||
3244  nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid))
3245  goto nla_put_failure;
3246  nla_nest_end(skb, nl_daemon);
3247 
3248  return 0;
3249 
3250 nla_put_failure:
3251  nla_nest_cancel(skb, nl_daemon);
3252  return -EMSGSIZE;
3253 }
3254 
3255 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
3256  const char *mcast_ifn, __be32 syncid,
3257  struct netlink_callback *cb)
3258 {
3259  void *hdr;
3260  hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
3261  &ip_vs_genl_family, NLM_F_MULTI,
3263  if (!hdr)
3264  return -EMSGSIZE;
3265 
3266  if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
3267  goto nla_put_failure;
3268 
3269  return genlmsg_end(skb, hdr);
3270 
3271 nla_put_failure:
3272  genlmsg_cancel(skb, hdr);
3273  return -EMSGSIZE;
3274 }
3275 
3276 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3277  struct netlink_callback *cb)
3278 {
3279  struct net *net = skb_sknet(skb);
3280  struct netns_ipvs *ipvs = net_ipvs(net);
3281 
3282  mutex_lock(&ipvs->sync_mutex);
3283  if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3284  if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3285  ipvs->master_mcast_ifn,
3286  ipvs->master_syncid, cb) < 0)
3287  goto nla_put_failure;
3288 
3289  cb->args[0] = 1;
3290  }
3291 
3292  if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3293  if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3294  ipvs->backup_mcast_ifn,
3295  ipvs->backup_syncid, cb) < 0)
3296  goto nla_put_failure;
3297 
3298  cb->args[1] = 1;
3299  }
3300 
3301 nla_put_failure:
3302  mutex_unlock(&ipvs->sync_mutex);
3303 
3304  return skb->len;
3305 }
3306 
3307 static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
3308 {
3309  if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3310  attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3311  attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3312  return -EINVAL;
3313 
3314  return start_sync_thread(net,
3315  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3316  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3317  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3318 }
3319 
3320 static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
3321 {
3322  if (!attrs[IPVS_DAEMON_ATTR_STATE])
3323  return -EINVAL;
3324 
3325  return stop_sync_thread(net,
3326  nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3327 }
3328 
3329 static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
3330 {
3331  struct ip_vs_timeout_user t;
3332 
3333  __ip_vs_get_timeouts(net, &t);
3334 
3335  if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3336  t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3337 
3338  if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3339  t.tcp_fin_timeout =
3340  nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3341 
3342  if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3343  t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3344 
3345  return ip_vs_set_timeout(net, &t);
3346 }
3347 
3348 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
3349 {
3350  int ret = 0, cmd;
3351  struct net *net;
3352  struct netns_ipvs *ipvs;
3353 
3354  net = skb_sknet(skb);
3355  ipvs = net_ipvs(net);
3356  cmd = info->genlhdr->cmd;
3357 
3358  if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
3359  struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3360 
3361  mutex_lock(&ipvs->sync_mutex);
3362  if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3363  nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3364  info->attrs[IPVS_CMD_ATTR_DAEMON],
3365  ip_vs_daemon_policy)) {
3366  ret = -EINVAL;
3367  goto out;
3368  }
3369 
3370  if (cmd == IPVS_CMD_NEW_DAEMON)
3371  ret = ip_vs_genl_new_daemon(net, daemon_attrs);
3372  else
3373  ret = ip_vs_genl_del_daemon(net, daemon_attrs);
3374 out:
3375  mutex_unlock(&ipvs->sync_mutex);
3376  }
3377  return ret;
3378 }
3379 
3380 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3381 {
3382  struct ip_vs_service *svc = NULL;
3383  struct ip_vs_service_user_kern usvc;
3384  struct ip_vs_dest_user_kern udest;
3385  int ret = 0, cmd;
3386  int need_full_svc = 0, need_full_dest = 0;
3387  struct net *net;
3388 
3389  net = skb_sknet(skb);
3390  cmd = info->genlhdr->cmd;
3391 
3392  mutex_lock(&__ip_vs_mutex);
3393 
3394  if (cmd == IPVS_CMD_FLUSH) {
3395  ret = ip_vs_flush(net);
3396  goto out;
3397  } else if (cmd == IPVS_CMD_SET_CONFIG) {
3398  ret = ip_vs_genl_set_config(net, info->attrs);
3399  goto out;
3400  } else if (cmd == IPVS_CMD_ZERO &&
3401  !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3402  ret = ip_vs_zero_all(net);
3403  goto out;
3404  }
3405 
3406  /* All following commands require a service argument, so check if we
3407  * received a valid one. We need a full service specification when
3408  * adding / editing a service. Only identifying members otherwise. */
3409  if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3410  need_full_svc = 1;
3411 
3412  ret = ip_vs_genl_parse_service(net, &usvc,
3414  need_full_svc, &svc);
3415  if (ret)
3416  goto out;
3417 
3418  /* Unless we're adding a new service, the service must already exist */
3419  if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3420  ret = -ESRCH;
3421  goto out;
3422  }
3423 
3424  /* Destination commands require a valid destination argument. For
3425  * adding / editing a destination, we need a full destination
3426  * specification. */
3427  if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3428  cmd == IPVS_CMD_DEL_DEST) {
3429  if (cmd != IPVS_CMD_DEL_DEST)
3430  need_full_dest = 1;
3431 
3432  ret = ip_vs_genl_parse_dest(&udest,
3433  info->attrs[IPVS_CMD_ATTR_DEST],
3434  need_full_dest);
3435  if (ret)
3436  goto out;
3437  }
3438 
3439  switch (cmd) {
3440  case IPVS_CMD_NEW_SERVICE:
3441  if (svc == NULL)
3442  ret = ip_vs_add_service(net, &usvc, &svc);
3443  else
3444  ret = -EEXIST;
3445  break;
3446  case IPVS_CMD_SET_SERVICE:
3447  ret = ip_vs_edit_service(svc, &usvc);
3448  break;
3449  case IPVS_CMD_DEL_SERVICE:
3450  ret = ip_vs_del_service(svc);
3451  /* do not use svc, it can be freed */
3452  break;
3453  case IPVS_CMD_NEW_DEST:
3454  ret = ip_vs_add_dest(svc, &udest);
3455  break;
3456  case IPVS_CMD_SET_DEST:
3457  ret = ip_vs_edit_dest(svc, &udest);
3458  break;
3459  case IPVS_CMD_DEL_DEST:
3460  ret = ip_vs_del_dest(svc, &udest);
3461  break;
3462  case IPVS_CMD_ZERO:
3463  ret = ip_vs_zero_service(svc);
3464  break;
3465  default:
3466  ret = -EINVAL;
3467  }
3468 
3469 out:
3470  mutex_unlock(&__ip_vs_mutex);
3471 
3472  return ret;
3473 }
3474 
3475 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3476 {
3477  struct sk_buff *msg;
3478  void *reply;
3479  int ret, cmd, reply_cmd;
3480  struct net *net;
3481 
3482  net = skb_sknet(skb);
3483  cmd = info->genlhdr->cmd;
3484 
3485  if (cmd == IPVS_CMD_GET_SERVICE)
3486  reply_cmd = IPVS_CMD_NEW_SERVICE;
3487  else if (cmd == IPVS_CMD_GET_INFO)
3488  reply_cmd = IPVS_CMD_SET_INFO;
3489  else if (cmd == IPVS_CMD_GET_CONFIG)
3490  reply_cmd = IPVS_CMD_SET_CONFIG;
3491  else {
3492  pr_err("unknown Generic Netlink command\n");
3493  return -EINVAL;
3494  }
3495 
3496  msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3497  if (!msg)
3498  return -ENOMEM;
3499 
3500  mutex_lock(&__ip_vs_mutex);
3501 
3502  reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3503  if (reply == NULL)
3504  goto nla_put_failure;
3505 
3506  switch (cmd) {
3507  case IPVS_CMD_GET_SERVICE:
3508  {
3509  struct ip_vs_service *svc;
3510 
3511  svc = ip_vs_genl_find_service(net,
3512  info->attrs[IPVS_CMD_ATTR_SERVICE]);
3513  if (IS_ERR(svc)) {
3514  ret = PTR_ERR(svc);
3515  goto out_err;
3516  } else if (svc) {
3517  ret = ip_vs_genl_fill_service(msg, svc);
3518  if (ret)
3519  goto nla_put_failure;
3520  } else {
3521  ret = -ESRCH;
3522  goto out_err;
3523  }
3524 
3525  break;
3526  }
3527 
3528  case IPVS_CMD_GET_CONFIG:
3529  {
3530  struct ip_vs_timeout_user t;
3531 
3532  __ip_vs_get_timeouts(net, &t);
3533 #ifdef CONFIG_IP_VS_PROTO_TCP
3534  if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
3535  t.tcp_timeout) ||
3536  nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3537  t.tcp_fin_timeout))
3538  goto nla_put_failure;
3539 #endif
3540 #ifdef CONFIG_IP_VS_PROTO_UDP
3541  if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout))
3542  goto nla_put_failure;
3543 #endif
3544 
3545  break;
3546  }
3547 
3548  case IPVS_CMD_GET_INFO:
3549  if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION,
3550  IP_VS_VERSION_CODE) ||
3551  nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3552  ip_vs_conn_tab_size))
3553  goto nla_put_failure;
3554  break;
3555  }
3556 
3557  genlmsg_end(msg, reply);
3558  ret = genlmsg_reply(msg, info);
3559  goto out;
3560 
3561 nla_put_failure:
3562  pr_err("not enough space in Netlink message\n");
3563  ret = -EMSGSIZE;
3564 
3565 out_err:
3566  nlmsg_free(msg);
3567 out:
3568  mutex_unlock(&__ip_vs_mutex);
3569 
3570  return ret;
3571 }
3572 
3573 
/*
 * Generic Netlink operations table for the IPVS family; it is handed to
 * genl_register_family_with_ops() by ip_vs_genl_register().
 *
 * Every entry is flagged GENL_ADMIN_PERM. The service/destination
 * NEW/SET/DEL commands, SET_CONFIG, ZERO and FLUSH are routed to
 * ip_vs_genl_set_cmd(); the daemon NEW/DEL commands to
 * ip_vs_genl_set_daemon(); GET_SERVICE, GET_CONFIG and GET_INFO to
 * ip_vs_genl_get_cmd(). GET_SERVICE, GET_DEST and GET_DAEMON additionally
 * (or only) provide a ->dumpit callback.
 */
3574 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
 /* Virtual service commands */
3575  {
3576  .cmd = IPVS_CMD_NEW_SERVICE,
3577  .flags = GENL_ADMIN_PERM,
3578  .policy = ip_vs_cmd_policy,
3579  .doit = ip_vs_genl_set_cmd,
3580  },
3581  {
3582  .cmd = IPVS_CMD_SET_SERVICE,
3583  .flags = GENL_ADMIN_PERM,
3584  .policy = ip_vs_cmd_policy,
3585  .doit = ip_vs_genl_set_cmd,
3586  },
3587  {
3588  .cmd = IPVS_CMD_DEL_SERVICE,
3589  .flags = GENL_ADMIN_PERM,
3590  .policy = ip_vs_cmd_policy,
3591  .doit = ip_vs_genl_set_cmd,
3592  },
 /* The only entry with both a ->doit and a ->dumpit handler */
3593  {
3594  .cmd = IPVS_CMD_GET_SERVICE,
3595  .flags = GENL_ADMIN_PERM,
3596  .doit = ip_vs_genl_get_cmd,
3597  .dumpit = ip_vs_genl_dump_services,
3598  .policy = ip_vs_cmd_policy,
3599  },
 /* Destination commands */
3600  {
3601  .cmd = IPVS_CMD_NEW_DEST,
3602  .flags = GENL_ADMIN_PERM,
3603  .policy = ip_vs_cmd_policy,
3604  .doit = ip_vs_genl_set_cmd,
3605  },
3606  {
3607  .cmd = IPVS_CMD_SET_DEST,
3608  .flags = GENL_ADMIN_PERM,
3609  .policy = ip_vs_cmd_policy,
3610  .doit = ip_vs_genl_set_cmd,
3611  },
3612  {
3613  .cmd = IPVS_CMD_DEL_DEST,
3614  .flags = GENL_ADMIN_PERM,
3615  .policy = ip_vs_cmd_policy,
3616  .doit = ip_vs_genl_set_cmd,
3617  },
 /* Dump only: no ->doit handler */
3618  {
3619  .cmd = IPVS_CMD_GET_DEST,
3620  .flags = GENL_ADMIN_PERM,
3621  .policy = ip_vs_cmd_policy,
3622  .dumpit = ip_vs_genl_dump_dests,
3623  },
 /* Daemon commands */
3624  {
3625  .cmd = IPVS_CMD_NEW_DAEMON,
3626  .flags = GENL_ADMIN_PERM,
3627  .policy = ip_vs_cmd_policy,
3628  .doit = ip_vs_genl_set_daemon,
3629  },
3630  {
3631  .cmd = IPVS_CMD_DEL_DAEMON,
3632  .flags = GENL_ADMIN_PERM,
3633  .policy = ip_vs_cmd_policy,
3634  .doit = ip_vs_genl_set_daemon,
3635  },
 /* Dump only, and no attribute policy is attached */
3636  {
3637  .cmd = IPVS_CMD_GET_DAEMON,
3638  .flags = GENL_ADMIN_PERM,
3639  .dumpit = ip_vs_genl_dump_daemons,
3640  },
 /* Configuration and information commands */
3641  {
3642  .cmd = IPVS_CMD_SET_CONFIG,
3643  .flags = GENL_ADMIN_PERM,
3644  .policy = ip_vs_cmd_policy,
3645  .doit = ip_vs_genl_set_cmd,
3646  },
3647  {
3648  .cmd = IPVS_CMD_GET_CONFIG,
3649  .flags = GENL_ADMIN_PERM,
3650  .doit = ip_vs_genl_get_cmd,
3651  },
3652  {
3653  .cmd = IPVS_CMD_GET_INFO,
3654  .flags = GENL_ADMIN_PERM,
3655  .doit = ip_vs_genl_get_cmd,
3656  },
 /* Counter reset and flush */
3657  {
3658  .cmd = IPVS_CMD_ZERO,
3659  .flags = GENL_ADMIN_PERM,
3660  .policy = ip_vs_cmd_policy,
3661  .doit = ip_vs_genl_set_cmd,
3662  },
3663  {
3664  .cmd = IPVS_CMD_FLUSH,
3665  .flags = GENL_ADMIN_PERM,
3666  .doit = ip_vs_genl_set_cmd,
3667  },
3668 };
3669 
3670 static int __init ip_vs_genl_register(void)
3671 {
3672  return genl_register_family_with_ops(&ip_vs_genl_family,
3673  ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3674 }
3675 
/*
 * Unregister the IPVS Generic Netlink family. The return value of
 * genl_unregister_family() is ignored.
 */
3676 static void ip_vs_genl_unregister(void)
3677 {
3678  genl_unregister_family(&ip_vs_genl_family);
3679 }
3680 
3681 /* End of Generic Netlink interface definitions */
3682 
3683 /*
3684  * per netns init/exit functions.
3685  */
3686 #ifdef CONFIG_SYSCTL
/*
 * Per-netns sysctl setup (CONFIG_SYSCTL build).
 *
 * Initializes the dropentry counter and the three defense spinlocks, picks
 * the sysctl table for this namespace (vs_vars itself for init_net, a
 * kmemdup() copy for any other namespace), points each table entry at the
 * matching per-netns variable, registers the table under "net/ipv4/vs",
 * starts the estimator for tot_stats and schedules the defense work.
 *
 * Returns 0 on success, -ENOMEM if the table copy or the sysctl
 * registration fails.
 *
 * NOTE(review): this listing has lost a few lines inside the body; the
 * gaps are marked below and must be checked against the original file.
 */
3687 static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
3688 {
3689  int idx;
3690  struct netns_ipvs *ipvs = net_ipvs(net);
3691  struct ctl_table *tbl;
3692 
3693  atomic_set(&ipvs->dropentry, 0);
3694  spin_lock_init(&ipvs->dropentry_lock);
3695  spin_lock_init(&ipvs->droppacket_lock);
3696  spin_lock_init(&ipvs->securetcp_lock);
3697 
 /* init_net uses the static table in place; other namespaces get a copy */
3698  if (!net_eq(net, &init_net)) {
3699  tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
3700  if (tbl == NULL)
3701  return -ENOMEM;
3702  } else
3703  tbl = vs_vars;
3704  /* Initialize sysctl defaults */
 /*
  * Entries are bound by position via idx; the sequence below presumably
  * has to follow the entry order of vs_vars[] exactly, including the
  * CONFIG_IP_VS_NFCT conditional - verify against the vs_vars definition.
  */
3705  idx = 0;
3706  ipvs->sysctl_amemthresh = 1024;
3707  tbl[idx++].data = &ipvs->sysctl_amemthresh;
3708  ipvs->sysctl_am_droprate = 10;
3709  tbl[idx++].data = &ipvs->sysctl_am_droprate;
3710  tbl[idx++].data = &ipvs->sysctl_drop_entry;
3711  tbl[idx++].data = &ipvs->sysctl_drop_packet;
3712 #ifdef CONFIG_IP_VS_NFCT
3713  tbl[idx++].data = &ipvs->sysctl_conntrack;
3714 #endif
3715  tbl[idx++].data = &ipvs->sysctl_secure_tcp;
3716  ipvs->sysctl_snat_reroute = 1;
3717  tbl[idx++].data = &ipvs->sysctl_snat_reroute;
3718  ipvs->sysctl_sync_ver = 1;
3719  tbl[idx++].data = &ipvs->sysctl_sync_ver;
3720  ipvs->sysctl_sync_ports = 1;
3721  tbl[idx++].data = &ipvs->sysctl_sync_ports;
 /* NOTE(review): listing line 3722 is missing here (presumably the default for sysctl_sync_qlen_max) - confirm */
3723  tbl[idx++].data = &ipvs->sysctl_sync_qlen_max;
3724  ipvs->sysctl_sync_sock_size = 0;
3725  tbl[idx++].data = &ipvs->sysctl_sync_sock_size;
3726  tbl[idx++].data = &ipvs->sysctl_cache_bypass;
3727  tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
3728  tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
 /* NOTE(review): listing lines 3729-3730 are missing here (presumably the sysctl_sync_threshold defaults) - confirm */
 /* Same table entry gets both .data and .maxlen; idx advances only once */
3731  tbl[idx].data = &ipvs->sysctl_sync_threshold;
3732  tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
 /* NOTE(review): listing line 3733 is missing here (presumably the default for sysctl_sync_refresh_period) - confirm */
3734  tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
3735  ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3);
3736  tbl[idx++].data = &ipvs->sysctl_sync_retries;
3737  tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
3738  ipvs->sysctl_pmtu_disc = 1;
3739  tbl[idx++].data = &ipvs->sysctl_pmtu_disc;
3740 
3741 
3742  ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
3743  if (ipvs->sysctl_hdr == NULL) {
 /* Only a kmemdup() copy may be freed; vs_vars itself is static */
3744  if (!net_eq(net, &init_net))
3745  kfree(tbl);
3746  return -ENOMEM;
3747  }
3748  ip_vs_start_estimator(net, &ipvs->tot_stats);
 /* Remember the table that was registered for this namespace */
3749  ipvs->sysctl_tbl = tbl;
3750  /* Schedule defense work */
3751  INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
3752  schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
3753 
3754  return 0;
3755 }
3756 
3757 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
3758 {
3759  struct netns_ipvs *ipvs = net_ipvs(net);
3760 
3761  cancel_delayed_work_sync(&ipvs->defense_work);
3762  cancel_work_sync(&ipvs->defense_work.work);
3763  unregister_net_sysctl_table(ipvs->sysctl_hdr);
3764 }
3765 
3766 #else
3767 
/*
 * Builds without CONFIG_SYSCTL: there is no sysctl state to set up or tear
 * down, so the init stub reports success and the cleanup stub is empty.
 */
3768 static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
3769 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
3770 
3771 #endif
3772 
/*
 * Notifier block whose callback is ip_vs_dst_event(). It is passed to
 * register_netdevice_notifier() and unregister_netdevice_notifier()
 * further down in this file.
 */
3773 static struct notifier_block ip_vs_dst_notifier = {
3774  .notifier_call = ip_vs_dst_event,
3775 };
3776 
3778 {
 /*
  * NOTE(review): the signature line of this function is missing from
  * this listing; presumably ip_vs_control_net_init(struct net *net) -
  * confirm against the original file.
  *
  * Per-netns control setup: initializes rs_lock, every rs_table bucket
  * and dest_trash, zeroes ftpsvc_counter and nullsvc_counter, allocates
  * the per-CPU total statistics, creates the three procfs entries and
  * finally runs ip_vs_control_net_init_sysctl().
  *
  * Returns 0 on success, -ENOMEM if the per-CPU allocation or the sysctl
  * setup fails.
  */
3779  int idx;
3780  struct netns_ipvs *ipvs = net_ipvs(net);
3781 
3782  rwlock_init(&ipvs->rs_lock);
3783 
3784  /* Initialize rs_table */
3785  for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
3786  INIT_LIST_HEAD(&ipvs->rs_table[idx]);
3787 
3788  INIT_LIST_HEAD(&ipvs->dest_trash);
3789  atomic_set(&ipvs->ftpsvc_counter, 0);
3790  atomic_set(&ipvs->nullsvc_counter, 0);
3791 
3792  /* procfs stats */
3793  ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
3794  if (!ipvs->tot_stats.cpustats)
3795  return -ENOMEM;
3796 
3797  spin_lock_init(&ipvs->tot_stats.lock);
3798 
 /* NOTE(review): the results of these three calls are not checked */
3799  proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
3800  proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
3801  proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
3802  &ip_vs_stats_percpu_fops);
3803 
 /* Any sysctl setup failure is reported to the caller as -ENOMEM */
3804  if (ip_vs_control_net_init_sysctl(net))
3805  goto err;
3806 
3807  return 0;
3808 
3809 err:
 /*
  * NOTE(review): only the per-CPU statistics are released on this path;
  * the procfs entries created above are not removed by this function.
  */
3810  free_percpu(ipvs->tot_stats.cpustats);
3811  return -ENOMEM;
3812 }
3813 
3815 {
 /*
  * NOTE(review): the signature line of this function is missing from
  * this listing; presumably ip_vs_control_net_cleanup(struct net *net) -
  * confirm against the original file.
  *
  * Per-netns control teardown: cleans the destination trash, stops the
  * estimator for tot_stats, runs the sysctl cleanup, removes the three
  * procfs entries and frees the per-CPU total statistics.
  */
3816  struct netns_ipvs *ipvs = net_ipvs(net);
3817 
3818  ip_vs_trash_cleanup(net);
3819  ip_vs_stop_estimator(net, &ipvs->tot_stats);
3820  ip_vs_control_net_cleanup_sysctl(net);
3821  proc_net_remove(net, "ip_vs_stats_percpu");
3822  proc_net_remove(net, "ip_vs_stats");
3823  proc_net_remove(net, "ip_vs");
3824  free_percpu(ipvs->tot_stats.cpustats);
3825 }
3826 
3828 {
 /*
  * NOTE(review): the signature line of this function is missing from
  * this listing; presumably ip_vs_register_nl_ioctl(void) - confirm
  * against the original file.
  *
  * Registers the IPVS sockopt interface and then the Generic Netlink
  * interface. If the Generic Netlink registration fails, the sockopt
  * registration is undone. Returns 0 on success or the error from the
  * step that failed.
  */
3829  int ret;
3830 
3831  ret = nf_register_sockopt(&ip_vs_sockopts);
3832  if (ret) {
3833  pr_err("cannot register sockopt.\n");
3834  goto err_sock;
3835  }
3836 
3837  ret = ip_vs_genl_register();
3838  if (ret) {
3839  pr_err("cannot register Generic Netlink interface.\n");
3840  goto err_genl;
3841  }
3842  return 0;
3843 
 /* Unwind in reverse order of registration */
3844 err_genl:
3845  nf_unregister_sockopt(&ip_vs_sockopts);
3846 err_sock:
3847  return ret;
3848 }
3849 
3851 {
 /*
  * NOTE(review): the signature line of this function is missing from
  * this listing; presumably ip_vs_unregister_nl_ioctl(void) - confirm
  * against the original file.
  *
  * Unregisters the Generic Netlink interface first, then the sockopt
  * interface.
  */
3852  ip_vs_genl_unregister();
3853  nf_unregister_sockopt(&ip_vs_sockopts);
3854 }
3855 
3857 {
 /*
  * NOTE(review): the signature line of this function is missing from
  * this listing; presumably ip_vs_control_init(void) - confirm against
  * the original file.
  *
  * Initializes every bucket of the two service hash tables and registers
  * ip_vs_dst_notifier as a netdevice notifier. Returns 0 on success or
  * the negative result of register_netdevice_notifier().
  */
3858  int idx;
3859  int ret;
3860 
3861  EnterFunction(2);
3862 
3863  /* Initialize ip_vs_svc_table and ip_vs_svc_fwm_table */
3864  for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3865  INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3866  INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3867  }
3868 
3869  smp_wmb(); /* Do we really need it now ? */
3870 
3871  ret = register_netdevice_notifier(&ip_vs_dst_notifier);
 /* NOTE(review): this early return skips the matching LeaveFunction(2) */
3872  if (ret < 0)
3873  return ret;
3874 
3875  LeaveFunction(2);
3876  return 0;
3877 }
3878 
3879 
3881 {
 /*
  * NOTE(review): the signature line of this function is missing from
  * this listing; presumably ip_vs_control_cleanup(void) - confirm
  * against the original file.
  *
  * Unregisters ip_vs_dst_notifier from the netdevice notifier chain.
  */
3882  EnterFunction(2);
3883  unregister_netdevice_notifier(&ip_vs_dst_notifier);
3884  LeaveFunction(2);
3885 }