Linux Kernel 3.7.1
ip_vs_app.c
1 /*
2  * ip_vs_app.c: Application module support for IPVS
3  *
4  * Authors: Wensong Zhang <[email protected]>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version
9  * 2 of the License, or (at your option) any later version.
10  *
11  * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
12  * is that ip_vs_app module handles the reverse direction (incoming requests
13  * and outgoing responses).
14  *
15  * IP_MASQ_APP application masquerading module
16  *
17  * Author: Juan Jose Ciarlante, <[email protected]>
18  *
19  */
20 
21 #define KMSG_COMPONENT "IPVS"
22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23 
24 #include <linux/module.h>
25 #include <linux/kernel.h>
26 #include <linux/skbuff.h>
27 #include <linux/in.h>
28 #include <linux/ip.h>
29 #include <linux/netfilter.h>
30 #include <linux/slab.h>
31 #include <net/net_namespace.h>
32 #include <net/protocol.h>
33 #include <net/tcp.h>
34 #include <linux/stat.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/mutex.h>
38 
39 #include <net/ip_vs.h>
40 
41 EXPORT_SYMBOL(register_ip_vs_app);
42 EXPORT_SYMBOL(unregister_ip_vs_app);
43 EXPORT_SYMBOL(register_ip_vs_app_inc);
44 
45 static DEFINE_MUTEX(__ip_vs_app_mutex);
46 
47 /*
48  * Get an ip_vs_app object
49  */
50 static inline int ip_vs_app_get(struct ip_vs_app *app)
51 {
52  return try_module_get(app->module);
53 }
54 
55 
56 static inline void ip_vs_app_put(struct ip_vs_app *app)
57 {
58  module_put(app->module);
59 }
60 
61 
62 /*
63  * Allocate/initialize app incarnation and register it in proto apps.
64  */
65 static int
66 ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
67  __u16 port)
68 {
69  struct ip_vs_protocol *pp;
70  struct ip_vs_app *inc;
71  int ret;
72 
73  if (!(pp = ip_vs_proto_get(proto)))
74  return -EPROTONOSUPPORT;
75 
76  if (!pp->unregister_app)
77  return -EOPNOTSUPP;
78 
79  inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
80  if (!inc)
81  return -ENOMEM;
82  INIT_LIST_HEAD(&inc->p_list);
83  INIT_LIST_HEAD(&inc->incs_list);
84  inc->app = app;
85  inc->port = htons(port);
86  atomic_set(&inc->usecnt, 0);
87 
88  if (app->timeouts) {
 89  inc->timeout_table =
 90  ip_vs_create_timeout_table(app->timeouts,
 91  app->timeouts_size);
92  if (!inc->timeout_table) {
93  ret = -ENOMEM;
94  goto out;
95  }
96  }
97 
98  ret = pp->register_app(net, inc);
99  if (ret)
100  goto out;
101 
102  list_add(&inc->a_list, &app->incs_list);
103  IP_VS_DBG(9, "%s App %s:%u registered\n",
104  pp->name, inc->name, ntohs(inc->port));
105 
106  return 0;
107 
108  out:
109  kfree(inc->timeout_table);
110  kfree(inc);
111  return ret;
112 }
113 
114 
115 /*
116  * Release app incarnation
117  */
118 static void
119 ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
120 {
121  struct ip_vs_protocol *pp;
122 
123  if (!(pp = ip_vs_proto_get(inc->protocol)))
124  return;
125 
126  if (pp->unregister_app)
127  pp->unregister_app(net, inc);
128 
129  IP_VS_DBG(9, "%s App %s:%u unregistered\n",
130  pp->name, inc->name, ntohs(inc->port));
131 
132  list_del(&inc->a_list);
133 
134  kfree(inc->timeout_table);
135  kfree(inc);
136 }
137 
138 
139 /*
140  * Get reference to app inc (only called from softirq)
141  *
142  */
143 int ip_vs_app_inc_get(struct ip_vs_app *inc)
144 {
145  int result;
146 
147  atomic_inc(&inc->usecnt);
148  if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
149  atomic_dec(&inc->usecnt);
150  return result;
151 }
152 
153 
154 /*
155  * Put the app inc (only called from timer or net softirq)
156  */
157 void ip_vs_app_inc_put(struct ip_vs_app *inc)
158 {
159  ip_vs_app_put(inc->app);
160  atomic_dec(&inc->usecnt);
161 }
162 
163 
164 /*
165  * Register an application incarnation in protocol applications
166  */
167 int
168 register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
169  __u16 port)
170 {
171  int result;
172 
173  mutex_lock(&__ip_vs_app_mutex);
174 
175  result = ip_vs_app_inc_new(net, app, proto, port);
176 
177  mutex_unlock(&__ip_vs_app_mutex);
178 
179  return result;
180 }
181 
182 
183 /* Register application for netns */
184 struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
185 {
186  struct netns_ipvs *ipvs = net_ipvs(net);
187  struct ip_vs_app *a;
188  int err = 0;
189 
190  if (!ipvs)
191  return ERR_PTR(-ENOENT);
192 
193  mutex_lock(&__ip_vs_app_mutex);
194 
195  list_for_each_entry(a, &ipvs->app_list, a_list) {
196  if (!strcmp(app->name, a->name)) {
197  err = -EEXIST;
198  goto out_unlock;
199  }
200  }
201  a = kmemdup(app, sizeof(*app), GFP_KERNEL);
202  if (!a) {
203  err = -ENOMEM;
204  goto out_unlock;
205  }
206  INIT_LIST_HEAD(&a->incs_list);
207  list_add(&a->a_list, &ipvs->app_list);
208  /* increase the module use count */
 209  ip_vs_use_count_inc();
 210 
211 out_unlock:
212  mutex_unlock(&__ip_vs_app_mutex);
213 
214  return err ? ERR_PTR(err) : a;
215 }
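
A minimal sketch (not part of this file) of how a helper module might drive the two registration calls above; the layout follows ip_vs_ftp, while my_app, my_pkt_out, my_pkt_in and port 2121 are made-up names used only for illustration:

/* Illustrative only: a hypothetical helper registering itself. */
static struct ip_vs_app my_app = {
	.name		= "myproto",
	.protocol	= IPPROTO_TCP,
	.module		= THIS_MODULE,
	.pkt_out	= my_pkt_out,	/* rewrites server->client payload */
	.pkt_in		= my_pkt_in,	/* rewrites client->server payload */
};

static int __net_init my_app_net_init(struct net *net)
{
	struct ip_vs_app *app;
	int ret;

	/* register the application for this netns ... */
	app = register_ip_vs_app(net, &my_app);
	if (IS_ERR(app))
		return PTR_ERR(app);

	/* ... then one incarnation per handled port (2121 is made up) */
	ret = register_ip_vs_app_inc(net, app, IPPROTO_TCP, 2121);
	if (ret)
		unregister_ip_vs_app(net, &my_app);
	return ret;
}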
216 
217 
218 /*
219  * ip_vs_app unregistration routine
220  * We are sure there are no app incarnations attached to services
221  */
222 void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
223 {
224  struct netns_ipvs *ipvs = net_ipvs(net);
225  struct ip_vs_app *a, *anxt, *inc, *nxt;
226 
227  if (!ipvs)
228  return;
229 
230  mutex_lock(&__ip_vs_app_mutex);
231 
232  list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
233  if (app && strcmp(app->name, a->name))
234  continue;
235  list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
236  ip_vs_app_inc_release(net, inc);
237  }
238 
239  list_del(&a->a_list);
240  kfree(a);
241 
242  /* decrease the module use count */
 243  ip_vs_use_count_dec();
 244  }
245 
246  mutex_unlock(&__ip_vs_app_mutex);
247 }
248 
249 
250 /*
251  * Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
252  */
253 int ip_vs_bind_app(struct ip_vs_conn *cp,
254  struct ip_vs_protocol *pp)
255 {
256  return pp->app_conn_bind(cp);
257 }
258 
259 
260 /*
261  * Unbind cp from application incarnation (called by cp destructor)
262  */
263 void ip_vs_unbind_app(struct ip_vs_conn *cp)
264 {
265  struct ip_vs_app *inc = cp->app;
266 
267  if (!inc)
268  return;
269 
270  if (inc->unbind_conn)
271  inc->unbind_conn(inc, cp);
272  if (inc->done_conn)
273  inc->done_conn(inc, cp);
274  ip_vs_app_inc_put(inc);
275  cp->app = NULL;
276 }
277 
278 
279 /*
280  * Fixes th->seq based on ip_vs_seq info.
281  */
282 static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
283 {
284  __u32 seq = ntohl(th->seq);
285 
286  /*
287  * Adjust seq with delta-offset for all packets after
288  * the most recent resized pkt seq and with previous_delta offset
289  * for all packets before most recent resized pkt seq.
290  */
291  if (vseq->delta || vseq->previous_delta) {
292  if(after(seq, vseq->init_seq)) {
293  th->seq = htonl(seq + vseq->delta);
294  IP_VS_DBG(9, "%s(): added delta (%d) to seq\n",
295  __func__, vseq->delta);
296  } else {
297  th->seq = htonl(seq + vseq->previous_delta);
298  IP_VS_DBG(9, "%s(): added previous_delta (%d) to seq\n",
299  __func__, vseq->previous_delta);
300  }
301  }
302 }
303 
304 
305 /*
306  * Fixes th->ack_seq based on ip_vs_seq info.
307  */
308 static inline void
309 vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
310 {
311  __u32 ack_seq = ntohl(th->ack_seq);
312 
313  /*
314  * Adjust ack_seq with delta-offset for
315  * the packets AFTER most recent resized pkt has caused a shift
316  * for packets before most recent resized pkt, use previous_delta
317  */
318  if (vseq->delta || vseq->previous_delta) {
 319  /* since ack_seq is the number of the next octet the sender
 320  expects to receive, compare it with init_seq+delta */
321  if(after(ack_seq, vseq->init_seq+vseq->delta)) {
322  th->ack_seq = htonl(ack_seq - vseq->delta);
323  IP_VS_DBG(9, "%s(): subtracted delta "
324  "(%d) from ack_seq\n", __func__, vseq->delta);
325 
326  } else {
327  th->ack_seq = htonl(ack_seq - vseq->previous_delta);
328  IP_VS_DBG(9, "%s(): subtracted "
329  "previous_delta (%d) from ack_seq\n",
330  __func__, vseq->previous_delta);
331  }
332  }
333 }
334 
335 
336 /*
337  * Updates ip_vs_seq if pkt has been resized
338  * Assumes already checked proto==IPPROTO_TCP and diff!=0.
339  */
340 static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
341  unsigned int flag, __u32 seq, int diff)
342 {
343  /* spinlock is to keep updating cp->flags atomic */
344  spin_lock(&cp->lock);
345  if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
346  vseq->previous_delta = vseq->delta;
347  vseq->delta += diff;
348  vseq->init_seq = seq;
349  cp->flags |= flag;
350  }
351  spin_unlock(&cp->lock);
352 }
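
An illustrative walk-through of the delta bookkeeping, with made-up numbers (not part of the original source):

/*
 * Example: if a helper's pkt_out hook grows the payload of the packet
 * whose original seq is 1000 by 7 bytes, vs_seq_update() records
 * init_seq = 1000, adds 7 to delta and sets IP_VS_CONN_F_OUT_SEQ;
 * previous_delta keeps the offset that applied before this resize, so
 * retransmissions of older segments are still adjusted by the old
 * amount.  From then on vs_fix_seq() adds delta to the seq of every
 * later outgoing packet (seq after init_seq), while vs_fix_ack_seq()
 * subtracts delta from the peer's ack_seq once the acknowledgment
 * passes init_seq + delta, keeping both directions consistent with the
 * resized stream.
 */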
353 
354 static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
355  struct ip_vs_app *app)
356 {
357  int diff;
358  const unsigned int tcp_offset = ip_hdrlen(skb);
359  struct tcphdr *th;
360  __u32 seq;
361 
362  if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
363  return 0;
364 
365  th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
366 
367  /*
368  * Remember seq number in case this pkt gets resized
369  */
370  seq = ntohl(th->seq);
371 
372  /*
373  * Fix seq stuff if flagged as so.
374  */
375  if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
376  vs_fix_seq(&cp->out_seq, th);
377  if (cp->flags & IP_VS_CONN_F_IN_SEQ)
378  vs_fix_ack_seq(&cp->in_seq, th);
379 
380  /*
381  * Call private output hook function
382  */
383  if (app->pkt_out == NULL)
384  return 1;
385 
386  if (!app->pkt_out(app, cp, skb, &diff))
387  return 0;
388 
389  /*
390  * Update ip_vs seq stuff if len has changed.
391  */
392  if (diff != 0)
393  vs_seq_update(cp, &cp->out_seq,
394  IP_VS_CONN_F_OUT_SEQ, seq, diff);
395 
396  return 1;
397 }
398 
399 /*
400  * Output pkt hook. Will call bound ip_vs_app specific function
401  * called by ipvs packet handler, assumes previously checked cp!=NULL
402  * returns false if it can't handle packet (oom)
403  */
404 int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
405 {
406  struct ip_vs_app *app;
407 
408  /*
409  * check if application module is bound to
410  * this ip_vs_conn.
411  */
412  if ((app = cp->app) == NULL)
413  return 1;
414 
415  /* TCP is complicated */
416  if (cp->protocol == IPPROTO_TCP)
417  return app_tcp_pkt_out(cp, skb, app);
418 
419  /*
420  * Call private output hook function
421  */
422  if (app->pkt_out == NULL)
423  return 1;
424 
425  return app->pkt_out(app, cp, skb, NULL);
426 }
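
A hedged sketch of the helper-side contract that app_tcp_pkt_out() relies on; the function name and the rewrite step are hypothetical, only the return-value and *diff convention is taken from the calls above:

/* Illustrative pkt_out hook: return 0 if the packet cannot be handled
 * (e.g. memory), otherwise return nonzero and report any change in
 * payload length through *diff. */
static int my_pkt_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
		      struct sk_buff *skb, int *diff)
{
	*diff = 0;			/* nothing rewritten yet */

	if (!skb_make_writable(skb, skb->len))
		return 0;		/* caller treats this as "can't handle" */

	/* ... locate and rewrite the embedded address/port in the
	 * payload here; if the text grows or shrinks, set *diff to
	 * (new length - old length) so the TCP seq/ack fixups above
	 * stay consistent ... */

	return 1;
}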
427 
428 
429 static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
430  struct ip_vs_app *app)
431 {
432  int diff;
433  const unsigned int tcp_offset = ip_hdrlen(skb);
434  struct tcphdr *th;
435  __u32 seq;
436 
437  if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
438  return 0;
439 
440  th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
441 
442  /*
443  * Remember seq number in case this pkt gets resized
444  */
445  seq = ntohl(th->seq);
446 
447  /*
448  * Fix seq stuff if flagged as so.
449  */
450  if (cp->flags & IP_VS_CONN_F_IN_SEQ)
451  vs_fix_seq(&cp->in_seq, th);
452  if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
453  vs_fix_ack_seq(&cp->out_seq, th);
454 
455  /*
456  * Call private input hook function
457  */
458  if (app->pkt_in == NULL)
459  return 1;
460 
461  if (!app->pkt_in(app, cp, skb, &diff))
462  return 0;
463 
464  /*
465  * Update ip_vs seq stuff if len has changed.
466  */
467  if (diff != 0)
468  vs_seq_update(cp, &cp->in_seq,
469  IP_VS_CONN_F_IN_SEQ, seq, diff);
470 
471  return 1;
472 }
473 
474 /*
475  * Input pkt hook. Will call bound ip_vs_app specific function
476  * called by ipvs packet handler, assumes previously checked cp!=NULL.
 477  * returns false if it can't handle the packet (oom).
478  */
479 int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
480 {
481  struct ip_vs_app *app;
482 
483  /*
484  * check if application module is bound to
485  * this ip_vs_conn.
486  */
487  if ((app = cp->app) == NULL)
488  return 1;
489 
490  /* TCP is complicated */
491  if (cp->protocol == IPPROTO_TCP)
492  return app_tcp_pkt_in(cp, skb, app);
493 
494  /*
495  * Call private input hook function
496  */
497  if (app->pkt_in == NULL)
498  return 1;
499 
500  return app->pkt_in(app, cp, skb, NULL);
501 }
502 
503 
504 #ifdef CONFIG_PROC_FS
505 /*
506  * /proc/net/ip_vs_app entry function
507  */
508 
509 static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
510 {
511  struct ip_vs_app *app, *inc;
512 
513  list_for_each_entry(app, &ipvs->app_list, a_list) {
514  list_for_each_entry(inc, &app->incs_list, a_list) {
515  if (pos-- == 0)
516  return inc;
517  }
518  }
519  return NULL;
520 
521 }
522 
523 static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
524 {
525  struct net *net = seq_file_net(seq);
526  struct netns_ipvs *ipvs = net_ipvs(net);
527 
528  mutex_lock(&__ip_vs_app_mutex);
529 
530  return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
531 }
532 
533 static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
534 {
535  struct ip_vs_app *inc, *app;
536  struct list_head *e;
537  struct net *net = seq_file_net(seq);
538  struct netns_ipvs *ipvs = net_ipvs(net);
539 
540  ++*pos;
541  if (v == SEQ_START_TOKEN)
542  return ip_vs_app_idx(ipvs, 0);
543 
544  inc = v;
545  app = inc->app;
546 
547  if ((e = inc->a_list.next) != &app->incs_list)
548  return list_entry(e, struct ip_vs_app, a_list);
549 
550  /* go on to next application */
551  for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
552  app = list_entry(e, struct ip_vs_app, a_list);
553  list_for_each_entry(inc, &app->incs_list, a_list) {
554  return inc;
555  }
556  }
557  return NULL;
558 }
559 
560 static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
561 {
562  mutex_unlock(&__ip_vs_app_mutex);
563 }
564 
565 static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
566 {
567  if (v == SEQ_START_TOKEN)
568  seq_puts(seq, "prot port usecnt name\n");
569  else {
570  const struct ip_vs_app *inc = v;
571 
572  seq_printf(seq, "%-3s %-7u %-6d %-17s\n",
 573  ip_vs_proto_name(inc->protocol),
 574  ntohs(inc->port),
575  atomic_read(&inc->usecnt),
576  inc->name);
577  }
578  return 0;
579 }
580 
581 static const struct seq_operations ip_vs_app_seq_ops = {
582  .start = ip_vs_app_seq_start,
583  .next = ip_vs_app_seq_next,
584  .stop = ip_vs_app_seq_stop,
585  .show = ip_vs_app_seq_show,
586 };
587 
588 static int ip_vs_app_open(struct inode *inode, struct file *file)
589 {
590  return seq_open_net(inode, file, &ip_vs_app_seq_ops,
591  sizeof(struct seq_net_private));
592 }
593 
594 static const struct file_operations ip_vs_app_fops = {
595  .owner = THIS_MODULE,
596  .open = ip_vs_app_open,
597  .read = seq_read,
598  .llseek = seq_lseek,
599  .release = seq_release_net,
600 };
601 #endif
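
The resulting /proc/net/ip_vs_app listing has one line per registered incarnation; with, say, the FTP helper registered on port 21 it would look roughly like this (illustrative output):

prot port usecnt name
tcp  21      0      ftp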
602 
603 int __net_init ip_vs_app_net_init(struct net *net)
604 {
605  struct netns_ipvs *ipvs = net_ipvs(net);
606 
607  INIT_LIST_HEAD(&ipvs->app_list);
608  proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
609  return 0;
610 }
611 
612 void __net_exit ip_vs_app_net_cleanup(struct net *net)
613 {
614  unregister_ip_vs_app(net, NULL /* all */);
615  proc_net_remove(net, "ip_vs_app");
616 }