45 #include <asm/uaccess.h>
46 #include <linux/module.h>
47 #include <linux/types.h>
48 #include <linux/kernel.h>
50 #include <linux/string.h>
51 #include <linux/errno.h>
53 #include <linux/slab.h>
55 #include <linux/socket.h>
59 #include <linux/netdevice.h>
62 #include <linux/stat.h>
76 #include <linux/igmp.h>
77 #include <linux/netfilter_ipv4.h>
78 #include <linux/netfilter_bridge.h>
79 #include <linux/mroute.h>
80 #include <linux/netlink.h>
81 #include <linux/tcp.h>
96 struct iphdr *iph = ip_hdr(skb);
101 skb_dst(skb)->
dev, dst_output);
110 err = dst_output(skb);
121 ttl = ip4_dst_hoplimit(dst);
133 struct rtable *rt = skb_rtable(skb);
138 skb_reset_network_header(skb);
143 if (ip_dont_fragment(sk, &rt->
dst))
147 iph->
ttl = ip_select_ttl(inet, &rt->
dst);
151 ip_select_ident(iph, &rt->
dst, sk);
153 if (opt && opt->
opt.optlen) {
154 iph->ihl += opt->
opt.optlen>>2;
166 static inline int ip_finish_output2(
struct sk_buff *
skb)
190 skb_set_owner_w(skb2, skb->
sk);
196 nexthop = (
__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
197 neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
200 if (!IS_ERR(neigh)) {
201 int res = dst_neigh_output(dst, neigh, skb);
203 rcu_read_unlock_bh();
206 rcu_read_unlock_bh();
214 static inline int ip_skb_dst_mtu(
struct sk_buff *skb)
219 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
222 static int ip_finish_output(
struct sk_buff *skb)
224 #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
226 if (skb_dst(skb)->xfrm !=
NULL) {
228 return dst_output(skb);
231 if (skb->
len > ip_skb_dst_mtu(skb) && !skb_is_gso(skb))
234 return ip_finish_output2(skb);
240 struct rtable *rt = skb_rtable(skb);
257 #ifdef CONFIG_IP_MROUTE
280 if (ip_hdr(skb)->ttl == 0) {
294 skb->
dev, ip_finish_output,
318 static void ip_copy_addrs(
struct iphdr *iph,
const struct flowi4 *fl4)
342 rt = skb_rtable(skb);
352 daddr = inet->inet_daddr;
353 if (inet_opt && inet_opt->
opt.srr)
354 daddr = inet_opt->
opt.faddr;
360 rt = ip_route_output_ports(sock_net(sk), fl4, sk,
366 sk->sk_bound_dev_if);
378 skb_push(skb,
sizeof(
struct iphdr) + (inet_opt ? inet_opt->
opt.optlen : 0));
379 skb_reset_network_header(skb);
381 *((
__be16 *)iph) =
htons((4 << 12) | (5 << 8) | (inet->
tos & 0xff));
382 if (ip_dont_fragment(sk, &rt->
dst) && !skb->
local_df)
386 iph->
ttl = ip_select_ttl(inet, &rt->
dst);
388 ip_copy_addrs(iph, fl4);
392 if (inet_opt && inet_opt->
opt.optlen) {
393 iph->ihl += inet_opt->
opt.optlen >> 2;
397 ip_select_ident_more(iph, &rt->
dst, sk,
398 (skb_shinfo(skb)->gso_segs ?: 1) - 1);
422 skb_dst_copy(to, from);
427 IPCB(to)->flags =
IPCB(from)->flags;
429 #ifdef CONFIG_NET_SCHED
430 to->tc_index = from->tc_index;
433 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
434 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
437 #if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
440 skb_copy_secmark(to, from);
456 unsigned int mtu, hlen,
left,
len, ll_rs;
459 struct rtable *rt = skb_rtable(skb);
471 (
IPCB(skb)->frag_max_size &&
472 IPCB(skb)->frag_max_size > dst_mtu(&rt->
dst)))) {
475 htonl(ip_skb_dst_mtu(skb)));
485 mtu = dst_mtu(&rt->
dst) - hlen;
486 #ifdef CONFIG_BRIDGE_NETFILTER
488 mtu -= nf_bridge_mtu_reduction(skb);
499 if (skb_has_frag_list(skb)) {
501 int first_len = skb_pagelen(skb);
503 if (first_len - hlen > mtu ||
504 ((first_len - hlen) & 7) ||
505 ip_is_fragment(iph) ||
509 skb_walk_frags(skb, frag) {
511 if (frag->
len > mtu ||
512 ((frag->
len & 7) && frag->
next) ||
513 skb_headroom(frag) < hlen)
514 goto slow_path_clean;
517 if (skb_shared(frag))
518 goto slow_path_clean;
532 frag = skb_shinfo(skb)->frag_list;
533 skb_frag_list_init(skb);
534 skb->
data_len = first_len - skb_headlen(skb);
535 skb->
len = first_len;
545 skb_reset_transport_header(frag);
546 __skb_push(frag, hlen);
547 skb_reset_network_header(frag);
548 memcpy(skb_network_header(frag), iph, hlen);
551 ip_copy_metadata(frag, skb);
554 offset += skb->
len - hlen;
588 skb_walk_frags(skb, frag2) {
598 left = skb->
len - hlen;
641 ip_copy_metadata(skb2, skb);
642 skb_reserve(skb2, ll_rs);
644 skb_reset_network_header(skb2);
653 skb_set_owner_w(skb2, skb->
sk);
659 skb_copy_from_linear_data(skb, skb_network_header(skb2), hlen);
664 if (
skb_copy_bits(skb, ptr, skb_transport_header(skb2), len))
687 if (left > 0 || not_last_frag)
728 skb->
csum = csum_block_add(skb->
csum, csum, odd);
745 static inline int ip_ufo_append_data(
struct sock *
sk,
747 int getfrag(
void *
from,
char *to,
int offset,
int len,
749 void *from,
int length,
int hh_len,
int fragheaderlen,
750 int transhdrlen,
int maxfraglen,
unsigned int flags)
759 if ((skb = skb_peek_tail(queue)) ==
NULL) {
761 hh_len + fragheaderlen + transhdrlen + 20,
768 skb_reserve(skb, hh_len);
771 skb_put(skb, fragheaderlen + transhdrlen);
774 skb_reset_network_header(skb);
783 skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen;
785 __skb_queue_tail(queue, skb);
789 (length - transhdrlen));
792 static int __ip_append_data(
struct sock *sk,
797 int getfrag(
void *from,
char *to,
int offset,
798 int len,
int odd,
struct sk_buff *skb),
799 void *from,
int length,
int transhdrlen,
812 unsigned int maxfraglen, fragheaderlen;
816 skb = skb_peek_tail(queue);
818 exthdrlen = !skb ? rt->
dst.header_len : 0;
823 fragheaderlen =
sizeof(
struct iphdr) + (opt ? opt->optlen : 0);
824 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
826 if (cork->length + length > 0xFFFF - fragheaderlen) {
837 length + fragheaderlen <= mtu &&
843 if (((length > mtu) || (skb && skb_is_gso(skb))) &&
846 err = ip_ufo_append_data(sk, queue, getfrag, from, length,
847 hh_len, fragheaderlen, transhdrlen,
866 copy = mtu - skb->
len;
868 copy = maxfraglen - skb->
len;
872 unsigned int fraglen;
873 unsigned int fraggap;
874 unsigned int alloclen;
879 fraggap = skb_prev->
len - maxfraglen;
887 datalen = length + fraggap;
888 if (datalen > mtu - fragheaderlen)
889 datalen = maxfraglen - fragheaderlen;
890 fraglen = datalen + fragheaderlen;
898 alloclen += exthdrlen;
905 if (datalen == length + fraggap)
906 alloclen += rt->
dst.trailer_len;
910 alloclen + hh_len + 15,
917 alloclen + hh_len + 15, 1,
934 skb_reserve(skb, hh_len);
935 skb_shinfo(skb)->tx_flags = cork->
tx_flags;
940 data =
skb_put(skb, fraglen + exthdrlen);
941 skb_set_network_header(skb, exthdrlen);
944 data += fragheaderlen + exthdrlen;
948 skb_prev, maxfraglen,
949 data + transhdrlen, fraggap, 0);
950 skb_prev->
csum = csum_sub(skb_prev->
csum,
953 pskb_trim_unique(skb_prev, maxfraglen);
956 copy = datalen - transhdrlen - fraggap;
957 if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
964 length -= datalen - fraggap;
972 __skb_queue_tail(queue, skb);
983 if (getfrag(from,
skb_put(skb, copy),
984 offset, copy, off, skb) < 0) {
985 __skb_trim(skb, off);
990 int i = skb_shinfo(skb)->nr_frags;
996 if (!skb_can_coalesce(skb, i, pfrag->
page,
1002 __skb_fill_page_desc(skb, i, pfrag->
page,
1004 skb_shinfo(skb)->nr_frags = ++
i;
1005 get_page(pfrag->
page);
1010 offset, copy, skb->
len, skb) < 0)
1014 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1034 static int ip_setup_cork(
struct sock *sk,
struct inet_cork *cork,
1064 rt->
dst.dev->mtu : dst_mtu(&rt->
dst);
1084 int getfrag(
void *from,
char *to,
int offset,
int len,
1085 int odd,
struct sk_buff *skb),
1086 void *from,
int length,
int transhdrlen,
1097 err = ip_setup_cork(sk, &inet->
cork.base, ipc, rtp);
1105 sk_page_frag(sk), getfrag,
1106 from, length, transhdrlen, flags);
1110 int offset,
size_t size,
int flags)
1121 unsigned int maxfraglen, fragheaderlen, fraggap;
1132 cork = &inet->
cork.base;
1143 fragheaderlen =
sizeof(
struct iphdr) + (opt ? opt->optlen : 0);
1144 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
1146 if (cork->length + size > 0xFFFF - fragheaderlen) {
1155 if ((size + skb->
len > mtu) &&
1158 skb_shinfo(skb)->gso_size = mtu - fragheaderlen;
1166 if (skb_is_gso(skb))
1171 len = mtu - skb->
len;
1173 len = maxfraglen - skb->
len;
1180 fraggap = skb_prev->
len - maxfraglen;
1182 alloclen = fragheaderlen + hh_len + fraggap + 15;
1194 skb_reserve(skb, hh_len);
1199 skb_put(skb, fragheaderlen + fraggap);
1200 skb_reset_network_header(skb);
1206 skb_transport_header(skb),
1208 skb_prev->
csum = csum_sub(skb_prev->
csum,
1210 pskb_trim_unique(skb_prev, maxfraglen);
1220 i = skb_shinfo(skb)->nr_frags;
1223 if (skb_can_coalesce(skb, i, page, offset)) {
1224 skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len);
1227 skb_fill_page_desc(skb, i, page, offset, len);
1235 csum = csum_page(page, offset, len);
1236 skb->
csum = csum_block_add(skb->
csum, csum, skb->
len);
1254 static void ip_cork_release(
struct inet_cork *cork)
1275 struct net *
net = sock_net(sk);
1282 if ((skb = __skb_dequeue(queue)) ==
NULL)
1284 tail_skb = &(skb_shinfo(skb)->frag_list);
1287 if (skb->
data < skb_network_header(skb))
1288 __skb_pull(skb, skb_network_offset(skb));
1289 while ((tmp_skb = __skb_dequeue(queue)) !=
NULL) {
1290 __skb_pull(tmp_skb, skb_network_header_len(skb));
1291 *tail_skb = tmp_skb;
1292 tail_skb = &(tmp_skb->
next);
1293 skb->
len += tmp_skb->
len;
1311 (skb->
len <= dst_mtu(&rt->
dst) &&
1312 ip_dont_fragment(sk, &rt->
dst)))
1321 ttl = ip_select_ttl(inet, &rt->
dst);
1330 ip_copy_addrs(iph, fl4);
1331 ip_select_ident(iph, &rt->
dst, sk);
1334 iph->ihl += opt->
optlen>>2;
1345 skb_dst_set(skb, &rt->
dst);
1349 skb_transport_header(skb))->
type);
1351 ip_cork_release(cork);
1375 skb = ip_finish_skb(sk, fl4);
1386 static void __ip_flush_pending_frames(
struct sock *sk,
1392 while ((skb = __skb_dequeue_tail(queue)) !=
NULL)
1395 ip_cork_release(cork);
1400 __ip_flush_pending_frames(sk, &sk->
sk_write_queue, &inet_sk(sk)->cork.base);
1405 int getfrag(
void *from,
char *to,
int offset,
1406 int len,
int odd,
struct sk_buff *skb),
1407 void *from,
int length,
int transhdrlen,
1418 __skb_queue_head_init(&queue);
1423 err = ip_setup_cork(sk, &cork, ipc, rtp);
1425 return ERR_PTR(err);
1427 err = __ip_append_data(sk, fl4, &queue, &cork,
1429 from, length, transhdrlen, flags);
1431 __ip_flush_pending_frames(sk, &queue, &cork);
1432 return ERR_PTR(err);
1441 static int ip_reply_glue_bits(
void *dptr,
char *to,
int offset,
1442 int len,
int odd,
struct sk_buff *skb)
1447 skb->
csum = csum_block_add(skb->
csum, csum, odd);
1477 struct rtable *rt = skb_rtable(skb);
1489 if (replyopts.
opt.opt.optlen) {
1490 ipc.
opt = &replyopts.
opt;
1492 if (replyopts.
opt.opt.srr)
1493 daddr = replyopts.
opt.opt.faddr;
1499 ip_reply_arg_flowi_flags(arg),
1502 security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
1503 rt = ip_route_output_key(net, &fl4);
1514 sock_net_set(sk, net);
1522 *((
__sum16 *)skb_transport_header(nskb) +
1527 skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
1541 #if defined(CONFIG_IP_MULTICAST) && defined(CONFIG_PROC_FS)