14 #include <linux/module.h>
20 #include <linux/slab.h>
22 #include <linux/net.h>
24 #include <linux/if_arp.h>
25 #include <linux/if_tun.h>
27 #include <linux/if_vlan.h>
33 static int experimental_zcopytx;
39 #define VHOST_NET_WEIGHT 0x80000
42 #define VHOST_MAX_PEND 128
43 #define VHOST_GOODCOPY_LEN 256
67 static bool vhost_sock_zcopy(
struct socket *
sock)
69 return unlikely(experimental_zcopytx) &&
75 size_t len,
int iov_count)
80 while (len && seg < iov_count) {
94 static void copy_iovec_hdr(
const struct iovec *from,
struct iovec *to,
95 size_t len,
int iovcount)
100 while (len && seg < iovcount) {
131 static void handle_tx(
struct vhost_net *net)
157 if (wmem >= sock->
sk->sk_sndbuf) {
159 tx_poll_start(net, sock);
167 if (wmem < sock->
sk->sk_sndbuf / 2)
185 if (head == vq->
num) {
189 if (wmem >= sock->
sk->sk_sndbuf * 3 / 4) {
190 tx_poll_start(net, sock);
201 tx_poll_start(net, sock);
212 vq_err(vq,
"Unexpected descriptor format for TX: "
213 "out %d, int %d\n", out, in);
217 s = move_iovec_hdr(vq->
iov, vq->
hdr, hdr_size, out);
219 len = iov_length(vq->
iov, out);
222 vq_err(vq,
"Unexpected header len for TX: "
223 "%zd expected %zd\n",
224 iov_length(vq->
hdr, s), hdr_size);
247 kref_get(&ubufs->kref);
252 err = sock->
ops->sendmsg(
NULL, sock, &msg, len);
262 tx_poll_start(net, sock);
267 " len %d != %zd\n", err, len);
282 static int peek_head_len(
struct sock *
sk)
318 unsigned int out,
in;
324 while (datalen > 0 && headcount < quota) {
337 vq_err(vq,
"unexpected descriptor format for RX: "
338 "out %d, in %d\n", out, in);
346 heads[headcount].
id =
d;
347 heads[headcount].
len = iov_length(vq->
iov + seg, in);
348 datalen -= heads[headcount].
len;
364 static void handle_rx(
struct vhost_net *net)
381 size_t total_len = 0;
384 size_t vhost_hlen, sock_hlen;
385 size_t vhost_len, sock_len;
401 while ((sock_len = peek_head_len(sock->
sk))) {
402 sock_len += sock_hlen;
403 vhost_len = sock_len + vhost_hlen;
404 headcount = get_rx_bufs(vq, vq->
heads, vhost_len,
425 move_iovec_hdr(vq->
iov, vq->
hdr, vhost_hlen, in);
429 copy_iovec_hdr(vq->
iov, vq->
hdr, sock_hlen, in);
431 err = sock->
ops->recvmsg(
NULL, sock, &msg,
438 " len %d, expected %zd\n", err, sock_len);
445 vq_err(vq,
"Unable to write vnet_hdr at addr %p\n",
454 vq_err(vq,
"Failed num_buffers write");
462 total_len += vhost_len;
531 static void vhost_net_disable_vq(
struct vhost_net *n,
543 static void vhost_net_enable_vq(
struct vhost_net *n,
549 lockdep_is_held(&vq->
mutex));
554 tx_poll_start(n, sock);
566 lockdep_is_held(&vq->
mutex));
567 vhost_net_disable_vq(n, vq);
573 static void vhost_net_stop(
struct vhost_net *n,
struct socket **tx_sock,
586 static void vhost_net_flush(
struct vhost_net *n)
592 static int vhost_net_release(
struct inode *inode,
struct file *f)
598 vhost_net_stop(n, &tx_sock, &rx_sock);
612 static struct socket *get_raw_socket(
int fd)
618 int uaddr_len =
sizeof uaddr,
r;
645 static struct socket *get_tap_socket(
int fd)
651 return ERR_PTR(-
EBADF);
661 static struct socket *get_socket(
int fd)
668 sock = get_raw_socket(fd);
671 sock = get_tap_socket(fd);
677 static long vhost_net_set_backend(
struct vhost_net *n,
unsigned index,
int fd)
701 sock = get_socket(fd);
709 lockdep_is_held(&vq->
mutex));
710 if (sock != oldsock) {
716 oldubufs = vq->
ubufs;
718 vhost_net_disable_vq(n, vq);
720 vhost_net_enable_vq(n, vq);
737 vhost_net_flush_vq(n, index);
753 static long vhost_net_reset_owner(
struct vhost_net *n)
763 vhost_net_stop(n, &tx_sock, &rx_sock);
777 size_t vhost_hlen, sock_hlen,
hdr_len;
782 sizeof(struct virtio_net_hdr);
783 if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
785 vhost_hlen = hdr_len;
792 mutex_lock(&n->dev.mutex);
793 if ((features & (1 << VHOST_F_LOG_ALL)) &&
794 !vhost_log_access_ok(&n->dev)) {
795 mutex_unlock(&n->dev.mutex);
798 n->dev.acked_features = features;
800 for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
801 mutex_lock(&n->vqs[i].mutex);
802 n->vqs[i].vhost_hlen = vhost_hlen;
803 n->vqs[i].sock_hlen = sock_hlen;
811 static
long vhost_net_ioctl(struct file *f, unsigned int ioctl,
825 return vhost_net_set_backend(n, backend.index, backend.fd);
836 return vhost_net_set_features(n, features);
838 return vhost_net_reset_owner(n);
849 static long vhost_net_compat_ioctl(
struct file *f,
unsigned int ioctl,
852 return vhost_net_ioctl(f, ioctl, (
unsigned long)compat_ptr(arg));
858 .release = vhost_net_release,
859 .unlocked_ioctl = vhost_net_ioctl,
861 .compat_ioctl = vhost_net_compat_ioctl,
863 .open = vhost_net_open,
870 .fops = &vhost_net_fops,
873 static int vhost_net_init(
void)
875 if (experimental_zcopytx)
881 static void vhost_net_exit(
void)