#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/if_arp.h>

#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
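/* Note: the crossed placement is intentional. With VIRTIO_RING_F_EVENT_IDX
 * negotiated, each side publishes its event index in the spare u16 slot past
 * the other ring's entries: the used event lives at the end of the *avail*
 * ring, and the avail event at the end of the *used* ring. */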
        if (!((unsigned long)key & poll->mask))
        INIT_LIST_HEAD(&work->node);
        init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
        init_poll_funcptr(&poll->table, vhost_poll_func);
        mask = file->f_op->poll(file, &poll->table);
        if (mask)
                vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
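        /* vhost_poll_start() registers on the file's wait queue; if the file
         * is already ready (mask nonzero), it queues the work by calling the
         * wakeup handler directly instead of waiting for a future event. */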
        vhost_work_flush(poll->dev, &poll->work);
        if (list_empty(&work->node)) {
        spin_unlock_irqrestore(&dev->work_lock, flags);
static void vhost_vq_reset(struct vhost_dev *dev,
static int vhost_worker(void *data)
                        list_del_init(&work->node);
        vhost_zcopy_mask |= 0x1 << vq;
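/* vhost_zcopy_mask is a bitmap of virtqueue indexes with zerocopy enabled;
 * only those queues get a ubuf_info array allocated below. */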
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
        for (i = 0; i < dev->nvqs; ++i) {
                zcopy = vhost_zcopy_mask & (0x1 << i);
                        dev->vqs[i].ubuf_info =
                                kmalloc(sizeof *dev->vqs[i].ubuf_info *
                                        UIO_MAXIOV, GFP_KERNEL);
                if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
                    !dev->vqs[i].heads ||
                    (zcopy && !dev->vqs[i].ubuf_info))
                vhost_vq_free_iovecs(&dev->vqs[i]);
static void vhost_dev_free_iovecs(struct vhost_dev *dev)
        for (i = 0; i < dev->nvqs; ++i)
                vhost_vq_free_iovecs(&dev->vqs[i]);
        for (i = 0; i < dev->nvqs; ++i) {
                vhost_vq_reset(dev, dev->vqs + i);
                if (dev->vqs[i].handle_kick)
static void vhost_attach_cgroups_work(struct vhost_work *work)
static int vhost_attach_cgroups(struct vhost_dev *dev)
        vhost_work_flush(dev, &attach.work);
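/* vhost_attach_cgroups() queues vhost_attach_cgroups_work() on the worker
 * thread and flushes it, so the worker attaches itself to the owner task's
 * cgroups before any I/O work runs on it. */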
static long vhost_dev_set_owner(struct vhost_dev *dev)
        if (IS_ERR(worker)) {
                err = PTR_ERR(worker);
        err = vhost_attach_cgroups(dev);
        err = vhost_dev_alloc_iovecs(dev);
        for (i = 0; i < dev->nvqs; ++i) {
                if (dev->vqs[i].kick && dev->vqs[i].handle_kick) {
                if (dev->vqs[i].ubufs)
                if (dev->vqs[i].error_ctx)
                if (dev->vqs[i].error)
                if (dev->vqs[i].kick)
                if (dev->vqs[i].call_ctx)
                if (dev->vqs[i].call)
                vhost_vq_reset(dev, dev->vqs + i);
        vhost_dev_free_iovecs(dev);
                                        lockdep_is_held(&dev->mutex)));
static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz)
        if (a > ULONG_MAX - (unsigned long)log_base ||
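        /* The dirty log is a user-space bitmap with one bit per
         * VHOST_PAGE_SIZE page; this check guards the base + offset
         * arithmetic against unsigned overflow before access_ok() runs. */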
static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem,
                else if (log_all && !log_access_ok(log_base,
        for (i = 0; i < d->nvqs; ++i) {
                if (d->vqs[i].private_data)
                        ok = vq_memory_access_ok(d->vqs[i].log_base, mem,
static int vq_access_ok(struct vhost_dev *d, unsigned int num,
                         sizeof *avail + num * sizeof *avail->ring + s) &&
               access_ok(VERIFY_WRITE, used,
                         sizeof *used + num * sizeof *used->ring + s);
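/* Ring footprint per the virtio layout: each ring has a 4-byte header
 * (flags + idx), num entries (2 bytes each in the avail ring, 8 in the
 * used ring) and, when VIRTIO_RING_F_EVENT_IDX is negotiated, one
 * trailing u16 -- the "s" term above. */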
                                       lockdep_is_held(&dev->mutex));
        return memory_access_ok(dev, mp, 1);
                            void __user *log_base)
                                       lockdep_is_held(&vq->mutex));
        return vq_memory_access_ok(log_base, mp,
                                        vq->num * sizeof *vq->used->ring + s));
        memcpy(newmem, &mem, size);
                           mem.nregions * sizeof *m->regions)) {
        if (!memory_access_ok(d, newmem,
                               lockdep_is_held(&d->mutex));
static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
        struct file *eventfp, *filep = NULL;
        bool pollstart = false, pollstop = false;
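        /* The VHOST_SET_VRING_NUM check below rejects a zero size, anything
         * over the 16-bit index space, and non-powers-of-two via the
         * num & (num - 1) test (e.g. 256 passes, 320 fails since
         * 320 & 319 == 256); VHOST_SET_VRING_BASE only needs the 0xffff
         * bound. */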
                if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) {
                if (s.num > 0xffff) {
                if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr ||
                    (u64)(unsigned long)a.used_user_addr != a.used_user_addr ||
                    (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) {
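                /* On a 32-bit kernel a 64-bit user address must survive the
                 * round trip through unsigned long; the casts above reject
                 * pointers that would be truncated. */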
                if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) ||
                    (a.used_user_addr & (sizeof *vq->used->ring - 1)) ||
                    (a.log_guest_addr & (sizeof *vq->used->ring - 1))) {
                if (!vq_access_ok(d, vq->num,
                                  (void __user *)(unsigned long)a.desc_user_addr,
                                  (void __user *)(unsigned long)a.avail_user_addr,
                                  (void __user *)(unsigned long)a.used_user_addr)) {
                    !log_access_ok(vq->log_base, a.log_guest_addr,
                                   sizeof *vq->used +
                                   vq->num * sizeof *vq->used->ring)) {
                vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
                vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
                vq->used = (void __user *)(unsigned long)a.used_user_addr;
                if (IS_ERR(eventfp)) {
                        r = PTR_ERR(eventfp);
                if (eventfp != vq->kick) {
                        pollstop = (filep = vq->kick) != NULL;
                        pollstart = (vq->kick = eventfp) != NULL;
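                /* Swapping the kick eventfd: remember the old file (if any)
                 * so polling on it can be stopped and it can be released
                 * once the vq mutex is dropped, and start polling the new
                 * file only if one was actually supplied. */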
                if (IS_ERR(eventfp)) {
                        r = PTR_ERR(eventfp);
                if (eventfp != vq->call) {
                if (IS_ERR(eventfp)) {
                        r = PTR_ERR(eventfp);
                if (eventfp != vq->error) {
        struct file *eventfp, *filep = NULL;
                r = vhost_dev_set_owner(d);
                r = vhost_set_memory(d, argp);
                if ((u64)(unsigned long)p != p) {
                for (i = 0; i < d->nvqs; ++i) {
                if (IS_ERR(eventfp)) {
                        r = PTR_ERR(eventfp);
                for (i = 0; i < d->nvqs; ++i) {
                r = vhost_set_vring(d, ioctl, argp);
static int set_bit_to_user(int nr, void __user *addr)
        unsigned long log = (unsigned long)addr;
static int log_write(void __user *log_base,
                     u64 write_address, u64 write_length)
                u64 base = (u64)(unsigned long)log_base;
                u64 log = base + write_page / 8;
                int bit = write_page % 8;
                if ((u64)(unsigned long)log != log)
                r = set_bit_to_user(bit, (void __user *)(unsigned long)log);
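                /* Bitmap arithmetic: write_page indexes dirty pages, so the
                 * byte holding its flag is log_base + write_page / 8 and the
                 * bit within that byte is write_page % 8 (e.g. page 35 ->
                 * byte 4, bit 3); set_bit_to_user() pins the user page and
                 * sets the bit atomically. */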
                    unsigned int log_num, u64 len)
        for (i = 0; i < log_num; ++i) {
                used = &vq->used->flags;
                log_write(vq->log_base, vq->log_addr +
                          (used - (void __user *)vq->used),
                          sizeof vq->used->flags);
                          (used - (void __user *)vq->used),
        r = vhost_update_used_flags(vq);
                          struct iovec iov[], int iov_size)
        while ((u64)len > s) {
                reg = find_region(mem, addr, len);
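        /* translate_desc() walks the guest memory table region by region,
         * converting a guest-physical range into host-virtual iovec entries;
         * each iteration clips the remaining length to the region that
         * find_region() located. */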
                        struct iovec iov[], unsigned int iov_size,
                        unsigned int *out_num, unsigned int *in_num,
                        struct vhost_log *log, unsigned int *log_num,
        unsigned int i = 0, count, found = 0;
                vq_err(vq, "Invalid length in indirect descriptor: "
                       "len 0x%llx not multiple of 0x%zx\n",
                       (unsigned long long)indirect->len,
        ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
                vq_err(vq, "Translation failure %d in indirect.\n", ret);
        count = indirect->len / sizeof desc;
                vq_err(vq, "Indirect buffer length too big: %d\n",
                unsigned iov_count = *in_num + *out_num;
                        vq_err(vq, "Loop detected: last one at %u "
                               "indirect size %u\n",
                        vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n",
                               i, (size_t)indirect->addr + i * sizeof desc);
                        vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n",
                               i, (size_t)indirect->addr + i * sizeof desc);
                ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
                                     iov_size - iov_count);
                        vq_err(vq, "Translation failure %d indirect idx %d\n",
                        log[*log_num].len = desc.len;
                        vq_err(vq, "Indirect descriptor "
                               "has out after in: idx %d\n", i);
                      struct iovec iov[], unsigned int iov_size,
                      unsigned int *out_num, unsigned int *in_num,
                      struct vhost_log *log, unsigned int *log_num)
        unsigned int i, head, found = 0;
                vq_err(vq, "Failed to access avail idx at %p\n",
                vq_err(vq, "Guest moved used index from %u to %u",
                                &vq->avail->ring[last_avail_idx % vq->num]))) {
                vq_err(vq, "Failed to read head: idx %d address %p\n",
                       last_avail_idx,
                       &vq->avail->ring[last_avail_idx % vq->num]);
                vq_err(vq, "Guest says index %u > %u is available",
        *out_num = *in_num = 0;
                unsigned iov_count = *in_num + *out_num;
                        vq_err(vq, "Desc index is %u > %u, head = %u",
                        vq_err(vq, "Loop detected: last one at %u "
                               "vq size %u head %u\n",
                        vq_err(vq, "Failed to get descriptor: idx %d addr %p\n",
                        ret = get_indirect(dev, vq, iov, iov_size,
                                           out_num, in_num,
                                           log, log_num, &desc);
                                vq_err(vq, "Failure detected "
                                       "in indirect descriptor at idx %d\n", i);
                ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count,
                                     iov_size - iov_count);
                        vq_err(vq, "Translation failure %d descriptor idx %d\n",
                        log[*log_num].len = desc.len;
                        vq_err(vq, "Descriptor has out after in: "
                vq_err(vq, "Failed to write used id");
                vq_err(vq, "Failed to write used len");
                vq_err(vq, "Failed to increment used idx");
                          ((void __user *)used - (void __user *)vq->used),
                          sizeof vq->used->idx);
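                /* When a dirty log is active (vq->log_used), every write the
                 * guest can observe in the used ring -- the entry itself and
                 * the idx update -- is mirrored into the log so migration
                 * marks those pages dirty. */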
                vq_err(vq, "Failed to write used");
                          ((void __user *)used - (void __user *)vq->used),
                          count * sizeof *used);
                r = __vhost_add_used_n(vq, heads, n);
        r = __vhost_add_used_n(vq, heads, count);
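        /* vhost_add_used_n() may wrap around the ring: the first
         * __vhost_add_used_n() call fills the tail entries up to vq->num,
         * the second continues from slot 0 with the remaining heads. */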
                vq_err(vq, "Failed to increment used idx");
                          sizeof vq->used->idx);
                vq_err(vq, "Failed to get flags");
                vq_err(vq, "Failed to get used event idx");
        return vring_need_event(event, new, old);
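        /* For reference, vring_need_event() from include/linux/virtio_ring.h
         * decides whether crossing the guest-published event index warrants
         * an interrupt:
         *
         *      return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old);
         *
         * i.e. signal iff event_idx falls in the half-open window
         * (old, new], computed in modulo-2^16 arithmetic. */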
        if (vq->call_ctx && vhost_notify(dev, vq))
                                unsigned int head, int len)
                r = vhost_update_used_flags(vq);
                        vq_err(vq, "Failed to enable notification at %p: %d\n",
                               &vq->used->flags, r);
                r = vhost_update_avail_event(vq, vq->avail_idx);
                        vq_err(vq, "Failed to update avail event index at %p: %d\n",
                vq_err(vq, "Failed to check avail idx at %p: %d\n",
                       &vq->avail->idx, r);
        r = vhost_update_used_flags(vq);
                vq_err(vq, "Failed to enable notification at %p: %d\n",
                       &vq->used->flags, r);
static void vhost_zerocopy_done_signal(struct kref *kref)
        kref_init(&ubufs->kref);
        kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
        kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
        kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
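/* The zerocopy ubufs structure is reference counted: the kref taken at init
 * plus one per in-flight zerocopy buffer. The final kref_put() fires
 * vhost_zerocopy_done_signal(), waking anyone waiting for all outstanding
 * buffers to complete. */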