Linux Kernel 3.7.1
vlocation.c
/* AFS volume location management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells ([email protected])
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/sched.h>
#include "internal.h"

static unsigned afs_vlocation_timeout = 10;	/* volume location timeout in seconds */
static unsigned afs_vlocation_update_timeout = 10 * 60;

static void afs_vlocation_reaper(struct work_struct *);
static void afs_vlocation_updater(struct work_struct *);

static LIST_HEAD(afs_vlocation_updates);
static LIST_HEAD(afs_vlocation_graveyard);
static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
static struct workqueue_struct *afs_vlocation_update_worker;
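
/*
 * Overview of the background machinery declared above:
 * - afs_vlocation_updates lists records awaiting a periodic refresh; it is
 *   serviced by afs_vlocation_updater() on afs_vlocation_update_worker and
 *   is protected by afs_vlocation_updates_lock
 * - afs_vlocation_graveyard lists unused records awaiting destruction; it is
 *   serviced by afs_vlocation_reaper() after afs_vlocation_timeout seconds
 *   and is protected by afs_vlocation_graveyard_lock
 */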

/*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
 */
static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
					   struct key *key,
					   struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr addr;
	int count, ret;

	_enter("%s,%s", cell->name, vl->vldb.name);

	down_write(&vl->cell->vl_sem);
	ret = -ENOMEDIUM;
	for (count = cell->vl_naddrs; count > 0; count--) {
		addr = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_name(&addr, key, vl->vldb.name, vldb,
					       &afs_sync_call);
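		/* classify the result: a definitive answer, a local failure or
		 * a rejected/expired key ends the scan; transient network
		 * errors rotate on to the next VL server in the cell */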
		switch (ret) {
		case 0:
			goto out;
		case -ENOMEM:
		case -ENONET:
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			if (ret == -ENOMEM || ret == -ENONET)
				goto out;
			goto rotate;
		case -ENOMEDIUM:
		case -EKEYREJECTED:
		case -EKEYEXPIRED:
			goto out;
		default:
			ret = -EIO;
			goto rotate;
		}

		/* rotate the server records upon lookup failure */
	rotate:
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
	}

out:
	up_write(&vl->cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}

/*
 * iterate through the VL servers in a cell until one of them admits knowing
 * about the volume in question
 */
static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
					 struct key *key,
					 afs_volid_t volid,
					 afs_voltype_t voltype,
					 struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr addr;
	int count, ret;

	_enter("%s,%x,%d,", cell->name, volid, voltype);

	down_write(&vl->cell->vl_sem);
	ret = -ENOMEDIUM;
	for (count = cell->vl_naddrs; count > 0; count--) {
		addr = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
					     &afs_sync_call);
		switch (ret) {
		case 0:
			goto out;
		case -ENOMEM:
		case -ENONET:
		case -ENETUNREACH:
		case -EHOSTUNREACH:
		case -ECONNREFUSED:
			if (ret == -ENOMEM || ret == -ENONET)
				goto out;
			goto rotate;
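		/* EBUSY is retried against the same server a few times, with
		 * a short sleep from the second attempt onwards, before the
		 * scan is given up */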
		case -EBUSY:
			vl->upd_busy_cnt++;
			if (vl->upd_busy_cnt <= 3) {
				if (vl->upd_busy_cnt > 1) {
					/* second+ BUSY - sleep a little bit */
					set_current_state(TASK_UNINTERRUPTIBLE);
					schedule_timeout(1);
					__set_current_state(TASK_RUNNING);
				}
				continue;
			}
			break;
		case -ENOMEDIUM:
			vl->upd_rej_cnt++;
			goto rotate;
		default:
			ret = -EIO;
			goto rotate;
		}

		/* rotate the server records upon lookup failure */
	rotate:
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
		vl->upd_busy_cnt = 0;
	}

out:
	if (ret < 0 && vl->upd_rej_cnt > 0) {
		printk(KERN_NOTICE "kAFS:"
		       " Active volume no longer valid '%s'\n",
		       vl->vldb.name);
		vl->valid = 0;
		ret = -ENOMEDIUM;
	}

	up_write(&vl->cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}

/*
 * allocate a volume location record
 */
static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
						 const char *name,
						 size_t namesz)
{
	struct afs_vlocation *vl;

	vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
	if (vl) {
		vl->cell = cell;
		vl->state = AFS_VL_NEW;
		atomic_set(&vl->usage, 1);
		INIT_LIST_HEAD(&vl->link);
		INIT_LIST_HEAD(&vl->grave);
		INIT_LIST_HEAD(&vl->update);
		init_waitqueue_head(&vl->waitq);
		spin_lock_init(&vl->lock);
		memcpy(vl->vldb.name, name, namesz);
	}

	_leave(" = %p", vl);
	return vl;
}

/*
 * update record if we found it in the cache
 */
static int afs_vlocation_update_record(struct afs_vlocation *vl,
				       struct key *key,
				       struct afs_cache_vlocation *vldb)
{
	afs_voltype_t voltype;
	afs_volid_t vid;
	int ret;

	/* try to look up a cached volume in the cell VL databases by ID */
	_debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vl->vldb.name,
	       vl->vldb.vidmask,
	       ntohl(vl->vldb.servers[0].s_addr),
	       vl->vldb.srvtmask[0],
	       ntohl(vl->vldb.servers[1].s_addr),
	       vl->vldb.srvtmask[1],
	       ntohl(vl->vldb.servers[2].s_addr),
	       vl->vldb.srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vl->vldb.vid[0],
	       vl->vldb.vid[1],
	       vl->vldb.vid[2]);

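	/* pick the volume ID to check against the server: prefer the R/W
	 * volume, then R/O, then the backup volume, according to the cached
	 * vidmask */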
	if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
		vid = vl->vldb.vid[0];
		voltype = AFSVL_RWVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
		vid = vl->vldb.vid[1];
		voltype = AFSVL_ROVOL;
	} else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
		vid = vl->vldb.vid[2];
		voltype = AFSVL_BACKVOL;
	} else {
		BUG();
		vid = 0;
		voltype = 0;
	}

	/* contact the server to make sure the volume is still available
	 * - TODO: need to handle disconnected operation here
	 */
	ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
	switch (ret) {
		/* net error */
	default:
		printk(KERN_WARNING "kAFS:"
		       " failed to update volume '%s' (%x) up in '%s': %d\n",
		       vl->vldb.name, vid, vl->cell->name, ret);
		_leave(" = %d", ret);
		return ret;

		/* pulled from local cache into memory */
	case 0:
		_leave(" = 0");
		return 0;

		/* uh oh... looks like the volume got deleted */
	case -ENOMEDIUM:
		printk(KERN_ERR "kAFS:"
		       " volume '%s' (%x) does not exist '%s'\n",
		       vl->vldb.name, vid, vl->cell->name);

		/* TODO: make existing record unavailable */
		_leave(" = %d", ret);
		return ret;
	}
}

/*
 * apply the update to a VL record
 */
static void afs_vlocation_apply_update(struct afs_vlocation *vl,
				       struct afs_cache_vlocation *vldb)
{
	_debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
	       vldb->name, vldb->vidmask,
	       ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
	       ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
	       ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);

	_debug("Vids: %08x %08x %08x",
	       vldb->vid[0], vldb->vid[1], vldb->vid[2]);

	if (strcmp(vldb->name, vl->vldb.name) != 0)
		printk(KERN_NOTICE "kAFS:"
		       " name of volume '%s' changed to '%s' on server\n",
		       vl->vldb.name, vldb->name);

	vl->vldb = *vldb;

#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif
}

/*
 * fill in a volume location record, consulting the cache and the VL server
 * both
 */
static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
					struct key *key)
{
	struct afs_cache_vlocation vldb;
	int ret;

	_enter("");

	ASSERTCMP(vl->valid, ==, 0);

	memset(&vldb, 0, sizeof(vldb));

	/* see if we have an in-cache copy (will set vl->valid if there is) */
#ifdef CONFIG_AFS_FSCACHE
	vl->cache = fscache_acquire_cookie(vl->cell->cache,
					   &afs_vlocation_cache_index_def, vl);
#endif

	if (vl->valid) {
		/* try to update a known volume in the cell VL databases by
		 * ID as the name may have changed */
		_debug("found in cache");
		ret = afs_vlocation_update_record(vl, key, &vldb);
	} else {
		/* try to look up an unknown volume in the cell VL databases by
		 * name */
		ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
		if (ret < 0) {
			printk("kAFS: failed to locate '%s' in cell '%s'\n",
			       vl->vldb.name, vl->cell->name);
			return ret;
		}
	}

	afs_vlocation_apply_update(vl, &vldb);
	_leave(" = 0");
	return 0;
}

/*
 * queue a vlocation record for updates
 */
static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
{
	struct afs_vlocation *xvl;

	/* wait at least 10 minutes before updating... */
	vl->update_at = get_seconds() + afs_vlocation_update_timeout;

	spin_lock(&afs_vlocation_updates_lock);

	if (!list_empty(&afs_vlocation_updates)) {
		/* ... but wait at least 1 second more than the newest record
		 * already queued so that we don't spam the VL server suddenly
		 * with lots of requests
		 */
		xvl = list_entry(afs_vlocation_updates.prev,
				 struct afs_vlocation, update);
		if (vl->update_at <= xvl->update_at)
			vl->update_at = xvl->update_at + 1;
	} else {
		queue_delayed_work(afs_vlocation_update_worker,
				   &afs_vlocation_update,
				   afs_vlocation_update_timeout * HZ);
	}

	list_add_tail(&vl->update, &afs_vlocation_updates);
	spin_unlock(&afs_vlocation_updates_lock);
}

/*
 * lookup volume location
 * - iterate through the VL servers in a cell until one of them admits knowing
 *   about the volume in question
 * - lookup in the local cache if not able to find on the VL server
 * - insert/update in the local cache if did get a VL response
 */
struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
					   struct key *key,
					   const char *name,
					   size_t namesz)
{
	struct afs_vlocation *vl;
	int ret;

	_enter("{%s},{%x},%*.*s,%zu",
	       cell->name, key_serial(key),
	       (int) namesz, (int) namesz, name, namesz);

	if (namesz >= sizeof(vl->vldb.name)) {
		_leave(" = -ENAMETOOLONG");
		return ERR_PTR(-ENAMETOOLONG);
	}

	/* see if we have an in-memory copy first */
	down_write(&cell->vl_sem);
	spin_lock(&cell->vl_lock);
	list_for_each_entry(vl, &cell->vl_list, link) {
		if (vl->vldb.name[namesz] != '\0')
			continue;
		if (memcmp(vl->vldb.name, name, namesz) == 0)
			goto found_in_memory;
	}
	spin_unlock(&cell->vl_lock);

	/* not in the cell's in-memory lists - create a new record */
	vl = afs_vlocation_alloc(cell, name, namesz);
	if (!vl) {
		up_write(&cell->vl_sem);
		return ERR_PTR(-ENOMEM);
	}

	afs_get_cell(cell);

	list_add_tail(&vl->link, &cell->vl_list);
	vl->state = AFS_VL_CREATING;
	up_write(&cell->vl_sem);

fill_in_record:
	ret = afs_vlocation_fill_in_record(vl, key);
	if (ret < 0)
		goto error_abandon;
	spin_lock(&vl->lock);
	vl->state = AFS_VL_VALID;
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);

	/* update volume entry in local cache */
#ifdef CONFIG_AFS_FSCACHE
	fscache_update_cookie(vl->cache);
#endif

	/* schedule for regular updates */
	afs_vlocation_queue_for_updates(vl);
	goto success;

found_in_memory:
	/* found in memory */
	_debug("found in memory");
	atomic_inc(&vl->usage);
	spin_unlock(&cell->vl_lock);
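	/* if the record had been queued for reaping, rescue it from the
	 * graveyard now that it is in use again */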
	if (!list_empty(&vl->grave)) {
		spin_lock(&afs_vlocation_graveyard_lock);
		list_del_init(&vl->grave);
		spin_unlock(&afs_vlocation_graveyard_lock);
	}
	up_write(&cell->vl_sem);

	/* see if it was an abandoned record that we might try filling in */
	spin_lock(&vl->lock);
	while (vl->state != AFS_VL_VALID) {
		afs_vlocation_state_t state = vl->state;

		_debug("invalid [state %d]", state);

		if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
			vl->state = AFS_VL_CREATING;
			spin_unlock(&vl->lock);
			goto fill_in_record;
		}

		/* must now wait for creation or update by someone else to
		 * complete */
		_debug("wait");

		spin_unlock(&vl->lock);
		ret = wait_event_interruptible(vl->waitq,
					       vl->state == AFS_VL_NEW ||
					       vl->state == AFS_VL_VALID ||
					       vl->state == AFS_VL_NO_VOLUME);
		if (ret < 0)
			goto error;
		spin_lock(&vl->lock);
	}
	spin_unlock(&vl->lock);

success:
	_leave(" = %p", vl);
	return vl;

error_abandon:
	spin_lock(&vl->lock);
	vl->state = AFS_VL_NEW;
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);
error:
	ASSERT(vl != NULL);
	afs_put_vlocation(vl);
	_leave(" = %d", ret);
	return ERR_PTR(ret);
}

/*
 * finish using a volume location record
 */
void afs_put_vlocation(struct afs_vlocation *vl)
{
	if (!vl)
		return;

	_enter("%s", vl->vldb.name);

	ASSERTCMP(atomic_read(&vl->usage), >, 0);

	if (likely(!atomic_dec_and_test(&vl->usage))) {
		_leave("");
		return;
	}

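	/* usage hit zero: move the record to the graveyard for the reaper to
	 * destroy later; recheck the count under the graveyard lock in case a
	 * concurrent lookup has resurrected the record in the meantime */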
	spin_lock(&afs_vlocation_graveyard_lock);
	if (atomic_read(&vl->usage) == 0) {
		_debug("buried");
		list_move_tail(&vl->grave, &afs_vlocation_graveyard);
		vl->time_of_death = get_seconds();
		queue_delayed_work(afs_wq, &afs_vlocation_reap,
				   afs_vlocation_timeout * HZ);

		/* suspend updates on this record */
		if (!list_empty(&vl->update)) {
			spin_lock(&afs_vlocation_updates_lock);
			list_del_init(&vl->update);
			spin_unlock(&afs_vlocation_updates_lock);
		}
	}
	spin_unlock(&afs_vlocation_graveyard_lock);
	_leave(" [killed?]");
}

/*
 * destroy a dead volume location record
 */
static void afs_vlocation_destroy(struct afs_vlocation *vl)
{
	_enter("%p", vl);

#ifdef CONFIG_AFS_FSCACHE
	fscache_relinquish_cookie(vl->cache, 0);
#endif
	afs_put_cell(vl->cell);
	kfree(vl);
}

/*
 * reap dead volume location records
 */
static void afs_vlocation_reaper(struct work_struct *work)
{
	LIST_HEAD(corpses);
	struct afs_vlocation *vl;
	unsigned long delay, expiry;
	time_t now;

	_enter("");

	now = get_seconds();
	spin_lock(&afs_vlocation_graveyard_lock);

	while (!list_empty(&afs_vlocation_graveyard)) {
		vl = list_entry(afs_vlocation_graveyard.next,
				struct afs_vlocation, grave);

		_debug("check %p", vl);

		/* the queue is ordered most dead first */
		expiry = vl->time_of_death + afs_vlocation_timeout;
		if (expiry > now) {
			delay = (expiry - now) * HZ;
			_debug("delay %lu", delay);
			mod_delayed_work(afs_wq, &afs_vlocation_reap, delay);
			break;
		}

		spin_lock(&vl->cell->vl_lock);
		if (atomic_read(&vl->usage) > 0) {
			_debug("no reap");
			list_del_init(&vl->grave);
		} else {
			_debug("reap");
			list_move_tail(&vl->grave, &corpses);
			list_del_init(&vl->link);
		}
		spin_unlock(&vl->cell->vl_lock);
	}

	spin_unlock(&afs_vlocation_graveyard_lock);

	/* now reap the corpses we've extracted */
	while (!list_empty(&corpses)) {
		vl = list_entry(corpses.next, struct afs_vlocation, grave);
		list_del(&vl->grave);
		afs_vlocation_destroy(vl);
	}

	_leave("");
}

/*
 * initialise the VL update process
 */
int __init afs_vlocation_update_init(void)
{
	afs_vlocation_update_worker =
		create_singlethread_workqueue("kafs_vlupdated");
	return afs_vlocation_update_worker ? 0 : -ENOMEM;
}

/*
 * discard all the volume location records for rmmod
 */
void afs_vlocation_purge(void)
{
	afs_vlocation_timeout = 0;

	spin_lock(&afs_vlocation_updates_lock);
	list_del_init(&afs_vlocation_updates);
	spin_unlock(&afs_vlocation_updates_lock);
	mod_delayed_work(afs_vlocation_update_worker, &afs_vlocation_update, 0);
	destroy_workqueue(afs_vlocation_update_worker);

	mod_delayed_work(afs_wq, &afs_vlocation_reap, 0);
}

/*
 * update a volume location
 */
static void afs_vlocation_updater(struct work_struct *work)
{
	struct afs_cache_vlocation vldb;
	struct afs_vlocation *vl, *xvl;
	time_t now;
	long timeout;
	int ret;

	_enter("");

	now = get_seconds();

	/* find a record to update */
	spin_lock(&afs_vlocation_updates_lock);
	for (;;) {
		if (list_empty(&afs_vlocation_updates)) {
			spin_unlock(&afs_vlocation_updates_lock);
			_leave(" [nothing]");
			return;
		}

		vl = list_entry(afs_vlocation_updates.next,
				struct afs_vlocation, update);
		if (atomic_read(&vl->usage) > 0)
			break;
		list_del_init(&vl->update);
	}

	timeout = vl->update_at - now;
	if (timeout > 0) {
		queue_delayed_work(afs_vlocation_update_worker,
				   &afs_vlocation_update, timeout * HZ);
		spin_unlock(&afs_vlocation_updates_lock);
		_leave(" [nothing]");
		return;
	}

	list_del_init(&vl->update);
	atomic_inc(&vl->usage);
	spin_unlock(&afs_vlocation_updates_lock);

	/* we can now perform the update */
	_debug("update %s", vl->vldb.name);
	vl->state = AFS_VL_UPDATING;
	vl->upd_rej_cnt = 0;
	vl->upd_busy_cnt = 0;

	ret = afs_vlocation_update_record(vl, NULL, &vldb);
	spin_lock(&vl->lock);
	switch (ret) {
	case 0:
		afs_vlocation_apply_update(vl, &vldb);
		vl->state = AFS_VL_VALID;
		break;
	case -ENOMEDIUM:
		vl->state = AFS_VL_VOLUME_DELETED;
		break;
	default:
		vl->state = AFS_VL_UNCERTAIN;
		break;
	}
	spin_unlock(&vl->lock);
	wake_up(&vl->waitq);

	/* and then reschedule */
	_debug("reschedule");
	vl->update_at = get_seconds() + afs_vlocation_update_timeout;

	spin_lock(&afs_vlocation_updates_lock);

	if (!list_empty(&afs_vlocation_updates)) {
		/* next update in 10 minutes, but wait at least 1 second more
		 * than the newest record already queued so that we don't spam
		 * the VL server suddenly with lots of requests
		 */
		xvl = list_entry(afs_vlocation_updates.prev,
				 struct afs_vlocation, update);
		if (vl->update_at <= xvl->update_at)
			vl->update_at = xvl->update_at + 1;
		xvl = list_entry(afs_vlocation_updates.next,
				 struct afs_vlocation, update);
		timeout = xvl->update_at - now;
		if (timeout < 0)
			timeout = 0;
	} else {
		timeout = afs_vlocation_update_timeout;
	}

	ASSERT(list_empty(&vl->update));

	list_add_tail(&vl->update, &afs_vlocation_updates);

	_debug("timeout %ld", timeout);
	queue_delayed_work(afs_vlocation_update_worker,
			   &afs_vlocation_update, timeout * HZ);
	spin_unlock(&afs_vlocation_updates_lock);
	afs_put_vlocation(vl);
}
718 }