drbd_nl.c (Linux kernel 3.7.1)
1 /*
2  drbd_nl.c
3 
4  This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5 
6  Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7  Copyright (C) 1999-2008, Philipp Reisner <[email protected]>.
8  Copyright (C) 2002-2008, Lars Ellenberg <[email protected]>.
9 
10  drbd is free software; you can redistribute it and/or modify
11  it under the terms of the GNU General Public License as published by
12  the Free Software Foundation; either version 2, or (at your option)
13  any later version.
14 
15  drbd is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  GNU General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with drbd; see the file COPYING. If not, write to
22  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 
24  */
25 
26 #include <linux/module.h>
27 #include <linux/drbd.h>
28 #include <linux/in.h>
29 #include <linux/fs.h>
30 #include <linux/file.h>
31 #include <linux/slab.h>
32 #include <linux/connector.h>
33 #include <linux/blkpg.h>
34 #include <linux/cpumask.h>
35 #include "drbd_int.h"
36 #include "drbd_req.h"
37 #include "drbd_wrappers.h"
38 #include <asm/unaligned.h>
39 #include <linux/drbd_tag_magic.h>
40 #include <linux/drbd_limits.h>
41 #include <linux/compiler.h>
42 #include <linux/kthread.h>
43 
44 static unsigned short *tl_add_blob(unsigned short *, enum drbd_tags, const void *, int);
45 static unsigned short *tl_add_str(unsigned short *, enum drbd_tags, const char *);
46 static unsigned short *tl_add_int(unsigned short *, enum drbd_tags, const void *);
47 
48 /* see get_sb_bdev and bd_claim */
49 static char *drbd_m_holder = "Hands off! this is DRBD's meta data device.";
50 
51 /* Generate the tag_list to struct functions */
52 #define NL_PACKET(name, number, fields) \
53 static int name ## _from_tags(struct drbd_conf *mdev, \
54  unsigned short *tags, struct name *arg) __attribute__ ((unused)); \
55 static int name ## _from_tags(struct drbd_conf *mdev, \
56  unsigned short *tags, struct name *arg) \
57 { \
58  int tag; \
59  int dlen; \
60  \
61  while ((tag = get_unaligned(tags++)) != TT_END) { \
62  dlen = get_unaligned(tags++); \
63  switch (tag_number(tag)) { \
64  fields \
65  default: \
66  if (tag & T_MANDATORY) { \
67  dev_err(DEV, "Unknown tag: %d\n", tag_number(tag)); \
68  return 0; \
69  } \
70  } \
71  tags = (unsigned short *)((char *)tags + dlen); \
72  } \
73  return 1; \
74 }
75 #define NL_INTEGER(pn, pr, member) \
76  case pn: /* D_ASSERT( tag_type(tag) == TT_INTEGER ); */ \
77  arg->member = get_unaligned((int *)(tags)); \
78  break;
79 #define NL_INT64(pn, pr, member) \
80  case pn: /* D_ASSERT( tag_type(tag) == TT_INT64 ); */ \
81  arg->member = get_unaligned((u64 *)(tags)); \
82  break;
83 #define NL_BIT(pn, pr, member) \
84  case pn: /* D_ASSERT( tag_type(tag) == TT_BIT ); */ \
85  arg->member = *(char *)(tags) ? 1 : 0; \
86  break;
87 #define NL_STRING(pn, pr, member, len) \
88  case pn: /* D_ASSERT( tag_type(tag) == TT_STRING ); */ \
89  if (dlen > len) { \
90  dev_err(DEV, "arg too long: %s (%u wanted, max len: %u bytes)\n", \
91  #member, dlen, (unsigned int)len); \
92  return 0; \
93  } \
94  arg->member ## _len = dlen; \
95  memcpy(arg->member, tags, min_t(size_t, dlen, len)); \
96  break;
97 #include <linux/drbd_nl.h>
98 
99 /* Generate the struct to tag_list functions */
100 #define NL_PACKET(name, number, fields) \
101 static unsigned short* \
102 name ## _to_tags(struct drbd_conf *mdev, \
103  struct name *arg, unsigned short *tags) __attribute__ ((unused)); \
104 static unsigned short* \
105 name ## _to_tags(struct drbd_conf *mdev, \
106  struct name *arg, unsigned short *tags) \
107 { \
108  fields \
109  return tags; \
110 }
111 
112 #define NL_INTEGER(pn, pr, member) \
113  put_unaligned(pn | pr | TT_INTEGER, tags++); \
114  put_unaligned(sizeof(int), tags++); \
115  put_unaligned(arg->member, (int *)tags); \
116  tags = (unsigned short *)((char *)tags+sizeof(int));
117 #define NL_INT64(pn, pr, member) \
118  put_unaligned(pn | pr | TT_INT64, tags++); \
119  put_unaligned(sizeof(u64), tags++); \
120  put_unaligned(arg->member, (u64 *)tags); \
121  tags = (unsigned short *)((char *)tags+sizeof(u64));
122 #define NL_BIT(pn, pr, member) \
123  put_unaligned(pn | pr | TT_BIT, tags++); \
124  put_unaligned(sizeof(char), tags++); \
125  *(char *)tags = arg->member; \
126  tags = (unsigned short *)((char *)tags+sizeof(char));
127 #define NL_STRING(pn, pr, member, len) \
128  put_unaligned(pn | pr | TT_STRING, tags++); \
129  put_unaligned(arg->member ## _len, tags++); \
130  memcpy(tags, arg->member, arg->member ## _len); \
131  tags = (unsigned short *)((char *)tags + arg->member ## _len);
132 #include <linux/drbd_nl.h>
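/* Illustration (a sketch; the packet name "foo", packet number 99, field
 * "bar_rate" and its tag number 1 are all made-up names, not taken from
 * drbd_nl.h): a declaration such as
 *
 *   NL_PACKET(foo, 99,
 *       NL_INTEGER(1, T_MANDATORY, bar_rate)
 *   )
 *
 * expands, via the first macro set above, to roughly
 *
 *   static int foo_from_tags(struct drbd_conf *mdev,
 *           unsigned short *tags, struct foo *arg)
 *   {
 *       int tag, dlen;
 *       while ((tag = get_unaligned(tags++)) != TT_END) {
 *           dlen = get_unaligned(tags++);
 *           switch (tag_number(tag)) {
 *           case 1:
 *               arg->bar_rate = get_unaligned((int *)(tags));
 *               break;
 *           default:
 *               if (tag & T_MANDATORY) {
 *                   dev_err(DEV, "Unknown tag: %d\n", tag_number(tag));
 *                   return 0;
 *               }
 *           }
 *           tags = (unsigned short *)((char *)tags + dlen);
 *       }
 *       return 1;
 *   }
 *
 * and, via the second macro set, to a matching foo_to_tags() that appends
 * (tag, length, payload) triples to the tag list and returns the advanced
 * tags pointer.
 */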
133 
134 void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name);
135 void drbd_nl_send_reply(struct cn_msg *, int);
136 
137 int drbd_khelper(struct drbd_conf *mdev, char *cmd)
138 {
139  char *envp[] = { "HOME=/",
140  "TERM=linux",
141  "PATH=/sbin:/usr/sbin:/bin:/usr/bin",
142  NULL, /* Will be set to address family */
143  NULL, /* Will be set to address */
144  NULL };
145 
146  char mb[12], af[20], ad[60], *afs;
147  char *argv[] = {usermode_helper, cmd, mb, NULL };
148  int ret;
149 
150  if (current == mdev->worker.task)
151  set_bit(CALLBACK_PENDING, &mdev->flags);
152 
153  snprintf(mb, 12, "minor-%d", mdev_to_minor(mdev));
154 
155  if (get_net_conf(mdev)) {
156  switch (((struct sockaddr *)mdev->net_conf->peer_addr)->sa_family) {
157  case AF_INET6:
158  afs = "ipv6";
159  snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI6",
160  &((struct sockaddr_in6 *)mdev->net_conf->peer_addr)->sin6_addr);
161  break;
162  case AF_INET:
163  afs = "ipv4";
164  snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
165  &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
166  break;
167  default:
168  afs = "ssocks";
169  snprintf(ad, 60, "DRBD_PEER_ADDRESS=%pI4",
170  &((struct sockaddr_in *)mdev->net_conf->peer_addr)->sin_addr);
171  }
172  snprintf(af, 20, "DRBD_PEER_AF=%s", afs);
173  envp[3]=af;
174  envp[4]=ad;
175  put_net_conf(mdev);
176  }
177 
178  /* The helper may take some time.
179  * write out any unsynced meta data changes now */
180  drbd_md_sync(mdev);
181 
182  dev_info(DEV, "helper command: %s %s %s\n", usermode_helper, cmd, mb);
183 
184  drbd_bcast_ev_helper(mdev, cmd);
185  ret = call_usermodehelper(usermode_helper, argv, envp, UMH_WAIT_PROC);
186  if (ret)
187  dev_warn(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
188  usermode_helper, cmd, mb,
189  (ret >> 8) & 0xff, ret);
190  else
191  dev_info(DEV, "helper command: %s %s %s exit code %u (0x%x)\n",
192  usermode_helper, cmd, mb,
193  (ret >> 8) & 0xff, ret);
194 
195  if (current == mdev->worker.task)
196  clear_bit(CALLBACK_PENDING, &mdev->flags);
197 
198  if (ret < 0) /* Ignore any ERRNOs we got. */
199  ret = 0;
200 
201  return ret;
202 }
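/* For illustration (a sketch; the minor number and peer address are made up,
 * and usermode_helper is assumed to be the usual "/sbin/drbdadm" default of
 * the module parameter): for minor 0 with an IPv4 peer at 10.1.1.2, a
 * "fence-peer" invocation of the helper above runs roughly
 *
 *   /sbin/drbdadm fence-peer minor-0
 *
 * with an environment of
 *
 *   HOME=/
 *   TERM=linux
 *   PATH=/sbin:/usr/sbin:/bin:/usr/bin
 *   DRBD_PEER_AF=ipv4
 *   DRBD_PEER_ADDRESS=10.1.1.2
 *
 * and the handler's exit status is reported back as (ret >> 8) & 0xff.
 */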
203 
204 static enum drbd_disk_state drbd_try_outdate_peer(struct drbd_conf *mdev)
205 {
206  char *ex_to_string;
207  int r;
208  enum drbd_disk_state nps;
209  enum drbd_fencing_p fp;
210 
211  D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
212 
213  if (get_ldev_if_state(mdev, D_CONSISTENT)) {
214  fp = mdev->ldev->dc.fencing;
215  put_ldev(mdev);
216  } else {
217  dev_warn(DEV, "Not fencing peer, I'm not even Consistent myself.\n");
218  nps = mdev->state.pdsk;
219  goto out;
220  }
221 
222  r = drbd_khelper(mdev, "fence-peer");
223 
224  switch ((r>>8) & 0xff) {
225  case 3: /* peer is inconsistent */
226  ex_to_string = "peer is inconsistent or worse";
227  nps = D_INCONSISTENT;
228  break;
229  case 4: /* peer got outdated, or was already outdated */
230  ex_to_string = "peer was fenced";
231  nps = D_OUTDATED;
232  break;
233  case 5: /* peer was down */
234  if (mdev->state.disk == D_UP_TO_DATE) {
235  /* we will(have) create(d) a new UUID anyways... */
236  ex_to_string = "peer is unreachable, assumed to be dead";
237  nps = D_OUTDATED;
238  } else {
239  ex_to_string = "peer unreachable, doing nothing since disk != UpToDate";
240  nps = mdev->state.pdsk;
241  }
242  break;
243  case 6: /* Peer is primary, voluntarily outdate myself.
244  * This is useful when an unconnected R_SECONDARY is asked to
245  * become R_PRIMARY, but finds the other peer being active. */
246  ex_to_string = "peer is active";
247  dev_warn(DEV, "Peer is primary, outdating myself.\n");
248  nps = D_UNKNOWN;
249  _drbd_request_state(mdev, NS(disk, D_OUTDATED), CS_WAIT_COMPLETE);
250  break;
251  case 7:
252  if (fp != FP_STONITH)
253  dev_err(DEV, "fence-peer() = 7 && fencing != Stonith !!!\n");
254  ex_to_string = "peer was stonithed";
255  nps = D_OUTDATED;
256  break;
257  default:
258  /* The script is broken ... */
259  nps = D_UNKNOWN;
260  dev_err(DEV, "fence-peer helper broken, returned %d\n", (r>>8)&0xff);
261  return nps;
262  }
263 
264  dev_info(DEV, "fence-peer helper returned %d (%s)\n",
265  (r>>8) & 0xff, ex_to_string);
266 
267 out:
268  if (mdev->state.susp_fen && nps >= D_UNKNOWN) {
269  /* The handler was not successful... unfreeze here, the
270  state engine can not unfreeze... */
271  _drbd_request_state(mdev, NS(susp_fen, 0), CS_VERBOSE);
272  }
273 
274  return nps;
275 }
276 
277 static int _try_outdate_peer_async(void *data)
278 {
279  struct drbd_conf *mdev = (struct drbd_conf *)data;
280  enum drbd_disk_state nps;
281  union drbd_state ns;
282 
283  nps = drbd_try_outdate_peer(mdev);
284 
285  /* Not using
286  drbd_request_state(mdev, NS(pdsk, nps));
287  here, because we might have been able to re-establish the connection
288  in the meantime. This can only partially be solved in the state
289  engine's is_valid_state() and is_valid_state_transition()
290  functions.
291 
292  nps can be D_INCONSISTENT, D_OUTDATED or D_UNKNOWN.
293  pdsk == D_INCONSISTENT while conn >= C_CONNECTED is valid,
294  therefore we have to have the pre state change check here.
295  */
296  spin_lock_irq(&mdev->req_lock);
297  ns = mdev->state;
298  if (ns.conn < C_WF_REPORT_PARAMS && !test_bit(STATE_SENT, &mdev->flags)) {
299  ns.pdsk = nps;
300  _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
301  }
302  spin_unlock_irq(&mdev->req_lock);
303 
304  return 0;
305 }
306 
307 void drbd_try_outdate_peer_async(struct drbd_conf *mdev)
308 {
309  struct task_struct *opa;
310 
311  opa = kthread_run(_try_outdate_peer_async, mdev, "drbd%d_a_helper", mdev_to_minor(mdev));
312  if (IS_ERR(opa))
313  dev_err(DEV, "out of mem, failed to invoke fence-peer helper\n");
314 }
315 
316 enum drbd_state_rv
317 drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force)
318 {
319  const int max_tries = 4;
320  enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
321  int try = 0;
322  int forced = 0;
323  union drbd_state mask, val;
324  enum drbd_disk_state nps;
325 
326  if (new_role == R_PRIMARY)
327  request_ping(mdev); /* Detect a dead peer ASAP */
328 
329  mutex_lock(&mdev->state_mutex);
330 
331  mask.i = 0; mask.role = R_MASK;
332  val.i = 0; val.role = new_role;
333 
334  while (try++ < max_tries) {
335  rv = _drbd_request_state(mdev, mask, val, CS_WAIT_COMPLETE);
336 
337  /* in case we first succeeded to outdate,
338  * but now suddenly could establish a connection */
339  if (rv == SS_CW_FAILED_BY_PEER && mask.pdsk != 0) {
340  val.pdsk = 0;
341  mask.pdsk = 0;
342  continue;
343  }
344 
345  if (rv == SS_NO_UP_TO_DATE_DISK && force &&
346  (mdev->state.disk < D_UP_TO_DATE &&
347  mdev->state.disk >= D_INCONSISTENT)) {
348  mask.disk = D_MASK;
349  val.disk = D_UP_TO_DATE;
350  forced = 1;
351  continue;
352  }
353 
354  if (rv == SS_NO_UP_TO_DATE_DISK &&
355  mdev->state.disk == D_CONSISTENT && mask.pdsk == 0) {
356  D_ASSERT(mdev->state.pdsk == D_UNKNOWN);
357  nps = drbd_try_outdate_peer(mdev);
358 
359  if (nps == D_OUTDATED || nps == D_INCONSISTENT) {
360  val.disk = D_UP_TO_DATE;
361  mask.disk = D_MASK;
362  }
363 
364  val.pdsk = nps;
365  mask.pdsk = D_MASK;
366 
367  continue;
368  }
369 
370  if (rv == SS_NOTHING_TO_DO)
371  goto fail;
372  if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
373  nps = drbd_try_outdate_peer(mdev);
374 
375  if (force && nps > D_OUTDATED) {
376  dev_warn(DEV, "Forced into split brain situation!\n");
377  nps = D_OUTDATED;
378  }
379 
380  mask.pdsk = D_MASK;
381  val.pdsk = nps;
382 
383  continue;
384  }
385  if (rv == SS_TWO_PRIMARIES) {
386  /* Maybe the peer is detected as dead very soon...
387  retry at most once more in this case. */
388  schedule_timeout_interruptible((mdev->net_conf->ping_timeo+1)*HZ/10);
389  if (try < max_tries)
390  try = max_tries - 1;
391  continue;
392  }
393  if (rv < SS_SUCCESS) {
394  rv = _drbd_request_state(mdev, mask, val,
395  CS_VERBOSE + CS_WAIT_COMPLETE);
396  if (rv < SS_SUCCESS)
397  goto fail;
398  }
399  break;
400  }
401 
402  if (rv < SS_SUCCESS)
403  goto fail;
404 
405  if (forced)
406  dev_warn(DEV, "Forced to consider local data as UpToDate!\n");
407 
408  /* Wait until nothing is on the fly :) */
409  wait_event(mdev->misc_wait, atomic_read(&mdev->ap_pending_cnt) == 0);
410 
411  if (new_role == R_SECONDARY) {
412  set_disk_ro(mdev->vdisk, true);
413  if (get_ldev(mdev)) {
414  mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
415  put_ldev(mdev);
416  }
417  } else {
418  if (get_net_conf(mdev)) {
419  mdev->net_conf->want_lose = 0;
420  put_net_conf(mdev);
421  }
422  set_disk_ro(mdev->vdisk, false);
423  if (get_ldev(mdev)) {
424  if (((mdev->state.conn < C_CONNECTED ||
425  mdev->state.pdsk <= D_FAILED)
426  && mdev->ldev->md.uuid[UI_BITMAP] == 0) || forced)
427  drbd_uuid_new_current(mdev);
428 
429  mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
430  put_ldev(mdev);
431  }
432  }
433 
434  /* write-out of the bitmap areas covered by the activity log
435  * to stable storage is already done in the after-state-change work */
436 
437  if (mdev->state.conn >= C_WF_REPORT_PARAMS) {
438  /* if this was forced, we should consider sync */
439  if (forced)
440  drbd_send_uuids(mdev);
441  drbd_send_current_state(mdev);
442  }
443 
444  drbd_md_sync(mdev);
445 
446  kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
447  fail:
448  mutex_unlock(&mdev->state_mutex);
449  return rv;
450 }
451 
452 static struct drbd_conf *ensure_mdev(int minor, int create)
453 {
454  struct drbd_conf *mdev;
455 
456  if (minor >= minor_count)
457  return NULL;
458 
459  mdev = minor_to_mdev(minor);
460 
461  if (!mdev && create) {
462  struct gendisk *disk = NULL;
463  mdev = drbd_new_device(minor);
464 
465  spin_lock_irq(&drbd_pp_lock);
466  if (minor_table[minor] == NULL) {
467  minor_table[minor] = mdev;
468  disk = mdev->vdisk;
469  mdev = NULL;
470  } /* else: we lost the race */
471  spin_unlock_irq(&drbd_pp_lock);
472 
473  if (disk) /* we won the race above */
474  /* in case we ever add a drbd_delete_device(),
475  * don't forget the del_gendisk! */
476  add_disk(disk);
477  else /* we lost the race above */
478  drbd_free_mdev(mdev);
479 
480  mdev = minor_to_mdev(minor);
481  }
482 
483  return mdev;
484 }
485 
486 static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
487  struct drbd_nl_cfg_reply *reply)
488 {
489  struct primary primary_args;
490 
491  memset(&primary_args, 0, sizeof(struct primary));
492  if (!primary_from_tags(mdev, nlp->tag_list, &primary_args)) {
493  reply->ret_code = ERR_MANDATORY_TAG;
494  return 0;
495  }
496 
497  reply->ret_code =
498  drbd_set_role(mdev, R_PRIMARY, primary_args.primary_force);
499 
500  return 0;
501 }
502 
503 static int drbd_nl_secondary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
504  struct drbd_nl_cfg_reply *reply)
505 {
506  reply->ret_code = drbd_set_role(mdev, R_SECONDARY, 0);
507 
508  return 0;
509 }
510 
511 /* initializes the md.*_offset members, so we are able to find
512  * the on disk meta data */
513 static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
514  struct drbd_backing_dev *bdev)
515 {
516  sector_t md_size_sect = 0;
517  switch (bdev->dc.meta_dev_idx) {
518  default:
519  /* v07 style fixed size indexed meta data */
520  bdev->md.md_size_sect = MD_RESERVED_SECT;
521  bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
522  bdev->md.al_offset = MD_AL_OFFSET;
523  bdev->md.bm_offset = MD_BM_OFFSET;
524  break;
525  case DRBD_MD_INDEX_FLEX_EXT:
526  /* just occupy the full device; unit: sectors */
527  bdev->md.md_size_sect = drbd_get_capacity(bdev->md_bdev);
528  bdev->md.md_offset = 0;
529  bdev->md.al_offset = MD_AL_OFFSET;
530  bdev->md.bm_offset = MD_BM_OFFSET;
531  break;
532  case DRBD_MD_INDEX_INTERNAL:
533  case DRBD_MD_INDEX_FLEX_INT:
534  bdev->md.md_offset = drbd_md_ss__(mdev, bdev);
535  /* al size is still fixed */
536  bdev->md.al_offset = -MD_AL_MAX_SIZE;
537  /* we need (slightly less than) ~ this much bitmap sectors: */
538  md_size_sect = drbd_get_capacity(bdev->backing_bdev);
539  md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT);
540  md_size_sect = BM_SECT_TO_EXT(md_size_sect);
541  md_size_sect = ALIGN(md_size_sect, 8);
542 
543  /* plus the "drbd meta data super block",
544  * and the activity log; */
545  md_size_sect += MD_BM_OFFSET;
546 
547  bdev->md.md_size_sect = md_size_sect;
548  /* bitmap offset is adjusted by 'super' block size */
549  bdev->md.bm_offset = -md_size_sect + MD_AL_OFFSET;
550  break;
551  }
552 }
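/* Rough worked example for the internal/flexible-internal case above,
 * assuming the usual constants from drbd_int.h (one bitmap bit per 4 KiB
 * block, 512-byte sectors): one bitmap sector then covers 16 MiB of backing
 * storage, i.e. about 32 KiB of bitmap per 1 GiB.  For a 1 TiB backing
 * device, md_size_sect therefore comes out at roughly 64 Ki bitmap sectors
 * (~32 MiB) plus the small activity-log and superblock area (MD_BM_OFFSET
 * sectors) added at the end of the computation above.
 */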
553 
554 /* input size is expected to be in KB */
555 char *ppsize(char *buf, unsigned long long size)
556 {
557  /* Needs 9 bytes at max including trailing NUL:
558  * -1ULL ==> "16384 EB" */
559  static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
560  int base = 0;
561  while (size >= 10000 && base < sizeof(units)-1) {
562  /* shift + round */
563  size = (size >> 10) + !!(size & (1<<9));
564  base++;
565  }
566  sprintf(buf, "%u %cB", (unsigned)size, units[base]);
567 
568  return buf;
569 }
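/* Example (sketch): ppsize(buf, 1) yields "1 KB", ppsize(buf, 2097152)
 * yields "2048 MB", and ppsize(buf, 10485760) yields "10 GB" -- the unit
 * is bumped (with rounding on bit 9) whenever the value reaches 10000,
 * so at most four digits are printed per unit.
 */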
570 
571 /* there is still a theoretical deadlock when called from receiver
572  * on a D_INCONSISTENT R_PRIMARY:
573  * remote READ does inc_ap_bio, receiver would need to receive answer
574  * packet from remote to dec_ap_bio again.
575  * receiver receive_sizes(), comes here,
576  * waits for ap_bio_cnt == 0. -> deadlock.
577  * but this cannot happen, actually, because:
578  * R_PRIMARY D_INCONSISTENT, and peer's disk is unreachable
579  * (not connected, or bad/no disk on peer):
580  * see drbd_fail_request_early, ap_bio_cnt is zero.
581  * R_PRIMARY D_INCONSISTENT, and C_SYNC_TARGET:
582  * peer may not initiate a resize.
583  */
584 void drbd_suspend_io(struct drbd_conf *mdev)
585 {
586  set_bit(SUSPEND_IO, &mdev->flags);
587  if (is_susp(mdev->state))
588  return;
589  wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt));
590 }
591 
592 void drbd_resume_io(struct drbd_conf *mdev)
593 {
594  clear_bit(SUSPEND_IO, &mdev->flags);
595  wake_up(&mdev->misc_wait);
596 }
597 
605 enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
606 {
607  sector_t prev_first_sect, prev_size; /* previous meta location */
608  sector_t la_size;
609  sector_t size;
610  char ppb[10];
611 
612  int md_moved, la_size_changed;
613  enum determine_dev_size rv = unchanged;
614 
615  /* race:
616  * application request passes inc_ap_bio,
617  * but then cannot get an AL-reference.
618  * this function later may wait on ap_bio_cnt == 0. -> deadlock.
619  *
620  * to avoid that:
621  * Suspend IO right here.
622  * still lock the act_log to not trigger ASSERTs there.
623  */
624  drbd_suspend_io(mdev);
625 
626  /* no wait necessary anymore, actually we could assert that */
627  wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
628 
629  prev_first_sect = drbd_md_first_sector(mdev->ldev);
630  prev_size = mdev->ldev->md.md_size_sect;
631  la_size = mdev->ldev->md.la_size_sect;
632 
633  /* TODO: should only be some assert here, not (re)init... */
634  drbd_md_set_sector_offsets(mdev, mdev->ldev);
635 
636  size = drbd_new_dev_size(mdev, mdev->ldev, flags & DDSF_FORCED);
637 
638  if (drbd_get_capacity(mdev->this_bdev) != size ||
639  drbd_bm_capacity(mdev) != size) {
640  int err;
641  err = drbd_bm_resize(mdev, size, !(flags & DDSF_NO_RESYNC));
642  if (unlikely(err)) {
643  /* currently there is only one error: ENOMEM! */
644  size = drbd_bm_capacity(mdev)>>1;
645  if (size == 0) {
646  dev_err(DEV, "OUT OF MEMORY! "
647  "Could not allocate bitmap!\n");
648  } else {
649  dev_err(DEV, "BM resizing failed. "
650  "Leaving size unchanged at size = %lu KB\n",
651  (unsigned long)size);
652  }
653  rv = dev_size_error;
654  }
655  /* racy, see comments above. */
656  drbd_set_my_capacity(mdev, size);
657  mdev->ldev->md.la_size_sect = size;
658  dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
659  (unsigned long long)size>>1);
660  }
661  if (rv == dev_size_error)
662  goto out;
663 
664  la_size_changed = (la_size != mdev->ldev->md.la_size_sect);
665 
666  md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
667  || prev_size != mdev->ldev->md.md_size_sect;
668 
669  if (la_size_changed || md_moved) {
670  int err;
671 
672  drbd_al_shrink(mdev); /* All extents inactive. */
673  dev_info(DEV, "Writing the whole bitmap, %s\n",
674  la_size_changed && md_moved ? "size changed and md moved" :
675  la_size_changed ? "size changed" : "md moved");
676  /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
677  err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
678  "size changed", BM_LOCKED_MASK);
679  if (err) {
680  rv = dev_size_error;
681  goto out;
682  }
683  drbd_md_mark_dirty(mdev);
684  }
685 
686  if (size > la_size)
687  rv = grew;
688  if (size < la_size)
689  rv = shrunk;
690 out:
691  lc_unlock(mdev->act_log);
692  wake_up(&mdev->al_wait);
693  drbd_resume_io(mdev);
694 
695  return rv;
696 }
697 
698 sector_t
699 drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space)
700 {
701  sector_t p_size = mdev->p_size; /* partner's disk size. */
702  sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */
703  sector_t m_size; /* my size */
704  sector_t u_size = bdev->dc.disk_size; /* size requested by user. */
705  sector_t size = 0;
706 
707  m_size = drbd_get_max_capacity(bdev);
708 
709  if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) {
710  dev_warn(DEV, "Resize while not connected was forced by the user!\n");
711  p_size = m_size;
712  }
713 
714  if (p_size && m_size) {
715  size = min_t(sector_t, p_size, m_size);
716  } else {
717  if (la_size) {
718  size = la_size;
719  if (m_size && m_size < size)
720  size = m_size;
721  if (p_size && p_size < size)
722  size = p_size;
723  } else {
724  if (m_size)
725  size = m_size;
726  if (p_size)
727  size = p_size;
728  }
729  }
730 
731  if (size == 0)
732  dev_err(DEV, "Both nodes diskless!\n");
733 
734  if (u_size) {
735  if (u_size > size)
736  dev_err(DEV, "Requested disk size is too big (%lu > %lu)\n",
737  (unsigned long)u_size>>1, (unsigned long)size>>1);
738  else
739  size = u_size;
740  }
741 
742  return size;
743 }
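/* Worked example (a sketch, sizes made up): if the peer reports p_size = 0
 * (diskless or never seen) while the last agreed size la_size is 500 GiB and
 * the local disk m_size is 1 TiB, the result stays at 500 GiB -- the device
 * is not grown just because the local disk is bigger while the peer size is
 * unknown.  Once both sizes are known, min(p_size, m_size) wins, and an
 * explicit u_size from the configuration may only shrink that further; a
 * larger u_size is rejected with the error message above.
 */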
744 
753 static int drbd_check_al_size(struct drbd_conf *mdev)
754 {
755  struct lru_cache *n, *t;
756  struct lc_element *e;
757  unsigned int in_use;
758  int i;
759 
760  ERR_IF(mdev->sync_conf.al_extents < 7)
761  mdev->sync_conf.al_extents = 127;
762 
763  if (mdev->act_log &&
764  mdev->act_log->nr_elements == mdev->sync_conf.al_extents)
765  return 0;
766 
767  in_use = 0;
768  t = mdev->act_log;
769  n = lc_create("act_log", drbd_al_ext_cache,
770  mdev->sync_conf.al_extents, sizeof(struct lc_element), 0);
771 
772  if (n == NULL) {
773  dev_err(DEV, "Cannot allocate act_log lru!\n");
774  return -ENOMEM;
775  }
776  spin_lock_irq(&mdev->al_lock);
777  if (t) {
778  for (i = 0; i < t->nr_elements; i++) {
779  e = lc_element_by_index(t, i);
780  if (e->refcnt)
781  dev_err(DEV, "refcnt(%d)==%d\n",
782  e->lc_number, e->refcnt);
783  in_use += e->refcnt;
784  }
785  }
786  if (!in_use)
787  mdev->act_log = n;
788  spin_unlock_irq(&mdev->al_lock);
789  if (in_use) {
790  dev_err(DEV, "Activity log still in use!\n");
791  lc_destroy(n);
792  return -EBUSY;
793  } else {
794  if (t)
795  lc_destroy(t);
796  }
797  drbd_md_mark_dirty(mdev); /* we changed mdev->act_log->nr_elements */
798  return 0;
799 }
800 
801 static void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int max_bio_size)
802 {
803  struct request_queue * const q = mdev->rq_queue;
804  unsigned int max_hw_sectors = max_bio_size >> 9;
805  unsigned int max_segments = 0;
806 
807  if (get_ldev_if_state(mdev, D_ATTACHING)) {
808  struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
809 
810  max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
811  max_segments = mdev->ldev->dc.max_bio_bvecs;
812  put_ldev(mdev);
813  }
814 
815  blk_queue_logical_block_size(q, 512);
816  blk_queue_max_hw_sectors(q, max_hw_sectors);
817  /* This is the workaround for "bio would need to, but cannot, be split" */
818  blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS);
819  blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1);
820 
821  if (get_ldev_if_state(mdev, D_ATTACHING)) {
822  struct request_queue * const b = mdev->ldev->backing_bdev->bd_disk->queue;
823 
824  blk_queue_stack_limits(q, b);
825 
826  if (q->backing_dev_info.ra_pages != b->backing_dev_info.ra_pages) {
827  dev_info(DEV, "Adjusting my ra_pages to backing device's (%lu -> %lu)\n",
828  q->backing_dev_info.ra_pages,
829  b->backing_dev_info.ra_pages);
830  q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
831  }
832  put_ldev(mdev);
833  }
834 }
835 
836 void drbd_reconsider_max_bio_size(struct drbd_conf *mdev)
837 {
838  unsigned int now, new, local, peer;
839 
840  now = queue_max_hw_sectors(mdev->rq_queue) << 9;
841  local = mdev->local_max_bio_size; /* Eventually last known value, from volatile memory */
842  peer = mdev->peer_max_bio_size; /* Eventually last known value, from meta data */
843 
844  if (get_ldev_if_state(mdev, D_ATTACHING)) {
845  local = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
846  mdev->local_max_bio_size = local;
847  put_ldev(mdev);
848  }
849  local = min(local, DRBD_MAX_BIO_SIZE);
850 
851  /* We may ignore peer limits if the peer is modern enough.
852  Because from 8.3.8 onwards the peer can use multiple
853  BIOs for a single peer_request */
854  if (mdev->state.conn >= C_CONNECTED) {
855  if (mdev->agreed_pro_version < 94) {
856  peer = min(mdev->peer_max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
857  /* Correct old drbd (up to 8.3.7) if it believes it can do more than 32KiB */
858  } else if (mdev->agreed_pro_version == 94)
859  peer = DRBD_MAX_SIZE_H80_PACKET;
860  else /* drbd 8.3.8 onwards */
861  peer = DRBD_MAX_BIO_SIZE;
862  }
863 
864  new = min(local, peer);
865 
866  if (mdev->state.role == R_PRIMARY && new < now)
867  dev_err(DEV, "ASSERT FAILED new < now; (%u < %u)\n", new, now);
868 
869  if (new != now)
870  dev_info(DEV, "max BIO size = %u\n", new);
871 
872  drbd_setup_queue_param(mdev, new);
873 }
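/* Example (a sketch, numbers made up): with a local backing queue limited to
 * 128 KiB per bio and a connected peer still running drbd 8.3.7
 * (agreed_pro_version < 94), the peer side is capped at
 * DRBD_MAX_SIZE_H80_PACKET, so the queue ends up set for 32 KiB bios (the
 * limit mentioned in the comment above); with an 8.3.8-or-later peer,
 * new = min(128 KiB, DRBD_MAX_BIO_SIZE) instead.
 */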
874 
875 /* serialize deconfig (worker exiting, doing cleanup)
876  * and reconfig (drbdsetup disk, drbdsetup net)
877  *
878  * Wait for a potentially exiting worker, then restart it,
879  * or start a new one. Flush any pending work, there may still be an
880  * after_state_change queued.
881  */
882 static void drbd_reconfig_start(struct drbd_conf *mdev)
883 {
884  wait_event(mdev->state_wait, !test_and_set_bit(CONFIG_PENDING, &mdev->flags));
885  wait_event(mdev->state_wait, !test_bit(DEVICE_DYING, &mdev->flags));
886  drbd_thread_start(&mdev->worker);
887  drbd_flush_workqueue(mdev);
888 }
889 
890 /* if still unconfigured, stops worker again.
891  * if configured now, clears CONFIG_PENDING.
892  * wakes potential waiters */
893 static void drbd_reconfig_done(struct drbd_conf *mdev)
894 {
895  spin_lock_irq(&mdev->req_lock);
896  if (mdev->state.disk == D_DISKLESS &&
897  mdev->state.conn == C_STANDALONE &&
898  mdev->state.role == R_SECONDARY) {
899  set_bit(DEVICE_DYING, &mdev->flags);
900  drbd_thread_stop_nowait(&mdev->worker);
901  } else
902  clear_bit(CONFIG_PENDING, &mdev->flags);
903  spin_unlock_irq(&mdev->req_lock);
904  wake_up(&mdev->state_wait);
905 }
906 
907 /* Make sure IO is suspended before calling this function(). */
908 static void drbd_suspend_al(struct drbd_conf *mdev)
909 {
910  int s = 0;
911 
912  if (lc_try_lock(mdev->act_log)) {
913  drbd_al_shrink(mdev);
914  lc_unlock(mdev->act_log);
915  } else {
916  dev_warn(DEV, "Failed to lock al in drbd_suspend_al()\n");
917  return;
918  }
919 
920  spin_lock_irq(&mdev->req_lock);
921  if (mdev->state.conn < C_CONNECTED)
922  s = !test_and_set_bit(AL_SUSPENDED, &mdev->flags);
923 
924  spin_unlock_irq(&mdev->req_lock);
925 
926  if (s)
927  dev_info(DEV, "Suspended AL updates\n");
928 }
929 
930 /* does always return 0;
931  * interesting return code is in reply->ret_code */
932 static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
933  struct drbd_nl_cfg_reply *reply)
934 {
935  enum drbd_ret_code retcode;
936  enum determine_dev_size dd;
937  sector_t max_possible_sectors;
938  sector_t min_md_device_sectors;
939  struct drbd_backing_dev *nbc = NULL; /* new_backing_conf */
940  struct block_device *bdev;
941  struct lru_cache *resync_lru = NULL;
942  union drbd_state ns, os;
943  enum drbd_state_rv rv;
944  int cp_discovered = 0;
945  int logical_block_size;
946 
947  drbd_reconfig_start(mdev);
948 
949  /* if you want to reconfigure, please tear down first */
950  if (mdev->state.disk > D_DISKLESS) {
951  retcode = ERR_DISK_CONFIGURED;
952  goto fail;
953  }
954  /* It may just now have detached because of IO error. Make sure
955  * drbd_ldev_destroy is done already, we may end up here very fast,
956  * e.g. if someone calls attach from the on-io-error handler,
957  * to realize a "hot spare" feature (not that I'd recommend that) */
958  wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
959 
960  /* make sure there is no leftover from previous force-detach attempts */
961  clear_bit(FORCE_DETACH, &mdev->flags);
962 
963  /* and no leftover from previously aborted resync or verify, either */
964  mdev->rs_total = 0;
965  mdev->rs_failed = 0;
966  atomic_set(&mdev->rs_pending_cnt, 0);
967 
968  /* allocation not in the IO path, cqueue thread context */
969  nbc = kzalloc(sizeof(struct drbd_backing_dev), GFP_KERNEL);
970  if (!nbc) {
971  retcode = ERR_NOMEM;
972  goto fail;
973  }
974 
975  nbc->dc.disk_size = DRBD_DISK_SIZE_SECT_DEF;
976  nbc->dc.on_io_error = DRBD_ON_IO_ERROR_DEF;
977  nbc->dc.fencing = DRBD_FENCING_DEF;
978  nbc->dc.max_bio_bvecs = DRBD_MAX_BIO_BVECS_DEF;
979 
980  if (!disk_conf_from_tags(mdev, nlp->tag_list, &nbc->dc)) {
981  retcode = ERR_MANDATORY_TAG;
982  goto fail;
983  }
984 
985  if (nbc->dc.meta_dev_idx < DRBD_MD_INDEX_FLEX_INT) {
986  retcode = ERR_MD_IDX_INVALID;
987  goto fail;
988  }
989 
990  if (get_net_conf(mdev)) {
991  int prot = mdev->net_conf->wire_protocol;
992  put_net_conf(mdev);
993  if (nbc->dc.fencing == FP_STONITH && prot == DRBD_PROT_A) {
994  retcode = ERR_STONITH_AND_PROT_A;
995  goto fail;
996  }
997  }
998 
999  bdev = blkdev_get_by_path(nbc->dc.backing_dev,
1000  FMODE_READ | FMODE_WRITE | FMODE_EXCL, mdev);
1001  if (IS_ERR(bdev)) {
1002  dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.backing_dev,
1003  PTR_ERR(bdev));
1004  retcode = ERR_OPEN_DISK;
1005  goto fail;
1006  }
1007  nbc->backing_bdev = bdev;
1008 
1009  /*
1010  * meta_dev_idx >= 0: external fixed size, possibly multiple
1011  * drbd sharing one meta device. TODO in that case, paranoia
1012  * check that [md_bdev, meta_dev_idx] is not yet used by some
1013  * other drbd minor! (if you use drbd.conf + drbdadm, that
1014  * should check it for you already; but if you don't, or
1015  * someone fooled it, we need to double check here)
1016  */
1017  bdev = blkdev_get_by_path(nbc->dc.meta_dev,
1018  FMODE_READ | FMODE_WRITE | FMODE_EXCL,
1019  (nbc->dc.meta_dev_idx < 0) ?
1020  (void *)mdev : (void *)drbd_m_holder);
1021  if (IS_ERR(bdev)) {
1022  dev_err(DEV, "open(\"%s\") failed with %ld\n", nbc->dc.meta_dev,
1023  PTR_ERR(bdev));
1024  retcode = ERR_OPEN_MD_DISK;
1025  goto fail;
1026  }
1027  nbc->md_bdev = bdev;
1028 
1029  if ((nbc->backing_bdev == nbc->md_bdev) !=
1030  (nbc->dc.meta_dev_idx == DRBD_MD_INDEX_INTERNAL ||
1031  nbc->dc.meta_dev_idx == DRBD_MD_INDEX_FLEX_INT)) {
1032  retcode = ERR_MD_IDX_INVALID;
1033  goto fail;
1034  }
1035 
1036  resync_lru = lc_create("resync", drbd_bm_ext_cache,
1037  61, sizeof(struct bm_extent),
1038  offsetof(struct bm_extent, lce));
1039  if (!resync_lru) {
1040  retcode = ERR_NOMEM;
1041  goto fail;
1042  }
1043 
1044  /* RT - for drbd_get_max_capacity() DRBD_MD_INDEX_FLEX_INT */
1045  drbd_md_set_sector_offsets(mdev, nbc);
1046 
1047  if (drbd_get_max_capacity(nbc) < nbc->dc.disk_size) {
1048  dev_err(DEV, "max capacity %llu smaller than disk size %llu\n",
1049  (unsigned long long) drbd_get_max_capacity(nbc),
1050  (unsigned long long) nbc->dc.disk_size);
1051  retcode = ERR_DISK_TOO_SMALL;
1052  goto fail;
1053  }
1054 
1055  if (nbc->dc.meta_dev_idx < 0) {
1056  max_possible_sectors = DRBD_MAX_SECTORS_FLEX;
1057  /* at least one MB, otherwise it does not make sense */
1058  min_md_device_sectors = (2<<10);
1059  } else {
1060  max_possible_sectors = DRBD_MAX_SECTORS;
1061  min_md_device_sectors = MD_RESERVED_SECT * (nbc->dc.meta_dev_idx + 1);
1062  }
1063 
1064  if (drbd_get_capacity(nbc->md_bdev) < min_md_device_sectors) {
1065  retcode = ERR_MD_DISK_TOO_SMALL;
1066  dev_warn(DEV, "refusing attach: md-device too small, "
1067  "at least %llu sectors needed for this meta-disk type\n",
1068  (unsigned long long) min_md_device_sectors);
1069  goto fail;
1070  }
1071 
1072  /* Make sure the new disk is big enough
1073  * (we may currently be R_PRIMARY with no local disk...) */
1074  if (drbd_get_max_capacity(nbc) <
1075  drbd_get_capacity(mdev->this_bdev)) {
1076  retcode = ERR_DISK_TOO_SMALL;
1077  goto fail;
1078  }
1079 
1080  nbc->known_size = drbd_get_capacity(nbc->backing_bdev);
1081 
1082  if (nbc->known_size > max_possible_sectors) {
1083  dev_warn(DEV, "==> truncating very big lower level device "
1084  "to currently maximum possible %llu sectors <==\n",
1085  (unsigned long long) max_possible_sectors);
1086  if (nbc->dc.meta_dev_idx >= 0)
1087  dev_warn(DEV, "==>> using internal or flexible "
1088  "meta data may help <<==\n");
1089  }
1090 
1091  drbd_suspend_io(mdev);
1092  /* also wait for the last barrier ack. */
1093  wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_pending_cnt) || is_susp(mdev->state));
1094  /* and for any other previously queued work */
1095  drbd_flush_workqueue(mdev);
1096 
1097  rv = _drbd_request_state(mdev, NS(disk, D_ATTACHING), CS_VERBOSE);
1098  retcode = rv; /* FIXME: Type mismatch. */
1099  drbd_resume_io(mdev);
1100  if (rv < SS_SUCCESS)
1101  goto fail;
1102 
1103  if (!get_ldev_if_state(mdev, D_ATTACHING))
1104  goto force_diskless;
1105 
1106  drbd_md_set_sector_offsets(mdev, nbc);
1107 
1108  /* allocate a second IO page if logical_block_size != 512 */
1109  logical_block_size = bdev_logical_block_size(nbc->md_bdev);
1110  if (logical_block_size == 0)
1111  logical_block_size = MD_SECTOR_SIZE;
1112 
1113  if (logical_block_size != MD_SECTOR_SIZE) {
1114  if (!mdev->md_io_tmpp) {
1115  struct page *page = alloc_page(GFP_NOIO);
1116  if (!page)
1117  goto force_diskless_dec;
1118 
1119  dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n",
1120  logical_block_size, MD_SECTOR_SIZE);
1121  dev_warn(DEV, "Workaround engaged (has performance impact).\n");
1122 
1123  mdev->md_io_tmpp = page;
1124  }
1125  }
1126 
1127  if (!mdev->bitmap) {
1128  if (drbd_bm_init(mdev)) {
1129  retcode = ERR_NOMEM;
1130  goto force_diskless_dec;
1131  }
1132  }
1133 
1134  retcode = drbd_md_read(mdev, nbc);
1135  if (retcode != NO_ERROR)
1136  goto force_diskless_dec;
1137 
1138  if (mdev->state.conn < C_CONNECTED &&
1139  mdev->state.role == R_PRIMARY &&
1140  (mdev->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) {
1141  dev_err(DEV, "Can only attach to data with current UUID=%016llX\n",
1142  (unsigned long long)mdev->ed_uuid);
1143  retcode = ERR_DATA_NOT_CURRENT;
1144  goto force_diskless_dec;
1145  }
1146 
1147  /* Since we are diskless, fix the activity log first... */
1148  if (drbd_check_al_size(mdev)) {
1149  retcode = ERR_NOMEM;
1150  goto force_diskless_dec;
1151  }
1152 
1153  /* Prevent shrinking of consistent devices ! */
1154  if (drbd_md_test_flag(nbc, MDF_CONSISTENT) &&
1155  drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) {
1156  dev_warn(DEV, "refusing to truncate a consistent device\n");
1157  retcode = ERR_DISK_TOO_SMALL;
1158  goto force_diskless_dec;
1159  }
1160 
1161  if (!drbd_al_read_log(mdev, nbc)) {
1162  retcode = ERR_IO_MD_DISK;
1163  goto force_diskless_dec;
1164  }
1165 
1166  /* Reset the "barriers don't work" bits here, then force meta data to
1167  * be written, to ensure we determine if barriers are supported. */
1168  if (nbc->dc.no_md_flush)
1169  set_bit(MD_NO_FUA, &mdev->flags);
1170  else
1171  clear_bit(MD_NO_FUA, &mdev->flags);
1172 
1173  /* Point of no return reached.
1174  * Devices and memory are no longer released by error cleanup below.
1175  * now mdev takes over responsibility, and the state engine should
1176  * clean it up somewhere. */
1177  D_ASSERT(mdev->ldev == NULL);
1178  mdev->ldev = nbc;
1179  mdev->resync = resync_lru;
1180  nbc = NULL;
1181  resync_lru = NULL;
1182 
1183  mdev->write_ordering = WO_bdev_flush;
1184  drbd_bump_write_ordering(mdev, WO_bdev_flush);
1185 
1186  if (drbd_md_test_flag(mdev->ldev, MDF_CRASHED_PRIMARY))
1187  set_bit(CRASHED_PRIMARY, &mdev->flags);
1188  else
1189  clear_bit(CRASHED_PRIMARY, &mdev->flags);
1190 
1191  if (drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
1192  !(mdev->state.role == R_PRIMARY && mdev->state.susp_nod)) {
1193  set_bit(CRASHED_PRIMARY, &mdev->flags);
1194  cp_discovered = 1;
1195  }
1196 
1197  mdev->send_cnt = 0;
1198  mdev->recv_cnt = 0;
1199  mdev->read_cnt = 0;
1200  mdev->writ_cnt = 0;
1201 
1202  drbd_reconsider_max_bio_size(mdev);
1203 
1204  /* If I am currently not R_PRIMARY,
1205  * but meta data primary indicator is set,
1206  * I just now recover from a hard crash,
1207  * and have been R_PRIMARY before that crash.
1208  *
1209  * Now, if I had no connection before that crash
1210  * (have been degraded R_PRIMARY), chances are that
1211  * I won't find my peer now either.
1212  *
1213  * In that case, and _only_ in that case,
1214  * we use the degr-wfc-timeout instead of the default,
1215  * so we can automatically recover from a crash of a
1216  * degraded but active "cluster" after a certain timeout.
1217  */
1218  clear_bit(USE_DEGR_WFC_T, &mdev->flags);
1219  if (mdev->state.role != R_PRIMARY &&
1220  drbd_md_test_flag(mdev->ldev, MDF_PRIMARY_IND) &&
1221  !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
1222  set_bit(USE_DEGR_WFC_T, &mdev->flags);
1223 
1224  dd = drbd_determine_dev_size(mdev, 0);
1225  if (dd == dev_size_error) {
1226  retcode = ERR_NOMEM_BITMAP;
1227  goto force_diskless_dec;
1228  } else if (dd == grew)
1229  set_bit(RESYNC_AFTER_NEG, &mdev->flags);
1230 
1231  if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1232  dev_info(DEV, "Assuming that all blocks are out of sync "
1233  "(aka FullSync)\n");
1234  if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
1235  "set_n_write from attaching", BM_LOCKED_MASK)) {
1236  retcode = ERR_IO_MD_DISK;
1237  goto force_diskless_dec;
1238  }
1239  } else {
1240  if (drbd_bitmap_io(mdev, &drbd_bm_read,
1241  "read from attaching", BM_LOCKED_MASK) < 0) {
1242  retcode = ERR_IO_MD_DISK;
1243  goto force_diskless_dec;
1244  }
1245  }
1246 
1247  if (cp_discovered) {
1248  drbd_al_apply_to_bm(mdev);
1249  if (drbd_bitmap_io(mdev, &drbd_bm_write,
1250  "crashed primary apply AL", BM_LOCKED_MASK)) {
1251  retcode = ERR_IO_MD_DISK;
1252  goto force_diskless_dec;
1253  }
1254  }
1255 
1256  if (_drbd_bm_total_weight(mdev) == drbd_bm_bits(mdev))
1257  drbd_suspend_al(mdev); /* IO is still suspended here... */
1258 
1259  spin_lock_irq(&mdev->req_lock);
1260  os = mdev->state;
1261  ns.i = os.i;
1262  /* If MDF_CONSISTENT is not set go into inconsistent state,
1263  otherwise investigate MDF_WasUpToDate...
1264  If MDF_WAS_UP_TO_DATE is not set go into D_OUTDATED disk state,
1265  otherwise into D_CONSISTENT state.
1266  */
1267  if (drbd_md_test_flag(mdev->ldev, MDF_CONSISTENT)) {
1268  if (drbd_md_test_flag(mdev->ldev, MDF_WAS_UP_TO_DATE))
1269  ns.disk = D_CONSISTENT;
1270  else
1271  ns.disk = D_OUTDATED;
1272  } else {
1273  ns.disk = D_INCONSISTENT;
1274  }
1275 
1276  if (drbd_md_test_flag(mdev->ldev, MDF_PEER_OUT_DATED))
1277  ns.pdsk = D_OUTDATED;
1278 
1279  if ( ns.disk == D_CONSISTENT &&
1280  (ns.pdsk == D_OUTDATED || mdev->ldev->dc.fencing == FP_DONT_CARE))
1281  ns.disk = D_UP_TO_DATE;
1282 
1283  /* All tests on MDF_PRIMARY_IND, MDF_CONNECTED_IND,
1284  MDF_CONSISTENT and MDF_WAS_UP_TO_DATE must happen before
1285  this point, because drbd_request_state() modifies these
1286  flags. */
1287 
1288  /* In case we are C_CONNECTED postpone any decision on the new disk
1289  state after the negotiation phase. */
1290  if (mdev->state.conn == C_CONNECTED) {
1291  mdev->new_state_tmp.i = ns.i;
1292  ns.i = os.i;
1293  ns.disk = D_NEGOTIATING;
1294 
1295  /* We expect to receive up-to-date UUIDs soon.
1296  To avoid a race in receive_state, free p_uuid while
1297  holding req_lock. I.e. atomic with the state change */
1298  kfree(mdev->p_uuid);
1299  mdev->p_uuid = NULL;
1300  }
1301 
1302  rv = _drbd_set_state(mdev, ns, CS_VERBOSE, NULL);
1303  ns = mdev->state;
1304  spin_unlock_irq(&mdev->req_lock);
1305 
1306  if (rv < SS_SUCCESS)
1307  goto force_diskless_dec;
1308 
1309  if (mdev->state.role == R_PRIMARY)
1310  mdev->ldev->md.uuid[UI_CURRENT] |= (u64)1;
1311  else
1312  mdev->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
1313 
1314  drbd_md_mark_dirty(mdev);
1315  drbd_md_sync(mdev);
1316 
1317  kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1318  put_ldev(mdev);
1319  reply->ret_code = retcode;
1320  drbd_reconfig_done(mdev);
1321  return 0;
1322 
1323  force_diskless_dec:
1324  put_ldev(mdev);
1325  force_diskless:
1326  drbd_force_state(mdev, NS(disk, D_FAILED));
1327  drbd_md_sync(mdev);
1328  fail:
1329  if (nbc) {
1330  if (nbc->backing_bdev)
1331  blkdev_put(nbc->backing_bdev,
1332  FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1333  if (nbc->md_bdev)
1334  blkdev_put(nbc->md_bdev,
1335  FMODE_READ | FMODE_WRITE | FMODE_EXCL);
1336  kfree(nbc);
1337  }
1338  lc_destroy(resync_lru);
1339 
1340  reply->ret_code = retcode;
1341  drbd_reconfig_done(mdev);
1342  return 0;
1343 }
1344 
1345 /* Detaching the disk is a process in multiple stages. First we need to lock
1346  * out application IO, in-flight IO, IO stuck in drbd_al_begin_io.
1347  * Then we transition to D_DISKLESS, and wait for put_ldev() to return all
1348  * internal references as well.
1349  * Only then we have finally detached. */
1350 static int drbd_nl_detach(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1351  struct drbd_nl_cfg_reply *reply)
1352 {
1353  enum drbd_ret_code retcode;
1354  int ret;
1355  struct detach dt = {};
1356 
1357  if (!detach_from_tags(mdev, nlp->tag_list, &dt)) {
1358  reply->ret_code = ERR_MANDATORY_TAG;
1359  goto out;
1360  }
1361 
1362  if (dt.detach_force) {
1363  set_bit(FORCE_DETACH, &mdev->flags);
1364  drbd_force_state(mdev, NS(disk, D_FAILED));
1365  reply->ret_code = SS_SUCCESS;
1366  goto out;
1367  }
1368 
1369  drbd_suspend_io(mdev); /* so no-one is stuck in drbd_al_begin_io */
1370  drbd_md_get_buffer(mdev); /* make sure there is no in-flight meta-data IO */
1371  retcode = drbd_request_state(mdev, NS(disk, D_FAILED));
1372  drbd_md_put_buffer(mdev);
1373  /* D_FAILED will transition to DISKLESS. */
1374  ret = wait_event_interruptible(mdev->misc_wait,
1375  mdev->state.disk != D_FAILED);
1376  drbd_resume_io(mdev);
1377 
1378  if ((int)retcode == (int)SS_IS_DISKLESS)
1379  retcode = SS_NOTHING_TO_DO;
1380  if (ret)
1381  retcode = ERR_INTR;
1382  reply->ret_code = retcode;
1383 out:
1384  return 0;
1385 }
1386 
1387 static int drbd_nl_net_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1388  struct drbd_nl_cfg_reply *reply)
1389 {
1390  int i, ns;
1391  enum drbd_ret_code retcode;
1392  struct net_conf *new_conf = NULL;
1393  struct crypto_hash *tfm = NULL;
1394  struct crypto_hash *integrity_w_tfm = NULL;
1395  struct crypto_hash *integrity_r_tfm = NULL;
1396  struct hlist_head *new_tl_hash = NULL;
1397  struct hlist_head *new_ee_hash = NULL;
1398  struct drbd_conf *odev;
1399  char hmac_name[CRYPTO_MAX_ALG_NAME];
1400  void *int_dig_out = NULL;
1401  void *int_dig_in = NULL;
1402  void *int_dig_vv = NULL;
1403  struct sockaddr *new_my_addr, *new_peer_addr, *taken_addr;
1404 
1405  drbd_reconfig_start(mdev);
1406 
1407  if (mdev->state.conn > C_STANDALONE) {
1408  retcode = ERR_NET_CONFIGURED;
1409  goto fail;
1410  }
1411 
1412  /* allocation not in the IO path, cqueue thread context */
1413  new_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
1414  if (!new_conf) {
1415  retcode = ERR_NOMEM;
1416  goto fail;
1417  }
1418 
1419  new_conf->timeout = DRBD_TIMEOUT_DEF;
1420  new_conf->try_connect_int = DRBD_CONNECT_INT_DEF;
1421  new_conf->ping_int = DRBD_PING_INT_DEF;
1422  new_conf->max_epoch_size = DRBD_MAX_EPOCH_SIZE_DEF;
1423  new_conf->max_buffers = DRBD_MAX_BUFFERS_DEF;
1424  new_conf->unplug_watermark = DRBD_UNPLUG_WATERMARK_DEF;
1425  new_conf->sndbuf_size = DRBD_SNDBUF_SIZE_DEF;
1426  new_conf->rcvbuf_size = DRBD_RCVBUF_SIZE_DEF;
1427  new_conf->ko_count = DRBD_KO_COUNT_DEF;
1428  new_conf->after_sb_0p = DRBD_AFTER_SB_0P_DEF;
1429  new_conf->after_sb_1p = DRBD_AFTER_SB_1P_DEF;
1430  new_conf->after_sb_2p = DRBD_AFTER_SB_2P_DEF;
1431  new_conf->want_lose = 0;
1432  new_conf->two_primaries = 0;
1433  new_conf->wire_protocol = DRBD_PROT_C;
1434  new_conf->ping_timeo = DRBD_PING_TIMEO_DEF;
1435  new_conf->rr_conflict = DRBD_RR_CONFLICT_DEF;
1436  new_conf->on_congestion = DRBD_ON_CONGESTION_DEF;
1437  new_conf->cong_extents = DRBD_CONG_EXTENTS_DEF;
1438 
1439  if (!net_conf_from_tags(mdev, nlp->tag_list, new_conf)) {
1440  retcode = ERR_MANDATORY_TAG;
1441  goto fail;
1442  }
1443 
1444  if (new_conf->two_primaries
1445  && (new_conf->wire_protocol != DRBD_PROT_C)) {
1446  retcode = ERR_NOT_PROTO_C;
1447  goto fail;
1448  }
1449 
1450  if (get_ldev(mdev)) {
1451  enum drbd_fencing_p fp = mdev->ldev->dc.fencing;
1452  put_ldev(mdev);
1453  if (new_conf->wire_protocol == DRBD_PROT_A && fp == FP_STONITH) {
1454  retcode = ERR_STONITH_AND_PROT_A;
1455  goto fail;
1456  }
1457  }
1458 
1459  if (new_conf->on_congestion != OC_BLOCK && new_conf->wire_protocol != DRBD_PROT_A) {
1460  retcode = ERR_CONG_NOT_PROTO_A;
1461  goto fail;
1462  }
1463 
1464  if (mdev->state.role == R_PRIMARY && new_conf->want_lose) {
1465  retcode = ERR_DISCARD;
1466  goto fail;
1467  }
1468 
1469  retcode = NO_ERROR;
1470 
1471  new_my_addr = (struct sockaddr *)&new_conf->my_addr;
1472  new_peer_addr = (struct sockaddr *)&new_conf->peer_addr;
1473  for (i = 0; i < minor_count; i++) {
1474  odev = minor_to_mdev(i);
1475  if (!odev || odev == mdev)
1476  continue;
1477  if (get_net_conf(odev)) {
1478  taken_addr = (struct sockaddr *)&odev->net_conf->my_addr;
1479  if (new_conf->my_addr_len == odev->net_conf->my_addr_len &&
1480  !memcmp(new_my_addr, taken_addr, new_conf->my_addr_len))
1481  retcode = ERR_LOCAL_ADDR;
1482 
1483  taken_addr = (struct sockaddr *)&odev->net_conf->peer_addr;
1484  if (new_conf->peer_addr_len == odev->net_conf->peer_addr_len &&
1485  !memcmp(new_peer_addr, taken_addr, new_conf->peer_addr_len))
1486  retcode = ERR_PEER_ADDR;
1487 
1488  put_net_conf(odev);
1489  if (retcode != NO_ERROR)
1490  goto fail;
1491  }
1492  }
1493 
1494  if (new_conf->cram_hmac_alg[0] != 0) {
1495  snprintf(hmac_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)",
1496  new_conf->cram_hmac_alg);
1497  tfm = crypto_alloc_hash(hmac_name, 0, CRYPTO_ALG_ASYNC);
1498  if (IS_ERR(tfm)) {
1499  tfm = NULL;
1500  retcode = ERR_AUTH_ALG;
1501  goto fail;
1502  }
1503 
1504  if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
1505  retcode = ERR_AUTH_ALG_ND;
1506  goto fail;
1507  }
1508  }
1509 
1510  if (new_conf->integrity_alg[0]) {
1511  integrity_w_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
1512  if (IS_ERR(integrity_w_tfm)) {
1513  integrity_w_tfm = NULL;
1514  retcode=ERR_INTEGRITY_ALG;
1515  goto fail;
1516  }
1517 
1518  if (!drbd_crypto_is_hash(crypto_hash_tfm(integrity_w_tfm))) {
1519  retcode=ERR_INTEGRITY_ALG_ND;
1520  goto fail;
1521  }
1522 
1523  integrity_r_tfm = crypto_alloc_hash(new_conf->integrity_alg, 0, CRYPTO_ALG_ASYNC);
1524  if (IS_ERR(integrity_r_tfm)) {
1525  integrity_r_tfm = NULL;
1526  retcode=ERR_INTEGRITY_ALG;
1527  goto fail;
1528  }
1529  }
1530 
1531  ns = new_conf->max_epoch_size/8;
1532  if (mdev->tl_hash_s != ns) {
1533  new_tl_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
1534  if (!new_tl_hash) {
1535  retcode = ERR_NOMEM;
1536  goto fail;
1537  }
1538  }
1539 
1540  ns = new_conf->max_buffers/8;
1541  if (new_conf->two_primaries && (mdev->ee_hash_s != ns)) {
1542  new_ee_hash = kzalloc(ns*sizeof(void *), GFP_KERNEL);
1543  if (!new_ee_hash) {
1544  retcode = ERR_NOMEM;
1545  goto fail;
1546  }
1547  }
1548 
1549  ((char *)new_conf->shared_secret)[SHARED_SECRET_MAX-1] = 0;
1550 
1551  if (integrity_w_tfm) {
1552  i = crypto_hash_digestsize(integrity_w_tfm);
1553  int_dig_out = kmalloc(i, GFP_KERNEL);
1554  if (!int_dig_out) {
1555  retcode = ERR_NOMEM;
1556  goto fail;
1557  }
1558  int_dig_in = kmalloc(i, GFP_KERNEL);
1559  if (!int_dig_in) {
1560  retcode = ERR_NOMEM;
1561  goto fail;
1562  }
1563  int_dig_vv = kmalloc(i, GFP_KERNEL);
1564  if (!int_dig_vv) {
1565  retcode = ERR_NOMEM;
1566  goto fail;
1567  }
1568  }
1569 
1570  if (!mdev->bitmap) {
1571  if(drbd_bm_init(mdev)) {
1572  retcode = ERR_NOMEM;
1573  goto fail;
1574  }
1575  }
1576 
1577  drbd_flush_workqueue(mdev);
1578  spin_lock_irq(&mdev->req_lock);
1579  if (mdev->net_conf != NULL) {
1580  retcode = ERR_NET_CONFIGURED;
1581  spin_unlock_irq(&mdev->req_lock);
1582  goto fail;
1583  }
1584  mdev->net_conf = new_conf;
1585 
1586  mdev->send_cnt = 0;
1587  mdev->recv_cnt = 0;
1588 
1589  if (new_tl_hash) {
1590  kfree(mdev->tl_hash);
1591  mdev->tl_hash_s = mdev->net_conf->max_epoch_size/8;
1592  mdev->tl_hash = new_tl_hash;
1593  }
1594 
1595  if (new_ee_hash) {
1596  kfree(mdev->ee_hash);
1597  mdev->ee_hash_s = mdev->net_conf->max_buffers/8;
1598  mdev->ee_hash = new_ee_hash;
1599  }
1600 
1601  crypto_free_hash(mdev->cram_hmac_tfm);
1602  mdev->cram_hmac_tfm = tfm;
1603 
1604  crypto_free_hash(mdev->integrity_w_tfm);
1605  mdev->integrity_w_tfm = integrity_w_tfm;
1606 
1607  crypto_free_hash(mdev->integrity_r_tfm);
1608  mdev->integrity_r_tfm = integrity_r_tfm;
1609 
1610  kfree(mdev->int_dig_out);
1611  kfree(mdev->int_dig_in);
1612  kfree(mdev->int_dig_vv);
1613  mdev->int_dig_out=int_dig_out;
1614  mdev->int_dig_in=int_dig_in;
1615  mdev->int_dig_vv=int_dig_vv;
1616  retcode = _drbd_set_state(_NS(mdev, conn, C_UNCONNECTED), CS_VERBOSE, NULL);
1617  spin_unlock_irq(&mdev->req_lock);
1618 
1619  kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1620  reply->ret_code = retcode;
1621  drbd_reconfig_done(mdev);
1622  return 0;
1623 
1624 fail:
1625  kfree(int_dig_out);
1626  kfree(int_dig_in);
1627  kfree(int_dig_vv);
1628  crypto_free_hash(tfm);
1629  crypto_free_hash(integrity_w_tfm);
1630  crypto_free_hash(integrity_r_tfm);
1631  kfree(new_tl_hash);
1632  kfree(new_ee_hash);
1633  kfree(new_conf);
1634 
1635  reply->ret_code = retcode;
1636  drbd_reconfig_done(mdev);
1637  return 0;
1638 }
1639 
1640 static int drbd_nl_disconnect(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1641  struct drbd_nl_cfg_reply *reply)
1642 {
1643  int retcode;
1644  struct disconnect dc;
1645 
1646  memset(&dc, 0, sizeof(struct disconnect));
1647  if (!disconnect_from_tags(mdev, nlp->tag_list, &dc)) {
1648  retcode = ERR_MANDATORY_TAG;
1649  goto fail;
1650  }
1651 
1652  if (dc.force) {
1653  spin_lock_irq(&mdev->req_lock);
1654  if (mdev->state.conn >= C_WF_CONNECTION)
1655  _drbd_set_state(_NS(mdev, conn, C_DISCONNECTING), CS_HARD, NULL);
1656  spin_unlock_irq(&mdev->req_lock);
1657  goto done;
1658  }
1659 
1660  retcode = _drbd_request_state(mdev, NS(conn, C_DISCONNECTING), CS_ORDERED);
1661 
1662  if (retcode == SS_NOTHING_TO_DO)
1663  goto done;
1664  else if (retcode == SS_ALREADY_STANDALONE)
1665  goto done;
1666  else if (retcode == SS_PRIMARY_NOP) {
1667  /* Our state checking code wants to see the peer outdated. */
1668  retcode = drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
1669  pdsk, D_OUTDATED));
1670  } else if (retcode == SS_CW_FAILED_BY_PEER) {
1671  /* The peer probably wants to see us outdated. */
1672  retcode = _drbd_request_state(mdev, NS2(conn, C_DISCONNECTING,
1673  disk, D_OUTDATED),
1674  CS_ORDERED);
1675  if (retcode == SS_IS_DISKLESS || retcode == SS_LOWER_THAN_OUTDATED) {
1676  drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
1677  retcode = SS_SUCCESS;
1678  }
1679  }
1680 
1681  if (retcode < SS_SUCCESS)
1682  goto fail;
1683 
1684  if (wait_event_interruptible(mdev->state_wait,
1685  mdev->state.conn != C_DISCONNECTING)) {
1686  /* Do not test for mdev->state.conn == C_STANDALONE, since
1687  someone else might connect us in the mean time! */
1688  retcode = ERR_INTR;
1689  goto fail;
1690  }
1691 
1692  done:
1693  retcode = NO_ERROR;
1694  fail:
1695  drbd_md_sync(mdev);
1696  reply->ret_code = retcode;
1697  return 0;
1698 }
1699 
1700 void resync_after_online_grow(struct drbd_conf *mdev)
1701 {
1702  int iass; /* I am sync source */
1703 
1704  dev_info(DEV, "Resync of new storage after online grow\n");
1705  if (mdev->state.role != mdev->state.peer)
1706  iass = (mdev->state.role == R_PRIMARY);
1707  else
1708  iass = test_bit(DISCARD_CONCURRENT, &mdev->flags);
1709 
1710  if (iass)
1711  drbd_start_resync(mdev, C_SYNC_SOURCE);
1712  else
1713  _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE + CS_SERIALIZE);
1714 }
1715 
1716 static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1717  struct drbd_nl_cfg_reply *reply)
1718 {
1719  struct resize rs;
1720  int retcode = NO_ERROR;
1721  enum determine_dev_size dd;
1722  enum dds_flags ddsf;
1723 
1724  memset(&rs, 0, sizeof(struct resize));
1725  if (!resize_from_tags(mdev, nlp->tag_list, &rs)) {
1726  retcode = ERR_MANDATORY_TAG;
1727  goto fail;
1728  }
1729 
1730  if (mdev->state.conn > C_CONNECTED) {
1731  retcode = ERR_RESIZE_RESYNC;
1732  goto fail;
1733  }
1734 
1735  if (mdev->state.role == R_SECONDARY &&
1736  mdev->state.peer == R_SECONDARY) {
1737  retcode = ERR_NO_PRIMARY;
1738  goto fail;
1739  }
1740 
1741  if (!get_ldev(mdev)) {
1742  retcode = ERR_NO_DISK;
1743  goto fail;
1744  }
1745 
1746  if (rs.no_resync && mdev->agreed_pro_version < 93) {
1747  retcode = ERR_NEED_APV_93;
1748  goto fail_ldev;
1749  }
1750 
1751  if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
1752  mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
1753 
1754  mdev->ldev->dc.disk_size = (sector_t)rs.resize_size;
1755  ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
1756  dd = drbd_determine_dev_size(mdev, ddsf);
1757  drbd_md_sync(mdev);
1758  put_ldev(mdev);
1759  if (dd == dev_size_error) {
1760  retcode = ERR_NOMEM_BITMAP;
1761  goto fail;
1762  }
1763 
1764  if (mdev->state.conn == C_CONNECTED) {
1765  if (dd == grew)
1766  set_bit(RESIZE_PENDING, &mdev->flags);
1767 
1768  drbd_send_uuids(mdev);
1769  drbd_send_sizes(mdev, 1, ddsf);
1770  }
1771 
1772  fail:
1773  reply->ret_code = retcode;
1774  return 0;
1775 
1776  fail_ldev:
1777  put_ldev(mdev);
1778  goto fail;
1779 }
1780 
1781 static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1782  struct drbd_nl_cfg_reply *reply)
1783 {
1784  int retcode = NO_ERROR;
1785  int err;
1786  int ovr; /* online verify running */
1787  int rsr; /* re-sync running */
1788  struct crypto_hash *verify_tfm = NULL;
1789  struct crypto_hash *csums_tfm = NULL;
1790  struct syncer_conf sc;
1791  cpumask_var_t new_cpu_mask;
1792  int *rs_plan_s = NULL;
1793  int fifo_size;
1794 
1795  if (!zalloc_cpumask_var(&new_cpu_mask, GFP_KERNEL)) {
1796  retcode = ERR_NOMEM;
1797  goto fail;
1798  }
1799 
1800  if (nlp->flags & DRBD_NL_SET_DEFAULTS) {
1801  memset(&sc, 0, sizeof(struct syncer_conf));
1802  sc.rate = DRBD_RATE_DEF;
1803  sc.after = DRBD_AFTER_DEF;
1804  sc.al_extents = DRBD_AL_EXTENTS_DEF;
1805  sc.on_no_data = DRBD_ON_NO_DATA_DEF;
1806  sc.c_plan_ahead = DRBD_C_PLAN_AHEAD_DEF;
1807  sc.c_delay_target = DRBD_C_DELAY_TARGET_DEF;
1808  sc.c_fill_target = DRBD_C_FILL_TARGET_DEF;
1809  sc.c_max_rate = DRBD_C_MAX_RATE_DEF;
1810  sc.c_min_rate = DRBD_C_MIN_RATE_DEF;
1811  } else
1812  memcpy(&sc, &mdev->sync_conf, sizeof(struct syncer_conf));
1813 
1814  if (!syncer_conf_from_tags(mdev, nlp->tag_list, &sc)) {
1815  retcode = ERR_MANDATORY_TAG;
1816  goto fail;
1817  }
1818 
1819  /* re-sync running */
1820  rsr = ( mdev->state.conn == C_SYNC_SOURCE ||
1821  mdev->state.conn == C_SYNC_TARGET ||
1822  mdev->state.conn == C_PAUSED_SYNC_S ||
1823  mdev->state.conn == C_PAUSED_SYNC_T );
1824 
1825  if (rsr && strcmp(sc.csums_alg, mdev->sync_conf.csums_alg)) {
1826  retcode = ERR_CSUMS_RESYNC_RUNNING;
1827  goto fail;
1828  }
1829 
1830  if (!rsr && sc.csums_alg[0]) {
1831  csums_tfm = crypto_alloc_hash(sc.csums_alg, 0, CRYPTO_ALG_ASYNC);
1832  if (IS_ERR(csums_tfm)) {
1833  csums_tfm = NULL;
1834  retcode = ERR_CSUMS_ALG;
1835  goto fail;
1836  }
1837 
1838  if (!drbd_crypto_is_hash(crypto_hash_tfm(csums_tfm))) {
1839  retcode = ERR_CSUMS_ALG_ND;
1840  goto fail;
1841  }
1842  }
1843 
1844  /* online verify running */
1845  ovr = (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T);
1846 
1847  if (ovr) {
1848  if (strcmp(sc.verify_alg, mdev->sync_conf.verify_alg)) {
1849  retcode = ERR_VERIFY_RUNNING;
1850  goto fail;
1851  }
1852  }
1853 
1854  if (!ovr && sc.verify_alg[0]) {
1855  verify_tfm = crypto_alloc_hash(sc.verify_alg, 0, CRYPTO_ALG_ASYNC);
1856  if (IS_ERR(verify_tfm)) {
1857  verify_tfm = NULL;
1858  retcode = ERR_VERIFY_ALG;
1859  goto fail;
1860  }
1861 
1862  if (!drbd_crypto_is_hash(crypto_hash_tfm(verify_tfm))) {
1863  retcode = ERR_VERIFY_ALG_ND;
1864  goto fail;
1865  }
1866  }
1867 
1868  /* silently ignore cpu mask on UP kernel */
1869  if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) {
1870  err = bitmap_parse(sc.cpu_mask, 32,
1871  cpumask_bits(new_cpu_mask), nr_cpu_ids);
1872  if (err) {
1873  dev_warn(DEV, "bitmap_parse() failed with %d\n", err);
1874  retcode = ERR_CPU_MASK_PARSE;
1875  goto fail;
1876  }
1877  }
1878 
1879  ERR_IF (sc.rate < 1) sc.rate = 1;
1880  ERR_IF (sc.al_extents < 7) sc.al_extents = 127; /* arbitrary minimum */
1881 #define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT)
1882  if (sc.al_extents > AL_MAX) {
1883  dev_err(DEV, "sc.al_extents > %d\n", AL_MAX);
1884  sc.al_extents = AL_MAX;
1885  }
1886 #undef AL_MAX
1887 
1888  /* to avoid spurious errors when configuring minors before configuring
1889  * the minors they depend on: if necessary, first create the minor we
1890  * depend on */
1891  if (sc.after >= 0)
1892  ensure_mdev(sc.after, 1);
1893 
1894  /* most sanity checks done, try to assign the new sync-after
1895  * dependency. need to hold the global lock in there,
1896  * to avoid a race in the dependency loop check. */
1897  retcode = drbd_alter_sa(mdev, sc.after);
1898  if (retcode != NO_ERROR)
1899  goto fail;
1900 
1901  fifo_size = (sc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
1902  if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
1903  rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
1904  if (!rs_plan_s) {
1905  dev_err(DEV, "kzalloc of fifo_buffer failed\n");
1906  retcode = ERR_NOMEM;
1907  goto fail;
1908  }
1909  }
1910 
1911  /* ok, assign the rest of it as well.
1912  * lock against receive_SyncParam() */
1913  spin_lock(&mdev->peer_seq_lock);
1914  mdev->sync_conf = sc;
1915 
1916  if (!rsr) {
1917  crypto_free_hash(mdev->csums_tfm);
1918  mdev->csums_tfm = csums_tfm;
1919  csums_tfm = NULL;
1920  }
1921 
1922  if (!ovr) {
1923  crypto_free_hash(mdev->verify_tfm);
1924  mdev->verify_tfm = verify_tfm;
1925  verify_tfm = NULL;
1926  }
1927 
1928  if (fifo_size != mdev->rs_plan_s.size) {
1929  kfree(mdev->rs_plan_s.values);
1930  mdev->rs_plan_s.values = rs_plan_s;
1931  mdev->rs_plan_s.size = fifo_size;
1932  mdev->rs_planed = 0;
1933  rs_plan_s = NULL;
1934  }
1935 
1936  spin_unlock(&mdev->peer_seq_lock);
1937 
1938  if (get_ldev(mdev)) {
1939  wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
1940  drbd_al_shrink(mdev);
1941  err = drbd_check_al_size(mdev);
1942  lc_unlock(mdev->act_log);
1943  wake_up(&mdev->al_wait);
1944 
1945  put_ldev(mdev);
1946  drbd_md_sync(mdev);
1947 
1948  if (err) {
1949  retcode = ERR_NOMEM;
1950  goto fail;
1951  }
1952  }
1953 
1954  if (mdev->state.conn >= C_CONNECTED)
1955  drbd_send_sync_param(mdev, &sc);
1956 
1957  if (!cpumask_equal(mdev->cpu_mask, new_cpu_mask)) {
1958  cpumask_copy(mdev->cpu_mask, new_cpu_mask);
1959  drbd_calc_cpu_mask(mdev);
1960  mdev->receiver.reset_cpu_mask = 1;
1961  mdev->asender.reset_cpu_mask = 1;
1962  mdev->worker.reset_cpu_mask = 1;
1963  }
1964 
1965  kobject_uevent(&disk_to_dev(mdev->vdisk)->kobj, KOBJ_CHANGE);
1966 fail:
1967  kfree(rs_plan_s);
1968  free_cpumask_var(new_cpu_mask);
1969  crypto_free_hash(csums_tfm);
1970  crypto_free_hash(verify_tfm);
1971  reply->ret_code = retcode;
1972  return 0;
1973 }
1974 
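/* "invalidate": discard the local data and resync it from the peer by
 * forcing this node into C_STARTING_SYNC_T.  While no connection exists,
 * the local disk is only marked D_INCONSISTENT instead. */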
1975 static int drbd_nl_invalidate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
1976  struct drbd_nl_cfg_reply *reply)
1977 {
1978  int retcode;
1979 
1980  /* If there is still bitmap IO pending, probably because of a previous
1981  * resync just being finished, wait for it before requesting a new resync.
1982  * Also wait for its after_state_ch(). */
1983  drbd_suspend_io(mdev);
1984  wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
1985  drbd_flush_workqueue(mdev);
1986 
1987  retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T), CS_ORDERED);
1988 
1989  if (retcode < SS_SUCCESS && retcode != SS_NEED_CONNECTION)
1990  retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
1991 
1992  while (retcode == SS_NEED_CONNECTION) {
1993  spin_lock_irq(&mdev->req_lock);
1994  if (mdev->state.conn < C_CONNECTED)
1995  retcode = _drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_VERBOSE, NULL);
1996  spin_unlock_irq(&mdev->req_lock);
1997 
1998  if (retcode != SS_NEED_CONNECTION)
1999  break;
2000 
2001  retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_T));
2002  }
2003  drbd_resume_io(mdev);
2004 
2005  reply->ret_code = retcode;
2006  return 0;
2007 }
2008 
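/* Bitmap-IO worker: set every bit in the bitmap and write it out, then
 * suspend activity log updates while the resync is pending. */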
2009 static int drbd_bmio_set_susp_al(struct drbd_conf *mdev)
2010 {
2011  int rv;
2012 
2013  rv = drbd_bmio_set_n_write(mdev);
2014  drbd_suspend_al(mdev);
2015  return rv;
2016 }
2017 
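/* "invalidate-remote": make this node the sync source so the peer's data
 * gets overwritten.  If the peer is not reachable yet and we are primary,
 * the peer disk is marked D_INCONSISTENT so the next connect turns into a
 * full resync. */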
2018 static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2019  struct drbd_nl_cfg_reply *reply)
2020 {
2021  int retcode;
2022 
2023  /* If there is still bitmap IO pending, probably because of a previous
2024  * resync just being finished, wait for it before requesting a new resync.
2025  * Also wait for its after_state_ch(). */
2026  drbd_suspend_io(mdev);
2027  wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
2028  drbd_flush_workqueue(mdev);
2029 
2030  retcode = _drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S), CS_ORDERED);
2031 
2032  if (retcode < SS_SUCCESS) {
2033  if (retcode == SS_NEED_CONNECTION && mdev->state.role == R_PRIMARY) {
2034  /* The peer will get a resync upon connect anyway. Just make that
2035  into a full resync. */
2036  retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
2037  if (retcode >= SS_SUCCESS) {
2038  if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
2039  "set_n_write from invalidate_peer",
2040  BM_LOCKED_SET_ALLOWED))
2041  retcode = ERR_IO_MD_DISK;
2042  }
2043  } else
2044  retcode = drbd_request_state(mdev, NS(conn, C_STARTING_SYNC_S));
2045  }
2046  drbd_resume_io(mdev);
2047 
2048  reply->ret_code = retcode;
2049  return 0;
2050 }
2051 
2052 static int drbd_nl_pause_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2053  struct drbd_nl_cfg_reply *reply)
2054 {
2055  int retcode = NO_ERROR;
2056 
2057  if (drbd_request_state(mdev, NS(user_isp, 1)) == SS_NOTHING_TO_DO)
2058  retcode = ERR_PAUSE_IS_SET;
2059 
2060  reply->ret_code = retcode;
2061  return 0;
2062 }
2063 
2064 static int drbd_nl_resume_sync(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2065  struct drbd_nl_cfg_reply *reply)
2066 {
2067  int retcode = NO_ERROR;
2068  union drbd_state s;
2069 
2070  if (drbd_request_state(mdev, NS(user_isp, 0)) == SS_NOTHING_TO_DO) {
2071  s = mdev->state;
2072  if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) {
2073  retcode = s.aftr_isp ? ERR_PIC_AFTER_DEP :
2074  s.peer_isp ? ERR_PIC_PEER_DEP : ERR_PAUSE_IS_CLEAR;
2075  } else {
2076  retcode = ERR_PAUSE_IS_CLEAR;
2077  }
2078  }
2079 
2080  reply->ret_code = retcode;
2081  return 0;
2082 }
2083 
2084 static int drbd_nl_suspend_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2085  struct drbd_nl_cfg_reply *reply)
2086 {
2087  reply->ret_code = drbd_request_state(mdev, NS(susp, 1));
2088 
2089  return 0;
2090 }
2091 
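/* "resume-io": create a new current UUID if one is pending, lift all
 * suspend reasons, and clear or restart the transfer log depending on
 * connection and disk state. */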
2092 static int drbd_nl_resume_io(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2093  struct drbd_nl_cfg_reply *reply)
2094 {
2095  if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
2096  drbd_uuid_new_current(mdev);
2097  clear_bit(NEW_CUR_UUID, &mdev->flags);
2098  }
2099  drbd_suspend_io(mdev);
2100  reply->ret_code = drbd_request_state(mdev, NS3(susp, 0, susp_nod, 0, susp_fen, 0));
2101  if (reply->ret_code == SS_SUCCESS) {
2102  if (mdev->state.conn < C_CONNECTED)
2103  tl_clear(mdev);
2104  if (mdev->state.disk == D_DISKLESS || mdev->state.disk == D_FAILED)
2105  tl_restart(mdev, fail_frozen_disk_io);
2106  }
2107  drbd_resume_io(mdev);
2108 
2109  return 0;
2110 }
2111 
2112 static int drbd_nl_outdate(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2113  struct drbd_nl_cfg_reply *reply)
2114 {
2115  reply->ret_code = drbd_request_state(mdev, NS(disk, D_OUTDATED));
2116  return 0;
2117 }
2118 
2119 static int drbd_nl_get_config(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2120  struct drbd_nl_cfg_reply *reply)
2121 {
2122  unsigned short *tl;
2123 
2124  tl = reply->tag_list;
2125 
2126  if (get_ldev(mdev)) {
2127  tl = disk_conf_to_tags(mdev, &mdev->ldev->dc, tl);
2128  put_ldev(mdev);
2129  }
2130 
2131  if (get_net_conf(mdev)) {
2132  tl = net_conf_to_tags(mdev, mdev->net_conf, tl);
2133  put_net_conf(mdev);
2134  }
2135  tl = syncer_conf_to_tags(mdev, &mdev->sync_conf, tl);
2136 
2137  put_unaligned(TT_END, tl++); /* Close the tag list */
2138 
2139  return (int)((char *)tl - (char *)reply->tag_list);
2140 }
2141 
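/* Report the current device state as a tag list; while a resync is
 * running, the syncer progress is appended as well. */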
2142 static int drbd_nl_get_state(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2143  struct drbd_nl_cfg_reply *reply)
2144 {
2145  unsigned short *tl = reply->tag_list;
2146  union drbd_state s = mdev->state;
2147  unsigned long rs_left;
2148  unsigned int res;
2149 
2150  tl = get_state_to_tags(mdev, (struct get_state *)&s, tl);
2151 
2152  /* no local ref, no bitmap, no syncer progress. */
2153  if (s.conn >= C_SYNC_SOURCE && s.conn <= C_PAUSED_SYNC_T) {
2154  if (get_ldev(mdev)) {
2155  drbd_get_syncer_progress(mdev, &rs_left, &res);
2156  tl = tl_add_int(tl, T_sync_progress, &res);
2157  put_ldev(mdev);
2158  }
2159  }
2160  put_unaligned(TT_END, tl++); /* Close the tag list */
2161 
2162  return (int)((char *)tl - (char *)reply->tag_list);
2163 }
2164 
2165 static int drbd_nl_get_uuids(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2166  struct drbd_nl_cfg_reply *reply)
2167 {
2168  unsigned short *tl;
2169 
2170  tl = reply->tag_list;
2171 
2172  if (get_ldev(mdev)) {
2173  tl = tl_add_blob(tl, T_uuids, mdev->ldev->md.uuid, UI_SIZE*sizeof(u64));
2174  tl = tl_add_int(tl, T_uuids_flags, &mdev->ldev->md.flags);
2175  put_ldev(mdev);
2176  }
2177  put_unaligned(TT_END, tl++); /* Close the tag list */
2178 
2179  return (int)((char *)tl - (char *)reply->tag_list);
2180 }
2181 
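/* Tell drbdsetup which wait-for-connection timeout applies on this node:
 * UT_PEER_OUTDATED if the peer's disk is known to be outdated,
 * UT_DEGRADED if we are coming up degraded, UT_DEFAULT otherwise. */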
2188 static int drbd_nl_get_timeout_flag(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2189  struct drbd_nl_cfg_reply *reply)
2190 {
2191  unsigned short *tl;
2192  char rv;
2193 
2194  tl = reply->tag_list;
2195 
2196  rv = mdev->state.pdsk == D_OUTDATED ? UT_PEER_OUTDATED :
2197  test_bit(USE_DEGR_WFC_T, &mdev->flags) ? UT_DEGRADED : UT_DEFAULT;
2198 
2199  tl = tl_add_blob(tl, T_use_degraded, &rv, sizeof(rv));
2200  put_unaligned(TT_END, tl++); /* Close the tag list */
2201 
2202  return (int)((char *)tl - (char *)reply->tag_list);
2203 }
2204 
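/* Start online verify (C_VERIFY_S), resuming from the last verified
 * position unless an explicit start sector was passed in. */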
2205 static int drbd_nl_start_ov(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2206  struct drbd_nl_cfg_reply *reply)
2207 {
2208  /* default to resume from last known position, if possible */
2209  struct start_ov args =
2210  { .start_sector = mdev->ov_start_sector };
2211 
2212  if (!start_ov_from_tags(mdev, nlp->tag_list, &args)) {
2213  reply->ret_code = ERR_MANDATORY_TAG;
2214  return 0;
2215  }
2216 
2217  /* If there is still bitmap IO pending, e.g. previous resync or verify
2218  * just being finished, wait for it before requesting a new resync. */
2219  drbd_suspend_io(mdev);
2220  wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
2221 
2222  /* w_make_ov_request expects position to be aligned */
2223  mdev->ov_start_sector = args.start_sector & ~(BM_SECT_PER_BIT-1);
2224  reply->ret_code = drbd_request_state(mdev, NS(conn, C_VERIFY_S));
2225  drbd_resume_io(mdev);
2226  return 0;
2227 }
2228 
2229 
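/* Generate a new current UUID.  With "clear-bitmap" set on a freshly
 * created, connected device this implements "skip initial sync": the
 * bitmap is cleared and both disks jump straight to D_UP_TO_DATE. */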
2230 static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp,
2231  struct drbd_nl_cfg_reply *reply)
2232 {
2233  int retcode = NO_ERROR;
2234  int skip_initial_sync = 0;
2235  int err;
2236 
2237  struct new_c_uuid args;
2238 
2239  memset(&args, 0, sizeof(struct new_c_uuid));
2240  if (!new_c_uuid_from_tags(mdev, nlp->tag_list, &args)) {
2241  reply->ret_code = ERR_MANDATORY_TAG;
2242  return 0;
2243  }
2244 
2245  mutex_lock(&mdev->state_mutex); /* Protects us against serialized state changes. */
2246 
2247  if (!get_ldev(mdev)) {
2248  retcode = ERR_NO_DISK;
2249  goto out;
2250  }
2251 
2252  /* this is "skip initial sync", assume to be clean */
2253  if (mdev->state.conn == C_CONNECTED && mdev->agreed_pro_version >= 90 &&
2254  mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED && args.clear_bm) {
2255  dev_info(DEV, "Preparing to skip initial sync\n");
2256  skip_initial_sync = 1;
2257  } else if (mdev->state.conn != C_STANDALONE) {
2258  retcode = ERR_CONNECTED;
2259  goto out_dec;
2260  }
2261 
2262  drbd_uuid_set(mdev, UI_BITMAP, 0); /* Rotate UI_BITMAP to History 1, etc... */
2263  drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
2264 
2265  if (args.clear_bm) {
2267  "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
2268  if (err) {
2269  dev_err(DEV, "Writing bitmap failed with %d\n",err);
2270  retcode = ERR_IO_MD_DISK;
2271  }
2272  if (skip_initial_sync) {
2273  drbd_send_uuids_skip_initial_sync(mdev);
2274  _drbd_uuid_set(mdev, UI_BITMAP, 0);
2275  drbd_print_uuids(mdev, "cleared bitmap UUID");
2276  spin_lock_irq(&mdev->req_lock);
2277  _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
2278  CS_VERBOSE, NULL);
2279  spin_unlock_irq(&mdev->req_lock);
2280  }
2281  }
2282 
2283  drbd_md_sync(mdev);
2284 out_dec:
2285  put_ldev(mdev);
2286 out:
2287  mutex_unlock(&mdev->state_mutex);
2288 
2289  reply->ret_code = retcode;
2290  return 0;
2291 }
2292 
2293 struct cn_handler_struct {
2294  int (*function)(struct drbd_conf *,
2295  struct drbd_nl_cfg_req *,
2296  struct drbd_nl_cfg_reply *);
2297  int reply_body_size;
2298 };
2299 
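/* Dispatch table indexed by packet type.  reply_body_size is the extra
 * room needed in the reply for handlers that return a tag list; handlers
 * that only return a ret_code leave it at 0. */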
2300 static struct cn_handler_struct cnd_table[] = {
2301  [ P_primary ] = { &drbd_nl_primary, 0 },
2302  [ P_secondary ] = { &drbd_nl_secondary, 0 },
2303  [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 },
2304  [ P_detach ] = { &drbd_nl_detach, 0 },
2305  [ P_net_conf ] = { &drbd_nl_net_conf, 0 },
2306  [ P_disconnect ] = { &drbd_nl_disconnect, 0 },
2307  [ P_resize ] = { &drbd_nl_resize, 0 },
2308  [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 },
2309  [ P_invalidate ] = { &drbd_nl_invalidate, 0 },
2310  [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 },
2311  [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 },
2312  [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 },
2313  [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 },
2314  [ P_resume_io ] = { &drbd_nl_resume_io, 0 },
2315  [ P_outdate ] = { &drbd_nl_outdate, 0 },
2316  [ P_get_config ] = { &drbd_nl_get_config,
2317  sizeof(struct syncer_conf_tag_len_struct) +
2318  sizeof(struct disk_conf_tag_len_struct) +
2319  sizeof(struct net_conf_tag_len_struct) },
2320  [ P_get_state ] = { &drbd_nl_get_state,
2321  sizeof(struct get_state_tag_len_struct) +
2322  sizeof(struct sync_progress_tag_len_struct) },
2323  [ P_get_uuids ] = { &drbd_nl_get_uuids,
2324  sizeof(struct get_uuids_tag_len_struct) },
2325  [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag,
2326  sizeof(struct get_timeout_flag_tag_len_struct)},
2327  [ P_start_ov ] = { &drbd_nl_start_ov, 0 },
2328  [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 },
2329 };
2330 
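/* Connector entry point for all configuration requests: check
 * CAP_SYS_ADMIN, look up (or create) the requested minor, dispatch to
 * the handler from cnd_table, and send the reply back over netlink. */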
2331 static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp)
2332 {
2333  struct drbd_nl_cfg_req *nlp = (struct drbd_nl_cfg_req *)req->data;
2334  struct cn_handler_struct *cm;
2335  struct cn_msg *cn_reply;
2336  struct drbd_nl_cfg_reply *reply;
2337  struct drbd_conf *mdev;
2338  int retcode, rr;
2339  int reply_size = sizeof(struct cn_msg)
2340  + sizeof(struct drbd_nl_cfg_reply)
2341  + sizeof(short int);
2342 
2343  if (!try_module_get(THIS_MODULE)) {
2344  printk(KERN_ERR "drbd: try_module_get() failed!\n");
2345  return;
2346  }
2347 
2348  if (!capable(CAP_SYS_ADMIN)) {
2349  retcode = ERR_PERM;
2350  goto fail;
2351  }
2352 
2353  mdev = ensure_mdev(nlp->drbd_minor,
2354  (nlp->flags & DRBD_NL_CREATE_DEVICE));
2355  if (!mdev) {
2356  retcode = ERR_MINOR_INVALID;
2357  goto fail;
2358  }
2359 
2360  if (nlp->packet_type >= P_nl_after_last_packet ||
2361  nlp->packet_type == P_return_code_only) {
2362  retcode = ERR_PACKET_NR;
2363  goto fail;
2364  }
2365 
2366  cm = cnd_table + nlp->packet_type;
2367 
2368  /* This may happen if packet number is 0: */
2369  if (cm->function == NULL) {
2370  retcode = ERR_PACKET_NR;
2371  goto fail;
2372  }
2373 
2374  reply_size += cm->reply_body_size;
2375 
2376  /* allocation not in the IO path, cqueue thread context */
2377  cn_reply = kzalloc(reply_size, GFP_KERNEL);
2378  if (!cn_reply) {
2379  retcode = ERR_NOMEM;
2380  goto fail;
2381  }
2382  reply = (struct drbd_nl_cfg_reply *) cn_reply->data;
2383 
2384  reply->packet_type =
2385  cm->reply_body_size ? nlp->packet_type : P_return_code_only;
2386  reply->minor = nlp->drbd_minor;
2387  reply->ret_code = NO_ERROR; /* Might be modified by cm->function. */
2388  /* reply->tag_list; might be modified by cm->function. */
2389 
2390  rr = cm->function(mdev, nlp, reply);
2391 
2392  cn_reply->id = req->id;
2393  cn_reply->seq = req->seq;
2394  cn_reply->ack = req->ack + 1;
2395  cn_reply->len = sizeof(struct drbd_nl_cfg_reply) + rr;
2396  cn_reply->flags = 0;
2397 
2398  rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_KERNEL);
2399  if (rr && rr != -ESRCH)
2400  printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
2401 
2402  kfree(cn_reply);
2403  module_put(THIS_MODULE);
2404  return;
2405  fail:
2406  drbd_nl_send_reply(req, retcode);
2407  module_put(THIS_MODULE);
2408 }
2409 
2410 static atomic_t drbd_nl_seq = ATOMIC_INIT(2); /* two. */
2411 
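/* Helpers for building reply tag lists.  Each entry is encoded as
 *
 *   u16 tag | u16 length | <length> bytes of payload
 *
 * and the list is terminated with TT_END.  The helpers return the
 * advanced cursor, so calls can be chained. */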
2412 static unsigned short *
2413 __tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data,
2414  unsigned short len, int nul_terminated)
2415 {
2416  unsigned short l = tag_descriptions[tag_number(tag)].max_len;
2417  len = (len < l) ? len : l;
2418  put_unaligned(tag, tl++);
2419  put_unaligned(len, tl++);
2420  memcpy(tl, data, len);
2421  tl = (unsigned short*)((char*)tl + len);
2422  if (nul_terminated)
2423  *((char*)tl - 1) = 0;
2424  return tl;
2425 }
2426 
2427 static unsigned short *
2428 tl_add_blob(unsigned short *tl, enum drbd_tags tag, const void *data, int len)
2429 {
2430  return __tl_add_blob(tl, tag, data, len, 0);
2431 }
2432 
2433 static unsigned short *
2434 tl_add_str(unsigned short *tl, enum drbd_tags tag, const char *str)
2435 {
2436  return __tl_add_blob(tl, tag, str, strlen(str)+1, 0);
2437 }
2438 
2439 static unsigned short *
2440 tl_add_int(unsigned short *tl, enum drbd_tags tag, const void *val)
2441 {
2442  put_unaligned(tag, tl++);
2443  switch(tag_type(tag)) {
2444  case TT_INTEGER:
2445  put_unaligned(sizeof(int), tl++);
2446  put_unaligned(*(int *)val, (int *)tl);
2447  tl = (unsigned short*)((char*)tl+sizeof(int));
2448  break;
2449  case TT_INT64:
2450  put_unaligned(sizeof(u64), tl++);
2451  put_unaligned(*(u64 *)val, (u64 *)tl);
2452  tl = (unsigned short*)((char*)tl+sizeof(u64));
2453  break;
2454  default:
2455  /* someone did something stupid. */
2456  ;
2457  }
2458  return tl;
2459 }
2460 
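/* Broadcast a state change to userspace listeners (e.g. drbdsetup
 * "events") as an unsolicited P_get_state packet. */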
2461 void drbd_bcast_state(struct drbd_conf *mdev, union drbd_state state)
2462 {
2463  char buffer[sizeof(struct cn_msg)+
2464  sizeof(struct drbd_nl_cfg_reply)+
2465  sizeof(struct get_state_tag_len_struct)+
2466  sizeof(short int)];
2467  struct cn_msg *cn_reply = (struct cn_msg *) buffer;
2468  struct drbd_nl_cfg_reply *reply =
2469  (struct drbd_nl_cfg_reply *)cn_reply->data;
2470  unsigned short *tl = reply->tag_list;
2471 
2472  /* dev_warn(DEV, "drbd_bcast_state() got called\n"); */
2473 
2474  tl = get_state_to_tags(mdev, (struct get_state *)&state, tl);
2475 
2476  put_unaligned(TT_END, tl++); /* Close the tag list */
2477 
2478  cn_reply->id.idx = CN_IDX_DRBD;
2479  cn_reply->id.val = CN_VAL_DRBD;
2480 
2481  cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
2482  cn_reply->ack = 0; /* not used here. */
2483  cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
2484  (int)((char *)tl - (char *)reply->tag_list);
2485  cn_reply->flags = 0;
2486 
2487  reply->packet_type = P_get_state;
2488  reply->minor = mdev_to_minor(mdev);
2489  reply->ret_code = NO_ERROR;
2490 
2491  cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
2492 }
2493 
2494 void drbd_bcast_ev_helper(struct drbd_conf *mdev, char *helper_name)
2495 {
2496  char buffer[sizeof(struct cn_msg)+
2497  sizeof(struct drbd_nl_cfg_reply)+
2498  sizeof(struct call_helper_tag_len_struct)+
2499  sizeof(short int)];
2500  struct cn_msg *cn_reply = (struct cn_msg *) buffer;
2501  struct drbd_nl_cfg_reply *reply =
2502  (struct drbd_nl_cfg_reply *)cn_reply->data;
2503  unsigned short *tl = reply->tag_list;
2504 
2505  /* dev_warn(DEV, "drbd_bcast_ev_helper() got called\n"); */
2506 
2507  tl = tl_add_str(tl, T_helper, helper_name);
2508  put_unaligned(TT_END, tl++); /* Close the tag list */
2509 
2510  cn_reply->id.idx = CN_IDX_DRBD;
2511  cn_reply->id.val = CN_VAL_DRBD;
2512 
2513  cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
2514  cn_reply->ack = 0; /* not used here. */
2515  cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
2516  (int)((char *)tl - (char *)reply->tag_list);
2517  cn_reply->flags = 0;
2518 
2519  reply->packet_type = P_call_helper;
2520  reply->minor = mdev_to_minor(mdev);
2521  reply->ret_code = NO_ERROR;
2522 
2523  cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
2524 }
2525 
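/* Broadcast an epoch entry, e.g. when an online-verify or csums-based
 * resync found a digest mismatch: the reason, both digests, sector and
 * block id, plus at most the first 32 KiB of the data itself. */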
2526 void drbd_bcast_ee(struct drbd_conf *mdev,
2527  const char *reason, const int dgs,
2528  const char* seen_hash, const char* calc_hash,
2529  const struct drbd_epoch_entry* e)
2530 {
2531  struct cn_msg *cn_reply;
2532  struct drbd_nl_cfg_reply *reply;
2533  unsigned short *tl;
2534  struct page *page;
2535  unsigned len;
2536 
2537  if (!e)
2538  return;
2539  if (!reason || !reason[0])
2540  return;
2541 
2542  /* apparently we have to memcpy twice, first to prepare the data for the
2543  * struct cn_msg, then within cn_netlink_send from the cn_msg to the
2544  * netlink skb. */
2545  /* receiver thread context, which is not in the writeout path (of this node),
2546  * but may be in the writeout path of the _other_ node.
2547  * GFP_NOIO to avoid potential "distributed deadlock". */
2548  cn_reply = kzalloc(
2549  sizeof(struct cn_msg)+
2550  sizeof(struct drbd_nl_cfg_reply)+
2551  sizeof(struct dump_ee_tag_len_struct)+
2552  sizeof(short int),
2553  GFP_NOIO);
2554 
2555  if (!cn_reply) {
2556  dev_err(DEV, "could not kmalloc buffer for drbd_bcast_ee, sector %llu, size %u\n",
2557  (unsigned long long)e->sector, e->size);
2558  return;
2559  }
2560 
2561  reply = (struct drbd_nl_cfg_reply*)cn_reply->data;
2562  tl = reply->tag_list;
2563 
2564  tl = tl_add_str(tl, T_dump_ee_reason, reason);
2565  tl = tl_add_blob(tl, T_seen_digest, seen_hash, dgs);
2566  tl = tl_add_blob(tl, T_calc_digest, calc_hash, dgs);
2567  tl = tl_add_int(tl, T_ee_sector, &e->sector);
2568  tl = tl_add_int(tl, T_ee_block_id, &e->block_id);
2569 
2570  /* dump the first 32k */
2571  len = min_t(unsigned, e->size, 32 << 10);
2572  put_unaligned(T_ee_data, tl++);
2573  put_unaligned(len, tl++);
2574 
2575  page = e->pages;
2576  page_chain_for_each(page) {
2577  void *d = kmap_atomic(page);
2578  unsigned l = min_t(unsigned, len, PAGE_SIZE);
2579  memcpy(tl, d, l);
2580  kunmap_atomic(d);
2581  tl = (unsigned short*)((char*)tl + l);
2582  len -= l;
2583  if (len == 0)
2584  break;
2585  }
2586  put_unaligned(TT_END, tl++); /* Close the tag list */
2587 
2588  cn_reply->id.idx = CN_IDX_DRBD;
2589  cn_reply->id.val = CN_VAL_DRBD;
2590 
2591  cn_reply->seq = atomic_add_return(1,&drbd_nl_seq);
2592  cn_reply->ack = 0; /* not used here. */
2593  cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
2594  (int)((char*)tl - (char*)reply->tag_list);
2595  cn_reply->flags = 0;
2596 
2597  reply->packet_type = P_dump_ee;
2598  reply->minor = mdev_to_minor(mdev);
2599  reply->ret_code = NO_ERROR;
2600 
2601  cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
2602  kfree(cn_reply);
2603 }
2604 
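/* Broadcast the current resync progress; silently does nothing when no
 * local disk (and therefore no bitmap) is attached. */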
2605 void drbd_bcast_sync_progress(struct drbd_conf *mdev)
2606 {
2607  char buffer[sizeof(struct cn_msg)+
2608  sizeof(struct drbd_nl_cfg_reply)+
2609  sizeof(struct sync_progress_tag_len_struct)+
2610  sizeof(short int)];
2611  struct cn_msg *cn_reply = (struct cn_msg *) buffer;
2612  struct drbd_nl_cfg_reply *reply =
2613  (struct drbd_nl_cfg_reply *)cn_reply->data;
2614  unsigned short *tl = reply->tag_list;
2615  unsigned long rs_left;
2616  unsigned int res;
2617 
2618  /* no local ref, no bitmap, no syncer progress, no broadcast. */
2619  if (!get_ldev(mdev))
2620  return;
2621  drbd_get_syncer_progress(mdev, &rs_left, &res);
2622  put_ldev(mdev);
2623 
2624  tl = tl_add_int(tl, T_sync_progress, &res);
2625  put_unaligned(TT_END, tl++); /* Close the tag list */
2626 
2627  cn_reply->id.idx = CN_IDX_DRBD;
2628  cn_reply->id.val = CN_VAL_DRBD;
2629 
2630  cn_reply->seq = atomic_add_return(1, &drbd_nl_seq);
2631  cn_reply->ack = 0; /* not used here. */
2632  cn_reply->len = sizeof(struct drbd_nl_cfg_reply) +
2633  (int)((char *)tl - (char *)reply->tag_list);
2634  cn_reply->flags = 0;
2635 
2636  reply->packet_type = P_sync_progress;
2637  reply->minor = mdev_to_minor(mdev);
2638  reply->ret_code = NO_ERROR;
2639 
2640  cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
2641 }
2642 
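/* Register the connector callback; if the index is already taken, bump
 * cn_idx by CN_IDX_STEP and retry up to ten times before giving up. */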
2643 int __init drbd_nl_init(void)
2644 {
2645  static struct cb_id cn_id_drbd;
2646  int err, try=10;
2647 
2648  cn_id_drbd.val = CN_VAL_DRBD;
2649  do {
2650  cn_id_drbd.idx = cn_idx;
2651  err = cn_add_callback(&cn_id_drbd, "cn_drbd", &drbd_connector_callback);
2652  if (!err)
2653  break;
2654  cn_idx = (cn_idx + CN_IDX_STEP);
2655  } while (try--);
2656 
2657  if (err) {
2658  printk(KERN_ERR "drbd: cn_drbd failed to register\n");
2659  return err;
2660  }
2661 
2662  return 0;
2663 }
2664 
2665 void drbd_nl_cleanup(void)
2666 {
2667  static struct cb_id cn_id_drbd;
2668 
2669  cn_id_drbd.idx = cn_idx;
2670  cn_id_drbd.val = CN_VAL_DRBD;
2671 
2672  cn_del_callback(&cn_id_drbd);
2673 }
2674 
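/* Minimal reply used on the error paths of drbd_connector_callback:
 * only a return code, no tag list. */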
2675 void drbd_nl_send_reply(struct cn_msg *req, int ret_code)
2676 {
2677  char buffer[sizeof(struct cn_msg)+sizeof(struct drbd_nl_cfg_reply)];
2678  struct cn_msg *cn_reply = (struct cn_msg *) buffer;
2679  struct drbd_nl_cfg_reply *reply =
2680  (struct drbd_nl_cfg_reply *)cn_reply->data;
2681  int rr;
2682 
2683  memset(buffer, 0, sizeof(buffer));
2684  cn_reply->id = req->id;
2685 
2686  cn_reply->seq = req->seq;
2687  cn_reply->ack = req->ack + 1;
2688  cn_reply->len = sizeof(struct drbd_nl_cfg_reply);
2689  cn_reply->flags = 0;
2690 
2691  reply->packet_type = P_return_code_only;
2692  reply->minor = ((struct drbd_nl_cfg_req *)req->data)->drbd_minor;
2693  reply->ret_code = ret_code;
2694 
2695  rr = cn_netlink_send(cn_reply, CN_IDX_DRBD, GFP_NOIO);
2696  if (rr && rr != -ESRCH)
2697  printk(KERN_INFO "drbd: cn_netlink_send()=%d\n", rr);
2698 }
2699