Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
xpc_partition.c
Go to the documentation of this file.
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License. See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2004-2008 Silicon Graphics, Inc. All Rights Reserved.
7  */
8 
9 /*
10  * Cross Partition Communication (XPC) partition support.
11  *
12  * This is the part of XPC that detects the presence/absence of
13  * other partitions. It provides a heartbeat and monitors the
14  * heartbeats of other partitions.
15  *
16  */
17 
18 #include <linux/device.h>
19 #include <linux/hardirq.h>
20 #include <linux/slab.h>
21 #include "xpc.h"
22 #include <asm/uv/uv_hub.h>
23 
24 /* XPC is exiting flag */
26 
27 /* this partition's reserved page pointers */
29 static unsigned long *xpc_part_nasids;
30 unsigned long *xpc_mach_nasids;
31 
32 static int xpc_nasid_mask_nbytes; /* #of bytes in nasid mask */
33 int xpc_nasid_mask_nlongs; /* #of longs in nasid mask */
34 
36 
37 /*
38  * Guarantee that the kmalloc'd memory is cacheline aligned.
39  */
/*
 * Allocate memory with kmalloc() and return a pointer into it that is
 * aligned to an L1 cacheline boundary.
 *
 * The raw kmalloc() result is stored through 'base'; that raw pointer (not
 * the returned one) is what has to be handed to kfree() later, because the
 * returned pointer may lie past the start of the allocation.
 *
 * Returns the aligned pointer, or NULL (with *base == NULL) when an
 * allocation attempt fails.
 */
40 void *
/*
 * NOTE(review): listing line 41 (the function's name and parameter list) is
 * missing from this copy of the file.  The body uses 'size', 'flags' and
 * 'base' -- restore the declarator from the tree before building.
 */
42 {
43  /* see if kmalloc will give us cacheline aligned memory by default */
44  *base = kmalloc(size, flags);
45  if (*base == NULL)
46  return NULL;
47 
48  if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
49  return *base;
50 
	/* first attempt was not aligned: give it back and retry larger */
51  kfree(*base);
52 
53  /* nope, we'll have to do it ourselves */
	/* one extra cacheline leaves room to round the start address up */
54  *base = kmalloc(size + L1_CACHE_BYTES, flags);
55  if (*base == NULL)
56  return NULL;
57 
58  return (void *)L1_CACHE_ALIGN((u64)*base);
59 }
60 
61 /*
62  * Given a nasid, get the physical address of the partition's reserved page
63  * for that nasid. This function returns 0 on any error.
64  */
65 static unsigned long
66 xpc_get_rsvd_page_pa(int nasid)
67 {
68  enum xp_retval ret;
69  u64 cookie = 0;
70  unsigned long rp_pa = nasid; /* seed with nasid */
71  size_t len = 0;
72  size_t buf_len = 0;
73  void *buf = buf;
74  void *buf_base = NULL;
75  enum xp_retval (*get_partition_rsvd_page_pa)
76  (void *, u64 *, unsigned long *, size_t *) =
77  xpc_arch_ops.get_partition_rsvd_page_pa;
78 
79  while (1) {
80 
81  /* !!! rp_pa will need to be _gpa on UV.
82  * ??? So do we save it into the architecture specific parts
83  * ??? of the xpc_partition structure? Do we rename this
84  * ??? function or have two versions? Rename rp_pa for UV to
85  * ??? rp_gpa?
86  */
87  ret = get_partition_rsvd_page_pa(buf, &cookie, &rp_pa, &len);
88 
89  dev_dbg(xpc_part, "SAL returned with ret=%d, cookie=0x%016lx, "
90  "address=0x%016lx, len=0x%016lx\n", ret,
91  (unsigned long)cookie, rp_pa, len);
92 
93  if (ret != xpNeedMoreInfo)
94  break;
95 
96  /* !!! L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
97  if (is_shub())
98  len = L1_CACHE_ALIGN(len);
99 
100  if (len > buf_len) {
101  if (buf_base != NULL)
102  kfree(buf_base);
103  buf_len = L1_CACHE_ALIGN(len);
105  &buf_base);
106  if (buf_base == NULL) {
107  dev_err(xpc_part, "unable to kmalloc "
108  "len=0x%016lx\n", buf_len);
109  ret = xpNoMemory;
110  break;
111  }
112  }
113 
114  ret = xp_remote_memcpy(xp_pa(buf), rp_pa, len);
115  if (ret != xpSuccess) {
116  dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
117  break;
118  }
119  }
120 
121  kfree(buf_base);
122 
123  if (ret != xpSuccess)
124  rp_pa = 0;
125 
126  dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
127  return rp_pa;
128 }
129 
130 /*
131  * Fill the partition reserved page with the information needed by
132  * other partitions to discover we are alive and establish initial
133  * communications.
134  */
/*
 * Locate this partition's reserved page, fill in the XPC-owned fields and
 * publish the page through xpc_rsvd_page.
 *
 * Returns 0 on success, -ESRCH when the reserved page cannot be located,
 * -EINVAL when the page's SAL_partid is out of range, or the non-zero
 * result of the arch setup_rsvd_page() op.
 *
 * NOTE(review): this copy of the file is missing listing lines 136 (the
 * function's name and parameter list), 158, 163, 168, 176 and 181.  Each
 * gap is flagged below; restore them from the tree before building.
 */
135 int
137 {
138  int ret;
139  struct xpc_rsvd_page *rp;
140  unsigned long rp_pa;
141  unsigned long new_ts_jiffies;
142 
143  /* get the local reserved page's address */
144 
	/* presumably preemption is off so smp_processor_id() stays valid */
145  preempt_disable();
146  rp_pa = xpc_get_rsvd_page_pa(xp_cpu_to_nasid(smp_processor_id()));
147  preempt_enable();
148  if (rp_pa == 0) {
149  dev_err(xpc_part, "SAL failed to locate the reserved page\n");
150  return -ESRCH;
151  }
152  rp = (struct xpc_rsvd_page *)__va(xp_socket_pa(rp_pa));
153 
154  if (rp->SAL_version < 3) {
155  /* SAL_versions < 3 had a SAL_partid defined as a u8 */
156  rp->SAL_partid &= 0xff;
157  }
	/* NOTE(review): listing line 158 is missing here */
159 
160  if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
161  dev_err(xpc_part, "the reserved page's partid of %d is outside "
162  "supported range (< 0 || >= %d)\n", rp->SAL_partid,
	/*
	 * NOTE(review): listing line 163 is missing -- the dev_err() call
	 * above lacks its last argument (for the second %d) and its closing
	 * parenthesis.
	 */
164  return -EINVAL;
165  }
166 
167  rp->version = XPC_RP_VERSION;
	/* NOTE(review): listing line 168 is missing here */
169 
170  /* establish the actual sizes of the nasid masks */
171  if (rp->SAL_version == 1) {
172  /* SAL_version 1 didn't set the nasids_size field */
173  rp->SAL_nasids_size = 128;
174  }
175  xpc_nasid_mask_nbytes = rp->SAL_nasids_size;
	/*
	 * NOTE(review): listing line 176 is missing; only the tail of that
	 * statement survives on the next line.
	 */
177  BITS_PER_BYTE);
178 
179  /* setup the pointers to the various items in the reserved page */
180  xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
	/* NOTE(review): listing line 181 is missing here */
182 
	/* let the architecture layer fill in its part of the page */
183  ret = xpc_arch_ops.setup_rsvd_page(rp);
184  if (ret != 0)
185  return ret;
186 
187  /*
188  * Set timestamp of when reserved page was setup by XPC.
189  * This signifies to the remote partition that our reserved
190  * page is initialized.
191  */
192  new_ts_jiffies = jiffies;
	/* never store 0, and never store the value already in the page */
193  if (new_ts_jiffies == 0 || new_ts_jiffies == rp->ts_jiffies)
194  new_ts_jiffies++;
195  rp->ts_jiffies = new_ts_jiffies;
196 
197  xpc_rsvd_page = rp;
198  return 0;
199 }
200 
/*
 * Mark this partition's reserved page as no longer initialized by zeroing
 * its timestamp.  Dereferences xpc_rsvd_page without a NULL check.
 *
 * NOTE(review): listing line 202 (the function's name and parameter list)
 * is missing from this copy of the file; restore it from the tree.
 */
201 void
203 {
204  /* a zero timestamp indicates our rsvd page is not initialized */
205  xpc_rsvd_page->ts_jiffies = 0;
206 }
207 
208 /*
209  * Get a copy of a portion of the remote partition's rsvd page.
210  *
211  * remote_rp points to a buffer that is cacheline aligned for BTE copies and
212  * is large enough to contain a copy of their reserved page header and
213  * part_nasids mask.
214  */
215 enum xp_retval
216 xpc_get_remote_rp(int nasid, unsigned long *discovered_nasids,
217  struct xpc_rsvd_page *remote_rp, unsigned long *remote_rp_pa)
218 {
219  int l;
220  enum xp_retval ret;
221 
222  /* get the reserved page's physical address */
223 
224  *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
225  if (*remote_rp_pa == 0)
226  return xpNoRsvdPageAddr;
227 
228  /* pull over the reserved page header and part_nasids mask */
229  ret = xp_remote_memcpy(xp_pa(remote_rp), *remote_rp_pa,
230  XPC_RP_HEADER_SIZE + xpc_nasid_mask_nbytes);
231  if (ret != xpSuccess)
232  return ret;
233 
234  if (discovered_nasids != NULL) {
235  unsigned long *remote_part_nasids =
236  XPC_RP_PART_NASIDS(remote_rp);
237 
238  for (l = 0; l < xpc_nasid_mask_nlongs; l++)
239  discovered_nasids[l] |= remote_part_nasids[l];
240  }
241 
242  /* zero timestamp indicates the reserved page has not been setup */
243  if (remote_rp->ts_jiffies == 0)
244  return xpRsvdPageNotSet;
245 
246  if (XPC_VERSION_MAJOR(remote_rp->version) !=
248  return xpBadVersion;
249  }
250 
251  /* check that both remote and local partids are valid for each side */
252  if (remote_rp->SAL_partid < 0 ||
253  remote_rp->SAL_partid >= xp_max_npartitions ||
254  remote_rp->max_npartitions <= xp_partition_id) {
255  return xpInvalidPartid;
256  }
257 
258  if (remote_rp->SAL_partid == xp_partition_id)
259  return xpLocalPartid;
260 
261  return xpSuccess;
262 }
263 
264 /*
265  * See if the other side has responded to a partition deactivate request
266  * from us. Though we requested the remote partition to deactivate with regard
267  * to us, we really only need to wait for the other side to disengage from us.
268  */
/*
 * Report whether the remote partition has disengaged from us.
 *
 * Returns non-zero when the partition is (or is assumed to be) disengaged,
 * 0 otherwise.  While part->disengage_timeout is non-zero and the partition
 * is found disengaged or the timeout path is taken, the timeout is cleared
 * and the pending deactivation request is cancelled.
 *
 * NOTE(review): this copy of the file is missing listing lines 270 (the
 * function's name and parameter list; the body uses 'part'), 278, 290, 298
 * and 300.  Each gap is flagged below; restore them from the tree.
 */
269 int
271 {
272  short partid = XPC_PARTID(part);
273  int disengaged;
274 
275  disengaged = !xpc_arch_ops.partition_engaged(partid);
276  if (part->disengage_timeout) {
277  if (!disengaged) {
	/*
	 * NOTE(review): listing line 278 is missing -- presumably the test
	 * that the time limit has not yet expired, which opens the block
	 * closed two lines below.
	 */
279  /* timelimit hasn't been reached yet */
280  return 0;
281  }
282 
283  /*
284  * Other side hasn't responded to our deactivate
285  * request in a timely fashion, so assume it's dead.
286  */
287 
288  dev_info(xpc_part, "deactivate request to remote "
289  "partition %d timed out\n", partid);
	/* NOTE(review): listing line 290 is missing here */
291  xpc_arch_ops.assume_partition_disengaged(partid);
292  disengaged = 1;
293  }
294  part->disengage_timeout = 0;
295 
296  /* cancel the timer function, provided it's not us */
297  if (!in_interrupt())
	/*
	 * NOTE(review): listing line 298 (the body of the if above) and
	 * listing line 300 (the start of the statement whose tail follows)
	 * are missing.
	 */
299 
301  part->act_state != XPC_P_AS_INACTIVE);
302  if (part->act_state != XPC_P_AS_INACTIVE)
303  xpc_wakeup_channel_mgr(part);
304 
305  xpc_arch_ops.cancel_partition_deactivation_request(part);
306  }
307  return disengaged;
308 }
309 
310 /*
311  * Mark specified partition as active.
312  */
/*
 * Move a partition from ACTIVATING to ACTIVE under part->act_lock.
 *
 * Returns xpSuccess when the transition was made; otherwise the state is
 * left untouched and part->reason is returned.
 *
 * NOTE(review): listing line 314 (the function's name and parameter list;
 * the body uses 'part') is missing from this copy of the file.
 */
313 enum xp_retval
315 {
316  unsigned long irq_flags;
317  enum xp_retval ret;
318 
319  dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
320 
321  spin_lock_irqsave(&part->act_lock, irq_flags);
322  if (part->act_state == XPC_P_AS_ACTIVATING) {
323  part->act_state = XPC_P_AS_ACTIVE;
324  ret = xpSuccess;
325  } else {
	/* not activating: a reason is expected to have been recorded */
326  DBUG_ON(part->reason == xpSuccess);
327  ret = part->reason;
328  }
329  spin_unlock_irqrestore(&part->act_lock, irq_flags);
330 
331  return ret;
332 }
333 
334 /*
335  * Start the process of deactivating the specified partition.
336  */
/*
 * Begin deactivating a partition for the given reason.
 *
 *  - Already INACTIVE: only the reason is recorded; when the reason is
 *    xpReactivating a reactivation is requested as well.
 *  - Already DEACTIVATING: the recorded reason is replaced only if the old
 *    one was xpUnloading and the new one is not, or if the new one is
 *    xpReactivating.
 *  - Otherwise: the reason is recorded, the remote partition is asked to
 *    deactivate, the disengage timer is armed and
 *    xpc_partition_going_down() is called.
 *
 * NOTE(review): this copy of the file is missing listing lines 338 (the
 * function's name and leading parameters; the body uses 'line' and 'part'),
 * 363 and 372.  Each gap is flagged below; restore them from the tree.
 */
337 void
339  enum xp_retval reason)
340 {
341  unsigned long irq_flags;
342 
343  spin_lock_irqsave(&part->act_lock, irq_flags);
344 
345  if (part->act_state == XPC_P_AS_INACTIVE) {
346  XPC_SET_REASON(part, reason, line);
347  spin_unlock_irqrestore(&part->act_lock, irq_flags);
348  if (reason == xpReactivating) {
349  /* we interrupt ourselves to reactivate partition */
350  xpc_arch_ops.request_partition_reactivation(part);
351  }
352  return;
353  }
354  if (part->act_state == XPC_P_AS_DEACTIVATING) {
355  if ((part->reason == xpUnloading && reason != xpUnloading) ||
356  reason == xpReactivating) {
357  XPC_SET_REASON(part, reason, line);
358  }
359  spin_unlock_irqrestore(&part->act_lock, irq_flags);
360  return;
361  }
362 
	/* NOTE(review): listing line 363 is missing here */
364  XPC_SET_REASON(part, reason, line);
365 
366  spin_unlock_irqrestore(&part->act_lock, irq_flags);
367 
368  /* ask remote partition to deactivate with regard to us */
369  xpc_arch_ops.request_partition_deactivation(part);
370 
371  /* set a timelimit on the disengage phase of the deactivation request */
	/*
	 * NOTE(review): listing line 372 is missing -- presumably the
	 * statement that computes part->disengage_timeout, which is read on
	 * the next line.
	 */
373  part->disengage_timer.expires = part->disengage_timeout;
374  add_timer(&part->disengage_timer);
375 
376  dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
377  XPC_PARTID(part), reason);
378 
379  xpc_partition_going_down(part, reason);
380 }
381 
382 /*
383  * Mark specified partition as inactive.
384  */
/*
 * Mark a partition inactive and forget the physical address of its
 * reserved page.
 *
 * NOTE(review): this copy of the file is missing listing lines 386 (the
 * function's name and parameter list; the body uses 'part') and 394.
 */
385 void
387 {
388  unsigned long irq_flags;
389 
390  dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
391  XPC_PARTID(part));
392 
393  spin_lock_irqsave(&part->act_lock, irq_flags);
	/*
	 * NOTE(review): listing line 394 is missing -- the statement executed
	 * under act_lock (presumably the state change itself) is not visible.
	 */
395  spin_unlock_irqrestore(&part->act_lock, irq_flags);
	/* cleared after the lock has been dropped */
396  part->remote_rp_pa = 0;
397 }
398 
399 /*
400  * SAL has provided a partition and machine mask. The partition mask
401  * contains a bit for each even nasid in our partition. The machine
402  * mask contains a bit for each even nasid in the entire machine.
403  *
404  * Using those two bit arrays, we can determine which nasids are
405  * known in the machine. Each should also have a reserved page
406  * initialized if they are available for partitioning.
407  */
/*
 * Walk every region of the machine and, for each even nasid that is on the
 * network, outside the local partition and not already discovered, fetch
 * the remote reserved page and request activation of that partition.
 *
 * Returns silently when either scratch allocation fails.  The scan stops
 * early once xpc_exiting is set.
 *
 * NOTE(review): this copy of the file is missing listing lines 409 (the
 * function's name and parameter list) and 422 (the start of the call that
 * assigns remote_rp).  Restore them from the tree before building.
 */
408 void
410 {
411  void *remote_rp_base;
412  struct xpc_rsvd_page *remote_rp;
413  unsigned long remote_rp_pa;
414  int region;
415  int region_size;
416  int max_regions;
417  int nasid;
418  struct xpc_rsvd_page *rp;
419  unsigned long *discovered_nasids;
420  enum xp_retval ret;
421 
	/*
	 * NOTE(review): listing line 422 is missing; only the trailing
	 * arguments of the allocation that sets remote_rp and remote_rp_base
	 * survive on the next two lines.
	 */
423  xpc_nasid_mask_nbytes,
424  GFP_KERNEL, &remote_rp_base);
425  if (remote_rp == NULL)
426  return;
427 
	/* zeroed bitmask of nasids whose partition has already been seen */
428  discovered_nasids = kzalloc(sizeof(long) * xpc_nasid_mask_nlongs,
429  GFP_KERNEL);
430  if (discovered_nasids == NULL) {
431  kfree(remote_rp_base);
432  return;
433  }
434 
	/* 'rp' is assigned here but not referenced again in this function */
435  rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
436 
437  /*
438  * The term 'region' in this context refers to the minimum number of
439  * nodes that can comprise an access protection grouping. The access
440  * protection is in regards to memory, IOI and IPI.
441  */
442  region_size = xp_region_size;
443 
444  if (is_uv())
445  max_regions = 256;
446  else {
447  max_regions = 64;
448 
	/*
	 * The cases fall through on purpose: each larger region size doubles
	 * max_regions once more (32 -> 128, 64 -> 256, 128 -> 512), and
	 * region_size is then forced to 16.
	 */
449  switch (region_size) {
450  case 128:
451  max_regions *= 2;
452  case 64:
453  max_regions *= 2;
454  case 32:
455  max_regions *= 2;
456  region_size = 16;
457  DBUG_ON(!is_shub2());
458  }
459  }
460 
461  for (region = 0; region < max_regions; region++) {
462 
463  if (xpc_exiting)
464  break;
465 
466  dev_dbg(xpc_part, "searching region %d\n", region);
467 
	/* only even nasids are visited; bit (nasid / 2) indexes the masks */
468  for (nasid = (region * region_size * 2);
469  nasid < ((region + 1) * region_size * 2); nasid += 2) {
470 
471  if (xpc_exiting)
472  break;
473 
474  dev_dbg(xpc_part, "checking nasid %d\n", nasid);
475 
476  if (test_bit(nasid / 2, xpc_part_nasids)) {
477  dev_dbg(xpc_part, "PROM indicates Nasid %d is "
478  "part of the local partition; skipping "
479  "region\n", nasid);
480  break;
481  }
482 
483  if (!(test_bit(nasid / 2, xpc_mach_nasids))) {
484  dev_dbg(xpc_part, "PROM indicates Nasid %d was "
485  "not on Numa-Link network at reset\n",
486  nasid);
487  continue;
488  }
489 
490  if (test_bit(nasid / 2, discovered_nasids)) {
491  dev_dbg(xpc_part, "Nasid %d is part of a "
492  "partition which was previously "
493  "discovered\n", nasid);
494  continue;
495  }
496 
497  /* pull over the rsvd page header & part_nasids mask */
498 
499  ret = xpc_get_remote_rp(nasid, discovered_nasids,
500  remote_rp, &remote_rp_pa);
501  if (ret != xpSuccess) {
502  dev_dbg(xpc_part, "unable to get reserved page "
503  "from nasid %d, reason=%d\n", nasid,
504  ret);
505 
	/* the page belongs to us: abandon the rest of this region */
506  if (ret == xpLocalPartid)
507  break;
508 
509  continue;
510  }
511 
512  xpc_arch_ops.request_partition_activation(remote_rp,
513  remote_rp_pa, nasid);
514  }
515  }
516 
	/* free via the raw base pointer, not the aligned remote_rp */
517  kfree(discovered_nasids);
518  kfree(remote_rp_base);
519 }
520 
521 /*
522  * Given a partid, get the nasids owned by that partition from the
523  * remote partition's reserved page.
524  */
525 enum xp_retval
526 xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
527 {
528  struct xpc_partition *part;
529  unsigned long part_nasid_pa;
530 
531  part = &xpc_partitions[partid];
532  if (part->remote_rp_pa == 0)
533  return xpPartitionDown;
534 
535  memset(nasid_mask, 0, xpc_nasid_mask_nbytes);
536 
537  part_nasid_pa = (unsigned long)XPC_RP_PART_NASIDS(part->remote_rp_pa);
538 
539  return xp_remote_memcpy(xp_pa(nasid_mask), part_nasid_pa,
540  xpc_nasid_mask_nbytes);
541 }