#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/mmu_context.h>
#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/irq_vectors.h>
#include <asm/timer.h>
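/*
 * Base destination-timeout values in nanoseconds; the index is
 * presumably derived from UVH_LB_BAU_MISC_CONTROL fields (see
 * calculate_destination_timeout() below, which scales one of these
 * entries by the hardware multipliers).
 */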
static int timeout_base_ns[] = {
static int timeout_us;
static int nobau_perm;

static struct dentry *tunables_dir;
static struct dentry *tunables_file;

static char *stat_description[] = {
	"sent: number of shootdown messages sent",
	"stime: time spent sending messages",
	"numuvhubs: number of hubs targeted with shootdown",
	"numuvhubs16: number times 16 or more hubs targeted",
	"numuvhubs8: number times 8 or more hubs targeted",
	"numuvhubs4: number times 4 or more hubs targeted",
	"numuvhubs2: number times 2 or more hubs targeted",
	"numuvhubs1: number times 1 hub targeted",
	"numcpus: number of cpus targeted with shootdown",
	"dto: number of destination timeouts",
	"retries: destination timeout retries sent",
86 "rok: : destination timeouts successfully retried",
87 "resetp: ipi-style resource resets for plugs",
88 "resett: ipi-style resource resets for timeouts",
89 "giveup: fall-backs to ipi-style shootdowns",
90 "sto: number of source timeouts",
91 "bz: number of stay-busy's",
92 "throt: number times spun in throttle",
93 "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE",
94 "recv: shootdown messages received",
95 "rtime: time spent processing messages",
96 "all: shootdown all-tlb messages",
97 "one: shootdown one-tlb messages",
98 "mult: interrupts that found multiple messages",
99 "none: interrupts that found no messages",
100 "retry: number of retry messages processed",
101 "canc: number messages canceled by retries",
102 "nocan: number retries that found nothing to cancel",
103 "reset: number of ipi-style reset requests processed",
104 "rcan: number messages canceled by reset requests",
105 "disable: number times use of the BAU was disabled",
106 "enable: number times use of the BAU was re-enabled"
static int __init setup_nobau(char *arg)
	pr_info("BAU not initialized; cannot be turned on\n");

	b = uv_node_to_blade_id(node);

static int __init uvhub_to_first_apicid(int uvhub)
		if (uvhub == uv_cpu_to_blade_id(cpu))

	if (!msg->canceled && do_acknowledge) {
		write_mmr_sw_ack(dw);
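/*
 * Process the receipt of a RETRY message: scan the payload queue for
 * earlier copies of this message and cancel them so they are not
 * processed twice.
 */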
static void bau_process_retry_msg(struct msg_desc *mdp,
					struct bau_control *bcp)
	int cancel_count = 0;
	unsigned long msg_res;
	unsigned long mmr = 0;

	for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) {
	mmr = read_mmr_sw_ack();
	write_mmr_sw_ack(mr);

	short socket_ack_count = 0;

	bau_process_retry_msg(mdp, bcp);
	socket_ack_count = atom_asr(1, asp);
	msg_ack_count = atom_asr(socket_ack_count, asp);
	reply_to_message(mdp, bcp, do_acknowledge);
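/* Determine the first cpu on the given pnode. */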
static int pnode_to_first_cpu(int pnode, struct bau_control *smaster)
		if (pnode == hpp->pnode)
	for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) {
		unsigned long msg_res;

	mmr = read_mmr_sw_ack();
	write_mmr_sw_ack(mr);

	int sender = bcp->cpu;

	for (pnode = 0; pnode < maskbits; pnode++) {
		if (!bau_uvhub_isset(pnode, distribution))
		cpu = pnode_to_first_cpu(apnode, smaster);
static inline unsigned long cycles_2_us(unsigned long long cyc)
	unsigned long long ns;

static inline void quiesce_local_uvhub(struct bau_control *hmaster)

static inline void end_uvhub_quiesce(struct bau_control *hmaster)
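/*
 * UV1 keeps a descriptor's software-visible status in a single local
 * MMR: read it and shift the field of interest into the low bits.
 */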
static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift)
{
	unsigned long descriptor_status;

	descriptor_status = uv_read_local_mmr(mmr_offset);
	descriptor_status >>= right_shift;
	return descriptor_status;
}
static int uv1_wait_completion(struct bau_desc *bau_desc,
				unsigned long mmr_offset, int right_shift,
				struct bau_control *bcp, long try)
	unsigned long descriptor_status;

	descriptor_status = uv1_read_status(mmr_offset, right_shift);

	while (descriptor_status != DS_IDLE) {
		if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
		descriptor_status = uv1_read_status(mmr_offset, right_shift);
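/*
 * UV2 status reads take an extra 'desc' argument; UV2 apparently
 * keeps an additional bit of descriptor status in a second
 * ACTIVATION_STATUS MMR, so the descriptor number selects where to
 * look.
 */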
static unsigned long uv2_read_status(unsigned long offset, int rshft, int desc)
	unsigned long descriptor_status;

	return descriptor_status;

	return (((((read_lmmr(mmr_offset) >> right_shift) &
static int uv2_wait_completion(struct bau_desc *bau_desc,
				unsigned long mmr_offset, int right_shift,
				struct bau_control *bcp, long try)
	unsigned long descriptor_stat;

	descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);

		if (cycles_2_us(ttm - bcp->send_message) < timeout_us) {
		if (busy_reps > 1000000) {
	descriptor_stat = uv2_read_status(mmr_offset, right_shift, desc);
static int wait_completion(struct bau_desc *bau_desc,
				struct bau_control *bcp, long try)
	unsigned long mmr_offset;

		return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
						bcp, try);
		return uv2_wait_completion(bau_desc, mmr_offset, right_shift,
						bcp, try);

static inline cycles_t sec_2_cycles(unsigned long sec)
	ns = sec * 1000000000;
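/*
 * A destination has either stopped accepting messages ("plugged") or
 * timed out. Both recovery paths quiesce the local uvhub, reset the
 * stuck resources (ipi-style, per the resetp/resett statistics), and
 * then release the hub.
 */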
static void destination_plugged(struct bau_desc *bau_desc,
				struct bau_control *bcp,
				struct bau_control *hmaster,
				struct ptc_stats *stat)
		quiesce_local_uvhub(hmaster);
		end_uvhub_quiesce(hmaster);

static void destination_timeout(struct bau_desc *bau_desc,
				struct bau_control *bcp,
				struct bau_control *hmaster,
				struct ptc_stats *stat)
		quiesce_local_uvhub(hmaster);
		end_uvhub_quiesce(hmaster);
static void count_max_concurr(int stat, struct bau_control *bcp,
				struct bau_control *hmaster)

				int completion_status, int try)
	elapsed = time2 - time1;

	if ((elapsed > congested_cycles) &&
		disable_for_period(bcp, stat);

		disable_for_period(bcp, stat);
	if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) {
		} while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr));
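/*
 * Dispatch on the completion status of a broadcast: plugged
 * destinations and destination timeouts each take their own recovery
 * path (destination_plugged()/destination_timeout() above).
 */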
static void handle_cmplt(int completion_status, struct bau_desc *bau_desc,
			struct bau_control *bcp, struct bau_control *hmaster,
			struct ptc_stats *stat)
		destination_plugged(bau_desc, bcp, hmaster, stat);
		destination_timeout(bau_desc, bcp, hmaster, stat);

			struct bau_desc *bau_desc)
	int completion_stat = 0;

		uv1_throttle(hmaster, stat);

	write_mmr_activation(index);

		completion_stat = wait_completion(bau_desc, bcp, try);

		handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat);

	count_max_concurr(completion_stat, bcp, hmaster);

	record_send_stats(time1, time2, bcp, stat, completion_stat, try);
static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs,
					int remotes, struct bau_desc *bau_desc)

				struct bau_desc *bau_desc, int *localsp,
				int *remotesp)

				unsigned end, unsigned int cpu)
	struct bau_desc *bau_desc;
	struct cpumask *flush_mask;
	unsigned long descriptor_status;

		status = ((descriptor_status >> (bcp->uvhub_cpu *

		if (check_enable(bcp, stat)) {

	flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));

	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))

	record_send_statistics(stat, locals, hubs, remotes, bau_desc);
	while (msg_next != msg) {

	unsigned long mmr_image;

	mmr_image = read_mmr_sw_ack();
	if ((swack_vec & mmr_image) == 0) {
		bau_process_message(mdp, bcp, 0);
		bau_process_message(mdp, bcp, 1);
	bau_process_message(&msgdesc, bcp, 1);
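/*
 * Enable BAU destination timeouts on every blade that has cpus, by a
 * read-modify-write of each pnode's MISC_CONTROL MMR.
 */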
static void __init enable_timeouts(void)
	unsigned long mmr_image;

	nuvhubs = uv_num_possible_blades();

	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
		if (!uv_blade_nr_possible_cpus(uvhub))
		pnode = uv_blade_to_pnode(uvhub);
		mmr_image = read_mmr_misc_control(pnode);
		write_mmr_misc_control(pnode, mmr_image);
		write_mmr_misc_control(pnode, mmr_image);
		write_mmr_misc_control(pnode, mmr_image);
static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)

static void ptc_seq_stop(struct seq_file *file, void *data)

static inline unsigned long long usec_2_cycles(unsigned long microsec)
	unsigned long long cyc;

	ns = microsec * 1000;
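/*
 * seq_file show routine for the ptc_statistics file: evidently prints
 * the header legend first, then one line of per-cpu counters; the
 * column names match stat_description[] above.
 */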
static int ptc_seq_show(struct seq_file *file, void *data)
	cpu = *(loff_t *)data;

		"# cpu bauoff sent stime self locals remotes ncpus localhub ");
		"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
		"numuvhubs4 numuvhubs2 numuvhubs1 dto snacks retries ");
		"rok resetp resett giveup sto bz throt disable ");
		"enable wars warshw warwaits enters ipidis plugged ");
		"ipiover glim cong swack recv rtime all one mult ");
		"none retry canc nocan reset rcan\n");

		"cpu %d %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
		cycles_2_us(stat->s_time),

	seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",

	seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",

		"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
		read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)),
static ssize_t tunables_read(struct file *file, char __user *userbuf,
				size_t count, loff_t *ppos)

		"max_concur plugged_delay plugsb4reset timeoutsb4reset",
		"ipi_reset_limit complete_threshold congested_response_us",
		"congested_reps disabled_period giveup_limit",
static ssize_t ptc_proc_write(struct file *file, const char __user *user,
				size_t count, loff_t *data)
	if (count == 0 || count > sizeof(optstr))

	optstr[count - 1] = '\0';

	if (!strcmp(optstr, "on")) {
	} else if (!strcmp(optstr, "off")) {

	if (input_arg == 0) {
		elements = sizeof(stat_description) / sizeof(*stat_description);
	} else if (input_arg == -1) {
			stat = &per_cpu(ptcstats, cpu);
static int local_atoi(const char *name)
		val = 10 * val + (*name - '0');

	int e = sizeof(tunables) / sizeof(*tunables);

	val = local_atoi(p);

			"Error: BAU max concurrent %d is invalid\n",

		max_concurr_const = val;
static ssize_t tunables_write(struct file *file, const char __user *user,
				size_t count, loff_t *data)
	if (count == 0 || count > sizeof(instr) - 1)

	instr[count] = '\0';

	ret = parse_tunables_write(bcp, instr, count);
	.start = ptc_seq_start,
	.next = ptc_seq_next,
	.stop = ptc_seq_stop,
	.show = ptc_seq_show

static int ptc_proc_open(struct inode *inode, struct file *file)
	return seq_open(file, &uv_ptc_seq_ops);

static int tunables_open(struct inode *inode, struct file *file)

	.open = ptc_proc_open,
	.write = ptc_proc_write,

	.open = tunables_open,
	.read = tunables_read,
	.write = tunables_write,

static int __init uv_ptc_init(void)
				&proc_uv_ptc_operations);

	if (!tunables_dir) {
					tunables_dir, NULL, &tunables_fops);
	if (!tunables_file) {
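/*
 * Allocate this hub's block of activation descriptors node-locally
 * and record where it lives in gnode/offset form so the hardware can
 * find it.
 */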
static void activation_descriptor_init(int node, int pnode, int base_pnode)
	struct bau_desc *bau_desc;
	struct bau_desc *bd2;

	bau_desc = kmalloc_node(dsize, GFP_KERNEL, node);

	gpa = uv_gpa(bau_desc);
	n = uv_gpa_to_gnode(gpa);
	m = uv_gpa_to_offset(gpa);

		memset(bd2, 0, sizeof(struct bau_desc));

		if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
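/*
 * Set up this hub's payload queue: allocate it node-locally, align
 * the queue to a 32-byte boundary (the cp/pqp arithmetic below), and
 * point the payload_first/tail/last MMRs at it.
 */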
static void pq_init(int node, int pnode)
	unsigned long first;
	unsigned long pn_first;

	plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry);

	cp = (char *)pqp + 31;
	pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5);

		if (pnode != uv_cpu_to_pnode(cpu))

	pn = uv_gpa_to_gnode(uv_gpa(pqp));

	write_mmr_payload_first(pnode, pn_first);
	write_mmr_payload_tail(pnode, first);
	write_mmr_payload_last(pnode, last);
	write_gmmr_sw_ack(pnode, 0xffffUL);
static void __init init_uvhub(int uvhub, int vector, int base_pnode)
	node = uvhub_to_first_node(uvhub);
	pnode = uv_blade_to_pnode(uvhub);

	activation_descriptor_init(node, pnode, base_pnode);

	pq_init(node, pnode);

	write_mmr_data_config(pnode, ((apicid << 32) | vector));
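/*
 * Derive the destination timeout in microseconds: pick a base value
 * from timeout_base_ns[] and scale it by the two multipliers read
 * from the hardware (mult1/mult2 below).
 */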
static int calculate_destination_timeout(void)
	unsigned long mmr_image;
	unsigned long ts_ns;

	ts_ns = timeout_base_ns[index];
	ts_ns *= (mult1 * mult2);

static void __init init_per_cpu_tunables(void)
static int __init get_cpu_topology(int base_pnode,
					struct uvhub_desc *uvhub_descs,
					unsigned char *uvhub_mask)

			"cpu %d pnode %d-%d beyond %d; BAU disabled\n",

		*(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8));
		bdp = &uvhub_descs[uvhub];

		socket = bcp->osnode & 1;
		sdp = &bdp->socket[socket];
static void make_per_cpu_thp(struct bau_control *smaster)

static void make_per_hub_cpumask(struct bau_control *hmaster)

	for (i = 0; i < sdp->num_cpus; i++) {

	else if (is_uv2_hub())
static int __init summarize_uvhub_sockets(int nuvhubs,
					struct uvhub_desc *uvhub_descs,
					unsigned char *uvhub_mask)
	unsigned short socket_mask;

	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
		if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8))))
		bdp = &uvhub_descs[uvhub];
		while (socket_mask) {
			if (socket_mask & 1) {
				sdp = &bdp->socket[socket];
				if (scan_sock(sdp, bdp, &smaster, &hmaster))
				make_per_cpu_thp(smaster);
			socket_mask = (socket_mask >> 1);
		make_per_hub_cpumask(hmaster);
	unsigned char *uvhub_mask;

	timeout_us = calculate_destination_timeout();

	uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);

	if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
	if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask))

	init_per_cpu_tunables();
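/*
 * Top-level BAU initialization: find the lowest pnode with cpus to
 * use as the base, initialize every blade's descriptors and payload
 * queue via init_uvhub(), then prime the activation and broadcast
 * MMRs.
 */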
static int __init uv_bau_init(void)
		mask = &per_cpu(uv_flush_tlb_mask, cur_cpu);

	nuvhubs = uv_num_possible_blades();
	congested_cycles = usec_2_cycles(congested_respns_us);

	uv_base_pnode = 0x7fffffff;
	for (uvhub = 0; uvhub < nuvhubs; uvhub++) {
		cpus = uv_blade_nr_possible_cpus(uvhub);
		if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode))
			uv_base_pnode = uv_blade_to_pnode(uvhub);

	for_each_possible_blade(uvhub)
		if (uv_blade_nr_possible_cpus(uvhub))
			init_uvhub(uvhub, vector, uv_base_pnode);

	for_each_possible_blade(uvhub) {
		if (uv_blade_nr_possible_cpus(uvhub)) {
			pnode = uv_blade_to_pnode(uvhub);
			write_gmmr_activation(pnode, val);
			write_mmr_data_broadcast(pnode, mmr);