sgi-xp: isolate xpc_vars_part structure to sn2 only
[pandora-kernel.git] / drivers / misc / sgi-xp / xpc_partition.c
1 /*
2  * This file is subject to the terms and conditions of the GNU General Public
3  * License.  See the file "COPYING" in the main directory of this archive
4  * for more details.
5  *
6  * Copyright (c) 2004-2008 Silicon Graphics, Inc.  All Rights Reserved.
7  */
8
9 /*
10  * Cross Partition Communication (XPC) partition support.
11  *
12  *      This is the part of XPC that detects the presence/absence of
13  *      other partitions. It provides a heartbeat and monitors the
14  *      heartbeats of other partitions.
15  *
16  */
17
18 #include <linux/kernel.h>
19 #include <linux/sysctl.h>
20 #include <linux/cache.h>
21 #include <linux/mmzone.h>
22 #include <linux/nodemask.h>
23 #include <asm/sn/intr.h>
24 #include <asm/sn/sn_sal.h>
25 #include <asm/sn/nodepda.h>
26 #include <asm/sn/addrs.h>
27 #include "xpc.h"
28
/* XPC is exiting flag; the scanning loops in this file stop once it is set */
int xpc_exiting;

/*
 * SH_IPI_ACCESS shub register value(s) as read by xpc_allow_IPI_ops() and
 * written back by xpc_restrict_IPI_ops(). One register on shub 1, four on
 * shub 2.
 */
static u64 xpc_sh1_IPI_access;
static u64 xpc_sh2_IPI_access0;
static u64 xpc_sh2_IPI_access1;
static u64 xpc_sh2_IPI_access2;
static u64 xpc_sh2_IPI_access3;

/*
 * original protection values for each node (saved by xpc_allow_IPI_ops()
 * when enable_shub_wars_1_1() is true, restored by xpc_restrict_IPI_ops())
 */
u64 xpc_prot_vec[MAX_NUMNODES];

/*
 * this partition's reserved page pointers; the two nasid mask pointers are
 * set by xpc_setup_rsvd_page()
 */
struct xpc_rsvd_page *xpc_rsvd_page;
static u64 *xpc_part_nasids;
static u64 *xpc_mach_nasids;

/*
 * >>> next two variables should be 'xpc_' if they remain here
 * Both are derived from the reserved page's SAL_nasids_size in
 * xpc_setup_rsvd_page().
 */
static int xp_sizeof_nasid_mask;        /* actual size in bytes of nasid mask */
int xp_nasid_mask_words;        /* actual size in words of nasid mask */

/* array of partition structures, indexed by partid in this file */
struct xpc_partition *xpc_partitions;

/*
 * Generic buffer used to store a local copy of portions of a remote
 * partition's reserved page (either its header and part_nasids mask,
 * or its vars).
 *
 * NOTE(review): xpc_remote_copy_buffer_base is presumably the unaligned
 * 'base' pointer from xpc_kmalloc_cacheline_aligned() that must be passed
 * to kfree() -- the allocation is not in this part of the file; confirm.
 */
char *xpc_remote_copy_buffer;
void *xpc_remote_copy_buffer_base;
60
61 /*
62  * Guarantee that the kmalloc'd memory is cacheline aligned.
63  */
64 void *
65 xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
66 {
67         /* see if kmalloc will give us cachline aligned memory by default */
68         *base = kmalloc(size, flags);
69         if (*base == NULL)
70                 return NULL;
71
72         if ((u64)*base == L1_CACHE_ALIGN((u64)*base))
73                 return *base;
74
75         kfree(*base);
76
77         /* nope, we'll have to do it ourselves */
78         *base = kmalloc(size + L1_CACHE_BYTES, flags);
79         if (*base == NULL)
80                 return NULL;
81
82         return (void *)L1_CACHE_ALIGN((u64)*base);
83 }
84
85 /*
86  * Given a nasid, get the physical address of the  partition's reserved page
87  * for that nasid. This function returns 0 on any error.
88  */
89 static u64
90 xpc_get_rsvd_page_pa(int nasid)
91 {
92         enum xp_retval ret;
93         s64 status;
94         u64 cookie = 0;
95         u64 rp_pa = nasid;      /* seed with nasid */
96         u64 len = 0;
97         u64 buf = buf;
98         u64 buf_len = 0;
99         void *buf_base = NULL;
100
101         while (1) {
102
103                 status = sn_partition_reserved_page_pa(buf, &cookie, &rp_pa,
104                                                        &len);
105
106                 dev_dbg(xpc_part, "SAL returned with status=%li, cookie="
107                         "0x%016lx, address=0x%016lx, len=0x%016lx\n",
108                         status, cookie, rp_pa, len);
109
110                 if (status != SALRET_MORE_PASSES)
111                         break;
112
113                 /* >>> L1_CACHE_ALIGN() is only a sn2-bte_copy requirement */
114                 if (L1_CACHE_ALIGN(len) > buf_len) {
115                         kfree(buf_base);
116                         buf_len = L1_CACHE_ALIGN(len);
117                         buf = (u64)xpc_kmalloc_cacheline_aligned(buf_len,
118                                                                  GFP_KERNEL,
119                                                                  &buf_base);
120                         if (buf_base == NULL) {
121                                 dev_err(xpc_part, "unable to kmalloc "
122                                         "len=0x%016lx\n", buf_len);
123                                 status = SALRET_ERROR;
124                                 break;
125                         }
126                 }
127
128                 ret = xp_remote_memcpy((void *)buf, (void *)rp_pa, buf_len);
129                 if (ret != xpSuccess) {
130                         dev_dbg(xpc_part, "xp_remote_memcpy failed %d\n", ret);
131                         status = SALRET_ERROR;
132                         break;
133                 }
134         }
135
136         kfree(buf_base);
137
138         if (status != SALRET_OK)
139                 rp_pa = 0;
140
141         dev_dbg(xpc_part, "reserved page at phys address 0x%016lx\n", rp_pa);
142         return rp_pa;
143 }
144
145 /*
146  * Fill the partition reserved page with the information needed by
147  * other partitions to discover we are alive and establish initial
148  * communications.
149  */
150 struct xpc_rsvd_page *
151 xpc_setup_rsvd_page(void)
152 {
153         struct xpc_rsvd_page *rp;
154         u64 rp_pa;
155
156         /* get the local reserved page's address */
157
158         preempt_disable();
159         rp_pa = xpc_get_rsvd_page_pa(cpuid_to_nasid(smp_processor_id()));
160         preempt_enable();
161         if (rp_pa == 0) {
162                 dev_err(xpc_part, "SAL failed to locate the reserved page\n");
163                 return NULL;
164         }
165         rp = (struct xpc_rsvd_page *)__va(rp_pa);
166
167         if (rp->SAL_version < 3) {
168                 /* SAL_versions < 3 had a SAL_partid defined as a u8 */
169                 rp->SAL_partid &= 0xff;
170         }
171         BUG_ON(rp->SAL_partid != sn_partition_id);
172
173         if (rp->SAL_partid < 0 || rp->SAL_partid >= xp_max_npartitions) {
174                 dev_err(xpc_part, "the reserved page's partid of %d is outside "
175                         "supported range (< 0 || >= %d)\n", rp->SAL_partid,
176                         xp_max_npartitions);
177                 return NULL;
178         }
179
180         rp->version = XPC_RP_VERSION;
181         rp->max_npartitions = xp_max_npartitions;
182
183         /* establish the actual sizes of the nasid masks */
184         if (rp->SAL_version == 1) {
185                 /* SAL_version 1 didn't set the nasids_size field */
186                 rp->SAL_nasids_size = 128;
187         }
188         xp_sizeof_nasid_mask = rp->SAL_nasids_size;
189         xp_nasid_mask_words = DIV_ROUND_UP(xp_sizeof_nasid_mask,
190                                            BYTES_PER_WORD);
191
192         /* setup the pointers to the various items in the reserved page */
193         xpc_part_nasids = XPC_RP_PART_NASIDS(rp);
194         xpc_mach_nasids = XPC_RP_MACH_NASIDS(rp);
195
196         if (xpc_rsvd_page_init(rp) != xpSuccess)
197                 return NULL;
198
199         /*
200          * Set timestamp of when reserved page was setup by XPC.
201          * This signifies to the remote partition that our reserved
202          * page is initialized.
203          */
204         rp->stamp = CURRENT_TIME;
205
206         return rp;
207 }
208
209 /*
210  * Change protections to allow IPI operations (and AMO operations on
211  * Shub 1.1 systems).
212  */
213 void
214 xpc_allow_IPI_ops(void)
215 {
216         int node;
217         int nasid;
218
219         /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
220
221         if (is_shub2()) {
222                 xpc_sh2_IPI_access0 =
223                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS0));
224                 xpc_sh2_IPI_access1 =
225                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS1));
226                 xpc_sh2_IPI_access2 =
227                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS2));
228                 xpc_sh2_IPI_access3 =
229                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH2_IPI_ACCESS3));
230
231                 for_each_online_node(node) {
232                         nasid = cnodeid_to_nasid(node);
233                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
234                               -1UL);
235                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
236                               -1UL);
237                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
238                               -1UL);
239                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
240                               -1UL);
241                 }
242
243         } else {
244                 xpc_sh1_IPI_access =
245                     (u64)HUB_L((u64 *)LOCAL_MMR_ADDR(SH1_IPI_ACCESS));
246
247                 for_each_online_node(node) {
248                         nasid = cnodeid_to_nasid(node);
249                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
250                               -1UL);
251
252                         /*
253                          * Since the BIST collides with memory operations on
254                          * SHUB 1.1 sn_change_memprotect() cannot be used.
255                          */
256                         if (enable_shub_wars_1_1()) {
257                                 /* open up everything */
258                                 xpc_prot_vec[node] = (u64)HUB_L((u64 *)
259                                                                 GLOBAL_MMR_ADDR
260                                                                 (nasid,
261                                                   SH1_MD_DQLP_MMR_DIR_PRIVEC0));
262                                 HUB_S((u64 *)
263                                       GLOBAL_MMR_ADDR(nasid,
264                                                    SH1_MD_DQLP_MMR_DIR_PRIVEC0),
265                                       -1UL);
266                                 HUB_S((u64 *)
267                                       GLOBAL_MMR_ADDR(nasid,
268                                                    SH1_MD_DQRP_MMR_DIR_PRIVEC0),
269                                       -1UL);
270                         }
271                 }
272         }
273 }
274
275 /*
276  * Restrict protections to disallow IPI operations (and AMO operations on
277  * Shub 1.1 systems).
278  */
279 void
280 xpc_restrict_IPI_ops(void)
281 {
282         int node;
283         int nasid;
284
285         /* >>> Change SH_IPI_ACCESS code to use SAL call once it is available */
286
287         if (is_shub2()) {
288
289                 for_each_online_node(node) {
290                         nasid = cnodeid_to_nasid(node);
291                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS0),
292                               xpc_sh2_IPI_access0);
293                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS1),
294                               xpc_sh2_IPI_access1);
295                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS2),
296                               xpc_sh2_IPI_access2);
297                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH2_IPI_ACCESS3),
298                               xpc_sh2_IPI_access3);
299                 }
300
301         } else {
302
303                 for_each_online_node(node) {
304                         nasid = cnodeid_to_nasid(node);
305                         HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid, SH1_IPI_ACCESS),
306                               xpc_sh1_IPI_access);
307
308                         if (enable_shub_wars_1_1()) {
309                                 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
310                                                    SH1_MD_DQLP_MMR_DIR_PRIVEC0),
311                                       xpc_prot_vec[node]);
312                                 HUB_S((u64 *)GLOBAL_MMR_ADDR(nasid,
313                                                    SH1_MD_DQRP_MMR_DIR_PRIVEC0),
314                                       xpc_prot_vec[node]);
315                         }
316                 }
317         }
318 }
319
320 /*
321  * At periodic intervals, scan through all active partitions and ensure
322  * their heartbeat is still active.  If not, the partition is deactivated.
323  */
324 void
325 xpc_check_remote_hb(void)
326 {
327         struct xpc_vars *remote_vars;
328         struct xpc_partition *part;
329         short partid;
330         enum xp_retval ret;
331
332         remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;
333
334         for (partid = 0; partid < xp_max_npartitions; partid++) {
335
336                 if (xpc_exiting)
337                         break;
338
339                 if (partid == sn_partition_id)
340                         continue;
341
342                 part = &xpc_partitions[partid];
343
344                 if (part->act_state == XPC_P_INACTIVE ||
345                     part->act_state == XPC_P_DEACTIVATING) {
346                         continue;
347                 }
348
349                 /* pull the remote_hb cache line */
350                 ret = xp_remote_memcpy(remote_vars,
351                                        (void *)part->remote_vars_pa,
352                                        XPC_RP_VARS_SIZE);
353                 if (ret != xpSuccess) {
354                         XPC_DEACTIVATE_PARTITION(part, ret);
355                         continue;
356                 }
357
358                 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
359                         " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
360                         partid, remote_vars->heartbeat, part->last_heartbeat,
361                         remote_vars->heartbeat_offline,
362                         remote_vars->heartbeating_to_mask);
363
364                 if (((remote_vars->heartbeat == part->last_heartbeat) &&
365                      (remote_vars->heartbeat_offline == 0)) ||
366                     !xpc_hb_allowed(sn_partition_id, remote_vars)) {
367
368                         XPC_DEACTIVATE_PARTITION(part, xpNoHeartbeat);
369                         continue;
370                 }
371
372                 part->last_heartbeat = remote_vars->heartbeat;
373         }
374 }
375
376 /*
377  * Get a copy of a portion of the remote partition's rsvd page.
378  *
379  * remote_rp points to a buffer that is cacheline aligned for BTE copies and
380  * is large enough to contain a copy of their reserved page header and
381  * part_nasids mask.
382  */
383 static enum xp_retval
384 xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
385                   struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
386 {
387         int i;
388         enum xp_retval ret;
389
390         /* get the reserved page's physical address */
391
392         *remote_rp_pa = xpc_get_rsvd_page_pa(nasid);
393         if (*remote_rp_pa == 0)
394                 return xpNoRsvdPageAddr;
395
396         /* pull over the reserved page header and part_nasids mask */
397         ret = xp_remote_memcpy(remote_rp, (void *)*remote_rp_pa,
398                                XPC_RP_HEADER_SIZE + xp_sizeof_nasid_mask);
399         if (ret != xpSuccess)
400                 return ret;
401
402         if (discovered_nasids != NULL) {
403                 u64 *remote_part_nasids = XPC_RP_PART_NASIDS(remote_rp);
404
405                 for (i = 0; i < xp_nasid_mask_words; i++)
406                         discovered_nasids[i] |= remote_part_nasids[i];
407         }
408
409         /* check that the partid is valid and is for another partition */
410
411         if (remote_rp->SAL_partid < 0 ||
412             remote_rp->SAL_partid >= xp_max_npartitions) {
413                 return xpInvalidPartid;
414         }
415
416         if (remote_rp->SAL_partid == sn_partition_id)
417                 return xpLocalPartid;
418
419         /* see if the rest of the reserved page has been set up by XPC */
420         if (timespec_equal(&remote_rp->stamp, &ZERO_STAMP))
421                 return xpRsvdPageNotSet;
422
423         if (XPC_VERSION_MAJOR(remote_rp->version) !=
424             XPC_VERSION_MAJOR(XPC_RP_VERSION)) {
425                 return xpBadVersion;
426         }
427
428         if (remote_rp->max_npartitions <= sn_partition_id)
429                 return xpInvalidPartid;
430
431         return xpSuccess;
432 }
433
434 /*
435  * Get a copy of the remote partition's XPC variables from the reserved page.
436  *
437  * remote_vars points to a buffer that is cacheline aligned for BTE copies and
438  * assumed to be of size XPC_RP_VARS_SIZE.
439  */
440 static enum xp_retval
441 xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
442 {
443         enum xp_retval ret;
444
445         if (remote_vars_pa == 0)
446                 return xpVarsNotSet;
447
448         /* pull over the cross partition variables */
449         ret = xp_remote_memcpy(remote_vars, (void *)remote_vars_pa,
450                                XPC_RP_VARS_SIZE);
451         if (ret != xpSuccess)
452                 return ret;
453
454         if (XPC_VERSION_MAJOR(remote_vars->version) !=
455             XPC_VERSION_MAJOR(XPC_V_VERSION)) {
456                 return xpBadVersion;
457         }
458
459         return xpSuccess;
460 }
461
462 /*
463  * Update the remote partition's info.
464  */
465 static void
466 xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
467                           struct timespec *remote_rp_stamp, u64 remote_rp_pa,
468                           u64 remote_vars_pa, struct xpc_vars *remote_vars)
469 {
470         part->remote_rp_version = remote_rp_version;
471         dev_dbg(xpc_part, "  remote_rp_version = 0x%016x\n",
472                 part->remote_rp_version);
473
474         part->remote_rp_stamp = *remote_rp_stamp;
475         dev_dbg(xpc_part, "  remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
476                 part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
477
478         part->remote_rp_pa = remote_rp_pa;
479         dev_dbg(xpc_part, "  remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
480
481         part->remote_vars_pa = remote_vars_pa;
482         dev_dbg(xpc_part, "  remote_vars_pa = 0x%016lx\n",
483                 part->remote_vars_pa);
484
485         part->last_heartbeat = remote_vars->heartbeat;
486         dev_dbg(xpc_part, "  last_heartbeat = 0x%016lx\n",
487                 part->last_heartbeat);
488
489 /* >>> remote_vars_part_pa and vars_part_pa are sn2 only!!! */
490         part->remote_vars_part_pa = remote_vars->vars_part_pa;
491         dev_dbg(xpc_part, "  remote_vars_part_pa = 0x%016lx\n",
492                 part->remote_vars_part_pa);
493
494         part->remote_act_nasid = remote_vars->act_nasid;
495         dev_dbg(xpc_part, "  remote_act_nasid = 0x%x\n",
496                 part->remote_act_nasid);
497
498         part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
499         dev_dbg(xpc_part, "  remote_act_phys_cpuid = 0x%x\n",
500                 part->remote_act_phys_cpuid);
501
502         part->remote_amos_page_pa = remote_vars->amos_page_pa;
503         dev_dbg(xpc_part, "  remote_amos_page_pa = 0x%lx\n",
504                 part->remote_amos_page_pa);
505
506         part->remote_vars_version = remote_vars->version;
507         dev_dbg(xpc_part, "  remote_vars_version = 0x%x\n",
508                 part->remote_vars_version);
509 }
510
/*
 * Prior code has determined the nasid which generated an IPI.  Inspect
 * that nasid to determine if its partition needs to be activated or
 * deactivated.
 *
 * A partition is considered "awaiting activation" if our partition
 * flags indicate it is not active and it has a heartbeat.  A
 * partition is considered "awaiting deactivation" if our partition
 * flags indicate it is active but it has no heartbeat or it is not
 * sending its heartbeat to us.
 *
 * To determine the heartbeat, the remote nasid must have a properly
 * initialized reserved page.
 *
 * nasid is the nasid that sent the interrupt. Nothing is returned; the
 * outcome is an activation, a deactivation (possibly for reactivation),
 * or no action at all.
 */
static void
xpc_identify_act_IRQ_req(int nasid)
{
        struct xpc_rsvd_page *remote_rp;
        struct xpc_vars *remote_vars;
        u64 remote_rp_pa;
        u64 remote_vars_pa;
        int remote_rp_version;
        int reactivate = 0;
        int stamp_diff;
        struct timespec remote_rp_stamp = { 0, 0 }; /*>>> ZERO_STAMP */
        short partid;
        struct xpc_partition *part;
        enum xp_retval ret;

        /* pull over the reserved page structure */

        remote_rp = (struct xpc_rsvd_page *)xpc_remote_copy_buffer;

        ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
        if (ret != xpSuccess) {
                dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
                         "which sent interrupt, reason=%d\n", nasid, ret);
                return;
        }

        /*
         * Save everything needed out of the reserved page copy now:
         * remote_vars below points at the very same buffer, so pulling the
         * vars overwrites the reserved page copy.
         */
        remote_vars_pa = remote_rp->sn.vars_pa;
        remote_rp_version = remote_rp->version;
        if (XPC_SUPPORTS_RP_STAMP(remote_rp_version))
                remote_rp_stamp = remote_rp->stamp;

        partid = remote_rp->SAL_partid;
        part = &xpc_partitions[partid];

        /* pull over the cross partition variables */

        remote_vars = (struct xpc_vars *)xpc_remote_copy_buffer;

        ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
        if (ret != xpSuccess) {

                dev_warn(xpc_part, "unable to get XPC variables from nasid %d, "
                         "which sent interrupt, reason=%d\n", nasid, ret);

                XPC_DEACTIVATE_PARTITION(part, ret);
                return;
        }

        part->act_IRQ_rcvd++;

        dev_dbg(xpc_part, "partid for nasid %d is %d; IRQs = %d; HB = "
                "%ld:0x%lx\n", (int)nasid, (int)partid, part->act_IRQ_rcvd,
                remote_vars->heartbeat, remote_vars->heartbeating_to_mask);

        /*
         * Case 1: we consider the partition disengaged and inactive, so
         * this is an activation request.
         */
        if (xpc_partition_disengaged(part) &&
            part->act_state == XPC_P_INACTIVE) {

                xpc_update_partition_info(part, remote_rp_version,
                                          &remote_rp_stamp, remote_rp_pa,
                                          remote_vars_pa, remote_vars);

                if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
                        if (xpc_partition_disengage_requested(1UL << partid)) {
                                /*
                                 * Other side is waiting on us to disengage,
                                 * even though we already have.
                                 */
                                return;
                        }
                } else {
                        /* other side doesn't support disengage requests */
                        xpc_clear_partition_disengage_request(1UL << partid);
                }

                xpc_activate_partition(part);
                return;
        }

        /* from here on we already hold info about this partition */
        DBUG_ON(part->remote_rp_version == 0);
        DBUG_ON(part->remote_vars_version == 0);

        /*
         * Case 2: the XPC we previously knew on the other side had no
         * reserved page stamp support.
         */
        if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
                DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
                                                       remote_vars_version));

                if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
                        DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
                                                               version));
                        /* see if the other side rebooted */
                        if (part->remote_amos_page_pa ==
                            remote_vars->amos_page_pa &&
                            xpc_hb_allowed(sn_partition_id, remote_vars)) {
                                /* doesn't look that way, so ignore the IPI */
                                return;
                        }
                }

                /*
                 * Other side rebooted and previous XPC didn't support the
                 * disengage request, so we don't need to do anything special.
                 */

                xpc_update_partition_info(part, remote_rp_version,
                                          &remote_rp_stamp, remote_rp_pa,
                                          remote_vars_pa, remote_vars);
                part->reactivate_nasid = nasid;
                XPC_DEACTIVATE_PARTITION(part, xpReactivating);
                return;
        }

        /* Case 3: the XPC we previously knew supported stamps */
        DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));

        if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
                DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));

                /*
                 * Other side rebooted and previous XPC did support the
                 * disengage request, but the new one doesn't.
                 */

                xpc_clear_partition_engaged(1UL << partid);
                xpc_clear_partition_disengage_request(1UL << partid);

                xpc_update_partition_info(part, remote_rp_version,
                                          &remote_rp_stamp, remote_rp_pa,
                                          remote_vars_pa, remote_vars);
                reactivate = 1;

        } else {
                DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));

                /* a stamp different from the one we recorded means a reboot */
                stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
                                                &remote_rp_stamp);
                if (stamp_diff != 0) {
                        DBUG_ON(stamp_diff >= 0);

                        /*
                         * Other side rebooted and the previous XPC did support
                         * the disengage request, as does the new one.
                         */

                        DBUG_ON(xpc_partition_engaged(1UL << partid));
                        DBUG_ON(xpc_partition_disengage_requested(1UL <<
                                                                  partid));

                        xpc_update_partition_info(part, remote_rp_version,
                                                  &remote_rp_stamp,
                                                  remote_rp_pa, remote_vars_pa,
                                                  remote_vars);
                        reactivate = 1;
                }
        }

        if (part->disengage_request_timeout > 0 &&
            !xpc_partition_disengaged(part)) {
                /* still waiting on other side to disengage from us */
                return;
        }

        if (reactivate) {
                /* deactivate now; reactivate_nasid records who to retry */
                part->reactivate_nasid = nasid;
                XPC_DEACTIVATE_PARTITION(part, xpReactivating);

        } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
                   xpc_partition_disengage_requested(1UL << partid)) {
                /* no reboot detected, but the other side asked us to disengage */
                XPC_DEACTIVATE_PARTITION(part, xpOtherGoingDown);
        }
}
693
694 /*
695  * Loop through the activation AMO variables and process any bits
696  * which are set.  Each bit indicates a nasid sending a partition
697  * activation or deactivation request.
698  *
699  * Return #of IRQs detected.
700  */
701 int
702 xpc_identify_act_IRQ_sender(void)
703 {
704         int word, bit;
705         u64 nasid_mask;
706         u64 nasid;              /* remote nasid */
707         int n_IRQs_detected = 0;
708         AMO_t *act_amos;
709
710         act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
711
712         /* scan through act AMO variable looking for non-zero entries */
713         for (word = 0; word < xp_nasid_mask_words; word++) {
714
715                 if (xpc_exiting)
716                         break;
717
718                 nasid_mask = xpc_IPI_receive(&act_amos[word]);
719                 if (nasid_mask == 0) {
720                         /* no IRQs from nasids in this variable */
721                         continue;
722                 }
723
724                 dev_dbg(xpc_part, "AMO[%d] gave back 0x%lx\n", word,
725                         nasid_mask);
726
727                 /*
728                  * If this nasid has been added to the machine since
729                  * our partition was reset, this will retain the
730                  * remote nasid in our reserved pages machine mask.
731                  * This is used in the event of module reload.
732                  */
733                 xpc_mach_nasids[word] |= nasid_mask;
734
735                 /* locate the nasid(s) which sent interrupts */
736
737                 for (bit = 0; bit < (8 * sizeof(u64)); bit++) {
738                         if (nasid_mask & (1UL << bit)) {
739                                 n_IRQs_detected++;
740                                 nasid = XPC_NASID_FROM_W_B(word, bit);
741                                 dev_dbg(xpc_part, "interrupt from nasid %ld\n",
742                                         nasid);
743                                 xpc_identify_act_IRQ_req(nasid);
744                         }
745                 }
746         }
747         return n_IRQs_detected;
748 }
749
750 /*
751  * See if the other side has responded to a partition disengage request
752  * from us.
753  */
754 int
755 xpc_partition_disengaged(struct xpc_partition *part)
756 {
757         short partid = XPC_PARTID(part);
758         int disengaged;
759
760         disengaged = (xpc_partition_engaged(1UL << partid) == 0);
761         if (part->disengage_request_timeout) {
762                 if (!disengaged) {
763                         if (time_before(jiffies,
764                             part->disengage_request_timeout)) {
765                                 /* timelimit hasn't been reached yet */
766                                 return 0;
767                         }
768
769                         /*
770                          * Other side hasn't responded to our disengage
771                          * request in a timely fashion, so assume it's dead.
772                          */
773
774                         dev_info(xpc_part, "disengage from remote partition %d "
775                                  "timed out\n", partid);
776                         xpc_disengage_request_timedout = 1;
777                         xpc_clear_partition_engaged(1UL << partid);
778                         disengaged = 1;
779                 }
780                 part->disengage_request_timeout = 0;
781
782                 /* cancel the timer function, provided it's not us */
783                 if (!in_interrupt()) {
784                         del_singleshot_timer_sync(&part->
785                                                   disengage_request_timer);
786                 }
787
788                 DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
789                         part->act_state != XPC_P_INACTIVE);
790                 if (part->act_state != XPC_P_INACTIVE)
791                         xpc_wakeup_channel_mgr(part);
792
793                 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version))
794                         xpc_cancel_partition_disengage_request(part);
795         }
796         return disengaged;
797 }
798
799 /*
800  * Mark specified partition as active.
801  */
802 enum xp_retval
803 xpc_mark_partition_active(struct xpc_partition *part)
804 {
805         unsigned long irq_flags;
806         enum xp_retval ret;
807
808         dev_dbg(xpc_part, "setting partition %d to ACTIVE\n", XPC_PARTID(part));
809
810         spin_lock_irqsave(&part->act_lock, irq_flags);
811         if (part->act_state == XPC_P_ACTIVATING) {
812                 part->act_state = XPC_P_ACTIVE;
813                 ret = xpSuccess;
814         } else {
815                 DBUG_ON(part->reason == xpSuccess);
816                 ret = part->reason;
817         }
818         spin_unlock_irqrestore(&part->act_lock, irq_flags);
819
820         return ret;
821 }
822
823 /*
824  * Notify XPC that the partition is down.
825  */
826 void
827 xpc_deactivate_partition(const int line, struct xpc_partition *part,
828                          enum xp_retval reason)
829 {
830         unsigned long irq_flags;
831
832         spin_lock_irqsave(&part->act_lock, irq_flags);
833
834         if (part->act_state == XPC_P_INACTIVE) {
835                 XPC_SET_REASON(part, reason, line);
836                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
837                 if (reason == xpReactivating) {
838                         /* we interrupt ourselves to reactivate partition */
839                         xpc_IPI_send_reactivate(part);
840                 }
841                 return;
842         }
843         if (part->act_state == XPC_P_DEACTIVATING) {
844                 if ((part->reason == xpUnloading && reason != xpUnloading) ||
845                     reason == xpReactivating) {
846                         XPC_SET_REASON(part, reason, line);
847                 }
848                 spin_unlock_irqrestore(&part->act_lock, irq_flags);
849                 return;
850         }
851
852         part->act_state = XPC_P_DEACTIVATING;
853         XPC_SET_REASON(part, reason, line);
854
855         spin_unlock_irqrestore(&part->act_lock, irq_flags);
856
857         if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
858                 xpc_request_partition_disengage(part);
859                 xpc_IPI_send_disengage(part);
860
861                 /* set a timelimit on the disengage request */
862                 part->disengage_request_timeout = jiffies +
863                     (xpc_disengage_request_timelimit * HZ);
864                 part->disengage_request_timer.expires =
865                     part->disengage_request_timeout;
866                 add_timer(&part->disengage_request_timer);
867         }
868
869         dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n",
870                 XPC_PARTID(part), reason);
871
872         xpc_partition_going_down(part, reason);
873 }
874
875 /*
876  * Mark specified partition as inactive.
877  */
878 void
879 xpc_mark_partition_inactive(struct xpc_partition *part)
880 {
881         unsigned long irq_flags;
882
883         dev_dbg(xpc_part, "setting partition %d to INACTIVE\n",
884                 XPC_PARTID(part));
885
886         spin_lock_irqsave(&part->act_lock, irq_flags);
887         part->act_state = XPC_P_INACTIVE;
888         spin_unlock_irqrestore(&part->act_lock, irq_flags);
889         part->remote_rp_pa = 0;
890 }
891
892 /*
893  * SAL has provided a partition and machine mask.  The partition mask
894  * contains a bit for each even nasid in our partition.  The machine
895  * mask contains a bit for each even nasid in the entire machine.
896  *
897  * Using those two bit arrays, we can determine which nasids are
898  * known in the machine.  Each should also have a reserved page
899  * initialized if they are available for partitioning.
900  */
901 void
902 xpc_discovery(void)
903 {
904         void *remote_rp_base;
905         struct xpc_rsvd_page *remote_rp;
906         struct xpc_vars *remote_vars;
907         u64 remote_rp_pa;
908         u64 remote_vars_pa;
909         int region;
910         int region_size;
911         int max_regions;
912         int nasid;
913         struct xpc_rsvd_page *rp;
914         short partid;
915         struct xpc_partition *part;
916         u64 *discovered_nasids;
917         enum xp_retval ret;
918
919         remote_rp = xpc_kmalloc_cacheline_aligned(XPC_RP_HEADER_SIZE +
920                                                   xp_sizeof_nasid_mask,
921                                                   GFP_KERNEL, &remote_rp_base);
922         if (remote_rp == NULL)
923                 return;
924
925         remote_vars = (struct xpc_vars *)remote_rp;
926
927         discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
928                                     GFP_KERNEL);
929         if (discovered_nasids == NULL) {
930                 kfree(remote_rp_base);
931                 return;
932         }
933
934         rp = (struct xpc_rsvd_page *)xpc_rsvd_page;
935
936         /*
937          * The term 'region' in this context refers to the minimum number of
938          * nodes that can comprise an access protection grouping. The access
939          * protection is in regards to memory, IOI and IPI.
940          */
941         max_regions = 64;
942         region_size = sn_region_size;
943
944         switch (region_size) {
945         case 128:
946                 max_regions *= 2;
947         case 64:
948                 max_regions *= 2;
949         case 32:
950                 max_regions *= 2;
951                 region_size = 16;
952                 DBUG_ON(!is_shub2());
953         }
954
955         for (region = 0; region < max_regions; region++) {
956
957                 if (xpc_exiting)
958                         break;
959
960                 dev_dbg(xpc_part, "searching region %d\n", region);
961
962                 for (nasid = (region * region_size * 2);
963                      nasid < ((region + 1) * region_size * 2); nasid += 2) {
964
965                         if (xpc_exiting)
966                                 break;
967
968                         dev_dbg(xpc_part, "checking nasid %d\n", nasid);
969
970                         if (XPC_NASID_IN_ARRAY(nasid, xpc_part_nasids)) {
971                                 dev_dbg(xpc_part, "PROM indicates Nasid %d is "
972                                         "part of the local partition; skipping "
973                                         "region\n", nasid);
974                                 break;
975                         }
976
977                         if (!(XPC_NASID_IN_ARRAY(nasid, xpc_mach_nasids))) {
978                                 dev_dbg(xpc_part, "PROM indicates Nasid %d was "
979                                         "not on Numa-Link network at reset\n",
980                                         nasid);
981                                 continue;
982                         }
983
984                         if (XPC_NASID_IN_ARRAY(nasid, discovered_nasids)) {
985                                 dev_dbg(xpc_part, "Nasid %d is part of a "
986                                         "partition which was previously "
987                                         "discovered\n", nasid);
988                                 continue;
989                         }
990
991                         /* pull over the reserved page structure */
992
993                         ret = xpc_get_remote_rp(nasid, discovered_nasids,
994                                                 remote_rp, &remote_rp_pa);
995                         if (ret != xpSuccess) {
996                                 dev_dbg(xpc_part, "unable to get reserved page "
997                                         "from nasid %d, reason=%d\n", nasid,
998                                         ret);
999
1000                                 if (ret == xpLocalPartid)
1001                                         break;
1002
1003                                 continue;
1004                         }
1005
1006                         remote_vars_pa = remote_rp->sn.vars_pa;
1007
1008                         partid = remote_rp->SAL_partid;
1009                         part = &xpc_partitions[partid];
1010
1011                         /* pull over the cross partition variables */
1012
1013                         ret = xpc_get_remote_vars(remote_vars_pa, remote_vars);
1014                         if (ret != xpSuccess) {
1015                                 dev_dbg(xpc_part, "unable to get XPC variables "
1016                                         "from nasid %d, reason=%d\n", nasid,
1017                                         ret);
1018
1019                                 XPC_DEACTIVATE_PARTITION(part, ret);
1020                                 continue;
1021                         }
1022
1023                         if (part->act_state != XPC_P_INACTIVE) {
1024                                 dev_dbg(xpc_part, "partition %d on nasid %d is "
1025                                         "already activating\n", partid, nasid);
1026                                 break;
1027                         }
1028
1029                         /*
1030                          * Register the remote partition's AMOs with SAL so it
1031                          * can handle and cleanup errors within that address
1032                          * range should the remote partition go down. We don't
1033                          * unregister this range because it is difficult to
1034                          * tell when outstanding writes to the remote partition
1035                          * are finished and thus when it is thus safe to
1036                          * unregister. This should not result in wasted space
1037                          * in the SAL xp_addr_region table because we should
1038                          * get the same page for remote_act_amos_pa after
1039                          * module reloads and system reboots.
1040                          */
1041                         if (sn_register_xp_addr_region
1042                             (remote_vars->amos_page_pa, PAGE_SIZE, 1) < 0) {
1043                                 dev_dbg(xpc_part,
1044                                         "partition %d failed to "
1045                                         "register xp_addr region 0x%016lx\n",
1046                                         partid, remote_vars->amos_page_pa);
1047
1048                                 XPC_SET_REASON(part, xpPhysAddrRegFailed,
1049                                                __LINE__);
1050                                 break;
1051                         }
1052
1053                         /*
1054                          * The remote nasid is valid and available.
1055                          * Send an interrupt to that nasid to notify
1056                          * it that we are ready to begin activation.
1057                          */
1058                         dev_dbg(xpc_part, "sending an interrupt to AMO 0x%lx, "
1059                                 "nasid %d, phys_cpuid 0x%x\n",
1060                                 remote_vars->amos_page_pa,
1061                                 remote_vars->act_nasid,
1062                                 remote_vars->act_phys_cpuid);
1063
1064                         if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
1065                                                            version)) {
1066                                 part->remote_amos_page_pa =
1067                                     remote_vars->amos_page_pa;
1068                                 xpc_mark_partition_disengaged(part);
1069                                 xpc_cancel_partition_disengage_request(part);
1070                         }
1071                         xpc_IPI_send_activate(remote_vars);
1072                 }
1073         }
1074
1075         kfree(discovered_nasids);
1076         kfree(remote_rp_base);
1077 }
1078
1079 /*
1080  * Given a partid, get the nasids owned by that partition from the
1081  * remote partition's reserved page.
1082  */
1083 enum xp_retval
1084 xpc_initiate_partid_to_nasids(short partid, void *nasid_mask)
1085 {
1086         struct xpc_partition *part;
1087         u64 part_nasid_pa;
1088
1089         part = &xpc_partitions[partid];
1090         if (part->remote_rp_pa == 0)
1091                 return xpPartitionDown;
1092
1093         memset(nasid_mask, 0, XP_NASID_MASK_BYTES);
1094
1095         part_nasid_pa = (u64)XPC_RP_PART_NASIDS(part->remote_rp_pa);
1096
1097         return xp_remote_memcpy(nasid_mask, (void *)part_nasid_pa,
1098                                 xp_sizeof_nasid_mask);
1099 }