x86/UV2: Fix new UV2 hardware by using native UV2 broadcast mode
authorCliff Wickman <cpw@sgi.com>
Mon, 16 Jan 2012 21:17:50 +0000 (15:17 -0600)
committerGreg Kroah-Hartman <gregkh@suse.de>
Thu, 26 Jan 2012 00:13:53 +0000 (16:13 -0800)
commit da87c937e5a2374686edd58df06cfd5050b125fa upstream.

Update the use of the Broadcast Assist Unit on SGI Altix UV2 to
the use of native UV2 mode on new hardware (not the legacy mode).

UV2 native mode has a different format for a broadcast message.
We also need quick differentiaton between UV1 and UV2.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120116211750.GA5767@sgi.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
arch/x86/include/asm/uv/uv_bau.h
arch/x86/platform/uv/tlb_uv.c

index 8e862aa..4a46b27 100644 (file)
@@ -65,7 +65,7 @@
  * UV2: Bit 19 selects between
  *  (0): 10 microsecond timebase and
  *  (1): 80 microseconds
- *  we're using 655us, similar to UV1: 65 units of 10us
+ *  we're using 560us, similar to UV1: 65 units of 10us
  */
 #define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL)
 #define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL)
@@ -235,10 +235,10 @@ struct bau_msg_payload {
 
 
 /*
- * Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
+ * UV1 Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
  * see table 4.2.3.0.1 in broacast_assist spec.
  */
-struct bau_msg_header {
+struct uv1_bau_msg_header {
        unsigned int    dest_subnodeid:6;       /* must be 0x10, for the LB */
        /* bits 5:0 */
        unsigned int    base_dest_nasid:15;     /* nasid of the first bit */
@@ -317,20 +317,88 @@ struct bau_msg_header {
        /* bits 127:107 */
 };
 
+/*
+ * UV2 Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
+ * see figure 9-2 of harp_sys.pdf
+ */
+struct uv2_bau_msg_header {
+       unsigned int    base_dest_nasid:15;     /* nasid of the first bit */
+       /* bits 14:0 */                         /* in uvhub map */
+       unsigned int    dest_subnodeid:5;       /* must be 0x10, for the LB */
+       /* bits 19:15 */
+       unsigned int    rsvd_1:1;               /* must be zero */
+       /* bit 20 */
+       /* Address bits 59:21 */
+       /* bits 25:2 of address (44:21) are payload */
+       /* these next 24 bits become bytes 12-14 of msg */
+       /* bits 28:21 land in byte 12 */
+       unsigned int    replied_to:1;           /* sent as 0 by the source to
+                                                  byte 12 */
+       /* bit 21 */
+       unsigned int    msg_type:3;             /* software type of the
+                                                  message */
+       /* bits 24:22 */
+       unsigned int    canceled:1;             /* message canceled, resource
+                                                  is to be freed*/
+       /* bit 25 */
+       unsigned int    payload_1:3;            /* not currently used */
+       /* bits 28:26 */
+
+       /* bits 36:29 land in byte 13 */
+       unsigned int    payload_2a:3;           /* not currently used */
+       unsigned int    payload_2b:5;           /* not currently used */
+       /* bits 36:29 */
+
+       /* bits 44:37 land in byte 14 */
+       unsigned int    payload_3:8;            /* not currently used */
+       /* bits 44:37 */
+
+       unsigned int    rsvd_2:7;               /* reserved */
+       /* bits 51:45 */
+       unsigned int    swack_flag:1;           /* software acknowledge flag */
+       /* bit 52 */
+       unsigned int    rsvd_3a:3;              /* must be zero */
+       unsigned int    rsvd_3b:8;              /* must be zero */
+       unsigned int    rsvd_3c:8;              /* must be zero */
+       unsigned int    rsvd_3d:3;              /* must be zero */
+       /* bits 74:53 */
+       unsigned int    fairness:3;             /* usually zero */
+       /* bits 77:75 */
+
+       unsigned int    sequence:16;            /* message sequence number */
+       /* bits 93:78  Suppl_A  */
+       unsigned int    chaining:1;             /* next descriptor is part of
+                                                  this activation*/
+       /* bit 94 */
+       unsigned int    multilevel:1;           /* multi-level multicast
+                                                  format */
+       /* bit 95 */
+       unsigned int    rsvd_4:24;              /* ordered / source node /
+                                                  source subnode / aging
+                                                  must be zero */
+       /* bits 119:96 */
+       unsigned int    command:8;              /* message type */
+       /* bits 127:120 */
+};
+
 /*
  * The activation descriptor:
  * The format of the message to send, plus all accompanying control
  * Should be 64 bytes
  */
 struct bau_desc {
-       struct pnmask                   distribution;
+       struct pnmask                           distribution;
        /*
         * message template, consisting of header and payload:
         */
-       struct bau_msg_header           header;
-       struct bau_msg_payload          payload;
+       union bau_msg_header {
+               struct uv1_bau_msg_header       uv1_hdr;
+               struct uv2_bau_msg_header       uv2_hdr;
+       } header;
+
+       struct bau_msg_payload                  payload;
 };
-/*
+/* UV1:
  *   -payload--    ---------header------
  *   bytes 0-11    bits 41-56  bits 58-81
  *       A           B  (2)      C (3)
@@ -340,6 +408,16 @@ struct bau_desc {
  *   bytes 0-11  bytes 12-14  bytes 16-17  (byte 15 filled in by hw as vector)
  *   ------------payload queue-----------
  */
+/* UV2:
+ *   -payload--    ---------header------
+ *   bytes 0-11    bits 70-78  bits 21-44
+ *       A           B  (2)      C (3)
+ *
+ *            A/B/C are moved to:
+ *       A            C          B
+ *   bytes 0-11  bytes 12-14  bytes 16-17  (byte 15 filled in by hw as vector)
+ *   ------------payload queue-----------
+ */
 
 /*
  * The payload queue on the destination side is an array of these.
@@ -511,6 +589,7 @@ struct bau_control {
        short                   osnode;
        short                   uvhub_cpu;
        short                   uvhub;
+       short                   uvhub_version;
        short                   cpus_in_socket;
        short                   cpus_in_uvhub;
        short                   partition_base_pnode;
index 5b55219..1341a2e 100644 (file)
@@ -573,7 +573,7 @@ static int wait_completion(struct bau_desc *bau_desc,
                right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
        }
 
-       if (is_uv1_hub())
+       if (bcp->uvhub_version == 1)
                return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
                                                                bcp, try);
        else
@@ -757,15 +757,22 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
 {
        int seq_number = 0;
        int completion_stat = 0;
+       int uv1 = 0;
        long try = 0;
        unsigned long index;
        cycles_t time1;
        cycles_t time2;
        struct ptc_stats *stat = bcp->statp;
        struct bau_control *hmaster = bcp->uvhub_master;
+       struct uv1_bau_msg_header *uv1_hdr = NULL;
+       struct uv2_bau_msg_header *uv2_hdr = NULL;
 
-       if (is_uv1_hub())
+       if (bcp->uvhub_version == 1) {
+               uv1 = 1;
                uv1_throttle(hmaster, stat);
+               uv1_hdr = &bau_desc->header.uv1_hdr;
+       } else
+               uv2_hdr = &bau_desc->header.uv2_hdr;
 
        while (hmaster->uvhub_quiesce)
                cpu_relax();
@@ -773,14 +780,23 @@ int uv_flush_send_and_wait(struct bau_desc *bau_desc,
        time1 = get_cycles();
        do {
                if (try == 0) {
-                       bau_desc->header.msg_type = MSG_REGULAR;
+                       if (uv1)
+                               uv1_hdr->msg_type = MSG_REGULAR;
+                       else
+                               uv2_hdr->msg_type = MSG_REGULAR;
                        seq_number = bcp->message_number++;
                } else {
-                       bau_desc->header.msg_type = MSG_RETRY;
+                       if (uv1)
+                               uv1_hdr->msg_type = MSG_RETRY;
+                       else
+                               uv2_hdr->msg_type = MSG_RETRY;
                        stat->s_retry_messages++;
                }
 
-               bau_desc->header.sequence = seq_number;
+               if (uv1)
+                       uv1_hdr->sequence = seq_number;
+               else
+                       uv2_hdr->sequence = seq_number;
                index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
                bcp->send_message = get_cycles();
 
@@ -967,7 +983,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
                stat->s_ntargself++;
 
        bau_desc = bcp->descriptor_base;
-       bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu;
+       bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
        bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
        if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
                return NULL;
@@ -1083,7 +1099,7 @@ static void __init enable_timeouts(void)
                 */
                mmr_image |= (1L << SOFTACK_MSHIFT);
                if (is_uv2_hub()) {
-                       mmr_image |= (1L << UV2_LEG_SHFT);
+                       mmr_image &= ~(1L << UV2_LEG_SHFT);
                        mmr_image |= (1L << UV2_EXT_SHFT);
                }
                write_mmr_misc_control(pnode, mmr_image);
@@ -1432,12 +1448,15 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
 {
        int i;
        int cpu;
+       int uv1 = 0;
        unsigned long gpa;
        unsigned long m;
        unsigned long n;
        size_t dsize;
        struct bau_desc *bau_desc;
        struct bau_desc *bd2;
+       struct uv1_bau_msg_header *uv1_hdr;
+       struct uv2_bau_msg_header *uv2_hdr;
        struct bau_control *bcp;
 
        /*
@@ -1451,6 +1470,8 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
        gpa = uv_gpa(bau_desc);
        n = uv_gpa_to_gnode(gpa);
        m = uv_gpa_to_offset(gpa);
+       if (is_uv1_hub())
+               uv1 = 1;
 
        /* the 14-bit pnode */
        write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
@@ -1461,21 +1482,33 @@ static void activation_descriptor_init(int node, int pnode, int base_pnode)
         */
        for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
                memset(bd2, 0, sizeof(struct bau_desc));
-               bd2->header.swack_flag =        1;
-               /*
-                * The base_dest_nasid set in the message header is the nasid
-                * of the first uvhub in the partition. The bit map will
-                * indicate destination pnode numbers relative to that base.
-                * They may not be consecutive if nasid striding is being used.
-                */
-               bd2->header.base_dest_nasid =   UV_PNODE_TO_NASID(base_pnode);
-               bd2->header.dest_subnodeid =    UV_LB_SUBNODEID;
-               bd2->header.command =           UV_NET_ENDPOINT_INTD;
-               bd2->header.int_both =          1;
-               /*
-                * all others need to be set to zero:
-                *   fairness chaining multilevel count replied_to
-                */
+               if (uv1) {
+                       uv1_hdr = &bd2->header.uv1_hdr;
+                       uv1_hdr->swack_flag =   1;
+                       /*
+                        * The base_dest_nasid set in the message header
+                        * is the nasid of the first uvhub in the partition.
+                        * The bit map will indicate destination pnode numbers
+                        * relative to that base. They may not be consecutive
+                        * if nasid striding is being used.
+                        */
+                       uv1_hdr->base_dest_nasid =
+                                               UV_PNODE_TO_NASID(base_pnode);
+                       uv1_hdr->dest_subnodeid =       UV_LB_SUBNODEID;
+                       uv1_hdr->command =              UV_NET_ENDPOINT_INTD;
+                       uv1_hdr->int_both =             1;
+                       /*
+                        * all others need to be set to zero:
+                        *   fairness chaining multilevel count replied_to
+                        */
+               } else {
+                       uv2_hdr = &bd2->header.uv2_hdr;
+                       uv2_hdr->swack_flag =   1;
+                       uv2_hdr->base_dest_nasid =
+                                               UV_PNODE_TO_NASID(base_pnode);
+                       uv2_hdr->dest_subnodeid =       UV_LB_SUBNODEID;
+                       uv2_hdr->command =              UV_NET_ENDPOINT_INTD;
+               }
        }
        for_each_present_cpu(cpu) {
                if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
@@ -1728,6 +1761,14 @@ static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp,
                bcp->cpus_in_socket = sdp->num_cpus;
                bcp->socket_master = *smasterp;
                bcp->uvhub = bdp->uvhub;
+               if (is_uv1_hub())
+                       bcp->uvhub_version = 1;
+               else if (is_uv2_hub())
+                       bcp->uvhub_version = 2;
+               else {
+                       printk(KERN_EMERG "uvhub version not 1 or 2\n");
+                       return 1;
+               }
                bcp->uvhub_master = *hmasterp;
                bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
                if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
@@ -1867,7 +1908,8 @@ static int __init uv_bau_init(void)
                        val = 1L << 63;
                        write_gmmr_activation(pnode, val);
                        mmr = 1; /* should be 1 to broadcast to both sockets */
-                       write_mmr_data_broadcast(pnode, mmr);
+                       if (!is_uv1_hub())
+                               write_mmr_data_broadcast(pnode, mmr);
                }
        }