2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
6 * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
9 #include <linux/config.h>
10 #include <linux/module.h>
11 #include <linux/init.h>
12 #include <linux/delay.h>
13 #include <linux/kernel.h>
14 #include <linux/kdev_t.h>
15 #include <linux/string.h>
16 #include <linux/tty.h>
17 #include <linux/console.h>
18 #include <linux/timex.h>
19 #include <linux/sched.h>
20 #include <linux/ioport.h>
22 #include <linux/serial.h>
23 #include <linux/irq.h>
24 #include <linux/bootmem.h>
25 #include <linux/mmzone.h>
26 #include <linux/interrupt.h>
27 #include <linux/acpi.h>
28 #include <linux/compiler.h>
29 #include <linux/sched.h>
30 #include <linux/root_dev.h>
31 #include <linux/nodemask.h>
33 #include <linux/efi.h>
37 #include <asm/machvec.h>
38 #include <asm/system.h>
39 #include <asm/processor.h>
41 #include <asm/sn/arch.h>
42 #include <asm/sn/addrs.h>
43 #include <asm/sn/pda.h>
44 #include <asm/sn/nodepda.h>
45 #include <asm/sn/sn_cpuid.h>
46 #include <asm/sn/simulator.h>
47 #include <asm/sn/leds.h>
48 #include <asm/sn/bte.h>
49 #include <asm/sn/shub_mmr.h>
50 #include <asm/sn/clksupport.h>
51 #include <asm/sn/sn_sal.h>
52 #include <asm/sn/geo.h>
53 #include <asm/sn/sn_feature_sets.h>
54 #include "xtalk/xwidgetdev.h"
55 #include "xtalk/hubdev.h"
56 #include <asm/sn/klconfig.h>
59 DEFINE_PER_CPU(struct pda_s, pda_percpu);
61 #define MAX_PHYS_MEMORY (1UL << IA64_MAX_PHYS_BITS) /* Max physical address supported */
63 extern void bte_init_node(nodepda_t *, cnodeid_t);
65 extern void sn_timer_init(void);
66 extern unsigned long last_time_offset;
67 extern void (*ia64_mark_idle) (int);
68 extern void snidle(int);
69 extern unsigned char acpi_kbd_controller_present;
70 extern unsigned long long (*ia64_printk_clock)(void);
72 unsigned long sn_rtc_cycles_per_second;
73 EXPORT_SYMBOL(sn_rtc_cycles_per_second);
75 DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
76 EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
78 DEFINE_PER_CPU(short, __sn_cnodeid_to_nasid[MAX_NUMNODES]);
79 EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid);
81 DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda);
82 EXPORT_PER_CPU_SYMBOL(__sn_nodepda);
84 char sn_system_serial_number_string[128];
85 EXPORT_SYMBOL(sn_system_serial_number_string);
86 u64 sn_partition_serial_number;
87 EXPORT_SYMBOL(sn_partition_serial_number);
89 EXPORT_SYMBOL(sn_partition_id);
91 EXPORT_SYMBOL(sn_system_size);
92 u8 sn_sharing_domain_size;
93 EXPORT_SYMBOL(sn_sharing_domain_size);
95 EXPORT_SYMBOL(sn_coherency_id);
97 EXPORT_SYMBOL(sn_region_size);
98 int sn_prom_type; /* 0=hardware, 1=medusa/realprom, 2=medusa/fakeprom */
100 short physical_node_map[MAX_NUMALINK_NODES];
101 static unsigned long sn_prom_features[MAX_PROM_FEATURE_SETS];
103 EXPORT_SYMBOL(physical_node_map);
107 static void sn_init_pdas(char **);
108 static void build_cnode_tables(void);
110 static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];
113 * The format of "screen_info" is strange, and due to early i386-setup
114 * code. This is just enough to make the console code think we're on a
117 struct screen_info sn_screen_info = {
120 .orig_video_mode = 3,
121 .orig_video_cols = 80,
122 .orig_video_ega_bx = 3,
123 .orig_video_lines = 25,
124 .orig_video_isVGA = 1,
125 .orig_video_points = 16
129 * This is here so we can use the CMOS detection in ide-probe.c to
130 * determine what drives are present. In theory, we don't need this
131 * as the auto-detection could be done via ide-probe.c:do_probe() but
132 * in practice that would be much slower, which is painful when
133 * running in the simulator. Note that passing zeroes in DRIVE_INFO
134 * is sufficient (the IDE driver will autodetect the drive geometry).
136 #ifdef CONFIG_IA64_GENERIC
137 extern char drive_info[4 * 16];
139 char drive_info[4 * 16];
143 * This routine can only be used during init, since
144 * smp_boot_data is an init data structure.
145 * We have to use smp_boot_data.cpu_phys_id to find
146 * the physical id of the processor because the normal
147 * cpu_physical_id() relies on data structures that
148 * may not be initialized yet.
151 static int __init pxm_to_nasid(int pxm)
156 nid = pxm_to_nid_map[pxm];
157 for (i = 0; i < num_node_memblks; i++) {
158 if (node_memblk[i].nid == nid) {
159 return NASID_GET(node_memblk[i].start_paddr);
166 * early_sn_setup - early setup routine for SN platforms
168 * Sets up an initial console to aid debugging. Intended primarily
169 * for bringup. See start_kernel() in init/main.c.
172 void __init early_sn_setup(void)
174 efi_system_table_t *efi_systab;
175 efi_config_table_t *config_tables;
176 struct ia64_sal_systab *sal_systab;
177 struct ia64_sal_desc_entry_point *ep;
182 * Parse enough of the SAL tables to locate the SAL entry point. Since console
183 * IO on SN2 is done via SAL calls, early_printk won't work without this.
185 * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
186 * Any changes to those files may have to be made here as well.
188 efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
189 config_tables = __va(efi_systab->tables);
190 for (i = 0; i < efi_systab->nr_tables; i++) {
191 if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
193 sal_systab = __va(config_tables[i].table);
194 p = (char *)(sal_systab + 1);
195 for (j = 0; j < sal_systab->entry_count; j++) {
196 if (*p == SAL_DESC_ENTRY_POINT) {
197 ep = (struct ia64_sal_desc_entry_point
199 ia64_sal_handler_init(__va
204 p += SAL_DESC_SIZE(*p);
208 /* Uh-oh, SAL not available?? */
209 printk(KERN_ERR "failed to find SAL entry point\n");
212 extern int platform_intr_list[];
213 static int __initdata shub_1_1_found;
218 * Set flag for enabling shub-specific workarounds (WARs)
221 static inline int __init is_shub_1_1(int nasid)
228 id = REMOTE_HUB_L(nasid, SH1_SHUB_ID);
229 rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT;
233 static void __init sn_check_for_wars(void)
240 for_each_online_node(cnode) {
241 if (is_shub_1_1(cnodeid_to_nasid(cnode)))
248 * Scan the EFI PCDP table (if it exists) for an acceptable VGA console
249 * output device. If one exists, pick it and set sn_legacy_{io,mem} to
250 * reflect the bus offsets needed to address it.
252 * Since pcdp support in SN is not supported in the 2.4 kernel (or at least
253 * the one lbs is based on) just declare the needed structs here.
255 * Reference spec http://www.dig64.org/specifications/DIG64_PCDPv20.pdf
257 * Returns 0 if no acceptable vga is found, !0 otherwise.
259 * Note: This stuff is duped here because Altix requires the PCDP to
260 * locate a usable VGA device due to lack of proper ACPI support. Structures
261 * could be used from drivers/firmware/pcdp.h, but it was decided that moving
262 * this file to a more public location just for Altix use was undesirable.
265 struct hcdp_uart_desc {
270 u8 signature[4]; /* should be 'HCDP' */
272 u8 rev; /* should be >=3 for pcdp, <3 for hcdp */
280 struct hcdp_uart_desc uart[0]; /* num_type0 of these */
281 /* pcdp descriptors follow */
282 } __attribute__((packed));
284 struct pcdp_device_desc {
289 /* interconnect specific structure follows */
290 /* device specific structure follows that */
291 } __attribute__((packed));
293 struct pcdp_interface_pci {
294 u8 type; /* 1 == pci */
308 } __attribute__((packed));
310 struct pcdp_vga_device {
312 /* ACPI Extended Address Space Desc follows */
313 } __attribute__((packed));
315 /* from pcdp_device_desc.primary */
316 #define PCDP_PRIMARY_CONSOLE 0x01
318 /* from pcdp_device_desc.type */
319 #define PCDP_CONSOLE_INOUT 0x0
320 #define PCDP_CONSOLE_DEBUG 0x1
321 #define PCDP_CONSOLE_OUT 0x2
322 #define PCDP_CONSOLE_IN 0x3
323 #define PCDP_CONSOLE_TYPE_VGA 0x8
325 #define PCDP_CONSOLE_VGA (PCDP_CONSOLE_TYPE_VGA | PCDP_CONSOLE_OUT)
327 /* from pcdp_interface_pci.type */
328 #define PCDP_IF_PCI 1
330 /* from pcdp_interface_pci.translation */
331 #define PCDP_PCI_TRANS_IOPORT 0x02
332 #define PCDP_PCI_TRANS_MMIO 0x01
339 struct pcdp_device_desc device;
340 struct pcdp_interface_pci if_pci;
341 extern struct efi efi;
345 return; /* no hcdp/pcdp table */
348 return; /* only support PCDP (rev >= 3) */
350 for (bp = (u8 *)&pcdp->uart[pcdp->num_type0];
351 bp < (u8 *)pcdp + pcdp->length;
352 bp += device.length) {
353 memcpy(&device, bp, sizeof(device));
354 if (! (device.primary & PCDP_PRIMARY_CONSOLE))
355 continue; /* not primary console */
357 if (device.type != PCDP_CONSOLE_VGA)
358 continue; /* not VGA descriptor */
360 memcpy(&if_pci, bp+sizeof(device), sizeof(if_pci));
361 if (if_pci.type != PCDP_IF_PCI)
362 continue; /* not PCI interconnect */
364 if (if_pci.translation & PCDP_PCI_TRANS_IOPORT)
366 if_pci.ioport_tra | __IA64_UNCACHED_OFFSET;
368 if (if_pci.translation & PCDP_PCI_TRANS_MMIO)
369 vga_console_membase =
370 if_pci.mmio_tra | __IA64_UNCACHED_OFFSET;
372 break; /* once we find the primary, we're done */
376 static unsigned long sn2_rtc_initial;
378 static unsigned long long ia64_sn2_printk_clock(void)
380 unsigned long rtc_now = rtc_time();
382 return (rtc_now - sn2_rtc_initial) *
383 (1000000000 / sn_rtc_cycles_per_second);
387 * sn_setup - SN platform setup routine
388 * @cmdline_p: kernel command line
390 * Handles platform setup for SN machines. This includes determining
391 * the RTC frequency (via a SAL call), initializing secondary CPUs, and
392 * setting up per-node data areas. The console is also initialized here.
394 void __init sn_setup(char **cmdline_p)
396 long status, ticks_per_sec, drift;
397 u32 version = sn_sal_rev();
398 extern void sn_cpu_init(void);
400 sn2_rtc_initial = rtc_time();
401 ia64_sn_plat_set_error_handling_features(); // obsolete
402 ia64_sn_set_os_feature(OSF_MCA_SLV_TO_OS_INIT_SLV);
403 ia64_sn_set_os_feature(OSF_FEAT_LOG_SBES);
406 #if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
408 * Handle SN vga console.
410 * SN systems do not have enough ACPI table information
411 * being passed from prom to identify VGA adapters and the legacy
412 * addresses to access them. Until that is done, SN systems rely
413 * on the PCDP table to identify the primary VGA console if one
416 * However, kernel PCDP support is optional, and even if it is built
417 * into the kernel, it will not be used if the boot cmdline contains
418 * console= directives.
420 * So, to work around this mess, we duplicate some of the PCDP code
421 * here so that the primary VGA console (as defined by PCDP) will
422 * work on SN systems even if a different console (e.g. serial) is
423 * selected on the boot line (or CONFIG_EFI_PCDP is off).
426 if (! vga_console_membase)
429 if (vga_console_membase) {
430 /* usable vga ... make tty0 the preferred default console */
431 if (!strstr(*cmdline_p, "console="))
432 add_preferred_console("tty", 0, NULL);
434 printk(KERN_DEBUG "SGI: Disabling VGA console\n");
435 if (!strstr(*cmdline_p, "console="))
436 add_preferred_console("ttySG", 0, NULL);
437 #ifdef CONFIG_DUMMY_CONSOLE
438 conswitchp = &dummy_con;
441 #endif /* CONFIG_DUMMY_CONSOLE */
443 #endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */
445 MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
448 * Build the tables for managing cnodes.
450 build_cnode_tables();
453 ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
455 if (status != 0 || ticks_per_sec < 100000) {
457 "unable to determine platform RTC clock frequency, guessing.\n");
458 /* PROM gives wrong value for clock freq. so guess */
459 sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
461 sn_rtc_cycles_per_second = ticks_per_sec;
463 platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
465 ia64_printk_clock = ia64_sn2_printk_clock;
468 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
469 * support here so we don't have to listen to failed keyboard probe
472 if (version <= 0x0209 && acpi_kbd_controller_present) {
473 printk(KERN_INFO "Disabling legacy keyboard support as prom "
474 "is too old and doesn't provide FADT\n");
475 acpi_kbd_controller_present = 0;
478 printk("SGI SAL version %x.%02x\n", version >> 8, version & 0x00FF);
481 * we set the default root device to /dev/hda1
482 * to make simulation easy
484 ROOT_DEV = Root_HDA1;
487 * Create the PDAs and NODEPDAs for all the cpus.
489 sn_init_pdas(cmdline_p);
491 ia64_mark_idle = &snidle;
494 * For the bootcpu, we do this here. All other cpus will make the
495 * call as part of cpu_init in slave cpu initialization.
502 screen_info = sn_screen_info;
507 * set pm_power_off to a SAL call to allow
508 * sn machines to power off. The SAL call can be replaced
509 * by an ACPI interface call when ACPI is fully implemented
512 pm_power_off = ia64_sn_power_down;
516 * sn_init_pdas - setup node data areas
518 * One time setup for Node Data Area. Called by sn_setup().
520 static void __init sn_init_pdas(char **cmdline_p)
525 * Allocate & initialize the nodepda for each node.
527 for_each_online_node(cnode) {
529 alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t));
530 memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
531 memset(nodepdaindr[cnode]->phys_cpuid, -1,
532 sizeof(nodepdaindr[cnode]->phys_cpuid));
533 spin_lock_init(&nodepdaindr[cnode]->ptc_lock);
537 * Allocate & initialize nodepda for TIOs. For now, put them on node 0.
539 for (cnode = num_online_nodes(); cnode < num_cnodes; cnode++) {
541 alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
542 memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
546 * Now copy the array of nodepda pointers to each nodepda.
548 for (cnode = 0; cnode < num_cnodes; cnode++)
549 memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr,
550 sizeof(nodepdaindr));
553 * Set up IO related platform-dependent nodepda fields.
554 * The following routine actually sets up the hubinfo struct
557 for_each_online_node(cnode) {
558 bte_init_node(nodepdaindr[cnode], cnode);
562 * Initialize the per node hubdev. This includes IO Nodes and
563 * headless/memless nodes.
565 for (cnode = 0; cnode < num_cnodes; cnode++) {
566 hubdev_init_node(nodepdaindr[cnode], cnode);
571 * sn_cpu_init - initialize per-cpu data areas
572 * @cpuid: cpuid of the caller
574 * Called during cpu initialization on each cpu as it starts.
575 * Currently, initializes the per-cpu data area for SNIA.
576 * Also sets up a few fields in the nodepda. Also known as
577 * platform_cpu_init() by the ia64 machvec code.
579 void __init sn_cpu_init(void)
588 static int wars_have_been_checked;
590 if (smp_processor_id() == 0 && IS_MEDUSA()) {
591 if (ia64_sn_is_fake_prom())
595 printk(KERN_INFO "Running on medusa with %s PROM\n",
596 (sn_prom_type == 1) ? "real" : "fake");
599 memset(pda, 0, sizeof(pda));
600 if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2,
601 &sn_hub_info->nasid_bitmask,
602 &sn_hub_info->nasid_shift,
603 &sn_system_size, &sn_sharing_domain_size,
604 &sn_partition_id, &sn_coherency_id,
607 sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
610 * The boot cpu makes this call again after platform initialization is
613 if (nodepdaindr[0] == NULL)
616 for (i = 0; i < MAX_PROM_FEATURE_SETS; i++)
617 if (ia64_sn_get_prom_feature_set(i, &sn_prom_features[i]) != 0)
620 cpuid = smp_processor_id();
621 cpuphyid = get_sapicid();
623 if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice))
626 for (i=0; i < MAX_NUMNODES; i++) {
627 if (nodepdaindr[i]) {
628 nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid;
629 nodepdaindr[i]->phys_cpuid[cpuid].slice = slice;
630 nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode;
634 cnode = nasid_to_cnodeid(nasid);
636 sn_nodepda = nodepdaindr[cnode];
639 (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
640 pda->led_state = LED_ALWAYS_SET;
641 pda->hb_count = HZ / 2;
646 /* copy cpu 0's sn_cnodeid_to_nasid table to this cpu's */
647 memcpy(sn_cnodeid_to_nasid,
648 (&per_cpu(__sn_cnodeid_to_nasid, 0)),
649 sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
654 * Only needs to be done once, on BSP.
655 * Has to be done after loop above, because it uses this cpu's
656 * sn_cnodeid_to_nasid table which was just initialized if this
658 * Has to be done before assignment below.
660 if (!wars_have_been_checked) {
662 wars_have_been_checked = 1;
664 sn_hub_info->shub_1_1_found = shub_1_1_found;
667 * Set up addresses of PIO/MEM write status registers.
670 u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0};
671 u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2,
672 SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
674 pio = is_shub1() ? pio1 : pio2;
675 pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
676 pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
680 * WAR addresses for SHUB 1.x.
682 if (local_node_data->active_cpu_count++ == 0 && is_shub1()) {
685 cnodeid_to_nasid(numa_node_id() ==
686 num_online_nodes() - 1 ? 0 : numa_node_id() + 1);
687 pda->pio_shub_war_cam_addr =
688 (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid,
694 * Build tables for converting between NASIDs and cnodes.
696 static inline int __init board_needs_cnode(int type)
698 return (type == KLTYPE_SNIA || type == KLTYPE_TIO);
701 void __init build_cnode_tables(void)
707 memset(physical_node_map, -1, sizeof(physical_node_map));
708 memset(sn_cnodeid_to_nasid, -1,
709 sizeof(__ia64_per_cpu_var(__sn_cnodeid_to_nasid)));
712 * First populate the tables with C/M bricks. This ensures that
713 * cnode == node for all C & M bricks.
715 for_each_online_node(node) {
716 nasid = pxm_to_nasid(nid_to_pxm_map[node]);
717 sn_cnodeid_to_nasid[node] = nasid;
718 physical_node_map[nasid] = node;
722 * num_cnodes is total number of C/M/TIO bricks. Because of the 256 node
723 * limit on the number of nodes, we can't use the generic node numbers
724 * for this. Note that num_cnodes is incremented below as TIOs or
725 * headless/memoryless nodes are discovered.
727 num_cnodes = num_online_nodes();
729 /* fakeprom does not support klgraph */
730 if (IS_RUNNING_ON_FAKE_PROM())
733 /* Find TIOs & headless/memoryless nodes and add them to the tables */
734 for_each_online_node(node) {
735 kl_config_hdr_t *klgraph_header;
736 nasid = cnodeid_to_nasid(node);
737 klgraph_header = ia64_sn_get_klconfig_addr(nasid);
738 if (klgraph_header == NULL)
740 brd = NODE_OFFSET_TO_LBOARD(nasid, klgraph_header->ch_board_info);
742 if (board_needs_cnode(brd->brd_type) && physical_node_map[brd->brd_nasid] < 0) {
743 sn_cnodeid_to_nasid[num_cnodes] = brd->brd_nasid;
744 physical_node_map[brd->brd_nasid] = num_cnodes++;
746 brd = find_lboard_next(brd);
752 nasid_slice_to_cpuid(int nasid, int slice)
756 for (cpu = 0; cpu < NR_CPUS; cpu++)
757 if (cpuid_to_nasid(cpu) == nasid &&
758 cpuid_to_slice(cpu) == slice)
764 int sn_prom_feature_available(int id)
766 if (id >= BITS_PER_LONG * MAX_PROM_FEATURE_SETS)
768 return test_bit(id, sn_prom_features);
770 EXPORT_SYMBOL(sn_prom_feature_available);