- THE LINUX/I386 BOOT PROTOCOL
- ----------------------------
+ THE LINUX/x86 BOOT PROTOCOL
+ ---------------------------
- H. Peter Anvin <hpa@zytor.com>
- Last update 2007-05-23
-
-On the i386 platform, the Linux kernel uses a rather complicated boot
+On the x86 platform, the Linux kernel uses a rather complicated boot
convention. This has evolved partially due to historical aspects, as
well as the desire in the early days to have the kernel itself be a
bootable image, the complicated PC memory model and due to changed
expectations in the PC industry caused by the effective demise of
real-mode DOS as a mainstream operating system.
-Currently, the following versions of the Linux/i386 boot protocol exist.
+Currently, the following versions of the Linux/x86 boot protocol exist.
Old kernels: zImage/Image support only. Some very early kernels
may not even support a command line.
- If 0, the protected-mode code is loaded at 0x10000.
- If 1, the protected-mode code is loaded at 0x100000.
+ Bit 5 (write): QUIET_FLAG
+ - If 0, print early messages.
+ - If 1, suppress early messages.
+ This requests to the kernel (decompressor and early
+ kernel) to not write early messages that require
+ accessing the display hardware directly.
+
Bit 6 (write): KEEP_SEGMENTS
Protocol: 2.07+
- - if 0, reload the segment registers in the 32bit entry point.
- - if 1, do not reload the segment registers in the 32bit entry point.
+ - If 0, reload the segment registers in the 32bit entry point.
+ - If 1, do not reload the segment registers in the 32bit entry point.
Assume that %cs %ds %ss %es are all set to flat segments with
a base of 0 (or the equivalent for their environment).
maximum size was 255.
Field name: hardware_subarch
-Type: write
+Type: write (optional, defaults to x86/PC)
Offset/size: 0x23c/4
Protocol: 2.07+
0x00000002 Xen
Field name: hardware_subarch_data
-Type: write
+Type: write (subarch-dependent)
Offset/size: 0x240/8
Protocol: 2.07+
A pointer to data that is specific to hardware subarch
+ This field is currently unused for the default x86/PC environment,
+ do not modify.
Field name: payload_offset
Type: read
The length of the payload.
+Field name: setup_data
+Type: write (special)
+Offset/size: 0x250/8
+Protocol: 2.09+
+
+ The 64-bit physical pointer to NULL terminated single linked list of
+ struct setup_data. This is used to define a more extensible boot
+ parameters passing mechanism. The definition of struct setup_data is
+ as follow:
+
+ struct setup_data {
+ u64 next;
+ u32 type;
+ u32 len;
+ u8 data[0];
+ };
+
+ Where, the next is a 64-bit physical pointer to the next node of
+ linked list, the next field of the last node is 0; the type is used
+ to identify the contents of data; the len is the length of data
+ field; the data holds the real payload.
+
+ This list may be modified at a number of points during the bootup
+ process. Therefore, when modifying this list one should always make
+ sure to consider the case where the linked list already contains
+ entries.
+
+
**** THE IMAGE CHECKSUM
From boot protocol version 2.08 onwards the CRC-32 is calculated over
file; therefore the CRC of the file up to the limit specified in the
syssize field of the header is always 0.
+
**** THE KERNEL COMMAND LINE
The kernel command line has become an important way for the boot
covered by setup_move_size, so you may need to adjust this
field.
-Field name: setup_data
-Type: write (obligatory)
-Offset/size: 0x250/8
-Protocol: 2.09+
-
- The 64-bit physical pointer to NULL terminated single linked list of
- struct setup_data. This is used to define a more extensible boot
- parameters passing mechanism. The definition of struct setup_data is
- as follow:
-
- struct setup_data {
- u64 next;
- u32 type;
- u32 len;
- u8 data[0];
- };
-
- Where, the next is a 64-bit physical pointer to the next node of
- linked list, the next field of the last node is 0; the type is used
- to identify the contents of data; the len is the length of data
- field; the data holds the real payload.
-
**** MEMORY LAYOUT OF THE REAL-MODE CODE
#include <asm/io.h>
#include <asm/page.h>
#include <asm/boot.h>
+#include <asm/bootparam.h>
/* WARNING!!
* This code is compiled with -fPIC and it is relocated dynamically
/*
* This is set up by the setup-routine at boot-time
*/
-static unsigned char *real_mode; /* Pointer to real-mode data */
-
-#define RM_EXT_MEM_K (*(unsigned short *)(real_mode + 0x2))
-#ifndef STANDARD_MEMORY_BIOS_CALL
-#define RM_ALT_MEM_K (*(unsigned long *)(real_mode + 0x1e0))
-#endif
-#define RM_SCREEN_INFO (*(struct screen_info *)(real_mode+0))
+static struct boot_params *real_mode; /* Pointer to real-mode data */
+static int quiet;
extern unsigned char input_data[];
extern int input_len;
static void *memset(void *s, int c, unsigned n);
static void *memcpy(void *dest, const void *src, unsigned n);
-static void putstr(const char *);
+static void __putstr(int, const char *);
+#define putstr(__x) __putstr(0, __x)
#ifdef CONFIG_X86_64
#define memptr long
vidmem[i] = ' ';
}
-static void putstr(const char *s)
+static void __putstr(int error, const char *s)
{
int x, y, pos;
char c;
+#ifndef CONFIG_X86_VERBOSE_BOOTUP
+ if (!error)
+ return;
+#endif
+
#ifdef CONFIG_X86_32
- if (RM_SCREEN_INFO.orig_video_mode == 0 && lines == 0 && cols == 0)
+ if (real_mode->screen_info.orig_video_mode == 0 &&
+ lines == 0 && cols == 0)
return;
#endif
- x = RM_SCREEN_INFO.orig_x;
- y = RM_SCREEN_INFO.orig_y;
+ x = real_mode->screen_info.orig_x;
+ y = real_mode->screen_info.orig_y;
while ((c = *s++) != '\0') {
if (c == '\n') {
}
}
- RM_SCREEN_INFO.orig_x = x;
- RM_SCREEN_INFO.orig_y = y;
+ real_mode->screen_info.orig_x = x;
+ real_mode->screen_info.orig_y = y;
pos = (x + cols * y) * 2; /* Update cursor position */
outb(14, vidport);
static void error(char *x)
{
- putstr("\n\n");
- putstr(x);
- putstr("\n\n -- System halted");
+ __putstr(1, "\n\n");
+ __putstr(1, x);
+ __putstr(1, "\n\n -- System halted");
while (1)
asm("hlt");
return;
}
- putstr("Parsing ELF... ");
+ if (!quiet)
+ putstr("Parsing ELF... ");
phdrs = malloc(sizeof(*phdrs) * ehdr.e_phnum);
if (!phdrs)
{
real_mode = rmode;
- if (RM_SCREEN_INFO.orig_video_mode == 7) {
+ if (real_mode->hdr.loadflags & QUIET_FLAG)
+ quiet = 1;
+
+ if (real_mode->screen_info.orig_video_mode == 7) {
vidmem = (char *) 0xb0000;
vidport = 0x3b4;
} else {
vidport = 0x3d4;
}
- lines = RM_SCREEN_INFO.orig_video_lines;
- cols = RM_SCREEN_INFO.orig_video_cols;
+ lines = real_mode->screen_info.orig_video_lines;
+ cols = real_mode->screen_info.orig_video_cols;
window = output; /* Output buffer (Normally at 1M) */
free_mem_ptr = heap; /* Heap */
#endif
makecrc();
- putstr("\nDecompressing Linux... ");
+ if (!quiet)
+ putstr("\nDecompressing Linux... ");
gunzip();
parse_elf(output);
- putstr("done.\nBooting the kernel.\n");
+ if (!quiet)
+ putstr("done.\nBooting the kernel.\n");
return;
}
#define USE_BSD
#include <endian.h>
-#define MAX_SHDRS 100
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
static Elf32_Ehdr ehdr;
-static Elf32_Shdr shdr[MAX_SHDRS];
-static Elf32_Sym *symtab[MAX_SHDRS];
-static Elf32_Rel *reltab[MAX_SHDRS];
-static char *strtab[MAX_SHDRS];
static unsigned long reloc_count, reloc_idx;
static unsigned long *relocs;
+struct section {
+ Elf32_Shdr shdr;
+ struct section *link;
+ Elf32_Sym *symtab;
+ Elf32_Rel *reltab;
+ char *strtab;
+};
+static struct section *secs;
+
/*
* Following symbols have been audited. There values are constant and do
* not change if bzImage is loaded at a different physical address than
{
int i;
- for(i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) {
+ for (i = 0; i < ARRAY_SIZE(safe_abs_relocs); i++) {
if (!strcmp(sym_name, safe_abs_relocs[i]))
/* Match found */
return 1;
{
const char *sec_strtab;
const char *name;
- sec_strtab = strtab[ehdr.e_shstrndx];
+ sec_strtab = secs[ehdr.e_shstrndx].strtab;
name = "<noname>";
if (shndx < ehdr.e_shnum) {
- name = sec_strtab + shdr[shndx].sh_name;
+ name = sec_strtab + secs[shndx].shdr.sh_name;
}
else if (shndx == SHN_ABS) {
name = "ABSOLUTE";
name = sym_strtab + sym->st_name;
}
else {
- name = sec_name(shdr[sym->st_shndx].sh_name);
+ name = sec_name(secs[sym->st_shndx].shdr.sh_name);
}
return name;
}
static void read_shdrs(FILE *fp)
{
int i;
- if (ehdr.e_shnum > MAX_SHDRS) {
- die("%d section headers supported: %d\n",
- ehdr.e_shnum, MAX_SHDRS);
+ Elf32_Shdr shdr;
+
+ secs = calloc(ehdr.e_shnum, sizeof(struct section));
+ if (!secs) {
+ die("Unable to allocate %d section headers\n",
+ ehdr.e_shnum);
}
if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
die("Seek to %d failed: %s\n",
ehdr.e_shoff, strerror(errno));
}
- if (fread(&shdr, sizeof(shdr[0]), ehdr.e_shnum, fp) != ehdr.e_shnum) {
- die("Cannot read ELF section headers: %s\n",
- strerror(errno));
- }
- for(i = 0; i < ehdr.e_shnum; i++) {
- shdr[i].sh_name = elf32_to_cpu(shdr[i].sh_name);
- shdr[i].sh_type = elf32_to_cpu(shdr[i].sh_type);
- shdr[i].sh_flags = elf32_to_cpu(shdr[i].sh_flags);
- shdr[i].sh_addr = elf32_to_cpu(shdr[i].sh_addr);
- shdr[i].sh_offset = elf32_to_cpu(shdr[i].sh_offset);
- shdr[i].sh_size = elf32_to_cpu(shdr[i].sh_size);
- shdr[i].sh_link = elf32_to_cpu(shdr[i].sh_link);
- shdr[i].sh_info = elf32_to_cpu(shdr[i].sh_info);
- shdr[i].sh_addralign = elf32_to_cpu(shdr[i].sh_addralign);
- shdr[i].sh_entsize = elf32_to_cpu(shdr[i].sh_entsize);
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
+ if (fread(&shdr, sizeof shdr, 1, fp) != 1)
+ die("Cannot read ELF section headers %d/%d: %s\n",
+ i, ehdr.e_shnum, strerror(errno));
+ sec->shdr.sh_name = elf32_to_cpu(shdr.sh_name);
+ sec->shdr.sh_type = elf32_to_cpu(shdr.sh_type);
+ sec->shdr.sh_flags = elf32_to_cpu(shdr.sh_flags);
+ sec->shdr.sh_addr = elf32_to_cpu(shdr.sh_addr);
+ sec->shdr.sh_offset = elf32_to_cpu(shdr.sh_offset);
+ sec->shdr.sh_size = elf32_to_cpu(shdr.sh_size);
+ sec->shdr.sh_link = elf32_to_cpu(shdr.sh_link);
+ sec->shdr.sh_info = elf32_to_cpu(shdr.sh_info);
+ sec->shdr.sh_addralign = elf32_to_cpu(shdr.sh_addralign);
+ sec->shdr.sh_entsize = elf32_to_cpu(shdr.sh_entsize);
+ if (sec->shdr.sh_link < ehdr.e_shnum)
+ sec->link = &secs[sec->shdr.sh_link];
}
}
static void read_strtabs(FILE *fp)
{
int i;
- for(i = 0; i < ehdr.e_shnum; i++) {
- if (shdr[i].sh_type != SHT_STRTAB) {
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
+ if (sec->shdr.sh_type != SHT_STRTAB) {
continue;
}
- strtab[i] = malloc(shdr[i].sh_size);
- if (!strtab[i]) {
+ sec->strtab = malloc(sec->shdr.sh_size);
+ if (!sec->strtab) {
die("malloc of %d bytes for strtab failed\n",
- shdr[i].sh_size);
+ sec->shdr.sh_size);
}
- if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+ if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
die("Seek to %d failed: %s\n",
- shdr[i].sh_offset, strerror(errno));
+ sec->shdr.sh_offset, strerror(errno));
}
- if (fread(strtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+ if (fread(sec->strtab, 1, sec->shdr.sh_size, fp)
+ != sec->shdr.sh_size) {
die("Cannot read symbol table: %s\n",
strerror(errno));
}
static void read_symtabs(FILE *fp)
{
int i,j;
- for(i = 0; i < ehdr.e_shnum; i++) {
- if (shdr[i].sh_type != SHT_SYMTAB) {
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
+ if (sec->shdr.sh_type != SHT_SYMTAB) {
continue;
}
- symtab[i] = malloc(shdr[i].sh_size);
- if (!symtab[i]) {
+ sec->symtab = malloc(sec->shdr.sh_size);
+ if (!sec->symtab) {
die("malloc of %d bytes for symtab failed\n",
- shdr[i].sh_size);
+ sec->shdr.sh_size);
}
- if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+ if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
die("Seek to %d failed: %s\n",
- shdr[i].sh_offset, strerror(errno));
+ sec->shdr.sh_offset, strerror(errno));
}
- if (fread(symtab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+ if (fread(sec->symtab, 1, sec->shdr.sh_size, fp)
+ != sec->shdr.sh_size) {
die("Cannot read symbol table: %s\n",
strerror(errno));
}
- for(j = 0; j < shdr[i].sh_size/sizeof(symtab[i][0]); j++) {
- symtab[i][j].st_name = elf32_to_cpu(symtab[i][j].st_name);
- symtab[i][j].st_value = elf32_to_cpu(symtab[i][j].st_value);
- symtab[i][j].st_size = elf32_to_cpu(symtab[i][j].st_size);
- symtab[i][j].st_shndx = elf16_to_cpu(symtab[i][j].st_shndx);
+ for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) {
+ Elf32_Sym *sym = &sec->symtab[j];
+ sym->st_name = elf32_to_cpu(sym->st_name);
+ sym->st_value = elf32_to_cpu(sym->st_value);
+ sym->st_size = elf32_to_cpu(sym->st_size);
+ sym->st_shndx = elf16_to_cpu(sym->st_shndx);
}
}
}
static void read_relocs(FILE *fp)
{
int i,j;
- for(i = 0; i < ehdr.e_shnum; i++) {
- if (shdr[i].sh_type != SHT_REL) {
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
+ if (sec->shdr.sh_type != SHT_REL) {
continue;
}
- reltab[i] = malloc(shdr[i].sh_size);
- if (!reltab[i]) {
+ sec->reltab = malloc(sec->shdr.sh_size);
+ if (!sec->reltab) {
die("malloc of %d bytes for relocs failed\n",
- shdr[i].sh_size);
+ sec->shdr.sh_size);
}
- if (fseek(fp, shdr[i].sh_offset, SEEK_SET) < 0) {
+ if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
die("Seek to %d failed: %s\n",
- shdr[i].sh_offset, strerror(errno));
+ sec->shdr.sh_offset, strerror(errno));
}
- if (fread(reltab[i], 1, shdr[i].sh_size, fp) != shdr[i].sh_size) {
+ if (fread(sec->reltab, 1, sec->shdr.sh_size, fp)
+ != sec->shdr.sh_size) {
die("Cannot read symbol table: %s\n",
strerror(errno));
}
- for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
- reltab[i][j].r_offset = elf32_to_cpu(reltab[i][j].r_offset);
- reltab[i][j].r_info = elf32_to_cpu(reltab[i][j].r_info);
+ for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
+ Elf32_Rel *rel = &sec->reltab[j];
+ rel->r_offset = elf32_to_cpu(rel->r_offset);
+ rel->r_info = elf32_to_cpu(rel->r_info);
}
}
}
int i;
printf("Absolute symbols\n");
printf(" Num: Value Size Type Bind Visibility Name\n");
- for(i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
char *sym_strtab;
Elf32_Sym *sh_symtab;
int j;
- if (shdr[i].sh_type != SHT_SYMTAB) {
+
+ if (sec->shdr.sh_type != SHT_SYMTAB) {
continue;
}
- sh_symtab = symtab[i];
- sym_strtab = strtab[shdr[i].sh_link];
- for(j = 0; j < shdr[i].sh_size/sizeof(symtab[0][0]); j++) {
+ sh_symtab = sec->symtab;
+ sym_strtab = sec->link->strtab;
+ for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Sym); j++) {
Elf32_Sym *sym;
const char *name;
- sym = &symtab[i][j];
+ sym = &sec->symtab[j];
name = sym_name(sym_strtab, sym);
if (sym->st_shndx != SHN_ABS) {
continue;
{
int i, printed = 0;
- for(i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
+ struct section *sec_applies, *sec_symtab;
char *sym_strtab;
Elf32_Sym *sh_symtab;
- unsigned sec_applies, sec_symtab;
int j;
- if (shdr[i].sh_type != SHT_REL) {
+ if (sec->shdr.sh_type != SHT_REL) {
continue;
}
- sec_symtab = shdr[i].sh_link;
- sec_applies = shdr[i].sh_info;
- if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+ sec_symtab = sec->link;
+ sec_applies = &secs[sec->shdr.sh_info];
+ if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) {
continue;
}
- sh_symtab = symtab[sec_symtab];
- sym_strtab = strtab[shdr[sec_symtab].sh_link];
- for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+ sh_symtab = sec_symtab->symtab;
+ sym_strtab = sec_symtab->link->strtab;
+ for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
Elf32_Rel *rel;
Elf32_Sym *sym;
const char *name;
- rel = &reltab[i][j];
+ rel = &sec->reltab[j];
sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
name = sym_name(sym_strtab, sym);
if (sym->st_shndx != SHN_ABS) {
{
int i;
/* Walk through the relocations */
- for(i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < ehdr.e_shnum; i++) {
char *sym_strtab;
Elf32_Sym *sh_symtab;
- unsigned sec_applies, sec_symtab;
+ struct section *sec_applies, *sec_symtab;
int j;
- if (shdr[i].sh_type != SHT_REL) {
+ struct section *sec = &secs[i];
+
+ if (sec->shdr.sh_type != SHT_REL) {
continue;
}
- sec_symtab = shdr[i].sh_link;
- sec_applies = shdr[i].sh_info;
- if (!(shdr[sec_applies].sh_flags & SHF_ALLOC)) {
+ sec_symtab = sec->link;
+ sec_applies = &secs[sec->shdr.sh_info];
+ if (!(sec_applies->shdr.sh_flags & SHF_ALLOC)) {
continue;
}
- sh_symtab = symtab[sec_symtab];
- sym_strtab = strtab[shdr[sec_symtab].sh_link];
- for(j = 0; j < shdr[i].sh_size/sizeof(reltab[0][0]); j++) {
+ sh_symtab = sec_symtab->symtab;
+ sym_strtab = sec->link->strtab;
+ for (j = 0; j < sec->shdr.sh_size/sizeof(Elf32_Rel); j++) {
Elf32_Rel *rel;
Elf32_Sym *sym;
unsigned r_type;
- rel = &reltab[i][j];
+ rel = &sec->reltab[j];
sym = &sh_symtab[ELF32_R_SYM(rel->r_info)];
r_type = ELF32_R_TYPE(rel->r_info);
/* Don't visit relocations to absolute symbols */
*/
printf(".section \".data.reloc\",\"a\"\n");
printf(".balign 4\n");
- for(i = 0; i < reloc_count; i++) {
+ for (i = 0; i < reloc_count; i++) {
printf("\t .long 0x%08lx\n", relocs[i]);
}
printf("\n");
/* Print a stop */
printf("%c%c%c%c", buf[0], buf[1], buf[2], buf[3]);
/* Now print each relocation */
- for(i = 0; i < reloc_count; i++) {
+ for (i = 0; i < reloc_count; i++) {
buf[0] = (relocs[i] >> 0) & 0xff;
buf[1] = (relocs[i] >> 8) & 0xff;
buf[2] = (relocs[i] >> 16) & 0xff;
show_absolute_relocs = 0;
as_text = 0;
fname = NULL;
- for(i = 1; i < argc; i++) {
+ for (i = 1; i < argc; i++) {
char *arg = argv[i];
if (*arg == '-') {
if (strcmp(argv[1], "--abs-syms") == 0) {