git.karo-electronics.de Git - karo-tx-linux.git/commitdiff
Merge branch 'linus' into tmp.x86.mpparse.new
author Ingo Molnar <mingo@elte.hu>
Tue, 8 Jul 2008 08:32:56 +0000 (10:32 +0200)
committer Ingo Molnar <mingo@elte.hu>
Tue, 8 Jul 2008 08:32:56 +0000 (10:32 +0200)
78 files changed:
Documentation/kernel-parameters.txt
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/Makefile
arch/x86/boot/compressed/misc.c
arch/x86/boot/memory.c
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/apic_32.c
arch/x86/kernel/apic_64.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/cpu/mtrr/mtrr.h
arch/x86/kernel/e820.c [new file with mode: 0644]
arch/x86/kernel/e820_32.c
arch/x86/kernel/e820_64.c
arch/x86/kernel/efi.c
arch/x86/kernel/efi_64.c
arch/x86/kernel/genapic_64.c
arch/x86/kernel/head.c [new file with mode: 0644]
arch/x86/kernel/head32.c
arch/x86/kernel/head64.c
arch/x86/kernel/head_32.S
arch/x86/kernel/io_apic_32.c
arch/x86/kernel/io_apic_64.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/numaq_32.c
arch/x86/kernel/setup.c
arch/x86/kernel/setup_32.c
arch/x86/kernel/setup_64.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/srat_32.c
arch/x86/kernel/summit_32.c
arch/x86/kernel/trampoline.c
arch/x86/lguest/boot.c
arch/x86/mach-default/setup.c
arch/x86/mach-es7000/Makefile
arch/x86/mach-es7000/es7000plat.c
arch/x86/mach-generic/Makefile
arch/x86/mach-generic/bigsmp.c
arch/x86/mach-generic/numaq.c [new file with mode: 0644]
arch/x86/mach-generic/probe.c
arch/x86/mach-visws/mpparse.c
arch/x86/mach-voyager/setup.c
arch/x86/mach-voyager/voyager_smp.c
arch/x86/mm/discontig_32.c
arch/x86/mm/init_32.c
arch/x86/mm/numa_64.c
arch/x86/pci/Makefile_32
arch/x86/pci/k8-bus_64.c
arch/x86/pci/numa.c
arch/x86/xen/enlighten.c
drivers/acpi/Kconfig
drivers/acpi/pci_irq.c
drivers/firmware/dmi_scan.c
include/asm-x86/acpi.h
include/asm-x86/bios_ebda.h
include/asm-x86/bootparam.h
include/asm-x86/e820.h
include/asm-x86/e820_32.h
include/asm-x86/e820_64.h
include/asm-x86/efi.h
include/asm-x86/io_apic.h
include/asm-x86/ipi.h
include/asm-x86/mach-generic/mach_mpparse.h
include/asm-x86/mach-numaq/mach_apic.h
include/asm-x86/mach-numaq/mach_mpparse.h
include/asm-x86/mmzone_32.h
include/asm-x86/mpspec.h
include/asm-x86/numaq.h
include/asm-x86/setup.h
include/asm-x86/smp.h
include/asm-x86/srat.h
include/asm-x86/system.h
include/linux/efi.h
include/linux/mm.h
include/linux/pageblock-flags.h
mm/page_alloc.c

index b52f47d588b40fea417bf4bca4ffdbfef4d02152..e53ff097557383dab01943be2af06eaf848b6e91 100644 (file)
@@ -599,6 +599,29 @@ and is between 256 and 4096 characters. It is defined in the file
                        See drivers/char/README.epca and
                        Documentation/digiepca.txt.
 
+       disable_mtrr_cleanup [X86]
+       enable_mtrr_cleanup [X86]
+                       The kernel tries to adjust MTRR layout from continuous
+                       to discrete, so that the X server driver is able to add
+                       WB entries later. This parameter enables/disables that.
+
+       mtrr_chunk_size=nn[KMG] [X86]
+                       Used for mtrr cleanup. It is the largest continuous
+                       chunk that could hold holes (aka. UC entries).
+
+       mtrr_gran_size=nn[KMG] [X86]
+                       Used for mtrr cleanup. It is the granularity of an
+                       mtrr block. Default is 1.
+                       A large value could prevent small alignments from
+                       using up MTRRs.
+
+       mtrr_spare_reg_nr=n [X86]
+                       Format: <integer>
+                       Range: 0-7 : spare reg number
+                       Default : 1
+                       Used for mtrr cleanup. Number of spare mtrr entries.
+                       Set to 2 or more if your graphics card needs more.
+
        disable_mtrr_trim [X86, Intel and AMD only]
                        By default the kernel will trim any uncacheable
                        memory out of your available memory pool based on
index bf07b6f50fa178268f9d81388a615a7a8abff32b..07276ac01c2055f95df5ab5a93f3b58c50870292 100644 (file)
@@ -261,36 +261,6 @@ config X86_VOYAGER
          If you do not specifically know you have a Voyager based machine,
          say N here, otherwise the kernel you build will not be bootable.
 
-config X86_NUMAQ
-       bool "NUMAQ (IBM/Sequent)"
-       depends on SMP && X86_32
-       select NUMA
-       help
-         This option is used for getting Linux to run on a (IBM/Sequent) NUMA
-         multiquad box. This changes the way that processors are bootstrapped,
-         and uses Clustered Logical APIC addressing mode instead of Flat Logical.
-         You will need a new lynxer.elf file to flash your firmware with - send
-         email to <Martin.Bligh@us.ibm.com>.
-
-config X86_SUMMIT
-       bool "Summit/EXA (IBM x440)"
-       depends on X86_32 && SMP
-       help
-         This option is needed for IBM systems that use the Summit/EXA chipset.
-         In particular, it is needed for the x440.
-
-         If you don't have one of these computers, you should say N here.
-         If you want to build a NUMA kernel, you must select ACPI.
-
-config X86_BIGSMP
-       bool "Support for other sub-arch SMP systems with more than 8 CPUs"
-       depends on X86_32 && SMP
-       help
-         This option is needed for the systems that have more than 8 CPUs
-         and if the system is not of any sub-arch type above.
-
-         If you don't have such a system, you should say N here.
-
 config X86_VISWS
        bool "SGI 320/540 (Visual Workstation)"
        depends on X86_32
@@ -304,12 +274,33 @@ config X86_VISWS
          and vice versa. See <file:Documentation/sgi-visws.txt> for details.
 
 config X86_GENERICARCH
-       bool "Generic architecture (Summit, bigsmp, ES7000, default)"
+       bool "Generic architecture"
        depends on X86_32
        help
-          This option compiles in the Summit, bigsmp, ES7000, default subarchitectures.
-         It is intended for a generic binary kernel.
-         If you want a NUMA kernel, select ACPI.   We need SRAT for NUMA.
+          This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default
+         subarchitectures.  It is intended for a generic binary kernel.
+         If you select them all, the kernel will probe them one by one and
+         will fall back to the default.
+
+if X86_GENERICARCH
+
+config X86_NUMAQ
+       bool "NUMAQ (IBM/Sequent)"
+       depends on SMP && X86_32
+       select NUMA
+       help
+         This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
+         NUMA multiquad box. This changes the way that processors are
+         bootstrapped, and uses Clustered Logical APIC addressing mode instead
+         of Flat Logical.  You will need a new lynxer.elf file to flash your
+         firmware with - send email to <Martin.Bligh@us.ibm.com>.
+
+config X86_SUMMIT
+       bool "Summit/EXA (IBM x440)"
+       depends on X86_32 && SMP
+       help
+         This option is needed for IBM systems that use the Summit/EXA chipset.
+         In particular, it is needed for the x440.
 
 config X86_ES7000
        bool "Support for Unisys ES7000 IA32 series"
@@ -317,8 +308,15 @@ config X86_ES7000
        help
          Support for Unisys ES7000 systems.  Say 'Y' here if this kernel is
          supposed to run on an IA32-based Unisys ES7000 system.
-         Only choose this option if you have such a system, otherwise you
-         should say N here.
+
+config X86_BIGSMP
+       bool "Support for big SMP systems with more than 8 CPUs"
+       depends on X86_32 && SMP
+       help
+         This option is needed for the systems that have more than 8 CPUs
+         and if the system is not of any sub-arch type above.
+
+endif
 
 config X86_RDC321X
        bool "RDC R-321x SoC"
@@ -911,9 +909,9 @@ config X86_PAE
 config NUMA
        bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
        depends on SMP
-       depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || (X86_SUMMIT || X86_GENERICARCH) && ACPI) && EXPERIMENTAL)
+       depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || X86_SUMMIT && ACPI) && EXPERIMENTAL)
        default n if X86_PC
-       default y if (X86_NUMAQ || X86_SUMMIT)
+       default y if (X86_NUMAQ || X86_SUMMIT || X86_GENERICARCH)
        help
          Enable NUMA (Non Uniform Memory Access) support.
          The kernel will try to allocate memory used by a CPU on the
@@ -1090,6 +1088,40 @@ config MTRR
 
          See <file:Documentation/mtrr.txt> for more information.
 
+config MTRR_SANITIZER
+       def_bool y
+       prompt "MTRR cleanup support"
+       depends on MTRR
+       help
+         Convert MTRR layout from continuous to discrete, so that X drivers
+         can add WB entries.
+
+         Say N here if you see bootup problems (boot crash, boot hang,
+         spontaneous reboots).
+
+         Can be disabled with disable_mtrr_cleanup. Also, mtrr_chunk_size
+         can be used to set the largest mtrr entry size for a continuous
+         block to hold holes (aka. UC entries).
+
+         If unsure, say Y.
+
+config MTRR_SANITIZER_ENABLE_DEFAULT
+       int "MTRR cleanup enable value (0-1)"
+       range 0 1
+       default "0"
+       depends on MTRR_SANITIZER
+       help
+         Default value for enabling mtrr cleanup
+
+config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
+       int "MTRR cleanup spare reg num (0-7)"
+       range 0 7
+       default "1"
+       depends on MTRR_SANITIZER
+       help
+         Default number of spare entries for mtrr cleanup; it can be
+         changed via mtrr_spare_reg_nr=
+
 config X86_PAT
        bool
        prompt "x86 PAT support"
index 18363374d51a9a57b39b6fb8d3f87a054b4b4aa5..253e7a5706d36391fc9ac2b3a52f7be4dced66c0 100644 (file)
@@ -131,7 +131,7 @@ config 4KSTACKS
 
 config X86_FIND_SMP_CONFIG
        def_bool y
-       depends on X86_LOCAL_APIC || X86_VOYAGER
+       depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS
        depends on X86_32
 
 config X86_MPPARSE
index 3cff3c894cf3c0cd80161d1f61a959cbb80f7106..d6650131659e14febfc645fdbe13cdbb330602be 100644 (file)
@@ -117,29 +117,11 @@ mcore-$(CONFIG_X86_VOYAGER)       := arch/x86/mach-voyager/
 mflags-$(CONFIG_X86_VISWS)     := -Iinclude/asm-x86/mach-visws
 mcore-$(CONFIG_X86_VISWS)      := arch/x86/mach-visws/
 
-# NUMAQ subarch support
-mflags-$(CONFIG_X86_NUMAQ)     := -Iinclude/asm-x86/mach-numaq
-mcore-$(CONFIG_X86_NUMAQ)      := arch/x86/mach-default/
-
-# BIGSMP subarch support
-mflags-$(CONFIG_X86_BIGSMP)    := -Iinclude/asm-x86/mach-bigsmp
-mcore-$(CONFIG_X86_BIGSMP)     := arch/x86/mach-default/
-
-#Summit subarch support
-mflags-$(CONFIG_X86_SUMMIT)    := -Iinclude/asm-x86/mach-summit
-mcore-$(CONFIG_X86_SUMMIT)     := arch/x86/mach-default/
-
 # generic subarchitecture
 mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
 fcore-$(CONFIG_X86_GENERICARCH)        += arch/x86/mach-generic/
 mcore-$(CONFIG_X86_GENERICARCH)        := arch/x86/mach-default/
 
-
-# ES7000 subarch support
-mflags-$(CONFIG_X86_ES7000)    := -Iinclude/asm-x86/mach-es7000
-fcore-$(CONFIG_X86_ES7000)     := arch/x86/mach-es7000/
-mcore-$(CONFIG_X86_ES7000)     := arch/x86/mach-default/
-
 # RDC R-321x subarch support
 mflags-$(CONFIG_X86_RDC321X)   := -Iinclude/asm-x86/mach-rdc321x
 mcore-$(CONFIG_X86_RDC321X)    := arch/x86/mach-default/
@@ -160,6 +142,7 @@ KBUILD_AFLAGS += $(mflags-y)
 
 head-y := arch/x86/kernel/head_$(BITS).o
 head-y += arch/x86/kernel/head$(BITS).o
+head-y += arch/x86/kernel/head.o
 head-y += arch/x86/kernel/init_task.o
 
 libs-y  += arch/x86/lib/
index 90456cee47c337b226a8874582440377c428a91e..ba0be6a25ff74be27d09aa9fcc23720ce2805a4e 100644 (file)
@@ -221,10 +221,6 @@ static char *vidmem;
 static int vidport;
 static int lines, cols;
 
-#ifdef CONFIG_X86_NUMAQ
-void *xquad_portio;
-#endif
-
 #include "../../../../lib/inflate.c"
 
 static void *malloc(int size)
index acad32eb4290861b1539553c1e8eb49f7ec82e92..53165c97336b21406f49307cb4932c87468d32d9 100644 (file)
@@ -13,6 +13,7 @@
  */
 
 #include "boot.h"
+#include <linux/kernel.h>
 
 #define SMAP   0x534d4150      /* ASCII "SMAP" */
 
@@ -53,7 +54,7 @@ static int detect_memory_e820(void)
 
                count++;
                desc++;
-       } while (next && count < E820MAX);
+       } while (next && count < ARRAY_SIZE(boot_params.e820_map));
 
        return boot_params.e820_entries = count;
 }
index 77807d4769c99c455237a810fe0edd0f996fb31f..dc3c636d113e1f6e050801cc395a21e56e14364d 100644 (file)
@@ -2,7 +2,7 @@
 # Makefile for the linux kernel.
 #
 
-extra-y                := head_$(BITS).o head$(BITS).o init_task.o vmlinux.lds
+extra-y                := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinux.lds
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
@@ -22,7 +22,7 @@ obj-y                 += setup_$(BITS).o i8259_$(BITS).o setup.o
 obj-$(CONFIG_X86_32)   += sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)   += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)   += syscall_64.o vsyscall_64.o setup64.o
-obj-y                  += bootflag.o e820_$(BITS).o
+obj-y                  += bootflag.o e820_$(BITS).o e820.o
 obj-y                  += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y                  += alternative.o i8253.o pci-nommu.o
 obj-$(CONFIG_X86_64)   += bugs_64.o
index 33c5216fd3e1f5137b0b8c52fa8178ac328d630a..caf4ed7ca069f73c7a5e81f88f4bb77416fb4413 100644 (file)
@@ -338,8 +338,6 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
 
 #ifdef CONFIG_X86_IO_APIC
 
-struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
-
 static int __init
 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 {
@@ -860,6 +858,336 @@ static int __init acpi_parse_madt_lapic_entries(void)
 #endif                         /* CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_IO_APIC
+#define MP_ISA_BUS             0
+
+#ifdef CONFIG_X86_ES7000
+extern int es7000_plat;
+#endif
+
+static struct {
+       int apic_id;
+       int gsi_base;
+       int gsi_end;
+       DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
+static int mp_find_ioapic(int gsi)
+{
+       int i = 0;
+
+       /* Find the IOAPIC that manages this GSI. */
+       for (i = 0; i < nr_ioapics; i++) {
+               if ((gsi >= mp_ioapic_routing[i].gsi_base)
+                   && (gsi <= mp_ioapic_routing[i].gsi_end))
+                       return i;
+       }
+
+       printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+       return -1;
+}
+
+static u8 __init uniq_ioapic_id(u8 id)
+{
+#ifdef CONFIG_X86_32
+       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+           !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+               return io_apic_get_unique_id(nr_ioapics, id);
+       else
+               return id;
+#else
+       int i;
+       DECLARE_BITMAP(used, 256);
+       bitmap_zero(used, 256);
+       for (i = 0; i < nr_ioapics; i++) {
+               struct mp_config_ioapic *ia = &mp_ioapics[i];
+               __set_bit(ia->mp_apicid, used);
+       }
+       if (!test_bit(id, used))
+               return id;
+       return find_first_zero_bit(used, 256);
+#endif
+}
+
+static int bad_ioapic(unsigned long address)
+{
+       if (nr_ioapics >= MAX_IO_APICS) {
+               printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
+                      "(found %d)\n", MAX_IO_APICS, nr_ioapics);
+               panic("Recompile kernel with bigger MAX_IO_APICS!\n");
+       }
+       if (!address) {
+               printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
+                      " found in table, skipping!\n");
+               return 1;
+       }
+       return 0;
+}
+
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+{
+       int idx = 0;
+
+       if (bad_ioapic(address))
+               return;
+
+       idx = nr_ioapics;
+
+       mp_ioapics[idx].mp_type = MP_IOAPIC;
+       mp_ioapics[idx].mp_flags = MPC_APIC_USABLE;
+       mp_ioapics[idx].mp_apicaddr = address;
+
+       set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+       mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id);
+#ifdef CONFIG_X86_32
+       mp_ioapics[idx].mp_apicver = io_apic_get_version(idx);
+#else
+       mp_ioapics[idx].mp_apicver = 0;
+#endif
+       /*
+        * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+        * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+        */
+       mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid;
+       mp_ioapic_routing[idx].gsi_base = gsi_base;
+       mp_ioapic_routing[idx].gsi_end = gsi_base +
+           io_apic_get_redir_entries(idx);
+
+       printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, "
+              "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid,
+              mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr,
+              mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
+
+       nr_ioapics++;
+}
+
+void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+{
+       int ioapic = -1;
+       int pin = -1;
+
+       /*
+        * Convert 'gsi' to 'ioapic.pin'.
+        */
+       ioapic = mp_find_ioapic(gsi);
+       if (ioapic < 0)
+               return;
+       pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+       /*
+        * TBD: This check is for faulty timer entries, where the override
+        *      erroneously sets the trigger to level, resulting in a HUGE
+        *      increase of timer interrupts!
+        */
+       if ((bus_irq == 0) && (trigger == 3))
+               trigger = 1;
+
+       mp_irqs[mp_irq_entries].mp_type = MP_INTSRC;
+       mp_irqs[mp_irq_entries].mp_irqtype = mp_INT;
+       mp_irqs[mp_irq_entries].mp_irqflag = (trigger << 2) | polarity;
+       mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS;
+       mp_irqs[mp_irq_entries].mp_srcbusirq = bus_irq; /* IRQ */
+       mp_irqs[mp_irq_entries].mp_dstapic =
+                       mp_ioapics[ioapic].mp_apicid;   /* APIC ID */
+       mp_irqs[mp_irq_entries].mp_dstirq = pin;        /* INTIN# */
+
+       if (++mp_irq_entries == MAX_IRQ_SOURCES)
+               panic("Max # of irq sources exceeded!!\n");
+
+}
+
+void __init mp_config_acpi_legacy_irqs(void)
+{
+       int i = 0;
+       int ioapic = -1;
+
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+       /*
+        * Fabricate the legacy ISA bus (bus #MP_ISA_BUS, i.e. 0).
+        */
+       mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
+#endif
+       set_bit(MP_ISA_BUS, mp_bus_not_pci);
+       Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+
+#ifdef CONFIG_X86_ES7000
+       /*
+        * Older generations of ES7000 have no legacy identity mappings
+        */
+       if (es7000_plat == 1)
+               return;
+#endif
+
+       /*
+        * Locate the IOAPIC that manages the ISA IRQs (0-15).
+        */
+       ioapic = mp_find_ioapic(0);
+       if (ioapic < 0)
+               return;
+
+       /*
+        * Use the default configuration for the IRQs 0-15.  Unless
+        * overridden by (MADT) interrupt source override entries.
+        */
+       for (i = 0; i < 16; i++) {
+               int idx;
+
+               mp_irqs[mp_irq_entries].mp_type = MP_INTSRC;
+               mp_irqs[mp_irq_entries].mp_irqflag = 0; /* Conforming */
+               mp_irqs[mp_irq_entries].mp_srcbus = MP_ISA_BUS;
+               mp_irqs[mp_irq_entries].mp_dstapic = mp_ioapics[ioapic].mp_apicid;
+
+               for (idx = 0; idx < mp_irq_entries; idx++) {
+                       struct mp_config_intsrc *irq = mp_irqs + idx;
+
+                       /* Do we already have a mapping for this ISA IRQ? */
+                       if (irq->mp_srcbus == MP_ISA_BUS
+                           && irq->mp_srcbusirq == i)
+                               break;
+
+                       /* Do we already have a mapping for this IOAPIC pin */
+                       if ((irq->mp_dstapic ==
+                               mp_irqs[mp_irq_entries].mp_dstapic) &&
+                           (irq->mp_dstirq == i))
+                               break;
+               }
+
+               if (idx != mp_irq_entries) {
+                       printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
+                       continue;       /* IRQ already used */
+               }
+
+               mp_irqs[mp_irq_entries].mp_irqtype = mp_INT;
+               mp_irqs[mp_irq_entries].mp_srcbusirq = i;       /* Identity mapped */
+               mp_irqs[mp_irq_entries].mp_dstirq = i;
+
+               if (++mp_irq_entries == MAX_IRQ_SOURCES)
+                       panic("Max # of irq sources exceeded!!\n");
+       }
+}
+
+int mp_register_gsi(u32 gsi, int triggering, int polarity)
+{
+       int ioapic;
+       int ioapic_pin;
+#ifdef CONFIG_X86_32
+#define MAX_GSI_NUM    4096
+#define IRQ_COMPRESSION_START  64
+
+       static int pci_irq = IRQ_COMPRESSION_START;
+       /*
+        * Mapping between Global System Interrupts, which
+        * represent all possible interrupts, and IRQs
+        * assigned to actual devices.
+        */
+       static int gsi_to_irq[MAX_GSI_NUM];
+#else
+
+       if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+               return gsi;
+#endif
+
+       /* Don't set up the ACPI SCI because it's already set up */
+       if (acpi_gbl_FADT.sci_interrupt == gsi)
+               return gsi;
+
+       ioapic = mp_find_ioapic(gsi);
+       if (ioapic < 0) {
+               printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
+               return gsi;
+       }
+
+       ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+#ifdef CONFIG_X86_32
+       if (ioapic_renumber_irq)
+               gsi = ioapic_renumber_irq(ioapic, gsi);
+#endif
+
+       /*
+        * Avoid pin reprogramming.  PRTs typically include entries
+        * with redundant pin->gsi mappings (but unique PCI devices);
+        * we only program the IOAPIC on the first.
+        */
+       if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
+               printk(KERN_ERR "Invalid reference to IOAPIC pin "
+                      "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+                      ioapic_pin);
+               return gsi;
+       }
+       if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+               Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
+                       mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
+#ifdef CONFIG_X86_32
+               return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
+#else
+               return gsi;
+#endif
+       }
+
+       set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
+#ifdef CONFIG_X86_32
+       /*
+        * For GSI >= 64, use IRQ compression
+        */
+       if ((gsi >= IRQ_COMPRESSION_START)
+           && (triggering == ACPI_LEVEL_SENSITIVE)) {
+               /*
+                * For PCI devices assign IRQs in order, avoiding gaps
+                * due to unused I/O APIC pins.
+                */
+               int irq = gsi;
+               if (gsi < MAX_GSI_NUM) {
+                       /*
+                        * Retain the VIA chipset work-around (gsi > 15), but
+                        * avoid a problem where the 8254 timer (IRQ0) is setup
+                        * via an override (so it's not on pin 0 of the ioapic),
+                        * and at the same time, the pin 0 interrupt is a PCI
+                        * type.  The gsi > 15 test could cause these two pins
+                        * to be shared as IRQ0, and they are not shareable.
+                        * So test for this condition, and if necessary, avoid
+                        * the pin collision.
+                        */
+                       gsi = pci_irq++;
+                       /*
+                        * Don't assign IRQ used by ACPI SCI
+                        */
+                       if (gsi == acpi_gbl_FADT.sci_interrupt)
+                               gsi = pci_irq++;
+                       gsi_to_irq[irq] = gsi;
+               } else {
+                       printk(KERN_ERR "GSI %u is too high\n", gsi);
+                       return gsi;
+               }
+       }
+#endif
+       io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
+                               triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
+                               polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+       return gsi;
+}
+
+int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
+                       u32 gsi, int triggering, int polarity)
+{
+       struct mpc_config_intsrc intsrc;
+       int ioapic;
+
+       /* print the entry should happen on mptable identically */
+       intsrc.mpc_type = MP_INTSRC;
+       intsrc.mpc_irqtype = mp_INT;
+       intsrc.mpc_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+                               (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+       intsrc.mpc_srcbus = number;
+       intsrc.mpc_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+       ioapic = mp_find_ioapic(gsi);
+       intsrc.mpc_dstapic = mp_ioapic_routing[ioapic].apic_id;
+       intsrc.mpc_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
+
+       MP_intsrc_info(&intsrc);
+
+       return 0;
+}
+
 /*
  * Parse IOAPIC related entries in MADT
  * returns 0 on success, < 0 on error
index 4b99b1bdeb6cbb090e5868138922b259da7d98fe..954d67931a504c94c32087aa128d07375f842cee 100644 (file)
@@ -76,6 +76,11 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
  */
 int apic_verbosity;
 
+int pic_mode;
+
+/* Have we found an MP table */
+int smp_found_config;
+
 static unsigned int calibration_result;
 
 static int lapic_next_event(unsigned long delta,
@@ -1202,7 +1207,7 @@ void __init init_apic_mappings(void)
 
                for (i = 0; i < nr_ioapics; i++) {
                        if (smp_found_config) {
-                               ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+                               ioapic_phys = mp_ioapics[i].mp_apicaddr;
                                if (!ioapic_phys) {
                                        printk(KERN_ERR
                                               "WARNING: bogus zero IO-APIC "
@@ -1513,6 +1518,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
                 */
                cpu = 0;
 
+       if (apicid > max_physical_apicid)
+               max_physical_apicid = apicid;
+
        /*
         * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
         * but we need to work other dependencies like SMP_SUSPEND etc
@@ -1520,7 +1528,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
         * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
         *       - Ashok Raj <ashok.raj@intel.com>
         */
-       if (num_processors > 8) {
+       if (max_physical_apicid >= 8) {
                switch (boot_cpu_data.x86_vendor) {
                case X86_VENDOR_INTEL:
                        if (!APIC_XAPIC(version)) {
index 0633cfd0dc291a1310f0632e1012cf663b5e89a9..a4bd8fbb78a98d0a4171eb599f3ccfdbf52b9a41 100644 (file)
@@ -56,6 +56,9 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
  */
 int apic_verbosity;
 
+/* Have we found an MP table */
+int smp_found_config;
+
 static struct resource lapic_resource = {
        .name = "Local APIC",
        .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
@@ -1090,6 +1093,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
                 */
                cpu = 0;
        }
+       if (apicid > max_physical_apicid)
+               max_physical_apicid = apicid;
+
        /* are we being called early in kernel startup? */
        if (x86_cpu_to_apicid_early_ptr) {
                u16 *cpu_to_apicid = x86_cpu_to_apicid_early_ptr;
index 5d241ce94a44f4f5b0fceb7a7a8292e08b6d9b08..509bd3d9eacd28da737ade7a365f1a17acf71a11 100644 (file)
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_range_blocks[] = {
 static unsigned long smp_changes_mask;
 static struct mtrr_state mtrr_state = {};
 static int mtrr_state_set;
-static u64 tom2;
+u64 mtrr_tom2;
 
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
                }
        }
 
-       if (tom2) {
-               if (start >= (1ULL<<32) && (end < tom2))
+       if (mtrr_tom2) {
+               if (start >= (1ULL<<32) && (end < mtrr_tom2))
                        return MTRR_TYPE_WRBACK;
        }
 
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
        rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
 }
 
+/*  fill the cached (mtrr_state) base/mask pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+               u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+       struct mtrr_var_range *vr;
+
+       vr = mtrr_state.var_ranges;
+
+       vr[index].base_lo = base_lo;
+       vr[index].base_hi = base_hi;
+       vr[index].mask_lo = mask_lo;
+       vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
@@ -213,13 +227,13 @@ void __init get_mtrr_state(void)
        mtrr_state.enabled = (lo & 0xc00) >> 10;
 
        if (amd_special_default_mtrr()) {
-               unsigned lo, hi;
+               unsigned low, high;
                /* TOP_MEM2 */
-               rdmsr(MSR_K8_TOP_MEM2, lo, hi);
-               tom2 = hi;
-               tom2 <<= 32;
-               tom2 |= lo;
-               tom2 &= 0xffffff8000000ULL;
+               rdmsr(MSR_K8_TOP_MEM2, low, high);
+               mtrr_tom2 = high;
+               mtrr_tom2 <<= 32;
+               mtrr_tom2 |= low;
+               mtrr_tom2 &= 0xffffff800000ULL;
        }
        if (mtrr_show) {
                int high_width;
@@ -251,9 +265,9 @@ void __init get_mtrr_state(void)
                        else
                                printk(KERN_INFO "MTRR %u disabled\n", i);
                }
-               if (tom2) {
+               if (mtrr_tom2) {
                        printk(KERN_INFO "TOM2: %016llx aka %lldM\n",
-                                         tom2, tom2>>20);
+                                         mtrr_tom2, mtrr_tom2>>20);
                }
        }
        mtrr_state_set = 1;
@@ -328,7 +342,7 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
 
        if (lo != msrwords[0] || hi != msrwords[1]) {
                if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
-                   boot_cpu_data.x86 == 15 &&
+                   (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11) &&
                    ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK))
                        k8_enable_fixed_iorrs();
                mtrr_wrmsr(msr, msrwords[0], msrwords[1]);
index 6a1e278d93230de4da44549cf537a33b5b3ff441..0642201784e0e83dd51196828c7fc888b2236e27 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/smp.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/sort.h>
 
 #include <asm/e820.h>
 #include <asm/mtrr.h>
@@ -609,6 +610,787 @@ static struct sysdev_driver mtrr_sysdev_driver = {
        .resume         = mtrr_restore,
 };
 
+/* should be related to MTRR_VAR_RANGES nums */
+#define RANGE_NUM 256
+
+/*
+ * One entry of a range table. Both bounds are inclusive; the users in
+ * this file treat an entry whose end is 0 as an unused slot.
+ */
+struct res_range {
+       unsigned long start;
+       unsigned long end;
+};
+
+/*
+ * Store [start, end] in slot nr_range of the table and return the new
+ * entry count. If all RANGE_NUM slots are already in use the range is
+ * dropped and the count is returned unchanged.
+ */
+static int __init
+add_range(struct res_range *range, int nr_range, unsigned long start,
+                             unsigned long end)
+{
+       struct res_range *slot;
+
+       if (nr_range < RANGE_NUM) {
+               slot = &range[nr_range];
+               slot->start = start;
+               slot->end = end;
+               nr_range++;
+       }
+
+       return nr_range;
+}
+
+/*
+ * Add [start, end] to the table, folding it into the first existing
+ * entry it overlaps or directly touches. Only when no such entry exists
+ * is a new slot taken via add_range(). Returns the resulting entry count.
+ */
+static int __init
+add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
+                             unsigned long end)
+{
+       int idx;
+
+       for (idx = 0; idx < nr_range; idx++) {
+               struct res_range *cur = &range[idx];
+               unsigned long overlap_lo, overlap_hi;
+
+               /* an end of 0 marks an unused slot */
+               if (cur->end == 0)
+                       continue;
+
+               overlap_lo = max(cur->start, start);
+               overlap_hi = min(cur->end, end);
+
+               /* overlapping or adjacent: widen the existing entry */
+               if (overlap_lo <= overlap_hi + 1) {
+                       cur->start = min(cur->start, start);
+                       cur->end = max(cur->end, end);
+                       return nr_range;
+               }
+       }
+
+       /* nothing to merge with, take a fresh slot */
+       return add_range(range, nr_range, start, end);
+}
+
+/*
+ * Remove the inclusive interval [start, end] from every used entry of a
+ * RANGE_NUM-sized table.
+ *
+ * An entry may be cleared, trimmed at its head or tail, or split in two.
+ * A split needs one free slot (end == 0) for the upper remainder; when
+ * none is left an error is printed and that remainder is dropped.
+ */
+static void __init
+subtract_range(struct res_range *range, unsigned long start, unsigned long end)
+{
+       int i, j;
+
+       for (j = 0; j < RANGE_NUM; j++) {
+               /* unused slot */
+               if (!range[j].end)
+                       continue;
+
+               /* entry lies completely inside [start, end]: drop it */
+               if (start <= range[j].start && end >= range[j].end) {
+                       range[j].start = 0;
+                       range[j].end = 0;
+                       continue;
+               }
+
+               /* [start, end] overlaps the head of the entry: cut the head */
+               if (start <= range[j].start && end < range[j].end &&
+                   range[j].start < end + 1) {
+                       range[j].start = end + 1;
+                       continue;
+               }
+
+
+               /* [start, end] overlaps the tail of the entry: cut the tail */
+               if (start > range[j].start && end >= range[j].end &&
+                   range[j].end > start - 1) {
+                       range[j].end = start - 1;
+                       continue;
+               }
+
+               /* [start, end] is strictly inside the entry: split it */
+               if (start > range[j].start && end < range[j].end) {
+                       /* find the new spare */
+                       for (i = 0; i < RANGE_NUM; i++) {
+                               if (range[i].end == 0)
+                                       break;
+                       }
+                       if (i < RANGE_NUM) {
+                               /* upper remainder goes into the spare slot */
+                               range[i].end = range[j].end;
+                               range[i].start = end + 1;
+                       } else {
+                               printk(KERN_ERR "run of slot in ranges\n");
+                       }
+                       /* slot j keeps the lower remainder */
+                       range[j].end = start - 1;
+                       continue;
+               }
+       }
+}
+
+/*
+ * sort() comparison callback: orders res_range entries by ascending
+ * start.
+ *
+ * Returns a negative, zero or positive value. The starts are compared
+ * directly instead of returning their difference, because a difference
+ * of two unsigned long values does not fit the int return type and the
+ * truncated result can have the wrong sign.
+ */
+static int __init cmp_range(const void *x1, const void *x2)
+{
+       const struct res_range *r1 = x1;
+       const struct res_range *r2 = x2;
+
+       if (r1->start < r2->start)
+               return -1;
+       if (r1->start > r2->start)
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Software copy of one variable MTRR, kept in page-frame units.
+ * save_var_mtrr() fills it from KB values; set_var_mtrr_all() converts
+ * it back to KB before programming the register.
+ */
+struct var_mtrr_range_state {
+       unsigned long base_pfn;         /* first page frame of the range */
+       unsigned long size_pfn;         /* length in page frames */
+       mtrr_type type;                 /* memory type of the range */
+};
+
+struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
+static int __initdata debug_print;
+
+/*
+ * Build the list of page-frame ranges that range_state[] describes as
+ * write-back memory.
+ *
+ * Every MTRR_TYPE_WRBACK entry of range_state[] is merged into range[]
+ * (which may already hold nr_range entries). Every non-empty
+ * MTRR_TYPE_UNCACHABLE entry is then subtracted again, and so is the
+ * window of extra_remove_size page frames starting at extra_remove_base
+ * when extra_remove_size is non-zero.
+ *
+ * The surviving entries are packed to the front of the table, sorted by
+ * start, and the unused tail is zeroed.
+ *
+ * range must point to a table of RANGE_NUM entries. Returns the number
+ * of valid entries left in range[].
+ */
+static int __init
+x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
+                      unsigned long extra_remove_base,
+                      unsigned long extra_remove_size)
+{
+       unsigned long i, base, size;
+       mtrr_type type;
+
+       /* collect every write-back register, merging neighbours */
+       for (i = 0; i < num_var_ranges; i++) {
+               type = range_state[i].type;
+               if (type != MTRR_TYPE_WRBACK)
+                       continue;
+               base = range_state[i].base_pfn;
+               size = range_state[i].size_pfn;
+               nr_range = add_range_with_merge(range, nr_range, base,
+                                               base + size - 1);
+       }
+       if (debug_print) {
+               printk(KERN_DEBUG "After WB checking\n");
+               for (i = 0; i < nr_range; i++)
+                       printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+                                range[i].start, range[i].end + 1);
+       }
+
+       /* take out UC ranges */
+       for (i = 0; i < num_var_ranges; i++) {
+               type = range_state[i].type;
+               if (type != MTRR_TYPE_UNCACHABLE)
+                       continue;
+               size = range_state[i].size_pfn;
+               if (!size)
+                       continue;
+               base = range_state[i].base_pfn;
+               subtract_range(range, base, base + size - 1);
+       }
+       if (extra_remove_size)
+               subtract_range(range, extra_remove_base,
+                                extra_remove_base + extra_remove_size  - 1);
+
+       /*
+        * get new range num
+        *
+        * subtract_range() may have emptied a slot in the middle of the
+        * table or parked a split-off remainder in any free slot, so pack
+        * the live entries to the front while counting them: everything
+        * below only looks at the first nr_range slots.
+        */
+       nr_range = 0;
+       for (i = 0; i < RANGE_NUM; i++) {
+               if (!range[i].end)
+                       continue;
+               if (i != nr_range) {
+                       range[nr_range] = range[i];
+                       memset(&range[i], 0, sizeof(range[i]));
+               }
+               nr_range++;
+       }
+       if  (debug_print) {
+               printk(KERN_DEBUG "After UC checking\n");
+               for (i = 0; i < nr_range; i++)
+                       printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+                                range[i].start, range[i].end + 1);
+       }
+
+       /* sort the ranges */
+       sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
+       if  (debug_print) {
+               printk(KERN_DEBUG "After sorting\n");
+               for (i = 0; i < nr_range; i++)
+                       printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
+                                range[i].start, range[i].end + 1);
+       }
+
+       /* clear the slots that are not used */
+       for (i = nr_range; i < RANGE_NUM; i++)
+               memset(&range[i], 0, sizeof(range[i]));
+
+       return nr_range;
+}
+
+static struct res_range __initdata range[RANGE_NUM];
+
+#ifdef CONFIG_MTRR_SANITIZER
+
+/*
+ * Total number of page frames covered by the first nr_range entries of
+ * the table (both bounds of an entry are inclusive).
+ */
+static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
+{
+       unsigned long total = 0;
+       int idx;
+
+       for (idx = nr_range - 1; idx >= 0; idx--)
+               total += range[idx].end + 1 - range[idx].start;
+
+       return total;
+}
+
+static int enable_mtrr_cleanup __initdata =
+       CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
+
+/*
+ * "disable_mtrr_cleanup" early parameter: switch the cleanup off, unless
+ * enable_mtrr_cleanup holds -1, in which case it is left untouched.
+ */
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+       if (enable_mtrr_cleanup == -1)
+               return 0;
+
+       enable_mtrr_cleanup = 0;
+       return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+/*
+ * "enable_mtrr_cleanup" early parameter: switch the cleanup on, unless
+ * enable_mtrr_cleanup holds -1, in which case it is left untouched.
+ *
+ * The parameter used to be registered under the misspelt name
+ * "enble_mtrr_cleanup", so the properly spelt option was never seen.
+ */
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+       if (enable_mtrr_cleanup != -1)
+               enable_mtrr_cleanup = 1;
+       return 0;
+}
+early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
+
+/*
+ * Working state while a sorted range list is converted into variable
+ * MTRR entries. All sizes and addresses are in KB.
+ */
+struct var_mtrr_state {
+       unsigned long   range_startk;   /* start of the range not yet emitted */
+       unsigned long   range_sizek;    /* size of that pending range */
+       unsigned long   chunk_sizek;    /* chunk size used for rounding up */
+       unsigned long   gran_sizek;     /* alignment granularity */
+       unsigned int    reg;            /* next register index to fill */
+};
+
+/*
+ * Encode one variable range and hand it to fill_mtrr_var_range().
+ *
+ * basek/sizek are in KB, type is the memory type stored in the low bits
+ * of the base value, and address_bits limits the width of the mask.
+ * A size of 0 writes an all-zero base/mask pair for the register.
+ */
+static void __init
+set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+               unsigned char type, unsigned int address_bits)
+{
+       u64 base, mask;
+
+       if (!sizek) {
+               fill_mtrr_var_range(reg, 0, 0, 0, 0);
+               return;
+       }
+
+       /* KB -> bytes, memory type in the low bits */
+       base = (((u64)basek) << 10) | type;
+
+       /* clear the bits below the size, limit to address_bits */
+       mask = ~((((u64)sizek) << 10) - 1);
+       mask &= (1ULL << address_bits) - 1;
+       /* NOTE(review): 0x800 is presumably the mask's valid bit - confirm */
+       mask |= 0x800;
+
+       fill_mtrr_var_range(reg, (u32)base, (u32)(base >> 32),
+                           (u32)mask, (u32)(mask >> 32));
+}
+
+/*
+ * Record a variable range in range_state[reg]. basek and sizek are given
+ * in KB and are stored as page-frame counts.
+ */
+static void __init
+save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
+               unsigned char type)
+{
+       struct var_mtrr_range_state *st = &range_state[reg];
+
+       st->type = type;
+       st->size_pfn = sizek >> (PAGE_SHIFT - 10);
+       st->base_pfn = basek >> (PAGE_SHIFT - 10);
+}
+
+/*
+ * Pass every entry of range_state[] (converted back from page frames to
+ * KB) to set_var_mtrr(), for registers 0 .. num_var_ranges - 1.
+ */
+static void __init
+set_var_mtrr_all(unsigned int address_bits)
+{
+       unsigned int reg;
+
+       for (reg = 0; reg < num_var_ranges; reg++) {
+               const struct var_mtrr_range_state *st = &range_state[reg];
+
+               set_var_mtrr(reg,
+                            st->base_pfn << (PAGE_SHIFT - 10),
+                            st->size_pfn << (PAGE_SHIFT - 10),
+                            st->type, address_bits);
+       }
+}
+
+/*
+ * Cover [range_startk, range_startk + range_sizek) (in KB) with
+ * power-of-two sized blocks, each aligned to its own size, and store
+ * them in range_state[] starting at register reg.
+ *
+ * Stops early when the registers run out. Returns the index of the next
+ * unused register.
+ */
+static unsigned int __init
+range_to_mtrr(unsigned int reg, unsigned long range_startk,
+             unsigned long range_sizek, unsigned char type)
+{
+       if (!range_sizek || (reg >= num_var_ranges))
+               return reg;
+
+       while (range_sizek) {
+               unsigned long max_align, align;
+               unsigned long sizek;
+
+               /* Compute the maximum size I can make a range */
+               if (range_startk)
+                       max_align = ffs(range_startk) - 1;
+               else
+                       max_align = 32;
+               align = fls(range_sizek) - 1;
+               if (align > max_align)
+                       align = max_align;
+
+               /*
+                * Shift an unsigned long: "1 << 31" is an int shift whose
+                * negative result would be sign-extended into sizek.
+                */
+               sizek = 1UL << align;
+               if (debug_print)
+                       printk(KERN_DEBUG "Setting variable MTRR %d, "
+                               "base: %ldMB, range: %ldMB, type %s\n",
+                               reg, range_startk >> 10, sizek >> 10,
+                               (type == MTRR_TYPE_UNCACHABLE)?"UC":
+                                   ((type == MTRR_TYPE_WRBACK)?"WB":"Other")
+                               );
+               save_var_mtrr(reg++, range_startk, sizek, type);
+               range_startk += sizek;
+               range_sizek -= sizek;
+               if (reg >= num_var_ranges)
+                       break;
+       }
+       return reg;
+}
+
+/*
+ * Emit the pending range held in state (range_startk / range_sizek) as
+ * variable-MTRR entries through range_to_mtrr().
+ *
+ * The range is first trimmed to state->gran_sizek. If it is not a whole
+ * number of state->chunk_sizek chunks, the final partial chunk may be
+ * rounded up to a full chunk of write-back, with the overshoot covered
+ * by an uncachable entry ("hole").
+ *
+ * basek/sizek (in KB) describe the range that follows the pending one;
+ * both are 0 when the pending range is the last.
+ *
+ * Returns second_sizek, the number of KB at the start of the following
+ * range that the rounded-up chunk already covers (0 if none).
+ */
+static unsigned __init
+range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
+                       unsigned long sizek)
+{
+       unsigned long hole_basek, hole_sizek;
+       unsigned long second_basek, second_sizek;
+       unsigned long range0_basek, range0_sizek;
+       unsigned long range_basek, range_sizek;
+       unsigned long chunk_sizek;
+       unsigned long gran_sizek;
+
+       hole_basek = 0;
+       hole_sizek = 0;
+       second_basek = 0;
+       second_sizek = 0;
+       chunk_sizek = state->chunk_sizek;
+       gran_sizek = state->gran_sizek;
+
+       /* align with gran size, prevent small block used up MTRRs */
+       range_basek = ALIGN(state->range_startk, gran_sizek);
+       if ((range_basek > basek) && basek)
+               return second_sizek;
+       state->range_sizek -= (range_basek - state->range_startk);
+       range_sizek = ALIGN(state->range_sizek, gran_sizek);
+
+       /* round the size down to a multiple of gran_sizek; give up at 0 */
+       while (range_sizek > state->range_sizek) {
+               range_sizek -= gran_sizek;
+               if (!range_sizek)
+                       return 0;
+       }
+       state->range_sizek = range_sizek;
+
+       /* try to append some small hole */
+       /*
+        * NOTE(review): range0_basek is taken from the unaligned
+        * state->range_startk, not from range_basek computed above -
+        * confirm this is intended.
+        */
+       range0_basek = state->range_startk;
+       range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
+       if (range0_sizek == state->range_sizek) {
+               /* already a whole number of chunks: no hole needed */
+               if (debug_print)
+                       printk(KERN_DEBUG "rangeX: %016lx - %016lx\n",
+                               range0_basek<<10,
+                               (range0_basek + state->range_sizek)<<10);
+               state->reg = range_to_mtrr(state->reg, range0_basek,
+                               state->range_sizek, MTRR_TYPE_WRBACK);
+               return 0;
+       }
+
+       /* range0: the leading part made of whole chunks */
+       range0_sizek -= chunk_sizek;
+       if (range0_sizek && sizek) {
+           /* shrink range0 until it ends no later than the next range */
+           while (range0_basek + range0_sizek > (basek + sizek)) {
+               range0_sizek -= chunk_sizek;
+               if (!range0_sizek)
+                       break;
+           }
+       }
+
+       if (range0_sizek) {
+               if (debug_print)
+                       printk(KERN_DEBUG "range0: %016lx - %016lx\n",
+                               range0_basek<<10,
+                               (range0_basek + range0_sizek)<<10);
+               state->reg = range_to_mtrr(state->reg, range0_basek,
+                               range0_sizek, MTRR_TYPE_WRBACK);
+
+       }
+
+       /* candidate: one full chunk right after range0 */
+       range_basek = range0_basek + range0_sizek;
+       range_sizek = chunk_sizek;
+
+       /* does that chunk end inside the following range? */
+       if (range_basek + range_sizek > basek &&
+           range_basek + range_sizek <= (basek + sizek)) {
+               /* one hole */
+               second_basek = basek;
+               second_sizek = range_basek + range_sizek - basek;
+       }
+
+       /* if last piece, only could one hole near end */
+       if ((second_basek || !basek) &&
+           range_sizek - (state->range_sizek - range0_sizek) - second_sizek <
+           (chunk_sizek >> 1)) {
+               /*
+                * one hole in middle (second_sizek is 0) or at end
+                * (second_sizek is 0 )
+                */
+               hole_sizek = range_sizek - (state->range_sizek - range0_sizek)
+                                - second_sizek;
+               hole_basek = range_basek + range_sizek - hole_sizek
+                                - second_sizek;
+       } else {
+               /* fallback for big hole, or several holes */
+               range_sizek = state->range_sizek - range0_sizek;
+               second_basek = 0;
+               second_sizek = 0;
+       }
+
+       if (debug_print)
+               printk(KERN_DEBUG "range: %016lx - %016lx\n", range_basek<<10,
+                        (range_basek + range_sizek)<<10);
+       state->reg = range_to_mtrr(state->reg, range_basek, range_sizek,
+                                        MTRR_TYPE_WRBACK);
+       if (hole_sizek) {
+               /* cover the overshoot of the rounded-up chunk with UC */
+               if (debug_print)
+                       printk(KERN_DEBUG "hole: %016lx - %016lx\n",
+                                hole_basek<<10, (hole_basek + hole_sizek)<<10);
+               state->reg = range_to_mtrr(state->reg, hole_basek, hole_sizek,
+                                                MTRR_TYPE_UNCACHABLE);
+
+       }
+
+       return second_sizek;
+}
+
+/*
+ * Feed the next range (given in page frames) into the conversion state.
+ *
+ * A range that starts at or below 1024 KB, or exactly where the pending
+ * range ends, just extends the pending range. Otherwise the pending
+ * range is emitted through range_to_mtrr_with_hole() and the new range
+ * becomes the pending one, minus whatever part the emitted entries
+ * already cover. Nothing is done once all registers are used.
+ */
+static void __init
+set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
+                  unsigned long size_pfn)
+{
+       unsigned long basek = base_pfn << (PAGE_SHIFT - 10);
+       unsigned long sizek = size_pfn << (PAGE_SHIFT - 10);
+       unsigned long covered_sizek = 0;
+
+       if (state->reg >= num_var_ranges)
+               return;
+
+       /* See if I can merge with the last range */
+       if (basek <= 1024 ||
+           basek == state->range_startk + state->range_sizek) {
+               state->range_sizek = basek + sizek - state->range_startk;
+               return;
+       }
+
+       /* Write the range mtrrs */
+       if (state->range_sizek)
+               covered_sizek = range_to_mtrr_with_hole(state, basek, sizek);
+
+       /* the new range becomes the pending one */
+       state->range_startk = basek + covered_sizek;
+       state->range_sizek  = sizek - covered_sizek;
+}
+
+/* minimum size of an MTRR block that can take a hole */
+static u64 mtrr_chunk_size __initdata = (256ULL<<20);
+
+/*
+ * "mtrr_chunk_size" early parameter: parse the value with memparse()
+ * into mtrr_chunk_size. Returns -EINVAL when no value was given.
+ */
+static int __init parse_mtrr_chunk_size_opt(char *p)
+{
+       if (p) {
+               mtrr_chunk_size = memparse(p, &p);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
+
+/* granularity of an MTRR block */
+static u64 mtrr_gran_size __initdata;
+
+/*
+ * "mtrr_gran_size" early parameter: parse the value with memparse()
+ * into mtrr_gran_size. Returns -EINVAL when no value was given.
+ */
+static int __init parse_mtrr_gran_size_opt(char *p)
+{
+       if (p) {
+               mtrr_gran_size = memparse(p, &p);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
+
+static int nr_mtrr_spare_reg __initdata =
+                                CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
+
+/*
+ * "mtrr_spare_reg_nr" early parameter: store the given number in
+ * nr_mtrr_spare_reg. A missing value leaves the default in place.
+ */
+static int __init parse_mtrr_spare_reg(char *arg)
+{
+       if (!arg)
+               return 0;
+
+       nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
+       return 0;
+}
+
+early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
+
+/*
+ * Convert the first nr_range entries of range[] into a variable-MTRR
+ * layout stored in range_state[], using the given chunk and granularity
+ * sizes (in bytes).
+ *
+ * range_state[] is cleared first and the registers that end up unused
+ * are stored as empty. Returns the number of registers the layout uses.
+ */
+static int __init
+x86_setup_var_mtrrs(struct res_range *range, int nr_range,
+                   u64 chunk_size, u64 gran_size)
+{
+       struct var_mtrr_state var_state;
+       unsigned int reg;
+       int num_reg;
+       int i;
+
+       memset(range_state, 0, sizeof(range_state));
+
+       var_state.reg           = 0;
+       var_state.range_startk  = 0;
+       var_state.range_sizek   = 0;
+       var_state.gran_sizek    = gran_size >> 10;
+       var_state.chunk_sizek   = chunk_size >> 10;
+
+       /* Write the range etc */
+       for (i = 0; i < nr_range; i++)
+               set_var_mtrr_range(&var_state, range[i].start,
+                                  range[i].end - range[i].start + 1);
+
+       /* Write the last range */
+       if (var_state.range_sizek)
+               range_to_mtrr_with_hole(&var_state, 0, 0);
+
+       num_reg = var_state.reg;
+
+       /* Clear out the extra MTRR's */
+       for (reg = var_state.reg; reg < num_var_ranges; reg++)
+               save_var_mtrr(reg, 0, 0, 0);
+
+       return num_reg;
+}
+
+/* Outcome of trying one gran_size/chunk_size combination in mtrr_cleanup() */
+struct mtrr_cleanup_result {
+       unsigned long gran_sizek;       /* granularity tried, in KB */
+       unsigned long chunk_sizek;      /* chunk size tried, in KB */
+       /* difference in covered RAM between old and new layout, in KB */
+       unsigned long lose_cover_sizek;
+       unsigned int num_reg;           /* registers the new layout uses */
+       /* set when the new layout covers more, or differs at equal size */
+       int bad;
+};
+
+/*
+ * gran_size: 1M, 2M, ..., 2G
+ * chunk size: gran_size, ..., 4G
+ * so we need (2+13)*6
+ */
+#define NUM_RESULT     90
+#define PSHIFT         (PAGE_SHIFT - 10)
+
+static struct mtrr_cleanup_result __initdata result[NUM_RESULT];
+static struct res_range __initdata range_new[RANGE_NUM];
+static unsigned long __initdata min_loss_pfn[RANGE_NUM];
+
+/*
+ * Try to replace the variable-MTRR layout found at boot with one that
+ * describes the same write-back memory.
+ *
+ * Nothing is done unless is_cpu(INTEL) holds, enable_mtrr_cleanup is at
+ * least 1, the default memory type is uncachable, and the variable
+ * registers hold at least one UC entry and nothing but WB and UC
+ * entries.
+ *
+ * If both mtrr_chunk_size and mtrr_gran_size were given they are tried
+ * first. Otherwise, or if they give a bad result, all gran/chunk size
+ * combinations are evaluated. Among the lossless ones, the layout with
+ * the highest register count that still leaves nr_mtrr_spare_reg
+ * registers free is chosen.
+ *
+ * address_bits is the physical address width handed to
+ * set_var_mtrr_all(). Returns 1 when a new layout was written through
+ * set_var_mtrr_all(), 0 otherwise.
+ */
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+       unsigned long extra_remove_base, extra_remove_size;
+       unsigned long i, base, size, def, dummy;
+       mtrr_type type;
+       int nr_range, nr_range_new;
+       u64 chunk_size, gran_size;
+       unsigned long range_sums, range_sums_new;
+       int index_good;
+       int num_reg_good;
+
+       /* extra one for all 0 */
+       int num[MTRR_NUM_TYPES + 1];
+
+       if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
+               return 0;
+       rdmsr(MTRRdefType_MSR, def, dummy);
+       def &= 0xff;
+       if (def != MTRR_TYPE_UNCACHABLE)
+               return 0;
+
+       /* get it and store it aside */
+       memset(range_state, 0, sizeof(range_state));
+       for (i = 0; i < num_var_ranges; i++) {
+               mtrr_if->get(i, &base, &size, &type);
+               range_state[i].base_pfn = base;
+               range_state[i].size_pfn = size;
+               range_state[i].type = type;
+       }
+
+       /* check entries number */
+       memset(num, 0, sizeof(num));
+       for (i = 0; i < num_var_ranges; i++) {
+               type = range_state[i].type;
+               size = range_state[i].size_pfn;
+               if (type >= MTRR_NUM_TYPES)
+                       continue;
+               /* empty registers are counted in the extra last bucket */
+               if (!size)
+                       type = MTRR_NUM_TYPES;
+               num[type]++;
+       }
+
+       /* check if we got UC entries */
+       if (!num[MTRR_TYPE_UNCACHABLE])
+               return 0;
+
+       /* check if we only had WB and UC */
+       if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+               num_var_ranges - num[MTRR_NUM_TYPES])
+               return 0;
+
+       memset(range, 0, sizeof(range));
+       /*
+        * Always give extra_remove_base a value: it is passed to
+        * x86_get_mtrr_mem_range() below even when mtrr_tom2 is 0 and no
+        * extra window has to be removed.
+        */
+       extra_remove_size = 0;
+       extra_remove_base = 1 << (32 - PAGE_SHIFT);
+       if (mtrr_tom2)
+               extra_remove_size =
+                       (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base;
+       nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base,
+                                         extra_remove_size);
+       range_sums = sum_ranges(range, nr_range);
+       printk(KERN_INFO "total RAM coverred: %ldM\n",
+              range_sums >> (20 - PAGE_SHIFT));
+
+       /* sizes given on the command line: try exactly those first */
+       if (mtrr_chunk_size && mtrr_gran_size) {
+               int num_reg;
+
+               debug_print = 1;
+               /* convert ranges to var ranges state */
+               num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size,
+                                             mtrr_gran_size);
+
+               /* we got new setting in range_state, check it */
+               memset(range_new, 0, sizeof(range_new));
+               nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+                                                     extra_remove_base,
+                                                     extra_remove_size);
+               range_sums_new = sum_ranges(range_new, nr_range_new);
+
+               i = 0;
+               result[i].chunk_sizek = mtrr_chunk_size >> 10;
+               result[i].gran_sizek = mtrr_gran_size >> 10;
+               result[i].num_reg = num_reg;
+               /* covering more than before is flagged as bad */
+               if (range_sums < range_sums_new) {
+                       result[i].lose_cover_sizek =
+                               (range_sums_new - range_sums) << PSHIFT;
+                       result[i].bad = 1;
+               } else
+                       result[i].lose_cover_sizek =
+                               (range_sums - range_sums_new) << PSHIFT;
+
+               printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
+                        result[i].bad?"*BAD*":" ", result[i].gran_sizek >> 10,
+                        result[i].chunk_sizek >> 10);
+               printk(KERN_CONT "num_reg: %d  \tlose cover RAM: %s%ldM \n",
+                        result[i].num_reg, result[i].bad?"-":"",
+                        result[i].lose_cover_sizek >> 10);
+               if (!result[i].bad) {
+                       set_var_mtrr_all(address_bits);
+                       return 1;
+               }
+               printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, "
+                      "will find optimal one\n");
+               debug_print = 0;
+               memset(result, 0, sizeof(result[0]));
+       }
+
+       /* evaluate every gran_size/chunk_size combination */
+       i = 0;
+       memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn));
+       memset(result, 0, sizeof(result));
+       for (gran_size = (1ULL<<20); gran_size < (1ULL<<32); gran_size <<= 1) {
+               for (chunk_size = gran_size; chunk_size < (1ULL<<33);
+                    chunk_size <<= 1) {
+                       int num_reg;
+
+                       if (debug_print)
+                               printk(KERN_INFO
+                              "\ngran_size: %lldM   chunk_size_size: %lldM\n",
+                                      gran_size >> 20, chunk_size >> 20);
+                       if (i >= NUM_RESULT)
+                               continue;
+
+                       /* convert ranges to var ranges state */
+                       num_reg = x86_setup_var_mtrrs(range, nr_range,
+                                                        chunk_size, gran_size);
+
+                       /* we got new setting in range_state, check it */
+                       memset(range_new, 0, sizeof(range_new));
+                       nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
+                                        extra_remove_base, extra_remove_size);
+                       range_sums_new = sum_ranges(range_new, nr_range_new);
+
+                       result[i].chunk_sizek = chunk_size >> 10;
+                       result[i].gran_sizek = gran_size >> 10;
+                       result[i].num_reg = num_reg;
+                       if (range_sums < range_sums_new) {
+                               result[i].lose_cover_sizek =
+                                       (range_sums_new - range_sums) << PSHIFT;
+                               result[i].bad = 1;
+                       } else
+                               result[i].lose_cover_sizek =
+                                       (range_sums - range_sums_new) << PSHIFT;
+
+                       /* double check it */
+                       if (!result[i].bad && !result[i].lose_cover_sizek) {
+                               if (nr_range_new != nr_range ||
+                                       memcmp(range, range_new, sizeof(range)))
+                                               result[i].bad = 1;
+                       }
+
+                       /* remember the smallest loss per register count */
+                       if (!result[i].bad && (range_sums - range_sums_new <
+                                              min_loss_pfn[num_reg])) {
+                               min_loss_pfn[num_reg] =
+                                       range_sums - range_sums_new;
+                       }
+                       i++;
+               }
+       }
+
+       /* print out all */
+       for (i = 0; i < NUM_RESULT; i++) {
+               printk(KERN_INFO "%sgran_size: %ldM \tchunk_size: %ldM \t",
+                      result[i].bad?"*BAD* ":" ", result[i].gran_sizek >> 10,
+                      result[i].chunk_sizek >> 10);
+               printk(KERN_CONT "num_reg: %d \tlose RAM: %s%ldM\n",
+                      result[i].num_reg, result[i].bad?"-":"",
+                      result[i].lose_cover_sizek >> 10);
+       }
+
+       /* try to find the optimal index */
+       if (nr_mtrr_spare_reg >= num_var_ranges)
+               nr_mtrr_spare_reg = num_var_ranges - 1;
+       num_reg_good = -1;
+       /* highest register count with no loss that keeps the spare ones */
+       for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
+               if (!min_loss_pfn[i]) {
+                       num_reg_good = i;
+                       break;
+               }
+       }
+
+       index_good = -1;
+       if (num_reg_good != -1) {
+               /* first lossless result that uses that many registers */
+               for (i = 0; i < NUM_RESULT; i++) {
+                       if (!result[i].bad &&
+                           result[i].num_reg == num_reg_good &&
+                           !result[i].lose_cover_sizek) {
+                               index_good = i;
+                               break;
+                       }
+               }
+       }
+
+       if (index_good != -1) {
+               printk(KERN_INFO "Found optimal setting for mtrr clean up\n");
+               i = index_good;
+               printk(KERN_INFO "gran_size: %ldM \tchunk_size: %ldM \t",
+                               result[i].gran_sizek >> 10,
+                               result[i].chunk_sizek >> 10);
+               printk(KERN_CONT "num_reg: %d \tlose RAM: %ldM\n",
+                               result[i].num_reg,
+                               result[i].lose_cover_sizek >> 10);
+               /* convert ranges to var ranges state */
+               chunk_size = result[i].chunk_sizek;
+               chunk_size <<= 10;
+               gran_size = result[i].gran_sizek;
+               gran_size <<= 10;
+               debug_print = 1;
+               x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
+               set_var_mtrr_all(address_bits);
+               return 1;
+       }
+
+       printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n");
+       printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n");
+
+       return 0;
+}
+#else
+/* Without CONFIG_MTRR_SANITIZER nothing is changed: always returns 0. */
+static int __init mtrr_cleanup(unsigned address_bits)
+{
+       return 0;
+}
+#endif
+
+static int __initdata changed_by_mtrr_cleanup;
+
 static int disable_mtrr_trim;
 
 static int __init disable_mtrr_trim_setup(char *str)
@@ -648,6 +1430,19 @@ int __init amd_special_default_mtrr(void)
        return 0;
 }
 
+/*
+ * Ask update_memory_range() to turn E820_RAM into E820_RESERVED for the
+ * page frames from start_pfn up to, but not including, limit_pfn.
+ * Returns whatever update_memory_range() returns.
+ */
+static u64 __init real_trim_memory(unsigned long start_pfn,
+                                  unsigned long limit_pfn)
+{
+       u64 trim_start = (u64)start_pfn << PAGE_SHIFT;
+       u64 trim_end = (u64)limit_pfn << PAGE_SHIFT;
+
+       return update_memory_range(trim_start, trim_end - trim_start,
+                                  E820_RAM, E820_RESERVED);
+}
 /**
  * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
  * @end_pfn: ending page frame number
@@ -663,8 +1458,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
 {
        unsigned long i, base, size, highest_pfn = 0, def, dummy;
        mtrr_type type;
-       u64 trim_start, trim_size;
+       int nr_range;
+       u64 total_trim_size;
 
+       /* extra one for all 0 */
+       int num[MTRR_NUM_TYPES + 1];
        /*
         * Make sure we only trim uncachable memory on machines that
         * support the Intel MTRR architecture:
@@ -676,14 +1474,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
        if (def != MTRR_TYPE_UNCACHABLE)
                return 0;
 
-       if (amd_special_default_mtrr())
-               return 0;
+       /* get it and store it aside */
+       memset(range_state, 0, sizeof(range_state));
+       for (i = 0; i < num_var_ranges; i++) {
+               mtrr_if->get(i, &base, &size, &type);
+               range_state[i].base_pfn = base;
+               range_state[i].size_pfn = size;
+               range_state[i].type = type;
+       }
 
        /* Find highest cached pfn */
        for (i = 0; i < num_var_ranges; i++) {
-               mtrr_if->get(i, &base, &size, &type);
+               type = range_state[i].type;
                if (type != MTRR_TYPE_WRBACK)
                        continue;
+               base = range_state[i].base_pfn;
+               size = range_state[i].size_pfn;
                if (highest_pfn < base + size)
                        highest_pfn = base + size;
        }
@@ -698,22 +1504,65 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
                return 0;
        }
 
-       if (highest_pfn < end_pfn) {
+       /* check entries number */
+       memset(num, 0, sizeof(num));
+       for (i = 0; i < num_var_ranges; i++) {
+               type = range_state[i].type;
+               if (type >= MTRR_NUM_TYPES)
+                       continue;
+               size = range_state[i].size_pfn;
+               if (!size)
+                       type = MTRR_NUM_TYPES;
+               num[type]++;
+       }
+
+       /* no entry for WB? */
+       if (!num[MTRR_TYPE_WRBACK])
+               return 0;
+
+       /* check if we only had WB and UC */
+       if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
+               num_var_ranges - num[MTRR_NUM_TYPES])
+               return 0;
+
+       memset(range, 0, sizeof(range));
+       nr_range = 0;
+       if (mtrr_tom2) {
+               range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
+               range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
+               if (highest_pfn < range[nr_range].end + 1)
+                       highest_pfn = range[nr_range].end + 1;
+               nr_range++;
+       }
+       nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
+
+       total_trim_size = 0;
+       /* check the head */
+       if (range[0].start)
+               total_trim_size += real_trim_memory(0, range[0].start);
+       /* check the holes */
+       for (i = 0; i < nr_range - 1; i++) {
+               if (range[i].end + 1 < range[i+1].start)
+                       total_trim_size += real_trim_memory(range[i].end + 1,
+                                                           range[i+1].start);
+       }
+       /* check the top */
+       i = nr_range - 1;
+       if (range[i].end + 1 < end_pfn)
+               total_trim_size += real_trim_memory(range[i].end + 1,
+                                                        end_pfn);
+
+       if (total_trim_size) {
                printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover"
-                       " all of memory, losing %luMB of RAM.\n",
-                       (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT));
+                       " all of memory, losing %lluMB of RAM.\n",
+                       total_trim_size >> 20);
 
-               WARN_ON(1);
+               if (!changed_by_mtrr_cleanup)
+                       WARN_ON(1);
 
                printk(KERN_INFO "update e820 for mtrr\n");
-               trim_start = highest_pfn;
-               trim_start <<= PAGE_SHIFT;
-               trim_size = end_pfn;
-               trim_size <<= PAGE_SHIFT;
-               trim_size -= trim_start;
-               update_memory_range(trim_start, trim_size, E820_RAM,
-                                       E820_RESERVED);
                update_e820();
+
                return 1;
        }
 
@@ -729,18 +1578,21 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
  */
 void __init mtrr_bp_init(void)
 {
+       u32 phys_addr;
        init_ifs();
 
+       phys_addr = 32;
+
        if (cpu_has_mtrr) {
                mtrr_if = &generic_mtrr_ops;
                size_or_mask = 0xff000000;      /* 36 bits */
                size_and_mask = 0x00f00000;
+               phys_addr = 36;
 
                /* This is an AMD specific MSR, but we assume(hope?) that
                   Intel will implement it to when they extend the address
                   bus of the Xeon. */
                if (cpuid_eax(0x80000000) >= 0x80000008) {
-                       u32 phys_addr;
                        phys_addr = cpuid_eax(0x80000008) & 0xff;
                        /* CPUID workaround for Intel 0F33/0F34 CPU */
                        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
@@ -758,6 +1610,7 @@ void __init mtrr_bp_init(void)
                           don't support PAE */
                        size_or_mask = 0xfff00000;      /* 32 bits */
                        size_and_mask = 0;
+                       phys_addr = 32;
                }
        } else {
                switch (boot_cpu_data.x86_vendor) {
@@ -791,8 +1644,15 @@ void __init mtrr_bp_init(void)
        if (mtrr_if) {
                set_num_var_ranges();
                init_table();
-               if (use_intel())
+               if (use_intel()) {
                        get_mtrr_state();
+
+                       if (mtrr_cleanup(phys_addr)) {
+                               changed_by_mtrr_cleanup = 1;
+                               mtrr_if->set_all();
+                       }
+
+               }
        }
 }
 
@@ -829,9 +1689,10 @@ static int __init mtrr_init_finialize(void)
 {
        if (!mtrr_if)
                return 0;
-       if (use_intel())
-               mtrr_state_warn();
-       else {
+       if (use_intel()) {
+               if (!changed_by_mtrr_cleanup)
+                       mtrr_state_warn();
+       } else {
                /* The CPUs haven't MTRR and seem to not support SMP. They have
                 * specific drivers, we use a tricky method to support
                 * suspend/resume for them.
index 2cc77eb6fea36967f7defdbae74359ebaac14ebb..2dc4ec656b23c22c9d3806cd57746d4c834b5166 100644 (file)
@@ -81,6 +81,8 @@ void set_mtrr_done(struct set_mtrr_context *ctxt);
 void set_mtrr_cache_disable(struct set_mtrr_context *ctxt);
 void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
 
+void fill_mtrr_var_range(unsigned int index,
+               u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
 void get_mtrr_state(void);
 
 extern void set_mtrr_ops(struct mtrr_ops * ops);
@@ -92,6 +94,7 @@ extern struct mtrr_ops * mtrr_if;
 #define use_intel()    (mtrr_if && mtrr_if->use_intel_if == 1)
 
 extern unsigned int num_var_ranges;
+extern u64 mtrr_tom2;
 
 void mtrr_state_warn(void);
 const char *mtrr_attrib_to_str(int x);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
new file mode 100644 (file)
index 0000000..a706e90
--- /dev/null
@@ -0,0 +1,896 @@
+/*
+ * Handle the memory map.
+ * The functions here do the job until bootmem takes over.
+ *
+ *  Getting sanitize_e820_map() in sync with i386 version by applying change:
+ *  -  Provisions for empty E820 memory regions (reported by certain BIOSes).
+ *     Alex Achenbach <xela@slit.de>, December 2002.
+ *  Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/ioport.h>
+#include <linux/string.h>
+#include <linux/kexec.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/pfn.h>
+#include <linux/suspend.h>
+
+#include <asm/pgtable.h>
+#include <asm/page.h>
+#include <asm/e820.h>
+#include <asm/proto.h>
+#include <asm/setup.h>
+#include <asm/trampoline.h>
+
+struct e820map e820;   /* the kernel's memory map; filled by add_memory_region() */
+
+/*
+ * For PCI or other memory-mapped resources.
+ *
+ * e820_setup_gap() stores the address at which PCI resource allocation
+ * starts here.
+ * NOTE(review): the initial value looks like a recognisable placeholder
+ * rather than a usable address - confirm.
+ */
+unsigned long pci_mem_start = 0xaeedbabe;
+#ifdef CONFIG_PCI
+EXPORT_SYMBOL(pci_mem_start);
+#endif
+
+/*
+ * Return 1 if at least one e820 entry of the requested type intersects
+ * the range [start, end), and 0 otherwise.  A type of 0 acts as a
+ * wildcard and matches entries of every type.
+ */
+int e820_any_mapped(u64 start, u64 end, unsigned type)
+{
+       int idx;
+
+       for (idx = 0; idx < e820.nr_map; idx++) {
+               const struct e820entry *entry = &e820.map[idx];
+               int type_ok = !type || entry->type == type;
+
+               if (type_ok && entry->addr < end &&
+                   entry->addr + entry->size > start)
+                       return 1;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(e820_any_mapped);
+
+/*
+ * Return 1 if every byte of the range [start, end) is covered by e820
+ * entries of the requested type (0 matches any type), and 0 otherwise.
+ *
+ * Note: this only works correctly if the e820 table is sorted and
+ * non-overlapping (the original note states that this is the case).
+ */
+int __init e820_all_mapped(u64 start, u64 end, unsigned type)
+{
+       int idx;
+
+       for (idx = 0; idx < e820.nr_map; idx++) {
+               const struct e820entry *entry = &e820.map[idx];
+               u64 entry_end = entry->addr + entry->size;
+
+               if (type && entry->type != type)
+                       continue;
+
+               /* entries that do not touch the remaining range are ignored */
+               if (entry->addr >= end || entry_end <= start)
+                       continue;
+
+               /*
+                * The entry covers the head of what is left: everything
+                * up to its end is fine, so carry on from there.
+                */
+               if (entry->addr <= start)
+                       start = entry_end;
+
+               /* nothing left to cover means full coverage */
+               if (start >= end)
+                       return 1;
+       }
+       return 0;
+}
+
+/*
+ * Append one region to the kernel e820 map.
+ *
+ * When the map is already full the region is dropped and an error is
+ * logged; the caller is not told.
+ */
+void __init add_memory_region(u64 start, u64 size, int type)
+{
+       int used = e820.nr_map;
+       struct e820entry *slot;
+
+       if (used == ARRAY_SIZE(e820.map)) {
+               printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
+               return;
+       }
+
+       slot = &e820.map[used];
+       slot->addr = start;
+       slot->size = size;
+       slot->type = type;
+       e820.nr_map++;
+}
+
+/*
+ * Log every entry of the kernel e820 map, one line per entry, as
+ * " <who>: <start> - <end> " followed by a description of its type.
+ */
+void __init e820_print_map(char *who)
+{
+       int idx;
+
+       for (idx = 0; idx < e820.nr_map; idx++) {
+               const struct e820entry *entry = &e820.map[idx];
+               unsigned long long first = entry->addr;
+               unsigned long long past = entry->addr + entry->size;
+
+               printk(KERN_INFO " %s: %016Lx - %016Lx ", who, first, past);
+
+               switch (entry->type) {
+               case E820_RAM:
+                       printk(KERN_CONT "(usable)\n");
+                       break;
+               case E820_RESERVED:
+                       printk(KERN_CONT "(reserved)\n");
+                       break;
+               case E820_ACPI:
+                       printk(KERN_CONT "(ACPI data)\n");
+                       break;
+               case E820_NVS:
+                       printk(KERN_CONT "(ACPI NVS)\n");
+                       break;
+               default:
+                       /* unknown types are shown by number */
+                       printk(KERN_CONT "type %u\n", entry->type);
+                       break;
+               }
+       }
+}
+
+/*
+ * Sanitize the BIOS e820 map.
+ *
+ * Some e820 responses include overlapping entries. The following
+ * replaces the original e820 map with a new one, removing overlaps,
+ * and resolving conflicting memory types in favor of highest
+ * numbered type.
+ *
+ * The input parameter biosmap points to an array of 'struct
+ * e820entry' which on entry has elements in the range [0, *pnr_map)
+ * valid, and which has space for up to max_nr_map entries.
+ * On return, the resulting sanitized e820 map entries will be
+ * overwritten in the same location, starting at biosmap.
+ *
+ * The integer pointed to by pnr_map must be valid on entry (the
+ * current number of valid entries located at biosmap) and will
+ * be updated on return, with the new number of valid entries
+ * (something no more than max_nr_map.)
+ *
+ * The return value from sanitize_e820_map() is zero if it
+ * successfully 'sanitized' the map entries passed in, and is -1
+ * if it did nothing, which can happen if either of (1) it was
+ * only passed one map entry, or (2) any of the input map entries
+ * were invalid (start + size < start, meaning that the size was
+ * so big the described memory range wrapped around through zero.)
+ *
+ *     Visually we're performing the following
+ *     (1,2,3,4 = memory types)...
+ *
+ *     Sample memory map (w/overlaps):
+ *        ____22__________________
+ *        ______________________4_
+ *        ____1111________________
+ *        _44_____________________
+ *        11111111________________
+ *        ____________________33__
+ *        ___________44___________
+ *        __________33333_________
+ *        ______________22________
+ *        ___________________2222_
+ *        _________111111111______
+ *        _____________________11_
+ *        _________________4______
+ *
+ *     Sanitized equivalent (no overlap):
+ *        1_______________________
+ *        _44_____________________
+ *        ___1____________________
+ *        ____22__________________
+ *        ______11________________
+ *        _________1______________
+ *        __________3_____________
+ *        ___________44___________
+ *        _____________33_________
+ *        _______________2________
+ *        ________________1_______
+ *        _________________4______
+ *        ___________________2____
+ *        ____________________33__
+ *        ______________________4_
+ */
+
+int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
+                               int *pnr_map)
+{
+       struct change_member {
+               struct e820entry *pbios; /* pointer to original bios entry */
+               unsigned long long addr; /* address for this change point */
+       };
+static struct change_member change_point_list[2*E820_X_MAX] __initdata;
+static struct change_member *change_point[2*E820_X_MAX] __initdata;
+static struct e820entry *overlap_list[E820_X_MAX] __initdata;
+static struct e820entry new_bios[E820_X_MAX] __initdata;
+       struct change_member *change_tmp;
+       unsigned long current_type, last_type;
+       unsigned long long last_addr;
+       int chgidx, still_changing;
+       int overlap_entries;
+       int new_bios_entry;
+       int old_nr, new_nr, chg_nr;
+       int i;
+
+       /* if there's only one memory region, don't bother */
+       if (*pnr_map < 2)
+               return -1;
+
+       old_nr = *pnr_map;
+       BUG_ON(old_nr > max_nr_map);
+
+       /*
+        * bail out if we find any unreasonable addresses in bios map
+        * (an entry whose addr + size wraps around)
+        */
+       for (i = 0; i < old_nr; i++)
+               if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
+                       return -1;
+
+       /* create pointers for initial change-point information (for sorting) */
+       for (i = 0; i < 2 * old_nr; i++)
+               change_point[i] = &change_point_list[i];
+
+       /* record all known change-points (starting and ending addresses),
+          omitting those that are for empty memory regions */
+       chgidx = 0;
+       for (i = 0; i < old_nr; i++)    {
+               if (biosmap[i].size != 0) {
+                       change_point[chgidx]->addr = biosmap[i].addr;
+                       change_point[chgidx++]->pbios = &biosmap[i];
+                       change_point[chgidx]->addr = biosmap[i].addr +
+                               biosmap[i].size;
+                       change_point[chgidx++]->pbios = &biosmap[i];
+               }
+       }
+       chg_nr = chgidx;        /* two change points per non-empty entry */
+
+       /*
+        * sort change-point list by memory addresses (low -> high);
+        * adjacent entries are swapped until a full pass makes no change
+        */
+       still_changing = 1;
+       while (still_changing)  {
+               still_changing = 0;
+               for (i = 1; i < chg_nr; i++)  {
+                       unsigned long long curaddr, lastaddr;
+                       unsigned long long curpbaddr, lastpbaddr;
+
+                       curaddr = change_point[i]->addr;
+                       lastaddr = change_point[i - 1]->addr;
+                       curpbaddr = change_point[i]->pbios->addr;
+                       lastpbaddr = change_point[i - 1]->pbios->addr;
+
+                       /*
+                        * swap entries, when:
+                        *
+                        * curaddr < lastaddr or
+                        * curaddr == lastaddr and curaddr == curpbaddr and
+                        * lastaddr != lastpbaddr
+                        *
+                        * (the second case puts the start of one entry
+                        * ahead of the end of another at the same address)
+                        */
+                       if (curaddr < lastaddr ||
+                           (curaddr == lastaddr && curaddr == curpbaddr &&
+                            lastaddr != lastpbaddr)) {
+                               change_tmp = change_point[i];
+                               change_point[i] = change_point[i-1];
+                               change_point[i-1] = change_tmp;
+                               still_changing = 1;
+                       }
+               }
+       }
+
+       /* create a new bios memory map, removing overlaps */
+       overlap_entries = 0;     /* number of entries in the overlap table */
+       new_bios_entry = 0;      /* index for creating new bios map entries */
+       last_type = 0;           /* start with undefined memory type */
+       last_addr = 0;           /* start with 0 as last starting address */
+
+       /* loop through change-points, determining effect on the new bios map */
+       for (chgidx = 0; chgidx < chg_nr; chgidx++) {
+               /*
+                * keep track of all overlapping bios entries: a change
+                * point at its entry's own start address opens the entry,
+                * any other change point closes it
+                */
+               if (change_point[chgidx]->addr ==
+                   change_point[chgidx]->pbios->addr) {
+                       /*
+                        * add map entry to overlap list (> 1 entry
+                        * implies an overlap)
+                        */
+                       overlap_list[overlap_entries++] =
+                               change_point[chgidx]->pbios;
+               } else {
+                       /*
+                        * remove entry from list (order independent,
+                        * so swap with last)
+                        */
+                       for (i = 0; i < overlap_entries; i++) {
+                               if (overlap_list[i] ==
+                                   change_point[chgidx]->pbios)
+                                       overlap_list[i] =
+                                               overlap_list[overlap_entries-1];
+                       }
+                       overlap_entries--;
+               }
+               /*
+                * if there are overlapping entries, decide which
+                * "type" to use (larger value takes precedence --
+                * 1=usable, 2,3,4,4+=unusable)
+                */
+               current_type = 0;
+               for (i = 0; i < overlap_entries; i++)
+                       if (overlap_list[i]->type > current_type)
+                               current_type = overlap_list[i]->type;
+               /*
+                * continue building up new bios map based on this
+                * information
+                */
+               if (current_type != last_type)  {
+                       if (last_type != 0)      {
+                               new_bios[new_bios_entry].size =
+                                       change_point[chgidx]->addr - last_addr;
+                               /*
+                                * move forward only if the new size
+                                * was non-zero
+                                */
+                               if (new_bios[new_bios_entry].size != 0)
+                                       /*
+                                        * no more space left for new
+                                        * bios entries ?
+                                        */
+                                       if (++new_bios_entry >= max_nr_map)
+                                               break;
+                       }
+                       if (current_type != 0)  {
+                               new_bios[new_bios_entry].addr =
+                                       change_point[chgidx]->addr;
+                               new_bios[new_bios_entry].type = current_type;
+                               last_addr = change_point[chgidx]->addr;
+                       }
+                       last_type = current_type;
+               }
+       }
+       /* retain count for new bios entries */
+       new_nr = new_bios_entry;
+
+       /* copy new bios mapping into original location */
+       memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
+       *pnr_map = new_nr;
+
+       return 0;
+}
+
+/*
+ * Copy a BIOS-provided e820 map into the kernel map, entry by entry,
+ * sanity-checking each entry on the way.
+ *
+ * Returns 0 on success.  Returns -1 if the map has fewer than two
+ * entries or if an entry's start + size overflows 64 bits; in the
+ * overflow case the entries preceding the bad one have already been
+ * added.  The caller is expected to fall back to a made-up map then.
+ */
+int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
+{
+       /* a single region (or a negative count) is not worth using */
+       if (nr_map < 2)
+               return -1;
+
+       for (; nr_map; biosmap++, nr_map--) {
+               u64 base = biosmap->addr;
+               u64 len = biosmap->size;
+
+               /* base + len wrapped around: distrust the whole map */
+               if (base > base + len)
+                       return -1;
+
+               add_memory_region(base, len, biosmap->type);
+       }
+       return 0;
+}
+
+/*
+ * Re-type the part of [start, start + size) that currently has
+ * old_type so that it becomes new_type, and return the number of
+ * bytes affected.
+ *
+ * Entries lying completely inside the range are re-typed in place.
+ * For entries that only partly overlap, the overlapping piece is
+ * appended as a new new_type entry and the original entry is left
+ * alone, so the map contains overlaps until it is sanitized again
+ * (see update_e820()).  old_type and new_type must differ.
+ */
+u64 __init update_memory_range(u64 start, u64 size, unsigned old_type,
+                               unsigned new_type)
+{
+       u64 range_end = start + size;
+       u64 updated = 0;
+       int idx;
+
+       BUG_ON(old_type == new_type);
+
+       /*
+        * e820.nr_map is re-read on every pass: entries appended below
+        * are visited too, but are skipped because of their type.
+        */
+       for (idx = 0; idx < e820.nr_map; idx++) {
+               struct e820entry *entry = &e820.map[idx];
+               u64 entry_end, lo, hi;
+
+               if (entry->type != old_type)
+                       continue;
+
+               entry_end = entry->addr + entry->size;
+
+               /* entry entirely inside the range: flip its type */
+               if (entry->addr >= start && entry_end <= range_end) {
+                       entry->type = new_type;
+                       updated += entry->size;
+                       continue;
+               }
+
+               /* partial overlap: describe the intersection separately */
+               lo = max(start, entry->addr);
+               hi = min(range_end, entry_end);
+               if (lo < hi) {
+                       add_memory_region(lo, hi - lo, new_type);
+                       updated += hi - lo;
+               }
+       }
+       return updated;
+}
+
+/*
+ * Re-sanitize the kernel e820 map after it has been edited and log the
+ * result.  Nothing is changed or printed when sanitize_e820_map()
+ * reports that it did nothing.
+ */
+void __init update_e820(void)
+{
+       int count = e820.nr_map;
+
+       if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &count) != 0)
+               return;
+
+       e820.nr_map = count;
+       printk(KERN_INFO "modified physical RAM map:\n");
+       e820_print_map("modified");
+}
+
+/*
+ * Search for the biggest gap in the low 32 bits of the e820 memory
+ * space and set pci_mem_start from it.  We pass this space to PCI to
+ * assign MMIO resources for hotplug or unconfigured devices in.
+ * Hopefully the BIOS left enough space.
+ *
+ * Only gaps larger than the initial 4MB gap size are considered.
+ */
+__init void e820_setup_gap(void)
+{
+       unsigned long long last = 0x100000000ull;
+       unsigned long gapstart = 0x10000000;
+       unsigned long gapsize = 0x400000;
+       unsigned long round;
+       int found = 0;
+       int i = e820.nr_map;
+
+       /* walk the map from its last entry down to its first */
+       while (i-- > 0) {
+               unsigned long long start = e820.map[i].addr;
+               unsigned long long end = start + e820.map[i].size;
+
+               /*
+                * Since "last" is at most 4GB, we know we'll
+                * fit in 32 bits if this condition is true
+                */
+               if (end < last) {
+                       unsigned long gap = last - end;
+
+                       if (gap > gapsize) {
+                               gapsize = gap;
+                               gapstart = end;
+                               found = 1;
+                       }
+               }
+
+               /* "last" tracks the lowest start address seen so far */
+               last = min(last, start);
+       }
+
+#ifdef CONFIG_X86_64
+       if (!found) {
+               gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
+               printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
+                      "address range\n"
+                      KERN_ERR "PCI: Unassigned devices with 32bit resource "
+                      "registers may break!\n");
+       }
+#endif
+
+       /*
+        * See how much we want to round up: start off with 1MB and
+        * keep doubling while a sixteenth of the gap is still bigger.
+        */
+       for (round = 0x100000; (gapsize >> 4) > round; round <<= 1)
+               ;
+
+       /* Fun with two's complement: round up to a multiple of "round" */
+       pci_mem_start = (gapstart + round) & -round;
+
+       printk(KERN_INFO
+              "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
+              pci_mem_start, gapstart, gapsize);
+}
+
+#if defined(CONFIG_X86_64) || \
+       (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
+/**
+ * Register as nosave every page range that is not e820 RAM: both the
+ * holes between consecutive e820 entries and the entries whose type is
+ * not E820_RAM.  Used for hibernation (32 bit) or software suspend and
+ * suspend to RAM (64 bit).  Scanning stops once an entry reaches
+ * limit_pfn.
+ *
+ * This function requires the e820 map to be sorted and without any
+ * overlapping entries and assumes the first e820 area to be RAM.
+ */
+void __init e820_mark_nosave_regions(unsigned long limit_pfn)
+{
+       unsigned long prev_end_pfn;
+       int idx;
+
+       /* the first entry is taken to be RAM; start right behind it */
+       prev_end_pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
+
+       for (idx = 1; idx < e820.nr_map; idx++) {
+               const struct e820entry *entry = &e820.map[idx];
+               u64 first_pfn = PFN_UP(entry->addr);
+
+               /* hole between the previous entry and this one */
+               if (prev_end_pfn < first_pfn)
+                       register_nosave_region(prev_end_pfn, first_pfn);
+
+               prev_end_pfn = PFN_DOWN(entry->addr + entry->size);
+
+               /* the entry itself, unless it is RAM */
+               if (entry->type != E820_RAM)
+                       register_nosave_region(first_pfn, prev_end_pfn);
+
+               if (prev_end_pfn >= limit_pfn)
+                       break;
+       }
+}
+#endif
+
+/*
+ * Early reserved memory areas.
+ *
+ * The table is kept dense: used slots come first and the first slot
+ * with a zero end marks the end of the list.
+ */
+#define MAX_EARLY_RES 20
+
+struct early_res {
+       u64 start;              /* first byte of the area */
+       u64 end;                /* one past the last byte; 0 = unused slot */
+       char name[16];          /* label used in log and panic messages */
+};
+static struct early_res early_res[MAX_EARLY_RES] __initdata = {
+       /* BIOS data page */
+       { .start = 0, .end = PAGE_SIZE, .name = "BIOS data page" },
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
+       {
+               .start = TRAMPOLINE_BASE,
+               .end = TRAMPOLINE_BASE + 2 * PAGE_SIZE,
+               .name = "TRAMPOLINE",
+       },
+#endif
+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
+       /*
+        * But first pinch a few for the stack/trampoline stuff
+        * FIXME: Don't need the extra page at 4K, but need to fix
+        * trampoline before removing it. (see the GDT stuff)
+        */
+       {
+               .start = PAGE_SIZE,
+               .end = PAGE_SIZE + PAGE_SIZE,
+               .name = "EX TRAMPOLINE",
+       },
+       /*
+        * Has to be in very low memory so we can execute
+        * real-mode AP code.
+        */
+       {
+               .start = TRAMPOLINE_BASE,
+               .end = TRAMPOLINE_BASE + PAGE_SIZE,
+               .name = "TRAMPOLINE",
+       },
+#endif
+       {}
+};
+
+/*
+ * Return the index of the first early reservation that intersects
+ * [start, end).  When none does, the result is the index of the first
+ * unused slot, or MAX_EARLY_RES if the table is full.
+ */
+static int __init find_overlapped_early(u64 start, u64 end)
+{
+       int idx = 0;
+
+       while (idx < MAX_EARLY_RES && early_res[idx].end) {
+               const struct early_res *res = &early_res[idx];
+
+               if (res->start < end && start < res->end)
+                       return idx;
+               idx++;
+       }
+
+       return idx;
+}
+
+/*
+ * Record [start, end) as an early reservation labelled @name (which
+ * may be NULL for an unnamed reservation).
+ *
+ * Panics if the table is full or if the range overlaps an existing
+ * reservation.
+ */
+void __init reserve_early(u64 start, u64 end, char *name)
+{
+       int i;
+       struct early_res *r;
+
+       /* yields an overlapping entry, the first free slot, or "full" */
+       i = find_overlapped_early(start, end);
+       if (i >= MAX_EARLY_RES)
+               panic("Too many early reservations");
+       r = &early_res[i];
+       if (r->end)
+               panic("Overlapping early reservations "
+                     "%llx-%llx %s to %llx-%llx %s\n",
+                     start, end - 1, name?name:"", r->start,
+                     r->end - 1, r->name);
+       r->start = start;
+       r->end = end;
+       if (name) {
+               strncpy(r->name, name, sizeof(r->name) - 1);
+               /* strncpy() does not terminate a truncated copy */
+               r->name[sizeof(r->name) - 1] = '\0';
+       } else {
+               /*
+                * free_early() only clears .end of the slot it vacates,
+                * so a reused slot may still carry an old label.
+                */
+               r->name[0] = '\0';
+       }
+}
+
+/*
+ * Drop the early reservation that covers exactly [start, end) and
+ * close the hole it leaves in the table.
+ *
+ * Panics if no reservation with exactly these bounds exists.
+ */
+void __init free_early(u64 start, u64 end)
+{
+       int victim = find_overlapped_early(start, end);
+       int last;
+
+       if (victim >= MAX_EARLY_RES ||
+           early_res[victim].end != end ||
+           early_res[victim].start != start)
+               panic("free_early on not reserved area: %llx-%llx!",
+                        start, end - 1);
+
+       /* find the last slot that is in use */
+       last = victim;
+       while (last + 1 < MAX_EARLY_RES && early_res[last + 1].end)
+               last++;
+
+       /* slide the entries behind the victim down by one */
+       memmove(&early_res[victim], &early_res[victim + 1],
+              (last - victim) * sizeof(struct early_res));
+
+       /* the former last slot now terminates the list */
+       early_res[last].end = 0;
+}
+
+/*
+ * Return 1 if the page with frame number @pagenr intersects an early
+ * reservation, 0 otherwise.
+ */
+int __init page_is_reserved_early(unsigned long pagenr)
+{
+       u64 page_start = (u64)pagenr << PAGE_SHIFT;
+       int hit = find_overlapped_early(page_start, page_start + PAGE_SIZE);
+
+       if (hit >= MAX_EARLY_RES)
+               return 0;
+
+       /* a used slot is a real overlap, an unused one means "none" */
+       return early_res[hit].end != 0;
+}
+
+/*
+ * Hand the early reservations over to bootmem: for every reservation,
+ * the part that falls inside [start, end) is logged and reserved in
+ * the bootmem allocator.
+ */
+void __init early_res_to_bootmem(u64 start, u64 end)
+{
+       int idx;
+
+       for (idx = 0; idx < MAX_EARLY_RES && early_res[idx].end; idx++) {
+               struct early_res *res = &early_res[idx];
+               u64 lo = max(start, res->start);
+               u64 hi = min(end, res->end);
+
+               /* no intersection with the requested window */
+               if (lo >= hi)
+                       continue;
+
+               printk(KERN_INFO "  early res: %d [%llx-%llx] %s\n", idx,
+                       lo, hi - 1, res->name);
+#ifdef CONFIG_X86_64
+               reserve_bootmem_generic(lo, hi - lo);
+#else
+               reserve_bootmem(lo, hi - lo, BOOTMEM_DEFAULT);
+#endif
+       }
+}
+
+/*
+ * Check for already reserved areas.
+ *
+ * While [*addrp, *addrp + size) intersects an early reservation, move
+ * *addrp to the aligned end of that reservation.  Returns 1 if *addrp
+ * had to be moved, 0 if it was fine as passed in.
+ */
+static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
+{
+       u64 addr = *addrp;
+       int moved = 0;
+
+       for (;;) {
+               int hit = find_overlapped_early(addr, addr + size);
+
+               /* an unused slot or a full table both mean "no overlap" */
+               if (hit >= MAX_EARLY_RES || !early_res[hit].end)
+                       break;
+
+               addr = round_up(early_res[hit].end, align);
+               *addrp = addr;
+               moved = 1;
+       }
+       return moved;
+}
+
+/*
+ * Check for already reserved areas.
+ *
+ * Adjusts the candidate range [*addrp, *addrp + *sizep) against the
+ * early reservations, rescanning from the first reservation after
+ * every adjustment.  If anything was adjusted, the new start and size
+ * are written back and 1 is returned; otherwise 0 is returned.
+ */
+static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
+{
+       int i;
+       u64 addr = *addrp, last;
+       u64 size = *sizep;
+       int changed = 0;
+again:
+       last = addr + size;
+       for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
+               struct early_res *r = &early_res[i];
+               /* range starts below r and runs into it: stop at r->start */
+               if (last > r->start && addr < r->start) {
+                       size = r->start - addr;
+                       changed = 1;
+                       goto again;
+               }
+               /* range starts inside r and extends past it: skip over r */
+               if (last > r->end && addr < r->end) {
+                       addr = round_up(r->end, align);
+                       size = last - addr;
+                       changed = 1;
+                       goto again;
+               }
+               /*
+                * range lies entirely inside r: nothing usable is left.
+                * NOTE(review): bumping *sizep presumably makes the bounds
+                * check in find_e820_area_size() reject the range - confirm.
+                */
+               if (last <= r->end && addr >= r->start) {
+                       (*sizep)++;
+                       return 0;
+               }
+       }
+       if (changed) {
+               *addrp = addr;
+               *sizep = size;
+       }
+       return changed;
+}
+
+/*
+ * Find a free area with specified alignment in a specific range.
+ *
+ * Looks through the E820_RAM entries for the first @align-aligned
+ * address at or above @start where @size bytes fit inside the entry,
+ * end no later than @end and do not collide with an early
+ * reservation.  Returns that address, or -1ULL if there is none.
+ */
+u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
+{
+       int idx;
+
+       for (idx = 0; idx < e820.nr_map; idx++) {
+               struct e820entry *entry = &e820.map[idx];
+               u64 entry_end = entry->addr + entry->size;
+               u64 candidate;
+
+               if (entry->type != E820_RAM)
+                       continue;
+
+               candidate = round_up(entry->addr, align);
+               if (candidate < start)
+                       candidate = round_up(start, align);
+               if (candidate >= entry_end)
+                       continue;
+
+               /* step over early reservations while still inside the entry */
+               while (bad_addr(&candidate, size, align) &&
+                      candidate + size <= entry_end)
+                       continue;
+
+               if (candidate + size > entry_end || candidate + size > end)
+                       continue;
+
+               return candidate;
+       }
+       return -1ULL;
+}
+
+/*
+ * Find next free range at or after @start.
+ *
+ * Looks through the E820_RAM entries for the first @align-aligned
+ * address at or above @start that begins a range free of early
+ * reservations.  On success the address is returned and *sizep holds
+ * the size of the free range.  When nothing is found -1ULL is returned
+ * (the same value find_e820_area() uses) and *sizep is not meaningful.
+ */
+u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
+{
+       int i;
+
+       for (i = 0; i < e820.nr_map; i++) {
+               struct e820entry *ei = &e820.map[i];
+               u64 addr, last;
+               u64 ei_last;
+
+               if (ei->type != E820_RAM)
+                       continue;
+               addr = round_up(ei->addr, align);
+               ei_last = ei->addr + ei->size;
+               if (addr < start)
+                       addr = round_up(start, align);
+               if (addr >= ei_last)
+                       continue;
+               /* start with everything up to the end of the entry ... */
+               *sizep = ei_last - addr;
+               /* ... and let the early reservations cut it down */
+               while (bad_addr_size(&addr, sizep, align) &&
+                       addr + *sizep <= ei_last)
+                       ;
+               last = addr + *sizep;
+               if (last > ei_last)
+                       continue;
+               return addr;
+       }
+       /*
+        * Must be the 64-bit all-ones value: -1UL would be converted to
+        * 0x00000000ffffffff when unsigned long is 32 bits wide.
+        */
+       return -1ULL;
+
+}
+
+/*
+ * Pre-allocate @sizet bytes and mark them reserved in the e820 map.
+ *
+ * Searches the free ranges at or above @startt for the first one that
+ * is at least @sizet bytes long, takes the @align-aligned block at the
+ * top of that range, changes it from E820_RAM to E820_RESERVED and
+ * re-sanitizes the map.
+ *
+ * Returns the address of the reserved block, or 0 if no free range is
+ * large enough.
+ */
+u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
+{
+       u64 size = 0;
+       u64 addr;
+       u64 start;
+
+       /*
+        * Walk the free ranges one after another.  Each pass resumes
+        * behind the range that was just found to be too small, and the
+        * search gives up once no further range is reported.
+        */
+       for (start = startt; ; start += size) {
+               start = find_e820_area_size(start, &size, align);
+               /*
+                * "Nothing found" is an all-ones value; accept both the
+                * unsigned long and the u64 spelling of it.
+                */
+               if (start == -1ULL || start == (u64)-1UL)
+                       return 0;
+               if (size >= sizet)
+                       break;
+       }
+
+       /* carve the block out of the top of the range that was found */
+       addr = round_down(start + size - sizet, align);
+       update_memory_range(addr, sizet, E820_RAM, E820_RESERVED);
+       printk(KERN_INFO "update e820 for early_reserve_e820\n");
+       update_e820();
+
+       return addr;
+}
+
+/*
+ * MAX_ARCH_PFN: page frame number limit used to initialise
+ * end_user_pfn and to cap the result of e820_end_of_ram().
+ * Every expansion is parenthesised so that the macro can be used
+ * inside a larger expression.
+ */
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_X86_PAE
+#  define MAX_ARCH_PFN         (1ULL<<(36-PAGE_SHIFT))
+# else
+#  define MAX_ARCH_PFN         (1ULL<<(32-PAGE_SHIFT))
+# endif
+#else /* CONFIG_X86_32 */
+# define MAX_ARCH_PFN          (MAXMEM >> PAGE_SHIFT)
+#endif
+
+/*
+ * Last pfn which the user wants to use.
+ *
+ * Starts out as MAX_ARCH_PFN; e820_end_of_ram() and
+ * e820_find_active_region() clamp their results to it.
+ */
+unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
+
+/*
+ * Find the highest page frame number we have available: the maximum
+ * pfn of the registered active regions, capped by both MAX_ARCH_PFN
+ * and end_user_pfn.  The result is logged and returned.
+ */
+unsigned long __init e820_end_of_ram(void)
+{
+       unsigned long max_arch_pfn = MAX_ARCH_PFN;
+       unsigned long last_pfn = find_max_pfn_with_active_regions();
+
+       last_pfn = min(last_pfn, max_arch_pfn);
+       last_pfn = min(last_pfn, end_user_pfn);
+
+       printk(KERN_INFO "last_pfn = %lu max_arch_pfn = %lu\n",
+                        last_pfn, max_arch_pfn);
+       return last_pfn;
+}
+
+/*
+ * Work out which part of the e820 entry @ei is usable RAM inside the
+ * pfn window [start_pfn, last_pfn).
+ *
+ * Returns 1 and stores the resulting pfn range in *ei_startpfn and
+ * *ei_endpfn when there is such a part.  Returns 0 when the entry is
+ * not RAM, holds no complete page, lies outside the window or starts
+ * at or above end_user_pfn; the output values are then not meaningful.
+ */
+int __init e820_find_active_region(const struct e820entry *ei,
+                                 unsigned long start_pfn,
+                                 unsigned long last_pfn,
+                                 unsigned long *ei_startpfn,
+                                 unsigned long *ei_endpfn)
+{
+       u64 align = PAGE_SIZE;
+
+       /* shrink the entry to the whole pages it contains */
+       *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
+       *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
+
+       /* Skip map entries smaller than a page */
+       if (*ei_endpfn <= *ei_startpfn)
+               return 0;
+
+       /* Skip anything that is not RAM or lies outside the node */
+       if (ei->type != E820_RAM)
+               return 0;
+       if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn)
+               return 0;
+
+       /* Clip a partial overlap to the window */
+       *ei_startpfn = max(*ei_startpfn, start_pfn);
+       *ei_endpfn = min(*ei_endpfn, last_pfn);
+
+       /* Obey end_user_pfn to save on memmap */
+       if (*ei_startpfn >= end_user_pfn)
+               return 0;
+       *ei_endpfn = min(*ei_endpfn, end_user_pfn);
+
+       return 1;
+}
+
+/*
+ * Walk the e820 map and register, for node @nid, the usable part of
+ * every entry that falls inside [start_pfn, last_pfn).
+ */
+void __init e820_register_active_regions(int nid, unsigned long start_pfn,
+                                        unsigned long last_pfn)
+{
+       int idx;
+
+       for (idx = 0; idx < e820.nr_map; idx++) {
+               unsigned long lo_pfn;
+               unsigned long hi_pfn;
+
+               if (!e820_find_active_region(&e820.map[idx],
+                                            start_pfn, last_pfn,
+                                            &lo_pfn, &hi_pfn))
+                       continue;
+
+               add_active_range(nid, lo_pfn, hi_pfn);
+       }
+}
+
+/*
+ * Find the hole size (in bytes) in the memory range, i.e. the length
+ * of the range minus the usable RAM pages that the e820 map reports
+ * inside it.
+ * @start: starting address of the memory range to scan
+ * @end: ending address of the memory range to scan
+ */
+u64 __init e820_hole_size(u64 start, u64 end)
+{
+       unsigned long first_pfn = start >> PAGE_SHIFT;
+       unsigned long past_pfn = end >> PAGE_SHIFT;
+       unsigned long ram_pages = 0;
+       int idx;
+
+       for (idx = 0; idx < e820.nr_map; idx++) {
+               unsigned long lo_pfn, hi_pfn;
+
+               if (!e820_find_active_region(&e820.map[idx],
+                                            first_pfn, past_pfn,
+                                            &lo_pfn, &hi_pfn))
+                       continue;
+
+               ram_pages += hi_pfn - lo_pfn;
+       }
+
+       return end - start - ((u64)ram_pages << PAGE_SHIFT);
+}
index ed733e7cf4e611c454f9ea622c9322919aa624b4..e8a3b968c9faf653a29ebf2ef649afdc90f283f4 100644 (file)
@@ -9,29 +9,12 @@
 #include <linux/mm.h>
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
-#include <linux/suspend.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
 #include <asm/e820.h>
 #include <asm/setup.h>
 
-struct e820map e820;
-struct change_member {
-       struct e820entry *pbios; /* pointer to original bios entry */
-       unsigned long long addr; /* address for this change point */
-};
-static struct change_member change_point_list[2*E820MAX] __initdata;
-static struct change_member *change_point[2*E820MAX] __initdata;
-static struct e820entry *overlap_list[E820MAX] __initdata;
-static struct e820entry new_bios[E820MAX] __initdata;
-/* For PCI or other memory-mapped resources */
-unsigned long pci_mem_start = 0x10000000;
-#ifdef CONFIG_PCI
-EXPORT_SYMBOL(pci_mem_start);
-#endif
-extern int user_defined_memmap;
-
 static struct resource system_rom_resource = {
        .name   = "System ROM",
        .start  = 0xf0000,
@@ -224,398 +207,12 @@ void __init init_iomem_resources(struct resource *code_resource,
        }
 }
 
-#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
-/**
- * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
- * correspond to e820 RAM areas and mark the corresponding pages as nosave for
- * hibernation.
- *
- * This function requires the e820 map to be sorted and without any
- * overlapping entries and assumes the first e820 area to be RAM.
- */
-void __init e820_mark_nosave_regions(void)
-{
-       int i;
-       unsigned long pfn;
-
-       pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
-       for (i = 1; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-
-               if (pfn < PFN_UP(ei->addr))
-                       register_nosave_region(pfn, PFN_UP(ei->addr));
-
-               pfn = PFN_DOWN(ei->addr + ei->size);
-               if (ei->type != E820_RAM)
-                       register_nosave_region(PFN_UP(ei->addr), pfn);
-
-               if (pfn >= max_low_pfn)
-                       break;
-       }
-}
-#endif
-
-void __init add_memory_region(unsigned long long start,
-                             unsigned long long size, int type)
-{
-       int x;
-
-       x = e820.nr_map;
-
-       if (x == E820MAX) {
-               printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-               return;
-       }
-
-       e820.map[x].addr = start;
-       e820.map[x].size = size;
-       e820.map[x].type = type;
-       e820.nr_map++;
-} /* add_memory_region */
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries.  The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
-{
-       struct change_member *change_tmp;
-       unsigned long current_type, last_type;
-       unsigned long long last_addr;
-       int chgidx, still_changing;
-       int overlap_entries;
-       int new_bios_entry;
-       int old_nr, new_nr, chg_nr;
-       int i;
-
-       /*
-               Visually we're performing the following (1,2,3,4 = memory types)...
-
-               Sample memory map (w/overlaps):
-                  ____22__________________
-                  ______________________4_
-                  ____1111________________
-                  _44_____________________
-                  11111111________________
-                  ____________________33__
-                  ___________44___________
-                  __________33333_________
-                  ______________22________
-                  ___________________2222_
-                  _________111111111______
-                  _____________________11_
-                  _________________4______
-
-               Sanitized equivalent (no overlap):
-                  1_______________________
-                  _44_____________________
-                  ___1____________________
-                  ____22__________________
-                  ______11________________
-                  _________1______________
-                  __________3_____________
-                  ___________44___________
-                  _____________33_________
-                  _______________2________
-                  ________________1_______
-                  _________________4______
-                  ___________________2____
-                  ____________________33__
-                  ______________________4_
-       */
-       /* if there's only one memory region, don't bother */
-       if (*pnr_map < 2) {
-               return -1;
-       }
-
-       old_nr = *pnr_map;
-
-       /* bail out if we find any unreasonable addresses in bios map */
-       for (i=0; i<old_nr; i++)
-               if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
-                       return -1;
-               }
-
-       /* create pointers for initial change-point information (for sorting) */
-       for (i=0; i < 2*old_nr; i++)
-               change_point[i] = &change_point_list[i];
-
-       /* record all known change-points (starting and ending addresses),
-          omitting those that are for empty memory regions */
-       chgidx = 0;
-       for (i=0; i < old_nr; i++)      {
-               if (biosmap[i].size != 0) {
-                       change_point[chgidx]->addr = biosmap[i].addr;
-                       change_point[chgidx++]->pbios = &biosmap[i];
-                       change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
-                       change_point[chgidx++]->pbios = &biosmap[i];
-               }
-       }
-       chg_nr = chgidx;        /* true number of change-points */
-
-       /* sort change-point list by memory addresses (low -> high) */
-       still_changing = 1;
-       while (still_changing)  {
-               still_changing = 0;
-               for (i=1; i < chg_nr; i++)  {
-                       /* if <current_addr> > <last_addr>, swap */
-                       /* or, if current=<start_addr> & last=<end_addr>, swap */
-                       if ((change_point[i]->addr < change_point[i-1]->addr) ||
-                               ((change_point[i]->addr == change_point[i-1]->addr) &&
-                                (change_point[i]->addr == change_point[i]->pbios->addr) &&
-                                (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
-                          )
-                       {
-                               change_tmp = change_point[i];
-                               change_point[i] = change_point[i-1];
-                               change_point[i-1] = change_tmp;
-                               still_changing=1;
-                       }
-               }
-       }
-
-       /* create a new bios memory map, removing overlaps */
-       overlap_entries=0;       /* number of entries in the overlap table */
-       new_bios_entry=0;        /* index for creating new bios map entries */
-       last_type = 0;           /* start with undefined memory type */
-       last_addr = 0;           /* start with 0 as last starting address */
-       /* loop through change-points, determining affect on the new bios map */
-       for (chgidx=0; chgidx < chg_nr; chgidx++)
-       {
-               /* keep track of all overlapping bios entries */
-               if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
-               {
-                       /* add map entry to overlap list (> 1 entry implies an overlap) */
-                       overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
-               }
-               else
-               {
-                       /* remove entry from list (order independent, so swap with last) */
-                       for (i=0; i<overlap_entries; i++)
-                       {
-                               if (overlap_list[i] == change_point[chgidx]->pbios)
-                                       overlap_list[i] = overlap_list[overlap_entries-1];
-                       }
-                       overlap_entries--;
-               }
-               /* if there are overlapping entries, decide which "type" to use */
-               /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
-               current_type = 0;
-               for (i=0; i<overlap_entries; i++)
-                       if (overlap_list[i]->type > current_type)
-                               current_type = overlap_list[i]->type;
-               /* continue building up new bios map based on this information */
-               if (current_type != last_type)  {
-                       if (last_type != 0)      {
-                               new_bios[new_bios_entry].size =
-                                       change_point[chgidx]->addr - last_addr;
-                               /* move forward only if the new size was non-zero */
-                               if (new_bios[new_bios_entry].size != 0)
-                                       if (++new_bios_entry >= E820MAX)
-                                               break;  /* no more space left for new bios entries */
-                       }
-                       if (current_type != 0)  {
-                               new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
-                               new_bios[new_bios_entry].type = current_type;
-                               last_addr=change_point[chgidx]->addr;
-                       }
-                       last_type = current_type;
-               }
-       }
-       new_nr = new_bios_entry;   /* retain count for new bios entries */
-
-       /* copy new bios mapping into original location */
-       memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
-       *pnr_map = new_nr;
-
-       return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory.  If we aren't, we'll fake a memory map.
- *
- * We check to see that the memory map contains at least 2 elements
- * before we'll use it, because the detection code in setup.S may
- * not be perfect and most every PC known to man has two memory
- * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
- * thinkpad 560x, for example, does not cooperate with the memory
- * detection code.)
- */
-int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
-{
-       /* Only one memory region (or negative)? Ignore it */
-       if (nr_map < 2)
-               return -1;
-
-       do {
-               u64 start = biosmap->addr;
-               u64 size = biosmap->size;
-               u64 end = start + size;
-               u32 type = biosmap->type;
-
-               /* Overflow in 64 bits? Ignore the memory map. */
-               if (start > end)
-                       return -1;
-
-               add_memory_region(start, size, type);
-       } while (biosmap++, --nr_map);
-
-       return 0;
-}
-
-/*
- * Find the highest page frame number we have available
- */
-void __init propagate_e820_map(void)
-{
-       int i;
-
-       max_pfn = 0;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               unsigned long start, end;
-               /* RAM? */
-               if (e820.map[i].type != E820_RAM)
-                       continue;
-               start = PFN_UP(e820.map[i].addr);
-               end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-               if (start >= end)
-                       continue;
-               if (end > max_pfn)
-                       max_pfn = end;
-               memory_present(0, start, end);
-       }
-}
-
-/*
- * Register fully available low RAM pages with the bootmem allocator.
- */
-void __init register_bootmem_low_pages(unsigned long max_low_pfn)
-{
-       int i;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               unsigned long curr_pfn, last_pfn, size;
-               /*
-                * Reserve usable low memory
-                */
-               if (e820.map[i].type != E820_RAM)
-                       continue;
-               /*
-                * We are rounding up the start address of usable memory:
-                */
-               curr_pfn = PFN_UP(e820.map[i].addr);
-               if (curr_pfn >= max_low_pfn)
-                       continue;
-               /*
-                * ... and at the end of the usable range downwards:
-                */
-               last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
-
-               if (last_pfn > max_low_pfn)
-                       last_pfn = max_low_pfn;
-
-               /*
-                * .. finally, did all the rounding and playing
-                * around just make the area go away?
-                */
-               if (last_pfn <= curr_pfn)
-                       continue;
-
-               size = last_pfn - curr_pfn;
-               free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
-       }
-}
-
-void __init e820_register_memory(void)
-{
-       unsigned long gapstart, gapsize, round;
-       unsigned long long last;
-       int i;
-
-       /*
-        * Search for the biggest gap in the low 32 bits of the e820
-        * memory space.
-        */
-       last = 0x100000000ull;
-       gapstart = 0x10000000;
-       gapsize = 0x400000;
-       i = e820.nr_map;
-       while (--i >= 0) {
-               unsigned long long start = e820.map[i].addr;
-               unsigned long long end = start + e820.map[i].size;
-
-               /*
-                * Since "last" is at most 4GB, we know we'll
-                * fit in 32 bits if this condition is true
-                */
-               if (last > end) {
-                       unsigned long gap = last - end;
-
-                       if (gap > gapsize) {
-                               gapsize = gap;
-                               gapstart = end;
-                       }
-               }
-               if (start < last)
-                       last = start;
-       }
-
-       /*
-        * See how much we want to round up: start off with
-        * rounding to the next 1MB area.
-        */
-       round = 0x100000;
-       while ((gapsize >> 4) > round)
-               round += round;
-       /* Fun with two's complement */
-       pci_mem_start = (gapstart + round) & -round;
-
-       printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
-               pci_mem_start, gapstart, gapsize);
-}
-
-void __init print_memory_map(char *who)
-{
-       int i;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               printk(" %s: %016Lx - %016Lx ", who,
-                       e820.map[i].addr,
-                       e820.map[i].addr + e820.map[i].size);
-               switch (e820.map[i].type) {
-               case E820_RAM:  printk("(usable)\n");
-                               break;
-               case E820_RESERVED:
-                               printk("(reserved)\n");
-                               break;
-               case E820_ACPI:
-                               printk("(ACPI data)\n");
-                               break;
-               case E820_NVS:
-                               printk("(ACPI NVS)\n");
-                               break;
-               default:        printk("type %u\n", e820.map[i].type);
-                               break;
-               }
-       }
-}
-
 void __init limit_regions(unsigned long long size)
 {
        unsigned long long current_addr;
        int i;
 
-       print_memory_map("limit_regions start");
+       e820_print_map("limit_regions start");
        for (i = 0; i < e820.nr_map; i++) {
                current_addr = e820.map[i].addr + e820.map[i].size;
                if (current_addr < size)
@@ -634,63 +231,59 @@ void __init limit_regions(unsigned long long size)
                        e820.nr_map = i + 1;
                        e820.map[i].size -= current_addr - size;
                }
-               print_memory_map("limit_regions endfor");
+               e820_print_map("limit_regions endfor");
                return;
        }
-       print_memory_map("limit_regions endfunc");
+       e820_print_map("limit_regions endfunc");
 }
 
-/*
- * This function checks if any part of the range <start,end> is mapped
- * with type.
- */
-int
-e820_any_mapped(u64 start, u64 end, unsigned type)
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
+char * __init __attribute__((weak)) memory_setup(void)
 {
-       int i;
-       for (i = 0; i < e820.nr_map; i++) {
-               const struct e820entry *ei = &e820.map[i];
-               if (type && ei->type != type)
-                       continue;
-               if (ei->addr >= end || ei->addr + ei->size <= start)
-                       continue;
-               return 1;
-       }
-       return 0;
+       return machine_specific_memory_setup();
 }
-EXPORT_SYMBOL_GPL(e820_any_mapped);
-
- /*
-  * This function checks if the entire range <start,end> is mapped with type.
-  *
-  * Note: this function only works correct if the e820 table is sorted and
-  * not-overlapping, which is the case
-  */
-int __init
-e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
+
+void __init setup_memory_map(void)
 {
-       u64 start = s;
-       u64 end = e;
-       int i;
-       for (i = 0; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-               if (type && ei->type != type)
-                       continue;
-               /* is the region (part) in overlap with the current region ?*/
-               if (ei->addr >= end || ei->addr + ei->size <= start)
-                       continue;
-               /* if the region is at the beginning of <start,end> we move
-                * start to the end of the region since it's ok until there
+       printk(KERN_INFO "BIOS-provided physical RAM map:\n");
+       e820_print_map(memory_setup());
+}
+
+static int __initdata user_defined_memmap;
+
+/*
+ * "mem=nopentium" disables the 4MB page tables.
+ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
+ * to <mem>, overriding the bios size.
+ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
+ * <start> to <start>+<mem>, overriding the bios size.
+ *
+ * HPA tells me bootloaders need to parse mem=, so no new
+ * option should be mem=  [also see Documentation/i386/boot.txt]
+ */
+static int __init parse_mem(char *arg)
+{
+       if (!arg)
+               return -EINVAL;
+
+       if (strcmp(arg, "nopentium") == 0) {
+               setup_clear_cpu_cap(X86_FEATURE_PSE);
+       } else {
+               /* If the user specifies memory size, we
+                * limit the BIOS-provided memory map to
+                * that size. exactmap can be used to specify
+                * the exact map. mem=number can be used to
+                * trim the existing memory map.
                 */
-               if (ei->addr <= start)
-                       start = ei->addr + ei->size;
-               /* if start is now at or beyond end, we're done, full
-                * coverage */
-               if (start >= end)
-                       return 1; /* we're done */
+               unsigned long long mem_size;
+
+               mem_size = memparse(arg, &arg);
+               limit_regions(mem_size);
+               user_defined_memmap = 1;
        }
        return 0;
 }
+early_param("mem", parse_mem);
 
 static int __init parse_memmap(char *arg)
 {
@@ -704,8 +297,9 @@ static int __init parse_memmap(char *arg)
                 * size before original memory map is
                 * reset.
                 */
-               propagate_e820_map();
-               saved_max_pfn = max_pfn;
+               e820_register_active_regions(0, 0, -1UL);
+               saved_max_pfn = e820_end_of_ram();
+               remove_all_active_ranges();
 #endif
                e820.nr_map = 0;
                user_defined_memmap = 1;
@@ -736,40 +330,12 @@ static int __init parse_memmap(char *arg)
        return 0;
 }
 early_param("memmap", parse_memmap);
-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
-                               unsigned new_type)
-{
-       int i;
-
-       BUG_ON(old_type == new_type);
 
-       for (i = 0; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-               u64 final_start, final_end;
-               if (ei->type != old_type)
-                       continue;
-               /* totally covered? */
-               if (ei->addr >= start && ei->size <= size) {
-                       ei->type = new_type;
-                       continue;
-               }
-               /* partially covered */
-               final_start = max(start, ei->addr);
-               final_end = min(start + size, ei->addr + ei->size);
-               if (final_start >= final_end)
-                       continue;
-               add_memory_region(final_start, final_end - final_start,
-                                        new_type);
+void __init finish_e820_parsing(void)
+{
+       if (user_defined_memmap) {
+               printk(KERN_INFO "user-defined physical RAM map:\n");
+               e820_print_map("user");
        }
 }
-void __init update_e820(void)
-{
-       u8 nr_map;
 
-       nr_map = e820.nr_map;
-       if (sanitize_e820_map(e820.map, &nr_map))
-               return;
-       e820.nr_map = nr_map;
-       printk(KERN_INFO "modified physical RAM map:\n");
-       print_memory_map("modified");
-}
index 124480c0008dd2a5e348cd2db7b928ff13d3aa41..0afee2ca0bf811800793246c1a396cc7a2ec6f0f 100644 (file)
@@ -17,8 +17,8 @@
 #include <linux/kexec.h>
 #include <linux/module.h>
 #include <linux/mm.h>
-#include <linux/suspend.h>
 #include <linux/pfn.h>
+#include <linux/pci.h>
 
 #include <asm/pgtable.h>
 #include <asm/page.h>
@@ -29,8 +29,6 @@
 #include <asm/kdebug.h>
 #include <asm/trampoline.h>
 
-struct e820map e820;
-
 /*
  * PFN of last memory page.
  */
@@ -43,285 +41,6 @@ unsigned long end_pfn;
  */
 unsigned long max_pfn_mapped;
 
-/*
- * Last pfn which the user wants to use.
- */
-static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
-
-/*
- * Early reserved memory areas.
- */
-#define MAX_EARLY_RES 20
-
-struct early_res {
-       unsigned long start, end;
-       char name[16];
-};
-static struct early_res early_res[MAX_EARLY_RES] __initdata = {
-       { 0, PAGE_SIZE, "BIOS data page" },                     /* BIOS data page */
-#ifdef CONFIG_X86_TRAMPOLINE
-       { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
-#endif
-       {}
-};
-
-void __init reserve_early(unsigned long start, unsigned long end, char *name)
-{
-       int i;
-       struct early_res *r;
-       for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-               r = &early_res[i];
-               if (end > r->start && start < r->end)
-                       panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
-                             start, end - 1, name?name:"", r->start, r->end - 1, r->name);
-       }
-       if (i >= MAX_EARLY_RES)
-               panic("Too many early reservations");
-       r = &early_res[i];
-       r->start = start;
-       r->end = end;
-       if (name)
-               strncpy(r->name, name, sizeof(r->name) - 1);
-}
-
-void __init free_early(unsigned long start, unsigned long end)
-{
-       struct early_res *r;
-       int i, j;
-
-       for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-               r = &early_res[i];
-               if (start == r->start && end == r->end)
-                       break;
-       }
-       if (i >= MAX_EARLY_RES || !early_res[i].end)
-               panic("free_early on not reserved area: %lx-%lx!", start, end);
-
-       for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
-               ;
-
-       memmove(&early_res[i], &early_res[i + 1],
-              (j - 1 - i) * sizeof(struct early_res));
-
-       early_res[j - 1].end = 0;
-}
-
-void __init early_res_to_bootmem(unsigned long start, unsigned long end)
-{
-       int i;
-       unsigned long final_start, final_end;
-       for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-               struct early_res *r = &early_res[i];
-               final_start = max(start, r->start);
-               final_end = min(end, r->end);
-               if (final_start >= final_end)
-                       continue;
-               printk(KERN_INFO "  early res: %d [%lx-%lx] %s\n", i,
-                       final_start, final_end - 1, r->name);
-               reserve_bootmem_generic(final_start, final_end - final_start);
-       }
-}
-
-/* Check for already reserved areas */
-static inline int __init
-bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
-{
-       int i;
-       unsigned long addr = *addrp, last;
-       int changed = 0;
-again:
-       last = addr + size;
-       for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-               struct early_res *r = &early_res[i];
-               if (last >= r->start && addr < r->end) {
-                       *addrp = addr = round_up(r->end, align);
-                       changed = 1;
-                       goto again;
-               }
-       }
-       return changed;
-}
-
-/* Check for already reserved areas */
-static inline int __init
-bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
-{
-       int i;
-       unsigned long addr = *addrp, last;
-       unsigned long size = *sizep;
-       int changed = 0;
-again:
-       last = addr + size;
-       for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
-               struct early_res *r = &early_res[i];
-               if (last > r->start && addr < r->start) {
-                       size = r->start - addr;
-                       changed = 1;
-                       goto again;
-               }
-               if (last > r->end && addr < r->end) {
-                       addr = round_up(r->end, align);
-                       size = last - addr;
-                       changed = 1;
-                       goto again;
-               }
-               if (last <= r->end && addr >= r->start) {
-                       (*sizep)++;
-                       return 0;
-               }
-       }
-       if (changed) {
-               *addrp = addr;
-               *sizep = size;
-       }
-       return changed;
-}
-/*
- * This function checks if any part of the range <start,end> is mapped
- * with type.
- */
-int
-e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
-{
-       int i;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-
-               if (type && ei->type != type)
-                       continue;
-               if (ei->addr >= end || ei->addr + ei->size <= start)
-                       continue;
-               return 1;
-       }
-       return 0;
-}
-EXPORT_SYMBOL_GPL(e820_any_mapped);
-
-/*
- * This function checks if the entire range <start,end> is mapped with type.
- *
- * Note: this function only works correct if the e820 table is sorted and
- * not-overlapping, which is the case
- */
-int __init e820_all_mapped(unsigned long start, unsigned long end,
-                          unsigned type)
-{
-       int i;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-
-               if (type && ei->type != type)
-                       continue;
-               /* is the region (part) in overlap with the current region ?*/
-               if (ei->addr >= end || ei->addr + ei->size <= start)
-                       continue;
-
-               /* if the region is at the beginning of <start,end> we move
-                * start to the end of the region since it's ok until there
-                */
-               if (ei->addr <= start)
-                       start = ei->addr + ei->size;
-               /*
-                * if start is now at or beyond end, we're done, full
-                * coverage
-                */
-               if (start >= end)
-                       return 1;
-       }
-       return 0;
-}
-
-/*
- * Find a free area with specified alignment in a specific range.
- */
-unsigned long __init find_e820_area(unsigned long start, unsigned long end,
-                                   unsigned long size, unsigned long align)
-{
-       int i;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-               unsigned long addr, last;
-               unsigned long ei_last;
-
-               if (ei->type != E820_RAM)
-                       continue;
-               addr = round_up(ei->addr, align);
-               ei_last = ei->addr + ei->size;
-               if (addr < start)
-                       addr = round_up(start, align);
-               if (addr >= ei_last)
-                       continue;
-               while (bad_addr(&addr, size, align) && addr+size <= ei_last)
-                       ;
-               last = addr + size;
-               if (last > ei_last)
-                       continue;
-               if (last > end)
-                       continue;
-               return addr;
-       }
-       return -1UL;
-}
-
-/*
- * Find next free range after *start
- */
-unsigned long __init find_e820_area_size(unsigned long start,
-                                        unsigned long *sizep,
-                                        unsigned long align)
-{
-       int i;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-               unsigned long addr, last;
-               unsigned long ei_last;
-
-               if (ei->type != E820_RAM)
-                       continue;
-               addr = round_up(ei->addr, align);
-               ei_last = ei->addr + ei->size;
-               if (addr < start)
-                       addr = round_up(start, align);
-               if (addr >= ei_last)
-                       continue;
-               *sizep = ei_last - addr;
-               while (bad_addr_size(&addr, sizep, align) &&
-                       addr + *sizep <= ei_last)
-                       ;
-               last = addr + *sizep;
-               if (last > ei_last)
-                       continue;
-               return addr;
-       }
-       return -1UL;
-
-}
-/*
- * Find the highest page frame number we have available
- */
-unsigned long __init e820_end_of_ram(void)
-{
-       unsigned long end_pfn;
-
-       end_pfn = find_max_pfn_with_active_regions();
-
-       if (end_pfn > max_pfn_mapped)
-               max_pfn_mapped = end_pfn;
-       if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
-               max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
-       if (end_pfn > end_user_pfn)
-               end_pfn = end_user_pfn;
-       if (end_pfn > max_pfn_mapped)
-               end_pfn = max_pfn_mapped;
-
-       printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
-       return end_pfn;
-}
-
 /*
  * Mark e820 reserved areas as busy for the resource manager.
  */
@@ -346,393 +65,6 @@ void __init e820_reserve_resources(void)
        }
 }
 
-/*
- * Find the ranges of physical addresses that do not correspond to
- * e820 RAM areas and mark the corresponding pages as nosave for software
- * suspend and suspend to RAM.
- *
- * This function requires the e820 map to be sorted and without any
- * overlapping entries and assumes the first e820 area to be RAM.
- */
-void __init e820_mark_nosave_regions(void)
-{
-       int i;
-       unsigned long paddr;
-
-       paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
-       for (i = 1; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-
-               if (paddr < ei->addr)
-                       register_nosave_region(PFN_DOWN(paddr),
-                                               PFN_UP(ei->addr));
-
-               paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
-               if (ei->type != E820_RAM)
-                       register_nosave_region(PFN_UP(ei->addr),
-                                               PFN_DOWN(paddr));
-
-               if (paddr >= (end_pfn << PAGE_SHIFT))
-                       break;
-       }
-}
-
-/*
- * Finds an active region in the address range from start_pfn to end_pfn and
- * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
- */
-static int __init e820_find_active_region(const struct e820entry *ei,
-                                         unsigned long start_pfn,
-                                         unsigned long end_pfn,
-                                         unsigned long *ei_startpfn,
-                                         unsigned long *ei_endpfn)
-{
-       *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
-       *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
-
-       /* Skip map entries smaller than a page */
-       if (*ei_startpfn >= *ei_endpfn)
-               return 0;
-
-       /* Check if max_pfn_mapped should be updated */
-       if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
-               max_pfn_mapped = *ei_endpfn;
-
-       /* Skip if map is outside the node */
-       if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
-                                   *ei_startpfn >= end_pfn)
-               return 0;
-
-       /* Check for overlaps */
-       if (*ei_startpfn < start_pfn)
-               *ei_startpfn = start_pfn;
-       if (*ei_endpfn > end_pfn)
-               *ei_endpfn = end_pfn;
-
-       /* Obey end_user_pfn to save on memmap */
-       if (*ei_startpfn >= end_user_pfn)
-               return 0;
-       if (*ei_endpfn > end_user_pfn)
-               *ei_endpfn = end_user_pfn;
-
-       return 1;
-}
-
-/* Walk the e820 map and register active regions within a node */
-void __init
-e820_register_active_regions(int nid, unsigned long start_pfn,
-                                                       unsigned long end_pfn)
-{
-       unsigned long ei_startpfn;
-       unsigned long ei_endpfn;
-       int i;
-
-       for (i = 0; i < e820.nr_map; i++)
-               if (e820_find_active_region(&e820.map[i],
-                                           start_pfn, end_pfn,
-                                           &ei_startpfn, &ei_endpfn))
-                       add_active_range(nid, ei_startpfn, ei_endpfn);
-}
-
-/*
- * Add a memory region to the kernel e820 map.
- */
-void __init add_memory_region(unsigned long start, unsigned long size, int type)
-{
-       int x = e820.nr_map;
-
-       if (x == E820MAX) {
-               printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
-               return;
-       }
-
-       e820.map[x].addr = start;
-       e820.map[x].size = size;
-       e820.map[x].type = type;
-       e820.nr_map++;
-}
-
-/*
- * Find the hole size (in bytes) in the memory range.
- * @start: starting address of the memory range to scan
- * @end: ending address of the memory range to scan
- */
-unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
-{
-       unsigned long start_pfn = start >> PAGE_SHIFT;
-       unsigned long end_pfn = end >> PAGE_SHIFT;
-       unsigned long ei_startpfn, ei_endpfn, ram = 0;
-       int i;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               if (e820_find_active_region(&e820.map[i],
-                                           start_pfn, end_pfn,
-                                           &ei_startpfn, &ei_endpfn))
-                       ram += ei_endpfn - ei_startpfn;
-       }
-       return end - start - (ram << PAGE_SHIFT);
-}
-
-static void __init e820_print_map(char *who)
-{
-       int i;
-
-       for (i = 0; i < e820.nr_map; i++) {
-               printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
-                      (unsigned long long) e820.map[i].addr,
-                      (unsigned long long)
-                      (e820.map[i].addr + e820.map[i].size));
-               switch (e820.map[i].type) {
-               case E820_RAM:
-                       printk(KERN_CONT "(usable)\n");
-                       break;
-               case E820_RESERVED:
-                       printk(KERN_CONT "(reserved)\n");
-                       break;
-               case E820_ACPI:
-                       printk(KERN_CONT "(ACPI data)\n");
-                       break;
-               case E820_NVS:
-                       printk(KERN_CONT "(ACPI NVS)\n");
-                       break;
-               default:
-                       printk(KERN_CONT "type %u\n", e820.map[i].type);
-                       break;
-               }
-       }
-}
-
-/*
- * Sanitize the BIOS e820 map.
- *
- * Some e820 responses include overlapping entries. The following
- * replaces the original e820 map with a new one, removing overlaps.
- *
- */
-static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
-{
-       struct change_member {
-               struct e820entry *pbios; /* pointer to original bios entry */
-               unsigned long long addr; /* address for this change point */
-       };
-       static struct change_member change_point_list[2*E820MAX] __initdata;
-       static struct change_member *change_point[2*E820MAX] __initdata;
-       static struct e820entry *overlap_list[E820MAX] __initdata;
-       static struct e820entry new_bios[E820MAX] __initdata;
-       struct change_member *change_tmp;
-       unsigned long current_type, last_type;
-       unsigned long long last_addr;
-       int chgidx, still_changing;
-       int overlap_entries;
-       int new_bios_entry;
-       int old_nr, new_nr, chg_nr;
-       int i;
-
-       /*
-               Visually we're performing the following
-               (1,2,3,4 = memory types)...
-
-               Sample memory map (w/overlaps):
-                  ____22__________________
-                  ______________________4_
-                  ____1111________________
-                  _44_____________________
-                  11111111________________
-                  ____________________33__
-                  ___________44___________
-                  __________33333_________
-                  ______________22________
-                  ___________________2222_
-                  _________111111111______
-                  _____________________11_
-                  _________________4______
-
-               Sanitized equivalent (no overlap):
-                  1_______________________
-                  _44_____________________
-                  ___1____________________
-                  ____22__________________
-                  ______11________________
-                  _________1______________
-                  __________3_____________
-                  ___________44___________
-                  _____________33_________
-                  _______________2________
-                  ________________1_______
-                  _________________4______
-                  ___________________2____
-                  ____________________33__
-                  ______________________4_
-       */
-
-       /* if there's only one memory region, don't bother */
-       if (*pnr_map < 2)
-               return -1;
-
-       old_nr = *pnr_map;
-
-       /* bail out if we find any unreasonable addresses in bios map */
-       for (i = 0; i < old_nr; i++)
-               if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
-                       return -1;
-
-       /* create pointers for initial change-point information (for sorting) */
-       for (i = 0; i < 2 * old_nr; i++)
-               change_point[i] = &change_point_list[i];
-
-       /* record all known change-points (starting and ending addresses),
-          omitting those that are for empty memory regions */
-       chgidx = 0;
-       for (i = 0; i < old_nr; i++)    {
-               if (biosmap[i].size != 0) {
-                       change_point[chgidx]->addr = biosmap[i].addr;
-                       change_point[chgidx++]->pbios = &biosmap[i];
-                       change_point[chgidx]->addr = biosmap[i].addr +
-                               biosmap[i].size;
-                       change_point[chgidx++]->pbios = &biosmap[i];
-               }
-       }
-       chg_nr = chgidx;
-
-       /* sort change-point list by memory addresses (low -> high) */
-       still_changing = 1;
-       while (still_changing)  {
-               still_changing = 0;
-               for (i = 1; i < chg_nr; i++)  {
-                       unsigned long long curaddr, lastaddr;
-                       unsigned long long curpbaddr, lastpbaddr;
-
-                       curaddr = change_point[i]->addr;
-                       lastaddr = change_point[i - 1]->addr;
-                       curpbaddr = change_point[i]->pbios->addr;
-                       lastpbaddr = change_point[i - 1]->pbios->addr;
-
-                       /*
-                        * swap entries, when:
-                        *
-                        * curaddr > lastaddr or
-                        * curaddr == lastaddr and curaddr == curpbaddr and
-                        * lastaddr != lastpbaddr
-                        */
-                       if (curaddr < lastaddr ||
-                           (curaddr == lastaddr && curaddr == curpbaddr &&
-                            lastaddr != lastpbaddr)) {
-                               change_tmp = change_point[i];
-                               change_point[i] = change_point[i-1];
-                               change_point[i-1] = change_tmp;
-                               still_changing = 1;
-                       }
-               }
-       }
-
-       /* create a new bios memory map, removing overlaps */
-       overlap_entries = 0;     /* number of entries in the overlap table */
-       new_bios_entry = 0;      /* index for creating new bios map entries */
-       last_type = 0;           /* start with undefined memory type */
-       last_addr = 0;           /* start with 0 as last starting address */
-
-       /* loop through change-points, determining affect on the new bios map */
-       for (chgidx = 0; chgidx < chg_nr; chgidx++) {
-               /* keep track of all overlapping bios entries */
-               if (change_point[chgidx]->addr ==
-                   change_point[chgidx]->pbios->addr) {
-                       /*
-                        * add map entry to overlap list (> 1 entry
-                        * implies an overlap)
-                        */
-                       overlap_list[overlap_entries++] =
-                               change_point[chgidx]->pbios;
-               } else {
-                       /*
-                        * remove entry from list (order independent,
-                        * so swap with last)
-                        */
-                       for (i = 0; i < overlap_entries; i++) {
-                               if (overlap_list[i] ==
-                                   change_point[chgidx]->pbios)
-                                       overlap_list[i] =
-                                               overlap_list[overlap_entries-1];
-                       }
-                       overlap_entries--;
-               }
-               /*
-                * if there are overlapping entries, decide which
-                * "type" to use (larger value takes precedence --
-                * 1=usable, 2,3,4,4+=unusable)
-                */
-               current_type = 0;
-               for (i = 0; i < overlap_entries; i++)
-                       if (overlap_list[i]->type > current_type)
-                               current_type = overlap_list[i]->type;
-               /*
-                * continue building up new bios map based on this
-                * information
-                */
-               if (current_type != last_type)  {
-                       if (last_type != 0)      {
-                               new_bios[new_bios_entry].size =
-                                       change_point[chgidx]->addr - last_addr;
-                               /*
-                                * move forward only if the new size
-                                * was non-zero
-                                */
-                               if (new_bios[new_bios_entry].size != 0)
-                                       /*
-                                        * no more space left for new
-                                        * bios entries ?
-                                        */
-                                       if (++new_bios_entry >= E820MAX)
-                                               break;
-                       }
-                       if (current_type != 0)  {
-                               new_bios[new_bios_entry].addr =
-                                       change_point[chgidx]->addr;
-                               new_bios[new_bios_entry].type = current_type;
-                               last_addr = change_point[chgidx]->addr;
-                       }
-                       last_type = current_type;
-               }
-       }
-       /* retain count for new bios entries */
-       new_nr = new_bios_entry;
-
-       /* copy new bios mapping into original location */
-       memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
-       *pnr_map = new_nr;
-
-       return 0;
-}
-
-/*
- * Copy the BIOS e820 map into a safe place.
- *
- * Sanity-check it while we're at it..
- *
- * If we're lucky and live on a modern system, the setup code
- * will have given us a memory map that we can use to properly
- * set up memory.  If we aren't, we'll fake a memory map.
- */
-static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
-{
-       /* Only one memory region (or negative)? Ignore it */
-       if (nr_map < 2)
-               return -1;
-
-       do {
-               u64 start = biosmap->addr;
-               u64 size = biosmap->size;
-               u64 end = start + size;
-               u32 type = biosmap->type;
-
-               /* Overflow in 64 bits? Ignore the memory map. */
-               if (start > end)
-                       return -1;
-
-               add_memory_region(start, size, type);
-       } while (biosmap++, --nr_map);
-       return 0;
-}
-
 static void early_panic(char *msg)
 {
        early_printk(msg);
@@ -740,16 +72,21 @@ static void early_panic(char *msg)
 }
 
 /* We're not void only for x86 32-bit compat */
-char * __init machine_specific_memory_setup(void)
+char *__init machine_specific_memory_setup(void)
 {
        char *who = "BIOS-e820";
+       int new_nr;
        /*
         * Try to copy the BIOS-supplied E820-map.
         *
         * Otherwise fake a memory map; one section from 0k->640k,
         * the next section from 1mb->appropriate_mem_k
         */
-       sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
+       new_nr = boot_params.e820_entries;
+       sanitize_e820_map(boot_params.e820_map,
+                       ARRAY_SIZE(boot_params.e820_map),
+                       &new_nr);
+       boot_params.e820_entries = new_nr;
        if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
                early_panic("Cannot find a valid memory map");
        printk(KERN_INFO "BIOS-provided physical RAM map:\n");
@@ -787,7 +124,6 @@ static int __init parse_memmap_opt(char *p)
                saved_max_pfn = e820_end_of_ram();
                remove_all_active_ranges();
 #endif
-               max_pfn_mapped = 0;
                e820.nr_map = 0;
                userdef = 1;
                return 0;
@@ -818,9 +154,9 @@ early_param("memmap", parse_memmap_opt);
 void __init finish_e820_parsing(void)
 {
        if (userdef) {
-               char nr = e820.nr_map;
+               int nr = e820.nr_map;
 
-               if (sanitize_e820_map(e820.map, &nr) < 0)
+               if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
                        early_panic("Invalid user supplied memory map");
                e820.nr_map = nr;
 
@@ -829,109 +165,6 @@ void __init finish_e820_parsing(void)
        }
 }
 
-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
-                               unsigned new_type)
-{
-       int i;
-
-       BUG_ON(old_type == new_type);
-
-       for (i = 0; i < e820.nr_map; i++) {
-               struct e820entry *ei = &e820.map[i];
-               u64 final_start, final_end;
-               if (ei->type != old_type)
-                       continue;
-               /* totally covered? */
-               if (ei->addr >= start && ei->size <= size) {
-                       ei->type = new_type;
-                       continue;
-               }
-               /* partially covered */
-               final_start = max(start, ei->addr);
-               final_end = min(start + size, ei->addr + ei->size);
-               if (final_start >= final_end)
-                       continue;
-               add_memory_region(final_start, final_end - final_start,
-                                        new_type);
-       }
-}
-
-void __init update_e820(void)
-{
-       u8 nr_map;
-
-       nr_map = e820.nr_map;
-       if (sanitize_e820_map(e820.map, &nr_map))
-               return;
-       e820.nr_map = nr_map;
-       printk(KERN_INFO "modified physical RAM map:\n");
-       e820_print_map("modified");
-}
-
-unsigned long pci_mem_start = 0xaeedbabe;
-EXPORT_SYMBOL(pci_mem_start);
-
-/*
- * Search for the biggest gap in the low 32 bits of the e820
- * memory space.  We pass this space to PCI to assign MMIO resources
- * for hotplug or unconfigured devices in.
- * Hopefully the BIOS let enough space left.
- */
-__init void e820_setup_gap(void)
-{
-       unsigned long gapstart, gapsize, round;
-       unsigned long last;
-       int i;
-       int found = 0;
-
-       last = 0x100000000ull;
-       gapstart = 0x10000000;
-       gapsize = 0x400000;
-       i = e820.nr_map;
-       while (--i >= 0) {
-               unsigned long long start = e820.map[i].addr;
-               unsigned long long end = start + e820.map[i].size;
-
-               /*
-                * Since "last" is at most 4GB, we know we'll
-                * fit in 32 bits if this condition is true
-                */
-               if (last > end) {
-                       unsigned long gap = last - end;
-
-                       if (gap > gapsize) {
-                               gapsize = gap;
-                               gapstart = end;
-                               found = 1;
-                       }
-               }
-               if (start < last)
-                       last = start;
-       }
-
-       if (!found) {
-               gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
-               printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
-                      "address range\n"
-                      KERN_ERR "PCI: Unassigned devices with 32bit resource "
-                      "registers may break!\n");
-       }
-
-       /*
-        * See how much we want to round up: start off with
-        * rounding to the next 1MB area.
-        */
-       round = 0x100000;
-       while ((gapsize >> 4) > round)
-               round += round;
-       /* Fun with two's complement */
-       pci_mem_start = (gapstart + round) & -round;
-
-       printk(KERN_INFO
-              "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
-              pci_mem_start, gapstart, gapsize);
-}
-
 int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
 {
        int i;
index 77d424cf68b38e6b919f55b6fd895436e887fd74..d5c7fcdd18610690fe51e116105198736b096f7f 100644 (file)
@@ -213,6 +213,48 @@ unsigned long efi_get_time(void)
                      eft.minute, eft.second);
 }
 
+/*
+ * Tell the kernel about the EFI memory map.  This might include
+ * more than the max 128 entries that can fit in the e820 legacy
+ * (zeropage) memory map.
+ */
+
+static void __init add_efi_memmap(void)
+{
+       void *p;
+
+       for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+               efi_memory_desc_t *md = p;
+               unsigned long long start = md->phys_addr;
+               unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
+               int e820_type;
+
+               if (md->attribute & EFI_MEMORY_WB)
+                       e820_type = E820_RAM;
+               else
+                       e820_type = E820_RESERVED;
+               add_memory_region(start, size, e820_type);
+       }
+       sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
+void __init efi_reserve_early(void)
+{
+       unsigned long pmap;
+
+       pmap = boot_params.efi_info.efi_memmap;
+#ifdef CONFIG_X86_64
+       pmap += (__u64)boot_params.efi_info.efi_memmap_hi << 32;
+#endif
+       memmap.phys_map = (void *)pmap;
+       memmap.nr_map = boot_params.efi_info.efi_memmap_size /
+               boot_params.efi_info.efi_memdesc_size;
+       memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
+       memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
+       reserve_early(pmap, pmap + memmap.nr_map * memmap.desc_size,
+                     "EFI memmap");
+}
+
 #if EFI_DEBUG
 static void __init print_efi_memmap(void)
 {
@@ -242,21 +284,11 @@ void __init efi_init(void)
        int i = 0;
        void *tmp;
 
-#ifdef CONFIG_X86_32
        efi_phys.systab = (efi_system_table_t *)boot_params.efi_info.efi_systab;
-       memmap.phys_map = (void *)boot_params.efi_info.efi_memmap;
-#else
-       efi_phys.systab = (efi_system_table_t *)
-               (boot_params.efi_info.efi_systab |
-                ((__u64)boot_params.efi_info.efi_systab_hi<<32));
-       memmap.phys_map = (void *)
-               (boot_params.efi_info.efi_memmap |
-                ((__u64)boot_params.efi_info.efi_memmap_hi<<32));
+#ifdef CONFIG_X86_64
+       efi_phys.systab = (void *)efi_phys.systab +
+               ((__u64)boot_params.efi_info.efi_systab_hi<<32);
 #endif
-       memmap.nr_map = boot_params.efi_info.efi_memmap_size /
-               boot_params.efi_info.efi_memdesc_size;
-       memmap.desc_version = boot_params.efi_info.efi_memdesc_version;
-       memmap.desc_size = boot_params.efi_info.efi_memdesc_size;
 
        efi.systab = early_ioremap((unsigned long)efi_phys.systab,
                                   sizeof(efi_system_table_t));
@@ -370,6 +402,7 @@ void __init efi_init(void)
        if (memmap.desc_size != sizeof(efi_memory_desc_t))
                printk(KERN_WARNING "Kernel-defined memdesc"
                       "doesn't match the one from EFI!\n");
+       add_efi_memmap();
 
        /* Setup for EFI runtime service */
        reboot_type = BOOT_EFI;
index d0060fdcccac1658968db527f4c05d791b056e6b..652c5287215fdb9500e4013794d559cfc4ace801 100644 (file)
@@ -97,13 +97,7 @@ void __init efi_call_phys_epilog(void)
        early_runtime_code_mapping_set_exec(0);
 }
 
-void __init efi_reserve_bootmem(void)
-{
-       reserve_bootmem_generic((unsigned long)memmap.phys_map,
-                               memmap.nr_map * memmap.desc_size);
-}
-
-void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size)
+void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size)
 {
        static unsigned pages_mapped __initdata;
        unsigned i, pages;
index cbaaf69bedb29c92055e2272a12efba7f49f9ee1..1fa8be5bd217b5c45a649e6745b847567a9553f6 100644 (file)
@@ -51,7 +51,7 @@ void __init setup_apic_routing(void)
        else
 #endif
 
-       if (num_possible_cpus() <= 8)
+       if (max_physical_apicid < 8)
                genapic = &apic_flat;
        else
                genapic = &apic_physflat;
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c
new file mode 100644 (file)
index 0000000..a727c0b
--- /dev/null
@@ -0,0 +1,73 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/setup.h>
+#include <asm/bios_ebda.h>
+
+#define BIOS_LOWMEM_KILOBYTES 0x413
+
+/*
+ * The BIOS places the EBDA/XBDA at the top of conventional
+ * memory, and usually decreases the reported amount of
+ * conventional memory (int 0x12) too. This also contains a
+ * workaround for Dell systems that neglect to reserve EBDA.
+ * The same workaround also avoids a problem with the AMD768MPX
+ * chipset: reserve a page before VGA to prevent PCI prefetch
+ * into it (errata #56). Usually the page is reserved anyways,
+ * unless you have no PS/2 mouse plugged in.
+ */
+void __init reserve_ebda_region(void)
+{
+       unsigned int lowmem, ebda_addr;
+
+       /* To determine the position of the EBDA and the */
+       /* end of conventional memory, we need to look at */
+       /* the BIOS data area. In a paravirtual environment */
+       /* that area is absent. We'll just have to assume */
+       /* that the paravirt case can handle memory setup */
+       /* correctly, without our help. */
+       if (paravirt_enabled())
+               return;
+
+       /* end of low (conventional) memory */
+       lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
+       lowmem <<= 10;
+
+       /* start of EBDA area */
+       ebda_addr = get_bios_ebda();
+
+       /* Fixup: bios puts an EBDA in the top 64K segment */
+       /* of conventional memory, but does not adjust lowmem. */
+       if ((lowmem - ebda_addr) <= 0x10000)
+               lowmem = ebda_addr;
+
+       /* Fixup: bios does not report an EBDA at all. */
+       /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
+       if ((ebda_addr == 0) && (lowmem >= 0x9f000))
+               lowmem = 0x9f000;
+
+       /* Paranoia: should never happen, but... */
+       if ((lowmem == 0) || (lowmem >= 0x100000))
+               lowmem = 0x9f000;
+
+       /* reserve all memory between lowmem and the 1MB mark */
+       reserve_early(lowmem, 0x100000, "BIOS reserved");
+}
+
+void __init reserve_setup_data(void)
+{
+       struct setup_data *data;
+       u64 pa_data;
+       char buf[32];
+
+       if (boot_params.hdr.version < 0x0209)
+               return;
+       pa_data = boot_params.hdr.setup_data;
+       while (pa_data) {
+               data = early_ioremap(pa_data, sizeof(*data));
+               sprintf(buf, "setup data %x", data->type);
+               reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+               pa_data = data->next;
+               early_iounmap(data, sizeof(*data));
+       }
+}
index 3db0590589276e5ecbc1d74dd1331264bc414f11..fa1d25dd83e3fa18cc2a445841c61e1e1b8b571c 100644 (file)
@@ -8,7 +8,34 @@
 #include <linux/init.h>
 #include <linux/start_kernel.h>
 
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/e820.h>
+#include <asm/bios_ebda.h>
+
 void __init i386_start_kernel(void)
 {
+       reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+
+#ifdef CONFIG_BLK_DEV_INITRD
+       /* Reserve INITRD */
+       if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+               u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+               u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
+               u64 ramdisk_end   = ramdisk_image + ramdisk_size;
+               reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
+       }
+#endif
+       reserve_early(init_pg_tables_start, init_pg_tables_end,
+                       "INIT_PG_TABLE");
+
+       reserve_ebda_region();
+
+       /*
+        * At this point everything still needed from the boot loader
+        * or BIOS or kernel text should be early reserved or marked not
+        * RAM in e820. All other memory is free game.
+        */
+
        start_kernel();
 }
index e25c57b8aa844937742f246288e9ffa2e2be5229..5fbed459ff3b6bf8e5744f45e518d5981266175a 100644 (file)
@@ -51,74 +51,6 @@ static void __init copy_bootdata(char *real_mode_data)
        }
 }
 
-#define BIOS_LOWMEM_KILOBYTES 0x413
-
-/*
- * The BIOS places the EBDA/XBDA at the top of conventional
- * memory, and usually decreases the reported amount of
- * conventional memory (int 0x12) too. This also contains a
- * workaround for Dell systems that neglect to reserve EBDA.
- * The same workaround also avoids a problem with the AMD768MPX
- * chipset: reserve a page before VGA to prevent PCI prefetch
- * into it (errata #56). Usually the page is reserved anyways,
- * unless you have no PS/2 mouse plugged in.
- */
-static void __init reserve_ebda_region(void)
-{
-       unsigned int lowmem, ebda_addr;
-
-       /* To determine the position of the EBDA and the */
-       /* end of conventional memory, we need to look at */
-       /* the BIOS data area. In a paravirtual environment */
-       /* that area is absent. We'll just have to assume */
-       /* that the paravirt case can handle memory setup */
-       /* correctly, without our help. */
-       if (paravirt_enabled())
-               return;
-
-       /* end of low (conventional) memory */
-       lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
-       lowmem <<= 10;
-
-       /* start of EBDA area */
-       ebda_addr = get_bios_ebda();
-
-       /* Fixup: bios puts an EBDA in the top 64K segment */
-       /* of conventional memory, but does not adjust lowmem. */
-       if ((lowmem - ebda_addr) <= 0x10000)
-               lowmem = ebda_addr;
-
-       /* Fixup: bios does not report an EBDA at all. */
-       /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
-       if ((ebda_addr == 0) && (lowmem >= 0x9f000))
-               lowmem = 0x9f000;
-
-       /* Paranoia: should never happen, but... */
-       if ((lowmem == 0) || (lowmem >= 0x100000))
-               lowmem = 0x9f000;
-
-       /* reserve all memory between lowmem and the 1MB mark */
-       reserve_early(lowmem, 0x100000, "BIOS reserved");
-}
-
-static void __init reserve_setup_data(void)
-{
-       struct setup_data *data;
-       unsigned long pa_data;
-       char buf[32];
-
-       if (boot_params.hdr.version < 0x0209)
-               return;
-       pa_data = boot_params.hdr.setup_data;
-       while (pa_data) {
-               data = early_ioremap(pa_data, sizeof(*data));
-               sprintf(buf, "setup data %x", data->type);
-               reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
-               pa_data = data->next;
-               early_iounmap(data, sizeof(*data));
-       }
-}
-
 void __init x86_64_start_kernel(char * real_mode_data)
 {
        int i;
index f7357cc0162c2f1c4786b3208e8524b51837fef8..b98b338aae1a536639186611dc915ce82ee02366 100644 (file)
@@ -194,6 +194,7 @@ default_entry:
        xorl %ebx,%ebx                          /* %ebx is kept at zero */
 
        movl $pa(pg0), %edi
+       movl %edi, pa(init_pg_tables_start)
        movl $pa(swapper_pg_pmd), %edx
        movl $PTE_ATTR, %eax
 10:
@@ -219,6 +220,8 @@ default_entry:
        jb 10b
 1:
        movl %edi,pa(init_pg_tables_end)
+       shrl $12, %eax
+       movl %eax, pa(max_pfn_mapped)
 
        /* Do early initialization of the fixmap area */
        movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
@@ -228,6 +231,7 @@ default_entry:
 page_pde_offset = (__PAGE_OFFSET >> 20);
 
        movl $pa(pg0), %edi
+       movl %edi, pa(init_pg_tables_start)
        movl $pa(swapper_pg_dir), %edx
        movl $PTE_ATTR, %eax
 10:
@@ -249,6 +253,8 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
        cmpl %ebp,%eax
        jb 10b
        movl %edi,pa(init_pg_tables_end)
+       shrl $12, %eax
+       movl %eax, pa(max_pfn_mapped)
 
        /* Do early initialization of the fixmap area */
        movl $pa(swapper_pg_fixmap)+PDE_ATTR,%eax
index 4dc8600d9d2072ed5ed79ee3884f70afd2281636..0662817d61bfdd61f189fb7576ada934fdbfbedb 100644 (file)
@@ -72,15 +72,21 @@ int sis_apic_bug = -1;
 int nr_ioapic_registers[MAX_IO_APICS];
 
 /* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
 /* MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
 static int disable_timer_pin_1 __initdata;
 
 /*
@@ -110,7 +116,7 @@ struct io_apic {
 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 {
        return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-               + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+               + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
 }
 
 static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -801,10 +807,10 @@ static int find_irq_entry(int apic, int pin, int type)
        int i;
 
        for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mpc_irqtype == type &&
-                   (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
-                    mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
-                   mp_irqs[i].mpc_dstirq == pin)
+               if (mp_irqs[i].mp_irqtype == type &&
+                   (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+                    mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
+                   mp_irqs[i].mp_dstirq == pin)
                        return i;
 
        return -1;
@@ -818,13 +824,13 @@ static int __init find_isa_irq_pin(int irq, int type)
        int i;
 
        for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mpc_srcbus;
+               int lbus = mp_irqs[i].mp_srcbus;
 
                if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mpc_irqtype == type) &&
-                   (mp_irqs[i].mpc_srcbusirq == irq))
+                   (mp_irqs[i].mp_irqtype == type) &&
+                   (mp_irqs[i].mp_srcbusirq == irq))
 
-                       return mp_irqs[i].mpc_dstirq;
+                       return mp_irqs[i].mp_dstirq;
        }
        return -1;
 }
@@ -834,17 +840,17 @@ static int __init find_isa_irq_apic(int irq, int type)
        int i;
 
        for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mpc_srcbus;
+               int lbus = mp_irqs[i].mp_srcbus;
 
                if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mpc_irqtype == type) &&
-                   (mp_irqs[i].mpc_srcbusirq == irq))
+                   (mp_irqs[i].mp_irqtype == type) &&
+                   (mp_irqs[i].mp_srcbusirq == irq))
                        break;
        }
        if (i < mp_irq_entries) {
                int apic;
                for(apic = 0; apic < nr_ioapics; apic++) {
-                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
                                return apic;
                }
        }
@@ -864,28 +870,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 
        apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
                "slot:%d, pin:%d.\n", bus, slot, pin);
-       if (mp_bus_id_to_pci_bus[bus] == -1) {
+       if (test_bit(bus, mp_bus_not_pci)) {
                printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
                return -1;
        }
        for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mpc_srcbus;
+               int lbus = mp_irqs[i].mp_srcbus;
 
                for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
-                           mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+                           mp_irqs[i].mp_dstapic == MP_APIC_ALL)
                                break;
 
                if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].mpc_irqtype &&
+                   !mp_irqs[i].mp_irqtype &&
                    (bus == lbus) &&
-                   (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+                   (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
 
                        if (!(apic || IO_APIC_IRQ(irq)))
                                continue;
 
-                       if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+                       if (pin == (mp_irqs[i].mp_srcbusirq & 3))
                                return irq;
                        /*
                         * Use the first all-but-pin matching entry as a
@@ -952,7 +958,7 @@ static int EISA_ELCR(unsigned int irq)
  * EISA conforming in the MP table, that means its trigger type must
  * be read in from the ELCR */
 
-#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
+#define default_EISA_trigger(idx)      (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
 #define default_EISA_polarity(idx)     default_ISA_polarity(idx)
 
 /* PCI interrupts are always polarity one level triggered,
@@ -969,13 +975,13 @@ static int EISA_ELCR(unsigned int irq)
 
 static int MPBIOS_polarity(int idx)
 {
-       int bus = mp_irqs[idx].mpc_srcbus;
+       int bus = mp_irqs[idx].mp_srcbus;
        int polarity;
 
        /*
         * Determine IRQ line polarity (high active or low active):
         */
-       switch (mp_irqs[idx].mpc_irqflag & 3)
+       switch (mp_irqs[idx].mp_irqflag & 3)
        {
                case 0: /* conforms, ie. bus-type dependent polarity */
                {
@@ -1012,13 +1018,13 @@ static int MPBIOS_polarity(int idx)
 
 static int MPBIOS_trigger(int idx)
 {
-       int bus = mp_irqs[idx].mpc_srcbus;
+       int bus = mp_irqs[idx].mp_srcbus;
        int trigger;
 
        /*
         * Determine IRQ trigger mode (edge or level sensitive):
         */
-       switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+       switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
        {
                case 0: /* conforms, ie. bus-type dependent */
                {
@@ -1097,16 +1103,16 @@ static inline int irq_trigger(int idx)
 static int pin_2_irq(int idx, int apic, int pin)
 {
        int irq, i;
-       int bus = mp_irqs[idx].mpc_srcbus;
+       int bus = mp_irqs[idx].mp_srcbus;
 
        /*
         * Debugging check, we are in big trouble if this message pops up!
         */
-       if (mp_irqs[idx].mpc_dstirq != pin)
+       if (mp_irqs[idx].mp_dstirq != pin)
                printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 
        if (test_bit(bus, mp_bus_not_pci))
-               irq = mp_irqs[idx].mpc_srcbusirq;
+               irq = mp_irqs[idx].mp_srcbusirq;
        else {
                /*
                 * PCI IRQs are mapped in order
@@ -1250,12 +1256,12 @@ static void __init setup_IO_APIC_irqs(void)
                        if (first_notcon) {
                                apic_printk(APIC_VERBOSE, KERN_DEBUG
                                                " IO-APIC (apicid-pin) %d-%d",
-                                               mp_ioapics[apic].mpc_apicid,
+                                               mp_ioapics[apic].mp_apicid,
                                                pin);
                                first_notcon = 0;
                        } else
                                apic_printk(APIC_VERBOSE, ", %d-%d",
-                                       mp_ioapics[apic].mpc_apicid, pin);
+                                       mp_ioapics[apic].mp_apicid, pin);
                        continue;
                }
 
@@ -1357,7 +1363,7 @@ void __init print_IO_APIC(void)
        printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
        for (i = 0; i < nr_ioapics; i++)
                printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-                      mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+                      mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
 
        /*
         * We are a bit conservative about what we expect.  We have to
@@ -1376,7 +1382,7 @@ void __init print_IO_APIC(void)
                reg_03.raw = io_apic_read(apic, 3);
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
-       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
        printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
        printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1716,7 +1722,6 @@ void disable_IO_APIC(void)
  * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
  */
 
-#ifndef CONFIG_X86_NUMAQ
 static void __init setup_ioapic_ids_from_mpc(void)
 {
        union IO_APIC_reg_00 reg_00;
@@ -1726,6 +1731,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
        unsigned char old_id;
        unsigned long flags;
 
+#ifdef CONFIG_X86_NUMAQ
+       if (found_numaq)
+               return;
+#endif
+
        /*
         * Don't check I/O APIC IDs for xAPIC systems.  They have
         * no meaning without the serial APIC bus.
@@ -1749,14 +1759,14 @@ static void __init setup_ioapic_ids_from_mpc(void)
                reg_00.raw = io_apic_read(apic, 0);
                spin_unlock_irqrestore(&ioapic_lock, flags);
                
-               old_id = mp_ioapics[apic].mpc_apicid;
+               old_id = mp_ioapics[apic].mp_apicid;
 
-               if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
+               if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
-                               apic, mp_ioapics[apic].mpc_apicid);
+                               apic, mp_ioapics[apic].mp_apicid);
                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
                                reg_00.bits.ID);
-                       mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
+                       mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
                }
 
                /*
@@ -1765,9 +1775,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
                 * 'stuck on smp_invalidate_needed IPI wait' messages.
                 */
                if (check_apicid_used(phys_id_present_map,
-                                       mp_ioapics[apic].mpc_apicid)) {
+                                       mp_ioapics[apic].mp_apicid)) {
                        printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
-                               apic, mp_ioapics[apic].mpc_apicid);
+                               apic, mp_ioapics[apic].mp_apicid);
                        for (i = 0; i < get_physical_broadcast(); i++)
                                if (!physid_isset(i, phys_id_present_map))
                                        break;
@@ -1776,13 +1786,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
                        printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
                                i);
                        physid_set(i, phys_id_present_map);
-                       mp_ioapics[apic].mpc_apicid = i;
+                       mp_ioapics[apic].mp_apicid = i;
                } else {
                        physid_mask_t tmp;
-                       tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
+                       tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
                        apic_printk(APIC_VERBOSE, "Setting %d in the "
                                        "phys_id_present_map\n",
-                                       mp_ioapics[apic].mpc_apicid);
+                                       mp_ioapics[apic].mp_apicid);
                        physids_or(phys_id_present_map, phys_id_present_map, tmp);
                }
 
@@ -1791,11 +1801,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
                 * We need to adjust the IRQ routing table
                 * if the ID changed.
                 */
-               if (old_id != mp_ioapics[apic].mpc_apicid)
+               if (old_id != mp_ioapics[apic].mp_apicid)
                        for (i = 0; i < mp_irq_entries; i++)
-                               if (mp_irqs[i].mpc_dstapic == old_id)
-                                       mp_irqs[i].mpc_dstapic
-                                               = mp_ioapics[apic].mpc_apicid;
+                               if (mp_irqs[i].mp_dstapic == old_id)
+                                       mp_irqs[i].mp_dstapic
+                                               = mp_ioapics[apic].mp_apicid;
 
                /*
                 * Read the right value from the MPC table and
@@ -1803,9 +1813,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
                 */
                apic_printk(APIC_VERBOSE, KERN_INFO
                        "...changing IO-APIC physical APIC ID to %d ...",
-                       mp_ioapics[apic].mpc_apicid);
+                       mp_ioapics[apic].mp_apicid);
 
-               reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
+               reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
                spin_lock_irqsave(&ioapic_lock, flags);
                io_apic_write(apic, 0, reg_00.raw);
                spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -1816,15 +1826,12 @@ static void __init setup_ioapic_ids_from_mpc(void)
                spin_lock_irqsave(&ioapic_lock, flags);
                reg_00.raw = io_apic_read(apic, 0);
                spin_unlock_irqrestore(&ioapic_lock, flags);
-               if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
+               if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
                        printk("could not set ID!\n");
                else
                        apic_printk(APIC_VERBOSE, " ok.\n");
        }
 }
-#else
-static void __init setup_ioapic_ids_from_mpc(void) { }
-#endif
 
 int no_timer_check __initdata;
 
@@ -2347,8 +2354,8 @@ static int ioapic_resume(struct sys_device *dev)
 
        spin_lock_irqsave(&ioapic_lock, flags);
        reg_00.raw = io_apic_read(dev->id, 0);
-       if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
-               reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+       if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+               reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
                io_apic_write(dev->id, 0, reg_00.raw);
        }
        spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2781,7 +2788,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 
        apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
                "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
-               mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
+               mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
                edge_level, active_high_low);
 
        ioapic_register_intr(irq, entry.vector, edge_level);
@@ -2802,8 +2809,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
                return -1;
 
        for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mpc_irqtype == mp_INT &&
-                   mp_irqs[i].mpc_srcbusirq == bus_irq)
+               if (mp_irqs[i].mp_irqtype == mp_INT &&
+                   mp_irqs[i].mp_srcbusirq == bus_irq)
                        break;
        if (i >= mp_irq_entries)
                return -1;
index ef1a8dfcc529a36643bdbe34f835eb1819c5f265..339cf6f926dc96875e23b65815f3d98d582b946d 100644 (file)
@@ -104,15 +104,17 @@ DEFINE_SPINLOCK(vector_lock);
 int nr_ioapic_registers[MAX_IO_APICS];
 
 /* I/O APIC entries */
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 int nr_ioapics;
 
 /* MP IRQ source entries */
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* # of MP IRQ source entries */
 int mp_irq_entries;
 
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
 /*
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
@@ -140,7 +142,7 @@ struct io_apic {
 static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
 {
        return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
-               + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
+               + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
 }
 
 static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -453,10 +455,10 @@ static int find_irq_entry(int apic, int pin, int type)
        int i;
 
        for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mpc_irqtype == type &&
-                   (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
-                    mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
-                   mp_irqs[i].mpc_dstirq == pin)
+               if (mp_irqs[i].mp_irqtype == type &&
+                   (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
+                    mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
+                   mp_irqs[i].mp_dstirq == pin)
                        return i;
 
        return -1;
@@ -470,13 +472,13 @@ static int __init find_isa_irq_pin(int irq, int type)
        int i;
 
        for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mpc_srcbus;
+               int lbus = mp_irqs[i].mp_srcbus;
 
                if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mpc_irqtype == type) &&
-                   (mp_irqs[i].mpc_srcbusirq == irq))
+                   (mp_irqs[i].mp_irqtype == type) &&
+                   (mp_irqs[i].mp_srcbusirq == irq))
 
-                       return mp_irqs[i].mpc_dstirq;
+                       return mp_irqs[i].mp_dstirq;
        }
        return -1;
 }
@@ -486,17 +488,17 @@ static int __init find_isa_irq_apic(int irq, int type)
        int i;
 
        for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mpc_srcbus;
+               int lbus = mp_irqs[i].mp_srcbus;
 
                if (test_bit(lbus, mp_bus_not_pci) &&
-                   (mp_irqs[i].mpc_irqtype == type) &&
-                   (mp_irqs[i].mpc_srcbusirq == irq))
+                   (mp_irqs[i].mp_irqtype == type) &&
+                   (mp_irqs[i].mp_srcbusirq == irq))
                        break;
        }
        if (i < mp_irq_entries) {
                int apic;
                for(apic = 0; apic < nr_ioapics; apic++) {
-                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
                                return apic;
                }
        }
@@ -516,28 +518,28 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 
        apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
                bus, slot, pin);
-       if (mp_bus_id_to_pci_bus[bus] == -1) {
+       if (test_bit(bus, mp_bus_not_pci)) {
                apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
                return -1;
        }
        for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].mpc_srcbus;
+               int lbus = mp_irqs[i].mp_srcbus;
 
                for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
-                           mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
+                       if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
+                           mp_irqs[i].mp_dstapic == MP_APIC_ALL)
                                break;
 
                if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].mpc_irqtype &&
+                   !mp_irqs[i].mp_irqtype &&
                    (bus == lbus) &&
-                   (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
+                   (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
 
                        if (!(apic || IO_APIC_IRQ(irq)))
                                continue;
 
-                       if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
+                       if (pin == (mp_irqs[i].mp_srcbusirq & 3))
                                return irq;
                        /*
                         * Use the first all-but-pin matching entry as a
@@ -565,13 +567,13 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 
 static int MPBIOS_polarity(int idx)
 {
-       int bus = mp_irqs[idx].mpc_srcbus;
+       int bus = mp_irqs[idx].mp_srcbus;
        int polarity;
 
        /*
         * Determine IRQ line polarity (high active or low active):
         */
-       switch (mp_irqs[idx].mpc_irqflag & 3)
+       switch (mp_irqs[idx].mp_irqflag & 3)
        {
                case 0: /* conforms, ie. bus-type dependent polarity */
                        if (test_bit(bus, mp_bus_not_pci))
@@ -607,13 +609,13 @@ static int MPBIOS_polarity(int idx)
 
 static int MPBIOS_trigger(int idx)
 {
-       int bus = mp_irqs[idx].mpc_srcbus;
+       int bus = mp_irqs[idx].mp_srcbus;
        int trigger;
 
        /*
         * Determine IRQ trigger mode (edge or level sensitive):
         */
-       switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
+       switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
        {
                case 0: /* conforms, ie. bus-type dependent */
                        if (test_bit(bus, mp_bus_not_pci))
@@ -660,16 +662,16 @@ static inline int irq_trigger(int idx)
 static int pin_2_irq(int idx, int apic, int pin)
 {
        int irq, i;
-       int bus = mp_irqs[idx].mpc_srcbus;
+       int bus = mp_irqs[idx].mp_srcbus;
 
        /*
         * Debugging check, we are in big trouble if this message pops up!
         */
-       if (mp_irqs[idx].mpc_dstirq != pin)
+       if (mp_irqs[idx].mp_dstirq != pin)
                printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
 
        if (test_bit(bus, mp_bus_not_pci)) {
-               irq = mp_irqs[idx].mpc_srcbusirq;
+               irq = mp_irqs[idx].mp_srcbusirq;
        } else {
                /*
                 * PCI IRQs are mapped in order
@@ -846,7 +848,7 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
        apic_printk(APIC_VERBOSE,KERN_DEBUG
                    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
                    "IRQ %d Mode:%i Active:%i)\n",
-                   apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
+                   apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
                    irq, trigger, polarity);
 
        /*
@@ -887,10 +889,10 @@ static void __init setup_IO_APIC_irqs(void)
                idx = find_irq_entry(apic,pin,mp_INT);
                if (idx == -1) {
                        if (first_notcon) {
-                               apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
                                first_notcon = 0;
                        } else
-                               apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
+                               apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
                        continue;
                }
                if (!first_notcon) {
@@ -965,7 +967,7 @@ void __apicdebuginit print_IO_APIC(void)
        printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
        for (i = 0; i < nr_ioapics; i++)
                printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
-                      mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
+                      mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
 
        /*
         * We are a bit conservative about what we expect.  We have to
@@ -983,7 +985,7 @@ void __apicdebuginit print_IO_APIC(void)
        spin_unlock_irqrestore(&ioapic_lock, flags);
 
        printk("\n");
-       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
+       printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
        printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
        printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
 
@@ -1841,8 +1843,8 @@ static int ioapic_resume(struct sys_device *dev)
 
        spin_lock_irqsave(&ioapic_lock, flags);
        reg_00.raw = io_apic_read(dev->id, 0);
-       if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
-               reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+       if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+               reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
                io_apic_write(dev->id, 0, reg_00.raw);
        }
        spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2242,8 +2244,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
                return -1;
 
        for (i = 0; i < mp_irq_entries; i++)
-               if (mp_irqs[i].mpc_irqtype == mp_INT &&
-                   mp_irqs[i].mpc_srcbusirq == bus_irq)
+               if (mp_irqs[i].mp_irqtype == mp_INT &&
+                   mp_irqs[i].mp_srcbusirq == bus_irq)
                        break;
        if (i >= mp_irq_entries)
                return -1;
@@ -2336,7 +2338,7 @@ void __init ioapic_init_mappings(void)
        ioapic_res = ioapic_setup_resources();
        for (i = 0; i < nr_ioapics; i++) {
                if (smp_found_config) {
-                       ioapic_phys = mp_ioapics[i].mpc_apicaddr;
+                       ioapic_phys = mp_ioapics[i].mp_apicaddr;
                } else {
                        ioapic_phys = (unsigned long)
                                alloc_bootmem_pages(PAGE_SIZE);
index 404683b94e79592c69e04930ed385056f8f5861c..1cc7a4b8643fa8c3bc72579b7207024680a6c2aa 100644 (file)
@@ -25,6 +25,8 @@
 #include <asm/proto.h>
 #include <asm/acpi.h>
 #include <asm/bios_ebda.h>
+#include <asm/e820.h>
+#include <asm/trampoline.h>
 
 #include <mach_apic.h>
 #ifdef CONFIG_X86_32
 #include <mach_mpparse.h>
 #endif
 
-/* Have we found an MP table */
-int smp_found_config;
-
-/*
- * Various Linux-internal data structures created from the
- * MP-table.
- */
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-int mp_bus_id_to_type[MAX_MP_BUSSES];
-#endif
-
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
-int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
-
-static int mp_current_pci_id;
-
-int pic_mode;
-
-/*
- * Intel MP BIOS table parsing routines:
- */
-
 /*
  * Checksum an MP configuration block.
  */
@@ -69,15 +49,73 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 }
 
 #ifdef CONFIG_X86_NUMAQ
+int found_numaq;
 /*
  * Have to match translation table entries to main table entries by counter
  * hence the mpc_record variable .... can't see a less disgusting way of
  * doing this ....
  */
+/*
+ * One entry of the translation table (translation_table[] below, indexed
+ * by mpc_record).  The NUMAQ helpers in this file read only two fields:
+ * trans_quad, used as the quad/node number, and trans_local, used as the
+ * bus number local to that quad.  The other fields are not interpreted by
+ * the code shown here.
+ * NOTE(review): presumably this mirrors the layout of an OEM MP-table
+ * record, so field order and widths must not change -- confirm.
+ */
+struct mpc_config_translation {
+       unsigned char mpc_type;
+       unsigned char trans_len;
+       unsigned char trans_type;
+       unsigned char trans_quad;
+       unsigned char trans_global;
+       unsigned char trans_local;
+       unsigned short trans_reserved;
+};
+
 
 static int mpc_record;
 static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
     __cpuinitdata;
+
+/*
+ * Combine a quad number and a physical APIC ID into one logical APIC ID.
+ *
+ * The quad number is shifted left by four bits.  To that is added
+ * phys_apicid << 1 when phys_apicid is non-zero, or 1 when it is zero.
+ * Examples: (quad 2, phys 0) -> 33, (quad 1, phys 4) -> 24.
+ */
+static inline int generate_logical_apicid(int quad, int phys_apicid)
+{
+       return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
+}
+
+
+/*
+ * Compute the logical APIC ID for an MP-table processor entry.
+ *
+ * @m:                  processor entry; mpc_apicid, mpc_cpufeature and
+ *                      mpc_apicver are read.
+ * @translation_record: translation entry for the same record; only
+ *                      trans_quad is read.
+ *
+ * Logs the processor at KERN_DEBUG and returns the value produced by
+ * generate_logical_apicid(quad, m->mpc_apicid).
+ *
+ * NOTE(review): translation_record is dereferenced unconditionally, so
+ * the caller must pass a valid entry -- confirm translation_table[] is
+ * populated before this is reached.
+ */
+static inline int mpc_apic_id(struct mpc_config_processor *m,
+                       struct mpc_config_translation *translation_record)
+{
+       int quad = translation_record->trans_quad;
+       int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
+
+       /* The value labelled "apic" in the message is the logical APIC ID. */
+       printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
+              m->mpc_apicid,
+              (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
+              (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
+              m->mpc_apicver, quad, logical_apicid);
+       return logical_apicid;
+}
+
+/* Quad (node) number recorded for each MP bus ID by mpc_oem_bus_info(). */
+int mp_bus_id_to_node[MAX_MP_BUSSES];
+
+/* Quad-local bus number recorded for each MP bus ID by mpc_oem_bus_info(). */
+int mp_bus_id_to_local[MAX_MP_BUSSES];
+
+/*
+ * Record which quad a bus belongs to and its local number on that quad.
+ *
+ * @m:           bus entry; mpc_busid selects the slot in both tables.
+ * @name:        bus type string, used only in the log message.
+ * @translation: translation entry supplying trans_quad and trans_local.
+ *
+ * NOTE(review): m->mpc_busid is used as an array index without a range
+ * check in this function -- confirm it cannot reach MAX_MP_BUSSES at the
+ * call site.
+ */
+static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
+       struct mpc_config_translation *translation)
+{
+       int quad = translation->trans_quad;
+       int local = translation->trans_local;
+
+       mp_bus_id_to_node[m->mpc_busid] = quad;
+       mp_bus_id_to_local[m->mpc_busid] = local;
+       printk(KERN_INFO "Bus #%d is %s (node %d)\n",
+              m->mpc_busid, name, quad);
+}
+
+/* Reverse lookup: [quad][local bus number] -> MP bus ID. */
+int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+/*
+ * Record the MP bus ID of a PCI bus under its (quad, local bus) pair.
+ *
+ * @m:           bus entry supplying mpc_busid.
+ * @translation: translation entry supplying trans_quad and trans_local.
+ *
+ * NOTE(review): quad and local come straight from the translation entry
+ * and index a [NR_CPUS/4][4] array without a range check -- confirm the
+ * table contents are guaranteed to stay within those bounds.
+ */
+static void mpc_oem_pci_bus(struct mpc_config_bus *m,
+       struct mpc_config_translation *translation)
+{
+       int quad = translation->trans_quad;
+       int local = translation->trans_local;
+
+       quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
+}
+
 #endif
 
 static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
@@ -90,7 +128,10 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
                return;
        }
 #ifdef CONFIG_X86_NUMAQ
-       apicid = mpc_apic_id(m, translation_table[mpc_record]);
+       if (found_numaq)
+               apicid = mpc_apic_id(m, translation_table[mpc_record]);
+       else
+               apicid = m->mpc_apicid;
 #else
        apicid = m->mpc_apicid;
 #endif
@@ -103,17 +144,18 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
        generic_processor_info(apicid, m->mpc_apicver);
 }
 
+#ifdef CONFIG_X86_IO_APIC
 static void __init MP_bus_info(struct mpc_config_bus *m)
 {
        char str[7];
-
        memcpy(str, m->mpc_bustype, 6);
        str[6] = 0;
 
 #ifdef CONFIG_X86_NUMAQ
-       mpc_oem_bus_info(m, str, translation_table[mpc_record]);
+       if (found_numaq)
+               mpc_oem_bus_info(m, str, translation_table[mpc_record]);
 #else
-       Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
+       printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str);
 #endif
 
 #if MAX_MP_BUSSES < 256
@@ -132,11 +174,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 #endif
        } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
 #ifdef CONFIG_X86_NUMAQ
-               mpc_oem_pci_bus(m, translation_table[mpc_record]);
+               if (found_numaq)
+                       mpc_oem_pci_bus(m, translation_table[mpc_record]);
 #endif
                clear_bit(m->mpc_busid, mp_bus_not_pci);
-               mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
-               mp_current_pci_id++;
 #if defined(CONFIG_EISA) || defined (CONFIG_MCA)
                mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
        } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
@@ -147,6 +188,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
        } else
                printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
 }
+#endif
 
 #ifdef CONFIG_X86_IO_APIC
 
@@ -176,18 +218,89 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
        if (bad_ioapic(m->mpc_apicaddr))
                return;
 
-       mp_ioapics[nr_ioapics] = *m;
+       mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr;
+       mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid;
+       mp_ioapics[nr_ioapics].mp_type = m->mpc_type;
+       mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver;
+       mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags;
        nr_ioapics++;
 }
 
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+/*
+ * Log one raw MP-table interrupt-source entry as a continuation line
+ * (KERN_CONT): type, irqflag bits 0-1 ("pol"), irqflag bits 2-3 ("trig"),
+ * source bus / IRQ and destination APIC id / pin.
+ */
+static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
 {
-       mp_irqs[mp_irq_entries] = *m;
-       Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
+       printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
                " IRQ %02x, APIC ID %x, APIC INT %02x\n",
                m->mpc_irqtype, m->mpc_irqflag & 3,
                (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
                m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+}
+
+/*
+ * Log one stored interrupt-source entry (struct mp_config_intsrc) as a
+ * continuation line: type, polarity (irqflag bits 0-1), trigger (irqflag
+ * bits 2-3), source bus / IRQ and destination APIC id / pin.
+ */
+static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
+{
+       int polarity = mp_irq->mp_irqflag & 3;
+       int trigger = (mp_irq->mp_irqflag >> 2) & 3;
+
+       printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
+               " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+               mp_irq->mp_irqtype, polarity, trigger,
+               mp_irq->mp_srcbus, mp_irq->mp_srcbusirq,
+               mp_irq->mp_dstapic, mp_irq->mp_dstirq);
+}
+
+/*
+ * Copy the seven interrupt-source fields of an MP-table entry (@m) into
+ * the kernel's own representation (@mp_irq), one field at a time.
+ */
+static void assign_to_mp_irq(struct mpc_config_intsrc *m,
+                                   struct mp_config_intsrc *mp_irq)
+{
+       const struct mpc_config_intsrc *src = m;
+       struct mp_config_intsrc *dst = mp_irq;
+
+       /* entry kind and flags */
+       dst->mp_type = src->mpc_type;
+       dst->mp_irqtype = src->mpc_irqtype;
+       dst->mp_irqflag = src->mpc_irqflag;
+
+       /* source side: bus and bus-relative IRQ */
+       dst->mp_srcbus = src->mpc_srcbus;
+       dst->mp_srcbusirq = src->mpc_srcbusirq;
+
+       /* destination side: APIC id and APIC INT */
+       dst->mp_dstapic = src->mpc_dstapic;
+       dst->mp_dstirq = src->mpc_dstirq;
+}
+
+/*
+ * Inverse of the mpc -> mp copy: write the seven fields of a stored
+ * entry (@mp_irq) back into an MP-table interrupt-source entry (@m).
+ */
+static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
+                                       struct mpc_config_intsrc *m)
+{
+       const struct mp_config_intsrc *src = mp_irq;
+       struct mpc_config_intsrc *dst = m;
+
+       /* entry kind and flags */
+       dst->mpc_type = src->mp_type;
+       dst->mpc_irqtype = src->mp_irqtype;
+       dst->mpc_irqflag = src->mp_irqflag;
+
+       /* source side: bus and bus-relative IRQ */
+       dst->mpc_srcbus = src->mp_srcbus;
+       dst->mpc_srcbusirq = src->mp_srcbusirq;
+
+       /* destination side: APIC id and APIC INT */
+       dst->mpc_dstapic = src->mp_dstapic;
+       dst->mpc_dstirq = src->mp_dstirq;
+}
+
+/*
+ * Compare a stored entry with an MP-table entry field by field.
+ *
+ * Returns 0 when all seven fields agree, otherwise a code naming the
+ * first field that differs: 1 dstapic, 2 type, 3 irqtype, 4 irqflag,
+ * 5 srcbus, 6 srcbusirq, 7 dstirq.
+ */
+static int mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
+                                       struct mpc_config_intsrc *m)
+{
+       int diff = 0;
+
+       if (mp_irq->mp_dstapic != m->mpc_dstapic)
+               diff = 1;
+       else if (mp_irq->mp_type != m->mpc_type)
+               diff = 2;
+       else if (mp_irq->mp_irqtype != m->mpc_irqtype)
+               diff = 3;
+       else if (mp_irq->mp_irqflag != m->mpc_irqflag)
+               diff = 4;
+       else if (mp_irq->mp_srcbus != m->mpc_srcbus)
+               diff = 5;
+       else if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq)
+               diff = 6;
+       else if (mp_irq->mp_dstirq != m->mpc_dstirq)
+               diff = 7;
+
+       return diff;
+}
+
+/*
+ * Record one interrupt-source entry in mp_irqs[].
+ *
+ * The entry is always logged.  It is stored only when no existing
+ * mp_irqs[] entry compares equal to it; storing the entry that brings
+ * mp_irq_entries up to MAX_IRQ_SOURCES panics.
+ */
+void MP_intsrc_info(struct mpc_config_intsrc *m)
+{
+       int i;
+
+       print_MP_intsrc_info(m);
+
+       /* an identical entry is already recorded: nothing to add */
+       for (i = 0; i < mp_irq_entries; i++) {
+               if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
+                       return;
+       }
+
+       /* append at the first free slot */
+       assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
        if (++mp_irq_entries == MAX_IRQ_SOURCES)
                panic("Max # of irq sources exceeded!!\n");
 }
@@ -196,7 +309,7 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
 
 static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
 {
-       Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
+       printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x,"
                " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
                m->mpc_irqtype, m->mpc_irqflag & 3,
                (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
@@ -266,11 +379,14 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
        }
 }
 
-static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
+void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
                                 char *productid)
 {
        if (strncmp(oem, "IBM NUMA", 8))
-               printk("Warning!  May not be a NUMA-Q system!\n");
+               printk("Warning!  Not a NUMA-Q system!\n");
+       else
+               found_numaq = 1;
+
        if (mpc->mpc_oemptr)
                smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
                                 mpc->mpc_oemsize);
@@ -281,12 +397,9 @@ static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
  * Read/parse the MPC
  */
 
-static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
+static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem,
+                               char *str)
 {
-       char str[16];
-       char oem[10];
-       int count = sizeof(*mpc);
-       unsigned char *mpt = ((unsigned char *)mpc) + count;
 
        if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
                printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
@@ -309,19 +422,42 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
        }
        memcpy(oem, mpc->mpc_oem, 8);
        oem[8] = 0;
-       printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem);
+       printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
 
        memcpy(str, mpc->mpc_productid, 12);
        str[12] = 0;
-       printk("Product ID: %s ", str);
 
-#ifdef CONFIG_X86_32
-       mps_oem_check(mpc, oem, str);
-#endif
-       printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
+       printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
 
        printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
 
+       return 1;
+}
+
+static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
+{
+       char str[16];
+       char oem[10];
+
+       int count = sizeof(*mpc);
+       unsigned char *mpt = ((unsigned char *)mpc) + count;
+
+       if (!smp_check_mpc(mpc, oem, str))
+               return 0;
+
+#ifdef CONFIG_X86_32
+       /*
+        * need to make sure summit and es7000's mps_oem_check is safe to be
+        * called early via genericarch 's mps_oem_check
+        */
+       if (early) {
+#ifdef CONFIG_X86_NUMAQ
+               numaq_mps_oem_check(mpc, oem, str);
+#endif
+       } else
+               mps_oem_check(mpc, oem, str);
+#endif
+
        /* save the local APIC address, it might be non-default */
        if (!acpi_lapic)
                mp_lapic_addr = mpc->mpc_lapic;
@@ -352,7 +488,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
                        {
                                struct mpc_config_bus *m =
                                    (struct mpc_config_bus *)mpt;
+#ifdef CONFIG_X86_IO_APIC
                                MP_bus_info(m);
+#endif
                                mpt += sizeof(*m);
                                count += sizeof(*m);
                                break;
@@ -402,6 +540,11 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
                ++mpc_record;
 #endif
        }
+
+#ifdef CONFIG_X86_GENERICARCH
+       generic_bigsmp_probe();
+#endif
+
        setup_apic_routing();
        if (!num_processors)
                printk(KERN_ERR "MPTABLE: no processors registered!\n");
@@ -427,7 +570,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
        intsrc.mpc_type = MP_INTSRC;
        intsrc.mpc_irqflag = 0; /* conforming */
        intsrc.mpc_srcbus = 0;
-       intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
+       intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid;
 
        intsrc.mpc_irqtype = mp_INT;
 
@@ -488,40 +631,11 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
        MP_intsrc_info(&intsrc);
 }
 
-#endif
 
-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+static void construct_ioapic_table(int mpc_default_type)
 {
-       struct mpc_config_processor processor;
-       struct mpc_config_bus bus;
-#ifdef CONFIG_X86_IO_APIC
        struct mpc_config_ioapic ioapic;
-#endif
-       struct mpc_config_lintsrc lintsrc;
-       int linttypes[2] = { mp_ExtINT, mp_NMI };
-       int i;
-
-       /*
-        * local APIC has default address
-        */
-       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
-       /*
-        * 2 CPUs, numbered 0 & 1.
-        */
-       processor.mpc_type = MP_PROCESSOR;
-       /* Either an integrated APIC or a discrete 82489DX. */
-       processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
-       processor.mpc_cpuflag = CPU_ENABLED;
-       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
-           (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
-       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
-       processor.mpc_reserved[0] = 0;
-       processor.mpc_reserved[1] = 0;
-       for (i = 0; i < 2; i++) {
-               processor.mpc_apicid = i;
-               MP_processor_info(&processor);
-       }
+       struct mpc_config_bus bus;
 
        bus.mpc_type = MP_BUS;
        bus.mpc_busid = 0;
@@ -550,7 +664,6 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
                MP_bus_info(&bus);
        }
 
-#ifdef CONFIG_X86_IO_APIC
        ioapic.mpc_type = MP_IOAPIC;
        ioapic.mpc_apicid = 2;
        ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
@@ -562,7 +675,42 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
         * We set up most of the low 16 IO-APIC pins according to MPS rules.
         */
        construct_default_ioirq_mptable(mpc_default_type);
+}
+#else
+static inline void construct_ioapic_table(int mpc_default_type) { }
 #endif
+
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
+{
+       struct mpc_config_processor processor;
+       struct mpc_config_lintsrc lintsrc;
+       int linttypes[2] = { mp_ExtINT, mp_NMI };
+       int i;
+
+       /*
+        * local APIC has default address
+        */
+       mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+       /*
+        * 2 CPUs, numbered 0 & 1.
+        */
+       processor.mpc_type = MP_PROCESSOR;
+       /* Either an integrated APIC or a discrete 82489DX. */
+       processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+       processor.mpc_cpuflag = CPU_ENABLED;
+       processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
+           (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
+       processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
+       processor.mpc_reserved[0] = 0;
+       processor.mpc_reserved[1] = 0;
+       for (i = 0; i < 2; i++) {
+               processor.mpc_apicid = i;
+               MP_processor_info(&processor);
+       }
+
+       construct_ioapic_table(mpc_default_type);
+
        lintsrc.mpc_type = MP_LINTSRC;
        lintsrc.mpc_irqflag = 0;        /* conforming */
        lintsrc.mpc_srcbusid = 0;
@@ -600,7 +748,7 @@ static void __init __get_smp_config(unsigned early)
 
        printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
               mpf->mpf_specification);
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
        if (mpf->mpf_feature2 & (1 << 7)) {
                printk(KERN_INFO "    IMCR and PIC compatibility mode.\n");
                pic_mode = 1;
@@ -632,7 +780,9 @@ static void __init __get_smp_config(unsigned early)
                 * override the defaults.
                 */
                if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) {
+#ifdef CONFIG_X86_LOCAL_APIC
                        smp_found_config = 0;
+#endif
                        printk(KERN_ERR
                               "BIOS bug, MP table errors detected!...\n");
                        printk(KERN_ERR "... disabling SMP support. "
@@ -689,7 +839,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
        unsigned int *bp = phys_to_virt(base);
        struct intel_mp_floating *mpf;
 
-       Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
+       printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length);
        BUILD_BUG_ON(sizeof(*mpf) != 16);
 
        while (length > 0) {
@@ -699,8 +849,9 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
                    !mpf_checksum((unsigned char *)bp, 16) &&
                    ((mpf->mpf_specification == 1)
                     || (mpf->mpf_specification == 4))) {
-
+#ifdef CONFIG_X86_LOCAL_APIC
                        smp_found_config = 1;
+#endif
                        mpf_found = mpf;
 #ifdef CONFIG_X86_32
                        printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
@@ -790,298 +941,294 @@ void __init find_smp_config(void)
        __find_smp_config(1);
 }
 
-/* --------------------------------------------------------------------------
-                            ACPI-based MP Configuration
-   -------------------------------------------------------------------------- */
+#ifdef CONFIG_X86_IO_APIC
+static u8 __initdata irq_used[MAX_IRQ_SOURCES];
 
-/*
- * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
- */
-int es7000_plat;
+/*
+ * Find the mp_irqs[] entry that corresponds to MP-table entry @m and
+ * claim it in irq_used[].
+ *
+ * Returns:
+ *    0  @m is not an mp_INT entry with irqflag 0x0f; no lookup is done
+ *    i  index of the first mp_irqs[] entry that is mp_INT, has irqflag
+ *       0x0f and the same srcbus/srcbusirq; irq_used[i] is set to 1
+ *   -2  that entry was already claimed (irq_used[i] was set)
+ *   -1  no such entry
+ *
+ * NOTE(review): a match at index 0 returns the same value as the
+ * "no lookup" case -- confirm callers can tolerate that.
+ */
+static int  __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
+{
+       int i;
 
-#ifdef CONFIG_ACPI
+       if (m->mpc_irqtype != mp_INT)
+               return 0;
 
-#ifdef CONFIG_X86_IO_APIC
+       if (m->mpc_irqflag != 0x0f)
+               return 0;
 
-#define MP_ISA_BUS             0
+       /* not legacy */
 
-extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
+       for (i = 0; i < mp_irq_entries; i++) {
+               if (mp_irqs[i].mp_irqtype != mp_INT)
+                       continue;
 
-static int mp_find_ioapic(int gsi)
-{
-       int i = 0;
+               if (mp_irqs[i].mp_irqflag != 0x0f)
+                       continue;
 
-       /* Find the IOAPIC that manages this GSI. */
-       for (i = 0; i < nr_ioapics; i++) {
-               if ((gsi >= mp_ioapic_routing[i].gsi_base)
-                   && (gsi <= mp_ioapic_routing[i].gsi_end))
-                       return i;
+               if (mp_irqs[i].mp_srcbus != m->mpc_srcbus)
+                       continue;
+               if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq)
+                       continue;
+               if (irq_used[i]) {
+                       /* already claimed */
+                       return -2;
+               }
+               irq_used[i] = 1;
+               return i;
        }
 
-       printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+       /* not found */
        return -1;
 }
 
-static u8 __init uniq_ioapic_id(u8 id)
-{
-#ifdef CONFIG_X86_32
-       if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-           !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-               return io_apic_get_unique_id(nr_ioapics, id);
-       else
-               return id;
-#else
-       int i;
-       DECLARE_BITMAP(used, 256);
-       bitmap_zero(used, 256);
-       for (i = 0; i < nr_ioapics; i++) {
-               struct mpc_config_ioapic *ia = &mp_ioapics[i];
-               __set_bit(ia->mpc_apicid, used);
-       }
-       if (!test_bit(id, used))
-               return id;
-       return find_first_zero_bit(used, 256);
+#define SPARE_SLOT_NUM 20
+
+static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
 #endif
-}
 
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+static int  __init replace_intsrc_all(struct mp_config_table *mpc,
+                                       unsigned long mpc_new_phys,
+                                       unsigned long mpc_new_length)
 {
-       int idx = 0;
-
-       if (bad_ioapic(address))
-               return;
+#ifdef CONFIG_X86_IO_APIC
+       int i;
+       int nr_m_spare = 0;
+#endif
 
-       idx = nr_ioapics;
+       int count = sizeof(*mpc);
+       unsigned char *mpt = ((unsigned char *)mpc) + count;
 
-       mp_ioapics[idx].mpc_type = MP_IOAPIC;
-       mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
-       mp_ioapics[idx].mpc_apicaddr = address;
+       printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length);
+       while (count < mpc->mpc_length) {
+               switch (*mpt) {
+               case MP_PROCESSOR:
+                       {
+                               struct mpc_config_processor *m =
+                                   (struct mpc_config_processor *)mpt;
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+               case MP_BUS:
+                       {
+                               struct mpc_config_bus *m =
+                                   (struct mpc_config_bus *)mpt;
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+               case MP_IOAPIC:
+                       {
+                               mpt += sizeof(struct mpc_config_ioapic);
+                               count += sizeof(struct mpc_config_ioapic);
+                               break;
+                       }
+               case MP_INTSRC:
+                       {
+#ifdef CONFIG_X86_IO_APIC
+                               struct mpc_config_intsrc *m =
+                                   (struct mpc_config_intsrc *)mpt;
 
-       set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-       mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
-#ifdef CONFIG_X86_32
-       mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
-#else
-       mp_ioapics[idx].mpc_apicver = 0;
+                               printk(KERN_INFO "OLD ");
+                               print_MP_intsrc_info(m);
+                               i = get_MP_intsrc_index(m);
+                               if (i > 0) {
+                                       assign_to_mpc_intsrc(&mp_irqs[i], m);
+                                       printk(KERN_INFO "NEW ");
+                                       print_mp_irq_info(&mp_irqs[i]);
+                               } else if (!i) {
+                                       /* legacy, do nothing */
+                               } else if (nr_m_spare < SPARE_SLOT_NUM) {
+                                       /*
+                                        * not found (-1), or duplicated (-2)
+                                        * are invalid entries,
+                                        * we need to use the slot  later
+                                        */
+                                       m_spare[nr_m_spare] = m;
+                                       nr_m_spare++;
+                               }
 #endif
-       /*
-        * Build basic GSI lookup table to facilitate gsi->io_apic lookups
-        * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
-        */
-       mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
-       mp_ioapic_routing[idx].gsi_base = gsi_base;
-       mp_ioapic_routing[idx].gsi_end = gsi_base +
-           io_apic_get_redir_entries(idx);
-
-       printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
-              "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
-              mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
-              mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
-
-       nr_ioapics++;
-}
+                               mpt += sizeof(struct mpc_config_intsrc);
+                               count += sizeof(struct mpc_config_intsrc);
+                               break;
+                       }
+               case MP_LINTSRC:
+                       {
+                               struct mpc_config_lintsrc *m =
+                                   (struct mpc_config_lintsrc *)mpt;
+                               mpt += sizeof(*m);
+                               count += sizeof(*m);
+                               break;
+                       }
+               default:
+                       /* wrong mptable */
+                       printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
+                       printk(KERN_ERR "type %x\n", *mpt);
+                       print_hex_dump(KERN_ERR, "  ", DUMP_PREFIX_ADDRESS, 16,
+                                       1, mpc, mpc->mpc_length, 1);
+                       goto out;
+               }
+       }
 
-void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
-{
-       struct mpc_config_intsrc intsrc;
-       int ioapic = -1;
-       int pin = -1;
+#ifdef CONFIG_X86_IO_APIC
+       for (i = 0; i < mp_irq_entries; i++) {
+               if (irq_used[i])
+                       continue;
 
-       /*
-        * Convert 'gsi' to 'ioapic.pin'.
-        */
-       ioapic = mp_find_ioapic(gsi);
-       if (ioapic < 0)
-               return;
-       pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+               if (mp_irqs[i].mp_irqtype != mp_INT)
+                       continue;
 
-       /*
-        * TBD: This check is for faulty timer entries, where the override
-        *      erroneously sets the trigger to level, resulting in a HUGE
-        *      increase of timer interrupts!
-        */
-       if ((bus_irq == 0) && (trigger == 3))
-               trigger = 1;
+               if (mp_irqs[i].mp_irqflag != 0x0f)
+                       continue;
 
-       intsrc.mpc_type = MP_INTSRC;
-       intsrc.mpc_irqtype = mp_INT;
-       intsrc.mpc_irqflag = (trigger << 2) | polarity;
-       intsrc.mpc_srcbus = MP_ISA_BUS;
-       intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
-       intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;     /* APIC ID */
-       intsrc.mpc_dstirq = pin;        /* INTIN# */
+               if (nr_m_spare > 0) {
+                       printk(KERN_INFO "*NEW* found ");
+                       nr_m_spare--;
+                       assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
+                       m_spare[nr_m_spare] = NULL;
+               } else {
+                       struct mpc_config_intsrc *m =
+                           (struct mpc_config_intsrc *)mpt;
+                       count += sizeof(struct mpc_config_intsrc);
+                       if (!mpc_new_phys) {
+                               printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
+                       } else {
+                               if (count <= mpc_new_length)
+                                       printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
+                               else {
+                                       printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
+                                       goto out;
+                               }
+                       }
+                       assign_to_mpc_intsrc(&mp_irqs[i], m);
+                       mpc->mpc_length = count;
+                       mpt += sizeof(struct mpc_config_intsrc);
+               }
+               print_mp_irq_info(&mp_irqs[i]);
+       }
+#endif
+out:
+       /* update checksum */
+       mpc->mpc_checksum = 0;
+       mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc,
+                                          mpc->mpc_length);
 
-       MP_intsrc_info(&intsrc);
+       return 0;
 }
 
-void __init mp_config_acpi_legacy_irqs(void)
-{
-       struct mpc_config_intsrc intsrc;
-       int i = 0;
-       int ioapic = -1;
+int __initdata enable_update_mptable;
 
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
-       /*
-        * Fabricate the legacy ISA bus (bus #31).
-        */
-       mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
-#endif
-       set_bit(MP_ISA_BUS, mp_bus_not_pci);
-       Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
+/*
+ * Handler for the "update_mptable" early parameter: sets
+ * enable_update_mptable.  The option's value string is not examined.
+ * Always returns 0.
+ */
+static int __init update_mptable_setup(char *str)
+{
+       enable_update_mptable = 1;
+       return 0;
+}
+early_param("update_mptable", update_mptable_setup);
 
-       /*
-        * Older generations of ES7000 have no legacy identity mappings
-        */
-       if (es7000_plat == 1)
-               return;
+static unsigned long __initdata mpc_new_phys;
+static unsigned long mpc_new_length __initdata = 4096;
 
-       /*
-        * Locate the IOAPIC that manages the ISA IRQs (0-15).
-        */
-       ioapic = mp_find_ioapic(0);
-       if (ioapic < 0)
-               return;
+/* Set once "alloc_mptable" has been seen on the command line. */
+static int __initdata alloc_mptable;
+
+/*
+ * Early parameter "alloc_mptable" or "alloc_mptable=<size>" (e.g. 4k).
+ * Either form also turns on enable_update_mptable; a size, when given,
+ * is parsed with memparse() and replaces mpc_new_length.
+ * Always returns 0.
+ */
+static int __init parse_alloc_mptable_opt(char *p)
+{
+       alloc_mptable = 1;
+       enable_update_mptable = 1;
+
+       if (p)
+               mpc_new_length = memparse(p, &p);
+
+       return 0;
+}
+early_param("alloc_mptable", parse_alloc_mptable_opt);
 
-       intsrc.mpc_type = MP_INTSRC;
-       intsrc.mpc_irqflag = 0; /* Conforming */
-       intsrc.mpc_srcbus = MP_ISA_BUS;
-#ifdef CONFIG_X86_IO_APIC
-       intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
+void __init early_reserve_e820_mpc_new(void)
+{
+       if (enable_update_mptable && alloc_mptable) {
+               u64 startt = 0;
+#ifdef CONFIG_X86_TRAMPOLINE
+               startt = TRAMPOLINE_BASE;
 #endif
-       /*
-        * Use the default configuration for the IRQs 0-15.  Unless
-        * overridden by (MADT) interrupt source override entries.
-        */
-       for (i = 0; i < 16; i++) {
-               int idx;
-
-               for (idx = 0; idx < mp_irq_entries; idx++) {
-                       struct mpc_config_intsrc *irq = mp_irqs + idx;
-
-                       /* Do we already have a mapping for this ISA IRQ? */
-                       if (irq->mpc_srcbus == MP_ISA_BUS
-                           && irq->mpc_srcbusirq == i)
-                               break;
-
-                       /* Do we already have a mapping for this IOAPIC pin */
-                       if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
-                           (irq->mpc_dstirq == i))
-                               break;
-               }
-
-               if (idx != mp_irq_entries) {
-                       printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
-                       continue;       /* IRQ already used */
-               }
-
-               intsrc.mpc_irqtype = mp_INT;
-               intsrc.mpc_srcbusirq = i;       /* Identity mapped */
-               intsrc.mpc_dstirq = i;
-
-               MP_intsrc_info(&intsrc);
+               mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
        }
 }
 
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+static int __init update_mp_table(void)
 {
-       int ioapic;
-       int ioapic_pin;
-#ifdef CONFIG_X86_32
-#define MAX_GSI_NUM    4096
-#define IRQ_COMPRESSION_START  64
+       char str[16];
+       char oem[10];
+       struct intel_mp_floating *mpf;
+       struct mp_config_table *mpc;
+       struct mp_config_table *mpc_new;
+
+       if (!enable_update_mptable)
+               return 0;
+
+       mpf = mpf_found;
+       if (!mpf)
+               return 0;
 
-       static int pci_irq = IRQ_COMPRESSION_START;
        /*
-        * Mapping between Global System Interrupts, which
-        * represent all possible interrupts, and IRQs
-        * assigned to actual devices.
+        * Now see if we need to go further.
         */
-       static int gsi_to_irq[MAX_GSI_NUM];
-#else
-
-       if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
-               return gsi;
-#endif
+       if (mpf->mpf_feature1 != 0)
+               return 0;
 
-       /* Don't set up the ACPI SCI because it's already set up */
-       if (acpi_gbl_FADT.sci_interrupt == gsi)
-               return gsi;
+       if (!mpf->mpf_physptr)
+               return 0;
 
-       ioapic = mp_find_ioapic(gsi);
-       if (ioapic < 0) {
-               printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
-               return gsi;
-       }
+       mpc = phys_to_virt(mpf->mpf_physptr);
 
-       ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
+       if (!smp_check_mpc(mpc, oem, str))
+               return 0;
 
-#ifdef CONFIG_X86_32
-       if (ioapic_renumber_irq)
-               gsi = ioapic_renumber_irq(ioapic, gsi);
-#endif
+       printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
+       printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
 
-       /*
-        * Avoid pin reprogramming.  PRTs typically include entries
-        * with redundant pin->gsi mappings (but unique PCI devices);
-        * we only program the IOAPIC on the first.
-        */
-       if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
-               printk(KERN_ERR "Invalid reference to IOAPIC pin "
-                      "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
-                      ioapic_pin);
-               return gsi;
+       if (mpc_new_phys && mpc->mpc_length > mpc_new_length) {
+               mpc_new_phys = 0;
+               printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
+                        mpc_new_length);
        }
-       if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-               Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
-                       mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-#ifdef CONFIG_X86_32
-               return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
-#else
-               return gsi;
-#endif
+
+       if (!mpc_new_phys) {
+               unsigned char old, new;
+               /* check if we can change the position */
+               mpc->mpc_checksum = 0;
+               old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
+               mpc->mpc_checksum = 0xff;
+               new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
+               if (old == new) {
+                       printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
+                       return 0;
+               }
+               printk(KERN_INFO "use in-positon replacing\n");
+       } else {
+               mpf->mpf_physptr = mpc_new_phys;
+               mpc_new = phys_to_virt(mpc_new_phys);
+               memcpy(mpc_new, mpc, mpc->mpc_length);
+               mpc = mpc_new;
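+               /*
+                * check if we can modify that: presumably a read-only mpf
+                * keeps its old mpf_physptr after the store above, which
+                * makes the difference below non-zero -- TODO confirm
+                */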
+               if (mpc_new_phys - mpf->mpf_physptr) {
+                       struct intel_mp_floating *mpf_new;
+                       /* steal 16 bytes from [0, 1k) */
+                       printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
+                       mpf_new = phys_to_virt(0x400 - 16);
+                       memcpy(mpf_new, mpf, 16);
+                       mpf = mpf_new;
+                       mpf->mpf_physptr = mpc_new_phys;
+               }
+               mpf->mpf_checksum = 0;
+               mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
+               printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
        }
 
-       set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
-#ifdef CONFIG_X86_32
        /*
-        * For GSI >= 64, use IRQ compression
+        * only replace the one with mp_INT and
+        *       MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW,
+        * already in mp_irqs , stored by ... and mp_config_acpi_gsi,
+        * may need pci=routeirq for all coverage
         */
-       if ((gsi >= IRQ_COMPRESSION_START)
-           && (triggering == ACPI_LEVEL_SENSITIVE)) {
-               /*
-                * For PCI devices assign IRQs in order, avoiding gaps
-                * due to unused I/O APIC pins.
-                */
-               int irq = gsi;
-               if (gsi < MAX_GSI_NUM) {
-                       /*
-                        * Retain the VIA chipset work-around (gsi > 15), but
-                        * avoid a problem where the 8254 timer (IRQ0) is setup
-                        * via an override (so it's not on pin 0 of the ioapic),
-                        * and at the same time, the pin 0 interrupt is a PCI
-                        * type.  The gsi > 15 test could cause these two pins
-                        * to be shared as IRQ0, and they are not shareable.
-                        * So test for this condition, and if necessary, avoid
-                        * the pin collision.
-                        */
-                       gsi = pci_irq++;
-                       /*
-                        * Don't assign IRQ used by ACPI SCI
-                        */
-                       if (gsi == acpi_gbl_FADT.sci_interrupt)
-                               gsi = pci_irq++;
-                       gsi_to_irq[irq] = gsi;
-               } else {
-                       printk(KERN_ERR "GSI %u is too high\n", gsi);
-                       return gsi;
-               }
-       }
-#endif
-       io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
-                               triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
-                               polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
-       return gsi;
+       replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length);
+
+       return 0;
 }
 
-#endif /* CONFIG_X86_IO_APIC */
-#endif /* CONFIG_ACPI */
+late_initcall(update_mp_table);
index e65281b1634b790bd151c479fc96f3846febe87d..f0f1de1c4a1de4465cf53aae7e50c9bbcb7af0c1 100644 (file)
@@ -31,6 +31,8 @@
 #include <asm/numaq.h>
 #include <asm/topology.h>
 #include <asm/processor.h>
+#include <asm/mpspec.h>
+#include <asm/e820.h>
 
 #define        MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
 
@@ -58,6 +60,8 @@ static void __init smp_dump_qct(void)
                        node_end_pfn[node] = MB_TO_PAGES(
                                eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
 
+                       e820_register_active_regions(node, node_start_pfn[node],
+                                                       node_end_pfn[node]);
                        memory_present(node,
                                node_start_pfn[node], node_end_pfn[node]);
                        node_remap_size[node] = node_memmap_size_bytes(node,
@@ -67,13 +71,24 @@ static void __init smp_dump_qct(void)
        }
 }
 
-/*
- * Unlike Summit, we don't really care to let the NUMA-Q
- * fall back to flat mode.  Don't compile for NUMA-Q
- * unless you really need it!
- */
+static __init void early_check_numaq(void)
+{
+       /*
+        * Find possible boot-time SMP configuration:
+        */
+       early_find_smp_config();
+       /*
+        * get boot-time SMP configuration:
+        */
+       if (smp_found_config)
+               early_get_smp_config();
+}
+
 int __init get_memcfg_numaq(void)
 {
+       early_check_numaq();
+       if (!found_numaq)
+               return 0;
        smp_dump_qct();
        return 1;
 }
index 6f80b852a1961a6b496bc2404c5194769557d97a..45a5e247d45093af8db7305623e24d9f4a197ba5 100644 (file)
@@ -17,6 +17,7 @@ unsigned int num_processors;
 unsigned disabled_cpus __cpuinitdata;
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_physical_apicid = -1U;
+unsigned int max_physical_apicid;
 EXPORT_SYMBOL(boot_cpu_physical_apicid);
 
 DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
@@ -137,3 +138,25 @@ void __init setup_per_cpu_areas(void)
 }
 
 #endif
+
+void __init parse_setup_data(void)
+{
+       struct setup_data *data;
+       u64 pa_data;
+
+       if (boot_params.hdr.version < 0x0209)
+               return;
+       pa_data = boot_params.hdr.setup_data;
+       while (pa_data) {
+               data = early_ioremap(pa_data, PAGE_SIZE);
+               switch (data->type) {
+               default:
+                       break;
+               }
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
+               free_early(pa_data, pa_data+sizeof(*data)+data->len);
+#endif
+               pa_data = data->next;
+               early_iounmap(data, PAGE_SIZE);
+       }
+}
index 5a2f8e0638875a348fcf415bef070fbece342fa5..1d4be07e15e545fdf767fd679653bacf33772a81 100644 (file)
 #include <asm/bios_ebda.h>
 #include <asm/cacheflush.h>
 #include <asm/processor.h>
+#include <asm/efi.h>
 
 /* This value is set up by the early boot code to point to the value
    immediately after the boot time page tables.  It contains a *physical*
    address, and must not be in the .bss segment! */
+unsigned long init_pg_tables_start __initdata = ~0UL;
 unsigned long init_pg_tables_end __initdata = ~0UL;
 
 /*
@@ -237,42 +239,6 @@ static inline void copy_edd(void)
 }
 #endif
 
-int __initdata user_defined_memmap;
-
-/*
- * "mem=nopentium" disables the 4MB page tables.
- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
- * to <mem>, overriding the bios size.
- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
- * <start> to <start>+<mem>, overriding the bios size.
- *
- * HPA tells me bootloaders need to parse mem=, so no new
- * option should be mem=  [also see Documentation/i386/boot.txt]
- */
-static int __init parse_mem(char *arg)
-{
-       if (!arg)
-               return -EINVAL;
-
-       if (strcmp(arg, "nopentium") == 0) {
-               setup_clear_cpu_cap(X86_FEATURE_PSE);
-       } else {
-               /* If the user specifies memory size, we
-                * limit the BIOS-provided memory map to
-                * that size. exactmap can be used to specify
-                * the exact map. mem=number can be used to
-                * trim the existing memory map.
-                */
-               unsigned long long mem_size;
-
-               mem_size = memparse(arg, &arg);
-               limit_regions(mem_size);
-               user_defined_memmap = 1;
-       }
-       return 0;
-}
-early_param("mem", parse_mem);
-
 #ifdef CONFIG_PROC_VMCORE
 /* elfcorehdr= specifies the location of elf core header
  * stored by the crashed kernel.
@@ -395,56 +361,6 @@ unsigned long __init find_max_low_pfn(void)
        return max_low_pfn;
 }
 
-#define BIOS_LOWMEM_KILOBYTES 0x413
-
-/*
- * The BIOS places the EBDA/XBDA at the top of conventional
- * memory, and usually decreases the reported amount of
- * conventional memory (int 0x12) too. This also contains a
- * workaround for Dell systems that neglect to reserve EBDA.
- * The same workaround also avoids a problem with the AMD768MPX
- * chipset: reserve a page before VGA to prevent PCI prefetch
- * into it (errata #56). Usually the page is reserved anyways,
- * unless you have no PS/2 mouse plugged in.
- */
-static void __init reserve_ebda_region(void)
-{
-       unsigned int lowmem, ebda_addr;
-
-       /* To determine the position of the EBDA and the */
-       /* end of conventional memory, we need to look at */
-       /* the BIOS data area. In a paravirtual environment */
-       /* that area is absent. We'll just have to assume */
-       /* that the paravirt case can handle memory setup */
-       /* correctly, without our help. */
-       if (paravirt_enabled())
-               return;
-
-       /* end of low (conventional) memory */
-       lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
-       lowmem <<= 10;
-
-       /* start of EBDA area */
-       ebda_addr = get_bios_ebda();
-
-       /* Fixup: bios puts an EBDA in the top 64K segment */
-       /* of conventional memory, but does not adjust lowmem. */
-       if ((lowmem - ebda_addr) <= 0x10000)
-               lowmem = ebda_addr;
-
-       /* Fixup: bios does not report an EBDA at all. */
-       /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
-       if ((ebda_addr == 0) && (lowmem >= 0x9f000))
-               lowmem = 0x9f000;
-
-       /* Paranoia: should never happen, but... */
-       if ((lowmem == 0) || (lowmem >= 0x100000))
-               lowmem = 0x9f000;
-
-       /* reserve all memory between lowmem and the 1MB mark */
-       reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
-}
-
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 static void __init setup_bootmem_allocator(void);
 static unsigned long __init setup_memory(void)
@@ -462,11 +378,13 @@ static unsigned long __init setup_memory(void)
        if (max_pfn > max_low_pfn) {
                highstart_pfn = max_low_pfn;
        }
+       memory_present(0, 0, highend_pfn);
        printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
                pages_to_mb(highend_pfn - highstart_pfn));
        num_physpages = highend_pfn;
        high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
+       memory_present(0, 0, max_low_pfn);
        num_physpages = max_low_pfn;
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
@@ -488,11 +406,12 @@ static void __init zone_sizes_init(void)
        max_zone_pfns[ZONE_DMA] =
                virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
        max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+       remove_all_active_ranges();
 #ifdef CONFIG_HIGHMEM
        max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
-       add_active_range(0, 0, highend_pfn);
+       e820_register_active_regions(0, 0, highend_pfn);
 #else
-       add_active_range(0, 0, max_low_pfn);
+       e820_register_active_regions(0, 0, max_low_pfn);
 #endif
 
        free_area_init_nodes(max_zone_pfns);
@@ -558,44 +477,57 @@ static bool do_relocate_initrd = false;
 
 static void __init reserve_initrd(void)
 {
-       unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
-       unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
-       unsigned long ramdisk_end   = ramdisk_image + ramdisk_size;
-       unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
-       unsigned long ramdisk_here;
-
-       initrd_start = 0;
+       u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+       u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
+       u64 ramdisk_end   = ramdisk_image + ramdisk_size;
+       u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+       u64 ramdisk_here;
 
        if (!boot_params.hdr.type_of_loader ||
            !ramdisk_image || !ramdisk_size)
                return;         /* No initrd provided by bootloader */
 
-       if (ramdisk_end < ramdisk_image) {
-               printk(KERN_ERR "initrd wraps around end of memory, "
-                      "disabling initrd\n");
-               return;
-       }
+       initrd_start = 0;
+
        if (ramdisk_size >= end_of_lowmem/2) {
+               free_early(ramdisk_image, ramdisk_end);
                printk(KERN_ERR "initrd too large to handle, "
                       "disabling initrd\n");
                return;
        }
+
+       printk(KERN_INFO "old RAMDISK: %08llx - %08llx\n", ramdisk_image,
+                       ramdisk_end);
+
+
        if (ramdisk_end <= end_of_lowmem) {
                /* All in lowmem, easy case */
-               reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
+               /*
+                * don't need to reserve again, already reserved early
+                * in i386_start_kernel
+                */
                initrd_start = ramdisk_image + PAGE_OFFSET;
                initrd_end = initrd_start+ramdisk_size;
                return;
        }
 
        /* We need to move the initrd down into lowmem */
-       ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK;
+       ramdisk_here = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+                                end_of_lowmem, ramdisk_size,
+                                PAGE_SIZE);
+
+       if (ramdisk_here == -1ULL)
+               panic("Cannot find place for new RAMDISK of size %lld\n",
+                        ramdisk_size);
 
        /* Note: this includes all the lowmem currently occupied by
           the initrd, we rely on that fact to keep the data intact. */
-       reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
+       reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
+                        "NEW RAMDISK");
        initrd_start = ramdisk_here + PAGE_OFFSET;
        initrd_end   = initrd_start + ramdisk_size;
+       printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
+                        ramdisk_here, ramdisk_here + ramdisk_size);
 
        do_relocate_initrd = true;
 }
@@ -604,10 +536,10 @@ static void __init reserve_initrd(void)
 
 static void __init relocate_initrd(void)
 {
-       unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
-       unsigned long ramdisk_size  = boot_params.hdr.ramdisk_size;
-       unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
-       unsigned long ramdisk_here;
+       u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+       u64 ramdisk_size  = boot_params.hdr.ramdisk_size;
+       u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+       u64 ramdisk_here;
        unsigned long slop, clen, mapaddr;
        char *p, *q;
 
@@ -624,6 +556,10 @@ static void __init relocate_initrd(void)
                p = (char *)__va(ramdisk_image);
                memcpy(q, p, clen);
                q += clen;
+               /* need to free these low pages...*/
+               printk(KERN_INFO "Freeing old partial RAMDISK %08llx-%08llx\n",
+                        ramdisk_image, ramdisk_image + clen - 1);
+               free_bootmem(ramdisk_image, clen);
                ramdisk_image += clen;
                ramdisk_size  -= clen;
        }
@@ -642,47 +578,44 @@ static void __init relocate_initrd(void)
                ramdisk_image += clen;
                ramdisk_size  -= clen;
        }
+       /* high pages are not converted by early_res_to_bootmem */
+       ramdisk_image = boot_params.hdr.ramdisk_image;
+       ramdisk_size  = boot_params.hdr.ramdisk_size;
+       printk(KERN_INFO "Copied RAMDISK from %016llx - %016llx to %08llx - %08llx\n",
+               ramdisk_image, ramdisk_image + ramdisk_size - 1,
+               ramdisk_here, ramdisk_here + ramdisk_size - 1);
 }
 
 #endif /* CONFIG_BLK_DEV_INITRD */
 
 void __init setup_bootmem_allocator(void)
 {
-       unsigned long bootmap_size;
+       int i;
+       unsigned long bootmap_size, bootmap;
        /*
         * Initialize the boot-time allocator (with low memory only):
         */
-       bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
-
-       register_bootmem_low_pages(max_low_pfn);
-
-       /*
-        * Reserve the bootmem bitmap itself as well. We do this in two
-        * steps (first step was init_bootmem()) because this catches
-        * the (very unlikely) case of us accidentally initializing the
-        * bootmem allocator with an invalid RAM area.
-        */
-       reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
-                        bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
-                        BOOTMEM_DEFAULT);
-
-       /*
-        * reserve physical page 0 - it's a special BIOS page on many boxes,
-        * enabling clean reboots, SMP operation, laptop functions.
-        */
-       reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
-
-       /* reserve EBDA region */
-       reserve_ebda_region();
-
-#ifdef CONFIG_SMP
-       /*
-        * But first pinch a few for the stack/trampoline stuff
-        * FIXME: Don't need the extra page at 4K, but need to fix
-        * trampoline before removing it. (see the GDT stuff)
-        */
-       reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
+       bootmap_size = bootmem_bootmap_pages(max_low_pfn)<<PAGE_SHIFT;
+       bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+                                max_pfn_mapped<<PAGE_SHIFT, bootmap_size,
+                                PAGE_SIZE);
+       if (bootmap == -1L)
+               panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+       reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
+#ifdef CONFIG_BLK_DEV_INITRD
+       reserve_initrd();
 #endif
+       bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, max_low_pfn);
+       printk(KERN_INFO "  mapped low ram: 0 - %08lx\n",
+                max_pfn_mapped<<PAGE_SHIFT);
+       printk(KERN_INFO "  low ram: %08lx - %08lx\n",
+                min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
+       printk(KERN_INFO "  bootmap %08lx - %08lx\n",
+                bootmap, bootmap + bootmap_size);
+       for_each_online_node(i)
+               free_bootmem_with_active_regions(i, max_low_pfn);
+       early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
+
 #ifdef CONFIG_ACPI_SLEEP
        /*
         * Reserve low memory region for sleep support.
@@ -695,10 +628,6 @@ void __init setup_bootmem_allocator(void)
         */
        find_smp_config();
 #endif
-#ifdef CONFIG_BLK_DEV_INITRD
-       reserve_initrd();
-#endif
-       numa_kva_reserve();
        reserve_crashkernel();
 
        reserve_ibft_region();
@@ -731,12 +660,6 @@ static void set_mca_bus(int x)
 static void set_mca_bus(int x) { }
 #endif
 
-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
-char * __init __attribute__((weak)) memory_setup(void)
-{
-       return machine_specific_memory_setup();
-}
-
 #ifdef CONFIG_NUMA
 /*
  * In the golden day, when everything among i386 and x86_64 will be
@@ -764,11 +687,14 @@ void __init setup_arch(char **cmdline_p)
        pre_setup_arch_hook();
        early_cpu_init();
        early_ioremap_init();
+       reserve_setup_data();
 
 #ifdef CONFIG_EFI
        if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-                    "EL32", 4))
+                    "EL32", 4)) {
                efi_enabled = 1;
+               efi_reserve_early();
+       }
 #endif
 
        ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
@@ -792,8 +718,7 @@ void __init setup_arch(char **cmdline_p)
 #endif
        ARCH_SETUP
 
-       printk(KERN_INFO "BIOS-provided physical RAM map:\n");
-       print_memory_map(memory_setup());
+       setup_memory_map();
 
        copy_edd();
 
@@ -811,12 +736,11 @@ void __init setup_arch(char **cmdline_p)
        bss_resource.start = virt_to_phys(&__bss_start);
        bss_resource.end = virt_to_phys(&__bss_stop)-1;
 
+       parse_setup_data();
+
        parse_early_param();
 
-       if (user_defined_memmap) {
-               printk(KERN_INFO "user-defined physical RAM map:\n");
-               print_memory_map("user");
-       }
+       finish_e820_parsing();
 
        strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
        *cmdline_p = command_line;
@@ -824,11 +748,22 @@ void __init setup_arch(char **cmdline_p)
        if (efi_enabled)
                efi_init();
 
+       e820_register_active_regions(0, 0, -1UL);
+       /*
+        * partially used pages are not usable - thus
+        * we are rounding upwards:
+        */
+       max_pfn = e820_end_of_ram();
+
+       /* preallocate 4k for mptable mpc */
+       early_reserve_e820_mpc_new();
        /* update e820 for memory not covered by WB MTRRs */
-       propagate_e820_map();
        mtrr_bp_init();
-       if (mtrr_trim_uncached_memory(max_pfn))
-               propagate_e820_map();
+       if (mtrr_trim_uncached_memory(max_pfn)) {
+               remove_all_active_ranges();
+               e820_register_active_regions(0, 0, -1UL);
+               max_pfn = e820_end_of_ram();
+       }
 
        max_low_pfn = setup_memory();
 
@@ -855,9 +790,6 @@ void __init setup_arch(char **cmdline_p)
         * not to exceed the 8Mb limit.
         */
 
-#ifdef CONFIG_SMP
-       smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
-#endif
        paging_init();
 
        /*
@@ -914,21 +846,20 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_ACPI
        acpi_boot_init();
-
+#endif
+#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
+       if (smp_found_config)
+               get_smp_config();
+#endif
 #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
        if (def_to_bigsmp)
                printk(KERN_WARNING "More than 8 CPUs detected and "
                        "CONFIG_X86_PC cannot handle it.\nUse "
                        "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
 #endif
-#endif
-#ifdef CONFIG_X86_LOCAL_APIC
-       if (smp_found_config)
-               get_smp_config();
-#endif
 
-       e820_register_memory();
-       e820_mark_nosave_regions();
+       e820_setup_gap();
+       e820_mark_nosave_regions(max_low_pfn);
 
 #ifdef CONFIG_VT
 #if defined(CONFIG_VGA_CONSOLE)
index 6dff1286ad8adec4b9fc6bb7808c3a233c476fd2..26d60cc0e3708004ad1f13d71da8f595680e0964 100644 (file)
@@ -56,6 +56,7 @@
 #include <asm/desc.h>
 #include <video/edid.h>
 #include <asm/e820.h>
+#include <asm/mpspec.h>
 #include <asm/dma.h>
 #include <asm/gart.h>
 #include <asm/mpspec.h>
@@ -271,28 +272,6 @@ void __attribute__((weak)) __init memory_setup(void)
        machine_specific_memory_setup();
 }
 
-static void __init parse_setup_data(void)
-{
-       struct setup_data *data;
-       unsigned long pa_data;
-
-       if (boot_params.hdr.version < 0x0209)
-               return;
-       pa_data = boot_params.hdr.setup_data;
-       while (pa_data) {
-               data = early_ioremap(pa_data, PAGE_SIZE);
-               switch (data->type) {
-               default:
-                       break;
-               }
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
-               free_early(pa_data, pa_data+sizeof(*data)+data->len);
-#endif
-               pa_data = data->next;
-               early_iounmap(data, PAGE_SIZE);
-       }
-}
-
 #ifdef CONFIG_PCI_MMCONFIG
 extern void __cpuinit fam10h_check_enable_mmcfg(void);
 extern void __init check_enable_amd_mmconf_dmi(void);
@@ -329,8 +308,10 @@ void __init setup_arch(char **cmdline_p)
 #endif
 #ifdef CONFIG_EFI
        if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
-                    "EL64", 4))
+                    "EL64", 4)) {
                efi_enabled = 1;
+               efi_reserve_early();
+       }
 #endif
 
        ARCH_SETUP
@@ -381,9 +362,13 @@ void __init setup_arch(char **cmdline_p)
         * we are rounding upwards:
         */
        end_pfn = e820_end_of_ram();
+
+       /* preallocate 4k for mptable mpc */
+       early_reserve_e820_mpc_new();
        /* update e820 for memory not covered by WB MTRRs */
        mtrr_bp_init();
        if (mtrr_trim_uncached_memory(end_pfn)) {
+               remove_all_active_ranges();
                e820_register_active_regions(0, 0, -1UL);
                end_pfn = e820_end_of_ram();
        }
@@ -392,7 +377,7 @@ void __init setup_arch(char **cmdline_p)
 
        check_efer();
 
-       max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
+       max_pfn_mapped = init_memory_mapping(0, (end_pfn << PAGE_SHIFT));
        if (efi_enabled)
                efi_init();
 
@@ -453,13 +438,12 @@ void __init setup_arch(char **cmdline_p)
        acpi_reserve_bootmem();
 #endif
 
-       if (efi_enabled)
-               efi_reserve_bootmem();
-
+#ifdef CONFIG_X86_MPPARSE
        /*
        * Find and reserve possible boot-time SMP configuration:
        */
        find_smp_config();
+#endif
 #ifdef CONFIG_BLK_DEV_INITRD
        if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
                unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -502,11 +486,13 @@ void __init setup_arch(char **cmdline_p)
 
        init_cpu_to_node();
 
+#ifdef CONFIG_X86_MPPARSE
        /*
         * get boot-time SMP configuration:
         */
        if (smp_found_config)
                get_smp_config();
+#endif
        init_apic_mappings();
        ioapic_init_mappings();
 
@@ -516,7 +502,7 @@ void __init setup_arch(char **cmdline_p)
         * We trust e820 completely. No explicit ROM probing in memory.
         */
        e820_reserve_resources();
-       e820_mark_nosave_regions();
+       e820_mark_nosave_regions(end_pfn);
 
        /* request I/O space for devices used on all i[345]86 PCs */
        for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
index 3e1cecedde42747261c94053a53191ffa7291107..83e62137911be188f9b75a592ddd304d806dddc6 100644 (file)
@@ -555,23 +555,6 @@ cpumask_t cpu_coregroup_map(int cpu)
                return c->llc_shared_map;
 }
 
-#ifdef CONFIG_X86_32
-/*
- * We are called very early to get the low memory for the
- * SMP bootup trampoline page.
- */
-void __init smp_alloc_memory(void)
-{
-       trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
-       /*
-        * Has to be in very low memory so we can execute
-        * real-mode AP code.
-        */
-       if (__pa(trampoline_base) >= 0x9F000)
-               BUG();
-}
-#endif
-
 static void impress_friends(void)
 {
        int cpu;
index 70e4a374b4e804948998e6711c16ce3682c93e05..e9d91720a40f1600ce6a9b5aa2457a2edef64837 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/srat.h>
 #include <asm/topology.h>
 #include <asm/smp.h>
+#include <asm/e820.h>
 
 /*
  * proximity macros and definitions
@@ -244,12 +245,13 @@ static int __init acpi20_parse_srat(struct acpi_table_srat *sratp)
                printk("chunk %d nid %d start_pfn %08lx end_pfn %08lx\n",
                       j, chunk->nid, chunk->start_pfn, chunk->end_pfn);
                node_read_chunk(chunk->nid, chunk);
-               add_active_range(chunk->nid, chunk->start_pfn, chunk->end_pfn);
+               e820_register_active_regions(chunk->nid, chunk->start_pfn,
+                                            min(chunk->end_pfn, max_pfn));
        }
  
        for_each_online_node(nid) {
                unsigned long start = node_start_pfn[nid];
-               unsigned long end = node_end_pfn[nid];
+               unsigned long end = min(node_end_pfn[nid], max_pfn);
 
                memory_present(nid, start, end);
                node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
@@ -261,7 +263,7 @@ out_fail:
 
 struct acpi_static_rsdt {
        struct acpi_table_rsdt table;
-       u32 padding[7]; /* Allow for 7 more table entries */
+       u32 padding[32]; /* Allow for 32 more table entries */
 };
 
 int __init get_memcfg_from_srat(void)
@@ -297,7 +299,7 @@ int __init get_memcfg_from_srat(void)
        }
 
        rsdt = (struct acpi_table_rsdt *)
-           early_ioremap(rsdp->rsdt_physical_address, sizeof(struct acpi_table_rsdt));
+           early_ioremap(rsdp->rsdt_physical_address, sizeof(saved_rsdt));
 
        if (!rsdt) {
                printk(KERN_WARNING
@@ -310,6 +312,7 @@ int __init get_memcfg_from_srat(void)
 
        if (strncmp(header->signature, ACPI_SIG_RSDT, strlen(ACPI_SIG_RSDT))) {
                printk(KERN_WARNING "ACPI: RSDT signature incorrect\n");
+               early_iounmap(rsdt, sizeof(saved_rsdt));
                goto out_err;
        }
 
@@ -319,37 +322,51 @@ int __init get_memcfg_from_srat(void)
         * size of RSDT) divided by the size of each entry
         * (4-byte table pointers).
         */
-       tables = (header->length - sizeof(struct acpi_table_header)) / 4;
+       tables = (header->length - sizeof(struct acpi_table_header)) / sizeof(u32);
 
        if (!tables)
                goto out_err;
 
        memcpy(&saved_rsdt, rsdt, sizeof(saved_rsdt));
-
+       early_iounmap(rsdt, sizeof(saved_rsdt));
        if (saved_rsdt.table.header.length > sizeof(saved_rsdt)) {
                printk(KERN_WARNING "ACPI: Too big length in RSDT: %d\n",
                       saved_rsdt.table.header.length);
                goto out_err;
        }
 
-       printk("Begin SRAT table scan....\n");
+       printk("Begin SRAT table scan....%d\n", tables);
 
-       for (i = 0; i < tables; i++) {
+       for (i = 0; i < tables; i++){
+               int result;
+               u32 length;
                /* Map in header, then map in full table length. */
                header = (struct acpi_table_header *)
                        early_ioremap(saved_rsdt.table.table_offset_entry[i], sizeof(struct acpi_table_header));
                if (!header)
                        break;
+
+                printk(KERN_INFO "ACPI: %4.4s %08lX, %04X\n",
+                           header->signature,
+                  (unsigned long)saved_rsdt.table.table_offset_entry[i],
+                           header->length);
+
+               if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4)) {
+                       early_iounmap(header, sizeof(struct acpi_table_header));
+                       continue;
+               }
+
+               length = header->length;
+               early_iounmap(header, sizeof(struct acpi_table_header));
                header = (struct acpi_table_header *)
-                       early_ioremap(saved_rsdt.table.table_offset_entry[i], header->length);
+                       early_ioremap(saved_rsdt.table.table_offset_entry[i], length);
                if (!header)
                        break;
 
-               if (strncmp((char *) &header->signature, ACPI_SIG_SRAT, 4))
-                       continue;
-
                /* we've found the srat table. don't need to look at any more tables */
-               return acpi20_parse_srat((struct acpi_table_srat *)header);
+               result = acpi20_parse_srat((struct acpi_table_srat *)header);
+               early_iounmap(header, length);
+               return result;
        }
 out_err:
        remove_all_active_ranges();
index ae751094eba99754b47482aac7bab45d6dd40875..d67ce5f044ba342a327ba634a17ccea09698e040 100644 (file)
@@ -36,7 +36,9 @@ static struct rio_table_hdr *rio_table_hdr __initdata;
 static struct scal_detail   *scal_devs[MAX_NUMNODES] __initdata;
 static struct rio_detail    *rio_devs[MAX_NUMNODES*4] __initdata;
 
+#ifndef CONFIG_X86_NUMAQ
 static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
+#endif
 
 static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
 {
index abbf199adebbccda8bb7e51523da6d161e8aa48a..1106fac6024d412fae02df4780464ecd2f9abf4b 100644 (file)
@@ -2,7 +2,7 @@
 
 #include <asm/trampoline.h>
 
-/* ready for x86_64, no harm for x86, since it will overwrite after alloc */
+/* ready for x86_64 and x86 */
 unsigned char *trampoline_base = __va(TRAMPOLINE_BASE);
 
 /*
index 5c7e2fd52075be167680311b13d5e7c9d120bbe4..5e4772907c6e3d629793092a21cfab78dfee066b 100644 (file)
@@ -1012,6 +1012,7 @@ __init void lguest_init(void)
         * clobbered.  The Launcher places our initial pagetables somewhere at
         * the top of our physical memory, so we don't need extra space: set
         * init_pg_tables_end to the end of the kernel. */
+       init_pg_tables_start = __pa(pg0);
        init_pg_tables_end = __pa(pg0);
 
        /* Load the %fs segment register (the per-cpu segment register) with
@@ -1065,9 +1066,9 @@ __init void lguest_init(void)
        pm_power_off = lguest_power_off;
        machine_ops.restart = lguest_restart;
 
-       /* Now we're set up, call start_kernel() in init/main.c and we proceed
+       /* Now we're set up, call i386_start_kernel() in head32.c and we proceed
         * to boot as normal.  It never returns. */
-       start_kernel();
+       i386_start_kernel();
 }
 /*
  * This marks the end of stage II of our journey, The Guest.
index 0c28a071824c2bd0789eb9c7d929615e8ba944d1..56b4c39cb7fa87fbc89ba92e055ad49eabcc66ba 100644 (file)
@@ -153,6 +153,7 @@ late_initcall(print_ipi_mode);
 char * __init machine_specific_memory_setup(void)
 {
        char *who;
+       int new_nr;
 
 
        who = "BIOS-e820";
@@ -163,7 +164,11 @@ char * __init machine_specific_memory_setup(void)
         * Otherwise fake a memory map; one section from 0k->640k,
         * the next section from 1mb->appropriate_mem_k
         */
-       sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
+       new_nr = boot_params.e820_entries;
+       sanitize_e820_map(boot_params.e820_map,
+                       ARRAY_SIZE(boot_params.e820_map),
+                       &new_nr);
+       boot_params.e820_entries = new_nr;
        if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
            < 0) {
                unsigned long mem_size;
index 69dd4da218dcbb7e332f5ec15ca144000b0c15e5..3ef8b43b62fc9e3195904e57d45fd4f2031874d4 100644 (file)
@@ -3,4 +3,3 @@
 #
 
 obj-$(CONFIG_X86_ES7000)       := es7000plat.o
-obj-$(CONFIG_X86_GENERICARCH)  := es7000plat.o
index f5d6f7d8b86ec2ed6d218a2dd0aca3e36115e841..4354ce804889920f7560361cf79c2876cf7417b9 100644 (file)
@@ -52,6 +52,8 @@ static struct mip_reg         *host_reg;
 static int                     mip_port;
 static unsigned long           mip_addr, host_addr;
 
+int es7000_plat;
+
 /*
  * GSI override for ES7000 platforms.
  */
@@ -175,53 +177,6 @@ find_unisys_acpi_oem_table(unsigned long *oem_addr)
 }
 #endif
 
-/*
- * This file also gets compiled if CONFIG_X86_GENERICARCH is set. Generic
- * arch already has got following function definitions (asm-generic/es7000.c)
- * hence no need to define these for that case.
- */
-#ifndef CONFIG_X86_GENERICARCH
-void es7000_sw_apic(void);
-void __init enable_apic_mode(void)
-{
-       es7000_sw_apic();
-       return;
-}
-
-__init int mps_oem_check(struct mp_config_table *mpc, char *oem,
-               char *productid)
-{
-       if (mpc->mpc_oemptr) {
-               struct mp_config_oemtable *oem_table =
-                       (struct mp_config_oemtable *)mpc->mpc_oemptr;
-               if (!strncmp(oem, "UNISYS", 6))
-                       return parse_unisys_oem((char *)oem_table);
-       }
-       return 0;
-}
-#ifdef CONFIG_ACPI
-/* Hook from generic ACPI tables.c */
-int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-       unsigned long oem_addr;
-       if (!find_unisys_acpi_oem_table(&oem_addr)) {
-               if (es7000_check_dsdt())
-                       return parse_unisys_oem((char *)oem_addr);
-               else {
-                       setup_unisys();
-                       return 1;
-               }
-       }
-       return 0;
-}
-#else
-int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-       return 0;
-}
-#endif
-#endif /* COFIG_X86_GENERICARCH */
-
 static void
 es7000_spin(int n)
 {
index 19d6d407737b51dbb76ac08be1f83afab194f6ba..0dbd7803a1d5e4199f6d1c84676296247428a2d3 100644 (file)
@@ -2,7 +2,11 @@
 # Makefile for the generic architecture
 #
 
-EXTRA_CFLAGS   := -Iarch/x86/kernel
+EXTRA_CFLAGS                   := -Iarch/x86/kernel
 
-obj-y          := probe.o summit.o bigsmp.o es7000.o default.o 
-obj-y          += ../../x86/mach-es7000/
+obj-y                          := probe.o default.o
+obj-$(CONFIG_X86_NUMAQ)                += numaq.o
+obj-$(CONFIG_X86_SUMMIT)       += summit.o
+obj-$(CONFIG_X86_BIGSMP)       += bigsmp.o
+obj-$(CONFIG_X86_ES7000)       += es7000.o
+obj-$(CONFIG_X86_ES7000)       += ../../x86/mach-es7000/
index 95fc463056d0dc92345aaee6cbd0b5ef148add51..59d7717145590b4f70f7e75daad74bff953d8b3a 100644 (file)
@@ -23,10 +23,8 @@ static int dmi_bigsmp; /* can be set by dmi scanners */
 
 static int hp_ht_bigsmp(const struct dmi_system_id *d)
 {
-#ifdef CONFIG_X86_GENERICARCH
        printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
        dmi_bigsmp = 1;
-#endif
        return 0;
 }
 
@@ -48,7 +46,7 @@ static const struct dmi_system_id bigsmp_dmi_table[] = {
 static int probe_bigsmp(void)
 {
        if (def_to_bigsmp)
-       dmi_bigsmp = 1;
+               dmi_bigsmp = 1;
        else
                dmi_check_system(bigsmp_dmi_table);
        return dmi_bigsmp;
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
new file mode 100644 (file)
index 0000000..8091e68
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * APIC driver for the IBM NUMAQ chipset.
+ */
+#define APIC_DEFINITION 1
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/smp.h>
+#include <asm/mpspec.h>
+#include <asm/genapic.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <asm/mach-numaq/mach_apic.h>
+#include <asm/mach-numaq/mach_apicdef.h>
+#include <asm/mach-numaq/mach_ipi.h>
+#include <asm/mach-numaq/mach_mpparse.h>
+#include <asm/mach-numaq/mach_wakecpu.h>
+#include <asm/numaq.h>
+
+static int mps_oem_check(struct mp_config_table *mpc, char *oem,
+               char *productid)
+{
+       numaq_mps_oem_check(mpc, oem, productid);
+       return found_numaq;
+}
+
+static int probe_numaq(void)
+{
+       /* already know from get_memcfg_numaq() */
+       return found_numaq;
+}
+
+/* Hook from generic ACPI tables.c */
+static int acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+       return 0;
+}
+
+struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq);
index c5ae751b994a317c32f086d59c2cf33f120b402e..ba18dec48555904333ddd0120cd894f3bdf1539b 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/apicdef.h>
 #include <asm/genapic.h>
 
+extern struct genapic apic_numaq;
 extern struct genapic apic_summit;
 extern struct genapic apic_bigsmp;
 extern struct genapic apic_es7000;
@@ -24,9 +25,18 @@ extern struct genapic apic_default;
 struct genapic *genapic = &apic_default;
 
 static struct genapic *apic_probe[] __initdata = {
+#ifdef CONFIG_X86_NUMAQ
+       &apic_numaq,
+#endif
+#ifdef CONFIG_X86_SUMMIT
        &apic_summit,
+#endif
+#ifdef CONFIG_X86_BIGSMP
        &apic_bigsmp,
+#endif
+#ifdef CONFIG_X86_ES7000
        &apic_es7000,
+#endif
        &apic_default,  /* must be last */
        NULL,
 };
@@ -54,6 +64,7 @@ early_param("apic", parse_apic);
 
 void __init generic_bigsmp_probe(void)
 {
+#if CONFIG_X86_BIGSMP
        /*
         * This routine is used to switch to bigsmp mode when
         * - There is no apic= option specified by the user
@@ -67,6 +78,7 @@ void __init generic_bigsmp_probe(void)
                        printk(KERN_INFO "Overriding APIC driver with %s\n",
                               genapic->name);
                }
+#endif
 }
 
 void __init generic_apic_probe(void)
@@ -88,7 +100,8 @@ void __init generic_apic_probe(void)
 
 /* These functions can switch the APIC even after the initial ->probe() */
 
-int __init mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid)
+int __init mps_oem_check(struct mp_config_table *mpc, char *oem,
+                                char *productid)
 {
        int i;
        for (i = 0; apic_probe[i]; ++i) {
index 57484e91ab904c35b327464d80bd16766556f3eb..a2fb78c0d154ef19f1fbb58874840ae5bab8b5ca 100644 (file)
@@ -8,11 +8,6 @@
 #include "cobalt.h"
 #include "mach_apic.h"
 
-/* Have we found an MP table */
-int smp_found_config;
-
-int pic_mode;
-
 extern unsigned int __cpuinitdata maxcpus;
 
 /*
@@ -76,7 +71,9 @@ void __init find_smp_config(void)
        if (ncpus > maxcpus)
                ncpus = maxcpus;
 
+#ifdef CONFIG_X86_LOCAL_APIC
        smp_found_config = 1;
+#endif
        while (ncpus--)
                MP_processor_info(mp++);
 
index 5ae5466b9eb9c3b9850fca6617e36dab2c653da7..f4aca9fa9546f23ff49874a322d1a684078f485b 100644 (file)
@@ -62,6 +62,7 @@ void __init time_init_hook(void)
 char *__init machine_specific_memory_setup(void)
 {
        char *who;
+       int new_nr;
 
        who = "NOT VOYAGER";
 
@@ -111,7 +112,11 @@ char *__init machine_specific_memory_setup(void)
         * Otherwise fake a memory map; one section from 0k->640k,
         * the next section from 1mb->appropriate_mem_k
         */
-       sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
+       new_nr = boot_params.e820_entries;
+       sanitize_e820_map(boot_params.e820_map,
+                       ARRAY_SIZE(boot_params.e820_map),
+                       &new_nr);
+       boot_params.e820_entries = new_nr;
        if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries)
            < 0) {
                unsigned long mem_size;
index 8acbf0cdf1a5fb8eb4d75e1d57905104b23cce14..8dedd01e909fe4efeb2ff1501518856b23b6e967 100644 (file)
@@ -59,11 +59,6 @@ __u32 voyager_quad_processors = 0;
  * activity count.  Finally exported by i386_ksyms.c */
 static int voyager_extended_cpus = 1;
 
-/* Have we found an SMP box - used by time.c to do the profiling
-   interrupt for timeslicing; do not set to 1 until the per CPU timer
-   interrupt is active */
-int smp_found_config = 0;
-
 /* Used for the invalidate map that's also checked in the spinlock */
 static volatile unsigned long smp_invalidate_needed;
 
@@ -1137,15 +1132,6 @@ void flush_tlb_all(void)
        on_each_cpu(do_flush_tlb_all, 0, 1, 1);
 }
 
-/* used to set up the trampoline for other CPUs when the memory manager
- * is sorted out */
-void __init smp_alloc_memory(void)
-{
-       trampoline_base = alloc_bootmem_low_pages(PAGE_SIZE);
-       if (__pa(trampoline_base) >= 0x93000)
-               BUG();
-}
-
 /* send a reschedule CPI to one CPU by physical CPU number*/
 static void voyager_smp_send_reschedule(int cpu)
 {
index 914ccf983687dd276b8cc5827d4430cf093fd822..accc7c6c57fc1dace92139a5981460f7a213822a 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/setup.h>
 #include <asm/mmzone.h>
 #include <asm/bios_ebda.h>
+#include <asm/proto.h>
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
 EXPORT_SYMBOL(node_data);
@@ -59,14 +60,14 @@ unsigned long node_end_pfn[MAX_NUMNODES] __read_mostly;
 /*
  * 4) physnode_map     - the mapping between a pfn and owning node
  * physnode_map keeps track of the physical memory layout of a generic
- * numa node on a 256Mb break (each element of the array will
- * represent 256Mb of memory and will be marked by the node id.  so,
+ * numa node on a 64Mb break (each element of the array will
+ * represent 64Mb of memory and will be marked by the node id.  so,
  * if the first gig is on node 0, and the second gig is on node 1
  * physnode_map will contain:
  *
- *     physnode_map[0-3] = 0;
- *     physnode_map[4-7] = 1;
- *     physnode_map[8- ] = -1;
+ *     physnode_map[0-15] = 0;
+ *     physnode_map[16-31] = 1;
+ *     physnode_map[32- ] = -1;
  */
 s8 physnode_map[MAX_ELEMENTS] __read_mostly = { [0 ... (MAX_ELEMENTS - 1)] = -1};
 EXPORT_SYMBOL(physnode_map);
@@ -81,9 +82,9 @@ void memory_present(int nid, unsigned long start, unsigned long end)
        printk(KERN_DEBUG "  ");
        for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
                physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
-               printk("%ld ", pfn);
+               printk(KERN_CONT "%ld ", pfn);
        }
-       printk("\n");
+       printk(KERN_CONT "\n");
 }
 
 unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
@@ -119,11 +120,11 @@ int __init get_memcfg_numa_flat(void)
 {
        printk("NUMA - single node, flat memory mode\n");
 
-       /* Run the memory configuration and find the top of memory. */
-       propagate_e820_map();
        node_start_pfn[0] = 0;
        node_end_pfn[0] = max_pfn;
+       e820_register_active_regions(0, 0, max_pfn);
        memory_present(0, 0, max_pfn);
+       node_remap_size[0] = node_memmap_size_bytes(0, 0, max_pfn);
 
         /* Indicate there is one node available. */
        nodes_clear(node_online_map);
@@ -159,9 +160,17 @@ static void __init allocate_pgdat(int nid)
        if (nid && node_has_online_mem(nid))
                NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
        else {
-               NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(min_low_pfn));
-               min_low_pfn += PFN_UP(sizeof(pg_data_t));
+               unsigned long pgdat_phys;
+               pgdat_phys = find_e820_area(min_low_pfn<<PAGE_SHIFT,
+                                (nid ? max_low_pfn:max_pfn_mapped)<<PAGE_SHIFT,
+                                sizeof(pg_data_t),
+                                PAGE_SIZE);
+               NODE_DATA(nid) = (pg_data_t *)(pfn_to_kaddr(pgdat_phys>>PAGE_SHIFT));
+               reserve_early(pgdat_phys, pgdat_phys + sizeof(pg_data_t),
+                             "NODE_DATA");
        }
+       printk(KERN_DEBUG "allocate_pgdat: node %d NODE_DATA %08lx\n",
+               nid, (unsigned long)NODE_DATA(nid));
 }
 
 #ifdef CONFIG_DISCONTIGMEM
@@ -202,8 +211,12 @@ void __init remap_numa_kva(void)
        int node;
 
        for_each_online_node(node) {
+               printk(KERN_DEBUG "remap_numa_kva: node %d\n", node);
                for (pfn=0; pfn < node_remap_size[node]; pfn += PTRS_PER_PTE) {
                        vaddr = node_remap_start_vaddr[node]+(pfn<<PAGE_SHIFT);
+                       printk(KERN_DEBUG "remap_numa_kva: %08lx to pfn %08lx\n",
+                               (unsigned long)vaddr,
+                               node_remap_start_pfn[node] + pfn);
                        set_pmd_pfn((ulong) vaddr, 
                                node_remap_start_pfn[node] + pfn, 
                                PAGE_KERNEL_LARGE);
@@ -215,17 +228,21 @@ static unsigned long calculate_numa_remap_pages(void)
 {
        int nid;
        unsigned long size, reserve_pages = 0;
-       unsigned long pfn;
 
        for_each_online_node(nid) {
-               unsigned old_end_pfn = node_end_pfn[nid];
+               u64 node_end_target;
+               u64 node_end_final;
 
                /*
                 * The acpi/srat node info can show hot-add memroy zones
                 * where memory could be added but not currently present.
                 */
+               printk("node %d pfn: [%lx - %lx]\n",
+                       nid, node_start_pfn[nid], node_end_pfn[nid]);
                if (node_start_pfn[nid] > max_pfn)
                        continue;
+               if (!node_end_pfn[nid])
+                       continue;
                if (node_end_pfn[nid] > max_pfn)
                        node_end_pfn[nid] = max_pfn;
 
@@ -237,39 +254,42 @@ static unsigned long calculate_numa_remap_pages(void)
                /* now the roundup is correct, convert to PAGE_SIZE pages */
                size = size * PTRS_PER_PTE;
 
-               /*
-                * Validate the region we are allocating only contains valid
-                * pages.
-                */
-               for (pfn = node_end_pfn[nid] - size;
-                    pfn < node_end_pfn[nid]; pfn++)
-                       if (!page_is_ram(pfn))
-                               break;
-
-               if (pfn != node_end_pfn[nid])
-                       size = 0;
+               node_end_target = round_down(node_end_pfn[nid] - size,
+                                                PTRS_PER_PTE);
+               node_end_target <<= PAGE_SHIFT;
+               do {
+                       node_end_final = find_e820_area(node_end_target,
+                                       ((u64)node_end_pfn[nid])<<PAGE_SHIFT,
+                                               ((u64)size)<<PAGE_SHIFT,
+                                               LARGE_PAGE_BYTES);
+                       node_end_target -= LARGE_PAGE_BYTES;
+               } while (node_end_final == -1ULL &&
+                        (node_end_target>>PAGE_SHIFT) > (node_start_pfn[nid]));
+
+               if (node_end_final == -1ULL)
+                       panic("Can not get kva ram\n");
 
                printk("Reserving %ld pages of KVA for lmem_map of node %d\n",
                                size, nid);
                node_remap_size[nid] = size;
                node_remap_offset[nid] = reserve_pages;
                reserve_pages += size;
-               printk("Shrinking node %d from %ld pages to %ld pages\n",
-                       nid, node_end_pfn[nid], node_end_pfn[nid] - size);
-
-               if (node_end_pfn[nid] & (PTRS_PER_PTE-1)) {
-                       /*
-                        * Align node_end_pfn[] and node_remap_start_pfn[] to
-                        * pmd boundary. remap_numa_kva will barf otherwise.
-                        */
-                       printk("Shrinking node %d further by %ld pages for proper alignment\n",
-                               nid, node_end_pfn[nid] & (PTRS_PER_PTE-1));
-                       size +=  node_end_pfn[nid] & (PTRS_PER_PTE-1);
-               }
+               printk("Shrinking node %d from %ld pages to %lld pages\n",
+                       nid, node_end_pfn[nid], node_end_final>>PAGE_SHIFT);
 
-               node_end_pfn[nid] -= size;
+               /*
+                *  If the KVA RAM lies below max_low_pfn (possible on systems
+                *  with less memory), reserve it so it is not reused later.
+                *  layout will be: KVA address , KVA RAM
+                */
+               if ((node_end_final>>PAGE_SHIFT) < max_low_pfn)
+                       reserve_early(node_end_final,
+                                     node_end_final+(((u64)size)<<PAGE_SHIFT),
+                                     "KVA RAM");
+
+               node_end_pfn[nid] = node_end_final>>PAGE_SHIFT;
                node_remap_start_pfn[nid] = node_end_pfn[nid];
-               shrink_active_range(nid, old_end_pfn, node_end_pfn[nid]);
+               shrink_active_range(nid, node_end_pfn[nid]);
        }
        printk("Reserving total of %ld pages for numa KVA remap\n",
                        reserve_pages);
@@ -287,8 +307,7 @@ static void init_remap_allocator(int nid)
 
        printk ("node %d will remap to vaddr %08lx - %08lx\n", nid,
                (ulong) node_remap_start_vaddr[nid],
-               (ulong) pfn_to_kaddr(highstart_pfn
-                  + node_remap_offset[nid] + node_remap_size[nid]));
+               (ulong) node_remap_end_vaddr[nid]);
 }
 #else
 void *alloc_remap(int nid, unsigned long size)
@@ -315,7 +334,7 @@ unsigned long __init setup_memory(void)
 {
        int nid;
        unsigned long system_start_pfn, system_max_low_pfn;
-       unsigned long wasted_pages;
+       long kva_target_pfn;
 
        /*
         * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -324,34 +343,38 @@ unsigned long __init setup_memory(void)
         * this space and use it to adjust the boundary between ZONE_NORMAL
         * and ZONE_HIGHMEM.
         */
+
+       /* call find_max_low_pfn() first; it could update max_pfn */
+       system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+
+       remove_all_active_ranges();
        get_memcfg_numa();
 
-       kva_pages = calculate_numa_remap_pages();
+       kva_pages = round_up(calculate_numa_remap_pages(), PTRS_PER_PTE);
 
        /* partially used pages are not usable - thus round upwards */
        system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
 
-       kva_start_pfn = find_max_low_pfn() - kva_pages;
+       kva_target_pfn = round_down(max_low_pfn - kva_pages, PTRS_PER_PTE);
+       do {
+               kva_start_pfn = find_e820_area(kva_target_pfn<<PAGE_SHIFT,
+                                       max_low_pfn<<PAGE_SHIFT,
+                                       kva_pages<<PAGE_SHIFT,
+                                       PTRS_PER_PTE<<PAGE_SHIFT) >> PAGE_SHIFT;
+               kva_target_pfn -= PTRS_PER_PTE;
+       } while (kva_start_pfn == -1UL && kva_target_pfn > min_low_pfn);
 
-#ifdef CONFIG_BLK_DEV_INITRD
-       /* Numa kva area is below the initrd */
-       if (initrd_start)
-               kva_start_pfn = PFN_DOWN(initrd_start - PAGE_OFFSET)
-                       - kva_pages;
-#endif
+       if (kva_start_pfn == -1UL)
+               panic("Can not get kva space\n");
 
-       /*
-        * We waste pages past at the end of the KVA for no good reason other
-        * than how it is located. This is bad.
-        */
-       wasted_pages = kva_start_pfn & (PTRS_PER_PTE-1);
-       kva_start_pfn -= wasted_pages;
-       kva_pages += wasted_pages;
-
-       system_max_low_pfn = max_low_pfn = find_max_low_pfn();
        printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n",
                kva_start_pfn, max_low_pfn);
        printk("max_pfn = %ld\n", max_pfn);
+
+       /* avoid clash with initrd */
+       reserve_early(kva_start_pfn<<PAGE_SHIFT,
+                     (kva_start_pfn + kva_pages)<<PAGE_SHIFT,
+                    "KVA PG");
 #ifdef CONFIG_HIGHMEM
        highstart_pfn = highend_pfn = max_pfn;
        if (max_pfn > system_max_low_pfn)
@@ -387,16 +410,8 @@ unsigned long __init setup_memory(void)
        return max_low_pfn;
 }
 
-void __init numa_kva_reserve(void)
-{
-       if (kva_pages)
-               reserve_bootmem(PFN_PHYS(kva_start_pfn), PFN_PHYS(kva_pages),
-                               BOOTMEM_DEFAULT);
-}
-
 void __init zone_sizes_init(void)
 {
-       int nid;
        unsigned long max_zone_pfns[MAX_NR_ZONES];
        memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
        max_zone_pfns[ZONE_DMA] =
@@ -406,15 +421,6 @@ void __init zone_sizes_init(void)
        max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
 #endif
 
-       /* If SRAT has not registered memory, register it now */
-       if (find_max_pfn_with_active_regions() == 0) {
-               for_each_online_node(nid) {
-                       if (node_has_online_mem(nid))
-                               add_active_range(nid, node_start_pfn[nid],
-                                                       node_end_pfn[nid]);
-               }
-       }
-
        free_area_init_nodes(max_zone_pfns);
        return;
 }
index ec30d10154b657a63ab07b5e25b4298c10f4aabe..0e7bb5e81670e577de9ab19a9e5a350fb4d55d6b 100644 (file)
@@ -289,7 +289,8 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
 
 void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
 {
-       if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
+       if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn)) &&
+           !page_is_reserved_early(pfn)) {
                ClearPageReserved(page);
                init_page_count(page);
                __free_page(page);
index c5066d519e5de4cf73dec673f6d51c2ca131075b..afb07ffb931d5d04cabfae2d2a5f73e5ffaabd3c 100644 (file)
@@ -233,7 +233,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
        else
                bootmap_start = round_up(start, PAGE_SIZE);
        /*
-        * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
+        * SMP_CACHE_BYTES could be enough, but init_bootmem_node like
         * to use that to align to PAGE_SIZE
         */
        bootmap = early_node_mem(nodeid, bootmap_start, end,
index 89ec35d00efde0ebde8488f84bc491cffe8fb092..962d96c0495ad5169fe099c571d18c67a1a5acf8 100644 (file)
@@ -13,10 +13,11 @@ pci-y                               := fixup.o
 pci-$(CONFIG_ACPI)             += acpi.o
 pci-y                          += legacy.o irq.o
 
-# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are
+# Careful: VISWS overrules the pci-y above. The colons are
 # therefor correct. This needs a proper fix by distangling the code.
 pci-$(CONFIG_X86_VISWS)                := visws.o fixup.o
-pci-$(CONFIG_X86_NUMAQ)                := numa.o irq.o
+
+pci-$(CONFIG_X86_NUMAQ)                += numa.o
 
 # Necessary for NUMAQ as well
 pci-$(CONFIG_NUMA)             += mp_bus_to_node.o
index 5c2799c20e47b7a48b05e1fd15cbdbc79229d86b..bfefdf0f40d4da01e57ab4db7ddb1af4d9f9e077 100644 (file)
@@ -384,7 +384,7 @@ static int __init early_fill_mp_bus_info(void)
        /* need to take out [0, TOM) for RAM*/
        address = MSR_K8_TOP_MEM1;
        rdmsrl(address, val);
-       end = (val & 0xffffff8000000ULL);
+       end = (val & 0xffffff800000ULL);
        printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20);
        if (end < (1ULL<<32))
                update_range(range, 0, end - 1);
@@ -478,7 +478,7 @@ static int __init early_fill_mp_bus_info(void)
                /* TOP_MEM2 */
                address = MSR_K8_TOP_MEM2;
                rdmsrl(address, val);
-               end = (val & 0xffffff8000000ULL);
+               end = (val & 0xffffff800000ULL);
                printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20);
                update_range(range, 1ULL<<32, end - 1);
        }
index d9afbae5092b64e6487c4dec7c1e57c35bbc935c..99f1ecd485b5d2a228222b4f05915064b3bcd43c 100644 (file)
@@ -6,45 +6,21 @@
 #include <linux/init.h>
 #include <linux/nodemask.h>
 #include <mach_apic.h>
+#include <asm/mpspec.h>
 #include "pci.h"
 
 #define XQUAD_PORTIO_BASE 0xfe400000
 #define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
 
-int mp_bus_id_to_node[MAX_MP_BUSSES];
 #define BUS2QUAD(global) (mp_bus_id_to_node[global])
 
-int mp_bus_id_to_local[MAX_MP_BUSSES];
 #define BUS2LOCAL(global) (mp_bus_id_to_local[global])
 
-void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
-       struct mpc_config_translation *translation)
-{
-       int quad = translation->trans_quad;
-       int local = translation->trans_local;
-
-       mp_bus_id_to_node[m->mpc_busid] = quad;
-       mp_bus_id_to_local[m->mpc_busid] = local;
-       printk(KERN_INFO "Bus #%d is %s (node %d)\n",
-              m->mpc_busid, name, quad);
-}
-
-int quad_local_to_mp_bus_id [NR_CPUS/4][4];
 #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local])
-void mpc_oem_pci_bus(struct mpc_config_bus *m,
-       struct mpc_config_translation *translation)
-{
-       int quad = translation->trans_quad;
-       int local = translation->trans_local;
-
-       quad_local_to_mp_bus_id[quad][local] = m->mpc_busid;
-}
 
 /* Where the IO area was mapped on multiquad, always 0 otherwise */
 void *xquad_portio;
-#ifdef CONFIG_X86_NUMAQ
 EXPORT_SYMBOL(xquad_portio);
-#endif
 
 #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port)
 
@@ -179,6 +155,9 @@ static int __init pci_numa_init(void)
 {
        int quad;
 
+       if (!found_numaq)
+               return 0;
+
        raw_pci_ops = &pci_direct_conf1_mq;
 
        if (pcibios_scanned++)
index f09c1c69c37a1498da07524c477e98bfc397ebe6..275163f81464d452dc75f3f9a5af5d3a7b51a38b 100644 (file)
@@ -1196,6 +1196,7 @@ asmlinkage void __init xen_start_kernel(void)
 
        pgd = (pgd_t *)xen_start_info->pt_base;
 
+       init_pg_tables_start = __pa(pgd);
        init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE;
 
        init_mm.pgd = pgd; /* use the Xen pagetables to start */
@@ -1236,5 +1237,5 @@ asmlinkage void __init xen_start_kernel(void)
                add_preferred_console("hvc", 0, NULL);
 
        /* Start the world */
-       start_kernel();
+       i386_start_kernel();
 }
index c52fca833268c7f1eaf53c091fa2f41e49d5d4a0..860f15f36ce926290f94dc0f6b13762408734798 100644 (file)
@@ -4,7 +4,6 @@
 
 menuconfig ACPI
        bool "ACPI (Advanced Configuration and Power Interface) Support"
-       depends on !X86_NUMAQ
        depends on !X86_VISWS
        depends on !IA64_HP_SIM
        depends on IA64 || X86
index 89022a74faeeda9d9ed9b11f431c2e4c4eecb4d7..e556f30c7c16016094354d7d01fc5a695af7c6f9 100644 (file)
@@ -570,6 +570,11 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
               (triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge",
               (polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq);
 
+#ifdef CONFIG_X86
+       mp_config_acpi_gsi(dev->bus->number, dev->devfn, dev->pin, irq,
+                                triggering, polarity);
+#endif
+
        return 0;
 }
 
index c5e3ed7e903b3a14f405554f8c6163f2d62cb998..455575be3560d2336c75d2cc8aaf42c64a728f9c 100644 (file)
@@ -8,6 +8,11 @@
 #include <linux/slab.h>
 #include <asm/dmi.h>
 
+/*
+ * DMI stands for "Desktop Management Interface".  It is part
+ * of, and an antecedent to, SMBIOS, which stands for System
+ * Management BIOS.  See further: http://www.dmtf.org/standards
+ */
 static char dmi_empty_string[] = "        ";
 
 static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s)
index 14411c9de46f8f1f22434759c91b4ad892d17227..73ce5b32443ff0e8fda659ac70e3de7d0062d01a 100644 (file)
@@ -28,6 +28,7 @@
 #include <asm/numa.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
+#include <asm/mpspec.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
index b4a46b7be7941fca2a7cd8636d9701bbf9ed7aa4..0033e50c13b28cec6bfcd479ced48e01798e9e76 100644 (file)
@@ -14,4 +14,6 @@ static inline unsigned int get_bios_ebda(void)
        return address; /* 0 means none */
 }
 
+void reserve_ebda_region(void);
+
 #endif /* _MACH_BIOS_EBDA_H */
index f62f4733606bfa9388ad98dce4a0c72fec7e9b2f..0a073904168b48e18bf7613097dc0842cd6863fe 100644 (file)
@@ -106,4 +106,7 @@ struct boot_params {
        __u8  _pad9[276];                               /* 0xeec */
 } __attribute__((packed));
 
+void reserve_setup_data(void);
+void parse_setup_data(void);
+
 #endif /* _ASM_BOOTPARAM_H */
index 7004251fc66bd7925a787bd09ccb675dc0714de8..8aa32323a1822642ee709fca7e50fbf760d74b8c 100644 (file)
@@ -2,6 +2,41 @@
 #define __ASM_E820_H
 #define E820MAP        0x2d0           /* our map */
 #define E820MAX        128             /* number of entries in E820MAP */
+
+/*
+ * Legacy E820 BIOS limits us to 128 (E820MAX) nodes due to the
+ * constrained space in the zeropage.  If we have more nodes than
+ * that, and if we've booted off EFI firmware, then the EFI tables
+ * passed us from the EFI firmware can list more nodes.  Size our
+ * internal memory map tables to have room for these additional
+ * nodes, based on up to three entries per node for which the
+ * kernel was built: MAX_NUMNODES == (1 << CONFIG_NODES_SHIFT),
+ * plus E820MAX, allowing space for the possible duplicate E820
+ * entries that might need room in the same arrays, prior to the
+ * call to sanitize_e820_map() to remove duplicates.  The allowance
+ * of three memory map entries per node is "enough" entries for
+ * the initial hardware platform motivating this mechanism to make
+ * use of additional EFI map entries.  Future platforms may want
+ * to allow more than three entries per node or otherwise refine
+ * this size.
+ */
+
+/*
+ * Odd: 'make headers_check' complains about numa.h if I try
+ * to collapse the next two #ifdef lines to a single line:
+ *     #if defined(__KERNEL__) && defined(CONFIG_EFI)
+ */
+#ifdef __KERNEL__
+#ifdef CONFIG_EFI
+#include <linux/numa.h>
+#define E820_X_MAX (E820MAX + 3 * MAX_NUMNODES)
+#else  /* ! CONFIG_EFI */
+#define E820_X_MAX E820MAX
+#endif
+#else  /* ! __KERNEL__ */
+#define E820_X_MAX E820MAX
+#endif
+
 #define E820NR 0x1e8           /* # entries in E820MAP */
 
 #define E820_RAM       1
@@ -18,8 +53,51 @@ struct e820entry {
 
 struct e820map {
        __u32 nr_map;
-       struct e820entry map[E820MAX];
+       struct e820entry map[E820_X_MAX];
 };
+
+extern struct e820map e820;
+
+extern int e820_any_mapped(u64 start, u64 end, unsigned type);
+extern int e820_all_mapped(u64 start, u64 end, unsigned type);
+extern void add_memory_region(u64 start, u64 size, int type);
+extern void e820_print_map(char *who);
+extern int
+sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map);
+extern int copy_e820_map(struct e820entry *biosmap, int nr_map);
+extern u64 update_memory_range(u64 start, u64 size, unsigned old_type,
+                              unsigned new_type);
+extern void update_e820(void);
+extern void e820_setup_gap(void);
+
+#if defined(CONFIG_X86_64) || \
+       (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
+extern void e820_mark_nosave_regions(unsigned long limit_pfn);
+#else
+static inline void e820_mark_nosave_regions(unsigned long limit_pfn)
+{
+}
+#endif
+
+extern unsigned long end_user_pfn;
+
+extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
+extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
+extern void reserve_early(u64 start, u64 end, char *name);
+extern void free_early(u64 start, u64 end);
+extern void early_res_to_bootmem(u64 start, u64 end);
+extern int page_is_reserved_early(unsigned long pagenr);
+extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
+
+extern unsigned long e820_end_of_ram(void);
+extern int e820_find_active_region(const struct e820entry *ei,
+                                 unsigned long start_pfn,
+                                 unsigned long last_pfn,
+                                 unsigned long *ei_startpfn,
+                                 unsigned long *ei_endpfn);
+extern void e820_register_active_regions(int nid, unsigned long start_pfn,
+                                        unsigned long end_pfn);
+extern u64 e820_hole_size(u64 start, u64 end);
 #endif /* __ASSEMBLY__ */
 
 #define ISA_START_ADDRESS      0xa0000
index a9f7c6ec32bf7ecad8041a7db1af54c4f2a47393..212b74c10efca34e91c3bb6cf11f301fb83ab44a 100644 (file)
 
 #ifndef __ASSEMBLY__
 
-extern struct e820map e820;
-extern void update_e820(void);
+extern void setup_memory_map(void);
+extern void finish_e820_parsing(void);
 
-extern int e820_all_mapped(unsigned long start, unsigned long end,
-                          unsigned type);
-extern int e820_any_mapped(u64 start, u64 end, unsigned type);
-extern void propagate_e820_map(void);
-extern void register_bootmem_low_pages(unsigned long max_low_pfn);
-extern void add_memory_region(unsigned long long start,
-                             unsigned long long size, int type);
-extern void update_memory_range(u64 start, u64 size, unsigned old_type,
-                               unsigned new_type);
-extern void e820_register_memory(void);
 extern void limit_regions(unsigned long long size);
-extern void print_memory_map(char *who);
 extern void init_iomem_resources(struct resource *code_resource,
                                 struct resource *data_resource,
                                 struct resource *bss_resource);
 
-#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
-extern void e820_mark_nosave_regions(void);
-#else
-static inline void e820_mark_nosave_regions(void)
-{
-}
-#endif
-
-
 #endif/*!__ASSEMBLY__*/
 #endif/*__E820_HEADER*/
index 71c4d685d30d894ad1d1700bdbde2ea468f3e8d3..368585daaa422ce2f1207eccb6bf829e197ada43 100644 (file)
 #include <linux/ioport.h>
 
 #ifndef __ASSEMBLY__
-extern unsigned long find_e820_area(unsigned long start, unsigned long end,
-                                   unsigned long size, unsigned long align);
-extern unsigned long find_e820_area_size(unsigned long start,
-                                        unsigned long *sizep,
-                                        unsigned long align);
-extern void add_memory_region(unsigned long start, unsigned long size,
-                             int type);
-extern void update_memory_range(u64 start, u64 size, unsigned old_type,
-                               unsigned new_type);
 extern void setup_memory_region(void);
 extern void contig_e820_setup(void);
-extern unsigned long e820_end_of_ram(void);
 extern void e820_reserve_resources(void);
-extern void e820_mark_nosave_regions(void);
-extern int e820_any_mapped(unsigned long start, unsigned long end,
-                          unsigned type);
-extern int e820_all_mapped(unsigned long start, unsigned long end,
-                          unsigned type);
 extern int e820_any_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_any_valid(unsigned long start, unsigned long end);
 extern int e820_all_non_reserved(unsigned long start, unsigned long end);
 extern int is_memory_all_valid(unsigned long start, unsigned long end);
-extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
-
-extern void e820_setup_gap(void);
-extern void e820_register_active_regions(int nid, unsigned long start_pfn,
-                                        unsigned long end_pfn);
 
 extern void finish_e820_parsing(void);
 
-extern struct e820map e820;
-extern void update_e820(void);
-
-extern void reserve_early(unsigned long start, unsigned long end, char *name);
-extern void free_early(unsigned long start, unsigned long end);
-extern void early_res_to_bootmem(unsigned long start, unsigned long end);
-
 #endif/*!__ASSEMBLY__*/
 
 #endif/*__E820_HEADER*/
index d53004b855cce82b6303cbe8adde1bf47d450977..7ed2bd7a7f51c3621b26acde91bdaa65f36d9f96 100644 (file)
@@ -90,7 +90,7 @@ extern void *efi_ioremap(unsigned long addr, unsigned long size);
 
 #endif /* CONFIG_X86_32 */
 
-extern void efi_reserve_bootmem(void);
+extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
 
index d593e14f03411df3797562d4e01ed318b00efdc2..86d8c3bdcca42447f1375f4bf3b7967eac010558 100644 (file)
@@ -112,21 +112,32 @@ extern int nr_ioapic_registers[MAX_IO_APICS];
 
 #define MP_MAX_IOAPIC_PIN 127
 
-struct mp_ioapic_routing {
-       int apic_id;
-       int gsi_base;
-       int gsi_end;
-       DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+struct mp_config_ioapic {
+       unsigned long mp_apicaddr;
+       unsigned int mp_apicid;
+       unsigned char mp_type;
+       unsigned char mp_apicver;
+       unsigned char mp_flags;
+};
+
+struct mp_config_intsrc {
+       unsigned int mp_dstapic;
+       unsigned char mp_type;
+       unsigned char mp_irqtype;
+       unsigned short mp_irqflag;
+       unsigned char mp_srcbus;
+       unsigned char mp_srcbusirq;
+       unsigned char mp_dstirq;
 };
 
 /* I/O APIC entries */
-extern struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
+extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
 
 /* # of MP IRQ source entries */
 extern int mp_irq_entries;
 
 /* MP IRQ source entries */
-extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
+extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
 
 /* non-0 if default (table-less) MP configuration */
 extern int mpc_default_type;
index ecc80f341f37723bd689c26da6efb86241c704f5..196d63c28aa44aa3365977da4b729d59444fcb69 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
+#include <asm/smp.h>
 
 /*
  * the following functions deal with sending IPIs between CPUs.
index 0d0b5ba2e9d1a87d53022ea1eaf8a462c19654f2..586cadbf3787ce9c98449921db1c14c942d418bc 100644 (file)
@@ -1,7 +1,10 @@
 #ifndef _MACH_MPPARSE_H
 #define _MACH_MPPARSE_H 1
 
-int mps_oem_check(struct mp_config_table *mpc, char *oem, char *productid); 
-int acpi_madt_oem_check(char *oem_id, char *oem_table_id); 
+
+extern int mps_oem_check(struct mp_config_table *mpc, char *oem,
+                        char *productid);
+
+extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id);
 
 #endif
index 75a56e5afbe7093db489441b3135c27946ea3234..d802465e026a4a5adb6834f90c0ea74caa672617 100644 (file)
@@ -20,8 +20,14 @@ static inline cpumask_t target_cpus(void)
 #define INT_DELIVERY_MODE dest_LowestPrio
 #define INT_DEST_MODE 0     /* physical delivery on LOCAL quad */
  
-#define check_apicid_used(bitmap, apicid) physid_isset(apicid, bitmap)
-#define check_apicid_present(bit) physid_isset(bit, phys_cpu_present_map)
+static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
+{
+       return physid_isset(apicid, bitmap);
+}
+static inline unsigned long check_apicid_present(int bit)
+{
+       return physid_isset(bit, phys_cpu_present_map);
+}
 #define apicid_cluster(apicid) (apicid & 0xF0)
 
 static inline int apic_id_registered(void)
@@ -77,11 +83,6 @@ static inline int cpu_present_to_apicid(int mps_cpu)
                return BAD_APICID;
 }
 
-static inline int generate_logical_apicid(int quad, int phys_apicid)
-{
-       return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
-}
-
 static inline int apicid_to_node(int logical_apicid) 
 {
        return logical_apicid >> 4;
@@ -95,30 +96,6 @@ static inline physid_mask_t apicid_to_cpu_present(int logical_apicid)
        return physid_mask_of_physid(cpu + 4*node);
 }
 
-struct mpc_config_translation {
-       unsigned char mpc_type;
-       unsigned char trans_len;
-       unsigned char trans_type;
-       unsigned char trans_quad;
-       unsigned char trans_global;
-       unsigned char trans_local;
-       unsigned short trans_reserved;
-};
-
-static inline int mpc_apic_id(struct mpc_config_processor *m, 
-                       struct mpc_config_translation *translation_record)
-{
-       int quad = translation_record->trans_quad;
-       int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid);
-
-       printk("Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
-              m->mpc_apicid,
-              (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
-              (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
-              m->mpc_apicver, quad, logical_apicid);
-       return logical_apicid;
-}
-
 extern void *xquad_portio;
 
 static inline void setup_portio_remap(void)
index 459b124011872c9337ad0e6bd58b4a509ef3464a..626aef6b155f1498eee4ab3594eab6a4adeee00d 100644 (file)
@@ -1,14 +1,7 @@
 #ifndef __ASM_MACH_MPPARSE_H
 #define __ASM_MACH_MPPARSE_H
 
-extern void mpc_oem_bus_info(struct mpc_config_bus *m, char *name,
-                            struct mpc_config_translation *translation);
-extern void mpc_oem_pci_bus(struct mpc_config_bus *m,
-       struct mpc_config_translation *translation);
-
-/* Hook from generic ACPI tables.c */
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
+extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
+                               char *productid);
 
 #endif /* __ASM_MACH_MPPARSE_H */
index cb2cad0b65a7dded9049d7a2a8db02e6d58562a4..b2298a227567fa24f28bd04de70b0b80b70aaa93 100644 (file)
 extern struct pglist_data *node_data[];
 #define NODE_DATA(nid) (node_data[nid])
 
-#ifdef CONFIG_X86_NUMAQ
-       #include <asm/numaq.h>
-#elif defined(CONFIG_ACPI_SRAT)/* summit or generic arch */
-       #include <asm/srat.h>
-#endif
+#include <asm/numaq.h>
+/* summit or generic arch */
+#include <asm/srat.h>
 
 extern int get_memcfg_numa_flat(void);
 /*
@@ -26,28 +24,20 @@ extern int get_memcfg_numa_flat(void);
  */
 static inline void get_memcfg_numa(void)
 {
-#ifdef CONFIG_X86_NUMAQ
+
        if (get_memcfg_numaq())
                return;
-#elif defined(CONFIG_ACPI_SRAT)
        if (get_memcfg_from_srat())
                return;
-#endif
-
        get_memcfg_numa_flat();
 }
 
 extern int early_pfn_to_nid(unsigned long pfn);
-extern void numa_kva_reserve(void);
 
 #else /* !CONFIG_NUMA */
 
 #define get_memcfg_numa get_memcfg_numa_flat
-#define get_zholes_size(n) (0)
 
-static inline void numa_kva_reserve(void)
-{
-}
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DISCONTIGMEM
@@ -55,14 +45,14 @@ static inline void numa_kva_reserve(void)
 /*
  * generic node memory support, the following assumptions apply:
  *
- * 1) memory comes in 256Mb contigious chunks which are either present or not
+ * 1) memory comes in 64Mb contiguous chunks which are either present or not
  * 2) we will not have more than 64Gb in total
  *
  * for now assume that 64Gb is max amount of RAM for whole system
  *    64Gb / 4096bytes/page = 16777216 pages
  */
 #define MAX_NR_PAGES 16777216
-#define MAX_ELEMENTS 256
+#define MAX_ELEMENTS 1024
 #define PAGES_PER_ELEMENT (MAX_NR_PAGES/MAX_ELEMENTS)
 
 extern s8 physnode_map[];
@@ -87,9 +77,6 @@ static inline int pfn_to_nid(unsigned long pfn)
        __pgdat->node_start_pfn + __pgdat->node_spanned_pages;          \
 })
 
-#ifdef CONFIG_X86_NUMAQ            /* we have contiguous memory on NUMA-Q */
-#define pfn_valid(pfn)          ((pfn) < num_physpages)
-#else
 static inline int pfn_valid(int pfn)
 {
        int nid = pfn_to_nid(pfn);
@@ -98,7 +85,6 @@ static inline int pfn_valid(int pfn)
                return (pfn < node_end_pfn(nid));
        return 0;
 }
-#endif /* CONFIG_X86_NUMAQ */
 
 #endif /* CONFIG_DISCONTIGMEM */
 
index 57a991b9c0530a0f82268853c74a6feb2411b883..b8ba37496e2dde26d18bc746468906f7a5d01162 100644 (file)
@@ -13,6 +13,12 @@ extern int apic_version[MAX_APICS];
 extern u8 apicid_2_node[];
 extern int pic_mode;
 
+#ifdef CONFIG_X86_NUMAQ
+extern int mp_bus_id_to_node[MAX_MP_BUSSES];
+extern int mp_bus_id_to_local[MAX_MP_BUSSES];
+extern int quad_local_to_mp_bus_id [NR_CPUS/4][4];
+#endif
+
 #define MAX_APICID 256
 
 #else
@@ -21,26 +27,30 @@ extern int pic_mode;
 /* Each PCI slot may be a combo card with its own bus.  4 IRQ pins per slot. */
 #define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4)
 
+#endif
+
 extern void early_find_smp_config(void);
 extern void early_get_smp_config(void);
 
-#endif
-
 #if defined(CONFIG_MCA) || defined(CONFIG_EISA)
 extern int mp_bus_id_to_type[MAX_MP_BUSSES];
 #endif
 
 extern DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
 
-extern int mp_bus_id_to_pci_bus[MAX_MP_BUSSES];
-
 extern unsigned int boot_cpu_physical_apicid;
+extern unsigned int max_physical_apicid;
 extern int smp_found_config;
 extern int mpc_default_type;
 extern unsigned long mp_lapic_addr;
 
 extern void find_smp_config(void);
 extern void get_smp_config(void);
+#ifdef CONFIG_X86_MPPARSE
+extern void early_reserve_e820_mpc_new(void);
+#else
+static inline void early_reserve_e820_mpc_new(void) { }
+#endif
 
 void __cpuinit generic_processor_info(int apicid, int version);
 #ifdef CONFIG_ACPI
@@ -49,6 +59,18 @@ extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
                                   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
 extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+extern void MP_intsrc_info(struct mpc_config_intsrc *m);
+#ifdef CONFIG_X86_IO_APIC
+extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
+                               u32 gsi, int triggering, int polarity);
+#else
+static inline int
+mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
+                  u32 gsi, int triggering, int polarity)
+{
+       return 0;
+}
+#endif
 #endif /* CONFIG_ACPI */
 
 #define PHYSID_ARRAY_SIZE      BITS_TO_LONGS(MAX_APICS)
index 94b86c31239aad607ff00e344e67233b42882f82..ef068d2465d691a785c274389ea94f2f047364dd 100644 (file)
@@ -28,6 +28,7 @@
 
 #ifdef CONFIG_X86_NUMAQ
 
+extern int found_numaq;
 extern int get_memcfg_numaq(void);
 
 /*
@@ -156,9 +157,10 @@ struct sys_cfg_data {
        struct          eachquadmem eq[MAX_NUMNODES];   /* indexed by quad id */
 };
 
-static inline unsigned long *get_zholes_size(int nid)
+#else
+static inline int get_memcfg_numaq(void)
 {
-       return NULL;
+       return 0;
 }
 #endif /* CONFIG_X86_NUMAQ */
 #endif /* NUMAQ_H */
index fa6763af8d2686c8d1b52a023f72c70806ec5bc4..9e163fc3e984505880e6dacf6c7f1127f52166bd 100644 (file)
@@ -50,19 +50,14 @@ extern struct boot_params boot_params;
  */
 #define LOWMEMSIZE()   (0x9f000)
 
-struct e820entry;
-
 char * __init machine_specific_memory_setup(void);
 char *memory_setup(void);
 
-int __init copy_e820_map(struct e820entry *biosmap, int nr_map);
-int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map);
-void __init add_memory_region(unsigned long long start,
-                             unsigned long long size, int type);
-
-extern unsigned long init_pg_tables_end;
 
+void __init i386_start_kernel(void);
 
+extern unsigned long init_pg_tables_start;
+extern unsigned long init_pg_tables_end;
 
 #endif /* __i386__ */
 #endif /* _SETUP */
index 1ebaa5cd31128eb40a7ca2c918aa17d7d8d8f244..514e52b95cef89c1ddf3236b5c6ce30919cbe16d 100644 (file)
@@ -201,7 +201,6 @@ extern void cpu_exit_clear(void);
 extern void cpu_uninit(void);
 #endif
 
-extern void smp_alloc_memory(void);
 extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 #endif /* __ASSEMBLY__ */
index f4bba131d0686733a7060f6d4227977fdf6c2588..456fe0b5a92162245d3b63ba8a497f762aae474a 100644 (file)
 #ifndef _ASM_SRAT_H_
 #define _ASM_SRAT_H_
 
-#ifndef CONFIG_ACPI_SRAT
-#error CONFIG_ACPI_SRAT not defined, and srat.h header has been included
-#endif
-
+#ifdef CONFIG_ACPI_SRAT
 extern int get_memcfg_from_srat(void);
-extern unsigned long *get_zholes_size(int);
+#else
+static inline int get_memcfg_from_srat(void)
+{
+       return 0;
+}
+#endif
 
 #endif /* _ASM_SRAT_H_ */
index a2f04cd79b29617e4677a0a1a1c23f4f1d434b23..9f7f63ba0042a25b979a0f0083e1583be1da4743 100644 (file)
@@ -303,7 +303,6 @@ static inline void clflush(volatile void *__p)
 void disable_hlt(void);
 void enable_hlt(void);
 
-extern int es7000_plat;
 void cpu_idle_wait(void);
 
 extern unsigned long arch_align_stack(unsigned long sp);
index a5f359a7ad0ef84d2829872f904f50014abe71f9..807373d467f7485a03dfa1417f16a62097108e17 100644 (file)
@@ -287,7 +287,6 @@ efi_guid_unparse(efi_guid_t *guid, char *out)
 extern void efi_init (void);
 extern void *efi_get_pal_addr (void);
 extern void efi_map_pal_code (void);
-extern void efi_map_memmap(void);
 extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
 extern void efi_gettimeofday (struct timespec *ts);
 extern void efi_enter_virtual_mode (void);     /* switch EFI to virtual mode, if possible */
@@ -295,14 +294,11 @@ extern u64 efi_get_iobase (void);
 extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
 extern u64 efi_mem_attribute (unsigned long phys_addr, unsigned long size);
-extern int efi_mem_attribute_range (unsigned long phys_addr, unsigned long size,
-                                   u64 attr);
 extern int __init efi_uart_console_only (void);
 extern void efi_initialize_iomem_resources(struct resource *code_resource,
                struct resource *data_resource, struct resource *bss_resource);
 extern unsigned long efi_get_time(void);
 extern int efi_set_rtc_mmss(unsigned long nowtime);
-extern int is_available_memory(efi_memory_desc_t * md);
 extern struct efi_memory_map memmap;
 
 /**
index 586a943cab018647da208582bc1f19e0dc485f1d..ce8e397a61f69a5836109214c54f99bdda25117a 100644 (file)
@@ -998,8 +998,7 @@ extern void free_area_init_node(int nid, pg_data_t *pgdat,
 extern void free_area_init_nodes(unsigned long *max_zone_pfn);
 extern void add_active_range(unsigned int nid, unsigned long start_pfn,
                                        unsigned long end_pfn);
-extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
-                                               unsigned long new_end_pfn);
+extern void shrink_active_range(unsigned int nid, unsigned long new_end_pfn);
 extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
                                        unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
index e875905f7b12c3ed8a057ca235ff3fa41e72fcf2..e8c06122be36058f88cb6e291360eb2c0ebe09a0 100644 (file)
 
 #include <linux/types.h>
 
-/* Macro to aid the definition of ranges of bits */
-#define PB_range(name, required_bits) \
-       name, name ## _end = (name + required_bits) - 1
-
 /* Bit indices that affect a whole block of pages */
 enum pageblock_bits {
-       PB_range(PB_migrate, 3), /* 3 bits required for migrate types */
+       PB_migrate,
+       PB_migrate_end = PB_migrate + 3 - 1,
+                       /* 3 bits required for migrate types */
        NR_PAGEBLOCK_BITS
 };
 
index f32fae3121f07461f846911ee6963ce679828780..eee5ba7509c17b9ed521bea99aca23bbdc5444c9 100644 (file)
@@ -3461,6 +3461,11 @@ void __paginginit free_area_init_node(int nid, struct pglist_data *pgdat,
        calculate_node_totalpages(pgdat, zones_size, zholes_size);
 
        alloc_node_mem_map(pgdat);
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+       printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
+               nid, (unsigned long)pgdat,
+               (unsigned long)pgdat->node_mem_map);
+#endif
 
        free_area_init_core(pgdat, zones_size, zholes_size);
 }
@@ -3549,25 +3554,49 @@ void __init add_active_range(unsigned int nid, unsigned long start_pfn,
 /**
  * shrink_active_range - Shrink an existing registered range of PFNs
  * @nid: The node id the range is on that should be shrunk
- * @old_end_pfn: The old end PFN of the range
  * @new_end_pfn: The new PFN of the range
  *
  * i386 with NUMA use alloc_remap() to store a node_mem_map on a local node.
- * The map is kept at the end physical page range that has already been
- * registered with add_active_range(). This function allows an arch to shrink
- * an existing registered range.
+ * The map is kept near the end of the physical page range that has already
+ * been registered. This function allows an arch to shrink an existing
+ * registered range.
  */
-void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
-                                               unsigned long new_end_pfn)
+void __init shrink_active_range(unsigned int nid, unsigned long new_end_pfn)
 {
-       int i;
+       int i, j;
+       int removed = 0;
 
        /* Find the old active region end and shrink */
-       for_each_active_range_index_in_nid(i, nid)
-               if (early_node_map[i].end_pfn == old_end_pfn) {
+       for_each_active_range_index_in_nid(i, nid) {
+               if (early_node_map[i].start_pfn >= new_end_pfn) {
+                       /* clear it */
+                       early_node_map[i].end_pfn = 0;
+                       removed = 1;
+                       continue;
+               }
+               if (early_node_map[i].end_pfn > new_end_pfn) {
                        early_node_map[i].end_pfn = new_end_pfn;
-                       break;
+                       continue;
                }
+       }
+
+       if (!removed)
+               return;
+
+       /* remove the blank ones */
+       for (i = nr_nodemap_entries - 1; i > 0; i--) {
+               if (early_node_map[i].nid != nid)
+                       continue;
+               if (early_node_map[i].end_pfn)
+                       continue;
+               /* we found it, get rid of it */
+               for (j = i; j < nr_nodemap_entries - 1; j++)
+                       memcpy(&early_node_map[j], &early_node_map[j+1],
+                               sizeof(early_node_map[j]));
+               j = nr_nodemap_entries - 1;
+               memset(&early_node_map[j], 0, sizeof(early_node_map[j]));
+               nr_nodemap_entries--;
+       }
 }
 
 /**