Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
authorLinus Torvalds <torvalds@linux-foundation.org>
Thu, 11 Jun 2009 17:00:50 +0000 (10:00 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Thu, 11 Jun 2009 17:00:50 +0000 (10:00 -0700)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (49 commits)
  ext4: Avoid corrupting the uninitialized bit in the extent during truncate
  ext4: Don't treat a truncation of a zero-length file as replace-via-truncate
  ext4: fix dx_map_entry to support 256k directory blocks
  ext4: truncate the file properly if we fail to copy data from userspace
  ext4: Avoid leaking blocks after a block allocation failure
  ext4: Change all super.c messages to print the device
  ext4: Get rid of EXTEND_DISKSIZE flag of ext4_get_blocks_handle()
  ext4: super.c whitespace cleanup
  jbd2: Fix minor typos in comments in fs/jbd2/journal.c
  ext4: Clean up calls to ext4_get_group_desc()
  ext4: remove unused function __ext4_write_dirty_metadata
  ext2: Fix memory leak in ext2_fill_super() in case of a failed mount
  ext3: Fix memory leak in ext3_fill_super() in case of a failed mount
  ext4: Fix memory leak in ext4_fill_super() in case of a failed mount
  ext4: down i_data_sem only for read when walking tree for fiemap
  ext4: Add a comprehensive block validity check to ext4_get_blocks()
  ext4: Clean up ext4_get_blocks() so it does not depend on bh_result->b_state
  ext4: Merge ext4_da_get_block_write() into mpage_da_map_blocks()
  ext4: Add BUG_ON debugging checks to noalloc_get_block_write()
  ext4: Add documentation to the ext4_*get_block* functions
  ...

602 files changed:
Documentation/ABI/testing/sysfs-devices-cache_disable [new file with mode: 0644]
Documentation/DMA-API.txt
Documentation/DocBook/Makefile
Documentation/DocBook/tracepoint.tmpl [new file with mode: 0644]
Documentation/RCU/trace.txt
Documentation/futex-requeue-pi.txt [new file with mode: 0644]
Documentation/kernel-parameters.txt
Documentation/memory-barriers.txt
Documentation/scheduler/sched-rt-group.txt
Documentation/trace/events.txt [new file with mode: 0644]
Documentation/trace/ftrace.txt
Documentation/trace/power.txt [new file with mode: 0644]
Documentation/x86/boot.txt
Documentation/x86/x86_64/boot-options.txt
Documentation/x86/x86_64/mm.txt
MAINTAINERS
Makefile
arch/alpha/kernel/sys_dp264.c
arch/alpha/kernel/sys_titan.c
arch/arm/common/gic.c
arch/arm/include/asm/cache.h
arch/arm/include/asm/page.h
arch/arm/mach-mx2/clock_imx21.c
arch/arm/mach-mx2/clock_imx27.c
arch/arm/mach-mx3/clock-imx35.c
arch/arm/mach-mx3/clock.c
arch/arm/mach-pxa/devices.c
arch/arm/mach-pxa/imote2.c
arch/arm/mm/proc-v7.S
arch/arm/plat-mxc/include/mach/imx-uart.h
arch/cris/arch-v32/kernel/irq.c
arch/frv/Kconfig
arch/frv/include/asm/bitops.h
arch/frv/include/asm/elf.h
arch/frv/include/asm/pci.h
arch/frv/include/asm/ptrace.h
arch/frv/include/asm/syscall.h [new file with mode: 0644]
arch/frv/include/asm/thread_info.h
arch/frv/kernel/entry.S
arch/frv/kernel/ptrace.c
arch/frv/kernel/signal.c
arch/frv/kernel/uaccess.c
arch/frv/mb93090-mb00/pci-dma-nommu.c
arch/frv/mb93090-mb00/pci-dma.c
arch/ia64/hp/sim/hpsim_irq.c
arch/ia64/kernel/acpi.c
arch/ia64/kernel/iosapic.c
arch/ia64/kernel/msi_ia64.c
arch/ia64/sn/kernel/irq.c
arch/ia64/sn/kernel/msi_sn.c
arch/mips/Kconfig
arch/mips/cavium-octeon/octeon-irq.c
arch/mips/include/asm/cpu-info.h
arch/mips/include/asm/delay.h
arch/mips/include/asm/ioctl.h
arch/mips/include/asm/irq.h
arch/mips/kernel/irq-gic.c
arch/mips/kernel/proc.c
arch/mips/lib/Makefile
arch/mips/lib/delay.c [new file with mode: 0644]
arch/mips/mti-malta/malta-smtc.c
arch/mips/sibyte/bcm1480/irq.c
arch/mips/sibyte/cfe/setup.c
arch/mips/sibyte/sb1250/irq.c
arch/mn10300/Kconfig
arch/mn10300/include/asm/elf.h
arch/mn10300/include/asm/processor.h
arch/mn10300/include/asm/ptrace.h
arch/mn10300/kernel/entry.S
arch/mn10300/kernel/ptrace.c
arch/mn10300/kernel/signal.c
arch/mn10300/mm/tlb-mn10300.S
arch/parisc/kernel/irq.c
arch/powerpc/platforms/pseries/xics.c
arch/powerpc/sysdev/mpic.c
arch/powerpc/sysdev/mpic.h
arch/sparc/include/asm/thread_info_64.h
arch/sparc/kernel/irq_64.c
arch/x86/Kbuild [new file with mode: 0644]
arch/x86/Kconfig
arch/x86/Kconfig.debug
arch/x86/Makefile
arch/x86/boot/.gitignore
arch/x86/boot/Makefile
arch/x86/boot/a20.c
arch/x86/boot/apm.c
arch/x86/boot/bioscall.S [new file with mode: 0644]
arch/x86/boot/boot.h
arch/x86/boot/compressed/.gitignore
arch/x86/boot/compressed/Makefile
arch/x86/boot/compressed/head_32.S
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/misc.c
arch/x86/boot/compressed/mkpiggy.c [new file with mode: 0644]
arch/x86/boot/compressed/vmlinux.lds.S [moved from arch/x86/boot/compressed/vmlinux_64.lds with 57% similarity]
arch/x86/boot/compressed/vmlinux.scr [deleted file]
arch/x86/boot/compressed/vmlinux_32.lds [deleted file]
arch/x86/boot/edd.c
arch/x86/boot/header.S
arch/x86/boot/main.c
arch/x86/boot/mca.c
arch/x86/boot/memory.c
arch/x86/boot/regs.c [new file with mode: 0644]
arch/x86/boot/setup.ld
arch/x86/boot/tty.c
arch/x86/boot/video-bios.c
arch/x86/boot/video-vesa.c
arch/x86/boot/video-vga.c
arch/x86/boot/video.c
arch/x86/boot/video.h
arch/x86/configs/i386_defconfig
arch/x86/configs/x86_64_defconfig
arch/x86/ia32/ia32entry.S
arch/x86/include/asm/alternative.h
arch/x86/include/asm/amd_iommu.h
arch/x86/include/asm/amd_iommu_types.h
arch/x86/include/asm/apic.h
arch/x86/include/asm/apicdef.h
arch/x86/include/asm/boot.h
arch/x86/include/asm/bootparam.h
arch/x86/include/asm/cpu_debug.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/ds.h
arch/x86/include/asm/hw_irq.h
arch/x86/include/asm/i387.h
arch/x86/include/asm/i8259.h
arch/x86/include/asm/io_apic.h
arch/x86/include/asm/iomap.h
arch/x86/include/asm/irq_remapping.h
arch/x86/include/asm/irq_vectors.h
arch/x86/include/asm/k8.h
arch/x86/include/asm/microcode.h
arch/x86/include/asm/mpspec.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nmi.h
arch/x86/include/asm/numa_64.h
arch/x86/include/asm/page_32_types.h
arch/x86/include/asm/page_64_types.h
arch/x86/include/asm/page_types.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_64.h
arch/x86/include/asm/pgtable_64_types.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/required-features.h
arch/x86/include/asm/setup.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/sparsemem.h
arch/x86/include/asm/syscalls.h
arch/x86/include/asm/termios.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/tlbflush.h
arch/x86/include/asm/topology.h
arch/x86/include/asm/traps.h
arch/x86/include/asm/unistd_32.h
arch/x86/include/asm/unistd_64.h
arch/x86/include/asm/uv/uv_bau.h
arch/x86/include/asm/uv/uv_hub.h
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/acpi/realmode/Makefile
arch/x86/kernel/acpi/realmode/bioscall.S [new file with mode: 0644]
arch/x86/kernel/acpi/realmode/regs.c [new file with mode: 0644]
arch/x86/kernel/amd_iommu.c
arch/x86/kernel/amd_iommu_init.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/apic_flat_64.c
arch/x86/kernel/apic/es7000_32.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/apic/nmi.c
arch/x86/kernel/apic/probe_32.c
arch/x86/kernel/apic/probe_64.c
arch/x86/kernel/apic/summit_32.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/cpu_debug.c
arch/x86/kernel/cpu/cpufreq/Kconfig
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
arch/x86/kernel/cpu/cpufreq/powernow-k7.c
arch/x86/kernel/cpu/cpufreq/powernow-k8.c
arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/cpu/intel_cacheinfo.c
arch/x86/kernel/cpu/mcheck/mce_64.c
arch/x86/kernel/cpu/mcheck/mce_intel_64.c
arch/x86/kernel/cpu/mtrr/cleanup.c
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/mtrr/main.c
arch/x86/kernel/cpu/mtrr/mtrr.h
arch/x86/kernel/cpu/mtrr/state.c
arch/x86/kernel/ds.c
arch/x86/kernel/ds_selftest.c [new file with mode: 0644]
arch/x86/kernel/ds_selftest.h [new file with mode: 0644]
arch/x86/kernel/dumpstack.h
arch/x86/kernel/e820.c
arch/x86/kernel/early-quirks.c
arch/x86/kernel/entry_64.S
arch/x86/kernel/head_32.S
arch/x86/kernel/irq.c
arch/x86/kernel/irqinit.c [moved from arch/x86/kernel/irqinit_32.c with 68% similarity]
arch/x86/kernel/irqinit_64.c [deleted file]
arch/x86/kernel/kgdb.c
arch/x86/kernel/kvm.c
arch/x86/kernel/microcode_amd.c
arch/x86/kernel/microcode_core.c
arch/x86/kernel/microcode_intel.c
arch/x86/kernel/mpparse.c
arch/x86/kernel/paravirt.c
arch/x86/kernel/pci-calgary_64.c
arch/x86/kernel/pci-gart_64.c
arch/x86/kernel/pci-swiotlb.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/quirks.c
arch/x86/kernel/reboot.c
arch/x86/kernel/setup.c
arch/x86/kernel/setup_percpu.c
arch/x86/kernel/smp.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/stacktrace.c
arch/x86/kernel/syscall_table_32.S
arch/x86/kernel/tlb_uv.c
arch/x86/kernel/traps.c
arch/x86/kernel/tsc.c
arch/x86/kernel/tsc_sync.c
arch/x86/kernel/vm86_32.c
arch/x86/kernel/vmi_32.c
arch/x86/kernel/vmlinux.lds.S
arch/x86/kernel/vmlinux_32.lds.S [deleted file]
arch/x86/kernel/vmlinux_64.lds.S [deleted file]
arch/x86/kernel/vsyscall_64.c
arch/x86/lguest/Makefile
arch/x86/lguest/boot.c
arch/x86/mm/dump_pagetables.c
arch/x86/mm/fault.c
arch/x86/mm/highmem_32.c
arch/x86/mm/init.c
arch/x86/mm/init_32.c
arch/x86/mm/init_64.c
arch/x86/mm/iomap_32.c
arch/x86/mm/kmmio.c
arch/x86/mm/memtest.c
arch/x86/mm/mmio-mod.c
arch/x86/mm/numa_64.c
arch/x86/mm/pageattr.c
arch/x86/mm/srat_64.c
arch/x86/oprofile/nmi_int.c
arch/x86/pci/irq.c
arch/x86/pci/mmconfig-shared.c
arch/x86/vdso/vma.c
arch/x86/xen/enlighten.c
arch/x86/xen/mmu.c
arch/x86/xen/setup.c
arch/x86/xen/xen-ops.h
block/blk-core.c
block/blk-sysfs.c
block/bsg.c
block/compat_ioctl.c
block/elevator.c
drivers/acpi/pci_irq.c
drivers/acpi/processor_core.c
drivers/ata/ahci.c
drivers/ata/ata_piix.c
drivers/ata/pata_ali.c
drivers/ata/pata_efar.c
drivers/ata/pata_legacy.c
drivers/ata/pata_netcell.c
drivers/bluetooth/hci_ldisc.c
drivers/char/Kconfig
drivers/char/Makefile
drivers/char/bfin_jtag_comm.c [new file with mode: 0644]
drivers/char/cyclades.c
drivers/char/epca.c
drivers/char/hpet.c
drivers/char/ip2/i2lib.c
drivers/char/ip2/ip2main.c
drivers/char/isicom.c
drivers/char/istallion.c
drivers/char/mem.c
drivers/char/moxa.c
drivers/char/mxser.c
drivers/char/n_hdlc.c
drivers/char/n_tty.c
drivers/char/pcmcia/synclink_cs.c
drivers/char/pty.c
drivers/char/rocket.c
drivers/char/selection.c
drivers/char/stallion.c
drivers/char/synclink.c
drivers/char/synclink_gt.c
drivers/char/synclinkmp.c
drivers/char/tty_audit.c
drivers/char/tty_io.c
drivers/char/tty_ioctl.c
drivers/char/tty_ldisc.c
drivers/char/tty_port.c
drivers/cpufreq/cpufreq.c
drivers/gpu/drm/drm_crtc.c
drivers/gpu/drm/drm_crtc_helper.c
drivers/gpu/drm/drm_edid.c
drivers/gpu/drm/drm_irq.c
drivers/gpu/drm/drm_sysfs.c
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/intel_crt.c
drivers/gpu/drm/i915/intel_dvo.c
drivers/gpu/drm/i915/intel_hdmi.c
drivers/gpu/drm/i915/intel_lvds.c
drivers/gpu/drm/i915/intel_sdvo.c
drivers/gpu/drm/i915/intel_tv.c
drivers/gpu/drm/radeon/radeon_cp.c
drivers/gpu/drm/radeon/radeon_drv.h
drivers/ide/alim15x3.c
drivers/ide/ide-atapi.c
drivers/ide/ide-cd.c
drivers/ide/ide-cd.h
drivers/ide/ide-disk.c
drivers/ide/ide-dma.c
drivers/ide/ide-floppy.c
drivers/ide/ide-io.c
drivers/ide/ide-ioctls.c
drivers/ide/ide-park.c
drivers/ide/ide-pm.c
drivers/ide/ide-tape.c
drivers/ide/ide-taskfile.c
drivers/ide/pdc202xx_old.c
drivers/md/dm.c
drivers/md/raid5.c
drivers/media/video/ivtv/ivtv-queue.c
drivers/mmc/host/mvsdio.c
drivers/mmc/host/mxcmmc.c
drivers/mmc/host/omap.c
drivers/mmc/host/omap_hsmmc.c
drivers/mmc/host/sdhci-of.c
drivers/mtd/nand/davinci_nand.c
drivers/net/r8169.c
drivers/parisc/iosapic.c
drivers/parport/parport_pc.c
drivers/pci/hotplug/ibmphp_core.c
drivers/pci/htirq.c
drivers/pci/intel-iommu.c
drivers/pci/intr_remapping.c
drivers/pci/probe.c
drivers/pnp/pnpacpi/rsparser.c
drivers/scsi/sg.c
drivers/serial/8250.c
drivers/serial/8250_pci.c
drivers/serial/Kconfig
drivers/serial/Makefile
drivers/serial/bfin_5xx.c
drivers/serial/bfin_sport_uart.c
drivers/serial/icom.c
drivers/serial/imx.c
drivers/serial/jsm/jsm.h
drivers/serial/jsm/jsm_tty.c
drivers/serial/timbuart.c [new file with mode: 0644]
drivers/serial/timbuart.h [new file with mode: 0644]
drivers/ssb/embedded.c
drivers/usb/class/cdc-acm.c
drivers/usb/class/cdc-acm.h
drivers/usb/serial/belkin_sa.c
drivers/usb/serial/ch341.c
drivers/usb/serial/console.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/cyberjack.c
drivers/usb/serial/cypress_m8.c
drivers/usb/serial/digi_acceleport.c
drivers/usb/serial/empeg.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/garmin_gps.c
drivers/usb/serial/generic.c
drivers/usb/serial/io_edgeport.c
drivers/usb/serial/io_ti.c
drivers/usb/serial/ipaq.c
drivers/usb/serial/ipw.c
drivers/usb/serial/ir-usb.c
drivers/usb/serial/iuu_phoenix.c
drivers/usb/serial/keyspan.c
drivers/usb/serial/keyspan.h
drivers/usb/serial/keyspan_pda.c
drivers/usb/serial/kl5kusb105.c
drivers/usb/serial/kobil_sct.c
drivers/usb/serial/mct_u232.c
drivers/usb/serial/mos7720.c
drivers/usb/serial/mos7840.c
drivers/usb/serial/navman.c
drivers/usb/serial/omninet.c
drivers/usb/serial/opticon.c
drivers/usb/serial/option.c
drivers/usb/serial/oti6858.c
drivers/usb/serial/pl2303.c
drivers/usb/serial/sierra.c
drivers/usb/serial/spcp8x5.c
drivers/usb/serial/symbolserial.c
drivers/usb/serial/ti_usb_3410_5052.c
drivers/usb/serial/usb-serial.c
drivers/usb/serial/visor.c
drivers/usb/serial/whiteheat.c
drivers/xen/Kconfig
drivers/xen/Makefile
drivers/xen/events.c
drivers/xen/evtchn.c [new file with mode: 0644]
drivers/xen/manage.c
drivers/xen/sys-hypervisor.c [new file with mode: 0644]
drivers/xen/xenbus/xenbus_probe.c
drivers/xen/xenbus/xenbus_xs.c
drivers/xen/xenfs/super.c
fs/autofs4/waitq.c
fs/bio.c
fs/btrfs/extent-tree.c
fs/btrfs/volumes.c
fs/buffer.c
fs/devpts/inode.c
fs/inode.c
fs/jbd/commit.c
fs/proc/loadavg.c
include/Kbuild
include/asm-generic/pgtable.h
include/asm-generic/vmlinux.lds.h
include/drm/drm_crtc.h
include/drm/drm_crtc_helper.h
include/linux/acpi.h
include/linux/amba/serial.h
include/linux/blktrace_api.h
include/linux/compat.h
include/linux/cpumask.h
include/linux/cyclades.h
include/linux/dma-debug.h
include/linux/dmar.h
include/linux/ftrace.h
include/linux/ftrace_event.h [new file with mode: 0644]
include/linux/futex.h
include/linux/ide.h
include/linux/init_task.h
include/linux/interrupt.h
include/linux/irq.h
include/linux/kmemtrace.h [new file with mode: 0644]
include/linux/mm.h
include/linux/mmiotrace.h
include/linux/module.h
include/linux/mutex.h
include/linux/pci_ids.h
include/linux/ptrace.h
include/linux/rational.h [new file with mode: 0644]
include/linux/rculist.h
include/linux/rcutree.h
include/linux/ring_buffer.h
include/linux/sched.h
include/linux/serial.h
include/linux/serial_core.h
include/linux/signal.h
include/linux/slab_def.h
include/linux/slub_def.h
include/linux/spinlock_up.h
include/linux/swiotlb.h
include/linux/thread_info.h
include/linux/trace_seq.h [new file with mode: 0644]
include/linux/tracehook.h
include/linux/tracepoint.h
include/linux/tty.h
include/linux/tty_driver.h
include/linux/usb/serial.h
include/linux/wait.h
include/trace/block.h [deleted file]
include/trace/define_trace.h [new file with mode: 0644]
include/trace/events/block.h [new file with mode: 0644]
include/trace/events/irq.h [new file with mode: 0644]
include/trace/events/kmem.h [new file with mode: 0644]
include/trace/events/lockdep.h [new file with mode: 0644]
include/trace/events/sched.h [moved from include/trace/sched_event_types.h with 91% similarity]
include/trace/events/skb.h [new file with mode: 0644]
include/trace/events/workqueue.h [new file with mode: 0644]
include/trace/ftrace.h [new file with mode: 0644]
include/trace/irq.h [deleted file]
include/trace/irq_event_types.h [deleted file]
include/trace/kmemtrace.h [deleted file]
include/trace/lockdep.h [deleted file]
include/trace/lockdep_event_types.h [deleted file]
include/trace/sched.h [deleted file]
include/trace/skb.h [deleted file]
include/trace/trace_event_types.h [deleted file]
include/trace/trace_events.h [deleted file]
include/trace/workqueue.h [deleted file]
include/xen/Kbuild [new file with mode: 0644]
include/xen/events.h
include/xen/evtchn.h [new file with mode: 0644]
include/xen/interface/version.h
include/xen/xenbus.h
init/Kconfig
init/main.c
ipc/sem.c
ipc/shm.c
kernel/Makefile
kernel/async.c
kernel/compat.c
kernel/exit.c
kernel/fork.c
kernel/futex.c
kernel/irq/Makefile
kernel/irq/chip.c
kernel/irq/handle.c
kernel/irq/internals.h
kernel/irq/manage.c
kernel/irq/migration.c
kernel/irq/numa_migrate.c
kernel/kthread.c
kernel/lockdep.c
kernel/module.c
kernel/mutex.c
kernel/ptrace.c
kernel/rcupreempt.c
kernel/rcutree.c
kernel/rcutree_trace.c
kernel/rtmutex.c
kernel/rtmutex_common.h
kernel/sched.c
kernel/sched_cpupri.c
kernel/sched_fair.c
kernel/sched_idletask.c
kernel/sched_rt.c
kernel/signal.c
kernel/smp.c
kernel/softirq.c
kernel/sysctl.c
kernel/time/timekeeping.c
kernel/timer.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/blktrace.c
kernel/trace/events.c [deleted file]
kernel/trace/ftrace.c
kernel/trace/kmemtrace.c
kernel/trace/ring_buffer.c
kernel/trace/ring_buffer_benchmark.c [new file with mode: 0644]
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_boot.c
kernel/trace/trace_branch.c
kernel/trace/trace_event_profile.c
kernel/trace/trace_event_types.h
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_events_stage_1.h [deleted file]
kernel/trace/trace_events_stage_2.h [deleted file]
kernel/trace/trace_events_stage_3.h [deleted file]
kernel/trace/trace_export.c
kernel/trace/trace_functions_graph.c
kernel/trace/trace_hw_branches.c
kernel/trace/trace_mmiotrace.c
kernel/trace/trace_output.c
kernel/trace/trace_output.h
kernel/trace/trace_power.c
kernel/trace/trace_printk.c
kernel/trace/trace_sched_switch.c
kernel/trace/trace_sched_wakeup.c
kernel/trace/trace_selftest.c
kernel/trace/trace_stack.c
kernel/trace/trace_stat.c
kernel/trace/trace_stat.h
kernel/trace/trace_sysprof.c
kernel/trace/trace_workqueue.c
kernel/wait.c
kernel/workqueue.c
lib/Kconfig
lib/Makefile
lib/cpumask.c
lib/dma-debug.c
lib/rational.c [new file with mode: 0644]
lib/swiotlb.c
lib/vsprintf.c
mm/bounce.c
mm/mlock.c
mm/page_alloc.c
mm/percpu.c
mm/slab.c
mm/slob.c
mm/slub.c
mm/util.c
net/core/drop_monitor.c
net/core/net-traces.c
net/core/skbuff.c
net/sched/cls_cgroup.c
samples/Kconfig
samples/Makefile
samples/trace_events/Makefile [new file with mode: 0644]
samples/trace_events/trace-events-sample.c [new file with mode: 0644]
samples/trace_events/trace-events-sample.h [new file with mode: 0644]
scripts/Makefile.lib
scripts/bin_size [deleted file]
scripts/kernel-doc
scripts/recordmcount.pl
security/integrity/ima/ima_fs.c
security/smack/smackfs.c
virt/kvm/kvm_main.c

diff --git a/Documentation/ABI/testing/sysfs-devices-cache_disable b/Documentation/ABI/testing/sysfs-devices-cache_disable
new file mode 100644 (file)
index 0000000..175bb4f
--- /dev/null
@@ -0,0 +1,18 @@
+What:      /sys/devices/system/cpu/cpu*/cache/index*/cache_disable_X
+Date:      August 2008
+KernelVersion: 2.6.27
+Contact:       mark.langsdorf@amd.com
+Description:   These files exist in every cpu's cache index directories.
+               There are currently 2 cache_disable_# files in each
+               directory.  Reading from these files on a supported
+               processor will return that cache disable index value
+               for that processor and node.  Writing to one of these
+               files will cause the specified cache index to be disabled.
+
+               Currently, only AMD Family 10h Processors support cache index
+               disable, and only for their L3 caches.  See the BIOS and
+               Kernel Developer's Guide at
+               http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/31116-Public-GH-BKDG_3.20_2-4-09.pdf
+               for formatting information and other details on the
+               cache index disable.
+Users:    joachim.deguara@amd.com
index d9aa43d..25fb8bc 100644 (file)
@@ -704,12 +704,24 @@ this directory the following files can currently be found:
                                The current number of free dma_debug_entries
                                in the allocator.
 
+       dma-api/driver-filter
+                               You can write a name of a driver into this file
+                               to limit the debug output to requests from that
+                               particular driver. Write an empty string to
+                               that file to disable the filter and see
+                               all errors again.
+
 If you have this code compiled into your kernel it will be enabled by default.
 If you want to boot without the bookkeeping anyway you can provide
 'dma_debug=off' as a boot parameter. This will disable DMA-API debugging.
 Notice that you can not enable it again at runtime. You have to reboot to do
 so.
 
+If you want to see debug messages only for a special device driver you can
+specify the dma_debug_driver=<drivername> parameter. This will enable the
+driver filter at boot time. The debug code will only print errors for that
+driver afterwards. This filter can be disabled or changed later using debugfs.
+
 When the code disables itself at runtime this is most likely because it ran
 out of dma_debug_entries. These entries are preallocated at boot. The number
 of preallocated entries is defined per architecture. If it is too low for you
index b1eb661..9632444 100644 (file)
@@ -13,7 +13,8 @@ DOCBOOKS := z8530book.xml mcabook.xml device-drivers.xml \
            gadget.xml libata.xml mtdnand.xml librs.xml rapidio.xml \
            genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
            mac80211.xml debugobjects.xml sh.xml regulator.xml \
-           alsa-driver-api.xml writing-an-alsa-driver.xml
+           alsa-driver-api.xml writing-an-alsa-driver.xml \
+           tracepoint.xml
 
 ###
 # The build process is as follows (targets):
diff --git a/Documentation/DocBook/tracepoint.tmpl b/Documentation/DocBook/tracepoint.tmpl
new file mode 100644 (file)
index 0000000..b0756d0
--- /dev/null
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
+       "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" []>
+
+<book id="Tracepoints">
+ <bookinfo>
+  <title>The Linux Kernel Tracepoint API</title>
+
+  <authorgroup>
+   <author>
+    <firstname>Jason</firstname>
+    <surname>Baron</surname>
+    <affiliation>
+     <address>
+      <email>jbaron@redhat.com</email>
+     </address>
+    </affiliation>
+   </author>
+  </authorgroup>
+
+  <legalnotice>
+   <para>
+     This documentation is free software; you can redistribute
+     it and/or modify it under the terms of the GNU General Public
+     License as published by the Free Software Foundation; either
+     version 2 of the License, or (at your option) any later
+     version.
+   </para>
+
+   <para>
+     This program is distributed in the hope that it will be
+     useful, but WITHOUT ANY WARRANTY; without even the implied
+     warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+     See the GNU General Public License for more details.
+   </para>
+
+   <para>
+     You should have received a copy of the GNU General Public
+     License along with this program; if not, write to the Free
+     Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+     MA 02111-1307 USA
+   </para>
+
+   <para>
+     For more details see the file COPYING in the source
+     distribution of Linux.
+   </para>
+  </legalnotice>
+ </bookinfo>
+
+ <toc></toc>
+  <chapter id="intro">
+   <title>Introduction</title>
+   <para>
+     Tracepoints are static probe points that are located in strategic points
+     throughout the kernel. 'Probes' register/unregister with tracepoints
+     via a callback mechanism. The 'probes' are strictly typed functions that
+     are passed a unique set of parameters defined by each tracepoint.
+   </para>
+
+   <para>
+     From this simple callback mechanism, 'probes' can be used to profile, debug,
+     and understand kernel behavior. There are a number of tools that provide a
+     framework for using 'probes'. These tools include Systemtap, ftrace, and
+     LTTng.
+   </para>
+
+   <para>
+     Tracepoints are defined in a number of header files via various macros. Thus,
+     the purpose of this document is to provide a clear accounting of the available
+     tracepoints. The intention is to understand not only what tracepoints are
+     available but also to understand where future tracepoints might be added.
+   </para>
+
+   <para>
+     The API presented has functions of the form:
+     <function>trace_tracepointname(function parameters)</function>. These are the
+     tracepoint callbacks that are found throughout the code. Registering and
+     unregistering probes with these callback sites is covered in the
+     <filename>Documentation/trace/*</filename> directory.
+   </para>
+  </chapter>
+
+  <chapter id="irq">
+   <title>IRQ</title>
+!Iinclude/trace/events/irq.h
+  </chapter>
+
+</book>
index 0688482..02cced1 100644 (file)
@@ -192,23 +192,24 @@ rcu/rcuhier (which displays the struct rcu_node hierarchy).
 The output of "cat rcu/rcudata" looks as follows:
 
 rcu:
-  0 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=1 rp=3c2a dt=23301/73 dn=2 df=1882 of=0 ri=2126 ql=2 b=10
-  1 c=4011 g=4012 pq=1 pqc=4011 qp=0 rpfq=3 rp=39a6 dt=78073/1 dn=2 df=1402 of=0 ri=1875 ql=46 b=10
-  2 c=4010 g=4010 pq=1 pqc=4010 qp=0 rpfq=-5 rp=1d12 dt=16646/0 dn=2 df=3140 of=0 ri=2080 ql=0 b=10
-  3 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=2b50 dt=21159/1 dn=2 df=2230 of=0 ri=1923 ql=72 b=10
-  4 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1644 dt=5783/1 dn=2 df=3348 of=0 ri=2805 ql=7 b=10
-  5 c=4012 g=4013 pq=0 pqc=4011 qp=1 rpfq=3 rp=1aac dt=5879/1 dn=2 df=3140 of=0 ri=2066 ql=10 b=10
-  6 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=ed8 dt=5847/1 dn=2 df=3797 of=0 ri=1266 ql=10 b=10
-  7 c=4012 g=4013 pq=1 pqc=4012 qp=1 rpfq=3 rp=1fa2 dt=6199/1 dn=2 df=2795 of=0 ri=2162 ql=28 b=10
+rcu:
+  0 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=10951/1 dn=0 df=1101 of=0 ri=36 ql=0 b=10
+  1 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=16117/1 dn=0 df=1015 of=0 ri=0 ql=0 b=10
+  2 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1445/1 dn=0 df=1839 of=0 ri=0 ql=0 b=10
+  3 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=6681/1 dn=0 df=1545 of=0 ri=0 ql=0 b=10
+  4 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=1003/1 dn=0 df=1992 of=0 ri=0 ql=0 b=10
+  5 c=17829 g=17830 pq=1 pqc=17829 qp=1 dt=3887/1 dn=0 df=3331 of=0 ri=4 ql=2 b=10
+  6 c=17829 g=17829 pq=1 pqc=17829 qp=0 dt=859/1 dn=0 df=3224 of=0 ri=0 ql=0 b=10
+  7 c=17829 g=17830 pq=0 pqc=17829 qp=1 dt=3761/1 dn=0 df=1818 of=0 ri=0 ql=2 b=10
 rcu_bh:
-  0 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-145 rp=21d6 dt=23301/73 dn=2 df=0 of=0 ri=0 ql=0 b=10
-  1 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-170 rp=20ce dt=78073/1 dn=2 df=26 of=0 ri=5 ql=0 b=10
-  2 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-83 rp=fbd dt=16646/0 dn=2 df=28 of=0 ri=4 ql=0 b=10
-  3 c=-268 g=-268 pq=1 pqc=-268 qp=0 rpfq=-105 rp=178c dt=21159/1 dn=2 df=28 of=0 ri=2 ql=0 b=10
-  4 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-30 rp=b54 dt=5783/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  5 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-29 rp=df5 dt=5879/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
-  6 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-28 rp=788 dt=5847/1 dn=2 df=32 of=0 ri=0 ql=0 b=10
-  7 c=-268 g=-268 pq=1 pqc=-268 qp=1 rpfq=-53 rp=1098 dt=6199/1 dn=2 df=30 of=0 ri=3 ql=0 b=10
+  0 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=10951/1 dn=0 df=0 of=0 ri=0 ql=0 b=10
+  1 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=16117/1 dn=0 df=13 of=0 ri=0 ql=0 b=10
+  2 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1445/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  3 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=6681/1 dn=0 df=9 of=0 ri=0 ql=0 b=10
+  4 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=1003/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  5 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3887/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
+  7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10
 
 The first section lists the rcu_data structures for rcu, the second for
 rcu_bh.  Each section has one line per CPU, or eight for this 8-CPU system.
@@ -253,12 +254,6 @@ o  "pqc" indicates which grace period the last-observed quiescent
 o      "qp" indicates that RCU still expects a quiescent state from
        this CPU.
 
-o      "rpfq" is the number of rcu_pending() calls on this CPU required
-       to induce this CPU to invoke force_quiescent_state().
-
-o      "rp" is low-order four hex digits of the count of how many times
-       rcu_pending() has been invoked on this CPU.
-
 o      "dt" is the current value of the dyntick counter that is incremented
        when entering or leaving dynticks idle state, either by the
        scheduler or by irq.  The number after the "/" is the interrupt
@@ -305,6 +300,9 @@ o   "b" is the batch limit for this CPU.  If more than this number
        of RCU callbacks is ready to invoke, then the remainder will
        be deferred.
 
+There is also an rcu/rcudata.csv file with the same information in
+comma-separated-variable spreadsheet format.
+
 
 The output of "cat rcu/rcugp" looks as follows:
 
@@ -411,3 +409,63 @@ o  Each element of the form "1/1 0:127 ^0" represents one struct
                For example, the first entry at the lowest level shows
                "^0", indicating that it corresponds to bit zero in
                the first entry at the middle level.
+
+
+The output of "cat rcu/rcu_pending" looks as follows:
+
+rcu:
+  0 np=255892 qsp=53936 cbr=0 cng=14417 gpc=10033 gps=24320 nf=6445 nn=146741
+  1 np=261224 qsp=54638 cbr=0 cng=25723 gpc=16310 gps=2849 nf=5912 nn=155792
+  2 np=237496 qsp=49664 cbr=0 cng=2762 gpc=45478 gps=1762 nf=1201 nn=136629
+  3 np=236249 qsp=48766 cbr=0 cng=286 gpc=48049 gps=1218 nf=207 nn=137723
+  4 np=221310 qsp=46850 cbr=0 cng=26 gpc=43161 gps=4634 nf=3529 nn=123110
+  5 np=237332 qsp=48449 cbr=0 cng=54 gpc=47920 gps=3252 nf=201 nn=137456
+  6 np=219995 qsp=46718 cbr=0 cng=50 gpc=42098 gps=6093 nf=4202 nn=120834
+  7 np=249893 qsp=49390 cbr=0 cng=72 gpc=38400 gps=17102 nf=41 nn=144888
+rcu_bh:
+  0 np=146741 qsp=1419 cbr=0 cng=6 gpc=0 gps=0 nf=2 nn=145314
+  1 np=155792 qsp=12597 cbr=0 cng=0 gpc=4 gps=8 nf=3 nn=143180
+  2 np=136629 qsp=18680 cbr=0 cng=0 gpc=7 gps=6 nf=0 nn=117936
+  3 np=137723 qsp=2843 cbr=0 cng=0 gpc=10 gps=7 nf=0 nn=134863
+  4 np=123110 qsp=12433 cbr=0 cng=0 gpc=4 gps=2 nf=0 nn=110671
+  5 np=137456 qsp=4210 cbr=0 cng=0 gpc=6 gps=5 nf=0 nn=133235
+  6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921
+  7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542
+
+As always, this is once again split into "rcu" and "rcu_bh" portions.
+The fields are as follows:
+
+o      "np" is the number of times that __rcu_pending() has been invoked
+       for the corresponding flavor of RCU.
+
+o      "qsp" is the number of times that the RCU was waiting for a
+       quiescent state from this CPU.
+
+o      "cbr" is the number of times that this CPU had RCU callbacks
+       that had passed through a grace period, and were thus ready
+       to be invoked.
+
+o      "cng" is the number of times that this CPU needed another
+       grace period while RCU was idle.
+
+o      "gpc" is the number of times that an old grace period had
+       completed, but this CPU was not yet aware of it.
+
+o      "gps" is the number of times that a new grace period had started,
+       but this CPU was not yet aware of it.
+
+o      "nf" is the number of times that this CPU suspected that the
+       current grace period had run for too long, and thus needed to
+       be forced.
+
+       Please note that "forcing" consists of sending resched IPIs
+       to holdout CPUs.  If that CPU really still is in an old RCU
+       read-side critical section, then we really do have to wait for it.
+       The assumption behind "forcing" is that the CPU is not still in
+       an old RCU read-side critical section, but has not yet responded
+       for some other reason.
+
+o      "nn" is the number of times that this CPU needed nothing.  Alert
+       readers will note that the rcu "nn" number for a given CPU very
+       closely matches the rcu_bh "np" number for that same CPU.  This
+       is due to short-circuit evaluation in rcu_pending().
diff --git a/Documentation/futex-requeue-pi.txt b/Documentation/futex-requeue-pi.txt
new file mode 100644 (file)
index 0000000..9dc1ff4
--- /dev/null
@@ -0,0 +1,131 @@
+Futex Requeue PI
+----------------
+
+Requeueing of tasks from a non-PI futex to a PI futex requires
+special handling in order to ensure the underlying rt_mutex is never
+left without an owner if it has waiters; doing so would break the PI
+boosting logic [see rt-mutex-design.txt].  For the purposes of
+brevity, this action will be referred to as "requeue_pi" throughout
+this document.  Priority inheritance is abbreviated throughout as
+"PI".
+
+Motivation
+----------
+
+Without requeue_pi, the glibc implementation of
+pthread_cond_broadcast() must resort to waking all the tasks waiting
+on a pthread_condvar and letting them try to sort out which task
+gets to run first in classic thundering-herd formation.  An ideal
+implementation would wake the highest-priority waiter, and leave the
+rest to the natural wakeup inherent in unlocking the mutex
+associated with the condvar.
+
+Consider the simplified glibc calls:
+
+/* caller must lock mutex */
+pthread_cond_wait(cond, mutex)
+{
+       lock(cond->__data.__lock);
+       unlock(mutex);
+       do {
+          unlock(cond->__data.__lock);
+          futex_wait(cond->__data.__futex);
+          lock(cond->__data.__lock);
+       } while(...)
+       unlock(cond->__data.__lock);
+       lock(mutex);
+}
+
+pthread_cond_broadcast(cond)
+{
+       lock(cond->__data.__lock);
+       unlock(cond->__data.__lock);
+       futex_requeue(cond->__data.__futex, cond->mutex);
+}
+
+Once pthread_cond_broadcast() requeues the tasks, the cond->mutex
+has waiters. Note that pthread_cond_wait() attempts to lock the
+mutex only after it has returned to user space.  This will leave the
+underlying rt_mutex with waiters, and no owner, breaking the
+previously mentioned PI-boosting algorithms.
+
+In order to support PI-aware pthread_condvar's, the kernel needs to
+be able to requeue tasks to PI futexes.  This support implies that
+upon a successful futex_wait system call, the caller would return to
+user space already holding the PI futex.  The glibc implementation
+would be modified as follows:
+
+
+/* caller must lock mutex */
+pthread_cond_wait_pi(cond, mutex)
+{
+       lock(cond->__data.__lock);
+       unlock(mutex);
+       do {
+          unlock(cond->__data.__lock);
+          futex_wait_requeue_pi(cond->__data.__futex);
+          lock(cond->__data.__lock);
+       } while(...)
+       unlock(cond->__data.__lock);
+        /* the kernel acquired the mutex for us */
+}
+
+pthread_cond_broadcast_pi(cond)
+{
+       lock(cond->__data.__lock);
+       unlock(cond->__data.__lock);
+       futex_requeue_pi(cond->__data.__futex, cond->mutex);
+}
+
+The actual glibc implementation will likely test for PI and make the
+necessary changes inside the existing calls rather than creating new
+calls for the PI cases.  Similar changes are needed for
+pthread_cond_timedwait() and pthread_cond_signal().
+
+Implementation
+--------------
+
+In order to ensure the rt_mutex has an owner if it has waiters, it
+is necessary for both the requeue code, as well as the waiting code,
+to be able to acquire the rt_mutex before returning to user space.
+The requeue code cannot simply wake the waiter and leave it to
+acquire the rt_mutex as it would open a race window between the
+requeue call returning to user space and the waiter waking and
+starting to run.  This is especially true in the uncontended case.
+
+The solution involves two new rt_mutex helper routines,
+rt_mutex_start_proxy_lock() and rt_mutex_finish_proxy_lock(), which
+allow the requeue code to acquire an uncontended rt_mutex on behalf
+of the waiter and to enqueue the waiter on a contended rt_mutex.
+Two new system calls provide the kernel<->user interface to
+requeue_pi: FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI.
+
+FUTEX_WAIT_REQUEUE_PI is called by the waiter (pthread_cond_wait()
+and pthread_cond_timedwait()) to block on the initial futex and wait
+to be requeued to a PI-aware futex.  The implementation is the
+result of a high-speed collision between futex_wait() and
+futex_lock_pi(), with some extra logic to check for the additional
+wake-up scenarios.
+
+FUTEX_CMP_REQUEUE_PI is called by the waker
+(pthread_cond_broadcast() and pthread_cond_signal()) to requeue and
+possibly wake the waiting tasks. Internally, this system call is
+still handled by futex_requeue (by passing requeue_pi=1).  Before
+requeueing, futex_requeue() attempts to acquire the requeue target
+PI futex on behalf of the top waiter.  If it can, this waiter is
+woken.  futex_requeue() then proceeds to requeue the remaining
+nr_wake+nr_requeue tasks to the PI futex, calling
+rt_mutex_start_proxy_lock() prior to each requeue to prepare the
+task as a waiter on the underlying rt_mutex.  It is possible that
+the lock can be acquired at this stage as well; if so, the next
+waiter is woken to finish the acquisition of the lock.
+
+FUTEX_CMP_REQUEUE_PI accepts nr_wake and nr_requeue as arguments, but
+their sum is all that really matters.  futex_requeue() will wake or
+requeue up to nr_wake + nr_requeue tasks.  It will wake only as many
+tasks as it can acquire the lock for, which in the majority of cases
+should be 0 as good programming practice dictates that the caller of
+either pthread_cond_broadcast() or pthread_cond_signal() acquire the
+mutex prior to making the call. FUTEX_CMP_REQUEUE_PI requires that
+nr_wake=1.  nr_requeue should be INT_MAX for broadcast and 0 for
+signal.
index fd5cac0..4a3c220 100644 (file)
@@ -56,7 +56,6 @@ parameter is applicable:
        ISAPNP  ISA PnP code is enabled.
        ISDN    Appropriate ISDN support is enabled.
        JOY     Appropriate joystick support is enabled.
-       KMEMTRACE kmemtrace is enabled.
        LIBATA  Libata driver is enabled
        LP      Printer support is enabled.
        LOOP    Loopback device support is enabled.
@@ -329,11 +328,6 @@ and is between 256 and 4096 characters. It is defined in the file
                                    flushed before they will be reused, which
                                    is a lot of faster
 
-       amd_iommu_size= [HW,X86-64]
-                       Define the size of the aperture for the AMD IOMMU
-                       driver. Possible values are:
-                       '32M', '64M' (default), '128M', '256M', '512M', '1G'
-
        amijoy.map=     [HW,JOY] Amiga joystick support
                        Map of devices attached to JOY0DAT and JOY1DAT
                        Format: <a>,<b>
@@ -646,6 +640,13 @@ and is between 256 and 4096 characters. It is defined in the file
                        DMA-API debugging code disables itself because the
                        architectural default is too low.
 
+       dma_debug_driver=<driver_name>
+                       With this option the DMA-API debugging driver
+                       filter feature can be enabled at boot time. Just
+                       pass the driver to filter for as the parameter.
+                       The filter can be disabled or changed to another
+                       driver later using sysfs.
+
        dscc4.setup=    [NET]
 
        dtc3181e=       [HW,SCSI]
@@ -752,12 +753,25 @@ and is between 256 and 4096 characters. It is defined in the file
                        ia64_pal_cache_flush instead of SAL_CACHE_FLUSH.
 
        ftrace=[tracer]
-                       [ftrace] will set and start the specified tracer
+                       [FTRACE] will set and start the specified tracer
                        as early as possible in order to facilitate early
                        boot debugging.
 
        ftrace_dump_on_oops
-                       [ftrace] will dump the trace buffers on oops.
+                       [FTRACE] will dump the trace buffers on oops.
+
+       ftrace_filter=[function-list]
+                       [FTRACE] Limit the functions traced by the function
+                       tracer at boot up. function-list is a comma separated
+                       list of functions. This list can be changed at run
+                       time by the set_ftrace_filter file in the debugfs
+                       tracing directory. 
+
+       ftrace_notrace=[function-list]
+                       [FTRACE] Do not trace the functions specified in
+                       function-list. This list can be changed at run time
+                       by the set_ftrace_notrace file in the debugfs
+                       tracing directory.
 
        gamecon.map[2|3]=
                        [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
@@ -1054,15 +1068,6 @@ and is between 256 and 4096 characters. It is defined in the file
                        use the HighMem zone if it exists, and the Normal
                        zone if it does not.
 
-       kmemtrace.enable=       [KNL,KMEMTRACE] Format: { yes | no }
-                               Controls whether kmemtrace is enabled
-                               at boot-time.
-
-       kmemtrace.subbufs=n     [KNL,KMEMTRACE] Overrides the number of
-                       subbufs kmemtrace's relay channel has. Set this
-                       higher than default (KMEMTRACE_N_SUBBUFS in code) if
-                       you experience buffer overruns.
-
        kgdboc=         [HW] kgdb over consoles.
                        Requires a tty driver that supports console polling.
                        (only serial suported for now)
@@ -1575,6 +1580,9 @@ and is between 256 and 4096 characters. It is defined in the file
        noinitrd        [RAM] Tells the kernel not to load any configured
                        initial RAM disk.
 
+       nointremap      [X86-64, Intel-IOMMU] Do not enable interrupt
+                       remapping.
+
        nointroute      [IA-64]
 
        nojitter        [IA64] Disables jitter checking for ITC timers.
@@ -1660,6 +1668,14 @@ and is between 256 and 4096 characters. It is defined in the file
        oprofile.timer= [HW]
                        Use timer interrupt instead of performance counters
 
+       oprofile.cpu_type=      Force an oprofile cpu type
+                       This might be useful if you have an older oprofile
+                       userland or if you want common events.
+                       Format: { archperfmon }
+                       archperfmon: [X86] Force use of architectural
+                               perfmon on Intel CPUs instead of the
+                               CPU specific event set.
+
        osst=           [HW,SCSI] SCSI Tape Driver
                        Format: <buffer_size>,<write_threshold>
                        See also Documentation/scsi/st.txt.
index f5b7127..7f5809e 100644 (file)
@@ -31,6 +31,7 @@ Contents:
 
      - Locking functions.
      - Interrupt disabling functions.
+     - Sleep and wake-up functions.
      - Miscellaneous functions.
 
  (*) Inter-CPU locking barrier effects.
@@ -1217,6 +1218,132 @@ barriers are required in such a situation, they must be provided from some
 other means.
 
 
+SLEEP AND WAKE-UP FUNCTIONS
+---------------------------
+
+Sleeping and waking on an event flagged in global data can be viewed as an
+interaction between two pieces of data: the task state of the task waiting for
+the event and the global data used to indicate the event.  To make sure that
+these appear to happen in the right order, the primitives to begin the process
+of going to sleep, and the primitives to initiate a wake up imply certain
+barriers.
+
+Firstly, the sleeper normally follows something like this sequence of events:
+
+       for (;;) {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (event_indicated)
+                       break;
+               schedule();
+       }
+
+A general memory barrier is interpolated automatically by set_current_state()
+after it has altered the task state:
+
+       CPU 1
+       ===============================
+       set_current_state();
+         set_mb();
+           STORE current->state
+           <general barrier>
+       LOAD event_indicated
+
+set_current_state() may be wrapped by:
+
+       prepare_to_wait();
+       prepare_to_wait_exclusive();
+
+which therefore also imply a general memory barrier after setting the state.
+The whole sequence above is available in various canned forms, all of which
+interpolate the memory barrier in the right place:
+
+       wait_event();
+       wait_event_interruptible();
+       wait_event_interruptible_exclusive();
+       wait_event_interruptible_timeout();
+       wait_event_killable();
+       wait_event_timeout();
+       wait_on_bit();
+       wait_on_bit_lock();
+
+
+Secondly, code that performs a wake up normally follows something like this:
+
+       event_indicated = 1;
+       wake_up(&event_wait_queue);
+
+or:
+
+       event_indicated = 1;
+       wake_up_process(event_daemon);
+
+A write memory barrier is implied by wake_up() and co. if and only if they wake
+something up.  The barrier occurs before the task state is cleared, and so sits
+between the STORE to indicate the event and the STORE to set TASK_RUNNING:
+
+       CPU 1                           CPU 2
+       =============================== ===============================
+       set_current_state();            STORE event_indicated
+         set_mb();                     wake_up();
+           STORE current->state          <write barrier>
+           <general barrier>             STORE current->state
+       LOAD event_indicated
+
+The available waker functions include:
+
+       complete();
+       wake_up();
+       wake_up_all();
+       wake_up_bit();
+       wake_up_interruptible();
+       wake_up_interruptible_all();
+       wake_up_interruptible_nr();
+       wake_up_interruptible_poll();
+       wake_up_interruptible_sync();
+       wake_up_interruptible_sync_poll();
+       wake_up_locked();
+       wake_up_locked_poll();
+       wake_up_nr();
+       wake_up_poll();
+       wake_up_process();
+
+
+[!] Note that the memory barriers implied by the sleeper and the waker do _not_
+order multiple stores before the wake-up with respect to loads of those stored
+values after the sleeper has called set_current_state().  For instance, if the
+sleeper does:
+
+       set_current_state(TASK_INTERRUPTIBLE);
+       if (event_indicated)
+               break;
+       __set_current_state(TASK_RUNNING);
+       do_something(my_data);
+
+and the waker does:
+
+       my_data = value;
+       event_indicated = 1;
+       wake_up(&event_wait_queue);
+
+there's no guarantee that the change to event_indicated will be perceived by
+the sleeper as coming after the change to my_data.  In such a circumstance, the
+code on both sides must interpolate its own memory barriers between the
+separate data accesses.  Thus the above sleeper ought to do:
+
+       set_current_state(TASK_INTERRUPTIBLE);
+       if (event_indicated) {
+               smp_rmb();
+               do_something(my_data);
+       }
+
+and the waker should do:
+
+       my_data = value;
+       smp_wmb();
+       event_indicated = 1;
+       wake_up(&event_wait_queue);
+
+
 MISCELLANEOUS FUNCTIONS
 -----------------------
 
@@ -1366,7 +1493,7 @@ WHERE ARE MEMORY BARRIERS NEEDED?
 
 Under normal operation, memory operation reordering is generally not going to
 be a problem as a single-threaded linear piece of code will still appear to
-work correctly, even if it's in an SMP kernel.  There are, however, three
+work correctly, even if it's in an SMP kernel.  There are, however, four
 circumstances in which reordering definitely _could_ be a problem:
 
  (*) Interprocessor interaction.
index 5ba4d3f..1df7f9c 100644 (file)
@@ -4,6 +4,7 @@
 CONTENTS
 ========
 
+0. WARNING
 1. Overview
   1.1 The problem
   1.2 The solution
@@ -14,6 +15,23 @@ CONTENTS
 3. Future plans
 
 
+0. WARNING
+==========
+
+ Fiddling with these settings can result in an unstable system; the knobs are
+ root only and assume root knows what he is doing.
+
+Most notable:
+
+ * very small values in sched_rt_period_us can result in an unstable
+   system when the period is smaller than either the available hrtimer
+   resolution, or the time it takes to handle the budget refresh itself.
+
+ * very small values in sched_rt_runtime_us can result in an unstable
+   system when the runtime is so small the system has difficulty making
+   forward progress (NOTE: the migration thread and kstopmachine both
+   are real-time processes).
+
 1. Overview
 ===========
 
@@ -169,7 +187,7 @@ get their allocated time.
 
 Implementing SCHED_EDF might take a while to complete. Priority Inheritance is
 the biggest challenge as the current linux PI infrastructure is geared towards
-the limited static priority levels 0-139. With deadline scheduling you need to
+the limited static priority levels 0-99. With deadline scheduling you need to
 do deadline inheritance (since priority is inversely proportional to the
 deadline delta (deadline - now).
 
diff --git a/Documentation/trace/events.txt b/Documentation/trace/events.txt
new file mode 100644 (file)
index 0000000..f157d75
--- /dev/null
@@ -0,0 +1,90 @@
+                            Event Tracing
+
+               Documentation written by Theodore Ts'o
+                       Updated by Li Zefan
+
+1. Introduction
+===============
+
+Tracepoints (see Documentation/trace/tracepoints.txt) can be used
+without creating custom kernel modules to register probe functions
+using the event tracing infrastructure.
+
+Not all tracepoints can be traced using the event tracing system;
+the kernel developer must provide code snippets which define how the
+tracing information is saved into the tracing buffer, and how the
+tracing information should be printed.
+
+2. Using Event Tracing
+======================
+
+2.1 Via the 'set_event' interface
+---------------------------------
+
+The events which are available for tracing can be found in the file
+/debug/tracing/available_events.
+
+To enable a particular event, such as 'sched_wakeup', simply echo it
+to /debug/tracing/set_event. For example:
+
+       # echo sched_wakeup >> /debug/tracing/set_event
+
+[ Note: '>>' is necessary, otherwise it will firstly disable
+  all the events. ]
+
+To disable an event, echo the event name to the set_event file prefixed
+with an exclamation point:
+
+       # echo '!sched_wakeup' >> /debug/tracing/set_event
+
+To disable all events, echo an empty line to the set_event file:
+
+       # echo > /debug/tracing/set_event
+
+To enable all events, echo '*:*' or '*:' to the set_event file:
+
+       # echo *:* > /debug/tracing/set_event
+
+The events are organized into subsystems, such as ext4, irq, sched,
+etc., and a full event name looks like this: <subsystem>:<event>.  The
+subsystem name is optional, but it is displayed in the available_events
+file.  All of the events in a subsystem can be specified via the syntax
+"<subsystem>:*"; for example, to enable all irq events, you can use the
+command:
+
+       # echo 'irq:*' > /debug/tracing/set_event
+
+2.2 Via the 'enable' toggle
+---------------------------
+
+The events available are also listed in the /debug/tracing/events/ hierarchy
+of directories.
+
+To enable event 'sched_wakeup':
+
+       # echo 1 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To disable it:
+
+       # echo 0 > /debug/tracing/events/sched/sched_wakeup/enable
+
+To enable all events in the sched subsystem:
+
+       # echo 1 > /debug/tracing/events/sched/enable
+
+To enable all events:
+
+       # echo 1 > /debug/tracing/events/enable
+
+When reading one of these enable files, there are four results:
+
+ 0 - all events this file affects are disabled
+ 1 - all events this file affects are enabled
+ X - there is a mixture of events enabled and disabled
+ ? - this file does not affect any event
+
+3. Defining an event-enabled tracepoint
+=======================================
+
+See the example provided in samples/trace_events
+
index fd9a3e6..2a82d86 100644 (file)
@@ -179,7 +179,7 @@ Here is the list of current tracers that may be configured.
 
        Function call tracer to trace all kernel functions.
 
-  "function_graph_tracer"
+  "function_graph"
 
        Similar to the function tracer except that the
        function tracer probes the functions on their entry
@@ -518,9 +518,18 @@ priority with zero (0) being the highest priority and the nice
 values starting at 100 (nice -20). Below is a quick chart to map
 the kernel priority to user land priorities.
 
-  Kernel priority: 0 to 99    ==> user RT priority 99 to 0
-  Kernel priority: 100 to 139 ==> user nice -20 to 19
-  Kernel priority: 140        ==> idle task priority
+   Kernel Space                     User Space
+ ===============================================================
+   0(high) to  98(low)     user RT priority 99(high) to 1(low)
+                           with SCHED_RR or SCHED_FIFO
+ ---------------------------------------------------------------
+  99                       sched_priority is not used in scheduling
+                           decisions(it must be specified as 0)
+ ---------------------------------------------------------------
+ 100(high) to 139(low)     user nice -20(high) to 19(low)
+ ---------------------------------------------------------------
+ 140                       idle task priority
+ ---------------------------------------------------------------
 
 The task states are:
 
diff --git a/Documentation/trace/power.txt b/Documentation/trace/power.txt
new file mode 100644 (file)
index 0000000..cd805e1
--- /dev/null
@@ -0,0 +1,17 @@
+The power tracer collects detailed information about C-state and P-state
+transitions, instead of just looking at the high-level "average"
+information.
+
+There is a helper script found in scripts/tracing/power.pl in the kernel
+sources which can be used to parse this information and create a
+Scalable Vector Graphics (SVG) picture from the trace data.
+
+To use this tracer:
+
+       echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+       echo power > /sys/kernel/debug/tracing/current_tracer
+       echo 1 > /sys/kernel/debug/tracing/tracing_enabled
+       sleep 1
+       echo 0 > /sys/kernel/debug/tracing/tracing_enabled
+       cat /sys/kernel/debug/tracing/trace | \
+               perl scripts/tracing/power.pl > out.svg
index e020366..8da3a79 100644 (file)
@@ -50,6 +50,10 @@ Protocol 2.08:       (Kernel 2.6.26) Added crc32 checksum and ELF format
 Protocol 2.09: (Kernel 2.6.26) Added a field of 64-bit physical
                pointer to single linked list of struct setup_data.
 
+Protocol 2.10: (Kernel 2.6.31) Added a protocol for relaxed alignment
+               beyond kernel_alignment, and new init_size and
+               pref_address fields.  Added extended boot loader IDs.
+
 **** MEMORY LAYOUT
 
 The traditional memory map for the kernel loader, used for Image or
@@ -168,12 +172,13 @@ Offset    Proto   Name            Meaning
 021C/4 2.00+   ramdisk_size    initrd size (set by boot loader)
 0220/4 2.00+   bootsect_kludge DO NOT USE - for bootsect.S use only
 0224/2 2.01+   heap_end_ptr    Free memory after setup end
-0226/2 N/A     pad1            Unused
+0226/1 2.02+(3 ext_loader_ver  Extended boot loader version
+0227/1 2.02+(3 ext_loader_type Extended boot loader ID
 0228/4 2.02+   cmd_line_ptr    32-bit pointer to the kernel command line
 022C/4 2.03+   ramdisk_max     Highest legal initrd address
 0230/4 2.05+   kernel_alignment Physical addr alignment required for kernel
 0234/1 2.05+   relocatable_kernel Whether kernel is relocatable or not
-0235/1 N/A     pad2            Unused
+0235/1 2.10+   min_alignment   Minimum alignment, as a power of two
 0236/2 N/A     pad3            Unused
 0238/4 2.06+   cmdline_size    Maximum size of the kernel command line
 023C/4 2.07+   hardware_subarch Hardware subarchitecture
@@ -182,6 +187,8 @@ Offset      Proto   Name            Meaning
 024C/4 2.08+   payload_length  Length of kernel payload
 0250/8 2.09+   setup_data      64-bit physical pointer to linked list
                                of struct setup_data
+0258/8 2.10+   pref_address    Preferred loading address
+0260/4 2.10+   init_size       Linear memory required during initialization
 
 (1) For backwards compatibility, if the setup_sects field contains 0, the
     real value is 4.
@@ -190,6 +197,8 @@ Offset      Proto   Name            Meaning
     field are unusable, which means the size of a bzImage kernel
     cannot be determined.
 
+(3) Ignored, but safe to set, for boot protocols 2.02-2.09.
+
 If the "HdrS" (0x53726448) magic number is not found at offset 0x202,
 the boot protocol version is "old".  Loading an old kernel, the
 following parameters should be assumed:
@@ -343,18 +352,32 @@ Protocol: 2.00+
   0xTV here, where T is an identifier for the boot loader and V is
   a version number.  Otherwise, enter 0xFF here.
 
+  For boot loader IDs above T = 0xD, write T = 0xE to this field and
+  write the extended ID minus 0x10 to the ext_loader_type field.
+  Similarly, the ext_loader_ver field can be used to provide more than
+  four bits for the bootloader version.
+
+  For example, for T = 0x15, V = 0x234, write:
+
+  type_of_loader  <- 0xE4
+  ext_loader_type <- 0x05
+  ext_loader_ver  <- 0x23
+
   Assigned boot loader ids:
        0  LILO                 (0x00 reserved for pre-2.00 bootloader)
        1  Loadlin
        2  bootsect-loader      (0x20, all other values reserved)
-       3  SYSLINUX
-       4  EtherBoot
+       3  Syslinux
+       4  Etherboot/gPXE
        5  ELILO
        7  GRUB
-       8  U-BOOT
+       8  U-Boot
        9  Xen
        A  Gujin
        B  Qemu
+       C  Arcturus Networks uCbootloader
+       E  Extended             (see ext_loader_type)
+       F  Special              (0xFF = undefined)
 
   Please contact <hpa@zytor.com> if you need a bootloader ID
   value assigned.
@@ -453,6 +476,35 @@ Protocol:  2.01+
   Set this field to the offset (from the beginning of the real-mode
   code) of the end of the setup stack/heap, minus 0x0200.
 
+Field name:    ext_loader_ver
+Type:          write (optional)
+Offset/size:   0x226/1
+Protocol:      2.02+
+
+  This field is used as an extension of the version number in the
+  type_of_loader field.  The total version number is considered to be
+  (type_of_loader & 0x0f) + (ext_loader_ver << 4).
+
+  The use of this field is boot loader specific.  If not written, it
+  is zero.
+
+  Kernels prior to 2.6.31 did not recognize this field, but it is safe
+  to write for protocol version 2.02 or higher.
+
+Field name:    ext_loader_type
+Type:          write (obligatory if (type_of_loader & 0xf0) == 0xe0)
+Offset/size:   0x227/1
+Protocol:      2.02+
+
+  This field is used as an extension of the type number in
+  type_of_loader field.  If the type in type_of_loader is 0xE, then
+  the actual type is (ext_loader_type + 0x10).
+
+  This field is ignored if the type in type_of_loader is not 0xE.
+
+  Kernels prior to 2.6.31 did not recognize this field, but it is safe
+  to write for protocol version 2.02 or higher.
+
 Field name:    cmd_line_ptr
 Type:          write (obligatory)
 Offset/size:   0x228/4
@@ -482,11 +534,19 @@ Protocol: 2.03+
   0x37FFFFFF, you can start your ramdisk at 0x37FE0000.)
 
 Field name:    kernel_alignment
-Type:          read (reloc)
+Type:          read/modify (reloc)
 Offset/size:   0x230/4
-Protocol:      2.05+
+Protocol:      2.05+ (read), 2.10+ (modify)
+
+  Alignment unit required by the kernel (if relocatable_kernel is
+  true.)  A relocatable kernel that is loaded at an alignment
+  incompatible with the value in this field will be realigned during
+  kernel initialization.
 
-  Alignment unit required by the kernel (if relocatable_kernel is true.)
+  Starting with protocol version 2.10, this reflects the kernel
+  alignment preferred for optimal performance; it is possible for the
+  loader to modify this field to permit a lesser alignment.  See the
+  min_alignment and pref_address fields below.
 
 Field name:    relocatable_kernel
 Type:          read (reloc)
@@ -498,6 +558,22 @@ Protocol:  2.05+
   After loading, the boot loader must set the code32_start field to
   point to the loaded code, or to a boot loader hook.
 
+Field name:    min_alignment
+Type:          read (reloc)
+Offset/size:   0x235/1
+Protocol:      2.10+
+
+  This field, if nonzero, indicates as a power of two the minimum
+  alignment required, as opposed to preferred, by the kernel to boot.
+  If a boot loader makes use of this field, it should update the
+  kernel_alignment field with the alignment unit desired; typically:
+
+       kernel_alignment = 1 << min_alignment
+
+  There may be a considerable performance cost with an excessively
+  misaligned kernel.  Therefore, a loader should typically try each
+  power-of-two alignment from kernel_alignment down to this alignment.
+
 Field name:    cmdline_size
 Type:          read
 Offset/size:   0x238/4
@@ -582,6 +658,36 @@ Protocol:  2.09+
   sure to consider the case where the linked list already contains
   entries.
 
+Field name:    pref_address
+Type:          read (reloc)
+Offset/size:   0x258/8
+Protocol:      2.10+
+
+  This field, if nonzero, represents a preferred load address for the
+  kernel.  A relocating bootloader should attempt to load at this
+  address if possible.
+
+  A non-relocatable kernel will unconditionally move itself to run
+  at this address.
+
+Field name:    init_size
+Type:          read
+Offset/size:   0x260/4
+
+  This field indicates the amount of linear contiguous memory starting
+  at the kernel runtime start address that the kernel needs before it
+  is capable of examining its memory map.  This is not the same thing
+  as the total amount of memory the kernel needs to boot, but it can
+  be used by a relocating boot loader to help select a safe load
+  address for the kernel.
+
+  The kernel runtime start address is determined by the following algorithm:
+
+  if (relocatable_kernel)
+       runtime_start = align_up(load_address, kernel_alignment)
+  else
+       runtime_start = pref_address
+
 
 **** THE IMAGE CHECKSUM
 
index 34c1304..2db5893 100644 (file)
@@ -150,11 +150,6 @@ NUMA
                Otherwise, the remaining system RAM is allocated to an
                additional node.
 
-  numa=hotadd=percent
-               Only allow hotadd memory to preallocate page structures upto
-               percent of already available memory.
-               numa=hotadd=0 will disable hotadd memory.
-
 ACPI
 
   acpi=off     Don't enable ACPI
index 29b52b1..d6498e3 100644 (file)
@@ -6,10 +6,11 @@ Virtual memory map with 4 level page tables:
 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
 hole caused by [48:63] sign extension
 ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole
-ffff880000000000 - ffffc0ffffffffff (=57 TB) direct mapping of all phys. memory
-ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole
-ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space
-ffffe20000000000 - ffffe2ffffffffff (=40 bits) virtual memory map (1TB)
+ffff880000000000 - ffffc7ffffffffff (=64 TB) direct mapping of all phys. memory
+ffffc80000000000 - ffffc8ffffffffff (=40 bits) hole
+ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
+ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
+ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffffff80000000 - ffffffffa0000000 (=512 MB)  kernel text mapping, from phys 0
 ffffffffa0000000 - fffffffffff00000 (=1536 MB) module mapping space
index cf4abdd..84285b5 100644 (file)
@@ -71,7 +71,7 @@ P: Person
 M: Mail patches to
 L: Mailing list that is relevant to this area
 W: Web-page with status/info
-T: SCM tree type and location.  Type is one of: git, hg, quilt.
+T: SCM tree type and location.  Type is one of: git, hg, quilt, stgit.
 S: Status, one of the following:
 
        Supported:      Someone is actually paid to look after this.
@@ -159,7 +159,8 @@ F:  drivers/net/r8169.c
 8250/16?50 (AND CLONE UARTS) SERIAL DRIVER
 L:     linux-serial@vger.kernel.org
 W:     http://serial.sourceforge.net
-S:     Orphan
+M:     alan@lxorguk.ukuu.org.uk
+S:     Odd Fixes
 F:     drivers/serial/8250*
 F:     include/linux/serial_8250.h
 
@@ -5629,6 +5630,7 @@ P:        Alan Cox
 M:     alan@lxorguk.ukuu.org.uk
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
+T:     stgit http://zeniv.linux.org.uk/~alan/ttydev/
 
 TULIP NETWORK DRIVERS
 P:     Grant Grundler
index 610d1c3..03373bb 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 30
-EXTRAVERSION = -rc8
+EXTRAVERSION =
 NAME = Man-Eating Seals of Antiquity
 
 # *DOCUMENTATION*
@@ -533,7 +533,7 @@ endif
 
 include $(srctree)/arch/$(SRCARCH)/Makefile
 
-ifneq (CONFIG_FRAME_WARN,0)
+ifneq ($(CONFIG_FRAME_WARN),0)
 KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN})
 endif
 
index 9c9d1fd..5bd5259 100644 (file)
@@ -176,22 +176,26 @@ cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
        }
 }
 
-static void
+static int
 dp264_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
        spin_lock(&dp264_irq_lock);
        cpu_set_irq_affinity(irq, *affinity);
        tsunami_update_irq_hw(cached_irq_mask);
        spin_unlock(&dp264_irq_lock);
+
+       return 0;
 }
 
-static void
+static int
 clipper_set_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
        spin_lock(&dp264_irq_lock);
        cpu_set_irq_affinity(irq - 16, *affinity);
        tsunami_update_irq_hw(cached_irq_mask);
        spin_unlock(&dp264_irq_lock);
+
+       return 0;
 }
 
 static struct hw_interrupt_type dp264_irq_type = {
index 27f840a..8dd239e 100644 (file)
@@ -157,13 +157,15 @@ titan_cpu_set_irq_affinity(unsigned int irq, cpumask_t affinity)
 
 }
 
-static void
+static int
 titan_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 { 
        spin_lock(&titan_irq_lock);
        titan_cpu_set_irq_affinity(irq - 16, *affinity);
        titan_update_irq_hw(titan_cached_irq_mask);
        spin_unlock(&titan_irq_lock);
+
+       return 0;
 }
 
 static void
index 3e1714c..664c7b8 100644 (file)
@@ -109,7 +109,7 @@ static void gic_unmask_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
+static int gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
 {
        void __iomem *reg = gic_dist_base(irq) + GIC_DIST_TARGET + (gic_irq(irq) & ~3);
        unsigned int shift = (irq % 4) * 8;
@@ -122,6 +122,8 @@ static void gic_set_cpu(unsigned int irq, const struct cpumask *mask_val)
        val |= 1 << (cpu + shift);
        writel(val, reg);
        spin_unlock(&irq_controller_lock);
+
+       return 0;
 }
 #endif
 
index cb7a9e9..feaa75f 100644 (file)
@@ -7,4 +7,20 @@
 #define L1_CACHE_SHIFT         5
 #define L1_CACHE_BYTES         (1 << L1_CACHE_SHIFT)
 
+/*
+ * Memory returned by kmalloc() may be used for DMA, so we must make
+ * sure that all such allocations are cache aligned. Otherwise,
+ * unrelated code may cause parts of the buffer to be read into the
+ * cache before the transfer is done, causing old data to be seen by
+ * the CPU.
+ */
+#define ARCH_KMALLOC_MINALIGN  L1_CACHE_BYTES
+
+/*
+ * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
+ */
+#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
+#define ARCH_SLAB_MINALIGN 8
+#endif
+
 #endif
index e6eb8a6..7b52277 100644 (file)
@@ -202,13 +202,6 @@ typedef struct page *pgtable_t;
        (((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0) | \
         VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
 
-/*
- * With EABI on ARMv5 and above we must have 64-bit aligned slab pointers.
- */
-#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5)
-#define ARCH_SLAB_MINALIGN 8
-#endif
-
 #include <asm-generic/page.h>
 
 #endif
index 999d013..e4b08ca 100644 (file)
@@ -890,7 +890,7 @@ static struct clk clko_clk = {
                .con_id = n, \
                .clk = &c, \
        },
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
 /* It's unlikely that any driver wants one of them directly:
        _REGISTER_CLOCK(NULL, "ckih", ckih_clk)
        _REGISTER_CLOCK(NULL, "ckil", ckil_clk)
index 3f7280c..2c97144 100644 (file)
@@ -621,7 +621,7 @@ DEFINE_CLOCK1(csi_clk,     0, 0,      0, parent, &csi_clk1, &per4_clk);
                .clk = &c, \
        },
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
        _REGISTER_CLOCK("imx-uart.0", NULL, uart1_clk)
        _REGISTER_CLOCK("imx-uart.1", NULL, uart2_clk)
        _REGISTER_CLOCK("imx-uart.2", NULL, uart3_clk)
index 53a112d..3c1e06f 100644 (file)
@@ -404,7 +404,7 @@ DEFINE_CLOCK(gpu2d_clk,  0, CCM_CGR3,  4, NULL, NULL);
                .clk = &c,              \
        },
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
        _REGISTER_CLOCK(NULL, "asrc", asrc_clk)
        _REGISTER_CLOCK(NULL, "ata", ata_clk)
        _REGISTER_CLOCK(NULL, "audmux", audmux_clk)
index 9957a11..a68fcf9 100644 (file)
@@ -516,7 +516,7 @@ DEFINE_CLOCK(ipg_clk,     0, NULL,          0, ipg_get_rate, NULL, &ahb_clk);
                .clk = &c, \
        },
 
-static struct clk_lookup lookups[] __initdata = {
+static struct clk_lookup lookups[] = {
        _REGISTER_CLOCK(NULL, "emi", emi_clk)
        _REGISTER_CLOCK(NULL, "cspi", cspi1_clk)
        _REGISTER_CLOCK(NULL, "cspi", cspi2_clk)
index d245e59..29970f7 100644 (file)
@@ -72,7 +72,10 @@ void __init pxa_set_mci_info(struct pxamci_platform_data *info)
 }
 
 
-static struct pxa2xx_udc_mach_info pxa_udc_info;
+static struct pxa2xx_udc_mach_info pxa_udc_info = {
+       .gpio_pullup = -1,
+       .gpio_vbus   = -1,
+};
 
 void __init pxa_set_udc_info(struct pxa2xx_udc_mach_info *info)
 {
index 2121309..2b27336 100644 (file)
@@ -412,7 +412,7 @@ static struct platform_device imote2_flash_device = {
  */
 static struct i2c_board_info __initdata imote2_i2c_board_info[] = {
        { /* UCAM sensor board */
-               .type = "max1238",
+               .type = "max1239",
                .addr = 0x35,
        }, { /* ITS400 Sensor board only */
                .type = "max1363",
index 3397f1e..a08d9d2 100644 (file)
@@ -184,23 +184,37 @@ __v7_setup:
        stmia   r12, {r0-r5, r7, r9, r11, lr}
        bl      v7_flush_dcache_all
        ldmia   r12, {r0-r5, r7, r9, r11, lr}
+
+       mrc     p15, 0, r0, c0, c0, 0           @ read main ID register
+       and     r10, r0, #0xff000000            @ ARM?
+       teq     r10, #0x41000000
+       bne     2f
+       and     r5, r0, #0x00f00000             @ variant
+       and     r6, r0, #0x0000000f             @ revision
+       orr     r0, r6, r5, lsr #20-4           @ combine variant and revision
+
 #ifdef CONFIG_ARM_ERRATA_430973
-       mrc     p15, 0, r10, c1, c0, 1          @ read aux control register
-       orr     r10, r10, #(1 << 6)             @ set IBE to 1
-       mcr     p15, 0, r10, c1, c0, 1          @ write aux control register
+       teq     r5, #0x00100000                 @ only present in r1p*
+       mrceq   p15, 0, r10, c1, c0, 1          @ read aux control register
+       orreq   r10, r10, #(1 << 6)             @ set IBE to 1
+       mcreq   p15, 0, r10, c1, c0, 1          @ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_458693
-       mrc     p15, 0, r10, c1, c0, 1          @ read aux control register
-       orr     r10, r10, #(1 << 5)             @ set L1NEON to 1
-       orr     r10, r10, #(1 << 9)             @ set PLDNOP to 1
-       mcr     p15, 0, r10, c1, c0, 1          @ write aux control register
+       teq     r0, #0x20                       @ only present in r2p0
+       mrceq   p15, 0, r10, c1, c0, 1          @ read aux control register
+       orreq   r10, r10, #(1 << 5)             @ set L1NEON to 1
+       orreq   r10, r10, #(1 << 9)             @ set PLDNOP to 1
+       mcreq   p15, 0, r10, c1, c0, 1          @ write aux control register
 #endif
 #ifdef CONFIG_ARM_ERRATA_460075
-       mrc     p15, 1, r10, c9, c0, 2          @ read L2 cache aux ctrl register
-       orr     r10, r10, #(1 << 22)            @ set the Write Allocate disable bit
-       mcr     p15, 1, r10, c9, c0, 2          @ write the L2 cache aux ctrl register
+       teq     r0, #0x20                       @ only present in r2p0
+       mrceq   p15, 1, r10, c9, c0, 2          @ read L2 cache aux ctrl register
+       tsteq   r10, #1 << 22
+       orreq   r10, r10, #(1 << 22)            @ set the Write Allocate disable bit
+       mcreq   p15, 1, r10, c9, c0, 2          @ write the L2 cache aux ctrl register
 #endif
-       mov     r10, #0
+
+2:     mov     r10, #0
 #ifdef HARVARD_CACHE
        mcr     p15, 0, r10, c7, c5, 0          @ I+BTB cache invalidate
 #endif
index 599217b..f9bd17d 100644 (file)
 #define ASMARM_ARCH_UART_H
 
 #define IMXUART_HAVE_RTSCTS (1<<0)
+#define IMXUART_IRDA        (1<<1)
 
 struct imxuart_platform_data {
        int (*init)(struct platform_device *pdev);
        int (*exit)(struct platform_device *pdev);
        unsigned int flags;
+       void (*irda_enable)(int enable);
+       unsigned int irda_inv_rx:1;
+       unsigned int irda_inv_tx:1;
+       unsigned short transceiver_delay;
 };
 
 #endif
index df3925c..d70b445 100644 (file)
@@ -325,12 +325,14 @@ static void end_crisv32_irq(unsigned int irq)
 {
 }
 
-void set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
+int set_affinity_crisv32_irq(unsigned int irq, const struct cpumask *dest)
 {
        unsigned long flags;
        spin_lock_irqsave(&irq_lock, flags);
        irq_allocations[irq - FIRST_IRQ].mask = *dest;
        spin_unlock_irqrestore(&irq_lock, flags);
+
+       return 0;
 }
 
 static struct irq_chip crisv32_irq_type = {
index 9d1552a..8a5bd7a 100644 (file)
@@ -6,6 +6,7 @@ config FRV
        bool
        default y
        select HAVE_IDE
+       select HAVE_ARCH_TRACEHOOK
 
 config ZONE_DMA
        bool
index 287f6f6..50ae91b 100644 (file)
@@ -112,7 +112,7 @@ extern unsigned long atomic_test_and_XOR_mask(unsigned long mask, volatile unsig
 #define atomic_clear_mask(mask, v)     atomic_test_and_ANDNOT_mask((mask), (v))
 #define atomic_set_mask(mask, v)       atomic_test_and_OR_mask((mask), (v))
 
-static inline int test_and_clear_bit(int nr, volatile void *addr)
+static inline int test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *ptr = addr;
        unsigned long mask = 1UL << (nr & 31);
@@ -120,7 +120,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
        return (atomic_test_and_ANDNOT_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_set_bit(int nr, volatile void *addr)
+static inline int test_and_set_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *ptr = addr;
        unsigned long mask = 1UL << (nr & 31);
@@ -128,7 +128,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
        return (atomic_test_and_OR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline int test_and_change_bit(int nr, volatile void *addr)
+static inline int test_and_change_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *ptr = addr;
        unsigned long mask = 1UL << (nr & 31);
@@ -136,22 +136,22 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
        return (atomic_test_and_XOR_mask(mask, ptr) & mask) != 0;
 }
 
-static inline void clear_bit(int nr, volatile void *addr)
+static inline void clear_bit(unsigned long nr, volatile void *addr)
 {
        test_and_clear_bit(nr, addr);
 }
 
-static inline void set_bit(int nr, volatile void *addr)
+static inline void set_bit(unsigned long nr, volatile void *addr)
 {
        test_and_set_bit(nr, addr);
 }
 
-static inline void change_bit(int nr, volatile void * addr)
+static inline void change_bit(unsigned long nr, volatile void *addr)
 {
        test_and_change_bit(nr, addr);
 }
 
-static inline void __clear_bit(int nr, volatile void * addr)
+static inline void __clear_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask;
@@ -161,7 +161,7 @@ static inline void __clear_bit(int nr, volatile void * addr)
        *a &= ~mask;
 }
 
-static inline void __set_bit(int nr, volatile void * addr)
+static inline void __set_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask;
@@ -171,7 +171,7 @@ static inline void __set_bit(int nr, volatile void * addr)
        *a |= mask;
 }
 
-static inline void __change_bit(int nr, volatile void *addr)
+static inline void __change_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask;
@@ -181,7 +181,7 @@ static inline void __change_bit(int nr, volatile void *addr)
        *a ^= mask;
 }
 
-static inline int __test_and_clear_bit(int nr, volatile void * addr)
+static inline int __test_and_clear_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask, retval;
@@ -193,7 +193,7 @@ static inline int __test_and_clear_bit(int nr, volatile void * addr)
        return retval;
 }
 
-static inline int __test_and_set_bit(int nr, volatile void * addr)
+static inline int __test_and_set_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask, retval;
@@ -205,7 +205,7 @@ static inline int __test_and_set_bit(int nr, volatile void * addr)
        return retval;
 }
 
-static inline int __test_and_change_bit(int nr, volatile void * addr)
+static inline int __test_and_change_bit(unsigned long nr, volatile void *addr)
 {
        volatile unsigned long *a = addr;
        int mask, retval;
@@ -220,12 +220,13 @@ static inline int __test_and_change_bit(int nr, volatile void * addr)
 /*
  * This routine doesn't need to be atomic.
  */
-static inline int __constant_test_bit(int nr, const volatile void * addr)
+static inline int
+__constant_test_bit(unsigned long nr, const volatile void *addr)
 {
        return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
 }
 
-static inline int __test_bit(int nr, const volatile void * addr)
+static inline int __test_bit(unsigned long nr, const volatile void *addr)
 {
        int     * a = (int *) addr;
        int     mask;
index 7279ec0..7bbf6e4 100644 (file)
@@ -116,6 +116,7 @@ do {                                                                                        \
 } while(0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_FDPIC_CORE_EFLAGS  EF_FRV_FDPIC
 #define ELF_EXEC_PAGESIZE      16384
 
index 585d9b4..cc685e6 100644 (file)
@@ -87,8 +87,7 @@ static inline void pci_dma_sync_single(struct pci_dev *hwdev,
                                       dma_addr_t dma_handle,
                                       size_t size, int direction)
 {
-       if (direction == PCI_DMA_NONE)
-                BUG();
+       BUG_ON(direction == PCI_DMA_NONE);
 
        frv_cache_wback_inv((unsigned long)bus_to_virt(dma_handle),
                            (unsigned long)bus_to_virt(dma_handle) + size);
@@ -105,9 +104,7 @@ static inline void pci_dma_sync_sg(struct pci_dev *hwdev,
                                   int nelems, int direction)
 {
        int i;
-
-       if (direction == PCI_DMA_NONE)
-                BUG();
+       BUG_ON(direction == PCI_DMA_NONE);
 
        for (i = 0; i < nelems; i++)
                frv_cache_wback_inv(sg_dma_address(&sg[i]),
index cf69340..a54b535 100644 (file)
@@ -65,6 +65,8 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
+struct task_struct;
+
 /*
  * we dedicate GR28 to keeping a pointer to the current exception frame
  * - gr28 is destroyed on entry to the kernel from userspace
@@ -73,11 +75,18 @@ register struct pt_regs *__frame asm("gr28");
 
 #define user_mode(regs)                        (!((regs)->psr & PSR_S))
 #define instruction_pointer(regs)      ((regs)->pc)
+#define user_stack_pointer(regs)       ((regs)->sp)
 
 extern unsigned long user_stack(const struct pt_regs *);
 extern void show_regs(struct pt_regs *);
 #define profile_pc(regs) ((regs)->pc)
-#endif
+
+#define task_pt_regs(task) ((task)->thread.frame0)
+
+#define arch_has_single_step() (1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
 
 #endif /* !__ASSEMBLY__ */
+#endif /* __KERNEL__ */
 #endif /* _ASM_PTRACE_H */
diff --git a/arch/frv/include/asm/syscall.h b/arch/frv/include/asm/syscall.h
new file mode 100644 (file)
index 0000000..70689eb
--- /dev/null
@@ -0,0 +1,123 @@
+/* syscall parameter access functions
+ *
+ * Copyright (C) 2009 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _ASM_SYSCALL_H
+#define _ASM_SYSCALL_H
+
+#include <linux/err.h>
+#include <asm/ptrace.h>
+
+/*
+ * Get the system call number or -1 (read from regs->syscallno; task unused)
+ */
+static inline long syscall_get_nr(struct task_struct *task,
+                                 struct pt_regs *regs)
+{
+       return regs->syscallno;
+}
+
+/*
+ * Restore the clobbered GR8 register from the copy saved in regs->orig_gr8
+ * (1st syscall arg was overwritten with syscall return or error)
+ */
+static inline void syscall_rollback(struct task_struct *task,
+                                   struct pt_regs *regs)
+{
+       regs->gr8 = regs->orig_gr8;     /* task is not consulted */
+}
+
+/*
+ * See if the syscall return value is an error, returning it if it is and 0 if
+ * not (the value is taken from GR8 and classified with IS_ERR_VALUE())
+ */
+static inline long syscall_get_error(struct task_struct *task,
+                                    struct pt_regs *regs)
+{
+       return IS_ERR_VALUE(regs->gr8) ? regs->gr8 : 0;
+}
+
+/*
+ * Get the syscall return value (the current contents of GR8; task unused)
+ */
+static inline long syscall_get_return_value(struct task_struct *task,
+                                           struct pt_regs *regs)
+{
+       return regs->gr8;
+}
+
+/*
+ * Set the syscall return value in GR8: -error if error is non-zero, else val
+ */
+static inline void syscall_set_return_value(struct task_struct *task,
+                                           struct pt_regs *regs,
+                                           int error, long val)
+{
+       if (error)
+               regs->gr8 = -error;     /* NOTE(review): sign is flipped here - confirm which sign callers pass */
+       else
+               regs->gr8 = val;        /* val is only used when error == 0 */
+}
+
+/*
+ * Retrieve the first n (1..6) system call arguments into args[]; i must be 0
+ */
+static inline void syscall_get_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        unsigned long *args)
+{
+       /*
+        * Do this simply for now. If we need to start supporting
+        * fetching arguments from arbitrary indices, this will need some
+        * extra logic. Presently there are no in-tree users that depend
+        * on this behaviour.
+        */
+       BUG_ON(i);      /* any non-zero starting index is rejected outright */
+
+       /* Argument pattern is: GR8, GR9, GR10, GR11, GR12, GR13 */
+       switch (n) {
+       case 6: args[5] = regs->gr13;   /* fall through */
+       case 5: args[4] = regs->gr12;   /* fall through */
+       case 4: args[3] = regs->gr11;   /* fall through */
+       case 3: args[2] = regs->gr10;   /* fall through */
+       case 2: args[1] = regs->gr9;    /* fall through */
+       case 1: args[0] = regs->gr8;
+               break;
+       default:        /* n == 0 or n > 6 */
+               BUG();
+       }
+}
+
+/*
+ * Alter the first n (1..6) system call arguments from args[]; i must be 0
+ */
+static inline void syscall_set_arguments(struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned int i, unsigned int n,
+                                        const unsigned long *args)
+{
+       /* Same note as above applies */
+       BUG_ON(i);      /* any non-zero starting index is rejected outright */
+
+       switch (n) {    /* args[0..5] map onto GR8..GR13 */
+       case 6: regs->gr13 = args[5];   /* fall through */
+       case 5: regs->gr12 = args[4];   /* fall through */
+       case 4: regs->gr11 = args[3];   /* fall through */
+       case 3: regs->gr10 = args[2];   /* fall through */
+       case 2: regs->gr9  = args[1];   /* fall through */
+       case 1: regs->gr8  = args[0];
+               break;
+       default:        /* n == 0 or n > 6 */
+               BUG();
+       }
+}
+
+#endif /* _ASM_SYSCALL_H */
index bb53ab7..e8a5ed7 100644 (file)
@@ -109,20 +109,20 @@ register struct thread_info *__current_thread_info asm("gr15");
  * - other flags in MSW
  */
 #define TIF_SYSCALL_TRACE      0       /* syscall trace active */
-#define TIF_SIGPENDING         1       /* signal pending */
-#define TIF_NEED_RESCHED       2       /* rescheduling necessary */
-#define TIF_SINGLESTEP         3       /* restore singlestep on return to user mode */
-#define TIF_IRET               4       /* return with iret */
+#define TIF_NOTIFY_RESUME      1       /* callback before returning to user */
+#define TIF_SIGPENDING         2       /* signal pending */
+#define TIF_NEED_RESCHED       3       /* rescheduling necessary */
+#define TIF_SINGLESTEP         4       /* restore singlestep on return to user mode */
 #define TIF_RESTORE_SIGMASK    5       /* restore signal mask in do_signal() */
 #define TIF_POLLING_NRFLAG     16      /* true if poll_idle() is polling TIF_NEED_RESCHED */
 #define TIF_MEMDIE             17      /* OOM killer killed process */
 #define TIF_FREEZE             18      /* freezing for suspend */
 
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING                (1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1 << TIF_NEED_RESCHED)
 #define _TIF_SINGLESTEP                (1 << TIF_SINGLESTEP)
-#define _TIF_IRET              (1 << TIF_IRET)
 #define _TIF_RESTORE_SIGMASK   (1 << TIF_RESTORE_SIGMASK)
 #define _TIF_POLLING_NRFLAG    (1 << TIF_POLLING_NRFLAG)
 #define _TIF_FREEZE            (1 << TIF_FREEZE)
index 1da523b..356e0e3 100644 (file)
@@ -886,7 +886,6 @@ system_call:
        bnc             icc0,#0,__syscall_badsys
 
        ldi             @(gr15,#TI_FLAGS),gr4
-       ori             gr4,#_TIF_SYSCALL_TRACE,gr4
        andicc          gr4,#_TIF_SYSCALL_TRACE,gr0,icc0
        bne             icc0,#0,__syscall_trace_entry
 
@@ -1150,11 +1149,10 @@ __entry_work_notifysig:
        # perform syscall entry tracing
 __syscall_trace_entry:
        LEDS            0x6320
-       setlos.p        #0,gr8
-       call            do_syscall_trace
+       call            syscall_trace_entry
 
-       ldi             @(gr28,#REG_SYSCALLNO),gr7
-       lddi            @(gr28,#REG_GR(8)) ,gr8
+       lddi.p          @(gr28,#REG_GR(8)) ,gr8
+       ori             gr8,#0,gr7              ; syscall_trace_entry() returned new syscallno
        lddi            @(gr28,#REG_GR(10)),gr10
        lddi.p          @(gr28,#REG_GR(12)),gr12
 
@@ -1169,11 +1167,10 @@ __syscall_exit_work:
        beq             icc0,#1,__entry_work_pending
 
        movsg           psr,gr23
-       andi            gr23,#~PSR_PIL,gr23     ; could let do_syscall_trace() call schedule()
+       andi            gr23,#~PSR_PIL,gr23     ; could let syscall_trace_exit() call schedule()
        movgs           gr23,psr
 
-       setlos.p        #1,gr8
-       call            do_syscall_trace
+       call            syscall_trace_exit
        bra             __entry_resume_userspace
 
 __syscall_badsys:
index 5e7d401..60eeed3 100644 (file)
@@ -19,6 +19,9 @@
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/signal.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
  * in exit.c or in signal.c.
  */
 
+/*
+ * retrieve the contents of FRV userspace general registers (thread.user->i)
+ */
+static int genregs_get(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      void *kbuf, void __user *ubuf)
+{
+       const struct user_int_regs *iregs = &target->thread.user->i;
+       int ret;
+
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 iregs, 0, sizeof(*iregs));    /* iregs supplies offsets 0..sizeof(*iregs) */
+       if (ret < 0)
+               return ret;     /* hand a negative result straight back */
+
+       return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*iregs), -1);    /* offsets past the struct; presumably zero-filled - confirm in regset.h */
+}
+
+/*
+ * update the contents of the FRV userspace general registers (thread.user->i)
+ */
+static int genregs_set(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      const void *kbuf, const void __user *ubuf)
+{
+       struct user_int_regs *iregs = &target->thread.user->i;
+       unsigned int offs_gr0, offs_gr1;
+       int ret;
+
+       /* not allowed to set PSR or __status: -EIO if [pos, pos+count) overlaps either word */
+       if (pos < offsetof(struct user_int_regs, psr) + sizeof(long) &&
+           pos + count > offsetof(struct user_int_regs, psr))
+               return -EIO;
+
+       if (pos < offsetof(struct user_int_regs, __status) + sizeof(long) &&
+           pos + count > offsetof(struct user_int_regs, __status))
+               return -EIO;
+
+       /* set the control regs, i.e. everything laid out before gr[0] */
+       offs_gr0 = offsetof(struct user_int_regs, gr[0]);
+       offs_gr1 = offsetof(struct user_int_regs, gr[1]);
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                iregs, 0, offs_gr0);
+       if (ret < 0)
+               return ret;
+
+       /* skip GR0/TBR: the gr[0] slot goes to the _ignore helper, not to iregs */
+       ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                       offs_gr0, offs_gr1);
+       if (ret < 0)
+               return ret;
+
+       /* set the general regs: gr[1] up to the end of the structure */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &iregs->gr[1], offs_gr1, sizeof(*iregs));
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*iregs), -1);    /* offsets past the struct are handed to the _ignore helper */
+}
+
+/*
+ * retrieve the contents of FRV userspace FP/Media registers (thread.user->f)
+ */
+static int fpmregs_get(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      void *kbuf, void __user *ubuf)
+{
+       const struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+       int ret;
+
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 fpregs, 0, sizeof(*fpregs));  /* fpregs supplies offsets 0..sizeof(*fpregs) */
+       if (ret < 0)
+               return ret;     /* hand a negative result straight back */
+
+       return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*fpregs), -1);   /* offsets past the struct; presumably zero-filled - confirm in regset.h */
+}
+
+/*
+ * update the contents of the FRV userspace FP/Media registers (thread.user->f)
+ */
+static int fpmregs_set(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      const void *kbuf, const void __user *ubuf)
+{
+       struct user_fpmedia_regs *fpregs = &target->thread.user->f;
+       int ret;
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                fpregs, 0, sizeof(*fpregs));   /* whole struct is writable, unlike the general regs */
+       if (ret < 0)
+               return ret;     /* hand a negative result straight back */
+
+       return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*fpregs), -1);   /* offsets past the struct are handed to the _ignore helper */
+}
+
+/*
+ * determine if the FP/Media registers have actually been used
+ */
+static int fpmregs_active(struct task_struct *target,
+                         const struct user_regset *regset)
+{
+       return tsk_used_math(target) ? regset->n : 0;   /* regset->n when tsk_used_math() is true, otherwise 0 */
+}
+
+/*
+ * Define the register sets available on the FRV under Linux
+ */
+enum frv_regset {
+       REGSET_GENERAL, /* index of the general register entry in frv_regsets[] */
+       REGSET_FPMEDIA, /* index of the FP/Media register entry in frv_regsets[] */
+};
+
+static const struct user_regset frv_regsets[] = {
+       /*
+        * General register format is:
+        *      PSR, ISR, CCR, CCCR, LR, LCR, PC, (STATUS), SYSCALLNO, ORIG_GR8
+        *      GNER0-1, IACC0, TBR, GR1-63
+        */
+       [REGSET_GENERAL] = {
+               .core_note_type = NT_PRSTATUS,
+               .n              = ELF_NGREG,
+               .size           = sizeof(long),
+               .align          = sizeof(long),
+               .get            = genregs_get,
+               .set            = genregs_set,  /* no .active hook is set for this entry */
+       },
+       /*
+        * FPU/Media register format is:
+        *      FR0-63, FNER0-1, MSR0-1, ACC0-7, ACCG0-8, FSR
+        */
+       [REGSET_FPMEDIA] = {
+               .core_note_type = NT_PRFPREG,
+               .n              = sizeof(struct user_fpmedia_regs) / sizeof(long),      /* struct size counted in longs */
+               .size           = sizeof(long),
+               .align          = sizeof(long),
+               .get            = fpmregs_get,
+               .set            = fpmregs_set,
+               .active         = fpmregs_active,       /* yields 0 unless tsk_used_math() is true */
+       },
+};
+
+static const struct user_regset_view user_frv_native_view = {
+       .name           = "frv",
+       .e_machine      = EM_FRV,
+       .regsets        = frv_regsets,  /* the table defined in this file */
+       .n              = ARRAY_SIZE(frv_regsets),      /* entry count derived from the table itself */
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+       return &user_frv_native_view;   /* same view for every task; the argument is not examined */
+}
+
 /*
  * Get contents of register REGNO in task TASK.
  */
@@ -68,41 +234,24 @@ static inline int put_reg(struct task_struct *task, int regno,
        }
 }
 
-/*
- * check that an address falls within the bounds of the target process's memory
- * mappings
- */
-static inline int is_user_addr_valid(struct task_struct *child,
-                                    unsigned long start, unsigned long len)
-{
-#ifdef CONFIG_MMU
-       if (start >= PAGE_OFFSET || len > PAGE_OFFSET - start)
-               return -EIO;
-       return 0;
-#else
-       struct vm_area_struct *vma;
-
-       vma = find_vma(child->mm, start);
-       if (vma && start >= vma->vm_start && start + len <= vma->vm_end)
-               return 0;
-
-       return -EIO;
-#endif
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
  * Control h/w single stepping
  */
-void ptrace_disable(struct task_struct *child)
+void user_enable_single_step(struct task_struct *child)
+{
+       child->thread.frame0->__status |= REG__STATUS_STEP;
+}
+
+void user_disable_single_step(struct task_struct *child)
 {
        child->thread.frame0->__status &= ~REG__STATUS_STEP;
 }
 
-void ptrace_enable(struct task_struct *child)
+void ptrace_disable(struct task_struct *child)
 {
-       child->thread.frame0->__status |= REG__STATUS_STEP;
+       user_disable_single_step(child);
 }
 
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
@@ -111,15 +260,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
        int ret;
 
        switch (request) {
-               /* when I and D space are separate, these will need to be fixed. */
-       case PTRACE_PEEKTEXT: /* read word at location addr. */
-       case PTRACE_PEEKDATA:
-               ret = -EIO;
-               if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-                       break;
-               ret = generic_ptrace_peekdata(child, addr, data);
-               break;
-
                /* read the word at location addr in the USER area. */
        case PTRACE_PEEKUSR: {
                tmp = 0;
@@ -163,15 +303,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
        }
 
-               /* when I and D space are separate, this will have to be fixed. */
-       case PTRACE_POKETEXT: /* write the word at location addr. */
-       case PTRACE_POKEDATA:
-               ret = -EIO;
-               if (is_user_addr_valid(child, addr, sizeof(tmp)) < 0)
-                       break;
-               ret = generic_ptrace_pokedata(child, addr, data);
-               break;
-
        case PTRACE_POKEUSR: /* write the word at location addr in the USER area */
                ret = -EIO;
                if ((addr & 3) || addr < 0)
@@ -179,7 +310,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 
                ret = 0;
                switch (addr >> 2) {
-               case 0 ... PT__END-1:
+               case 0 ... PT__END - 1:
                        ret = put_reg(child, addr >> 2, data);
                        break;
 
@@ -189,95 +320,29 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                }
                break;
 
-       case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
-       case PTRACE_CONT: /* restart after signal. */
-               ret = -EIO;
-               if (!valid_signal(data))
-                       break;
-               if (request == PTRACE_SYSCALL)
-                       set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               else
-                       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               child->exit_code = data;
-               ptrace_disable(child);
-               wake_up_process(child);
-               ret = 0;
-               break;
-
-               /* make the child exit.  Best I can do is send it a sigkill.
-                * perhaps it should be put in the status that it wants to
-                * exit.
-                */
-       case PTRACE_KILL:
-               ret = 0;
-               if (child->exit_state == EXIT_ZOMBIE)   /* already dead */
-                       break;
-               child->exit_code = SIGKILL;
-               clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-               ptrace_disable(child);
-               wake_up_process(child);
-               break;
-
-       case PTRACE_SINGLESTEP:  /* set the trap flag. */
-               ret = -EIO;
-               if (!valid_signal(data))
-                       break;
-               clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               ptrace_enable(child);
-               child->exit_code = data;
-               wake_up_process(child);
-               ret = 0;
-               break;
-
-       case PTRACE_DETACH:     /* detach a process that was attached. */
-               ret = ptrace_detach(child, data);
-               break;
-
-       case PTRACE_GETREGS: { /* Get all integer regs from the child. */
-               int i;
-               for (i = 0; i < PT__GPEND; i++) {
-                       tmp = get_reg(child, i);
-                       if (put_user(tmp, (unsigned long *) data)) {
-                               ret = -EFAULT;
-                               break;
-                       }
-                       data += sizeof(long);
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_SETREGS: { /* Set all integer regs in the child. */
-               int i;
-               for (i = 0; i < PT__GPEND; i++) {
-                       if (get_user(tmp, (unsigned long *) data)) {
-                               ret = -EFAULT;
-                               break;
-                       }
-                       put_reg(child, i, tmp);
-                       data += sizeof(long);
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_GETFPREGS: { /* Get the child FP/Media state. */
-               ret = 0;
-               if (copy_to_user((void *) data,
-                                &child->thread.user->f,
-                                sizeof(child->thread.user->f)))
-                       ret = -EFAULT;
-               break;
-       }
-
-       case PTRACE_SETFPREGS: { /* Set the child FP/Media state. */
-               ret = 0;
-               if (copy_from_user(&child->thread.user->f,
-                                  (void *) data,
-                                  sizeof(child->thread.user->f)))
-                       ret = -EFAULT;
-               break;
-       }
+       case PTRACE_GETREGS:    /* Get all integer regs from the child. */
+               return copy_regset_to_user(child, &user_frv_native_view,
+                                          REGSET_GENERAL,
+                                          0, sizeof(child->thread.user->i),
+                                          (void __user *)data);
+
+       case PTRACE_SETREGS:    /* Set all integer regs in the child. */
+               return copy_regset_from_user(child, &user_frv_native_view,
+                                            REGSET_GENERAL,
+                                            0, sizeof(child->thread.user->i),
+                                            (const void __user *)data);
+
+       case PTRACE_GETFPREGS:  /* Get the child FP/Media state. */
+               return copy_regset_to_user(child, &user_frv_native_view,
+                                          REGSET_FPMEDIA,
+                                          0, sizeof(child->thread.user->f),
+                                          (void __user *)data);
+
+       case PTRACE_SETFPREGS:  /* Set the child FP/Media state. */
+               return copy_regset_from_user(child, &user_frv_native_view,
+                                            REGSET_FPMEDIA,
+                                            0, sizeof(child->thread.user->f),
+                                            (const void __user *)data);
 
        case PTRACE_GETFDPIC:
                tmp = 0;
@@ -300,414 +365,36 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                break;
 
        default:
-               ret = -EIO;
+               ret = ptrace_request(child, request, addr, data);
                break;
        }
        return ret;
 }
 
-int __nongprelbss kstrace;
-
-static const struct {
-       const char      *name;
-       unsigned        argmask;
-} __syscall_name_table[NR_syscalls] = {
-       [0]     = { "restart_syscall"                   },
-       [1]     = { "exit",             0x000001        },
-       [2]     = { "fork",             0xffffff        },
-       [3]     = { "read",             0x000141        },
-       [4]     = { "write",            0x000141        },
-       [5]     = { "open",             0x000235        },
-       [6]     = { "close",            0x000001        },
-       [7]     = { "waitpid",          0x000141        },
-       [8]     = { "creat",            0x000025        },
-       [9]     = { "link",             0x000055        },
-       [10]    = { "unlink",           0x000005        },
-       [11]    = { "execve",           0x000445        },
-       [12]    = { "chdir",            0x000005        },
-       [13]    = { "time",             0x000004        },
-       [14]    = { "mknod",            0x000325        },
-       [15]    = { "chmod",            0x000025        },
-       [16]    = { "lchown",           0x000025        },
-       [17]    = { "break" },
-       [18]    = { "oldstat",          0x000045        },
-       [19]    = { "lseek",            0x000131        },
-       [20]    = { "getpid",           0xffffff        },
-       [21]    = { "mount",            0x043555        },
-       [22]    = { "umount",           0x000005        },
-       [23]    = { "setuid",           0x000001        },
-       [24]    = { "getuid",           0xffffff        },
-       [25]    = { "stime",            0x000004        },
-       [26]    = { "ptrace",           0x004413        },
-       [27]    = { "alarm",            0x000001        },
-       [28]    = { "oldfstat",         0x000041        },
-       [29]    = { "pause",            0xffffff        },
-       [30]    = { "utime",            0x000045        },
-       [31]    = { "stty" },
-       [32]    = { "gtty" },
-       [33]    = { "access",           0x000025        },
-       [34]    = { "nice",             0x000001        },
-       [35]    = { "ftime" },
-       [36]    = { "sync",             0xffffff        },
-       [37]    = { "kill",             0x000011        },
-       [38]    = { "rename",           0x000055        },
-       [39]    = { "mkdir",            0x000025        },
-       [40]    = { "rmdir",            0x000005        },
-       [41]    = { "dup",              0x000001        },
-       [42]    = { "pipe",             0x000004        },
-       [43]    = { "times",            0x000004        },
-       [44]    = { "prof" },
-       [45]    = { "brk",              0x000004        },
-       [46]    = { "setgid",           0x000001        },
-       [47]    = { "getgid",           0xffffff        },
-       [48]    = { "signal",           0x000041        },
-       [49]    = { "geteuid",          0xffffff        },
-       [50]    = { "getegid",          0xffffff        },
-       [51]    = { "acct",             0x000005        },
-       [52]    = { "umount2",          0x000035        },
-       [53]    = { "lock" },
-       [54]    = { "ioctl",            0x000331        },
-       [55]    = { "fcntl",            0x000331        },
-       [56]    = { "mpx" },
-       [57]    = { "setpgid",          0x000011        },
-       [58]    = { "ulimit" },
-       [60]    = { "umask",            0x000002        },
-       [61]    = { "chroot",           0x000005        },
-       [62]    = { "ustat",            0x000043        },
-       [63]    = { "dup2",             0x000011        },
-       [64]    = { "getppid",          0xffffff        },
-       [65]    = { "getpgrp",          0xffffff        },
-       [66]    = { "setsid",           0xffffff        },
-       [67]    = { "sigaction" },
-       [68]    = { "sgetmask" },
-       [69]    = { "ssetmask" },
-       [70]    = { "setreuid" },
-       [71]    = { "setregid" },
-       [72]    = { "sigsuspend" },
-       [73]    = { "sigpending" },
-       [74]    = { "sethostname" },
-       [75]    = { "setrlimit" },
-       [76]    = { "getrlimit" },
-       [77]    = { "getrusage" },
-       [78]    = { "gettimeofday" },
-       [79]    = { "settimeofday" },
-       [80]    = { "getgroups" },
-       [81]    = { "setgroups" },
-       [82]    = { "select" },
-       [83]    = { "symlink" },
-       [84]    = { "oldlstat" },
-       [85]    = { "readlink" },
-       [86]    = { "uselib" },
-       [87]    = { "swapon" },
-       [88]    = { "reboot" },
-       [89]    = { "readdir" },
-       [91]    = { "munmap",           0x000034        },
-       [92]    = { "truncate" },
-       [93]    = { "ftruncate" },
-       [94]    = { "fchmod" },
-       [95]    = { "fchown" },
-       [96]    = { "getpriority" },
-       [97]    = { "setpriority" },
-       [99]    = { "statfs" },
-       [100]   = { "fstatfs" },
-       [102]   = { "socketcall" },
-       [103]   = { "syslog" },
-       [104]   = { "setitimer" },
-       [105]   = { "getitimer" },
-       [106]   = { "stat" },
-       [107]   = { "lstat" },
-       [108]   = { "fstat" },
-       [111]   = { "vhangup" },
-       [114]   = { "wait4" },
-       [115]   = { "swapoff" },
-       [116]   = { "sysinfo" },
-       [117]   = { "ipc" },
-       [118]   = { "fsync" },
-       [119]   = { "sigreturn" },
-       [120]   = { "clone" },
-       [121]   = { "setdomainname" },
-       [122]   = { "uname" },
-       [123]   = { "modify_ldt" },
-       [123]   = { "cacheflush" },
-       [124]   = { "adjtimex" },
-       [125]   = { "mprotect" },
-       [126]   = { "sigprocmask" },
-       [127]   = { "create_module" },
-       [128]   = { "init_module" },
-       [129]   = { "delete_module" },
-       [130]   = { "get_kernel_syms" },
-       [131]   = { "quotactl" },
-       [132]   = { "getpgid" },
-       [133]   = { "fchdir" },
-       [134]   = { "bdflush" },
-       [135]   = { "sysfs" },
-       [136]   = { "personality" },
-       [137]   = { "afs_syscall" },
-       [138]   = { "setfsuid" },
-       [139]   = { "setfsgid" },
-       [140]   = { "_llseek",                  0x014331        },
-       [141]   = { "getdents" },
-       [142]   = { "_newselect",               0x000141        },
-       [143]   = { "flock" },
-       [144]   = { "msync" },
-       [145]   = { "readv" },
-       [146]   = { "writev" },
-       [147]   = { "getsid",                   0x000001        },
-       [148]   = { "fdatasync",                0x000001        },
-       [149]   = { "_sysctl",                  0x000004        },
-       [150]   = { "mlock" },
-       [151]   = { "munlock" },
-       [152]   = { "mlockall" },
-       [153]   = { "munlockall" },
-       [154]   = { "sched_setparam" },
-       [155]   = { "sched_getparam" },
-       [156]   = { "sched_setscheduler" },
-       [157]   = { "sched_getscheduler" },
-       [158]   = { "sched_yield" },
-       [159]   = { "sched_get_priority_max" },
-       [160]   = { "sched_get_priority_min" },
-       [161]   = { "sched_rr_get_interval" },
-       [162]   = { "nanosleep",                0x000044        },
-       [163]   = { "mremap" },
-       [164]   = { "setresuid" },
-       [165]   = { "getresuid" },
-       [166]   = { "vm86" },
-       [167]   = { "query_module" },
-       [168]   = { "poll" },
-       [169]   = { "nfsservctl" },
-       [170]   = { "setresgid" },
-       [171]   = { "getresgid" },
-       [172]   = { "prctl",                    0x333331        },
-       [173]   = { "rt_sigreturn",             0xffffff        },
-       [174]   = { "rt_sigaction",             0x001441        },
-       [175]   = { "rt_sigprocmask",           0x001441        },
-       [176]   = { "rt_sigpending",            0x000014        },
-       [177]   = { "rt_sigtimedwait",          0x001444        },
-       [178]   = { "rt_sigqueueinfo",          0x000411        },
-       [179]   = { "rt_sigsuspend",            0x000014        },
-       [180]   = { "pread",                    0x003341        },
-       [181]   = { "pwrite",                   0x003341        },
-       [182]   = { "chown",                    0x000115        },
-       [183]   = { "getcwd" },
-       [184]   = { "capget" },
-       [185]   = { "capset" },
-       [186]   = { "sigaltstack" },
-       [187]   = { "sendfile" },
-       [188]   = { "getpmsg" },
-       [189]   = { "putpmsg" },
-       [190]   = { "vfork",                    0xffffff        },
-       [191]   = { "ugetrlimit" },
-       [192]   = { "mmap2",                    0x313314        },
-       [193]   = { "truncate64" },
-       [194]   = { "ftruncate64" },
-       [195]   = { "stat64",                   0x000045        },
-       [196]   = { "lstat64",                  0x000045        },
-       [197]   = { "fstat64",                  0x000041        },
-       [198]   = { "lchown32" },
-       [199]   = { "getuid32",                 0xffffff        },
-       [200]   = { "getgid32",                 0xffffff        },
-       [201]   = { "geteuid32",                0xffffff        },
-       [202]   = { "getegid32",                0xffffff        },
-       [203]   = { "setreuid32" },
-       [204]   = { "setregid32" },
-       [205]   = { "getgroups32" },
-       [206]   = { "setgroups32" },
-       [207]   = { "fchown32" },
-       [208]   = { "setresuid32" },
-       [209]   = { "getresuid32" },
-       [210]   = { "setresgid32" },
-       [211]   = { "getresgid32" },
-       [212]   = { "chown32" },
-       [213]   = { "setuid32" },
-       [214]   = { "setgid32" },
-       [215]   = { "setfsuid32" },
-       [216]   = { "setfsgid32" },
-       [217]   = { "pivot_root" },
-       [218]   = { "mincore" },
-       [219]   = { "madvise" },
-       [220]   = { "getdents64" },
-       [221]   = { "fcntl64" },
-       [223]   = { "security" },
-       [224]   = { "gettid" },
-       [225]   = { "readahead" },
-       [226]   = { "setxattr" },
-       [227]   = { "lsetxattr" },
-       [228]   = { "fsetxattr" },
-       [229]   = { "getxattr" },
-       [230]   = { "lgetxattr" },
-       [231]   = { "fgetxattr" },
-       [232]   = { "listxattr" },
-       [233]   = { "llistxattr" },
-       [234]   = { "flistxattr" },
-       [235]   = { "removexattr" },
-       [236]   = { "lremovexattr" },
-       [237]   = { "fremovexattr" },
-       [238]   = { "tkill" },
-       [239]   = { "sendfile64" },
-       [240]   = { "futex" },
-       [241]   = { "sched_setaffinity" },
-       [242]   = { "sched_getaffinity" },
-       [243]   = { "set_thread_area" },
-       [244]   = { "get_thread_area" },
-       [245]   = { "io_setup" },
-       [246]   = { "io_destroy" },
-       [247]   = { "io_getevents" },
-       [248]   = { "io_submit" },
-       [249]   = { "io_cancel" },
-       [250]   = { "fadvise64" },
-       [252]   = { "exit_group",               0x000001        },
-       [253]   = { "lookup_dcookie" },
-       [254]   = { "epoll_create" },
-       [255]   = { "epoll_ctl" },
-       [256]   = { "epoll_wait" },
-       [257]   = { "remap_file_pages" },
-       [258]   = { "set_tid_address" },
-       [259]   = { "timer_create" },
-       [260]   = { "timer_settime" },
-       [261]   = { "timer_gettime" },
-       [262]   = { "timer_getoverrun" },
-       [263]   = { "timer_delete" },
-       [264]   = { "clock_settime" },
-       [265]   = { "clock_gettime" },
-       [266]   = { "clock_getres" },
-       [267]   = { "clock_nanosleep" },
-       [268]   = { "statfs64" },
-       [269]   = { "fstatfs64" },
-       [270]   = { "tgkill" },
-       [271]   = { "utimes" },
-       [272]   = { "fadvise64_64" },
-       [273]   = { "vserver" },
-       [274]   = { "mbind" },
-       [275]   = { "get_mempolicy" },
-       [276]   = { "set_mempolicy" },
-       [277]   = { "mq_open" },
-       [278]   = { "mq_unlink" },
-       [279]   = { "mq_timedsend" },
-       [280]   = { "mq_timedreceive" },
-       [281]   = { "mq_notify" },
-       [282]   = { "mq_getsetattr" },
-       [283]   = { "sys_kexec_load" },
-};
-
-asmlinkage void do_syscall_trace(int leaving)
+/*
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
+ */
+asmlinkage unsigned long syscall_trace_entry(void)
 {
-#if 0
-       unsigned long *argp;
-       const char *name;
-       unsigned argmask;
-       char buffer[16];
-
-       if (!kstrace)
-               return;
-
-       if (!current->mm)
-               return;
-
-       if (__frame->gr7 == __NR_close)
-               return;
-
-#if 0
-       if (__frame->gr7 != __NR_mmap2 &&
-           __frame->gr7 != __NR_vfork &&
-           __frame->gr7 != __NR_execve &&
-           __frame->gr7 != __NR_exit)
-               return;
-#endif
-
-       argmask = 0;
-       name = NULL;
-       if (__frame->gr7 < NR_syscalls) {
-               name = __syscall_name_table[__frame->gr7].name;
-               argmask = __syscall_name_table[__frame->gr7].argmask;
-       }
-       if (!name) {
-               sprintf(buffer, "sys_%lx", __frame->gr7);
-               name = buffer;
-       }
-
-       if (!leaving) {
-               if (!argmask) {
-                       printk(KERN_CRIT "[%d] %s(%lx,%lx,%lx,%lx,%lx,%lx)\n",
-                              current->pid,
-                              name,
-                              __frame->gr8,
-                              __frame->gr9,
-                              __frame->gr10,
-                              __frame->gr11,
-                              __frame->gr12,
-                              __frame->gr13);
-               }
-               else if (argmask == 0xffffff) {
-                       printk(KERN_CRIT "[%d] %s()\n",
-                              current->pid,
-                              name);
-               }
-               else {
-                       printk(KERN_CRIT "[%d] %s(",
-                              current->pid,
-                              name);
-
-                       argp = &__frame->gr8;
-
-                       do {
-                               switch (argmask & 0xf) {
-                               case 1:
-                                       printk("%ld", (long) *argp);
-                                       break;
-                               case 2:
-                                       printk("%lo", *argp);
-                                       break;
-                               case 3:
-                                       printk("%lx", *argp);
-                                       break;
-                               case 4:
-                                       printk("%p", (void *) *argp);
-                                       break;
-                               case 5:
-                                       printk("\"%s\"", (char *) *argp);
-                                       break;
-                               }
-
-                               argp++;
-                               argmask >>= 4;
-                               if (argmask)
-                                       printk(",");
-
-                       } while (argmask);
-
-                       printk(")\n");
-               }
-       }
-       else {
-               if ((int)__frame->gr8 > -4096 && (int)__frame->gr8 < 4096)
-                       printk(KERN_CRIT "[%d] %s() = %ld\n", current->pid, name, __frame->gr8);
-               else
-                       printk(KERN_CRIT "[%d] %s() = %lx\n", current->pid, name, __frame->gr8);
+       __frame->__status |= REG__STATUS_SYSC_ENTRY;
+       if (tracehook_report_syscall_entry(__frame)) {
+               /* tracing decided this syscall should not happen, so
+                * we'll return a bogus call number to get an ENOSYS
+                * error, but leave the original number in
+                * __frame->syscallno
+                */
+               return ULONG_MAX;
        }
-       return;
-#endif
-
-       if (!test_thread_flag(TIF_SYSCALL_TRACE))
-               return;
-
-       if (!(current->ptrace & PT_PTRACED))
-               return;
 
-       /* we need to indicate entry or exit to strace */
-       if (leaving)
-               __frame->__status |= REG__STATUS_SYSC_EXIT;
-       else
-               __frame->__status |= REG__STATUS_SYSC_ENTRY;
-
-       ptrace_notify(SIGTRAP);
+       return __frame->syscallno;
+}
 
-       /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
-        */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(void)
+{
+       __frame->__status |= REG__STATUS_SYSC_EXIT;
+       tracehook_report_syscall_exit(__frame, 0);
 }
index 3bdb368..4a7a62c 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/unistd.h>
 #include <linux/personality.h>
 #include <linux/freezer.h>
+#include <linux/tracehook.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -516,6 +517,9 @@ static void do_signal(void)
                         * clear the TIF_RESTORE_SIGMASK flag */
                        if (test_thread_flag(TIF_RESTORE_SIGMASK))
                                clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+                       tracehook_signal_handler(signr, &info, &ka, __frame,
+                                                test_thread_flag(TIF_SINGLESTEP));
                }
 
                return;
@@ -564,4 +568,10 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags)
        if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
                do_signal();
 
+       /* handle notifications due before userspace execution resumes */
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(__frame);
+       }
+
 } /* end do_notify_resume() */
index 9fb771a..374f88d 100644 (file)
@@ -23,8 +23,7 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
        char *p, ch;
        long err = -EFAULT;
 
-       if (count < 0)
-               BUG();
+       BUG_ON(count < 0);
 
        p = dst;
 
@@ -76,8 +75,7 @@ long strnlen_user(const char __user *src, long count)
        long err = 0;
        char ch;
 
-       if (count < 0)
-               BUG();
+       BUG_ON(count < 0);
 
 #ifndef CONFIG_MMU
        if ((unsigned long) src < memory_start)
index 52ff9ae..4e1ba0b 100644 (file)
@@ -116,8 +116,7 @@ EXPORT_SYMBOL(dma_free_coherent);
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
                          enum dma_data_direction direction)
 {
-       if (direction == DMA_NONE)
-                BUG();
+       BUG_ON(direction == DMA_NONE);
 
        frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -151,8 +150,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
                frv_cache_wback_inv(sg_dma_address(&sg[i]),
                                    sg_dma_address(&sg[i]) + sg_dma_len(&sg[i]));
 
-       if (direction == DMA_NONE)
-                BUG();
+       BUG_ON(direction == DMA_NONE);
 
        return nents;
 }
index 3ddedeb..45954f0 100644 (file)
@@ -48,8 +48,7 @@ EXPORT_SYMBOL(dma_free_coherent);
 dma_addr_t dma_map_single(struct device *dev, void *ptr, size_t size,
                          enum dma_data_direction direction)
 {
-       if (direction == DMA_NONE)
-                BUG();
+       BUG_ON(direction == DMA_NONE);
 
        frv_cache_wback_inv((unsigned long) ptr, (unsigned long) ptr + size);
 
@@ -81,8 +80,7 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
        void *vaddr;
        int i;
 
-       if (direction == DMA_NONE)
-                BUG();
+       BUG_ON(direction == DMA_NONE);
 
        dampr2 = __get_DAMPR(2);
 
index cc0a318..acb5047 100644 (file)
@@ -21,9 +21,10 @@ hpsim_irq_noop (unsigned int irq)
 {
 }
 
-static void
+static int
 hpsim_set_affinity_noop(unsigned int a, const struct cpumask *b)
 {
+       return 0;
 }
 
 static struct hw_interrupt_type irq_type_hp_sim = {
index 5510317..baec6f0 100644 (file)
@@ -636,7 +636,7 @@ void __init acpi_numa_arch_fixup(void)
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity)
 {
        if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM)
                return gsi;
@@ -678,7 +678,8 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 
        fadt = (struct acpi_table_fadt *)fadt_header;
 
-       acpi_register_gsi(fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
+       acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE,
+                                ACPI_ACTIVE_LOW);
        return 0;
 }
 
index 166e0d8..f92cef4 100644 (file)
@@ -329,7 +329,7 @@ unmask_irq (unsigned int irq)
 }
 
 
-static void
+static int
 iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
@@ -343,15 +343,15 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
 
        cpu = cpumask_first_and(cpu_online_mask, mask);
        if (cpu >= nr_cpu_ids)
-               return;
+               return -1;
 
        if (irq_prepare_move(irq, cpu))
-               return;
+               return -1;
 
        dest = cpu_physical_id(cpu);
 
        if (!iosapic_intr_info[irq].count)
-               return;                 /* not an IOSAPIC interrupt */
+               return -1;                      /* not an IOSAPIC interrupt */
 
        set_irq_affinity_info(irq, dest, redir);
 
@@ -376,7 +376,9 @@ iosapic_set_affinity(unsigned int irq, const struct cpumask *mask)
                iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32);
                iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32);
        }
+
 #endif
+       return 0;
 }
 
 /*
index 2b15e23..0f8ade9 100644 (file)
@@ -12,7 +12,7 @@
 static struct irq_chip ia64_msi_chip;
 
 #ifdef CONFIG_SMP
-static void ia64_set_msi_irq_affinity(unsigned int irq,
+static int ia64_set_msi_irq_affinity(unsigned int irq,
                                      const cpumask_t *cpu_mask)
 {
        struct msi_msg msg;
@@ -20,10 +20,10 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
        int cpu = first_cpu(*cpu_mask);
 
        if (!cpu_online(cpu))
-               return;
+               return -1;
 
        if (irq_prepare_move(irq, cpu))
-               return;
+               return -1;
 
        read_msi_msg(irq, &msg);
 
@@ -39,6 +39,8 @@ static void ia64_set_msi_irq_affinity(unsigned int irq,
 
        write_msi_msg(irq, &msg);
        cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu));
+
+       return 0;
 }
 #endif /* CONFIG_SMP */
 
@@ -130,17 +132,17 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #ifdef CONFIG_DMAR
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_cfg *cfg = irq_cfg + irq;
        struct msi_msg msg;
        int cpu = cpumask_first(mask);
 
        if (!cpu_online(cpu))
-               return;
+               return -1;
 
        if (irq_prepare_move(irq, cpu))
-               return;
+               return -1;
 
        dmar_msi_read(irq, &msg);
 
@@ -151,6 +153,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
        dmar_msi_write(irq, &msg);
        cpumask_copy(irq_desc[irq].affinity, mask);
+
+       return 0;
 }
 #endif /* CONFIG_SMP */
 
index 66fd705..764f26a 100644 (file)
@@ -227,7 +227,7 @@ finish_up:
        return new_irq_info;
 }
 
-static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
+static int sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
 {
        struct sn_irq_info *sn_irq_info, *sn_irq_info_safe;
        nasid_t nasid;
@@ -239,6 +239,8 @@ static void sn_set_affinity_irq(unsigned int irq, const struct cpumask *mask)
        list_for_each_entry_safe(sn_irq_info, sn_irq_info_safe,
                                 sn_irq_lh[irq], list)
                (void)sn_retarget_vector(sn_irq_info, nasid, slice);
+
+       return 0;
 }
 
 #ifdef CONFIG_SMP
index 81e4289..fbbfb97 100644 (file)
@@ -151,7 +151,7 @@ int sn_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *entry)
 }
 
 #ifdef CONFIG_SMP
-static void sn_set_msi_irq_affinity(unsigned int irq,
+static int sn_set_msi_irq_affinity(unsigned int irq,
                                    const struct cpumask *cpu_mask)
 {
        struct msi_msg msg;
@@ -168,7 +168,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
        cpu = cpumask_first(cpu_mask);
        sn_irq_info = sn_msi_info[irq].sn_irq_info;
        if (sn_irq_info == NULL || sn_irq_info->irq_int_bit >= 0)
-               return;
+               return -1;
 
        /*
         * Release XIO resources for the old MSI PCI address
@@ -189,7 +189,7 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
        new_irq_info = sn_retarget_vector(sn_irq_info, nasid, slice);
        sn_msi_info[irq].sn_irq_info = new_irq_info;
        if (new_irq_info == NULL)
-               return;
+               return -1;
 
        /*
         * Map the xio address into bus space
@@ -206,6 +206,8 @@ static void sn_set_msi_irq_affinity(unsigned int irq,
 
        write_msi_msg(irq, &msg);
        cpumask_copy(irq_desc[irq].affinity, cpu_mask);
+
+       return 0;
 }
 #endif /* CONFIG_SMP */
 
index 09b1287..25f3b0a 100644 (file)
@@ -72,6 +72,7 @@ config MIPS_COBALT
        select IRQ_CPU
        select IRQ_GT641XX
        select PCI_GT64XXX_PCI0
+       select PCI
        select SYS_HAS_CPU_NEVADA
        select SYS_HAS_EARLY_PRINTK
        select SYS_SUPPORTS_32BIT_KERNEL
@@ -593,7 +594,7 @@ config WR_PPMC
          board, which is based on GT64120 bridge chip.
 
 config CAVIUM_OCTEON_SIMULATOR
-       bool "Support for the Cavium Networks Octeon Simulator"
+       bool "Cavium Networks Octeon Simulator"
        select CEVT_R4K
        select 64BIT_PHYS_ADDR
        select DMA_COHERENT
@@ -607,7 +608,7 @@ config CAVIUM_OCTEON_SIMULATOR
          hardware.
 
 config CAVIUM_OCTEON_REFERENCE_BOARD
-       bool "Support for the Cavium Networks Octeon reference board"
+       bool "Cavium Networks Octeon reference board"
        select CEVT_R4K
        select 64BIT_PHYS_ADDR
        select DMA_COHERENT
index 1c19af8..d3a0c81 100644 (file)
@@ -177,7 +177,7 @@ static void octeon_irq_ciu0_disable(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
 {
        int cpu;
        int bit = irq - OCTEON_IRQ_WORKQ0;      /* Bit 0-63 of EN0 */
@@ -199,6 +199,8 @@ static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask
         */
        cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2));
        write_unlock(&octeon_irq_ciu0_rwlock);
+
+       return 0;
 }
 #endif
 
@@ -292,7 +294,7 @@ static void octeon_irq_ciu1_disable(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
+static int octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
 {
        int cpu;
        int bit = irq - OCTEON_IRQ_WDOG0;       /* Bit 0-63 of EN1 */
@@ -315,6 +317,8 @@ static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask
         */
        cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1));
        write_unlock(&octeon_irq_ciu1_rwlock);
+
+       return 0;
 }
 #endif
 
index 744cd8f..1260443 100644 (file)
@@ -39,8 +39,8 @@ struct cache_desc {
 #define MIPS_CACHE_PINDEX      0x00000020      /* Physically indexed cache */
 
 struct cpuinfo_mips {
-       unsigned long           udelay_val;
-       unsigned long           asid_cache;
+       unsigned int            udelay_val;
+       unsigned int            asid_cache;
 
        /*
         * Capability and feature descriptor structure for MIPS CPU
index b0bccd2..a07e51b 100644 (file)
 #ifndef _ASM_DELAY_H
 #define _ASM_DELAY_H
 
-#include <linux/param.h>
-#include <linux/smp.h>
+extern void __delay(unsigned int loops);
+extern void __ndelay(unsigned long ns);        /* type matches lib/delay.c */
+extern void __udelay(unsigned long us);        /* type matches lib/delay.c */
 
-#include <asm/compiler.h>
-#include <asm/war.h>
-
-static inline void __delay(unsigned long loops)
-{
-       if (sizeof(long) == 4)
-               __asm__ __volatile__ (
-               "       .set    noreorder                               \n"
-               "       .align  3                                       \n"
-               "1:     bnez    %0, 1b                                  \n"
-               "       subu    %0, 1                                   \n"
-               "       .set    reorder                                 \n"
-               : "=r" (loops)
-               : "0" (loops));
-       else if (sizeof(long) == 8 && !DADDI_WAR)
-               __asm__ __volatile__ (
-               "       .set    noreorder                               \n"
-               "       .align  3                                       \n"
-               "1:     bnez    %0, 1b                                  \n"
-               "       dsubu   %0, 1                                   \n"
-               "       .set    reorder                                 \n"
-               : "=r" (loops)
-               : "0" (loops));
-       else if (sizeof(long) == 8 && DADDI_WAR)
-               __asm__ __volatile__ (
-               "       .set    noreorder                               \n"
-               "       .align  3                                       \n"
-               "1:     bnez    %0, 1b                                  \n"
-               "       dsubu   %0, %2                                  \n"
-               "       .set    reorder                                 \n"
-               : "=r" (loops)
-               : "0" (loops), "r" (1));
-}
-
-
-/*
- * Division by multiplication: you don't have to worry about
- * loss of precision.
- *
- * Use only for very small delays ( < 1 msec).  Should probably use a
- * lookup table, really, as the multiplications take much too long with
- * short delays.  This is a "reasonable" implementation, though (and the
- * first constant multiplications gets optimized away if the delay is
- * a constant)
- */
-
-static inline void __udelay(unsigned long usecs, unsigned long lpj)
-{
-       unsigned long hi, lo;
-
-       /*
-        * The rates of 128 is rounded wrongly by the catchall case
-        * for 64-bit.  Excessive precission?  Probably ...
-        */
-#if defined(CONFIG_64BIT) && (HZ == 128)
-       usecs *= 0x0008637bd05af6c7UL;          /* 2**64 / (1000000 / HZ) */
-#elif defined(CONFIG_64BIT)
-       usecs *= (0x8000000000000000UL / (500000 / HZ));
-#else /* 32-bit junk follows here */
-       usecs *= (unsigned long) (((0x8000000000000000ULL / (500000 / HZ)) +
-                                  0x80000000ULL) >> 32);
-#endif
-
-       if (sizeof(long) == 4)
-               __asm__("multu\t%2, %3"
-               : "=h" (usecs), "=l" (lo)
-               : "r" (usecs), "r" (lpj)
-               : GCC_REG_ACCUM);
-       else if (sizeof(long) == 8 && !R4000_WAR)
-               __asm__("dmultu\t%2, %3"
-               : "=h" (usecs), "=l" (lo)
-               : "r" (usecs), "r" (lpj)
-               : GCC_REG_ACCUM);
-       else if (sizeof(long) == 8 && R4000_WAR)
-               __asm__("dmultu\t%3, %4\n\tmfhi\t%0"
-               : "=r" (usecs), "=h" (hi), "=l" (lo)
-               : "r" (usecs), "r" (lpj)
-               : GCC_REG_ACCUM);
-
-       __delay(usecs);
-}
-
-#define __udelay_val cpu_data[raw_smp_processor_id()].udelay_val
-
-#define udelay(usecs) __udelay((usecs), __udelay_val)
+#define ndelay(ns) __ndelay(ns)
+#define udelay(us) __udelay(us)
 
 /* make sure "usecs *= ..." in udelay do not overflow. */
 #if HZ >= 1000
index 85067e2..9161634 100644 (file)
         ((nr)   << _IOC_NRSHIFT) | \
         ((size) << _IOC_SIZESHIFT))
 
+#ifdef __KERNEL__
 /* provoke compile error for invalid uses of size argument */
 extern unsigned int __invalid_size_argument_for_IOC;
 #define _IOC_TYPECHECK(t) \
        ((sizeof(t) == sizeof(t[1]) && \
          sizeof(t) < (1 << _IOC_SIZEBITS)) ? \
          sizeof(t) : __invalid_size_argument_for_IOC)
+#else
+#define _IOC_TYPECHECK(t)      (sizeof(t))
+#endif
 
 /* used to create numbers */
 #define _IO(type, nr)          _IOC(_IOC_NONE, (type), (nr), 0)
index 3214ade..4f1eed1 100644 (file)
@@ -49,7 +49,7 @@ static inline void smtc_im_ack_irq(unsigned int irq)
 #ifdef CONFIG_MIPS_MT_SMTC_IRQAFF
 #include <linux/cpumask.h>
 
-extern void plat_set_irq_affinity(unsigned int irq,
+extern int plat_set_irq_affinity(unsigned int irq,
                                  const struct cpumask *affinity);
 extern void smtc_forward_irq(unsigned int irq);
 
index 87deb8f..3f43c2e 100644 (file)
@@ -155,7 +155,7 @@ static void gic_unmask_irq(unsigned int irq)
 
 static DEFINE_SPINLOCK(gic_lock);
 
-static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+static int gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
        cpumask_t       tmp = CPU_MASK_NONE;
        unsigned long   flags;
@@ -166,7 +166,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 
        cpumask_and(&tmp, cpumask, cpu_online_mask);
        if (cpus_empty(tmp))
-               return;
+               return -1;
 
        /* Assumption : cpumask refers to a single CPU */
        spin_lock_irqsave(&gic_lock, flags);
@@ -190,6 +190,7 @@ static void gic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
        cpumask_copy(irq_desc[irq].affinity, cpumask);
        spin_unlock_irqrestore(&gic_lock, flags);
 
+       return 0;
 }
 #endif
 
index 26760ca..e0a4ac1 100644 (file)
@@ -42,7 +42,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
        seq_printf(m, fmt, __cpu_name[n],
                                   (version >> 4) & 0x0f, version & 0x0f,
                                   (fp_vers >> 4) & 0x0f, fp_vers & 0x0f);
-       seq_printf(m, "BogoMIPS\t\t: %lu.%02lu\n",
+       seq_printf(m, "BogoMIPS\t\t: %u.%02u\n",
                      cpu_data[n].udelay_val / (500000/HZ),
                      (cpu_data[n].udelay_val / (5000/HZ)) % 100);
        seq_printf(m, "wait instruction\t: %s\n", cpu_wait ? "yes" : "no");
index c13c7ad..2adead5 100644 (file)
@@ -2,8 +2,8 @@
 # Makefile for MIPS-specific library files..
 #
 
-lib-y  += csum_partial.o memcpy.o memcpy-inatomic.o memset.o strlen_user.o \
-          strncpy_user.o strnlen_user.o uncached.o
+lib-y  += csum_partial.o delay.o memcpy.o memcpy-inatomic.o memset.o \
+          strlen_user.o strncpy_user.o strnlen_user.o uncached.o
 
 obj-y                  += iomap.o
 obj-$(CONFIG_PCI)      += iomap-pci.o
diff --git a/arch/mips/lib/delay.c b/arch/mips/lib/delay.c
new file mode 100644 (file)
index 0000000..f69c6b5
--- /dev/null
@@ -0,0 +1,56 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994 by Waldorf Electronics
+ * Copyright (C) 1995 - 2000, 01, 03 by Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007  Maciej W. Rozycki
+ */
+#include <linux/module.h>
+#include <linux/param.h>
+#include <linux/smp.h>
+
+#include <asm/compiler.h>
+#include <asm/war.h>
+
+inline void __delay(unsigned int loops)
+{
+       __asm__ __volatile__ (
+       "       .set    noreorder                               \n"
+       "       .align  3                                       \n"
+       "1:     bnez    %0, 1b                                  \n"
+       "       subu    %0, 1                                   \n"
+       "       .set    reorder                                 \n"
+       : "=r" (loops)
+       : "0" (loops));
+}
+EXPORT_SYMBOL(__delay);
+
+/*
+ * Division by multiplication: you don't have to worry about
+ * loss of precision.
+ *
+ * Use only for very small delays ( < 1 msec).  Should probably use a
+ * lookup table, really, as the multiplications take much too long with
+ * short delays.  This is a "reasonable" implementation, though (and the
+ * first constant multiplications gets optimized away if the delay is
+ * a constant)
+ */
+
+void __udelay(unsigned long us)
+{
+       unsigned int lpj = current_cpu_data.udelay_val;
+       /* 0x10c7 ~= 2^32 / 10^6; ull keeps the product 64-bit on 32-bit kernels */
+       __delay((us * 0x000010c7ull * HZ * lpj) >> 32);
+}
+EXPORT_SYMBOL(__udelay);
+
+void __ndelay(unsigned long ns)
+{
+       unsigned int lpj = current_cpu_data.udelay_val;
+       /* 0x5 ~= 2^32 / 10^9; ull keeps the product 64-bit on 32-bit kernels */
+       __delay((ns * 0x00000005ull * HZ * lpj) >> 32);
+}
+EXPORT_SYMBOL(__ndelay);
index 5ba3188..499ffe5 100644 (file)
@@ -114,7 +114,7 @@ struct plat_smp_ops msmtc_smp_ops = {
  */
 
 
-void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
+int plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 {
        cpumask_t tmask;
        int cpu = 0;
@@ -156,5 +156,7 @@ void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity)
 
        /* Do any generic SMTC IRQ affinity setup */
        smtc_set_irq_affinity(irq, tmask);
+
+       return 0;
 }
 #endif /* CONFIG_MIPS_MT_SMTC_IRQAFF */
index c147c4b..690de06 100644 (file)
@@ -50,7 +50,7 @@ static void enable_bcm1480_irq(unsigned int irq);
 static void disable_bcm1480_irq(unsigned int irq);
 static void ack_bcm1480_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_PCI
@@ -109,7 +109,7 @@ void bcm1480_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        int i = 0, old_cpu, cpu, int_on, k;
        u64 cur_ints;
@@ -118,7 +118,7 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
 
        if (cpumask_weight(mask) != 1) {
                printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
-               return;
+               return -1;
        }
        i = cpumask_first(mask);
 
@@ -152,6 +152,8 @@ static void bcm1480_set_affinity(unsigned int irq, const struct cpumask *mask)
                }
        }
        spin_unlock_irqrestore(&bcm1480_imr_lock, flags);
+
+       return 0;
 }
 #endif
 
index 3de30f7..eb5396c 100644 (file)
@@ -288,13 +288,7 @@ void __init prom_init(void)
         */
        cfe_cons_handle = cfe_getstdhandle(CFE_STDHANDLE_CONSOLE);
        if (cfe_getenv("LINUX_CMDLINE", arcs_cmdline, CL_SIZE) < 0) {
-               if (argc < 0) {
-                       /*
-                        * It's OK for direct boot to not provide a
-                        *  command line
-                        */
-                       strcpy(arcs_cmdline, "root=/dev/ram0 ");
-               } else {
+               if (argc >= 0) {
                        /* The loader should have set the command line */
                        /* too early for panic to do any good */
                        printk("LINUX_CMDLINE not defined in cfe.");
index 38cb998..409dec7 100644 (file)
@@ -50,7 +50,7 @@ static void enable_sb1250_irq(unsigned int irq);
 static void disable_sb1250_irq(unsigned int irq);
 static void ack_sb1250_irq(unsigned int irq);
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask);
 #endif
 
 #ifdef CONFIG_SIBYTE_HAS_LDT
@@ -103,7 +103,7 @@ void sb1250_unmask_irq(int cpu, int irq)
 }
 
 #ifdef CONFIG_SMP
-static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        int i = 0, old_cpu, cpu, int_on;
        u64 cur_ints;
@@ -113,7 +113,7 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
 
        if (cpumask_weight(mask) > 1) {
                printk("attempted to set irq affinity for irq %d to multiple CPUs\n", irq);
-               return;
+               return -1;
        }
 
        /* Convert logical CPU to physical CPU */
@@ -143,6 +143,8 @@ static void sb1250_set_affinity(unsigned int irq, const struct cpumask *mask)
                                        R_IMR_INTERRUPT_MASK));
        }
        spin_unlock_irqrestore(&sb1250_imr_lock, flags);
+
+       return 0;
 }
 #endif
 
index 3559267..89faaca 100644 (file)
@@ -8,6 +8,7 @@ mainmenu "Linux Kernel Configuration"
 config MN10300
        def_bool y
        select HAVE_OPROFILE
+       select HAVE_ARCH_TRACEHOOK
 
 config AM33
        def_bool y
index bf09f8b..4910546 100644 (file)
@@ -34,7 +34,7 @@
  */
 typedef unsigned long elf_greg_t;
 
-#define ELF_NGREG (sizeof (struct pt_regs) / sizeof(elf_greg_t))
+#define ELF_NGREG ((sizeof(struct pt_regs) / sizeof(elf_greg_t)) - 1)
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 #define ELF_NFPREG 32
@@ -76,6 +76,7 @@ do {                                                                  \
 } while (0)
 
 #define USE_ELF_CORE_DUMP
+#define CORE_DUMP_USE_REGSET
 #define ELF_EXEC_PAGESIZE      4096
 
 /*
index 7323927..f7d4b0d 100644 (file)
@@ -143,13 +143,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
 unsigned long get_wchan(struct task_struct *p);
 
-#define task_pt_regs(task)                                             \
-({                                                                     \
-       struct pt_regs *__regs__;                                       \
-       __regs__ = (struct pt_regs *) (KSTK_TOP(task_stack_page(task)) - 8); \
-       __regs__ - 1;                                                   \
-})
-
+#define task_pt_regs(task) ((task)->thread.uregs)
 #define KSTK_EIP(task) (task_pt_regs(task)->pc)
 #define KSTK_ESP(task) (task_pt_regs(task)->sp)
 
index 7b06cc6..921942e 100644 (file)
@@ -91,9 +91,17 @@ extern struct pt_regs *__frame; /* current frame pointer */
 #if defined(__KERNEL__)
 
 #if !defined(__ASSEMBLY__)
+struct task_struct;
+
 #define user_mode(regs)                        (((regs)->epsw & EPSW_nSL) == EPSW_nSL)
 #define instruction_pointer(regs)      ((regs)->pc)
+#define user_stack_pointer(regs)       ((regs)->sp)
 extern void show_regs(struct pt_regs *);
+
+#define arch_has_single_step() (1)
+extern void user_enable_single_step(struct task_struct *);
+extern void user_disable_single_step(struct task_struct *);
+
 #endif  /*  !__ASSEMBLY  */
 
 #define profile_pc(regs) ((regs)->pc)
index 3dc3e46..7408a27 100644 (file)
@@ -76,7 +76,7 @@ ENTRY(system_call)
        cmp     nr_syscalls,d0
        bcc     syscall_badsys
        btst    _TIF_SYSCALL_TRACE,(TI_flags,a2)
-       bne     syscall_trace_entry
+       bne     syscall_entry_trace
 syscall_call:
        add     d0,d0,a1
        add     a1,a1
@@ -104,11 +104,10 @@ restore_all:
 syscall_exit_work:
        btst    _TIF_SYSCALL_TRACE,d2
        beq     work_pending
-       __sti                           # could let do_syscall_trace() call
+       __sti                           # could let syscall_trace_exit() call
                                        # schedule() instead
        mov     fp,d0
-       mov     1,d1
-       call    do_syscall_trace[],0    # do_syscall_trace(regs,entryexit)
+       call    syscall_trace_exit[],0  # syscall_trace_exit(regs)
        jmp     resume_userspace
 
        ALIGN
@@ -138,13 +137,11 @@ work_notifysig:
        jmp     resume_userspace
 
        # perform syscall entry tracing
-syscall_trace_entry:
+syscall_entry_trace:
        mov     -ENOSYS,d0
        mov     d0,(REG_D0,fp)
        mov     fp,d0
-       clr     d1
-       call    do_syscall_trace[],0
-       mov     (REG_ORIG_D0,fp),d0
+       call    syscall_trace_entry[],0 # returns the syscall number to actually use
        mov     (REG_D1,fp),d1
        cmp     nr_syscalls,d0
        bcs     syscall_call
index d6d6cdc..e143339 100644 (file)
@@ -17,6 +17,9 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/regset.h>
+#include <linux/elf.h>
+#include <linux/tracehook.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -64,12 +67,6 @@ static inline int get_stack_long(struct task_struct *task, int offset)
                ((unsigned long) task->thread.uregs + offset);
 }
 
-/*
- * this routine will put a word on the processes privileged stack.
- * the offset is how far from the base addr as stored in the TSS.
- * this routine assumes that all the privileged stacks are in our
- * data space.
- */
 static inline
 int put_stack_long(struct task_struct *task, int offset, unsigned long data)
 {
@@ -80,94 +77,233 @@ int put_stack_long(struct task_struct *task, int offset, unsigned long data)
        return 0;
 }
 
-static inline unsigned long get_fpregs(struct fpu_state_struct *buf,
-                                      struct task_struct *tsk)
+/*
+ * retrieve the contents of MN10300 userspace general registers
+ */
+static int genregs_get(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      void *kbuf, void __user *ubuf)
 {
-       return __copy_to_user(buf, &tsk->thread.fpu_state,
-                             sizeof(struct fpu_state_struct));
+       const struct pt_regs *regs = task_pt_regs(target);
+       int ret;
+
+       /* we need to skip regs->next */
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 regs, 0, PT_ORIG_D0 * sizeof(long));
+       if (ret < 0)
+               return ret;
+
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+                                 NR_PTREGS * sizeof(long));
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                       NR_PTREGS * sizeof(long), -1);
 }
 
-static inline unsigned long set_fpregs(struct task_struct *tsk,
-                                      struct fpu_state_struct *buf)
+/*
+ * update the contents of the MN10300 userspace general registers
+ */
+static int genregs_set(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      const void *kbuf, const void __user *ubuf)
 {
-       return __copy_from_user(&tsk->thread.fpu_state, buf,
-                               sizeof(struct fpu_state_struct));
+       struct pt_regs *regs = task_pt_regs(target);
+       unsigned long tmp;
+       int ret;
+
+       /* we need to skip regs->next */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                regs, 0, PT_ORIG_D0 * sizeof(long));
+       if (ret < 0)
+               return ret;
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->orig_d0, PT_ORIG_D0 * sizeof(long),
+                                PT_EPSW * sizeof(long));
+       if (ret < 0)
+               return ret;
+
+       /* we need to mask off changes to EPSW */
+       tmp = regs->epsw;
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &tmp, PT_EPSW * sizeof(long),
+                                PT_PC * sizeof(long));
+       tmp &= EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N | EPSW_FLAG_Z;
+       tmp |= regs->epsw & ~(EPSW_FLAG_V | EPSW_FLAG_C | EPSW_FLAG_N |
+                             EPSW_FLAG_Z);
+       regs->epsw = tmp;
+
+       if (ret < 0)
+               return ret;
+
+       /* and finally load the PC */
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &regs->pc, PT_PC * sizeof(long),
+                                NR_PTREGS * sizeof(long));
+
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                        NR_PTREGS * sizeof(long), -1);
 }
 
-static inline void fpsave_init(struct task_struct *task)
+/*
+ * retrieve the contents of MN10300 userspace FPU registers
+ */
+static int fpuregs_get(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      void *kbuf, void __user *ubuf)
 {
-       memset(&task->thread.fpu_state, 0, sizeof(struct fpu_state_struct));
+       const struct fpu_state_struct *fpregs = &target->thread.fpu_state;
+       int ret;
+
+       unlazy_fpu(target);
+
+       ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                 fpregs, 0, sizeof(*fpregs));
+       if (ret < 0)
+               return ret;
+
+       return user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
+                                       sizeof(*fpregs), -1);
 }
 
 /*
- * make sure the single step bit is not set
+ * update the contents of the MN10300 userspace FPU registers
  */
-void ptrace_disable(struct task_struct *child)
+static int fpuregs_set(struct task_struct *target,
+                      const struct user_regset *regset,
+                      unsigned int pos, unsigned int count,
+                      const void *kbuf, const void __user *ubuf)
+{
+       struct fpu_state_struct fpu_state = target->thread.fpu_state;
+       int ret;
+
+       ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                &fpu_state, 0, sizeof(fpu_state));
+       if (ret < 0)
+               return ret;
+
+       fpu_kill_state(target);
+       target->thread.fpu_state = fpu_state;
+       set_using_fpu(target);
+
+       return user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+                                        sizeof(fpu_state), -1);
+}
+
+/*
+ * determine if the FPU registers have actually been used
+ */
+static int fpuregs_active(struct task_struct *target,
+                         const struct user_regset *regset)
+{
+       return is_using_fpu(target) ? regset->n : 0;
+}
+
+/*
+ * Define the register sets available on the MN10300 under Linux
+ */
+enum mn10300_regset {
+       REGSET_GENERAL,
+       REGSET_FPU,
+};
+
+static const struct user_regset mn10300_regsets[] = {
+       /*
+        * General register format is:
+        *      A3, A2, D3, D2, MCVF, MCRL, MCRH, MDRQ
+        *      E1, E0, E7...E2, SP, LAR, LIR, MDR
+        *      A1, A0, D1, D0, ORIG_D0, EPSW, PC
+        */
+       [REGSET_GENERAL] = {
+               .core_note_type = NT_PRSTATUS,
+               .n              = ELF_NGREG,
+               .size           = sizeof(long),
+               .align          = sizeof(long),
+               .get            = genregs_get,
+               .set            = genregs_set,
+       },
+       /*
+        * FPU register format is:
+        *      FS0-31, FPCR
+        */
+       [REGSET_FPU] = {
+               .core_note_type = NT_PRFPREG,
+               .n              = sizeof(struct fpu_state_struct) / sizeof(long),
+               .size           = sizeof(long),
+               .align          = sizeof(long),
+               .get            = fpuregs_get,
+               .set            = fpuregs_set,
+               .active         = fpuregs_active,
+       },
+};
+
+static const struct user_regset_view user_mn10300_native_view = {
+       .name           = "mn10300",
+       .e_machine      = EM_MN10300,
+       .regsets        = mn10300_regsets,
+       .n              = ARRAY_SIZE(mn10300_regsets),
+};
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+       return &user_mn10300_native_view;
+}
+
+/*
+ * set the single-step bit
+ */
+void user_enable_single_step(struct task_struct *child)
 {
 #ifndef CONFIG_MN10300_USING_JTAG
        struct user *dummy = NULL;
        long tmp;
 
        tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
-       tmp &= ~EPSW_T;
+       tmp |= EPSW_T;
        put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
 #endif
 }
 
 /*
- * set the single step bit
+ * make sure the single-step bit is not set
  */
-void ptrace_enable(struct task_struct *child)
+void user_disable_single_step(struct task_struct *child)
 {
 #ifndef CONFIG_MN10300_USING_JTAG
        struct user *dummy = NULL;
        long tmp;
 
        tmp = get_stack_long(child, (unsigned long) &dummy->regs.epsw);
-       tmp |= EPSW_T;
+       tmp &= ~EPSW_T;
        put_stack_long(child, (unsigned long) &dummy->regs.epsw, tmp);
 #endif
 }
 
+void ptrace_disable(struct task_struct *child)
+{
+       user_disable_single_step(child);
+}
+
 /*
  * handle the arch-specific side of process tracing
  */
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
-       struct fpu_state_struct fpu_state;
-       int i, ret;
+       unsigned long tmp;
+       int ret;
 
        switch (request) {
-       /* read the word at location addr. */
-       case PTRACE_PEEKTEXT: {
-               unsigned long tmp;
-               int copied;
-
-               copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-               ret = -EIO;
-               if (copied != sizeof(tmp))
-                       break;
-               ret = put_user(tmp, (unsigned long *) data);
-               break;
-       }
-
-       /* read the word at location addr. */
-       case PTRACE_PEEKDATA: {
-               unsigned long tmp;
-               int copied;
-
-               copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
-               ret = -EIO;
-               if (copied != sizeof(tmp))
-                       break;
-               ret = put_user(tmp, (unsigned long *) data);
-               break;
-       }
-
        /* read the word at location addr in the USER area. */
-       case PTRACE_PEEKUSR: {
-               unsigned long tmp;
-
+       case PTRACE_PEEKUSR:
                ret = -EIO;
                if ((addr & 3) || addr < 0 ||
                    addr > sizeof(struct user) - 3)
@@ -179,17 +315,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                                             ptrace_regid_to_frame[addr]);
                ret = put_user(tmp, (unsigned long *) data);
                break;
-       }
-
-       /* write the word at location addr. */
-       case PTRACE_POKETEXT:
-       case PTRACE_POKEDATA:
-               if (access_process_vm(child, addr, &data, sizeof(data), 1) ==
-                   sizeof(data))
-                       ret = 0;
-               else
-                       ret = -EIO;
-               break;
 
                /* write the word at location addr in the USER area */
        case PTRACE_POKEUSR:
@@ -204,132 +329,32 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
                                             data);
                break;
 
-               /* continue and stop at next (return from) syscall */
-       case PTRACE_SYSCALL:
-               /* restart after signal. */
-       case PTRACE_CONT:
-               ret = -EIO;
-               if ((unsigned long) data > _NSIG)
-                       break;
-               if (request == PTRACE_SYSCALL)
-                       set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               else
-                       clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               child->exit_code = data;
-               ptrace_disable(child);
-               wake_up_process(child);
-               ret = 0;
-               break;
-
-               /*
-                * make the child exit
-                * - the best I can do is send it a sigkill
-                * - perhaps it should be put in the status that it wants to
-                *   exit
-                */
-       case PTRACE_KILL:
-               ret = 0;
-               if (child->exit_state == EXIT_ZOMBIE)   /* already dead */
-                       break;
-               child->exit_code = SIGKILL;
-               clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-               ptrace_disable(child);
-               wake_up_process(child);
-               break;
-
-       case PTRACE_SINGLESTEP: /* set the trap flag. */
-#ifndef CONFIG_MN10300_USING_JTAG
-               ret = -EIO;
-               if ((unsigned long) data > _NSIG)
-                       break;
-               clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
-               ptrace_enable(child);
-               child->exit_code = data;
-               wake_up_process(child);
-               ret = 0;
-#else
-               ret = -EINVAL;
-#endif
-               break;
-
-       case PTRACE_DETACH:     /* detach a process that was attached. */
-               ret = ptrace_detach(child, data);
-               break;
-
-               /* Get all gp regs from the child. */
-       case PTRACE_GETREGS: {
-               unsigned long tmp;
-
-               if (!access_ok(VERIFY_WRITE, (unsigned *) data, NR_PTREGS << 2)) {
-                       ret = -EIO;
-                       break;
-               }
-
-               for (i = 0; i < NR_PTREGS << 2; i += 4) {
-                       tmp = get_stack_long(child, ptrace_regid_to_frame[i]);
-                       __put_user(tmp, (unsigned long *) data);
-                       data += sizeof(tmp);
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_SETREGS: { /* Set all gp regs in the child. */
-               unsigned long tmp;
-
-               if (!access_ok(VERIFY_READ, (unsigned long *)data,
-                              sizeof(struct pt_regs))) {
-                       ret = -EIO;
-                       break;
-               }
-
-               for (i = 0; i < NR_PTREGS << 2; i += 4) {
-                       __get_user(tmp, (unsigned long *) data);
-                       put_stack_long(child, ptrace_regid_to_frame[i], tmp);
-                       data += sizeof(tmp);
-               }
-               ret = 0;
-               break;
-       }
-
-       case PTRACE_GETFPREGS: { /* Get the child FPU state. */
-               if (is_using_fpu(child)) {
-                       unlazy_fpu(child);
-                       fpu_state = child->thread.fpu_state;
-               } else {
-                       memset(&fpu_state, 0, sizeof(fpu_state));
-               }
-
-               ret = -EIO;
-               if (copy_to_user((void *) data, &fpu_state,
-                                sizeof(fpu_state)) == 0)
-                       ret = 0;
-               break;
-       }
-
-       case PTRACE_SETFPREGS: { /* Set the child FPU state. */
-               ret = -EFAULT;
-               if (copy_from_user(&fpu_state, (const void *) data,
-                                  sizeof(fpu_state)) == 0) {
-                       fpu_kill_state(child);
-                       child->thread.fpu_state = fpu_state;
-                       set_using_fpu(child);
-                       ret = 0;
-               }
-               break;
-       }
-
-       case PTRACE_SETOPTIONS: {
-               if (data & PTRACE_O_TRACESYSGOOD)
-                       child->ptrace |= PT_TRACESYSGOOD;
-               else
-                       child->ptrace &= ~PT_TRACESYSGOOD;
-               ret = 0;
-               break;
-       }
+       case PTRACE_GETREGS:    /* Get all integer regs from the child. */
+               return copy_regset_to_user(child, &user_mn10300_native_view,
+                                          REGSET_GENERAL,
+                                          0, NR_PTREGS * sizeof(long),
+                                          (void __user *)data);
+
+       case PTRACE_SETREGS:    /* Set all integer regs in the child. */
+               return copy_regset_from_user(child, &user_mn10300_native_view,
+                                            REGSET_GENERAL,
+                                            0, NR_PTREGS * sizeof(long),
+                                            (const void __user *)data);
+
+       case PTRACE_GETFPREGS:  /* Get the child FPU state. */
+               return copy_regset_to_user(child, &user_mn10300_native_view,
+                                          REGSET_FPU,
+                                          0, sizeof(struct fpu_state_struct),
+                                          (void __user *)data);
+
+       case PTRACE_SETFPREGS:  /* Set the child FPU state. */
+               return copy_regset_from_user(child, &user_mn10300_native_view,
+                                            REGSET_FPU,
+                                            0, sizeof(struct fpu_state_struct),
+                                            (const void __user *)data);
 
        default:
-               ret = -EIO;
+               ret = ptrace_request(child, request, addr, data);
                break;
        }
 
@@ -337,43 +362,26 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 }
 
 /*
- * notification of system call entry/exit
- * - triggered by current->work.syscall_trace
+ * handle tracing of system call entry
+ * - return the revised system call number or ULONG_MAX to cause ENOSYS
  */
-asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
+asmlinkage unsigned long syscall_trace_entry(struct pt_regs *regs)
 {
-#if 0
-       /* just in case... */
-       printk(KERN_DEBUG "[%d] syscall_%lu(%lx,%lx,%lx,%lx) = %lx\n",
-              current->pid,
-              regs->orig_d0,
-              regs->a0,
-              regs->d1,
-              regs->a3,
-              regs->a2,
-              regs->d0);
-       return;
-#endif
-
-       if (!test_thread_flag(TIF_SYSCALL_TRACE) &&
-           !test_thread_flag(TIF_SINGLESTEP))
-               return;
-       if (!(current->ptrace & PT_PTRACED))
-               return;
+       if (tracehook_report_syscall_entry(regs))
+               /* tracing decided this syscall should not happen, so
+                * we'll return a bogus call number to get an ENOSYS
+                * error, but leave the original number in
+                * regs->orig_d0
+                */
+               return ULONG_MAX;
 
-       /* the 0x80 provides a way for the tracing parent to distinguish
-          between a syscall stop and SIGTRAP delivery */
-       ptrace_notify(SIGTRAP |
-                     ((current->ptrace & PT_TRACESYSGOOD) &&
-                      !test_thread_flag(TIF_SINGLESTEP) ? 0x80 : 0));
+       return regs->orig_d0;
+}
 
-       /*
-        * this isn't the same as continuing with a signal, but it will do
-        * for normal use.  strace only continues with a signal if the
-        * stopping signal is not SIGTRAP.  -brl
-        */
-       if (current->exit_code) {
-               send_sig(current->exit_code, current, 1);
-               current->exit_code = 0;
-       }
+/*
+ * handle tracing of system call exit
+ */
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+       tracehook_report_syscall_exit(regs, 0);
 }
index 841ca99..9f7572a 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/tty.h>
 #include <linux/personality.h>
 #include <linux/suspend.h>
+#include <linux/tracehook.h>
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
@@ -511,6 +512,9 @@ static void do_signal(struct pt_regs *regs)
                         * clear the TIF_RESTORE_SIGMASK flag */
                        if (test_thread_flag(TIF_RESTORE_SIGMASK))
                                clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+                       tracehook_signal_handler(signr, &info, &ka, regs,
+                                                test_thread_flag(TIF_SINGLESTEP));
                }
 
                return;
@@ -561,4 +565,9 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
        /* deal with pending signal delivery */
        if (thread_info_flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
                do_signal(regs);
+
+       if (thread_info_flags & _TIF_NOTIFY_RESUME) {
+               clear_thread_flag(TIF_NOTIFY_RESUME);
+               tracehook_notify_resume(__frame);
+       }
 }
index 7892080..7095147 100644 (file)
@@ -165,24 +165,6 @@ ENTRY(itlb_aerror)
 ENTRY(dtlb_aerror)
        and     ~EPSW_NMID,epsw
        add     -4,sp
-       mov     d1,(sp)
-
-       movhu   (MMUFCR_DFC),d1                 # is it the initial valid write
-                                               # to this page?
-       and     MMUFCR_xFC_INITWR,d1
-       beq     dtlb_pagefault                  # jump if not
-
-       mov     (DPTEL),d1                      # set the dirty bit
-                                               # (don't replace with BSET!)
-       or      _PAGE_DIRTY,d1
-       mov     d1,(DPTEL)
-       mov     (sp),d1
-       add     4,sp
-       rti
-
-       ALIGN
-dtlb_pagefault:
-       mov     (sp),d1
        SAVE_ALL
        add     -4,sp                           # need to pass three params
 
index 4ea4229..8007f1e 100644 (file)
@@ -130,15 +130,17 @@ int cpu_check_affinity(unsigned int irq, const struct cpumask *dest)
        return cpu_dest;
 }
 
-static void cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int cpu_set_affinity_irq(unsigned int irq, const struct cpumask *dest)
 {
        int cpu_dest;
 
        cpu_dest = cpu_check_affinity(irq, dest);
        if (cpu_dest < 0)
-               return;
+               return -1;
 
        cpumask_copy(&irq_desc[irq].affinity, dest);
+
+       return 0;
 }
 #endif
 
index 80b5134..be3581a 100644 (file)
@@ -333,7 +333,7 @@ static void xics_eoi_lpar(unsigned int virq)
        lpar_xirr_info_set((0xff << 24) | irq);
 }
 
-static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
+static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 {
        unsigned int irq;
        int status;
@@ -342,14 +342,14 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
 
        irq = (unsigned int)irq_map[virq].hwirq;
        if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
-               return;
+               return -1;
 
        status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
 
        if (status) {
                printk(KERN_ERR "%s: ibm,get-xive irq=%u returns %d\n",
                        __func__, irq, status);
-               return;
+               return -1;
        }
 
        /*
@@ -363,7 +363,7 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
                printk(KERN_WARNING
                        "%s: No online cpus in the mask %s for irq %d\n",
                        __func__, cpulist, virq);
-               return;
+               return -1;
        }
 
        status = rtas_call(ibm_set_xive, 3, 1, NULL,
@@ -372,8 +372,10 @@ static void xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
        if (status) {
                printk(KERN_ERR "%s: ibm,set-xive irq=%u returns %d\n",
                        __func__, irq, status);
-               return;
+               return -1;
        }
+
+       return 0;
 }
 
 static struct irq_chip xics_pic_direct = {
index 0efc12d..352d8c3 100644 (file)
@@ -807,7 +807,7 @@ static void mpic_end_ipi(unsigned int irq)
 
 #endif /* CONFIG_SMP */
 
-void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
+int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 {
        struct mpic *mpic = mpic_from_irq(irq);
        unsigned int src = mpic_irq_to_hw(irq);
@@ -824,6 +824,8 @@ void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask)
                mpic_irq_write(src, MPIC_INFO(IRQ_DESTINATION),
                               mpic_physmask(cpus_addr(tmp)[0]));
        }
+
+       return 0;
 }
 
 static unsigned int mpic_type_to_vecpri(struct mpic *mpic, unsigned int type)
index 3cef2af..eff433c 100644 (file)
@@ -36,6 +36,6 @@ static inline int mpic_pasemi_msi_init(struct mpic *mpic)
 
 extern int mpic_set_irq_type(unsigned int virq, unsigned int flow_type);
 extern void mpic_set_vector(unsigned int virq, unsigned int vector);
-extern void mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
+extern int mpic_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 
 #endif /* _POWERPC_SYSDEV_MPIC_H */
index 639ac80..6586572 100644 (file)
@@ -102,8 +102,8 @@ struct thread_info {
 #define TI_KERN_CNTD1  0x00000488
 #define TI_PCR         0x00000490
 #define TI_RESTART_BLOCK 0x00000498
-#define TI_KUNA_REGS   0x000004c0
-#define TI_KUNA_INSN   0x000004c8
+#define TI_KUNA_REGS   0x000004c8
+#define TI_KUNA_INSN   0x000004d0
 #define TI_FPREGS      0x00000500
 
 /* We embed this in the uppermost byte of thread_info->flags */
index 5deabe9..e5e78f9 100644 (file)
@@ -318,10 +318,12 @@ static void sun4u_irq_enable(unsigned int virt_irq)
        }
 }
 
-static void sun4u_set_affinity(unsigned int virt_irq,
+static int sun4u_set_affinity(unsigned int virt_irq,
                               const struct cpumask *mask)
 {
        sun4u_irq_enable(virt_irq);
+
+       return 0;
 }
 
 /* Don't do anything.  The desc->status check for IRQ_DISABLED in
@@ -377,7 +379,7 @@ static void sun4v_irq_enable(unsigned int virt_irq)
                       ino, err);
 }
 
-static void sun4v_set_affinity(unsigned int virt_irq,
+static int sun4v_set_affinity(unsigned int virt_irq,
                               const struct cpumask *mask)
 {
        unsigned int ino = virt_irq_table[virt_irq].dev_ino;
@@ -388,6 +390,8 @@ static void sun4v_set_affinity(unsigned int virt_irq,
        if (err != HV_EOK)
                printk(KERN_ERR "sun4v_intr_settarget(%x,%lu): "
                       "err(%d)\n", ino, cpuid, err);
+
+       return 0;
 }
 
 static void sun4v_irq_disable(unsigned int virt_irq)
@@ -445,7 +449,7 @@ static void sun4v_virq_enable(unsigned int virt_irq)
                       dev_handle, dev_ino, err);
 }
 
-static void sun4v_virt_set_affinity(unsigned int virt_irq,
+static int sun4v_virt_set_affinity(unsigned int virt_irq,
                                    const struct cpumask *mask)
 {
        unsigned long cpuid, dev_handle, dev_ino;
@@ -461,6 +465,8 @@ static void sun4v_virt_set_affinity(unsigned int virt_irq,
                printk(KERN_ERR "sun4v_vintr_set_target(%lx,%lx,%lu): "
                       "err(%d)\n",
                       dev_handle, dev_ino, cpuid, err);
+
+       return 0;
 }
 
 static void sun4v_virq_disable(unsigned int virt_irq)
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
new file mode 100644 (file)
index 0000000..ad8ec35
--- /dev/null
@@ -0,0 +1,16 @@
+
+obj-$(CONFIG_KVM) += kvm/
+
+# Xen paravirtualization support
+obj-$(CONFIG_XEN) += xen/
+
+# lguest paravirtualization support
+obj-$(CONFIG_LGUEST_GUEST) += lguest/
+
+obj-y += kernel/
+obj-y += mm/
+
+obj-y += crypto/
+obj-y += vdso/
+obj-$(CONFIG_IA32_EMULATION) += ia32/
+
index a6efe0a..aafae3b 100644 (file)
@@ -47,6 +47,11 @@ config X86
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_LZMA
 
+config OUTPUT_FORMAT
+       string
+       default "elf32-i386" if X86_32
+       default "elf64-x86-64" if X86_64
+
 config ARCH_DEFCONFIG
        string
        default "arch/x86/configs/i386_defconfig" if X86_32
@@ -274,15 +279,9 @@ config SPARSE_IRQ
 
          If you don't know what to do here, say N.
 
-config NUMA_MIGRATE_IRQ_DESC
-       bool "Move irq desc when changing irq smp_affinity"
+config NUMA_IRQ_DESC
+       def_bool y
        depends on SPARSE_IRQ && NUMA
-       depends on BROKEN
-       default n
-       ---help---
-         This enables moving irq_desc to cpu/node that irq will use handled.
-
-         If you don't know what to do here, say N.
 
 config X86_MPPARSE
        bool "Enable MPS table" if ACPI
@@ -355,7 +354,7 @@ config X86_UV
        depends on X86_64
        depends on X86_EXTENDED_PLATFORM
        depends on NUMA
-       select X86_X2APIC
+       depends on X86_X2APIC
        ---help---
          This option is needed in order to support SGI Ultraviolet systems.
          If you don't have one of these, you should say N here.
@@ -1466,9 +1465,7 @@ config KEXEC_JUMP
 
 config PHYSICAL_START
        hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
-       default "0x1000000" if X86_NUMAQ
-       default "0x200000" if X86_64
-       default "0x100000"
+       default "0x1000000"
        ---help---
          This gives the physical address where the kernel is loaded.
 
@@ -1487,15 +1484,15 @@ config PHYSICAL_START
          to be specifically compiled to run from a specific memory area
          (normally a reserved region) and this option comes handy.
 
-         So if you are using bzImage for capturing the crash dump, leave
-         the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y.
-         Otherwise if you plan to use vmlinux for capturing the crash dump
-         change this value to start of the reserved region (Typically 16MB
-         0x1000000). In other words, it can be set based on the "X" value as
-         specified in the "crashkernel=YM@XM" command line boot parameter
-         passed to the panic-ed kernel. Typically this parameter is set as
-         crashkernel=64M@16M. Please take a look at
-         Documentation/kdump/kdump.txt for more details about crash dumps.
+         So if you are using bzImage for capturing the crash dump,
+         leave the value here unchanged to 0x1000000 and set
+         CONFIG_RELOCATABLE=y.  Otherwise if you plan to use vmlinux
+         for capturing the crash dump change this value to start of
+         the reserved region.  In other words, it can be set based on
+         the "X" value as specified in the "crashkernel=YM@XM"
+         command line boot parameter passed to the panic-ed
+         kernel. Please take a look at Documentation/kdump/kdump.txt
+         for more details about crash dumps.
 
          Usage of bzImage for capturing the crash dump is recommended as
          one does not have to build two kernels. Same kernel can be used
@@ -1508,8 +1505,8 @@ config PHYSICAL_START
          Don't change this unless you know what you are doing.
 
 config RELOCATABLE
-       bool "Build a relocatable kernel (EXPERIMENTAL)"
-       depends on EXPERIMENTAL
+       bool "Build a relocatable kernel"
+       default y
        ---help---
          This builds a kernel image that retains relocation information
          so it can be loaded someplace besides the default 1MB.
@@ -1524,12 +1521,16 @@ config RELOCATABLE
          it has been loaded at and the compile time physical address
          (CONFIG_PHYSICAL_START) is ignored.
 
+# Relocation on x86-32 needs some additional build support
+config X86_NEED_RELOCS
+       def_bool y
+       depends on X86_32 && RELOCATABLE
+
 config PHYSICAL_ALIGN
        hex
        prompt "Alignment value to which kernel should be aligned" if X86_32
-       default "0x100000" if X86_32
-       default "0x200000" if X86_64
-       range 0x2000 0x400000
+       default "0x1000000"
+       range 0x2000 0x1000000
        ---help---
          This value puts the alignment restrictions on physical address
          where kernel is loaded and run from. Kernel is compiled for an
index d8359e7..d105f29 100644 (file)
@@ -159,14 +159,30 @@ config IOMMU_DEBUG
          options. See Documentation/x86_64/boot-options.txt for more
          details.
 
+config IOMMU_STRESS
+       bool "Enable IOMMU stress-test mode"
+       ---help---
+         This option disables various optimizations in IOMMU related
+         code to do real stress testing of the IOMMU code. This option
+         will cause a performance drop and should only be enabled for
+         testing.
+
 config IOMMU_LEAK
        bool "IOMMU leak tracing"
-       depends on DEBUG_KERNEL
-       depends on IOMMU_DEBUG
+       depends on IOMMU_DEBUG && DMA_API_DEBUG
        ---help---
          Add a simple leak tracer to the IOMMU code. This is useful when you
          are debugging a buggy device driver that leaks IOMMU mappings.
 
+config X86_DS_SELFTEST
+    bool "DS selftest"
+    default y
+    depends on DEBUG_KERNEL
+    depends on X86_DS
+       ---help---
+         Perform Debug Store selftests at boot time.
+         If in doubt, say "N".
+
 config HAVE_MMIOTRACE_SUPPORT
        def_bool y
 
index 8c86b72..edbd0ca 100644 (file)
@@ -7,8 +7,6 @@ else
         KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif
 
-core-$(CONFIG_KVM) += arch/x86/kvm/
-
 # BITS is used as extension for files which are available in a 32 bit
 # and a 64 bit version to simplify shared Makefiles.
 # e.g.: obj-y += foo_$(BITS).o
@@ -118,21 +116,8 @@ head-y += arch/x86/kernel/init_task.o
 
 libs-y  += arch/x86/lib/
 
-# Sub architecture files that needs linking first
-core-y += $(fcore-y)
-
-# Xen paravirtualization support
-core-$(CONFIG_XEN) += arch/x86/xen/
-
-# lguest paravirtualization support
-core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/
-
-core-y += arch/x86/kernel/
-core-y += arch/x86/mm/
-
-core-y += arch/x86/crypto/
-core-y += arch/x86/vdso/
-core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/
+# See arch/x86/Kbuild for content of core part of the kernel
+core-y += arch/x86/
 
 # drivers-y are linked after core-y
 drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/
index 172cf8a..851fe93 100644 (file)
@@ -3,6 +3,8 @@ bzImage
 cpustr.h
 mkcpustr
 offsets.h
+voffset.h
+zoffset.h
 setup
 setup.bin
 setup.elf
index 6633b6e..8d16ada 100644 (file)
@@ -26,9 +26,10 @@ targets              := vmlinux.bin setup.bin setup.elf bzImage
 targets                += fdimage fdimage144 fdimage288 image.iso mtools.conf
 subdir-                := compressed
 
-setup-y                += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o
+setup-y                += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o
 setup-y                += header.o main.o mca.o memory.o pm.o pmjump.o
-setup-y                += printf.o string.o tty.o video.o video-mode.o version.o
+setup-y                += printf.o regs.o string.o tty.o video.o video-mode.o
+setup-y                += version.o
 setup-$(CONFIG_X86_APM_BOOT) += apm.o
 
 # The link order of the video-*.o modules can matter.  In particular,
@@ -86,19 +87,27 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
 
 SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
 
-sed-offsets := -e 's/^00*/0/' \
-        -e 's/^\([0-9a-fA-F]*\) . \(input_data\|input_data_end\)$$/\#define \2 0x\1/p'
+sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p'
 
-quiet_cmd_offsets = OFFSETS $@
-      cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@
+quiet_cmd_voffset = VOFFSET $@
+      cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@
 
-$(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE
-       $(call if_changed,offsets)
+targets += voffset.h
+$(obj)/voffset.h: vmlinux FORCE
+       $(call if_changed,voffset)
+
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+
+quiet_cmd_zoffset = ZOFFSET $@
+      cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
+
+targets += zoffset.h
+$(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE
+       $(call if_changed,zoffset)
 
-targets += offsets.h
 
 AFLAGS_header.o += -I$(obj)
-$(obj)/header.o: $(obj)/offsets.h
+$(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h
 
 LDFLAGS_setup.elf      := -T
 $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE
index 7c19ce8..64a31a6 100644 (file)
@@ -2,7 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007-2008 rPath, Inc. - All Rights Reserved
- *   Copyright 2009 Intel Corporation
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -90,8 +90,11 @@ static int a20_test_long(void)
 
 static void enable_a20_bios(void)
 {
-       asm volatile("pushfl; int $0x15; popfl"
-                    : : "a" ((u16)0x2401));
+       struct biosregs ireg;
+
+       initregs(&ireg);
+       ireg.ax = 0x2401;
+       intcall(0x15, &ireg, NULL);
 }
 
 static void enable_a20_kbc(void)
index 7aa6033..ee27483 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   Original APM BIOS checking by Stephen Rothwell, May 1994
  *   (sfr@canb.auug.org.au)
 
 int query_apm_bios(void)
 {
-       u16 ax, bx, cx, dx, di;
-       u32 ebx, esi;
-       u8 err;
+       struct biosregs ireg, oreg;
 
        /* APM BIOS installation check */
-       ax = 0x5300;
-       bx = cx = 0;
-       asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
-                    : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
-                    : : "esi", "edi");
+       initregs(&ireg);
+       ireg.ah = 0x53;
+       intcall(0x15, &ireg, &oreg);
 
-       if (err)
+       if (oreg.flags & X86_EFLAGS_CF)
                return -1;              /* No APM BIOS */
 
-       if (bx != 0x504d)       /* "PM" signature */
+       if (oreg.bx != 0x504d)          /* "PM" signature */
                return -1;
 
-       if (!(cx & 0x02))               /* 32 bits supported? */
+       if (!(oreg.cx & 0x02))          /* 32 bits supported? */
                return -1;
 
        /* Disconnect first, just in case */
-       ax = 0x5304;
-       bx = 0;
-       asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
-                    : "+a" (ax), "+b" (bx)
-                    : : "ecx", "edx", "esi", "edi");
-
-       /* Paranoia */
-       ebx = esi = 0;
-       cx = dx = di = 0;
+       ireg.al = 0x04;
+       intcall(0x15, &ireg, NULL);
 
        /* 32-bit connect */
-       asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6"
-                    : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx),
-                      "+S" (esi), "+D" (di), "=m" (err)
-                    : "a" (0x5303));
-
-       boot_params.apm_bios_info.cseg = ax;
-       boot_params.apm_bios_info.offset = ebx;
-       boot_params.apm_bios_info.cseg_16 = cx;
-       boot_params.apm_bios_info.dseg = dx;
-       boot_params.apm_bios_info.cseg_len = (u16)esi;
-       boot_params.apm_bios_info.cseg_16_len = esi >> 16;
-       boot_params.apm_bios_info.dseg_len = di;
-
-       if (err)
+       ireg.al = 0x03;
+       intcall(0x15, &ireg, &oreg);
+
+       boot_params.apm_bios_info.cseg        = oreg.ax;
+       boot_params.apm_bios_info.offset      = oreg.ebx;
+       boot_params.apm_bios_info.cseg_16     = oreg.cx;
+       boot_params.apm_bios_info.dseg        = oreg.dx;
+       boot_params.apm_bios_info.cseg_len    = oreg.si;
+       boot_params.apm_bios_info.cseg_16_len = oreg.hsi;
+       boot_params.apm_bios_info.dseg_len    = oreg.di;
+
+       if (oreg.flags & X86_EFLAGS_CF)
                return -1;
 
        /* Redo the installation check as the 32-bit connect;
           some BIOSes return different flags this way... */
 
-       ax = 0x5300;
-       bx = cx = 0;
-       asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0"
-                    : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx)
-                    : : "esi", "edi");
+       ireg.al = 0x00;
+       intcall(0x15, &ireg, &oreg);
 
-       if (err || bx != 0x504d) {
+       if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) {
                /* Failure with 32-bit connect, try to disconnect and ignore */
-               ax = 0x5304;
-               bx = 0;
-               asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp"
-                            : "+a" (ax), "+b" (bx)
-                            : : "ecx", "edx", "esi", "edi");
+               ireg.al = 0x04;
+               intcall(0x15, &ireg, NULL);
                return -1;
        }
 
-       boot_params.apm_bios_info.version = ax;
-       boot_params.apm_bios_info.flags = cx;
+       boot_params.apm_bios_info.version = oreg.ax;
+       boot_params.apm_bios_info.flags   = oreg.cx;
        return 0;
 }
 
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
new file mode 100644 (file)
index 0000000..5077937
--- /dev/null
@@ -0,0 +1,82 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * "Glove box" for BIOS calls.  Avoids the constant problems with BIOSes
+ * touching registers they shouldn't be.
+ */
+
+       .code16
+       .text
+       .globl  intcall
+       .type   intcall, @function
+intcall:
+       /* Self-modify the INT instruction.  Ugly, but works. */
+       cmpb    %al, 3f
+       je      1f
+       movb    %al, 3f
+       jmp     1f              /* Synchronize pipeline */
+1:
+       /* Save state */
+       pushfl
+       pushw   %fs
+       pushw   %gs
+       pushal
+
+       /* Copy input state to stack frame */
+       subw    $44, %sp
+       movw    %dx, %si
+       movw    %sp, %di
+       movw    $11, %cx
+       rep; movsd
+
+       /* Pop full state from the stack */
+       popal
+       popw    %gs
+       popw    %fs
+       popw    %es
+       popw    %ds
+       popfl
+
+       /* Actual INT */
+       .byte   0xcd            /* INT opcode */
+3:     .byte   0
+
+       /* Push full state to the stack */
+       pushfl
+       pushw   %ds
+       pushw   %es
+       pushw   %fs
+       pushw   %gs
+       pushal
+
+       /* Re-establish C environment invariants */
+       cld
+       movzwl  %sp, %esp
+       movw    %cs, %ax
+       movw    %ax, %ds
+       movw    %ax, %es
+
+       /* Copy output state from stack frame */
+       movw    68(%esp), %di   /* Original %cx == 3rd argument */
+       andw    %di, %di
+       jz      4f
+       movw    %sp, %si
+       movw    $11, %cx
+       rep; movsd
+4:     addw    $44, %sp
+
+       /* Restore state and return */
+       popal
+       popw    %gs
+       popw    %fs
+       popfl
+       retl
+       .size   intcall, .-intcall
index 7b2692e..98239d2 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -26,6 +27,7 @@
 #include <asm/setup.h>
 #include "bitops.h"
 #include <asm/cpufeature.h>
+#include <asm/processor-flags.h>
 
 /* Useful macros */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -241,6 +243,49 @@ int enable_a20(void);
 /* apm.c */
 int query_apm_bios(void);
 
+/* bioscall.c */
+/*
+ * Register image exchanged with intcall().  The three anonymous structs
+ * are overlapping views of the same storage, giving 32-bit, 16-bit and
+ * 8-bit names for the same slots (low part first).  The slot order is
+ * the order in which intcall pops the image: the popal registers
+ * (edi ... eax), then gs, fs, es, ds, then eflags - 11 dwords in total.
+ */
+struct biosregs {
+       union {
+               /* 32-bit view */
+               struct {
+                       u32 edi;
+                       u32 esi;
+                       u32 ebp;
+                       u32 _esp;       /* stack pointer slot of the popal frame */
+                       u32 ebx;
+                       u32 edx;
+                       u32 ecx;
+                       u32 eax;
+                       u32 _fsgs;      /* same storage as gs, fs below */
+                       u32 _dses;      /* same storage as es, ds below */
+                       u32 eflags;
+               };
+               /* 16-bit view: low word, then high word, of each slot */
+               struct {
+                       u16 di, hdi;
+                       u16 si, hsi;
+                       u16 bp, hbp;
+                       u16 _sp, _hsp;
+                       u16 bx, hbx;
+                       u16 dx, hdx;
+                       u16 cx, hcx;
+                       u16 ax, hax;
+                       u16 gs, fs;
+                       u16 es, ds;
+                       u16 flags, hflags;
+               };
+               /* 8-bit view: covers only the eight general-register slots */
+               struct {
+                       u8 dil, dih, edi2, edi3;
+                       u8 sil, sih, esi2, esi3;
+                       u8 bpl, bph, ebp2, ebp3;
+                       u8 _spl, _sph, _esp2, _esp3;
+                       u8 bl, bh, ebx2, ebx3;
+                       u8 dl, dh, edx2, edx3;
+                       u8 cl, ch, ecx2, ecx3;
+                       u8 al, ah, eax2, eax3;
+               };
+       };
+};
+/*
+ * Invoke interrupt int_no with the registers loaded from *ireg.  The
+ * registers left by the interrupt are copied to *oreg unless oreg is NULL.
+ */
+void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg);
+
+
 /* cmdline.c */
 int cmdline_find_option(const char *option, char *buffer, int bufsize);
 int cmdline_find_option_bool(const char *option);
@@ -279,6 +324,9 @@ int sprintf(char *buf, const char *fmt, ...);
 int vsprintf(char *buf, const char *fmt, va_list args);
 int printf(const char *fmt, ...);
 
+/* regs.c */
+void initregs(struct biosregs *regs);
+
 /* string.c */
 int strcmp(const char *str1, const char *str2);
 size_t strnlen(const char *s, size_t maxlen);
index 63eff3b..4a46fab 100644 (file)
@@ -1,3 +1,6 @@
 relocs
 vmlinux.bin.all
 vmlinux.relocs
+vmlinux.lds
+mkpiggy
+piggy.S
index 65551c9..49c8a4c 100644 (file)
@@ -19,7 +19,9 @@ KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
 LDFLAGS := -m elf_$(UTS_MACHINE)
 LDFLAGS_vmlinux := -T
 
-$(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
+hostprogs-y    := mkpiggy
+
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE
        $(call if_changed,ld)
        @:
 
@@ -29,7 +31,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
 
 
 targets += vmlinux.bin.all vmlinux.relocs relocs
-hostprogs-$(CONFIG_X86_32) += relocs
+hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs
 
 quiet_cmd_relocs = RELOCS  $@
       cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $<
@@ -37,46 +39,22 @@ $(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
        $(call if_changed,relocs)
 
 vmlinux.bin.all-y := $(obj)/vmlinux.bin
-vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs
-quiet_cmd_relocbin = BUILD   $@
-      cmd_relocbin = cat $(filter-out FORCE,$^) > $@
-$(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE
-       $(call if_changed,relocbin)
-
-ifeq ($(CONFIG_X86_32),y)
+vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs
 
-ifdef CONFIG_RELOCATABLE
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE
-       $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE
-       $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE
-       $(call if_changed,lzma)
-else
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,lzma)
-endif
-LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T
 
-else
+suffix-$(CONFIG_KERNEL_GZIP)   := gz
+suffix-$(CONFIG_KERNEL_BZIP2)  := bz2
+suffix-$(CONFIG_KERNEL_LZMA)   := lzma
 
-$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE
-       $(call if_changed,lzma)
-
-LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T
-endif
+quiet_cmd_mkpiggy = MKPIGGY $@
+      cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
 
-suffix_$(CONFIG_KERNEL_GZIP)  = gz
-suffix_$(CONFIG_KERNEL_BZIP2) = bz2
-suffix_$(CONFIG_KERNEL_LZMA)  = lzma
-
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE
-       $(call if_changed,ld)
+targets += piggy.S
+$(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE
+       $(call if_changed,mkpiggy)
index 3a8a866..75e4f00 100644 (file)
  * the page directory. [According to comments etc elsewhere on a compressed
  * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC]
  *
- * Page 0 is deliberately kept safe, since System Management Mode code in 
+ * Page 0 is deliberately kept safe, since System Management Mode code in
  * laptops may need to access the BIOS data stored there.  This is also
- * useful for future device drivers that either access the BIOS via VM86 
+ * useful for future device drivers that either access the BIOS via VM86
  * mode.
  */
 
 /*
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
-.text
+       .text
 
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
 
-.section ".text.head","ax",@progbits
+       .section ".text.head","ax",@progbits
 ENTRY(startup_32)
        cld
-       /* test KEEP_SEGMENTS flag to see if the bootloader is asking
-        * us to not reload segments */
-       testb $(1<<6), BP_loadflags(%esi)
-       jnz 1f
+       /*
+        * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+        * us to not reload segments
+        */
+       testb   $(1<<6), BP_loadflags(%esi)
+       jnz     1f
 
        cli
-       movl $(__BOOT_DS),%eax
-       movl %eax,%ds
-       movl %eax,%es
-       movl %eax,%fs
-       movl %eax,%gs
-       movl %eax,%ss
+       movl    $__BOOT_DS, %eax
+       movl    %eax, %ds
+       movl    %eax, %es
+       movl    %eax, %fs
+       movl    %eax, %gs
+       movl    %eax, %ss
 1:
 
-/* Calculate the delta between where we were compiled to run
+/*
+ * Calculate the delta between where we were compiled to run
  * at and where we were actually loaded at.  This can only be done
  * with a short local call on x86.  Nothing  else will tell us what
  * address we are running at.  The reserved chunk of the real-mode
  * data at 0x1e4 (defined as a scratch field) are used as the stack
  * for this calculation. Only 4 bytes are needed.
  */
-       leal (0x1e4+4)(%esi), %esp
-       call 1f
-1:     popl %ebp
-       subl $1b, %ebp
+       leal    (BP_scratch+4)(%esi), %esp
+       call    1f
+1:     popl    %ebp
+       subl    $1b, %ebp
 
-/* %ebp contains the address we are loaded at by the boot loader and %ebx
+/*
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
  * contains the address where we should move the kernel image temporarily
  * for safe in-place decompression.
  */
 
 #ifdef CONFIG_RELOCATABLE
-       movl    %ebp, %ebx
-       addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebx
-       andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx
+       movl    %ebp, %ebx
+       movl    BP_kernel_alignment(%esi), %eax
+       decl    %eax
+       addl    %eax, %ebx
+       notl    %eax
+       andl    %eax, %ebx
 #else
-       movl $LOAD_PHYSICAL_ADDR, %ebx
+       movl    $LOAD_PHYSICAL_ADDR, %ebx
 #endif
 
-       /* Replace the compressed data size with the uncompressed size */
-       subl input_len(%ebp), %ebx
-       movl output_len(%ebp), %eax
-       addl %eax, %ebx
-       /* Add 8 bytes for every 32K input block */
-       shrl $12, %eax
-       addl %eax, %ebx
-       /* Add 32K + 18 bytes of extra slack */
-       addl $(32768 + 18), %ebx
-       /* Align on a 4K boundary */
-       addl $4095, %ebx
-       andl $~4095, %ebx
-
-/* Copy the compressed kernel to the end of our buffer
+       /* Target address to relocate to for decompression */
+       addl    $z_extract_offset, %ebx
+
+       /* Set up the stack */
+       leal    boot_stack_end(%ebx), %esp
+
+       /* Zero EFLAGS */
+       pushl   $0
+       popfl
+
+/*
+ * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-       pushl %esi
-       leal _end(%ebp), %esi
-       leal _end(%ebx), %edi
-       movl $(_end - startup_32), %ecx
+       pushl   %esi
+       leal    (_bss-4)(%ebp), %esi
+       leal    (_bss-4)(%ebx), %edi
+       movl    $(_bss - startup_32), %ecx
+       shrl    $2, %ecx
        std
-       rep
-       movsb
+       rep     movsl
        cld
-       popl %esi
-
-/* Compute the kernel start address.
- */
-#ifdef CONFIG_RELOCATABLE
-       addl    $(CONFIG_PHYSICAL_ALIGN - 1), %ebp
-       andl    $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp
-#else
-       movl    $LOAD_PHYSICAL_ADDR, %ebp
-#endif
+       popl    %esi
 
 /*
  * Jump to the relocated address.
  */
-       leal relocated(%ebx), %eax
-       jmp *%eax
+       leal    relocated(%ebx), %eax
+       jmp     *%eax
 ENDPROC(startup_32)
 
-.section ".text"
+       .text
 relocated:
 
 /*
- * Clear BSS
- */
-       xorl %eax,%eax
-       leal _edata(%ebx),%edi
-       leal _end(%ebx), %ecx
-       subl %edi,%ecx
-       cld
-       rep
-       stosb
-
-/*
- * Setup the stack for the decompressor
+ * Clear BSS (stack is currently empty)
  */
-       leal boot_stack_end(%ebx), %esp
+       xorl    %eax, %eax
+       leal    _bss(%ebx), %edi
+       leal    _ebss(%ebx), %ecx
+       subl    %edi, %ecx
+       shrl    $2, %ecx
+       rep     stosl
 
 /*
  * Do the decompression, and jump to the new kernel..
  */
-       movl output_len(%ebx), %eax
-       pushl %eax
-                       # push arguments for decompress_kernel:
-       pushl %ebp      # output address
-       movl input_len(%ebx), %eax
-       pushl %eax      # input_len
-       leal input_data(%ebx), %eax
-       pushl %eax      # input_data
-       leal boot_heap(%ebx), %eax
-       pushl %eax      # heap area
-       pushl %esi      # real mode pointer
-       call decompress_kernel
-       addl $20, %esp
-       popl %ecx
+       leal    z_extract_offset_negative(%ebx), %ebp
+                               /* push arguments for decompress_kernel: */
+       pushl   %ebp            /* output address */
+       pushl   $z_input_len    /* input_len */
+       leal    input_data(%ebx), %eax
+       pushl   %eax            /* input_data */
+       leal    boot_heap(%ebx), %eax
+       pushl   %eax            /* heap area */
+       pushl   %esi            /* real mode pointer */
+       call    decompress_kernel
+       addl    $20, %esp
 
 #if CONFIG_RELOCATABLE
-/* Find the address of the relocations.
+/*
+ * Find the address of the relocations.
  */
-       movl %ebp, %edi
-       addl %ecx, %edi
+       leal    z_output_len(%ebp), %edi
 
-/* Calculate the delta between where vmlinux was compiled to run
+/*
+ * Calculate the delta between where vmlinux was compiled to run
  * and where it was actually loaded.
  */
-       movl %ebp, %ebx
-       subl $LOAD_PHYSICAL_ADDR, %ebx
-       jz   2f         /* Nothing to be done if loaded at compiled addr. */
+       movl    %ebp, %ebx
+       subl    $LOAD_PHYSICAL_ADDR, %ebx
+       jz      2f      /* Nothing to be done if loaded at compiled addr. */
 /*
  * Process relocations.
  */
 
-1:     subl $4, %edi
-       movl 0(%edi), %ecx
-       testl %ecx, %ecx
-       jz 2f
-       addl %ebx, -__PAGE_OFFSET(%ebx, %ecx)
-       jmp 1b
+1:     subl    $4, %edi
+       movl    (%edi), %ecx
+       testl   %ecx, %ecx
+       jz      2f
+       addl    %ebx, -__PAGE_OFFSET(%ebx, %ecx)
+       jmp     1b
 2:
 #endif
 
 /*
  * Jump to the decompressed kernel.
  */
-       xorl %ebx,%ebx
-       jmp *%ebp
+       xorl    %ebx, %ebx
+       jmp     *%ebp
 
-.bss
-/* Stack and heap for uncompression */
-.balign 4
+/*
+ * Stack and heap for uncompression
+ */
+       .bss
+       .balign 4
 boot_heap:
        .fill BOOT_HEAP_SIZE, 1, 0
 boot_stack:
index ed4a829..f62c284 100644 (file)
@@ -21,8 +21,8 @@
 /*
  * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996
  */
-.code32
-.text
+       .code32
+       .text
 
 #include <linux/linkage.h>
 #include <asm/segment.h>
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
 
-.section ".text.head"
+       .section ".text.head"
        .code32
 ENTRY(startup_32)
        cld
-       /* test KEEP_SEGMENTS flag to see if the bootloader is asking
-        * us to not reload segments */
+       /*
+        * Test KEEP_SEGMENTS flag to see if the bootloader is asking
+        * us to not reload segments
+        */
        testb $(1<<6), BP_loadflags(%esi)
        jnz 1f
 
@@ -49,14 +51,15 @@ ENTRY(startup_32)
        movl    %eax, %ss
 1:
 
-/* Calculate the delta between where we were compiled to run
+/*
+ * Calculate the delta between where we were compiled to run
  * at and where we were actually loaded at.  This can only be done
  * with a short local call on x86.  Nothing  else will tell us what
  * address we are running at.  The reserved chunk of the real-mode
  * data at 0x1e4 (defined as a scratch field) are used as the stack
  * for this calculation. Only 4 bytes are needed.
  */
-       leal    (0x1e4+4)(%esi), %esp
+       leal    (BP_scratch+4)(%esi), %esp
        call    1f
 1:     popl    %ebp
        subl    $1b, %ebp
@@ -70,32 +73,28 @@ ENTRY(startup_32)
        testl   %eax, %eax
        jnz     no_longmode
 
-/* Compute the delta between where we were compiled to run at
+/*
+ * Compute the delta between where we were compiled to run at
  * and where the code will actually run at.
- */
-/* %ebp contains the address we are loaded at by the boot loader and %ebx
+ *
+ * %ebp contains the address we are loaded at by the boot loader and %ebx
  * contains the address where we should move the kernel image temporarily
  * for safe in-place decompression.
  */
 
 #ifdef CONFIG_RELOCATABLE
        movl    %ebp, %ebx
-       addl    $(PMD_PAGE_SIZE -1), %ebx
-       andl    $PMD_PAGE_MASK, %ebx
+       movl    BP_kernel_alignment(%esi), %eax
+       decl    %eax
+       addl    %eax, %ebx
+       notl    %eax
+       andl    %eax, %ebx
 #else
-       movl    $CONFIG_PHYSICAL_START, %ebx
+       movl    $LOAD_PHYSICAL_ADDR, %ebx
 #endif
 
-       /* Replace the compressed data size with the uncompressed size */
-       subl    input_len(%ebp), %ebx
-       movl    output_len(%ebp), %eax
-       addl    %eax, %ebx
-       /* Add 8 bytes for every 32K input block */
-       shrl    $12, %eax
-       addl    %eax, %ebx
-       /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
-       addl    $(32768 + 18 + 4095), %ebx
-       andl    $~4095, %ebx
+       /* Target address to relocate to for decompression */
+       addl    $z_extract_offset, %ebx
 
 /*
  * Prepare for entering 64 bit mode
@@ -114,7 +113,7 @@ ENTRY(startup_32)
  /*
   * Build early 4G boot pagetable
   */
-       /* Initialize Page tables to 0*/
+       /* Initialize Page tables to 0 */
        leal    pgtable(%ebx), %edi
        xorl    %eax, %eax
        movl    $((4096*6)/4), %ecx
@@ -155,7 +154,8 @@ ENTRY(startup_32)
        btsl    $_EFER_LME, %eax
        wrmsr
 
-       /* Setup for the jump to 64bit mode
+       /*
+        * Setup for the jump to 64bit mode
         *
         * When the jump is performend we will be in long mode but
         * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1
@@ -184,7 +184,8 @@ no_longmode:
 
 #include "../../kernel/verify_cpu_64.S"
 
-       /* Be careful here startup_64 needs to be at a predictable
+       /*
+        * Be careful here startup_64 needs to be at a predictable
         * address so I can export it in an ELF header.  Bootloaders
         * should look at the ELF header to find this address, as
         * it may change in the future.
@@ -192,7 +193,8 @@ no_longmode:
        .code64
        .org 0x200
 ENTRY(startup_64)
-       /* We come here either from startup_32 or directly from a
+       /*
+        * We come here either from startup_32 or directly from a
         * 64bit bootloader.  If we come here from a bootloader we depend on
         * an identity mapped page table being provied that maps our
         * entire text+data+bss and hopefully all of memory.
@@ -209,50 +211,54 @@ ENTRY(startup_64)
        movl    $0x20, %eax
        ltr     %ax
 
-       /* Compute the decompressed kernel start address.  It is where
+       /*
+        * Compute the decompressed kernel start address.  It is where
         * we were loaded at aligned to a 2M boundary. %rbp contains the
         * decompressed kernel start address.
         *
         * If it is a relocatable kernel then decompress and run the kernel
         * from load address aligned to 2MB addr, otherwise decompress and
-        * run the kernel from CONFIG_PHYSICAL_START
+        * run the kernel from LOAD_PHYSICAL_ADDR
+        *
+        * We cannot rely on the calculation done in 32-bit mode, since we
+        * may have been invoked via the 64-bit entry point.
         */
 
        /* Start with the delta to where the kernel will run at. */
 #ifdef CONFIG_RELOCATABLE
        leaq    startup_32(%rip) /* - $startup_32 */, %rbp
-       addq    $(PMD_PAGE_SIZE - 1), %rbp
-       andq    $PMD_PAGE_MASK, %rbp
-       movq    %rbp, %rbx
+       movl    BP_kernel_alignment(%rsi), %eax
+       decl    %eax
+       addq    %rax, %rbp
+       notq    %rax
+       andq    %rax, %rbp
 #else
-       movq    $CONFIG_PHYSICAL_START, %rbp
-       movq    %rbp, %rbx
+       movq    $LOAD_PHYSICAL_ADDR, %rbp
 #endif
 
-       /* Replace the compressed data size with the uncompressed size */
-       movl    input_len(%rip), %eax
-       subq    %rax, %rbx
-       movl    output_len(%rip), %eax
-       addq    %rax, %rbx
-       /* Add 8 bytes for every 32K input block */
-       shrq    $12, %rax
-       addq    %rax, %rbx
-       /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */
-       addq    $(32768 + 18 + 4095), %rbx
-       andq    $~4095, %rbx
-
-/* Copy the compressed kernel to the end of our buffer
+       /* Target address to relocate to for decompression */
+       leaq    z_extract_offset(%rbp), %rbx
+
+       /* Set up the stack */
+       leaq    boot_stack_end(%rbx), %rsp
+
+       /* Zero EFLAGS */
+       pushq   $0
+       popfq
+
+/*
+ * Copy the compressed kernel to the end of our buffer
  * where decompression in place becomes safe.
  */
-       leaq    _end_before_pgt(%rip), %r8
-       leaq    _end_before_pgt(%rbx), %r9
-       movq    $_end_before_pgt /* - $startup_32 */, %rcx
-1:     subq    $8, %r8
-       subq    $8, %r9
-       movq    0(%r8), %rax
-       movq    %rax, 0(%r9)
-       subq    $8, %rcx
-       jnz     1b
+       pushq   %rsi
+       leaq    (_bss-8)(%rip), %rsi
+       leaq    (_bss-8)(%rbx), %rdi
+       movq    $_bss /* - $startup_32 */, %rcx
+       shrq    $3, %rcx
+       std
+       rep     movsq
+       cld
+       popq    %rsi
 
 /*
  * Jump to the relocated address.
@@ -260,37 +266,28 @@ ENTRY(startup_64)
        leaq    relocated(%rbx), %rax
        jmp     *%rax
 
-.section ".text"
+       .text
 relocated:
 
 /*
- * Clear BSS
+ * Clear BSS (stack is currently empty)
  */
-       xorq    %rax, %rax
-       leaq    _edata(%rbx), %rdi
-       leaq    _end_before_pgt(%rbx), %rcx
+       xorl    %eax, %eax
+       leaq    _bss(%rip), %rdi
+       leaq    _ebss(%rip), %rcx
        subq    %rdi, %rcx
-       cld
-       rep
-       stosb
-
-       /* Setup the stack */
-       leaq    boot_stack_end(%rip), %rsp
-
-       /* zero EFLAGS after setting rsp */
-       pushq   $0
-       popfq
+       shrq    $3, %rcx
+       rep     stosq
 
 /*
  * Do the decompression, and jump to the new kernel..
  */
-       pushq   %rsi                    # Save the real mode argument
-       movq    %rsi, %rdi              # real mode address
-       leaq    boot_heap(%rip), %rsi   # malloc area for uncompression
-       leaq    input_data(%rip), %rdx  # input_data
-       movl    input_len(%rip), %eax
-       movq    %rax, %rcx              # input_len
-       movq    %rbp, %r8               # output
+       pushq   %rsi                    /* Save the real mode argument */
+       movq    %rsi, %rdi              /* real mode address */
+       leaq    boot_heap(%rip), %rsi   /* malloc area for uncompression */
+       leaq    input_data(%rip), %rdx  /* input_data */
+       movl    $z_input_len, %ecx      /* input_len */
+       movq    %rbp, %r8               /* output target address */
        call    decompress_kernel
        popq    %rsi
 
@@ -311,11 +308,21 @@ gdt:
        .quad   0x0000000000000000      /* TS continued */
 gdt_end:
 
-.bss
-/* Stack and heap for uncompression */
-.balign 4
+/*
+ * Stack and heap for uncompression
+ */
+       .bss
+       .balign 4
 boot_heap:
        .fill BOOT_HEAP_SIZE, 1, 0
 boot_stack:
        .fill BOOT_STACK_SIZE, 1, 0
 boot_stack_end:
+
+/*
+ * Space for page tables (not in .bss so not zeroed)
+ */
+       .section ".pgtable","a",@nobits
+       .balign 4096
+pgtable:
+       .fill 6*4096, 1, 0
index e45be73..842b2a3 100644 (file)
@@ -325,20 +325,18 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
        free_mem_ptr     = heap;        /* Heap */
        free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
 
+       if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
+               error("Destination address inappropriately aligned");
 #ifdef CONFIG_X86_64
-       if ((unsigned long)output & (__KERNEL_ALIGN - 1))
-               error("Destination address not 2M aligned");
-       if ((unsigned long)output >= 0xffffffffffUL)
+       if (heap > 0x3fffffffffffUL)
                error("Destination address too large");
 #else
-       if ((u32)output & (CONFIG_PHYSICAL_ALIGN - 1))
-               error("Destination address not CONFIG_PHYSICAL_ALIGN aligned");
        if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
                error("Destination address too large");
+#endif
 #ifndef CONFIG_RELOCATABLE
-       if ((u32)output != LOAD_PHYSICAL_ADDR)
+       if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
                error("Wrong destination address");
-#endif
 #endif
 
        if (!quiet)
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c
new file mode 100644 (file)
index 0000000..bcbd36c
--- /dev/null
@@ -0,0 +1,97 @@
+/* ----------------------------------------------------------------------- *
+ *
+ *  Copyright (C) 2009 Intel Corporation. All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License version
+ *  2 as published by the Free Software Foundation.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ *  02110-1301, USA.
+ *
+ *  H. Peter Anvin <hpa@linux.intel.com>
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Compute the desired load offset from a compressed program; outputs
+ * a small assembly wrapper with the appropriate symbols defined.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+/* Decode the four bytes at p as a little-endian 32-bit unsigned value. */
+static uint32_t getle32(const void *p)
+{
+       const uint8_t *bytes = p;
+       uint32_t value = 0;
+       int i;
+
+       /* Fold from the most significant byte (index 3) down to index 0 */
+       for (i = 3; i >= 0; i--)
+               value = (value << 8) | bytes[i];
+
+       return value;
+}
+
+/*
+ * Usage: mkpiggy compressed_file
+ *
+ * Reads the uncompressed length from the last four bytes of the
+ * compressed image (little-endian), takes the file size as the compressed
+ * length, and writes an assembly wrapper to stdout that defines
+ * z_input_len, z_output_len, z_extract_offset and
+ * z_extract_offset_negative and pulls in the image via .incbin.
+ *
+ * Returns 0 on success and 1 on any usage or I/O error, so that the
+ * build rule can discard a partial output file.
+ */
+int main(int argc, char *argv[])
+{
+       uint32_t olen;
+       long ilen;
+       unsigned long offs;
+       FILE *f;
+       int retval = 1;
+
+       if (argc < 2) {
+               fprintf(stderr, "Usage: %s compressed_file\n", argv[0]);
+               return 1;
+       }
+
+       /* Get the information for the compressed kernel image first */
+
+       f = fopen(argv[1], "rb");       /* binary data: no text translation */
+       if (!f) {
+               perror(argv[1]);
+               return 1;
+       }
+
+       /* Fails for files shorter than four bytes as well as on I/O errors */
+       if (fseek(f, -4L, SEEK_END)) {
+               perror(argv[1]);
+               goto bail;
+       }
+
+       if (fread(&olen, sizeof olen, 1, f) != 1) {
+               if (ferror(f))
+                       perror(argv[1]);
+               else
+                       fprintf(stderr, "%s: short read\n", argv[1]);
+               goto bail;
+       }
+
+       /* The stream is now positioned at end of file: this is the file size */
+       ilen = ftell(f);
+       if (ilen < 0) {
+               perror(argv[1]);
+               goto bail;
+       }
+       olen = getle32(&olen);
+
+       /*
+        * Now we have the input (compressed) and output (uncompressed)
+        * sizes, compute the necessary decompression offset...
+        */
+
+       /* ilen is known to be non-negative here, so compare as unsigned */
+       offs = (olen > (unsigned long)ilen) ? olen - (unsigned long)ilen : 0;
+       offs += olen >> 12;     /* Add 8 bytes for each 32K block */
+       offs += 32*1024 + 18;   /* Add 32K + 18 bytes slack */
+       offs = (offs+4095) & ~4095; /* Round to a 4K boundary */
+
+       printf(".section \".rodata.compressed\",\"a\",@progbits\n");
+       printf(".globl z_input_len\n");
+       printf("z_input_len = %lu\n", (unsigned long)ilen);
+       printf(".globl z_output_len\n");
+       printf("z_output_len = %lu\n", (unsigned long)olen);
+       printf(".globl z_extract_offset\n");
+       printf("z_extract_offset = 0x%lx\n", offs);
+       /* z_extract_offset_negative allows simplification of head_32.S */
+       printf(".globl z_extract_offset_negative\n");
+       printf("z_extract_offset_negative = -0x%lx\n", offs);
+
+       printf(".globl input_data, input_data_end\n");
+       printf("input_data:\n");
+       printf(".incbin \"%s\"\n", argv[1]);
+       printf("input_data_end:\n");
+
+       retval = 0;
+bail:
+       fclose(f);
+       return retval;
+}
similarity index 57%
rename from arch/x86/boot/compressed/vmlinux_64.lds
rename to arch/x86/boot/compressed/vmlinux.lds.S
index bef1ac8..cc353e1 100644 (file)
@@ -1,6 +1,17 @@
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+
+#undef i386
+
+#include <asm/page_types.h>
+
+#ifdef CONFIG_X86_64
 OUTPUT_ARCH(i386:x86-64)
 ENTRY(startup_64)
+#else
+OUTPUT_ARCH(i386)
+ENTRY(startup_32)
+#endif
+
 SECTIONS
 {
        /* Be careful parts of head_64.S assume startup_32 is at
@@ -33,16 +44,22 @@ SECTIONS
                *(.data.*)
                _edata = . ;
        }
+       . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
        .bss : {
                _bss = . ;
                *(.bss)
                *(.bss.*)
                *(COMMON)
-               . = ALIGN(8);
-               _end_before_pgt = . ;
-               . = ALIGN(4096);
-               pgtable = . ;
-               . = . + 4096 * 6;
+               . = ALIGN(8);   /* For convenience during zeroing */
                _ebss = .;
        }
+#ifdef CONFIG_X86_64
+       . = ALIGN(PAGE_SIZE);
+       .pgtable : {
+               _pgtable = . ;
+               *(.pgtable)
+               _epgtable = . ;
+       }
+#endif
+       _end = .;
 }
diff --git a/arch/x86/boot/compressed/vmlinux.scr b/arch/x86/boot/compressed/vmlinux.scr
deleted file mode 100644 (file)
index f02382a..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-SECTIONS
-{
-  .rodata.compressed : {
-       input_len = .;
-       LONG(input_data_end - input_data) input_data = .;
-       *(.data)
-       output_len = . - 4;
-       input_data_end = .;
-       }
-}
diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds
deleted file mode 100644 (file)
index bb3c483..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(startup_32)
-SECTIONS
-{
-       /* Be careful parts of head_32.S assume startup_32 is at
-        * address 0.
-        */
-       . = 0;
-       .text.head : {
-               _head = . ;
-               *(.text.head)
-               _ehead = . ;
-       }
-       .rodata.compressed : {
-               *(.rodata.compressed)
-       }
-       .text : {
-               _text = .;      /* Text */
-               *(.text)
-               *(.text.*)
-               _etext = . ;
-       }
-       .rodata : {
-               _rodata = . ;
-               *(.rodata)       /* read-only data */
-               *(.rodata.*)
-               _erodata = . ;
-       }
-       .data : {
-               _data = . ;
-               *(.data)
-               *(.data.*)
-               _edata = . ;
-       }
-       .bss : {
-               _bss = . ;
-               *(.bss)
-               *(.bss.*)
-               *(COMMON)
-               _end = . ;
-       }
-}
index 1aae8f3..c501a5b 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
  */
 static int read_mbr(u8 devno, void *buf)
 {
-       u16 ax, bx, cx, dx;
+       struct biosregs ireg, oreg;
 
-       ax = 0x0201;            /* Legacy Read, one sector */
-       cx = 0x0001;            /* Sector 0-0-1 */
-       dx = devno;
-       bx = (size_t)buf;
-       asm volatile("pushfl; stc; int $0x13; setc %%al; popfl"
-                    : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx)
-                    : : "esi", "edi", "memory");
+       initregs(&ireg);
+       ireg.ax = 0x0201;               /* Legacy Read, one sector */
+       ireg.cx = 0x0001;               /* Sector 0-0-1 */
+       ireg.dl = devno;
+       ireg.bx = (size_t)buf;
 
-       return -(u8)ax;         /* 0 or -1 */
+       intcall(0x13, &ireg, &oreg);
+
+       return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
 }
 
 static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
@@ -72,56 +73,46 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig)
 
 static int get_edd_info(u8 devno, struct edd_info *ei)
 {
-       u16 ax, bx, cx, dx, di;
+       struct biosregs ireg, oreg;
 
        memset(ei, 0, sizeof *ei);
 
        /* Check Extensions Present */
 
-       ax = 0x4100;
-       bx = EDDMAGIC1;
-       dx = devno;
-       asm("pushfl; stc; int $0x13; setc %%al; popfl"
-           : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx)
-           : : "esi", "edi");
+       initregs(&ireg);
+       ireg.ah = 0x41;
+       ireg.bx = EDDMAGIC1;
+       ireg.dl = devno;
+       intcall(0x13, &ireg, &oreg);
 
-       if ((u8)ax)
+       if (oreg.eflags & X86_EFLAGS_CF)
                return -1;      /* No extended information */
 
-       if (bx != EDDMAGIC2)
+       if (oreg.bx != EDDMAGIC2)
                return -1;
 
        ei->device  = devno;
-       ei->version = ax >> 8;  /* EDD version number */
-       ei->interface_support = cx; /* EDD functionality subsets */
+       ei->version = oreg.ah;           /* EDD version number */
+       ei->interface_support = oreg.cx; /* EDD functionality subsets */
 
        /* Extended Get Device Parameters */
 
        ei->params.length = sizeof(ei->params);
-       ax = 0x4800;
-       dx = devno;
-       asm("pushfl; int $0x13; popfl"
-           : "+a" (ax), "+d" (dx), "=m" (ei->params)
-           : "S" (&ei->params)
-           : "ebx", "ecx", "edi");
+       ireg.ah = 0x48;
+       ireg.si = (size_t)&ei->params;
+       intcall(0x13, &ireg, &oreg);
 
        /* Get legacy CHS parameters */
 
        /* Ralf Brown recommends setting ES:DI to 0:0 */
-       ax = 0x0800;
-       dx = devno;
-       di = 0;
-       asm("pushw %%es; "
-           "movw %%di,%%es; "
-           "pushfl; stc; int $0x13; setc %%al; popfl; "
-           "popw %%es"
-           : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di)
-           : : "esi");
-
-       if ((u8)ax == 0) {
-               ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2);
-               ei->legacy_max_head = dx >> 8;
-               ei->legacy_sectors_per_track = cx & 0x3f;
+       ireg.ah = 0x08;
+       ireg.es = 0;
+       intcall(0x13, &ireg, &oreg);
+
+       if (!(oreg.eflags & X86_EFLAGS_CF)) {
+               ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2);
+               ei->legacy_max_head = oreg.dh;
+               ei->legacy_sectors_per_track = oreg.cl & 0x3f;
        }
 
        return 0;
index 5d84d1c..b31cc54 100644 (file)
@@ -22,7 +22,8 @@
 #include <asm/page_types.h>
 #include <asm/setup.h>
 #include "boot.h"
-#include "offsets.h"
+#include "voffset.h"
+#include "zoffset.h"
 
 BOOTSEG                = 0x07C0                /* original address of boot-sector */
 SYSSEG         = 0x1000                /* historical load address >> 4 */
@@ -115,7 +116,7 @@ _start:
        # Part 2 of the header, from the old setup.S
 
                .ascii  "HdrS"          # header signature
-               .word   0x0209          # header version number (>= 0x0105)
+               .word   0x020a          # header version number (>= 0x0105)
                                        # or else old loadlin-1.5 will fail)
                .globl realmode_swtch
 realmode_swtch:        .word   0, 0            # default_switch, SETUPSEG
@@ -168,7 +169,11 @@ heap_end_ptr:      .word   _end+STACK_SIZE-512
                                        # end of setup code can be used by setup
                                        # for local heap purposes.
 
-pad1:          .word   0
+ext_loader_ver:
+               .byte   0               # Extended boot loader version
+ext_loader_type:
+               .byte   0               # Extended boot loader type
+
 cmd_line_ptr:  .long   0               # (Header version 0x0202 or later)
                                        # If nonzero, a 32-bit pointer
                                        # to the kernel command line.
@@ -200,7 +205,7 @@ relocatable_kernel:    .byte 1
 #else
 relocatable_kernel:    .byte 0
 #endif
-pad2:                  .byte 0
+min_alignment:         .byte MIN_KERNEL_ALIGN_LG2      # minimum alignment
 pad3:                  .word 0
 
 cmdline_size:   .long   COMMAND_LINE_SIZE-1     #length of the command line,
@@ -212,16 +217,27 @@ hardware_subarch: .long 0                 # subarchitecture, added with 2.07
 
 hardware_subarch_data: .quad 0
 
-payload_offset:                .long input_data
-payload_length:                .long input_data_end-input_data
+payload_offset:                .long ZO_input_data
+payload_length:                .long ZO_z_input_len
 
 setup_data:            .quad 0                 # 64-bit physical pointer to
                                                # single linked list of
                                                # struct setup_data
 
+pref_address:          .quad LOAD_PHYSICAL_ADDR        # preferred load addr
+
+#define ZO_INIT_SIZE   (ZO__end - ZO_startup_32 + ZO_z_extract_offset)
+#define VO_INIT_SIZE   (VO__end - VO__text)
+#if ZO_INIT_SIZE > VO_INIT_SIZE
+#define INIT_SIZE ZO_INIT_SIZE
+#else
+#define INIT_SIZE VO_INIT_SIZE
+#endif
+init_size:             .long INIT_SIZE         # kernel initialization size
+
 # End of setup header #####################################################
 
-       .section ".inittext", "ax"
+       .section ".entrytext", "ax"
 start_of_setup:
 #ifdef SAFE_RESET_DISK_CONTROLLER
 # Reset the disk controller.
index 58f0415..140172b 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -61,11 +62,10 @@ static void copy_boot_params(void)
  */
 static void keyboard_set_repeat(void)
 {
-       u16 ax = 0x0305;
-       u16 bx = 0;
-       asm volatile("int $0x16"
-                    : "+a" (ax), "+b" (bx)
-                    : : "ecx", "edx", "esi", "edi");
+       struct biosregs ireg;
+       initregs(&ireg);
+       ireg.ax = 0x0305;
+       intcall(0x16, &ireg, NULL);
 }
 
 /*
@@ -73,18 +73,22 @@ static void keyboard_set_repeat(void)
  */
 static void query_ist(void)
 {
+       struct biosregs ireg, oreg;
+
        /* Some older BIOSes apparently crash on this call, so filter
           it from machines too old to have SpeedStep at all. */
        if (cpu.level < 6)
                return;
 
-       asm("int $0x15"
-           : "=a" (boot_params.ist_info.signature),
-             "=b" (boot_params.ist_info.command),
-             "=c" (boot_params.ist_info.event),
-             "=d" (boot_params.ist_info.perf_level)
-           : "a" (0x0000e980),  /* IST Support */
-             "d" (0x47534943)); /* Request value */
+       initregs(&ireg);
+       ireg.ax  = 0xe980;       /* IST Support */
+       ireg.edx = 0x47534943;   /* Request value */
+       intcall(0x15, &ireg, &oreg);
+
+       boot_params.ist_info.signature  = oreg.eax;
+       boot_params.ist_info.command    = oreg.ebx;
+       boot_params.ist_info.event      = oreg.ecx;
+       boot_params.ist_info.perf_level = oreg.edx;
 }
 
 /*
@@ -93,13 +97,12 @@ static void query_ist(void)
 static void set_bios_mode(void)
 {
 #ifdef CONFIG_X86_64
-       u32 eax, ebx;
+       struct biosregs ireg;
 
-       eax = 0xec00;
-       ebx = 2;
-       asm volatile("int $0x15"
-                    : "+a" (eax), "+b" (ebx)
-                    : : "ecx", "edx", "esi", "edi");
+       initregs(&ireg);
+       ireg.ax = 0xec00;
+       ireg.bx = 2;
+       intcall(0x15, &ireg, NULL);
 #endif
 }
 
index 911eaae..a95a531 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
 
 int query_mca(void)
 {
-       u8 err;
-       u16 es, bx, len;
-
-       asm("pushw %%es ; "
-           "int $0x15 ; "
-           "setc %0 ; "
-           "movw %%es, %1 ; "
-           "popw %%es"
-           : "=acd" (err), "=acdSD" (es), "=b" (bx)
-           : "a" (0xc000));
-
-       if (err)
+       struct biosregs ireg, oreg;
+       u16 len;
+
+       initregs(&ireg);
+       ireg.ah = 0xc0;
+       intcall(0x15, &ireg, &oreg);
+
+       if (oreg.eflags & X86_EFLAGS_CF)
                return -1;      /* No MCA present */
 
-       set_fs(es);
-       len = rdfs16(bx);
+       set_fs(oreg.es);
+       len = rdfs16(oreg.bx);
 
        if (len > sizeof(boot_params.sys_desc_table))
                len = sizeof(boot_params.sys_desc_table);
 
-       copy_from_fs(&boot_params.sys_desc_table, bx, len);
+       copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len);
        return 0;
 }
index 74b3d2b..cae3feb 100644 (file)
 static int detect_memory_e820(void)
 {
        int count = 0;
-       u32 next = 0;
-       u32 size, id, edi;
-       u8 err;
+       struct biosregs ireg, oreg;
        struct e820entry *desc = boot_params.e820_map;
        static struct e820entry buf; /* static so it is zeroed */
 
+       initregs(&ireg);
+       ireg.ax  = 0xe820;
+       ireg.cx  = sizeof buf;
+       ireg.edx = SMAP;
+       ireg.di  = (size_t)&buf;
+
        /*
         * Note: at least one BIOS is known which assumes that the
         * buffer pointed to by one e820 call is the same one as
@@ -41,22 +45,13 @@ static int detect_memory_e820(void)
         */
 
        do {
-               size = sizeof buf;
-
-               /* Important: %edx and %esi are clobbered by some BIOSes,
-                  so they must be either used for the error output
-                  or explicitly marked clobbered.  Given that, assume there
-                  is something out there clobbering %ebp and %edi, too. */
-               asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0"
-                   : "=d" (err), "+b" (next), "=a" (id), "+c" (size),
-                     "=D" (edi), "+m" (buf)
-                   : "D" (&buf), "d" (SMAP), "a" (0xe820)
-                   : "esi");
+               intcall(0x15, &ireg, &oreg);
+               ireg.ebx = oreg.ebx; /* for next iteration... */
 
                /* BIOSes which terminate the chain with CF = 1 as opposed
                   to %ebx = 0 don't always report the SMAP signature on
                   the final, failing, probe. */
-               if (err)
+               if (oreg.eflags & X86_EFLAGS_CF)
                        break;
 
                /* Some BIOSes stop returning SMAP in the middle of
@@ -64,60 +59,64 @@ static int detect_memory_e820(void)
                   screwed up the map at that point, we might have a
                   partial map, the full map, or complete garbage, so
                   just return failure. */
-               if (id != SMAP) {
+               if (oreg.eax != SMAP) {
                        count = 0;
                        break;
                }
 
                *desc++ = buf;
                count++;
-       } while (next && count < ARRAY_SIZE(boot_params.e820_map));
+       } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));
 
        return boot_params.e820_entries = count;
 }
 
 static int detect_memory_e801(void)
 {
-       u16 ax, bx, cx, dx;
-       u8 err;
+       struct biosregs ireg, oreg;
 
-       bx = cx = dx = 0;
-       ax = 0xe801;
-       asm("stc; int $0x15; setc %0"
-           : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx));
+       initregs(&ireg);
+       ireg.ax = 0xe801;
+       intcall(0x15, &ireg, &oreg);
 
-       if (err)
+       if (oreg.eflags & X86_EFLAGS_CF)
                return -1;
 
        /* Do we really need to do this? */
-       if (cx || dx) {
-               ax = cx;
-               bx = dx;
+       if (oreg.cx || oreg.dx) {
+               oreg.ax = oreg.cx;
+               oreg.bx = oreg.dx;
        }
 
-       if (ax > 15*1024)
+       if (oreg.ax > 15*1024) {
                return -1;      /* Bogus! */
-
-       /* This ignores memory above 16MB if we have a memory hole
-          there.  If someone actually finds a machine with a memory
-          hole at 16MB and no support for 0E820h they should probably
-          generate a fake e820 map. */
-       boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax;
+       } else if (oreg.ax == 15*1024) {
+               boot_params.alt_mem_k = (oreg.bx << 6) + oreg.ax;
+       } else {
+               /*
+                * This ignores memory above 16MB if we have a memory
+                * hole there.  If someone actually finds a machine
+                * with a memory hole at 16MB and no support for
+                * 0E820h they should probably generate a fake e820
+                * map.
+                */
+               boot_params.alt_mem_k = oreg.ax;
+       }
 
        return 0;
 }
 
 static int detect_memory_88(void)
 {
-       u16 ax;
-       u8 err;
+       struct biosregs ireg, oreg;
 
-       ax = 0x8800;
-       asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax));
+       initregs(&ireg);
+       ireg.ah = 0x88;
+       intcall(0x15, &ireg, &oreg);
 
-       boot_params.screen_info.ext_mem_k = ax;
+       boot_params.screen_info.ext_mem_k = oreg.ax;
 
-       return -err;
+       return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */
 }
 
 int detect_memory(void)
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c
new file mode 100644 (file)
index 0000000..958019b
--- /dev/null
@@ -0,0 +1,29 @@
+/* -----------------------------------------------------------------------
+ *
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *
+ *   This file is part of the Linux kernel, and is made available under
+ *   the terms of the GNU General Public License version 2 or (at your
+ *   option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * Simple helper function for initializing a register set.
+ *
+ * Note that this sets EFLAGS_CF in the input register set; this
+ * makes it easier to catch functions which do nothing but don't
+ * explicitly set CF.
+ */
+
+#include "boot.h"
+
+void initregs(struct biosregs *reg)
+{
+       memset(reg, 0, sizeof *reg);
+       reg->eflags |= X86_EFLAGS_CF;
+       reg->ds = ds();
+       reg->es = ds();
+       reg->fs = fs();
+       reg->gs = gs();
+}
index bb8dc2d..0f6ec45 100644 (file)
@@ -15,8 +15,11 @@ SECTIONS
 
        . = 497;
        .header         : { *(.header) }
+       .entrytext      : { *(.entrytext) }
        .inittext       : { *(.inittext) }
        .initdata       : { *(.initdata) }
+       __end_init = .;
+
        .text           : { *(.text) }
        .text32         : { *(.text32) }
 
@@ -52,4 +55,7 @@ SECTIONS
 
        . = ASSERT(_end <= 0x8000, "Setup too big!");
        . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!");
+       /* Necessary for the very-old-loader check to work... */
+       . = ASSERT(__end_init <= 5*512, "init sections too big!");
+
 }
index 7e8e8b2..01ec69c 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
 
 void __attribute__((section(".inittext"))) putchar(int ch)
 {
-       unsigned char c = ch;
+       struct biosregs ireg;
 
-       if (c == '\n')
+       if (ch == '\n')
                putchar('\r');  /* \n -> \r\n */
 
-       /* int $0x10 is known to have bugs involving touching registers
-          it shouldn't.  Be extra conservative... */
-       asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal"
-                    : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch));
+       initregs(&ireg);
+       ireg.bx = 0x0007;
+       ireg.cx = 0x0001;
+       ireg.ah = 0x0e;
+       ireg.al = ch;
+       intcall(0x10, &ireg, NULL);
 }
 
 void __attribute__((section(".inittext"))) puts(const char *str)
 {
-       int n = 0;
-       while (*str) {
+       while (*str)
                putchar(*str++);
-               n++;
-       }
 }
 
 /*
@@ -49,14 +49,13 @@ void __attribute__((section(".inittext"))) puts(const char *str)
 
 static u8 gettime(void)
 {
-       u16 ax = 0x0200;
-       u16 cx, dx;
+       struct biosregs ireg, oreg;
 
-       asm volatile("int $0x1a"
-                    : "+a" (ax), "=c" (cx), "=d" (dx)
-                    : : "ebx", "esi", "edi");
+       initregs(&ireg);
+       ireg.ah = 0x02;
+       intcall(0x1a, &ireg, &oreg);
 
-       return dx >> 8;
+       return oreg.dh;
 }
 
 /*
@@ -64,19 +63,24 @@ static u8 gettime(void)
  */
 int getchar(void)
 {
-       u16 ax = 0;
-       asm volatile("int $0x16" : "+a" (ax));
+       struct biosregs ireg, oreg;
+
+       initregs(&ireg);
+       /* ireg.ah = 0x00; */
+       intcall(0x16, &ireg, &oreg);
 
-       return ax & 0xff;
+       return oreg.al;
 }
 
 static int kbd_pending(void)
 {
-       u8 pending;
-       asm volatile("int $0x16; setnz %0"
-                    : "=qm" (pending)
-                    : "a" (0x0100));
-       return pending;
+       struct biosregs ireg, oreg;
+
+       initregs(&ireg);
+       ireg.ah = 0x01;
+       intcall(0x16, &ireg, &oreg);
+
+       return !(oreg.eflags & X86_EFLAGS_ZF);
 }
 
 void kbd_flush(void)
index 3fa979c..d660be4 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -29,21 +30,21 @@ static int bios_set_mode(struct mode_info *mi)
 
 static int set_bios_mode(u8 mode)
 {
-       u16 ax;
+       struct biosregs ireg, oreg;
        u8 new_mode;
 
-       ax = mode;              /* AH=0x00 Set Video Mode */
-       asm volatile(INT10
-                    : "+a" (ax)
-                    : : "ebx", "ecx", "edx", "esi", "edi");
+       initregs(&ireg);
+       ireg.al = mode;         /* AH=0x00 Set Video Mode */
+       intcall(0x10, &ireg, NULL);
 
-       ax = 0x0f00;            /* Get Current Video Mode */
-       asm volatile(INT10
-                    : "+a" (ax)
-                    : : "ebx", "ecx", "edx", "esi", "edi");
+
+       ireg.ah = 0x0f;         /* Get Current Video Mode */
+       intcall(0x10, &ireg, &oreg);
 
        do_restore = 1;         /* Assume video contents were lost */
-       new_mode = ax & 0x7f;   /* Not all BIOSes are clean with the top bit */
+
+       /* Not all BIOSes are clean with the top bit */
+       new_mode = oreg.al & 0x7f;      /* AL returned by AH=0x0f */
 
        if (new_mode == mode)
                return 0;       /* Mode change OK */
@@ -53,10 +54,8 @@ static int set_bios_mode(u8 mode)
                /* Mode setting failed, but we didn't end up where we
                   started.  That's bad.  Try to revert to the original
                   video mode. */
-               ax = boot_params.screen_info.orig_video_mode;
-               asm volatile(INT10
-                            : "+a" (ax)
-                            : : "ebx", "ecx", "edx", "esi", "edi");
+               ireg.ax = boot_params.screen_info.orig_video_mode;
+               intcall(0x10, &ireg, NULL);
        }
 #endif
        return -1;
index 4a58c8c..c700147 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -31,7 +32,7 @@ static inline void vesa_store_mode_params_graphics(void) {}
 static int vesa_probe(void)
 {
 #if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID)
-       u16 ax, cx, di;
+       struct biosregs ireg, oreg;
        u16 mode;
        addr_t mode_ptr;
        struct mode_info *mi;
@@ -39,13 +40,12 @@ static int vesa_probe(void)
 
        video_vesa.modes = GET_HEAP(struct mode_info, 0);
 
-       ax = 0x4f00;
-       di = (size_t)&vginfo;
-       asm(INT10
-           : "+a" (ax), "+D" (di), "=m" (vginfo)
-           : : "ebx", "ecx", "edx", "esi");
+       initregs(&ireg);
+       ireg.ax = 0x4f00;
+       ireg.di = (size_t)&vginfo;
+       intcall(0x10, &ireg, &oreg);
 
-       if (ax != 0x004f ||
+       if (oreg.ax != 0x004f ||        /* VBE status is in the output regs */
            vginfo.signature != VESA_MAGIC ||
            vginfo.version < 0x0102)
                return 0;       /* Not present */
@@ -65,14 +65,12 @@ static int vesa_probe(void)
 
                memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
 
-               ax = 0x4f01;
-               cx = mode;
-               di = (size_t)&vminfo;
-               asm(INT10
-                   : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
-                   : : "ebx", "edx", "esi");
+               ireg.ax = 0x4f01;
+               ireg.cx = mode;
+               ireg.di = (size_t)&vminfo;
+               intcall(0x10, &ireg, &oreg);
 
-               if (ax != 0x004f)
+               if (oreg.ax != 0x004f)  /* VBE status is in the output regs */
                        continue;
 
                if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -111,20 +109,19 @@ static int vesa_probe(void)
 
 static int vesa_set_mode(struct mode_info *mode)
 {
-       u16 ax, bx, cx, di;
+       struct biosregs ireg, oreg;
        int is_graphic;
        u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA;
 
        memset(&vminfo, 0, sizeof vminfo); /* Just in case... */
 
-       ax = 0x4f01;
-       cx = vesa_mode;
-       di = (size_t)&vminfo;
-       asm(INT10
-           : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo)
-           : : "ebx", "edx", "esi");
+       initregs(&ireg);
+       ireg.ax = 0x4f01;
+       ireg.cx = vesa_mode;
+       ireg.di = (size_t)&vminfo;
+       intcall(0x10, &ireg, &oreg);
 
-       if (ax != 0x004f)
+       if (oreg.ax != 0x004f)
                return -1;
 
        if ((vminfo.mode_attr & 0x15) == 0x05) {
@@ -141,14 +138,12 @@ static int vesa_set_mode(struct mode_info *mode)
        }
 
 
-       ax = 0x4f02;
-       bx = vesa_mode;
-       di = 0;
-       asm volatile(INT10
-                    : "+a" (ax), "+b" (bx), "+D" (di)
-                    : : "ecx", "edx", "esi");
+       initregs(&ireg);
+       ireg.ax = 0x4f02;
+       ireg.bx = vesa_mode;
+       intcall(0x10, &ireg, &oreg);
 
-       if (ax != 0x004f)
+       if (oreg.ax != 0x004f)
                return -1;
 
        graphic_mode = is_graphic;
@@ -171,50 +166,45 @@ static int vesa_set_mode(struct mode_info *mode)
 /* Switch DAC to 8-bit mode */
 static void vesa_dac_set_8bits(void)
 {
+       struct biosregs ireg, oreg;
        u8 dac_size = 6;
 
        /* If possible, switch the DAC to 8-bit mode */
        if (vginfo.capabilities & 1) {
-               u16 ax, bx;
-
-               ax = 0x4f08;
-               bx = 0x0800;
-               asm volatile(INT10
-                            : "+a" (ax), "+b" (bx)
-                            : : "ecx", "edx", "esi", "edi");
-
-               if (ax == 0x004f)
-                       dac_size = bx >> 8;
+               initregs(&ireg);
+               ireg.ax = 0x4f08;
+               ireg.bh = 0x08;
+               intcall(0x10, &ireg, &oreg);
+               if (oreg.ax == 0x004f)
+                       dac_size = oreg.bh;
        }
 
        /* Set the color sizes to the DAC size, and offsets to 0 */
-       boot_params.screen_info.red_size = dac_size;
+       boot_params.screen_info.red_size   = dac_size;
        boot_params.screen_info.green_size = dac_size;
-       boot_params.screen_info.blue_size = dac_size;
-       boot_params.screen_info.rsvd_size = dac_size;
+       boot_params.screen_info.blue_size  = dac_size;
+       boot_params.screen_info.rsvd_size  = dac_size;
 
-       boot_params.screen_info.red_pos = 0;
-       boot_params.screen_info.green_pos = 0;
-       boot_params.screen_info.blue_pos = 0;
-       boot_params.screen_info.rsvd_pos = 0;
+       boot_params.screen_info.red_pos    = 0;
+       boot_params.screen_info.green_pos  = 0;
+       boot_params.screen_info.blue_pos   = 0;
+       boot_params.screen_info.rsvd_pos   = 0;
 }
 
 /* Save the VESA protected mode info */
 static void vesa_store_pm_info(void)
 {
-       u16 ax, bx, di, es;
+       struct biosregs ireg, oreg;
 
-       ax = 0x4f0a;
-       bx = di = 0;
-       asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es"
-           : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di)
-           : : "ecx", "esi");
+       initregs(&ireg);
+       ireg.ax = 0x4f0a;
+       intcall(0x10, &ireg, &oreg);
 
-       if (ax != 0x004f)
+       if (oreg.ax != 0x004f)
                return;
 
-       boot_params.screen_info.vesapm_seg = es;
-       boot_params.screen_info.vesapm_off = di;
+       boot_params.screen_info.vesapm_seg = oreg.es;
+       boot_params.screen_info.vesapm_off = oreg.di;
 }
 
 /*
@@ -252,7 +242,7 @@ static void vesa_store_mode_params_graphics(void)
 void vesa_store_edid(void)
 {
 #ifdef CONFIG_FIRMWARE_EDID
-       u16 ax, bx, cx, dx, di;
+       struct biosregs ireg, oreg;
 
        /* Apparently used as a nonsense token... */
        memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info);
@@ -260,33 +250,26 @@ void vesa_store_edid(void)
        if (vginfo.version < 0x0200)
                return;         /* EDID requires VBE 2.0+ */
 
-       ax = 0x4f15;            /* VBE DDC */
-       bx = 0x0000;            /* Report DDC capabilities */
-       cx = 0;                 /* Controller 0 */
-       di = 0;                 /* ES:DI must be 0 by spec */
-
-       /* Note: The VBE DDC spec is different from the main VESA spec;
-          we genuinely have to assume all registers are destroyed here. */
-
-       asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es"
-           : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di)
-           : : "esi", "edx");
+       initregs(&ireg);
+       ireg.ax = 0x4f15;               /* VBE DDC */
+       /* ireg.bx = 0x0000; */         /* Report DDC capabilities */
+       /* ireg.cx = 0; */              /* Controller 0 */
+       ireg.es = 0;                    /* ES:DI must be 0 by spec */
+       intcall(0x10, &ireg, &oreg);
 
-       if (ax != 0x004f)
+       if (oreg.ax != 0x004f)
                return;         /* No EDID */
 
        /* BH = time in seconds to transfer EDD information */
        /* BL = DDC level supported */
 
-       ax = 0x4f15;            /* VBE DDC */
-       bx = 0x0001;            /* Read EDID */
-       cx = 0;                 /* Controller 0 */
-       dx = 0;                 /* EDID block number */
-       di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */
-       asm(INT10
-           : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info),
-             "+c" (cx), "+D" (di)
-           : : "esi");
+       ireg.ax = 0x4f15;               /* VBE DDC */
+       ireg.bx = 0x0001;               /* Read EDID */
+       /* ireg.cx = 0; */              /* Controller 0 */
+       /* ireg.dx = 0; */              /* EDID block number */
+       ireg.es = ds();
+       ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */
+       intcall(0x10, &ireg, &oreg);
 #endif /* CONFIG_FIRMWARE_EDID */
 }
 
index 9e0587a..8f8d827 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
@@ -39,30 +40,30 @@ static __videocard video_vga;
 /* Set basic 80x25 mode */
 static u8 vga_set_basic_mode(void)
 {
+       struct biosregs ireg, oreg;
        u16 ax;
        u8 rows;
        u8 mode;
 
+       initregs(&ireg);
+
 #ifdef CONFIG_VIDEO_400_HACK
        if (adapter >= ADAPTER_VGA) {
-               asm volatile(INT10
-                            : : "a" (0x1202), "b" (0x0030)
-                            : "ecx", "edx", "esi", "edi");
+               ireg.ax = 0x1202;
+               ireg.bx = 0x0030;
+               intcall(0x10, &ireg, NULL);
        }
 #endif
 
-       ax = 0x0f00;
+       ireg.ax = 0x0f00;       /* AH=0x0f: get current video mode */
-       asm volatile(INT10
-                    : "+a" (ax)
-                    : : "ebx", "ecx", "edx", "esi", "edi");
-
-       mode = (u8)ax;
+       intcall(0x10, &ireg, &oreg);
+       mode = oreg.al;
 
        set_fs(0);
        rows = rdfs8(0x484);    /* rows minus one */
 
 #ifndef CONFIG_VIDEO_400_HACK
-       if ((ax == 0x5003 || ax == 0x5007) &&
+       if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) &&
            (rows == 0 || rows == 24))
                return mode;
 #endif
@@ -71,10 +72,8 @@ static u8 vga_set_basic_mode(void)
                mode = 3;
 
        /* Set the mode */
-       ax = mode;
-       asm volatile(INT10
-                    : "+a" (ax)
-                    : : "ebx", "ecx", "edx", "esi", "edi");
+       ireg.ax = mode;         /* AH=0: set mode */
+       intcall(0x10, &ireg, NULL);
        do_restore = 1;
        return mode;
 }
@@ -82,43 +81,69 @@ static u8 vga_set_basic_mode(void)
 static void vga_set_8font(void)
 {
        /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */
+       struct biosregs ireg;
+
+       initregs(&ireg);
 
        /* Set 8x8 font */
-       asm volatile(INT10 : : "a" (0x1112), "b" (0));
+       ireg.ax = 0x1112;
+       /* ireg.bl = 0; */
+       intcall(0x10, &ireg, NULL);
 
        /* Use alternate print screen */
-       asm volatile(INT10 : : "a" (0x1200), "b" (0x20));
+       ireg.ax = 0x1200;
+       ireg.bl = 0x20;
+       intcall(0x10, &ireg, NULL);
 
        /* Turn off cursor emulation */
-       asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+       ireg.ax = 0x1201;
+       ireg.bl = 0x34;
+       intcall(0x10, &ireg, NULL);
 
        /* Cursor is scan lines 6-7 */
-       asm volatile(INT10 : : "a" (0x0100), "c" (0x0607));
+       ireg.ax = 0x0100;
+       ireg.cx = 0x0607;
+       intcall(0x10, &ireg, NULL);
 }
 
 static void vga_set_14font(void)
 {
        /* Set 9x14 font - 80x28 on VGA */
+       struct biosregs ireg;
+
+       initregs(&ireg);
 
        /* Set 9x14 font */
-       asm volatile(INT10 : : "a" (0x1111), "b" (0));
+       ireg.ax = 0x1111;
+       /* ireg.bl = 0; */
+       intcall(0x10, &ireg, NULL);
 
        /* Turn off cursor emulation */
-       asm volatile(INT10 : : "a" (0x1201), "b" (0x34));
+       ireg.ax = 0x1201;
+       ireg.bl = 0x34;
+       intcall(0x10, &ireg, NULL);
 
        /* Cursor is scan lines 11-12 */
-       asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c));
+       ireg.ax = 0x0100;
+       ireg.cx = 0x0b0c;
+       intcall(0x10, &ireg, NULL);
 }
 
 static void vga_set_80x43(void)
 {
        /* Set 80x43 mode on VGA (not EGA) */
+       struct biosregs ireg;
+
+       initregs(&ireg);
 
        /* Set 350 scans */
-       asm volatile(INT10 : : "a" (0x1201), "b" (0x30));
+       ireg.ax = 0x1201;
+       ireg.bl = 0x30;
+       intcall(0x10, &ireg, NULL);
 
        /* Reset video mode */
-       asm volatile(INT10 : : "a" (0x0003));
+       ireg.ax = 0x0003;
+       intcall(0x10, &ireg, NULL);
 
        vga_set_8font();
 }
@@ -225,8 +250,6 @@ static int vga_set_mode(struct mode_info *mode)
  */
 static int vga_probe(void)
 {
-       u16 ega_bx;
-
        static const char *card_name[] = {
                "CGA/MDA/HGC", "EGA", "VGA"
        };
@@ -240,26 +263,26 @@ static int vga_probe(void)
                sizeof(ega_modes)/sizeof(struct mode_info),
                sizeof(vga_modes)/sizeof(struct mode_info),
        };
-       u8 vga_flag;
 
-       asm(INT10
-           : "=b" (ega_bx)
-           : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */
-           : "ecx", "edx", "esi", "edi");
+       struct biosregs ireg, oreg;
+
+       initregs(&ireg);
+
+       ireg.ax = 0x1200;
+       ireg.bl = 0x10;         /* Check EGA/VGA */
+       intcall(0x10, &ireg, &oreg);
 
 #ifndef _WAKEUP
-       boot_params.screen_info.orig_video_ega_bx = ega_bx;
+       boot_params.screen_info.orig_video_ega_bx = oreg.bx;
 #endif
 
        /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */
-       if ((u8)ega_bx != 0x10) {
+       if (oreg.bl != 0x10) {
                /* EGA/VGA */
-               asm(INT10
-                   : "=a" (vga_flag)
-                   : "a" (0x1a00)
-                   : "ebx", "ecx", "edx", "esi", "edi");
+               ireg.ax = 0x1a00;
+               intcall(0x10, &ireg, &oreg);
 
-               if (vga_flag == 0x1a) {
+               if (oreg.al == 0x1a) {
                        adapter = ADAPTER_VGA;
 #ifndef _WAKEUP
                        boot_params.screen_info.orig_video_isVGA = 1;
index 3bef2c1..bad728b 100644 (file)
@@ -2,6 +2,7 @@
  *
  *   Copyright (C) 1991, 1992 Linus Torvalds
  *   Copyright 2007 rPath, Inc. - All Rights Reserved
+ *   Copyright 2009 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2.
 
 static void store_cursor_position(void)
 {
-       u16 curpos;
-       u16 ax, bx;
+       struct biosregs ireg, oreg;
 
-       ax = 0x0300;
-       bx = 0;
-       asm(INT10
-           : "=d" (curpos), "+a" (ax), "+b" (bx)
-           : : "ecx", "esi", "edi");
+       initregs(&ireg);
+       ireg.ah = 0x03;
+       intcall(0x10, &ireg, &oreg);
 
-       boot_params.screen_info.orig_x = curpos;
-       boot_params.screen_info.orig_y = curpos >> 8;
+       boot_params.screen_info.orig_x = oreg.dl;
+       boot_params.screen_info.orig_y = oreg.dh;
 }
 
 static void store_video_mode(void)
 {
-       u16 ax, page;
+       struct biosregs ireg, oreg;
 
        /* N.B.: the saving of the video page here is a bit silly,
           since we pretty much assume page 0 everywhere. */
-       ax = 0x0f00;
-       asm(INT10
-           : "+a" (ax), "=b" (page)
-           : : "ecx", "edx", "esi", "edi");
+       initregs(&ireg);
+       ireg.ah = 0x0f;
+       intcall(0x10, &ireg, &oreg);
 
        /* Not all BIOSes are clean with respect to the top bit */
-       boot_params.screen_info.orig_video_mode = ax & 0x7f;
-       boot_params.screen_info.orig_video_page = page >> 8;
+       boot_params.screen_info.orig_video_mode = oreg.al & 0x7f;
+       boot_params.screen_info.orig_video_page = oreg.bh;
 }
 
 /*
@@ -257,7 +254,7 @@ static void restore_screen(void)
        int y;
        addr_t dst = 0;
        u16 *src = saved.data;
-       u16 ax, bx, dx;
+       struct biosregs ireg;
 
        if (graphic_mode)
                return;         /* Can't restore onto a graphic mode */
@@ -296,12 +293,11 @@ static void restore_screen(void)
        }
 
        /* Restore cursor position */
-       ax = 0x0200;            /* Set cursor position */
-       bx = 0;                 /* Page number (<< 8) */
-       dx = (saved.cury << 8)+saved.curx;
-       asm volatile(INT10
-                    : "+a" (ax), "+b" (bx), "+d" (dx)
-                    : : "ecx", "esi", "edi");
+       initregs(&ireg);
+       ireg.ah = 0x02;         /* Set cursor position */
+       ireg.dh = saved.cury;
+       ireg.dl = saved.curx;
+       intcall(0x10, &ireg, NULL);
 }
 #else
 #define save_screen()          ((void)0)
index ee63f5d..5bb174a 100644 (file)
@@ -112,20 +112,6 @@ extern int force_x, force_y;       /* Don't query the BIOS for cols/rows */
 extern int do_restore;         /* Restore screen contents */
 extern int graphic_mode;       /* Graphics mode with linear frame buffer */
 
-/*
- * int $0x10 is notorious for touching registers it shouldn't.
- * gcc doesn't like %ebp being clobbered, so define it as a push/pop
- * sequence here.
- *
- * A number of systems, including the original PC can clobber %bp in
- * certain circumstances, like when scrolling.  There exists at least
- * one Trident video card which could clobber DS under a set of
- * circumstances that we are unlikely to encounter (scrolling when
- * using an extended graphics mode of more than 800x600 pixels), but
- * it's cheap insurance to deal with that here.
- */
-#define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp"
-
 /* Accessing VGA indexed registers */
 static inline u8 in_idx(u16 port, u8 index)
 {
index 235b81d..edb992e 100644 (file)
@@ -1,12 +1,13 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc4
-# Tue Feb 24 15:50:58 2009
+# Linux kernel version: 2.6.30-rc2
+# Mon May 11 16:21:55 2009
 #
 # CONFIG_64BIT is not set
 CONFIG_X86_32=y
 # CONFIG_X86_64 is not set
 CONFIG_X86=y
+CONFIG_OUTPUT_FORMAT="elf32-i386"
 CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig"
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CMOS_UPDATE=y
@@ -33,6 +34,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y
 CONFIG_ARCH_HAS_DEFAULT_IDLE=y
 CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
 # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set
 CONFIG_ARCH_HIBERNATION_POSSIBLE=y
 CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -40,15 +42,16 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 # CONFIG_AUDIT_ARCH is not set
 CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_X86_SMP=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_X86_32_SMP=y
 CONFIG_X86_HT=y
-CONFIG_X86_BIOS_REBOOT=y
 CONFIG_X86_TRAMPOLINE=y
+CONFIG_X86_32_LAZY_GS=y
 CONFIG_KTIME_SCALAR=y
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
 
@@ -60,10 +63,17 @@ CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 CONFIG_TASKSTATS=y
@@ -113,23 +123,26 @@ CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_PCSPKR_PLATFORM=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
@@ -139,6 +152,7 @@ CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
 CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
@@ -154,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 CONFIG_HAVE_GENERIC_DMA_COHERENT=y
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -167,7 +183,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
 # CONFIG_LBD is not set
-CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 
@@ -194,12 +209,12 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
-CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
+# CONFIG_X86_BIGSMP is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
 # CONFIG_X86_ELAN is not set
-# CONFIG_X86_GENERICARCH is not set
-# CONFIG_X86_VSMP is not set
 # CONFIG_X86_RDC321X is not set
+# CONFIG_X86_32_NON_STANDARD is not set
 CONFIG_SCHED_OMIT_FRAME_POINTER=y
 # CONFIG_PARAVIRT_GUEST is not set
 # CONFIG_MEMTEST is not set
@@ -230,8 +245,10 @@ CONFIG_M686=y
 # CONFIG_GENERIC_CPU is not set
 CONFIG_X86_GENERIC=y
 CONFIG_X86_CPU=y
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=5
 CONFIG_X86_XADD=y
 # CONFIG_X86_PPRO_FENCE is not set
 CONFIG_X86_WP_WORKS_OK=y
@@ -247,7 +264,7 @@ CONFIG_X86_DEBUGCTLMSR=y
 CONFIG_CPU_SUP_INTEL=y
 CONFIG_CPU_SUP_CYRIX_32=y
 CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR_32=y
+CONFIG_CPU_SUP_CENTAUR=y
 CONFIG_CPU_SUP_TRANSMETA_32=y
 CONFIG_CPU_SUP_UMC_32=y
 CONFIG_X86_DS=y
@@ -279,6 +296,7 @@ CONFIG_MICROCODE_AMD=y
 CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
+# CONFIG_X86_CPU_DEBUG is not set
 # CONFIG_NOHIGHMEM is not set
 CONFIG_HIGHMEM4G=y
 # CONFIG_HIGHMEM64G is not set
@@ -302,6 +320,8 @@ CONFIG_ZONE_DMA_FLAG=1
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_HIGHPTE=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
@@ -312,6 +332,7 @@ CONFIG_MTRR=y
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
+# CONFIG_CC_STACKPROTECTOR is not set
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
@@ -322,8 +343,9 @@ CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
 # CONFIG_KEXEC_JUMP is not set
 CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_RELOCATABLE=y
+CONFIG_X86_NEED_RELOCS=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
 # CONFIG_CMDLINE_BOOL is not set
@@ -363,7 +385,6 @@ CONFIG_ACPI_THERMAL=y
 CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
 # CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_SYSTEM=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_ACPI_CONTAINER=y
 # CONFIG_ACPI_SBS is not set
@@ -425,6 +446,7 @@ CONFIG_PCI_BIOS=y
 CONFIG_PCI_DIRECT=y
 CONFIG_PCI_MMCONFIG=y
 CONFIG_PCI_DOMAINS=y
+# CONFIG_DMAR is not set
 CONFIG_PCIEPORTBUS=y
 # CONFIG_HOTPLUG_PCI_PCIE is not set
 CONFIG_PCIEAER=y
@@ -435,6 +457,7 @@ CONFIG_PCI_MSI=y
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_STUB is not set
 CONFIG_HT_IRQ=y
+# CONFIG_PCI_IOV is not set
 CONFIG_ISA_DMA_API=y
 # CONFIG_ISA is not set
 # CONFIG_MCA is not set
@@ -481,7 +504,6 @@ CONFIG_NET=y
 #
 # Networking options
 #
-CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -639,6 +661,7 @@ CONFIG_LLC=y
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 CONFIG_NET_SCHED=y
 
 #
@@ -696,6 +719,7 @@ CONFIG_NET_SCH_FIFO=y
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_HAMRADIO=y
 
 #
@@ -706,12 +730,10 @@ CONFIG_HAMRADIO=y
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_FIB_RULES=y
 CONFIG_WIRELESS=y
 CONFIG_CFG80211=y
 # CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
 CONFIG_WIRELESS_OLD_REGULATORY=y
 CONFIG_WIRELESS_EXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
@@ -789,6 +811,7 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
 # CONFIG_HP_ILO is not set
+# CONFIG_ISL29003 is not set
 # CONFIG_C2PORT is not set
 
 #
@@ -842,6 +865,7 @@ CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 CONFIG_ATA=y
 # CONFIG_ATA_NONSTANDARD is not set
 CONFIG_ATA_ACPI=y
@@ -940,6 +964,7 @@ CONFIG_DM_ZERO=y
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_IFB is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
@@ -977,6 +1002,8 @@ CONFIG_MII=y
 CONFIG_NET_VENDOR_3COM=y
 # CONFIG_VORTEX is not set
 # CONFIG_TYPHOON is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
 CONFIG_NET_TULIP=y
 # CONFIG_DE2104X is not set
 # CONFIG_TULIP is not set
@@ -1026,6 +1053,7 @@ CONFIG_E1000=y
 CONFIG_E1000E=y
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -1040,6 +1068,7 @@ CONFIG_BNX2=y
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1078,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -1058,6 +1088,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
+# CONFIG_BE2NET is not set
 CONFIG_TR=y
 # CONFIG_IBMOL is not set
 # CONFIG_IBMLS is not set
@@ -1073,8 +1104,8 @@ CONFIG_WLAN_80211=y
 # CONFIG_LIBERTAS is not set
 # CONFIG_LIBERTAS_THINFIRM is not set
 # CONFIG_AIRO is not set
-# CONFIG_HERMES is not set
 # CONFIG_ATMEL is not set
+# CONFIG_AT76C50X_USB is not set
 # CONFIG_AIRO_CS is not set
 # CONFIG_PCMCIA_WL3501 is not set
 # CONFIG_PRISM54 is not set
@@ -1084,21 +1115,21 @@ CONFIG_WLAN_80211=y
 # CONFIG_RTL8187 is not set
 # CONFIG_ADM8211 is not set
 # CONFIG_MAC80211_HWSIM is not set
+# CONFIG_MWL8K is not set
 # CONFIG_P54_COMMON is not set
 CONFIG_ATH5K=y
 # CONFIG_ATH5K_DEBUG is not set
 # CONFIG_ATH9K is not set
+# CONFIG_AR9170_USB is not set
 # CONFIG_IPW2100 is not set
 # CONFIG_IPW2200 is not set
-# CONFIG_IWLCORE is not set
-# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_IWLWIFI is not set
 # CONFIG_HOSTAP is not set
 # CONFIG_B43 is not set
 # CONFIG_B43LEGACY is not set
 # CONFIG_ZD1211RW is not set
 # CONFIG_RT2X00 is not set
+# CONFIG_HERMES is not set
 
 #
 # Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1209,6 +1240,8 @@ CONFIG_INPUT_TABLET=y
 # CONFIG_TABLET_USB_KBTAB is not set
 # CONFIG_TABLET_USB_WACOM is not set
 CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -1303,6 +1336,7 @@ CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 CONFIG_HW_RANDOM_INTEL=y
 CONFIG_HW_RANDOM_AMD=y
 CONFIG_HW_RANDOM_GEODE=y
@@ -1390,7 +1424,6 @@ CONFIG_I2C_I801=y
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -1424,6 +1457,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_ADT7475 is not set
 # CONFIG_SENSORS_K8TEMP is not set
 # CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATK0110 is not set
 # CONFIG_SENSORS_ATXP1 is not set
 # CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_I5K_AMB is not set
@@ -1433,6 +1467,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_FSCHER is not set
 # CONFIG_SENSORS_FSCPOS is not set
 # CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_CORETEMP is not set
@@ -1448,11 +1483,14 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_LM90 is not set
 # CONFIG_SENSORS_LM92 is not set
 # CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
 # CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
 # CONFIG_SENSORS_MAX1619 is not set
 # CONFIG_SENSORS_MAX6650 is not set
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_SIS5595 is not set
 # CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
@@ -1643,7 +1681,6 @@ CONFIG_FB_EFI=y
 # CONFIG_FB_3DFX is not set
 # CONFIG_FB_VOODOO1 is not set
 # CONFIG_FB_VT8623 is not set
-# CONFIG_FB_CYBLA is not set
 # CONFIG_FB_TRIDENT is not set
 # CONFIG_FB_ARK is not set
 # CONFIG_FB_PM3 is not set
@@ -1652,6 +1689,7 @@ CONFIG_FB_EFI=y
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
 # CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 # CONFIG_LCD_CLASS_DEVICE is not set
 CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1738,6 +1776,8 @@ CONFIG_SND_PCI=y
 # CONFIG_SND_INDIGO is not set
 # CONFIG_SND_INDIGOIO is not set
 # CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
 # CONFIG_SND_EMU10K1 is not set
 # CONFIG_SND_EMU10K1X is not set
 # CONFIG_SND_ENS1370 is not set
@@ -1811,15 +1851,17 @@ CONFIG_USB_HIDDEV=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
 CONFIG_HID_LOGITECH=y
 CONFIG_LOGITECH_FF=y
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1885,11 +1927,11 @@ CONFIG_USB_PRINTER=y
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1931,7 +1973,6 @@ CONFIG_USB_LIBUSUAL=y
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 # CONFIG_USB_APPLEDISPLAY is not set
@@ -1947,6 +1988,7 @@ CONFIG_USB_LIBUSUAL=y
 #
 # OTG and related infrastructure
 #
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_UWB is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
@@ -1958,8 +2000,10 @@ CONFIG_LEDS_CLASS=y
 #
 # CONFIG_LEDS_ALIX2 is not set
 # CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
 # CONFIG_LEDS_CLEVO_MAIL is not set
 # CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
 
 #
 # LED Triggers
@@ -1969,6 +2013,10 @@ CONFIG_LEDS_TRIGGERS=y
 # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
 # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
 # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
 # CONFIG_ACCESSIBILITY is not set
 # CONFIG_INFINIBAND is not set
 CONFIG_EDAC=y
@@ -2037,6 +2085,7 @@ CONFIG_DMADEVICES=y
 # DMA Devices
 #
 # CONFIG_INTEL_IOATDMA is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 CONFIG_X86_PLATFORM_DEVICES=y
@@ -2071,6 +2120,7 @@ CONFIG_DMIID=y
 #
 # CONFIG_EXT2_FS is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
@@ -2100,6 +2150,11 @@ CONFIG_AUTOFS4_FS=y
 # CONFIG_FUSE_FS is not set
 CONFIG_GENERIC_ACL=y
 
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
 #
 # CD-ROM/DVD Filesystems
 #
@@ -2151,6 +2206,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -2164,7 +2220,6 @@ CONFIG_NFS_ACL_SUPPORT=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -2251,6 +2306,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
@@ -2266,6 +2322,7 @@ CONFIG_TIMER_STATS=y
 # CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_HIGHMEM is not set
 CONFIG_DEBUG_BUGVERBOSE=y
@@ -2289,13 +2346,19 @@ CONFIG_FRAME_POINTER=y
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_DEBUG_PAGEALLOC is not set
 CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_HAVE_HW_BRANCH_TRACER=y
+CONFIG_HAVE_FTRACE_SYSCALLS=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -2305,13 +2368,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y
 # CONFIG_SYSPROF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_FTRACE_SYSCALLS is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_POWER_TRACER is not set
 # CONFIG_STACK_TRACER is not set
 # CONFIG_HW_BRANCH_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_MMIOTRACE is not set
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -2321,7 +2392,6 @@ CONFIG_EARLY_PRINTK=y
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
-# CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_DEBUG_PER_CPU_MAPS is not set
 # CONFIG_X86_PTDUMP is not set
 CONFIG_DEBUG_RODATA=y
@@ -2329,7 +2399,7 @@ CONFIG_DEBUG_RODATA=y
 CONFIG_DEBUG_NX_TEST=m
 # CONFIG_4KSTACKS is not set
 CONFIG_DOUBLEFAULT=y
-# CONFIG_MMIOTRACE is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
 CONFIG_IO_DELAY_TYPE_0X80=0
 CONFIG_IO_DELAY_TYPE_0XED=1
 CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2365,6 +2435,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y
 CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
 # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
 # CONFIG_SECURITY_SMACK is not set
+# CONFIG_SECURITY_TOMOYO is not set
+# CONFIG_IMA is not set
 CONFIG_CRYPTO=y
 
 #
@@ -2380,10 +2452,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
 CONFIG_CRYPTO_HASH2=y
 CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 CONFIG_CRYPTO_AUTHENC=y
 # CONFIG_CRYPTO_TEST is not set
@@ -2456,6 +2530,7 @@ CONFIG_CRYPTO_DES=y
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
@@ -2467,11 +2542,13 @@ CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_GEODE is not set
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
 CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
 CONFIG_VIRTUALIZATION=y
 # CONFIG_KVM is not set
 # CONFIG_LGUEST is not set
 # CONFIG_VIRTIO_PCI is not set
 # CONFIG_VIRTIO_BALLOON is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
@@ -2489,7 +2566,10 @@ CONFIG_CRC32=y
 # CONFIG_LIBCRC32C is not set
 CONFIG_AUDIT_GENERIC=y
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
index 9fe5d21..cee1dd2 100644 (file)
@@ -1,12 +1,13 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.29-rc4
-# Tue Feb 24 15:44:16 2009
+# Linux kernel version: 2.6.30-rc2
+# Mon May 11 16:22:00 2009
 #
 CONFIG_64BIT=y
 # CONFIG_X86_32 is not set
 CONFIG_X86_64=y
 CONFIG_X86=y
+CONFIG_OUTPUT_FORMAT="elf64-x86-64"
 CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig"
 CONFIG_GENERIC_TIME=y
 CONFIG_GENERIC_CMOS_UPDATE=y
@@ -34,6 +35,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y
 CONFIG_ARCH_HAS_DEFAULT_IDLE=y
 CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y
 CONFIG_HAVE_SETUP_PER_CPU_AREA=y
+CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y
 CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y
 CONFIG_ARCH_HIBERNATION_POSSIBLE=y
 CONFIG_ARCH_SUSPEND_POSSIBLE=y
@@ -41,14 +43,14 @@ CONFIG_ZONE_DMA32=y
 CONFIG_ARCH_POPULATES_NODE_MAP=y
 CONFIG_AUDIT_ARCH=y
 CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y
+CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y
 CONFIG_GENERIC_HARDIRQS=y
+CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y
 CONFIG_GENERIC_IRQ_PROBE=y
 CONFIG_GENERIC_PENDING_IRQ=y
-CONFIG_X86_SMP=y
 CONFIG_USE_GENERIC_SMP_HELPERS=y
 CONFIG_X86_64_SMP=y
 CONFIG_X86_HT=y
-CONFIG_X86_BIOS_REBOOT=y
 CONFIG_X86_TRAMPOLINE=y
 # CONFIG_KTIME_SCALAR is not set
 CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config"
@@ -61,10 +63,17 @@ CONFIG_LOCK_KERNEL=y
 CONFIG_INIT_ENV_ARG_LIMIT=32
 CONFIG_LOCALVERSION=""
 # CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_HAVE_KERNEL_GZIP=y
+CONFIG_HAVE_KERNEL_BZIP2=y
+CONFIG_HAVE_KERNEL_LZMA=y
+CONFIG_KERNEL_GZIP=y
+# CONFIG_KERNEL_BZIP2 is not set
+# CONFIG_KERNEL_LZMA is not set
 CONFIG_SWAP=y
 CONFIG_SYSVIPC=y
 CONFIG_SYSVIPC_SYSCTL=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_POSIX_MQUEUE_SYSCTL=y
 CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_BSD_PROCESS_ACCT_V3 is not set
 CONFIG_TASKSTATS=y
@@ -114,23 +123,26 @@ CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_INITRAMFS_SOURCE=""
+CONFIG_RD_GZIP=y
+CONFIG_RD_BZIP2=y
+CONFIG_RD_LZMA=y
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 CONFIG_SYSCTL=y
+CONFIG_ANON_INODES=y
 # CONFIG_EMBEDDED is not set
 CONFIG_UID16=y
 CONFIG_SYSCTL_SYSCALL=y
 CONFIG_KALLSYMS=y
 CONFIG_KALLSYMS_ALL=y
 CONFIG_KALLSYMS_EXTRA_PASS=y
+# CONFIG_STRIP_ASM_SYMS is not set
 CONFIG_HOTPLUG=y
 CONFIG_PRINTK=y
 CONFIG_BUG=y
 CONFIG_ELF_CORE=y
 CONFIG_PCSPKR_PLATFORM=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_BASE_FULL=y
 CONFIG_FUTEX=y
-CONFIG_ANON_INODES=y
 CONFIG_EPOLL=y
 CONFIG_SIGNALFD=y
 CONFIG_TIMERFD=y
@@ -140,6 +152,7 @@ CONFIG_AIO=y
 CONFIG_VM_EVENT_COUNTERS=y
 CONFIG_PCI_QUIRKS=y
 CONFIG_SLUB_DEBUG=y
+# CONFIG_COMPAT_BRK is not set
 # CONFIG_SLAB is not set
 CONFIG_SLUB=y
 # CONFIG_SLOB is not set
@@ -155,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y
 CONFIG_HAVE_KPROBES=y
 CONFIG_HAVE_KRETPROBES=y
 CONFIG_HAVE_ARCH_TRACEHOOK=y
+CONFIG_HAVE_DMA_API_DEBUG=y
+# CONFIG_SLOW_WORK is not set
 # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set
 CONFIG_SLABINFO=y
 CONFIG_RT_MUTEXES=y
@@ -167,7 +182,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 # CONFIG_MODULE_SRCVERSION_ALL is not set
 CONFIG_STOP_MACHINE=y
 CONFIG_BLOCK=y
-CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_BLK_DEV_BSG=y
 # CONFIG_BLK_DEV_INTEGRITY is not set
 CONFIG_BLOCK_COMPAT=y
@@ -195,12 +209,10 @@ CONFIG_HIGH_RES_TIMERS=y
 CONFIG_GENERIC_CLOCKEVENTS_BUILD=y
 CONFIG_SMP=y
 CONFIG_SPARSE_IRQ=y
-# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set
-CONFIG_X86_FIND_SMP_CONFIG=y
 CONFIG_X86_MPPARSE=y
-# CONFIG_X86_ELAN is not set
-# CONFIG_X86_GENERICARCH is not set
+CONFIG_X86_EXTENDED_PLATFORM=y
 # CONFIG_X86_VSMP is not set
+# CONFIG_X86_UV is not set
 CONFIG_SCHED_OMIT_FRAME_POINTER=y
 # CONFIG_PARAVIRT_GUEST is not set
 # CONFIG_MEMTEST is not set
@@ -230,10 +242,10 @@ CONFIG_SCHED_OMIT_FRAME_POINTER=y
 # CONFIG_MCORE2 is not set
 CONFIG_GENERIC_CPU=y
 CONFIG_X86_CPU=y
-CONFIG_X86_L1_CACHE_BYTES=128
-CONFIG_X86_INTERNODE_CACHE_BYTES=128
+CONFIG_X86_L1_CACHE_BYTES=64
+CONFIG_X86_INTERNODE_CACHE_BYTES=64
 CONFIG_X86_CMPXCHG=y
-CONFIG_X86_L1_CACHE_SHIFT=7
+CONFIG_X86_L1_CACHE_SHIFT=6
 CONFIG_X86_WP_WORKS_OK=y
 CONFIG_X86_TSC=y
 CONFIG_X86_CMPXCHG64=y
@@ -242,7 +254,7 @@ CONFIG_X86_MINIMUM_CPU_FAMILY=64
 CONFIG_X86_DEBUGCTLMSR=y
 CONFIG_CPU_SUP_INTEL=y
 CONFIG_CPU_SUP_AMD=y
-CONFIG_CPU_SUP_CENTAUR_64=y
+CONFIG_CPU_SUP_CENTAUR=y
 CONFIG_X86_DS=y
 CONFIG_X86_PTRACE_BTS=y
 CONFIG_HPET_TIMER=y
@@ -269,6 +281,7 @@ CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_INTEL=y
 CONFIG_X86_MCE_AMD=y
+CONFIG_X86_MCE_THRESHOLD=y
 # CONFIG_I8K is not set
 CONFIG_MICROCODE=y
 CONFIG_MICROCODE_INTEL=y
@@ -276,6 +289,7 @@ CONFIG_MICROCODE_AMD=y
 CONFIG_MICROCODE_OLD_INTERFACE=y
 CONFIG_X86_MSR=y
 CONFIG_X86_CPUID=y
+# CONFIG_X86_CPU_DEBUG is not set
 CONFIG_ARCH_PHYS_ADDR_T_64BIT=y
 CONFIG_DIRECT_GBPAGES=y
 CONFIG_NUMA=y
@@ -309,6 +323,8 @@ CONFIG_ZONE_DMA_FLAG=1
 CONFIG_BOUNCE=y
 CONFIG_VIRT_TO_BUS=y
 CONFIG_UNEVICTABLE_LRU=y
+CONFIG_HAVE_MLOCK=y
+CONFIG_HAVE_MLOCKED_PAGE_BIT=y
 CONFIG_X86_CHECK_BIOS_CORRUPTION=y
 CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y
 CONFIG_X86_RESERVE_LOW_64K=y
@@ -317,6 +333,7 @@ CONFIG_MTRR=y
 CONFIG_X86_PAT=y
 CONFIG_EFI=y
 CONFIG_SECCOMP=y
+# CONFIG_CC_STACKPROTECTOR is not set
 # CONFIG_HZ_100 is not set
 # CONFIG_HZ_250 is not set
 # CONFIG_HZ_300 is not set
@@ -325,9 +342,10 @@ CONFIG_HZ=1000
 CONFIG_SCHED_HRTICK=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+# CONFIG_KEXEC_JUMP is not set
 CONFIG_PHYSICAL_START=0x1000000
-# CONFIG_RELOCATABLE is not set
-CONFIG_PHYSICAL_ALIGN=0x200000
+CONFIG_RELOCATABLE=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
 CONFIG_HOTPLUG_CPU=y
 # CONFIG_COMPAT_VDSO is not set
 # CONFIG_CMDLINE_BOOL is not set
@@ -370,7 +388,6 @@ CONFIG_ACPI_NUMA=y
 CONFIG_ACPI_BLACKLIST_YEAR=0
 # CONFIG_ACPI_DEBUG is not set
 # CONFIG_ACPI_PCI_SLOT is not set
-CONFIG_ACPI_SYSTEM=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_ACPI_CONTAINER=y
 # CONFIG_ACPI_SBS is not set
@@ -436,6 +453,7 @@ CONFIG_PCI_MSI=y
 # CONFIG_PCI_DEBUG is not set
 # CONFIG_PCI_STUB is not set
 CONFIG_HT_IRQ=y
+# CONFIG_PCI_IOV is not set
 CONFIG_ISA_DMA_API=y
 CONFIG_K8_NB=y
 CONFIG_PCCARD=y
@@ -481,7 +499,6 @@ CONFIG_NET=y
 #
 # Networking options
 #
-CONFIG_COMPAT_NET_DEV_OPS=y
 CONFIG_PACKET=y
 CONFIG_PACKET_MMAP=y
 CONFIG_UNIX=y
@@ -639,6 +656,7 @@ CONFIG_LLC=y
 # CONFIG_LAPB is not set
 # CONFIG_ECONET is not set
 # CONFIG_WAN_ROUTER is not set
+# CONFIG_PHONET is not set
 CONFIG_NET_SCHED=y
 
 #
@@ -696,6 +714,7 @@ CONFIG_NET_SCH_FIFO=y
 #
 # CONFIG_NET_PKTGEN is not set
 # CONFIG_NET_TCPPROBE is not set
+# CONFIG_NET_DROP_MONITOR is not set
 CONFIG_HAMRADIO=y
 
 #
@@ -706,12 +725,10 @@ CONFIG_HAMRADIO=y
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
 # CONFIG_AF_RXRPC is not set
-# CONFIG_PHONET is not set
 CONFIG_FIB_RULES=y
 CONFIG_WIRELESS=y
 CONFIG_CFG80211=y
 # CONFIG_CFG80211_REG_DEBUG is not set
-CONFIG_NL80211=y
 CONFIG_WIRELESS_OLD_REGULATORY=y
 CONFIG_WIRELESS_EXT=y
 CONFIG_WIRELESS_EXT_SYSFS=y
@@ -788,9 +805,8 @@ CONFIG_MISC_DEVICES=y
 # CONFIG_TIFM_CORE is not set
 # CONFIG_ICS932S401 is not set
 # CONFIG_ENCLOSURE_SERVICES is not set
-# CONFIG_SGI_XP is not set
 # CONFIG_HP_ILO is not set
-# CONFIG_SGI_GRU is not set
+# CONFIG_ISL29003 is not set
 # CONFIG_C2PORT is not set
 
 #
@@ -844,6 +860,7 @@ CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_LOWLEVEL is not set
 # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set
 # CONFIG_SCSI_DH is not set
+# CONFIG_SCSI_OSD_INITIATOR is not set
 CONFIG_ATA=y
 # CONFIG_ATA_NONSTANDARD is not set
 CONFIG_ATA_ACPI=y
@@ -940,6 +957,7 @@ CONFIG_DM_ZERO=y
 CONFIG_MACINTOSH_DRIVERS=y
 CONFIG_MAC_EMUMOUSEBTN=y
 CONFIG_NETDEVICES=y
+CONFIG_COMPAT_NET_DEV_OPS=y
 # CONFIG_IFB is not set
 # CONFIG_DUMMY is not set
 # CONFIG_BONDING is not set
@@ -977,6 +995,8 @@ CONFIG_MII=y
 CONFIG_NET_VENDOR_3COM=y
 # CONFIG_VORTEX is not set
 # CONFIG_TYPHOON is not set
+# CONFIG_ETHOC is not set
+# CONFIG_DNET is not set
 CONFIG_NET_TULIP=y
 # CONFIG_DE2104X is not set
 # CONFIG_TULIP is not set
@@ -1026,6 +1046,7 @@ CONFIG_E1000=y
 # CONFIG_E1000E is not set
 # CONFIG_IP1000 is not set
 # CONFIG_IGB is not set
+# CONFIG_IGBVF is not set
 # CONFIG_NS83820 is not set
 # CONFIG_HAMACHI is not set
 # CONFIG_YELLOWFIN is not set
@@ -1040,6 +1061,7 @@ CONFIG_TIGON3=y
 # CONFIG_QLA3XXX is not set
 # CONFIG_ATL1 is not set
 # CONFIG_ATL1E is not set
+# CONFIG_ATL1C is not set
 # CONFIG_JME is not set
 CONFIG_NETDEV_10000=y
 # CONFIG_CHELSIO_T1 is not set
@@ -1049,6 +1071,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_IXGBE is not set
 # CONFIG_IXGB is not set
 # CONFIG_S2IO is not set
+# CONFIG_VXGE is not set
 # CONFIG_MYRI10GE is not set
 # CONFIG_NETXEN_NIC is not set
 # CONFIG_NIU is not set
@@ -1058,6 +1081,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y
 # CONFIG_BNX2X is not set
 # CONFIG_QLGE is not set
 # CONFIG_SFC is not set
+# CONFIG_BE2NET is not set
 CONFIG_TR=y
 # CONFIG_IBMOL is not set
 # CONFIG_3C359 is not set
@@ -1072,8 +1096,8 @@ CONFIG_WLAN_80211=y
 # CONFIG_LIBERTAS is not set
 # CONFIG_LIBERTAS_THINFIRM is not set
 # CONFIG_AIRO is not set
-# CONFIG_HERMES is not set
 # CONFIG_ATMEL is not set
+# CONFIG_AT76C50X_USB is not set
 # CONFIG_AIRO_CS is not set
 # CONFIG_PCMCIA_WL3501 is not set
 # CONFIG_PRISM54 is not set
@@ -1083,21 +1107,21 @@ CONFIG_WLAN_80211=y
 # CONFIG_RTL8187 is not set
 # CONFIG_ADM8211 is not set
 # CONFIG_MAC80211_HWSIM is not set
+# CONFIG_MWL8K is not set
 # CONFIG_P54_COMMON is not set
 CONFIG_ATH5K=y
 # CONFIG_ATH5K_DEBUG is not set
 # CONFIG_ATH9K is not set
+# CONFIG_AR9170_USB is not set
 # CONFIG_IPW2100 is not set
 # CONFIG_IPW2200 is not set
-# CONFIG_IWLCORE is not set
-# CONFIG_IWLWIFI_LEDS is not set
-# CONFIG_IWLAGN is not set
-# CONFIG_IWL3945 is not set
+# CONFIG_IWLWIFI is not set
 # CONFIG_HOSTAP is not set
 # CONFIG_B43 is not set
 # CONFIG_B43LEGACY is not set
 # CONFIG_ZD1211RW is not set
 # CONFIG_RT2X00 is not set
+# CONFIG_HERMES is not set
 
 #
 # Enable WiMAX (Networking options) to see the WiMAX drivers
@@ -1208,6 +1232,8 @@ CONFIG_INPUT_TABLET=y
 # CONFIG_TABLET_USB_KBTAB is not set
 # CONFIG_TABLET_USB_WACOM is not set
 CONFIG_INPUT_TOUCHSCREEN=y
+# CONFIG_TOUCHSCREEN_AD7879_I2C is not set
+# CONFIG_TOUCHSCREEN_AD7879 is not set
 # CONFIG_TOUCHSCREEN_FUJITSU is not set
 # CONFIG_TOUCHSCREEN_GUNZE is not set
 # CONFIG_TOUCHSCREEN_ELO is not set
@@ -1301,6 +1327,7 @@ CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_IPMI_HANDLER is not set
 CONFIG_HW_RANDOM=y
+# CONFIG_HW_RANDOM_TIMERIOMEM is not set
 # CONFIG_HW_RANDOM_INTEL is not set
 # CONFIG_HW_RANDOM_AMD is not set
 CONFIG_NVRAM=y
@@ -1382,7 +1409,6 @@ CONFIG_I2C_I801=y
 # CONFIG_SENSORS_PCF8574 is not set
 # CONFIG_PCF8575 is not set
 # CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_MAX6875 is not set
 # CONFIG_SENSORS_TSL2550 is not set
 # CONFIG_I2C_DEBUG_CORE is not set
@@ -1416,6 +1442,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_ADT7475 is not set
 # CONFIG_SENSORS_K8TEMP is not set
 # CONFIG_SENSORS_ASB100 is not set
+# CONFIG_SENSORS_ATK0110 is not set
 # CONFIG_SENSORS_ATXP1 is not set
 # CONFIG_SENSORS_DS1621 is not set
 # CONFIG_SENSORS_I5K_AMB is not set
@@ -1425,6 +1452,7 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_FSCHER is not set
 # CONFIG_SENSORS_FSCPOS is not set
 # CONFIG_SENSORS_FSCHMD is not set
+# CONFIG_SENSORS_G760A is not set
 # CONFIG_SENSORS_GL518SM is not set
 # CONFIG_SENSORS_GL520SM is not set
 # CONFIG_SENSORS_CORETEMP is not set
@@ -1440,11 +1468,14 @@ CONFIG_HWMON=y
 # CONFIG_SENSORS_LM90 is not set
 # CONFIG_SENSORS_LM92 is not set
 # CONFIG_SENSORS_LM93 is not set
+# CONFIG_SENSORS_LTC4215 is not set
 # CONFIG_SENSORS_LTC4245 is not set
+# CONFIG_SENSORS_LM95241 is not set
 # CONFIG_SENSORS_MAX1619 is not set
 # CONFIG_SENSORS_MAX6650 is not set
 # CONFIG_SENSORS_PC87360 is not set
 # CONFIG_SENSORS_PC87427 is not set
+# CONFIG_SENSORS_PCF8591 is not set
 # CONFIG_SENSORS_SIS5595 is not set
 # CONFIG_SENSORS_DME1737 is not set
 # CONFIG_SENSORS_SMSC47M1 is not set
@@ -1635,6 +1666,7 @@ CONFIG_FB_EFI=y
 # CONFIG_FB_VIRTUAL is not set
 # CONFIG_FB_METRONOME is not set
 # CONFIG_FB_MB862XX is not set
+# CONFIG_FB_BROADSHEET is not set
 CONFIG_BACKLIGHT_LCD_SUPPORT=y
 # CONFIG_LCD_CLASS_DEVICE is not set
 CONFIG_BACKLIGHT_CLASS_DEVICE=y
@@ -1720,6 +1752,8 @@ CONFIG_SND_PCI=y
 # CONFIG_SND_INDIGO is not set
 # CONFIG_SND_INDIGOIO is not set
 # CONFIG_SND_INDIGODJ is not set
+# CONFIG_SND_INDIGOIOX is not set
+# CONFIG_SND_INDIGODJX is not set
 # CONFIG_SND_EMU10K1 is not set
 # CONFIG_SND_EMU10K1X is not set
 # CONFIG_SND_ENS1370 is not set
@@ -1792,15 +1826,17 @@ CONFIG_USB_HIDDEV=y
 #
 # Special HID drivers
 #
-CONFIG_HID_COMPAT=y
 CONFIG_HID_A4TECH=y
 CONFIG_HID_APPLE=y
 CONFIG_HID_BELKIN=y
 CONFIG_HID_CHERRY=y
 CONFIG_HID_CHICONY=y
 CONFIG_HID_CYPRESS=y
+# CONFIG_DRAGONRISE_FF is not set
 CONFIG_HID_EZKEY=y
+CONFIG_HID_KYE=y
 CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
 CONFIG_HID_LOGITECH=y
 CONFIG_LOGITECH_FF=y
 # CONFIG_LOGIRUMBLEPAD2_FF is not set
@@ -1866,11 +1902,11 @@ CONFIG_USB_PRINTER=y
 # CONFIG_USB_TMC is not set
 
 #
-# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed;
+# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may
 #
 
 #
-# see USB_STORAGE Help for more information
+# also be needed; see USB_STORAGE Help for more info
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -1912,7 +1948,6 @@ CONFIG_USB_LIBUSUAL=y
 # CONFIG_USB_LED is not set
 # CONFIG_USB_CYPRESS_CY7C63 is not set
 # CONFIG_USB_CYTHERM is not set
-# CONFIG_USB_PHIDGET is not set
 # CONFIG_USB_IDMOUSE is not set
 # CONFIG_USB_FTDI_ELAN is not set
 # CONFIG_USB_APPLEDISPLAY is not set
@@ -1928,6 +1963,7 @@ CONFIG_USB_LIBUSUAL=y
 #
 # OTG and related infrastructure
 #
+# CONFIG_NOP_USB_XCEIV is not set
 # CONFIG_UWB is not set
 # CONFIG_MMC is not set
 # CONFIG_MEMSTICK is not set
@@ -1939,8 +1975,10 @@ CONFIG_LEDS_CLASS=y
 #
 # CONFIG_LEDS_ALIX2 is not set
 # CONFIG_LEDS_PCA9532 is not set
+# CONFIG_LEDS_LP5521 is not set
 # CONFIG_LEDS_CLEVO_MAIL is not set
 # CONFIG_LEDS_PCA955X is not set
+# CONFIG_LEDS_BD2802 is not set
 
 #
 # LED Triggers
@@ -1950,6 +1988,10 @@ CONFIG_LEDS_TRIGGERS=y
 # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set
 # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set
 # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set
+
+#
+# iptables trigger is under Netfilter config (LED target)
+#
 # CONFIG_ACCESSIBILITY is not set
 # CONFIG_INFINIBAND is not set
 CONFIG_EDAC=y
@@ -2018,6 +2060,7 @@ CONFIG_DMADEVICES=y
 # DMA Devices
 #
 # CONFIG_INTEL_IOATDMA is not set
+# CONFIG_AUXDISPLAY is not set
 # CONFIG_UIO is not set
 # CONFIG_STAGING is not set
 CONFIG_X86_PLATFORM_DEVICES=y
@@ -2051,6 +2094,7 @@ CONFIG_DMIID=y
 #
 # CONFIG_EXT2_FS is not set
 CONFIG_EXT3_FS=y
+# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
 CONFIG_EXT3_FS_XATTR=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
@@ -2081,6 +2125,11 @@ CONFIG_AUTOFS4_FS=y
 # CONFIG_FUSE_FS is not set
 CONFIG_GENERIC_ACL=y
 
+#
+# Caches
+#
+# CONFIG_FSCACHE is not set
+
 #
 # CD-ROM/DVD Filesystems
 #
@@ -2132,6 +2181,7 @@ CONFIG_MISC_FILESYSTEMS=y
 # CONFIG_ROMFS_FS is not set
 # CONFIG_SYSV_FS is not set
 # CONFIG_UFS_FS is not set
+# CONFIG_NILFS2_FS is not set
 CONFIG_NETWORK_FILESYSTEMS=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3=y
@@ -2145,7 +2195,6 @@ CONFIG_NFS_ACL_SUPPORT=y
 CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=y
 CONFIG_SUNRPC_GSS=y
-# CONFIG_SUNRPC_REGISTER_V4 is not set
 CONFIG_RPCSEC_GSS_KRB5=y
 # CONFIG_RPCSEC_GSS_SPKM3 is not set
 # CONFIG_SMB_FS is not set
@@ -2232,6 +2281,7 @@ CONFIG_DEBUG_FS=y
 CONFIG_DEBUG_KERNEL=y
 # CONFIG_DEBUG_SHIRQ is not set
 # CONFIG_DETECT_SOFTLOCKUP is not set
+# CONFIG_DETECT_HUNG_TASK is not set
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
@@ -2247,6 +2297,7 @@ CONFIG_TIMER_STATS=y
 # CONFIG_LOCK_STAT is not set
 # CONFIG_DEBUG_SPINLOCK_SLEEP is not set
 # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
+CONFIG_STACKTRACE=y
 # CONFIG_DEBUG_KOBJECT is not set
 CONFIG_DEBUG_BUGVERBOSE=y
 # CONFIG_DEBUG_INFO is not set
@@ -2269,13 +2320,19 @@ CONFIG_FRAME_POINTER=y
 # CONFIG_FAULT_INJECTION is not set
 # CONFIG_LATENCYTOP is not set
 CONFIG_SYSCTL_SYSCALL_CHECK=y
+# CONFIG_DEBUG_PAGEALLOC is not set
 CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_NOP_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACER=y
 CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y
 CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y
 CONFIG_HAVE_DYNAMIC_FTRACE=y
 CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y
 CONFIG_HAVE_HW_BRANCH_TRACER=y
+CONFIG_HAVE_FTRACE_SYSCALLS=y
+CONFIG_RING_BUFFER=y
+CONFIG_TRACING=y
+CONFIG_TRACING_SUPPORT=y
 
 #
 # Tracers
@@ -2285,13 +2342,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y
 # CONFIG_SYSPROF_TRACER is not set
 # CONFIG_SCHED_TRACER is not set
 # CONFIG_CONTEXT_SWITCH_TRACER is not set
+# CONFIG_EVENT_TRACER is not set
+# CONFIG_FTRACE_SYSCALLS is not set
 # CONFIG_BOOT_TRACER is not set
 # CONFIG_TRACE_BRANCH_PROFILING is not set
 # CONFIG_POWER_TRACER is not set
 # CONFIG_STACK_TRACER is not set
 # CONFIG_HW_BRANCH_TRACER is not set
+# CONFIG_KMEMTRACE is not set
+# CONFIG_WORKQUEUE_TRACER is not set
+CONFIG_BLK_DEV_IO_TRACE=y
+# CONFIG_FTRACE_STARTUP_TEST is not set
+# CONFIG_MMIOTRACE is not set
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
-# CONFIG_DYNAMIC_PRINTK_DEBUG is not set
+# CONFIG_DYNAMIC_DEBUG is not set
+# CONFIG_DMA_API_DEBUG is not set
 # CONFIG_SAMPLES is not set
 CONFIG_HAVE_ARCH_KGDB=y
 # CONFIG_KGDB is not set
@@ -2301,14 +2366,13 @@ CONFIG_EARLY_PRINTK=y
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 CONFIG_DEBUG_STACK_USAGE=y
-# CONFIG_DEBUG_PAGEALLOC is not set
 # CONFIG_DEBUG_PER_CPU_MAPS is not set
 # CONFIG_X86_PTDUMP is not set
 CONFIG_DEBUG_RODATA=y
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_NX_TEST=m
 # CONFIG_IOMMU_DEBUG is not set
-# CONFIG_MMIOTRACE is not set
+CONFIG_HAVE_MMIOTRACE_SUPPORT=y
 CONFIG_IO_DELAY_TYPE_0X80=0
 CONFIG_IO_DELAY_TYPE_0XED=1
 CONFIG_IO_DELAY_TYPE_UDELAY=2
@@ -2344,6 +2408,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y
 CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1
 # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set
 # CONFIG_SECURITY_SMACK is not set
+# CONFIG_SECURITY_TOMOYO is not set
+# CONFIG_IMA is not set
 CONFIG_CRYPTO=y
 
 #
@@ -2359,10 +2425,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y
 CONFIG_CRYPTO_HASH=y
 CONFIG_CRYPTO_HASH2=y
 CONFIG_CRYPTO_RNG2=y
+CONFIG_CRYPTO_PCOMP=y
 CONFIG_CRYPTO_MANAGER=y
 CONFIG_CRYPTO_MANAGER2=y
 # CONFIG_CRYPTO_GF128MUL is not set
 # CONFIG_CRYPTO_NULL is not set
+CONFIG_CRYPTO_WORKQUEUE=y
 # CONFIG_CRYPTO_CRYPTD is not set
 CONFIG_CRYPTO_AUTHENC=y
 # CONFIG_CRYPTO_TEST is not set
@@ -2414,6 +2482,7 @@ CONFIG_CRYPTO_SHA1=y
 #
 CONFIG_CRYPTO_AES=y
 # CONFIG_CRYPTO_AES_X86_64 is not set
+# CONFIG_CRYPTO_AES_NI_INTEL is not set
 # CONFIG_CRYPTO_ANUBIS is not set
 CONFIG_CRYPTO_ARC4=y
 # CONFIG_CRYPTO_BLOWFISH is not set
@@ -2435,6 +2504,7 @@ CONFIG_CRYPTO_DES=y
 # Compression
 #
 # CONFIG_CRYPTO_DEFLATE is not set
+# CONFIG_CRYPTO_ZLIB is not set
 # CONFIG_CRYPTO_LZO is not set
 
 #
@@ -2444,10 +2514,12 @@ CONFIG_CRYPTO_DES=y
 CONFIG_CRYPTO_HW=y
 # CONFIG_CRYPTO_DEV_HIFN_795X is not set
 CONFIG_HAVE_KVM=y
+CONFIG_HAVE_KVM_IRQCHIP=y
 CONFIG_VIRTUALIZATION=y
 # CONFIG_KVM is not set
 # CONFIG_VIRTIO_PCI is not set
 # CONFIG_VIRTIO_BALLOON is not set
+CONFIG_BINARY_PRINTF=y
 
 #
 # Library routines
@@ -2464,7 +2536,10 @@ CONFIG_CRC32=y
 # CONFIG_CRC7 is not set
 # CONFIG_LIBCRC32C is not set
 CONFIG_ZLIB_INFLATE=y
-CONFIG_PLIST=y
+CONFIG_DECOMPRESS_GZIP=y
+CONFIG_DECOMPRESS_BZIP2=y
+CONFIG_DECOMPRESS_LZMA=y
 CONFIG_HAS_IOMEM=y
 CONFIG_HAS_IOPORT=y
 CONFIG_HAS_DMA=y
+CONFIG_NLATTR=y
index a505202..dcef387 100644 (file)
@@ -830,4 +830,5 @@ ia32_sys_call_table:
        .quad sys_inotify_init1
        .quad compat_sys_preadv
        .quad compat_sys_pwritev
+       .quad compat_sys_rt_tgsigqueueinfo      /* 335 */
 ia32_syscall_end:
index f6aa18e..1a37bcd 100644 (file)
@@ -3,6 +3,7 @@
 
 #include <linux/types.h>
 #include <linux/stddef.h>
+#include <linux/stringify.h>
 #include <asm/asm.h>
 
 /*
@@ -74,6 +75,22 @@ static inline void alternatives_smp_switch(int smp) {}
 
 const unsigned char *const *find_nop_table(void);
 
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature)                       \
+                                                                       \
+      "661:\n\t" oldinstr "\n662:\n"                                   \
+      ".section .altinstructions,\"a\"\n"                              \
+      _ASM_ALIGN "\n"                                                  \
+      _ASM_PTR "661b\n"                                /* label           */   \
+      _ASM_PTR "663f\n"                                /* new instruction */   \
+      "         .byte " __stringify(feature) "\n"      /* feature bit     */   \
+      "         .byte 662b-661b\n"                     /* sourcelen       */   \
+      "         .byte 664f-663f\n"                     /* replacementlen  */   \
+      ".previous\n"                                                    \
+      ".section .altinstr_replacement, \"ax\"\n"                       \
+      "663:\n\t" newinstr "\n664:\n"           /* replacement     */   \
+      ".previous"
+
 /*
  * Alternative instructions for different CPU types or capabilities.
  *
@@ -87,18 +104,7 @@ const unsigned char *const *find_nop_table(void);
  * without volatile and memory clobber.
  */
 #define alternative(oldinstr, newinstr, feature)                       \
-       asm volatile ("661:\n\t" oldinstr "\n662:\n"                    \
-                     ".section .altinstructions,\"a\"\n"               \
-                     _ASM_ALIGN "\n"                                   \
-                     _ASM_PTR "661b\n"         /* label */             \
-                     _ASM_PTR "663f\n"         /* new instruction */   \
-                     "  .byte %c0\n"           /* feature bit */       \
-                     "  .byte 662b-661b\n"     /* sourcelen */         \
-                     "  .byte 664f-663f\n"     /* replacementlen */    \
-                     ".previous\n"                                     \
-                     ".section .altinstr_replacement,\"ax\"\n"         \
-                     "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
-                     ".previous" :: "i" (feature) : "memory")
+       asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
 /*
  * Alternative inline assembly with input.
@@ -109,35 +115,16 @@ const unsigned char *const *find_nop_table(void);
  * Best is to use constraints that are fixed size (like (%1) ... "r")
  * If you use variable sized constraints like "m" or "g" in the
  * replacement make sure to pad to the worst case length.
+ * Leaving an unused argument 0 to keep API compatibility.
  */
 #define alternative_input(oldinstr, newinstr, feature, input...)       \
-       asm volatile ("661:\n\t" oldinstr "\n662:\n"                    \
-                     ".section .altinstructions,\"a\"\n"               \
-                     _ASM_ALIGN "\n"                                   \
-                     _ASM_PTR "661b\n"         /* label */             \
-                     _ASM_PTR "663f\n"         /* new instruction */   \
-                     "  .byte %c0\n"           /* feature bit */       \
-                     "  .byte 662b-661b\n"     /* sourcelen */         \
-                     "  .byte 664f-663f\n"     /* replacementlen */    \
-                     ".previous\n"                                     \
-                     ".section .altinstr_replacement,\"ax\"\n"         \
-                     "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
-                     ".previous" :: "i" (feature), ##input)
+       asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)          \
+               : : "i" (0), ## input)
 
 /* Like alternative_input, but with a single output argument */
 #define alternative_io(oldinstr, newinstr, feature, output, input...)  \
-       asm volatile ("661:\n\t" oldinstr "\n662:\n"                    \
-                     ".section .altinstructions,\"a\"\n"               \
-                     _ASM_ALIGN "\n"                                   \
-                     _ASM_PTR "661b\n"         /* label */             \
-                     _ASM_PTR "663f\n"         /* new instruction */   \
-                     "  .byte %c[feat]\n"      /* feature bit */       \
-                     "  .byte 662b-661b\n"     /* sourcelen */         \
-                     "  .byte 664f-663f\n"     /* replacementlen */    \
-                     ".previous\n"                                     \
-                     ".section .altinstr_replacement,\"ax\"\n"         \
-                     "663:\n\t" newinstr "\n664:\n"  /* replacement */ \
-                     ".previous" : output : [feat] "i" (feature), ##input)
+       asm volatile (ALTERNATIVE(oldinstr, newinstr, feature)          \
+               : output : "i" (0), ## input)
 
 /*
  * use this macro(s) if you need more than one output parameter
index f712344..262e028 100644 (file)
@@ -27,6 +27,8 @@ extern int amd_iommu_init(void);
 extern int amd_iommu_init_dma_ops(void);
 extern void amd_iommu_detect(void);
 extern irqreturn_t amd_iommu_int_handler(int irq, void *data);
+extern void amd_iommu_flush_all_domains(void);
+extern void amd_iommu_flush_all_devices(void);
 #else
 static inline int amd_iommu_init(void) { return -ENODEV; }
 static inline void amd_iommu_detect(void) { }
index 95c8cd9..0c878ca 100644 (file)
 #define PD_DMA_OPS_MASK                (1UL << 0) /* domain used for dma_ops */
 #define PD_DEFAULT_MASK                (1UL << 1) /* domain is a default dma_ops
                                              domain for an IOMMU */
+extern bool amd_iommu_dump;
+#define DUMP_printk(format, arg...)                                    \
+       do {                                                            \
+               if (amd_iommu_dump)                                             \
+                       printk(KERN_INFO "AMD IOMMU: " format, ## arg); \
+       } while(0);
+
+/*
+ * Make iterating over all IOMMUs easier
+ */
+#define for_each_iommu(iommu) \
+       list_for_each_entry((iommu), &amd_iommu_list, list)
+#define for_each_iommu_safe(iommu, next) \
+       list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list)
+
+#define APERTURE_RANGE_SHIFT   27      /* 128 MB */
+#define APERTURE_RANGE_SIZE    (1ULL << APERTURE_RANGE_SHIFT)
+#define APERTURE_RANGE_PAGES   (APERTURE_RANGE_SIZE >> PAGE_SHIFT)
+#define APERTURE_MAX_RANGES    32      /* allows 4GB of DMA address space */
+#define APERTURE_RANGE_INDEX(a)        ((a) >> APERTURE_RANGE_SHIFT)
+#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
 
 /*
  * This structure contains generic data for  IOMMU protection domains
@@ -209,6 +230,26 @@ struct protection_domain {
        void *priv;             /* private data */
 };
 
+/*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+       /* address allocation bitmap */
+       unsigned long *bitmap;
+
+       /*
+        * Array of PTE pages for the aperture. In this array we save all the
+        * leaf pages of the domain page table used for the aperture. This way
+        * we don't need to walk the page table to find a specific PTE. We can
+        * just calculate its address in constant time.
+        */
+       u64 *pte_pages[64];
+
+       unsigned long offset;
+};
+
 /*
  * Data container for a dma_ops specific protection domain
  */
@@ -222,18 +263,10 @@ struct dma_ops_domain {
        unsigned long aperture_size;
 
        /* address we start to search for free addresses */
-       unsigned long next_bit;
-
-       /* address allocation bitmap */
-       unsigned long *bitmap;
+       unsigned long next_address;
 
-       /*
-        * Array of PTE pages for the aperture. In this array we save all the
-        * leaf pages of the domain page table used for the aperture. This way
-        * we don't need to walk the page table to find a specific PTE. We can
-        * just calculate its address in constant time.
-        */
-       u64 **pte_pages;
+       /* address space relevant data */
+       struct aperture_range *aperture[APERTURE_MAX_RANGES];
 
        /* This will be set to true when TLB needs to be flushed */
        bool need_flush;
index 42f2f83..bb7d479 100644 (file)
@@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void);
 extern void native_apic_icr_write(u32 low, u32 id);
 extern u64 native_apic_icr_read(void);
 
-#define EIM_8BIT_APIC_ID       0
-#define EIM_32BIT_APIC_ID      1
+extern int x2apic_mode;
 
 #ifdef CONFIG_X86_X2APIC
 /*
@@ -166,10 +165,9 @@ static inline u64 native_x2apic_icr_read(void)
        return val;
 }
 
-extern int x2apic, x2apic_phys;
+extern int x2apic_phys;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
-extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
 static inline int x2apic_enabled(void)
 {
@@ -183,6 +181,8 @@ static inline int x2apic_enabled(void)
                return 1;
        return 0;
 }
+
+#define x2apic_supported()     (cpu_has_x2apic)
 #else
 static inline void check_x2apic(void)
 {
@@ -190,28 +190,20 @@ static inline void check_x2apic(void)
 static inline void enable_x2apic(void)
 {
 }
-static inline void enable_IR_x2apic(void)
-{
-}
 static inline int x2apic_enabled(void)
 {
        return 0;
 }
 
-#define        x2apic  0
-
+#define        x2apic_preenabled 0
+#define        x2apic_supported()      0
 #endif
 
-extern int get_physical_broadcast(void);
+extern void enable_IR_x2apic(void);
 
-#ifdef CONFIG_X86_X2APIC
-static inline void ack_x2APIC_irq(void)
-{
-       /* Docs say use 0 for future compatibility */
-       native_apic_msr_write(APIC_EOI, 0);
-}
-#endif
+extern int get_physical_broadcast(void);
 
+extern void apic_disable(void);
 extern int lapic_get_maxlvt(void);
 extern void clear_local_APIC(void);
 extern void connect_bsp_APIC(void);
@@ -252,7 +244,7 @@ static inline void lapic_shutdown(void) { }
 #define local_apic_timer_c2_ok         1
 static inline void init_apic_mappings(void) { }
 static inline void disable_local_APIC(void) { }
-
+static inline void apic_disable(void) { }
 #endif /* !CONFIG_X86_LOCAL_APIC */
 
 #ifdef CONFIG_X86_64
@@ -410,7 +402,7 @@ static inline unsigned default_get_apic_id(unsigned long x)
 {
        unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR));
 
-       if (APIC_XAPIC(ver))
+       if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID))
                return (x >> 24) & 0xFF;
        else
                return (x >> 24) & 0x0F;
@@ -478,6 +470,9 @@ static inline unsigned int read_apic_id(void)
 extern void default_setup_apic_routing(void);
 
 #ifdef CONFIG_X86_32
+
+extern struct apic apic_default;
+
 /*
  * Set up the logical destination ID.
  *
index bc9514f..7ddb36a 100644 (file)
@@ -22,6 +22,7 @@
 #  define      APIC_INTEGRATED(x)      (1)
 #endif
 #define                APIC_XAPIC(x)           ((x) >= 0x14)
+#define                APIC_EXT_SPACE(x)       ((x) & 0x80000000)
 #define        APIC_TASKPRI    0x80
 #define                APIC_TPRI_MASK          0xFFu
 #define        APIC_ARBPRI     0x90
 #define                APIC_TDR_DIV_32         0x8
 #define                APIC_TDR_DIV_64         0x9
 #define                APIC_TDR_DIV_128        0xA
-#define        APIC_EILVT0     0x500
+#define        APIC_EFEAT      0x400
+#define        APIC_ECTRL      0x410
+#define APIC_EILVTn(n) (0x500 + 0x10 * (n))    /* EILVT register n; (n) parenthesized so expressions expand safely */
 #define                APIC_EILVT_NR_AMD_K8    1       /* # of extended interrupts */
 #define                APIC_EILVT_NR_AMD_10H   4
 #define                APIC_EILVT_LVTOFF(x)    (((x) >> 4) & 0xF)
 #define                APIC_EILVT_MSG_NMI      0x4
 #define                APIC_EILVT_MSG_EXT      0x7
 #define                APIC_EILVT_MASKED       (1 << 16)
-#define        APIC_EILVT1     0x510
-#define        APIC_EILVT2     0x520
-#define        APIC_EILVT3     0x530
 
 #define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
 #define APIC_BASE_MSR  0x800
index 6ba23dd..418e632 100644 (file)
@@ -8,11 +8,26 @@
 
 #ifdef __KERNEL__
 
+#include <asm/page_types.h>
+
 /* Physical address where kernel should be loaded. */
 #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
                                + (CONFIG_PHYSICAL_ALIGN - 1)) \
                                & ~(CONFIG_PHYSICAL_ALIGN - 1))
 
+/* Minimum kernel alignment, as a power of two */
+#ifdef CONFIG_x86_64
+#define MIN_KERNEL_ALIGN_LG2   PMD_SHIFT
+#else
+#define MIN_KERNEL_ALIGN_LG2   (PAGE_SHIFT+1)
+#endif
+#define MIN_KERNEL_ALIGN       (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)
+
+#if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \
+       (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2))
+#error "Invalid value for CONFIG_PHYSICAL_ALIGN"
+#endif
+
 #ifdef CONFIG_KERNEL_BZIP2
 #define BOOT_HEAP_SIZE             0x400000
 #else /* !CONFIG_KERNEL_BZIP2 */
index 433adae..1724e8d 100644 (file)
@@ -50,7 +50,8 @@ struct setup_header {
        __u32   ramdisk_size;
        __u32   bootsect_kludge;
        __u16   heap_end_ptr;
-       __u16   _pad1;
+       __u8    ext_loader_ver;
+       __u8    ext_loader_type;
        __u32   cmd_line_ptr;
        __u32   initrd_addr_max;
        __u32   kernel_alignment;
index 2228020..d96c1ee 100644 (file)
@@ -86,105 +86,7 @@ enum cpu_file_bit {
        CPU_VALUE_BIT,                          /* value                */
 };
 
-#define        CPU_FILE_VALUE                  (1 << CPU_VALUE_BIT)
-
-/*
- * DisplayFamily_DisplayModel  Processor Families/Processor Number Series
- * --------------------------  ------------------------------------------
- * 05_01, 05_02, 05_04         Pentium, Pentium with MMX
- *
- * 06_01                       Pentium Pro
- * 06_03, 06_05                        Pentium II Xeon, Pentium II
- * 06_07, 06_08, 06_0A, 06_0B  Pentium III Xeon, Pentum III
- *
- * 06_09, 060D                 Pentium M
- *
- * 06_0E                       Core Duo, Core Solo
- *
- * 06_0F                       Xeon 3000, 3200, 5100, 5300, 7300 series,
- *                             Core 2 Quad, Core 2 Extreme, Core 2 Duo,
- *                             Pentium dual-core
- * 06_17                       Xeon 5200, 5400 series, Core 2 Quad Q9650
- *
- * 06_1C                       Atom
- *
- * 0F_00, 0F_01, 0F_02         Xeon, Xeon MP, Pentium 4
- * 0F_03, 0F_04                        Xeon, Xeon MP, Pentium 4, Pentium D
- *
- * 0F_06                       Xeon 7100, 5000 Series, Xeon MP,
- *                             Pentium 4, Pentium D
- */
-
-/* Register processors bits */
-enum cpu_processor_bit {
-       CPU_NONE,
-/* Intel */
-       CPU_INTEL_PENTIUM_BIT,
-       CPU_INTEL_P6_BIT,
-       CPU_INTEL_PENTIUM_M_BIT,
-       CPU_INTEL_CORE_BIT,
-       CPU_INTEL_CORE2_BIT,
-       CPU_INTEL_ATOM_BIT,
-       CPU_INTEL_XEON_P4_BIT,
-       CPU_INTEL_XEON_MP_BIT,
-/* AMD */
-       CPU_AMD_K6_BIT,
-       CPU_AMD_K7_BIT,
-       CPU_AMD_K8_BIT,
-       CPU_AMD_0F_BIT,
-       CPU_AMD_10_BIT,
-       CPU_AMD_11_BIT,
-};
-
-#define        CPU_INTEL_PENTIUM       (1 << CPU_INTEL_PENTIUM_BIT)
-#define        CPU_INTEL_P6            (1 << CPU_INTEL_P6_BIT)
-#define        CPU_INTEL_PENTIUM_M     (1 << CPU_INTEL_PENTIUM_M_BIT)
-#define        CPU_INTEL_CORE          (1 << CPU_INTEL_CORE_BIT)
-#define        CPU_INTEL_CORE2         (1 << CPU_INTEL_CORE2_BIT)
-#define        CPU_INTEL_ATOM          (1 << CPU_INTEL_ATOM_BIT)
-#define        CPU_INTEL_XEON_P4       (1 << CPU_INTEL_XEON_P4_BIT)
-#define        CPU_INTEL_XEON_MP       (1 << CPU_INTEL_XEON_MP_BIT)
-
-#define        CPU_INTEL_PX            (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M)
-#define        CPU_INTEL_COREX         (CPU_INTEL_CORE | CPU_INTEL_CORE2)
-#define        CPU_INTEL_XEON          (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP)
-#define        CPU_CO_AT               (CPU_INTEL_CORE | CPU_INTEL_ATOM)
-#define        CPU_C2_AT               (CPU_INTEL_CORE2 | CPU_INTEL_ATOM)
-#define        CPU_CX_AT               (CPU_INTEL_COREX | CPU_INTEL_ATOM)
-#define        CPU_CX_XE               (CPU_INTEL_COREX | CPU_INTEL_XEON)
-#define        CPU_P6_XE               (CPU_INTEL_P6 | CPU_INTEL_XEON)
-#define        CPU_PM_CO_AT            (CPU_INTEL_PENTIUM_M | CPU_CO_AT)
-#define        CPU_C2_AT_XE            (CPU_C2_AT | CPU_INTEL_XEON)
-#define        CPU_CX_AT_XE            (CPU_CX_AT | CPU_INTEL_XEON)
-#define        CPU_P6_CX_AT            (CPU_INTEL_P6 | CPU_CX_AT)
-#define        CPU_P6_CX_XE            (CPU_P6_XE | CPU_INTEL_COREX)
-#define        CPU_P6_CX_AT_XE         (CPU_INTEL_P6 | CPU_CX_AT_XE)
-#define        CPU_PM_CX_AT_XE         (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE)
-#define        CPU_PM_CX_AT            (CPU_INTEL_PENTIUM_M | CPU_CX_AT)
-#define        CPU_PM_CX_XE            (CPU_INTEL_PENTIUM_M | CPU_CX_XE)
-#define        CPU_PX_CX_AT            (CPU_INTEL_PX | CPU_CX_AT)
-#define        CPU_PX_CX_AT_XE         (CPU_INTEL_PX | CPU_CX_AT_XE)
-
-/* Select all supported Intel CPUs */
-#define        CPU_INTEL_ALL           (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE)
-
-#define        CPU_AMD_K6              (1 << CPU_AMD_K6_BIT)
-#define        CPU_AMD_K7              (1 << CPU_AMD_K7_BIT)
-#define        CPU_AMD_K8              (1 << CPU_AMD_K8_BIT)
-#define        CPU_AMD_0F              (1 << CPU_AMD_0F_BIT)
-#define        CPU_AMD_10              (1 << CPU_AMD_10_BIT)
-#define        CPU_AMD_11              (1 << CPU_AMD_11_BIT)
-
-#define        CPU_K10_PLUS            (CPU_AMD_10 | CPU_AMD_11)
-#define        CPU_K0F_PLUS            (CPU_AMD_0F | CPU_K10_PLUS)
-#define        CPU_K8_PLUS             (CPU_AMD_K8 | CPU_K0F_PLUS)
-#define        CPU_K7_PLUS             (CPU_AMD_K7 | CPU_K8_PLUS)
-
-/* Select all supported AMD CPUs */
-#define        CPU_AMD_ALL             (CPU_AMD_K6 | CPU_K7_PLUS)
-
-/* Select all supported CPUs */
-#define        CPU_ALL                 (CPU_INTEL_ALL | CPU_AMD_ALL)
+#define        CPU_FILE_VALUE          (1 << CPU_VALUE_BIT)
 
 #define MAX_CPU_FILES          512
 
@@ -220,7 +122,6 @@ struct cpu_debug_range {
        unsigned                min;            /* Register range min   */
        unsigned                max;            /* Register range max   */
        unsigned                flag;           /* Supported flags      */
-       unsigned                model;          /* Supported models     */
 };
 
 #endif /* _ASM_X86_CPU_DEBUG_H */
index bb83b1c..19af421 100644 (file)
@@ -22,7 +22,7 @@
 #define X86_FEATURE_TSC                (0*32+ 4) /* Time Stamp Counter */
 #define X86_FEATURE_MSR                (0*32+ 5) /* Model-Specific Registers */
 #define X86_FEATURE_PAE                (0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE                (0*32+ 7) /* Machine Check Architecture */
+#define X86_FEATURE_MCE                (0*32+ 7) /* Machine Check Exception */
 #define X86_FEATURE_CX8                (0*32+ 8) /* CMPXCHG8 instruction */
 #define X86_FEATURE_APIC       (0*32+ 9) /* Onboard APIC */
 #define X86_FEATURE_SEP                (0*32+11) /* SYSENTER/SYSEXIT */
@@ -94,6 +94,7 @@
 #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC        (3*32+24) /* TSC does not stop in C states */
 #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */
+#define X86_FEATURE_EXTD_APICID        (3*32+26) /* has extended APICID (8 bits) */
 
 /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
 #define X86_FEATURE_XMM3       (4*32+ 0) /* "pni" SSE-3 */
@@ -192,11 +193,11 @@ extern const char * const x86_power_flags[32];
 #define clear_cpu_cap(c, bit)  clear_bit(bit, (unsigned long *)((c)->x86_capability))
 #define setup_clear_cpu_cap(bit) do { \
        clear_cpu_cap(&boot_cpu_data, bit);     \
-       set_bit(bit, (unsigned long *)cleared_cpu_caps); \
+       set_bit(bit, (unsigned long *)cpu_caps_cleared); \
 } while (0)
 #define setup_force_cpu_cap(bit) do { \
        set_cpu_cap(&boot_cpu_data, bit);       \
-       clear_bit(bit, (unsigned long *)cleared_cpu_caps);      \
+       set_bit(bit, (unsigned long *)cpu_caps_set);    \
 } while (0)
 
 #define cpu_has_fpu            boot_cpu_has(X86_FEATURE_FPU)
index a8f672b..70dac19 100644 (file)
@@ -15,8 +15,8 @@
  * - buffer allocation (memory accounting)
  *
  *
- * Copyright (C) 2007-2008 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008
+ * Copyright (C) 2007-2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
 #ifndef _ASM_X86_DS_H
@@ -83,8 +83,10 @@ enum ds_feature {
  * The interrupt threshold is independent from the overflow callback
  * to allow users to use their own overflow interrupt handling mechanism.
  *
- * task: the task to request recording for;
- *       NULL for per-cpu recording on the current cpu
+ * The function might sleep.
+ *
+ * task: the task to request recording for
+ * cpu:  the cpu to request recording for
  * base: the base pointer for the (non-pageable) buffer;
  * size: the size of the provided buffer in bytes
  * ovfl: pointer to a function to be called on buffer overflow;
@@ -93,19 +95,28 @@ enum ds_feature {
  *     -1 if no interrupt threshold is requested.
  * flags: a bit-mask of the above flags
  */
-extern struct bts_tracer *ds_request_bts(struct task_struct *task,
-                                        void *base, size_t size,
-                                        bts_ovfl_callback_t ovfl,
-                                        size_t th, unsigned int flags);
-extern struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-                                          void *base, size_t size,
-                                          pebs_ovfl_callback_t ovfl,
-                                          size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+                                             void *base, size_t size,
+                                             bts_ovfl_callback_t ovfl,
+                                             size_t th, unsigned int flags);
+extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+                                            bts_ovfl_callback_t ovfl,
+                                            size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+                                               void *base, size_t size,
+                                               pebs_ovfl_callback_t ovfl,
+                                               size_t th, unsigned int flags);
+extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
+                                              void *base, size_t size,
+                                              pebs_ovfl_callback_t ovfl,
+                                              size_t th, unsigned int flags);
 
 /*
  * Release BTS or PEBS resources
  * Suspend and resume BTS or PEBS tracing
  *
+ * Must be called with irq's enabled.
+ *
  * tracer: the tracer handle returned from ds_request_~()
  */
 extern void ds_release_bts(struct bts_tracer *tracer);
@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer);
 extern void ds_suspend_pebs(struct pebs_tracer *tracer);
 extern void ds_resume_pebs(struct pebs_tracer *tracer);
 
+/*
+ * Release BTS or PEBS resources
+ * Suspend and resume BTS or PEBS tracing
+ *
+ * Cpu tracers must call this on the traced cpu.
+ * Task tracers must call ds_release_~_noirq() for themselves.
+ *
+ * May be called with irq's disabled.
+ *
+ * Returns 0 if successful;
+ * -EPERM if the cpu tracer does not trace the current cpu.
+ * -EPERM if the task tracer does not trace itself.
+ *
+ * tracer: the tracer handle returned from ds_request_~()
+ */
+extern int ds_release_bts_noirq(struct bts_tracer *tracer);
+extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
+extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
+extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
+extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
+
 
 /*
  * The raw DS buffer state as it is used for BTS and PEBS recording.
@@ -170,9 +203,9 @@ struct bts_struct {
                } lbr;
                /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
                struct {
-                       __u64 jiffies;
+                       __u64 clock;
                        pid_t pid;
-               } timestamp;
+               } event;
        } variant;
 };
 
@@ -201,8 +234,12 @@ struct bts_trace {
 struct pebs_trace {
        struct ds_trace ds;
 
-       /* the PEBS reset value */
-       unsigned long long reset_value;
+       /* the number of valid counters in the below array */
+       unsigned int counters;
+
+#define MAX_PEBS_COUNTERS 4
+       /* the counter reset value */
+       unsigned long long counter_reset[MAX_PEBS_COUNTERS];
 };
 
 
@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer);
  * Returns 0 on success; -Eerrno on error
  *
  * tracer: the tracer handle returned from ds_request_pebs()
+ * counter: the index of the counter
  * value: the new counter reset value
  */
-extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value);
+extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
+                            unsigned int counter, u64 value);
 
 /*
  * Initialization
@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
  */
 extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
 
-/*
- * Task clone/init and cleanup work
- */
-extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father);
-extern void ds_exit_thread(struct task_struct *tsk);
-
 #else /* CONFIG_X86_DS */
 
 struct cpuinfo_x86;
 static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
 static inline void ds_switch_to(struct task_struct *prev,
                                struct task_struct *next) {}
-static inline void ds_copy_thread(struct task_struct *tsk,
-                                 struct task_struct *father) {}
-static inline void ds_exit_thread(struct task_struct *tsk) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
index b762ea4..3bd1777 100644 (file)
@@ -63,7 +63,26 @@ extern unsigned long io_apic_irqs;
 extern void init_VISWS_APIC_irqs(void);
 extern void setup_IO_APIC(void);
 extern void disable_IO_APIC(void);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
+
+struct io_apic_irq_attr {
+       int ioapic;
+       int ioapic_pin;
+       int trigger;
+       int polarity;
+};
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+                                       int ioapic, int ioapic_pin,
+                                       int trigger, int polarity)
+{
+       irq_attr->ioapic     = ioapic;
+       irq_attr->ioapic_pin = ioapic_pin;
+       irq_attr->trigger    = trigger;
+       irq_attr->polarity   = polarity;
+}
+
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin,
+                                       struct io_apic_irq_attr *irq_attr);
 extern void setup_ioapic_dest(void);
 
 extern void enable_IO_APIC(void);
@@ -78,7 +97,11 @@ extern void eisa_set_level_irq(unsigned int irq);
 /* SMP */
 extern void smp_apic_timer_interrupt(struct pt_regs *);
 extern void smp_spurious_interrupt(struct pt_regs *);
+extern void smp_generic_interrupt(struct pt_regs *);
 extern void smp_error_interrupt(struct pt_regs *);
+#ifdef CONFIG_X86_IO_APIC
+extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
+#endif
 #ifdef CONFIG_SMP
 extern void smp_reschedule_interrupt(struct pt_regs *);
 extern void smp_call_function_interrupt(struct pt_regs *);
index 71c9e51..175adf5 100644 (file)
@@ -67,7 +67,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
                     ".previous\n"
                     _ASM_EXTABLE(1b, 3b)
                     : [err] "=r" (err)
-#if 0 /* See comment in __save_init_fpu() below. */
+#if 0 /* See comment in fxsave() below. */
                     : [fx] "r" (fx), "m" (*fx), "0" (0));
 #else
                     : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0));
@@ -75,14 +75,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
        return err;
 }
 
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
-       if (task_thread_info(tsk)->status & TS_XSAVE)
-               return xrstor_checking(&tsk->thread.xstate->xsave);
-       else
-               return fxrstor_checking(&tsk->thread.xstate->fxsave);
-}
-
 /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
    is pending. Clear the x87 state here by setting it to fixed
    values. The kernel data segment can be sometimes 0 and sometimes
@@ -120,7 +112,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
                     ".previous\n"
                     _ASM_EXTABLE(1b, 3b)
                     : [err] "=r" (err), "=m" (*fx)
-#if 0 /* See comment in __fxsave_clear() below. */
+#if 0 /* See comment in fxsave() below. */
                     : [fx] "r" (fx), "0" (0));
 #else
                     : [fx] "cdaSDb" (fx), "0" (0));
@@ -185,12 +177,9 @@ static inline void tolerant_fwait(void)
        asm volatile("fnclex ; fwait");
 }
 
-static inline void restore_fpu(struct task_struct *tsk)
+/* perform fxrstor iff the processor has extended states, otherwise frstor */
+static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
 {
-       if (task_thread_info(tsk)->status & TS_XSAVE) {
-               xrstor_checking(&tsk->thread.xstate->xsave);
-               return;
-       }
        /*
         * The "nop" is needed to make the instructions the same
         * length.
@@ -199,7 +188,9 @@ static inline void restore_fpu(struct task_struct *tsk)
                "nop ; frstor %1",
                "fxrstor %1",
                X86_FEATURE_FXSR,
-               "m" (tsk->thread.xstate->fxsave));
+               "m" (*fx));
+
+       return 0;
 }
 
 /* We need a safe address that is cheap to find and that is already
@@ -262,6 +253,14 @@ end:
 
 #endif /* CONFIG_X86_64 */
 
+static inline int restore_fpu_checking(struct task_struct *tsk)
+{
+       if (task_thread_info(tsk)->status & TS_XSAVE)
+               return xrstor_checking(&tsk->thread.xstate->xsave);
+       else
+               return fxrstor_checking(&tsk->thread.xstate->fxsave);
+}
+
 /*
  * Signal frame handlers...
  */
@@ -305,18 +304,18 @@ static inline void kernel_fpu_end(void)
 /*
  * Some instructions like VIA's padlock instructions generate a spurious
  * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context aswell. To prevent these kernel instructions
- * in interrupt context interact wrongly with other user/kernel fpu usage, we
+ * get used from interrupt context as well. To prevent these kernel instructions
+ * in interrupt context interacting wrongly with other user/kernel fpu usage, we
  * should use them only in the context of irq_ts_save/restore()
  */
 static inline int irq_ts_save(void)
 {
        /*
-        * If we are in process context, we are ok to take a spurious DNA fault.
-        * Otherwise, doing clts() in process context require pre-emption to
-        * be disabled or some heavy lifting like kernel_fpu_begin()
+        * If in process context and not atomic, we can take a spurious DNA fault.
+        * Otherwise, doing clts() in process context requires disabling preemption
+        * or some heavy lifting like kernel_fpu_begin()
         */
-       if (!in_interrupt())
+       if (!in_atomic())
                return 0;
 
        if (read_cr0() & X86_CR0_TS) {
index 1a99e6c..58d7091 100644 (file)
@@ -60,8 +60,4 @@ extern struct irq_chip i8259A_chip;
 extern void mask_8259A(void);
 extern void unmask_8259A(void);
 
-#ifdef CONFIG_X86_32
-extern void init_ISA_irqs(void);
-#endif
-
 #endif /* _ASM_X86_I8259_H */
index 9d826e4..daf866e 100644 (file)
@@ -154,22 +154,19 @@ extern int timer_through_8259;
 extern int io_apic_get_unique_id(int ioapic, int apic_id);
 extern int io_apic_get_version(int ioapic);
 extern int io_apic_get_redir_entries(int ioapic);
-extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
-                                  int edge_level, int active_high_low);
 #endif /* CONFIG_ACPI */
 
+struct io_apic_irq_attr;
+extern int io_apic_set_pci_routing(struct device *dev, int irq,
+                struct io_apic_irq_attr *irq_attr);
 extern int (*ioapic_renumber_irq)(int ioapic, int irq);
 extern void ioapic_init_mappings(void);
 
-#ifdef CONFIG_X86_64
 extern struct IO_APIC_route_entry **alloc_ioapic_entries(void);
 extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries);
 extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
 extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries);
-extern void reinit_intr_remapped_IO_APIC(int intr_remapping,
-       struct IO_APIC_route_entry **ioapic_entries);
-#endif
 
 extern void probe_nr_irqs_gsi(void);
 
index 86af260..0e9fe1d 100644 (file)
@@ -1,3 +1,6 @@
+#ifndef _ASM_X86_IOMAP_H
+#define _ASM_X86_IOMAP_H
+
 /*
  * Copyright © 2008 Ingo Molnar
  *
@@ -31,3 +34,5 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 
 void
 iounmap_atomic(void *kvaddr, enum km_type type);
+
+#endif /* _ASM_X86_IOMAP_H */
index 0396760..f275e22 100644 (file)
@@ -1,6 +1,6 @@
 #ifndef _ASM_X86_IRQ_REMAPPING_H
 #define _ASM_X86_IRQ_REMAPPING_H
 
-#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
+#define IRTE_DEST(dest) ((x2apic_mode) ? (dest) : (dest) << 8)
 
 #endif /* _ASM_X86_IRQ_REMAPPING_H */
index 3cbd79b..910b5a3 100644 (file)
@@ -34,6 +34,7 @@
 
 #ifdef CONFIG_X86_32
 # define SYSCALL_VECTOR                        0x80
+# define IA32_SYSCALL_VECTOR           0x80
 #else
 # define IA32_SYSCALL_VECTOR           0x80
 #endif
index 54c8cc5..c2d1f3b 100644 (file)
@@ -12,4 +12,17 @@ extern int cache_k8_northbridges(void);
 extern void k8_flush_garts(void);
 extern int k8_scan_nodes(unsigned long start, unsigned long end);
 
+#ifdef CONFIG_K8_NB
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+       return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL;
+}
+#else
+static inline struct pci_dev *node_to_k8_nb_misc(int node)
+{
+       return NULL;
+}
+#endif
+
+
 #endif /* _ASM_X86_K8_H */
index c882664..ef51b50 100644 (file)
@@ -9,20 +9,31 @@ struct cpu_signature {
 
 struct device;
 
+enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND };
+
 struct microcode_ops {
-       int  (*request_microcode_user) (int cpu, const void __user *buf, size_t size);
-       int  (*request_microcode_fw) (int cpu, struct device *device);
+       enum ucode_state (*request_microcode_user) (int cpu,
+                               const void __user *buf, size_t size);
 
-       void (*apply_microcode) (int cpu);
+       enum ucode_state (*request_microcode_fw) (int cpu,
+                               struct device *device);
 
-       int  (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
        void (*microcode_fini_cpu) (int cpu);
+
+       /*
+        * The generic 'microcode_core' part guarantees that
+        * the callbacks below run on a target cpu when they
+        * are being called.
+        * See also the "Synchronization" section in microcode_core.c.
+        */
+       int (*apply_microcode) (int cpu);
+       int (*collect_cpu_info) (int cpu, struct cpu_signature *csig);
 };
 
 struct ucode_cpu_info {
-       struct cpu_signature cpu_sig;
-       int valid;
-       void *mc;
+       struct cpu_signature    cpu_sig;
+       int                     valid;
+       void                    *mc;
 };
 extern struct ucode_cpu_info ucode_cpu_info[];
 
index 642fc7f..e2a1bb6 100644 (file)
@@ -61,9 +61,11 @@ extern void get_smp_config(void);
 #ifdef CONFIG_X86_MPPARSE
 extern void find_smp_config(void);
 extern void early_reserve_e820_mpc_new(void);
+extern int enable_update_mptable;
 #else
 static inline void find_smp_config(void) { }
 static inline void early_reserve_e820_mpc_new(void) { }
+#define enable_update_mptable 0
 #endif
 
 void __cpuinit generic_processor_info(int apicid, int version);
@@ -72,20 +74,13 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base);
 extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
                                   u32 gsi);
 extern void mp_config_acpi_legacy_irqs(void);
-extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low);
+struct device;
+extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level,
+                                int active_high_low);
 extern int acpi_probe_gsi(void);
 #ifdef CONFIG_X86_IO_APIC
-extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-                               u32 gsi, int triggering, int polarity);
 extern int mp_find_ioapic(int gsi);
 extern int mp_find_ioapic_pin(int ioapic, int gsi);
-#else
-static inline int
-mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-                  u32 gsi, int triggering, int polarity)
-{
-       return 0;
-}
 #endif
 #else /* !CONFIG_ACPI: */
 static inline int acpi_probe_gsi(void)
index ec41fc1..4d58d04 100644 (file)
 #define MSR_K8_TOP_MEM1                        0xc001001a
 #define MSR_K8_TOP_MEM2                        0xc001001d
 #define MSR_K8_SYSCFG                  0xc0010010
-#define MSR_K8_HWCR                    0xc0010015
 #define MSR_K8_INT_PENDING_MSG         0xc0010055
 /* C1E active bits in int pending message */
 #define K8_INTP_C1E_ACTIVE_MASK                0x18000000
index c45a0a5..c972644 100644 (file)
@@ -64,7 +64,7 @@ static inline int nmi_watchdog_active(void)
         * but since they are power of two we could use a
         * cheaper way --cvg
         */
-       return nmi_watchdog & 0x3;
+       return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC);
 }
 #endif
 
index 064ed6d..c4ae822 100644 (file)
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks,
 extern void numa_init_array(void);
 extern int numa_off;
 
-extern void srat_reserve_add_area(int nodeid);
-extern int hotadd_percent;
-
 extern s16 apicid_to_node[MAX_LOCAL_APIC];
 
 extern unsigned long numa_free_all_bootmem(void);
@@ -27,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start,
                               unsigned long end);
 
 #ifdef CONFIG_NUMA
+/*
+ * Too small node sizes may confuse the VM badly. Usually they
+ * result from BIOS bugs. So don't recognize nodes as standalone
+ * NUMA entities that have less than this amount of RAM listed:
+ */
+#define NODE_MIN_SIZE (4*1024*1024)
+
 extern void __init init_cpu_to_node(void);
 extern void __cpuinit numa_set_node(int cpu, int node);
 extern void __cpuinit numa_clear_node(int cpu);
index 0f915ae..6f1b733 100644 (file)
@@ -54,10 +54,6 @@ extern unsigned int __VMALLOC_RESERVE;
 extern int sysctl_legacy_va_layout;
 
 extern void find_low_pfn_range(void);
-extern unsigned long init_memory_mapping(unsigned long start,
-                                        unsigned long end);
-extern void initmem_init(unsigned long, unsigned long);
-extern void free_initmem(void);
 extern void setup_bootmem_allocator(void);
 
 #endif /* !__ASSEMBLY__ */
index d38c91b..8d382d3 100644 (file)
  */
 #define __PAGE_OFFSET           _AC(0xffff880000000000, UL)
 
-#define __PHYSICAL_START       CONFIG_PHYSICAL_START
-#define __KERNEL_ALIGN         0x200000
-
-/*
- * Make sure kernel is aligned to 2MB address. Catching it at compile
- * time is better. Change your config file and compile the kernel
- * for a 2MB aligned address (CONFIG_PHYSICAL_START)
- */
-#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0
-#error "CONFIG_PHYSICAL_START must be a multiple of 2MB"
-#endif
+#define __PHYSICAL_START       ((CONFIG_PHYSICAL_START +               \
+                                 (CONFIG_PHYSICAL_ALIGN - 1)) &        \
+                                ~(CONFIG_PHYSICAL_ALIGN - 1))
 
 #define __START_KERNEL         (__START_KERNEL_map + __PHYSICAL_START)
 #define __START_KERNEL_map     _AC(0xffffffff80000000, UL)
 
-/* See Documentation/x86_64/mm.txt for a description of the memory map. */
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define __PHYSICAL_MASK_SHIFT  46
 #define __VIRTUAL_MASK_SHIFT   48
 
@@ -71,12 +63,6 @@ extern unsigned long __phys_addr(unsigned long);
 
 #define vmemmap ((struct page *)VMEMMAP_START)
 
-extern unsigned long init_memory_mapping(unsigned long start,
-                                        unsigned long end);
-
-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
-extern void free_initmem(void);
-
 extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
 extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);
 
index 826ad37..6473f5c 100644 (file)
@@ -46,6 +46,12 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
+extern unsigned long init_memory_mapping(unsigned long start,
+                                        unsigned long end);
+
+extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern void free_initmem(void);
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_X86_PAGE_DEFS_H */
index a53da00..4fb37c8 100644 (file)
@@ -56,6 +56,7 @@ struct desc_ptr;
 struct tss_struct;
 struct mm_struct;
 struct desc_struct;
+struct task_struct;
 
 /*
  * Wrapper type for pointers to code which uses the non-standard
@@ -203,7 +204,8 @@ struct pv_cpu_ops {
 
        void (*swapgs)(void);
 
-       struct pv_lazy_ops lazy_mode;
+       void (*start_context_switch)(struct task_struct *prev);
+       void (*end_context_switch)(struct task_struct *next);
 };
 
 struct pv_irq_ops {
@@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode {
 };
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
-void paravirt_enter_lazy_cpu(void);
-void paravirt_leave_lazy_cpu(void);
+void paravirt_start_context_switch(struct task_struct *prev);
+void paravirt_end_context_switch(struct task_struct *next);
+
 void paravirt_enter_lazy_mmu(void);
 void paravirt_leave_lazy_mmu(void);
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode);
 
-#define  __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-static inline void arch_enter_lazy_cpu_mode(void)
+#define  __HAVE_ARCH_START_CONTEXT_SWITCH
+static inline void arch_start_context_switch(struct task_struct *prev)
 {
-       PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter);
+       PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev);
 }
 
-static inline void arch_leave_lazy_cpu_mode(void)
+static inline void arch_end_context_switch(struct task_struct *next)
 {
-       PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave);
+       PVOP_VCALL1(pv_cpu_ops.end_context_switch, next);
 }
 
-void arch_flush_lazy_cpu_mode(void);
-
 #define  __HAVE_ARCH_ENTER_LAZY_MMU_MODE
 static inline void arch_enter_lazy_mmu_mode(void)
 {
index 29d96d1..18ef7eb 100644 (file)
@@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
 #define pte_val(x)     native_pte_val(x)
 #define __pte(x)       native_make_pte(x)
 
+#define arch_end_context_switch(prev)  do {} while(0)
+
 #endif /* CONFIG_PARAVIRT */
 
 /*
@@ -503,6 +505,8 @@ static inline int pgd_none(pgd_t pgd)
 
 #ifndef __ASSEMBLY__
 
+extern int direct_gbpages;
+
 /* local pte updates need not use xchg for locking */
 static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
 {
index 6b87bc6..abde308 100644 (file)
@@ -25,10 +25,6 @@ extern pgd_t init_level4_pgt[];
 
 extern void paging_init(void);
 
-#endif /* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
-
 #define pte_ERROR(e)                                   \
        printk("%s:%d: bad pte %p(%016lx).\n",          \
               __FILE__, __LINE__, &(e), pte_val(e))
@@ -135,8 +131,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 
 #define update_mmu_cache(vma, address, pte) do { } while (0)
 
-extern int direct_gbpages;
-
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
 #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
index fbf42b8..766ea16 100644 (file)
@@ -51,11 +51,11 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_SIZE     (_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK     (~(PGDIR_SIZE - 1))
 
-
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
 #define MAXMEM          _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-#define VMALLOC_START    _AC(0xffffc20000000000, UL)
-#define VMALLOC_END      _AC(0xffffe1ffffffffff, UL)
-#define VMEMMAP_START   _AC(0xffffe20000000000, UL)
+#define VMALLOC_START    _AC(0xffffc90000000000, UL)
+#define VMALLOC_END      _AC(0xffffe8ffffffffff, UL)
+#define VMEMMAP_START   _AC(0xffffea0000000000, UL)
 #define MODULES_VADDR    _AC(0xffffffffa0000000, UL)
 #define MODULES_END      _AC(0xffffffffff000000, UL)
 #define MODULES_LEN   (MODULES_END - MODULES_VADDR)
index b8238dc..4d258ad 100644 (file)
@@ -273,7 +273,6 @@ typedef struct page *pgtable_t;
 
 extern pteval_t __supported_pte_mask;
 extern int nx_enabled;
-extern void set_nx(void);
 
 #define pgprot_writecombine    pgprot_writecombine
 extern pgprot_t pgprot_writecombine(pgprot_t prot);
index c2cceae..c776826 100644 (file)
@@ -135,7 +135,8 @@ extern struct cpuinfo_x86   boot_cpu_data;
 extern struct cpuinfo_x86      new_cpu_data;
 
 extern struct tss_struct       doublefault_tss;
-extern __u32                   cleared_cpu_caps[NCAPINTS];
+extern __u32                   cpu_caps_cleared[NCAPINTS];
+extern __u32                   cpu_caps_set[NCAPINTS];
 
 #ifdef CONFIG_SMP
 DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
@@ -409,9 +410,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary);
 extern unsigned int xstate_size;
 extern void free_thread_xstate(struct task_struct *);
 extern struct kmem_cache *task_xstate_cachep;
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
 
 struct thread_struct {
        /* Cached TLS descriptors: */
@@ -427,8 +425,12 @@ struct thread_struct {
        unsigned short          fsindex;
        unsigned short          gsindex;
 #endif
+#ifdef CONFIG_X86_32
        unsigned long           ip;
+#endif
+#ifdef CONFIG_X86_64
        unsigned long           fs;
+#endif
        unsigned long           gs;
        /* Hardware debugging registers: */
        unsigned long           debugreg0;
@@ -460,14 +462,8 @@ struct thread_struct {
        unsigned                io_bitmap_max;
 /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
        unsigned long   debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+       /* Debug Store context; see asm/ds.h */
        struct ds_context       *ds_ctx;
-#endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
-       unsigned int    bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
 };
 
 static inline unsigned long native_get_debugreg(int regno)
@@ -795,6 +791,21 @@ static inline unsigned long get_debugctlmsr(void)
     return debugctlmsr;
 }
 
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+       u64 debugctlmsr = 0;
+       u32 val1, val2;
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+       if (boot_cpu_data.x86 < 6)
+               return 0;
+#endif
+       rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+       debugctlmsr = val1 | ((u64)val2 << 32);
+
+       return debugctlmsr;
+}
+
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
@@ -804,6 +815,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+static inline void update_debugctlmsr_on_cpu(int cpu,
+                                            unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+       if (boot_cpu_data.x86 < 6)
+               return;
+#endif
+       wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+                    (u32)((u64)debugctlmsr),
+                    (u32)((u64)debugctlmsr >> 32));
+}
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
@@ -814,6 +837,7 @@ extern unsigned int         BIOS_revision;
 
 /* Boot loader type from the setup header: */
 extern int                     bootloader_type;
+extern int                     bootloader_version;
 
 extern char                    ignore_fpu_irq;
 
@@ -874,7 +898,6 @@ static inline void spin_lock_prefetch(const void *x)
        .vm86_info              = NULL,                                   \
        .sysenter_cs            = __KERNEL_CS,                            \
        .io_bitmap_ptr          = NULL,                                   \
-       .fs                     = __KERNEL_PERCPU,                        \
 }
 
 /*
index 624f133..0f0d908 100644 (file)
@@ -236,12 +236,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
                              struct user_desc __user *info, int can_allocate);
 
-extern void x86_ptrace_untrace(struct task_struct *);
-extern void x86_ptrace_fork(struct task_struct *child,
-                           unsigned long clone_flags);
+#ifdef CONFIG_X86_PTRACE_BTS
+extern void ptrace_bts_untrace(struct task_struct *tsk);
 
-#define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk)
-#define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags)
+#define arch_ptrace_untrace(tsk)       ptrace_bts_untrace(tsk)
+#endif /* CONFIG_X86_PTRACE_BTS */
 
 #endif /* __KERNEL__ */
 
index a4737dd..64cf2d2 100644 (file)
 #endif
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT
+/* Paravirtualized systems may not have PSE or PGE available */
 #define NEED_PSE       0
-#define NEED_MSR       (1<<(X86_FEATURE_MSR & 31))
 #define NEED_PGE       0
+#else
+#define NEED_PSE       (1<<(X86_FEATURE_PSE & 31))
+#define NEED_PGE       (1<<(X86_FEATURE_PGE & 31))
+#endif
+#define NEED_MSR       (1<<(X86_FEATURE_MSR & 31))
 #define NEED_FXSR      (1<<(X86_FEATURE_FXSR & 31))
 #define NEED_XMM       (1<<(X86_FEATURE_XMM & 31))
 #define NEED_XMM2      (1<<(X86_FEATURE_XMM2 & 31))
index bdc2ada..4093d1e 100644 (file)
@@ -33,7 +33,6 @@ struct x86_quirks {
        int (*setup_ioapic_ids)(void);
 };
 
-extern void x86_quirk_pre_intr_init(void);
 extern void x86_quirk_intr_init(void);
 
 extern void x86_quirk_trap_init(void);
index 19e0d88..6a84ed1 100644 (file)
@@ -180,7 +180,7 @@ extern int safe_smp_processor_id(void);
 static inline int logical_smp_processor_id(void)
 {
        /* we don't want to mark this access volatile - bad code generation */
-       return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+       return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
 }
 
 #endif
index e3cc3c0..4517d6b 100644 (file)
@@ -27,7 +27,7 @@
 #else /* CONFIG_X86_32 */
 # define SECTION_SIZE_BITS     27 /* matt - 128 is convenient right now */
 # define MAX_PHYSADDR_BITS     44
-# define MAX_PHYSMEM_BITS      44 /* Can be max 45 bits */
+# define MAX_PHYSMEM_BITS      46
 #endif
 
 #endif /* CONFIG_SPARSEMEM */
index 7043408..372b76e 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * syscalls.h - Linux syscall interfaces (arch-specific)
  *
- * Copyright (c) 2008 Jaswinder Singh
+ * Copyright (c) 2008 Jaswinder Singh Rajput
  *
  * This file is released under the GPLv2.
  * See the file COPYING for more details.
 
 #include <linux/compiler.h>
 #include <linux/linkage.h>
-#include <linux/types.h>
 #include <linux/signal.h>
+#include <linux/types.h>
 
 /* Common in X86_32 and X86_64 */
 /* kernel/ioport.c */
 asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 
+/* kernel/process.c */
+int sys_fork(struct pt_regs *);
+int sys_vfork(struct pt_regs *);
+
 /* kernel/ldt.c */
 asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
 
+/* kernel/signal.c */
+long sys_rt_sigreturn(struct pt_regs *);
+
 /* kernel/tls.c */
 asmlinkage int sys_set_thread_area(struct user_desc __user *);
 asmlinkage int sys_get_thread_area(struct user_desc __user *);
 
 /* X86_32 only */
 #ifdef CONFIG_X86_32
+/* kernel/ioport.c */
+long sys_iopl(struct pt_regs *);
+
 /* kernel/process_32.c */
-int sys_fork(struct pt_regs *);
 int sys_clone(struct pt_regs *);
-int sys_vfork(struct pt_regs *);
 int sys_execve(struct pt_regs *);
 
-/* kernel/signal_32.c */
+/* kernel/signal.c */
 asmlinkage int sys_sigsuspend(int, int, old_sigset_t);
 asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
                             struct old_sigaction __user *);
 int sys_sigaltstack(struct pt_regs *);
 unsigned long sys_sigreturn(struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
-
-/* kernel/ioport.c */
-long sys_iopl(struct pt_regs *);
 
 /* kernel/sys_i386_32.c */
+struct mmap_arg_struct;
+struct sel_arg_struct;
+struct oldold_utsname;
+struct old_utsname;
+
 asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long,
                          unsigned long, unsigned long, unsigned long);
-struct mmap_arg_struct;
 asmlinkage int old_mmap(struct mmap_arg_struct __user *);
-struct sel_arg_struct;
 asmlinkage int old_select(struct sel_arg_struct __user *);
 asmlinkage int sys_ipc(uint, int, int, int, void __user *, long);
-struct old_utsname;
 asmlinkage int sys_uname(struct old_utsname __user *);
-struct oldold_utsname;
 asmlinkage int sys_olduname(struct oldold_utsname __user *);
 
 /* kernel/vm86_32.c */
@@ -65,29 +70,27 @@ int sys_vm86(struct pt_regs *);
 #else /* CONFIG_X86_32 */
 
 /* X86_64 only */
+/* kernel/ioport.c */
+asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
+
 /* kernel/process_64.c */
-asmlinkage long sys_fork(struct pt_regs *);
 asmlinkage long sys_clone(unsigned long, unsigned long,
                          void __user *, void __user *,
                          struct pt_regs *);
-asmlinkage long sys_vfork(struct pt_regs *);
 asmlinkage long sys_execve(char __user *, char __user * __user *,
                           char __user * __user *,
                           struct pt_regs *);
 long sys_arch_prctl(int, unsigned long);
 
-/* kernel/ioport.c */
-asmlinkage long sys_iopl(unsigned int, struct pt_regs *);
-
-/* kernel/signal_64.c */
+/* kernel/signal.c */
 asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *,
                                struct pt_regs *);
-long sys_rt_sigreturn(struct pt_regs *);
 
 /* kernel/sys_x86_64.c */
+struct new_utsname;
+
 asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long,
                         unsigned long, unsigned long, unsigned long);
-struct new_utsname;
 asmlinkage long sys_uname(struct new_utsname __user *);
 
 #endif /* CONFIG_X86_32 */
index f729563..c4ee805 100644 (file)
@@ -67,6 +67,7 @@ static inline int user_termio_to_kernel_termios(struct ktermios *termios,
        SET_LOW_TERMIOS_BITS(termios, termio, c_oflag);
        SET_LOW_TERMIOS_BITS(termios, termio, c_cflag);
        SET_LOW_TERMIOS_BITS(termios, termio, c_lflag);
+       get_user(termios->c_line, &termio->c_line);
        return copy_from_user(termios->c_cc, termio->c_cc, NCC);
 }
 
index 8820a73..602c769 100644 (file)
@@ -94,7 +94,8 @@ struct thread_info {
 #define TIF_FORCED_TF          24      /* true if TF in eflags artificially */
 #define TIF_DEBUGCTLMSR                25      /* uses thread_struct.debugctlmsr */
 #define TIF_DS_AREA_MSR                26      /* uses thread_struct.ds_area_msr */
-#define TIF_SYSCALL_FTRACE     27      /* for ftrace syscall instrumentation */
+#define TIF_LAZY_MMU_UPDATES   27      /* task is updating the mmu lazily */
+#define TIF_SYSCALL_FTRACE     28      /* for ftrace syscall instrumentation */
 
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
@@ -116,6 +117,7 @@ struct thread_info {
 #define _TIF_FORCED_TF         (1 << TIF_FORCED_TF)
 #define _TIF_DEBUGCTLMSR       (1 << TIF_DEBUGCTLMSR)
 #define _TIF_DS_AREA_MSR       (1 << TIF_DS_AREA_MSR)
+#define _TIF_LAZY_MMU_UPDATES  (1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_SYSCALL_FTRACE    (1 << TIF_SYSCALL_FTRACE)
 
 /* work to do in syscall_trace_enter() */
index 16a5c84..a5ecc9c 100644 (file)
@@ -17,7 +17,7 @@
 
 static inline void __native_flush_tlb(void)
 {
-       write_cr3(read_cr3());
+       native_write_cr3(native_read_cr3());
 }
 
 static inline void __native_flush_tlb_global(void)
@@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void)
         */
        raw_local_irq_save(flags);
 
-       cr4 = read_cr4();
+       cr4 = native_read_cr4();
        /* clear PGE */
-       write_cr4(cr4 & ~X86_CR4_PGE);
+       native_write_cr4(cr4 & ~X86_CR4_PGE);
        /* write old PGE again and flush TLBs */
-       write_cr4(cr4);
+       native_write_cr4(cr4);
 
        raw_local_irq_restore(flags);
 }
index f44b49a..066ef59 100644 (file)
@@ -203,7 +203,8 @@ struct pci_bus;
 void x86_pci_root_bus_res_quirks(struct pci_bus *b);
 
 #ifdef CONFIG_SMP
-#define mc_capable()   (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)
+#define mc_capable()   ((boot_cpu_data.x86_max_cores > 1) && \
+                       (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids))
 #define smt_capable()                  (smp_num_siblings > 1)
 #endif
 
index 0d53425..bfd74c0 100644 (file)
@@ -2,6 +2,7 @@
 #define _ASM_X86_TRAPS_H
 
 #include <asm/debugreg.h>
+#include <asm/siginfo.h>                       /* TRAP_TRACE, ... */
 
 #ifdef CONFIG_X86_32
 #define dotraplinkage
@@ -13,6 +14,9 @@ asmlinkage void divide_error(void);
 asmlinkage void debug(void);
 asmlinkage void nmi(void);
 asmlinkage void int3(void);
+asmlinkage void xen_debug(void);
+asmlinkage void xen_int3(void);
+asmlinkage void xen_stack_segment(void);
 asmlinkage void overflow(void);
 asmlinkage void bounds(void);
 asmlinkage void invalid_op(void);
@@ -74,7 +78,6 @@ static inline int get_si_code(unsigned long condition)
 }
 
 extern int panic_on_unrecovered_nmi;
-extern int kstack_depth_to_print;
 
 void math_error(void __user *);
 void math_emulate(struct math_emu_info *);
index 6e72d74..708dae6 100644 (file)
 #define __NR_inotify_init1     332
 #define __NR_preadv            333
 #define __NR_pwritev           334
+#define __NR_rt_tgsigqueueinfo 335
 
 #ifdef __KERNEL__
 
index f818294..4e2b054 100644 (file)
@@ -657,6 +657,8 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1)
 __SYSCALL(__NR_preadv, sys_preadv)
 #define __NR_pwritev                           296
 __SYSCALL(__NR_pwritev, sys_pwritev)
+#define __NR_rt_tgsigqueueinfo                 297
+__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
 
 
 #ifndef __NO_STUBS
index 9b0e61b..bddd44f 100644 (file)
@@ -37,7 +37,7 @@
 #define UV_CPUS_PER_ACT_STATUS         32
 #define UV_ACT_STATUS_MASK             0x3
 #define UV_ACT_STATUS_SIZE             2
-#define UV_ACTIVATION_DESCRIPTOR_SIZE  32
+#define UV_ADP_SIZE                    32
 #define UV_DISTRIBUTION_SIZE           256
 #define UV_SW_ACK_NPENDING             8
 #define UV_NET_ENDPOINT_INTD           0x38
index d3a98ea..341070f 100644 (file)
@@ -133,6 +133,7 @@ struct uv_scir_s {
 struct uv_hub_info_s {
        unsigned long           global_mmr_base;
        unsigned long           gpa_mask;
+       unsigned int            gnode_extra;
        unsigned long           gnode_upper;
        unsigned long           lowmem_remap_top;
        unsigned long           lowmem_remap_base;
@@ -159,7 +160,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
  *             p -  PNODE (local part of nsids, right shifted 1)
  */
 #define UV_NASID_TO_PNODE(n)           (((n) >> 1) & uv_hub_info->pnode_mask)
-#define UV_PNODE_TO_NASID(p)           (((p) << 1) | uv_hub_info->gnode_upper)
+#define UV_PNODE_TO_GNODE(p)           ((p) |uv_hub_info->gnode_extra)
+#define UV_PNODE_TO_NASID(p)           (UV_PNODE_TO_GNODE(p) << 1)
 
 #define UV_LOCAL_MMR_BASE              0xf4000000UL
 #define UV_GLOBAL_MMR32_BASE           0xf8000000UL
@@ -173,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
 #define UV_GLOBAL_MMR32_PNODE_BITS(p)  ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT))
 
 #define UV_GLOBAL_MMR64_PNODE_BITS(p)                                  \
-       ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT)
+       ((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT)
 
 #define UV_APIC_PNODE_SHIFT    6
 
index 88d1bfc..4f78bd6 100644 (file)
@@ -28,7 +28,7 @@ CFLAGS_paravirt.o     := $(nostackp)
 obj-y                  := process_$(BITS).o signal.o entry_$(BITS).o
 obj-y                  += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
 obj-y                  += time_$(BITS).o ioport.o ldt.o dumpstack.o
-obj-y                  += setup.o i8259.o irqinit_$(BITS).o
+obj-y                  += setup.o i8259.o irqinit.o
 obj-$(CONFIG_X86_VISWS)        += visws_quirks.o
 obj-$(CONFIG_X86_32)   += probe_roms_32.o
 obj-$(CONFIG_X86_32)   += sys_i386_32.o i386_ksyms_32.o
@@ -44,6 +44,7 @@ obj-y                         += process.o
 obj-y                          += i387.o xsave.o
 obj-y                          += ptrace.o
 obj-$(CONFIG_X86_DS)           += ds.o
+obj-$(CONFIG_X86_DS_SELFTEST)          += ds_selftest.o
 obj-$(CONFIG_X86_32)           += tls.o
 obj-$(CONFIG_IA32_EMULATION)   += tls.o
 obj-y                          += step.o
index 723989d..6310861 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/irq.h>
 #include <linux/bootmem.h>
 #include <linux/ioport.h>
+#include <linux/pci.h>
 
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
@@ -522,7 +523,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
  * success: return IRQ number (>=0)
  * failure: return < 0
  */
-int acpi_register_gsi(u32 gsi, int triggering, int polarity)
+int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
 {
        unsigned int irq;
        unsigned int plat_gsi = gsi;
@@ -532,14 +533,14 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity)
         * Make sure all (legacy) PCI IRQs are set as level-triggered.
         */
        if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
-               if (triggering == ACPI_LEVEL_SENSITIVE)
+               if (trigger == ACPI_LEVEL_SENSITIVE)
                        eisa_set_level_irq(gsi);
        }
 #endif
 
 #ifdef CONFIG_X86_IO_APIC
        if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) {
-               plat_gsi = mp_register_gsi(gsi, triggering, polarity);
+               plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity);
        }
 #endif
        acpi_gsi_to_irq(plat_gsi, &irq);
@@ -903,10 +904,8 @@ extern int es7000_plat;
 #endif
 
 static struct {
-       int apic_id;
        int gsi_base;
        int gsi_end;
-       DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
 } mp_ioapic_routing[MAX_IO_APICS];
 
 int mp_find_ioapic(int gsi)
@@ -986,16 +985,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
 
        set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
        mp_ioapics[idx].apicid = uniq_ioapic_id(id);
-#ifdef CONFIG_X86_32
        mp_ioapics[idx].apicver = io_apic_get_version(idx);
-#else
-       mp_ioapics[idx].apicver = 0;
-#endif
+
        /*
         * Build basic GSI lookup table to facilitate gsi->io_apic lookups
         * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
         */
-       mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
        mp_ioapic_routing[idx].gsi_base = gsi_base;
        mp_ioapic_routing[idx].gsi_end = gsi_base +
            io_apic_get_redir_entries(idx);
@@ -1158,26 +1153,52 @@ void __init mp_config_acpi_legacy_irqs(void)
        }
 }
 
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
+static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
+                       int polarity)
 {
+#ifdef CONFIG_X86_MPPARSE
+       struct mpc_intsrc mp_irq;
+       struct pci_dev *pdev;
+       unsigned char number;
+       unsigned int devfn;
        int ioapic;
-       int ioapic_pin;
-#ifdef CONFIG_X86_32
-#define MAX_GSI_NUM    4096
-#define IRQ_COMPRESSION_START  64
+       u8 pin;
 
-       static int pci_irq = IRQ_COMPRESSION_START;
-       /*
-        * Mapping between Global System Interrupts, which
-        * represent all possible interrupts, and IRQs
-        * assigned to actual devices.
-        */
-       static int gsi_to_irq[MAX_GSI_NUM];
-#else
+       if (!acpi_ioapic)
+               return 0;
+       if (!dev)
+               return 0;
+       if (dev->bus != &pci_bus_type)
+               return 0;
+
+       pdev = to_pci_dev(dev);
+       number = pdev->bus->number;
+       devfn = pdev->devfn;
+       pin = pdev->pin;
+       /* print the entry should happen on mptable identically */
+       mp_irq.type = MP_INTSRC;
+       mp_irq.irqtype = mp_INT;
+       mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
+                               (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
+       mp_irq.srcbus = number;
+       mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
+       ioapic = mp_find_ioapic(gsi);
+       mp_irq.dstapic = mp_ioapics[ioapic].apicid;
+       mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
+
+       save_mp_irq(&mp_irq);
+#endif
+       return 0;
+}
+
+int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity)
+{
+       int ioapic;
+       int ioapic_pin;
+       struct io_apic_irq_attr irq_attr;
 
        if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
                return gsi;
-#endif
 
        /* Don't set up the ACPI SCI because it's already set up */
        if (acpi_gbl_FADT.sci_interrupt == gsi)
@@ -1196,93 +1217,22 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
                gsi = ioapic_renumber_irq(ioapic, gsi);
 #endif
 
-       /*
-        * Avoid pin reprogramming.  PRTs typically include entries
-        * with redundant pin->gsi mappings (but unique PCI devices);
-        * we only program the IOAPIC on the first.
-        */
        if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
                printk(KERN_ERR "Invalid reference to IOAPIC pin "
-                      "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
+                      "%d-%d\n", mp_ioapics[ioapic].apicid,
                       ioapic_pin);
                return gsi;
        }
-       if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
-               pr_debug("Pin %d-%d already programmed\n",
-                        mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
-#ifdef CONFIG_X86_32
-               return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
-#else
-               return gsi;
-#endif
-       }
-
-       set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
-#ifdef CONFIG_X86_32
-       /*
-        * For GSI >= 64, use IRQ compression
-        */
-       if ((gsi >= IRQ_COMPRESSION_START)
-           && (triggering == ACPI_LEVEL_SENSITIVE)) {
-               /*
-                * For PCI devices assign IRQs in order, avoiding gaps
-                * due to unused I/O APIC pins.
-                */
-               int irq = gsi;
-               if (gsi < MAX_GSI_NUM) {
-                       /*
-                        * Retain the VIA chipset work-around (gsi > 15), but
-                        * avoid a problem where the 8254 timer (IRQ0) is setup
-                        * via an override (so it's not on pin 0 of the ioapic),
-                        * and at the same time, the pin 0 interrupt is a PCI
-                        * type.  The gsi > 15 test could cause these two pins
-                        * to be shared as IRQ0, and they are not shareable.
-                        * So test for this condition, and if necessary, avoid
-                        * the pin collision.
-                        */
-                       gsi = pci_irq++;
-                       /*
-                        * Don't assign IRQ used by ACPI SCI
-                        */
-                       if (gsi == acpi_gbl_FADT.sci_interrupt)
-                               gsi = pci_irq++;
-                       gsi_to_irq[irq] = gsi;
-               } else {
-                       printk(KERN_ERR "GSI %u is too high\n", gsi);
-                       return gsi;
-               }
-       }
-#endif
-       io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
-                               triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
-                               polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
-       return gsi;
-}
 
-int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
-                       u32 gsi, int triggering, int polarity)
-{
-#ifdef CONFIG_X86_MPPARSE
-       struct mpc_intsrc mp_irq;
-       int ioapic;
+       if (enable_update_mptable)
+               mp_config_acpi_gsi(dev, gsi, trigger, polarity);
 
-       if (!acpi_ioapic)
-               return 0;
+       set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin,
+                            trigger == ACPI_EDGE_SENSITIVE ? 0 : 1,
+                            polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
+       io_apic_set_pci_routing(dev, gsi, &irq_attr);
 
-       /* print the entry should happen on mptable identically */
-       mp_irq.type = MP_INTSRC;
-       mp_irq.irqtype = mp_INT;
-       mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
-                               (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
-       mp_irq.srcbus = number;
-       mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
-       ioapic = mp_find_ioapic(gsi);
-       mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
-       mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
-
-       save_mp_irq(&mp_irq);
-#endif
-       return 0;
+       return gsi;
 }
 
 /*
index 1c31cc0..167bc16 100644 (file)
@@ -9,7 +9,7 @@
 always         := wakeup.bin
 targets                := wakeup.elf wakeup.lds
 
-wakeup-y       += wakeup.o wakemain.o video-mode.o copy.o
+wakeup-y       += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o
 
 # The link order of the video-*.o modules can matter.  In particular,
 # video-vga.o *must* be listed first, followed by video-vesa.o.
diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S
new file mode 100644 (file)
index 0000000..f51eb0b
--- /dev/null
@@ -0,0 +1 @@
+#include "../../../boot/bioscall.S"
diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c
new file mode 100644 (file)
index 0000000..6206033
--- /dev/null
@@ -0,0 +1 @@
+#include "../../../boot/regs.c"
index a97db99..1c60554 100644 (file)
@@ -55,7 +55,16 @@ struct iommu_cmd {
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
                             struct unity_map_entry *e);
 static struct dma_ops_domain *find_protection_domain(u16 devid);
+static u64* alloc_pte(struct protection_domain *dom,
+                     unsigned long address, u64
+                     **pte_page, gfp_t gfp);
+static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
+                                     unsigned long start_page,
+                                     unsigned int pages);
 
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
 
 #ifdef CONFIG_AMD_IOMMU_STATS
 
@@ -213,7 +222,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data)
 {
        struct amd_iommu *iommu;
 
-       list_for_each_entry(iommu, &amd_iommu_list, list)
+       for_each_iommu(iommu)
                iommu_poll_events(iommu);
 
        return IRQ_HANDLED;
@@ -440,7 +449,7 @@ static void iommu_flush_domain(u16 domid)
        __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
                                      domid, 1, 1);
 
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
                spin_lock_irqsave(&iommu->lock, flags);
                __iommu_queue_command(iommu, &cmd);
                __iommu_completion_wait(iommu);
@@ -449,6 +458,35 @@ static void iommu_flush_domain(u16 domid)
        }
 }
 
+void amd_iommu_flush_all_domains(void)
+{
+       int domid;
+
+       /* Ids start at 1 here; only ids set in the alloc bitmap are flushed */
+       for (domid = 1; domid < MAX_DOMAIN_ID; ++domid) {
+               if (test_bit(domid, amd_iommu_pd_alloc_bitmap))
+                       iommu_flush_domain(domid);
+       }
+}
+
+void amd_iommu_flush_all_devices(void)
+{
+       int devid;
+
+       /*
+        * Ids without an amd_iommu_pd_table entry, or without an IOMMU in
+        * amd_iommu_rlookup_table, are skipped.
+        */
+       for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) {
+               struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
+
+               if (amd_iommu_pd_table[devid] == NULL || !iommu)
+                       continue;
+               iommu_queue_inv_dev_entry(iommu, devid);
+               iommu_completion_wait(iommu);
+       }
+}
+
 /****************************************************************************
  *
  * The functions below are used the create the page table mappings for
@@ -468,7 +506,7 @@ static int iommu_map_page(struct protection_domain *dom,
                          unsigned long phys_addr,
                          int prot)
 {
-       u64 __pte, *pte, *page;
+       u64 __pte, *pte;
 
        bus_addr  = PAGE_ALIGN(bus_addr);
        phys_addr = PAGE_ALIGN(phys_addr);
@@ -477,27 +515,7 @@ static int iommu_map_page(struct protection_domain *dom,
        if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
                return -EINVAL;
 
-       pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
-
-       if (!IOMMU_PTE_PRESENT(*pte)) {
-               page = (u64 *)get_zeroed_page(GFP_KERNEL);
-               if (!page)
-                       return -ENOMEM;
-               *pte = IOMMU_L2_PDE(virt_to_phys(page));
-       }
-
-       pte = IOMMU_PTE_PAGE(*pte);
-       pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
-
-       if (!IOMMU_PTE_PRESENT(*pte)) {
-               page = (u64 *)get_zeroed_page(GFP_KERNEL);
-               if (!page)
-                       return -ENOMEM;
-               *pte = IOMMU_L1_PDE(virt_to_phys(page));
-       }
-
-       pte = IOMMU_PTE_PAGE(*pte);
-       pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
+       pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL);
 
        if (IOMMU_PTE_PRESENT(*pte))
                return -EBUSY;
@@ -595,7 +613,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
                 * as allocated in the aperture
                 */
                if (addr < dma_dom->aperture_size)
-                       __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap);
+                       __set_bit(addr >> PAGE_SHIFT,
+                                 dma_dom->aperture[0]->bitmap);
        }
 
        return 0;
@@ -632,42 +651,191 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom,
  ****************************************************************************/
 
 /*
- * The address allocator core function.
+ * The address allocator core functions.
  *
  * called with domain->lock held
  */
+
+/*
+ * Walk the page table of @domain for @address. Returns NULL if the L2 or
+ * L1 entry is not present, otherwise a pointer to the L0 entry.
+ */
+static u64* fetch_pte(struct protection_domain *domain,
+                     unsigned long address)
+{
+       u64 *l2_entry, *l1_entry, *l1_table, *l0_table;
+
+       l2_entry = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*l2_entry))
+               return NULL;
+
+       l1_table = IOMMU_PTE_PAGE(*l2_entry);
+       l1_entry = &l1_table[IOMMU_PTE_L1_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*l1_entry))
+               return NULL;
+
+       l0_table = IOMMU_PTE_PAGE(*l1_entry);
+
+       /* The L0 entry is returned without a presence check. */
+       return &l0_table[IOMMU_PTE_L0_INDEX(address)];
+}
+
+/*
+ * Append one more aperture range to @dma_dom, growing aperture_size by
+ * APERTURE_RANGE_SIZE. With @populate the page table pages for the range
+ * are allocated up front. Returns 0 on success or -ENOMEM on failure.
+ */
+static int alloc_new_range(struct amd_iommu *iommu,
+                          struct dma_ops_domain *dma_dom,
+                          bool populate, gfp_t gfp)
+{
+       int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+       unsigned long addr;
+
+#ifdef CONFIG_IOMMU_STRESS
+       populate = false;
+#endif
+
+       if (index >= APERTURE_MAX_RANGES)
+               return -ENOMEM;
+
+       dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
+       if (!dma_dom->aperture[index])
+               return -ENOMEM;
+
+       dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
+       if (!dma_dom->aperture[index]->bitmap)
+               goto out_free;
+
+       dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+
+       if (populate) {
+               unsigned long address = dma_dom->aperture_size;
+               int i, num_ptes = APERTURE_RANGE_PAGES / 512;
+               u64 *pte, *pte_page;
+
+               for (i = 0; i < num_ptes; ++i) {
+                       pte = alloc_pte(&dma_dom->domain, address,
+                                       &pte_page, gfp);
+                       if (!pte)
+                               goto out_free;
+
+                       dma_dom->aperture[index]->pte_pages[i] = pte_page;
+
+                       address += APERTURE_RANGE_SIZE / 64;
+               }
+       }
+
+       dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+
+       /* Initialize the exclusion range if necessary */
+       if (iommu->exclusion_start &&
+           iommu->exclusion_start >= dma_dom->aperture[index]->offset &&
+           iommu->exclusion_start < dma_dom->aperture_size) {
+               unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
+               int pages = iommu_num_pages(iommu->exclusion_start,
+                                           iommu->exclusion_length,
+                                           PAGE_SIZE);
+               dma_ops_reserve_addresses(dma_dom, startpage, pages);
+       }
+
+       /*
+        * Check for areas already mapped as present in the new aperture
+        * range and mark those pages as reserved in the allocator. Such
+        * mappings may already exist as a result of requested unity
+        * mappings. The allocator is indexed by page number, not address.
+        */
+       for (addr = dma_dom->aperture[index]->offset;
+            addr < dma_dom->aperture_size;
+            addr += PAGE_SIZE) {
+               u64 *pte = fetch_pte(&dma_dom->domain, addr);
+               if (!pte || !IOMMU_PTE_PRESENT(*pte))
+                       continue;
+
+               dma_ops_reserve_addresses(dma_dom, addr >> PAGE_SHIFT, 1);
+       }
+
+       return 0;
+
+out_free:
+       free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+
+       kfree(dma_dom->aperture[index]);
+       dma_dom->aperture[index] = NULL;
+
+       return -ENOMEM;
+}
+
+static unsigned long dma_ops_area_alloc(struct device *dev,
+                                       struct dma_ops_domain *dom,
+                                       unsigned int pages,
+                                       unsigned long align_mask,
+                                       u64 dma_mask,
+                                       unsigned long start)
+{
+       unsigned long first_bit = (dom->next_address % APERTURE_RANGE_SIZE)
+                                       >> PAGE_SHIFT;
+       int nr_ranges = dom->aperture_size >> APERTURE_RANGE_SHIFT;
+       unsigned long seg_pages, max_bit;
+       unsigned long address = -1;
+       int idx;
+
+       seg_pages = ALIGN(dma_get_seg_boundary(dev) + 1,
+                         PAGE_SIZE) >> PAGE_SHIFT;
+
+       for (idx = start >> APERTURE_RANGE_SHIFT; idx < nr_ranges; ++idx) {
+               struct aperture_range *range = dom->aperture[idx];
+
+               if (range->offset >= dma_mask)
+                       break;
+
+               max_bit = iommu_device_max_index(APERTURE_RANGE_PAGES,
+                                                range->offset >> PAGE_SHIFT,
+                                                dma_mask >> PAGE_SHIFT);
+
+               address = iommu_area_alloc(range->bitmap, max_bit, first_bit,
+                                          pages, 0, seg_pages, align_mask);
+               if (address == -1) {
+                       /* Later ranges are searched from their first bit */
+                       first_bit = 0;
+                       continue;
+               }
+
+               /* Turn the bit index into an address; remember where it ends */
+               address = range->offset + (address << PAGE_SHIFT);
+               dom->next_address = address + (pages << PAGE_SHIFT);
+               break;
+       }
+
+       return address;
+}
+
 static unsigned long dma_ops_alloc_addresses(struct device *dev,
                                             struct dma_ops_domain *dom,
                                             unsigned int pages,
                                             unsigned long align_mask,
                                             u64 dma_mask)
 {
-       unsigned long limit;
        unsigned long address;
-       unsigned long boundary_size;
 
-       boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-                       PAGE_SIZE) >> PAGE_SHIFT;
-       limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0,
-                                      dma_mask >> PAGE_SHIFT);
+#ifdef CONFIG_IOMMU_STRESS
+       dom->next_address = 0;
+       dom->need_flush = true;
+#endif
 
-       if (dom->next_bit >= limit) {
-               dom->next_bit = 0;
-               dom->need_flush = true;
-       }
+       address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+                                    dma_mask, dom->next_address);
 
-       address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages,
-                                  0 , boundary_size, align_mask);
        if (address == -1) {
-               address = iommu_area_alloc(dom->bitmap, limit, 0, pages,
-                               0, boundary_size, align_mask);
+               dom->next_address = 0;
+               address = dma_ops_area_alloc(dev, dom, pages, align_mask,
+                                            dma_mask, 0);
                dom->need_flush = true;
        }
 
-       if (likely(address != -1)) {
-               dom->next_bit = address + pages;
-               address <<= PAGE_SHIFT;
-       } else
+       if (unlikely(address == -1))
                address = bad_dma_address;
 
        WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size);
@@ -684,11 +852,23 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
                                   unsigned long address,
                                   unsigned int pages)
 {
-       address >>= PAGE_SHIFT;
-       iommu_area_free(dom->bitmap, address, pages);
+       unsigned i = address >> APERTURE_RANGE_SHIFT;
+       struct aperture_range *range = dom->aperture[i];
 
-       if (address >= dom->next_bit)
+       BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
+
+#ifdef CONFIG_IOMMU_STRESS
+       if (i < 4)
+               return;
+#endif
+
+       if (address >= dom->next_address)
                dom->need_flush = true;
+
+       address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
+
+       iommu_area_free(range->bitmap, address, pages);
+
 }
 
 /****************************************************************************
@@ -736,12 +916,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
                                      unsigned long start_page,
                                      unsigned int pages)
 {
-       unsigned int last_page = dom->aperture_size >> PAGE_SHIFT;
+       unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT;
 
        if (start_page + pages > last_page)
                pages = last_page - start_page;
 
-       iommu_area_reserve(dom->bitmap, start_page, pages);
+       for (i = start_page; i < start_page + pages; ++i) {
+               int index = i / APERTURE_RANGE_PAGES;
+               int page  = i % APERTURE_RANGE_PAGES;
+               __set_bit(page, dom->aperture[index]->bitmap);
+       }
 }
 
 static void free_pagetable(struct protection_domain *domain)
@@ -780,14 +964,19 @@ static void free_pagetable(struct protection_domain *domain)
  */
 static void dma_ops_domain_free(struct dma_ops_domain *dom)
 {
+       int i;
+
        if (!dom)
                return;
 
        free_pagetable(&dom->domain);
 
-       kfree(dom->pte_pages);
-
-       kfree(dom->bitmap);
+       for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+               if (!dom->aperture[i])
+                       continue;
+               free_page((unsigned long)dom->aperture[i]->bitmap);
+               kfree(dom->aperture[i]);
+       }
 
        kfree(dom);
 }
@@ -797,19 +986,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
  * It also intializes the page table and the address allocator data
  * structures required for the dma_ops interface
  */
-static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
-                                                  unsigned order)
+static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu)
 {
        struct dma_ops_domain *dma_dom;
-       unsigned i, num_pte_pages;
-       u64 *l2_pde;
-       u64 address;
-
-       /*
-        * Currently the DMA aperture must be between 32 MB and 1GB in size
-        */
-       if ((order < 25) || (order > 30))
-               return NULL;
 
        dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
        if (!dma_dom)
@@ -826,55 +1005,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
        dma_dom->domain.priv = dma_dom;
        if (!dma_dom->domain.pt_root)
                goto free_dma_dom;
-       dma_dom->aperture_size = (1ULL << order);
-       dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8),
-                                 GFP_KERNEL);
-       if (!dma_dom->bitmap)
-               goto free_dma_dom;
-       /*
-        * mark the first page as allocated so we never return 0 as
-        * a valid dma-address. So we can use 0 as error value
-        */
-       dma_dom->bitmap[0] = 1;
-       dma_dom->next_bit = 0;
 
        dma_dom->need_flush = false;
        dma_dom->target_dev = 0xffff;
 
-       /* Intialize the exclusion range if necessary */
-       if (iommu->exclusion_start &&
-           iommu->exclusion_start < dma_dom->aperture_size) {
-               unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
-               int pages = iommu_num_pages(iommu->exclusion_start,
-                                           iommu->exclusion_length,
-                                           PAGE_SIZE);
-               dma_ops_reserve_addresses(dma_dom, startpage, pages);
-       }
+       if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL))
+               goto free_dma_dom;
 
        /*
-        * At the last step, build the page tables so we don't need to
-        * allocate page table pages in the dma_ops mapping/unmapping
-        * path.
+        * mark the first page as allocated so we never return 0 as
+        * a valid dma-address. So we can use 0 as error value
         */
-       num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512);
-       dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *),
-                       GFP_KERNEL);
-       if (!dma_dom->pte_pages)
-               goto free_dma_dom;
-
-       l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL);
-       if (l2_pde == NULL)
-               goto free_dma_dom;
+       dma_dom->aperture[0]->bitmap[0] = 1;
+       dma_dom->next_address = 0;
 
-       dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde));
-
-       for (i = 0; i < num_pte_pages; ++i) {
-               dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL);
-               if (!dma_dom->pte_pages[i])
-                       goto free_dma_dom;
-               address = virt_to_phys(dma_dom->pte_pages[i]);
-               l2_pde[i] = IOMMU_L1_PDE(address);
-       }
 
        return dma_dom;
 
@@ -983,7 +1127,6 @@ static int device_change_notifier(struct notifier_block *nb,
        struct protection_domain *domain;
        struct dma_ops_domain *dma_domain;
        struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
        unsigned long flags;
 
        if (devid > amd_iommu_last_bdf)
@@ -1002,17 +1145,7 @@ static int device_change_notifier(struct notifier_block *nb,
                          "to a non-dma-ops domain\n", dev_name(dev));
 
        switch (action) {
-       case BUS_NOTIFY_BOUND_DRIVER:
-               if (domain)
-                       goto out;
-               dma_domain = find_protection_domain(devid);
-               if (!dma_domain)
-                       dma_domain = iommu->default_dom;
-               attach_device(iommu, &dma_domain->domain, devid);
-               printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-                      "device %s\n", dma_domain->domain.id, dev_name(dev));
-               break;
-       case BUS_NOTIFY_UNBIND_DRIVER:
+       case BUS_NOTIFY_UNBOUND_DRIVER:
                if (!domain)
                        goto out;
                detach_device(domain, devid);
@@ -1022,7 +1155,7 @@ static int device_change_notifier(struct notifier_block *nb,
                dma_domain = find_protection_domain(devid);
                if (dma_domain)
                        goto out;
-               dma_domain = dma_ops_domain_alloc(iommu, order);
+               dma_domain = dma_ops_domain_alloc(iommu);
                if (!dma_domain)
                        goto out;
                dma_domain->target_dev = devid;
@@ -1133,8 +1266,8 @@ static int get_device_resources(struct device *dev,
                        dma_dom = (*iommu)->default_dom;
                *domain = &dma_dom->domain;
                attach_device(*iommu, *domain, *bdf);
-               printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-                               "device %s\n", (*domain)->id, dev_name(dev));
+               DUMP_printk("Using protection domain %d for device %s\n",
+                           (*domain)->id, dev_name(dev));
        }
 
        if (domain_for_device(_bdf) == NULL)
@@ -1143,6 +1276,66 @@ static int get_device_resources(struct device *dev,
        return 1;
 }
 
+/*
+ * Return the PTE for address, allocating missing tables; NULL if that fails
+ */
+static u64* alloc_pte(struct protection_domain *dom,
+                     unsigned long address, u64 **pte_page, gfp_t gfp)
+{
+       u64 *pte, *page;
+
+       pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*pte)) {
+               page = (u64 *)get_zeroed_page(gfp);
+               if (!page)
+                       return NULL;
+               *pte = IOMMU_L2_PDE(virt_to_phys(page));
+       }
+
+       pte = IOMMU_PTE_PAGE(*pte);
+       pte = &pte[IOMMU_PTE_L1_INDEX(address)];
+
+       if (!IOMMU_PTE_PRESENT(*pte)) {
+               page = (u64 *)get_zeroed_page(gfp);
+               if (!page)
+                       return NULL;
+               *pte = IOMMU_L1_PDE(virt_to_phys(page));
+       }
+
+       pte = IOMMU_PTE_PAGE(*pte);
+
+       if (pte_page)   /* optional out-param: base of the last-level table */
+               *pte_page = pte;
+
+       pte = &pte[IOMMU_PTE_L0_INDEX(address)];
+
+       return pte;
+}
+
+/*
+ * Fetch the PTE for an aperture address, allocating its page table on
+ * demand. Returns NULL if the range is missing or the allocation fails.
+ */
+static u64 *dma_ops_get_pte(struct dma_ops_domain *dom,
+                           unsigned long address)
+{
+       struct aperture_range *aperture;
+       u64 *pte, *pte_page = NULL; /* alloc_pte() leaves it unset on failure */
+
+       aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+       if (!aperture)
+               return NULL;
+
+       pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+       if (!pte) {
+               pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC);
+               aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page;
+       } else
+               pte += IOMMU_PTE_L0_INDEX(address);
+       return pte;
+}
+
 /*
  * This is the generic map function. It maps one 4kb page at paddr to
  * the given address in the DMA address space for the domain.
@@ -1159,8 +1352,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu,
 
        paddr &= PAGE_MASK;
 
-       pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
-       pte += IOMMU_PTE_L0_INDEX(address);
+       pte  = dma_ops_get_pte(dom, address);
+       if (!pte)
+               return bad_dma_address;
 
        __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC;
 
@@ -1185,14 +1379,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
                                 struct dma_ops_domain *dom,
                                 unsigned long address)
 {
+       struct aperture_range *aperture;
        u64 *pte;
 
        if (address >= dom->aperture_size)
                return;
 
-       WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
+       aperture = dom->aperture[APERTURE_RANGE_INDEX(address)];
+       if (!aperture)
+               return;
+
+       pte  = aperture->pte_pages[APERTURE_PAGE_INDEX(address)];
+       if (!pte)
+               return;
 
-       pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
        pte += IOMMU_PTE_L0_INDEX(address);
 
        WARN_ON(!*pte);
@@ -1216,7 +1416,7 @@ static dma_addr_t __map_single(struct device *dev,
                               u64 dma_mask)
 {
        dma_addr_t offset = paddr & ~PAGE_MASK;
-       dma_addr_t address, start;
+       dma_addr_t address, start, ret;
        unsigned int pages;
        unsigned long align_mask = 0;
        int i;
@@ -1232,14 +1432,33 @@ static dma_addr_t __map_single(struct device *dev,
        if (align)
                align_mask = (1UL << get_order(size)) - 1;
 
+retry:
        address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
                                          dma_mask);
-       if (unlikely(address == bad_dma_address))
-               goto out;
+       if (unlikely(address == bad_dma_address)) {
+               /*
+                * setting next_address here will let the address
+                * allocator only scan the new allocated range in the
+                * first run. This is a small optimization.
+                */
+               dma_dom->next_address = dma_dom->aperture_size;
+
+               if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC))
+                       goto out;
+
+               /*
+                * aperture was successfully enlarged by 128 MB, try
+                * allocation again
+                */
+               goto retry;
+       }
 
        start = address;
        for (i = 0; i < pages; ++i) {
-               dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+               ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir);
+               if (ret == bad_dma_address)
+                       goto out_unmap;
+
                paddr += PAGE_SIZE;
                start += PAGE_SIZE;
        }
@@ -1255,6 +1474,17 @@ static dma_addr_t __map_single(struct device *dev,
 
 out:
        return address;
+
+out_unmap:
+
+       for (--i; i >= 0; --i) {
+               start -= PAGE_SIZE;
+               dma_ops_domain_unmap(iommu, dma_dom, start);
+       }
+
+       dma_ops_free_addresses(dma_dom, address, pages);
+
+       return bad_dma_address;
 }
 
 /*
@@ -1537,8 +1767,10 @@ static void *alloc_coherent(struct device *dev, size_t size,
        *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
                                 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-       if (*dma_addr == bad_dma_address)
+       if (*dma_addr == bad_dma_address) {
+               spin_unlock_irqrestore(&domain->lock, flags);
                goto out_free;
+       }
 
        iommu_completion_wait(iommu);
 
@@ -1625,7 +1857,6 @@ static void prealloc_protection_domains(void)
        struct pci_dev *dev = NULL;
        struct dma_ops_domain *dma_dom;
        struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
        u16 devid;
 
        while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
@@ -1638,7 +1869,7 @@ static void prealloc_protection_domains(void)
                iommu = amd_iommu_rlookup_table[devid];
                if (!iommu)
                        continue;
-               dma_dom = dma_ops_domain_alloc(iommu, order);
+               dma_dom = dma_ops_domain_alloc(iommu);
                if (!dma_dom)
                        continue;
                init_unity_mappings_for_device(dma_dom, devid);
@@ -1664,7 +1895,6 @@ static struct dma_map_ops amd_iommu_dma_ops = {
 int __init amd_iommu_init_dma_ops(void)
 {
        struct amd_iommu *iommu;
-       int order = amd_iommu_aperture_order;
        int ret;
 
        /*
@@ -1672,8 +1902,8 @@ int __init amd_iommu_init_dma_ops(void)
         * found in the system. Devices not assigned to any other
         * protection domain will be assigned to the default one.
         */
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
-               iommu->default_dom = dma_ops_domain_alloc(iommu, order);
+       for_each_iommu(iommu) {
+               iommu->default_dom = dma_ops_domain_alloc(iommu);
                if (iommu->default_dom == NULL)
                        return -ENOMEM;
                iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
@@ -1710,7 +1940,7 @@ int __init amd_iommu_init_dma_ops(void)
 
 free_domains:
 
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
                if (iommu->default_dom)
                        dma_ops_domain_free(iommu->default_dom);
        }
@@ -1842,7 +2072,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 
        old_domain = domain_for_device(devid);
        if (old_domain)
-               return -EBUSY;
+               detach_device(old_domain, devid);
 
        attach_device(iommu, domain, devid);
 
index 8c0be09..238989e 100644 (file)
@@ -115,15 +115,21 @@ struct ivmd_header {
        u64 range_length;
 } __attribute__((packed));
 
+bool amd_iommu_dump;
+
 static int __initdata amd_iommu_detected;
 
 u16 amd_iommu_last_bdf;                        /* largest PCI device id we have
                                           to handle */
 LIST_HEAD(amd_iommu_unity_map);                /* a list of required unity mappings
                                           we find in ACPI */
-unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
+#ifdef CONFIG_IOMMU_STRESS
+bool amd_iommu_isolate = false;
+#else
 bool amd_iommu_isolate = true;         /* if true, device isolation is
                                           enabled */
+#endif
+
 bool amd_iommu_unmap_flush;            /* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);             /* list of all AMD IOMMUs in the
@@ -175,7 +181,7 @@ static inline void update_last_devid(u16 devid)
 static inline unsigned long tbl_size(int entry_size)
 {
        unsigned shift = PAGE_SHIFT +
-                        get_order(amd_iommu_last_bdf * entry_size);
+                        get_order(((int)amd_iommu_last_bdf + 1) * entry_size);
 
        return 1UL << shift;
 }
@@ -193,7 +199,7 @@ static inline unsigned long tbl_size(int entry_size)
  * This function set the exclusion range in the IOMMU. DMA accesses to the
  * exclusion range are passed through untranslated
  */
-static void __init iommu_set_exclusion_range(struct amd_iommu *iommu)
+static void iommu_set_exclusion_range(struct amd_iommu *iommu)
 {
        u64 start = iommu->exclusion_start & PAGE_MASK;
        u64 limit = (start + iommu->exclusion_length) & PAGE_MASK;
@@ -225,7 +231,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu)
 }
 
 /* Generic functions to enable/disable certain features of the IOMMU. */
-static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
+static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit)
 {
        u32 ctrl;
 
@@ -244,7 +250,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 }
 
 /* Function to enable the hardware */
-static void __init iommu_enable(struct amd_iommu *iommu)
+static void iommu_enable(struct amd_iommu *iommu)
 {
        printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
               dev_name(&iommu->dev->dev), iommu->cap_ptr);
@@ -252,11 +258,9 @@ static void __init iommu_enable(struct amd_iommu *iommu)
        iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }
 
-/* Function to enable IOMMU event logging and event interrupts */
-static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void iommu_disable(struct amd_iommu *iommu)
 {
-       iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
-       iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+       iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
 }
 
 /*
@@ -413,25 +417,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu)
 {
        u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                        get_order(CMD_BUFFER_SIZE));
-       u64 entry;
 
        if (cmd_buf == NULL)
                return NULL;
 
        iommu->cmd_buf_size = CMD_BUFFER_SIZE;
 
-       entry = (u64)virt_to_phys(cmd_buf);
+       return cmd_buf;
+}
+
+/*
+ * This function writes the command buffer address to the hardware and
+ * enables it.
+ */
+static void iommu_enable_command_buffer(struct amd_iommu *iommu)
+{
+       u64 entry;
+
+       BUG_ON(iommu->cmd_buf == NULL);
+
+       entry = (u64)virt_to_phys(iommu->cmd_buf);
        entry |= MMIO_CMD_SIZE_512;
+
        memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET,
-                       &entry, sizeof(entry));
+                   &entry, sizeof(entry));
 
        /* set head and tail to zero manually */
        writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
        writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
 
        iommu_feature_enable(iommu, CONTROL_CMDBUF_EN);
-
-       return cmd_buf;
 }
 
 static void __init free_command_buffer(struct amd_iommu *iommu)
@@ -443,20 +458,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
 /* allocates the memory where the IOMMU will log its events to */
 static u8 * __init alloc_event_buffer(struct amd_iommu *iommu)
 {
-       u64 entry;
        iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
                                                get_order(EVT_BUFFER_SIZE));
 
        if (iommu->evt_buf == NULL)
                return NULL;
 
+       return iommu->evt_buf;
+}
+
+static void iommu_enable_event_buffer(struct amd_iommu *iommu)
+{
+       u64 entry;
+
+       BUG_ON(iommu->evt_buf == NULL);
+
        entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK;
+
        memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET,
                    &entry, sizeof(entry));
 
-       iommu->evt_buf_size = EVT_BUFFER_SIZE;
-
-       return iommu->evt_buf;
+       iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 }
 
 static void __init free_event_buffer(struct amd_iommu *iommu)
@@ -596,32 +618,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
        p += sizeof(struct ivhd_header);
        end += h->length;
 
+
        while (p < end) {
                e = (struct ivhd_entry *)p;
                switch (e->type) {
                case IVHD_DEV_ALL:
+
+                       DUMP_printk("  DEV_ALL\t\t\t first devid: %02x:%02x.%x"
+                                   " last device %02x:%02x.%x flags: %02x\n",
+                                   PCI_BUS(iommu->first_device),
+                                   PCI_SLOT(iommu->first_device),
+                                   PCI_FUNC(iommu->first_device),
+                                   PCI_BUS(iommu->last_device),
+                                   PCI_SLOT(iommu->last_device),
+                                   PCI_FUNC(iommu->last_device),
+                                   e->flags);
+
                        for (dev_i = iommu->first_device;
                                        dev_i <= iommu->last_device; ++dev_i)
                                set_dev_entry_from_acpi(iommu, dev_i,
                                                        e->flags, 0);
                        break;
                case IVHD_DEV_SELECT:
+
+                       DUMP_printk("  DEV_SELECT\t\t\t devid: %02x:%02x.%x "
+                                   "flags: %02x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags);
+
                        devid = e->devid;
                        set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
                        break;
                case IVHD_DEV_SELECT_RANGE_START:
+
+                       DUMP_printk("  DEV_SELECT_RANGE_START\t "
+                                   "devid: %02x:%02x.%x flags: %02x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags);
+
                        devid_start = e->devid;
                        flags = e->flags;
                        ext_flags = 0;
                        alias = false;
                        break;
                case IVHD_DEV_ALIAS:
+
+                       DUMP_printk("  DEV_ALIAS\t\t\t devid: %02x:%02x.%x "
+                                   "flags: %02x devid_to: %02x:%02x.%x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags,
+                                   PCI_BUS(e->ext >> 8),
+                                   PCI_SLOT(e->ext >> 8),
+                                   PCI_FUNC(e->ext >> 8));
+
                        devid = e->devid;
                        devid_to = e->ext >> 8;
-                       set_dev_entry_from_acpi(iommu, devid, e->flags, 0);
+                       set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0);
                        amd_iommu_alias_table[devid] = devid_to;
                        break;
                case IVHD_DEV_ALIAS_RANGE:
+
+                       DUMP_printk("  DEV_ALIAS_RANGE\t\t "
+                                   "devid: %02x:%02x.%x flags: %02x "
+                                   "devid_to: %02x:%02x.%x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags,
+                                   PCI_BUS(e->ext >> 8),
+                                   PCI_SLOT(e->ext >> 8),
+                                   PCI_FUNC(e->ext >> 8));
+
                        devid_start = e->devid;
                        flags = e->flags;
                        devid_to = e->ext >> 8;
@@ -629,17 +702,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu,
                        alias = true;
                        break;
                case IVHD_DEV_EXT_SELECT:
+
+                       DUMP_printk("  DEV_EXT_SELECT\t\t devid: %02x:%02x.%x "
+                                   "flags: %02x ext: %08x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags, e->ext);
+
                        devid = e->devid;
                        set_dev_entry_from_acpi(iommu, devid, e->flags,
                                                e->ext);
                        break;
                case IVHD_DEV_EXT_SELECT_RANGE:
+
+                       DUMP_printk("  DEV_EXT_SELECT_RANGE\t devid: "
+                                   "%02x:%02x.%x flags: %02x ext: %08x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid),
+                                   e->flags, e->ext);
+
                        devid_start = e->devid;
                        flags = e->flags;
                        ext_flags = e->ext;
                        alias = false;
                        break;
                case IVHD_DEV_RANGE_END:
+
+                       DUMP_printk("  DEV_RANGE_END\t\t devid: %02x:%02x.%x\n",
+                                   PCI_BUS(e->devid),
+                                   PCI_SLOT(e->devid),
+                                   PCI_FUNC(e->devid));
+
                        devid = e->devid;
                        for (dev_i = devid_start; dev_i <= devid; ++dev_i) {
                                if (alias)
@@ -679,7 +774,7 @@ static void __init free_iommu_all(void)
 {
        struct amd_iommu *iommu, *next;
 
-       list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) {
+       for_each_iommu_safe(iommu, next) {
                list_del(&iommu->list);
                free_iommu_one(iommu);
                kfree(iommu);
@@ -710,7 +805,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
        if (!iommu->mmio_base)
                return -ENOMEM;
 
-       iommu_set_device_table(iommu);
        iommu->cmd_buf = alloc_command_buffer(iommu);
        if (!iommu->cmd_buf)
                return -ENOMEM;
@@ -746,6 +840,15 @@ static int __init init_iommu_all(struct acpi_table_header *table)
                h = (struct ivhd_header *)p;
                switch (*p) {
                case ACPI_IVHD_TYPE:
+
+                       DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x "
+                                   "seg: %d flags: %01x info %04x\n",
+                                   PCI_BUS(h->devid), PCI_SLOT(h->devid),
+                                   PCI_FUNC(h->devid), h->cap_ptr,
+                                   h->pci_seg, h->flags, h->info);
+                       DUMP_printk("       mmio-addr: %016llx\n",
+                                   h->mmio_phys);
+
                        iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL);
                        if (iommu == NULL)
                                return -ENOMEM;
@@ -773,56 +876,9 @@ static int __init init_iommu_all(struct acpi_table_header *table)
  *
  ****************************************************************************/
 
-static int __init iommu_setup_msix(struct amd_iommu *iommu)
-{
-       struct amd_iommu *curr;
-       struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */
-       int nvec = 0, i;
-
-       list_for_each_entry(curr, &amd_iommu_list, list) {
-               if (curr->dev == iommu->dev) {
-                       entries[nvec].entry = curr->evt_msi_num;
-                       entries[nvec].vector = 0;
-                       curr->int_enabled = true;
-                       nvec++;
-               }
-       }
-
-       if (pci_enable_msix(iommu->dev, entries, nvec)) {
-               pci_disable_msix(iommu->dev);
-               return 1;
-       }
-
-       for (i = 0; i < nvec; ++i) {
-               int r = request_irq(entries->vector, amd_iommu_int_handler,
-                                   IRQF_SAMPLE_RANDOM,
-                                   "AMD IOMMU",
-                                   NULL);
-               if (r)
-                       goto out_free;
-       }
-
-       return 0;
-
-out_free:
-       for (i -= 1; i >= 0; --i)
-               free_irq(entries->vector, NULL);
-
-       pci_disable_msix(iommu->dev);
-
-       return 1;
-}
-
 static int __init iommu_setup_msi(struct amd_iommu *iommu)
 {
        int r;
-       struct amd_iommu *curr;
-
-       list_for_each_entry(curr, &amd_iommu_list, list) {
-               if (curr->dev == iommu->dev)
-                       curr->int_enabled = true;
-       }
-
 
        if (pci_enable_msi(iommu->dev))
                return 1;
@@ -837,17 +893,18 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu)
                return 1;
        }
 
+       iommu->int_enabled = true;
+       iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
+
        return 0;
 }
 
-static int __init iommu_init_msi(struct amd_iommu *iommu)
+static int iommu_init_msi(struct amd_iommu *iommu)
 {
        if (iommu->int_enabled)
                return 0;
 
-       if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX))
-               return iommu_setup_msix(iommu);
-       else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
+       if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
                return iommu_setup_msi(iommu);
 
        return 1;
@@ -899,6 +956,7 @@ static int __init init_exclusion_range(struct ivmd_header *m)
 static int __init init_unity_map_range(struct ivmd_header *m)
 {
        struct unity_map_entry *e = 0;
+       char *s;
 
        e = kzalloc(sizeof(*e), GFP_KERNEL);
        if (e == NULL)
@@ -906,14 +964,19 @@ static int __init init_unity_map_range(struct ivmd_header *m)
 
        switch (m->type) {
        default:
+               kfree(e);
+               return 0;
        case ACPI_IVMD_TYPE:
+               s = "IVMD_TYPEi\t\t\t";
                e->devid_start = e->devid_end = m->devid;
                break;
        case ACPI_IVMD_TYPE_ALL:
+               s = "IVMD_TYPE_ALL\t\t";
                e->devid_start = 0;
                e->devid_end = amd_iommu_last_bdf;
                break;
        case ACPI_IVMD_TYPE_RANGE:
+               s = "IVMD_TYPE_RANGE\t\t";
                e->devid_start = m->devid;
                e->devid_end = m->aux;
                break;
@@ -922,6 +985,13 @@ static int __init init_unity_map_range(struct ivmd_header *m)
        e->address_end = e->address_start + PAGE_ALIGN(m->range_length);
        e->prot = m->flags >> 1;
 
+       DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x"
+                   " range_start: %016llx range_end: %016llx flags: %x\n", s,
+                   PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start),
+                   PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end),
+                   PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end),
+                   e->address_start, e->address_end, m->flags);
+
        list_add_tail(&e->list, &amd_iommu_unity_map);
 
        return 0;
@@ -967,18 +1037,28 @@ static void init_device_table(void)
  * This function finally enables all IOMMUs found in the system after
  * they have been initialized
  */
-static void __init enable_iommus(void)
+static void enable_iommus(void)
 {
        struct amd_iommu *iommu;
 
-       list_for_each_entry(iommu, &amd_iommu_list, list) {
+       for_each_iommu(iommu) {
+               iommu_set_device_table(iommu);
+               iommu_enable_command_buffer(iommu);
+               iommu_enable_event_buffer(iommu);
                iommu_set_exclusion_range(iommu);
                iommu_init_msi(iommu);
-               iommu_enable_event_logging(iommu);
                iommu_enable(iommu);
        }
 }
 
+static void disable_iommus(void)
+{
+       struct amd_iommu *iommu;
+
+       for_each_iommu(iommu)
+               iommu_disable(iommu);
+}
+
 /*
  * Suspend/Resume support
  * disable suspend until real resume implemented
@@ -986,12 +1066,31 @@ static void __init enable_iommus(void)
 
 static int amd_iommu_resume(struct sys_device *dev)
 {
+       /*
+        * Disable IOMMUs before reprogramming the hardware registers.
+        * IOMMU is still enabled from the resume kernel.
+        */
+       disable_iommus();
+
+       /* re-load the hardware */
+       enable_iommus();
+
+       /*
+        * we have to flush after the IOMMUs are enabled because a
+        * disabled IOMMU will never execute the commands we send
+        */
+       amd_iommu_flush_all_domains();
+       amd_iommu_flush_all_devices();
+
        return 0;
 }
 
 static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
 {
-       return -EINVAL;
+       /* disable IOMMUs to go out of the way for BIOS */
+       disable_iommus();
+
+       return 0;
 }
 
 static struct sysdev_class amd_iommu_sysdev_class = {
@@ -1137,9 +1236,6 @@ int __init amd_iommu_init(void)
 
        enable_iommus();
 
-       printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n",
-                       (1 << (amd_iommu_aperture_order-20)));
-
        printk(KERN_INFO "AMD IOMMU: device isolation ");
        if (amd_iommu_isolate)
                printk("enabled\n");
@@ -1211,6 +1307,13 @@ void __init amd_iommu_detect(void)
  *
  ****************************************************************************/
 
+static int __init parse_amd_iommu_dump(char *str)
+{
+       amd_iommu_dump = true;
+
+       return 1;
+}
+
 static int __init parse_amd_iommu_options(char *str)
 {
        for (; *str; ++str) {
@@ -1225,15 +1328,5 @@ static int __init parse_amd_iommu_options(char *str)
        return 1;
 }
 
-static int __init parse_amd_iommu_size_options(char *str)
-{
-       unsigned order = PAGE_SHIFT + get_order(memparse(str, &str));
-
-       if ((order > 24) && (order < 31))
-               amd_iommu_aperture_order = order;
-
-       return 1;
-}
-
+__setup("amd_iommu_dump", parse_amd_iommu_dump);
 __setup("amd_iommu=", parse_amd_iommu_options);
-__setup("amd_iommu_size=", parse_amd_iommu_size_options);
index f287092..a4c9cf0 100644 (file)
@@ -98,6 +98,29 @@ early_param("lapic", parse_lapic);
 /* Local APIC was disabled by the BIOS and enabled by the kernel */
 static int enabled_via_apicbase;
 
+/*
+ * Handle interrupt mode configuration register (IMCR).
+ * This register controls whether the interrupt signals
+ * that reach the BSP come from the master PIC or from the
+ * local APIC. Before entering Symmetric I/O Mode, either
+ * the BIOS or the operating system must switch out of
+ * PIC Mode by changing the IMCR.
+ */
+static inline void imcr_pic_to_apic(void)
+{
+       /* select IMCR register */
+       outb(0x70, 0x22);
+       /* NMI and 8259 INTR go through APIC */
+       outb(0x01, 0x23);
+}
+
+static inline void imcr_apic_to_pic(void)
+{
+       /* select IMCR register */
+       outb(0x70, 0x22);
+       /* NMI and 8259 INTR go directly to BSP */
+       outb(0x00, 0x23);
+}
 #endif
 
 #ifdef CONFIG_X86_64
@@ -111,13 +134,19 @@ static __init int setup_apicpmtimer(char *s)
 __setup("apicpmtimer", setup_apicpmtimer);
 #endif
 
+int x2apic_mode;
 #ifdef CONFIG_X86_X2APIC
-int x2apic;
 /* x2apic enabled before OS handover */
 static int x2apic_preenabled;
 static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
+       if (x2apic_enabled()) {
+               pr_warning("Bios already enabled x2apic, "
+                          "can't enforce nox2apic");
+               return 0;
+       }
+
        disable_x2apic = 1;
        setup_clear_cpu_cap(X86_FEATURE_X2APIC);
        return 0;
@@ -209,6 +238,31 @@ static int modern_apic(void)
        return lapic_get_version() >= 0x14;
 }
 
+/*
+ * Dummy apic->write installed by apic_disable(): discards the value and
+ * warns once only if the CPU has an APIC that has not been disabled.
+ */
+static void native_apic_write_dummy(u32 reg, u32 v)
+{
+       WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+}
+
+static u32 native_apic_read_dummy(u32 reg)
+{
+       WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+       return 0;
+}
+
+/*
+ * Point apic->read/apic->write at the dummy accessors above.
+ * This is one-way: nothing here restores the original accessors.
+ */
+void apic_disable(void)
+{
+       apic->read = native_apic_read_dummy;
+       apic->write = native_apic_write_dummy;
+}
+
 void native_apic_wait_icr_idle(void)
 {
        while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
@@ -348,7 +402,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
 
 static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
 {
-       unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
+       unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0);
        unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
 
        apic_write(reg, v);
@@ -815,7 +869,7 @@ void clear_local_APIC(void)
        u32 v;
 
        /* APIC hasn't been mapped yet */
-       if (!x2apic && !apic_phys)
+       if (!x2apic_mode && !apic_phys)
                return;
 
        maxlvt = lapic_get_maxlvt();
@@ -1287,7 +1341,7 @@ void check_x2apic(void)
 {
        if (x2apic_enabled()) {
                pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
-               x2apic_preenabled = x2apic = 1;
+               x2apic_preenabled = x2apic_mode = 1;
        }
 }
 
@@ -1295,7 +1349,7 @@ void enable_x2apic(void)
 {
        int msr, msr2;
 
-       if (!x2apic)
+       if (!x2apic_mode)
                return;
 
        rdmsr(MSR_IA32_APICBASE, msr, msr2);
@@ -1304,6 +1358,7 @@ void enable_x2apic(void)
                wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
        }
 }
+#endif /* CONFIG_X86_X2APIC */
 
 void __init enable_IR_x2apic(void)
 {
@@ -1312,32 +1367,21 @@ void __init enable_IR_x2apic(void)
        unsigned long flags;
        struct IO_APIC_route_entry **ioapic_entries = NULL;
 
-       if (!cpu_has_x2apic)
-               return;
-
-       if (!x2apic_preenabled && disable_x2apic) {
-               pr_info("Skipped enabling x2apic and Interrupt-remapping "
-                       "because of nox2apic\n");
-               return;
+       ret = dmar_table_init();
+       if (ret) {
+               pr_debug("dmar_table_init() failed with %d:\n", ret);
+               goto ir_failed;
        }
 
-       if (x2apic_preenabled && disable_x2apic)
-               panic("Bios already enabled x2apic, can't enforce nox2apic");
-
-       if (!x2apic_preenabled && skip_ioapic_setup) {
-               pr_info("Skipped enabling x2apic and Interrupt-remapping "
-                       "because of skipping io-apic setup\n");
-               return;
+       if (!intr_remapping_supported()) {
+               pr_debug("intr-remapping not supported\n");
+               goto ir_failed;
        }
 
-       ret = dmar_table_init();
-       if (ret) {
-               pr_info("dmar_table_init() failed with %d:\n", ret);
 
-               if (x2apic_preenabled)
-                       panic("x2apic enabled by bios. But IR enabling failed");
-               else
-                       pr_info("Not enabling x2apic,Intr-remapping\n");
+       if (!x2apic_preenabled && skip_ioapic_setup) {
+               pr_info("Skipped enabling intr-remap because of skipping "
+                       "io-apic setup\n");
                return;
        }
 
@@ -1357,19 +1401,16 @@ void __init enable_IR_x2apic(void)
        mask_IO_APIC_setup(ioapic_entries);
        mask_8259A();
 
-       ret = enable_intr_remapping(EIM_32BIT_APIC_ID);
-
-       if (ret && x2apic_preenabled) {
-               local_irq_restore(flags);
-               panic("x2apic enabled by bios. But IR enabling failed");
-       }
-
+       ret = enable_intr_remapping(x2apic_supported());
        if (ret)
                goto end_restore;
 
-       if (!x2apic) {
-               x2apic = 1;
+       pr_info("Enabled Interrupt-remapping\n");
+
+       if (x2apic_supported() && !x2apic_mode) {
+               x2apic_mode = 1;
                enable_x2apic();
+               pr_info("Enabled x2apic\n");
        }
 
 end_restore:
@@ -1378,37 +1419,34 @@ end_restore:
                 * IR enabling failed
                 */
                restore_IO_APIC_setup(ioapic_entries);
-       else
-               reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries);
 
        unmask_8259A();
        local_irq_restore(flags);
 
 end:
-       if (!ret) {
-               if (!x2apic_preenabled)
-                       pr_info("Enabled x2apic and interrupt-remapping\n");
-               else
-                       pr_info("Enabled Interrupt-remapping\n");
-       } else
-               pr_err("Failed to enable Interrupt-remapping and x2apic\n");
        if (ioapic_entries)
                free_ioapic_entries(ioapic_entries);
+
+       if (!ret)
+               return;
+
+ir_failed:
+       if (x2apic_preenabled)
+               panic("x2apic enabled by bios. But IR enabling failed");
+       else if (cpu_has_x2apic)
+               pr_info("Not enabling x2apic,Intr-remapping\n");
 #else
        if (!cpu_has_x2apic)
                return;
 
        if (x2apic_preenabled)
                panic("x2apic enabled prior OS handover,"
-                     " enable CONFIG_INTR_REMAP");
-
-       pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping "
-               " and x2apic\n");
+                     " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
 #endif
 
        return;
 }
-#endif /* CONFIG_X86_X2APIC */
+
 
 #ifdef CONFIG_X86_64
 /*
@@ -1425,7 +1463,6 @@ static int __init detect_init_APIC(void)
        }
 
        mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-       boot_cpu_physical_apicid = 0;
        return 0;
 }
 #else
@@ -1539,32 +1576,49 @@ void __init early_init_lapic_mapping(void)
  */
 void __init init_apic_mappings(void)
 {
-       if (x2apic) {
+       unsigned int new_apicid;
+
+       if (x2apic_mode) {
                boot_cpu_physical_apicid = read_apic_id();
                return;
        }
 
-       /*
-        * If no local APIC can be found then set up a fake all
-        * zeroes page to simulate the local APIC and another
-        * one for the IO-APIC.
-        */
+       /* If no local APIC can be found return early */
        if (!smp_found_config && detect_init_APIC()) {
-               apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE);
-               apic_phys = __pa(apic_phys);
-       } else
+               /* lets NOP'ify apic operations */
+               pr_info("APIC: disable apic facility\n");
+               apic_disable();
+       } else {
                apic_phys = mp_lapic_addr;
 
-       set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-       apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
-                               APIC_BASE, apic_phys);
+               /*
+                * acpi lapic path already maps that address in
+                * acpi_register_lapic_address()
+                */
+               if (!acpi_lapic)
+                       set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
+
+               apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
+                                       APIC_BASE, apic_phys);
+       }
 
        /*
         * Fetch the APIC ID of the BSP in case we have a
         * default configuration (or the MP table is broken).
         */
-       if (boot_cpu_physical_apicid == -1U)
-               boot_cpu_physical_apicid = read_apic_id();
+       new_apicid = read_apic_id();
+       if (boot_cpu_physical_apicid != new_apicid) {
+               boot_cpu_physical_apicid = new_apicid;
+               /*
+                * yeah -- we lie about apic_version
+                * if the apic was disabled via a boot option,
+                * but that is not a problem for an SMP-compiled kernel
+                * since smp_sanity_check is prepared for such a case
+                * and disables smp mode
+                */
+               apic_version[new_apicid] =
+                        GET_APIC_VERSION(apic_read(APIC_LVR));
+       }
 }
 
 /*
@@ -1733,8 +1787,7 @@ void __init connect_bsp_APIC(void)
                 */
                apic_printk(APIC_VERBOSE, "leaving PIC mode, "
                                "enabling APIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x01, 0x23);
+               imcr_pic_to_apic();
        }
 #endif
        if (apic->enable_apic_mode)
@@ -1762,8 +1815,7 @@ void disconnect_bsp_APIC(int virt_wire_setup)
                 */
                apic_printk(APIC_VERBOSE, "disabling APIC mode, "
                                "entering PIC mode.\n");
-               outb(0x70, 0x22);
-               outb(0x00, 0x23);
+               imcr_apic_to_pic();
                return;
        }
 #endif
@@ -1969,10 +2021,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 
        local_irq_save(flags);
        disable_local_APIC();
-#ifdef CONFIG_INTR_REMAP
+
        if (intr_remapping_enabled)
                disable_intr_remapping();
-#endif
+
        local_irq_restore(flags);
        return 0;
 }
@@ -1982,42 +2034,34 @@ static int lapic_resume(struct sys_device *dev)
        unsigned int l, h;
        unsigned long flags;
        int maxlvt;
-
-#ifdef CONFIG_INTR_REMAP
-       int ret;
+       int ret = 0;
        struct IO_APIC_route_entry **ioapic_entries = NULL;
 
        if (!apic_pm_state.active)
                return 0;
 
        local_irq_save(flags);
-       if (x2apic) {
+       if (intr_remapping_enabled) {
                ioapic_entries = alloc_ioapic_entries();
                if (!ioapic_entries) {
                        WARN(1, "Alloc ioapic_entries in lapic resume failed.");
-                       return -ENOMEM;
+                       ret = -ENOMEM;
+                       goto restore;
                }
 
                ret = save_IO_APIC_setup(ioapic_entries);
                if (ret) {
                        WARN(1, "Saving IO-APIC state failed: %d\n", ret);
                        free_ioapic_entries(ioapic_entries);
-                       return ret;
+                       goto restore;
                }
 
                mask_IO_APIC_setup(ioapic_entries);
                mask_8259A();
-               enable_x2apic();
        }
-#else
-       if (!apic_pm_state.active)
-               return 0;
 
-       local_irq_save(flags);
-       if (x2apic)
+       if (x2apic_mode)
                enable_x2apic();
-#endif
-
        else {
                /*
                 * Make sure the APICBASE points to the right address
@@ -2055,21 +2099,16 @@ static int lapic_resume(struct sys_device *dev)
        apic_write(APIC_ESR, 0);
        apic_read(APIC_ESR);
 
-#ifdef CONFIG_INTR_REMAP
-       if (intr_remapping_enabled)
-               reenable_intr_remapping(EIM_32BIT_APIC_ID);
-
-       if (x2apic) {
+       if (intr_remapping_enabled) {
+               reenable_intr_remapping(x2apic_mode);
                unmask_8259A();
                restore_IO_APIC_setup(ioapic_entries);
                free_ioapic_entries(ioapic_entries);
        }
-#endif
-
+restore:
        local_irq_restore(flags);
 
-
-       return 0;
+       return ret;
 }
 
 /*
@@ -2117,31 +2156,14 @@ static void apic_pm_activate(void) { }
 #endif /* CONFIG_PM */
 
 #ifdef CONFIG_X86_64
-/*
- * apic_is_clustered_box() -- Check if we can expect good TSC
- *
- * Thus far, the major user of this is IBM's Summit2 series:
- *
- * Clustered boxes may have unsynced TSC problems if they are
- * multi-chassis. Use available data to take a good guess.
- * If in doubt, go HPET.
- */
-__cpuinit int apic_is_clustered_box(void)
+
+static int __cpuinit apic_cluster_num(void)
 {
        int i, clusters, zeros;
        unsigned id;
        u16 *bios_cpu_apicid;
        DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS);
 
-       /*
-        * there is not this kind of box with AMD CPU yet.
-        * Some AMD box with quadcore cpu and 8 sockets apicid
-        * will be [4, 0x23] or [8, 0x27] could be thought to
-        * vsmp box still need checking...
-        */
-       if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box())
-               return 0;
-
        bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
        bitmap_zero(clustermap, NUM_APIC_CLUSTERS);
 
@@ -2177,18 +2199,67 @@ __cpuinit int apic_is_clustered_box(void)
                        ++zeros;
        }
 
-       /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are
-        * not guaranteed to be synced between boards
-        */
-       if (is_vsmp_box() && clusters > 1)
+       return clusters;
+}
+
+static int __cpuinitdata multi_checked;
+static int __cpuinitdata multi;
+
+static int __cpuinit set_multi(const struct dmi_system_id *d)
+{
+       if (multi)
+               return 0;
+       pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
+       multi = 1;
+       return 0;
+}
+
+static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = {
+       {
+               .callback = set_multi,
+               .ident = "IBM System Summit2",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
+               },
+       },
+       {}
+};
+
+static void __cpuinit dmi_check_multi(void)
+{
+       if (multi_checked)
+               return;
+
+       dmi_check_system(multi_dmi_table);
+       multi_checked = 1;
+}
+
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ * clustered boxes may have unsynced TSCs if they are multi-chassis.
+ * Such systems are identified via a DMI match; vSMP boxes are
+ * additionally checked by counting APIC clusters.
+ */
+__cpuinit int apic_is_clustered_box(void)
+{
+       dmi_check_multi();
+       if (multi)
                return 1;
 
+       if (!is_vsmp_box())
+               return 0;
+
        /*
-        * If clusters > 2, then should be multi-chassis.
-        * May have to revisit this when multi-core + hyperthreaded CPUs come
-        * out, but AFAIK this will work even for them.
+        * ScaleMP vSMPowered boxes have one cluster per board and TSCs are
+        * not guaranteed to be synced between boards
         */
-       return (clusters > 2);
+       if (apic_cluster_num() > 1)
+               return 1;
+
+       return 0;
 }
 #endif
 
index 306e5e8..d0c99ab 100644 (file)
@@ -161,7 +161,7 @@ static int flat_apic_id_registered(void)
 
 static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
 {
-       return hard_smp_processor_id() >> index_msb;
+       return initial_apic_id >> index_msb;
 }
 
 struct apic apic_flat =  {
@@ -235,7 +235,7 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
         * regardless of how many processors are present (x86_64 ES7000
         * is an example).
         */
-       if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
+       if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
                (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
                printk(KERN_DEBUG "system APIC only can use physical flat");
                return 1;
index 3029477..69328ac 100644 (file)
@@ -145,7 +145,7 @@ es7000_rename_gsi(int ioapic, int gsi)
        return gsi;
 }
 
-static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
+static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
 {
        unsigned long vect = 0, psaival = 0;
 
index 30da617..1946fac 100644 (file)
@@ -59,6 +59,7 @@
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
 #include <asm/hpet.h>
+#include <asm/hw_irq.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_irq.h>
 
@@ -129,12 +130,9 @@ struct irq_pin_list {
        struct irq_pin_list *next;
 };
 
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
 {
        struct irq_pin_list *pin;
-       int node;
-
-       node = cpu_to_node(cpu);
 
        pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
 
@@ -148,9 +146,6 @@ struct irq_cfg {
        unsigned move_cleanup_count;
        u8 vector;
        u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-       u8 move_desc_pending : 1;
-#endif
 };
 
 /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -212,12 +207,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
        return cfg;
 }
 
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
 {
        struct irq_cfg *cfg;
-       int node;
-
-       node = cpu_to_node(cpu);
 
        cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
        if (cfg) {
@@ -238,13 +230,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)
        return cfg;
 }
 
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
 {
        struct irq_cfg *cfg;
 
        cfg = desc->chip_data;
        if (!cfg) {
-               desc->chip_data = get_one_free_irq_cfg(cpu);
+               desc->chip_data = get_one_free_irq_cfg(node);
                if (!desc->chip_data) {
                        printk(KERN_ERR "can not alloc irq_cfg\n");
                        BUG_ON(1);
@@ -254,10 +246,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu)
        return 0;
 }
 
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
 static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
 {
        struct irq_pin_list *old_entry, *head, *tail, *entry;
 
@@ -266,7 +257,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
        if (!old_entry)
                return;
 
-       entry = get_one_free_irq_2_pin(cpu);
+       entry = get_one_free_irq_2_pin(node);
        if (!entry)
                return;
 
@@ -276,7 +267,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
        tail            = entry;
        old_entry       = old_entry->next;
        while (old_entry) {
-               entry = get_one_free_irq_2_pin(cpu);
+               entry = get_one_free_irq_2_pin(node);
                if (!entry) {
                        entry = head;
                        while (entry) {
@@ -316,12 +307,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
 }
 
 void arch_init_copy_chip_data(struct irq_desc *old_desc,
-                                struct irq_desc *desc, int cpu)
+                                struct irq_desc *desc, int node)
 {
        struct irq_cfg *cfg;
        struct irq_cfg *old_cfg;
 
-       cfg = get_one_free_irq_cfg(cpu);
+       cfg = get_one_free_irq_cfg(node);
 
        if (!cfg)
                return;
@@ -332,7 +323,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc,
 
        memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
 
-       init_copy_irq_2_pin(old_cfg, cfg, cpu);
+       init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
 static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -356,19 +347,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
                old_desc->chip_data = NULL;
        }
 }
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg = desc->chip_data;
-
-       if (!cfg->move_in_progress) {
-               /* it means that domain is not changed */
-               if (!cpumask_intersects(desc->affinity, mask))
-                       cfg->move_desc_pending = 1;
-       }
-}
-#endif
+/* end for move_irq_desc */
 
 #else
 static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -378,13 +357,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq)
 
 #endif
 
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
 struct io_apic {
        unsigned int index;
        unsigned int unused[3];
@@ -518,132 +490,18 @@ static void ioapic_mask_entry(int apic, int pin)
        spin_unlock_irqrestore(&ioapic_lock, flags);
 }
 
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
-       cpumask_var_t cleanup_mask;
-
-       if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-               unsigned int i;
-               cfg->move_cleanup_count = 0;
-               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-                       cfg->move_cleanup_count++;
-               for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-                       apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-       } else {
-               cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-               cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
-               apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-               free_cpumask_var(cleanup_mask);
-       }
-       cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
-       int apic, pin;
-       struct irq_pin_list *entry;
-       u8 vector = cfg->vector;
-
-       entry = cfg->irq_2_pin;
-       for (;;) {
-               unsigned int reg;
-
-               if (!entry)
-                       break;
-
-               apic = entry->apic;
-               pin = entry->pin;
-               /*
-                * With interrupt-remapping, destination information comes
-                * from interrupt-remapping table entry.
-                */
-               if (!irq_remapped(irq))
-                       io_apic_write(apic, 0x11 + pin*2, dest);
-               reg = io_apic_read(apic, 0x10 + pin*2);
-               reg &= ~IO_APIC_REDIR_VECTOR_MASK;
-               reg |= vector;
-               io_apic_modify(apic, 0x10 + pin*2, reg);
-               if (!entry->next)
-                       break;
-               entry = entry->next;
-       }
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg;
-       unsigned int irq;
-
-       if (!cpumask_intersects(mask, cpu_online_mask))
-               return BAD_APICID;
-
-       irq = desc->irq;
-       cfg = desc->chip_data;
-       if (assign_irq_vector(irq, cfg, mask))
-               return BAD_APICID;
-
-       /* check that before desc->addinity get updated */
-       set_extra_move_desc(desc, mask);
-
-       cpumask_copy(desc->affinity, mask);
-
-       return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
-}
-
-static void
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-       struct irq_cfg *cfg;
-       unsigned long flags;
-       unsigned int dest;
-       unsigned int irq;
-
-       irq = desc->irq;
-       cfg = desc->chip_data;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       dest = set_desc_affinity(desc, mask);
-       if (dest != BAD_APICID) {
-               /* Only the high 8 bits are valid. */
-               dest = SET_APIC_LOGICAL_ID(dest);
-               __target_IO_APIC_irq(irq, dest, cfg);
-       }
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
-       struct irq_desc *desc;
-
-       desc = irq_to_desc(irq);
-
-       set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
  * fast in the common case, and fast for shared ISA-space IRQs.
  */
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
        struct irq_pin_list *entry;
 
        entry = cfg->irq_2_pin;
        if (!entry) {
-               entry = get_one_free_irq_2_pin(cpu);
+               entry = get_one_free_irq_2_pin(node);
                if (!entry) {
                        printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
                                        apic, pin);
@@ -663,7 +521,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
                entry = entry->next;
        }
 
-       entry->next = get_one_free_irq_2_pin(cpu);
+       entry->next = get_one_free_irq_2_pin(node);
        entry = entry->next;
        entry->apic = apic;
        entry->pin = pin;
@@ -672,7 +530,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
 /*
  * Reroute an IRQ to a different pin.
  */
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
                                      int oldapic, int oldpin,
                                      int newapic, int newpin)
 {
@@ -692,7 +550,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
 
        /* why? call replace before add? */
        if (!replaced)
-               add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+               add_pin_to_irq_node(cfg, node, newapic, newpin);
 }
 
 static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -850,7 +708,6 @@ static int __init ioapic_pirq_setup(char *str)
 __setup("pirq=", ioapic_pirq_setup);
 #endif /* CONFIG_X86_32 */
 
-#ifdef CONFIG_INTR_REMAP
 struct IO_APIC_route_entry **alloc_ioapic_entries(void)
 {
        int apic;
@@ -948,20 +805,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
        return 0;
 }
 
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
-       struct IO_APIC_route_entry **ioapic_entries)
-
-{
-       /*
-        * for now plain restore of previous settings.
-        * TBD: In the case of OS enabling interrupt-remapping,
-        * IO-APIC RTE's need to be setup to point to interrupt-remapping
-        * table entries. for now, do a plain restore, and wait for
-        * the setup_IO_APIC_irqs() to do proper initialization.
-        */
-       restore_IO_APIC_setup(ioapic_entries);
-}
-
 void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 {
        int apic;
@@ -971,7 +814,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
 
        kfree(ioapic_entries);
 }
-#endif
 
 /*
  * Find the IRQ entry number of a certain pin.
@@ -1032,54 +874,6 @@ static int __init find_isa_irq_apic(int irq, int type)
        return -1;
 }
 
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
-       int apic, i, best_guess = -1;
-
-       apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
-               bus, slot, pin);
-       if (test_bit(bus, mp_bus_not_pci)) {
-               apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
-               return -1;
-       }
-       for (i = 0; i < mp_irq_entries; i++) {
-               int lbus = mp_irqs[i].srcbus;
-
-               for (apic = 0; apic < nr_ioapics; apic++)
-                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
-                           mp_irqs[i].dstapic == MP_APIC_ALL)
-                               break;
-
-               if (!test_bit(lbus, mp_bus_not_pci) &&
-                   !mp_irqs[i].irqtype &&
-                   (bus == lbus) &&
-                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
-                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
-                       if (!(apic || IO_APIC_IRQ(irq)))
-                               continue;
-
-                       if (pin == (mp_irqs[i].srcbusirq & 3))
-                               return irq;
-                       /*
-                        * Use the first all-but-pin matching entry as a
-                        * best-guess fuzzy result for broken mptables.
-                        */
-                       if (best_guess < 0)
-                               best_guess = irq;
-               }
-       }
-       return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
 #if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 /*
  * EISA Edge/Level control register, ELCR
@@ -1298,6 +1092,64 @@ static int pin_2_irq(int idx, int apic, int pin)
        return irq;
 }
 
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       int apic, i, best_guess = -1;
+
+       apic_printk(APIC_DEBUG,
+                   "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+                   bus, slot, pin);
+       if (test_bit(bus, mp_bus_not_pci)) {
+               apic_printk(APIC_VERBOSE,
+                           "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+               return -1;
+       }
+       for (i = 0; i < mp_irq_entries; i++) {
+               int lbus = mp_irqs[i].srcbus;
+
+               for (apic = 0; apic < nr_ioapics; apic++)
+                       if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+                           mp_irqs[i].dstapic == MP_APIC_ALL)
+                               break;
+
+               if (!test_bit(lbus, mp_bus_not_pci) &&
+                   !mp_irqs[i].irqtype &&
+                   (bus == lbus) &&
+                   (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+                       int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+                       if (!(apic || IO_APIC_IRQ(irq)))
+                               continue;
+
+                       if (pin == (mp_irqs[i].srcbusirq & 3)) {
+                               set_io_apic_irq_attr(irq_attr, apic,
+                                                    mp_irqs[i].dstirq,
+                                                    irq_trigger(i),
+                                                    irq_polarity(i));
+                               return irq;
+                       }
+                       /*
+                        * Use the first all-but-pin matching entry as a
+                        * best-guess fuzzy result for broken mptables.
+                        */
+                       if (best_guess < 0) {
+                               set_io_apic_irq_attr(irq_attr, apic,
+                                                    mp_irqs[i].dstirq,
+                                                    irq_trigger(i),
+                                                    irq_polarity(i));
+                               best_guess = irq;
+                       }
+               }
+       }
+       return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
 void lock_vector_lock(void)
 {
        /* Used to the online set of cpus does not change
@@ -1628,58 +1480,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq
        ioapic_write_entry(apic_id, pin, entry);
 }
 
+/*
+ * Per-IO-APIC routing state, indexed by IO-APIC number.  pin_programmed
+ * holds one bit per pin; io_apic_set_pci_routing() tests and sets it so
+ * that a pin is only programmed on the first routing request.
+ */
+static struct {
+       DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
 static void __init setup_IO_APIC_irqs(void)
 {
-       int apic_id, pin, idx, irq;
+       int apic_id = 0, pin, idx, irq;
        int notcon = 0;
        struct irq_desc *desc;
        struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
+       int node = cpu_to_node(boot_cpu_id);
 
        apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
 
-       for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
-               for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
-
-                       idx = find_irq_entry(apic_id, pin, mp_INT);
-                       if (idx == -1) {
-                               if (!notcon) {
-                                       notcon = 1;
-                                       apic_printk(APIC_VERBOSE,
-                                               KERN_DEBUG " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               } else
-                                       apic_printk(APIC_VERBOSE, " %d-%d",
-                                               mp_ioapics[apic_id].apicid, pin);
-                               continue;
-                       }
-                       if (notcon) {
-                               apic_printk(APIC_VERBOSE,
-                                       " (apicid-pin) not connected\n");
-                               notcon = 0;
-                       }
+#ifdef CONFIG_ACPI
+       if (!acpi_disabled && acpi_ioapic) {
+               apic_id = mp_find_ioapic(0);
+               if (apic_id < 0)
+                       apic_id = 0;
+       }
+#endif
 
-                       irq = pin_2_irq(idx, apic_id, pin);
+       for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+               idx = find_irq_entry(apic_id, pin, mp_INT);
+               if (idx == -1) {
+                       if (!notcon) {
+                               notcon = 1;
+                               apic_printk(APIC_VERBOSE,
+                                       KERN_DEBUG " %d-%d",
+                                       mp_ioapics[apic_id].apicid, pin);
+                       } else
+                               apic_printk(APIC_VERBOSE, " %d-%d",
+                                       mp_ioapics[apic_id].apicid, pin);
+                       continue;
+               }
+               if (notcon) {
+                       apic_printk(APIC_VERBOSE,
+                               " (apicid-pin) not connected\n");
+                       notcon = 0;
+               }
 
-                       /*
-                        * Skip the timer IRQ if there's a quirk handler
-                        * installed and if it returns 1:
-                        */
-                       if (apic->multi_timer_check &&
-                                       apic->multi_timer_check(apic_id, irq))
-                               continue;
+               irq = pin_2_irq(idx, apic_id, pin);
 
-                       desc = irq_to_desc_alloc_cpu(irq, cpu);
-                       if (!desc) {
-                               printk(KERN_INFO "can not get irq_desc for %d\n", irq);
-                               continue;
-                       }
-                       cfg = desc->chip_data;
-                       add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
+               /*
+                * Skip the timer IRQ if there's a quirk handler
+                * installed and if it returns 1:
+                */
+               if (apic->multi_timer_check &&
+                               apic->multi_timer_check(apic_id, irq))
+                       continue;
 
-                       setup_IO_APIC_irq(apic_id, pin, irq, desc,
-                                       irq_trigger(idx), irq_polarity(idx));
+               desc = irq_to_desc_alloc_node(irq, node);
+               if (!desc) {
+                       printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+                       continue;
                }
+               cfg = desc->chip_data;
+               add_pin_to_irq_node(cfg, node, apic_id, pin);
+               /*
+                * don't mark it in pin_programmed, so later acpi could
+                * set it correctly when irq < 16
+                */
+               setup_IO_APIC_irq(apic_id, pin, irq, desc,
+                               irq_trigger(idx), irq_polarity(idx));
        }
 
        if (notcon)
@@ -1869,7 +1733,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base)
 
 __apicdebuginit(void) print_local_APIC(void *dummy)
 {
-       unsigned int v, ver, maxlvt;
+       unsigned int i, v, ver, maxlvt;
        u64 icr;
 
        if (apic_verbosity == APIC_QUIET)
@@ -1957,6 +1821,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy)
        printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
        v = apic_read(APIC_TDCR);
        printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+       if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+               v = apic_read(APIC_EFEAT);
+               maxlvt = (v >> 16) & 0xff;
+               printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+               v = apic_read(APIC_ECTRL);
+               printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+               for (i = 0; i < maxlvt; i++) {
+                       v = apic_read(APIC_EILVTn(i));
+                       printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+               }
+       }
        printk("\n");
 }
 
@@ -2005,6 +1881,11 @@ __apicdebuginit(void) print_PIC(void)
 __apicdebuginit(int) print_all_ICs(void)
 {
        print_PIC();
+
+       /* don't print out if apic is not there */
+       if (!cpu_has_apic || disable_apic)
+               return 0;
+
        print_all_local_APICs();
        print_IO_APIC();
 
@@ -2360,9 +2241,121 @@ static int ioapic_retrigger_irq(unsigned int irq)
  */
 
 #ifdef CONFIG_SMP
+/*
+ * Send an IRQ_MOVE_CLEANUP_VECTOR IPI to every online CPU in
+ * cfg->old_domain, store the number of targeted CPUs in
+ * cfg->move_cleanup_count, then clear cfg->move_in_progress.
+ */
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+       cpumask_var_t targets;
+
+       if (likely(alloc_cpumask_var(&targets, GFP_ATOMIC))) {
+               /* A temporary mask is available: one IPI for all targets. */
+               cpumask_and(targets, cfg->old_domain, cpu_online_mask);
+               cfg->move_cleanup_count = cpumask_weight(targets);
+               apic->send_IPI_mask(targets, IRQ_MOVE_CLEANUP_VECTOR);
+               free_cpumask_var(targets);
+       } else {
+               /*
+                * No temporary mask: finish counting the targets first,
+                * then send the IPIs one CPU at a time.
+                */
+               unsigned int cpu;
+
+               cfg->move_cleanup_count = 0;
+               for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask)
+                       cfg->move_cleanup_count++;
+               for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask)
+                       apic->send_IPI_mask(cpumask_of(cpu), IRQ_MOVE_CLEANUP_VECTOR);
+       }
+       cfg->move_in_progress = 0;
+}
+
+/*
+ * For every (IO-APIC, pin) pair on cfg->irq_2_pin: write dest to register
+ * 0x11 + pin*2 unless the irq is remapped, then replace the vector field
+ * of register 0x10 + pin*2 with cfg->vector.
+ */
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+       struct irq_pin_list *entry;
+       u8 vector = cfg->vector;
+
+       for (entry = cfg->irq_2_pin; entry; entry = entry->next) {
+               int ioapic_idx = entry->apic;
+               int pin = entry->pin;
+               unsigned int reg;
+
+               /*
+                * With interrupt-remapping, destination information comes
+                * from interrupt-remapping table entry.
+                */
+               if (!irq_remapped(irq))
+                       io_apic_write(ioapic_idx, 0x11 + pin*2, dest);
+
+               reg = io_apic_read(ioapic_idx, 0x10 + pin*2);
+               reg = (reg & ~IO_APIC_REDIR_VECTOR_MASK) | vector;
+               io_apic_modify(ioapic_idx, 0x10 + pin*2, reg);
+       }
+}
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * On success, copies mask into desc->affinity and returns the result of
+ * ->cpu_mask_to_apicid_and() for that affinity and cfg->domain.
+ * On failure (mask has no online CPU, or no vector could be assigned),
+ * returns BAD_APICID and leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+       struct irq_cfg *cfg = desc->chip_data;
+
+       if (!cpumask_intersects(mask, cpu_online_mask) ||
+           assign_irq_vector(desc->irq, cfg, mask))
+               return BAD_APICID;
+
+       cpumask_copy(desc->affinity, mask);
+
+       return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+}
+
+/*
+ * Under ioapic_lock, update the affinity of desc to mask and retarget
+ * its IO-APIC pins.  Returns 0 on success, -1 if set_desc_affinity()
+ * returned BAD_APICID.
+ */
+static int
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+       struct irq_cfg *cfg = desc->chip_data;
+       unsigned int irq = desc->irq;
+       unsigned long flags;
+       unsigned int dest;
+       int ret;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       dest = set_desc_affinity(desc, mask);
+       if (dest == BAD_APICID) {
+               ret = -1;
+       } else {
+               /* Only the high 8 bits are valid. */
+               dest = SET_APIC_LOGICAL_ID(dest);
+               __target_IO_APIC_irq(irq, dest, cfg);
+               ret = 0;
+       }
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+
+       return ret;
+}
+
+/*
+ * irq-number wrapper: look up the descriptor for irq and hand it to
+ * set_ioapic_affinity_irq_desc(), returning that function's result.
+ */
+static int
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+       return set_ioapic_affinity_irq_desc(irq_to_desc(irq), mask);
+}
+
+#ifdef CONFIG_INTR_REMAP
 
-#ifdef CONFIG_INTR_REMAP
-
 /*
  * Migrate the IO-APIC irq in the presence of intr-remapping.
  *
@@ -2374,26 +2367,25 @@ static int ioapic_retrigger_irq(unsigned int irq)
  * Real vector that is used for interrupting cpu will be coming from
  * the interrupt-remapping table entry.
  */
-static void
+static int
 migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
 {
        struct irq_cfg *cfg;
        struct irte irte;
        unsigned int dest;
        unsigned int irq;
+       int ret = -1;
 
        if (!cpumask_intersects(mask, cpu_online_mask))
-               return;
+               return ret;
 
        irq = desc->irq;
        if (get_irte(irq, &irte))
-               return;
+               return ret;
 
        cfg = desc->chip_data;
        if (assign_irq_vector(irq, cfg, mask))
-               return;
-
-       set_extra_move_desc(desc, mask);
+               return ret;
 
        dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
 
@@ -2409,27 +2401,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
                send_cleanup_vector(cfg);
 
        cpumask_copy(desc->affinity, mask);
+
+       return 0;
 }
 
 /*
  * Migrates the IRQ destination in the process context.
  */
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
                                            const struct cpumask *mask)
 {
-       migrate_ioapic_irq_desc(desc, mask);
+       return migrate_ioapic_irq_desc(desc, mask);
 }
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
                                       const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
 
-       set_ir_ioapic_affinity_irq_desc(desc, mask);
+       return set_ir_ioapic_affinity_irq_desc(desc, mask);
 }
 #else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
                                                   const struct cpumask *mask)
 {
+       return 0;
 }
 #endif
 
@@ -2491,86 +2486,19 @@ static void irq_complete_move(struct irq_desc **descp)
        struct irq_cfg *cfg = desc->chip_data;
        unsigned vector, me;
 
-       if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-               if (likely(!cfg->move_desc_pending))
-                       return;
-
-               /* domain has not changed, but affinity did */
-               me = smp_processor_id();
-               if (cpumask_test_cpu(me, desc->affinity)) {
-                       *descp = desc = move_irq_desc(desc, me);
-                       /* get the new one */
-                       cfg = desc->chip_data;
-                       cfg->move_desc_pending = 0;
-               }
-#endif
+       if (likely(!cfg->move_in_progress))
                return;
-       }
 
        vector = ~get_irq_regs()->orig_ax;
        me = smp_processor_id();
 
-       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-               *descp = desc = move_irq_desc(desc, me);
-               /* get the new one */
-               cfg = desc->chip_data;
-#endif
+       if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
                send_cleanup_vector(cfg);
-       }
 }
 #else
 static inline void irq_complete_move(struct irq_desc **descp) {}
 #endif
 
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
-       int apic, pin;
-       struct irq_pin_list *entry;
-
-       entry = cfg->irq_2_pin;
-       for (;;) {
-
-               if (!entry)
-                       break;
-
-               apic = entry->apic;
-               pin = entry->pin;
-               io_apic_eoi(apic, pin);
-               entry = entry->next;
-       }
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
-       struct irq_cfg *cfg;
-       unsigned long flags;
-       unsigned int irq;
-
-       irq = desc->irq;
-       cfg = desc->chip_data;
-
-       spin_lock_irqsave(&ioapic_lock, flags);
-       __eoi_ioapic_irq(irq, cfg);
-       spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
-       struct irq_desc *desc = irq_to_desc(irq);
-       ack_x2APIC_irq();
-       eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
-       ack_x2APIC_irq();
-}
-#endif
-
 static void ack_apic_edge(unsigned int irq)
 {
        struct irq_desc *desc = irq_to_desc(irq);
@@ -2634,9 +2562,6 @@ static void ack_apic_level(unsigned int irq)
         */
        ack_APIC_irq();
 
-       if (irq_remapped(irq))
-               eoi_ioapic_irq(desc);
-
        /* Now we can move and re-enable the irq */
        if (unlikely(do_unmask_irq)) {
                /* Only migrate the irq if the ack has been received.
@@ -2683,22 +2608,50 @@ static void ack_apic_level(unsigned int irq)
 }
 
 #ifdef CONFIG_INTR_REMAP
+/*
+ * Call io_apic_eoi() for every (IO-APIC, pin) pair on cfg->irq_2_pin.
+ * The irq argument is not used by this function.
+ */
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+       struct irq_pin_list *entry;
+
+       for (entry = cfg->irq_2_pin; entry; entry = entry->next) {
+               int ioapic_idx = entry->apic;
+               int pin = entry->pin;
+
+               io_apic_eoi(ioapic_idx, pin);
+       }
+}
+
+/*
+ * Take ioapic_lock and run __eoi_ioapic_irq() for the irq and chip data
+ * stored in desc.
+ */
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+       struct irq_cfg *cfg = desc->chip_data;
+       unsigned int irq = desc->irq;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ioapic_lock, flags);
+       __eoi_ioapic_irq(irq, cfg);
+       spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
 static void ir_ack_apic_edge(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_edge(irq);
-#endif
-       return ack_apic_edge(irq);
+       ack_APIC_irq();
 }
 
 static void ir_ack_apic_level(unsigned int irq)
 {
-#ifdef CONFIG_X86_X2APIC
-       if (x2apic_enabled())
-               return ack_x2apic_level(irq);
-#endif
-       return ack_apic_level(irq);
+       struct irq_desc *desc = irq_to_desc(irq);
+
+       ack_APIC_irq();
+       eoi_ioapic_irq(desc);
 }
 #endif /* CONFIG_INTR_REMAP */
 
@@ -2903,7 +2856,7 @@ static inline void __init check_timer(void)
 {
        struct irq_desc *desc = irq_to_desc(0);
        struct irq_cfg *cfg = desc->chip_data;
-       int cpu = boot_cpu_id;
+       int node = cpu_to_node(boot_cpu_id);
        int apic1, pin1, apic2, pin2;
        unsigned long flags;
        int no_pin1 = 0;
@@ -2969,7 +2922,7 @@ static inline void __init check_timer(void)
                 * Ok, does IRQ0 through the IOAPIC work?
                 */
                if (no_pin1) {
-                       add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+                       add_pin_to_irq_node(cfg, node, apic1, pin1);
                        setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
                } else {
                        /* for edge trigger, setup_IO_APIC_irq already
@@ -3006,7 +2959,7 @@ static inline void __init check_timer(void)
                /*
                 * legacy devices should be connected to IO APIC #0
                 */
-               replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+               replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
                setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
                enable_8259A_irq(0);
                if (timer_irq_works()) {
@@ -3218,14 +3171,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
 /*
  * Dynamic irq allocate and deallocation
  */
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
 {
        /* Allocate an unused irq */
        unsigned int irq;
        unsigned int new;
        unsigned long flags;
        struct irq_cfg *cfg_new = NULL;
-       int cpu = boot_cpu_id;
        struct irq_desc *desc_new = NULL;
 
        irq = 0;
@@ -3234,7 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
        spin_lock_irqsave(&vector_lock, flags);
        for (new = irq_want; new < nr_irqs; new++) {
-               desc_new = irq_to_desc_alloc_cpu(new, cpu);
+               desc_new = irq_to_desc_alloc_node(new, node);
                if (!desc_new) {
                        printk(KERN_INFO "can not get irq_desc for %d\n", new);
                        continue;
@@ -3243,6 +3195,9 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
                if (cfg_new->vector != 0)
                        continue;
+
+               desc_new = move_irq_desc(desc_new, node);
+
                if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
                        irq = new;
                break;
@@ -3260,11 +3215,12 @@ unsigned int create_irq_nr(unsigned int irq_want)
 
 int create_irq(void)
 {
+       int node = cpu_to_node(boot_cpu_id);
        unsigned int irq_want;
        int irq;
 
        irq_want = nr_irqs_gsi;
-       irq = create_irq_nr(irq_want);
+       irq = create_irq_nr(irq_want, node);
 
        if (irq == 0)
                irq = -1;
@@ -3366,7 +3322,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
 }
 
 #ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
@@ -3375,7 +3331,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        cfg = desc->chip_data;
 
@@ -3387,13 +3343,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg_desc(desc, &msg);
+
+       return 0;
 }
 #ifdef CONFIG_INTR_REMAP
 /*
  * Migrate the MSI irq to another cpumask. This migration is
  * done in the process context using interrupt-remapping hardware.
  */
-static void
+static int
 ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
@@ -3402,11 +3360,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
        struct irte irte;
 
        if (get_irte(irq, &irte))
-               return;
+               return -1;
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        irte.vector = cfg->vector;
        irte.dest_id = IRTE_DEST(dest);
@@ -3423,6 +3381,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
         */
        if (cfg->move_in_progress)
                send_cleanup_vector(cfg);
+
+       return 0;
 }
 
 #endif
@@ -3518,15 +3478,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
        unsigned int irq_want;
        struct intel_iommu *iommu = NULL;
        int index = 0;
+       int node;
 
        /* x86 doesn't support multiple MSI yet */
        if (type == PCI_CAP_ID_MSI && nvec > 1)
                return 1;
 
+       node = dev_to_node(&dev->dev);
        irq_want = nr_irqs_gsi;
        sub_handle = 0;
        list_for_each_entry(msidesc, &dev->msi_list, list) {
-               irq = create_irq_nr(irq_want);
+               irq = create_irq_nr(irq_want, node);
                if (irq == 0)
                        return -1;
                irq_want = irq + 1;
@@ -3576,7 +3538,7 @@ void arch_teardown_msi_irq(unsigned int irq)
 
 #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
 #ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
@@ -3585,7 +3547,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        cfg = desc->chip_data;
 
@@ -3597,6 +3559,8 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        dmar_msi_write(irq, &msg);
+
+       return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3630,7 +3594,7 @@ int arch_setup_dmar_msi(unsigned int irq)
 #ifdef CONFIG_HPET_TIMER
 
 #ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
@@ -3639,7 +3603,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        cfg = desc->chip_data;
 
@@ -3651,6 +3615,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        hpet_msi_write(irq, &msg);
+
+       return 0;
 }
 
 #endif /* CONFIG_SMP */
@@ -3707,7 +3673,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
        write_ht_irq_msg(irq, &msg);
 }
 
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 {
        struct irq_desc *desc = irq_to_desc(irq);
        struct irq_cfg *cfg;
@@ -3715,11 +3681,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
 
        dest = set_desc_affinity(desc, mask);
        if (dest == BAD_APICID)
-               return;
+               return -1;
 
        cfg = desc->chip_data;
 
        target_ht_irq(irq, dest, cfg->vector);
+
+       return 0;
 }
 
 #endif
@@ -3794,6 +3762,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
        unsigned long flags;
        int err;
 
+       BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
        cfg = irq_cfg(irq);
 
        err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3807,15 +3777,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
 
        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
-       entry->vector = cfg->vector;
-       entry->delivery_mode = apic->irq_delivery_mode;
-       entry->dest_mode = apic->irq_dest_mode;
-       entry->polarity = 0;
-       entry->trigger = 0;
-       entry->mask = 0;
-       entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+       entry->vector           = cfg->vector;
+       entry->delivery_mode    = apic->irq_delivery_mode;
+       entry->dest_mode        = apic->irq_dest_mode;
+       entry->polarity         = 0;
+       entry->trigger          = 0;
+       entry->mask             = 0;
+       entry->dest             = apic->cpu_mask_to_apicid(eligible_cpu);
 
        mmr_pnode = uv_blade_to_pnode(mmr_blade);
        uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3833,10 +3801,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
        struct uv_IO_APIC_route_entry *entry;
        int mmr_pnode;
 
+       BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
        mmr_value = 0;
        entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
-       BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
        entry->mask = 1;
 
        mmr_pnode = uv_blade_to_pnode(mmr_blade);
@@ -3900,6 +3868,71 @@ int __init arch_probe_nr_irqs(void)
 }
 #endif
 
+/*
+ * Set up the IO-APIC pin named in irq_attr for irq, using the trigger
+ * and polarity from irq_attr.  The irq_desc is allocated on dev's node,
+ * or on the boot CPU's node when dev is NULL.
+ *
+ * Returns -EINVAL when IO_APIC_IRQ(irq) is false, 0 otherwise (including
+ * the case where no irq_desc could be obtained).
+ */
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       int ioapic = irq_attr->ioapic;
+       struct irq_desc *desc;
+       int pin, node;
+
+       if (!IO_APIC_IRQ(irq)) {
+               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+                       ioapic);
+               return -EINVAL;
+       }
+
+       node = dev ? dev_to_node(dev) : cpu_to_node(boot_cpu_id);
+
+       desc = irq_to_desc_alloc_node(irq, node);
+       if (!desc) {
+               printk(KERN_INFO "can not get irq_desc %d\n", irq);
+               return 0;
+       }
+
+       pin = irq_attr->ioapic_pin;
+
+       /*
+        * IRQs < 16 are already in the irq_2_pin[] map
+        */
+       if (irq >= NR_IRQS_LEGACY) {
+               struct irq_cfg *cfg = desc->chip_data;
+
+               add_pin_to_irq_node(cfg, node, ioapic, pin);
+       }
+
+       setup_IO_APIC_irq(ioapic, pin, irq, desc, irq_attr->trigger,
+                         irq_attr->polarity);
+
+       return 0;
+}
+
+/*
+ * Program the IO-APIC pin described by irq_attr for irq, at most once
+ * per pin.
+ *
+ * Returns 0 if the pin was already programmed or has been programmed by
+ * this call, otherwise the error from __io_apic_set_pci_routing().
+ */
+int io_apic_set_pci_routing(struct device *dev, int irq,
+                               struct io_apic_irq_attr *irq_attr)
+{
+       int ioapic, pin;
+       int ret;
+       /*
+        * Avoid pin reprogramming.  PRTs typically include entries
+        * with redundant pin->gsi mappings (but unique PCI devices);
+        * we only program the IOAPIC on the first.
+        */
+       ioapic = irq_attr->ioapic;
+       pin = irq_attr->ioapic_pin;
+       if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+               pr_debug("Pin %d-%d already programmed\n",
+                        mp_ioapics[ioapic].apicid, pin);
+               return 0;
+       }
+
+       /*
+        * Record the pin as programmed only after programming succeeded,
+        * so that a rejected request does not make later valid requests
+        * for the same pin get skipped as "already programmed".
+        */
+       ret = __io_apic_set_pci_routing(dev, irq, irq_attr);
+       if (!ret)
+               set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+       return ret;
+}
+
 /* --------------------------------------------------------------------------
                           ACPI-based IOAPIC Configuration
    -------------------------------------------------------------------------- */
@@ -3980,6 +4013,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
 
        return apic_id;
 }
+#endif
 
 int __init io_apic_get_version(int ioapic)
 {
@@ -3992,39 +4026,6 @@ int __init io_apic_get_version(int ioapic)
 
        return reg_01.bits.version;
 }
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
-       struct irq_desc *desc;
-       struct irq_cfg *cfg;
-       int cpu = boot_cpu_id;
-
-       if (!IO_APIC_IRQ(irq)) {
-               apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
-                       ioapic);
-               return -EINVAL;
-       }
-
-       desc = irq_to_desc_alloc_cpu(irq, cpu);
-       if (!desc) {
-               printk(KERN_INFO "can not get irq_desc %d\n", irq);
-               return 0;
-       }
-
-       /*
-        * IRQs < 16 are already in the irq_2_pin[] map
-        */
-       if (irq >= NR_IRQS_LEGACY) {
-               cfg = desc->chip_data;
-               add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
-       }
-
-       setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
-       return 0;
-}
-
 
 int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 {
@@ -4055,51 +4056,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 #ifdef CONFIG_SMP
 void __init setup_ioapic_dest(void)
 {
-       int pin, ioapic, irq, irq_entry;
+       int pin, ioapic = 0, irq, irq_entry;
        struct irq_desc *desc;
-       struct irq_cfg *cfg;
        const struct cpumask *mask;
 
        if (skip_ioapic_setup == 1)
                return;
 
-       for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
-               for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
-                       irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-                       if (irq_entry == -1)
-                               continue;
-                       irq = pin_2_irq(irq_entry, ioapic, pin);
-
-                       /* setup_IO_APIC_irqs could fail to get vector for some device
-                        * when you have too many devices, because at that time only boot
-                        * cpu is online.
-                        */
-                       desc = irq_to_desc(irq);
-                       cfg = desc->chip_data;
-                       if (!cfg->vector) {
-                               setup_IO_APIC_irq(ioapic, pin, irq, desc,
-                                                 irq_trigger(irq_entry),
-                                                 irq_polarity(irq_entry));
-                               continue;
+#ifdef CONFIG_ACPI
+       if (!acpi_disabled && acpi_ioapic) {
+               ioapic = mp_find_ioapic(0);
+               if (ioapic < 0)
+                       ioapic = 0;
+       }
+#endif
 
-                       }
+       for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+               irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+               if (irq_entry == -1)
+                       continue;
+               irq = pin_2_irq(irq_entry, ioapic, pin);
 
-                       /*
-                        * Honour affinities which have been set in early boot
-                        */
-                       if (desc->status &
-                           (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
-                               mask = desc->affinity;
-                       else
-                               mask = apic->target_cpus();
+               desc = irq_to_desc(irq);
 
-                       if (intr_remapping_enabled)
-                               set_ir_ioapic_affinity_irq_desc(desc, mask);
-                       else
-                               set_ioapic_affinity_irq_desc(desc, mask);
-               }
+               /*
+                * Honour affinities which have been set in early boot
+                */
+               if (desc->status &
+                   (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+                       mask = desc->affinity;
+               else
+                       mask = apic->target_cpus();
 
+               if (intr_remapping_enabled)
+                       set_ir_ioapic_affinity_irq_desc(desc, mask);
+               else
+                       set_ioapic_affinity_irq_desc(desc, mask);
        }
+
 }
 #endif
 
index ce4fbfa..a691302 100644 (file)
@@ -104,7 +104,7 @@ static __init void nmi_cpu_busy(void *data)
 }
 #endif
 
-static void report_broken_nmi(int cpu, int *prev_nmi_count)
+static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
 {
        printk(KERN_CONT "\n");
 
index 01eda2a..440a8bc 100644 (file)
@@ -160,7 +160,6 @@ extern struct apic apic_summit;
 extern struct apic apic_bigsmp;
 extern struct apic apic_es7000;
 extern struct apic apic_es7000_cluster;
-extern struct apic apic_default;
 
 struct apic *apic = &apic_default;
 EXPORT_SYMBOL_GPL(apic);
index 1783652..bc3e880 100644 (file)
@@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = {
 void __init default_setup_apic_routing(void)
 {
 #ifdef CONFIG_X86_X2APIC
-       if (x2apic && (apic != &apic_x2apic_phys &&
+       if (x2apic_mode && (apic != &apic_x2apic_phys &&
 #ifdef CONFIG_X86_UV
                       apic != &apic_x2apic_uv_x &&
 #endif
index 9cfe1f4..344eee4 100644 (file)
@@ -173,13 +173,6 @@ static inline int is_WPEG(struct rio_detail *rio){
                rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
 }
 
-
-/* In clustered mode, the high nibble of APIC ID is a cluster number.
- * The low nibble is a 4-bit bitmap. */
-#define XAPIC_DEST_CPUS_SHIFT  4
-#define XAPIC_DEST_CPUS_MASK   ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
-#define XAPIC_DEST_CLUSTER_MASK        (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
-
 #define SUMMIT_APIC_DFR_VALUE  (APIC_DFR_CLUSTER)
 
 static const struct cpumask *summit_target_cpus(void)
index 4a903e2..8e4cbb2 100644 (file)
@@ -10,7 +10,7 @@
 #include <asm/apic.h>
 #include <asm/ipi.h>
 
-DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
 
 static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
index 2bda693..ef0ae20 100644 (file)
@@ -105,7 +105,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask)
        cpumask_set_cpu(cpu, retmask);
 }
 
-static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
 {
 #ifdef CONFIG_SMP
        unsigned long val;
@@ -562,7 +562,7 @@ void __init uv_system_init(void)
        union uvh_node_id_u node_id;
        unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
        int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
-       int max_pnode = 0;
+       int gnode_extra, max_pnode = 0;
        unsigned long mmr_base, present, paddr;
        unsigned short pnode_mask;
 
@@ -574,6 +574,13 @@ void __init uv_system_init(void)
        mmr_base =
            uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
            ~UV_MMR_ENABLE;
+       pnode_mask = (1 << n_val) - 1;
+       node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+       gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
+       gnode_upper = ((unsigned long)gnode_extra  << m_val);
+       printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
+                       n_val, m_val, gnode_upper, gnode_extra);
+
        printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
 
        for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
@@ -583,15 +590,18 @@ void __init uv_system_init(void)
 
        bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
        uv_blade_info = kmalloc(bytes, GFP_KERNEL);
+       BUG_ON(!uv_blade_info);
 
        get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
 
        bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
        uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
+       BUG_ON(!uv_node_to_blade);
        memset(uv_node_to_blade, 255, bytes);
 
        bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
        uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
+       BUG_ON(!uv_cpu_to_blade);
        memset(uv_cpu_to_blade, 255, bytes);
 
        blade = 0;
@@ -607,11 +617,6 @@ void __init uv_system_init(void)
                }
        }
 
-       pnode_mask = (1 << n_val) - 1;
-       node_id.v = uv_read_local_mmr(UVH_NODE_ID);
-       gnode_upper = (((unsigned long)node_id.s.node_id) &
-                      ~((1 << n_val) - 1)) << m_val;
-
        uv_bios_init();
        uv_bios_get_sn_info(0, &uv_type, &sn_partition_id,
                            &sn_coherency_id, &sn_region_size);
@@ -634,6 +639,7 @@ void __init uv_system_init(void)
                uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
                uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1;
                uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
+               uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
                uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
                uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
                uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu;
index 5a6aa1c..1a830cb 100644 (file)
@@ -146,4 +146,5 @@ void foo(void)
        OFFSET(BP_loadflags, boot_params, hdr.loadflags);
        OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
        OFFSET(BP_version, boot_params, hdr.version);
+       OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 }
index e72f062..898ecc4 100644 (file)
@@ -125,6 +125,7 @@ int main(void)
        OFFSET(BP_loadflags, boot_params, hdr.loadflags);
        OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
        OFFSET(BP_version, boot_params, hdr.version);
+       OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 
        BLANK();
        DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
index 7e4a459..e5b27d8 100644 (file)
@@ -6,6 +6,7 @@
 #include <asm/processor.h>
 #include <asm/apic.h>
 #include <asm/cpu.h>
+#include <asm/pci-direct.h>
 
 #ifdef CONFIG_X86_64
 # include <asm/numa_64.h>
@@ -272,7 +273,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
        int cpu = smp_processor_id();
        int node;
-       unsigned apicid = hard_smp_processor_id();
+       unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
        node = c->phys_proc_id;
        if (apicid_to_node[apicid] != NUMA_NO_NODE)
@@ -351,6 +352,15 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
                    (c->x86_model == 8 && c->x86_mask >= 8))
                        set_cpu_cap(c, X86_FEATURE_K6_MTRR);
 #endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+       /* check CPU config space for extended APIC ID */
+       if (c->x86 >= 0xf) {
+               unsigned int val;
+               val = read_pci_config(0, 24, 0, 0x68);
+               if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+                       set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+       }
+#endif
 }
 
 static void __cpuinit init_amd(struct cpuinfo_x86 *c)
index 77848d9..b0517aa 100644 (file)
@@ -299,7 +299,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
        return NULL;            /* Not found */
 }
 
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
 
 void load_percpu_segment(int cpu)
 {
@@ -768,6 +769,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
        if (this_cpu->c_identify)
                this_cpu->c_identify(c);
 
+       /* Clear/Set all flags overridden by options, after probe */
+       for (i = 0; i < NCAPINTS; i++) {
+               c->x86_capability[i] &= ~cpu_caps_cleared[i];
+               c->x86_capability[i] |= cpu_caps_set[i];
+       }
+
 #ifdef CONFIG_X86_64
        c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
 #endif
@@ -813,6 +820,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 #endif
 
        init_hypervisor(c);
+
+       /*
+        * Clear/Set all flags overridden by options; this must be done
+        * before the SMP AND of all CPUs' capabilities that follows.
+        */
+       for (i = 0; i < NCAPINTS; i++) {
+               c->x86_capability[i] &= ~cpu_caps_cleared[i];
+               c->x86_capability[i] |= cpu_caps_set[i];
+       }
+
        /*
         * On SMP, boot_cpu_data holds the common feature set between
         * all CPUs; so make sure that we indicate which features are
@@ -825,10 +842,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
                        boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
        }
 
-       /* Clear all flags overriden by options */
-       for (i = 0; i < NCAPINTS; i++)
-               c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
 #ifdef CONFIG_X86_MCE
        /* Init Machine Check Exception if available. */
        mcheck_init(c);
index 46e29ab..6b2a52d 100644 (file)
@@ -32,9 +32,7 @@
 
 static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]);
 static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]);
-static DEFINE_PER_CPU(unsigned, cpu_modelflag);
 static DEFINE_PER_CPU(int, cpu_priv_count);
-static DEFINE_PER_CPU(unsigned, cpu_model);
 
 static DEFINE_MUTEX(cpu_debug_lock);
 
@@ -80,302 +78,102 @@ static struct cpu_file_base cpu_file[] = {
        { "value",      CPU_REG_ALL,    1       },
 };
 
-/* Intel Registers Range */
-static struct cpu_debug_range cpu_intel_range[] = {
-       { 0x00000000, 0x00000001, CPU_MC,       CPU_INTEL_ALL           },
-       { 0x00000006, 0x00000007, CPU_MONITOR,  CPU_CX_AT_XE            },
-       { 0x00000010, 0x00000010, CPU_TIME,     CPU_INTEL_ALL           },
-       { 0x00000011, 0x00000013, CPU_PMC,      CPU_INTEL_PENTIUM       },
-       { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE         },
-       { 0x0000001B, 0x0000001B, CPU_APIC,     CPU_P6_CX_AT_XE         },
-
-       { 0x0000002A, 0x0000002A, CPU_POWERON,  CPU_PX_CX_AT_XE         },
-       { 0x0000002B, 0x0000002B, CPU_POWERON,  CPU_INTEL_XEON          },
-       { 0x0000002C, 0x0000002C, CPU_FREQ,     CPU_INTEL_XEON          },
-       { 0x0000003A, 0x0000003A, CPU_CONTROL,  CPU_CX_AT_XE            },
-
-       { 0x00000040, 0x00000043, CPU_LBRANCH,  CPU_PM_CX_AT_XE         },
-       { 0x00000044, 0x00000047, CPU_LBRANCH,  CPU_PM_CO_AT            },
-       { 0x00000060, 0x00000063, CPU_LBRANCH,  CPU_C2_AT               },
-       { 0x00000064, 0x00000067, CPU_LBRANCH,  CPU_INTEL_ATOM          },
-
-       { 0x00000079, 0x00000079, CPU_BIOS,     CPU_P6_CX_AT_XE         },
-       { 0x00000088, 0x0000008A, CPU_CACHE,    CPU_INTEL_P6            },
-       { 0x0000008B, 0x0000008B, CPU_BIOS,     CPU_P6_CX_AT_XE         },
-       { 0x0000009B, 0x0000009B, CPU_MONITOR,  CPU_INTEL_XEON          },
-
-       { 0x000000C1, 0x000000C2, CPU_PMC,      CPU_P6_CX_AT            },
-       { 0x000000CD, 0x000000CD, CPU_FREQ,     CPU_CX_AT               },
-       { 0x000000E7, 0x000000E8, CPU_PERF,     CPU_CX_AT               },
-       { 0x000000FE, 0x000000FE, CPU_MTRR,     CPU_P6_CX_XE            },
-
-       { 0x00000116, 0x00000116, CPU_CACHE,    CPU_INTEL_P6            },
-       { 0x00000118, 0x00000118, CPU_CACHE,    CPU_INTEL_P6            },
-       { 0x00000119, 0x00000119, CPU_CACHE,    CPU_INTEL_PX            },
-       { 0x0000011A, 0x0000011B, CPU_CACHE,    CPU_INTEL_P6            },
-       { 0x0000011E, 0x0000011E, CPU_CACHE,    CPU_PX_CX_AT            },
-
-       { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE         },
-       { 0x00000179, 0x0000017A, CPU_MC,       CPU_PX_CX_AT_XE         },
-       { 0x0000017B, 0x0000017B, CPU_MC,       CPU_P6_XE               },
-       { 0x00000186, 0x00000187, CPU_PMC,      CPU_P6_CX_AT            },
-       { 0x00000198, 0x00000199, CPU_PERF,     CPU_PM_CX_AT_XE         },
-       { 0x0000019A, 0x0000019A, CPU_TIME,     CPU_PM_CX_AT_XE         },
-       { 0x0000019B, 0x0000019D, CPU_THERM,    CPU_PM_CX_AT_XE         },
-       { 0x000001A0, 0x000001A0, CPU_MISC,     CPU_PM_CX_AT_XE         },
-
-       { 0x000001C9, 0x000001C9, CPU_LBRANCH,  CPU_PM_CX_AT            },
-       { 0x000001D7, 0x000001D8, CPU_LBRANCH,  CPU_INTEL_XEON          },
-       { 0x000001D9, 0x000001D9, CPU_DEBUG,    CPU_CX_AT_XE            },
-       { 0x000001DA, 0x000001DA, CPU_LBRANCH,  CPU_INTEL_XEON          },
-       { 0x000001DB, 0x000001DB, CPU_LBRANCH,  CPU_P6_XE               },
-       { 0x000001DC, 0x000001DC, CPU_LBRANCH,  CPU_INTEL_P6            },
-       { 0x000001DD, 0x000001DE, CPU_LBRANCH,  CPU_PX_CX_AT_XE         },
-       { 0x000001E0, 0x000001E0, CPU_LBRANCH,  CPU_INTEL_P6            },
-
-       { 0x00000200, 0x0000020F, CPU_MTRR,     CPU_P6_CX_XE            },
-       { 0x00000250, 0x00000250, CPU_MTRR,     CPU_P6_CX_XE            },
-       { 0x00000258, 0x00000259, CPU_MTRR,     CPU_P6_CX_XE            },
-       { 0x00000268, 0x0000026F, CPU_MTRR,     CPU_P6_CX_XE            },
-       { 0x00000277, 0x00000277, CPU_PAT,      CPU_C2_AT_XE            },
-       { 0x000002FF, 0x000002FF, CPU_MTRR,     CPU_P6_CX_XE            },
-
-       { 0x00000300, 0x00000308, CPU_PMC,      CPU_INTEL_XEON          },
-       { 0x00000309, 0x0000030B, CPU_PMC,      CPU_C2_AT_XE            },
-       { 0x0000030C, 0x00000311, CPU_PMC,      CPU_INTEL_XEON          },
-       { 0x00000345, 0x00000345, CPU_PMC,      CPU_C2_AT               },
-       { 0x00000360, 0x00000371, CPU_PMC,      CPU_INTEL_XEON          },
-       { 0x0000038D, 0x00000390, CPU_PMC,      CPU_C2_AT               },
-       { 0x000003A0, 0x000003BE, CPU_PMC,      CPU_INTEL_XEON          },
-       { 0x000003C0, 0x000003CD, CPU_PMC,      CPU_INTEL_XEON          },
-       { 0x000003E0, 0x000003E1, CPU_PMC,      CPU_INTEL_XEON          },
-       { 0x000003F0, 0x000003F0, CPU_PMC,      CPU_INTEL_XEON          },
-       { 0x000003F1, 0x000003F1, CPU_PMC,      CPU_C2_AT_XE            },
-       { 0x000003F2, 0x000003F2, CPU_PMC,      CPU_INTEL_XEON          },
-
-       { 0x00000400, 0x00000402, CPU_MC,       CPU_PM_CX_AT_XE         },
-       { 0x00000403, 0x00000403, CPU_MC,       CPU_INTEL_XEON          },
-       { 0x00000404, 0x00000406, CPU_MC,       CPU_PM_CX_AT_XE         },
-       { 0x00000407, 0x00000407, CPU_MC,       CPU_INTEL_XEON          },
-       { 0x00000408, 0x0000040A, CPU_MC,       CPU_PM_CX_AT_XE         },
-       { 0x0000040B, 0x0000040B, CPU_MC,       CPU_INTEL_XEON          },
-       { 0x0000040C, 0x0000040E, CPU_MC,       CPU_PM_CX_XE            },
-       { 0x0000040F, 0x0000040F, CPU_MC,       CPU_INTEL_XEON          },
-       { 0x00000410, 0x00000412, CPU_MC,       CPU_PM_CX_AT_XE         },
-       { 0x00000413, 0x00000417, CPU_MC,       CPU_CX_AT_XE            },
-       { 0x00000480, 0x0000048B, CPU_VMX,      CPU_CX_AT_XE            },
-
-       { 0x00000600, 0x00000600, CPU_DEBUG,    CPU_PM_CX_AT_XE         },
-       { 0x00000680, 0x0000068F, CPU_LBRANCH,  CPU_INTEL_XEON          },
-       { 0x000006C0, 0x000006CF, CPU_LBRANCH,  CPU_INTEL_XEON          },
-
-       { 0x000107CC, 0x000107D3, CPU_PMC,      CPU_INTEL_XEON_MP       },
-
-       { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON          },
-       { 0xC0000081, 0xC0000082, CPU_CALL,     CPU_INTEL_XEON          },
-       { 0xC0000084, 0xC0000084, CPU_CALL,     CPU_INTEL_XEON          },
-       { 0xC0000100, 0xC0000102, CPU_BASE,     CPU_INTEL_XEON          },
+/* CPU Registers Range */
+static struct cpu_debug_range cpu_reg_range[] = {
+       { 0x00000000, 0x00000001, CPU_MC,       },
+       { 0x00000006, 0x00000007, CPU_MONITOR,  },
+       { 0x00000010, 0x00000010, CPU_TIME,     },
+       { 0x00000011, 0x00000013, CPU_PMC,      },
+       { 0x00000017, 0x00000017, CPU_PLATFORM, },
+       { 0x0000001B, 0x0000001B, CPU_APIC,     },
+       { 0x0000002A, 0x0000002B, CPU_POWERON,  },
+       { 0x0000002C, 0x0000002C, CPU_FREQ,     },
+       { 0x0000003A, 0x0000003A, CPU_CONTROL,  },
+       { 0x00000040, 0x00000047, CPU_LBRANCH,  },
+       { 0x00000060, 0x00000067, CPU_LBRANCH,  },
+       { 0x00000079, 0x00000079, CPU_BIOS,     },
+       { 0x00000088, 0x0000008A, CPU_CACHE,    },
+       { 0x0000008B, 0x0000008B, CPU_BIOS,     },
+       { 0x0000009B, 0x0000009B, CPU_MONITOR,  },
+       { 0x000000C1, 0x000000C4, CPU_PMC,      },
+       { 0x000000CD, 0x000000CD, CPU_FREQ,     },
+       { 0x000000E7, 0x000000E8, CPU_PERF,     },
+       { 0x000000FE, 0x000000FE, CPU_MTRR,     },
+
+       { 0x00000116, 0x0000011E, CPU_CACHE,    },
+       { 0x00000174, 0x00000176, CPU_SYSENTER, },
+       { 0x00000179, 0x0000017B, CPU_MC,       },
+       { 0x00000186, 0x00000189, CPU_PMC,      },
+       { 0x00000198, 0x00000199, CPU_PERF,     },
+       { 0x0000019A, 0x0000019A, CPU_TIME,     },
+       { 0x0000019B, 0x0000019D, CPU_THERM,    },
+       { 0x000001A0, 0x000001A0, CPU_MISC,     },
+       { 0x000001C9, 0x000001C9, CPU_LBRANCH,  },
+       { 0x000001D7, 0x000001D8, CPU_LBRANCH,  },
+       { 0x000001D9, 0x000001D9, CPU_DEBUG,    },
+       { 0x000001DA, 0x000001E0, CPU_LBRANCH,  },
+
+       { 0x00000200, 0x0000020F, CPU_MTRR,     },
+       { 0x00000250, 0x00000250, CPU_MTRR,     },
+       { 0x00000258, 0x00000259, CPU_MTRR,     },
+       { 0x00000268, 0x0000026F, CPU_MTRR,     },
+       { 0x00000277, 0x00000277, CPU_PAT,      },
+       { 0x000002FF, 0x000002FF, CPU_MTRR,     },
+
+       { 0x00000300, 0x00000311, CPU_PMC,      },
+       { 0x00000345, 0x00000345, CPU_PMC,      },
+       { 0x00000360, 0x00000371, CPU_PMC,      },
+       { 0x0000038D, 0x00000390, CPU_PMC,      },
+       { 0x000003A0, 0x000003BE, CPU_PMC,      },
+       { 0x000003C0, 0x000003CD, CPU_PMC,      },
+       { 0x000003E0, 0x000003E1, CPU_PMC,      },
+       { 0x000003F0, 0x000003F2, CPU_PMC,      },
+
+       { 0x00000400, 0x00000417, CPU_MC,       },
+       { 0x00000480, 0x0000048B, CPU_VMX,      },
+
+       { 0x00000600, 0x00000600, CPU_DEBUG,    },
+       { 0x00000680, 0x0000068F, CPU_LBRANCH,  },
+       { 0x000006C0, 0x000006CF, CPU_LBRANCH,  },
+
+       { 0x000107CC, 0x000107D3, CPU_PMC,      },
+
+       { 0xC0000080, 0xC0000080, CPU_FEATURES, },
+       { 0xC0000081, 0xC0000084, CPU_CALL,     },
+       { 0xC0000100, 0xC0000102, CPU_BASE,     },
+       { 0xC0000103, 0xC0000103, CPU_TIME,     },
+
+       { 0xC0010000, 0xC0010007, CPU_PMC,      },
+       { 0xC0010010, 0xC0010010, CPU_CONF,     },
+       { 0xC0010015, 0xC0010015, CPU_CONF,     },
+       { 0xC0010016, 0xC001001A, CPU_MTRR,     },
+       { 0xC001001D, 0xC001001D, CPU_MTRR,     },
+       { 0xC001001F, 0xC001001F, CPU_CONF,     },
+       { 0xC0010030, 0xC0010035, CPU_BIOS,     },
+       { 0xC0010044, 0xC0010048, CPU_MC,       },
+       { 0xC0010050, 0xC0010056, CPU_SMM,      },
+       { 0xC0010058, 0xC0010058, CPU_CONF,     },
+       { 0xC0010060, 0xC0010060, CPU_CACHE,    },
+       { 0xC0010061, 0xC0010068, CPU_SMM,      },
+       { 0xC0010069, 0xC001006B, CPU_SMM,      },
+       { 0xC0010070, 0xC0010071, CPU_SMM,      },
+       { 0xC0010111, 0xC0010113, CPU_SMM,      },
+       { 0xC0010114, 0xC0010118, CPU_SVM,      },
+       { 0xC0010140, 0xC0010141, CPU_OSVM,     },
+       { 0xC0011022, 0xC0011023, CPU_CONF,     },
 };
 
-/* AMD Registers Range */
-static struct cpu_debug_range cpu_amd_range[] = {
-       { 0x00000000, 0x00000001, CPU_MC,       CPU_K10_PLUS,           },
-       { 0x00000010, 0x00000010, CPU_TIME,     CPU_K8_PLUS,            },
-       { 0x0000001B, 0x0000001B, CPU_APIC,     CPU_K8_PLUS,            },
-       { 0x0000002A, 0x0000002A, CPU_POWERON,  CPU_K7_PLUS             },
-       { 0x0000008B, 0x0000008B, CPU_VER,      CPU_K8_PLUS             },
-       { 0x000000FE, 0x000000FE, CPU_MTRR,     CPU_K8_PLUS,            },
-
-       { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS,            },
-       { 0x00000179, 0x0000017B, CPU_MC,       CPU_K8_PLUS,            },
-       { 0x000001D9, 0x000001D9, CPU_DEBUG,    CPU_K8_PLUS,            },
-       { 0x000001DB, 0x000001DE, CPU_LBRANCH,  CPU_K8_PLUS,            },
-
-       { 0x00000200, 0x0000020F, CPU_MTRR,     CPU_K8_PLUS,            },
-       { 0x00000250, 0x00000250, CPU_MTRR,     CPU_K8_PLUS,            },
-       { 0x00000258, 0x00000259, CPU_MTRR,     CPU_K8_PLUS,            },
-       { 0x00000268, 0x0000026F, CPU_MTRR,     CPU_K8_PLUS,            },
-       { 0x00000277, 0x00000277, CPU_PAT,      CPU_K8_PLUS,            },
-       { 0x000002FF, 0x000002FF, CPU_MTRR,     CPU_K8_PLUS,            },
-
-       { 0x00000400, 0x00000413, CPU_MC,       CPU_K8_PLUS,            },
-
-       { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL,            },
-       { 0xC0000081, 0xC0000084, CPU_CALL,     CPU_K8_PLUS,            },
-       { 0xC0000100, 0xC0000102, CPU_BASE,     CPU_K8_PLUS,            },
-       { 0xC0000103, 0xC0000103, CPU_TIME,     CPU_K10_PLUS,           },
-
-       { 0xC0010000, 0xC0010007, CPU_PMC,      CPU_K8_PLUS,            },
-       { 0xC0010010, 0xC0010010, CPU_CONF,     CPU_K7_PLUS,            },
-       { 0xC0010015, 0xC0010015, CPU_CONF,     CPU_K7_PLUS,            },
-       { 0xC0010016, 0xC001001A, CPU_MTRR,     CPU_K8_PLUS,            },
-       { 0xC001001D, 0xC001001D, CPU_MTRR,     CPU_K8_PLUS,            },
-       { 0xC001001F, 0xC001001F, CPU_CONF,     CPU_K8_PLUS,            },
-       { 0xC0010030, 0xC0010035, CPU_BIOS,     CPU_K8_PLUS,            },
-       { 0xC0010044, 0xC0010048, CPU_MC,       CPU_K8_PLUS,            },
-       { 0xC0010050, 0xC0010056, CPU_SMM,      CPU_K0F_PLUS,           },
-       { 0xC0010058, 0xC0010058, CPU_CONF,     CPU_K10_PLUS,           },
-       { 0xC0010060, 0xC0010060, CPU_CACHE,    CPU_AMD_11,             },
-       { 0xC0010061, 0xC0010068, CPU_SMM,      CPU_K10_PLUS,           },
-       { 0xC0010069, 0xC001006B, CPU_SMM,      CPU_AMD_11,             },
-       { 0xC0010070, 0xC0010071, CPU_SMM,      CPU_K10_PLUS,           },
-       { 0xC0010111, 0xC0010113, CPU_SMM,      CPU_K8_PLUS,            },
-       { 0xC0010114, 0xC0010118, CPU_SVM,      CPU_K10_PLUS,           },
-       { 0xC0010140, 0xC0010141, CPU_OSVM,     CPU_K10_PLUS,           },
-       { 0xC0011022, 0xC0011023, CPU_CONF,     CPU_K10_PLUS,           },
-};
-
-
-/* Intel */
-static int get_intel_modelflag(unsigned model)
-{
-       int flag;
-
-       switch (model) {
-       case 0x0501:
-       case 0x0502:
-       case 0x0504:
-               flag = CPU_INTEL_PENTIUM;
-               break;
-       case 0x0601:
-       case 0x0603:
-       case 0x0605:
-       case 0x0607:
-       case 0x0608:
-       case 0x060A:
-       case 0x060B:
-               flag = CPU_INTEL_P6;
-               break;
-       case 0x0609:
-       case 0x060D:
-               flag = CPU_INTEL_PENTIUM_M;
-               break;
-       case 0x060E:
-               flag = CPU_INTEL_CORE;
-               break;
-       case 0x060F:
-       case 0x0617:
-               flag = CPU_INTEL_CORE2;
-               break;
-       case 0x061C:
-               flag = CPU_INTEL_ATOM;
-               break;
-       case 0x0F00:
-       case 0x0F01:
-       case 0x0F02:
-       case 0x0F03:
-       case 0x0F04:
-               flag = CPU_INTEL_XEON_P4;
-               break;
-       case 0x0F06:
-               flag = CPU_INTEL_XEON_MP;
-               break;
-       default:
-               flag = CPU_NONE;
-               break;
-       }
-
-       return flag;
-}
-
-/* AMD */
-static int get_amd_modelflag(unsigned model)
-{
-       int flag;
-
-       switch (model >> 8) {
-       case 0x6:
-               flag = CPU_AMD_K6;
-               break;
-       case 0x7:
-               flag = CPU_AMD_K7;
-               break;
-       case 0x8:
-               flag = CPU_AMD_K8;
-               break;
-       case 0xf:
-               flag = CPU_AMD_0F;
-               break;
-       case 0x10:
-               flag = CPU_AMD_10;
-               break;
-       case 0x11:
-               flag = CPU_AMD_11;
-               break;
-       default:
-               flag = CPU_NONE;
-               break;
-       }
-
-       return flag;
-}
-
-static int get_cpu_modelflag(unsigned cpu)
-{
-       int flag;
-
-       flag = per_cpu(cpu_model, cpu);
-
-       switch (flag >> 16) {
-       case X86_VENDOR_INTEL:
-               flag = get_intel_modelflag(flag);
-               break;
-       case X86_VENDOR_AMD:
-               flag = get_amd_modelflag(flag & 0xffff);
-               break;
-       default:
-               flag = CPU_NONE;
-               break;
-       }
-
-       return flag;
-}
-
-static int get_cpu_range_count(unsigned cpu)
-{
-       int index;
-
-       switch (per_cpu(cpu_model, cpu) >> 16) {
-       case X86_VENDOR_INTEL:
-               index = ARRAY_SIZE(cpu_intel_range);
-               break;
-       case X86_VENDOR_AMD:
-               index = ARRAY_SIZE(cpu_amd_range);
-               break;
-       default:
-               index = 0;
-               break;
-       }
-
-       return index;
-}
-
 static int is_typeflag_valid(unsigned cpu, unsigned flag)
 {
-       unsigned vendor, modelflag;
-       int i, index;
+       int i;
 
        /* Standard Registers should be always valid */
        if (flag >= CPU_TSS)
                return 1;
 
-       modelflag = per_cpu(cpu_modelflag, cpu);
-       vendor = per_cpu(cpu_model, cpu) >> 16;
-       index = get_cpu_range_count(cpu);
-
-       for (i = 0; i < index; i++) {
-               switch (vendor) {
-               case X86_VENDOR_INTEL:
-                       if ((cpu_intel_range[i].model & modelflag) &&
-                           (cpu_intel_range[i].flag & flag))
-                               return 1;
-                       break;
-               case X86_VENDOR_AMD:
-                       if ((cpu_amd_range[i].model & modelflag) &&
-                           (cpu_amd_range[i].flag & flag))
-                               return 1;
-                       break;
-               }
+       for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
+               if (cpu_reg_range[i].flag == flag)
+                       return 1;
        }
 
        /* Invalid */
@@ -385,26 +183,11 @@ static int is_typeflag_valid(unsigned cpu, unsigned flag)
 static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max,
                              int index, unsigned flag)
 {
-       unsigned modelflag;
-
-       modelflag = per_cpu(cpu_modelflag, cpu);
-       *max = 0;
-       switch (per_cpu(cpu_model, cpu) >> 16) {
-       case X86_VENDOR_INTEL:
-               if ((cpu_intel_range[index].model & modelflag) &&
-                   (cpu_intel_range[index].flag & flag)) {
-                       *min = cpu_intel_range[index].min;
-                       *max = cpu_intel_range[index].max;
-               }
-               break;
-       case X86_VENDOR_AMD:
-               if ((cpu_amd_range[index].model & modelflag) &&
-                   (cpu_amd_range[index].flag & flag)) {
-                       *min = cpu_amd_range[index].min;
-                       *max = cpu_amd_range[index].max;
-               }
-               break;
-       }
+       if (cpu_reg_range[index].flag == flag) {
+               *min = cpu_reg_range[index].min;
+               *max = cpu_reg_range[index].max;
+       } else
+               *max = 0;
 
        return *max;
 }
@@ -434,7 +217,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
        unsigned msr, msr_min, msr_max;
        struct cpu_private *priv;
        u32 low, high;
-       int i, range;
+       int i;
 
        if (seq) {
                priv = seq->private;
@@ -446,9 +229,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag)
                }
        }
 
-       range = get_cpu_range_count(cpu);
-
-       for (i = 0; i < range; i++) {
+       for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
                if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag))
                        continue;
 
@@ -588,8 +369,20 @@ static void print_apic(void *arg)
        seq_printf(seq, " TMICT\t\t: %08x\n",  apic_read(APIC_TMICT));
        seq_printf(seq, " TMCCT\t\t: %08x\n",  apic_read(APIC_TMCCT));
        seq_printf(seq, " TDCR\t\t: %08x\n",  apic_read(APIC_TDCR));
-#endif /* CONFIG_X86_LOCAL_APIC */
+       if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+               unsigned int i, v, maxeilvt;
+
+               v = apic_read(APIC_EFEAT);
+               maxeilvt = (v >> 16) & 0xff;
+               seq_printf(seq, " EFEAT\t\t: %08x\n", v);
+               seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));
 
+               for (i = 0; i < maxeilvt; i++) {
+                       v = apic_read(APIC_EILVTn(i));
+                       seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v);
+               }
+       }
+#endif /* CONFIG_X86_LOCAL_APIC */
        seq_printf(seq, "\n MSR\t:\n");
 }
 
@@ -788,13 +581,11 @@ static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry)
 {
        struct dentry *cpu_dentry = NULL;
        unsigned reg, reg_min, reg_max;
-       int i, range, err = 0;
+       int i, err = 0;
        char reg_dir[12];
        u32 low, high;
 
-       range = get_cpu_range_count(cpu);
-
-       for (i = 0; i < range; i++) {
+       for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) {
                if (!get_cpu_range(cpu, &reg_min, &reg_max, i,
                                   cpu_base[type].flag))
                        continue;
@@ -850,10 +641,6 @@ static int cpu_init_cpu(void)
                cpui = &cpu_data(cpu);
                if (!cpu_has(cpui, X86_FEATURE_MSR))
                        continue;
-               per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) |
-                                          (cpui->x86 << 8) |
-                                          (cpui->x86_model));
-               per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu);
 
                sprintf(cpu_dir, "cpu%d", cpu);
                cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir);
index 52c8398..f138c6c 100644 (file)
@@ -220,11 +220,14 @@ config X86_LONGHAUL
          If in doubt, say N.
 
 config X86_E_POWERSAVER
-       tristate "VIA C7 Enhanced PowerSaver"
+       tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)"
        select CPU_FREQ_TABLE
-       depends on X86_32
+       depends on X86_32 && EXPERIMENTAL
        help
-         This adds the CPUFreq driver for VIA C7 processors.
+         This adds the CPUFreq driver for VIA C7 processors.  However, this driver
+         does not have any safeguards to prevent operating the CPU out of spec
+         and is thus considered dangerous.  Please use the regular ACPI cpufreq
+         driver, enabled by CONFIG_X86_ACPI_CPUFREQ.
 
          If in doubt, say N.
 
index 54b6de2..ae9b503 100644 (file)
@@ -90,11 +90,7 @@ static int check_est_cpu(unsigned int cpuid)
 {
        struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
 
-       if (cpu->x86_vendor != X86_VENDOR_INTEL ||
-           !cpu_has(cpu, X86_FEATURE_EST))
-               return 0;
-
-       return 1;
+       return cpu_has(cpu, X86_FEATURE_EST);
 }
 
 static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
@@ -550,7 +546,7 @@ static int __init acpi_cpufreq_early_init(void)
                return -ENOMEM;
        }
        for_each_possible_cpu(i) {
-               if (!alloc_cpumask_var_node(
+               if (!zalloc_cpumask_var_node(
                        &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
                        GFP_KERNEL, cpu_to_node(i))) {
 
index a8363e5..d47c775 100644 (file)
@@ -322,7 +322,7 @@ static int powernow_acpi_init(void)
                goto err0;
        }
 
-       if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+       if (!zalloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
                                                                GFP_KERNEL)) {
                retval = -ENOMEM;
                goto err05;
index f6b32d1..cf52215 100644 (file)
@@ -835,7 +835,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
        struct cpufreq_frequency_table *powernow_table;
        int ret_val = -ENODEV;
-       acpi_integer space_id;
+       acpi_integer control, status;
 
        if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
                dprintk("register performance failed: bad ACPI data\n");
@@ -848,12 +848,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
                goto err_out;
        }
 
-       space_id = data->acpi_data.control_register.space_id;
-       if ((space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
-               (space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
+       control = data->acpi_data.control_register.space_id;
+       status = data->acpi_data.status_register.space_id;
+
+       if ((control != ACPI_ADR_SPACE_FIXED_HARDWARE) ||
+           (status != ACPI_ADR_SPACE_FIXED_HARDWARE)) {
                dprintk("Invalid control/status registers (%x - %x)\n",
-                       data->acpi_data.control_register.space_id,
-                       space_id);
+                       control, status);
                goto err_out;
        }
 
@@ -886,7 +887,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
        /* notify BIOS that we exist */
        acpi_processor_notify_smm(THIS_MODULE);
 
-       if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+       if (!zalloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
                printk(KERN_ERR PFX
                                "unable to alloc powernow_k8_data cpumask\n");
                ret_val = -ENOMEM;
index c9f1fdc..55c831e 100644 (file)
@@ -471,7 +471,7 @@ static int centrino_target (struct cpufreq_policy *policy,
 
        if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL)))
                return -ENOMEM;
-       if (unlikely(!alloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
+       if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) {
                free_cpumask_var(saved_mask);
                return -ENOMEM;
        }
index 7437fa1..daed39b 100644 (file)
@@ -229,12 +229,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
 }
 #endif
 
-static void __cpuinit srat_detect_node(void)
+static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
 {
 #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
        unsigned node;
        int cpu = smp_processor_id();
-       int apicid = hard_smp_processor_id();
+       int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid;
 
        /* Don't do the funky fallback heuristics the AMD version employs
           for now. */
@@ -400,7 +400,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
        }
 
        /* Work around errata */
-       srat_detect_node();
+       srat_detect_node(c);
 
        if (cpu_has(c, X86_FEATURE_VMX))
                detect_vmx_virtcap(c);
index 483eda9..789efe2 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <asm/processor.h>
 #include <asm/smp.h>
+#include <asm/k8.h>
 
 #define LVL_1_INST     1
 #define LVL_1_DATA     2
@@ -159,14 +160,6 @@ struct _cpuid4_info_regs {
        unsigned long can_disable;
 };
 
-#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS)
-static struct pci_device_id k8_nb_id[] = {
-       { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
-       { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
-       {}
-};
-#endif
-
 unsigned short                 num_cache_leaves;
 
 /* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -207,10 +200,17 @@ union l3_cache {
 };
 
 static const unsigned short __cpuinitconst assocs[] = {
-       [1] = 1, [2] = 2, [4] = 4, [6] = 8,
-       [8] = 16, [0xa] = 32, [0xb] = 48,
+       [1] = 1,
+       [2] = 2,
+       [4] = 4,
+       [6] = 8,
+       [8] = 16,
+       [0xa] = 32,
+       [0xb] = 48,
        [0xc] = 64,
-       [0xf] = 0xffff // ??
+       [0xd] = 96,
+       [0xe] = 128,
+       [0xf] = 0xffff /* fully associative - no way to show this currently */
 };
 
 static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
@@ -271,7 +271,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
        eax->split.type = types[leaf];
        eax->split.level = levels[leaf];
        if (leaf == 3)
-               eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
+               eax->split.num_threads_sharing =
+                       current_cpu_data.x86_max_cores - 1;
        else
                eax->split.num_threads_sharing = 0;
        eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
@@ -291,6 +292,14 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 {
        if (index < 3)
                return;
+
+       if (boot_cpu_data.x86 == 0x11)
+               return;
+
+       /* see erratum #382 */
+       if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+               return;
+
        this_leaf->can_disable = 1;
 }
 
@@ -696,97 +705,75 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
 #define to_object(k)   container_of(k, struct _index_kobject, kobj)
 #define to_attr(a)     container_of(a, struct _cache_attr, attr)
 
-#ifdef CONFIG_PCI
-static struct pci_dev *get_k8_northbridge(int node)
-{
-       struct pci_dev *dev = NULL;
-       int i;
-
-       for (i = 0; i <= node; i++) {
-               do {
-                       dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
-                       if (!dev)
-                               break;
-               } while (!pci_match_id(&k8_nb_id[0], dev));
-               if (!dev)
-                       break;
-       }
-       return dev;
-}
-#else
-static struct pci_dev *get_k8_northbridge(int node)
-{
-       return NULL;
-}
-#endif
-
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+                                 unsigned int index)
 {
-       const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-       int node = cpu_to_node(cpumask_first(mask));
-       struct pci_dev *dev = NULL;
-       ssize_t ret = 0;
-       int i;
+       int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+       int node = cpu_to_node(cpu);
+       struct pci_dev *dev = node_to_k8_nb_misc(node);
+       unsigned int reg = 0;
 
        if (!this_leaf->can_disable)
-               return sprintf(buf, "Feature not enabled\n");
-
-       dev = get_k8_northbridge(node);
-       if (!dev) {
-               printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
                return -EINVAL;
-       }
 
-       for (i = 0; i < 2; i++) {
-               unsigned int reg;
+       if (!dev)
+               return -EINVAL;
 
-               pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
+       pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+       return sprintf(buf, "%x\n", reg);
+}
 
-               ret += sprintf(buf, "%sEntry: %d\n", buf, i);
-               ret += sprintf(buf, "%sReads:  %s\tNew Entries: %s\n",  
-                       buf,
-                       reg & 0x80000000 ? "Disabled" : "Allowed",
-                       reg & 0x40000000 ? "Disabled" : "Allowed");
-               ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
-                       buf, (reg & 0x30000) >> 16, reg & 0xfff);
-       }
-       return ret;
+#define SHOW_CACHE_DISABLE(index)                                      \
+static ssize_t                                                         \
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)          \
+{                                                                      \
+       return show_cache_disable(this_leaf, buf, index);               \
 }
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
 
-static ssize_t
-store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
-                   size_t count)
+static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
+       const char *buf, size_t count, unsigned int index)
 {
-       const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
-       int node = cpu_to_node(cpumask_first(mask));
-       struct pci_dev *dev = NULL;
-       unsigned int ret, index, val;
+       int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+       int node = cpu_to_node(cpu);
+       struct pci_dev *dev = node_to_k8_nb_misc(node);
+       unsigned long val = 0;
+       unsigned int scrubber = 0;
 
        if (!this_leaf->can_disable)
-               return 0;
-
-       if (strlen(buf) > 15)
                return -EINVAL;
 
-       ret = sscanf(buf, "%x %x", &index, &val);
-       if (ret != 2)
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
+       if (!dev)
                return -EINVAL;
-       if (index > 1)
+
+       if (strict_strtoul(buf, 10, &val) < 0)
                return -EINVAL;
 
        val |= 0xc0000000;
-       dev = get_k8_northbridge(node);
-       if (!dev) {
-               printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
-               return -EINVAL;
-       }
+
+       pci_read_config_dword(dev, 0x58, &scrubber);
+       scrubber &= ~0x1f000000;
+       pci_write_config_dword(dev, 0x58, scrubber);
 
        pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
        wbinvd();
        pci_write_config_dword(dev, 0x1BC + index * 4, val);
+       return count;
+}
 
-       return 1;
+#define STORE_CACHE_DISABLE(index)                                     \
+static ssize_t                                                         \
+store_cache_disable_##index(struct _cpuid4_info *this_leaf,            \
+                           const char *buf, size_t count)              \
+{                                                                      \
+       return store_cache_disable(this_leaf, buf, count, index);       \
 }
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
 
 struct _cache_attr {
        struct attribute attr;
@@ -808,7 +795,10 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
-static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+               show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+               show_cache_disable_1, store_cache_disable_1);
 
 static struct attribute * default_attrs[] = {
        &type.attr,
@@ -820,7 +810,8 @@ static struct attribute * default_attrs[] = {
        &size.attr,
        &shared_cpu_map.attr,
        &shared_cpu_list.attr,
-       &cache_disable.attr,
+       &cache_disable_0.attr,
+       &cache_disable_1.attr,
        NULL
 };
 
index 6fb0b35..09dd1d4 100644 (file)
@@ -1163,7 +1163,7 @@ static __init int mce_init_device(void)
        if (!mce_available(&boot_cpu_data))
                return -EIO;
 
-       alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
+       zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
 
        err = mce_init_banks();
        if (err)
index cef3ee3..65a0fce 100644 (file)
@@ -15,7 +15,6 @@
 #include <asm/hw_irq.h>
 #include <asm/idle.h>
 #include <asm/therm_throt.h>
-#include <asm/apic.h>
 
 asmlinkage void smp_thermal_interrupt(void)
 {
index ce0fe4b..1d584a1 100644 (file)
@@ -808,7 +808,7 @@ int __init mtrr_cleanup(unsigned address_bits)
 
        if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
                return 0;
-       rdmsr(MTRRdefType_MSR, def, dummy);
+       rdmsr(MSR_MTRRdefType, def, dummy);
        def &= 0xff;
        if (def != MTRR_TYPE_UNCACHABLE)
                return 0;
@@ -1003,7 +1003,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
         */
        if (!is_cpu(INTEL) || disable_mtrr_trim)
                return 0;
-       rdmsr(MTRRdefType_MSR, def, dummy);
+       rdmsr(MSR_MTRRdefType, def, dummy);
        def &= 0xff;
        if (def != MTRR_TYPE_UNCACHABLE)
                return 0;
index d21d4fb..0543f69 100644 (file)
@@ -20,9 +20,9 @@ struct fixed_range_block {
 };
 
 static struct fixed_range_block fixed_range_blocks[] = {
-       { MTRRfix64K_00000_MSR, 1 }, /* one  64k MTRR  */
-       { MTRRfix16K_80000_MSR, 2 }, /* two  16k MTRRs */
-       { MTRRfix4K_C0000_MSR,  8 }, /* eight 4k MTRRs */
+       { MSR_MTRRfix64K_00000, 1 }, /* one  64k MTRR  */
+       { MSR_MTRRfix16K_80000, 2 }, /* two  16k MTRRs */
+       { MSR_MTRRfix4K_C0000,  8 }, /* eight 4k MTRRs */
        {}
 };
 
@@ -194,12 +194,12 @@ get_fixed_ranges(mtrr_type * frs)
 
        k8_check_syscfg_dram_mod_en();
 
-       rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]);
+       rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]);
 
        for (i = 0; i < 2; i++)
-               rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]);
+               rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
        for (i = 0; i < 8; i++)
-               rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]);
+               rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
 }
 
 void mtrr_save_fixed_ranges(void *info)
@@ -310,7 +310,7 @@ void __init get_mtrr_state(void)
 
        vrs = mtrr_state.var_ranges;
 
-       rdmsr(MTRRcap_MSR, lo, dummy);
+       rdmsr(MSR_MTRRcap, lo, dummy);
        mtrr_state.have_fixed = (lo >> 8) & 1;
 
        for (i = 0; i < num_var_ranges; i++)
@@ -318,7 +318,7 @@ void __init get_mtrr_state(void)
        if (mtrr_state.have_fixed)
                get_fixed_ranges(mtrr_state.fixed_ranges);
 
-       rdmsr(MTRRdefType_MSR, lo, dummy);
+       rdmsr(MSR_MTRRdefType, lo, dummy);
        mtrr_state.def_type = (lo & 0xff);
        mtrr_state.enabled = (lo & 0xc00) >> 10;
 
@@ -583,10 +583,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
        __flush_tlb();
 
        /*  Save MTRR state */
-       rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+       rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
 
        /*  Disable MTRRs, and set the default type to uncached  */
-       mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi);
+       mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
 }
 
 static void post_set(void) __releases(set_atomicity_lock)
@@ -595,7 +595,7 @@ static void post_set(void) __releases(set_atomicity_lock)
        __flush_tlb();
 
        /* Intel (P6) standard MTRRs */
-       mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi);
+       mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
                
        /*  Enable caches  */
        write_cr0(read_cr0() & 0xbfffffff);
@@ -707,7 +707,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i
 static int generic_have_wrcomb(void)
 {
        unsigned long config, dummy;
-       rdmsr(MTRRcap_MSR, config, dummy);
+       rdmsr(MSR_MTRRcap, config, dummy);
        return (config & (1 << 10));
 }
 
index 03cda01..8fc248b 100644 (file)
@@ -104,7 +104,7 @@ static void __init set_num_var_ranges(void)
        unsigned long config = 0, dummy;
 
        if (use_intel()) {
-               rdmsr(MTRRcap_MSR, config, dummy);
+               rdmsr(MSR_MTRRcap, config, dummy);
        } else if (is_cpu(AMD))
                config = 2;
        else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
index 77f67f7..7538b76 100644 (file)
@@ -5,21 +5,6 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 
-#define MTRRcap_MSR     0x0fe
-#define MTRRdefType_MSR 0x2ff
-
-#define MTRRfix64K_00000_MSR 0x250
-#define MTRRfix16K_80000_MSR 0x258
-#define MTRRfix16K_A0000_MSR 0x259
-#define MTRRfix4K_C0000_MSR 0x268
-#define MTRRfix4K_C8000_MSR 0x269
-#define MTRRfix4K_D0000_MSR 0x26a
-#define MTRRfix4K_D8000_MSR 0x26b
-#define MTRRfix4K_E0000_MSR 0x26c
-#define MTRRfix4K_E8000_MSR 0x26d
-#define MTRRfix4K_F0000_MSR 0x26e
-#define MTRRfix4K_F8000_MSR 0x26f
-
 #define MTRR_CHANGE_MASK_FIXED     0x01
 #define MTRR_CHANGE_MASK_VARIABLE  0x02
 #define MTRR_CHANGE_MASK_DEFTYPE   0x04
index 7f7e275..1f5fb15 100644 (file)
@@ -35,7 +35,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
 
                if (use_intel())
                        /*  Save MTRR state */
-                       rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+                       rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
                else
                        /* Cyrix ARRs - everything else were excluded at the top */
                        ctxt->ccr3 = getCx86(CX86_CCR3);
@@ -46,7 +46,7 @@ void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
 {
        if (use_intel())
                /*  Disable MTRRs, and set the default type to uncached  */
-               mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL,
+               mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
                      ctxt->deftype_hi);
        else if (is_cpu(CYRIX))
                /* Cyrix ARRs - everything else were excluded at the top */
@@ -64,7 +64,7 @@ void set_mtrr_done(struct set_mtrr_context *ctxt)
                /*  Restore MTRRdefType  */
                if (use_intel())
                        /* Intel (P6) standard MTRRs */
-                       mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi);
+                       mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
                else
                        /* Cyrix ARRs - everything else was excluded at the top */
                        setCx86(CX86_CCR3, ctxt->ccr3);
index 87b67e3..48bfe13 100644 (file)
  * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
  */
 
-
-#include <asm/ds.h>
-
-#include <linux/errno.h>
+#include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/slab.h>
+#include <linux/errno.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
-#include <linux/kernel.h>
+#include <linux/trace_clock.h>
+
+#include <asm/ds.h>
 
+#include "ds_selftest.h"
 
 /*
- * The configuration for a particular DS hardware implementation.
+ * The configuration for a particular DS hardware implementation:
  */
 struct ds_configuration {
-       /* the name of the configuration */
-       const char *name;
-       /* the size of one pointer-typed field in the DS structure and
-          in the BTS and PEBS buffers in bytes;
-          this covers the first 8 DS fields related to buffer management. */
-       unsigned char  sizeof_field;
-       /* the size of a BTS/PEBS record in bytes */
-       unsigned char  sizeof_rec[2];
-       /* a series of bit-masks to control various features indexed
-        * by enum ds_feature */
-       unsigned long ctl[dsf_ctl_max];
+       /* The name of the configuration: */
+       const char              *name;
+
+       /* The size of pointer-typed fields in DS, BTS, and PEBS: */
+       unsigned char           sizeof_ptr_field;
+
+       /* The size of a BTS/PEBS record in bytes: */
+       unsigned char           sizeof_rec[2];
+
+       /* The number of pebs counter reset values in the DS structure. */
+       unsigned char           nr_counter_reset;
+
+       /* Control bit-masks indexed by enum ds_feature: */
+       unsigned long           ctl[dsf_ctl_max];
 };
-static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
+static struct ds_configuration ds_cfg __read_mostly;
+
+
+/* Maximal size of a DS configuration: */
+#define MAX_SIZEOF_DS          0x80
 
-#define ds_cfg per_cpu(ds_cfg_array, smp_processor_id())
+/* Maximal size of a BTS record: */
+#define MAX_SIZEOF_BTS         (3 * 8)
 
-#define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */
-#define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */
-#define DS_ALIGNMENT (1 << 3)  /* BTS and PEBS buffer alignment */
+/* BTS and PEBS buffer alignment: */
+#define DS_ALIGNMENT           (1 << 3)
 
-#define BTS_CONTROL \
- (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\
-  ds_cfg.ctl[dsf_bts_overflow])
+/* Number of buffer pointers in DS: */
+#define NUM_DS_PTR_FIELDS      8
 
+/* Size of a pebs reset value in DS: */
+#define PEBS_RESET_FIELD_SIZE  8
+
+/* Mask of control bits in the DS MSR register: */
+#define BTS_CONTROL                              \
+       ( ds_cfg.ctl[dsf_bts]                   | \
+         ds_cfg.ctl[dsf_bts_kernel]            | \
+         ds_cfg.ctl[dsf_bts_user]              | \
+         ds_cfg.ctl[dsf_bts_overflow] )
 
 /*
  * A BTS or PEBS tracer.
@@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array);
  * to identify tracers.
  */
 struct ds_tracer {
-       /* the DS context (partially) owned by this tracer */
-       struct ds_context *context;
-       /* the buffer provided on ds_request() and its size in bytes */
-       void *buffer;
-       size_t size;
+       /* The DS context (partially) owned by this tracer. */
+       struct ds_context       *context;
+       /* The buffer provided on ds_request() and its size in bytes. */
+       void                    *buffer;
+       size_t                  size;
 };
 
 struct bts_tracer {
-       /* the common DS part */
-       struct ds_tracer ds;
-       /* the trace including the DS configuration */
-       struct bts_trace trace;
-       /* buffer overflow notification function */
-       bts_ovfl_callback_t ovfl;
+       /* The common DS part: */
+       struct ds_tracer        ds;
+
+       /* The trace including the DS configuration: */
+       struct bts_trace        trace;
+
+       /* Buffer overflow notification function: */
+       bts_ovfl_callback_t     ovfl;
+
+       /* Active flags affecting trace collection. */
+       unsigned int            flags;
 };
 
 struct pebs_tracer {
-       /* the common DS part */
-       struct ds_tracer ds;
-       /* the trace including the DS configuration */
-       struct pebs_trace trace;
-       /* buffer overflow notification function */
-       pebs_ovfl_callback_t ovfl;
+       /* The common DS part: */
+       struct ds_tracer        ds;
+
+       /* The trace including the DS configuration: */
+       struct pebs_trace       trace;
+
+       /* Buffer overflow notification function: */
+       pebs_ovfl_callback_t    ovfl;
 };
 
 /*
@@ -97,6 +120,7 @@ struct pebs_tracer {
  *
  * The DS configuration consists of the following fields; different
  * architetures vary in the size of those fields.
+ *
  * - double-word aligned base linear address of the BTS buffer
  * - write pointer into the BTS buffer
  * - end linear address of the BTS buffer (one byte beyond the end of
@@ -135,21 +159,22 @@ enum ds_field {
 };
 
 enum ds_qualifier {
-       ds_bts  = 0,
+       ds_bts = 0,
        ds_pebs
 };
 
-static inline unsigned long ds_get(const unsigned char *base,
-                                  enum ds_qualifier qual, enum ds_field field)
+static inline unsigned long
+ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
 {
-       base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+       base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        return *(unsigned long *)base;
 }
 
-static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
-                         enum ds_field field, unsigned long value)
+static inline void
+ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
+       unsigned long value)
 {
-       base += (ds_cfg.sizeof_field * (field + (4 * qual)));
+       base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
        (*(unsigned long *)base) = value;
 }
 
@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual,
  */
 static DEFINE_SPINLOCK(ds_lock);
 
-
 /*
  * We either support (system-wide) per-cpu or per-thread allocation.
  * We distinguish the two based on the task_struct pointer, where a
@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock);
  */
 static atomic_t tracers = ATOMIC_INIT(0);
 
-static inline void get_tracer(struct task_struct *task)
+static inline int get_tracer(struct task_struct *task)
 {
-       if (task)
+       int error;
+
+       spin_lock_irq(&ds_lock);
+
+       if (task) {
+               error = -EPERM;
+               if (atomic_read(&tracers) < 0)
+                       goto out;
                atomic_inc(&tracers);
-       else
+       } else {
+               error = -EPERM;
+               if (atomic_read(&tracers) > 0)
+                       goto out;
                atomic_dec(&tracers);
+       }
+
+       error = 0;
+out:
+       spin_unlock_irq(&ds_lock);
+       return error;
 }
 
 static inline void put_tracer(struct task_struct *task)
@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task)
                atomic_inc(&tracers);
 }
 
-static inline int check_tracer(struct task_struct *task)
-{
-       return task ?
-               (atomic_read(&tracers) >= 0) :
-               (atomic_read(&tracers) <= 0);
-}
-
-
 /*
  * The DS context is either attached to a thread or to a cpu:
  * - in the former case, the thread_struct contains a pointer to the
@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task)
  * deallocated when the last user puts the context.
  */
 struct ds_context {
-       /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */
-       unsigned char ds[MAX_SIZEOF_DS];
-       /* the owner of the BTS and PEBS configuration, respectively */
-       struct bts_tracer *bts_master;
-       struct pebs_tracer *pebs_master;
-       /* use count */
-       unsigned long count;
-       /* a pointer to the context location inside the thread_struct
-        * or the per_cpu context array */
-       struct ds_context **this;
-       /* a pointer to the task owning this context, or NULL, if the
-        * context is owned by a cpu */
-       struct task_struct *task;
-};
+       /* The DS configuration; goes into MSR_IA32_DS_AREA: */
+       unsigned char           ds[MAX_SIZEOF_DS];
+
+       /* The owner of the BTS and PEBS configuration, respectively: */
+       struct bts_tracer       *bts_master;
+       struct pebs_tracer      *pebs_master;
 
-static DEFINE_PER_CPU(struct ds_context *, system_context_array);
+       /* Use count: */
+       unsigned long           count;
 
-#define system_context per_cpu(system_context_array, smp_processor_id())
+       /* Pointer to the context pointer field: */
+       struct ds_context       **this;
+
+       /* The traced task; NULL for cpu tracing: */
+       struct task_struct      *task;
+
+       /* The traced cpu; only valid if task is NULL: */
+       int                     cpu;
+};
 
+static DEFINE_PER_CPU(struct ds_context *, cpu_context);
 
-static inline struct ds_context *ds_get_context(struct task_struct *task)
+
+static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
 {
        struct ds_context **p_context =
-               (task ? &task->thread.ds_ctx : &system_context);
+               (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu));
        struct ds_context *context = NULL;
        struct ds_context *new_context = NULL;
-       unsigned long irq;
 
        /* Chances are small that we already have a context. */
        new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
        if (!new_context)
                return NULL;
 
-       spin_lock_irqsave(&ds_lock, irq);
+       spin_lock_irq(&ds_lock);
 
        context = *p_context;
-       if (!context) {
+       if (likely(!context)) {
                context = new_context;
 
                context->this = p_context;
                context->task = task;
+               context->cpu = cpu;
                context->count = 0;
 
-               if (task)
-                       set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
-
-               if (!task || (task == current))
-                       wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds);
-
                *p_context = context;
        }
 
        context->count++;
 
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
 
        if (context != new_context)
                kfree(new_context);
@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
        return context;
 }
 
-static inline void ds_put_context(struct ds_context *context)
+static void ds_put_context(struct ds_context *context)
 {
+       struct task_struct *task;
        unsigned long irq;
 
        if (!context)
@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context)
 
        *(context->this) = NULL;
 
-       if (context->task)
-               clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+       task = context->task;
+
+       if (task)
+               clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
-       if (!context->task || (context->task == current))
-               wrmsrl(MSR_IA32_DS_AREA, 0);
+       /*
+        * We leave the (now dangling) pointer to the DS configuration in
+        * the DS_AREA msr. This is as good or as bad as replacing it with
+        * NULL - the hardware would crash if we enabled tracing.
+        *
+        * This saves us some problems with having to write an msr on a
+        * different cpu while preventing others from doing the same for the
+        * next context for that same cpu.
+        */
 
        spin_unlock_irqrestore(&ds_lock, irq);
 
+       /* The context might still be in use for context switching. */
+       if (task && (task != current))
+               wait_task_context_switch(task);
+
        kfree(context);
 }
 
+static void ds_install_ds_area(struct ds_context *context)
+{
+       unsigned long ds;
+
+       ds = (unsigned long)context->ds;
+
+       /*
+        * There is a race between the bts master and the pebs master.
+        *
+        * The thread/cpu access is synchronized via get/put_cpu() for
+        * task tracing and via wrmsr_on_cpu for cpu tracing.
+        *
+        * If bts and pebs are collected for the same task or same cpu,
+        * the same configuration is written twice.
+        */
+       if (context->task) {
+               get_cpu();
+               if (context->task == current)
+                       wrmsrl(MSR_IA32_DS_AREA, ds);
+               set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
+               put_cpu();
+       } else
+               wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
+                            (u32)((u64)ds), (u32)((u64)ds >> 32));
+}
 
 /*
  * Call the tracer's callback on a buffer overflow.
@@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
  * The remainder of any partially written record is zeroed out.
  *
  * context: the DS context
- * qual: the buffer type
- * record: the data to write
- * size: the size of the data
+ * qual:    the buffer type
+ * record:  the data to write
+ * size:    the size of the data
  */
 static int ds_write(struct ds_context *context, enum ds_qualifier qual,
                    const void *record, size_t size)
@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
                unsigned long write_size, adj_write_size;
 
                /*
-                * write as much as possible without producing an
+                * Write as much as possible without producing an
                 * overflow interrupt.
                 *
-                * interrupt_threshold must either be
+                * Interrupt_threshold must either be
                 * - bigger than absolute_maximum or
                 * - point to a record between buffer_base and absolute_maximum
                 *
-                * index points to a valid record.
+                * Index points to a valid record.
                 */
                base   = ds_get(context->ds, qual, ds_buffer_base);
                index  = ds_get(context->ds, qual, ds_index);
@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
 
                write_end = min(end, int_th);
 
-               /* if we are already beyond the interrupt threshold,
-                * we fill the entire buffer */
+               /*
+                * If we are already beyond the interrupt threshold,
+                * we fill the entire buffer.
+                */
                if (write_end <= index)
                        write_end = end;
 
@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
                adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
                adj_write_size *= ds_cfg.sizeof_rec[qual];
 
-               /* zero out trailing bytes */
+               /* Zero out trailing bytes. */
                memset((char *)index + write_size, 0,
                       adj_write_size - write_size);
                index += adj_write_size;
@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual,
  * Later architectures use 64bit pointers throughout, whereas earlier
  * architectures use 32bit pointers in 32bit mode.
  *
- * We compute the base address for the first 8 fields based on:
+ * We compute the base address for the fields based on:
  * - the field size stored in the DS configuration
  * - the relative field position
  *
@@ -431,23 +501,23 @@ enum bts_field {
        bts_to,
        bts_flags,
 
-       bts_qual = bts_from,
-       bts_jiffies = bts_to,
-       bts_pid = bts_flags,
+       bts_qual                = bts_from,
+       bts_clock               = bts_to,
+       bts_pid                 = bts_flags,
 
-       bts_qual_mask = (bts_qual_max - 1),
-       bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
+       bts_qual_mask           = (bts_qual_max - 1),
+       bts_escape              = ((unsigned long)-1 & ~bts_qual_mask)
 };
 
 static inline unsigned long bts_get(const char *base, enum bts_field field)
 {
-       base += (ds_cfg.sizeof_field * field);
+       base += (ds_cfg.sizeof_ptr_field * field);
        return *(unsigned long *)base;
 }
 
 static inline void bts_set(char *base, enum bts_field field, unsigned long val)
 {
-       base += (ds_cfg.sizeof_field * field);;
+       base += (ds_cfg.sizeof_ptr_field * field);;
        (*(unsigned long *)base) = val;
 }
 
@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val)
  *
  * return: bytes read/written on success; -Eerrno, otherwise
  */
-static int bts_read(struct bts_tracer *tracer, const void *at,
-                   struct bts_struct *out)
+static int
+bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
 {
        if (!tracer)
                return -EINVAL;
@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at,
        memset(out, 0, sizeof(*out));
        if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
                out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
-               out->variant.timestamp.jiffies = bts_get(at, bts_jiffies);
-               out->variant.timestamp.pid = bts_get(at, bts_pid);
+               out->variant.event.clock = bts_get(at, bts_clock);
+               out->variant.event.pid = bts_get(at, bts_pid);
        } else {
                out->qualifier = bts_branch;
                out->variant.lbr.from = bts_get(at, bts_from);
@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
        case bts_task_arrives:
        case bts_task_departs:
                bts_set(raw, bts_qual, (bts_escape | in->qualifier));
-               bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies);
-               bts_set(raw, bts_pid, in->variant.timestamp.pid);
+               bts_set(raw, bts_clock, in->variant.event.clock);
+               bts_set(raw, bts_pid, in->variant.event.pid);
                break;
        default:
                return -EINVAL;
@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
                             unsigned int flags) {
        unsigned long buffer, adj;
 
-       /* adjust the buffer address and size to meet alignment
+       /*
+        * Adjust the buffer address and size to meet alignment
         * constraints:
         * - buffer is double-word aligned
         * - size is multiple of record size
@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
        trace->begin = (void *)buffer;
        trace->top = trace->begin;
        trace->end = (void *)(buffer + size);
-       /* The value for 'no threshold' is -1, which will set the
+       /*
+        * The value for 'no threshold' is -1, which will set the
         * threshold outside of the buffer, just like we want it.
         */
+       ith *= ds_cfg.sizeof_rec[qual];
        trace->ith = (void *)(buffer + size - ith);
 
        trace->flags = flags;
@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
 
 static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
                      enum ds_qualifier qual, struct task_struct *task,
-                     void *base, size_t size, size_t th, unsigned int flags)
+                     int cpu, void *base, size_t size, size_t th)
 {
        struct ds_context *context;
        int error;
+       size_t req_size;
+
+       error = -EOPNOTSUPP;
+       if (!ds_cfg.sizeof_rec[qual])
+               goto out;
 
        error = -EINVAL;
        if (!base)
                goto out;
 
-       /* we require some space to do alignment adjustments below */
+       req_size = ds_cfg.sizeof_rec[qual];
+       /* We might need space for alignment adjustments. */
+       if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
+               req_size += DS_ALIGNMENT;
+
        error = -EINVAL;
-       if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual]))
+       if (size < req_size)
                goto out;
 
        if (th != (size_t)-1) {
@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
        tracer->size = size;
 
        error = -ENOMEM;
-       context = ds_get_context(task);
+       context = ds_get_context(task, cpu);
        if (!context)
                goto out;
        tracer->context = context;
 
-       ds_init_ds_trace(trace, qual, base, size, th, flags);
+       /*
+        * Defer any tracer-specific initialization work for the context until
+        * context ownership has been clarified.
+        */
 
        error = 0;
  out:
        return error;
 }
 
-struct bts_tracer *ds_request_bts(struct task_struct *task,
-                                 void *base, size_t size,
-                                 bts_ovfl_callback_t ovfl, size_t th,
-                                 unsigned int flags)
+static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
+                                        void *base, size_t size,
+                                        bts_ovfl_callback_t ovfl, size_t th,
+                                        unsigned int flags)
 {
        struct bts_tracer *tracer;
-       unsigned long irq;
        int error;
 
+       /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
-       if (!ds_cfg.ctl[dsf_bts])
+       if (ovfl)
                goto out;
 
-       /* buffer overflow notification is not yet implemented */
-       error = -EOPNOTSUPP;
-       if (ovfl)
+       error = get_tracer(task);
+       if (error < 0)
                goto out;
 
        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
        if (!tracer)
-               goto out;
+               goto out_put_tracer;
        tracer->ovfl = ovfl;
 
+       /* Do some more error checking and acquire a tracing context. */
        error = ds_request(&tracer->ds, &tracer->trace.ds,
-                          ds_bts, task, base, size, th, flags);
+                          ds_bts, task, cpu, base, size, th);
        if (error < 0)
                goto out_tracer;
 
-
-       spin_lock_irqsave(&ds_lock, irq);
-
-       error = -EPERM;
-       if (!check_tracer(task))
-               goto out_unlock;
-       get_tracer(task);
+       /* Claim the bts part of the tracing context we acquired above. */
+       spin_lock_irq(&ds_lock);
 
        error = -EPERM;
        if (tracer->ds.context->bts_master)
-               goto out_put_tracer;
+               goto out_unlock;
        tracer->ds.context->bts_master = tracer;
 
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
 
+       /*
+        * Now that we own the bts part of the context, let's complete the
+        * initialization for that part.
+        */
+       ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
+       ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+       ds_install_ds_area(tracer->ds.context);
 
        tracer->trace.read  = bts_read;
        tracer->trace.write = bts_write;
 
-       ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
+       /* Start tracing. */
        ds_resume_bts(tracer);
 
        return tracer;
 
- out_put_tracer:
-       put_tracer(task);
  out_unlock:
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
        ds_put_context(tracer->ds.context);
  out_tracer:
        kfree(tracer);
+ out_put_tracer:
+       put_tracer(task);
  out:
        return ERR_PTR(error);
 }
 
-struct pebs_tracer *ds_request_pebs(struct task_struct *task,
-                                   void *base, size_t size,
-                                   pebs_ovfl_callback_t ovfl, size_t th,
-                                   unsigned int flags)
+struct bts_tracer *ds_request_bts_task(struct task_struct *task,
+                                      void *base, size_t size,
+                                      bts_ovfl_callback_t ovfl,
+                                      size_t th, unsigned int flags)
+{
+       return ds_request_bts(task, 0, base, size, ovfl, th, flags);
+}
+
+struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
+                                     bts_ovfl_callback_t ovfl,
+                                     size_t th, unsigned int flags)
+{
+       return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
+}
+
+static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
+                                          void *base, size_t size,
+                                          pebs_ovfl_callback_t ovfl, size_t th,
+                                          unsigned int flags)
 {
        struct pebs_tracer *tracer;
-       unsigned long irq;
        int error;
 
-       /* buffer overflow notification is not yet implemented */
+       /* Buffer overflow notification is not yet implemented. */
        error = -EOPNOTSUPP;
        if (ovfl)
                goto out;
 
+       error = get_tracer(task);
+       if (error < 0)
+               goto out;
+
        error = -ENOMEM;
        tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
        if (!tracer)
-               goto out;
+               goto out_put_tracer;
        tracer->ovfl = ovfl;
 
+       /* Do some more error checking and acquire a tracing context. */
        error = ds_request(&tracer->ds, &tracer->trace.ds,
-                          ds_pebs, task, base, size, th, flags);
+                          ds_pebs, task, cpu, base, size, th);
        if (error < 0)
                goto out_tracer;
 
-       spin_lock_irqsave(&ds_lock, irq);
-
-       error = -EPERM;
-       if (!check_tracer(task))
-               goto out_unlock;
-       get_tracer(task);
+       /* Claim the pebs part of the tracing context we acquired above. */
+       spin_lock_irq(&ds_lock);
 
        error = -EPERM;
        if (tracer->ds.context->pebs_master)
-               goto out_put_tracer;
+               goto out_unlock;
        tracer->ds.context->pebs_master = tracer;
 
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
 
+       /*
+        * Now that we own the pebs part of the context, let's complete the
+        * initialization for that part.
+        */
+       ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
        ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
+       ds_install_ds_area(tracer->ds.context);
+
+       /* Start tracing. */
        ds_resume_pebs(tracer);
 
        return tracer;
 
- out_put_tracer:
-       put_tracer(task);
  out_unlock:
-       spin_unlock_irqrestore(&ds_lock, irq);
+       spin_unlock_irq(&ds_lock);
        ds_put_context(tracer->ds.context);
  out_tracer:
        kfree(tracer);
+ out_put_tracer:
+       put_tracer(task);
  out:
        return ERR_PTR(error);
 }
 
-void ds_release_bts(struct bts_tracer *tracer)
+struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
+                                        void *base, size_t size,
+                                        pebs_ovfl_callback_t ovfl,
+                                        size_t th, unsigned int flags)
 {
-       if (!tracer)
-               return;
+       return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
+}
 
-       ds_suspend_bts(tracer);
+struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
+                                       pebs_ovfl_callback_t ovfl,
+                                       size_t th, unsigned int flags)
+{
+       return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
+}
+
+static void ds_free_bts(struct bts_tracer *tracer)
+{
+       struct task_struct *task;
+
+       task = tracer->ds.context->task;
 
        WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
        tracer->ds.context->bts_master = NULL;
 
-       put_tracer(tracer->ds.context->task);
+       /* Make sure tracing stopped and the tracer is not in use. */
+       if (task && (task != current))
+               wait_task_context_switch(task);
+
        ds_put_context(tracer->ds.context);
+       put_tracer(task);
 
        kfree(tracer);
 }
 
+void ds_release_bts(struct bts_tracer *tracer)
+{
+       might_sleep();
+
+       if (!tracer)
+               return;
+
+       ds_suspend_bts(tracer);
+       ds_free_bts(tracer);
+}
+
+int ds_release_bts_noirq(struct bts_tracer *tracer)
+{
+       struct task_struct *task;
+       unsigned long irq;
+       int error;
+
+       if (!tracer)
+               return 0;
+
+       task = tracer->ds.context->task;
+
+       local_irq_save(irq);
+
+       error = -EPERM;
+       if (!task &&
+           (tracer->ds.context->cpu != smp_processor_id()))
+               goto out;
+
+       error = -EPERM;
+       if (task && (task != current))
+               goto out;
+
+       ds_suspend_bts_noirq(tracer);
+       ds_free_bts(tracer);
+
+       error = 0;
+ out:
+       local_irq_restore(irq);
+       return error;
+}
+
+static void update_task_debugctlmsr(struct task_struct *task,
+                                   unsigned long debugctlmsr)
+{
+       task->thread.debugctlmsr = debugctlmsr;
+
+       get_cpu();
+       if (task == current)
+               update_debugctlmsr(debugctlmsr);
+       put_cpu();
+}
+
 void ds_suspend_bts(struct bts_tracer *tracer)
 {
        struct task_struct *task;
+       unsigned long debugctlmsr;
+       int cpu;
 
        if (!tracer)
                return;
 
+       tracer->flags = 0;
+
        task = tracer->ds.context->task;
+       cpu  = tracer->ds.context->cpu;
 
-       if (!task || (task == current))
-               update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL);
+       WARN_ON(!task && irqs_disabled());
 
-       if (task) {
-               task->thread.debugctlmsr &= ~BTS_CONTROL;
+       debugctlmsr = (task ?
+                      task->thread.debugctlmsr :
+                      get_debugctlmsr_on_cpu(cpu));
+       debugctlmsr &= ~BTS_CONTROL;
 
-               if (!task->thread.debugctlmsr)
-                       clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-       }
+       if (task)
+               update_task_debugctlmsr(task, debugctlmsr);
+       else
+               update_debugctlmsr_on_cpu(cpu, debugctlmsr);
 }
 
-void ds_resume_bts(struct bts_tracer *tracer)
+int ds_suspend_bts_noirq(struct bts_tracer *tracer)
 {
        struct task_struct *task;
-       unsigned long control;
+       unsigned long debugctlmsr, irq;
+       int cpu, error = 0;
 
        if (!tracer)
-               return;
+               return 0;
+
+       tracer->flags = 0;
 
        task = tracer->ds.context->task;
+       cpu  = tracer->ds.context->cpu;
+
+       local_irq_save(irq);
+
+       error = -EPERM;
+       if (!task && (cpu != smp_processor_id()))
+               goto out;
+
+       debugctlmsr = (task ?
+                      task->thread.debugctlmsr :
+                      get_debugctlmsr());
+       debugctlmsr &= ~BTS_CONTROL;
+
+       if (task)
+               update_task_debugctlmsr(task, debugctlmsr);
+       else
+               update_debugctlmsr(debugctlmsr);
+
+       error = 0;
+ out:
+       local_irq_restore(irq);
+       return error;
+}
+
+static unsigned long ds_bts_control(struct bts_tracer *tracer)
+{
+       unsigned long control;
 
        control = ds_cfg.ctl[dsf_bts];
        if (!(tracer->trace.ds.flags & BTS_KERNEL))
@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer)
        if (!(tracer->trace.ds.flags & BTS_USER))
                control |= ds_cfg.ctl[dsf_bts_user];
 
-       if (task) {
-               task->thread.debugctlmsr |= control;
-               set_tsk_thread_flag(task, TIF_DEBUGCTLMSR);
-       }
-
-       if (!task || (task == current))
-               update_debugctlmsr(get_debugctlmsr() | control);
+       return control;
 }
 
-void ds_release_pebs(struct pebs_tracer *tracer)
+void ds_resume_bts(struct bts_tracer *tracer)
 {
+       struct task_struct *task;
+       unsigned long debugctlmsr;
+       int cpu;
+
        if (!tracer)
                return;
 
-       ds_suspend_pebs(tracer);
+       tracer->flags = tracer->trace.ds.flags;
+
+       task = tracer->ds.context->task;
+       cpu  = tracer->ds.context->cpu;
+
+       WARN_ON(!task && irqs_disabled());
+
+       debugctlmsr = (task ?
+                      task->thread.debugctlmsr :
+                      get_debugctlmsr_on_cpu(cpu));
+       debugctlmsr |= ds_bts_control(tracer);
+
+       if (task)
+               update_task_debugctlmsr(task, debugctlmsr);
+       else
+               update_debugctlmsr_on_cpu(cpu, debugctlmsr);
+}
+
+int ds_resume_bts_noirq(struct bts_tracer *tracer)
+{
+       struct task_struct *task;
+       unsigned long debugctlmsr, irq;
+       int cpu, error = 0;
+
+       if (!tracer)
+               return 0;
+
+       tracer->flags = tracer->trace.ds.flags;
+
+       task = tracer->ds.context->task;
+       cpu  = tracer->ds.context->cpu;
+
+       local_irq_save(irq);
+
+       error = -EPERM;
+       if (!task && (cpu != smp_processor_id()))
+               goto out;
+
+       debugctlmsr = (task ?
+                      task->thread.debugctlmsr :
+                      get_debugctlmsr());
+       debugctlmsr |= ds_bts_control(tracer);
+
+       if (task)
+               update_task_debugctlmsr(task, debugctlmsr);
+       else
+               update_debugctlmsr(debugctlmsr);
+
+       error = 0;
+ out:
+       local_irq_restore(irq);
+       return error;
+}
+
+static void ds_free_pebs(struct pebs_tracer *tracer)
+{
+       struct task_struct *task;
+
+       task = tracer->ds.context->task;
 
        WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
        tracer->ds.context->pebs_master = NULL;
 
-       put_tracer(tracer->ds.context->task);
        ds_put_context(tracer->ds.context);
+       put_tracer(task);
 
        kfree(tracer);
 }
 
+void ds_release_pebs(struct pebs_tracer *tracer)
+{
+       might_sleep();
+
+       if (!tracer)
+               return;
+
+       ds_suspend_pebs(tracer);
+       ds_free_pebs(tracer);
+}
+
+int ds_release_pebs_noirq(struct pebs_tracer *tracer)
+{
+       struct task_struct *task;
+       unsigned long irq;
+       int error;
+
+       if (!tracer)
+               return 0;
+
+       task = tracer->ds.context->task;
+
+       local_irq_save(irq);
+
+       error = -EPERM;
+       if (!task &&
+           (tracer->ds.context->cpu != smp_processor_id()))
+               goto out;
+
+       error = -EPERM;
+       if (task && (task != current))
+               goto out;
+
+       ds_suspend_pebs_noirq(tracer);
+       ds_free_pebs(tracer);
+
+       error = 0;
+ out:
+       local_irq_restore(irq);
+       return error;
+}
+
 void ds_suspend_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
+{
+       return 0;
+}
+
 void ds_resume_pebs(struct pebs_tracer *tracer)
 {
 
 }
 
+int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
+{
+       return 0;
+}
+
 const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
 {
        if (!tracer)
@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
                return NULL;
 
        ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
-       tracer->trace.reset_value =
-               *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8));
+
+       tracer->trace.counters = ds_cfg.nr_counter_reset;
+       memcpy(tracer->trace.counter_reset,
+              tracer->ds.context->ds +
+              (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
+              ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
 
        return &tracer->trace;
 }
@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer)
 
        tracer->trace.ds.top = tracer->trace.ds.begin;
 
-       ds_set(tracer->ds.context->ds, ds_bts, ds_index,
+       ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
               (unsigned long)tracer->trace.ds.top);
 
        return 0;
 }
 
-int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value)
+int ds_set_pebs_reset(struct pebs_tracer *tracer,
+                     unsigned int counter, u64 value)
 {
        if (!tracer)
                return -EINVAL;
 
-       *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value;
+       if (ds_cfg.nr_counter_reset <= counter)
+               return -EINVAL; /* valid indices: 0 .. nr_counter_reset - 1 */
+
+       *(u64 *)(tracer->ds.context->ds +
+                (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
+                (counter * PEBS_RESET_FIELD_SIZE)) = value;
 
        return 0;
 }
@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = {
        .ctl[dsf_bts]           = (1 << 2) | (1 << 3),
        .ctl[dsf_bts_kernel]    = (1 << 5),
        .ctl[dsf_bts_user]      = (1 << 6),
-
-       .sizeof_field           = sizeof(long),
-       .sizeof_rec[ds_bts]     = sizeof(long) * 3,
-#ifdef __i386__
-       .sizeof_rec[ds_pebs]    = sizeof(long) * 10,
-#else
-       .sizeof_rec[ds_pebs]    = sizeof(long) * 18,
-#endif
+       .nr_counter_reset       = 1,
 };
 static const struct ds_configuration ds_cfg_pentium_m = {
        .name = "Pentium M",
        .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
-
-       .sizeof_field           = sizeof(long),
-       .sizeof_rec[ds_bts]     = sizeof(long) * 3,
-#ifdef __i386__
-       .sizeof_rec[ds_pebs]    = sizeof(long) * 10,
-#else
-       .sizeof_rec[ds_pebs]    = sizeof(long) * 18,
-#endif
+       .nr_counter_reset       = 1,
 };
 static const struct ds_configuration ds_cfg_core2_atom = {
        .name = "Core 2/Atom",
        .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
        .ctl[dsf_bts_kernel]    = (1 << 9),
        .ctl[dsf_bts_user]      = (1 << 10),
-
-       .sizeof_field           = 8,
-       .sizeof_rec[ds_bts]     = 8 * 3,
-       .sizeof_rec[ds_pebs]    = 8 * 18,
+       .nr_counter_reset       = 1,
+};
+static const struct ds_configuration ds_cfg_core_i7 = {
+       .name = "Core i7",
+       .ctl[dsf_bts]           = (1 << 6) | (1 << 7),
+       .ctl[dsf_bts_kernel]    = (1 << 9),
+       .ctl[dsf_bts_user]      = (1 << 10),
+       .nr_counter_reset       = 4,
 };
 
 static void
-ds_configure(const struct ds_configuration *cfg)
+ds_configure(const struct ds_configuration *cfg,
+            struct cpuinfo_x86 *cpu)
 {
+       unsigned long nr_pebs_fields = 0;
+
+       printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
+
+#ifdef __i386__
+       nr_pebs_fields = 10;
+#else
+       nr_pebs_fields = 18;
+#endif
+
+       /*
+        * Starting with version 2, architectural performance
+        * monitoring supports a format specifier.
+        */
+       if ((cpuid_eax(0xa) & 0xff) > 1) {
+               unsigned long perf_capabilities, format;
+
+               rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+               format = (perf_capabilities >> 8) & 0xf;
+
+               switch (format) {
+               case 0:
+                       nr_pebs_fields = 18;
+                       break;
+               case 1:
+                       nr_pebs_fields = 22;
+                       break;
+               default:
+                       printk(KERN_INFO
+                              "[ds] unknown PEBS format: %lu\n", format);
+                       nr_pebs_fields = 0;
+                       break;
+               }
+       }
+
        memset(&ds_cfg, 0, sizeof(ds_cfg));
        ds_cfg = *cfg;
 
-       printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name);
+       ds_cfg.sizeof_ptr_field =
+               (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
+
+       ds_cfg.sizeof_rec[ds_bts]  = ds_cfg.sizeof_ptr_field * 3;
+       ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
 
-       if (!cpu_has_bts) {
-               ds_cfg.ctl[dsf_bts] = 0;
+       if (!cpu_has(cpu, X86_FEATURE_BTS)) {
+               ds_cfg.sizeof_rec[ds_bts] = 0;
                printk(KERN_INFO "[ds] bts not available\n");
        }
-       if (!cpu_has_pebs)
+       if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
+               ds_cfg.sizeof_rec[ds_pebs] = 0;
                printk(KERN_INFO "[ds] pebs not available\n");
+       }
+
+       printk(KERN_INFO "[ds] sizes: address: %u bit, ",
+              8 * ds_cfg.sizeof_ptr_field);
+       printk("bts/pebs record: %u/%u bytes\n",
+              ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
 
-       WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field));
+       WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
 }
 
 void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
 {
+       /* Only configure the first cpu. Others are identical. */
+       if (ds_cfg.name)
+               return;
+
        switch (c->x86) {
        case 0x6:
                switch (c->x86_model) {
                case 0x9:
                case 0xd: /* Pentium M */
-                       ds_configure(&ds_cfg_pentium_m);
+                       ds_configure(&ds_cfg_pentium_m, c);
                        break;
                case 0xf:
                case 0x17: /* Core2 */
                case 0x1c: /* Atom */
-                       ds_configure(&ds_cfg_core2_atom);
+                       ds_configure(&ds_cfg_core2_atom, c);
+                       break;
+               case 0x1a: /* Core i7 */
+                       ds_configure(&ds_cfg_core_i7, c);
                        break;
-               case 0x1a: /* i7 */
                default:
-                       /* sorry, don't know about them */
+                       /* Sorry, don't know about them. */
                        break;
                }
                break;
@@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
                case 0x0:
                case 0x1:
                case 0x2: /* Netburst */
-                       ds_configure(&ds_cfg_netburst);
+                       ds_configure(&ds_cfg_netburst, c);
                        break;
                default:
-                       /* sorry, don't know about them */
+                       /* Sorry, don't know about them. */
                        break;
                }
                break;
        default:
-               /* sorry, don't know about them */
+               /* Sorry, don't know about them. */
                break;
        }
 }
 
+static inline void ds_take_timestamp(struct ds_context *context,
+                                    enum bts_qualifier qualifier,
+                                    struct task_struct *task)
+{
+       struct bts_tracer *tracer = context->bts_master;
+       struct bts_struct ts;
+
+       /* Prevent compilers from reading the tracer pointer twice. */
+       barrier();
+
+       if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
+               return;
+
+       memset(&ts, 0, sizeof(ts));
+       ts.qualifier            = qualifier;
+       ts.variant.event.clock  = trace_clock_global();
+       ts.variant.event.pid    = task->pid;
+
+       bts_write(tracer, &ts);
+}
+
 /*
  * Change the DS configuration from tracing prev to tracing next.
  */
 void ds_switch_to(struct task_struct *prev, struct task_struct *next)
 {
-       struct ds_context *prev_ctx = prev->thread.ds_ctx;
-       struct ds_context *next_ctx = next->thread.ds_ctx;
+       struct ds_context *prev_ctx     = prev->thread.ds_ctx;
+       struct ds_context *next_ctx     = next->thread.ds_ctx;
+       unsigned long debugctlmsr       = next->thread.debugctlmsr;
+
+       /* Make sure all data is read before we start. */
+       barrier();
 
        if (prev_ctx) {
                update_debugctlmsr(0);
 
-               if (prev_ctx->bts_master &&
-                   (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-                       struct bts_struct ts = {
-                               .qualifier = bts_task_departs,
-                               .variant.timestamp.jiffies = jiffies_64,
-                               .variant.timestamp.pid = prev->pid
-                       };
-                       bts_write(prev_ctx->bts_master, &ts);
-               }
+               ds_take_timestamp(prev_ctx, bts_task_departs, prev);
        }
 
        if (next_ctx) {
-               if (next_ctx->bts_master &&
-                   (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) {
-                       struct bts_struct ts = {
-                               .qualifier = bts_task_arrives,
-                               .variant.timestamp.jiffies = jiffies_64,
-                               .variant.timestamp.pid = next->pid
-                       };
-                       bts_write(next_ctx->bts_master, &ts);
-               }
+               ds_take_timestamp(next_ctx, bts_task_arrives, next);
 
                wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
        }
 
-       update_debugctlmsr(next->thread.debugctlmsr);
+       update_debugctlmsr(debugctlmsr);
 }
 
-void ds_copy_thread(struct task_struct *tsk, struct task_struct *father)
+static __init int ds_selftest(void)
 {
-       clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR);
-       tsk->thread.ds_ctx = NULL;
-}
+       if (ds_cfg.sizeof_rec[ds_bts]) {
+               int error;
 
-void ds_exit_thread(struct task_struct *tsk)
-{
+               error = ds_selftest_bts();
+               if (error) {
+                       WARN(1, "[ds] selftest failed. disabling bts.\n");
+                       ds_cfg.sizeof_rec[ds_bts] = 0;
+               }
+       }
+
+       if (ds_cfg.sizeof_rec[ds_pebs]) {
+               int error;
+
+               error = ds_selftest_pebs();
+               if (error) {
+                       WARN(1, "[ds] selftest failed. disabling pebs.\n");
+                       ds_cfg.sizeof_rec[ds_pebs] = 0;
+               }
+       }
+
+       return 0;
 }
+device_initcall(ds_selftest);
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
new file mode 100644 (file)
index 0000000..6bc7c19
--- /dev/null
@@ -0,0 +1,408 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#include "ds_selftest.h"
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+
+#include <asm/ds.h>
+
+
+#define BUFFER_SIZE            521     /* Intentionally chose an odd size. */
+#define SMALL_BUFFER_SIZE      24      /* A single bts entry. */
+
+struct ds_selftest_bts_conf {
+       struct bts_tracer *tracer;
+       int error;
+       int (*suspend)(struct bts_tracer *);
+       int (*resume)(struct bts_tracer *);
+};
+
+static int ds_selftest_bts_consistency(const struct bts_trace *trace)
+{
+       int error = 0;
+
+       if (!trace) {
+               printk(KERN_CONT "failed to access trace...");
+               /* Bail out. Other tests are pointless. */
+               return -1;
+       }
+
+       if (!trace->read) {
+               printk(KERN_CONT "bts read not available...");
+               error = -1;
+       }
+
+       /* Do some sanity checks on the trace configuration. */
+       if (!trace->ds.n) {
+               printk(KERN_CONT "empty bts buffer...");
+               error = -1;
+       }
+       if (!trace->ds.size) {
+               printk(KERN_CONT "bad bts trace setup...");
+               error = -1;
+       }
+       if (trace->ds.end !=
+           (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
+               printk(KERN_CONT "bad bts buffer setup...");
+               error = -1;
+       }
+       /*
+        * We allow top in [begin; end], since it's not clear when the
+        * overflow adjustment happens: after the increment or before the
+        * write.
+        */
+       if ((trace->ds.top < trace->ds.begin) ||
+           (trace->ds.end < trace->ds.top)) {
+               printk(KERN_CONT "bts top out of bounds...");
+               error = -1;
+       }
+
+       return error;
+}
+
+static int ds_selftest_bts_read(struct bts_tracer *tracer,
+                               const struct bts_trace *trace,
+                               const void *from, const void *to)
+{
+       const unsigned char *at;
+
+       /*
+        * Check a few things which do not belong to this test.
+        * They should be covered by other tests.
+        */
+       if (!trace)
+               return -1;
+
+       if (!trace->read)
+               return -1;
+
+       if (to < from)
+               return -1;
+
+       if (from < trace->ds.begin)
+               return -1;
+
+       if (trace->ds.end < to)
+               return -1;
+
+       if (!trace->ds.size)
+               return -1;
+
+       /* Now to the test itself. */
+       for (at = from; (void *)at < to; at += trace->ds.size) {
+               struct bts_struct bts;
+               unsigned long index;
+               int error;
+
+               if (((void *)at - trace->ds.begin) % trace->ds.size) {
+                       printk(KERN_CONT
+                              "read from non-integer index...");
+                       return -1;
+               }
+               index = ((void *)at - trace->ds.begin) / trace->ds.size;
+
+               memset(&bts, 0, sizeof(bts));
+               error = trace->read(tracer, at, &bts);
+               if (error < 0) {
+                       printk(KERN_CONT
+                              "error reading bts trace at [%lu] (0x%p)...",
+                              index, at);
+                       return error;
+               }
+
+               switch (bts.qualifier) {
+               case BTS_BRANCH:
+                       break;
+               default:
+                       printk(KERN_CONT
+                              "unexpected bts entry %llu at [%lu] (0x%p)...",
+                              bts.qualifier, index, at);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static void ds_selftest_bts_cpu(void *arg)
+{
+       struct ds_selftest_bts_conf *conf = arg;
+       const struct bts_trace *trace;
+       void *top;
+
+       if (IS_ERR(conf->tracer)) {
+               conf->error = PTR_ERR(conf->tracer);
+               conf->tracer = NULL;
+
+               printk(KERN_CONT
+                      "initialization failed (err: %d)...", conf->error);
+               return;
+       }
+
+       /* We should meanwhile have enough trace. */
+       conf->error = conf->suspend(conf->tracer);
+       if (conf->error < 0)
+               return;
+
+       /* Let's see if we can access the trace. */
+       trace = ds_read_bts(conf->tracer);
+
+       conf->error = ds_selftest_bts_consistency(trace);
+       if (conf->error < 0)
+               return;
+
+       /* If everything went well, we should have a few trace entries. */
+       if (trace->ds.top == trace->ds.begin) {
+               /*
+                * It is possible but highly unlikely that we got a
+                * buffer overflow and end up at exactly the same
+                * position we started from.
+                * Let's issue a warning, but continue.
+                */
+               printk(KERN_CONT "no trace/overflow...");
+       }
+
+       /* Let's try to read the trace we collected. */
+       conf->error =
+               ds_selftest_bts_read(conf->tracer, trace,
+                                    trace->ds.begin, trace->ds.top);
+       if (conf->error < 0)
+               return;
+
+       /*
+        * Let's read the trace again.
+        * Since we suspended tracing, we should get the same result.
+        */
+       top = trace->ds.top;
+
+       trace = ds_read_bts(conf->tracer);
+       conf->error = ds_selftest_bts_consistency(trace);
+       if (conf->error < 0)
+               return;
+
+       if (top != trace->ds.top) {
+               printk(KERN_CONT "suspend not working...");
+               conf->error = -1;
+               return;
+       }
+
+       /* Let's collect some more trace - see if resume is working. */
+       conf->error = conf->resume(conf->tracer);
+       if (conf->error < 0)
+               return;
+
+       conf->error = conf->suspend(conf->tracer);
+       if (conf->error < 0)
+               return;
+
+       trace = ds_read_bts(conf->tracer);
+
+       conf->error = ds_selftest_bts_consistency(trace);
+       if (conf->error < 0)
+               return;
+
+       if (trace->ds.top == top) {
+               /*
+                * It is possible but highly unlikely that we got a
+                * buffer overflow and end up at exactly the same
+                * position we started from.
+                * Let's issue a warning and check the full trace.
+                */
+               printk(KERN_CONT
+                      "no resume progress/overflow...");
+
+               conf->error =
+                       ds_selftest_bts_read(conf->tracer, trace,
+                                            trace->ds.begin, trace->ds.end);
+       } else if (trace->ds.top < top) {
+               /*
+                * We had a buffer overflow - the entire buffer should
+                * contain trace records.
+                */
+               conf->error =
+                       ds_selftest_bts_read(conf->tracer, trace,
+                                            trace->ds.begin, trace->ds.end);
+       } else {
+               /*
+                * It is quite likely that the buffer did not overflow.
+                * Let's just check the delta trace.
+                */
+               conf->error =
+                       ds_selftest_bts_read(conf->tracer, trace, top,
+                                            trace->ds.top);
+       }
+       if (conf->error < 0)
+               return;
+
+       conf->error = 0;
+}
+
+static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
+{
+       ds_suspend_bts(tracer);
+       return 0;
+}
+
+static int ds_resume_bts_wrap(struct bts_tracer *tracer)
+{
+       ds_resume_bts(tracer);
+       return 0;
+}
+
+static void ds_release_bts_noirq_wrap(void *tracer)
+{
+       (void)ds_release_bts_noirq(tracer);
+}
+
+static int ds_selftest_bts_bad_release_noirq(int cpu,
+                                            struct bts_tracer *tracer)
+{
+       int error = -EPERM;
+
+       /* Try to release the tracer on the wrong cpu. */
+       get_cpu();
+       if (cpu != smp_processor_id()) {
+               error = ds_release_bts_noirq(tracer);
+               if (error != -EPERM)
+                       printk(KERN_CONT "release on wrong cpu...");
+       }
+       put_cpu();
+
+       return error ? 0 : -1;
+}
+
+static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
+{
+       struct bts_tracer *tracer;
+       int error;
+
+       /* Try to request cpu tracing while task tracing is active. */
+       tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
+                                   (size_t)-1, BTS_KERNEL);
+       error = PTR_ERR(tracer);
+       if (!IS_ERR(tracer)) {
+               ds_release_bts(tracer);
+               error = 0;
+       }
+
+       if (error != -EPERM)
+               printk(KERN_CONT "cpu/task tracing overlap...");
+
+       return error ? 0 : -1;
+}
+
+static int ds_selftest_bts_bad_request_task(void *buffer)
+{
+       struct bts_tracer *tracer;
+       int error;
+
+       /* Try to request task tracing while cpu tracing is active. */
+       tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
+                                   (size_t)-1, BTS_KERNEL);
+       error = PTR_ERR(tracer);
+       if (!IS_ERR(tracer)) {
+               error = 0;
+               ds_release_bts(tracer);
+       }
+
+       if (error != -EPERM)
+               printk(KERN_CONT "task/cpu tracing overlap...");
+
+       return error ? 0 : -1;
+}
+
+int ds_selftest_bts(void)
+{
+       struct ds_selftest_bts_conf conf;
+       unsigned char buffer[BUFFER_SIZE], *small_buffer;
+       unsigned long irq;
+       int cpu;
+
+       printk(KERN_INFO "[ds] bts selftest...");
+       conf.error = 0;
+
+       small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               conf.suspend = ds_suspend_bts_wrap;
+               conf.resume = ds_resume_bts_wrap;
+               conf.tracer =
+                       ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+                                          NULL, (size_t)-1, BTS_KERNEL);
+               ds_selftest_bts_cpu(&conf);
+               if (conf.error >= 0)
+                       conf.error = ds_selftest_bts_bad_request_task(buffer);
+               ds_release_bts(conf.tracer);
+               if (conf.error < 0)
+                       goto out;
+
+               conf.suspend = ds_suspend_bts_noirq;
+               conf.resume = ds_resume_bts_noirq;
+               conf.tracer =
+                       ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
+                                          NULL, (size_t)-1, BTS_KERNEL);
+               smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
+               if (conf.error >= 0) {
+                       conf.error =
+                               ds_selftest_bts_bad_release_noirq(cpu,
+                                                                 conf.tracer);
+                       /* We must not release the tracer twice. */
+                       if (conf.error < 0)
+                               conf.tracer = NULL;
+               }
+               if (conf.error >= 0)
+                       conf.error = ds_selftest_bts_bad_request_task(buffer);
+               smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
+                                        conf.tracer, 1);
+               if (conf.error < 0)
+                       goto out;
+       }
+
+       conf.suspend = ds_suspend_bts_wrap;
+       conf.resume = ds_resume_bts_wrap;
+       conf.tracer =
+               ds_request_bts_task(current, buffer, BUFFER_SIZE,
+                                   NULL, (size_t)-1, BTS_KERNEL);
+       ds_selftest_bts_cpu(&conf);
+       if (conf.error >= 0)
+               conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+       ds_release_bts(conf.tracer);
+       if (conf.error < 0)
+               goto out;
+
+       conf.suspend = ds_suspend_bts_noirq;
+       conf.resume = ds_resume_bts_noirq;
+       conf.tracer =
+               ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
+                                  NULL, (size_t)-1, BTS_KERNEL);
+       local_irq_save(irq);
+       ds_selftest_bts_cpu(&conf);
+       if (conf.error >= 0)
+               conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
+       ds_release_bts_noirq(conf.tracer);
+       local_irq_restore(irq);
+       if (conf.error < 0)
+               goto out;
+
+       conf.error = 0;
+ out:
+       put_online_cpus();
+       printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
+
+       return conf.error;
+}
+
+int ds_selftest_pebs(void)
+{
+       return 0;
+}
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
new file mode 100644 (file)
index 0000000..2ba8745
--- /dev/null
@@ -0,0 +1,15 @@
+/*
+ * Debug Store support - selftest
+ *
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Markus Metzger <markus.t.metzger@intel.com>, 2009
+ */
+
+#ifdef CONFIG_X86_DS_SELFTEST
+extern int ds_selftest_bts(void);
+extern int ds_selftest_pebs(void);
+#else
+static inline int ds_selftest_bts(void) { return 0; }
+static inline int ds_selftest_pebs(void) { return 0; }
+#endif
index da87590..81086c2 100644 (file)
@@ -29,7 +29,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
                unsigned long *sp, unsigned long bp, char *log_lvl);
 
 extern unsigned int code_bytes;
-extern int kstack_depth_to_print;
 
 /* The form of the top of the frame on the stack */
 struct stack_frame {
index 0062813..7271fa3 100644 (file)
@@ -617,7 +617,7 @@ __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
  */
 __init void e820_setup_gap(void)
 {
-       unsigned long gapstart, gapsize, round;
+       unsigned long gapstart, gapsize;
        int found;
 
        gapstart = 0x10000000;
@@ -635,14 +635,9 @@ __init void e820_setup_gap(void)
 #endif
 
        /*
-        * See how much we want to round up: start off with
-        * rounding to the next 1MB area.
+        * e820_reserve_resources_late() protects stolen RAM already
         */
-       round = 0x100000;
-       while ((gapsize >> 4) > round)
-               round += round;
-       /* Fun with two's complement */
-       pci_mem_start = (gapstart + round) & -round;
+       pci_mem_start = gapstart;
 
        printk(KERN_INFO
               "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
@@ -1371,6 +1366,23 @@ void __init e820_reserve_resources(void)
        }
 }
 
+/* How much should we pad RAM ending depending on where it is? */
+static unsigned long ram_alignment(resource_size_t pos)
+{
+       unsigned long mb = pos >> 20;
+
+       /* To 64kB in the first megabyte */
+       if (!mb)
+               return 64*1024;
+
+       /* To 1MB in the first 16MB */
+       if (mb < 16)
+               return 1024*1024;
+
+       /* To 32MB for anything above that */
+       return 32*1024*1024;
+}
+
 void __init e820_reserve_resources_late(void)
 {
        int i;
@@ -1382,6 +1394,24 @@ void __init e820_reserve_resources_late(void)
                        insert_resource_expand_to_fit(&iomem_resource, res);
                res++;
        }
+
+       /*
+        * Try to bump up RAM regions to reasonable boundaries to
+        * avoid stolen RAM:
+        */
+       for (i = 0; i < e820.nr_map; i++) {
+               struct e820entry *entry = &e820_saved.map[i];
+               resource_size_t start, end;
+
+               if (entry->type != E820_RAM)
+                       continue;
+               start = entry->addr + entry->size;
+               end = round_up(start, ram_alignment(start));
+               if (start == end)
+                       continue;
+               reserve_region_with_split(&iomem_resource, start,
+                                                 end - 1, "RAM buffer");
+       }
 }
 
 char *__init default_machine_specific_memory_setup(void)
index 76b8cd9..ebdb85c 100644 (file)
@@ -96,6 +96,7 @@ static void __init nvidia_bugs(int num, int slot, int func)
 
 }
 
+#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
 #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC)
 static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
 {
@@ -114,6 +115,7 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func)
        d &= 0xff;
        return d;
 }
+#endif
 
 static void __init ati_bugs(int num, int slot, int func)
 {
index 38946c6..1c17d7c 100644 (file)
@@ -147,27 +147,14 @@ END(ftrace_graph_caller)
 GLOBAL(return_to_handler)
        subq  $80, %rsp
 
+       /* Save the return values */
        movq %rax, (%rsp)
-       movq %rcx, 8(%rsp)
-       movq %rdx, 16(%rsp)
-       movq %rsi, 24(%rsp)
-       movq %rdi, 32(%rsp)
-       movq %r8, 40(%rsp)
-       movq %r9, 48(%rsp)
-       movq %r10, 56(%rsp)
-       movq %r11, 64(%rsp)
+       movq %rdx, 8(%rsp)
 
        call ftrace_return_to_handler
 
        movq %rax, 72(%rsp)
-       movq 64(%rsp), %r11
-       movq 56(%rsp), %r10
-       movq 48(%rsp), %r9
-       movq 40(%rsp), %r8
-       movq 32(%rsp), %rdi
-       movq 24(%rsp), %rsi
-       movq 16(%rsp), %rdx
-       movq 8(%rsp), %rcx
+       movq 8(%rsp), %rdx
        movq (%rsp), %rax
        addq $72, %rsp
        retq
@@ -1379,6 +1366,11 @@ END(xen_failsafe_callback)
 paranoidzeroentry_ist debug do_debug DEBUG_STACK
 paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
 paranoiderrorentry stack_segment do_stack_segment
+#ifdef CONFIG_XEN
+zeroentry xen_debug do_debug
+zeroentry xen_int3 do_int3
+errorentry xen_stack_segment do_stack_segment
+#endif
 errorentry general_protection do_general_protection
 errorentry page_fault do_page_fault
 #ifdef CONFIG_X86_MCE
index 3068388..dc5ed4b 100644 (file)
@@ -608,13 +608,6 @@ ignore_int:
 ENTRY(initial_code)
        .long i386_start_kernel
 
-.section .text
-/*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
-
 /*
  * BSS section
  */
index c3fe010..9a391bb 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/io_apic.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
+#include <asm/hw_irq.h>
 
 atomic_t irq_err_count;
 
@@ -24,9 +25,9 @@ void (*generic_interrupt_extension)(void) = NULL;
  */
 void ack_bad_irq(unsigned int irq)
 {
-       printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq);
+       if (printk_ratelimit())
+               pr_err("unexpected IRQ trap at vector %02x\n", irq);
 
-#ifdef CONFIG_X86_LOCAL_APIC
        /*
         * Currently unexpected vectors happen only on SMP and APIC.
         * We _must_ ack these because every local APIC has only N
@@ -36,9 +37,7 @@ void ack_bad_irq(unsigned int irq)
         * completely.
         * But only ack when the APIC is enabled -AK
         */
-       if (cpu_has_apic)
-               ack_APIC_irq();
-#endif
+       ack_APIC_irq();
 }
 
 #define irq_stats(x)           (&per_cpu(irq_stat, x))
@@ -178,7 +177,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
        sum += irq_stats(cpu)->irq_thermal_count;
 # ifdef CONFIG_X86_64
        sum += irq_stats(cpu)->irq_threshold_count;
-#endif
+# endif
 #endif
        return sum;
 }
@@ -213,14 +212,11 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
        irq = __get_cpu_var(vector_irq)[vector];
 
        if (!handle_irq(irq, regs)) {
-#ifdef CONFIG_X86_64
-               if (!disable_apic)
-                       ack_APIC_irq();
-#endif
+               ack_APIC_irq();
 
                if (printk_ratelimit())
-                       printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
-                              __func__, smp_processor_id(), vector, irq);
+                       pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
+                               __func__, smp_processor_id(), vector, irq);
        }
 
        irq_exit();
similarity index 68%
rename from arch/x86/kernel/irqinit_32.c
rename to arch/x86/kernel/irqinit.c
index 368b0a8..2e08b10 100644 (file)
@@ -1,20 +1,25 @@
+#include <linux/linkage.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/ioport.h>
 #include <linux/interrupt.h>
+#include <linux/timex.h>
 #include <linux/slab.h>
 #include <linux/random.h>
+#include <linux/kprobes.h>
 #include <linux/init.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
 #include <linux/bitops.h>
+#include <linux/acpi.h>
 #include <linux/io.h>
 #include <linux/delay.h>
 
 #include <asm/atomic.h>
 #include <asm/system.h>
 #include <asm/timer.h>
+#include <asm/hw_irq.h>
 #include <asm/pgtable.h>
 #include <asm/desc.h>
 #include <asm/apic.h>
 #include <asm/i8259.h>
 #include <asm/traps.h>
 
+/*
+ * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
+ * (these are usually mapped to vectors 0x30-0x3f)
+ */
+
+/*
+ * The IO-APIC gives us many more interrupt sources. Most of these
+ * are unused but an SMP system is supposed to have enough memory ...
+ * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
+ * across the spectrum, so we really want to be prepared to get all
+ * of these. Plus, more powerful systems might have more than 64
+ * IO-APIC registers.
+ *
+ * (these are usually mapped into the 0x30-0xff vector range)
+ */
 
+#ifdef CONFIG_X86_32
 /*
  * Note that on a 486, we don't want to do a SIGFPE on an irq13
  * as the irq is unreliable, and exception 16 works correctly
@@ -52,30 +73,7 @@ static struct irqaction fpu_irq = {
        .handler = math_error_irq,
        .name = "fpu",
 };
-
-void __init init_ISA_irqs(void)
-{
-       int i;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-       init_bsp_APIC();
 #endif
-       init_8259A(0);
-
-       /*
-        * 16 old-style INTA-cycle interrupts:
-        */
-       for (i = 0; i < NR_IRQS_LEGACY; i++) {
-               struct irq_desc *desc = irq_to_desc(i);
-
-               desc->status = IRQ_DISABLED;
-               desc->action = NULL;
-               desc->depth = 1;
-
-               set_irq_chip_and_handler_name(i, &i8259A_chip,
-                                             handle_level_irq, "XT");
-       }
-}
 
 /*
  * IRQ2 is cascade interrupt to second interrupt controller
@@ -118,29 +116,37 @@ int vector_used_by_percpu_irq(unsigned int vector)
        return 0;
 }
 
-/* Overridden in paravirt.c */
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-void __init native_init_IRQ(void)
+static void __init init_ISA_irqs(void)
 {
        int i;
 
-       /* Execute any quirks before the call gates are initialised: */
-       x86_quirk_pre_intr_init();
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+       init_bsp_APIC();
+#endif
+       init_8259A(0);
 
        /*
-        * Cover the whole vector space, no vector can escape
-        * us. (some of these will be overridden and become
-        * 'special' SMP interrupts)
+        * 16 old-style INTA-cycle interrupts:
         */
-       for (i =  FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
-               /* SYSCALL_VECTOR was reserved in trap_init. */
-               if (i != SYSCALL_VECTOR)
-                       set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+       for (i = 0; i < NR_IRQS_LEGACY; i++) {
+               struct irq_desc *desc = irq_to_desc(i);
+
+               desc->status = IRQ_DISABLED;
+               desc->action = NULL;
+               desc->depth = 1;
+
+               set_irq_chip_and_handler_name(i, &i8259A_chip,
+                                             handle_level_irq, "XT");
        }
+}
 
+/* Overridden in paravirt.c */
+void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
 
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
+static void __init smp_intr_init(void)
+{
+#ifdef CONFIG_SMP
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
        /*
         * The reschedule interrupt is a CPU-to-CPU reschedule-helper
         * IPI, driven by wakeup.
@@ -160,16 +166,27 @@ void __init native_init_IRQ(void)
        /* IPI for generic function call */
        alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 
-       /* IPI for single call function */
+       /* IPI for generic single function call */
        alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-                                call_function_single_interrupt);
+                       call_function_single_interrupt);
 
        /* Low priority IPI to cleanup after moving an irq */
        set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
        set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
 #endif
+#endif /* CONFIG_SMP */
+}
+
+static void __init apic_intr_init(void)
+{
+       smp_intr_init();
+
+#ifdef CONFIG_X86_64
+       alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+       alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
+#endif
 
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
        /* self generated IPI for local APIC timer */
        alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -179,16 +196,67 @@ void __init native_init_IRQ(void)
        /* IPI vectors for APIC spurious and error interrupts */
        alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
        alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
+
+       /* Performance monitoring interrupts: */
+# ifdef CONFIG_PERF_COUNTERS
+       alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt);
+       alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt);
+# endif
+
 #endif
 
+#ifdef CONFIG_X86_32
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
        /* thermal monitor LVT interrupt */
        alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
 #endif
+#endif
+}
+
+/**
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
+ *
+ * Description:
+ *     Perform any necessary interrupt initialisation prior to setting up
+ *     the "ordinary" interrupt call gates.  For legacy reasons, the ISA
+ *     interrupts should be initialised here if the machine emulates a PC
+ *     in any way.
+ **/
+static void __init x86_quirk_pre_intr_init(void)
+{
+#ifdef CONFIG_X86_32
+       if (x86_quirks->arch_pre_intr_init) {
+               if (x86_quirks->arch_pre_intr_init())
+                       return;
+       }
+#endif
+       init_ISA_irqs();
+}
+
+void __init native_init_IRQ(void)
+{
+       int i;
+
+       /* Execute any quirks before the call gates are initialised: */
+       x86_quirk_pre_intr_init();
+
+       apic_intr_init();
+
+       /*
+        * Cover the whole vector space, no vector can escape
+        * us. (some of these will be overridden and become
+        * 'special' SMP interrupts)
+        */
+       for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+               /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
+               if (!test_bit(i, used_vectors))
+                       set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]);
+       }
 
        if (!acpi_ioapic)
                setup_irq(2, &irq2);
 
+#ifdef CONFIG_X86_32
        /*
         * Call quirks after call gates are initialised (usually add in
         * the architecture specific gates):
@@ -203,4 +271,5 @@ void __init native_init_IRQ(void)
                setup_irq(FPU_IRQ, &fpu_irq);
 
        irq_ctx_init(smp_processor_id());
+#endif
 }
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
deleted file mode 100644 (file)
index 8cd1053..0000000
+++ /dev/null
@@ -1,177 +0,0 @@
-#include <linux/linkage.h>
-#include <linux/errno.h>
-#include <linux/signal.h>
-#include <linux/sched.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/timex.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/init.h>
-#include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
-#include <linux/bitops.h>
-#include <linux/acpi.h>
-#include <linux/io.h>
-#include <linux/delay.h>
-
-#include <asm/atomic.h>
-#include <asm/system.h>
-#include <asm/hw_irq.h>
-#include <asm/pgtable.h>
-#include <asm/desc.h>
-#include <asm/apic.h>
-#include <asm/i8259.h>
-
-/*
- * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts:
- * (these are usually mapped to vectors 0x30-0x3f)
- */
-
-/*
- * The IO-APIC gives us many more interrupt sources. Most of these
- * are unused but an SMP system is supposed to have enough memory ...
- * sometimes (mostly wrt. hw bugs) we get corrupted vectors all
- * across the spectrum, so we really want to be prepared to get all
- * of these. Plus, more powerful systems might have more than 64
- * IO-APIC registers.
- *
- * (these are usually mapped into the 0x30-0xff vector range)
- */
-
-/*
- * IRQ2 is cascade interrupt to second interrupt controller
- */
-
-static struct irqaction irq2 = {
-       .handler = no_action,
-       .name = "cascade",
-};
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
-       [0 ... IRQ0_VECTOR - 1] = -1,
-       [IRQ0_VECTOR] = 0,
-       [IRQ1_VECTOR] = 1,
-       [IRQ2_VECTOR] = 2,
-       [IRQ3_VECTOR] = 3,
-       [IRQ4_VECTOR] = 4,
-       [IRQ5_VECTOR] = 5,
-       [IRQ6_VECTOR] = 6,
-       [IRQ7_VECTOR] = 7,
-       [IRQ8_VECTOR] = 8,
-       [IRQ9_VECTOR] = 9,
-       [IRQ10_VECTOR] = 10,
-       [IRQ11_VECTOR] = 11,
-       [IRQ12_VECTOR] = 12,
-       [IRQ13_VECTOR] = 13,
-       [IRQ14_VECTOR] = 14,
-       [IRQ15_VECTOR] = 15,
-       [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1
-};
-
-int vector_used_by_percpu_irq(unsigned int vector)
-{
-       int cpu;
-
-       for_each_online_cpu(cpu) {
-               if (per_cpu(vector_irq, cpu)[vector] != -1)
-                       return 1;
-       }
-
-       return 0;
-}
-
-static void __init init_ISA_irqs(void)
-{
-       int i;
-
-       init_bsp_APIC();
-       init_8259A(0);
-
-       for (i = 0; i < NR_IRQS_LEGACY; i++) {
-               struct irq_desc *desc = irq_to_desc(i);
-
-               desc->status = IRQ_DISABLED;
-               desc->action = NULL;
-               desc->depth = 1;
-
-               /*
-                * 16 old-style INTA-cycle interrupts:
-                */
-               set_irq_chip_and_handler_name(i, &i8259A_chip,
-                                                     handle_level_irq, "XT");
-       }
-}
-
-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
-
-static void __init smp_intr_init(void)
-{
-#ifdef CONFIG_SMP
-       /*
-        * The reschedule interrupt is a CPU-to-CPU reschedule-helper
-        * IPI, driven by wakeup.
-        */
-       alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
-
-       /* IPIs for invalidation */
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
-       alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
-
-       /* IPI for generic function call */
-       alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
-
-       /* IPI for generic single function call */
-       alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
-                       call_function_single_interrupt);
-
-       /* Low priority IPI to cleanup after moving an irq */
-       set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
-       set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors);
-#endif
-}
-
-static void __init apic_intr_init(void)
-{
-       smp_intr_init();
-
-       alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
-       alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
-
-       /* self generated IPI for local APIC timer */
-       alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
-
-       /* generic IPI for platform specific use */
-       alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt);
-
-       /* IPI vectors for APIC spurious and error interrupts */
-       alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
-       alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
-}
-
-void __init native_init_IRQ(void)
-{
-       int i;
-
-       init_ISA_irqs();
-       /*
-        * Cover the whole vector space, no vector can escape
-        * us. (some of these will be overridden and become
-        * 'special' SMP interrupts)
-        */
-       for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) {
-               int vector = FIRST_EXTERNAL_VECTOR + i;
-               if (vector != IA32_SYSCALL_VECTOR)
-                       set_intr_gate(vector, interrupt[i]);
-       }
-
-       apic_intr_init();
-
-       if (!acpi_ioapic)
-               setup_irq(2, &irq2);
-}
index b1f4dff..8d82a77 100644 (file)
@@ -142,7 +142,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
        gdb_regs32[GDB_PS]      = *(unsigned long *)(p->thread.sp + 8);
        gdb_regs32[GDB_CS]      = __KERNEL_CS;
        gdb_regs32[GDB_SS]      = __KERNEL_DS;
-       gdb_regs[GDB_PC]        = p->thread.ip;
+       gdb_regs[GDB_PC]        = 0;
        gdb_regs[GDB_R8]        = 0;
        gdb_regs[GDB_R9]        = 0;
        gdb_regs[GDB_R10]       = 0;
index 33019dd..6551ded 100644 (file)
@@ -195,7 +195,7 @@ static void kvm_leave_lazy_mmu(void)
        struct kvm_para_state *state = kvm_para_state();
 
        mmu_queue_flush(state);
-       paravirt_leave_lazy(paravirt_get_lazy_mode());
+       paravirt_leave_lazy_mmu();
        state->mode = paravirt_get_lazy_mode();
 }
 
index 453b579..366baa1 100644 (file)
  *  Licensed under the terms of the GNU General Public
  *  License version 2. See file COPYING for details.
  */
-#include <linux/platform_device.h>
-#include <linux/capability.h>
-#include <linux/miscdevice.h>
 #include <linux/firmware.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
 #include <linux/pci_ids.h>
 #include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
 #include <linux/pci.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -79,9 +67,6 @@ struct microcode_amd {
 #define UCODE_CONTAINER_SECTION_HDR    8
 #define UCODE_CONTAINER_HEADER_SIZE    12
 
-/* serialize access to the physical write */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
 static struct equiv_cpu_entry *equiv_cpu_table;
 
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
@@ -144,9 +129,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev)
        return 1;
 }
 
-static void apply_microcode_amd(int cpu)
+static int apply_microcode_amd(int cpu)
 {
-       unsigned long flags;
        u32 rev, dummy;
        int cpu_num = raw_smp_processor_id();
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
@@ -156,25 +140,25 @@ static void apply_microcode_amd(int cpu)
        BUG_ON(cpu_num != cpu);
 
        if (mc_amd == NULL)
-               return;
+               return 0;
 
-       spin_lock_irqsave(&microcode_update_lock, flags);
        wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
        /* get patch id after patching */
        rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-       spin_unlock_irqrestore(&microcode_update_lock, flags);
 
        /* check current patch id and patch's id for match */
        if (rev != mc_amd->hdr.patch_id) {
                printk(KERN_ERR "microcode: CPU%d: update failed "
                       "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id);
-               return;
+               return -1;
        }
 
        printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n",
               cpu, rev);
 
        uci->cpu_sig.rev = rev;
+
+       return 0;
 }
 
 static int get_ucode_data(void *to, const u8 *from, size_t n)
@@ -257,13 +241,12 @@ static int install_equiv_cpu_table(const u8 *buf)
 
 static void free_equiv_cpu_table(void)
 {
-       if (equiv_cpu_table) {
-               vfree(equiv_cpu_table);
-               equiv_cpu_table = NULL;
-       }
+       vfree(equiv_cpu_table);
+       equiv_cpu_table = NULL;
 }
 
-static int generic_load_microcode(int cpu, const u8 *data, size_t size)
+static enum ucode_state
+generic_load_microcode(int cpu, const u8 *data, size_t size)
 {
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
        const u8 *ucode_ptr = data;
@@ -272,12 +255,13 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size)
        int new_rev = uci->cpu_sig.rev;
        unsigned int leftover;
        unsigned long offset;
+       enum ucode_state state = UCODE_OK;
 
        offset = install_equiv_cpu_table(ucode_ptr);
        if (!offset) {
                printk(KERN_ERR "microcode: failed to create "
                       "equivalent cpu table\n");
-               return -EINVAL;
+               return UCODE_ERROR;
        }
 
        ucode_ptr += offset;
@@ -293,8 +277,7 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size)
 
                mc_header = (struct microcode_header_amd *)mc;
                if (get_matching_microcode(cpu, mc, new_rev)) {
-                       if (new_mc)
-                               vfree(new_mc);
+                       vfree(new_mc);
                        new_rev = mc_header->patch_id;
                        new_mc  = mc;
                } else
@@ -306,34 +289,32 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size)
 
        if (new_mc) {
                if (!leftover) {
-                       if (uci->mc)
-                               vfree(uci->mc);
+                       vfree(uci->mc);
                        uci->mc = new_mc;
                        pr_debug("microcode: CPU%d found a matching microcode "
                                 "update with version 0x%x (current=0x%x)\n",
                                 cpu, new_rev, uci->cpu_sig.rev);
-               } else
+               } else {
                        vfree(new_mc);
-       }
+                       state = UCODE_ERROR;
+               }
+       } else
+               state = UCODE_NFOUND;
 
        free_equiv_cpu_table();
 
-       return (int)leftover;
+       return state;
 }
 
-static int request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device)
 {
        const char *fw_name = "amd-ucode/microcode_amd.bin";
        const struct firmware *firmware;
-       int ret;
-
-       /* We should bind the task to the CPU */
-       BUG_ON(cpu != raw_smp_processor_id());
+       enum ucode_state ret;
 
-       ret = request_firmware(&firmware, fw_name, device);
-       if (ret) {
+       if (request_firmware(&firmware, fw_name, device)) {
                printk(KERN_ERR "microcode: failed to load file %s\n", fw_name);
-               return ret;
+               return UCODE_NFOUND;
        }
 
        ret = generic_load_microcode(cpu, firmware->data, firmware->size);
@@ -343,11 +324,12 @@ static int request_microcode_fw(int cpu, struct device *device)
        return ret;
 }
 
-static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+static enum ucode_state
+request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
        printk(KERN_INFO "microcode: AMD microcode update via "
               "/dev/cpu/microcode not supported\n");
-       return -1;
+       return UCODE_ERROR;
 }
 
 static void microcode_fini_cpu_amd(int cpu)
index 98c470c..9c44615 100644 (file)
  *             Thanks to Stuart Swales for pointing out this bug.
  */
 #include <linux/platform_device.h>
-#include <linux/capability.h>
 #include <linux/miscdevice.h>
-#include <linux/firmware.h>
+#include <linux/capability.h>
 #include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
-#include <asm/msr.h>
 
 MODULE_DESCRIPTION("Microcode Update Driver");
 MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -101,36 +92,110 @@ MODULE_LICENSE("GPL");
 
 static struct microcode_ops    *microcode_ops;
 
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+/*
+ * Synchronization.
+ *
+ * All non cpu-hotplug-callback call sites use:
+ *
+ * - microcode_mutex to synchronize with each other;
+ * - get/put_online_cpus() to synchronize with
+ *   the cpu-hotplug-callback call sites.
+ *
+ * We guarantee that only a single cpu is being
+ * updated at any particular moment of time.
+ */
 static DEFINE_MUTEX(microcode_mutex);
 
 struct ucode_cpu_info          ucode_cpu_info[NR_CPUS];
 EXPORT_SYMBOL_GPL(ucode_cpu_info);
 
+/*
+ * Operations that are run on a target cpu:
+ */
+
+struct cpu_info_ctx {
+       struct cpu_signature    *cpu_sig;
+       int                     err;
+};
+
+static void collect_cpu_info_local(void *arg)
+{
+       struct cpu_info_ctx *ctx = arg;
+
+       ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(),
+                                                  ctx->cpu_sig);
+}
+
+static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig)
+{
+       struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 };
+       int ret;
+
+       ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1);
+       if (!ret)
+               ret = ctx.err;
+
+       return ret;
+}
+
+static int collect_cpu_info(int cpu)
+{
+       struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+       int ret;
+
+       memset(uci, 0, sizeof(*uci));
+
+       ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig);
+       if (!ret)
+               uci->valid = 1;
+
+       return ret;
+}
+
+struct apply_microcode_ctx {
+       int err;
+};
+
+static void apply_microcode_local(void *arg)
+{
+       struct apply_microcode_ctx *ctx = arg;
+
+       ctx->err = microcode_ops->apply_microcode(smp_processor_id());
+}
+
+static int apply_microcode_on_target(int cpu)
+{
+       struct apply_microcode_ctx ctx = { .err = 0 };
+       int ret;
+
+       ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1);
+       if (!ret)
+               ret = ctx.err;
+
+       return ret;
+}
+
 #ifdef CONFIG_MICROCODE_OLD_INTERFACE
 static int do_microcode_update(const void __user *buf, size_t size)
 {
-       cpumask_t old;
        int error = 0;
        int cpu;
 
-       old = current->cpus_allowed;
-
        for_each_online_cpu(cpu) {
                struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+               enum ucode_state ustate;
 
                if (!uci->valid)
                        continue;
 
-               set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu));
-               error = microcode_ops->request_microcode_user(cpu, buf, size);
-               if (error < 0)
-                       goto out;
-               if (!error)
-                       microcode_ops->apply_microcode(cpu);
+               ustate = microcode_ops->request_microcode_user(cpu, buf, size);
+               if (ustate == UCODE_ERROR) {
+                       error = -1;
+                       break;
+               } else if (ustate == UCODE_OK)
+                       apply_microcode_on_target(cpu);
        }
-out:
-       set_cpus_allowed_ptr(current, &old);
+
        return error;
 }
 
@@ -143,19 +208,17 @@ static int microcode_open(struct inode *unused1, struct file *unused2)
 static ssize_t microcode_write(struct file *file, const char __user *buf,
                               size_t len, loff_t *ppos)
 {
-       ssize_t ret;
+       ssize_t ret = -EINVAL;
 
        if ((len >> PAGE_SHIFT) > num_physpages) {
-               printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
-                      num_physpages);
-               return -EINVAL;
+               pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+               return ret;
        }
 
        get_online_cpus();
        mutex_lock(&microcode_mutex);
 
-       ret = do_microcode_update(buf, len);
-       if (!ret)
+       if (do_microcode_update(buf, len) == 0)
                ret = (ssize_t)len;
 
        mutex_unlock(&microcode_mutex);
@@ -165,15 +228,15 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
 }
 
 static const struct file_operations microcode_fops = {
-       .owner          = THIS_MODULE,
-       .write          = microcode_write,
-       .open           = microcode_open,
+       .owner                  = THIS_MODULE,
+       .write                  = microcode_write,
+       .open                   = microcode_open,
 };
 
 static struct miscdevice microcode_dev = {
-       .minor          = MICROCODE_MINOR,
-       .name           = "microcode",
-       .fops           = &microcode_fops,
+       .minor                  = MICROCODE_MINOR,
+       .name                   = "microcode",
+       .fops                   = &microcode_fops,
 };
 
 static int __init microcode_dev_init(void)
@@ -182,9 +245,7 @@ static int __init microcode_dev_init(void)
 
        error = misc_register(&microcode_dev);
        if (error) {
-               printk(KERN_ERR
-                       "microcode: can't misc_register on minor=%d\n",
-                       MICROCODE_MINOR);
+               pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
                return error;
        }
 
@@ -205,42 +266,51 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
 /* fake device for request_firmware */
 static struct platform_device  *microcode_pdev;
 
-static long reload_for_cpu(void *unused)
+static int reload_for_cpu(int cpu)
 {
-       struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
+       struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
        int err = 0;
 
        mutex_lock(&microcode_mutex);
        if (uci->valid) {
-               err = microcode_ops->request_microcode_fw(smp_processor_id(),
-                                                         &microcode_pdev->dev);
-               if (!err)
-                       microcode_ops->apply_microcode(smp_processor_id());
+               enum ucode_state ustate;
+
+               ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+               if (ustate == UCODE_OK)
+                       apply_microcode_on_target(cpu);
+               else
+                       if (ustate == UCODE_ERROR)
+                               err = -EINVAL;
        }
        mutex_unlock(&microcode_mutex);
+
        return err;
 }
 
 static ssize_t reload_store(struct sys_device *dev,
                            struct sysdev_attribute *attr,
-                           const char *buf, size_t sz)
+                           const char *buf, size_t size)
 {
-       char *end;
-       unsigned long val = simple_strtoul(buf, &end, 0);
-       int err = 0;
+       unsigned long val;
        int cpu = dev->id;
+       int ret = 0;
+       char *end;
 
+       val = simple_strtoul(buf, &end, 0);
        if (end == buf)
                return -EINVAL;
+
        if (val == 1) {
                get_online_cpus();
                if (cpu_online(cpu))
-                       err = work_on_cpu(cpu, reload_for_cpu, NULL);
+                       ret = reload_for_cpu(cpu);
                put_online_cpus();
        }
-       if (err)
-               return err;
-       return sz;
+
+       if (!ret)
+               ret = size;
+
+       return ret;
 }
 
 static ssize_t version_show(struct sys_device *dev,
@@ -271,11 +341,11 @@ static struct attribute *mc_default_attrs[] = {
 };
 
 static struct attribute_group mc_attr_group = {
-       .attrs          = mc_default_attrs,
-       .name           = "microcode",
+       .attrs                  = mc_default_attrs,
+       .name                   = "microcode",
 };
 
-static void __microcode_fini_cpu(int cpu)
+static void microcode_fini_cpu(int cpu)
 {
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
@@ -283,103 +353,68 @@ static void __microcode_fini_cpu(int cpu)
        uci->valid = 0;
 }
 
-static void microcode_fini_cpu(int cpu)
-{
-       mutex_lock(&microcode_mutex);
-       __microcode_fini_cpu(cpu);
-       mutex_unlock(&microcode_mutex);
-}
-
-static void collect_cpu_info(int cpu)
+static enum ucode_state microcode_resume_cpu(int cpu)
 {
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-       memset(uci, 0, sizeof(*uci));
-       if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig))
-               uci->valid = 1;
+       if (!uci->mc)
+               return UCODE_NFOUND;
+
+       pr_debug("microcode: CPU%d updated upon resume\n", cpu);
+       apply_microcode_on_target(cpu);
+
+       return UCODE_OK;
 }
 
-static int microcode_resume_cpu(int cpu)
+static enum ucode_state microcode_init_cpu(int cpu)
 {
-       struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-       struct cpu_signature nsig;
+       enum ucode_state ustate;
 
-       pr_debug("microcode: CPU%d resumed\n", cpu);
+       if (collect_cpu_info(cpu))
+               return UCODE_ERROR;
 
-       if (!uci->mc)
-               return 1;
+       /* --dimm. Trigger a delayed update? */
+       if (system_state != SYSTEM_RUNNING)
+               return UCODE_NFOUND;
 
-       /*
-        * Let's verify that the 'cached' ucode does belong
-        * to this cpu (a bit of paranoia):
-        */
-       if (microcode_ops->collect_cpu_info(cpu, &nsig)) {
-               __microcode_fini_cpu(cpu);
-               printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n",
-                               cpu);
-               return -1;
-       }
+       ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
 
-       if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) {
-               __microcode_fini_cpu(cpu);
-               printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n",
-                               cpu);
-               /* Should we look for a new ucode here? */
-               return 1;
+       if (ustate == UCODE_OK) {
+               pr_debug("microcode: CPU%d updated upon init\n", cpu);
+               apply_microcode_on_target(cpu);
        }
 
-       return 0;
+       return ustate;
 }
 
-static long microcode_update_cpu(void *unused)
+static enum ucode_state microcode_update_cpu(int cpu)
 {
-       struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id();
-       int err = 0;
+       struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+       enum ucode_state ustate;
 
-       /*
-        * Check if the system resume is in progress (uci->valid != NULL),
-        * otherwise just request a firmware:
-        */
-       if (uci->valid) {
-               err = microcode_resume_cpu(smp_processor_id());
-       } else {
-               collect_cpu_info(smp_processor_id());
-               if (uci->valid && system_state == SYSTEM_RUNNING)
-                       err = microcode_ops->request_microcode_fw(
-                                       smp_processor_id(),
-                                       &microcode_pdev->dev);
-       }
-       if (!err)
-               microcode_ops->apply_microcode(smp_processor_id());
-       return err;
-}
+       if (uci->valid)
+               ustate = microcode_resume_cpu(cpu);
+       else
+               ustate = microcode_init_cpu(cpu);
 
-static int microcode_init_cpu(int cpu)
-{
-       int err;
-       mutex_lock(&microcode_mutex);
-       err = work_on_cpu(cpu, microcode_update_cpu, NULL);
-       mutex_unlock(&microcode_mutex);
-
-       return err;
+       return ustate;
 }
 
 static int mc_sysdev_add(struct sys_device *sys_dev)
 {
        int err, cpu = sys_dev->id;
-       struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
        if (!cpu_online(cpu))
                return 0;
 
        pr_debug("microcode: CPU%d added\n", cpu);
-       memset(uci, 0, sizeof(*uci));
 
        err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
        if (err)
                return err;
 
-       err = microcode_init_cpu(cpu);
+       if (microcode_init_cpu(cpu) == UCODE_ERROR)
+               err = -EINVAL;
 
        return err;
 }
@@ -400,19 +435,30 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 static int mc_sysdev_resume(struct sys_device *dev)
 {
        int cpu = dev->id;
+       struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
        if (!cpu_online(cpu))
                return 0;
 
-       /* only CPU 0 will apply ucode here */
-       microcode_update_cpu(NULL);
+       /*
+        * All non-bootup cpus are still disabled,
+        * so only CPU 0 will apply ucode here.
+        *
+        * Moreover, there can be no concurrent
+        * updates from any other places at this point.
+        */
+       WARN_ON(cpu != 0);
+
+       if (uci->valid && uci->mc)
+               microcode_ops->apply_microcode(cpu);
+
        return 0;
 }
 
 static struct sysdev_driver mc_sysdev_driver = {
-       .add            = mc_sysdev_add,
-       .remove         = mc_sysdev_remove,
-       .resume         = mc_sysdev_resume,
+       .add                    = mc_sysdev_add,
+       .remove                 = mc_sysdev_remove,
+       .resume                 = mc_sysdev_resume,
 };
 
 static __cpuinit int
@@ -425,15 +471,12 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
        switch (action) {
        case CPU_ONLINE:
        case CPU_ONLINE_FROZEN:
-               if (microcode_init_cpu(cpu))
-                       printk(KERN_ERR "microcode: failed to init CPU%d\n",
-                              cpu);
+               microcode_update_cpu(cpu);
        case CPU_DOWN_FAILED:
        case CPU_DOWN_FAILED_FROZEN:
                pr_debug("microcode: CPU%d added\n", cpu);
                if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
-                       printk(KERN_ERR "microcode: Failed to create the sysfs "
-                               "group for CPU%d\n", cpu);
+                       pr_err("microcode: Failed to create group for CPU%d\n", cpu);
                break;
        case CPU_DOWN_PREPARE:
        case CPU_DOWN_PREPARE_FROZEN:
@@ -465,13 +508,10 @@ static int __init microcode_init(void)
                microcode_ops = init_amd_microcode();
 
        if (!microcode_ops) {
-               printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+               pr_err("microcode: no support for this CPU vendor\n");
                return -ENODEV;
        }
 
-       error = microcode_dev_init();
-       if (error)
-               return error;
        microcode_pdev = platform_device_register_simple("microcode", -1,
                                                         NULL, 0);
        if (IS_ERR(microcode_pdev)) {
@@ -480,23 +520,31 @@ static int __init microcode_init(void)
        }
 
        get_online_cpus();
+       mutex_lock(&microcode_mutex);
+
        error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver);
+
+       mutex_unlock(&microcode_mutex);
        put_online_cpus();
+
        if (error) {
-               microcode_dev_exit();
                platform_device_unregister(microcode_pdev);
                return error;
        }
 
+       error = microcode_dev_init();
+       if (error)
+               return error;
+
        register_hotcpu_notifier(&mc_cpu_notifier);
 
-       printk(KERN_INFO
-              "Microcode Update Driver: v" MICROCODE_VERSION
+       pr_info("Microcode Update Driver: v" MICROCODE_VERSION
               " <tigran@aivazian.fsnet.co.uk>,"
               " Peter Oruba\n");
 
        return 0;
 }
+module_init(microcode_init);
 
 static void __exit microcode_exit(void)
 {
@@ -505,16 +553,17 @@ static void __exit microcode_exit(void)
        unregister_hotcpu_notifier(&mc_cpu_notifier);
 
        get_online_cpus();
+       mutex_lock(&microcode_mutex);
+
        sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver);
+
+       mutex_unlock(&microcode_mutex);
        put_online_cpus();
 
        platform_device_unregister(microcode_pdev);
 
        microcode_ops = NULL;
 
-       printk(KERN_INFO
-              "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+       pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
 }
-
-module_init(microcode_init);
 module_exit(microcode_exit);
index 149b9ec..0d334dd 100644 (file)
  *             Fix sigmatch() macro to handle old CPUs with pf == 0.
  *             Thanks to Stuart Swales for pointing out this bug.
  */
-#include <linux/platform_device.h>
-#include <linux/capability.h>
-#include <linux/miscdevice.h>
 #include <linux/firmware.h>
-#include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
 #include <linux/uaccess.h>
-#include <linux/vmalloc.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/cpu.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -150,13 +137,9 @@ struct extended_sigtable {
 
 #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE)
 
-/* serialize access to the physical write to MSR 0x79 */
-static DEFINE_SPINLOCK(microcode_update_lock);
-
 static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
 {
        struct cpuinfo_x86 *c = &cpu_data(cpu_num);
-       unsigned long flags;
        unsigned int val[2];
 
        memset(csig, 0, sizeof(*csig));
@@ -176,18 +159,14 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig)
                csig->pf = 1 << ((val[1] >> 18) & 7);
        }
 
-       /* serialize access to the physical write to MSR 0x79 */
-       spin_lock_irqsave(&microcode_update_lock, flags);
-
        wrmsr(MSR_IA32_UCODE_REV, 0, 0);
        /* see notes above for revision 1.07.  Apparent chip bug */
        sync_core();
        /* get the current revision from MSR 0x8B */
        rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev);
-       spin_unlock_irqrestore(&microcode_update_lock, flags);
 
-       pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n",
-                       csig->sig, csig->pf, csig->rev);
+       printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n",
+                       cpu_num, csig->sig, csig->pf, csig->rev);
 
        return 0;
 }
@@ -318,11 +297,10 @@ get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev)
        return 0;
 }
 
-static void apply_microcode(int cpu)
+static int apply_microcode(int cpu)
 {
        struct microcode_intel *mc_intel;
        struct ucode_cpu_info *uci;
-       unsigned long flags;
        unsigned int val[2];
        int cpu_num;
 
@@ -334,10 +312,7 @@ static void apply_microcode(int cpu)
        BUG_ON(cpu_num != cpu);
 
        if (mc_intel == NULL)
-               return;
-
-       /* serialize access to the physical write to MSR 0x79 */
-       spin_lock_irqsave(&microcode_update_lock, flags);
+               return 0;
 
        /* write microcode via MSR 0x79 */
        wrmsr(MSR_IA32_UCODE_WRITE,
@@ -351,30 +326,32 @@ static void apply_microcode(int cpu)
        /* get the current revision from MSR 0x8B */
        rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
 
-       spin_unlock_irqrestore(&microcode_update_lock, flags);
        if (val[1] != mc_intel->hdr.rev) {
-               printk(KERN_ERR "microcode: CPU%d update from revision "
-                               "0x%x to 0x%x failed\n",
-                       cpu_num, uci->cpu_sig.rev, val[1]);
-               return;
+               printk(KERN_ERR "microcode: CPU%d update "
+                               "to revision 0x%x failed\n",
+                       cpu_num, mc_intel->hdr.rev);
+               return -1;
        }
-       printk(KERN_INFO "microcode: CPU%d updated from revision "
-                        "0x%x to 0x%x, date = %04x-%02x-%02x \n",
-               cpu_num, uci->cpu_sig.rev, val[1],
+       printk(KERN_INFO "microcode: CPU%d updated to revision "
+                        "0x%x, date = %04x-%02x-%02x \n",
+               cpu_num, val[1],
                mc_intel->hdr.date & 0xffff,
                mc_intel->hdr.date >> 24,
                (mc_intel->hdr.date >> 16) & 0xff);
 
        uci->cpu_sig.rev = val[1];
+
+       return 0;
 }
 
-static int generic_load_microcode(int cpu, void *data, size_t size,
-               int (*get_ucode_data)(void *, const void *, size_t))
+static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
+                               int (*get_ucode_data)(void *, const void *, size_t))
 {
        struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
        u8 *ucode_ptr = data, *new_mc = NULL, *mc;
        int new_rev = uci->cpu_sig.rev;
        unsigned int leftover = size;
+       enum ucode_state state = UCODE_OK;
 
        while (leftover) {
                struct microcode_header_intel mc_header;
@@ -412,11 +389,15 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
                leftover  -= mc_size;
        }
 
-       if (!new_mc)
+       if (leftover) {
+               if (new_mc)
+                       vfree(new_mc);
+               state = UCODE_ERROR;
                goto out;
+       }
 
-       if (leftover) {
-               vfree(new_mc);
+       if (!new_mc) {
+               state = UCODE_NFOUND;
                goto out;
        }
 
@@ -427,9 +408,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size,
        pr_debug("microcode: CPU%d found a matching microcode update with"
                 " version 0x%x (current=0x%x)\n",
                        cpu, new_rev, uci->cpu_sig.rev);
-
- out:
-       return (int)leftover;
+out:
+       return state;
 }
 
 static int get_ucode_fw(void *to, const void *from, size_t n)
@@ -438,21 +418,19 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
        return 0;
 }
 
-static int request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device)
 {
        char name[30];
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        const struct firmware *firmware;
-       int ret;
+       enum ucode_state ret;
 
-       /* We should bind the task to the CPU */
-       BUG_ON(cpu != raw_smp_processor_id());
        sprintf(name, "intel-ucode/%02x-%02x-%02x",
                c->x86, c->x86_model, c->x86_mask);
-       ret = request_firmware(&firmware, name, device);
-       if (ret) {
+
+       if (request_firmware(&firmware, name, device)) {
                pr_debug("microcode: data file %s load failed\n", name);
-               return ret;
+               return UCODE_NFOUND;
        }
 
        ret = generic_load_microcode(cpu, (void *)firmware->data,
@@ -468,11 +446,9 @@ static int get_ucode_user(void *to, const void *from, size_t n)
        return copy_from_user(to, from, n);
 }
 
-static int request_microcode_user(int cpu, const void __user *buf, size_t size)
+static enum ucode_state
+request_microcode_user(int cpu, const void __user *buf, size_t size)
 {
-       /* We should bind the task to the CPU */
-       BUG_ON(cpu != raw_smp_processor_id());
-
        return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
 }
 
index 70fd7e4..651c93b 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/smp.h>
+#include <linux/pci.h>
 
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
@@ -870,24 +871,17 @@ static
 inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
 #endif /* CONFIG_X86_IO_APIC */
 
-static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
-                     int count)
+static int
+check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 {
-       if (!mpc_new_phys) {
-               pr_info("No spare slots, try to append...take your risk, "
-                       "new mpc_length %x\n", count);
-       } else {
-               if (count <= mpc_new_length)
-                       pr_info("No spare slots, try to append..., "
-                               "new mpc_length %x\n", count);
-               else {
-                       pr_err("mpc_new_length %lx is too small\n",
-                               mpc_new_length);
-                       return -1;
-               }
+       int ret = 0;
+
+       if (!mpc_new_phys || count <= mpc_new_length) {
+               WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
+               return -1;
        }
 
-       return 0;
+       return ret;
 }
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
@@ -946,7 +940,7 @@ static int  __init replace_intsrc_all(struct mpc_table *mpc,
                } else {
                        struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
                        count += sizeof(struct mpc_intsrc);
-                       if (!check_slot(mpc_new_phys, mpc_new_length, count))
+                       if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
                                goto out;
                        assign_to_mpc_intsrc(&mp_irqs[i], m);
                        mpc->length = count;
@@ -963,11 +957,14 @@ out:
        return 0;
 }
 
-static int __initdata enable_update_mptable;
+int enable_update_mptable;
 
 static int __init update_mptable_setup(char *str)
 {
        enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+       pci_routeirq = 1;
+#endif
        return 0;
 }
 early_param("update_mptable", update_mptable_setup);
@@ -980,6 +977,9 @@ static int __initdata alloc_mptable;
 static int __init parse_alloc_mptable_opt(char *p)
 {
        enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+       pci_routeirq = 1;
+#endif
        alloc_mptable = 1;
        if (!p)
                return 0;
index 9faf43b..70ec9b9 100644 (file)
@@ -248,18 +248,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA
 
 static inline void enter_lazy(enum paravirt_lazy_mode mode)
 {
-       BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
-       BUG_ON(preemptible());
+       BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);
 
-       __get_cpu_var(paravirt_lazy_mode) = mode;
+       percpu_write(paravirt_lazy_mode, mode);
 }
 
-void paravirt_leave_lazy(enum paravirt_lazy_mode mode)
+static void leave_lazy(enum paravirt_lazy_mode mode)
 {
-       BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode);
-       BUG_ON(preemptible());
+       BUG_ON(percpu_read(paravirt_lazy_mode) != mode);
 
-       __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;
+       percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
 }
 
 void paravirt_enter_lazy_mmu(void)
@@ -269,22 +267,36 @@ void paravirt_enter_lazy_mmu(void)
 
 void paravirt_leave_lazy_mmu(void)
 {
-       paravirt_leave_lazy(PARAVIRT_LAZY_MMU);
+       leave_lazy(PARAVIRT_LAZY_MMU);
 }
 
-void paravirt_enter_lazy_cpu(void)
+void paravirt_start_context_switch(struct task_struct *prev)
 {
+       BUG_ON(preemptible());
+
+       if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
+               arch_leave_lazy_mmu_mode();
+               set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
+       }
        enter_lazy(PARAVIRT_LAZY_CPU);
 }
 
-void paravirt_leave_lazy_cpu(void)
+void paravirt_end_context_switch(struct task_struct *next)
 {
-       paravirt_leave_lazy(PARAVIRT_LAZY_CPU);
+       BUG_ON(preemptible());
+
+       leave_lazy(PARAVIRT_LAZY_CPU);
+
+       if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
+               arch_enter_lazy_mmu_mode();
 }
 
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
 {
-       return __get_cpu_var(paravirt_lazy_mode);
+       if (in_interrupt())
+               return PARAVIRT_LAZY_NONE;
+
+       return percpu_read(paravirt_lazy_mode);
 }
 
 void arch_flush_lazy_mmu_mode(void)
@@ -292,7 +304,6 @@ void arch_flush_lazy_mmu_mode(void)
        preempt_disable();
 
        if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
-               WARN_ON(preempt_count() == 1);
                arch_leave_lazy_mmu_mode();
                arch_enter_lazy_mmu_mode();
        }
@@ -300,19 +311,6 @@ void arch_flush_lazy_mmu_mode(void)
        preempt_enable();
 }
 
-void arch_flush_lazy_cpu_mode(void)
-{
-       preempt_disable();
-
-       if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
-               WARN_ON(preempt_count() == 1);
-               arch_leave_lazy_cpu_mode();
-               arch_enter_lazy_cpu_mode();
-       }
-
-       preempt_enable();
-}
-
 struct pv_info pv_info = {
        .name = "bare hardware",
        .paravirt_enabled = 0,
@@ -404,10 +402,8 @@ struct pv_cpu_ops pv_cpu_ops = {
        .set_iopl_mask = native_set_iopl_mask,
        .io_delay = native_io_delay,
 
-       .lazy_mode = {
-               .enter = paravirt_nop,
-               .leave = paravirt_nop,
-       },
+       .start_context_switch = paravirt_nop,
+       .end_context_switch = paravirt_nop,
 };
 
 struct pv_apic_ops pv_apic_ops = {
index 755c21e..971a3be 100644 (file)
@@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = {
 
 static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
 
-/* enable this to stress test the chip's TCE cache */
-#ifdef CONFIG_IOMMU_DEBUG
-static int debugging = 1;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
-       int expected, unsigned long start, unsigned long end)
-{
-       unsigned long idx = start;
-
-       BUG_ON(start >= end);
-
-       while (idx < end) {
-               if (!!test_bit(idx, bitmap) != expected)
-                       return idx;
-               ++idx;
-       }
-
-       /* all bits have the expected value */
-       return ~0UL;
-}
-#else /* debugging is disabled */
-static int debugging;
-
-static inline unsigned long verify_bit_range(unsigned long* bitmap,
-       int expected, unsigned long start, unsigned long end)
-{
-       return ~0UL;
-}
-
-#endif /* CONFIG_IOMMU_DEBUG */
-
 static inline int translation_enabled(struct iommu_table *tbl)
 {
        /* only PHBs with translation enabled have an IOMMU table */
@@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
 {
        unsigned long index;
        unsigned long end;
-       unsigned long badbit;
        unsigned long flags;
 
        index = start_addr >> PAGE_SHIFT;
@@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl,
 
        spin_lock_irqsave(&tbl->it_lock, flags);
 
-       badbit = verify_bit_range(tbl->it_map, 0, index, end);
-       if (badbit != ~0UL) {
-               if (printk_ratelimit())
-                       printk(KERN_ERR "Calgary: entry already allocated at "
-                              "0x%lx tbl %p dma 0x%lx npages %u\n",
-                              badbit, tbl, start_addr, npages);
-       }
-
        iommu_area_reserve(tbl->it_map, index, npages);
 
        spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
        unsigned int npages)
 {
        unsigned long entry;
-       unsigned long badbit;
        unsigned long badend;
        unsigned long flags;
 
@@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
 
        spin_lock_irqsave(&tbl->it_lock, flags);
 
-       badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
-       if (badbit != ~0UL) {
-               if (printk_ratelimit())
-                       printk(KERN_ERR "Calgary: bit is off at 0x%lx "
-                              "tbl %p dma 0x%Lx entry 0x%lx npages %u\n",
-                              badbit, tbl, dma_addr, entry, npages);
-       }
-
        iommu_area_free(tbl->it_map, entry, npages);
 
        spin_unlock_irqrestore(&tbl->it_lock, flags);
@@ -1488,9 +1439,8 @@ void __init detect_calgary(void)
                iommu_detected = 1;
                calgary_detected = 1;
                printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
-               printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, "
-                      "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size,
-                      debugging ? "enabled" : "disabled");
+               printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
+                      specified_table_size);
 
                /* swiotlb for devices that aren't behind the Calgary. */
                if (max_pfn > MAX_DMA32_PFN)
index b284b58..cfd9f90 100644 (file)
@@ -144,48 +144,21 @@ static void flush_gart(void)
 }
 
 #ifdef CONFIG_IOMMU_LEAK
-
-#define SET_LEAK(x)                                                    \
-       do {                                                            \
-               if (iommu_leak_tab)                                     \
-                       iommu_leak_tab[x] = __builtin_return_address(0);\
-       } while (0)
-
-#define CLEAR_LEAK(x)                                                  \
-       do {                                                            \
-               if (iommu_leak_tab)                                     \
-                       iommu_leak_tab[x] = NULL;                       \
-       } while (0)
-
 /* Debugging aid for drivers that don't free their IOMMU tables */
-static void **iommu_leak_tab;
 static int leak_trace;
 static int iommu_leak_pages = 20;
 
 static void dump_leak(void)
 {
-       int i;
        static int dump;
 
-       if (dump || !iommu_leak_tab)
+       if (dump)
                return;
        dump = 1;
-       show_stack(NULL, NULL);
 
-       /* Very crude. dump some from the end of the table too */
-       printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n",
-              iommu_leak_pages);
-       for (i = 0; i < iommu_leak_pages; i += 2) {
-               printk(KERN_DEBUG "%lu: ", iommu_pages-i);
-               printk_address((unsigned long) iommu_leak_tab[iommu_pages-i],
-                               0);
-               printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' ');
-       }
-       printk(KERN_DEBUG "\n");
+       show_stack(NULL, NULL);
+       debug_dma_dump_mappings(NULL);
 }
-#else
-# define SET_LEAK(x)
-# define CLEAR_LEAK(x)
 #endif
 
 static void iommu_full(struct device *dev, size_t size, int dir)
@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 
        for (i = 0; i < npages; i++) {
                iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem);
-               SET_LEAK(iommu_page + i);
                phys_mem += PAGE_SIZE;
        }
        return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr,
        npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
        for (i = 0; i < npages; i++) {
                iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
-               CLEAR_LEAK(iommu_page + i);
        }
        free_iommu(iommu_page, npages);
 }
@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
                pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE);
                while (pages--) {
                        iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
-                       SET_LEAK(iommu_page);
                        addr += PAGE_SIZE;
                        iommu_page++;
                }
@@ -688,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
 
        agp_gatt_table = gatt;
 
-       enable_gart_translations();
-
        error = sysdev_class_register(&gart_sysdev_class);
        if (!error)
                error = sysdev_register(&device_gart);
@@ -801,11 +769,12 @@ void __init gart_iommu_init(void)
 
 #ifdef CONFIG_IOMMU_LEAK
        if (leak_trace) {
-               iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
-                                 get_order(iommu_pages*sizeof(void *)));
-               if (!iommu_leak_tab)
+               int ret;
+
+               ret = dma_debug_resize_entries(iommu_pages);
+               if (ret)
                        printk(KERN_DEBUG
-                              "PCI-DMA: Cannot allocate leak trace area\n");
+                              "PCI-DMA: Cannot trace all the entries\n");
        }
 #endif
 
@@ -845,6 +814,14 @@ void __init gart_iommu_init(void)
         * the pages as Not-Present:
         */
        wbinvd();
+       
+       /*
+        * Now all caches are flushed and we can safely enable
+        * GART hardware.  Doing it early leaves the possibility
+        * of stale cache entries that can lead to GART PTE
+        * errors.
+        */
+       enable_gart_translations();
 
        /*
         * Try to workaround a bug (thanks to BenH):
index 221a385..a1712f2 100644 (file)
@@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
        return paddr;
 }
 
-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
        return baddr;
 }
index ca98915..3bb2be1 100644 (file)
@@ -8,12 +8,15 @@
 #include <linux/module.h>
 #include <linux/pm.h>
 #include <linux/clockchips.h>
+#include <linux/random.h>
 #include <trace/power.h>
 #include <asm/system.h>
 #include <asm/apic.h>
+#include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/ds.h>
 
 unsigned long idle_halt;
 EXPORT_SYMBOL(idle_halt);
@@ -45,6 +48,8 @@ void free_thread_xstate(struct task_struct *tsk)
                kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
                tsk->thread.xstate = NULL;
        }
+
+       WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
 
 void free_thread_info(struct thread_info *ti)
@@ -83,8 +88,6 @@ void exit_thread(void)
                put_cpu();
                kfree(bp);
        }
-
-       ds_exit_thread(current);
 }
 
 void flush_thread(void)
@@ -613,3 +616,16 @@ static int __init idle_setup(char *str)
 }
 early_param("idle", idle_setup);
 
+unsigned long arch_align_stack(unsigned long sp)
+{
+       if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+               sp -= get_random_int() % 8192;
+       return sp & ~0xf;
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+       unsigned long range_end = mm->brk + 0x02000000;
+       return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+}
+
index 76f8f84..59f4524 100644 (file)
@@ -9,8 +9,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
@@ -33,7 +31,6 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/personality.h>
 #include <linux/tick.h>
 #include <linux/percpu.h>
@@ -290,7 +287,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
                p->thread.io_bitmap_max = 0;
        }
 
-       ds_copy_thread(p, current);
+       clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+       p->thread.ds_ctx = NULL;
 
        clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
        p->thread.debugctlmsr = 0;
@@ -407,7 +405,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * done before math_state_restore, so the TS bit is up
         * to date.
         */
-       arch_leave_lazy_cpu_mode();
+       arch_end_context_switch(next_p);
 
        /* If the task has used fpu the last 5 timeslices, just do a full
         * restore of the math state immediately to avoid the trap; the
@@ -497,15 +495,3 @@ unsigned long get_wchan(struct task_struct *p)
        return 0;
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-       if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-               sp -= get_random_int() % 8192;
-       return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-       unsigned long range_end = mm->brk + 0x02000000;
-       return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
index b751a41..ebefb54 100644 (file)
@@ -14,8 +14,6 @@
  * This file handles the architecture-dependent parts of process handling..
  */
 
-#include <stdarg.h>
-
 #include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
@@ -32,7 +30,6 @@
 #include <linux/delay.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
-#include <linux/random.h>
 #include <linux/notifier.h>
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
@@ -335,7 +332,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
                        goto out;
        }
 
-       ds_copy_thread(p, me);
+       clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+       p->thread.ds_ctx = NULL;
 
        clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
        p->thread.debugctlmsr = 0;
@@ -428,7 +426,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         * done before math_state_restore, so the TS bit is up
         * to date.
         */
-       arch_leave_lazy_cpu_mode();
+       arch_end_context_switch(next_p);
 
        /*
         * Switch FS and GS.
@@ -660,15 +658,3 @@ long sys_arch_prctl(int code, unsigned long addr)
        return do_arch_prctl(current, code, addr);
 }
 
-unsigned long arch_align_stack(unsigned long sp)
-{
-       if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
-               sp -= get_random_int() % 8192;
-       return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-       unsigned long range_end = mm->brk + 0x02000000;
-       return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
index 23b7c8f..09ecbde 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
+#include <linux/workqueue.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target,
 }
 
 #ifdef CONFIG_X86_PTRACE_BTS
+/*
+ * A branch trace store context.
+ *
+ * Contexts may only be installed by ptrace_bts_config() and only for
+ * ptraced tasks.
+ *
+ * Contexts are destroyed when the tracee is detached from the tracer.
+ * The actual destruction work requires interrupts enabled, so the
+ * work is deferred and will be scheduled during __ptrace_unlink().
+ *
+ * Contexts hold an additional task_struct reference on the traced
+ * task, as well as a reference on the tracer's mm.
+ *
+ * Ptrace already holds a task_struct for the duration of ptrace operations,
+ * but since destruction is deferred, it may be executed after both
+ * tracer and tracee exited.
+ */
+struct bts_context {
+       /* The branch trace handle. */
+       struct bts_tracer       *tracer;
+
+       /* The buffer used to store the branch trace and its size. */
+       void                    *buffer;
+       unsigned int            size;
+
+       /* The mm that paid for the above buffer. */
+       struct mm_struct        *mm;
+
+       /* The task this context belongs to. */
+       struct task_struct      *task;
+
+       /* The signal to send on a bts buffer overflow. */
+       unsigned int            bts_ovfl_signal;
+
+       /* The work struct to destroy a context. */
+       struct work_struct      work;
+};
+
+/* Account @size to current->mm, allocate a zeroed buffer for @context; 0 or -errno. */
+static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
+{
+       void *buffer;
+       int err;
+
+       err = account_locked_memory(current->mm, current->signal->rlim, size);
+       if (err < 0)
+               return err;
+
+       buffer = kzalloc(size, GFP_KERNEL);
+       if (!buffer)
+               goto out_refund;
+
+       context->buffer = buffer;
+       context->size = size;
+       context->mm = get_task_mm(current);     /* released via mmput() in free_bts_buffer() */
+       return 0;
+
+ out_refund:
+       refund_locked_memory(current->mm, size);
+       return -ENOMEM; /* err is 0 here (accounting succeeded); do not return it */
+}
+
+static inline void free_bts_buffer(struct bts_context *context)
+{
+       if (!context->buffer)
+               return;
+
+       kfree(context->buffer);
+       context->buffer = NULL;
+
+       refund_locked_memory(context->mm, context->size);
+       context->size = 0;
+
+       mmput(context->mm);
+       context->mm = NULL;
+}
+
+static void free_bts_context_work(struct work_struct *w)
+{
+       struct bts_context *context;
+
+       context = container_of(w, struct bts_context, work);
+
+       ds_release_bts(context->tracer);
+       put_task_struct(context->task);
+       free_bts_buffer(context);
+       kfree(context);
+}
+
+static inline void free_bts_context(struct bts_context *context)
+{
+       INIT_WORK(&context->work, free_bts_context_work);
+       schedule_work(&context->work);
+}
+
+static inline struct bts_context *alloc_bts_context(struct task_struct *task)
+{
+       struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
+       if (context) {
+               context->task = task;
+               task->bts = context;
+
+               get_task_struct(task);
+       }
+
+       return context;
+}
+
 static int ptrace_bts_read_record(struct task_struct *child, size_t index,
                                  struct bts_struct __user *out)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
        struct bts_struct bts;
        const unsigned char *at;
        int error;
 
-       trace = ds_read_bts(child->bts);
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        at = trace->ds.top - ((index + 1) * trace->ds.size);
        if ((void *)at < trace->ds.begin)
@@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index,
        if (!trace->read)
                return -EOPNOTSUPP;
 
-       error = trace->read(child->bts, at, &bts);
+       error = trace->read(context->tracer, at, &bts);
        if (error < 0)
                return error;
 
@@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child,
                            long size,
                            struct bts_struct __user *out)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
        const unsigned char *at;
        int error, drained = 0;
 
-       trace = ds_read_bts(child->bts);
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        if (!trace->read)
                return -EOPNOTSUPP;
@@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child,
        for (at = trace->ds.begin; (void *)at < trace->ds.top;
             out++, drained++, at += trace->ds.size) {
                struct bts_struct bts;
-               int error;
 
-               error = trace->read(child->bts, at, &bts);
+               error = trace->read(context->tracer, at, &bts);
                if (error < 0)
                        return error;
 
@@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child,
 
        memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-       error = ds_reset_bts(child->bts);
+       error = ds_reset_bts(context->tracer);
        if (error < 0)
                return error;
 
        return drained;
 }
 
-static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size)
-{
-       child->bts_buffer = alloc_locked_buffer(size);
-       if (!child->bts_buffer)
-               return -ENOMEM;
-
-       child->bts_size = size;
-
-       return 0;
-}
-
-static void ptrace_bts_free_buffer(struct task_struct *child)
-{
-       free_locked_buffer(child->bts_buffer, child->bts_size);
-       child->bts_buffer = NULL;
-       child->bts_size = 0;
-}
-
 static int ptrace_bts_config(struct task_struct *child,
                             long cfg_size,
                             const struct ptrace_bts_config __user *ucfg)
 {
+       struct bts_context *context;
        struct ptrace_bts_config cfg;
        unsigned int flags = 0;
 
@@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child,
        if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
                return -EFAULT;
 
-       if (child->bts) {
-               ds_release_bts(child->bts);
-               child->bts = NULL;
-       }
+       context = child->bts;
+       if (!context)
+               context = alloc_bts_context(child);
+       if (!context)
+               return -ENOMEM;
 
        if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
                if (!cfg.signal)
                        return -EINVAL;
 
-               child->thread.bts_ovfl_signal = cfg.signal;
                return -EOPNOTSUPP;
+               context->bts_ovfl_signal = cfg.signal;
        }
 
-       if ((cfg.flags & PTRACE_BTS_O_ALLOC) &&
-           (cfg.size != child->bts_size)) {
-               int error;
+       ds_release_bts(context->tracer);
+       context->tracer = NULL;
 
-               ptrace_bts_free_buffer(child);
+       if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
+               int err;
 
-               error = ptrace_bts_allocate_buffer(child, cfg.size);
-               if (error < 0)
-                       return error;
+               free_bts_buffer(context);
+               if (!cfg.size)
+                       return 0;
+
+               err = alloc_bts_buffer(context, cfg.size);
+               if (err < 0)
+                       return err;
        }
 
        if (cfg.flags & PTRACE_BTS_O_TRACE)
@@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child,
        if (cfg.flags & PTRACE_BTS_O_SCHED)
                flags |= BTS_TIMESTAMPS;
 
-       child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size,
-                                   /* ovfl = */ NULL, /* th = */ (size_t)-1,
-                                   flags);
-       if (IS_ERR(child->bts)) {
-               int error = PTR_ERR(child->bts);
-
-               ptrace_bts_free_buffer(child);
-               child->bts = NULL;
+       context->tracer =
+               ds_request_bts_task(child, context->buffer, context->size,
+                                   NULL, (size_t)-1, flags);
+       if (unlikely(IS_ERR(context->tracer))) {
+               int error = PTR_ERR(context->tracer);
 
+               free_bts_buffer(context);
+               context->tracer = NULL;
                return error;
        }
 
@@ -727,20 +832,25 @@ static int ptrace_bts_status(struct task_struct *child,
                             long cfg_size,
                             struct ptrace_bts_config __user *ucfg)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
        struct ptrace_bts_config cfg;
 
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
        if (cfg_size < sizeof(cfg))
                return -EIO;
 
-       trace = ds_read_bts(child->bts);
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        memset(&cfg, 0, sizeof(cfg));
-       cfg.size = trace->ds.end - trace->ds.begin;
-       cfg.signal = child->thread.bts_ovfl_signal;
-       cfg.bts_size = sizeof(struct bts_struct);
+       cfg.size        = trace->ds.end - trace->ds.begin;
+       cfg.signal      = context->bts_ovfl_signal;
+       cfg.bts_size    = sizeof(struct bts_struct);
 
        if (cfg.signal)
                cfg.flags |= PTRACE_BTS_O_SIGNAL;
@@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child,
 
 static int ptrace_bts_clear(struct task_struct *child)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
 
-       trace = ds_read_bts(child->bts);
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
 
-       return ds_reset_bts(child->bts);
+       return ds_reset_bts(context->tracer);
 }
 
 static int ptrace_bts_size(struct task_struct *child)
 {
+       struct bts_context *context;
        const struct bts_trace *trace;
 
-       trace = ds_read_bts(child->bts);
+       context = child->bts;
+       if (!context)
+               return -ESRCH;
+
+       trace = ds_read_bts(context->tracer);
        if (!trace)
-               return -EPERM;
+               return -ESRCH;
 
        return (trace->ds.top - trace->ds.begin) / trace->ds.size;
 }
 
-static void ptrace_bts_fork(struct task_struct *tsk)
-{
-       tsk->bts = NULL;
-       tsk->bts_buffer = NULL;
-       tsk->bts_size = 0;
-       tsk->thread.bts_ovfl_signal = 0;
-}
-
-static void ptrace_bts_untrace(struct task_struct *child)
+/*
+ * Called from __ptrace_unlink() after the child has been moved back
+ * to its original parent.
+ */
+void ptrace_bts_untrace(struct task_struct *child)
 {
        if (unlikely(child->bts)) {
-               ds_release_bts(child->bts);
+               free_bts_context(child->bts);
                child->bts = NULL;
-
-               /* We cannot update total_vm and locked_vm since
-                  child's mm is already gone. But we can reclaim the
-                  memory. */
-               kfree(child->bts_buffer);
-               child->bts_buffer = NULL;
-               child->bts_size = 0;
        }
 }
-
-static void ptrace_bts_detach(struct task_struct *child)
-{
-       /*
-        * Ptrace_detach() races with ptrace_untrace() in case
-        * the child dies and is reaped by another thread.
-        *
-        * We only do the memory accounting at this point and
-        * leave the buffer deallocation and the bts tracer
-        * release to ptrace_bts_untrace() which will be called
-        * later on with tasklist_lock held.
-        */
-       release_locked_buffer(child->bts_buffer, child->bts_size);
-}
-#else
-static inline void ptrace_bts_fork(struct task_struct *tsk) {}
-static inline void ptrace_bts_detach(struct task_struct *child) {}
-static inline void ptrace_bts_untrace(struct task_struct *child) {}
 #endif /* CONFIG_X86_PTRACE_BTS */
 
-void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-       ptrace_bts_fork(child);
-}
-
-void x86_ptrace_untrace(struct task_struct *child)
-{
-       ptrace_bts_untrace(child);
-}
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child)
 #ifdef TIF_SYSCALL_EMU
        clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
 #endif
-       ptrace_bts_detach(child);
 }
 
 #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
index 7563b31..af71d06 100644 (file)
@@ -491,5 +491,48 @@ void force_hpet_resume(void)
                break;
        }
 }
+#endif
+
+#if defined(CONFIG_PCI) && defined(CONFIG_NUMA)
+/*
+ * Set correct numa_node information for AMD NB functions.
+ *
+ * Reads config dword 0x60 of function 0 in dev's slot and uses its low
+ * three bits as dev's NUMA node.  Does nothing if function 0 is absent.
+ */
+static void __init quirk_amd_nb_node(struct pci_dev *dev)
+{
+       struct pci_dev *nb_ht;
+       unsigned int devfn;
+       u32 val;
+
+       devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
+       nb_ht = pci_get_slot(dev->bus, devfn);
+       if (!nb_ht)
+               return;
+
+       pci_read_config_dword(nb_ht, 0x60, &val);
+       set_dev_node(&dev->dev, val & 7);
+       /* Drop the reference pci_get_slot() took on nb_ht, not one on dev. */
+       pci_dev_put(nb_ht);
+}
 
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
+                       quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
+                       quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
+                       quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC,
+                       quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT,
+                       quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP,
+                       quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM,
+                       quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC,
+                       quirk_amd_nb_node);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK,
+                       quirk_amd_nb_node);
 #endif
index 667188e..d2d1ce8 100644 (file)
@@ -192,6 +192,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
                        DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
                },
        },
+       {   /* Handle problems with rebooting on Dell Optiplex 360 with 0T656F */
+               .callback = set_bios_reboot,
+               .ident = "Dell OptiPlex 360",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"),
+                       DMI_MATCH(DMI_BOARD_NAME, "0T656F"),
+               },
+       },
        {       /* Handle problems with rebooting on Dell 2400's */
                .callback = set_bios_reboot,
                .ident = "Dell PowerEdge 2400",
index b415843..d1c636b 100644 (file)
 #define ARCH_SETUP
 #endif
 
+/*
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
+ * apertures, ACPI and other tables without having to play with fixmaps.
+ */
+unsigned long max_low_pfn_mapped;
+unsigned long max_pfn_mapped;
+
 RESERVE_BRK(dmi_alloc, 65536);
 
 unsigned int boot_cpu_id __read_mostly;
@@ -214,8 +222,8 @@ unsigned long mmu_cr4_features;
 unsigned long mmu_cr4_features = X86_CR4_PAE;
 #endif
 
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
+/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
+int bootloader_type, bootloader_version;
 
 /*
  * Setup options
@@ -706,6 +714,12 @@ void __init setup_arch(char **cmdline_p)
 #endif
        saved_video_mode = boot_params.hdr.vid_mode;
        bootloader_type = boot_params.hdr.type_of_loader;
+       if ((bootloader_type >> 4) == 0xe) {
+               bootloader_type &= 0xf;
+               bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
+       }
+       bootloader_version  = bootloader_type & 0xf;
+       bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
 
 #ifdef CONFIG_BLK_DEV_RAM
        rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
@@ -854,12 +868,16 @@ void __init setup_arch(char **cmdline_p)
                max_low_pfn = max_pfn;
 
        high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+       max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
 #endif
 
 #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
        setup_bios_corruption_check();
 #endif
 
+       printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
+                       max_pfn_mapped<<PAGE_SHIFT);
+
        reserve_brk();
 
        /* max_pfn_mapped is updated here */
@@ -996,24 +1014,6 @@ void __init setup_arch(char **cmdline_p)
 
 #ifdef CONFIG_X86_32
 
-/**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- *     Perform any necessary interrupt initialisation prior to setting up
- *     the "ordinary" interrupt call gates.  For legacy reasons, the ISA
- *     interrupts should be initialised here if the machine emulates a PC
- *     in any way.
- **/
-void __init x86_quirk_pre_intr_init(void)
-{
-       if (x86_quirks->arch_pre_intr_init) {
-               if (x86_quirks->arch_pre_intr_init())
-                       return;
-       }
-       init_ISA_irqs();
-}
-
 /**
  * x86_quirk_intr_init - post gate setup interrupt initialisation
  *
index 8f0e13b..9c3f082 100644 (file)
@@ -425,6 +425,14 @@ void __init setup_per_cpu_areas(void)
        early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
+       /*
+        * make sure boot cpu node_number is right, when boot cpu is on the
+        * node that doesn't have mem installed
+        */
+       per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id);
+#endif
+
        /* Setup node to cpumask map */
        setup_node_to_cpumask_map();
 
index 13f33ea..f6db48c 100644 (file)
@@ -193,19 +193,19 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
 }
 
 struct smp_ops smp_ops = {
-       .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
-       .smp_prepare_cpus = native_smp_prepare_cpus,
-       .smp_cpus_done = native_smp_cpus_done,
+       .smp_prepare_boot_cpu   = native_smp_prepare_boot_cpu,
+       .smp_prepare_cpus       = native_smp_prepare_cpus,
+       .smp_cpus_done          = native_smp_cpus_done,
 
-       .smp_send_stop = native_smp_send_stop,
-       .smp_send_reschedule = native_smp_send_reschedule,
+       .smp_send_stop          = native_smp_send_stop,
+       .smp_send_reschedule    = native_smp_send_reschedule,
 
-       .cpu_up = native_cpu_up,
-       .cpu_die = native_cpu_die,
-       .cpu_disable = native_cpu_disable,
-       .play_dead = native_play_dead,
+       .cpu_up                 = native_cpu_up,
+       .cpu_die                = native_cpu_die,
+       .cpu_disable            = native_cpu_disable,
+       .play_dead              = native_play_dead,
 
-       .send_call_func_ipi = native_send_call_func_ipi,
+       .send_call_func_ipi     = native_send_call_func_ipi,
        .send_call_func_single_ipi = native_send_call_func_single_ipi,
 };
 EXPORT_SYMBOL_GPL(smp_ops);
index 58d24ef..7c80007 100644 (file)
@@ -504,7 +504,7 @@ void __inquire_remote_apic(int apicid)
  * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this
  * won't ... remember to clear down the APIC, etc later.
  */
-int __devinit
+int __cpuinit
 wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
 {
        unsigned long send_status, accept_status = 0;
@@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
        return (send_status | accept_status);
 }
 
-int __devinit
+static int __cpuinit
 wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
 {
        unsigned long send_status, accept_status = 0;
@@ -822,10 +822,12 @@ do_rest:
        /* mark "stuck" area as not stuck */
        *((volatile unsigned long *)trampoline_base) = 0;
 
-       /*
-        * Cleanup possible dangling ends...
-        */
-       smpboot_restore_warm_reset_vector();
+       if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
+               /*
+                * Cleanup possible dangling ends...
+                */
+               smpboot_restore_warm_reset_vector();
+       }
 
        return boot_error;
 }
@@ -990,10 +992,12 @@ static int __init smp_sanity_check(unsigned max_cpus)
         */
        if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
            !cpu_has_apic) {
-               printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n",
-                       boot_cpu_physical_apicid);
-               printk(KERN_ERR "... forcing use of dummy APIC emulation."
+               if (!disable_apic) {
+                       pr_err("BIOS bug, local APIC #%d not detected!...\n",
+                               boot_cpu_physical_apicid);
+                       pr_err("... forcing use of dummy APIC emulation."
                                "(tell your hw vendor)\n");
+               }
                smpboot_clear_io_apic();
                arch_disable_smp_support();
                return -1;
index f7bddc2..4aaf7e4 100644 (file)
@@ -20,7 +20,7 @@ save_stack_warning_symbol(void *data, char *msg, unsigned long symbol)
 
 static int save_stack_stack(void *data, char *name)
 {
-       return -1;
+       return 0;
 }
 
 static void save_stack_address(void *data, unsigned long addr, int reliable)
index ff5c873..734f92c 100644 (file)
@@ -334,3 +334,4 @@ ENTRY(sys_call_table)
        .long sys_inotify_init1
        .long sys_preadv
        .long sys_pwritev
+       .long sys_rt_tgsigqueueinfo     /* 335 */
index ed0c337..124d40c 100644 (file)
@@ -715,7 +715,12 @@ uv_activation_descriptor_init(int node, int pnode)
        struct bau_desc *adp;
        struct bau_desc *ad2;
 
-       adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node);
+       /*
+        * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR)
+        * per cpu; and up to 32 (UV_ADP_SIZE) cpu's per blade
+        */
+       adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)*
+               UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node);
        BUG_ON(!adp);
 
        pa = uv_gpa(adp); /* need the real nasid*/
@@ -729,7 +734,13 @@ uv_activation_descriptor_init(int node, int pnode)
                                      (n << UV_DESC_BASE_PNODE_SHIFT | m));
        }
 
-       for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
+       /*
+        * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each
+        * cpu even though we only use the first one; one descriptor can
+        * describe a broadcast to 256 nodes.
+        */
+       for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR);
+               i++, ad2++) {
                memset(ad2, 0, sizeof(struct bau_desc));
                ad2->header.sw_ack_flag = 1;
                /*
@@ -832,7 +843,7 @@ static int __init uv_bau_init(void)
                return 0;
 
        for_each_possible_cpu(cur_cpu)
-               alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
+               zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
                                       GFP_KERNEL, cpu_to_node(cur_cpu));
 
        uv_bau_retry_limit = 1;
index a1d2883..ede0245 100644 (file)
@@ -839,9 +839,6 @@ asmlinkage void math_state_restore(void)
        }
 
        clts();                         /* Allow maths ops (or we recurse) */
-#ifdef CONFIG_X86_32
-       restore_fpu(tsk);
-#else
        /*
         * Paranoid restore. send a SIGSEGV if we fail to restore the state.
         */
@@ -850,7 +847,7 @@ asmlinkage void math_state_restore(void)
                force_sig(SIGSEGV, tsk);
                return;
        }
-#endif
+
        thread->status |= TS_USEDFPU;   /* So we fnsave on switch_to() */
        tsk->fpu_counter++;
 }
@@ -969,11 +966,8 @@ void __init trap_init(void)
        for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
                set_bit(i, used_vectors);
 
-#ifdef CONFIG_X86_64
        set_bit(IA32_SYSCALL_VECTOR, used_vectors);
-#else
-       set_bit(SYSCALL_VECTOR, used_vectors);
-#endif
+
        /*
         * Should be a barrier for any external CPU state:
         */
index d57de05..3e1c057 100644 (file)
@@ -384,13 +384,13 @@ unsigned long native_calibrate_tsc(void)
 {
        u64 tsc1, tsc2, delta, ref1, ref2;
        unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
-       unsigned long flags, latch, ms, fast_calibrate, tsc_khz;
+       unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz;
        int hpet = is_hpet_enabled(), i, loopmin;
 
-       tsc_khz = get_hypervisor_tsc_freq();
-       if (tsc_khz) {
+       hv_tsc_khz = get_hypervisor_tsc_freq();
+       if (hv_tsc_khz) {
                printk(KERN_INFO "TSC: Frequency read from the hypervisor\n");
-               return tsc_khz;
+               return hv_tsc_khz;
        }
 
        local_irq_save(flags);
@@ -710,7 +710,16 @@ static cycle_t read_tsc(struct clocksource *cs)
 #ifdef CONFIG_X86_64
 static cycle_t __vsyscall_fn vread_tsc(void)
 {
-       cycle_t ret = (cycle_t)vget_cycles();
+       cycle_t ret;
+
+       /*
+        * Surround the RDTSC by barriers, to make sure it's not
+        * speculated to outside the seqlock critical section and
+        * does not cause time warps:
+        */
+       rdtsc_barrier();
+       ret = (cycle_t)vget_cycles();
+       rdtsc_barrier();
 
        return ret >= __vsyscall_gtod_data.clock.cycle_last ?
                ret : __vsyscall_gtod_data.clock.cycle_last;
index bf36328..027b5b4 100644 (file)
@@ -34,6 +34,7 @@ static __cpuinitdata atomic_t stop_count;
  * of a critical section, to be able to prove TSC time-warps:
  */
 static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED;
+
 static __cpuinitdata cycles_t last_tsc;
 static __cpuinitdata cycles_t max_warp;
 static __cpuinitdata int nr_warps;
@@ -113,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu)
                return;
 
        if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
-               printk(KERN_INFO
-                      "Skipping synchronization checks as TSC is reliable.\n");
+               pr_info("Skipping synchronization checks as TSC is reliable.\n");
                return;
        }
 
-       printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
-                         smp_processor_id(), cpu);
+       pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:",
+               smp_processor_id(), cpu);
 
        /*
         * Reset it - in case this is a second bootup:
@@ -143,8 +143,8 @@ void __cpuinit check_tsc_sync_source(int cpu)
 
        if (nr_warps) {
                printk("\n");
-               printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs,"
-                                   " turning off TSC clock.\n", max_warp);
+               pr_warning("Measured %Ld cycles TSC warp between CPUs, "
+                          "turning off TSC clock.\n", max_warp);
                mark_tsc_unstable("check_tsc_sync_source failed");
        } else {
                printk(" passed.\n");
@@ -195,5 +195,3 @@ void __cpuinit check_tsc_sync_target(void)
        while (atomic_read(&stop_count) != cpus)
                cpu_relax();
 }
-#undef NR_LOOPS
-
index d7ac84e..9c4e625 100644 (file)
@@ -287,10 +287,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
        info->regs.pt.ds = 0;
        info->regs.pt.es = 0;
        info->regs.pt.fs = 0;
-
-/* we are clearing gs later just before "jmp resume_userspace",
- * because it is not saved/restored.
- */
+#ifndef CONFIG_X86_32_LAZY_GS
+       info->regs.pt.gs = 0;
+#endif
 
 /*
  * The flags register is also special: we cannot trust that the user
@@ -318,9 +317,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
        }
 
 /*
- * Save old state, set default return value (%ax) to 0
+ * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL)
  */
-       info->regs32->ax = 0;
+       info->regs32->ax = VM86_SIGNAL;
        tsk->thread.saved_sp0 = tsk->thread.sp0;
        tsk->thread.saved_fs = info->regs32->fs;
        tsk->thread.saved_gs = get_user_gs(info->regs32);
@@ -343,7 +342,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
        __asm__ __volatile__(
                "movl %0,%%esp\n\t"
                "movl %1,%%ebp\n\t"
+#ifdef CONFIG_X86_32_LAZY_GS
                "mov  %2, %%gs\n\t"
+#endif
                "jmp resume_userspace"
                : /* no outputs */
                :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0));
index 95deb9f..b263423 100644 (file)
@@ -462,22 +462,28 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
 }
 #endif
 
-static void vmi_enter_lazy_cpu(void)
+static void vmi_start_context_switch(struct task_struct *prev)
 {
-       paravirt_enter_lazy_cpu();
+       paravirt_start_context_switch(prev);
        vmi_ops.set_lazy_mode(2);
 }
 
+static void vmi_end_context_switch(struct task_struct *next)
+{
+       vmi_ops.set_lazy_mode(0);
+       paravirt_end_context_switch(next);
+}
+
 static void vmi_enter_lazy_mmu(void)
 {
        paravirt_enter_lazy_mmu();
        vmi_ops.set_lazy_mode(1);
 }
 
-static void vmi_leave_lazy(void)
+static void vmi_leave_lazy_mmu(void)
 {
-       paravirt_leave_lazy(paravirt_get_lazy_mode());
        vmi_ops.set_lazy_mode(0);
+       paravirt_leave_lazy_mmu();
 }
 
 static inline int __init check_vmi_rom(struct vrom_header *rom)
@@ -711,14 +717,14 @@ static inline int __init activate_vmi(void)
        para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask);
        para_fill(pv_cpu_ops.io_delay, IODelay);
 
-       para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu,
+       para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch,
                  set_lazy_mode, SetLazyMode);
-       para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy,
+       para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch,
                  set_lazy_mode, SetLazyMode);
 
        para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu,
                  set_lazy_mode, SetLazyMode);
-       para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy,
+       para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu,
                  set_lazy_mode, SetLazyMode);
 
        /* user and kernel flush are just handled with different flags to FlushTLB */
index 849ee61..4c85b2e 100644 (file)
@@ -1,5 +1,431 @@
+/*
+ * ld script for the x86 kernel
+ *
+ * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * Modernisation, unification and other changes and fixes:
+ *   Copyright (C) 2007-2009  Sam Ravnborg <sam@ravnborg.org>
+ *
+ *
+ * Don't define absolute symbols until and unless you know that symbol
+ * value should remain constant even if kernel image is relocated
+ * at run time. Absolute symbols are not relocated. If symbol value should
+ * change if kernel is relocated, make the symbol section relative and
+ * put it inside the section definition.
+ */
+
 #ifdef CONFIG_X86_32
-# include "vmlinux_32.lds.S"
+#define LOAD_OFFSET __PAGE_OFFSET
 #else
-# include "vmlinux_64.lds.S"
+#define LOAD_OFFSET __START_KERNEL_map
 #endif
+
+#include <asm-generic/vmlinux.lds.h>
+#include <asm/asm-offsets.h>
+#include <asm/thread_info.h>
+#include <asm/page_types.h>
+#include <asm/cache.h>
+#include <asm/boot.h>
+
+#undef i386     /* in case the preprocessor is a 32bit one */
+
+OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)
+
+#ifdef CONFIG_X86_32
+OUTPUT_ARCH(i386)
+ENTRY(phys_startup_32)
+jiffies = jiffies_64;
+#else
+OUTPUT_ARCH(i386:x86-64)
+ENTRY(phys_startup_64)
+jiffies_64 = jiffies;
+#endif
+
+PHDRS {
+       text PT_LOAD FLAGS(5);          /* R_E */
+       data PT_LOAD FLAGS(7);          /* RWE */
+#ifdef CONFIG_X86_64
+       user PT_LOAD FLAGS(7);          /* RWE */
+       data.init PT_LOAD FLAGS(7);     /* RWE */
+#ifdef CONFIG_SMP
+       percpu PT_LOAD FLAGS(7);        /* RWE */
+#endif
+       data.init2 PT_LOAD FLAGS(7);    /* RWE */
+#endif
+       note PT_NOTE FLAGS(0);          /* ___ */
+}
+
+SECTIONS
+{
+#ifdef CONFIG_X86_32
+        . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
+        phys_startup_32 = startup_32 - LOAD_OFFSET;
+#else
+        . = __START_KERNEL;
+        phys_startup_64 = startup_64 - LOAD_OFFSET;
+#endif
+
+       /* Text and read-only data */
+
+       /* bootstrapping code */
+       .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
+               _text = .;
+               *(.text.head)
+       } :text = 0x9090
+
+       /* The rest of the text */
+       .text :  AT(ADDR(.text) - LOAD_OFFSET) {
+#ifdef CONFIG_X86_32
+               /* not really needed, already page aligned */
+               . = ALIGN(PAGE_SIZE);
+               *(.text.page_aligned)
+#endif
+               . = ALIGN(8);
+               _stext = .;
+               TEXT_TEXT
+               SCHED_TEXT
+               LOCK_TEXT
+               KPROBES_TEXT
+               IRQENTRY_TEXT
+               *(.fixup)
+               *(.gnu.warning)
+               /* End of text section */
+               _etext = .;
+       } :text = 0x9090
+
+       NOTES :text :note
+
+       /* Exception table */
+       . = ALIGN(16);
+       __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
+               __start___ex_table = .;
+               *(__ex_table)
+               __stop___ex_table = .;
+       } :text = 0x9090
+
+       RODATA
+
+       /* Data */
+       . = ALIGN(PAGE_SIZE);
+       .data : AT(ADDR(.data) - LOAD_OFFSET) {
+               DATA_DATA
+               CONSTRUCTORS
+
+#ifdef CONFIG_X86_64
+               /* End of data section */
+               _edata = .;
+#endif
+       } :data
+
+#ifdef CONFIG_X86_32
+       /* 32 bit has nosave before _edata */
+       . = ALIGN(PAGE_SIZE);
+       .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+               __nosave_begin = .;
+               *(.data.nosave)
+               . = ALIGN(PAGE_SIZE);
+               __nosave_end = .;
+       }
+#endif
+
+       . = ALIGN(PAGE_SIZE);
+       .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
+               *(.data.page_aligned)
+               *(.data.idt)
+       }
+
+#ifdef CONFIG_X86_32
+       . = ALIGN(32);
+#else
+       . = ALIGN(PAGE_SIZE);
+       . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+#endif
+       .data.cacheline_aligned :
+               AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
+               *(.data.cacheline_aligned)
+       }
+
+       /* rarely changed data like cpu maps */
+#ifdef CONFIG_X86_32
+       . = ALIGN(32);
+#else
+       . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
+#endif
+       .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
+               *(.data.read_mostly)
+
+#ifdef CONFIG_X86_32
+               /* End of data section */
+               _edata = .;
+#endif
+       }
+
+#ifdef CONFIG_X86_64
+
+#define VSYSCALL_ADDR (-10*1024*1024)
+#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \
+                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \
+                            SIZEOF(.data.read_mostly) + 4095) & ~(4095))
+
+#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
+#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
+
+#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
+#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
+
+       . = VSYSCALL_ADDR;
+       .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) {
+               *(.vsyscall_0)
+       } :user
+
+       __vsyscall_0 = VSYSCALL_VIRT_ADDR;
+
+       . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+       .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {
+               *(.vsyscall_fn)
+       }
+
+       . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+       .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) {
+               *(.vsyscall_gtod_data)
+       }
+
+       vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
+       .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) {
+               *(.vsyscall_clock)
+       }
+       vsyscall_clock = VVIRT(.vsyscall_clock);
+
+
+       .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {
+               *(.vsyscall_1)
+       }
+       .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {
+               *(.vsyscall_2)
+       }
+
+       .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) {
+               *(.vgetcpu_mode)
+       }
+       vgetcpu_mode = VVIRT(.vgetcpu_mode);
+
+       . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
+       .jiffies : AT(VLOAD(.jiffies)) {
+               *(.jiffies)
+       }
+       jiffies = VVIRT(.jiffies);
+
+       .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {
+               *(.vsyscall_3)
+       }
+
+       . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
+
+#undef VSYSCALL_ADDR
+#undef VSYSCALL_PHYS_ADDR
+#undef VSYSCALL_VIRT_ADDR
+#undef VLOAD_OFFSET
+#undef VLOAD
+#undef VVIRT_OFFSET
+#undef VVIRT
+
+#endif /* CONFIG_X86_64 */
+
+       /* init_task */
+       . = ALIGN(THREAD_SIZE);
+       .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
+               *(.data.init_task)
+       }
+#ifdef CONFIG_X86_64
+        :data.init
+#endif
+
+       /*
+        * smp_locks might be freed after init
+        * start/end must be page aligned
+        */
+       . = ALIGN(PAGE_SIZE);
+       .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
+               __smp_locks = .;
+               *(.smp_locks)
+               __smp_locks_end = .;
+               . = ALIGN(PAGE_SIZE);
+       }
+
+       /* Init code and data - will be freed after init */
+       . = ALIGN(PAGE_SIZE);
+       .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+               __init_begin = .; /* paired with __init_end */
+               _sinittext = .;
+               INIT_TEXT
+               _einittext = .;
+       }
+
+       .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
+               INIT_DATA
+       }
+
+       . = ALIGN(16);
+       .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
+               __setup_start = .;
+               *(.init.setup)
+               __setup_end = .;
+       }
+       .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
+               __initcall_start = .;
+               INITCALLS
+               __initcall_end = .;
+       }
+
+       .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
+               __con_initcall_start = .;
+               *(.con_initcall.init)
+               __con_initcall_end = .;
+       }
+
+       .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
+               __x86_cpu_dev_start = .;
+               *(.x86_cpu_dev.init)
+               __x86_cpu_dev_end = .;
+       }
+
+       SECURITY_INIT
+
+       . = ALIGN(8);
+       .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
+               __parainstructions = .;
+               *(.parainstructions)
+               __parainstructions_end = .;
+       }
+
+       . = ALIGN(8);
+       .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+               __alt_instructions = .;
+               *(.altinstructions)
+               __alt_instructions_end = .;
+       }
+
+       .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
+               *(.altinstr_replacement)
+       }
+
+       /*
+        * .exit.text is discarded at runtime, not link time, to deal with
+        *  references from .altinstructions and .eh_frame
+        */
+       .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
+               EXIT_TEXT
+       }
+
+       .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
+               EXIT_DATA
+       }
+
+#ifdef CONFIG_BLK_DEV_INITRD
+       . = ALIGN(PAGE_SIZE);
+       .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
+               __initramfs_start = .;
+               *(.init.ramfs)
+               __initramfs_end = .;
+       }
+#endif
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_SMP)
+       /*
+        * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
+        * output PHDR, so the next output section - __data_nosave - should
+        * start another section data.init2.  Also, pda should be at the head of
+        * percpu area.  Preallocate it and define the percpu offset symbol
+        * so that it can be accessed as a percpu variable.
+        */
+       . = ALIGN(PAGE_SIZE);
+       PERCPU_VADDR(0, :percpu)
+#else
+       PERCPU(PAGE_SIZE)
+#endif
+
+       . = ALIGN(PAGE_SIZE);
+
+       /* freed after init ends here */
+       .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) {
+               __init_end = .;
+       }
+
+#ifdef CONFIG_X86_64
+       .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
+               . = ALIGN(PAGE_SIZE);
+               __nosave_begin = .;
+               *(.data.nosave)
+               . = ALIGN(PAGE_SIZE);
+               __nosave_end = .;
+       } :data.init2
+       /* use another section data.init2, see PERCPU_VADDR() above */
+#endif
+
+       /* BSS */
+       . = ALIGN(PAGE_SIZE);
+       .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
+               __bss_start = .;
+               *(.bss.page_aligned)
+               *(.bss)
+               . = ALIGN(4);
+               __bss_stop = .;
+       }
+
+       . = ALIGN(PAGE_SIZE);
+       .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
+               __brk_base = .;
+               . += 64 * 1024;         /* 64k alignment slop space */
+               *(.brk_reservation)     /* areas brk users have reserved */
+               __brk_limit = .;
+       }
+
+       .end : AT(ADDR(.end) - LOAD_OFFSET) {
+               _end = .;
+       }
+
+       /* Sections to be discarded */
+       /DISCARD/ : {
+               *(.exitcall.exit)
+               *(.eh_frame)
+               *(.discard)
+       }
+
+        STABS_DEBUG
+        DWARF_DEBUG
+}
+
+
+#ifdef CONFIG_X86_32
+ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
+        "kernel image bigger than KERNEL_IMAGE_SIZE")
+#else
+/*
+ * Per-cpu symbols which need to be offset from __per_cpu_load
+ * for the boot processor.
+ */
+#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
+INIT_PER_CPU(gdt_page);
+INIT_PER_CPU(irq_stack_union);
+
+/*
+ * Build-time check on the image size:
+ */
+ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
+       "kernel image bigger than KERNEL_IMAGE_SIZE")
+
+#ifdef CONFIG_SMP
+ASSERT((per_cpu__irq_stack_union == 0),
+        "irq_stack_union is not at start of per-cpu area");
+#endif
+
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+       "kexec control code size is too big")
+#endif
+
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
deleted file mode 100644 (file)
index 62ad500..0000000
+++ /dev/null
@@ -1,229 +0,0 @@
-/* ld script to make i386 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- *
- * Don't define absolute symbols until and unless you know that symbol
- * value is should remain constant even if kernel image is relocated
- * at run time. Absolute symbols are not relocated. If symbol value should
- * change if kernel is relocated, make the symbol section relative and
- * put it inside the section definition.
- */
-
-#define LOAD_OFFSET __PAGE_OFFSET
-
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/thread_info.h>
-#include <asm/page_types.h>
-#include <asm/cache.h>
-#include <asm/boot.h>
-
-OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
-OUTPUT_ARCH(i386)
-ENTRY(phys_startup_32)
-jiffies = jiffies_64;
-
-PHDRS {
-       text PT_LOAD FLAGS(5);  /* R_E */
-       data PT_LOAD FLAGS(7);  /* RWE */
-       note PT_NOTE FLAGS(0);  /* ___ */
-}
-SECTIONS
-{
-  . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
-  phys_startup_32 = startup_32 - LOAD_OFFSET;
-
-  .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) {
-       _text = .;                      /* Text and read-only data */
-       *(.text.head)
-  } :text = 0x9090
-
-  /* read-only */
-  .text : AT(ADDR(.text) - LOAD_OFFSET) {
-       . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */
-       *(.text.page_aligned)
-       TEXT_TEXT
-       SCHED_TEXT
-       LOCK_TEXT
-       KPROBES_TEXT
-       IRQENTRY_TEXT
-       *(.fixup)
-       *(.gnu.warning)
-       _etext = .;                     /* End of text section */
-  } :text = 0x9090
-
-  NOTES :text :note
-
-  . = ALIGN(16);               /* Exception table */
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-       __start___ex_table = .;
-        *(__ex_table)
-       __stop___ex_table = .;
-  } :text = 0x9090
-
-  RODATA
-
-  /* writeable */
-  . = ALIGN(PAGE_SIZE);
-  .data : AT(ADDR(.data) - LOAD_OFFSET) {      /* Data */
-       DATA_DATA
-       CONSTRUCTORS
-       } :data
-
-  . = ALIGN(PAGE_SIZE);
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-       __nosave_begin = .;
-       *(.data.nosave)
-       . = ALIGN(PAGE_SIZE);
-       __nosave_end = .;
-  }
-
-  . = ALIGN(PAGE_SIZE);
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-       *(.data.page_aligned)
-       *(.data.idt)
-  }
-
-  . = ALIGN(32);
-  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-       *(.data.cacheline_aligned)
-  }
-
-  /* rarely changed data like cpu maps */
-  . = ALIGN(32);
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
-       *(.data.read_mostly)
-       _edata = .;             /* End of data section */
-  }
-
-  . = ALIGN(THREAD_SIZE);      /* init_task */
-  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
-       *(.data.init_task)
-  }
-
-  /* might get freed after init */
-  . = ALIGN(PAGE_SIZE);
-  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
-       __smp_locks = .;
-       *(.smp_locks)
-       __smp_locks_end = .;
-  }
-  /* will be freed after init
-   * Following ALIGN() is required to make sure no other data falls on the
-   * same page where __smp_alt_end is pointing as that page might be freed
-   * after boot. Always make sure that ALIGN() directive is present after
-   * the section which contains __smp_alt_end.
-   */
-  . = ALIGN(PAGE_SIZE);
-
-  /* will be freed after init */
-  . = ALIGN(PAGE_SIZE);                /* Init code and data */
-  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-       __init_begin = .;
-       _sinittext = .;
-       INIT_TEXT
-       _einittext = .;
-  }
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
-       INIT_DATA
-  }
-  . = ALIGN(16);
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-       __setup_start = .;
-       *(.init.setup)
-       __setup_end = .;
-   }
-  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-       __initcall_start = .;
-       INITCALLS
-       __initcall_end = .;
-  }
-  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-       __con_initcall_start = .;
-       *(.con_initcall.init)
-       __con_initcall_end = .;
-  }
-  .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-       __x86_cpu_dev_start = .;
-       *(.x86_cpu_dev.init)
-       __x86_cpu_dev_end = .;
-  }
-  SECURITY_INIT
-  . = ALIGN(4);
-  .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
-       __alt_instructions = .;
-       *(.altinstructions)
-       __alt_instructions_end = .;
-  }
-  .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-       *(.altinstr_replacement)
-  }
-  . = ALIGN(4);
-  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-       __parainstructions = .;
-       *(.parainstructions)
-       __parainstructions_end = .;
-  }
-  /* .exit.text is discard at runtime, not link time, to deal with references
-     from .altinstructions and .eh_frame */
-  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
-       EXIT_TEXT
-  }
-  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
-       EXIT_DATA
-  }
-#if defined(CONFIG_BLK_DEV_INITRD)
-  . = ALIGN(PAGE_SIZE);
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-       __initramfs_start = .;
-       *(.init.ramfs)
-       __initramfs_end = .;
-  }
-#endif
-  PERCPU(PAGE_SIZE)
-  . = ALIGN(PAGE_SIZE);
-  /* freed after init ends here */
-
-  .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-       __init_end = .;
-       __bss_start = .;                /* BSS */
-       *(.bss.page_aligned)
-       *(.bss)
-       . = ALIGN(4);
-       __bss_stop = .;
-  }
-
-  .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
-       . = ALIGN(PAGE_SIZE);
-       __brk_base = . ;
-       . += 64 * 1024 ;        /* 64k alignment slop space */
-       *(.brk_reservation)     /* areas brk users have reserved */
-       __brk_limit = . ;
-  }
-
-  .end : AT(ADDR(.end) - LOAD_OFFSET) {
-       _end = . ;
-  }
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-       *(.exitcall.exit)
-       *(.discard)
-       }
-
-  STABS_DEBUG
-
-  DWARF_DEBUG
-}
-
-/*
- * Build-time check on the image size:
- */
-ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),
-       "kernel image bigger than KERNEL_IMAGE_SIZE")
-
-#ifdef CONFIG_KEXEC
-/* Link time checks */
-#include <asm/kexec.h>
-
-ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
-       "kexec control code size is too big")
-#endif
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
deleted file mode 100644 (file)
index c874250..0000000
+++ /dev/null
@@ -1,298 +0,0 @@
-/* ld script to make x86-64 Linux kernel
- * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
- */
-
-#define LOAD_OFFSET __START_KERNEL_map
-
-#include <asm-generic/vmlinux.lds.h>
-#include <asm/asm-offsets.h>
-#include <asm/page_types.h>
-
-#undef i386    /* in case the preprocessor is a 32bit one */
-
-OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
-OUTPUT_ARCH(i386:x86-64)
-ENTRY(phys_startup_64)
-jiffies_64 = jiffies;
-PHDRS {
-       text PT_LOAD FLAGS(5);  /* R_E */
-       data PT_LOAD FLAGS(7);  /* RWE */
-       user PT_LOAD FLAGS(7);  /* RWE */
-       data.init PT_LOAD FLAGS(7);     /* RWE */
-#ifdef CONFIG_SMP
-       percpu PT_LOAD FLAGS(7);        /* RWE */
-#endif
-       data.init2 PT_LOAD FLAGS(7);    /* RWE */
-       note PT_NOTE FLAGS(0);  /* ___ */
-}
-SECTIONS
-{
-  . = __START_KERNEL;
-  phys_startup_64 = startup_64 - LOAD_OFFSET;
-  .text :  AT(ADDR(.text) - LOAD_OFFSET) {
-       _text = .;                      /* Text and read-only data */
-       /* First the code that has to be first for bootstrapping */
-       *(.text.head)
-       _stext = .;
-       /* Then the rest */
-       TEXT_TEXT
-       SCHED_TEXT
-       LOCK_TEXT
-       KPROBES_TEXT
-       IRQENTRY_TEXT
-       *(.fixup)
-       *(.gnu.warning)
-       _etext = .;             /* End of text section */
-  } :text = 0x9090
-
-  NOTES :text :note
-
-  . = ALIGN(16);               /* Exception table */
-  __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
-       __start___ex_table = .;
-        *(__ex_table)
-       __stop___ex_table = .;
-  } :text = 0x9090
-
-  RODATA
-
-  . = ALIGN(PAGE_SIZE);                /* Align data segment to page size boundary */
-                               /* Data */
-  .data : AT(ADDR(.data) - LOAD_OFFSET) {
-       DATA_DATA
-       CONSTRUCTORS
-       _edata = .;                     /* End of data section */
-       } :data
-
-
-  .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) {
-       . = ALIGN(PAGE_SIZE);
-       . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-       *(.data.cacheline_aligned)
-  }
-  . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES);
-  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) {
-       *(.data.read_mostly)
-  }
-
-#define VSYSCALL_ADDR (-10*1024*1024)
-#define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-#define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095))
-
-#define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR)
-#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-
-#define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR)
-#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
-  . = VSYSCALL_ADDR;
-  .vsyscall_0 :         AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user
-  __vsyscall_0 = VSYSCALL_VIRT_ADDR;
-
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) }
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data))
-               { *(.vsyscall_gtod_data) }
-  vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
-  .vsyscall_clock : AT(VLOAD(.vsyscall_clock))
-               { *(.vsyscall_clock) }
-  vsyscall_clock = VVIRT(.vsyscall_clock);
-
-
-  .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
-               { *(.vsyscall_1) }
-  .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2))
-               { *(.vsyscall_2) }
-
-  .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) }
-  vgetcpu_mode = VVIRT(.vgetcpu_mode);
-
-  . = ALIGN(CONFIG_X86_L1_CACHE_BYTES);
-  .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) }
-  jiffies = VVIRT(.jiffies);
-
-  .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3))
-               { *(.vsyscall_3) }
-
-  . = VSYSCALL_VIRT_ADDR + PAGE_SIZE;
-
-#undef VSYSCALL_ADDR
-#undef VSYSCALL_PHYS_ADDR
-#undef VSYSCALL_VIRT_ADDR
-#undef VLOAD_OFFSET
-#undef VLOAD
-#undef VVIRT_OFFSET
-#undef VVIRT
-
-  .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) {
-       . = ALIGN(THREAD_SIZE); /* init_task */
-       *(.data.init_task)
-  }:data.init
-
-  .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) {
-       . = ALIGN(PAGE_SIZE);
-       *(.data.page_aligned)
-  }
-
-  .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
-       /* might get freed after init */
-       . = ALIGN(PAGE_SIZE);
-       __smp_alt_begin = .;
-       __smp_locks = .;
-       *(.smp_locks)
-       __smp_locks_end = .;
-       . = ALIGN(PAGE_SIZE);
-       __smp_alt_end = .;
-  }
-
-  . = ALIGN(PAGE_SIZE);                /* Init code and data */
-  __init_begin = .;    /* paired with __init_end */
-  .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
-       _sinittext = .;
-       INIT_TEXT
-       _einittext = .;
-  }
-  .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) {
-       __initdata_begin = .;
-       INIT_DATA
-       __initdata_end = .;
-   }
-
-  .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) {
-       . = ALIGN(16);
-       __setup_start = .;
-       *(.init.setup)
-       __setup_end = .;
-  }
-  .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) {
-       __initcall_start = .;
-       INITCALLS
-       __initcall_end = .;
-  }
-  .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) {
-       __con_initcall_start = .;
-       *(.con_initcall.init)
-       __con_initcall_end = .;
-  }
-  .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
-       __x86_cpu_dev_start = .;
-       *(.x86_cpu_dev.init)
-       __x86_cpu_dev_end = .;
-  }
-  SECURITY_INIT
-
-  . = ALIGN(8);
-  .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
-       __parainstructions = .;
-       *(.parainstructions)
-       __parainstructions_end = .;
-  }
-
-  .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
-       . = ALIGN(8);
-       __alt_instructions = .;
-       *(.altinstructions)
-       __alt_instructions_end = .;
-  }
-  .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {
-       *(.altinstr_replacement)
-  }
-  /* .exit.text is discard at runtime, not link time, to deal with references
-     from .altinstructions and .eh_frame */
-  .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {
-       EXIT_TEXT
-  }
-  .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {
-       EXIT_DATA
-  }
-
-#ifdef CONFIG_BLK_DEV_INITRD
-  . = ALIGN(PAGE_SIZE);
-  .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) {
-       __initramfs_start = .;
-       *(.init.ramfs)
-       __initramfs_end = .;
-  }
-#endif
-
-#ifdef CONFIG_SMP
-  /*
-   * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
-   * output PHDR, so the next output section - __data_nosave - should
-   * start another section data.init2.  Also, pda should be at the head of
-   * percpu area.  Preallocate it and define the percpu offset symbol
-   * so that it can be accessed as a percpu variable.
-   */
-  . = ALIGN(PAGE_SIZE);
-  PERCPU_VADDR(0, :percpu)
-#else
-  PERCPU(PAGE_SIZE)
-#endif
-
-  . = ALIGN(PAGE_SIZE);
-  __init_end = .;
-
-  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
-       . = ALIGN(PAGE_SIZE);
-       __nosave_begin = .;
-       *(.data.nosave)
-       . = ALIGN(PAGE_SIZE);
-       __nosave_end = .;
-  } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
-
-  .bss : AT(ADDR(.bss) - LOAD_OFFSET) {
-       . = ALIGN(PAGE_SIZE);
-       __bss_start = .;                /* BSS */
-       *(.bss.page_aligned)
-       *(.bss)
-       __bss_stop = .;
-  }
-
-  .brk : AT(ADDR(.brk) - LOAD_OFFSET) {
-       . = ALIGN(PAGE_SIZE);
-       __brk_base = . ;
-       . += 64 * 1024 ;        /* 64k alignment slop space */
-       *(.brk_reservation)     /* areas brk users have reserved */
-       __brk_limit = . ;
-  }
-
-  _end = . ;
-
-  /* Sections to be discarded */
-  /DISCARD/ : {
-       *(.exitcall.exit)
-       *(.eh_frame)
-       *(.discard)
-       }
-
-  STABS_DEBUG
-
-  DWARF_DEBUG
-}
-
- /*
-  * Per-cpu symbols which need to be offset from __per_cpu_load
-  * for the boot processor.
-  */
-#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
-INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(irq_stack_union);
-
-/*
- * Build-time check on the image size:
- */
-ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
-       "kernel image bigger than KERNEL_IMAGE_SIZE")
-
-#ifdef CONFIG_SMP
-ASSERT((per_cpu__irq_stack_union == 0),
-        "irq_stack_union is not at start of per-cpu area");
-#endif
-
-#ifdef CONFIG_KEXEC
-#include <asm/kexec.h>
-
-ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
-       "kexec control code size is too big")
-#endif
index 44153af..25ee06a 100644 (file)
@@ -132,15 +132,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv)
                        return;
                }
 
-               /*
-                * Surround the RDTSC by barriers, to make sure it's not
-                * speculated to outside the seqlock critical section and
-                * does not cause time warps:
-                */
-               rdtsc_barrier();
                now = vread();
-               rdtsc_barrier();
-
                base = __vsyscall_gtod_data.clock.cycle_last;
                mask = __vsyscall_gtod_data.clock.mask;
                mult = __vsyscall_gtod_data.clock.mult;
index 27f0c9e..94e0e54 100644 (file)
@@ -1 +1,2 @@
 obj-y          := i386_head.o boot.o
+CFLAGS_boot.o  := $(call cc-option, -fno-stack-protector)
index ca7ec44..4e0c265 100644 (file)
@@ -67,6 +67,7 @@
 #include <asm/mce.h>
 #include <asm/io.h>
 #include <asm/i387.h>
+#include <asm/stackprotector.h>
 #include <asm/reboot.h>                /* for struct machine_ops */
 
 /*G:010 Welcome to the Guest!
@@ -166,10 +167,16 @@ static void lazy_hcall3(unsigned long call,
 
 /* When lazy mode is turned off reset the per-cpu lazy mode variable and then
  * issue the do-nothing hypercall to flush any stored calls. */
-static void lguest_leave_lazy_mode(void)
+static void lguest_leave_lazy_mmu_mode(void)
 {
-       paravirt_leave_lazy(paravirt_get_lazy_mode());
        kvm_hypercall0(LHCALL_FLUSH_ASYNC);
+       paravirt_leave_lazy_mmu();
+}
+
+static void lguest_end_context_switch(struct task_struct *next)
+{
+       kvm_hypercall0(LHCALL_FLUSH_ASYNC);
+       paravirt_end_context_switch(next);
 }
 
 /*G:033
@@ -636,7 +643,7 @@ static void __init lguest_init_IRQ(void)
 
 void lguest_setup_irq(unsigned int irq)
 {
-       irq_to_desc_alloc_cpu(irq, 0);
+       irq_to_desc_alloc_node(irq, 0);
        set_irq_chip_and_handler_name(irq, &lguest_irq_controller,
                                      handle_level_irq, "level");
 }
@@ -1053,8 +1060,8 @@ __init void lguest_init(void)
        pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry;
        pv_cpu_ops.write_idt_entry = lguest_write_idt_entry;
        pv_cpu_ops.wbinvd = lguest_wbinvd;
-       pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu;
-       pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+       pv_cpu_ops.start_context_switch = paravirt_start_context_switch;
+       pv_cpu_ops.end_context_switch = lguest_end_context_switch;
 
        /* pagetable management */
        pv_mmu_ops.write_cr3 = lguest_write_cr3;
@@ -1067,7 +1074,7 @@ __init void lguest_init(void)
        pv_mmu_ops.read_cr2 = lguest_read_cr2;
        pv_mmu_ops.read_cr3 = lguest_read_cr3;
        pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
-       pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode;
+       pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
        pv_mmu_ops.pte_update = lguest_pte_update;
        pv_mmu_ops.pte_update_defer = lguest_pte_update;
 
@@ -1088,13 +1095,21 @@ __init void lguest_init(void)
         * lguest_init() where the rest of the fairly chaotic boot setup
         * occurs. */
 
+       /* The stack protector is a weird thing where gcc places a canary
+        * value on the stack and then checks it on return.  This file is
+        * compiled with -fno-stack-protector, so we got this far without
+        * problems.  The value of the canary is kept at offset 20 from the
+        * %gs register, so we need to set that up before calling C functions
+        * in other files. */
+       setup_stack_canary_segment(0);
+       /* We could just call load_stack_canary_segment(), but we might as well
+        * call switch_to_new_gdt() which loads the whole table and sets up
+        * the per-cpu segment descriptor register %fs as well. */
+       switch_to_new_gdt(0);
+
        /* As described in head_32.S, we map the first 128M of memory. */
        max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT;
 
-       /* Load the %fs segment register (the per-cpu segment register) with
-        * the normal data segment to get through booting. */
-       asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory");
-
        /* The Host<->Guest Switcher lives at the top of our address space, and
         * the Host told us how big it is when we made LGUEST_INIT hypercall:
         * it put the answer in lguest_data.reserve_mem  */
index e7277cb..a725b7f 100644 (file)
@@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st,
                   st->current_address >= st->marker[1].start_address) {
                const char *unit = units;
                unsigned long delta;
+               int width = sizeof(unsigned long) * 2;
 
                /*
                 * Now print the actual finished series
                 */
-               seq_printf(m, "0x%p-0x%p   ",
-                          (void *)st->start_address,
-                          (void *)st->current_address);
+               seq_printf(m, "0x%0*lx-0x%0*lx   ",
+                          width, st->start_address,
+                          width, st->current_address);
 
                delta = (st->current_address - st->start_address) >> 10;
                while (!(delta & 1023) && unit[1]) {
index a03b727..5ec7ae3 100644 (file)
@@ -3,40 +3,16 @@
  *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
  *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
  */
-#include <linux/interrupt.h>
-#include <linux/mmiotrace.h>
-#include <linux/bootmem.h>
-#include <linux/compiler.h>
-#include <linux/highmem.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/vt_kern.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/errno.h>
-#include <linux/magic.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/tty.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
-
-#include <asm-generic/sections.h>
-
-#include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
-#include <asm/segment.h>
-#include <asm/system.h>
-#include <asm/proto.h>
-#include <asm/traps.h>
-#include <asm/desc.h>
+#include <linux/magic.h>               /* STACK_END_MAGIC              */
+#include <linux/sched.h>               /* test_thread_flag(), ...      */
+#include <linux/kdebug.h>              /* oops_begin/end, ...          */
+#include <linux/module.h>              /* search_exception_table       */
+#include <linux/bootmem.h>             /* max_low_pfn                  */
+#include <linux/kprobes.h>             /* __kprobes, ...               */
+#include <linux/mmiotrace.h>           /* kmmio_handler, ...           */
+
+#include <asm/traps.h>                 /* dotraplinkage, ...           */
+#include <asm/pgalloc.h>               /* pgd_*(), ...                 */
 
 /*
  * Page fault error code bits:
@@ -225,12 +201,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
        if (!pmd_present(*pmd_k))
                return NULL;
 
-       if (!pmd_present(*pmd)) {
+       if (!pmd_present(*pmd))
                set_pmd(pmd, *pmd_k);
-               arch_flush_lazy_mmu_mode();
-       } else {
+       else
                BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
-       }
 
        return pmd_k;
 }
@@ -538,8 +512,6 @@ bad:
 static int is_errata93(struct pt_regs *regs, unsigned long address)
 {
 #ifdef CONFIG_X86_64
-       static int once;
-
        if (address != regs->ip)
                return 0;
 
@@ -549,10 +521,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address)
        address |= 0xffffffffUL << 32;
        if ((address >= (u64)_stext && address <= (u64)_etext) ||
            (address >= MODULES_VADDR && address <= MODULES_END)) {
-               if (!once) {
-                       printk(errata93_warning);
-                       once = 1;
-               }
+               printk_once(errata93_warning);
                regs->ip = address;
                return 1;
        }
index 8126e8d..58f621e 100644 (file)
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
        vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
        BUG_ON(!pte_none(*(kmap_pte-idx)));
        set_pte(kmap_pte-idx, mk_pte(page, prot));
-       arch_flush_lazy_mmu_mode();
 
        return (void *)vaddr;
 }
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type)
 #endif
        }
 
-       arch_flush_lazy_mmu_mode();
        pagefault_enable();
 }
 
index ae4f7b5..34c1bfb 100644 (file)
@@ -1,3 +1,4 @@
+#include <linux/initrd.h>
 #include <linux/ioport.h>
 #include <linux/swap.h>
 
@@ -10,6 +11,9 @@
 #include <asm/setup.h>
 #include <asm/system.h>
 #include <asm/tlbflush.h>
+#include <asm/tlb.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 
 unsigned long __initdata e820_table_start;
 unsigned long __meminitdata e820_table_end;
@@ -23,6 +27,69 @@ int direct_gbpages
 #endif
 ;
 
+int nx_enabled;
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on      Enable
+ * off     Disable
+ */
+static int __init noexec_setup(char *str)
+{
+       if (!str)
+               return -EINVAL;
+       if (!strncmp(str, "on", 2)) {
+               __supported_pte_mask |= _PAGE_NX;
+               disable_nx = 0;
+       } else if (!strncmp(str, "off", 3)) {
+               disable_nx = 1;
+               __supported_pte_mask &= ~_PAGE_NX;
+       }
+       return 0;
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+static void __init set_nx(void)
+{
+       unsigned int v[4], l, h;
+
+       if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+               cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+
+               if ((v[3] & (1 << 20)) && !disable_nx) {
+                       rdmsr(MSR_EFER, l, h);
+                       l |= EFER_NX;
+                       wrmsr(MSR_EFER, l, h);
+                       nx_enabled = 1;
+                       __supported_pte_mask |= _PAGE_NX;
+               }
+       }
+}
+#else
+static inline void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+       unsigned long efer;
+
+       rdmsrl(MSR_EFER, efer);
+       if (!(efer & EFER_NX) || disable_nx)
+               __supported_pte_mask &= ~_PAGE_NX;
+}
+#endif
+
 static void __init find_early_table_space(unsigned long end, int use_pse,
                                          int use_gbpages)
 {
@@ -66,12 +133,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
         */
 #ifdef CONFIG_X86_32
        start = 0x7000;
-       e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
-                                       tables, PAGE_SIZE);
-#else /* CONFIG_X86_64 */
+#else
        start = 0x8000;
-       e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE);
 #endif
+       e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT,
+                                       tables, PAGE_SIZE);
        if (e820_table_start == -1UL)
                panic("Cannot find space for the kernel page tables");
 
@@ -159,12 +225,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
        use_gbpages = direct_gbpages;
 #endif
 
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_PAE
        set_nx();
        if (nx_enabled)
                printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
 
        /* Enable PSE if available */
        if (cpu_has_pse)
@@ -175,7 +238,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
                set_in_cr4(X86_CR4_PGE);
                __supported_pte_mask |= _PAGE_GLOBAL;
        }
-#endif
 
        if (use_gbpages)
                page_size_mask |= 1 << PG_LEVEL_1G;
index 749559e..949708d 100644 (file)
 #include <asm/paravirt.h>
 #include <asm/setup.h>
 #include <asm/cacheflush.h>
+#include <asm/page_types.h>
 #include <asm/init.h>
 
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
 unsigned long highstart_pfn, highend_pfn;
 
 static noinline int do_test_wp_bit(void);
@@ -587,61 +584,9 @@ void zap_low_mappings(void)
        flush_tlb_all();
 }
 
-int nx_enabled;
-
 pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP);
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-#ifdef CONFIG_X86_PAE
-
-static int disable_nx __initdata;
-
-/*
- * noexec = on|off
- *
- * Control non executable mappings.
- *
- * on      Enable
- * off     Disable
- */
-static int __init noexec_setup(char *str)
-{
-       if (!str || !strcmp(str, "on")) {
-               if (cpu_has_nx) {
-                       __supported_pte_mask |= _PAGE_NX;
-                       disable_nx = 0;
-               }
-       } else {
-               if (!strcmp(str, "off")) {
-                       disable_nx = 1;
-                       __supported_pte_mask &= ~_PAGE_NX;
-               } else {
-                       return -EINVAL;
-               }
-       }
-
-       return 0;
-}
-early_param("noexec", noexec_setup);
-
-void __init set_nx(void)
-{
-       unsigned int v[4], l, h;
-
-       if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
-               cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
-               if ((v[3] & (1 << 20)) && !disable_nx) {
-                       rdmsr(MSR_EFER, l, h);
-                       l |= EFER_NX;
-                       wrmsr(MSR_EFER, l, h);
-                       nx_enabled = 1;
-                       __supported_pte_mask |= _PAGE_NX;
-               }
-       }
-}
-#endif
-
 /* user-defined highmem size */
 static unsigned int highmem_pages = -1;
 
@@ -761,15 +706,15 @@ void __init initmem_init(unsigned long start_pfn,
        highstart_pfn = highend_pfn = max_pfn;
        if (max_pfn > max_low_pfn)
                highstart_pfn = max_low_pfn;
-       memory_present(0, 0, highend_pfn);
        e820_register_active_regions(0, 0, highend_pfn);
+       sparse_memory_present_with_active_regions(0);
        printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
                pages_to_mb(highend_pfn - highstart_pfn));
        num_physpages = highend_pfn;
        high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
 #else
-       memory_present(0, 0, max_low_pfn);
        e820_register_active_regions(0, 0, max_low_pfn);
+       sparse_memory_present_with_active_regions(0);
        num_physpages = max_low_pfn;
        high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
 #endif
index 1753e80..52bb951 100644 (file)
 #include <asm/cacheflush.h>
 #include <asm/init.h>
 
-/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
 static unsigned long dma_reserve __initdata;
 
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
 static int __init parse_direct_gbpages_off(char *arg)
 {
        direct_gbpages = 0;
@@ -85,39 +75,6 @@ early_param("gbpages", parse_direct_gbpages_on);
 pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP;
 EXPORT_SYMBOL_GPL(__supported_pte_mask);
 
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec=on|off
- * Control non-executable mappings for 64-bit processes.
- *
- * on  Enable (default)
- * off Disable
- */
-static int __init nonx_setup(char *str)
-{
-       if (!str)
-               return -EINVAL;
-       if (!strncmp(str, "on", 2)) {
-               __supported_pte_mask |= _PAGE_NX;
-               disable_nx = 0;
-       } else if (!strncmp(str, "off", 3)) {
-               disable_nx = 1;
-               __supported_pte_mask &= ~_PAGE_NX;
-       }
-       return 0;
-}
-early_param("noexec", nonx_setup);
-
-void __cpuinit check_efer(void)
-{
-       unsigned long efer;
-
-       rdmsrl(MSR_EFER, efer);
-       if (!(efer & EFER_NX) || disable_nx)
-               __supported_pte_mask &= ~_PAGE_NX;
-}
-
 int force_personality32;
 
 /*
@@ -628,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
        early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
        reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
+#endif
 
 void __init paging_init(void)
 {
@@ -638,11 +596,10 @@ void __init paging_init(void)
        max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
        max_zone_pfns[ZONE_NORMAL] = max_pfn;
 
-       memory_present(0, 0, max_pfn);
+       sparse_memory_present_with_active_regions(MAX_NUMNODES);
        sparse_init();
        free_area_init_nodes(max_zone_pfns);
 }
-#endif
 
 /*
  * Memory hotplug specific functions
index 8056545..fe6f84c 100644 (file)
@@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type)
        if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
                kpte_clear_flush(kmap_pte-idx, vaddr);
 
-       arch_flush_lazy_mmu_mode();
        pagefault_enable();
 }
 EXPORT_SYMBOL_GPL(iounmap_atomic);
index 50dc802..16ccbd7 100644 (file)
@@ -32,7 +32,7 @@ struct kmmio_fault_page {
        struct list_head list;
        struct kmmio_fault_page *release_next;
        unsigned long page; /* location of the fault page */
-       bool old_presence; /* page presence prior to arming */
+       pteval_t old_presence; /* page presence prior to arming */
        bool armed;
 
        /*
@@ -97,60 +97,62 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
 static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
 {
        struct list_head *head;
-       struct kmmio_fault_page *p;
+       struct kmmio_fault_page *f;
 
        page &= PAGE_MASK;
        head = kmmio_page_list(page);
-       list_for_each_entry_rcu(p, head, list) {
-               if (p->page == page)
-                       return p;
+       list_for_each_entry_rcu(f, head, list) {
+               if (f->page == page)
+                       return f;
        }
        return NULL;
 }
 
-static void set_pmd_presence(pmd_t *pmd, bool present, bool *old)
+static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
 {
        pmdval_t v = pmd_val(*pmd);
-       *old = !!(v & _PAGE_PRESENT);
-       v &= ~_PAGE_PRESENT;
-       if (present)
-               v |= _PAGE_PRESENT;
+       if (clear) {
+               *old = v & _PAGE_PRESENT;
+               v &= ~_PAGE_PRESENT;
+       } else  /* presume this has been called with clear==true previously */
+               v |= *old;
        set_pmd(pmd, __pmd(v));
 }
 
-static void set_pte_presence(pte_t *pte, bool present, bool *old)
+static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
 {
        pteval_t v = pte_val(*pte);
-       *old = !!(v & _PAGE_PRESENT);
-       v &= ~_PAGE_PRESENT;
-       if (present)
-               v |= _PAGE_PRESENT;
+       if (clear) {
+               *old = v & _PAGE_PRESENT;
+               v &= ~_PAGE_PRESENT;
+       } else  /* presume this has been called with clear==true previously */
+               v |= *old;
        set_pte_atomic(pte, __pte(v));
 }
 
-static int set_page_presence(unsigned long addr, bool present, bool *old)
+static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
 {
        unsigned int level;
-       pte_t *pte = lookup_address(addr, &level);
+       pte_t *pte = lookup_address(f->page, &level);
 
        if (!pte) {
-               pr_err("kmmio: no pte for page 0x%08lx\n", addr);
+               pr_err("kmmio: no pte for page 0x%08lx\n", f->page);
                return -1;
        }
 
        switch (level) {
        case PG_LEVEL_2M:
-               set_pmd_presence((pmd_t *)pte, present, old);
+               clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
                break;
        case PG_LEVEL_4K:
-               set_pte_presence(pte, present, old);
+               clear_pte_presence(pte, clear, &f->old_presence);
                break;
        default:
                pr_err("kmmio: unexpected page level 0x%x.\n", level);
                return -1;
        }
 
-       __flush_tlb_one(addr);
+       __flush_tlb_one(f->page);
        return 0;
 }
 
@@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
        WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n");
        if (f->armed) {
                pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n",
-                                       f->page, f->count, f->old_presence);
+                                       f->page, f->count, !!f->old_presence);
        }
-       ret = set_page_presence(f->page, false, &f->old_presence);
+       ret = clear_page_presence(f, true);
        WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page);
        f->armed = true;
        return ret;
@@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
 /** Restore the given page to saved presence state. */
 static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
 {
-       bool tmp;
-       int ret = set_page_presence(f->page, f->old_presence, &tmp);
+       int ret = clear_page_presence(f, false);
        WARN_ONCE(ret < 0,
                        KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page);
        f->armed = false;
@@ -310,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
        struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);
 
        if (!ctx->active) {
-               pr_debug("kmmio: spurious debug trap on CPU %d.\n",
+               /*
+                * debug traps without an active context are due to either
+                * something external causing them (f.e. using a debugger while
+                * mmio tracing enabled), or erroneous behaviour
+                */
+               pr_warning("kmmio: unexpected debug trap on CPU %d.\n",
                                                        smp_processor_id());
                goto out;
        }
@@ -439,12 +445,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
-       struct kmmio_fault_page *p = dr->release_list;
-       while (p) {
-               struct kmmio_fault_page *next = p->release_next;
-               BUG_ON(p->count);
-               kfree(p);
-               p = next;
+       struct kmmio_fault_page *f = dr->release_list;
+       while (f) {
+               struct kmmio_fault_page *next = f->release_next;
+               BUG_ON(f->count);
+               kfree(f);
+               f = next;
        }
        kfree(dr);
 }
@@ -453,19 +459,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
 {
        struct kmmio_delayed_release *dr =
                container_of(head, struct kmmio_delayed_release, rcu);
-       struct kmmio_fault_page *p = dr->release_list;
+       struct kmmio_fault_page *f = dr->release_list;
        struct kmmio_fault_page **prevp = &dr->release_list;
        unsigned long flags;
 
        spin_lock_irqsave(&kmmio_lock, flags);
-       while (p) {
-               if (!p->count) {
-                       list_del_rcu(&p->list);
-                       prevp = &p->release_next;
+       while (f) {
+               if (!f->count) {
+                       list_del_rcu(&f->list);
+                       prevp = &f->release_next;
                } else {
-                       *prevp = p->release_next;
+                       *prevp = f->release_next;
                }
-               p = p->release_next;
+               f = f->release_next;
        }
        spin_unlock_irqrestore(&kmmio_lock, flags);
 
@@ -528,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 }
 EXPORT_SYMBOL(unregister_kmmio_probe);
 
-static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
-                                                               void *args)
+static int
+kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
 {
        struct die_args *arg = args;
 
@@ -544,11 +550,23 @@ static struct notifier_block nb_die = {
        .notifier_call = kmmio_die_notifier
 };
 
-static int __init init_kmmio(void)
+int kmmio_init(void)
 {
        int i;
+
        for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
                INIT_LIST_HEAD(&kmmio_page_table[i]);
+
        return register_die_notifier(&nb_die);
 }
-fs_initcall(init_kmmio); /* should be before device_initcall() */
+
+void kmmio_cleanup(void)
+{
+       int i;
+
+       unregister_die_notifier(&nb_die);
+       for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
+               WARN_ONCE(!list_empty(&kmmio_page_table[i]),
+                       KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
+       }
+}
index 605c8be..c0bedcd 100644 (file)
@@ -40,23 +40,23 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad)
 
 static void __init memtest(u64 pattern, u64 start_phys, u64 size)
 {
-       u64 i, count;
-       u64 *start;
+       u64 *p;
+       void *start, *end;
        u64 start_bad, last_bad;
        u64 start_phys_aligned;
        size_t incr;
 
        incr = sizeof(pattern);
        start_phys_aligned = ALIGN(start_phys, incr);
-       count = (size - (start_phys_aligned - start_phys))/incr;
        start = __va(start_phys_aligned);
+       end = start + size - (start_phys_aligned - start_phys);
        start_bad = 0;
        last_bad = 0;
 
-       for (i = 0; i < count; i++)
-               start[i] = pattern;
-       for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
-               if (*start == pattern)
+       for (p = start; p < end; p++)
+               *p = pattern;
+       for (p = start; p < end; p++, start_phys_aligned += incr) {
+               if (*p == pattern)
                        continue;
                if (start_phys_aligned == last_bad + incr) {
                        last_bad += incr;
index c9342ed..132772a 100644 (file)
@@ -451,6 +451,7 @@ void enable_mmiotrace(void)
 
        if (nommiotrace)
                pr_info(NAME "MMIO tracing disabled.\n");
+       kmmio_init();
        enter_uniprocessor();
        spin_lock_irq(&trace_lock);
        atomic_inc(&mmiotrace_enabled);
@@ -473,6 +474,7 @@ void disable_mmiotrace(void)
 
        clear_trace_list(); /* guarantees: no more kmmio callbacks */
        leave_uniprocessor();
+       kmmio_cleanup();
        pr_info(NAME "disabled.\n");
 out:
        mutex_unlock(&mmiotrace_mutex);
index 2d05a12..459913b 100644 (file)
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
 }
 
 /* Initialize bootmem allocator for a node */
-void __init setup_node_bootmem(int nodeid, unsigned long start,
-                              unsigned long end)
+void __init
+setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
 {
        unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
+       const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
        unsigned long bootmap_start, nodedata_phys;
        void *bootmap;
-       const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
        int nid;
 
        if (!end)
                return;
 
+       /*
+        * Don't confuse VM with a node that doesn't have the
+        * minimum amount of memory:
+        */
+       if (end && (end - start) < NODE_MIN_SIZE)
+               return;
+
        start = roundup(start, ZONE_ALIGN);
 
        printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
@@ -272,9 +279,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
                reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
                                 bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
 
-#ifdef CONFIG_ACPI_NUMA
-       srat_reserve_add_area(nodeid);
-#endif
        node_set_online(nodeid);
 }
 
@@ -578,21 +582,6 @@ unsigned long __init numa_free_all_bootmem(void)
        return pages;
 }
 
-void __init paging_init(void)
-{
-       unsigned long max_zone_pfns[MAX_NR_ZONES];
-
-       memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
-       max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
-       max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
-       max_zone_pfns[ZONE_NORMAL] = max_pfn;
-
-       sparse_memory_present_with_active_regions(MAX_NUMNODES);
-       sparse_init();
-
-       free_area_init_nodes(max_zone_pfns);
-}
-
 static __init int numa_setup(char *opt)
 {
        if (!opt)
@@ -606,8 +595,6 @@ static __init int numa_setup(char *opt)
 #ifdef CONFIG_ACPI_NUMA
        if (!strncmp(opt, "noacpi", 6))
                acpi_numa = -1;
-       if (!strncmp(opt, "hotadd=", 7))
-               hotadd_percent = simple_strtoul(opt+7, NULL, 10);
 #endif
        return 0;
 }
index e17efed..6ce9518 100644 (file)
@@ -839,13 +839,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 
        vm_unmap_aliases();
 
-       /*
-        * If we're called with lazy mmu updates enabled, the
-        * in-memory pte state may be stale.  Flush pending updates to
-        * bring them up to date.
-        */
-       arch_flush_lazy_mmu_mode();
-
        cpa.vaddr = addr;
        cpa.pages = pages;
        cpa.numpages = numpages;
@@ -890,13 +883,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
        } else
                cpa_flush_all(cache);
 
-       /*
-        * If we've been called with lazy mmu updates enabled, then
-        * make sure that everything gets flushed out before we
-        * return.
-        */
-       arch_flush_lazy_mmu_mode();
-
 out:
        return ret;
 }
index 0176595..2dfcbf9 100644 (file)
@@ -31,17 +31,11 @@ static nodemask_t nodes_parsed __initdata;
 static nodemask_t cpu_nodes_parsed __initdata;
 static struct bootnode nodes[MAX_NUMNODES] __initdata;
 static struct bootnode nodes_add[MAX_NUMNODES];
-static int found_add_area __initdata;
-int hotadd_percent __initdata = 0;
 
 static int num_node_memblks __initdata;
 static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata;
 static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata;
 
-/* Too small nodes confuse the VM badly. Usually they result
-   from BIOS bugs. */
-#define NODE_MIN_SIZE (4*1024*1024)
-
 static __init int setup_node(int pxm)
 {
        return acpi_map_pxm_to_node(pxm);
@@ -66,9 +60,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end)
 {
        struct bootnode *nd = &nodes[i];
 
-       if (found_add_area)
-               return;
-
        if (nd->start < start) {
                nd->start = start;
                if (nd->end < nd->start)
@@ -86,7 +77,6 @@ static __init void bad_srat(void)
        int i;
        printk(KERN_ERR "SRAT: SRAT not used.\n");
        acpi_numa = -1;
-       found_add_area = 0;
        for (i = 0; i < MAX_LOCAL_APIC; i++)
                apicid_to_node[i] = NUMA_NO_NODE;
        for (i = 0; i < MAX_NUMNODES; i++)
@@ -182,24 +172,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa)
               pxm, apic_id, node);
 }
 
-static int update_end_of_memory(unsigned long end) {return -1;}
-static int hotadd_enough_memory(struct bootnode *nd) {return 1;}
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static inline int save_add_info(void) {return 1;}
 #else
 static inline int save_add_info(void) {return 0;}
 #endif
 /*
- * Update nodes_add and decide if to include add are in the zone.
- * Both SPARSE and RESERVE need nodes_add information.
- * This code supports one contiguous hot add area per node.
+ * Update nodes_add[]
+ * This code supports one contiguous hot add area per node
  */
-static int __init
-reserve_hotadd(int node, unsigned long start, unsigned long end)
+static void __init
+update_nodes_add(int node, unsigned long start, unsigned long end)
 {
        unsigned long s_pfn = start >> PAGE_SHIFT;
        unsigned long e_pfn = end >> PAGE_SHIFT;
-       int ret = 0, changed = 0;
+       int changed = 0;
        struct bootnode *nd = &nodes_add[node];
 
        /* I had some trouble with strange memory hotadd regions breaking
@@ -210,7 +197,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
           mistakes */
        if ((signed long)(end - start) < NODE_MIN_SIZE) {
                printk(KERN_ERR "SRAT: Hotplug area too small\n");
-               return -1;
+               return;
        }
 
        /* This check might be a bit too strict, but I'm keeping it for now. */
@@ -218,12 +205,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
                printk(KERN_ERR
                        "SRAT: Hotplug area %lu -> %lu has existing memory\n",
                        s_pfn, e_pfn);
-               return -1;
-       }
-
-       if (!hotadd_enough_memory(&nodes_add[node]))  {
-               printk(KERN_ERR "SRAT: Hotplug area too large\n");
-               return -1;
+               return;
        }
 
        /* Looks good */
@@ -245,11 +227,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end)
                        printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
        }
 
-       ret = update_end_of_memory(nd->end);
-
        if (changed)
-               printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
-       return ret;
+               printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n",
+                                nd->start, nd->end);
 }
 
 /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
@@ -310,13 +290,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
               start, end);
        e820_register_active_regions(node, start >> PAGE_SHIFT,
                                     end >> PAGE_SHIFT);
-       push_node_boundaries(node, nd->start >> PAGE_SHIFT,
-                                               nd->end >> PAGE_SHIFT);
 
-       if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) &&
-           (reserve_hotadd(node, start, end) < 0)) {
-               /* Ignore hotadd region. Undo damage */
-               printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
+       if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
+               update_nodes_add(node, start, end);
+               /* restore nodes[node] */
                *nd = oldnode;
                if ((nd->start | nd->end) == 0)
                        node_clear(node, nodes_parsed);
@@ -345,9 +322,9 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
                        pxmram = 0;
        }
 
-       e820ram = max_pfn - absent_pages_in_range(0, max_pfn);
-       /* We seem to lose 3 pages somewhere. Allow a bit of slack. */
-       if ((long)(e820ram - pxmram) >= 1*1024*1024) {
+       e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT);
+       /* We seem to lose 3 pages somewhere. Allow 1M of slack. */
+       if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) {
                printk(KERN_ERR
        "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n",
                        (pxmram << PAGE_SHIFT) >> 20,
@@ -357,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes)
        return 1;
 }
 
-static void __init unparse_node(int node)
-{
-       int i;
-       node_clear(node, nodes_parsed);
-       node_clear(node, cpu_nodes_parsed);
-       for (i = 0; i < MAX_LOCAL_APIC; i++) {
-               if (apicid_to_node[i] == node)
-                       apicid_to_node[i] = NUMA_NO_NODE;
-       }
-}
-
 void __init acpi_numa_arch_fixup(void) {}
 
 /* Use the information discovered above to actually set up the nodes. */
@@ -379,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
                return -1;
 
        /* First clean up the node list */
-       for (i = 0; i < MAX_NUMNODES; i++) {
+       for (i = 0; i < MAX_NUMNODES; i++)
                cutoff_node(i, start, end);
-               /*
-                * don't confuse VM with a node that doesn't have the
-                * minimum memory.
-                */
-               if (nodes[i].end &&
-                       (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) {
-                       unparse_node(i);
-                       node_set_offline(i);
-               }
-       }
 
        if (!nodes_cover_memory(nodes)) {
                bad_srat();
@@ -423,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
 
                if (node == NUMA_NO_NODE)
                        continue;
-               if (!node_isset(node, node_possible_map))
+               if (!node_online(node))
                        numa_clear_node(i);
        }
        numa_init_array();
@@ -510,26 +466,6 @@ static int null_slit_node_compare(int a, int b)
 }
 #endif /* CONFIG_NUMA_EMU */
 
-void __init srat_reserve_add_area(int nodeid)
-{
-       if (found_add_area && nodes_add[nodeid].end) {
-               u64 total_mb;
-
-               printk(KERN_INFO "SRAT: Reserving hot-add memory space "
-                               "for node %d at %Lx-%Lx\n",
-                       nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end);
-               total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start)
-                                       >> PAGE_SHIFT;
-               total_mb *= sizeof(struct page);
-               total_mb >>= 20;
-               printk(KERN_INFO "SRAT: This will cost you %Lu MB of "
-                               "pre-allocated memory.\n", (unsigned long long)total_mb);
-               reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start,
-                              nodes_add[nodeid].end - nodes_add[nodeid].start,
-                              BOOTMEM_DEFAULT);
-       }
-}
-
 int __node_distance(int a, int b)
 {
        int index;
index 202864a..3b285e6 100644 (file)
@@ -356,14 +356,11 @@ static void exit_sysfs(void)
 #define exit_sysfs() do { } while (0)
 #endif /* CONFIG_PM */
 
-static int p4force;
-module_param(p4force, int, 0);
-
 static int __init p4_init(char **cpu_type)
 {
        __u8 cpu_model = boot_cpu_data.x86_model;
 
-       if (!p4force && (cpu_model > 6 || cpu_model == 5))
+       if (cpu_model > 6 || cpu_model == 5)
                return 0;
 
 #ifndef CONFIG_SMP
@@ -389,10 +386,25 @@ static int __init p4_init(char **cpu_type)
        return 0;
 }
 
+static int force_arch_perfmon;
+static int force_cpu_type(const char *str, struct kernel_param *kp)
+{
+       if (!strcmp(str, "archperfmon")) {
+               force_arch_perfmon = 1;
+               printk(KERN_INFO "oprofile: forcing architectural perfmon\n");
+       }
+
+       return 0;
+}
+module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0);
+
 static int __init ppro_init(char **cpu_type)
 {
        __u8 cpu_model = boot_cpu_data.x86_model;
 
+       if (force_arch_perfmon && cpu_has_arch_perfmon)
+               return 0;
+
        switch (cpu_model) {
        case 0 ... 2:
                *cpu_type = "i386/ppro";
@@ -414,6 +426,13 @@ static int __init ppro_init(char **cpu_type)
        case 15: case 23:
                *cpu_type = "i386/core_2";
                break;
+       case 26:
+               arch_perfmon_setup_counters();
+               *cpu_type = "i386/core_i7";
+               break;
+       case 28:
+               *cpu_type = "i386/atom";
+               break;
        default:
                /* Unknown */
                return 0;
index fecbce6..0696d50 100644 (file)
@@ -889,6 +889,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign)
                return 0;
        }
 
+       if (io_apic_assign_pci_irqs)
+               return 0;
+
        /* Find IRQ routing entry */
 
        if (!pirq_table)
@@ -1039,56 +1042,15 @@ static void __init pcibios_fixup_irqs(void)
                pirq_penalty[dev->irq]++;
        }
 
+       if (io_apic_assign_pci_irqs)
+               return;
+
        dev = NULL;
        while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
                pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
                if (!pin)
                        continue;
 
-#ifdef CONFIG_X86_IO_APIC
-               /*
-                * Recalculate IRQ numbers if we use the I/O APIC.
-                */
-               if (io_apic_assign_pci_irqs) {
-                       int irq;
-
-                       /*
-                        * interrupt pins are numbered starting from 1
-                        */
-                       irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-                               PCI_SLOT(dev->devfn), pin - 1);
-                       /*
-                        * Busses behind bridges are typically not listed in the
-                        * MP-table.  In this case we have to look up the IRQ
-                        * based on the parent bus, parent slot, and pin number.
-                        * The SMP code detects such bridged busses itself so we
-                        * should get into this branch reliably.
-                        */
-                       if (irq < 0 && dev->bus->parent) {
-                               /* go back to the bridge */
-                               struct pci_dev *bridge = dev->bus->self;
-                               int bus;
-
-                               pin = pci_swizzle_interrupt_pin(dev, pin);
-                               bus = bridge->bus->number;
-                               irq = IO_APIC_get_PCI_irq_vector(bus,
-                                               PCI_SLOT(bridge->devfn), pin - 1);
-                               if (irq >= 0)
-                                       dev_warn(&dev->dev,
-                                               "using bridge %s INT %c to "
-                                                       "get IRQ %d\n",
-                                                pci_name(bridge),
-                                                'A' + pin - 1, irq);
-                       }
-                       if (irq >= 0) {
-                               dev_info(&dev->dev,
-                                       "PCI->APIC IRQ transform: INT %c "
-                                               "-> IRQ %d\n",
-                                       'A' + pin - 1, irq);
-                               dev->irq = irq;
-                       }
-               }
-#endif
                /*
                 * Still no IRQ? Try to lookup one...
                 */
@@ -1183,6 +1145,19 @@ int __init pcibios_irq_init(void)
        pcibios_enable_irq = pirq_enable_irq;
 
        pcibios_fixup_irqs();
+
+       if (io_apic_assign_pci_irqs && pci_routeirq) {
+               struct pci_dev *dev = NULL;
+               /*
+                * PCI IRQ routing is set up by pci_enable_device(), but we
+                * also do it here in case there are still broken drivers that
+                * don't use pci_enable_device().
+                */
+               printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
+               for_each_pci_dev(dev)
+                       pirq_enable_irq(dev);
+       }
+
        return 0;
 }
 
@@ -1213,16 +1188,23 @@ void pcibios_penalize_isa_irq(int irq, int active)
 static int pirq_enable_irq(struct pci_dev *dev)
 {
        u8 pin;
-       struct pci_dev *temp_dev;
 
        pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
-       if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+       if (pin && !pcibios_lookup_irq(dev, 1)) {
                char *msg = "";
 
+               if (!io_apic_assign_pci_irqs && dev->irq)
+                       return 0;
+
                if (io_apic_assign_pci_irqs) {
+#ifdef CONFIG_X86_IO_APIC
+                       struct pci_dev *temp_dev;
                        int irq;
+                       struct io_apic_irq_attr irq_attr;
 
-                       irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1);
+                       irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+                                               PCI_SLOT(dev->devfn),
+                                               pin - 1, &irq_attr);
                        /*
                         * Busses behind bridges are typically not listed in the MP-table.
                         * In this case we have to look up the IRQ based on the parent bus,
@@ -1235,7 +1217,8 @@ static int pirq_enable_irq(struct pci_dev *dev)
 
                                pin = pci_swizzle_interrupt_pin(dev, pin);
                                irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
-                                               PCI_SLOT(bridge->devfn), pin - 1);
+                                               PCI_SLOT(bridge->devfn),
+                                               pin - 1, &irq_attr);
                                if (irq >= 0)
                                        dev_warn(&dev->dev, "using bridge %s "
                                                 "INT %c to get IRQ %d\n",
@@ -1245,12 +1228,15 @@ static int pirq_enable_irq(struct pci_dev *dev)
                        }
                        dev = temp_dev;
                        if (irq >= 0) {
+                               io_apic_set_pci_routing(&dev->dev, irq,
+                                                        &irq_attr);
+                               dev->irq = irq;
                                dev_info(&dev->dev, "PCI->APIC IRQ transform: "
                                         "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
-                               dev->irq = irq;
                                return 0;
                        } else
                                msg = "; probably buggy MP table";
+#endif
                } else if (pci_probe & PCI_BIOS_IRQ_SCAN)
                        msg = "";
                else
index 5fa10bb..8766b0e 100644 (file)
@@ -375,7 +375,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res,
                if (!fixmem32)
                        return AE_OK;
                if ((mcfg_res->start >= fixmem32->address) &&
-                   (mcfg_res->end <= (fixmem32->address +
+                   (mcfg_res->end < (fixmem32->address +
                                      fixmem32->address_length))) {
                        mcfg_res->flags = 1;
                        return AE_CTRL_TERMINATE;
@@ -392,7 +392,7 @@ static acpi_status __init check_mcfg_resource(struct acpi_resource *res,
                return AE_OK;
 
        if ((mcfg_res->start >= address.minimum) &&
-           (mcfg_res->end <= (address.minimum + address.address_length))) {
+           (mcfg_res->end < (address.minimum + address.address_length))) {
                mcfg_res->flags = 1;
                return AE_CTRL_TERMINATE;
        }
@@ -418,7 +418,7 @@ static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used)
        struct resource mcfg_res;
 
        mcfg_res.start = start;
-       mcfg_res.end = end;
+       mcfg_res.end = end - 1;
        mcfg_res.flags = 0;
 
        acpi_get_devices("PNP0C01", find_mboard_resource, &mcfg_res, NULL);
index 7133cdf..cac0833 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/random.h>
+#include <linux/elf.h>
 #include <asm/vsyscall.h>
 #include <asm/vgtod.h>
 #include <asm/proto.h>
index f09e8c3..0a1700a 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/start_kernel.h>
 #include <linux/sched.h>
+#include <linux/kprobes.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -44,6 +45,7 @@
 #include <asm/processor.h>
 #include <asm/proto.h>
 #include <asm/msr-index.h>
+#include <asm/traps.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
 #include <asm/pgtable.h>
@@ -240,10 +242,10 @@ static unsigned long xen_get_debugreg(int reg)
        return HYPERVISOR_get_debugreg(reg);
 }
 
-void xen_leave_lazy(void)
+static void xen_end_context_switch(struct task_struct *next)
 {
-       paravirt_leave_lazy(paravirt_get_lazy_mode());
        xen_mc_flush();
+       paravirt_end_context_switch(next);
 }
 
 static unsigned long xen_store_tr(void)
@@ -428,11 +430,44 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
 static int cvt_gate_to_trap(int vector, const gate_desc *val,
                            struct trap_info *info)
 {
+       unsigned long addr;
+
        if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT)
                return 0;
 
        info->vector = vector;
-       info->address = gate_offset(*val);
+
+       addr = gate_offset(*val);
+#ifdef CONFIG_X86_64
+       /*
+        * Look for known traps using IST, and substitute them
+        * appropriately.  The debugger ones are the only ones we care
+        * about.  Xen will handle faults like double_fault and
+        * machine_check, so we should never see them.  Warn if
+        * there's an unexpected IST-using fault handler.
+        */
+       if (addr == (unsigned long)debug)
+               addr = (unsigned long)xen_debug;
+       else if (addr == (unsigned long)int3)
+               addr = (unsigned long)xen_int3;
+       else if (addr == (unsigned long)stack_segment)
+               addr = (unsigned long)xen_stack_segment;
+       else if (addr == (unsigned long)double_fault ||
+                addr == (unsigned long)nmi) {
+               /* Don't need to handle these */
+               return 0;
+#ifdef CONFIG_X86_MCE
+       } else if (addr == (unsigned long)machine_check) {
+               return 0;
+#endif
+       } else {
+               /* Some other trap using IST? */
+               if (WARN_ON(val->ist != 0))
+                       return 0;
+       }
+#endif /* CONFIG_X86_64 */
+       info->address = addr;
+
        info->cs = gate_segment(*val);
        info->flags = val->dpl;
        /* interrupt gates clear IF */
@@ -623,10 +658,26 @@ static void xen_clts(void)
        xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
+static DEFINE_PER_CPU(unsigned long, xen_cr0_value);
+
+/*
+ * Return the cr0 value cached in this CPU's xen_cr0_value.
+ *
+ * A cached value of 0 is treated as "nothing cached yet": in that case
+ * cr0 is read with native_read_cr0() and stored in the per-cpu variable
+ * before being returned.
+ */
+static unsigned long xen_read_cr0(void)
+{
+       unsigned long cr0 = percpu_read(xen_cr0_value);
+
+       /* 0 means the per-cpu cache has not been filled on this CPU */
+       if (unlikely(cr0 == 0)) {
+               cr0 = native_read_cr0();
+               percpu_write(xen_cr0_value, cr0);
+       }
+
+       return cr0;
+}
+
 static void xen_write_cr0(unsigned long cr0)
 {
        struct multicall_space mcs;
 
+       percpu_write(xen_cr0_value, cr0);
+
        /* Only pay attention to cr0.TS; everything else is
           ignored. */
        mcs = xen_mc_entry(0);
@@ -812,7 +863,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
        .clts = xen_clts,
 
-       .read_cr0 = native_read_cr0,
+       .read_cr0 = xen_read_cr0,
        .write_cr0 = xen_write_cr0,
 
        .read_cr4 = native_read_cr4,
@@ -860,10 +911,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
        /* Xen takes care of %gs when switching to usermode for us */
        .swapgs = paravirt_nop,
 
-       .lazy_mode = {
-               .enter = paravirt_enter_lazy_cpu,
-               .leave = xen_leave_lazy,
-       },
+       .start_context_switch = paravirt_start_context_switch,
+       .end_context_switch = xen_end_context_switch,
 };
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
index fba55b1..4ceb285 100644 (file)
@@ -452,10 +452,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
                    pte_t *ptep, pte_t pteval)
 {
-       /* updates to init_mm may be done without lock */
-       if (mm == &init_mm)
-               preempt_disable();
-
        ADD_STATS(set_pte_at, 1);
 //     ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep));
        ADD_STATS(set_pte_at_current, mm == current->mm);
@@ -476,9 +472,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
        }
        xen_set_pte(ptep, pteval);
 
-out:
-       if (mm == &init_mm)
-               preempt_enable();
+out:   return;
 }
 
 pte_t xen_ptep_modify_prot_start(struct mm_struct *mm,
@@ -1152,10 +1146,8 @@ static void drop_other_mm_ref(void *info)
 
        /* If this cpu still has a stale cr3 reference, then make sure
           it has been flushed. */
-       if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
+       if (percpu_read(xen_current_cr3) == __pa(mm->pgd))
                load_cr3(swapper_pg_dir);
-               arch_flush_lazy_cpu_mode();
-       }
 }
 
 static void xen_drop_mm_ref(struct mm_struct *mm)
@@ -1168,7 +1160,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm)
                        load_cr3(swapper_pg_dir);
                else
                        leave_mm(smp_processor_id());
-               arch_flush_lazy_cpu_mode();
        }
 
        /* Get the "official" set of cpus referring to our pagetable. */
@@ -1876,6 +1867,14 @@ __init void xen_post_allocator_init(void)
        xen_mark_init_mm_pinned();
 }
 
+/*
+ * Leave lazy MMU mode: call xen_mc_flush() and then
+ * paravirt_leave_lazy_mmu(), with preemption disabled across both calls.
+ */
+static void xen_leave_lazy_mmu(void)
+{
+       preempt_disable();
+       xen_mc_flush();
+       paravirt_leave_lazy_mmu();
+       preempt_enable();
+}
+
 const struct pv_mmu_ops xen_mmu_ops __initdata = {
        .pagetable_setup_start = xen_pagetable_setup_start,
        .pagetable_setup_done = xen_pagetable_setup_done,
@@ -1949,7 +1948,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = {
 
        .lazy_mode = {
                .enter = paravirt_enter_lazy_mmu,
-               .leave = xen_leave_lazy,
+               .leave = xen_leave_lazy_mmu,
        },
 
        .set_fixmap = xen_set_fixmap,
index 15c6c68..ad0047f 100644 (file)
@@ -61,9 +61,9 @@ char * __init xen_memory_setup(void)
         *  - xen_start_info
         * See comment above "struct start_info" in <xen/interface/xen.h>
         */
-       e820_add_region(__pa(xen_start_info->mfn_list),
-                       xen_start_info->pt_base - xen_start_info->mfn_list,
-                       E820_RESERVED);
+       reserve_early(__pa(xen_start_info->mfn_list),
+                     __pa(xen_start_info->pt_base),
+                       "XEN START INFO");
 
        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
index ca6596b..22494fd 100644 (file)
@@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_ident_map_ISA(void);
 void xen_reserve_top(void);
 
-void xen_leave_lazy(void);
 void xen_post_allocator_init(void);
 
 char * __init xen_memory_setup(void);
index c89883b..648f15c 100644 (file)
 #include <linux/task_io_accounting_ops.h>
 #include <linux/blktrace_api.h>
 #include <linux/fault-inject.h>
-#include <trace/block.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/block.h>
 
 #include "blk.h"
 
-DEFINE_TRACE(block_plug);
-DEFINE_TRACE(block_unplug_io);
-DEFINE_TRACE(block_unplug_timer);
-DEFINE_TRACE(block_getrq);
-DEFINE_TRACE(block_sleeprq);
-DEFINE_TRACE(block_rq_requeue);
-DEFINE_TRACE(block_bio_backmerge);
-DEFINE_TRACE(block_bio_frontmerge);
-DEFINE_TRACE(block_bio_queue);
-DEFINE_TRACE(block_rq_complete);
-DEFINE_TRACE(block_remap);     /* Also used in drivers/md/dm.c */
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 
 static int __make_request(struct request_queue *q, struct bio *bio);
 
@@ -1277,7 +1269,7 @@ static inline void blk_partition_remap(struct bio *bio)
                bio->bi_bdev = bdev->bd_contains;
 
                trace_block_remap(bdev_get_queue(bio->bi_bdev), bio,
-                                   bdev->bd_dev, bio->bi_sector,
+                                   bdev->bd_dev,
                                    bio->bi_sector - p->start_sect);
        }
 }
@@ -1446,8 +1438,7 @@ static inline void __generic_make_request(struct bio *bio)
                        goto end_io;
 
                if (old_sector != -1)
-                       trace_block_remap(q, bio, old_dev, bio->bi_sector,
-                                           old_sector);
+                       trace_block_remap(q, bio, old_dev, old_sector);
 
                trace_block_bio_queue(q, bio);
 
@@ -1741,10 +1732,14 @@ static int __end_that_request_first(struct request *req, int error,
        trace_block_rq_complete(req->q, req);
 
        /*
-        * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
-        * sense key with us all the way through
+        * For fs requests, rq is just carrier of independent bio's
+        * and each partial completion should be handled separately.
+        * Reset per-request error on each partial completion.
+        *
+        * TODO: tj: This is too subtle.  It would be better to let
+        * low level drivers do what they see fit.
         */
-       if (!blk_pc_request(req))
+       if (blk_fs_request(req))
                req->errors = 0;
 
        if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) {
index 3ff9bba..26f9ec2 100644 (file)
@@ -383,16 +383,21 @@ struct kobj_type blk_queue_ktype = {
 int blk_register_queue(struct gendisk *disk)
 {
        int ret;
+       struct device *dev = disk_to_dev(disk);
 
        struct request_queue *q = disk->queue;
 
        if (WARN_ON(!q))
                return -ENXIO;
 
+       ret = blk_trace_init_sysfs(dev);
+       if (ret)
+               return ret;
+
        if (!q->request_fn)
                return 0;
 
-       ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj),
+       ret = kobject_add(&q->kobj, kobject_get(&dev->kobj),
                          "%s", "queue");
        if (ret < 0)
                return ret;
index 206060e..dd81be4 100644 (file)
@@ -315,6 +315,7 @@ out:
        blk_put_request(rq);
        if (next_rq) {
                blk_rq_unmap_user(next_rq->bio);
+               next_rq->bio = NULL;
                blk_put_request(next_rq);
        }
        return ERR_PTR(ret);
@@ -448,6 +449,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
                hdr->dout_resid = rq->data_len;
                hdr->din_resid = rq->next_rq->data_len;
                blk_rq_unmap_user(bidi_bio);
+               rq->next_rq->bio = NULL;
                blk_put_request(rq->next_rq);
        } else if (rq_data_dir(rq) == READ)
                hdr->din_resid = rq->data_len;
@@ -466,6 +468,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr,
        blk_rq_unmap_user(bio);
        if (rq->cmd != rq->__cmd)
                kfree(rq->cmd);
+       rq->bio = NULL;
        blk_put_request(rq);
 
        return ret;
index f87615d..f8c218c 100644 (file)
@@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg)
        memcpy(&buts.name, &cbuts.name, 32);
 
        mutex_lock(&bdev->bd_mutex);
-       ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts);
+       ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts);
        mutex_unlock(&bdev->bd_mutex);
        if (ret)
                return ret;
index 7073a90..e220f0c 100644 (file)
 #include <linux/compiler.h>
 #include <linux/delay.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <linux/hash.h>
 #include <linux/uaccess.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 
 static DEFINE_SPINLOCK(elv_list_lock);
 static LIST_HEAD(elv_list);
 
-DEFINE_TRACE(block_rq_abort);
-
 /*
  * Merge hash stuff.
  */
@@ -55,9 +54,6 @@ static const int elv_hash_shift = 6;
 #define rq_hash_key(rq)                ((rq)->sector + (rq)->nr_sectors)
 #define ELV_ON_HASH(rq)                (!hlist_unhashed(&(rq)->hash))
 
-DEFINE_TRACE(block_rq_insert);
-DEFINE_TRACE(block_rq_issue);
-
 /*
  * Query io scheduler to see if the current process issuing bio may be
  * merged with rq.
index 51b9f82..2faa9e2 100644 (file)
@@ -401,7 +401,8 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
                /* Interrupt Line values above 0xF are forbidden */
                if (dev->irq > 0 && (dev->irq <= 0xF)) {
                        printk(" - using IRQ %d\n", dev->irq);
-                       acpi_register_gsi(dev->irq, ACPI_LEVEL_SENSITIVE,
+                       acpi_register_gsi(&dev->dev, dev->irq,
+                                         ACPI_LEVEL_SENSITIVE,
                                          ACPI_ACTIVE_LOW);
                        return 0;
                } else {
@@ -410,7 +411,7 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
                }
        }
 
-       rc = acpi_register_gsi(gsi, triggering, polarity);
+       rc = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
        if (rc < 0) {
                dev_warn(&dev->dev, "PCI INT %c: failed to register GSI\n",
                         pin_name(pin));
index 45ad328..23f0fb8 100644 (file)
@@ -844,7 +844,7 @@ static int acpi_processor_add(struct acpi_device *device)
        if (!pr)
                return -ENOMEM;
 
-       if (!alloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
+       if (!zalloc_cpumask_var(&pr->throttling.shared_cpu_map, GFP_KERNEL)) {
                kfree(pr);
                return -ENOMEM;
        }
index 08186ec..6b91c26 100644 (file)
@@ -220,6 +220,7 @@ enum {
        AHCI_HFLAG_NO_HOTPLUG           = (1 << 7), /* ignore PxSERR.DIAG.N */
        AHCI_HFLAG_SECT255              = (1 << 8), /* max 255 sectors */
        AHCI_HFLAG_YES_NCQ              = (1 << 9), /* force NCQ cap on */
+       AHCI_HFLAG_NO_SUSPEND           = (1 << 10), /* don't suspend */
 
        /* ap->flags bits */
 
@@ -2316,9 +2317,17 @@ static int ahci_port_suspend(struct ata_port *ap, pm_message_t mesg)
 static int ahci_pci_device_suspend(struct pci_dev *pdev, pm_message_t mesg)
 {
        struct ata_host *host = dev_get_drvdata(&pdev->dev);
+       struct ahci_host_priv *hpriv = host->private_data;
        void __iomem *mmio = host->iomap[AHCI_PCI_BAR];
        u32 ctl;
 
+       if (mesg.event & PM_EVENT_SUSPEND &&
+           hpriv->flags & AHCI_HFLAG_NO_SUSPEND) {
+               dev_printk(KERN_ERR, &pdev->dev,
+                          "BIOS update required for suspend/resume\n");
+               return -EIO;
+       }
+
        if (mesg.event & PM_EVENT_SLEEP) {
                /* AHCI spec rev1.1 section 8.3.3:
                 * Software must disable interrupts prior to requesting a
@@ -2610,6 +2619,63 @@ static bool ahci_broken_system_poweroff(struct pci_dev *pdev)
        return false;
 }
 
+/*
+ * Report whether @pdev is on a system listed in the DMI table below whose
+ * BIOS version is older than that entry's cutoff.
+ *
+ * Returns true only when a table entry matches, @pdev is the device at
+ * bus 0, slot 0x1f, function 2, and the BIOS version string is either
+ * unavailable or compares (strcmp) below the entry's cutoff string.
+ * Returns false otherwise.
+ */
+static bool ahci_broken_suspend(struct pci_dev *pdev)
+{
+       static const struct dmi_system_id sysids[] = {
+               /*
+                * On HP dv[4-6] and HDX18 with earlier BIOSen, link
+                * to the harddisk doesn't become online after
+                * resuming from STR.  Warn and fail suspend.
+                */
+               {
+                       .ident = "dv4",
+                       .matches = {
+                               DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+                               DMI_MATCH(DMI_PRODUCT_NAME,
+                                         "HP Pavilion dv4 Notebook PC"),
+                       },
+                       .driver_data = "F.30", /* cutoff BIOS version */
+               },
+               {
+                       .ident = "dv5",
+                       .matches = {
+                               DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+                               DMI_MATCH(DMI_PRODUCT_NAME,
+                                         "HP Pavilion dv5 Notebook PC"),
+                       },
+                       .driver_data = "F.16", /* cutoff BIOS version */
+               },
+               {
+                       .ident = "dv6",
+                       .matches = {
+                               DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+                               DMI_MATCH(DMI_PRODUCT_NAME,
+                                         "HP Pavilion dv6 Notebook PC"),
+                       },
+                       .driver_data = "F.21",  /* cutoff BIOS version */
+               },
+               {
+                       .ident = "HDX18",
+                       .matches = {
+                               DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+                               DMI_MATCH(DMI_PRODUCT_NAME,
+                                         "HP HDX18 Notebook PC"),
+                       },
+                       .driver_data = "F.23",  /* cutoff BIOS version */
+               },
+               { }     /* terminate list */
+       };
+       const struct dmi_system_id *dmi = dmi_first_match(sysids);
+       const char *ver;
+
+       /* no table match, or not the device at bus 0, slot 0x1f, func 2 */
+       if (!dmi || pdev->bus->number || pdev->devfn != PCI_DEVFN(0x1f, 2))
+               return false;
+
+       ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+       /* a missing version string is treated the same as an old BIOS */
+       return !ver || strcmp(ver, dmi->driver_data) < 0;
+}
+
 static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
        static int printed_version;
@@ -2715,6 +2781,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
                        "quirky BIOS, skipping spindown on poweroff\n");
        }
 
+       if (ahci_broken_suspend(pdev)) {
+               hpriv->flags |= AHCI_HFLAG_NO_SUSPEND;
+               dev_printk(KERN_WARNING, &pdev->dev,
+                          "BIOS update required for suspend/resume\n");
+       }
+
        /* CAP.NP sometimes indicate the index of the last enabled
         * port, at other times, that of the last possible port, so
         * determining the maximum port number requires looking at
index d51a17c..1aeb708 100644 (file)
@@ -1455,6 +1455,15 @@ static bool piix_broken_system_poweroff(struct pci_dev *pdev)
                        /* PCI slot number of the controller */
                        .driver_data = (void *)0x1FUL,
                },
+               {
+                       .ident = "HP Compaq nc6000",
+                       .matches = {
+                               DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
+                               DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6000"),
+                       },
+                       /* PCI slot number of the controller */
+                       .driver_data = (void *)0x1FUL,
+               },
 
                { }     /* terminate list */
        };
index 751b7ea..fc9c5d6 100644 (file)
@@ -497,14 +497,16 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        };
        /* Revision 0x20 added DMA */
        static const struct ata_port_info info_20 = {
-               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+                                                       ATA_FLAG_IGN_SIMPLEX,
                .pio_mask = ATA_PIO4,
                .mwdma_mask = ATA_MWDMA2,
                .port_ops = &ali_20_port_ops
        };
        /* Revision 0x20 with support logic added UDMA */
        static const struct ata_port_info info_20_udma = {
-               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+                                                       ATA_FLAG_IGN_SIMPLEX,
                .pio_mask = ATA_PIO4,
                .mwdma_mask = ATA_MWDMA2,
                .udma_mask = ATA_UDMA2,
@@ -512,7 +514,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        };
        /* Revision 0xC2 adds UDMA66 */
        static const struct ata_port_info info_c2 = {
-               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+                                                       ATA_FLAG_IGN_SIMPLEX,
                .pio_mask = ATA_PIO4,
                .mwdma_mask = ATA_MWDMA2,
                .udma_mask = ATA_UDMA4,
@@ -520,7 +523,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        };
        /* Revision 0xC3 is UDMA66 for now */
        static const struct ata_port_info info_c3 = {
-               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+                                                       ATA_FLAG_IGN_SIMPLEX,
                .pio_mask = ATA_PIO4,
                .mwdma_mask = ATA_MWDMA2,
                .udma_mask = ATA_UDMA4,
@@ -528,7 +532,8 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        };
        /* Revision 0xC4 is UDMA100 */
        static const struct ata_port_info info_c4 = {
-               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48,
+               .flags = ATA_FLAG_SLAVE_POSS | ATA_FLAG_PIO_LBA48 |
+                                                       ATA_FLAG_IGN_SIMPLEX,
                .pio_mask = ATA_PIO4,
                .mwdma_mask = ATA_MWDMA2,
                .udma_mask = ATA_UDMA5,
@@ -536,7 +541,7 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
        };
        /* Revision 0xC5 is UDMA133 with LBA48 DMA */
        static const struct ata_port_info info_c5 = {
-               .flags = ATA_FLAG_SLAVE_POSS,
+               .flags = ATA_FLAG_SLAVE_POSS |  ATA_FLAG_IGN_SIMPLEX,
                .pio_mask = ATA_PIO4,
                .mwdma_mask = ATA_MWDMA2,
                .udma_mask = ATA_UDMA6,
index 2085e0a..2a6412f 100644 (file)
@@ -22,7 +22,7 @@
 #include <linux/ata.h>
 
 #define DRV_NAME       "pata_efar"
-#define DRV_VERSION    "0.4.4"
+#define DRV_VERSION    "0.4.5"
 
 /**
  *     efar_pre_reset  -       Enable bits
@@ -98,18 +98,17 @@ static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev)
                            { 2, 1 },
                            { 2, 3 }, };
 
-       if (pio > 2)
-               control |= 1;   /* TIME1 enable */
+       if (pio > 1)
+               control |= 1;   /* TIME */
        if (ata_pio_need_iordy(adev))   /* PIO 3/4 require IORDY */
-               control |= 2;   /* IE enable */
-       /* Intel specifies that the PPE functionality is for disk only */
+               control |= 2;   /* IE */
+       /* Intel specifies that the prefetch/posting is for disk only */
        if (adev->class == ATA_DEV_ATA)
-               control |= 4;   /* PPE enable */
+               control |= 4;   /* PPE */
 
        pci_read_config_word(dev, idetm_port, &idetm_data);
 
-       /* Enable PPE, IE and TIME as appropriate */
-
+       /* Set PPE, IE, and TIME as appropriate */
        if (adev->devno == 0) {
                idetm_data &= 0xCCF0;
                idetm_data |= control;
@@ -129,7 +128,7 @@ static void efar_set_piomode (struct ata_port *ap, struct ata_device *adev)
                pci_write_config_byte(dev, 0x44, slave_data);
        }
 
-       idetm_data |= 0x4000;   /* Ensure SITRE is enabled */
+       idetm_data |= 0x4000;   /* Ensure SITRE is set */
        pci_write_config_word(dev, idetm_port, idetm_data);
 }
 
index f72c6c5..6932e56 100644 (file)
@@ -48,6 +48,7 @@
  *
  */
 
+#include <linux/async.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
@@ -1028,6 +1029,7 @@ static __init int legacy_init_one(struct legacy_probe *probe)
                                &legacy_sht);
        if (ret)
                goto fail;
+       async_synchronize_full();
        ld->platform_dev = pdev;
 
        /* Nothing found means we drop the port as its probably not there */
index 9a69809..f0d52f7 100644 (file)
@@ -26,7 +26,7 @@ static unsigned int netcell_read_id(struct ata_device *adev,
        unsigned int err_mask = ata_do_dev_read_id(adev, tf, id);
        /* Firmware forgets to mark words 85-87 valid */
        if (err_mask == 0)
-               id[ATA_ID_CSF_DEFAULT] |= 0x0400;
+               id[ATA_ID_CSF_DEFAULT] |= 0x4000;
        return err_mask;
 }
 
index af761dc..4895f0e 100644 (file)
@@ -277,8 +277,8 @@ static int hci_uart_tty_open(struct tty_struct *tty)
        /* FIXME: why is this needed. Note don't use ldisc_ref here as the
           open path is before the ldisc is referencable */
 
-       if (tty->ldisc.ops->flush_buffer)
-               tty->ldisc.ops->flush_buffer(tty);
+       if (tty->ldisc->ops->flush_buffer)
+               tty->ldisc->ops->flush_buffer(tty);
        tty_driver_flush_buffer(tty);
 
        return 0;
@@ -463,7 +463,6 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file * file,
                                clear_bit(HCI_UART_PROTO_SET, &hu->flags);
                                return err;
                        }
-                       tty->low_latency = 1;
                } else
                        return -EBUSY;
                break;
index 735bbe2..02ecfd5 100644 (file)
@@ -97,6 +97,19 @@ config DEVKMEM
          kind of kernel debugging operations.
          When in doubt, say "N".
 
+config BFIN_JTAG_COMM
+       tristate "Blackfin JTAG Communication"
+       depends on BLACKFIN
+       help
+         Add support for emulating a TTY device over the Blackfin JTAG.
+
+         To compile this driver as a module, choose M here: the
+         module will be called bfin_jtag_comm.
+
+config BFIN_JTAG_COMM_CONSOLE
+       bool "Console on Blackfin JTAG"
+       depends on BFIN_JTAG_COMM=y
+
 config SERIAL_NONSTANDARD
        bool "Non-standard serial port support"
        depends on HAS_IOMEM
index 9caf5b5..189efcf 100644 (file)
@@ -13,6 +13,7 @@ obj-$(CONFIG_LEGACY_PTYS)     += pty.o
 obj-$(CONFIG_UNIX98_PTYS)      += pty.o
 obj-y                          += misc.o
 obj-$(CONFIG_VT)               += vt_ioctl.o vc_screen.o selection.o keyboard.o
+obj-$(CONFIG_BFIN_JTAG_COMM)   += bfin_jtag_comm.o
 obj-$(CONFIG_CONSOLE_TRANSLATIONS) += consolemap.o consolemap_deftbl.o
 obj-$(CONFIG_HW_CONSOLE)       += vt.o defkeymap.o
 obj-$(CONFIG_AUDIT)            += tty_audit.o
diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c
new file mode 100644 (file)
index 0000000..44c113d
--- /dev/null
@@ -0,0 +1,365 @@
+/*
+ * TTY over Blackfin JTAG Communication
+ *
+ * Copyright 2008-2009 Analog Devices Inc.
+ *
+ * Enter bugs at http://blackfin.uclinux.org/
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/circ_buf.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+#include <asm/atomic.h>
+
+/* See the Debug/Emulation chapter in the HRM */
+#define EMUDOF   0x00000001    /* EMUDAT_OUT full & valid */
+#define EMUDIF   0x00000002    /* EMUDAT_IN full & valid */
+#define EMUDOOVF 0x00000004    /* EMUDAT_OUT overflow */
+#define EMUDIOVF 0x00000008    /* EMUDAT_IN overflow */
+/*
+ * DRV_NAME names the tty driver and the worker thread; DEV_NAME is the name
+ * given to the tty devices and to the console.
+ */
+#define DRV_NAME "bfin-jtag-comm"
+#define DEV_NAME "ttyBFJC"
+/*
+ * pr_init(): printk() whose format string is kept in a static __initdata
+ * array.  debug(): pr_debug() with the driver name prefixed to the message.
+ */
+#define pr_init(fmt, args...) ({ static const __initdata char __fmt[] = fmt; printk(__fmt, ## args); })
+#define debug(fmt, args...) pr_debug(DRV_NAME ": " fmt, ## args)
+/*
+ * Move @emudat into the EMUDAT register with a single inline-asm statement
+ * and hand the same value back to the caller.
+ */
+static inline uint32_t bfin_write_emudat(uint32_t emudat)
+{
+       __asm__ __volatile__("emudat = %0;" : : "d"(emudat));
+       return emudat;
+}
+/*
+ * Read the EMUDAT register with a single inline-asm statement and return
+ * the 32-bit value.
+ */
+static inline uint32_t bfin_read_emudat(void)
+{
+       uint32_t emudat;
+       __asm__ __volatile__("%0 = emudat;" : "=d"(emudat));
+       return emudat;
+}
+
+/*
+ * Pack four characters into one 32-bit word (a in bits 0-7, b in bits 8-15,
+ * c in bits 16-23, d in bits 24-31) and write it to EMUDAT.  Returns the
+ * word that was written.
+ *
+ * Each argument is narrowed to an unsigned byte before it is shifted: plain
+ * char may be signed, in which case a byte >= 0x80 would be sign-extended by
+ * the integer promotion and its set upper bits would be OR-ed over the
+ * neighbouring characters (and "d << 24" could overflow a signed int).
+ */
+static inline uint32_t bfin_write_emudat_chars(char a, char b, char c, char d)
+{
+       return bfin_write_emudat(((uint32_t)(unsigned char)a << 0) |
+                                ((uint32_t)(unsigned char)b << 8) |
+                                ((uint32_t)(unsigned char)c << 16) |
+                                ((uint32_t)(unsigned char)d << 24));
+}
+/*
+ * Helpers for the outgoing ring buffer.  CIRC_SIZE is a power of two, so
+ * CIRC_MASK (CIRC_SIZE - 1) wraps an index into the buffer; circ_byte()
+ * applies that mask on every access.  circ_free()/circ_cnt() are thin
+ * wrappers around CIRC_SPACE()/CIRC_CNT() from <linux/circ_buf.h>.
+ */
+#define CIRC_SIZE 2048 /* see comment in tty_io.c:do_tty_write() */
+#define CIRC_MASK (CIRC_SIZE - 1)
+#define circ_empty(circ)     ((circ)->head == (circ)->tail)
+#define circ_free(circ)      CIRC_SPACE((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_cnt(circ)       CIRC_CNT((circ)->head, (circ)->tail, CIRC_SIZE)
+#define circ_byte(circ, idx) ((circ)->buf[(idx) & CIRC_MASK])
+/*
+ * Driver-wide state.  bfin_jc_tty and bfin_jc_count are updated by
+ * bfin_jc_open()/bfin_jc_close() while holding bfin_jc_tty_mutex.
+ * bfin_jc_write_buf is the outgoing ring buffer: the write paths advance
+ * its head, the worker thread advances its tail.
+ */
+static struct tty_driver *bfin_jc_driver;
+static struct task_struct *bfin_jc_kthread;
+static struct tty_struct * volatile bfin_jc_tty;
+static unsigned long bfin_jc_count;
+static DEFINE_MUTEX(bfin_jc_tty_mutex);
+static volatile struct circ_buf bfin_jc_write_buf;
+
+/*
+ * Worker thread that moves data between the EMUDAT register and the tty.
+ *
+ * Both directions use the same framing: one 32-bit word carrying the byte
+ * count of the message, followed by the payload packed four bytes per word.
+ * Incoming bytes are pushed into the tty flip buffer; outgoing bytes are
+ * taken from bfin_jc_write_buf.  The thread sleeps while no tty is open and
+ * nothing is queued for transmission, and runs until kthread_stop().
+ *
+ * @arg is unused.  Always returns 0.
+ */
+static int
+bfin_jc_emudat_manager(void *arg)
+{
+       uint32_t inbound_len = 0, outbound_len = 0;
+
+       while (!kthread_should_stop()) {
+               /*
+                * No one left to give data to, so sleep.  The task state is
+                * changed *before* the condition is tested: a
+                * wake_up_process() from open()/write()/kthread_stop() that
+                * lands between the test and schedule() then just puts us
+                * back to TASK_RUNNING instead of being lost.
+                */
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (!kthread_should_stop() &&
+                   bfin_jc_tty == NULL && circ_empty(&bfin_jc_write_buf)) {
+                       debug("waiting for readers\n");
+                       schedule();
+               }
+               __set_current_state(TASK_RUNNING);
+
+               /* no data available, so just chill */
+               if (!(bfin_read_DBGSTAT() & EMUDIF) && circ_empty(&bfin_jc_write_buf)) {
+                       debug("waiting for data (in_len = %i) (circ: %i %i)\n",
+                               inbound_len, bfin_jc_write_buf.tail, bfin_jc_write_buf.head);
+                       /* mid-message: only yield; otherwise nap for up to 1s */
+                       if (inbound_len)
+                               schedule();
+                       else
+                               schedule_timeout_interruptible(HZ);
+                       continue;
+               }
+
+               /* if incoming data is ready, eat it */
+               if (bfin_read_DBGSTAT() & EMUDIF) {
+                       struct tty_struct *tty;
+                       mutex_lock(&bfin_jc_tty_mutex);
+                       tty = (struct tty_struct *)bfin_jc_tty;
+                       if (tty != NULL) {
+                               uint32_t emudat = bfin_read_emudat();
+                               if (inbound_len == 0) {
+                                       /* first word of a message is its length */
+                                       debug("incoming length: 0x%08x\n", emudat);
+                                       inbound_len = emudat;
+                               } else {
+                                       size_t num_chars = (4 <= inbound_len ? 4 : inbound_len);
+                                       debug("  incoming data: 0x%08x (pushing %zu)\n", emudat, num_chars);
+                                       inbound_len -= num_chars;
+                                       tty_insert_flip_string(tty, (unsigned char *)&emudat, num_chars);
+                                       tty_flip_buffer_push(tty);
+                               }
+                       }
+                       mutex_unlock(&bfin_jc_tty_mutex);
+               }
+
+               /* if outgoing data is ready, post it */
+               if (!(bfin_read_DBGSTAT() & EMUDOF) && !circ_empty(&bfin_jc_write_buf)) {
+                       if (outbound_len == 0) {
+                               /* announce how many bytes follow */
+                               outbound_len = circ_cnt(&bfin_jc_write_buf);
+                               bfin_write_emudat(outbound_len);
+                               debug("outgoing length: 0x%08x\n", outbound_len);
+                       } else {
+                               struct tty_struct *tty;
+                               int tail = bfin_jc_write_buf.tail;
+                               size_t ate = (4 <= outbound_len ? 4 : outbound_len);
+                               uint32_t emudat =
+                               bfin_write_emudat_chars(
+                                       circ_byte(&bfin_jc_write_buf, tail + 0),
+                                       circ_byte(&bfin_jc_write_buf, tail + 1),
+                                       circ_byte(&bfin_jc_write_buf, tail + 2),
+                                       circ_byte(&bfin_jc_write_buf, tail + 3)
+                               );
+                               bfin_jc_write_buf.tail += ate;
+                               outbound_len -= ate;
+                               /* room was freed in the ring: let writers continue */
+                               mutex_lock(&bfin_jc_tty_mutex);
+                               tty = (struct tty_struct *)bfin_jc_tty;
+                               if (tty)
+                                       tty_wakeup(tty);
+                               mutex_unlock(&bfin_jc_tty_mutex);
+                               debug("  outgoing data: 0x%08x (pushing %zu)\n", emudat, ate);
+                       }
+               }
+       }
+
+       __set_current_state(TASK_RUNNING);
+       return 0;
+}
+/*
+ * tty open handler.  Under bfin_jc_tty_mutex: bump the open count, record
+ * @tty as the tty the worker thread delivers to, and wake the worker thread.
+ * @filp is unused.  Always returns 0.
+ */
+static int
+bfin_jc_open(struct tty_struct *tty, struct file *filp)
+{
+       mutex_lock(&bfin_jc_tty_mutex);
+       debug("open %lu\n", bfin_jc_count);
+       ++bfin_jc_count;
+       bfin_jc_tty = tty;
+       wake_up_process(bfin_jc_kthread);
+       mutex_unlock(&bfin_jc_tty_mutex);
+       return 0;
+}
+/*
+ * tty close handler.  Under bfin_jc_tty_mutex: drop one open reference and,
+ * once the count reaches zero, clear bfin_jc_tty.  The worker thread is
+ * woken in either case.  @tty and @filp are unused.
+ */
+static void
+bfin_jc_close(struct tty_struct *tty, struct file *filp)
+{
+       mutex_lock(&bfin_jc_tty_mutex);
+       debug("close %lu\n", bfin_jc_count);
+       if (--bfin_jc_count == 0)
+               bfin_jc_tty = NULL;
+       wake_up_process(bfin_jc_kthread);
+       mutex_unlock(&bfin_jc_tty_mutex);
+}
+
+/*
+ * Copy bytes from @buf into the outgoing ring buffer and advance its head.
+ * @count is first clamped to the free space reported by circ_free(), so the
+ * return value (the number of bytes actually queued) may be smaller than
+ * the @count passed in.  No locking is done here and the worker thread is
+ * not woken; both are left to the caller.
+ *
+ * XXX: we don't handle the put_char() case where we must handle count = 1
+ */
+static int
+bfin_jc_circ_write(const unsigned char *buf, int count)
+{
+       int i;
+       count = min(count, circ_free(&bfin_jc_write_buf));
+       debug("going to write chunk of %i bytes\n", count);
+       for (i = 0; i < count; ++i)
+               circ_byte(&bfin_jc_write_buf, bfin_jc_write_buf.head + i) = buf[i];
+       bfin_jc_write_buf.head += i;
+       return i;
+}
+/*
+ * tty write handler.  Queues up to @count bytes from @buf in the ring
+ * buffer via bfin_jc_circ_write(), wakes the worker thread and returns the
+ * number of bytes queued.  @tty is unused.
+ *
+ * The copy is bracketed by acquire_console_sem()/release_console_sem();
+ * when CONFIG_BFIN_JTAG_COMM_CONSOLE is not set the two names are defined
+ * away to nothing just below, so no semaphore is taken.
+ */
+#ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE
+# define acquire_console_sem()
+# define release_console_sem()
+#endif
+static int
+bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count)
+{
+       int i;
+       acquire_console_sem();
+       i = bfin_jc_circ_write(buf, count);
+       release_console_sem();
+       wake_up_process(bfin_jc_kthread);
+       return i;
+}
+/* tty flush_chars handler: only wakes the worker thread.  @tty is unused. */
+static void
+bfin_jc_flush_chars(struct tty_struct *tty)
+{
+       wake_up_process(bfin_jc_kthread);
+}
+/*
+ * tty write_room handler: returns the free space in the outgoing ring
+ * buffer as computed by circ_free().  @tty is unused.
+ */
+static int
+bfin_jc_write_room(struct tty_struct *tty)
+{
+       return circ_free(&bfin_jc_write_buf);
+}
+/*
+ * tty chars_in_buffer handler: returns the number of bytes still queued in
+ * the outgoing ring buffer as computed by circ_cnt().  @tty is unused.
+ */
+static int
+bfin_jc_chars_in_buffer(struct tty_struct *tty)
+{
+       return circ_cnt(&bfin_jc_write_buf);
+}
+
+/*
+ * tty wait_until_sent handler: wait for the outgoing ring buffer to drain.
+ *
+ * Gives up when a signal is pending for the caller or once @timeout jiffies
+ * have passed.  The wait sleeps between checks instead of spinning: the
+ * buffer is only emptied by the bfin_jc_emudat_manager() thread, which
+ * needs CPU time to make progress.  @tty is unused.
+ */
+static void
+bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+       unsigned long expire = jiffies + timeout;
+       while (!circ_empty(&bfin_jc_write_buf)) {
+               if (signal_pending(current))
+                       break;
+               if (time_after(jiffies, expire))
+                       break;
+               /* let the worker thread run; returns early on a signal */
+               msleep_interruptible(1);
+       }
+}
+
+/*
+ * tty operations implemented by this driver.  The table is never modified
+ * after build time, so it is const.  put_char is not provided (see the XXX
+ * note on bfin_jc_circ_write()).
+ */
+static const struct tty_operations bfin_jc_ops = {
+       .open            = bfin_jc_open,
+       .close           = bfin_jc_close,
+       .write           = bfin_jc_write,
+       /*.put_char        = bfin_jc_put_char,*/
+       .flush_chars     = bfin_jc_flush_chars,
+       .write_room      = bfin_jc_write_room,
+       .chars_in_buffer = bfin_jc_chars_in_buffer,
+       .wait_until_sent = bfin_jc_wait_until_sent,
+};
+
+/*
+ * Module init: create the worker thread, allocate the outgoing ring buffer,
+ * then allocate, fill in and register the tty driver.  The thread is only
+ * created here; it is first woken from the open/write paths.
+ *
+ * Returns 0 on success or a negative errno.  On failure only the resources
+ * acquired so far are released, in reverse order; in particular
+ * put_tty_driver() is never reached with a NULL bfin_jc_driver.
+ */
+static int __init bfin_jc_init(void)
+{
+       int ret;
+
+       bfin_jc_kthread = kthread_create(bfin_jc_emudat_manager, NULL, DRV_NAME);
+       if (IS_ERR(bfin_jc_kthread))
+               return PTR_ERR(bfin_jc_kthread);
+
+       ret = -ENOMEM;
+
+       bfin_jc_write_buf.head = bfin_jc_write_buf.tail = 0;
+       bfin_jc_write_buf.buf = kmalloc(CIRC_SIZE, GFP_KERNEL);
+       if (!bfin_jc_write_buf.buf)
+               goto err_kthread;
+
+       bfin_jc_driver = alloc_tty_driver(1);
+       if (!bfin_jc_driver)
+               goto err_buf;
+
+       bfin_jc_driver->owner        = THIS_MODULE;
+       bfin_jc_driver->driver_name  = DRV_NAME;
+       bfin_jc_driver->name         = DEV_NAME;
+       bfin_jc_driver->type         = TTY_DRIVER_TYPE_SERIAL;
+       bfin_jc_driver->subtype      = SERIAL_TYPE_NORMAL;
+       bfin_jc_driver->init_termios = tty_std_termios;
+       tty_set_operations(bfin_jc_driver, &bfin_jc_ops);
+
+       ret = tty_register_driver(bfin_jc_driver);
+       if (ret)
+               goto err_driver;
+
+       pr_init(KERN_INFO DRV_NAME ": initialized\n");
+
+       return 0;
+
+ err_driver:
+       put_tty_driver(bfin_jc_driver);
+ err_buf:
+       kfree(bfin_jc_write_buf.buf);
+ err_kthread:
+       kthread_stop(bfin_jc_kthread);
+       return ret;
+}
+module_init(bfin_jc_init);
+/*
+ * Module exit: stop the worker thread, free the outgoing ring buffer, then
+ * unregister the tty driver and drop the reference to it.
+ */
+static void __exit bfin_jc_exit(void)
+{
+       kthread_stop(bfin_jc_kthread);
+       kfree(bfin_jc_write_buf.buf);
+       tty_unregister_driver(bfin_jc_driver);
+       put_tty_driver(bfin_jc_driver);
+}
+module_exit(bfin_jc_exit);
+
+#if defined(CONFIG_BFIN_JTAG_COMM_CONSOLE) || defined(CONFIG_EARLY_PRINTK)
+/*
+ * Synchronously push @count bytes from @buf out through EMUDAT, bypassing
+ * the ring buffer: first one word holding @count, then the data packed four
+ * bytes per word.  Busy-waits on the EMUDOF bit of DBGSTAT before every
+ * write.
+ *
+ * When @count is not a multiple of four the last word is padded with zero
+ * bytes, so nothing past buf[count - 1] is ever read.
+ */
+static void
+bfin_jc_straight_buffer_write(const char *buf, unsigned count)
+{
+       unsigned ate = 0;
+       while (bfin_read_DBGSTAT() & EMUDOF)
+               continue;
+       bfin_write_emudat(count);
+       while (ate < count) {
+               unsigned left = count - ate;
+               while (bfin_read_DBGSTAT() & EMUDOF)
+                       continue;
+               bfin_write_emudat_chars(buf[ate],
+                                       left > 1 ? buf[ate + 1] : 0,
+                                       left > 2 ? buf[ate + 2] : 0,
+                                       left > 3 ? buf[ate + 3] : 0);
+               ate += 4;
+       }
+}
+#endif
+/*
+ * Console support.  bfin_jc_console_write() is the console output hook:
+ * while bfin_jc_kthread is still NULL the message is written straight to
+ * EMUDAT with bfin_jc_straight_buffer_write(); otherwise it is queued in the
+ * ring buffer with bfin_jc_circ_write(), whose return value (bytes actually
+ * queued) is not checked here.  @co is unused.
+ */
+#ifdef CONFIG_BFIN_JTAG_COMM_CONSOLE
+static void
+bfin_jc_console_write(struct console *co, const char *buf, unsigned count)
+{
+       if (bfin_jc_kthread == NULL)
+               bfin_jc_straight_buffer_write(buf, count);
+       else
+               bfin_jc_circ_write(buf, count);
+}
+/*
+ * Console device hook: stores the console's index in *@index and returns
+ * the tty driver of this module.
+ */
+static struct tty_driver *
+bfin_jc_console_device(struct console *co, int *index)
+{
+       *index = co->index;
+       return bfin_jc_driver;
+}
+/*
+ * Console descriptor registered by bfin_jc_console_init(); it shares
+ * DEV_NAME with the tty devices.
+ */
+static struct console bfin_jc_console = {
+       .name    = DEV_NAME,
+       .write   = bfin_jc_console_write,
+       .device  = bfin_jc_console_device,
+       .flags   = CON_ANYTIME | CON_PRINTBUFFER,
+       .index   = -1,
+};
+/* Console initcall: registers bfin_jc_console.  Always returns 0. */
+static int __init bfin_jc_console_init(void)
+{
+       register_console(&bfin_jc_console);
+       return 0;
+}
+console_initcall(bfin_jc_console_init);
+#endif
+/*
+ * Early printk support.  bfin_jc_early_write() is the early console output
+ * hook: it always writes straight to EMUDAT through
+ * bfin_jc_straight_buffer_write().  @co is unused.
+ */
+#ifdef CONFIG_EARLY_PRINTK
+static void __init
+bfin_jc_early_write(struct console *co, const char *buf, unsigned int count)
+{
+       bfin_jc_straight_buffer_write(buf, count);
+}
+/*
+ * Early console descriptor handed out by bfin_jc_early_init().  It has a
+ * write hook only; no device hook is set.
+ */
+static struct __initdata console bfin_jc_early_console = {
+       .name   = "early_BFJC",
+       .write   = bfin_jc_early_write,
+       .flags   = CON_ANYTIME | CON_PRINTBUFFER,
+       .index   = -1,
+};
+/*
+ * Returns a pointer to the early console descriptor.  @port and @cflag are
+ * accepted but ignored.
+ */
+struct console * __init
+bfin_jc_early_init(unsigned int port, unsigned int cflag)
+{
+       return &bfin_jc_early_console;
+}
+#endif
+
+MODULE_AUTHOR("Mike Frysinger <vapier@gentoo.org>");
+MODULE_DESCRIPTION("TTY over Blackfin JTAG Communication");
+MODULE_LICENSE("GPL");
index 1fdb9f6..f3366d3 100644 (file)
 
 #define NR_PORTS       256
 
-#define ZE_V1_NPORTS   64
 #define ZO_V1  0
 #define ZO_V2  1
 #define ZE_V1  2
 static void cy_throttle(struct tty_struct *tty);
 static void cy_send_xchar(struct tty_struct *tty, char ch);
 
-#define IS_CYC_Z(card) ((card).num_chips == (unsigned int)-1)
-
-#define Z_FPGA_CHECK(card) \
-       ((readl(&((struct RUNTIME_9060 __iomem *) \
-               ((card).ctl_addr))->init_ctrl) & (1<<17)) != 0)
-
-#define ISZLOADED(card)        (((ZO_V1 == readl(&((struct RUNTIME_9060 __iomem *) \
-                       ((card).ctl_addr))->mail_box_0)) || \
-                       Z_FPGA_CHECK(card)) && \
-                       (ZFIRM_ID == readl(&((struct FIRM_ID __iomem *) \
-                       ((card).base_addr+ID_ADDRESS))->signature)))
-
 #ifndef SERIAL_XMIT_SIZE
 #define        SERIAL_XMIT_SIZE        (min(PAGE_SIZE, 4096))
 #endif
@@ -687,8 +674,6 @@ static void cy_send_xchar(struct tty_struct *tty, char ch);
 #define DRIVER_VERSION 0x02010203
 #define RAM_SIZE 0x80000
 
-#define Z_FPGA_LOADED(X)       ((readl(&(X)->init_ctrl) & (1<<17)) != 0)
-
 enum zblock_type {
        ZBLOCK_PRG = 0,
        ZBLOCK_FPGA = 1
@@ -883,6 +868,29 @@ static void cyz_rx_restart(unsigned long);
 static struct timer_list cyz_rx_full_timer[NR_PORTS];
 #endif                         /* CONFIG_CYZ_INTR */
 
+/* True when @card's num_chips holds (unsigned int)-1; replaces IS_CYC_Z(). */
+static inline bool cy_is_Z(struct cyclades_card *card)
+{
+       return card->num_chips == (unsigned int)-1;
+}
+/*
+ * True when bit 17 of the init_ctrl register in the RUNTIME_9060 block at
+ * @ctl_addr is set; replaces the Z_FPGA_LOADED() macro.
+ */
+static inline bool __cyz_fpga_loaded(struct RUNTIME_9060 __iomem *ctl_addr)
+{
+       return readl(&ctl_addr->init_ctrl) & (1 << 17);
+}
+/*
+ * Same test as __cyz_fpga_loaded(), applied to @card's control registers
+ * (card->ctl_addr.p9060).
+ */
+static inline bool cyz_fpga_loaded(struct cyclades_card *card)
+{
+       return __cyz_fpga_loaded(card->ctl_addr.p9060);
+}
+/*
+ * True when the firmware signature read at base_addr + ID_ADDRESS equals
+ * ZFIRM_ID and, in addition, either card->hw_ver is ZO_V1 or the FPGA
+ * loaded bit is set.
+ */
+static inline bool cyz_is_loaded(struct cyclades_card *card)
+{
+       struct FIRM_ID __iomem *fw_id = card->base_addr + ID_ADDRESS;
+
+       return (card->hw_ver == ZO_V1 || cyz_fpga_loaded(card)) &&
+                       readl(&fw_id->signature) == ZFIRM_ID;
+}
+
 static inline int serial_paranoia_check(struct cyclades_port *info,
                char *name, const char *routine)
 {
@@ -1395,19 +1403,15 @@ cyz_fetch_msg(struct cyclades_card *cinfo,
        unsigned long loc_doorbell;
 
        firm_id = cinfo->base_addr + ID_ADDRESS;
-       if (!ISZLOADED(*cinfo))
-               return -1;
        zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
        board_ctrl = &zfw_ctrl->board_ctrl;
 
-       loc_doorbell = readl(&((struct RUNTIME_9060 __iomem *)
-                                 (cinfo->ctl_addr))->loc_doorbell);
+       loc_doorbell = readl(&cinfo->ctl_addr.p9060->loc_doorbell);
        if (loc_doorbell) {
                *cmd = (char)(0xff & loc_doorbell);
                *channel = readl(&board_ctrl->fwcmd_channel);
                *param = (__u32) readl(&board_ctrl->fwcmd_param);
-               cy_writel(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-                         loc_doorbell, 0xffffffff);
+               cy_writel(&cinfo->ctl_addr.p9060->loc_doorbell, 0xffffffff);
                return 1;
        }
        return 0;
@@ -1424,15 +1428,14 @@ cyz_issue_cmd(struct cyclades_card *cinfo,
        unsigned int index;
 
        firm_id = cinfo->base_addr + ID_ADDRESS;
-       if (!ISZLOADED(*cinfo))
+       if (!cyz_is_loaded(cinfo))
                return -1;
 
        zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
        board_ctrl = &zfw_ctrl->board_ctrl;
 
        index = 0;
-       pci_doorbell =
-           &((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->pci_doorbell;
+       pci_doorbell = &cinfo->ctl_addr.p9060->pci_doorbell;
        while ((readl(pci_doorbell) & 0xff) != 0) {
                if (index++ == 1000)
                        return (int)(readl(pci_doorbell) & 0xff);
@@ -1624,10 +1627,8 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
        static struct BOARD_CTRL __iomem *board_ctrl;
        static struct CH_CTRL __iomem *ch_ctrl;
        static struct BUF_CTRL __iomem *buf_ctrl;
-       __u32 channel;
+       __u32 channel, param, fw_ver;
        __u8 cmd;
-       __u32 param;
-       __u32 hw_ver, fw_ver;
        int special_count;
        int delta_count;
 
@@ -1635,8 +1636,6 @@ static void cyz_handle_cmd(struct cyclades_card *cinfo)
        zfw_ctrl = cinfo->base_addr + (readl(&firm_id->zfwctrl_addr) & 0xfffff);
        board_ctrl = &zfw_ctrl->board_ctrl;
        fw_ver = readl(&board_ctrl->fw_version);
-       hw_ver = readl(&((struct RUNTIME_9060 __iomem *)(cinfo->ctl_addr))->
-                       mail_box_0);
 
        while (cyz_fetch_msg(cinfo, &channel, &cmd, &param) == 1) {
                special_count = 0;
@@ -1737,15 +1736,7 @@ static irqreturn_t cyz_interrupt(int irq, void *dev_id)
 {
        struct cyclades_card *cinfo = dev_id;
 
-       if (unlikely(cinfo == NULL)) {
-#ifdef CY_DEBUG_INTERRUPTS
-               printk(KERN_DEBUG "cyz_interrupt: spurious interrupt %d\n",
-                                                                       irq);
-#endif
-               return IRQ_NONE;        /* spurious interrupt */
-       }
-
-       if (unlikely(!ISZLOADED(*cinfo))) {
+       if (unlikely(!cyz_is_loaded(cinfo))) {
 #ifdef CY_DEBUG_INTERRUPTS
                printk(KERN_DEBUG "cyz_interrupt: board not yet loaded "
                                "(IRQ%d).\n", irq);
@@ -1785,7 +1776,6 @@ static void cyz_poll(unsigned long arg)
        struct tty_struct *tty;
        struct FIRM_ID __iomem *firm_id;
        struct ZFW_CTRL __iomem *zfw_ctrl;
-       struct BOARD_CTRL __iomem *board_ctrl;
        struct BUF_CTRL __iomem *buf_ctrl;
        unsigned long expires = jiffies + HZ;
        unsigned int port, card;
@@ -1793,19 +1783,17 @@ static void cyz_poll(unsigned long arg)
        for (card = 0; card < NR_CARDS; card++) {
                cinfo = &cy_card[card];
 
-               if (!IS_CYC_Z(*cinfo))
+               if (!cy_is_Z(cinfo))
                        continue;
-               if (!ISZLOADED(*cinfo))
+               if (!cyz_is_loaded(cinfo))
                        continue;
 
                firm_id = cinfo->base_addr + ID_ADDRESS;
                zfw_ctrl = cinfo->base_addr +
                                (readl(&firm_id->zfwctrl_addr) & 0xfffff);
-               board_ctrl = &(zfw_ctrl->board_ctrl);
 
        /* Skip first polling cycle to avoid racing conditions with the FW */
                if (!cinfo->intr_enabled) {
-                       cinfo->nports = (int)readl(&board_ctrl->n_channel);
                        cinfo->intr_enabled = 1;
                        continue;
                }
@@ -1874,7 +1862,7 @@ static int startup(struct cyclades_port *info)
 
        set_line_char(info);
 
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -1931,7 +1919,7 @@ static int startup(struct cyclades_port *info)
                base_addr = card->base_addr;
 
                firm_id = base_addr + ID_ADDRESS;
-               if (!ISZLOADED(*card))
+               if (!cyz_is_loaded(card))
                        return -ENODEV;
 
                zfw_ctrl = card->base_addr +
@@ -2026,7 +2014,7 @@ static void start_xmit(struct cyclades_port *info)
 
        card = info->card;
        channel = info->line - card->first_line;
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -2070,7 +2058,7 @@ static void shutdown(struct cyclades_port *info)
 
        card = info->card;
        channel = info->line - card->first_line;
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -2126,7 +2114,7 @@ static void shutdown(struct cyclades_port *info)
 #endif
 
                firm_id = base_addr + ID_ADDRESS;
-               if (!ISZLOADED(*card))
+               if (!cyz_is_loaded(card))
                        return;
 
                zfw_ctrl = card->base_addr +
@@ -2233,7 +2221,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp,
 #endif
        info->port.blocked_open++;
 
-       if (!IS_CYC_Z(*cinfo)) {
+       if (!cy_is_Z(cinfo)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = cinfo->bus_index;
@@ -2296,7 +2284,7 @@ block_til_ready(struct tty_struct *tty, struct file *filp,
 
                base_addr = cinfo->base_addr;
                firm_id = base_addr + ID_ADDRESS;
-               if (!ISZLOADED(*cinfo)) {
+               if (!cyz_is_loaded(cinfo)) {
                        __set_current_state(TASK_RUNNING);
                        remove_wait_queue(&info->port.open_wait, &wait);
                        return -EINVAL;
@@ -2397,16 +2385,14 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
           treat it as absent from the system.  This
           will make the user pay attention.
         */
-       if (IS_CYC_Z(*info->card)) {
+       if (cy_is_Z(info->card)) {
                struct cyclades_card *cinfo = info->card;
                struct FIRM_ID __iomem *firm_id = cinfo->base_addr + ID_ADDRESS;
 
-               if (!ISZLOADED(*cinfo)) {
-                       if (((ZE_V1 == readl(&((struct RUNTIME_9060 __iomem *)
-                                        (cinfo->ctl_addr))->mail_box_0)) &&
-                                       Z_FPGA_CHECK(*cinfo)) &&
-                                       (ZFIRM_HLT == readl(
-                                               &firm_id->signature))) {
+               if (!cyz_is_loaded(cinfo)) {
+                       if (cinfo->hw_ver == ZE_V1 && cyz_fpga_loaded(cinfo) &&
+                                       readl(&firm_id->signature) ==
+                                       ZFIRM_HLT) {
                                printk(KERN_ERR "cyc:Cyclades-Z Error: you "
                                        "need an external power supply for "
                                        "this number of ports.\nFirmware "
@@ -2423,18 +2409,13 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
                   interrupts should be enabled as soon as the first open
                   happens to one of its ports. */
                        if (!cinfo->intr_enabled) {
-                               struct ZFW_CTRL __iomem *zfw_ctrl;
-                               struct BOARD_CTRL __iomem *board_ctrl;
-
-                               zfw_ctrl = cinfo->base_addr +
-                                       (readl(&firm_id->zfwctrl_addr) &
-                                        0xfffff);
-
-                               board_ctrl = &zfw_ctrl->board_ctrl;
+                               u16 intr;
 
                                /* Enable interrupts on the PLX chip */
-                               cy_writew(cinfo->ctl_addr + 0x68,
-                                       readw(cinfo->ctl_addr + 0x68) | 0x0900);
+                               intr = readw(&cinfo->ctl_addr.p9060->
+                                               intr_ctrl_stat) | 0x0900;
+                               cy_writew(&cinfo->ctl_addr.p9060->
+                                               intr_ctrl_stat, intr);
                                /* Enable interrupts on the FW */
                                retval = cyz_issue_cmd(cinfo, 0,
                                                C_CM_IRQ_ENBL, 0L);
@@ -2442,8 +2423,6 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
                                        printk(KERN_ERR "cyc:IRQ enable retval "
                                                "was %x\n", retval);
                                }
-                               cinfo->nports =
-                                       (int)readl(&board_ctrl->n_channel);
                                cinfo->intr_enabled = 1;
                        }
                }
@@ -2556,7 +2535,7 @@ static void cy_wait_until_sent(struct tty_struct *tty, int timeout)
 #endif
        card = info->card;
        channel = (info->line) - (card->first_line);
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -2601,7 +2580,7 @@ static void cy_flush_buffer(struct tty_struct *tty)
        info->xmit_cnt = info->xmit_head = info->xmit_tail = 0;
        spin_unlock_irqrestore(&card->card_lock, flags);
 
-       if (IS_CYC_Z(*card)) {  /* If it is a Z card, flush the on-board
+       if (cy_is_Z(card)) {    /* If it is a Z card, flush the on-board
                                           buffers as well */
                spin_lock_irqsave(&card->card_lock, flags);
                retval = cyz_issue_cmd(card, channel, C_CM_FLUSH_TX, 0L);
@@ -2682,7 +2661,7 @@ static void cy_close(struct tty_struct *tty, struct file *filp)
 
        spin_lock_irqsave(&card->card_lock, flags);
 
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                int channel = info->line - card->first_line;
                int index = card->bus_index;
                void __iomem *base_addr = card->base_addr +
@@ -2902,7 +2881,7 @@ static int cy_chars_in_buffer(struct tty_struct *tty)
        channel = (info->line) - (card->first_line);
 
 #ifdef Z_EXT_CHARS_IN_BUFFER
-       if (!IS_CYC_Z(cy_card[card])) {
+       if (!cy_is_Z(card)) {
 #endif                         /* Z_EXT_CHARS_IN_BUFFER */
 #ifdef CY_DEBUG_IO
                printk(KERN_DEBUG "cyc:cy_chars_in_buffer ttyC%d %d\n",
@@ -2984,7 +2963,6 @@ static void set_line_char(struct cyclades_port *info)
        void __iomem *base_addr;
        int chip, channel, index;
        unsigned cflag, iflag;
-       unsigned short chip_number;
        int baud, baud_rate = 0;
        int i;
 
@@ -3013,9 +2991,8 @@ static void set_line_char(struct cyclades_port *info)
 
        card = info->card;
        channel = info->line - card->first_line;
-       chip_number = channel / 4;
 
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
 
                index = card->bus_index;
 
@@ -3233,21 +3210,17 @@ static void set_line_char(struct cyclades_port *info)
        } else {
                struct FIRM_ID __iomem *firm_id;
                struct ZFW_CTRL __iomem *zfw_ctrl;
-               struct BOARD_CTRL __iomem *board_ctrl;
                struct CH_CTRL __iomem *ch_ctrl;
-               struct BUF_CTRL __iomem *buf_ctrl;
                __u32 sw_flow;
                int retval;
 
                firm_id = card->base_addr + ID_ADDRESS;
-               if (!ISZLOADED(*card))
+               if (!cyz_is_loaded(card))
                        return;
 
                zfw_ctrl = card->base_addr +
                        (readl(&firm_id->zfwctrl_addr) & 0xfffff);
-               board_ctrl = &zfw_ctrl->board_ctrl;
                ch_ctrl = &(zfw_ctrl->ch_ctrl[channel]);
-               buf_ctrl = &zfw_ctrl->buf_ctrl[channel];
 
                /* baud rate */
                baud = tty_get_baud_rate(info->port.tty);
@@ -3457,7 +3430,7 @@ static int get_lsr_info(struct cyclades_port *info, unsigned int __user *value)
 
        card = info->card;
        channel = (info->line) - (card->first_line);
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -3497,7 +3470,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file)
 
        card = info->card;
        channel = info->line - card->first_line;
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -3523,7 +3496,7 @@ static int cy_tiocmget(struct tty_struct *tty, struct file *file)
        } else {
                base_addr = card->base_addr;
                firm_id = card->base_addr + ID_ADDRESS;
-               if (ISZLOADED(*card)) {
+               if (cyz_is_loaded(card)) {
                        zfw_ctrl = card->base_addr +
                                (readl(&firm_id->zfwctrl_addr) & 0xfffff);
                        board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3566,7 +3539,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file,
 
        card = info->card;
        channel = (info->line) - (card->first_line);
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -3641,7 +3614,7 @@ cy_tiocmset(struct tty_struct *tty, struct file *file,
                base_addr = card->base_addr;
 
                firm_id = card->base_addr + ID_ADDRESS;
-               if (ISZLOADED(*card)) {
+               if (cyz_is_loaded(card)) {
                        zfw_ctrl = card->base_addr +
                                (readl(&firm_id->zfwctrl_addr) & 0xfffff);
                        board_ctrl = &zfw_ctrl->board_ctrl;
@@ -3713,7 +3686,7 @@ static int cy_break(struct tty_struct *tty, int break_state)
        card = info->card;
 
        spin_lock_irqsave(&card->card_lock, flags);
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                /* Let the transmit ISR take care of this (since it
                   requires stuffing characters into the output stream).
                 */
@@ -3782,7 +3755,7 @@ static int set_threshold(struct cyclades_port *info, unsigned long value)
 
        card = info->card;
        channel = info->line - card->first_line;
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -3810,7 +3783,7 @@ static int get_threshold(struct cyclades_port *info,
 
        card = info->card;
        channel = info->line - card->first_line;
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -3844,7 +3817,7 @@ static int set_timeout(struct cyclades_port *info, unsigned long value)
 
        card = info->card;
        channel = info->line - card->first_line;
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -3867,7 +3840,7 @@ static int get_timeout(struct cyclades_port *info,
 
        card = info->card;
        channel = info->line - card->first_line;
-       if (!IS_CYC_Z(*card)) {
+       if (!cy_is_Z(card)) {
                chip = channel >> 2;
                channel &= 0x03;
                index = card->bus_index;
@@ -4121,7 +4094,7 @@ static void cy_send_xchar(struct tty_struct *tty, char ch)
        card = info->card;
        channel = info->line - card->first_line;
 
-       if (IS_CYC_Z(*card)) {
+       if (cy_is_Z(card)) {
                if (ch == STOP_CHAR(tty))
                        cyz_issue_cmd(card, channel, C_CM_SENDXOFF, 0L);
                else if (ch == START_CHAR(tty))
@@ -4154,7 +4127,7 @@ static void cy_throttle(struct tty_struct *tty)
        card = info->card;
 
        if (I_IXOFF(tty)) {
-               if (!IS_CYC_Z(*card))
+               if (!cy_is_Z(card))
                        cy_send_xchar(tty, STOP_CHAR(tty));
                else
                        info->throttle = 1;
@@ -4162,7 +4135,7 @@ static void cy_throttle(struct tty_struct *tty)
 
        if (tty->termios->c_cflag & CRTSCTS) {
                channel = info->line - card->first_line;
-               if (!IS_CYC_Z(*card)) {
+               if (!cy_is_Z(card)) {
                        chip = channel >> 2;
                        channel &= 0x03;
                        index = card->bus_index;
@@ -4219,7 +4192,7 @@ static void cy_unthrottle(struct tty_struct *tty)
        if (tty->termios->c_cflag & CRTSCTS) {
                card = info->card;
                channel = info->line - card->first_line;
-               if (!IS_CYC_Z(*card)) {
+               if (!cy_is_Z(card)) {
                        chip = channel >> 2;
                        channel &= 0x03;
                        index = card->bus_index;
@@ -4263,7 +4236,7 @@ static void cy_stop(struct tty_struct *tty)
 
        cinfo = info->card;
        channel = info->line - cinfo->first_line;
-       if (!IS_CYC_Z(*cinfo)) {
+       if (!cy_is_Z(cinfo)) {
                index = cinfo->bus_index;
                chip = channel >> 2;
                channel &= 0x03;
@@ -4296,7 +4269,7 @@ static void cy_start(struct tty_struct *tty)
        cinfo = info->card;
        channel = info->line - cinfo->first_line;
        index = cinfo->bus_index;
-       if (!IS_CYC_Z(*cinfo)) {
+       if (!cy_is_Z(cinfo)) {
                chip = channel >> 2;
                channel &= 0x03;
                base_addr = cinfo->base_addr + (cy_chip_offset[chip] << index);
@@ -4347,33 +4320,20 @@ static void cy_hangup(struct tty_struct *tty)
 static int __devinit cy_init_card(struct cyclades_card *cinfo)
 {
        struct cyclades_port *info;
-       u32 uninitialized_var(mailbox);
-       unsigned int nports, port;
+       unsigned int port;
        unsigned short chip_number;
-       int uninitialized_var(index);
 
        spin_lock_init(&cinfo->card_lock);
+       cinfo->intr_enabled = 0;
 
-       if (IS_CYC_Z(*cinfo)) { /* Cyclades-Z */
-               mailbox = readl(&((struct RUNTIME_9060 __iomem *)
-                                    cinfo->ctl_addr)->mail_box_0);
-               nports = (mailbox == ZE_V1) ? ZE_V1_NPORTS : 8;
-               cinfo->intr_enabled = 0;
-               cinfo->nports = 0;      /* Will be correctly set later, after
-                                          Z FW is loaded */
-       } else {
-               index = cinfo->bus_index;
-               nports = cinfo->nports = CyPORTS_PER_CHIP * cinfo->num_chips;
-       }
-
-       cinfo->ports = kzalloc(sizeof(*cinfo->ports) * nports, GFP_KERNEL);
+       cinfo->ports = kcalloc(cinfo->nports, sizeof(*cinfo->ports),
+                       GFP_KERNEL);
        if (cinfo->ports == NULL) {
                printk(KERN_ERR "Cyclades: cannot allocate ports\n");
-               cinfo->nports = 0;
                return -ENOMEM;
        }
 
-       for (port = cinfo->first_line; port < cinfo->first_line + nports;
+       for (port = cinfo->first_line; port < cinfo->first_line + cinfo->nports;
                        port++) {
                info = &cinfo->ports[port - cinfo->first_line];
                tty_port_init(&info->port);
@@ -4387,9 +4347,9 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
                init_completion(&info->shutdown_wait);
                init_waitqueue_head(&info->delta_msr_wait);
 
-               if (IS_CYC_Z(*cinfo)) {
+               if (cy_is_Z(cinfo)) {
                        info->type = PORT_STARTECH;
-                       if (mailbox == ZO_V1)
+                       if (cinfo->hw_ver == ZO_V1)
                                info->xmit_fifo_size = CYZ_FIFO_SIZE;
                        else
                                info->xmit_fifo_size = 4 * CYZ_FIFO_SIZE;
@@ -4398,6 +4358,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
                                cyz_rx_restart, (unsigned long)info);
 #endif
                } else {
+                       int index = cinfo->bus_index;
                        info->type = PORT_CIRRUS;
                        info->xmit_fifo_size = CyMAX_CHAR_FIFO;
                        info->cor1 = CyPARITY_NONE | Cy_1_STOP | Cy_8_BITS;
@@ -4430,7 +4391,7 @@ static int __devinit cy_init_card(struct cyclades_card *cinfo)
        }
 
 #ifndef CONFIG_CYZ_INTR
-       if (IS_CYC_Z(*cinfo) && !timer_pending(&cyz_timerlist)) {
+       if (cy_is_Z(cinfo) && !timer_pending(&cyz_timerlist)) {
                mod_timer(&cyz_timerlist, jiffies + 1);
 #ifdef CY_PCI_DEBUG
                printk(KERN_DEBUG "Cyclades-Z polling initialized\n");
@@ -4621,11 +4582,12 @@ static int __init cy_detect_isa(void)
 
                /* set cy_card */
                cy_card[j].base_addr = cy_isa_address;
-               cy_card[j].ctl_addr = NULL;
+               cy_card[j].ctl_addr.p9050 = NULL;
                cy_card[j].irq = (int)cy_isa_irq;
                cy_card[j].bus_index = 0;
                cy_card[j].first_line = cy_next_channel;
-               cy_card[j].num_chips = cy_isa_nchan / 4;
+               cy_card[j].num_chips = cy_isa_nchan / CyPORTS_PER_CHIP;
+               cy_card[j].nports = cy_isa_nchan;
                if (cy_init_card(&cy_card[j])) {
                        cy_card[j].base_addr = NULL;
                        free_irq(cy_isa_irq, &cy_card[j]);
@@ -4781,7 +4743,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
        struct CUSTOM_REG __iomem *cust = base_addr;
        struct ZFW_CTRL __iomem *pt_zfwctrl;
        void __iomem *tmp;
-       u32 mailbox, status;
+       u32 mailbox, status, nchan;
        unsigned int i;
        int retval;
 
@@ -4793,7 +4755,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 
        /* Check whether the firmware is already loaded and running. If
           positive, skip this board */
-       if (Z_FPGA_LOADED(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) {
+       if (__cyz_fpga_loaded(ctl_addr) && readl(&fid->signature) == ZFIRM_ID) {
                u32 cntval = readl(base_addr + 0x190);
 
                udelay(100);
@@ -4812,7 +4774,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
 
        mailbox = readl(&ctl_addr->mail_box_0);
 
-       if (mailbox == 0 || Z_FPGA_LOADED(ctl_addr)) {
+       if (mailbox == 0 || __cyz_fpga_loaded(ctl_addr)) {
                /* stops CPU and set window to beginning of RAM */
                cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
                cy_writel(&cust->cpu_stop, 0);
@@ -4828,7 +4790,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
                                base_addr);
                if (retval)
                        goto err_rel;
-               if (!Z_FPGA_LOADED(ctl_addr)) {
+               if (!__cyz_fpga_loaded(ctl_addr)) {
                        dev_err(&pdev->dev, "fw upload successful, but fw is "
                                        "not loaded\n");
                        goto err_rel;
@@ -4887,7 +4849,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
                                "system before loading the new FW to the "
                                "Cyclades-Z.\n");
 
-                       if (Z_FPGA_LOADED(ctl_addr))
+                       if (__cyz_fpga_loaded(ctl_addr))
                                plx_init(pdev, irq, ctl_addr);
 
                        retval = -EIO;
@@ -4902,16 +4864,16 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
                        base_addr + ID_ADDRESS, readl(&fid->zfwctrl_addr),
                        base_addr + readl(&fid->zfwctrl_addr));
 
+       nchan = readl(&pt_zfwctrl->board_ctrl.n_channel);
        dev_info(&pdev->dev, "Cyclades-Z FW loaded: version = %x, ports = %u\n",
-               readl(&pt_zfwctrl->board_ctrl.fw_version),
-               readl(&pt_zfwctrl->board_ctrl.n_channel));
+               readl(&pt_zfwctrl->board_ctrl.fw_version), nchan);
 
-       if (readl(&pt_zfwctrl->board_ctrl.n_channel) == 0) {
+       if (nchan == 0) {
                dev_warn(&pdev->dev, "no Cyclades-Z ports were found. Please "
                        "check the connection between the Z host card and the "
                        "serial expanders.\n");
 
-               if (Z_FPGA_LOADED(ctl_addr))
+               if (__cyz_fpga_loaded(ctl_addr))
                        plx_init(pdev, irq, ctl_addr);
 
                dev_info(&pdev->dev, "Null number of ports detected. Board "
@@ -4932,9 +4894,7 @@ static int __devinit cyz_load_fw(struct pci_dev *pdev, void __iomem *base_addr,
        cy_writel(&ctl_addr->intr_ctrl_stat, readl(&ctl_addr->intr_ctrl_stat) |
                        0x00030800UL);
 
-       plx_init(pdev, irq, ctl_addr);
-
-       return 0;
+       return nchan;
 err_rel:
        release_firmware(fw);
 err:
@@ -4946,7 +4906,7 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 {
        void __iomem *addr0 = NULL, *addr2 = NULL;
        char *card_name = NULL;
-       u32 mailbox;
+       u32 uninitialized_var(mailbox);
        unsigned int device_id, nchan = 0, card_no, i;
        unsigned char plx_ver;
        int retval, irq;
@@ -5023,11 +4983,12 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
                }
 
                /* Disable interrupts on the PLX before resetting it */
-               cy_writew(addr0 + 0x68, readw(addr0 + 0x68) & ~0x0900);
+               cy_writew(&ctl_addr->intr_ctrl_stat,
+                               readw(&ctl_addr->intr_ctrl_stat) & ~0x0900);
 
                plx_init(pdev, irq, addr0);
 
-               mailbox = (u32)readl(&ctl_addr->mail_box_0);
+               mailbox = readl(&ctl_addr->mail_box_0);
 
                addr2 = ioremap_nocache(pci_resource_start(pdev, 2),
                                mailbox == ZE_V1 ? CyPCI_Ze_win : CyPCI_Zwin);
@@ -5038,12 +4999,8 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
 
                if (mailbox == ZE_V1) {
                        card_name = "Cyclades-Ze";
-
-                       readl(&ctl_addr->mail_box_0);
-                       nchan = ZE_V1_NPORTS;
                } else {
                        card_name = "Cyclades-8Zo";
-
 #ifdef CY_PCI_DEBUG
                        if (mailbox == ZO_V1) {
                                cy_writel(&ctl_addr->loc_addr_base, WIN_CREG);
@@ -5065,15 +5022,12 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
                         */
                        if ((mailbox == ZO_V1) || (mailbox == ZO_V2))
                                cy_writel(addr2 + ID_ADDRESS, 0L);
-
-                       retval = cyz_load_fw(pdev, addr2, addr0, irq);
-                       if (retval)
-                               goto err_unmap;
-                       /* This must be a Cyclades-8Zo/PCI.  The extendable
-                          version will have a different device_id and will
-                          be allocated its maximum number of ports. */
-                       nchan = 8;
                }
+
+               retval = cyz_load_fw(pdev, addr2, addr0, irq);
+               if (retval <= 0)
+                       goto err_unmap;
+               nchan = retval;
        }
 
        if ((cy_next_channel + nchan) > NR_PORTS) {
@@ -5103,8 +5057,10 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
                        dev_err(&pdev->dev, "could not allocate IRQ\n");
                        goto err_unmap;
                }
-               cy_card[card_no].num_chips = nchan / 4;
+               cy_card[card_no].num_chips = nchan / CyPORTS_PER_CHIP;
        } else {
+               cy_card[card_no].hw_ver = mailbox;
+               cy_card[card_no].num_chips = (unsigned int)-1;
 #ifdef CONFIG_CYZ_INTR
                /* allocate IRQ only if board has an IRQ */
                if (irq != 0 && irq != 255) {
@@ -5117,15 +5073,15 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
                        }
                }
 #endif                         /* CONFIG_CYZ_INTR */
-               cy_card[card_no].num_chips = (unsigned int)-1;
        }
 
        /* set cy_card */
        cy_card[card_no].base_addr = addr2;
-       cy_card[card_no].ctl_addr = addr0;
+       cy_card[card_no].ctl_addr.p9050 = addr0;
        cy_card[card_no].irq = irq;
        cy_card[card_no].bus_index = 1;
        cy_card[card_no].first_line = cy_next_channel;
+       cy_card[card_no].nports = nchan;
        retval = cy_init_card(&cy_card[card_no]);
        if (retval)
                goto err_null;
@@ -5138,17 +5094,20 @@ static int __devinit cy_pci_probe(struct pci_dev *pdev,
                plx_ver = readb(addr2 + CyPLX_VER) & 0x0f;
                switch (plx_ver) {
                case PLX_9050:
-
                        cy_writeb(addr0 + 0x4c, 0x43);
                        break;
 
                case PLX_9060:
                case PLX_9080:
                default:        /* Old boards, use PLX_9060 */
-                       plx_init(pdev, irq, addr0);
-                       cy_writew(addr0 + 0x68, readw(addr0 + 0x68) | 0x0900);
+               {
+                       struct RUNTIME_9060 __iomem *ctl_addr = addr0;
+                       plx_init(pdev, irq, ctl_addr);
+                       cy_writew(&ctl_addr->intr_ctrl_stat,
+                               readw(&ctl_addr->intr_ctrl_stat) | 0x0900);
                        break;
                }
+               }
        }
 
        dev_info(&pdev->dev, "%s/PCI #%d found: %d channels starting from "
@@ -5179,22 +5138,23 @@ static void __devexit cy_pci_remove(struct pci_dev *pdev)
        unsigned int i;
 
        /* non-Z with old PLX */
-       if (!IS_CYC_Z(*cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
+       if (!cy_is_Z(cinfo) && (readb(cinfo->base_addr + CyPLX_VER) & 0x0f) ==
                        PLX_9050)
-               cy_writeb(cinfo->ctl_addr + 0x4c, 0);
+               cy_writeb(cinfo->ctl_addr.p9050 + 0x4c, 0);
        else
 #ifndef CONFIG_CYZ_INTR
-               if (!IS_CYC_Z(*cinfo))
+               if (!cy_is_Z(cinfo))
 #endif
-               cy_writew(cinfo->ctl_addr + 0x68,
-                               readw(cinfo->ctl_addr + 0x68) & ~0x0900);
+               cy_writew(&cinfo->ctl_addr.p9060->intr_ctrl_stat,
+                       readw(&cinfo->ctl_addr.p9060->intr_ctrl_stat) &
+                       ~0x0900);
 
        iounmap(cinfo->base_addr);
-       if (cinfo->ctl_addr)
-               iounmap(cinfo->ctl_addr);
+       if (cinfo->ctl_addr.p9050)
+               iounmap(cinfo->ctl_addr.p9050);
        if (cinfo->irq
 #ifndef CONFIG_CYZ_INTR
-               && !IS_CYC_Z(*cinfo)
+               && !cy_is_Z(cinfo)
 #endif /* CONFIG_CYZ_INTR */
                )
                free_irq(cinfo->irq, cinfo);
@@ -5240,7 +5200,7 @@ static int cyclades_proc_show(struct seq_file *m, void *v)
                                        (cur_jifs - info->idle_stats.recv_idle)/
                                        HZ, info->idle_stats.overruns,
                                        /* FIXME: double check locking */
-                                       (long)info->port.tty->ldisc.ops->num);
+                                       (long)info->port.tty->ldisc->ops->num);
                        else
                                seq_printf(m, "%3d %8lu %10lu %8lu "
                                        "%10lu %8lu %9lu %6ld\n",
@@ -5386,11 +5346,11 @@ static void __exit cy_cleanup_module(void)
                        /* clear interrupt */
                        cy_writeb(card->base_addr + Cy_ClrIntr, 0);
                        iounmap(card->base_addr);
-                       if (card->ctl_addr)
-                               iounmap(card->ctl_addr);
+                       if (card->ctl_addr.p9050)
+                               iounmap(card->ctl_addr.p9050);
                        if (card->irq
 #ifndef CONFIG_CYZ_INTR
-                               && !IS_CYC_Z(*card)
+                               && !cy_is_Z(card)
 #endif /* CONFIG_CYZ_INTR */
                                )
                                free_irq(card->irq, card);
index af7c13c..abef1f7 100644 (file)
@@ -745,7 +745,7 @@ static int epca_carrier_raised(struct tty_port *port)
        return 0;
 }
 
-static void epca_raise_dtr_rts(struct tty_port *port)
+static void epca_dtr_rts(struct tty_port *port, int onoff)
 {
 }
 
@@ -925,7 +925,7 @@ static const struct tty_operations pc_ops = {
 
 static const struct tty_port_operations epca_port_ops = {
        .carrier_raised = epca_carrier_raised,
-       .raise_dtr_rts = epca_raise_dtr_rts,
+       .dtr_rts = epca_dtr_rts,
 };
 
 static int info_open(struct tty_struct *tty, struct file *filp)
@@ -1518,7 +1518,7 @@ static void doevent(int crd)
                if (event & MODEMCHG_IND) {
                        /* A modem signal change has been indicated */
                        ch->imodem = mstat;
-                       if (test_bit(ASYNC_CHECK_CD, &ch->port.flags)) {
+                       if (test_bit(ASYNCB_CHECK_CD, &ch->port.flags)) {
                                /* We are now receiving dcd */
                                if (mstat & ch->dcd)
                                        wake_up_interruptible(&ch->port.open_wait);
@@ -1765,9 +1765,9 @@ static void epcaparam(struct tty_struct *tty, struct channel *ch)
                 * that the driver will wait on carrier detect.
                 */
                if (ts->c_cflag & CLOCAL)
-                       clear_bit(ASYNC_CHECK_CD, &ch->port.flags);
+                       clear_bit(ASYNCB_CHECK_CD, &ch->port.flags);
                else
-                       set_bit(ASYNC_CHECK_CD, &ch->port.flags);
+                       set_bit(ASYNCB_CHECK_CD, &ch->port.flags);
                mval = ch->m_dtr | ch->m_rts;
        } /* End CBAUD not detected */
        iflag = termios2digi_i(ch, ts->c_iflag);
@@ -2114,8 +2114,8 @@ static int pc_ioctl(struct tty_struct *tty, struct file *file,
                        tty_wait_until_sent(tty, 0);
                } else {
                        /* ldisc lock already held in ioctl */
-                       if (tty->ldisc.ops->flush_buffer)
-                               tty->ldisc.ops->flush_buffer(tty);
+                       if (tty->ldisc->ops->flush_buffer)
+                               tty->ldisc->ops->flush_buffer(tty);
                }
                unlock_kernel();
                /* Fall Thru */
@@ -2244,7 +2244,8 @@ static void do_softint(struct work_struct *work)
                        if (test_and_clear_bit(EPCA_EVENT_HANGUP, &ch->event)) {
                                tty_hangup(tty);
                                wake_up_interruptible(&ch->port.open_wait);
-                               clear_bit(ASYNC_NORMAL_ACTIVE, &ch->port.flags);
+                               clear_bit(ASYNCB_NORMAL_ACTIVE,
+                                               &ch->port.flags);
                        }
                }
                tty_kref_put(tty);
index 340ba4f..4a9f349 100644 (file)
@@ -224,7 +224,7 @@ static void hpet_timer_set_irq(struct hpet_dev *devp)
                        break;
                }
 
-               gsi = acpi_register_gsi(irq, ACPI_LEVEL_SENSITIVE,
+               gsi = acpi_register_gsi(NULL, irq, ACPI_LEVEL_SENSITIVE,
                                        ACPI_ACTIVE_LOW);
                if (gsi > 0)
                        break;
@@ -939,7 +939,7 @@ static acpi_status hpet_resources(struct acpi_resource *res, void *data)
                irqp = &res->data.extended_irq;
 
                for (i = 0; i < irqp->interrupt_count; i++) {
-                       irq = acpi_register_gsi(irqp->interrupts[i],
+                       irq = acpi_register_gsi(NULL, irqp->interrupts[i],
                                      irqp->triggering, irqp->polarity);
                        if (irq < 0)
                                return AE_ERROR;
index 0061e18..0d10b89 100644 (file)
@@ -868,11 +868,11 @@ i2Input(i2ChanStrPtr pCh)
                amountToMove = count;
        }
        // Move the first block
-       pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+       pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
                 &(pCh->Ibuf[stripIndex]), NULL, amountToMove );
        // If we needed to wrap, do the second data move
        if (count > amountToMove) {
-               pCh->pTTY->ldisc.ops->receive_buf( pCh->pTTY,
+               pCh->pTTY->ldisc->ops->receive_buf( pCh->pTTY,
                 pCh->Ibuf, NULL, count - amountToMove );
        }
        // Bump and wrap the stripIndex all at once by the amount of data read. This
index afd9247..517271c 100644 (file)
@@ -1315,8 +1315,8 @@ static inline void  isig(int sig, struct tty_struct *tty, int flush)
        if (tty->pgrp)
                kill_pgrp(tty->pgrp, sig, 1);
        if (flush || !L_NOFLSH(tty)) {
-               if ( tty->ldisc.ops->flush_buffer )  
-                       tty->ldisc.ops->flush_buffer(tty);
+               if ( tty->ldisc->ops->flush_buffer )  
+                       tty->ldisc->ops->flush_buffer(tty);
                i2InputFlush( tty->driver_data );
        }
 }
index a59eac5..4d745a8 100644 (file)
@@ -329,7 +329,7 @@ static inline void drop_rts(struct isi_port *port)
 
 /* card->lock MUST NOT be held */
 
-static void isicom_raise_dtr_rts(struct tty_port *port)
+static void isicom_dtr_rts(struct tty_port *port, int on)
 {
        struct isi_port *ip = container_of(port, struct isi_port, port);
        struct isi_board *card = ip->card;
@@ -339,10 +339,17 @@ static void isicom_raise_dtr_rts(struct tty_port *port)
        if (!lock_card(card))
                return;
 
-       outw(0x8000 | (channel << card->shift_count) | 0x02, base);
-       outw(0x0f04, base);
-       InterruptTheCard(base);
-       ip->status |= (ISI_DTR | ISI_RTS);
+       if (on) {
+               outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+               outw(0x0f04, base);
+               InterruptTheCard(base);
+               ip->status |= (ISI_DTR | ISI_RTS);
+       } else {
+               outw(0x8000 | (channel << card->shift_count) | 0x02, base);
+               outw(0x0C04, base);
+               InterruptTheCard(base);
+               ip->status &= ~(ISI_DTR | ISI_RTS);
+       }
        unlock_card(card);
 }
 
@@ -1339,7 +1346,7 @@ static const struct tty_operations isicom_ops = {
 
 static const struct tty_port_operations isicom_port_ops = {
        .carrier_raised         = isicom_carrier_raised,
-       .raise_dtr_rts          = isicom_raise_dtr_rts,
+       .dtr_rts                = isicom_dtr_rts,
 };
 
 static int __devinit reset_card(struct pci_dev *pdev,
index fff19f7..e18800c 100644 (file)
@@ -1140,14 +1140,14 @@ static int stli_carrier_raised(struct tty_port *port)
        return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stli_raise_dtr_rts(struct tty_port *port)
+static void stli_dtr_rts(struct tty_port *port, int on)
 {
        struct stliport *portp = container_of(port, struct stliport, port);
        struct stlibrd *brdp = stli_brds[portp->brdnr];
-       stli_mkasysigs(&portp->asig, 1, 1);
+       stli_mkasysigs(&portp->asig, on, on);
        if (stli_cmdwait(brdp, portp, A_SETSIGNALS, &portp->asig,
                sizeof(asysigs_t), 0) < 0)
-                       printk(KERN_WARNING "istallion: dtr raise failed.\n");
+                       printk(KERN_WARNING "istallion: dtr set failed.\n");
 }
 
 
@@ -4417,7 +4417,7 @@ static const struct tty_operations stli_ops = {
 
 static const struct tty_port_operations stli_port_ops = {
        .carrier_raised = stli_carrier_raised,
-       .raise_dtr_rts = stli_raise_dtr_rts,
+       .dtr_rts = stli_dtr_rts,
 };
 
 /*****************************************************************************/
index 8f05c38..f96d0be 100644 (file)
@@ -694,6 +694,8 @@ static ssize_t read_zero(struct file * file, char __user * buf,
                written += chunk - unwritten;
                if (unwritten)
                        break;
+               if (signal_pending(current))
+                       return written ? written : -ERESTARTSYS;
                buf += chunk;
                count -= chunk;
                cond_resched();
index 4a4cab7..65b6ff2 100644 (file)
@@ -1184,6 +1184,11 @@ static int moxa_open(struct tty_struct *tty, struct file *filp)
                return -ENODEV;
        }
 
+       if (port % MAX_PORTS_PER_BOARD >= brd->numPorts) {
+               mutex_unlock(&moxa_openlock);
+               return -ENODEV;
+       }
+
        ch = &brd->ports[port % MAX_PORTS_PER_BOARD];
        ch->port.count++;
        tty->driver_data = ch;
index a420e8d..9533f43 100644 (file)
@@ -547,14 +547,18 @@ static int mxser_carrier_raised(struct tty_port *port)
        return (inb(mp->ioaddr + UART_MSR) & UART_MSR_DCD)?1:0;
 }
 
-static void mxser_raise_dtr_rts(struct tty_port *port)
+static void mxser_dtr_rts(struct tty_port *port, int on)
 {
        struct mxser_port *mp = container_of(port, struct mxser_port, port);
        unsigned long flags;
 
        spin_lock_irqsave(&mp->slock, flags);
-       outb(inb(mp->ioaddr + UART_MCR) |
-               UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+       if (on)
+               outb(inb(mp->ioaddr + UART_MCR) |
+                       UART_MCR_DTR | UART_MCR_RTS, mp->ioaddr + UART_MCR);
+       else
+               outb(inb(mp->ioaddr + UART_MCR)&~(UART_MCR_DTR | UART_MCR_RTS),
+                       mp->ioaddr + UART_MCR);
        spin_unlock_irqrestore(&mp->slock, flags);
 }
 
@@ -2356,7 +2360,7 @@ static const struct tty_operations mxser_ops = {
 
 struct tty_port_operations mxser_port_ops = {
        .carrier_raised = mxser_carrier_raised,
-       .raise_dtr_rts = mxser_raise_dtr_rts,
+       .dtr_rts = mxser_dtr_rts,
 };
 
 /*
@@ -2711,7 +2715,7 @@ static int __init mxser_module_init(void)
                        continue;
 
                brd = &mxser_boards[m];
-               retval = mxser_get_ISA_conf(!ioaddr[b], brd);
+               retval = mxser_get_ISA_conf(ioaddr[b], brd);
                if (retval <= 0) {
                        brd->info = NULL;
                        continue;
index bacb3e2..461ece5 100644 (file)
@@ -342,8 +342,8 @@ static int n_hdlc_tty_open (struct tty_struct *tty)
 #endif
        
        /* Flush any pending characters in the driver and discipline. */
-       if (tty->ldisc.ops->flush_buffer)
-               tty->ldisc.ops->flush_buffer(tty);
+       if (tty->ldisc->ops->flush_buffer)
+               tty->ldisc->ops->flush_buffer(tty);
 
        tty_driver_flush_buffer(tty);
                
index f6f0e4e..94a5d50 100644 (file)
 #define ECHO_OP_SET_CANON_COL 0x81
 #define ECHO_OP_ERASE_TAB 0x82
 
-static inline unsigned char *alloc_buf(void)
-{
-       gfp_t prio = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL;
-
-       if (PAGE_SIZE != N_TTY_BUF_SIZE)
-               return kmalloc(N_TTY_BUF_SIZE, prio);
-       else
-               return (unsigned char *)__get_free_page(prio);
-}
-
-static inline void free_buf(unsigned char *buf)
-{
-       if (PAGE_SIZE != N_TTY_BUF_SIZE)
-               kfree(buf);
-       else
-               free_page((unsigned long) buf);
-}
-
 static inline int tty_put_user(struct tty_struct *tty, unsigned char x,
                               unsigned char __user *ptr)
 {
@@ -1558,11 +1540,11 @@ static void n_tty_close(struct tty_struct *tty)
 {
        n_tty_flush_buffer(tty);
        if (tty->read_buf) {
-               free_buf(tty->read_buf);
+               kfree(tty->read_buf);
                tty->read_buf = NULL;
        }
        if (tty->echo_buf) {
-               free_buf(tty->echo_buf);
+               kfree(tty->echo_buf);
                tty->echo_buf = NULL;
        }
 }
@@ -1584,17 +1566,16 @@ static int n_tty_open(struct tty_struct *tty)
 
        /* These are ugly. Currently a malloc failure here can panic */
        if (!tty->read_buf) {
-               tty->read_buf = alloc_buf();
+               tty->read_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
                if (!tty->read_buf)
                        return -ENOMEM;
        }
        if (!tty->echo_buf) {
-               tty->echo_buf = alloc_buf();
+               tty->echo_buf = kzalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
+
                if (!tty->echo_buf)
                        return -ENOMEM;
        }
-       memset(tty->read_buf, 0, N_TTY_BUF_SIZE);
-       memset(tty->echo_buf, 0, N_TTY_BUF_SIZE);
        reset_buffer_flags(tty);
        tty->column = 0;
        n_tty_set_termios(tty, NULL);
index 19d79fc..77b3648 100644 (file)
@@ -383,7 +383,7 @@ static void async_mode(MGSLPC_INFO *info);
 static void tx_timeout(unsigned long context);
 
 static int carrier_raised(struct tty_port *port);
-static void raise_dtr_rts(struct tty_port *port);
+static void dtr_rts(struct tty_port *port, int onoff);
 
 #if SYNCLINK_GENERIC_HDLC
 #define dev_to_port(D) (dev_to_hdlc(D)->priv)
@@ -513,7 +513,7 @@ static void ldisc_receive_buf(struct tty_struct *tty,
 
 static const struct tty_port_operations mgslpc_port_ops = {
        .carrier_raised = carrier_raised,
-       .raise_dtr_rts = raise_dtr_rts
+       .dtr_rts = dtr_rts
 };
 
 static int mgslpc_probe(struct pcmcia_device *link)
@@ -2528,13 +2528,21 @@ static int carrier_raised(struct tty_port *port)
        return 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+/*
+ * Set (onoff != 0) or clear (onoff == 0) both the RTS and DTR bits in
+ * info->serial_signals, then call set_signals(info), all under info->lock.
+ */
+static void dtr_rts(struct tty_port *port, int onoff)
 {
        MGSLPC_INFO *info = container_of(port, MGSLPC_INFO, port);
        unsigned long flags;
 
        spin_lock_irqsave(&info->lock,flags);
-       info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+       if (onoff)
+               info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+       else
+               /* parenthesised: unary '~' binds tighter than '+' or '|' */
+               info->serial_signals &= ~(SerialSignal_RTS | SerialSignal_DTR);
        set_signals(info);
        spin_unlock_irqrestore(&info->lock,flags);
 }
index 31038a0..5acd29e 100644 (file)
@@ -30,7 +30,6 @@
 
 #include <asm/system.h>
 
-/* These are global because they are accessed in tty_io.c */
 #ifdef CONFIG_UNIX98_PTYS
 static struct tty_driver *ptm_driver;
 static struct tty_driver *pts_driver;
@@ -111,7 +110,7 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf,
        c = to->receive_room;
        if (c > count)
                c = count;
-       to->ldisc.ops->receive_buf(to, buf, NULL, c);
+       to->ldisc->ops->receive_buf(to, buf, NULL, c);
 
        return c;
 }
@@ -149,11 +148,11 @@ static int pty_chars_in_buffer(struct tty_struct *tty)
        int count;
 
        /* We should get the line discipline lock for "tty->link" */
-       if (!to || !to->ldisc.ops->chars_in_buffer)
+       if (!to || !to->ldisc->ops->chars_in_buffer)
                return 0;
 
        /* The ldisc must report 0 if no characters available to be read */
-       count = to->ldisc.ops->chars_in_buffer(to);
+       count = to->ldisc->ops->chars_in_buffer(to);
 
        if (tty->driver->subtype == PTY_TYPE_SLAVE)
                return count;
@@ -187,8 +186,8 @@ static void pty_flush_buffer(struct tty_struct *tty)
        if (!to)
                return;
 
-       if (to->ldisc.ops->flush_buffer)
-               to->ldisc.ops->flush_buffer(to);
+       if (to->ldisc->ops->flush_buffer)
+               to->ldisc->ops->flush_buffer(to);
 
        if (to->packet) {
                spin_lock_irqsave(&tty->ctrl_lock, flags);
index f59fc5c..63d5b62 100644 (file)
@@ -872,11 +872,16 @@ static int carrier_raised(struct tty_port *port)
        return (sGetChanStatusLo(&info->channel) & CD_ACT) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
        struct r_port *info = container_of(port, struct r_port, port);
-       sSetDTR(&info->channel);
-       sSetRTS(&info->channel);
+       if (on) {
+               sSetDTR(&info->channel);
+               sSetRTS(&info->channel);
+       } else {
+               sClrDTR(&info->channel);
+               sClrRTS(&info->channel);
+       }
 }
 
 /*
@@ -934,7 +939,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
        /*
         * Info->count is now 1; so it's safe to sleep now.
         */
-       if (!test_bit(ASYNC_INITIALIZED, &port->flags)) {
+       if (!test_bit(ASYNCB_INITIALIZED, &port->flags)) {
                cp = &info->channel;
                sSetRxTrigger(cp, TRIG_1);
                if (sGetChanStatus(cp) & CD_ACT)
@@ -958,7 +963,7 @@ static int rp_open(struct tty_struct *tty, struct file *filp)
                sEnRxFIFO(cp);
                sEnTransmit(cp);
 
-               set_bit(ASYNC_INITIALIZED, &info->port.flags);
+               set_bit(ASYNCB_INITIALIZED, &info->port.flags);
 
                /*
                 * Set up the tty->alt_speed kludge
@@ -1641,7 +1646,7 @@ static int rp_write(struct tty_struct *tty,
        /*  Write remaining data into the port's xmit_buf */
        while (1) {
                /* Hung up ? */
-               if (!test_bit(ASYNC_NORMAL_ACTIVE, &info->port.flags))
+               if (!test_bit(ASYNCB_NORMAL_ACTIVE, &info->port.flags))
                        goto end;
                c = min(count, XMIT_BUF_SIZE - info->xmit_cnt - 1);
                c = min(c, XMIT_BUF_SIZE - info->xmit_head);
@@ -2250,7 +2255,7 @@ static const struct tty_operations rocket_ops = {
 
 static const struct tty_port_operations rocket_port_ops = {
        .carrier_raised = carrier_raised,
-       .raise_dtr_rts = raise_dtr_rts,
+       .dtr_rts = dtr_rts,
 };
 
 /*
index cb8ca56..f97b9e8 100644 (file)
@@ -327,7 +327,7 @@ int paste_selection(struct tty_struct *tty)
                }
                count = sel_buffer_lth - pasted;
                count = min(count, tty->receive_room);
-               tty->ldisc.ops->receive_buf(tty, sel_buffer + pasted,
+               tty->ldisc->ops->receive_buf(tty, sel_buffer + pasted,
                                                                NULL, count);
                pasted += count;
        }
index 2ad813a..53e504f 100644 (file)
@@ -772,11 +772,11 @@ static int stl_carrier_raised(struct tty_port *port)
        return (portp->sigs & TIOCM_CD) ? 1 : 0;
 }
 
-static void stl_raise_dtr_rts(struct tty_port *port)
+static void stl_dtr_rts(struct tty_port *port, int on)
 {
        struct stlport *portp = container_of(port, struct stlport, port);
        /* Takes brd_lock internally */
-       stl_setsignals(portp, 1, 1);
+       stl_setsignals(portp, on, on);
 }
 
 /*****************************************************************************/
@@ -2547,7 +2547,7 @@ static const struct tty_operations stl_ops = {
 
 static const struct tty_port_operations stl_port_ops = {
        .carrier_raised = stl_carrier_raised,
-       .raise_dtr_rts = stl_raise_dtr_rts,
+       .dtr_rts = stl_dtr_rts,
 };
 
 /*****************************************************************************/
index afd0b26..afded3a 100644 (file)
@@ -3247,13 +3247,16 @@ static int carrier_raised(struct tty_port *port)
        return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
        struct mgsl_struct *info = container_of(port, struct mgsl_struct, port);
        unsigned long flags;
 
        spin_lock_irqsave(&info->irq_spinlock,flags);
-       info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+       if (on)
+               info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+       else
+               info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
        usc_set_serial_signals(info);
        spin_unlock_irqrestore(&info->irq_spinlock,flags);
 }
@@ -4258,7 +4261,7 @@ static void mgsl_add_device( struct mgsl_struct *info )
 
 static const struct tty_port_operations mgsl_port_ops = {
        .carrier_raised = carrier_raised,
-       .raise_dtr_rts = raise_dtr_rts,
+       .dtr_rts = dtr_rts,
 };
 
 
index 5e25649..1386625 100644 (file)
@@ -214,6 +214,7 @@ struct slgt_desc
 #define set_desc_next(a,b) (a).next   = cpu_to_le32((unsigned int)(b))
 #define set_desc_count(a,b)(a).count  = cpu_to_le16((unsigned short)(b))
 #define set_desc_eof(a,b)  (a).status = cpu_to_le16((b) ? (le16_to_cpu((a).status) | BIT0) : (le16_to_cpu((a).status) & ~BIT0))
+#define set_desc_status(a, b) (a).status = cpu_to_le16((unsigned short)(b))
 #define desc_count(a)      (le16_to_cpu((a).count))
 #define desc_status(a)     (le16_to_cpu((a).status))
 #define desc_complete(a)   (le16_to_cpu((a).status) & BIT15)
@@ -297,6 +298,7 @@ struct slgt_info {
        u32 max_frame_size;       /* as set by device config */
 
        unsigned int rbuf_fill_level;
+       unsigned int rx_pio;
        unsigned int if_mode;
        unsigned int base_clock;
 
@@ -331,6 +333,8 @@ struct slgt_info {
        struct slgt_desc *rbufs;
        unsigned int rbuf_current;
        unsigned int rbuf_index;
+       unsigned int rbuf_fill_index;
+       unsigned short rbuf_fill_count;
 
        unsigned int tbuf_count;
        struct slgt_desc *tbufs;
@@ -2110,6 +2114,40 @@ static void ri_change(struct slgt_info *info, unsigned short status)
        info->pending_bh |= BH_STATUS;
 }
 
+static void isr_rxdata(struct slgt_info *info)
+{
+       unsigned int count = info->rbuf_fill_count;
+       unsigned int i = info->rbuf_fill_index;
+       unsigned short reg;
+
+       while (rd_reg16(info, SSR) & IRQ_RXDATA) {
+               reg = rd_reg16(info, RDR);
+               DBGISR(("isr_rxdata %s RDR=%04X\n", info->device_name, reg));
+               if (desc_complete(info->rbufs[i])) {
+                       /* all buffers full */
+                       rx_stop(info);
+                       info->rx_restart = 1;
+                       continue;
+               }
+               info->rbufs[i].buf[count++] = (unsigned char)reg;
+               /* async mode saves status byte to buffer for each data byte */
+               if (info->params.mode == MGSL_MODE_ASYNC)
+                       info->rbufs[i].buf[count++] = (unsigned char)(reg >> 8);
+               if (count == info->rbuf_fill_level || (reg & BIT10)) {
+                       /* buffer full or end of frame */
+                       set_desc_count(info->rbufs[i], count);
+                       set_desc_status(info->rbufs[i], BIT15 | (reg >> 8));
+                       info->rbuf_fill_count = count = 0;
+                       if (++i == info->rbuf_count)
+                               i = 0;
+                       info->pending_bh |= BH_RECEIVE;
+               }
+       }
+
+       info->rbuf_fill_index = i;
+       info->rbuf_fill_count = count;
+}
+
 static void isr_serial(struct slgt_info *info)
 {
        unsigned short status = rd_reg16(info, SSR);
@@ -2125,6 +2163,8 @@ static void isr_serial(struct slgt_info *info)
                        if (info->tx_count)
                                isr_txeom(info, status);
                }
+               if (info->rx_pio && (status & IRQ_RXDATA))
+                       isr_rxdata(info);
                if ((status & IRQ_RXBREAK) && (status & RXBREAK)) {
                        info->icount.brk++;
                        /* process break detection if tty control allows */
@@ -2141,7 +2181,8 @@ static void isr_serial(struct slgt_info *info)
        } else {
                if (status & (IRQ_TXIDLE + IRQ_TXUNDER))
                        isr_txeom(info, status);
-
+               if (info->rx_pio && (status & IRQ_RXDATA))
+                       isr_rxdata(info);
                if (status & IRQ_RXIDLE) {
                        if (status & RXIDLE)
                                info->icount.rxidle++;
@@ -2642,6 +2683,10 @@ static int rx_enable(struct slgt_info *info, int enable)
                        return -EINVAL;
                }
                info->rbuf_fill_level = rbuf_fill_level;
+               if (rbuf_fill_level < 128)
+                       info->rx_pio = 1; /* PIO mode */
+               else
+                       info->rx_pio = 0; /* DMA mode */
                rx_stop(info); /* restart receiver to use new fill level */
        }
 
@@ -3099,13 +3144,16 @@ static int carrier_raised(struct tty_port *port)
        return (info->signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
        unsigned long flags;
        struct slgt_info *info = container_of(port, struct slgt_info, port);
 
        spin_lock_irqsave(&info->lock,flags);
-       info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+       if (on)
+               info->signals |= SerialSignal_RTS + SerialSignal_DTR;
+       else
+               info->signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
        set_signals(info);
        spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3419,7 +3467,7 @@ static void add_device(struct slgt_info *info)
 
 static const struct tty_port_operations slgt_port_ops = {
        .carrier_raised = carrier_raised,
-       .raise_dtr_rts = raise_dtr_rts,
+       .dtr_rts = dtr_rts,
 };
 
 /*
@@ -3841,15 +3889,27 @@ static void rx_start(struct slgt_info *info)
        rdma_reset(info);
        reset_rbufs(info);
 
-       /* set 1st descriptor address */
-       wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
-
-       if (info->params.mode != MGSL_MODE_ASYNC) {
-               /* enable rx DMA and DMA interrupt */
-               wr_reg32(info, RDCSR, (BIT2 + BIT0));
+       if (info->rx_pio) {
+               /* rx request when rx FIFO not empty */
+               wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) & ~BIT14));
+               slgt_irq_on(info, IRQ_RXDATA);
+               if (info->params.mode == MGSL_MODE_ASYNC) {
+                       /* enable saving of rx status */
+                       wr_reg32(info, RDCSR, BIT6);
+               }
        } else {
-               /* enable saving of rx status, rx DMA and DMA interrupt */
-               wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
+               /* rx request when rx FIFO half full */
+               wr_reg16(info, SCR, (unsigned short)(rd_reg16(info, SCR) | BIT14));
+               /* set 1st descriptor address */
+               wr_reg32(info, RDDAR, info->rbufs[0].pdesc);
+
+               if (info->params.mode != MGSL_MODE_ASYNC) {
+                       /* enable rx DMA and DMA interrupt */
+                       wr_reg32(info, RDCSR, (BIT2 + BIT0));
+               } else {
+                       /* enable saving of rx status, rx DMA and DMA interrupt */
+                       wr_reg32(info, RDCSR, (BIT6 + BIT2 + BIT0));
+               }
        }
 
        slgt_irq_on(info, IRQ_RXOVER);
@@ -4467,6 +4527,8 @@ static void free_rbufs(struct slgt_info *info, unsigned int i, unsigned int last
 static void reset_rbufs(struct slgt_info *info)
 {
        free_rbufs(info, 0, info->rbuf_count - 1);
+       info->rbuf_fill_index = 0;
+       info->rbuf_fill_count = 0;
 }
 
 /*
index 26de60e..6f727e3 100644 (file)
@@ -3277,13 +3277,16 @@ static int carrier_raised(struct tty_port *port)
        return (info->serial_signals & SerialSignal_DCD) ? 1 : 0;
 }
 
-static void raise_dtr_rts(struct tty_port *port)
+static void dtr_rts(struct tty_port *port, int on)
 {
        SLMP_INFO *info = container_of(port, SLMP_INFO, port);
        unsigned long flags;
 
        spin_lock_irqsave(&info->lock,flags);
-       info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+       if (on)
+               info->serial_signals |= SerialSignal_RTS + SerialSignal_DTR;
+       else
+               info->serial_signals &= ~(SerialSignal_RTS + SerialSignal_DTR);
        set_signals(info);
        spin_unlock_irqrestore(&info->lock,flags);
 }
@@ -3746,7 +3749,7 @@ static void add_device(SLMP_INFO *info)
 
 static const struct tty_port_operations port_ops = {
        .carrier_raised = carrier_raised,
-       .raise_dtr_rts = raise_dtr_rts,
+       .dtr_rts = dtr_rts,
 };
 
 /* Allocate and initialize a device instance structure
index 55ba6f1..ac16fbe 100644 (file)
@@ -29,10 +29,7 @@ static struct tty_audit_buf *tty_audit_buf_alloc(int major, int minor,
        buf = kmalloc(sizeof(*buf), GFP_KERNEL);
        if (!buf)
                goto err;
-       if (PAGE_SIZE != N_TTY_BUF_SIZE)
-               buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
-       else
-               buf->data = (unsigned char *)__get_free_page(GFP_KERNEL);
+       buf->data = kmalloc(N_TTY_BUF_SIZE, GFP_KERNEL);
        if (!buf->data)
                goto err_buf;
        atomic_set(&buf->count, 1);
@@ -52,10 +49,7 @@ err:
 static void tty_audit_buf_free(struct tty_audit_buf *buf)
 {
        WARN_ON(buf->valid != 0);
-       if (PAGE_SIZE != N_TTY_BUF_SIZE)
-               kfree(buf->data);
-       else
-               free_page((unsigned long)buf->data);
+       kfree(buf->data);
        kfree(buf);
 }
 
index 66b99a2..939e198 100644 (file)
@@ -295,7 +295,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line)
        struct tty_driver *p, *res = NULL;
        int tty_line = 0;
        int len;
-       char *str;
+       char *str, *stp;
 
        for (str = name; *str; str++)
                if ((*str >= '0' && *str <= '9') || *str == ',')
@@ -311,13 +311,14 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line)
        list_for_each_entry(p, &tty_drivers, tty_drivers) {
                if (strncmp(name, p->name, len) != 0)
                        continue;
-               if (*str == ',')
-                       str++;
-               if (*str == '\0')
-                       str = NULL;
+               stp = str;
+               if (*stp == ',')
+                       stp++;
+               if (*stp == '\0')
+                       stp = NULL;
 
                if (tty_line >= 0 && tty_line <= p->num && p->ops &&
-                   p->ops->poll_init && !p->ops->poll_init(p, tty_line, str)) {
+                   p->ops->poll_init && !p->ops->poll_init(p, tty_line, stp)) {
                        res = tty_driver_kref_get(p);
                        *line = tty_line;
                        break;
@@ -469,43 +470,6 @@ void tty_wakeup(struct tty_struct *tty)
 
 EXPORT_SYMBOL_GPL(tty_wakeup);
 
-/**
- *     tty_ldisc_flush -       flush line discipline queue
- *     @tty: tty
- *
- *     Flush the line discipline queue (if any) for this tty. If there
- *     is no line discipline active this is a no-op.
- */
-
-void tty_ldisc_flush(struct tty_struct *tty)
-{
-       struct tty_ldisc *ld = tty_ldisc_ref(tty);
-       if (ld) {
-               if (ld->ops->flush_buffer)
-                       ld->ops->flush_buffer(tty);
-               tty_ldisc_deref(ld);
-       }
-       tty_buffer_flush(tty);
-}
-
-EXPORT_SYMBOL_GPL(tty_ldisc_flush);
-
-/**
- *     tty_reset_termios       -       reset terminal state
- *     @tty: tty to reset
- *
- *     Restore a terminal to the driver default state
- */
-
-static void tty_reset_termios(struct tty_struct *tty)
-{
-       mutex_lock(&tty->termios_mutex);
-       *tty->termios = tty->driver->init_termios;
-       tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
-       tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
-       mutex_unlock(&tty->termios_mutex);
-}
-
 /**
  *     do_tty_hangup           -       actual handler for hangup events
  *     @work: tty device
@@ -535,7 +499,6 @@ static void do_tty_hangup(struct work_struct *work)
        struct file *cons_filp = NULL;
        struct file *filp, *f = NULL;
        struct task_struct *p;
-       struct tty_ldisc *ld;
        int    closecount = 0, n;
        unsigned long flags;
        int refs = 0;
@@ -566,40 +529,8 @@ static void do_tty_hangup(struct work_struct *work)
                filp->f_op = &hung_up_tty_fops;
        }
        file_list_unlock();
-       /*
-        * FIXME! What are the locking issues here? This may me overdoing
-        * things... This question is especially important now that we've
-        * removed the irqlock.
-        */
-       ld = tty_ldisc_ref(tty);
-       if (ld != NULL) {
-               /* We may have no line discipline at this point */
-               if (ld->ops->flush_buffer)
-                       ld->ops->flush_buffer(tty);
-               tty_driver_flush_buffer(tty);
-               if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
-                   ld->ops->write_wakeup)
-                       ld->ops->write_wakeup(tty);
-               if (ld->ops->hangup)
-                       ld->ops->hangup(tty);
-       }
-       /*
-        * FIXME: Once we trust the LDISC code better we can wait here for
-        * ldisc completion and fix the driver call race
-        */
-       wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
-       wake_up_interruptible_poll(&tty->read_wait, POLLIN);
-       /*
-        * Shutdown the current line discipline, and reset it to
-        * N_TTY.
-        */
-       if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS)
-               tty_reset_termios(tty);
-       /* Defer ldisc switch */
-       /* tty_deferred_ldisc_switch(N_TTY);
 
-         This should get done automatically when the port closes and
-         tty_release is called */
+       tty_ldisc_hangup(tty);
 
        read_lock(&tasklist_lock);
        if (tty->session) {
@@ -628,12 +559,15 @@ static void do_tty_hangup(struct work_struct *work)
        read_unlock(&tasklist_lock);
 
        spin_lock_irqsave(&tty->ctrl_lock, flags);
-       tty->flags = 0;
+       clear_bit(TTY_THROTTLED, &tty->flags);
+       clear_bit(TTY_PUSH, &tty->flags);
+       clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
        put_pid(tty->session);
        put_pid(tty->pgrp);
        tty->session = NULL;
        tty->pgrp = NULL;
        tty->ctrl_status = 0;
+       set_bit(TTY_HUPPED, &tty->flags);
        spin_unlock_irqrestore(&tty->ctrl_lock, flags);
 
        /* Account for the p->signal references we killed */
@@ -659,10 +593,7 @@ static void do_tty_hangup(struct work_struct *work)
         * can't yet guarantee all that.
         */
        set_bit(TTY_HUPPED, &tty->flags);
-       if (ld) {
-               tty_ldisc_enable(tty);
-               tty_ldisc_deref(ld);
-       }
+       tty_ldisc_enable(tty);
        unlock_kernel();
        if (f)
                fput(f);
@@ -2480,6 +2411,24 @@ static int tty_tiocmset(struct tty_struct *tty, struct file *file, unsigned int
        return tty->ops->tiocmset(tty, file, set, clear);
 }
 
+struct tty_struct *tty_pair_get_tty(struct tty_struct *tty)
+{
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver->subtype == PTY_TYPE_MASTER)
+               tty = tty->link;
+       return tty;
+}
+EXPORT_SYMBOL(tty_pair_get_tty);
+
+struct tty_struct *tty_pair_get_pty(struct tty_struct *tty)
+{
+       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
+           tty->driver->subtype == PTY_TYPE_MASTER)
+           return tty;
+       return tty->link;
+}
+EXPORT_SYMBOL(tty_pair_get_pty);
+
 /*
  * Split this up, as gcc can choke on it otherwise..
  */
@@ -2495,11 +2444,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        if (tty_paranoia_check(tty, inode, "tty_ioctl"))
                return -EINVAL;
 
-       real_tty = tty;
-       if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
-           tty->driver->subtype == PTY_TYPE_MASTER)
-               real_tty = tty->link;
-
+       real_tty = tty_pair_get_tty(tty);
 
        /*
         * Factor out some common prep work
@@ -2555,7 +2500,7 @@ long tty_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        case TIOCGSID:
                return tiocgsid(tty, real_tty, p);
        case TIOCGETD:
-               return put_user(tty->ldisc.ops->num, (int __user *)p);
+               return put_user(tty->ldisc->ops->num, (int __user *)p);
        case TIOCSETD:
                return tiocsetd(tty, p);
        /*
@@ -2770,6 +2715,7 @@ void initialize_tty_struct(struct tty_struct *tty,
        tty->buf.head = tty->buf.tail = NULL;
        tty_buffer_init(tty);
        mutex_init(&tty->termios_mutex);
+       mutex_init(&tty->ldisc_mutex);
        init_waitqueue_head(&tty->write_wait);
        init_waitqueue_head(&tty->read_wait);
        INIT_WORK(&tty->hangup_work, do_tty_hangup);
index 6f4c7d0..8116bb1 100644 (file)
@@ -97,14 +97,19 @@ EXPORT_SYMBOL(tty_driver_flush_buffer);
  *     @tty: terminal
  *
  *     Indicate that a tty should stop transmitting data down the stack.
+ *     Takes the termios mutex to protect against parallel throttle/unthrottle
+ *     and also to ensure the driver can consistently reference its own
+ *     termios data at this point when implementing software flow control.
  */
 
 void tty_throttle(struct tty_struct *tty)
 {
+       mutex_lock(&tty->termios_mutex);
        /* check TTY_THROTTLED first so it indicates our state */
        if (!test_and_set_bit(TTY_THROTTLED, &tty->flags) &&
            tty->ops->throttle)
                tty->ops->throttle(tty);
+       mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_throttle);
 
@@ -113,13 +118,21 @@ EXPORT_SYMBOL(tty_throttle);
  *     @tty: terminal
  *
  *     Indicate that a tty may continue transmitting data down the stack.
+ *     Takes the termios mutex to protect against parallel throttle/unthrottle
+ *     and also to ensure the driver can consistently reference its own
+ *     termios data at this point when implementing software flow control.
+ *
+ *     Drivers should however remember that the stack can issue a throttle,
+ *     then change flow control method, then unthrottle.
  */
 
 void tty_unthrottle(struct tty_struct *tty)
 {
+       mutex_lock(&tty->termios_mutex);
        if (test_and_clear_bit(TTY_THROTTLED, &tty->flags) &&
            tty->ops->unthrottle)
                tty->ops->unthrottle(tty);
+       mutex_unlock(&tty->termios_mutex);
 }
 EXPORT_SYMBOL(tty_unthrottle);
 
@@ -613,9 +626,25 @@ static int set_termios(struct tty_struct *tty, void __user *arg, int opt)
        return 0;
 }
 
+static void copy_termios(struct tty_struct *tty, struct ktermios *kterm)
+{
+       mutex_lock(&tty->termios_mutex);
+       memcpy(kterm, tty->termios, sizeof(struct ktermios));
+       mutex_unlock(&tty->termios_mutex);
+}
+
+static void copy_termios_locked(struct tty_struct *tty, struct ktermios *kterm)
+{
+       mutex_lock(&tty->termios_mutex);
+       memcpy(kterm, tty->termios_locked, sizeof(struct ktermios));
+       mutex_unlock(&tty->termios_mutex);
+}
+
 static int get_termio(struct tty_struct *tty, struct termio __user *termio)
 {
-       if (kernel_termios_to_user_termio(termio, tty->termios))
+       struct ktermios kterm;
+       copy_termios(tty, &kterm);
+       if (kernel_termios_to_user_termio(termio, &kterm))
                return -EFAULT;
        return 0;
 }
@@ -917,6 +946,8 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
        struct tty_struct *real_tty;
        void __user *p = (void __user *)arg;
        int ret = 0;
+       struct ktermios kterm;
+       struct termiox ktermx;
 
        if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
            tty->driver->subtype == PTY_TYPE_MASTER)
@@ -952,23 +983,20 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
                return set_termios(real_tty, p, TERMIOS_OLD);
 #ifndef TCGETS2
        case TCGETS:
-               mutex_lock(&real_tty->termios_mutex);
-               if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios))
+               copy_termios(real_tty, &kterm);
+               if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
                        ret = -EFAULT;
-               mutex_unlock(&real_tty->termios_mutex);
                return ret;
 #else
        case TCGETS:
-               mutex_lock(&real_tty->termios_mutex);
-               if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios))
+               copy_termios(real_tty, &kterm);
+               if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
                        ret = -EFAULT;
-               mutex_unlock(&real_tty->termios_mutex);
                return ret;
        case TCGETS2:
-               mutex_lock(&real_tty->termios_mutex);
-               if (kernel_termios_to_user_termios((struct termios2 __user *)arg, real_tty->termios))
+               copy_termios(real_tty, &kterm);
+               if (kernel_termios_to_user_termios((struct termios2 __user *)arg, &kterm))
                        ret = -EFAULT;
-               mutex_unlock(&real_tty->termios_mutex);
                return ret;
        case TCSETSF2:
                return set_termios(real_tty, p,  TERMIOS_FLUSH | TERMIOS_WAIT);
@@ -987,34 +1015,36 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
                return set_termios(real_tty, p, TERMIOS_TERMIO);
 #ifndef TCGETS2
        case TIOCGLCKTRMIOS:
-               mutex_lock(&real_tty->termios_mutex);
-               if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios_locked))
+               copy_termios_locked(real_tty, &kterm);
+               if (kernel_termios_to_user_termios((struct termios __user *)arg, &kterm))
                        ret = -EFAULT;
-               mutex_unlock(&real_tty->termios_mutex);
                return ret;
        case TIOCSLCKTRMIOS:
                if (!capable(CAP_SYS_ADMIN))
                        return -EPERM;
-               mutex_lock(&real_tty->termios_mutex);
-               if (user_termios_to_kernel_termios(real_tty->termios_locked,
+               copy_termios_locked(real_tty, &kterm);
+               if (user_termios_to_kernel_termios(&kterm,
                                               (struct termios __user *) arg))
-                       ret = -EFAULT;
+                       return -EFAULT;
+               mutex_lock(&real_tty->termios_mutex);
+               memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
                mutex_unlock(&real_tty->termios_mutex);
-               return ret;
+               return 0;
 #else
        case TIOCGLCKTRMIOS:
-               mutex_lock(&real_tty->termios_mutex);
-               if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios_locked))
+               copy_termios_locked(real_tty, &kterm);
+               if (kernel_termios_to_user_termios_1((struct termios __user *)arg, &kterm))
                        ret = -EFAULT;
-               mutex_unlock(&real_tty->termios_mutex);
                return ret;
        case TIOCSLCKTRMIOS:
                if (!capable(CAP_SYS_ADMIN))
-                       ret = -EPERM;
-               mutex_lock(&real_tty->termios_mutex);
-               if (user_termios_to_kernel_termios_1(real_tty->termios_locked,
+                       return -EPERM;
+               copy_termios_locked(real_tty, &kterm);
+               if (user_termios_to_kernel_termios_1(&kterm,
                                               (struct termios __user *) arg))
-                       ret = -EFAULT;
+                       return -EFAULT;
+               mutex_lock(&real_tty->termios_mutex);
+               memcpy(real_tty->termios_locked, &kterm, sizeof(struct ktermios));
                mutex_unlock(&real_tty->termios_mutex);
                return ret;
 #endif
@@ -1023,9 +1053,10 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
                if (real_tty->termiox == NULL)
                        return -EINVAL;
                mutex_lock(&real_tty->termios_mutex);
-               if (copy_to_user(p, real_tty->termiox, sizeof(struct termiox)))
-                       ret = -EFAULT;
+               memcpy(&ktermx, real_tty->termiox, sizeof(struct termiox));
                mutex_unlock(&real_tty->termios_mutex);
+               if (copy_to_user(p, &ktermx, sizeof(struct termiox)))
+                       ret = -EFAULT;
                return ret;
        case TCSETX:
                return set_termiox(real_tty, p, 0);
@@ -1035,10 +1066,9 @@ int tty_mode_ioctl(struct tty_struct *tty, struct file *file,
                return set_termiox(real_tty, p, TERMIOS_FLUSH);
 #endif         
        case TIOCGSOFTCAR:
-               mutex_lock(&real_tty->termios_mutex);
-               ret = put_user(C_CLOCAL(real_tty) ? 1 : 0,
+               copy_termios(real_tty, &kterm);
+               ret = put_user((kterm.c_cflag & CLOCAL) ? 1 : 0,
                                                (int __user *)arg);
-               mutex_unlock(&real_tty->termios_mutex);
                return ret;
        case TIOCSSOFTCAR:
                if (get_user(arg, (unsigned int __user *) arg))
index f78f5b0..39c8f86 100644 (file)
@@ -115,19 +115,22 @@ EXPORT_SYMBOL(tty_unregister_ldisc);
 /**
  *     tty_ldisc_try_get       -       try and reference an ldisc
  *     @disc: ldisc number
- *     @ld: tty ldisc structure to complete
  *
  *     Attempt to open and lock a line discipline into place. Return
- *     the line discipline refcounted and assigned in ld. On an error
- *     report the error code back
+ *     the line discipline refcounted or an error.
  */
 
-static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_try_get(int disc)
 {
        unsigned long flags;
+       struct tty_ldisc *ld;
        struct tty_ldisc_ops *ldops;
        int err = -EINVAL;
-       
+
+       ld = kmalloc(sizeof(struct tty_ldisc), GFP_KERNEL);
+       if (ld == NULL)
+               return ERR_PTR(-ENOMEM);
+
        spin_lock_irqsave(&tty_ldisc_lock, flags);
        ld->ops = NULL;
        ldops = tty_ldiscs[disc];
@@ -140,17 +143,19 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
                        /* lock it */
                        ldops->refcount++;
                        ld->ops = ldops;
+                       ld->refcount = 0;
                        err = 0;
                }
        }
        spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-       return err;
+       if (err)
+               kfree(ld);      /* don't leak the wrapper on failure */
+       return err ? ERR_PTR(err) : ld;
 }
 
 /**
  *     tty_ldisc_get           -       take a reference to an ldisc
  *     @disc: ldisc number
- *     @ld: tty line discipline structure to use
  *
  *     Takes a reference to a line discipline. Deals with refcounts and
  *     module locking counts. Returns NULL if the discipline is not available.
@@ -161,52 +166,54 @@ static int tty_ldisc_try_get(int disc, struct tty_ldisc *ld)
  *             takes tty_ldisc_lock to guard against ldisc races
  */
 
-static int tty_ldisc_get(int disc, struct tty_ldisc *ld)
+static struct tty_ldisc *tty_ldisc_get(int disc)
 {
-       int err;
+       struct tty_ldisc *ld;
 
        if (disc < N_TTY || disc >= NR_LDISCS)
-               return -EINVAL;
-       err = tty_ldisc_try_get(disc, ld);
-       if (err < 0) {
+               return ERR_PTR(-EINVAL);
+       ld = tty_ldisc_try_get(disc);
+       if (IS_ERR(ld)) {
                request_module("tty-ldisc-%d", disc);
-               err = tty_ldisc_try_get(disc, ld);
+               ld = tty_ldisc_try_get(disc);
        }
-       return err;
+       return ld;
 }
 
 /**
  *     tty_ldisc_put           -       drop ldisc reference
- *     @disc: ldisc number
+ *     @ld: ldisc
  *
  *     Drop a reference to a line discipline. Manage refcounts and
- *     module usage counts
+ *     module usage counts. Free the ldisc once the refcount hits zero.
  *
  *     Locking:
  *             takes tty_ldisc_lock to guard against ldisc races
  */
 
-static void tty_ldisc_put(struct tty_ldisc_ops *ld)
+static void tty_ldisc_put(struct tty_ldisc *ld)
 {
        unsigned long flags;
-       int disc = ld->num;
+       int disc = ld->ops->num;
+       struct tty_ldisc_ops *ldo;
 
        BUG_ON(disc < N_TTY || disc >= NR_LDISCS);
 
        spin_lock_irqsave(&tty_ldisc_lock, flags);
-       ld = tty_ldiscs[disc];
-       BUG_ON(ld->refcount == 0);
-       ld->refcount--;
-       module_put(ld->owner);
+       ldo = tty_ldiscs[disc];
+       BUG_ON(ldo->refcount == 0);
+       ldo->refcount--;
+       module_put(ldo->owner);
        spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+       kfree(ld);
 }
 
-static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
+static void *tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
 {
        return (*pos < NR_LDISCS) ? pos : NULL;
 }
 
-static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
+static void *tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
 {
        (*pos)++;
        return (*pos < NR_LDISCS) ? pos : NULL;
@@ -219,12 +226,13 @@ static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
 static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
 {
        int i = *(loff_t *)v;
-       struct tty_ldisc ld;
-       
-       if (tty_ldisc_get(i, &ld) < 0)
+       struct tty_ldisc *ld;
+
+       ld = tty_ldisc_try_get(i);
+       if (IS_ERR(ld))
                return 0;
-       seq_printf(m, "%-10s %2d\n", ld.ops->name ? ld.ops->name : "???", i);
-       tty_ldisc_put(ld.ops);
+       seq_printf(m, "%-10s %2d\n", ld->ops->name ? ld->ops->name : "???", i);
+       tty_ldisc_put(ld);
        return 0;
 }
 
@@ -263,8 +271,7 @@ const struct file_operations tty_ldiscs_proc_fops = {
 
 static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld)
 {
-       ld->refcount = 0;
-       tty->ldisc = *ld;
+       tty->ldisc = ld;
 }
 
 /**
@@ -286,7 +293,7 @@ static int tty_ldisc_try(struct tty_struct *tty)
        int ret = 0;
 
        spin_lock_irqsave(&tty_ldisc_lock, flags);
-       ld = &tty->ldisc;
+       ld = tty->ldisc;
        if (test_bit(TTY_LDISC, &tty->flags)) {
                ld->refcount++;
                ret = 1;
@@ -315,10 +322,9 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty)
 {
        /* wait_event is a macro */
        wait_event(tty_ldisc_wait, tty_ldisc_try(tty));
-       WARN_ON(tty->ldisc.refcount == 0);
-       return &tty->ldisc;
+       WARN_ON(tty->ldisc->refcount == 0);
+       return tty->ldisc;
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
 
 /**
@@ -335,10 +341,9 @@ EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait);
 struct tty_ldisc *tty_ldisc_ref(struct tty_struct *tty)
 {
        if (tty_ldisc_try(tty))
-               return &tty->ldisc;
+               return tty->ldisc;
        return NULL;
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_ref);
 
 /**
@@ -366,7 +371,6 @@ void tty_ldisc_deref(struct tty_ldisc *ld)
                wake_up(&tty_ldisc_wait);
        spin_unlock_irqrestore(&tty_ldisc_lock, flags);
 }
-
 EXPORT_SYMBOL_GPL(tty_ldisc_deref);
 
 /**
@@ -388,6 +392,26 @@ void tty_ldisc_enable(struct tty_struct *tty)
        wake_up(&tty_ldisc_wait);
 }
 
+/**
+ *     tty_ldisc_flush -       flush line discipline queue
+ *     @tty: tty
+ *
+ *     Flush the line discipline queue (if any) for this tty. If there
+ *     is no line discipline active this is a no-op.
+ */
+
+void tty_ldisc_flush(struct tty_struct *tty)
+{
+       struct tty_ldisc *ld = tty_ldisc_ref(tty);
+       if (ld) {
+               if (ld->ops->flush_buffer)
+                       ld->ops->flush_buffer(tty);
+               tty_ldisc_deref(ld);
+       }
+       tty_buffer_flush(tty);
+}
+EXPORT_SYMBOL_GPL(tty_ldisc_flush);
+
 /**
  *     tty_set_termios_ldisc           -       set ldisc field
  *     @tty: tty structure
@@ -407,6 +431,39 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
        mutex_unlock(&tty->termios_mutex);
 }
 
+/**
+ *     tty_ldisc_open          -       open a line discipline
+ *     @tty: tty we are opening the ldisc on
+ *     @ld: discipline to open
+ *
+ *     A helper opening method. Also a convenient debugging and check
+ *     point.
+ */
+
+static int tty_ldisc_open(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+       WARN_ON(test_and_set_bit(TTY_LDISC_OPEN, &tty->flags));
+       if (ld->ops->open)
+               return ld->ops->open(tty);
+       return 0;
+}
+
+/**
+ *     tty_ldisc_close         -       close a line discipline
+ *     @tty: tty we are closing the ldisc on
+ *     @ld: discipline to close
+ *
+ *     A helper close method. Also a convenient debugging and check
+ *     point.
+ */
+
+static void tty_ldisc_close(struct tty_struct *tty, struct tty_ldisc *ld)
+{
+       WARN_ON(!test_bit(TTY_LDISC_OPEN, &tty->flags));
+       clear_bit(TTY_LDISC_OPEN, &tty->flags);
+       if (ld->ops->close)
+               ld->ops->close(tty);
+}
 
 /**
  *     tty_ldisc_restore       -       helper for tty ldisc change
@@ -420,66 +477,136 @@ static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
 static void tty_ldisc_restore(struct tty_struct *tty, struct tty_ldisc *old)
 {
        char buf[64];
-       struct tty_ldisc new_ldisc;
+       struct tty_ldisc *new_ldisc;
+       int r;
 
        /* There is an outstanding reference here so this is safe */
-       tty_ldisc_get(old->ops->num, old);
+       old = tty_ldisc_get(old->ops->num);
+       WARN_ON(IS_ERR(old));
        tty_ldisc_assign(tty, old);
        tty_set_termios_ldisc(tty, old->ops->num);
-       if (old->ops->open && (old->ops->open(tty) < 0)) {
-               tty_ldisc_put(old->ops);
+       if (tty_ldisc_open(tty, old) < 0) {
+               tty_ldisc_put(old);
                /* This driver is always present */
-               if (tty_ldisc_get(N_TTY, &new_ldisc) < 0)
+               new_ldisc = tty_ldisc_get(N_TTY);
+               if (IS_ERR(new_ldisc))
                        panic("n_tty: get");
-               tty_ldisc_assign(tty, &new_ldisc);
+               tty_ldisc_assign(tty, new_ldisc);
                tty_set_termios_ldisc(tty, N_TTY);
-               if (new_ldisc.ops->open) {
-                       int r = new_ldisc.ops->open(tty);
-                               if (r < 0)
-                               panic("Couldn't open N_TTY ldisc for "
-                                     "%s --- error %d.",
-                                     tty_name(tty, buf), r);
-               }
+               r = tty_ldisc_open(tty, new_ldisc);
+               if (r < 0)
+                       panic("Couldn't open N_TTY ldisc for "
+                             "%s --- error %d.",
+                             tty_name(tty, buf), r);
        }
 }
 
+/**
+ *     tty_ldisc_halt          -       shut down the line discipline
+ *     @tty: tty device
+ *
+ *     Shut down the line discipline and work queue for this tty device.
+ *     The TTY_LDISC flag being cleared ensures no further references can
+ *     be obtained while the delayed work queue halt ensures that no more
+ *     data is fed to the ldisc.
+ *
+ *     In order to wait for any existing references to complete see
+ *     tty_ldisc_wait_idle.
+ */
+
+static int tty_ldisc_halt(struct tty_struct *tty)
+{
+       clear_bit(TTY_LDISC, &tty->flags);
+       return cancel_delayed_work(&tty->buf.work);
+}
+
+/**
+ *     tty_ldisc_wait_idle     -       wait for the ldisc to become idle
+ *     @tty: tty to wait for
+ *
+ *     Wait for the line discipline to become idle. The discipline must
+ *     have been halted for this to guarantee it remains idle.
+ *
+ *     tty_ldisc_lock protects the ref counts currently.
+ */
+
+static int tty_ldisc_wait_idle(struct tty_struct *tty)
+{
+       unsigned long flags;
+       spin_lock_irqsave(&tty_ldisc_lock, flags);
+       while (tty->ldisc->refcount) {
+               spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+               if (wait_event_timeout(tty_ldisc_wait,
+                               tty->ldisc->refcount == 0, 5 * HZ) == 0)
+                       return -EBUSY;
+               spin_lock_irqsave(&tty_ldisc_lock, flags);
+       }
+       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+       return 0;
+}
+
 /**
  *     tty_set_ldisc           -       set line discipline
  *     @tty: the terminal to set
  *     @ldisc: the line discipline
  *
  *     Set the discipline of a tty line. Must be called from a process
- *     context.
+ *     context. The ldisc change logic has to protect itself against any
+ *     overlapping ldisc change (including on the other end of pty pairs),
+ *     the close of one side of a tty/pty pair, and eventually hangup.
  *
- *     Locking: takes tty_ldisc_lock.
- *              called functions take termios_mutex
+ *     Locking: takes tty_ldisc_lock, termios_mutex
  */
 
 int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 {
        int retval;
-       struct tty_ldisc o_ldisc, new_ldisc;
-       int work;
-       unsigned long flags;
+       struct tty_ldisc *o_ldisc, *new_ldisc;
+       int work, o_work = 0;
        struct tty_struct *o_tty;
 
-restart:
-       /* This is a bit ugly for now but means we can break the 'ldisc
-          is part of the tty struct' assumption later */
-       retval = tty_ldisc_get(ldisc, &new_ldisc);
-       if (retval)
-               return retval;
+       new_ldisc = tty_ldisc_get(ldisc);
+       if (IS_ERR(new_ldisc))
+               return PTR_ERR(new_ldisc);
+
+       /*
+        *      We need to look at the tty locking here for pty/tty pairs
+        *      when both sides try to change in parallel.
+        */
+
+       o_tty = tty->link;      /* o_tty is the pty side or NULL */
+
+
+       /*
+        *      Check the no-op case
+        */
+
+       if (tty->ldisc->ops->num == ldisc) {
+               tty_ldisc_put(new_ldisc);
+               return 0;
+       }
 
        /*
         *      Problem: What do we do if this blocks ?
+        *      We could deadlock here
         */
 
        tty_wait_until_sent(tty, 0);
 
-       if (tty->ldisc.ops->num == ldisc) {
-               tty_ldisc_put(new_ldisc.ops);
-               return 0;
+       mutex_lock(&tty->ldisc_mutex);
+
+       /*
+        *      We could be midstream of another ldisc change which has
+        *      dropped the lock during processing. If so we need to wait.
+        */
+
+       while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
+               mutex_unlock(&tty->ldisc_mutex);
+               wait_event(tty_ldisc_wait,
+                       test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0);
+               mutex_lock(&tty->ldisc_mutex);
        }
+       set_bit(TTY_LDISC_CHANGING, &tty->flags);
 
        /*
         *      No more input please, we are switching. The new ldisc
@@ -489,8 +616,6 @@ restart:
        tty->receive_room = 0;
 
        o_ldisc = tty->ldisc;
-       o_tty = tty->link;
-
        /*
         *      Make sure we don't change while someone holds a
         *      reference to the line discipline. The TTY_LDISC bit
@@ -501,108 +626,181 @@ restart:
         *      with a userspace app continually trying to use the tty in
         *      parallel to the change and re-referencing the tty.
         */
-       clear_bit(TTY_LDISC, &tty->flags);
-       if (o_tty)
-               clear_bit(TTY_LDISC, &o_tty->flags);
 
-       spin_lock_irqsave(&tty_ldisc_lock, flags);
-       if (tty->ldisc.refcount || (o_tty && o_tty->ldisc.refcount)) {
-               if (tty->ldisc.refcount) {
-                       /* Free the new ldisc we grabbed. Must drop the lock
-                          first. */
-                       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-                       tty_ldisc_put(o_ldisc.ops);
-                       /*
-                        * There are several reasons we may be busy, including
-                        * random momentary I/O traffic. We must therefore
-                        * retry. We could distinguish between blocking ops
-                        * and retries if we made tty_ldisc_wait() smarter.
-                        * That is up for discussion.
-                        */
-                       if (wait_event_interruptible(tty_ldisc_wait, tty->ldisc.refcount == 0) < 0)
-                               return -ERESTARTSYS;
-                       goto restart;
-               }
-               if (o_tty && o_tty->ldisc.refcount) {
-                       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-                       tty_ldisc_put(o_tty->ldisc.ops);
-                       if (wait_event_interruptible(tty_ldisc_wait, o_tty->ldisc.refcount == 0) < 0)
-                               return -ERESTARTSYS;
-                       goto restart;
-               }
-       }
-       /*
-        *      If the TTY_LDISC bit is set, then we are racing against
-        *      another ldisc change
-        */
-       if (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
-               struct tty_ldisc *ld;
-               spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-               tty_ldisc_put(new_ldisc.ops);
-               ld = tty_ldisc_ref_wait(tty);
-               tty_ldisc_deref(ld);
-               goto restart;
-       }
-       /*
-        *      This flag is used to avoid two parallel ldisc changes. Once
-        *      open and close are fine grained locked this may work better
-        *      as a mutex shared with the open/close/hup paths
-        */
-       set_bit(TTY_LDISC_CHANGING, &tty->flags);
+       work = tty_ldisc_halt(tty);
        if (o_tty)
-               set_bit(TTY_LDISC_CHANGING, &o_tty->flags);
-       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-       
-       /*
-        *      From this point on we know nobody has an ldisc
-        *      usage reference, nor can they obtain one until
-        *      we say so later on.
-        */
+               o_work = tty_ldisc_halt(o_tty);
 
-       work = cancel_delayed_work(&tty->buf.work);
        /*
-        * Wait for ->hangup_work and ->buf.work handlers to terminate
-        * MUST NOT hold locks here.
+        * Wait for ->hangup_work and ->buf.work handlers to terminate.
+        * We must drop the mutex here in case a hangup is also in process.
         */
+
+       mutex_unlock(&tty->ldisc_mutex);
+
        flush_scheduled_work();
+
+       /* Let any existing reference holders finish */
+       retval = tty_ldisc_wait_idle(tty);
+       if (retval < 0) {
+               clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+               tty_ldisc_put(new_ldisc);
+               return retval;
+       }
+
+       mutex_lock(&tty->ldisc_mutex);
+       if (test_bit(TTY_HUPPED, &tty->flags)) {
+               /* We were raced by the hangup method. It will have stomped
+                  the ldisc data and closed the ldisc down */
+               clear_bit(TTY_LDISC_CHANGING, &tty->flags);
+               mutex_unlock(&tty->ldisc_mutex);
+               tty_ldisc_put(new_ldisc);
+               return -EIO;
+       }
+
        /* Shutdown the current discipline. */
-       if (o_ldisc.ops->close)
-               (o_ldisc.ops->close)(tty);
+       tty_ldisc_close(tty, o_ldisc);
 
        /* Now set up the new line discipline. */
-       tty_ldisc_assign(tty, &new_ldisc);
+       tty_ldisc_assign(tty, new_ldisc);
        tty_set_termios_ldisc(tty, ldisc);
-       if (new_ldisc.ops->open)
-               retval = (new_ldisc.ops->open)(tty);
+
+       retval = tty_ldisc_open(tty, new_ldisc);
        if (retval < 0) {
-               tty_ldisc_put(new_ldisc.ops);
-               tty_ldisc_restore(tty, &o_ldisc);
+               /* Back to the old one or N_TTY if we can't */
+               tty_ldisc_put(new_ldisc);
+               tty_ldisc_restore(tty, o_ldisc);
        }
+
        /* At this point we hold a reference to the new ldisc and a
           a reference to the old ldisc. If we ended up flipping back
           to the existing ldisc we have two references to it */
 
-       if (tty->ldisc.ops->num != o_ldisc.ops->num && tty->ops->set_ldisc)
+       if (tty->ldisc->ops->num != o_ldisc->ops->num && tty->ops->set_ldisc)
                tty->ops->set_ldisc(tty);
 
-       tty_ldisc_put(o_ldisc.ops);
+       tty_ldisc_put(o_ldisc);
 
        /*
-        *      Allow ldisc referencing to occur as soon as the driver
-        *      ldisc callback completes.
+        *      Allow ldisc referencing to occur again
         */
 
        tty_ldisc_enable(tty);
        if (o_tty)
                tty_ldisc_enable(o_tty);
 
-       /* Restart it in case no characters kick it off. Safe if
+       /* Restart the work queue in case no characters kick it off. Safe if
           already running */
        if (work)
                schedule_delayed_work(&tty->buf.work, 1);
+       if (o_work)
+               schedule_delayed_work(&o_tty->buf.work, 1);
+       mutex_unlock(&tty->ldisc_mutex);
        return retval;
 }
 
+/**
+ *     tty_reset_termios       -       reset terminal state
+ *     @tty: tty to reset
+ *
+ *     Restore a terminal to the driver default state.
+ */
+
+static void tty_reset_termios(struct tty_struct *tty)
+{
+       mutex_lock(&tty->termios_mutex);
+       *tty->termios = tty->driver->init_termios;
+       tty->termios->c_ispeed = tty_termios_input_baud_rate(tty->termios);
+       tty->termios->c_ospeed = tty_termios_baud_rate(tty->termios);
+       mutex_unlock(&tty->termios_mutex);
+}
+
+
+/**
+ *     tty_ldisc_reinit        -       reinitialise the tty ldisc
+ *     @tty: tty to reinit
+ *
+ *     Switch the tty back to N_TTY line discipline and leave the
+ *     ldisc state closed
+ */
+
+static void tty_ldisc_reinit(struct tty_struct *tty)
+{
+       struct tty_ldisc *ld;
+
+       tty_ldisc_close(tty, tty->ldisc);
+       tty_ldisc_put(tty->ldisc);
+       tty->ldisc = NULL;
+       /*
+        *      Switch the line discipline back
+        */
+       ld = tty_ldisc_get(N_TTY);
+       BUG_ON(IS_ERR(ld));
+       tty_ldisc_assign(tty, ld);
+       tty_set_termios_ldisc(tty, N_TTY);
+}
+
+/**
+ *     tty_ldisc_hangup                -       hangup ldisc reset
+ *     @tty: tty being hung up
+ *
+ *     Some tty devices reset their termios when they receive a hangup
+ *     event. In that situation we must also switch back to N_TTY properly
+ *     before we reset the termios data.
+ *
+ *     Locking: We can take the ldisc mutex as the rest of the code is
+ *     careful to allow for this.
+ *
+ *     In the pty pair case this occurs in the close() path of the
+ *     tty itself so we must be careful about locking rules.
+ */
+
+void tty_ldisc_hangup(struct tty_struct *tty)
+{
+       struct tty_ldisc *ld;
+
+       /*
+        * FIXME! What are the locking issues here? This may be overdoing
+        * things... This question is especially important now that we've
+        * removed the irqlock.
+        */
+       ld = tty_ldisc_ref(tty);
+       if (ld != NULL) {
+               /* We may have no line discipline at this point */
+               if (ld->ops->flush_buffer)
+                       ld->ops->flush_buffer(tty);
+               tty_driver_flush_buffer(tty);
+               if ((test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags)) &&
+                   ld->ops->write_wakeup)
+                       ld->ops->write_wakeup(tty);
+               if (ld->ops->hangup)
+                       ld->ops->hangup(tty);
+               tty_ldisc_deref(ld);
+       }
+       /*
+        * FIXME: Once we trust the LDISC code better we can wait here for
+        * ldisc completion and fix the driver call race
+        */
+       wake_up_interruptible_poll(&tty->write_wait, POLLOUT);
+       wake_up_interruptible_poll(&tty->read_wait, POLLIN);
+       /*
+        * Shutdown the current line discipline, and reset it to
+        * N_TTY.
+        */
+       if (tty->driver->flags & TTY_DRIVER_RESET_TERMIOS) {
+               /* Avoid racing set_ldisc */
+               mutex_lock(&tty->ldisc_mutex);
+               /* Switch back to N_TTY */
+               tty_ldisc_reinit(tty);
+               /* At this point we have a closed ldisc and we want to
+                  reopen it. We could defer this to the next open but
+                  it means auditing a lot of other paths so this is a FIXME */
+               WARN_ON(tty_ldisc_open(tty, tty->ldisc));
+               tty_ldisc_enable(tty);
+               mutex_unlock(&tty->ldisc_mutex);
+               tty_reset_termios(tty);
+       }
+}
 
 /**
  *     tty_ldisc_setup                 -       open line discipline
@@ -610,24 +808,23 @@ restart:
  *     @o_tty: pair tty for pty/tty pairs
  *
  *     Called during the initial open of a tty/pty pair in order to set up the
- *     line discplines and bind them to the tty.
+ *     line disciplines and bind them to the tty. This has no locking issues
+ *     as the device isn't yet active.
  */
 
 int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-       struct tty_ldisc *ld = &tty->ldisc;
+       struct tty_ldisc *ld = tty->ldisc;
        int retval;
 
-       if (ld->ops->open) {
-               retval = (ld->ops->open)(tty);
-               if (retval)
-                       return retval;
-       }
-       if (o_tty && o_tty->ldisc.ops->open) {
-               retval = (o_tty->ldisc.ops->open)(o_tty);
+       retval = tty_ldisc_open(tty, ld);
+       if (retval)
+               return retval;
+
+       if (o_tty) {
+               retval = tty_ldisc_open(o_tty, o_tty->ldisc);
                if (retval) {
-                       if (ld->ops->close)
-                               (ld->ops->close)(tty);
+                       tty_ldisc_close(tty, ld);
                        return retval;
                }
                tty_ldisc_enable(o_tty);
@@ -635,32 +832,25 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
        tty_ldisc_enable(tty);
        return 0;
 }
-
 /**
  *     tty_ldisc_release               -       release line discipline
  *     @tty: tty being shut down
  *     @o_tty: pair tty for pty/tty pairs
  *
- *     Called during the final close of a tty/pty pair in order to shut down the
- *     line discpline layer.
+ *     Called during the final close of a tty/pty pair in order to shut down
+ *     the line discipline layer. On exit the ldisc assigned is N_TTY and the
+ *     ldisc has not been opened.
  */
 
 void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 {
-       unsigned long flags;
-       struct tty_ldisc ld;
        /*
         * Prevent flush_to_ldisc() from rescheduling the work for later.  Then
         * kill any delayed work. As this is the final close it does not
         * race with the set_ldisc code path.
         */
-       clear_bit(TTY_LDISC, &tty->flags);
-       cancel_delayed_work(&tty->buf.work);
-
-       /*
-        * Wait for ->hangup_work and ->buf.work handlers to terminate
-        */
 
+       tty_ldisc_halt(tty);
        flush_scheduled_work();
 
        /*
@@ -668,38 +858,19 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
         * side waiters as the file is closing so user count on the file
         * side is zero.
         */
-       spin_lock_irqsave(&tty_ldisc_lock, flags);
-       while (tty->ldisc.refcount) {
-               spin_unlock_irqrestore(&tty_ldisc_lock, flags);
-               wait_event(tty_ldisc_wait, tty->ldisc.refcount == 0);
-               spin_lock_irqsave(&tty_ldisc_lock, flags);
-       }
-       spin_unlock_irqrestore(&tty_ldisc_lock, flags);
+
+       tty_ldisc_wait_idle(tty);
+
        /*
         * Shutdown the current line discipline, and reset it to N_TTY.
         *
         * FIXME: this MUST get fixed for the new reflocking
         */
-       if (tty->ldisc.ops->close)
-               (tty->ldisc.ops->close)(tty);
-       tty_ldisc_put(tty->ldisc.ops);
 
-       /*
-        *      Switch the line discipline back
-        */
-       WARN_ON(tty_ldisc_get(N_TTY, &ld));
-       tty_ldisc_assign(tty, &ld);
-       tty_set_termios_ldisc(tty, N_TTY);
-       if (o_tty) {
-               /* FIXME: could o_tty be in setldisc here ? */
-               clear_bit(TTY_LDISC, &o_tty->flags);
-               if (o_tty->ldisc.ops->close)
-                       (o_tty->ldisc.ops->close)(o_tty);
-               tty_ldisc_put(o_tty->ldisc.ops);
-               WARN_ON(tty_ldisc_get(N_TTY, &ld));
-               tty_ldisc_assign(o_tty, &ld);
-               tty_set_termios_ldisc(o_tty, N_TTY);
-       }
+       tty_ldisc_reinit(tty);
+       /* This will need doing differently if we need to lock */
+       if (o_tty)
+               tty_ldisc_release(o_tty, NULL);
 }
 
 /**
@@ -712,10 +883,10 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 
 void tty_ldisc_init(struct tty_struct *tty)
 {
-       struct tty_ldisc ld;
-       if (tty_ldisc_get(N_TTY, &ld) < 0)
+       struct tty_ldisc *ld = tty_ldisc_get(N_TTY);
+       if (IS_ERR(ld))
                panic("n_tty: init_tty");
-       tty_ldisc_assign(tty, &ld);
+       tty_ldisc_assign(tty, ld);
 }
 
 void tty_ldisc_begin(void)
index 9b8004c..62dadfc 100644 (file)
@@ -137,7 +137,7 @@ int tty_port_carrier_raised(struct tty_port *port)
 EXPORT_SYMBOL(tty_port_carrier_raised);
 
 /**
- *     tty_port_raise_dtr_rts  -       Riase DTR/RTS
+ *     tty_port_raise_dtr_rts  -       Raise DTR/RTS
  *     @port: tty port
  *
  *     Wrapper for the DTR/RTS raise logic. For the moment this is used
@@ -147,11 +147,27 @@ EXPORT_SYMBOL(tty_port_carrier_raised);
 
 void tty_port_raise_dtr_rts(struct tty_port *port)
 {
-       if (port->ops->raise_dtr_rts)
-               port->ops->raise_dtr_rts(port);
+       if (port->ops->dtr_rts)
+               port->ops->dtr_rts(port, 1);
 }
 EXPORT_SYMBOL(tty_port_raise_dtr_rts);
 
+/**
+ *     tty_port_lower_dtr_rts  -       Lower DTR/RTS
+ *     @port: tty port
+ *
+ *     Wrapper for the DTR/RTS lower logic. For the moment this is used
+ *     to hide some internal details. This will eventually become entirely
+ *     internal to the tty port.
+ */
+
+void tty_port_lower_dtr_rts(struct tty_port *port)
+{
+       if (port->ops->dtr_rts)
+               port->ops->dtr_rts(port, 0);
+}
+EXPORT_SYMBOL(tty_port_lower_dtr_rts);
+
 /**
  *     tty_port_block_til_ready        -       Waiting logic for tty open
  *     @port: the tty port being opened
@@ -167,7 +183,7 @@ EXPORT_SYMBOL(tty_port_raise_dtr_rts);
  *             - port flags and counts
  *
  *     The passed tty_port must implement the carrier_raised method if it can
- *     do carrier detect and the raise_dtr_rts method if it supports software
+ *     do carrier detect and the dtr_rts method if it supports software
  *     management of these lines. Note that the dtr/rts raise is done each
  *     iteration as a hangup may have previously dropped them while we wait.
  */
@@ -182,7 +198,8 @@ int tty_port_block_til_ready(struct tty_port *port,
 
        /* block if port is in the process of being closed */
        if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-               interruptible_sleep_on(&port->close_wait);
+               wait_event_interruptible(port->close_wait,
+                               !(port->flags & ASYNC_CLOSING));
                if (port->flags & ASYNC_HUP_NOTIFY)
                        return -EAGAIN;
                else
@@ -205,7 +222,6 @@ int tty_port_block_til_ready(struct tty_port *port,
           before the next open may complete */
 
        retval = 0;
-       add_wait_queue(&port->open_wait, &wait);
 
        /* The port lock protects the port counts */
        spin_lock_irqsave(&port->lock, flags);
@@ -219,7 +235,7 @@ int tty_port_block_til_ready(struct tty_port *port,
                if (tty->termios->c_cflag & CBAUD)
                        tty_port_raise_dtr_rts(port);
 
-               set_current_state(TASK_INTERRUPTIBLE);
+               prepare_to_wait(&port->open_wait, &wait, TASK_INTERRUPTIBLE);
                /* Check for a hangup or uninitialised port. Return accordingly */
                if (tty_hung_up_p(filp) || !(port->flags & ASYNC_INITIALIZED)) {
                        if (port->flags & ASYNC_HUP_NOTIFY)
@@ -240,8 +256,7 @@ int tty_port_block_til_ready(struct tty_port *port,
                }
                schedule();
        }
-       set_current_state(TASK_RUNNING);
-       remove_wait_queue(&port->open_wait, &wait);
+       finish_wait(&port->open_wait, &wait);
 
        /* Update counts. A parallel hangup will have set count to zero and
           we must not mess that up further */
@@ -292,6 +307,17 @@ int tty_port_close_start(struct tty_port *port, struct tty_struct *tty, struct f
        if (port->flags & ASYNC_INITIALIZED &&
                        port->closing_wait != ASYNC_CLOSING_WAIT_NONE)
                tty_wait_until_sent(tty, port->closing_wait);
+       if (port->drain_delay) {
+               unsigned int bps = tty_get_baud_rate(tty);
+               long timeout;
+
+               if (bps > 1200)
+                       timeout = max_t(long, (HZ * 10 * port->drain_delay) / bps,
+                                                               HZ / 10);
+               else
+                       timeout = 2 * HZ;
+               schedule_timeout_interruptible(timeout);
+       }
        return 1;
 }
 EXPORT_SYMBOL(tty_port_close_start);
@@ -302,6 +328,9 @@ void tty_port_close_end(struct tty_port *port, struct tty_struct *tty)
 
        tty_ldisc_flush(tty);
 
+       if (tty->termios->c_cflag & HUPCL)
+               tty_port_lower_dtr_rts(port);
+
        spin_lock_irqsave(&port->lock, flags);
        tty->closing = 0;
 
index 47d2ad0..6e2ec0b 100644 (file)
@@ -808,7 +808,7 @@ static int cpufreq_add_dev(struct sys_device *sys_dev)
                ret = -ENOMEM;
                goto nomem_out;
        }
-       if (!alloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
+       if (!zalloc_cpumask_var(&policy->related_cpus, GFP_KERNEL)) {
                free_cpumask_var(policy->cpus);
                kfree(policy);
                ret = -ENOMEM;
index 94a7688..8fab789 100644 (file)
@@ -2294,7 +2294,12 @@ int drm_mode_connector_property_set_ioctl(struct drm_device *dev,
                }
        }
 
-       if (connector->funcs->set_property)
+       /* Do DPMS ourselves */
+       if (property == connector->dev->mode_config.dpms_property) {
+               if (connector->funcs->dpms)
+                       (*connector->funcs->dpms)(connector, (int) out_resp->value);
+               ret = 0;
+       } else if (connector->funcs->set_property)
                ret = connector->funcs->set_property(connector, property, out_resp->value);
 
        /* store the property value if succesful */
index 4589044..a6f73f1 100644 (file)
@@ -198,6 +198,29 @@ static void drm_helper_add_std_modes(struct drm_device *dev,
        }
 }
 
+/**
+ * drm_helper_encoder_in_use - check if a given encoder is in use
+ * @encoder: encoder to check
+ *
+ * LOCKING:
+ * Caller must hold mode config lock.
+ *
+ * Walk the connector list of @encoder's DRM device and see whether any
+ * connector currently points at @encoder.
+ *
+ * RETURNS:
+ * True if a connector in the mode_config uses @encoder, false otherwise.
+ */
+bool drm_helper_encoder_in_use(struct drm_encoder *encoder)
+{
+       struct drm_device *drm = encoder->dev;
+       struct drm_connector *conn;
+       bool in_use = false;
+
+       list_for_each_entry(conn, &drm->mode_config.connector_list, head) {
+               if (conn->encoder == encoder) {
+                       in_use = true;
+                       break;
+               }
+       }
+
+       return in_use;
+}
+EXPORT_SYMBOL(drm_helper_encoder_in_use);
+
 /**
  * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config
  * @crtc: CRTC to check
@@ -216,7 +239,7 @@ bool drm_helper_crtc_in_use(struct drm_crtc *crtc)
        struct drm_device *dev = crtc->dev;
        /* FIXME: Locking around list access? */
        list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
-               if (encoder->crtc == crtc)
+               if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder))
                        return true;
        return false;
 }
@@ -240,7 +263,7 @@ void drm_helper_disable_unused_functions(struct drm_device *dev)
 
        list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
                encoder_funcs = encoder->helper_private;
-               if (!encoder->crtc)
+               if (!drm_helper_encoder_in_use(encoder))
                        (*encoder_funcs->dpms)(encoder, DRM_MODE_DPMS_OFF);
        }
 
@@ -935,6 +958,88 @@ bool drm_helper_initial_config(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_helper_initial_config);
 
+/*
+ * Choose the DPMS value for @encoder: the numerically smallest ->dpms of
+ * all connectors attached to it, never larger than DRM_MODE_DPMS_OFF
+ * (which is also the result when no connector is attached).
+ */
+static int drm_helper_choose_encoder_dpms(struct drm_encoder *encoder)
+{
+       struct drm_device *drm = encoder->dev;
+       struct drm_connector *conn;
+       int lowest = DRM_MODE_DPMS_OFF;
+
+       list_for_each_entry(conn, &drm->mode_config.connector_list, head) {
+               if (conn->encoder != encoder)
+                       continue;
+               if (conn->dpms < lowest)
+                       lowest = conn->dpms;
+       }
+
+       return lowest;
+}
+
+/*
+ * Choose the DPMS value for @crtc: the numerically smallest ->dpms of all
+ * connectors whose encoder is routed to @crtc, never larger than
+ * DRM_MODE_DPMS_OFF (which is also the result when no connector matches).
+ */
+static int drm_helper_choose_crtc_dpms(struct drm_crtc *crtc)
+{
+       struct drm_device *drm = crtc->dev;
+       struct drm_connector *conn;
+       int lowest = DRM_MODE_DPMS_OFF;
+
+       list_for_each_entry(conn, &drm->mode_config.connector_list, head) {
+               struct drm_encoder *enc = conn->encoder;
+
+               /* connectors without an encoder cannot drive any crtc */
+               if (!enc || enc->crtc != crtc)
+                       continue;
+               if (conn->dpms < lowest)
+                       lowest = conn->dpms;
+       }
+
+       return lowest;
+}
+
+/* Call @crtc's helper ->dpms hook (if any) with the state its connectors need. */
+static void drm_helper_crtc_dpms_update(struct drm_crtc *crtc)
+{
+       struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
+
+       if (crtc_funcs->dpms)
+               (*crtc_funcs->dpms) (crtc,
+                                    drm_helper_choose_crtc_dpms(crtc));
+}
+
+/* Call @encoder's helper ->dpms hook (if any) with the state its connectors need. */
+static void drm_helper_encoder_dpms_update(struct drm_encoder *encoder)
+{
+       struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+
+       if (encoder_funcs->dpms)
+               (*encoder_funcs->dpms) (encoder,
+                                       drm_helper_choose_encoder_dpms(encoder));
+}
+
+/**
+ * drm_helper_connector_dpms - set the DPMS state of a connector
+ * @connector: affected connector
+ * @mode: DPMS mode
+ *
+ * Records @mode in @connector->dpms and then calls the DPMS hooks of the
+ * encoder and crtc currently attached to the connector (when present), each
+ * with the state chosen from all connectors sharing that encoder/crtc.
+ * Does nothing if @mode equals the connector's current DPMS state.
+ */
+void drm_helper_connector_dpms(struct drm_connector *connector, int mode)
+{
+       struct drm_encoder *encoder = connector->encoder;
+       struct drm_crtc *crtc = encoder ? encoder->crtc : NULL;
+       int old_dpms = connector->dpms;
+
+       if (mode == old_dpms)
+               return;
+
+       /* must be stored first: the choose helpers read connector->dpms */
+       connector->dpms = mode;
+
+       if (mode < old_dpms) {
+               /* from off to on, do crtc then encoder */
+               if (crtc)
+                       drm_helper_crtc_dpms_update(crtc);
+               if (encoder)
+                       drm_helper_encoder_dpms_update(encoder);
+       } else {
+               /* from on to off, do encoder then crtc */
+               if (encoder)
+                       drm_helper_encoder_dpms_update(encoder);
+               if (crtc)
+                       drm_helper_crtc_dpms_update(crtc);
+       }
+}
+EXPORT_SYMBOL(drm_helper_connector_dpms);
+
 /**
  * drm_hotplug_stage_two
  * @dev DRM device
index ca9c616..6f6b264 100644 (file)
@@ -289,6 +289,11 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_device *dev,
        struct drm_display_mode *mode;
        struct detailed_pixel_timing *pt = &timing->data.pixel_data;
 
+       /* ignore tiny modes: either active dimension below 64 pixels/lines */
+       if (((pt->hactive_hi << 8) | pt->hactive_lo) < 64 ||
+           ((pt->vactive_hi << 8) | pt->vactive_lo) < 64)
+               return NULL;
+
        if (pt->stereo) {
                printk(KERN_WARNING "stereo mode not supported\n");
                return NULL;
index 93e677a..fc8e5ac 100644 (file)
@@ -196,6 +196,7 @@ int drm_irq_install(struct drm_device *dev)
 {
        int ret = 0;
        unsigned long sh_flags = 0;
+       char *irqname;
 
        if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ))
                return -EINVAL;
@@ -227,8 +228,13 @@ int drm_irq_install(struct drm_device *dev)
        if (drm_core_check_feature(dev, DRIVER_IRQ_SHARED))
                sh_flags = IRQF_SHARED;
 
+       if (dev->devname)
+               irqname = dev->devname;
+       else
+               irqname = dev->driver->name;
+
        ret = request_irq(drm_dev_to_irq(dev), dev->driver->irq_handler,
-                         sh_flags, dev->devname, dev);
+                         sh_flags, irqname, dev);
 
        if (ret < 0) {
                mutex_lock(&dev->struct_mutex);
index 8f93729..9987ab8 100644 (file)
@@ -147,7 +147,7 @@ static ssize_t status_show(struct device *device,
        enum drm_connector_status status;
 
        status = connector->funcs->detect(connector);
-       return snprintf(buf, PAGE_SIZE, "%s",
+       return snprintf(buf, PAGE_SIZE, "%s\n",
                        drm_get_connector_status_name(status));
 }
 
@@ -166,7 +166,7 @@ static ssize_t dpms_show(struct device *device,
        if (ret)
                return 0;
 
-       return snprintf(buf, PAGE_SIZE, "%s",
+       return snprintf(buf, PAGE_SIZE, "%s\n",
                        drm_get_dpms_name((int)dpms_status));
 }
 
@@ -176,7 +176,7 @@ static ssize_t enabled_show(struct device *device,
 {
        struct drm_connector *connector = to_drm_connector(device);
 
-       return snprintf(buf, PAGE_SIZE, connector->encoder ? "enabled" :
+       return snprintf(buf, PAGE_SIZE, "%s\n", connector->encoder ? "enabled" :
                        "disabled");
 }
 
@@ -317,6 +317,7 @@ static struct device_attribute connector_attrs_opt1[] = {
 
 static struct bin_attribute edid_attr = {
        .attr.name = "edid",
+       .attr.mode = 0444,
        .size = 128,
        .read = edid_show,
 };
index 53d5445..0ccb63e 100644 (file)
@@ -987,12 +987,6 @@ static int i915_load_modeset_init(struct drm_device *dev)
        int fb_bar = IS_I9XX(dev) ? 2 : 0;
        int ret = 0;
 
-       dev->devname = kstrdup(DRIVER_NAME, GFP_KERNEL);
-       if (!dev->devname) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
        dev->mode_config.fb_base = drm_get_resource_start(dev, fb_bar) &
                0xff000000;
 
@@ -1006,7 +1000,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
        ret = i915_probe_agp(dev, &agp_size, &prealloc_size);
        if (ret)
-               goto kfree_devname;
+               goto out;
 
        /* Basic memrange allocator for stolen space (aka vram) */
        drm_mm_init(&dev_priv->vram, 0, prealloc_size);
@@ -1024,7 +1018,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
        ret = i915_gem_init_ringbuffer(dev);
        if (ret)
-               goto kfree_devname;
+               goto out;
 
        /* Allow hardware batchbuffers unless told otherwise.
         */
@@ -1056,8 +1050,6 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 destroy_ringbuffer:
        i915_gem_cleanup_ringbuffer(dev);
-kfree_devname:
-       kfree(dev->devname);
 out:
        return ret;
 }
index 670d128..39f5c65 100644 (file)
@@ -2260,9 +2260,6 @@ try_again:
                        goto try_again;
                }
 
-               BUG_ON(old_obj_priv->active ||
-                      (reg->obj->write_domain & I915_GEM_GPU_DOMAINS));
-
                /*
                 * Zap this virtual mapping so we can set up a fence again
                 * for this object next time we need it.
index 640f515..79acc4f 100644 (file)
@@ -381,11 +381,6 @@ static int intel_crt_set_property(struct drm_connector *connector,
                                  struct drm_property *property,
                                  uint64_t value)
 {
-       struct drm_device *dev = connector->dev;
-
-       if (property == dev->mode_config.dpms_property && connector->encoder)
-               intel_crt_dpms(connector->encoder, (uint32_t)(value & 0xf));
-
        return 0;
 }
 
@@ -402,6 +397,7 @@ static const struct drm_encoder_helper_funcs intel_crt_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_crt_connector_funcs = {
+       .dpms = drm_helper_connector_dpms,
        .detect = intel_crt_detect,
        .fill_modes = drm_helper_probe_single_connector_modes,
        .destroy = intel_crt_destroy,
index 8b8d6e6..1ee3007 100644 (file)
@@ -316,6 +316,7 @@ static const struct drm_encoder_helper_funcs intel_dvo_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_dvo_connector_funcs = {
+       .dpms = drm_helper_connector_dpms,
        .save = intel_dvo_save,
        .restore = intel_dvo_restore,
        .detect = intel_dvo_detect,
index d0983bb..7d6bdd7 100644 (file)
@@ -219,6 +219,7 @@ static const struct drm_encoder_helper_funcs intel_hdmi_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_hdmi_connector_funcs = {
+       .dpms = drm_helper_connector_dpms,
        .save = intel_hdmi_save,
        .restore = intel_hdmi_restore,
        .detect = intel_hdmi_detect,
index 53731f0..53cccfa 100644 (file)
@@ -343,11 +343,6 @@ static int intel_lvds_set_property(struct drm_connector *connector,
                                   struct drm_property *property,
                                   uint64_t value)
 {
-       struct drm_device *dev = connector->dev;
-
-       if (property == dev->mode_config.dpms_property && connector->encoder)
-               intel_lvds_dpms(connector->encoder, (uint32_t)(value & 0xf));
-
        return 0;
 }
 
@@ -366,6 +361,7 @@ static const struct drm_connector_helper_funcs intel_lvds_connector_helper_funcs
 };
 
 static const struct drm_connector_funcs intel_lvds_connector_funcs = {
+       .dpms = drm_helper_connector_dpms,
        .save = intel_lvds_save,
        .restore = intel_lvds_restore,
        .detect = intel_lvds_detect,
@@ -391,7 +387,7 @@ static int __init intel_no_lvds_dmi_callback(const struct dmi_system_id *id)
 }
 
 /* These systems claim to have LVDS, but really don't */
-static const struct dmi_system_id __initdata intel_no_lvds[] = {
+static const struct dmi_system_id intel_no_lvds[] = {
        {
                .callback = intel_no_lvds_dmi_callback,
                .ident = "Apple Mac Mini (Core series)",
index f3ef6bf..3093b4d 100644 (file)
@@ -1616,6 +1616,7 @@ static const struct drm_encoder_helper_funcs intel_sdvo_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_sdvo_connector_funcs = {
+       .dpms = drm_helper_connector_dpms,
        .save = intel_sdvo_save,
        .restore = intel_sdvo_restore,
        .detect = intel_sdvo_detect,
index d2c3298..98ac054 100644 (file)
@@ -1626,6 +1626,7 @@ static const struct drm_encoder_helper_funcs intel_tv_helper_funcs = {
 };
 
 static const struct drm_connector_funcs intel_tv_connector_funcs = {
+       .dpms = drm_helper_connector_dpms,
        .save = intel_tv_save,
        .restore = intel_tv_restore,
        .detect = intel_tv_detect,
index 77a7a4d..aff90bb 100644 (file)
@@ -2185,9 +2185,9 @@ void radeon_commit_ring(drm_radeon_private_t *dev_priv)
 
        /* check if the ring is padded out to 16-dword alignment */
 
-       tail_aligned = dev_priv->ring.tail & 0xf;
+       tail_aligned = dev_priv->ring.tail & (RADEON_RING_ALIGN-1);
        if (tail_aligned) {
-               int num_p2 = 16 - tail_aligned;
+               int num_p2 = RADEON_RING_ALIGN - tail_aligned;
 
                ring = dev_priv->ring.start;
                /* pad with some CP_PACKET2 */
index 8071d96..0c6bfc1 100644 (file)
@@ -1964,11 +1964,14 @@ do {                                                            \
 
 #define RING_LOCALS    int write, _nr, _align_nr; unsigned int mask; u32 *ring;
 
+#define RADEON_RING_ALIGN 16
+
 #define BEGIN_RING( n ) do {                                           \
        if ( RADEON_VERBOSE ) {                                         \
                DRM_INFO( "BEGIN_RING( %d )\n", (n));                   \
        }                                                               \
-       _align_nr = (n + 0xf) & ~0xf;                                   \
+       _align_nr = RADEON_RING_ALIGN - ((dev_priv->ring.tail + n) & (RADEON_RING_ALIGN-1));    \
+       _align_nr += n;                                                 \
        if (dev_priv->ring.space <= (_align_nr * sizeof(u32))) {        \
                 COMMIT_RING();                                         \
                radeon_wait_ring( dev_priv, _align_nr * sizeof(u32));   \
index 537da1c..e59b6de 100644 (file)
@@ -402,27 +402,23 @@ static u8 ali_cable_detect(ide_hwif_t *hwif)
        return cbl;
 }
 
-#if !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC)
+#ifndef CONFIG_SPARC64
 /**
  *     init_hwif_ali15x3       -       Initialize the ALI IDE x86 stuff
  *     @hwif: interface to configure
  *
  *     Obtain the IRQ tables for an ALi based IDE solution on the PC
  *     class platforms. This part of the code isn't applicable to the
- *     Sparc and PowerPC systems.
+ *     Sparc systems.
  */
 
 static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif)
 {
-       struct pci_dev *dev = to_pci_dev(hwif->dev);
        u8 ideic, inmir;
        s8 irq_routing_table[] = { -1,  9, 3, 10, 4,  5, 7,  6,
                                      1, 11, 0, 12, 0, 14, 0, 15 };
        int irq = -1;
 
-       if (dev->device == PCI_DEVICE_ID_AL_M5229)
-               hwif->irq = hwif->channel ? 15 : 14;
-
        if (isa_dev) {
                /*
                 * read IDE interface control
@@ -455,7 +451,7 @@ static void __devinit init_hwif_ali15x3 (ide_hwif_t *hwif)
 }
 #else
 #define init_hwif_ali15x3 NULL
-#endif /* !defined(CONFIG_SPARC64) && !defined(CONFIG_PPC) */
+#endif /* CONFIG_SPARC64 */
 
 /**
  *     init_dma_ali15x3        -       set up DMA on ALi15x3
index 7201b17..afe5a43 100644 (file)
@@ -79,34 +79,6 @@ void ide_init_pc(struct ide_atapi_pc *pc)
 }
 EXPORT_SYMBOL_GPL(ide_init_pc);
 
-/*
- * Generate a new packet command request in front of the request queue, before
- * the current request, so that it will be processed immediately, on the next
- * pass through the driver.
- */
-static void ide_queue_pc_head(ide_drive_t *drive, struct gendisk *disk,
-                             struct ide_atapi_pc *pc, struct request *rq)
-{
-       blk_rq_init(NULL, rq);
-       rq->cmd_type = REQ_TYPE_SPECIAL;
-       rq->cmd_flags |= REQ_PREEMPT;
-       rq->buffer = (char *)pc;
-       rq->rq_disk = disk;
-
-       if (pc->req_xfer) {
-               rq->data = pc->buf;
-               rq->data_len = pc->req_xfer;
-       }
-
-       memcpy(rq->cmd, pc->c, 12);
-       if (drive->media == ide_tape)
-               rq->cmd[13] = REQ_IDETAPE_PC1;
-
-       drive->hwif->rq = NULL;
-
-       elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
 /*
  * Add a special packet command request to the tail of the request queue,
  * and wait for it to be serviced.
@@ -119,19 +91,21 @@ int ide_queue_pc_tail(ide_drive_t *drive, struct gendisk *disk,
 
        rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
        rq->cmd_type = REQ_TYPE_SPECIAL;
-       rq->buffer = (char *)pc;
+       rq->special = (char *)pc;
 
        if (pc->req_xfer) {
-               rq->data = pc->buf;
-               rq->data_len = pc->req_xfer;
+               error = blk_rq_map_kern(drive->queue, rq, pc->buf, pc->req_xfer,
+                                       GFP_NOIO);
+               if (error)
+                       goto put_req;
        }
 
        memcpy(rq->cmd, pc->c, 12);
        if (drive->media == ide_tape)
                rq->cmd[13] = REQ_IDETAPE_PC1;
        error = blk_execute_rq(drive->queue, disk, rq, 0);
+put_req:
        blk_put_request(rq);
-
        return error;
 }
 EXPORT_SYMBOL_GPL(ide_queue_pc_tail);
@@ -191,20 +165,103 @@ void ide_create_request_sense_cmd(ide_drive_t *drive, struct ide_atapi_pc *pc)
 }
 EXPORT_SYMBOL_GPL(ide_create_request_sense_cmd);
 
+/*
+ * Set up drive->sense_rq as a REQUEST SENSE command whose data buffer is
+ * drive->sense_data, and mark it armed so that ide_queue_sense_rq() can
+ * queue it later.  On mapping failure the request is left unarmed.
+ */
+void ide_prep_sense(ide_drive_t *drive, struct request *rq)
+{
+       struct request *sense_rq = &drive->sense_rq;
+       struct request_sense *sense = &drive->sense_data;
+       unsigned int cmd_len, sense_len;
+       int err;
+
+       debug_log("%s: enter\n", __func__);
+
+       /* allocation length and transfer size depend on the media type */
+       if (drive->media == ide_floppy) {
+               cmd_len = 255;
+               sense_len = 18;
+       } else if (drive->media == ide_tape) {
+               cmd_len = 20;
+               sense_len = 20;
+       } else {
+               cmd_len = 18;
+               sense_len = 18;
+       }
+
+       BUG_ON(sense_len > sizeof(*sense));
+
+       /*
+        * Skip when blk_sense_request(rq) is true (presumably rq is itself
+        * a sense request) or when the sense request is already armed.
+        */
+       if (blk_sense_request(rq) || drive->sense_rq_armed)
+               return;
+
+       memset(sense, 0, sizeof(*sense));
+
+       blk_rq_init(rq->q, sense_rq);
+
+       err = blk_rq_map_kern(drive->queue, sense_rq, sense, sense_len,
+                             GFP_NOIO);
+       if (unlikely(err)) {
+               if (printk_ratelimit())
+                       printk(KERN_WARNING "%s: failed to map sense buffer\n",
+                              drive->name);
+               return;
+       }
+
+       sense_rq->cmd_type = REQ_TYPE_SENSE;
+       sense_rq->cmd_flags |= REQ_PREEMPT;
+       sense_rq->rq_disk = rq->rq_disk;
+
+       sense_rq->cmd[0] = GPCMD_REQUEST_SENSE;
+       sense_rq->cmd[4] = cmd_len;
+       if (drive->media == ide_tape)
+               sense_rq->cmd[13] = REQ_IDETAPE_PC1;
+
+       drive->sense_rq_armed = true;
+}
+EXPORT_SYMBOL_GPL(ide_prep_sense);
+
+/*
+ * Insert drive->sense_rq at the front of the drive's request queue.
+ * @special is stored in the sense request's ->special field.
+ *
+ * Returns 0 on success, or -ENOMEM if the sense request is not armed.
+ */
+int ide_queue_sense_rq(ide_drive_t *drive, void *special)
+{
+       struct request *sense_rq = &drive->sense_rq;
+
+       if (drive->sense_rq_armed) {
+               sense_rq->special = special;
+               drive->sense_rq_armed = false;
+
+               drive->hwif->rq = NULL;
+
+               elv_add_request(drive->queue, sense_rq,
+                               ELEVATOR_INSERT_FRONT, 0);
+               return 0;
+       }
+
+       /* deferred failure from ide_prep_sense() */
+       printk(KERN_WARNING "%s: failed queue sense request\n",
+              drive->name);
+       return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(ide_queue_sense_rq);
+
 /*
  * Called when an error was detected during the last packet command.
- * We queue a request sense packet command in the head of the request list.
+ * We queue a request sense packet command at the head of the request
+ * queue.
  */
-void ide_retry_pc(ide_drive_t *drive, struct gendisk *disk)
+void ide_retry_pc(ide_drive_t *drive)
 {
-       struct request *rq = &drive->request_sense_rq;
+       struct request *sense_rq = &drive->sense_rq;
        struct ide_atapi_pc *pc = &drive->request_sense_pc;
 
        (void)ide_read_error(drive);
-       ide_create_request_sense_cmd(drive, pc);
+
+       /* init pc from sense_rq */
+       ide_init_pc(pc);
+       memcpy(pc->c, sense_rq->cmd, 12);
+       pc->buf = bio_data(sense_rq->bio);      /* pointer to mapped address */
+       pc->req_xfer = sense_rq->data_len;
+
        if (drive->media == ide_tape)
                set_bit(IDE_AFLAG_IGNORE_DSC, &drive->atapi_flags);
-       ide_queue_pc_head(drive, disk, pc, rq);
+
+       if (ide_queue_sense_rq(drive, pc))
+               ide_complete_rq(drive, -EIO, blk_rq_bytes(drive->hwif->rq));
 }
 EXPORT_SYMBOL_GPL(ide_retry_pc);
 
@@ -276,7 +333,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
        struct ide_cmd *cmd = &hwif->cmd;
        struct request *rq = hwif->rq;
        const struct ide_tp_ops *tp_ops = hwif->tp_ops;
-       xfer_func_t *xferfunc;
        unsigned int timeout, done;
        u16 bcount;
        u8 stat, ireason, dsc = 0;
@@ -303,11 +359,8 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                                        drive->name, rq_data_dir(pc->rq)
                                                     ? "write" : "read");
                        pc->flags |= PC_FLAG_DMA_ERROR;
-               } else {
+               } else
                        pc->xferred = pc->req_xfer;
-                       if (drive->pc_update_buffers)
-                               drive->pc_update_buffers(drive, pc);
-               }
                debug_log("%s: DMA finished\n", drive->name);
        }
 
@@ -343,7 +396,7 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                        debug_log("[cmd %x]: check condition\n", rq->cmd[0]);
 
                        /* Retry operation */
-                       ide_retry_pc(drive, rq->rq_disk);
+                       ide_retry_pc(drive);
 
                        /* queued, but not started */
                        return ide_stopped;
@@ -353,6 +406,12 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                if ((pc->flags & PC_FLAG_WAIT_FOR_DSC) && (stat & ATA_DSC) == 0)
                        dsc = 1;
 
+               /*
+                * ->pc_callback() might change rq->data_len for
+                * residual count, cache total length.
+                */
+               done = blk_rq_bytes(rq);
+
                /* Command finished - Call the callback function */
                uptodate = drive->pc_callback(drive, dsc);
 
@@ -361,7 +420,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
 
                if (blk_special_request(rq)) {
                        rq->errors = 0;
-                       done = blk_rq_bytes(rq);
                        error = 0;
                } else {
 
@@ -370,11 +428,6 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                                        rq->errors = -EIO;
                        }
 
-                       if (drive->media == ide_tape)
-                               done = ide_rq_bytes(rq); /* FIXME */
-                       else
-                               done = blk_rq_bytes(rq);
-
                        error = uptodate ? 0 : -EIO;
                }
 
@@ -407,21 +460,11 @@ static ide_startstop_t ide_pc_intr(ide_drive_t *drive)
                return ide_do_reset(drive);
        }
 
-       xferfunc = write ? tp_ops->output_data : tp_ops->input_data;
-
-       if (drive->media == ide_floppy && pc->buf == NULL) {
-               done = min_t(unsigned int, bcount, cmd->nleft);
-               ide_pio_bytes(drive, cmd, write, done);
-       } else if (drive->media == ide_tape && pc->bh) {
-               done = drive->pc_io_buffers(drive, pc, bcount, write);
-       } else {
-               done = min_t(unsigned int, bcount, pc->req_xfer - pc->xferred);
-               xferfunc(drive, NULL, pc->cur_pos, done);
-       }
+       done = min_t(unsigned int, bcount, cmd->nleft);
+       ide_pio_bytes(drive, cmd, write, done);
 
-       /* Update the current position */
+       /* Update transferred byte count */
        pc->xferred += done;
-       pc->cur_pos += done;
 
        bcount -= done;
 
@@ -599,7 +642,6 @@ ide_startstop_t ide_issue_pc(ide_drive_t *drive, struct ide_cmd *cmd)
 
                /* We haven't transferred any data yet */
                pc->xferred = 0;
-               pc->cur_pos = pc->buf;
 
                valid_tf = IDE_VALID_DEVICE;
                bcount = ((drive->media == ide_tape) ?
index 925eb9e..a75e4ee 100644 (file)
@@ -206,54 +206,25 @@ static void cdrom_analyze_sense_data(ide_drive_t *drive,
        ide_cd_log_error(drive->name, failed_command, sense);
 }
 
-static void cdrom_queue_request_sense(ide_drive_t *drive, void *sense,
-                                     struct request *failed_command)
-{
-       struct cdrom_info *info         = drive->driver_data;
-       struct request *rq              = &drive->request_sense_rq;
-
-       ide_debug_log(IDE_DBG_SENSE, "enter");
-
-       if (sense == NULL)
-               sense = &info->sense_data;
-
-       /* stuff the sense request in front of our current request */
-       blk_rq_init(NULL, rq);
-       rq->cmd_type = REQ_TYPE_ATA_PC;
-       rq->rq_disk = info->disk;
-
-       rq->data = sense;
-       rq->cmd[0] = GPCMD_REQUEST_SENSE;
-       rq->cmd[4] = 18;
-       rq->data_len = 18;
-
-       rq->cmd_type = REQ_TYPE_SENSE;
-       rq->cmd_flags |= REQ_PREEMPT;
-
-       /* NOTE! Save the failed command in "rq->buffer" */
-       rq->buffer = (void *) failed_command;
-
-       if (failed_command)
-               ide_debug_log(IDE_DBG_SENSE, "failed_cmd: 0x%x",
-                                            failed_command->cmd[0]);
-
-       drive->hwif->rq = NULL;
-
-       elv_add_request(drive->queue, rq, ELEVATOR_INSERT_FRONT, 0);
-}
-
 static void ide_cd_complete_failed_rq(ide_drive_t *drive, struct request *rq)
 {
        /*
-        * For REQ_TYPE_SENSE, "rq->buffer" points to the original
-        * failed request
+        * For REQ_TYPE_SENSE, "rq->special" points to the original
+        * failed request.  Also, the sense data should be read
+        * directly from rq which might be different from the original
+        * sense buffer if it got copied during mapping.
         */
-       struct request *failed = (struct request *)rq->buffer;
-       struct cdrom_info *info = drive->driver_data;
-       void *sense = &info->sense_data;
+       struct request *failed = (struct request *)rq->special;
+       void *sense = bio_data(rq->bio);
 
        if (failed) {
                if (failed->sense) {
+                       /*
+                        * Sense is always read into drive->sense_data.
+                        * Copy back if the failed request has its
+                        * sense pointer set.
+                        */
+                       memcpy(failed->sense, sense, 18);
                        sense = failed->sense;
                        failed->sense_len = rq->sense_len;
                }
@@ -428,7 +399,7 @@ static int cdrom_decode_status(ide_drive_t *drive, u8 stat)
 
        /* if we got a CHECK_CONDITION status, queue a request sense command */
        if (stat & ATA_ERR)
-               cdrom_queue_request_sense(drive, NULL, NULL);
+               return ide_queue_sense_rq(drive, NULL) ? 2 : 1;
        return 1;
 
 end_request:
@@ -442,8 +413,7 @@ end_request:
 
                hwif->rq = NULL;
 
-               cdrom_queue_request_sense(drive, rq->sense, rq);
-               return 1;
+               return ide_queue_sense_rq(drive, rq) ? 2 : 1;
        } else
                return 2;
 }
@@ -503,14 +473,8 @@ static void ide_cd_request_sense_fixup(ide_drive_t *drive, struct ide_cmd *cmd)
         * and some drives don't send them.  Sigh.
         */
        if (rq->cmd[0] == GPCMD_REQUEST_SENSE &&
-           cmd->nleft > 0 && cmd->nleft <= 5) {
-               unsigned int ofs = cmd->nbytes - cmd->nleft;
-
-               while (cmd->nleft > 0) {
-                       *((u8 *)rq->data + ofs++) = 0;
-                       cmd->nleft--;
-               }
-       }
+           cmd->nleft > 0 && cmd->nleft <= 5)
+               cmd->nleft = 0;
 }
 
 int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
@@ -543,8 +507,12 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
                rq->cmd_flags |= cmd_flags;
                rq->timeout = timeout;
                if (buffer) {
-                       rq->data = buffer;
-                       rq->data_len = *bufflen;
+                       error = blk_rq_map_kern(drive->queue, rq, buffer,
+                                               *bufflen, GFP_NOIO);
+                       if (error) {
+                               blk_put_request(rq);
+                               return error;
+                       }
                }
 
                error = blk_execute_rq(drive->queue, info->disk, rq, 0);
@@ -838,15 +806,10 @@ static void cdrom_do_block_pc(ide_drive_t *drive, struct request *rq)
        drive->dma = 0;
 
        /* sg request */
-       if (rq->bio || ((rq->cmd_type == REQ_TYPE_ATA_PC) && rq->data_len)) {
+       if (rq->bio) {
                struct request_queue *q = drive->queue;
+               char *buf = bio_data(rq->bio);
                unsigned int alignment;
-               char *buf;
-
-               if (rq->bio)
-                       buf = bio_data(rq->bio);
-               else
-                       buf = rq->data;
 
                drive->dma = !!(drive->dev_flags & IDE_DFLAG_USING_DMA);
 
@@ -896,6 +859,9 @@ static ide_startstop_t ide_cd_do_request(ide_drive_t *drive, struct request *rq,
                goto out_end;
        }
 
+       /* prepare sense request for this command */
+       ide_prep_sense(drive, rq);
+
        memset(&cmd, 0, sizeof(cmd));
 
        if (rq_data_dir(rq))
index 1d97101..93a3cf1 100644 (file)
@@ -87,10 +87,6 @@ struct cdrom_info {
 
        struct atapi_toc *toc;
 
-       /* The result of the last successful request sense command
-          on this device. */
-       struct request_sense sense_data;
-
        u8 max_speed;           /* Max speed of the drive. */
        u8 current_speed;       /* Current speed of the drive. */
 
index a9fbe2c..c243880 100644 (file)
@@ -411,7 +411,6 @@ static void idedisk_prepare_flush(struct request_queue *q, struct request *rq)
        cmd->protocol = ATA_PROT_NODATA;
 
        rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-       rq->cmd_flags |= REQ_SOFTBARRIER;
        rq->special = cmd;
 }
 
index a0b8cab..d9123ec 100644 (file)
@@ -510,23 +510,11 @@ ide_startstop_t ide_dma_timeout_retry(ide_drive_t *drive, int error)
        /*
         * un-busy drive etc and make sure request is sane
         */
-
        rq = hwif->rq;
-       if (!rq)
-               goto out;
-
-       hwif->rq = NULL;
-
-       rq->errors = 0;
-
-       if (!rq->bio)
-               goto out;
-
-       rq->sector = rq->bio->bi_sector;
-       rq->current_nr_sectors = bio_iovec(rq->bio)->bv_len >> 9;
-       rq->hard_cur_sectors = rq->current_nr_sectors;
-       rq->buffer = bio_data(rq->bio);
-out:
+       if (rq) {
+               hwif->rq = NULL;
+               rq->errors = 0;
+       }
        return ret;
 }
 
index 2b4868d..537b7c5 100644 (file)
@@ -134,13 +134,17 @@ static ide_startstop_t ide_floppy_issue_pc(ide_drive_t *drive,
        drive->pc = pc;
 
        if (pc->retries > IDEFLOPPY_MAX_PC_RETRIES) {
+               unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
                if (!(pc->flags & PC_FLAG_SUPPRESS_ERROR))
                        ide_floppy_report_error(floppy, pc);
+
                /* Giving up */
                pc->error = IDE_DRV_ERROR_GENERAL;
 
                drive->failed_pc = NULL;
                drive->pc_callback(drive, 0);
+               ide_complete_rq(drive, -EIO, done);
                return ide_stopped;
        }
 
@@ -216,15 +220,13 @@ static void idefloppy_blockpc_cmd(struct ide_disk_obj *floppy,
        ide_init_pc(pc);
        memcpy(pc->c, rq->cmd, sizeof(pc->c));
        pc->rq = rq;
-       if (rq->data_len && rq_data_dir(rq) == WRITE)
-               pc->flags |= PC_FLAG_WRITING;
-       pc->buf = rq->data;
-       if (rq->bio)
+       if (rq->data_len) {
                pc->flags |= PC_FLAG_DMA_OK;
-       /*
-        * possibly problematic, doesn't look like ide-floppy correctly
-        * handled scattered requests if dma fails...
-        */
+               if (rq_data_dir(rq) == WRITE)
+                       pc->flags |= PC_FLAG_WRITING;
+       }
+       /* pio will be performed by ide_pio_bytes() which handles sg fine */
+       pc->buf = NULL;
        pc->req_xfer = pc->buf_size = rq->data_len;
 }
 
@@ -265,8 +267,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                }
                pc = &floppy->queued_pc;
                idefloppy_create_rw_cmd(drive, pc, rq, (unsigned long)block);
-       } else if (blk_special_request(rq)) {
-               pc = (struct ide_atapi_pc *) rq->buffer;
+       } else if (blk_special_request(rq) || blk_sense_request(rq)) {
+               pc = (struct ide_atapi_pc *)rq->special;
        } else if (blk_pc_request(rq)) {
                pc = &floppy->queued_pc;
                idefloppy_blockpc_cmd(floppy, pc, rq);
@@ -275,6 +277,8 @@ static ide_startstop_t ide_floppy_do_request(ide_drive_t *drive,
                goto out_end;
        }
 
+       ide_prep_sense(drive, rq);
+
        memset(&cmd, 0, sizeof(cmd));
 
        if (rq_data_dir(rq))
index 6415a2e..41d8040 100644 (file)
@@ -248,14 +248,7 @@ void ide_map_sg(ide_drive_t *drive, struct ide_cmd *cmd)
        struct scatterlist *sg = hwif->sg_table;
        struct request *rq = cmd->rq;
 
-       if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE) {
-               sg_init_one(sg, rq->buffer, rq->nr_sectors * SECTOR_SIZE);
-               cmd->sg_nents = 1;
-       } else if (!rq->bio) {
-               sg_init_one(sg, rq->data, rq->data_len);
-               cmd->sg_nents = 1;
-       } else
-               cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
+       cmd->sg_nents = blk_rq_map_sg(drive->queue, rq, sg);
 }
 EXPORT_SYMBOL_GPL(ide_map_sg);
 
@@ -371,7 +364,7 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
                if (rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
                        return execute_drive_cmd(drive, rq);
                else if (blk_pm_request(rq)) {
-                       struct request_pm_state *pm = rq->data;
+                       struct request_pm_state *pm = rq->special;
 #ifdef DEBUG_PM
                        printk("%s: start_power_step(step: %d)\n",
                                drive->name, pm->pm_step);
@@ -484,6 +477,9 @@ void do_ide_request(struct request_queue *q)
 
        spin_unlock_irq(q->queue_lock);
 
+       /* HLD do_request() callback might sleep, make sure it's okay */
+       might_sleep();
+
        if (ide_lock_host(host, hwif))
                goto plug_device_2;
 
index c1c25eb..5991b23 100644 (file)
@@ -231,7 +231,6 @@ static int generic_drive_reset(ide_drive_t *drive)
        rq->cmd_type = REQ_TYPE_SPECIAL;
        rq->cmd_len = 1;
        rq->cmd[0] = REQ_DRIVE_RESET;
-       rq->cmd_flags |= REQ_SOFTBARRIER;
        if (blk_execute_rq(drive->queue, NULL, rq, 1))
                ret = rq->errors;
        blk_put_request(rq);
index 310d03f..a914023 100644 (file)
@@ -24,11 +24,8 @@ static void issue_park_cmd(ide_drive_t *drive, unsigned long timeout)
                        start_queue = 1;
                spin_unlock_irq(&hwif->lock);
 
-               if (start_queue) {
-                       spin_lock_irq(q->queue_lock);
-                       blk_start_queueing(q);
-                       spin_unlock_irq(q->queue_lock);
-               }
+               if (start_queue)
+                       blk_run_queue(q);
                return;
        }
        spin_unlock_irq(&hwif->lock);
index 0d8a151..ba1488b 100644 (file)
@@ -7,7 +7,6 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
        ide_hwif_t *hwif = drive->hwif;
        struct request *rq;
        struct request_pm_state rqpm;
-       struct ide_cmd cmd;
        int ret;
 
        /* call ACPI _GTM only once */
@@ -15,11 +14,9 @@ int generic_ide_suspend(struct device *dev, pm_message_t mesg)
                ide_acpi_get_timing(hwif);
 
        memset(&rqpm, 0, sizeof(rqpm));
-       memset(&cmd, 0, sizeof(cmd));
        rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
        rq->cmd_type = REQ_TYPE_PM_SUSPEND;
-       rq->special = &cmd;
-       rq->data = &rqpm;
+       rq->special = &rqpm;
        rqpm.pm_step = IDE_PM_START_SUSPEND;
        if (mesg.event == PM_EVENT_PRETHAW)
                mesg.event = PM_EVENT_FREEZE;
@@ -41,7 +38,6 @@ int generic_ide_resume(struct device *dev)
        ide_hwif_t *hwif = drive->hwif;
        struct request *rq;
        struct request_pm_state rqpm;
-       struct ide_cmd cmd;
        int err;
 
        /* call ACPI _PS0 / _STM only once */
@@ -53,12 +49,10 @@ int generic_ide_resume(struct device *dev)
        ide_acpi_exec_tfs(drive);
 
        memset(&rqpm, 0, sizeof(rqpm));
-       memset(&cmd, 0, sizeof(cmd));
        rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
        rq->cmd_type = REQ_TYPE_PM_RESUME;
        rq->cmd_flags |= REQ_PREEMPT;
-       rq->special = &cmd;
-       rq->data = &rqpm;
+       rq->special = &rqpm;
        rqpm.pm_step = IDE_PM_START_RESUME;
        rqpm.pm_state = PM_EVENT_ON;
 
@@ -77,7 +71,7 @@ int generic_ide_resume(struct device *dev)
 
 void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
 {
-       struct request_pm_state *pm = rq->data;
+       struct request_pm_state *pm = rq->special;
 
 #ifdef DEBUG_PM
        printk(KERN_INFO "%s: complete_power_step(step: %d)\n",
@@ -107,10 +101,8 @@ void ide_complete_power_step(ide_drive_t *drive, struct request *rq)
 
 ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
 {
-       struct request_pm_state *pm = rq->data;
-       struct ide_cmd *cmd = rq->special;
-
-       memset(cmd, 0, sizeof(*cmd));
+       struct request_pm_state *pm = rq->special;
+       struct ide_cmd cmd = { };
 
        switch (pm->pm_step) {
        case IDE_PM_FLUSH_CACHE:        /* Suspend step 1 (flush cache) */
@@ -123,12 +115,12 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
                        return ide_stopped;
                }
                if (ata_id_flush_ext_enabled(drive->id))
-                       cmd->tf.command = ATA_CMD_FLUSH_EXT;
+                       cmd.tf.command = ATA_CMD_FLUSH_EXT;
                else
-                       cmd->tf.command = ATA_CMD_FLUSH;
+                       cmd.tf.command = ATA_CMD_FLUSH;
                goto out_do_tf;
        case IDE_PM_STANDBY:            /* Suspend step 2 (standby) */
-               cmd->tf.command = ATA_CMD_STANDBYNOW1;
+               cmd.tf.command = ATA_CMD_STANDBYNOW1;
                goto out_do_tf;
        case IDE_PM_RESTORE_PIO:        /* Resume step 1 (restore PIO) */
                ide_set_max_pio(drive);
@@ -141,7 +133,7 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
                        ide_complete_power_step(drive, rq);
                return ide_stopped;
        case IDE_PM_IDLE:               /* Resume step 2 (idle) */
-               cmd->tf.command = ATA_CMD_IDLEIMMEDIATE;
+               cmd.tf.command = ATA_CMD_IDLEIMMEDIATE;
                goto out_do_tf;
        case IDE_PM_RESTORE_DMA:        /* Resume step 3 (restore DMA) */
                /*
@@ -163,11 +155,11 @@ ide_startstop_t ide_start_power_step(ide_drive_t *drive, struct request *rq)
        return ide_stopped;
 
 out_do_tf:
-       cmd->valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
-       cmd->valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
-       cmd->protocol = ATA_PROT_NODATA;
+       cmd.valid.out.tf = IDE_VALID_OUT_TF | IDE_VALID_DEVICE;
+       cmd.valid.in.tf  = IDE_VALID_IN_TF  | IDE_VALID_DEVICE;
+       cmd.protocol = ATA_PROT_NODATA;
 
-       return do_rw_taskfile(drive, cmd);
+       return do_rw_taskfile(drive, &cmd);
 }
 
 /**
@@ -181,7 +173,7 @@ out_do_tf:
 void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 {
        struct request_queue *q = drive->queue;
-       struct request_pm_state *pm = rq->data;
+       struct request_pm_state *pm = rq->special;
        unsigned long flags;
 
        ide_complete_power_step(drive, rq);
@@ -207,7 +199,7 @@ void ide_complete_pm_rq(ide_drive_t *drive, struct request *rq)
 
 void ide_check_pm_state(ide_drive_t *drive, struct request *rq)
 {
-       struct request_pm_state *pm = rq->data;
+       struct request_pm_state *pm = rq->special;
 
        if (blk_pm_suspend_request(rq) &&
            pm->pm_step == IDE_PM_START_SUSPEND)
index 3a53e08..203bbea 100644 (file)
@@ -131,13 +131,6 @@ enum {
        IDETAPE_DIR_WRITE = (1 << 2),
 };
 
-struct idetape_bh {
-       u32 b_size;
-       atomic_t b_count;
-       struct idetape_bh *b_reqnext;
-       char *b_data;
-};
-
 /* Tape door status */
 #define DOOR_UNLOCKED                  0
 #define DOOR_LOCKED                    1
@@ -219,18 +212,12 @@ typedef struct ide_tape_obj {
 
        /* Data buffer size chosen based on the tape's recommendation */
        int buffer_size;
-       /* merge buffer */
-       struct idetape_bh *merge_bh;
-       /* size of the merge buffer */
-       int merge_bh_size;
-       /* pointer to current buffer head within the merge buffer */
-       struct idetape_bh *bh;
-       char *b_data;
-       int b_count;
-
-       int pages_per_buffer;
-       /* Wasted space in each stage */
-       int excess_bh_size;
+       /* Staging buffer of buffer_size bytes */
+       void *buf;
+       /* The read/write cursor */
+       void *cur;
+       /* The number of valid bytes in buf */
+       size_t valid;
 
        /* Measures average tape speed */
        unsigned long avg_time;
@@ -297,84 +284,6 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i)
        return tape;
 }
 
-static int idetape_input_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-                                 unsigned int bcount)
-{
-       struct idetape_bh *bh = pc->bh;
-       int count;
-
-       while (bcount) {
-               if (bh == NULL)
-                       break;
-               count = min(
-                       (unsigned int)(bh->b_size - atomic_read(&bh->b_count)),
-                       bcount);
-               drive->hwif->tp_ops->input_data(drive, NULL, bh->b_data +
-                                       atomic_read(&bh->b_count), count);
-               bcount -= count;
-               atomic_add(count, &bh->b_count);
-               if (atomic_read(&bh->b_count) == bh->b_size) {
-                       bh = bh->b_reqnext;
-                       if (bh)
-                               atomic_set(&bh->b_count, 0);
-               }
-       }
-
-       pc->bh = bh;
-
-       return bcount;
-}
-
-static int idetape_output_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-                                  unsigned int bcount)
-{
-       struct idetape_bh *bh = pc->bh;
-       int count;
-
-       while (bcount) {
-               if (bh == NULL)
-                       break;
-               count = min((unsigned int)pc->b_count, (unsigned int)bcount);
-               drive->hwif->tp_ops->output_data(drive, NULL, pc->b_data, count);
-               bcount -= count;
-               pc->b_data += count;
-               pc->b_count -= count;
-               if (!pc->b_count) {
-                       bh = bh->b_reqnext;
-                       pc->bh = bh;
-                       if (bh) {
-                               pc->b_data = bh->b_data;
-                               pc->b_count = atomic_read(&bh->b_count);
-                       }
-               }
-       }
-
-       return bcount;
-}
-
-static void idetape_update_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc)
-{
-       struct idetape_bh *bh = pc->bh;
-       int count;
-       unsigned int bcount = pc->xferred;
-
-       if (pc->flags & PC_FLAG_WRITING)
-               return;
-       while (bcount) {
-               if (bh == NULL) {
-                       printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-                                       __func__);
-                       return;
-               }
-               count = min((unsigned int)bh->b_size, (unsigned int)bcount);
-               atomic_set(&bh->b_count, count);
-               if (atomic_read(&bh->b_count) == bh->b_size)
-                       bh = bh->b_reqnext;
-               bcount -= count;
-       }
-       pc->bh = bh;
-}
-
 /*
  * called on each failed packet command retry to analyze the request sense. We
  * currently do not utilize this information.
@@ -392,12 +301,10 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
                 pc->c[0], tape->sense_key, tape->asc, tape->ascq);
 
        /* Correct pc->xferred by asking the tape.       */
-       if (pc->flags & PC_FLAG_DMA_ERROR) {
+       if (pc->flags & PC_FLAG_DMA_ERROR)
                pc->xferred = pc->req_xfer -
                        tape->blk_size *
                        get_unaligned_be32(&sense[3]);
-               idetape_update_buffers(drive, pc);
-       }
 
        /*
         * If error was the result of a zero-length read or write command,
@@ -436,29 +343,6 @@ static void idetape_analyze_error(ide_drive_t *drive, u8 *sense)
        }
 }
 
-/* Free data buffers completely. */
-static void ide_tape_kfree_buffer(idetape_tape_t *tape)
-{
-       struct idetape_bh *prev_bh, *bh = tape->merge_bh;
-
-       while (bh) {
-               u32 size = bh->b_size;
-
-               while (size) {
-                       unsigned int order = fls(size >> PAGE_SHIFT)-1;
-
-                       if (bh->b_data)
-                               free_pages((unsigned long)bh->b_data, order);
-
-                       size &= (order-1);
-                       bh->b_data += (1 << order) * PAGE_SIZE;
-               }
-               prev_bh = bh;
-               bh = bh->b_reqnext;
-               kfree(prev_bh);
-       }
-}
-
 static void ide_tape_handle_dsc(ide_drive_t *);
 
 static int ide_tape_callback(ide_drive_t *drive, int dsc)
@@ -496,7 +380,7 @@ static int ide_tape_callback(ide_drive_t *drive, int dsc)
                }
 
                tape->first_frame += blocks;
-               rq->current_nr_sectors -= blocks;
+               rq->data_len -= blocks * tape->blk_size;
 
                if (pc->error) {
                        uptodate = 0;
@@ -558,19 +442,6 @@ static void ide_tape_handle_dsc(ide_drive_t *drive)
        idetape_postpone_request(drive);
 }
 
-static int ide_tape_io_buffers(ide_drive_t *drive, struct ide_atapi_pc *pc,
-                               unsigned int bcount, int write)
-{
-       unsigned int bleft;
-
-       if (write)
-               bleft = idetape_output_buffers(drive, pc, bcount);
-       else
-               bleft = idetape_input_buffers(drive, pc, bcount);
-
-       return bcount - bleft;
-}
-
 /*
  * Packet Command Interface
  *
@@ -622,6 +493,8 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
 
        if (pc->retries > IDETAPE_MAX_PC_RETRIES ||
                (pc->flags & PC_FLAG_ABORT)) {
+               unsigned int done = blk_rq_bytes(drive->hwif->rq);
+
                /*
                 * We will "abort" retrying a packet command in case legitimate
                 * error code was received (crossing a filemark, or end of the
@@ -641,8 +514,10 @@ static ide_startstop_t ide_tape_issue_pc(ide_drive_t *drive,
                        /* Giving up */
                        pc->error = IDE_DRV_ERROR_GENERAL;
                }
+
                drive->failed_pc = NULL;
                drive->pc_callback(drive, 0);
+               ide_complete_rq(drive, -EIO, done);
                return ide_stopped;
        }
        debug_log(DBG_SENSE, "Retry #%d, cmd = %02X\n", pc->retries, pc->c[0]);
@@ -695,7 +570,7 @@ static ide_startstop_t idetape_media_access_finished(ide_drive_t *drive)
                                printk(KERN_ERR "ide-tape: %s: I/O error, ",
                                                tape->name);
                        /* Retry operation */
-                       ide_retry_pc(drive, tape->disk);
+                       ide_retry_pc(drive);
                        return ide_stopped;
                }
                pc->error = 0;
@@ -711,27 +586,22 @@ static void ide_tape_create_rw_cmd(idetape_tape_t *tape,
                                   struct ide_atapi_pc *pc, struct request *rq,
                                   u8 opcode)
 {
-       struct idetape_bh *bh = (struct idetape_bh *)rq->special;
-       unsigned int length = rq->current_nr_sectors;
+       unsigned int length = rq->nr_sectors;
 
        ide_init_pc(pc);
        put_unaligned(cpu_to_be32(length), (unsigned int *) &pc->c[1]);
        pc->c[1] = 1;
-       pc->bh = bh;
        pc->buf = NULL;
        pc->buf_size = length * tape->blk_size;
        pc->req_xfer = pc->buf_size;
        if (pc->req_xfer == tape->buffer_size)
                pc->flags |= PC_FLAG_DMA_OK;
 
-       if (opcode == READ_6) {
+       if (opcode == READ_6)
                pc->c[0] = READ_6;
-               atomic_set(&bh->b_count, 0);
-       } else if (opcode == WRITE_6) {
+       else if (opcode == WRITE_6) {
                pc->c[0] = WRITE_6;
                pc->flags |= PC_FLAG_WRITING;
-               pc->b_data = bh->b_data;
-               pc->b_count = atomic_read(&bh->b_count);
        }
 
        memcpy(rq->cmd, pc->c, 12);
@@ -747,12 +617,10 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
        struct ide_cmd cmd;
        u8 stat;
 
-       debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu,"
-                       " current_nr_sectors: %u\n",
-                       (unsigned long long)rq->sector, rq->nr_sectors,
-                       rq->current_nr_sectors);
+       debug_log(DBG_SENSE, "sector: %llu, nr_sectors: %lu\n",
+                 (unsigned long long)rq->sector, rq->nr_sectors);
 
-       if (!blk_special_request(rq)) {
+       if (!(blk_special_request(rq) || blk_sense_request(rq))) {
                /* We do not support buffer cache originated requests. */
                printk(KERN_NOTICE "ide-tape: %s: Unsupported request in "
                        "request queue (%d)\n", drive->name, rq->cmd_type);
@@ -828,7 +696,7 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
                goto out;
        }
        if (rq->cmd[13] & REQ_IDETAPE_PC1) {
-               pc = (struct ide_atapi_pc *) rq->buffer;
+               pc = (struct ide_atapi_pc *)rq->special;
                rq->cmd[13] &= ~(REQ_IDETAPE_PC1);
                rq->cmd[13] |= REQ_IDETAPE_PC2;
                goto out;
@@ -840,6 +708,9 @@ static ide_startstop_t idetape_do_request(ide_drive_t *drive,
        BUG();
 
 out:
+       /* prepare sense request for this command */
+       ide_prep_sense(drive, rq);
+
        memset(&cmd, 0, sizeof(cmd));
 
        if (rq_data_dir(rq))
@@ -847,167 +718,10 @@ out:
 
        cmd.rq = rq;
 
-       return ide_tape_issue_pc(drive, &cmd, pc);
-}
-
-/*
- * The function below uses __get_free_pages to allocate a data buffer of size
- * tape->buffer_size (or a bit more). We attempt to combine sequential pages as
- * much as possible.
- *
- * It returns a pointer to the newly allocated buffer, or NULL in case of
- * failure.
- */
-static struct idetape_bh *ide_tape_kmalloc_buffer(idetape_tape_t *tape,
-                                                 int full, int clear)
-{
-       struct idetape_bh *prev_bh, *bh, *merge_bh;
-       int pages = tape->pages_per_buffer;
-       unsigned int order, b_allocd;
-       char *b_data = NULL;
-
-       merge_bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-       bh = merge_bh;
-       if (bh == NULL)
-               goto abort;
-
-       order = fls(pages) - 1;
-       bh->b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-       if (!bh->b_data)
-               goto abort;
-       b_allocd = (1 << order) * PAGE_SIZE;
-       pages &= (order-1);
-
-       if (clear)
-               memset(bh->b_data, 0, b_allocd);
-       bh->b_reqnext = NULL;
-       bh->b_size = b_allocd;
-       atomic_set(&bh->b_count, full ? bh->b_size : 0);
-
-       while (pages) {
-               order = fls(pages) - 1;
-               b_data = (char *) __get_free_pages(GFP_KERNEL, order);
-               if (!b_data)
-                       goto abort;
-               b_allocd = (1 << order) * PAGE_SIZE;
-
-               if (clear)
-                       memset(b_data, 0, b_allocd);
-
-               /* newly allocated page frames below buffer header or ...*/
-               if (bh->b_data == b_data + b_allocd) {
-                       bh->b_size += b_allocd;
-                       bh->b_data -= b_allocd;
-                       if (full)
-                               atomic_add(b_allocd, &bh->b_count);
-                       continue;
-               }
-               /* they are above the header */
-               if (b_data == bh->b_data + bh->b_size) {
-                       bh->b_size += b_allocd;
-                       if (full)
-                               atomic_add(b_allocd, &bh->b_count);
-                       continue;
-               }
-               prev_bh = bh;
-               bh = kmalloc(sizeof(struct idetape_bh), GFP_KERNEL);
-               if (!bh) {
-                       free_pages((unsigned long) b_data, order);
-                       goto abort;
-               }
-               bh->b_reqnext = NULL;
-               bh->b_data = b_data;
-               bh->b_size = b_allocd;
-               atomic_set(&bh->b_count, full ? bh->b_size : 0);
-               prev_bh->b_reqnext = bh;
-
-               pages &= (order-1);
-       }
-
-       bh->b_size -= tape->excess_bh_size;
-       if (full)
-               atomic_sub(tape->excess_bh_size, &bh->b_count);
-       return merge_bh;
-abort:
-       ide_tape_kfree_buffer(tape);
-       return NULL;
-}
+       ide_init_sg_cmd(&cmd, pc->req_xfer);
+       ide_map_sg(drive, &cmd);
 
-static int idetape_copy_stage_from_user(idetape_tape_t *tape,
-                                       const char __user *buf, int n)
-{
-       struct idetape_bh *bh = tape->bh;
-       int count;
-       int ret = 0;
-
-       while (n) {
-               if (bh == NULL) {
-                       printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-                                       __func__);
-                       return 1;
-               }
-               count = min((unsigned int)
-                               (bh->b_size - atomic_read(&bh->b_count)),
-                               (unsigned int)n);
-               if (copy_from_user(bh->b_data + atomic_read(&bh->b_count), buf,
-                               count))
-                       ret = 1;
-               n -= count;
-               atomic_add(count, &bh->b_count);
-               buf += count;
-               if (atomic_read(&bh->b_count) == bh->b_size) {
-                       bh = bh->b_reqnext;
-                       if (bh)
-                               atomic_set(&bh->b_count, 0);
-               }
-       }
-       tape->bh = bh;
-       return ret;
-}
-
-static int idetape_copy_stage_to_user(idetape_tape_t *tape, char __user *buf,
-                                     int n)
-{
-       struct idetape_bh *bh = tape->bh;
-       int count;
-       int ret = 0;
-
-       while (n) {
-               if (bh == NULL) {
-                       printk(KERN_ERR "ide-tape: bh == NULL in %s\n",
-                                       __func__);
-                       return 1;
-               }
-               count = min(tape->b_count, n);
-               if  (copy_to_user(buf, tape->b_data, count))
-                       ret = 1;
-               n -= count;
-               tape->b_data += count;
-               tape->b_count -= count;
-               buf += count;
-               if (!tape->b_count) {
-                       bh = bh->b_reqnext;
-                       tape->bh = bh;
-                       if (bh) {
-                               tape->b_data = bh->b_data;
-                               tape->b_count = atomic_read(&bh->b_count);
-                       }
-               }
-       }
-       return ret;
-}
-
-static void idetape_init_merge_buffer(idetape_tape_t *tape)
-{
-       struct idetape_bh *bh = tape->merge_bh;
-       tape->bh = tape->merge_bh;
-
-       if (tape->chrdev_dir == IDETAPE_DIR_WRITE)
-               atomic_set(&bh->b_count, 0);
-       else {
-               tape->b_data = bh->b_data;
-               tape->b_count = atomic_read(&bh->b_count);
-       }
+       return ide_tape_issue_pc(drive, &cmd, pc);
 }
 
 /*
@@ -1107,10 +821,10 @@ static void __ide_tape_discard_merge_buffer(ide_drive_t *drive)
                return;
 
        clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags);
-       tape->merge_bh_size = 0;
-       if (tape->merge_bh != NULL) {
-               ide_tape_kfree_buffer(tape);
-               tape->merge_bh = NULL;
+       tape->valid = 0;
+       if (tape->buf != NULL) {
+               kfree(tape->buf);
+               tape->buf = NULL;
        }
 
        tape->chrdev_dir = IDETAPE_DIR_NONE;
@@ -1164,36 +878,44 @@ static void ide_tape_discard_merge_buffer(ide_drive_t *drive,
  * Generate a read/write request for the block device interface and wait for it
  * to be serviced.
  */
-static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int blocks,
-                                struct idetape_bh *bh)
+static int idetape_queue_rw_tail(ide_drive_t *drive, int cmd, int size)
 {
        idetape_tape_t *tape = drive->driver_data;
        struct request *rq;
-       int ret, errors;
+       int ret;
 
        debug_log(DBG_SENSE, "%s: cmd=%d\n", __func__, cmd);
+       BUG_ON(cmd != REQ_IDETAPE_READ && cmd != REQ_IDETAPE_WRITE);
+       BUG_ON(size < 0 || size % tape->blk_size);
 
        rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
        rq->cmd_type = REQ_TYPE_SPECIAL;
        rq->cmd[13] = cmd;
        rq->rq_disk = tape->disk;
-       rq->special = (void *)bh;
        rq->sector = tape->first_frame;
-       rq->nr_sectors = blocks;
-       rq->current_nr_sectors = blocks;
-       blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
-       errors = rq->errors;
-       ret = tape->blk_size * (blocks - rq->current_nr_sectors);
-       blk_put_request(rq);
+       if (size) {
+               ret = blk_rq_map_kern(drive->queue, rq, tape->buf, size,
+                                     __GFP_WAIT);
+               if (ret)
+                       goto out_put;
+       }
 
-       if ((cmd & (REQ_IDETAPE_READ | REQ_IDETAPE_WRITE)) == 0)
-               return 0;
+       blk_execute_rq(drive->queue, tape->disk, rq, 0);
 
-       if (tape->merge_bh)
-               idetape_init_merge_buffer(tape);
-       if (errors == IDE_DRV_ERROR_GENERAL)
-               return -EIO;
+       /* calculate the number of transferred bytes and update buffer state */
+       size -= rq->data_len;
+       tape->cur = tape->buf;
+       if (cmd == REQ_IDETAPE_READ)
+               tape->valid = size;
+       else
+               tape->valid = 0;
+
+       ret = size;
+       if (rq->errors == IDE_DRV_ERROR_GENERAL)
+               ret = -EIO;
+out_put:
+       blk_put_request(rq);
        return ret;
 }
 
@@ -1230,153 +952,87 @@ static void idetape_create_space_cmd(struct ide_atapi_pc *pc, int count, u8 cmd)
        pc->flags |= PC_FLAG_WAIT_FOR_DSC;
 }
 
-/* Queue up a character device originated write request. */
-static int idetape_add_chrdev_write_request(ide_drive_t *drive, int blocks)
-{
-       idetape_tape_t *tape = drive->driver_data;
-
-       debug_log(DBG_CHRDEV, "Enter %s\n", __func__);
-
-       return idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
-                                    blocks, tape->merge_bh);
-}
-
 static void ide_tape_flush_merge_buffer(ide_drive_t *drive)
 {
        idetape_tape_t *tape = drive->driver_data;
-       int blocks, min;
-       struct idetape_bh *bh;
 
        if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
                printk(KERN_ERR "ide-tape: bug: Trying to empty merge buffer"
                                " but we are not writing.\n");
                return;
        }
-       if (tape->merge_bh_size > tape->buffer_size) {
-               printk(KERN_ERR "ide-tape: bug: merge_buffer too big\n");
-               tape->merge_bh_size = tape->buffer_size;
-       }
-       if (tape->merge_bh_size) {
-               blocks = tape->merge_bh_size / tape->blk_size;
-               if (tape->merge_bh_size % tape->blk_size) {
-                       unsigned int i;
-
-                       blocks++;
-                       i = tape->blk_size - tape->merge_bh_size %
-                               tape->blk_size;
-                       bh = tape->bh->b_reqnext;
-                       while (bh) {
-                               atomic_set(&bh->b_count, 0);
-                               bh = bh->b_reqnext;
-                       }
-                       bh = tape->bh;
-                       while (i) {
-                               if (bh == NULL) {
-                                       printk(KERN_INFO "ide-tape: bug,"
-                                                        " bh NULL\n");
-                                       break;
-                               }
-                               min = min(i, (unsigned int)(bh->b_size -
-                                               atomic_read(&bh->b_count)));
-                               memset(bh->b_data + atomic_read(&bh->b_count),
-                                               0, min);
-                               atomic_add(min, &bh->b_count);
-                               i -= min;
-                               bh = bh->b_reqnext;
-                       }
-               }
-               (void) idetape_add_chrdev_write_request(drive, blocks);
-               tape->merge_bh_size = 0;
-       }
-       if (tape->merge_bh != NULL) {
-               ide_tape_kfree_buffer(tape);
-               tape->merge_bh = NULL;
+       if (tape->buf) {
+               size_t aligned = roundup(tape->valid, tape->blk_size);
+
+               memset(tape->cur, 0, aligned - tape->valid);
+               idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, aligned);
+               kfree(tape->buf);
+               tape->buf = NULL;
        }
        tape->chrdev_dir = IDETAPE_DIR_NONE;
 }
 
-static int idetape_init_read(ide_drive_t *drive)
+static int idetape_init_rw(ide_drive_t *drive, int dir)
 {
        idetape_tape_t *tape = drive->driver_data;
-       int bytes_read;
+       int rc;
 
-       /* Initialize read operation */
-       if (tape->chrdev_dir != IDETAPE_DIR_READ) {
-               if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
-                       ide_tape_flush_merge_buffer(drive);
-                       idetape_flush_tape_buffers(drive);
-               }
-               if (tape->merge_bh || tape->merge_bh_size) {
-                       printk(KERN_ERR "ide-tape: merge_bh_size should be"
-                                        " 0 now\n");
-                       tape->merge_bh_size = 0;
-               }
-               tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
-               if (!tape->merge_bh)
-                       return -ENOMEM;
-               tape->chrdev_dir = IDETAPE_DIR_READ;
+       BUG_ON(dir != IDETAPE_DIR_READ && dir != IDETAPE_DIR_WRITE);
 
-               /*
-                * Issue a read 0 command to ensure that DSC handshake is
-                * switched from completion mode to buffer available mode.
-                * No point in issuing this if DSC overlap isn't supported, some
-                * drives (Seagate STT3401A) will return an error.
-                */
-               if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-                       bytes_read = idetape_queue_rw_tail(drive,
-                                                       REQ_IDETAPE_READ, 0,
-                                                       tape->merge_bh);
-                       if (bytes_read < 0) {
-                               ide_tape_kfree_buffer(tape);
-                               tape->merge_bh = NULL;
-                               tape->chrdev_dir = IDETAPE_DIR_NONE;
-                               return bytes_read;
-                       }
-               }
-       }
+       if (tape->chrdev_dir == dir)
+               return 0;
 
-       return 0;
-}
+       if (tape->chrdev_dir == IDETAPE_DIR_READ)
+               ide_tape_discard_merge_buffer(drive, 1);
+       else if (tape->chrdev_dir == IDETAPE_DIR_WRITE) {
+               ide_tape_flush_merge_buffer(drive);
+               idetape_flush_tape_buffers(drive);
+       }
 
-/* called from idetape_chrdev_read() to service a chrdev read request. */
-static int idetape_add_chrdev_read_request(ide_drive_t *drive, int blocks)
-{
-       idetape_tape_t *tape = drive->driver_data;
+       if (tape->buf || tape->valid) {
+               printk(KERN_ERR "ide-tape: valid should be 0 now\n");
+               tape->valid = 0;
+       }
 
-       debug_log(DBG_PROCS, "Enter %s, %d blocks\n", __func__, blocks);
+       tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+       if (!tape->buf)
+               return -ENOMEM;
+       tape->chrdev_dir = dir;
+       tape->cur = tape->buf;
 
-       /* If we are at a filemark, return a read length of 0 */
-       if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
-               return 0;
-
-       idetape_init_read(drive);
+       /*
+        * Issue a 0 rw command to ensure that DSC handshake is
+        * switched from completion mode to buffer available mode.  No
+        * point in issuing this if DSC overlap isn't supported, some
+        * drives (Seagate STT3401A) will return an error.
+        */
+       if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
+               int cmd = dir == IDETAPE_DIR_READ ? REQ_IDETAPE_READ
+                                                 : REQ_IDETAPE_WRITE;
+
+               rc = idetape_queue_rw_tail(drive, cmd, 0);
+               if (rc < 0) {
+                       kfree(tape->buf);
+                       tape->buf = NULL;
+                       tape->chrdev_dir = IDETAPE_DIR_NONE;
+                       return rc;
+               }
+       }
 
-       return idetape_queue_rw_tail(drive, REQ_IDETAPE_READ, blocks,
-                                    tape->merge_bh);
+       return 0;
 }
 
 static void idetape_pad_zeros(ide_drive_t *drive, int bcount)
 {
        idetape_tape_t *tape = drive->driver_data;
-       struct idetape_bh *bh;
-       int blocks;
+
+       memset(tape->buf, 0, tape->buffer_size);
 
        while (bcount) {
-               unsigned int count;
+               unsigned int count = min(tape->buffer_size, bcount);
 
-               bh = tape->merge_bh;
-               count = min(tape->buffer_size, bcount);
+               idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, count);
                bcount -= count;
-               blocks = count / tape->blk_size;
-               while (count) {
-                       atomic_set(&bh->b_count,
-                                  min(count, (unsigned int)bh->b_size));
-                       memset(bh->b_data, 0, atomic_read(&bh->b_count));
-                       count -= atomic_read(&bh->b_count);
-                       bh = bh->b_reqnext;
-               }
-               idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE, blocks,
-                                     tape->merge_bh);
        }
 }
 
@@ -1456,7 +1112,7 @@ static int idetape_space_over_filemarks(ide_drive_t *drive, short mt_op,
        }
 
        if (tape->chrdev_dir == IDETAPE_DIR_READ) {
-               tape->merge_bh_size = 0;
+               tape->valid = 0;
                if (test_and_clear_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
                        ++count;
                ide_tape_discard_merge_buffer(drive, 0);
@@ -1505,9 +1161,9 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
 {
        struct ide_tape_obj *tape = file->private_data;
        ide_drive_t *drive = tape->drive;
-       ssize_t bytes_read, temp, actually_read = 0, rc;
+       size_t done = 0;
        ssize_t ret = 0;
-       u16 ctl = *(u16 *)&tape->caps[12];
+       int rc;
 
        debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
@@ -1517,49 +1173,43 @@ static ssize_t idetape_chrdev_read(struct file *file, char __user *buf,
                            (count % tape->blk_size) == 0)
                                tape->user_bs_factor = count / tape->blk_size;
        }
-       rc = idetape_init_read(drive);
+
+       rc = idetape_init_rw(drive, IDETAPE_DIR_READ);
        if (rc < 0)
                return rc;
-       if (count == 0)
-               return (0);
-       if (tape->merge_bh_size) {
-               actually_read = min((unsigned int)(tape->merge_bh_size),
-                                   (unsigned int)count);
-               if (idetape_copy_stage_to_user(tape, buf, actually_read))
-                       ret = -EFAULT;
-               buf += actually_read;
-               tape->merge_bh_size -= actually_read;
-               count -= actually_read;
-       }
-       while (count >= tape->buffer_size) {
-               bytes_read = idetape_add_chrdev_read_request(drive, ctl);
-               if (bytes_read <= 0)
-                       goto finish;
-               if (idetape_copy_stage_to_user(tape, buf, bytes_read))
-                       ret = -EFAULT;
-               buf += bytes_read;
-               count -= bytes_read;
-               actually_read += bytes_read;
-       }
-       if (count) {
-               bytes_read = idetape_add_chrdev_read_request(drive, ctl);
-               if (bytes_read <= 0)
-                       goto finish;
-               temp = min((unsigned long)count, (unsigned long)bytes_read);
-               if (idetape_copy_stage_to_user(tape, buf, temp))
+
+       while (done < count) {
+               size_t todo;
+
+               /* refill if staging buffer is empty */
+               if (!tape->valid) {
+                       /* If we are at a filemark, nothing more to read */
+                       if (test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags))
+                               break;
+                       /* read */
+                       if (idetape_queue_rw_tail(drive, REQ_IDETAPE_READ,
+                                                 tape->buffer_size) <= 0)
+                               break;
+               }
+
+               /* copy out */
+               todo = min_t(size_t, count - done, tape->valid);
+               if (copy_to_user(buf + done, tape->cur, todo))
                        ret = -EFAULT;
-               actually_read += temp;
-               tape->merge_bh_size = bytes_read-temp;
+
+               tape->cur += todo;
+               tape->valid -= todo;
+               done += todo;
        }
-finish:
-       if (!actually_read && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
+
+       if (!done && test_bit(IDE_AFLAG_FILEMARK, &drive->atapi_flags)) {
                debug_log(DBG_SENSE, "%s: spacing over filemark\n", tape->name);
 
                idetape_space_over_filemarks(drive, MTFSF, 1);
                return 0;
        }
 
-       return ret ? ret : actually_read;
+       return ret ? ret : done;
 }
 
 static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
@@ -1567,9 +1217,9 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
 {
        struct ide_tape_obj *tape = file->private_data;
        ide_drive_t *drive = tape->drive;
-       ssize_t actually_written = 0;
+       size_t done = 0;
        ssize_t ret = 0;
-       u16 ctl = *(u16 *)&tape->caps[12];
+       int rc;
 
        /* The drive is write protected. */
        if (tape->write_prot)
@@ -1578,80 +1228,31 @@ static ssize_t idetape_chrdev_write(struct file *file, const char __user *buf,
        debug_log(DBG_CHRDEV, "Enter %s, count %Zd\n", __func__, count);
 
        /* Initialize write operation */
-       if (tape->chrdev_dir != IDETAPE_DIR_WRITE) {
-               if (tape->chrdev_dir == IDETAPE_DIR_READ)
-                       ide_tape_discard_merge_buffer(drive, 1);
-               if (tape->merge_bh || tape->merge_bh_size) {
-                       printk(KERN_ERR "ide-tape: merge_bh_size "
-                               "should be 0 now\n");
-                       tape->merge_bh_size = 0;
-               }
-               tape->merge_bh = ide_tape_kmalloc_buffer(tape, 0, 0);
-               if (!tape->merge_bh)
-                       return -ENOMEM;
-               tape->chrdev_dir = IDETAPE_DIR_WRITE;
-               idetape_init_merge_buffer(tape);
+       rc = idetape_init_rw(drive, IDETAPE_DIR_WRITE);
+       if (rc < 0)
+               return rc;
 
-               /*
-                * Issue a write 0 command to ensure that DSC handshake is
-                * switched from completion mode to buffer available mode. No
-                * point in issuing this if DSC overlap isn't supported, some
-                * drives (Seagate STT3401A) will return an error.
-                */
-               if (drive->dev_flags & IDE_DFLAG_DSC_OVERLAP) {
-                       ssize_t retval = idetape_queue_rw_tail(drive,
-                                                       REQ_IDETAPE_WRITE, 0,
-                                                       tape->merge_bh);
-                       if (retval < 0) {
-                               ide_tape_kfree_buffer(tape);
-                               tape->merge_bh = NULL;
-                               tape->chrdev_dir = IDETAPE_DIR_NONE;
-                               return retval;
-                       }
-               }
-       }
-       if (count == 0)
-               return (0);
-       if (tape->merge_bh_size) {
-               if (tape->merge_bh_size >= tape->buffer_size) {
-                       printk(KERN_ERR "ide-tape: bug: merge buf too big\n");
-                       tape->merge_bh_size = 0;
-               }
-               actually_written = min((unsigned int)
-                               (tape->buffer_size - tape->merge_bh_size),
-                               (unsigned int)count);
-               if (idetape_copy_stage_from_user(tape, buf, actually_written))
-                               ret = -EFAULT;
-               buf += actually_written;
-               tape->merge_bh_size += actually_written;
-               count -= actually_written;
-
-               if (tape->merge_bh_size == tape->buffer_size) {
-                       ssize_t retval;
-                       tape->merge_bh_size = 0;
-                       retval = idetape_add_chrdev_write_request(drive, ctl);
-                       if (retval <= 0)
-                               return (retval);
-               }
-       }
-       while (count >= tape->buffer_size) {
-               ssize_t retval;
-               if (idetape_copy_stage_from_user(tape, buf, tape->buffer_size))
-                       ret = -EFAULT;
-               buf += tape->buffer_size;
-               count -= tape->buffer_size;
-               retval = idetape_add_chrdev_write_request(drive, ctl);
-               actually_written += tape->buffer_size;
-               if (retval <= 0)
-                       return (retval);
-       }
-       if (count) {
-               actually_written += count;
-               if (idetape_copy_stage_from_user(tape, buf, count))
+       while (done < count) {
+               size_t todo;
+
+               /* flush if staging buffer is full */
+               if (tape->valid == tape->buffer_size &&
+                   idetape_queue_rw_tail(drive, REQ_IDETAPE_WRITE,
+                                         tape->buffer_size) <= 0)
+                       return rc;
+
+               /* copy in */
+               todo = min_t(size_t, count - done,
+                            tape->buffer_size - tape->valid);
+               if (copy_from_user(tape->cur, buf + done, todo))
                        ret = -EFAULT;
-               tape->merge_bh_size += count;
+
+               tape->cur += todo;
+               tape->valid += todo;
+               done += todo;
        }
-       return ret ? ret : actually_written;
+
+       return ret ? ret : done;
 }
 
 static int idetape_write_filemark(ide_drive_t *drive)
@@ -1812,7 +1413,7 @@ static int idetape_chrdev_ioctl(struct inode *inode, struct file *file,
                idetape_flush_tape_buffers(drive);
        }
        if (cmd == MTIOCGET || cmd == MTIOCPOS) {
-               block_offset = tape->merge_bh_size /
+               block_offset = tape->valid /
                        (tape->blk_size * tape->user_bs_factor);
                position = idetape_read_position(drive);
                if (position < 0)
@@ -1960,12 +1561,12 @@ static void idetape_write_release(ide_drive_t *drive, unsigned int minor)
        idetape_tape_t *tape = drive->driver_data;
 
        ide_tape_flush_merge_buffer(drive);
-       tape->merge_bh = ide_tape_kmalloc_buffer(tape, 1, 0);
-       if (tape->merge_bh != NULL) {
+       tape->buf = kmalloc(tape->buffer_size, GFP_KERNEL);
+       if (tape->buf != NULL) {
                idetape_pad_zeros(drive, tape->blk_size *
                                (tape->user_bs_factor - 1));
-               ide_tape_kfree_buffer(tape);
-               tape->merge_bh = NULL;
+               kfree(tape->buf);
+               tape->buf = NULL;
        }
        idetape_write_filemark(drive);
        idetape_flush_tape_buffers(drive);
@@ -2159,8 +1760,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
        u16 *ctl = (u16 *)&tape->caps[12];
 
        drive->pc_callback       = ide_tape_callback;
-       drive->pc_update_buffers = idetape_update_buffers;
-       drive->pc_io_buffers     = ide_tape_io_buffers;
 
        drive->dev_flags |= IDE_DFLAG_DSC_OVERLAP;
 
@@ -2191,11 +1790,6 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
                tape->buffer_size = *ctl * tape->blk_size;
        }
        buffer_size = tape->buffer_size;
-       tape->pages_per_buffer = buffer_size / PAGE_SIZE;
-       if (buffer_size % PAGE_SIZE) {
-               tape->pages_per_buffer++;
-               tape->excess_bh_size = PAGE_SIZE - buffer_size % PAGE_SIZE;
-       }
 
        /* select the "best" DSC read/write polling freq */
        speed = max(*(u16 *)&tape->caps[14], *(u16 *)&tape->caps[8]);
@@ -2238,7 +1832,7 @@ static void ide_tape_release(struct device *dev)
        ide_drive_t *drive = tape->drive;
        struct gendisk *g = tape->disk;
 
-       BUG_ON(tape->merge_bh_size);
+       BUG_ON(tape->valid);
 
        drive->dev_flags &= ~IDE_DFLAG_DSC_OVERLAP;
        drive->driver_data = NULL;
index 4aa6223..f400eb4 100644 (file)
@@ -424,7 +424,9 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
 
        rq = blk_get_request(drive->queue, READ, __GFP_WAIT);
        rq->cmd_type = REQ_TYPE_ATA_TASKFILE;
-       rq->buffer = buf;
+
+       if (cmd->tf_flags & IDE_TFLAG_WRITE)
+               rq->cmd_flags |= REQ_RW;
 
        /*
         * (ks) We transfer currently only whole sectors.
@@ -432,18 +434,20 @@ int ide_raw_taskfile(ide_drive_t *drive, struct ide_cmd *cmd, u8 *buf,
         * if we would find a solution to transfer any size.
         * To support special commands like READ LONG.
         */
-       rq->hard_nr_sectors = rq->nr_sectors = nsect;
-       rq->hard_cur_sectors = rq->current_nr_sectors = nsect;
-
-       if (cmd->tf_flags & IDE_TFLAG_WRITE)
-               rq->cmd_flags |= REQ_RW;
+       if (nsect) {
+               error = blk_rq_map_kern(drive->queue, rq, buf,
+                                       nsect * SECTOR_SIZE, __GFP_WAIT);
+               if (error)
+                       goto put_req;
+       }
 
        rq->special = cmd;
        cmd->rq = rq;
 
        error = blk_execute_rq(drive->queue, NULL, rq, 0);
-       blk_put_request(rq);
 
+put_req:
+       blk_put_request(rq);
        return error;
 }
 
index 248a54b..b3bc96f 100644 (file)
@@ -1,6 +1,6 @@
 /*
  *  Copyright (C) 1998-2002            Andre Hedrick <andre@linux-ide.org>
- *  Copyright (C) 2006-2007            MontaVista Software, Inc.
+ *  Copyright (C) 2006-2007, 2009      MontaVista Software, Inc.
  *  Copyright (C) 2007                 Bartlomiej Zolnierkiewicz
  *
  *  Portions Copyright (C) 1999 Promise Technology, Inc.
@@ -227,28 +227,19 @@ somebody_else:
        return (dma_stat & 4) == 4;     /* return 1 if INTR asserted */
 }
 
-static void pdc202xx_reset_host (ide_hwif_t *hwif)
+static void pdc202xx_reset(ide_drive_t *drive)
 {
+       ide_hwif_t *hwif        = drive->hwif;
        unsigned long high_16   = hwif->extra_base - 16;
        u8 udma_speed_flag      = inb(high_16 | 0x001f);
 
+       printk(KERN_WARNING "PDC202xx: software reset...\n");
+
        outb(udma_speed_flag | 0x10, high_16 | 0x001f);
        mdelay(100);
        outb(udma_speed_flag & ~0x10, high_16 | 0x001f);
        mdelay(2000);   /* 2 seconds ?! */
 
-       printk(KERN_WARNING "PDC202XX: %s channel reset.\n",
-               hwif->channel ? "Secondary" : "Primary");
-}
-
-static void pdc202xx_reset (ide_drive_t *drive)
-{
-       ide_hwif_t *hwif        = drive->hwif;
-       ide_hwif_t *mate        = hwif->mate;
-
-       pdc202xx_reset_host(hwif);
-       pdc202xx_reset_host(mate);
-
        ide_set_max_pio(drive);
 }
 
@@ -328,9 +319,8 @@ static const struct ide_dma_ops pdc20246_dma_ops = {
        .dma_start              = ide_dma_start,
        .dma_end                = ide_dma_end,
        .dma_test_irq           = pdc202xx_dma_test_irq,
-       .dma_lost_irq           = pdc202xx_dma_lost_irq,
+       .dma_lost_irq           = ide_dma_lost_irq,
        .dma_timer_expiry       = ide_dma_sff_timer_expiry,
-       .dma_clear              = pdc202xx_reset,
        .dma_sff_read_status    = ide_dma_sff_read_status,
 };
 
index 424f7b0..3fd8b1e 100644 (file)
@@ -20,7 +20,8 @@
 #include <linux/idr.h>
 #include <linux/hdreg.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
+
+#include <trace/events/block.h>
 
 #define DM_MSG_PREFIX "core"
 
@@ -53,8 +54,6 @@ struct dm_target_io {
        union map_info info;
 };
 
-DEFINE_TRACE(block_bio_complete);
-
 /*
  * For request-based dm.
  * One of these is allocated per request.
@@ -656,8 +655,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
                /* the bio has been remapped so dispatch it */
 
                trace_block_remap(bdev_get_queue(clone->bi_bdev), clone,
-                                   tio->io->bio->bi_bdev->bd_dev,
-                                   clone->bi_sector, sector);
+                                   tio->io->bio->bi_bdev->bd_dev, sector);
 
                generic_make_request(clone);
        } else if (r < 0 || r == DM_MAPIO_REQUEUE) {
index 5d400ae..bb37fb1 100644 (file)
@@ -362,7 +362,7 @@ static void raid5_unplug_device(struct request_queue *q);
 
 static struct stripe_head *
 get_active_stripe(raid5_conf_t *conf, sector_t sector,
-                 int previous, int noblock)
+                 int previous, int noblock, int noquiesce)
 {
        struct stripe_head *sh;
 
@@ -372,7 +372,7 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
 
        do {
                wait_event_lock_irq(conf->wait_for_stripe,
-                                   conf->quiesce == 0,
+                                   conf->quiesce == 0 || noquiesce,
                                    conf->device_lock, /* nothing */);
                sh = __find_stripe(conf, sector, conf->generation - previous);
                if (!sh) {
@@ -2671,7 +2671,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh,
                        sector_t bn = compute_blocknr(sh, i, 1);
                        sector_t s = raid5_compute_sector(conf, bn, 0,
                                                          &dd_idx, NULL);
-                       sh2 = get_active_stripe(conf, s, 0, 1);
+                       sh2 = get_active_stripe(conf, s, 0, 1, 1);
                        if (sh2 == NULL)
                                /* so far only the early blocks of this stripe
                                 * have been requested.  When later blocks
@@ -2944,7 +2944,7 @@ static bool handle_stripe5(struct stripe_head *sh)
        /* Finish reconstruct operations initiated by the expansion process */
        if (sh->reconstruct_state == reconstruct_state_result) {
                struct stripe_head *sh2
-                       = get_active_stripe(conf, sh->sector, 1, 1);
+                       = get_active_stripe(conf, sh->sector, 1, 1, 1);
                if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
                        /* sh cannot be written until sh2 has been read.
                         * so arrange for sh to be delayed a little
@@ -3189,7 +3189,7 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
 
        if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
                struct stripe_head *sh2
-                       = get_active_stripe(conf, sh->sector, 1, 1);
+                       = get_active_stripe(conf, sh->sector, 1, 1, 1);
                if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
                        /* sh cannot be written until sh2 has been read.
                         * so arrange for sh to be delayed a little
@@ -3288,7 +3288,7 @@ static void unplug_slaves(mddev_t *mddev)
        int i;
 
        rcu_read_lock();
-       for (i=0; i<mddev->raid_disks; i++) {
+       for (i = 0; i < conf->raid_disks; i++) {
                mdk_rdev_t *rdev = rcu_dereference(conf->disks[i].rdev);
                if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
                        struct request_queue *r_queue = bdev_get_queue(rdev->bdev);
@@ -3675,7 +3675,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
                        (unsigned long long)logical_sector);
 
                sh = get_active_stripe(conf, new_sector, previous,
-                                      (bi->bi_rw&RWA_MASK));
+                                      (bi->bi_rw&RWA_MASK), 0);
                if (sh) {
                        if (unlikely(previous)) {
                                /* expansion might have moved on while waiting for a
@@ -3873,7 +3873,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
        for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
                int j;
                int skipped = 0;
-               sh = get_active_stripe(conf, stripe_addr+i, 0, 0);
+               sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1);
                set_bit(STRIPE_EXPANDING, &sh->state);
                atomic_inc(&conf->reshape_stripes);
                /* If any of this stripe is beyond the end of the old
@@ -3916,13 +3916,13 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
                raid5_compute_sector(conf, stripe_addr*(new_data_disks),
                                     1, &dd_idx, NULL);
        last_sector =
-               raid5_compute_sector(conf, ((stripe_addr+conf->chunk_size/512)
+               raid5_compute_sector(conf, ((stripe_addr+reshape_sectors)
                                            *(new_data_disks) - 1),
                                     1, &dd_idx, NULL);
        if (last_sector >= mddev->dev_sectors)
                last_sector = mddev->dev_sectors - 1;
        while (first_sector <= last_sector) {
-               sh = get_active_stripe(conf, first_sector, 1, 0);
+               sh = get_active_stripe(conf, first_sector, 1, 0, 1);
                set_bit(STRIPE_EXPAND_SOURCE, &sh->state);
                set_bit(STRIPE_HANDLE, &sh->state);
                release_stripe(sh);
@@ -4022,9 +4022,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
 
        bitmap_cond_end_sync(mddev->bitmap, sector_nr);
 
-       sh = get_active_stripe(conf, sector_nr, 0, 1);
+       sh = get_active_stripe(conf, sector_nr, 0, 1, 0);
        if (sh == NULL) {
-               sh = get_active_stripe(conf, sector_nr, 0, 0);
+               sh = get_active_stripe(conf, sector_nr, 0, 0, 0);
                /* make sure we don't swamp the stripe cache if someone else
                 * is trying to get access
                 */
@@ -4034,7 +4034,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
         * We don't need to check the 'failed' flag as when that gets set,
         * recovery aborts.
         */
-       for (i=0; i<mddev->raid_disks; i++)
+       for (i = 0; i < conf->raid_disks; i++)
                if (conf->disks[i].rdev == NULL)
                        still_degraded = 1;
 
@@ -4086,7 +4086,7 @@ static int  retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
                        /* already done this stripe */
                        continue;
 
-               sh = get_active_stripe(conf, sector, 0, 1);
+               sh = get_active_stripe(conf, sector, 0, 1, 0);
 
                if (!sh) {
                        /* failed to get a stripe - must wait */
index ff7b7de..7fde36e 100644 (file)
@@ -230,7 +230,8 @@ int ivtv_stream_alloc(struct ivtv_stream *s)
                return -ENOMEM;
        }
        if (ivtv_might_use_dma(s)) {
-               s->sg_handle = pci_map_single(itv->pdev, s->sg_dma, sizeof(struct ivtv_sg_element), s->dma);
+               s->sg_handle = pci_map_single(itv->pdev, s->sg_dma,
+                               sizeof(struct ivtv_sg_element), PCI_DMA_TODEVICE);
                ivtv_stream_sync_for_cpu(s);
        }
 
index c643d0f..b56d72f 100644 (file)
@@ -64,6 +64,31 @@ static int mvsd_setup_data(struct mvsd_host *host, struct mmc_data *data)
        unsigned int tmout;
        int tmout_index;
 
+       /*
+        * Hardware weirdness.  The FIFO_EMPTY bit of the HW_STATE
+        * register is sometimes not set before a while when some
+        * "unusual" data block sizes are used (such as with the SWITCH
+        * command), even despite the fact that the XFER_DONE interrupt
+        * was raised.  And if another data transfer starts before
+        * this bit comes to good sense (which eventually happens by
+        * itself) then the new transfer simply fails with a timeout.
+        */
+       if (!(mvsd_read(MVSD_HW_STATE) & (1 << 13))) {
+               unsigned long t = jiffies + HZ;
+               unsigned int hw_state,  count = 0;
+               do {
+                       if (time_after(jiffies, t)) {
+                               dev_warn(host->dev, "FIFO_EMPTY bit missing\n");
+                               break;
+                       }
+                       hw_state = mvsd_read(MVSD_HW_STATE);
+                       count++;
+               } while (!(hw_state & (1 << 13)));
+               dev_dbg(host->dev, "*** wait for FIFO_EMPTY bit "
+                                  "(hw=0x%04x, count=%d, jiffies=%ld)\n",
+                                  hw_state, count, jiffies - (t - HZ));
+       }
+
        /* If timeout=0 then maximum timeout index is used. */
        tmout = DIV_ROUND_UP(data->timeout_ns, host->ns_per_clk);
        tmout += data->timeout_clks;
@@ -620,9 +645,18 @@ static void mvsd_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
        if (ios->bus_width == MMC_BUS_WIDTH_4)
                ctrl_reg |= MVSD_HOST_CTRL_DATA_WIDTH_4_BITS;
 
+       /*
+        * The HI_SPEED_EN bit is causing trouble with many (but not all)
+        * high speed SD, SDHC and SDIO cards.  Not enabling that bit
+        * makes all cards work.  So let's just ignore that bit for now
+        * and revisit this issue if problems caused by not enabling
+        * this bit are ever reported.
+        */
+#if 0
        if (ios->timing == MMC_TIMING_MMC_HS ||
            ios->timing == MMC_TIMING_SD_HS)
                ctrl_reg |= MVSD_HOST_CTRL_HI_SPEED_EN;
+#endif
 
        host->ctrl = ctrl_reg;
        mvsd_write(MVSD_HOST_CTRL, ctrl_reg);
@@ -882,3 +916,4 @@ module_param(nodma, int, 0);
 MODULE_AUTHOR("Maen Suleiman, Nicolas Pitre");
 MODULE_DESCRIPTION("Marvell MMC,SD,SDIO Host Controller driver");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mvsdio");
index b4a615c..f4cbe47 100644 (file)
@@ -140,6 +140,8 @@ struct mxcmci_host {
        struct work_struct      datawork;
 };
 
+static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
+
 static inline int mxcmci_use_dma(struct mxcmci_host *host)
 {
        return host->do_dma;
@@ -160,7 +162,7 @@ static void mxcmci_softreset(struct mxcmci_host *host)
        writew(0xff, host->base + MMC_REG_RES_TO);
 }
 
-static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
+static int mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 {
        unsigned int nob = data->blocks;
        unsigned int blksz = data->blksz;
@@ -168,6 +170,7 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
 #ifdef HAS_DMA
        struct scatterlist *sg;
        int i;
+       int ret;
 #endif
        if (data->flags & MMC_DATA_STREAM)
                nob = 0xffff;
@@ -183,7 +186,7 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
        for_each_sg(data->sg, sg, data->sg_len, i) {
                if (sg->offset & 3 || sg->length & 3) {
                        host->do_dma = 0;
-                       return;
+                       return 0;
                }
        }
 
@@ -192,23 +195,30 @@ static void mxcmci_setup_data(struct mxcmci_host *host, struct mmc_data *data)
                host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg,
                                             data->sg_len,  host->dma_dir);
 
-               imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize,
-                                host->res->start + MMC_REG_BUFFER_ACCESS,
-                                DMA_MODE_READ);
+               ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents,
+                               datasize,
+                               host->res->start + MMC_REG_BUFFER_ACCESS,
+                               DMA_MODE_READ);
        } else {
                host->dma_dir = DMA_TO_DEVICE;
                host->dma_nents = dma_map_sg(mmc_dev(host->mmc), data->sg,
                                             data->sg_len,  host->dma_dir);
 
-               imx_dma_setup_sg(host->dma, data->sg, host->dma_nents, datasize,
-                                host->res->start + MMC_REG_BUFFER_ACCESS,
-                                DMA_MODE_WRITE);
+               ret = imx_dma_setup_sg(host->dma, data->sg, host->dma_nents,
+                               datasize,
+                               host->res->start + MMC_REG_BUFFER_ACCESS,
+                               DMA_MODE_WRITE);
        }
 
+       if (ret) {
+               dev_err(mmc_dev(host->mmc), "failed to setup DMA : %d\n", ret);
+               return ret;
+       }
        wmb();
 
        imx_dma_enable(host->dma);
 #endif /* HAS_DMA */
+       return 0;
 }
 
 static int mxcmci_start_cmd(struct mxcmci_host *host, struct mmc_command *cmd,
@@ -345,8 +355,11 @@ static int mxcmci_poll_status(struct mxcmci_host *host, u32 mask)
                stat = readl(host->base + MMC_REG_STATUS);
                if (stat & STATUS_ERR_MASK)
                        return stat;
-               if (time_after(jiffies, timeout))
+               if (time_after(jiffies, timeout)) {
+                       mxcmci_softreset(host);
+                       mxcmci_set_clk_rate(host, host->clock);
                        return STATUS_TIME_OUT_READ;
+               }
                if (stat & mask)
                        return 0;
                cpu_relax();
@@ -531,6 +544,7 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req)
 {
        struct mxcmci_host *host = mmc_priv(mmc);
        unsigned int cmdat = host->cmdat;
+       int error;
 
        WARN_ON(host->req != NULL);
 
@@ -540,7 +554,12 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req)
        host->do_dma = 1;
 #endif
        if (req->data) {
-               mxcmci_setup_data(host, req->data);
+               error = mxcmci_setup_data(host, req->data);
+               if (error) {
+                       req->cmd->error = error;
+                       goto out;
+               }
+
 
                cmdat |= CMD_DAT_CONT_DATA_ENABLE;
 
@@ -548,7 +567,9 @@ static void mxcmci_request(struct mmc_host *mmc, struct mmc_request *req)
                        cmdat |= CMD_DAT_CONT_WRITE;
        }
 
-       if (mxcmci_start_cmd(host, req->cmd, cmdat))
+       error = mxcmci_start_cmd(host, req->cmd, cmdat);
+out:
+       if (error)
                mxcmci_finish_request(host, req);
 }
 
@@ -724,7 +745,9 @@ static int mxcmci_probe(struct platform_device *pdev)
                goto out_clk_put;
        }
 
-       mmc->f_min = clk_get_rate(host->clk) >> 7;
+       mmc->f_min = clk_get_rate(host->clk) >> 16;
+       if (mmc->f_min < 400000)
+               mmc->f_min = 400000;
        mmc->f_max = clk_get_rate(host->clk) >> 1;
 
        /* recommended in data sheet */
index bfa25c0..dceb5ee 100644 (file)
@@ -822,7 +822,7 @@ static irqreturn_t mmc_omap_irq(int irq, void *dev_id)
                del_timer(&host->cmd_abort_timer);
                host->abort = 1;
                OMAP_MMC_WRITE(host, IE, 0);
-               disable_irq(host->irq);
+               disable_irq_nosync(host->irq);
                schedule_work(&host->cmd_abort_work);
                return IRQ_HANDLED;
        }
index e62a22a..c40cb96 100644 (file)
@@ -680,7 +680,7 @@ static void mmc_omap_dma_cb(int lch, u16 ch_status, void *data)
        host->dma_ch = -1;
        /*
         * DMA Callback: run in interrupt context.
-        * mutex_unlock will through a kernel warning if used.
+        * mutex_unlock will throw a kernel warning if used.
         */
        up(&host->sem);
 }
index 3ff4ac3..128c614 100644 (file)
@@ -55,7 +55,13 @@ static u32 esdhc_readl(struct sdhci_host *host, int reg)
 
 static u16 esdhc_readw(struct sdhci_host *host, int reg)
 {
-       return in_be16(host->ioaddr + (reg ^ 0x2));
+       u16 ret;
+
+       if (unlikely(reg == SDHCI_HOST_VERSION))
+               ret = in_be16(host->ioaddr + reg);
+       else
+               ret = in_be16(host->ioaddr + (reg ^ 0x2));
+       return ret;
 }
 
 static u8 esdhc_readb(struct sdhci_host *host, int reg)
@@ -277,6 +283,7 @@ static int __devexit sdhci_of_remove(struct of_device *ofdev)
 static const struct of_device_id sdhci_of_match[] = {
        { .compatible = "fsl,mpc8379-esdhc", .data = &sdhci_esdhc, },
        { .compatible = "fsl,mpc8536-esdhc", .data = &sdhci_esdhc, },
+       { .compatible = "fsl,esdhc", .data = &sdhci_esdhc, },
        { .compatible = "generic-sdhci", },
        {},
 };
index 0119220..02700f7 100644 (file)
@@ -407,16 +407,17 @@ static int __init nand_davinci_probe(struct platform_device *pdev)
        }
        info->chip.ecc.mode = ecc_mode;
 
-       info->clk = clk_get(&pdev->dev, "AEMIFCLK");
+       info->clk = clk_get(&pdev->dev, "aemif");
        if (IS_ERR(info->clk)) {
                ret = PTR_ERR(info->clk);
-               dev_dbg(&pdev->dev, "unable to get AEMIFCLK, err %d\n", ret);
+               dev_dbg(&pdev->dev, "unable to get AEMIF clock, err %d\n", ret);
                goto err_clk;
        }
 
        ret = clk_enable(info->clk);
        if (ret < 0) {
-               dev_dbg(&pdev->dev, "unable to enable AEMIFCLK, err %d\n", ret);
+               dev_dbg(&pdev->dev, "unable to enable AEMIF clock, err %d\n",
+                       ret);
                goto err_clk_enable;
        }
 
index 8247a94..3b19e0c 100644 (file)
@@ -66,7 +66,6 @@ static const int multicast_filter_limit = 32;
 #define RX_DMA_BURST   6       /* Maximum PCI burst, '6' is 1024 */
 #define TX_DMA_BURST   6       /* Maximum PCI burst, '6' is 1024 */
 #define EarlyTxThld    0x3F    /* 0x3F means NO early transmit */
-#define RxPacketMaxSize        0x3FE8  /* 16K - 1 - ETH_HLEN - VLAN - CRC... */
 #define SafeMtu                0x1c20  /* ... actually life sucks beyond ~7k */
 #define InterFrameGap  0x03    /* 3 means InterFrameGap = the shortest one */
 
@@ -2357,10 +2356,10 @@ static u16 rtl_rw_cpluscmd(void __iomem *ioaddr)
        return cmd;
 }
 
-static void rtl_set_rx_max_size(void __iomem *ioaddr)
+static void rtl_set_rx_max_size(void __iomem *ioaddr, unsigned int rx_buf_sz)
 {
        /* Low hurts. Let's disable the filtering. */
-       RTL_W16(RxMaxSize, 16383);
+       RTL_W16(RxMaxSize, rx_buf_sz);
 }
 
 static void rtl8169_set_magic_reg(void __iomem *ioaddr, unsigned mac_version)
@@ -2407,7 +2406,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
 
        RTL_W8(EarlyTxThres, EarlyTxThld);
 
-       rtl_set_rx_max_size(ioaddr);
+       rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
        if ((tp->mac_version == RTL_GIGA_MAC_VER_01) ||
            (tp->mac_version == RTL_GIGA_MAC_VER_02) ||
@@ -2668,7 +2667,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
        RTL_W8(EarlyTxThres, EarlyTxThld);
 
-       rtl_set_rx_max_size(ioaddr);
+       rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
        tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
 
@@ -2846,7 +2845,7 @@ static void rtl_hw_start_8101(struct net_device *dev)
 
        RTL_W8(EarlyTxThres, EarlyTxThld);
 
-       rtl_set_rx_max_size(ioaddr);
+       rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
 
        tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
 
index 73348c4..4a9cc92 100644 (file)
@@ -702,7 +702,7 @@ static unsigned int iosapic_startup_irq(unsigned int irq)
 }
 
 #ifdef CONFIG_SMP
-static void iosapic_set_affinity_irq(unsigned int irq,
+static int iosapic_set_affinity_irq(unsigned int irq,
                                     const struct cpumask *dest)
 {
        struct vector_info *vi = iosapic_get_vector(irq);
@@ -712,7 +712,7 @@ static void iosapic_set_affinity_irq(unsigned int irq,
 
        dest_cpu = cpu_check_affinity(irq, dest);
        if (dest_cpu < 0)
-               return;
+               return -1;
 
        cpumask_copy(irq_desc[irq].affinity, cpumask_of(dest_cpu));
        vi->txn_addr = txn_affinity_addr(irq, dest_cpu);
@@ -724,6 +724,8 @@ static void iosapic_set_affinity_irq(unsigned int irq,
        iosapic_set_irt_data(vi, &dummy_d0, &d1);
        iosapic_wr_irt_entry(vi, d0, d1);
        spin_unlock_irqrestore(&iosapic_lock, flags);
+
+       return 0;
 }
 #endif
 
index 4e63cc9..151bf5b 100644 (file)
@@ -1,5 +1,5 @@
 /* Low-level parallel-port routines for 8255-based PC-style hardware.
- * 
+ *
  * Authors: Phil Blundell <philb@gnu.org>
  *          Tim Waugh <tim@cyberelk.demon.co.uk>
  *         Jose Renau <renau@acm.org>
@@ -11,7 +11,7 @@
  * Cleaned up include files - Russell King <linux@arm.uk.linux.org>
  * DMA support - Bert De Jonghe <bert@sophis.be>
  * Many ECP bugs fixed.  Fred Barnes & Jamie Lokier, 1999
- * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G. 
+ * More PCI support now conditional on CONFIG_PCI, 03/2001, Paul G.
  * Various hacks, Fred Barnes, 04/2001
  * Updated probing logic - Adam Belay <ambx1@neo.rr.com>
  */
 #include <linux/pnp.h>
 #include <linux/platform_device.h>
 #include <linux/sysctl.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
 
-#include <asm/io.h>
 #include <asm/dma.h>
-#include <asm/uaccess.h>
 
 #include <linux/parport.h>
 #include <linux/parport_pc.h>
@@ -82,7 +82,7 @@
 #define ECR_TST 06
 #define ECR_CNF 07
 #define ECR_MODE_MASK 0xe0
-#define ECR_WRITE(p,v) frob_econtrol((p),0xff,(v))
+#define ECR_WRITE(p, v) frob_econtrol((p), 0xff, (v))
 
 #undef DEBUG
 
@@ -109,27 +109,27 @@ static int pci_registered_parport;
 static int pnp_registered_parport;
 
 /* frob_control, but for ECR */
-static void frob_econtrol (struct parport *pb, unsigned char m,
+static void frob_econtrol(struct parport *pb, unsigned char m,
                           unsigned char v)
 {
        unsigned char ectr = 0;
 
        if (m != 0xff)
-               ectr = inb (ECONTROL (pb));
+               ectr = inb(ECONTROL(pb));
 
-       DPRINTK (KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n",
+       DPRINTK(KERN_DEBUG "frob_econtrol(%02x,%02x): %02x -> %02x\n",
                m, v, ectr, (ectr & ~m) ^ v);
 
-       outb ((ectr & ~m) ^ v, ECONTROL (pb));
+       outb((ectr & ~m) ^ v, ECONTROL(pb));
 }
 
-static __inline__ void frob_set_mode (struct parport *p, int mode)
+static inline void frob_set_mode(struct parport *p, int mode)
 {
-       frob_econtrol (p, ECR_MODE_MASK, mode << 5);
+       frob_econtrol(p, ECR_MODE_MASK, mode << 5);
 }
 
 #ifdef CONFIG_PARPORT_PC_FIFO
-/* Safely change the mode bits in the ECR 
+/* Safely change the mode bits in the ECR
    Returns:
            0    : Success
           -EBUSY: Could not drain FIFO in some finite amount of time,
@@ -141,17 +141,18 @@ static int change_mode(struct parport *p, int m)
        unsigned char oecr;
        int mode;
 
-       DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n",m);
+       DPRINTK(KERN_INFO "parport change_mode ECP-ISA to mode 0x%02x\n", m);
 
        if (!priv->ecr) {
-               printk (KERN_DEBUG "change_mode: but there's no ECR!\n");
+               printk(KERN_DEBUG "change_mode: but there's no ECR!\n");
                return 0;
        }
 
        /* Bits <7:5> contain the mode. */
-       oecr = inb (ECONTROL (p));
+       oecr = inb(ECONTROL(p));
        mode = (oecr >> 5) & 0x7;
-       if (mode == m) return 0;
+       if (mode == m)
+               return 0;
 
        if (mode >= 2 && !(priv->ctr & 0x20)) {
                /* This mode resets the FIFO, so we may
@@ -163,19 +164,21 @@ static int change_mode(struct parport *p, int m)
                case ECR_ECP: /* ECP Parallel Port mode */
                        /* Busy wait for 200us */
                        for (counter = 0; counter < 40; counter++) {
-                               if (inb (ECONTROL (p)) & 0x01)
+                               if (inb(ECONTROL(p)) & 0x01)
+                                       break;
+                               if (signal_pending(current))
                                        break;
-                               if (signal_pending (current)) break;
-                               udelay (5);
+                               udelay(5);
                        }
 
                        /* Poll slowly. */
-                       while (!(inb (ECONTROL (p)) & 0x01)) {
-                               if (time_after_eq (jiffies, expire))
+                       while (!(inb(ECONTROL(p)) & 0x01)) {
+                               if (time_after_eq(jiffies, expire))
                                        /* The FIFO is stuck. */
                                        return -EBUSY;
-                               schedule_timeout_interruptible(msecs_to_jiffies(10));
-                               if (signal_pending (current))
+                               schedule_timeout_interruptible(
+                                                       msecs_to_jiffies(10));
+                               if (signal_pending(current))
                                        break;
                        }
                }
@@ -185,20 +188,20 @@ static int change_mode(struct parport *p, int m)
                /* We have to go through mode 001 */
                oecr &= ~(7 << 5);
                oecr |= ECR_PS2 << 5;
-               ECR_WRITE (p, oecr);
+               ECR_WRITE(p, oecr);
        }
 
        /* Set the mode. */
        oecr &= ~(7 << 5);
        oecr |= m << 5;
-       ECR_WRITE (p, oecr);
+       ECR_WRITE(p, oecr);
        return 0;
 }
 
 #ifdef CONFIG_PARPORT_1284
 /* Find FIFO lossage; FIFO is reset */
 #if 0
-static int get_fifo_residue (struct parport *p)
+static int get_fifo_residue(struct parport *p)
 {
        int residue;
        int cnfga;
@@ -206,26 +209,26 @@ static int get_fifo_residue (struct parport *p)
 
        /* Adjust for the contents of the FIFO. */
        for (residue = priv->fifo_depth; ; residue--) {
-               if (inb (ECONTROL (p)) & 0x2)
+               if (inb(ECONTROL(p)) & 0x2)
                                /* Full up. */
                        break;
 
-               outb (0, FIFO (p));
+               outb(0, FIFO(p));
        }
 
-       printk (KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name,
+       printk(KERN_DEBUG "%s: %d PWords were left in FIFO\n", p->name,
                residue);
 
        /* Reset the FIFO. */
-       frob_set_mode (p, ECR_PS2);
+       frob_set_mode(p, ECR_PS2);
 
        /* Now change to config mode and clean up. FIXME */
-       frob_set_mode (p, ECR_CNF);
-       cnfga = inb (CONFIGA (p));
-       printk (KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga);
+       frob_set_mode(p, ECR_CNF);
+       cnfga = inb(CONFIGA(p));
+       printk(KERN_DEBUG "%s: cnfgA contains 0x%02x\n", p->name, cnfga);
 
        if (!(cnfga & (1<<2))) {
-               printk (KERN_DEBUG "%s: Accounting for extra byte\n", p->name);
+               printk(KERN_DEBUG "%s: Accounting for extra byte\n", p->name);
                residue++;
        }
 
@@ -233,9 +236,11 @@ static int get_fifo_residue (struct parport *p)
         * PWord != 1 byte. */
 
        /* Back to PS2 mode. */
-       frob_set_mode (p, ECR_PS2);
+       frob_set_mode(p, ECR_PS2);
 
-       DPRINTK (KERN_DEBUG "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n", inb (ECONTROL (p)));
+       DPRINTK(KERN_DEBUG
+            "*** get_fifo_residue: done residue collecting (ecr = 0x%2.2x)\n",
+                                                       inb(ECONTROL(p)));
        return residue;
 }
 #endif  /*  0 */
@@ -257,8 +262,8 @@ static int clear_epp_timeout(struct parport *pb)
        /* To clear timeout some chips require double read */
        parport_pc_read_status(pb);
        r = parport_pc_read_status(pb);
-       outb (r | 0x01, STATUS (pb)); /* Some reset by writing 1 */
-       outb (r & 0xfe, STATUS (pb)); /* Others by writing 0 */
+       outb(r | 0x01, STATUS(pb)); /* Some reset by writing 1 */
+       outb(r & 0xfe, STATUS(pb)); /* Others by writing 0 */
        r = parport_pc_read_status(pb);
 
        return !(r & 0x01);
@@ -272,7 +277,8 @@ static int clear_epp_timeout(struct parport *pb)
  * of these are in parport_pc.h.
  */
 
-static void parport_pc_init_state(struct pardevice *dev, struct parport_state *s)
+static void parport_pc_init_state(struct pardevice *dev,
+                                               struct parport_state *s)
 {
        s->u.pc.ctr = 0xc;
        if (dev->irq_func &&
@@ -289,22 +295,23 @@ static void parport_pc_save_state(struct parport *p, struct parport_state *s)
        const struct parport_pc_private *priv = p->physport->private_data;
        s->u.pc.ctr = priv->ctr;
        if (priv->ecr)
-               s->u.pc.ecr = inb (ECONTROL (p));
+               s->u.pc.ecr = inb(ECONTROL(p));
 }
 
-static void parport_pc_restore_state(struct parport *p, struct parport_state *s)
+static void parport_pc_restore_state(struct parport *p,
+                                               struct parport_state *s)
 {
        struct parport_pc_private *priv = p->physport->private_data;
        register unsigned char c = s->u.pc.ctr & priv->ctr_writable;
-       outb (c, CONTROL (p));
+       outb(c, CONTROL(p));
        priv->ctr = c;
        if (priv->ecr)
-               ECR_WRITE (p, s->u.pc.ecr);
+               ECR_WRITE(p, s->u.pc.ecr);
 }
 
 #ifdef CONFIG_PARPORT_1284
-static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
-                                       size_t length, int flags)
+static size_t parport_pc_epp_read_data(struct parport *port, void *buf,
+                                      size_t length, int flags)
 {
        size_t got = 0;
 
@@ -316,54 +323,52 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
                 *  nFault is 0 if there is at least 1 byte in the Warp's FIFO
                 *  pError is 1 if there are 16 bytes in the Warp's FIFO
                 */
-               status = inb (STATUS (port));
+               status = inb(STATUS(port));
 
-               while (!(status & 0x08) && (got < length)) {
-                       if ((left >= 16) && (status & 0x20) && !(status & 0x08)) {
+               while (!(status & 0x08) && got < length) {
+                       if (left >= 16 && (status & 0x20) && !(status & 0x08)) {
                                /* can grab 16 bytes from warp fifo */
-                               if (!((long)buf & 0x03)) {
-                                       insl (EPPDATA (port), buf, 4);
-                               } else {
-                                       insb (EPPDATA (port), buf, 16);
-                               }
+                               if (!((long)buf & 0x03))
+                                       insl(EPPDATA(port), buf, 4);
+                               else
+                                       insb(EPPDATA(port), buf, 16);
                                buf += 16;
                                got += 16;
                                left -= 16;
                        } else {
                                /* grab single byte from the warp fifo */
-                               *((char *)buf) = inb (EPPDATA (port));
+                               *((char *)buf) = inb(EPPDATA(port));
                                buf++;
                                got++;
                                left--;
                        }
-                       status = inb (STATUS (port));
+                       status = inb(STATUS(port));
                        if (status & 0x01) {
                                /* EPP timeout should never occur... */
-                               printk (KERN_DEBUG "%s: EPP timeout occurred while talking to "
-                                       "w91284pic (should not have done)\n", port->name);
-                               clear_epp_timeout (port);
+                               printk(KERN_DEBUG
+"%s: EPP timeout occurred while talking to w91284pic (should not have done)\n", port->name);
+                               clear_epp_timeout(port);
                        }
                }
                return got;
        }
        if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-               if (!(((long)buf | length) & 0x03)) {
-                       insl (EPPDATA (port), buf, (length >> 2));
-               } else {
-                       insb (EPPDATA (port), buf, length);
-               }
-               if (inb (STATUS (port)) & 0x01) {
-                       clear_epp_timeout (port);
+               if (!(((long)buf | length) & 0x03))
+                       insl(EPPDATA(port), buf, (length >> 2));
+               else
+                       insb(EPPDATA(port), buf, length);
+               if (inb(STATUS(port)) & 0x01) {
+                       clear_epp_timeout(port);
                        return -EIO;
                }
                return length;
        }
        for (; got < length; got++) {
-               *((char*)buf) = inb (EPPDATA(port));
+               *((char *)buf) = inb(EPPDATA(port));
                buf++;
-               if (inb (STATUS (port)) & 0x01) {
+               if (inb(STATUS(port)) & 0x01) {
                        /* EPP timeout */
-                       clear_epp_timeout (port);
+                       clear_epp_timeout(port);
                        break;
                }
        }
@@ -371,28 +376,27 @@ static size_t parport_pc_epp_read_data (struct parport *port, void *buf,
        return got;
 }
 
-static size_t parport_pc_epp_write_data (struct parport *port, const void *buf,
-                                        size_t length, int flags)
+static size_t parport_pc_epp_write_data(struct parport *port, const void *buf,
+                                       size_t length, int flags)
 {
        size_t written = 0;
 
        if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-               if (!(((long)buf | length) & 0x03)) {
-                       outsl (EPPDATA (port), buf, (length >> 2));
-               } else {
-                       outsb (EPPDATA (port), buf, length);
-               }
-               if (inb (STATUS (port)) & 0x01) {
-                       clear_epp_timeout (port);
+               if (!(((long)buf | length) & 0x03))
+                       outsl(EPPDATA(port), buf, (length >> 2));
+               else
+                       outsb(EPPDATA(port), buf, length);
+               if (inb(STATUS(port)) & 0x01) {
+                       clear_epp_timeout(port);
                        return -EIO;
                }
                return length;
        }
        for (; written < length; written++) {
-               outb (*((char*)buf), EPPDATA(port));
+               outb(*((char *)buf), EPPDATA(port));
                buf++;
-               if (inb (STATUS(port)) & 0x01) {
-                       clear_epp_timeout (port);
+               if (inb(STATUS(port)) & 0x01) {
+                       clear_epp_timeout(port);
                        break;
                }
        }
@@ -400,24 +404,24 @@ static size_t parport_pc_epp_write_data (struct parport *port, const void *buf,
        return written;
 }
 
-static size_t parport_pc_epp_read_addr (struct parport *port, void *buf,
+static size_t parport_pc_epp_read_addr(struct parport *port, void *buf,
                                        size_t length, int flags)
 {
        size_t got = 0;
 
        if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-               insb (EPPADDR (port), buf, length);
-               if (inb (STATUS (port)) & 0x01) {
-                       clear_epp_timeout (port);
+               insb(EPPADDR(port), buf, length);
+               if (inb(STATUS(port)) & 0x01) {
+                       clear_epp_timeout(port);
                        return -EIO;
                }
                return length;
        }
        for (; got < length; got++) {
-               *((char*)buf) = inb (EPPADDR (port));
+               *((char *)buf) = inb(EPPADDR(port));
                buf++;
-               if (inb (STATUS (port)) & 0x01) {
-                       clear_epp_timeout (port);
+               if (inb(STATUS(port)) & 0x01) {
+                       clear_epp_timeout(port);
                        break;
                }
        }
@@ -425,25 +429,25 @@ static size_t parport_pc_epp_read_addr (struct parport *port, void *buf,
        return got;
 }
 
-static size_t parport_pc_epp_write_addr (struct parport *port,
+static size_t parport_pc_epp_write_addr(struct parport *port,
                                         const void *buf, size_t length,
                                         int flags)
 {
        size_t written = 0;
 
        if ((flags & PARPORT_EPP_FAST) && (length > 1)) {
-               outsb (EPPADDR (port), buf, length);
-               if (inb (STATUS (port)) & 0x01) {
-                       clear_epp_timeout (port);
+               outsb(EPPADDR(port), buf, length);
+               if (inb(STATUS(port)) & 0x01) {
+                       clear_epp_timeout(port);
                        return -EIO;
                }
                return length;
        }
        for (; written < length; written++) {
-               outb (*((char*)buf), EPPADDR (port));
+               outb(*((char *)buf), EPPADDR(port));
                buf++;
-               if (inb (STATUS (port)) & 0x01) {
-                       clear_epp_timeout (port);
+               if (inb(STATUS(port)) & 0x01) {
+                       clear_epp_timeout(port);
                        break;
                }
        }
@@ -451,74 +455,74 @@ static size_t parport_pc_epp_write_addr (struct parport *port,
        return written;
 }
 
-static size_t parport_pc_ecpepp_read_data (struct parport *port, void *buf,
-                                          size_t length, int flags)
+static size_t parport_pc_ecpepp_read_data(struct parport *port, void *buf,
+                                         size_t length, int flags)
 {
        size_t got;
 
-       frob_set_mode (port, ECR_EPP);
-       parport_pc_data_reverse (port);
-       parport_pc_write_control (port, 0x4);
-       got = parport_pc_epp_read_data (port, buf, length, flags);
-       frob_set_mode (port, ECR_PS2);
+       frob_set_mode(port, ECR_EPP);
+       parport_pc_data_reverse(port);
+       parport_pc_write_control(port, 0x4);
+       got = parport_pc_epp_read_data(port, buf, length, flags);
+       frob_set_mode(port, ECR_PS2);
 
        return got;
 }
 
-static size_t parport_pc_ecpepp_write_data (struct parport *port,
-                                           const void *buf, size_t length,
-                                           int flags)
+static size_t parport_pc_ecpepp_write_data(struct parport *port,
+                                          const void *buf, size_t length,
+                                          int flags)
 {
        size_t written;
 
-       frob_set_mode (port, ECR_EPP);
-       parport_pc_write_control (port, 0x4);
-       parport_pc_data_forward (port);
-       written = parport_pc_epp_write_data (port, buf, length, flags);
-       frob_set_mode (port, ECR_PS2);
+       frob_set_mode(port, ECR_EPP);
+       parport_pc_write_control(port, 0x4);
+       parport_pc_data_forward(port);
+       written = parport_pc_epp_write_data(port, buf, length, flags);
+       frob_set_mode(port, ECR_PS2);
 
        return written;
 }
 
-static size_t parport_pc_ecpepp_read_addr (struct parport *port, void *buf,
-                                          size_t length, int flags)
+static size_t parport_pc_ecpepp_read_addr(struct parport *port, void *buf,
+                                         size_t length, int flags)
 {
        size_t got;
 
-       frob_set_mode (port, ECR_EPP);
-       parport_pc_data_reverse (port);
-       parport_pc_write_control (port, 0x4);
-       got = parport_pc_epp_read_addr (port, buf, length, flags);
-       frob_set_mode (port, ECR_PS2);
+       frob_set_mode(port, ECR_EPP);
+       parport_pc_data_reverse(port);
+       parport_pc_write_control(port, 0x4);
+       got = parport_pc_epp_read_addr(port, buf, length, flags);
+       frob_set_mode(port, ECR_PS2);
 
        return got;
 }
 
-static size_t parport_pc_ecpepp_write_addr (struct parport *port,
+static size_t parport_pc_ecpepp_write_addr(struct parport *port,
                                            const void *buf, size_t length,
                                            int flags)
 {
        size_t written;
 
-       frob_set_mode (port, ECR_EPP);
-       parport_pc_write_control (port, 0x4);
-       parport_pc_data_forward (port);
-       written = parport_pc_epp_write_addr (port, buf, length, flags);
-       frob_set_mode (port, ECR_PS2);
+       frob_set_mode(port, ECR_EPP);
+       parport_pc_write_control(port, 0x4);
+       parport_pc_data_forward(port);
+       written = parport_pc_epp_write_addr(port, buf, length, flags);
+       frob_set_mode(port, ECR_PS2);
 
        return written;
 }
 #endif /* IEEE 1284 support */
 
 #ifdef CONFIG_PARPORT_PC_FIFO
-static size_t parport_pc_fifo_write_block_pio (struct parport *port,
+static size_t parport_pc_fifo_write_block_pio(struct parport *port,
                                               const void *buf, size_t length)
 {
        int ret = 0;
        const unsigned char *bufp = buf;
        size_t left = length;
        unsigned long expire = jiffies + port->physport->cad->timeout;
-       const int fifo = FIFO (port);
+       const int fifo = FIFO(port);
        int poll_for = 8; /* 80 usecs */
        const struct parport_pc_private *priv = port->physport->private_data;
        const int fifo_depth = priv->fifo_depth;
@@ -526,25 +530,25 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port,
        port = port->physport;
 
        /* We don't want to be interrupted every character. */
-       parport_pc_disable_irq (port);
+       parport_pc_disable_irq(port);
        /* set nErrIntrEn and serviceIntr */
-       frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2));
+       frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
 
        /* Forward mode. */
-       parport_pc_data_forward (port); /* Must be in PS2 mode */
+       parport_pc_data_forward(port); /* Must be in PS2 mode */
 
        while (left) {
                unsigned char byte;
-               unsigned char ecrval = inb (ECONTROL (port));
+               unsigned char ecrval = inb(ECONTROL(port));
                int i = 0;
 
-               if (need_resched() && time_before (jiffies, expire))
+               if (need_resched() && time_before(jiffies, expire))
                        /* Can't yield the port. */
-                       schedule ();
+                       schedule();
 
                /* Anyone else waiting for the port? */
                if (port->waithead) {
-                       printk (KERN_DEBUG "Somebody wants the port\n");
+                       printk(KERN_DEBUG "Somebody wants the port\n");
                        break;
                }
 
@@ -552,21 +556,22 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port,
                        /* FIFO is full. Wait for interrupt. */
 
                        /* Clear serviceIntr */
-                       ECR_WRITE (port, ecrval & ~(1<<2));
-               false_alarm:
-                       ret = parport_wait_event (port, HZ);
-                       if (ret < 0) break;
+                       ECR_WRITE(port, ecrval & ~(1<<2));
+false_alarm:
+                       ret = parport_wait_event(port, HZ);
+                       if (ret < 0)
+                               break;
                        ret = 0;
-                       if (!time_before (jiffies, expire)) {
+                       if (!time_before(jiffies, expire)) {
                                /* Timed out. */
-                               printk (KERN_DEBUG "FIFO write timed out\n");
+                               printk(KERN_DEBUG "FIFO write timed out\n");
                                break;
                        }
-                       ecrval = inb (ECONTROL (port));
+                       ecrval = inb(ECONTROL(port));
                        if (!(ecrval & (1<<2))) {
                                if (need_resched() &&
-                                   time_before (jiffies, expire))
-                                       schedule ();
+                                   time_before(jiffies, expire))
+                                       schedule();
 
                                goto false_alarm;
                        }
@@ -577,38 +582,38 @@ static size_t parport_pc_fifo_write_block_pio (struct parport *port,
                /* Can't fail now. */
                expire = jiffies + port->cad->timeout;
 
-       poll:
-               if (signal_pending (current))
+poll:
+               if (signal_pending(current))
                        break;
 
                if (ecrval & 0x01) {
                        /* FIFO is empty. Blast it full. */
                        const int n = left < fifo_depth ? left : fifo_depth;
-                       outsb (fifo, bufp, n);
+                       outsb(fifo, bufp, n);
                        bufp += n;
                        left -= n;
 
                        /* Adjust the poll time. */
-                       if (i < (poll_for - 2)) poll_for--;
+                       if (i < (poll_for - 2))
+                               poll_for--;
                        continue;
                } else if (i++ < poll_for) {
-                       udelay (10);
-                       ecrval = inb (ECONTROL (port));
+                       udelay(10);
+                       ecrval = inb(ECONTROL(port));
                        goto poll;
                }
 
-               /* Half-full (call me an optimist) */
+               /* Half-full (call me an optimist) */
                byte = *bufp++;
-               outb (byte, fifo);
+               outb(byte, fifo);
                left--;
-        }
-
-dump_parport_state ("leave fifo_write_block_pio", port);
+       }
+       dump_parport_state("leave fifo_write_block_pio", port);
        return length - left;
 }
 
 #ifdef HAS_DMA
-static size_t parport_pc_fifo_write_block_dma (struct parport *port,
+static size_t parport_pc_fifo_write_block_dma(struct parport *port,
                                               const void *buf, size_t length)
 {
        int ret = 0;
@@ -621,7 +626,7 @@ static size_t parport_pc_fifo_write_block_dma (struct parport *port,
        unsigned long start = (unsigned long) buf;
        unsigned long end = (unsigned long) buf + length - 1;
 
-dump_parport_state ("enter fifo_write_block_dma", port);
+       dump_parport_state("enter fifo_write_block_dma", port);
        if (end < MAX_DMA_ADDRESS) {
                /* If it would cross a 64k boundary, cap it at the end. */
                if ((start ^ end) & ~0xffffUL)
@@ -629,8 +634,9 @@ dump_parport_state ("enter fifo_write_block_dma", port);
 
                dma_addr = dma_handle = dma_map_single(dev, (void *)buf, length,
                                                       DMA_TO_DEVICE);
-        } else {
-               /* above 16 MB we use a bounce buffer as ISA-DMA is not possible */
+       } else {
+               /* above 16 MB we use a bounce buffer as ISA-DMA
+                  is not possible */
                maxlen   = PAGE_SIZE;          /* sizeof(priv->dma_buf) */
                dma_addr = priv->dma_handle;
                dma_handle = 0;
@@ -639,12 +645,12 @@ dump_parport_state ("enter fifo_write_block_dma", port);
        port = port->physport;
 
        /* We don't want to be interrupted every character. */
-       parport_pc_disable_irq (port);
+       parport_pc_disable_irq(port);
        /* set nErrIntrEn and serviceIntr */
-       frob_econtrol (port, (1<<4) | (1<<2), (1<<4) | (1<<2));
+       frob_econtrol(port, (1<<4) | (1<<2), (1<<4) | (1<<2));
 
        /* Forward mode. */
-       parport_pc_data_forward (port); /* Must be in PS2 mode */
+       parport_pc_data_forward(port); /* Must be in PS2 mode */
 
        while (left) {
                unsigned long expire = jiffies + port->physport->cad->timeout;
@@ -665,10 +671,10 @@ dump_parport_state ("enter fifo_write_block_dma", port);
                set_dma_count(port->dma, count);
 
                /* Set DMA mode */
-               frob_econtrol (port, 1<<3, 1<<3);
+               frob_econtrol(port, 1<<3, 1<<3);
 
                /* Clear serviceIntr */
-               frob_econtrol (port, 1<<2, 0);
+               frob_econtrol(port, 1<<2, 0);
 
                enable_dma(port->dma);
                release_dma_lock(dmaflag);
@@ -676,20 +682,22 @@ dump_parport_state ("enter fifo_write_block_dma", port);
                /* assume DMA will be successful */
                left -= count;
                buf  += count;
-               if (dma_handle) dma_addr += count;
+               if (dma_handle)
+                       dma_addr += count;
 
                /* Wait for interrupt. */
-       false_alarm:
-               ret = parport_wait_event (port, HZ);
-               if (ret < 0) break;
+false_alarm:
+               ret = parport_wait_event(port, HZ);
+               if (ret < 0)
+                       break;
                ret = 0;
-               if (!time_before (jiffies, expire)) {
+               if (!time_before(jiffies, expire)) {
                        /* Timed out. */
-                       printk (KERN_DEBUG "DMA write timed out\n");
+                       printk(KERN_DEBUG "DMA write timed out\n");
                        break;
                }
                /* Is serviceIntr set? */
-               if (!(inb (ECONTROL (port)) & (1<<2))) {
+               if (!(inb(ECONTROL(port)) & (1<<2))) {
                        cond_resched();
 
                        goto false_alarm;
@@ -705,14 +713,15 @@ dump_parport_state ("enter fifo_write_block_dma", port);
 
                /* Anyone else waiting for the port? */
                if (port->waithead) {
-                       printk (KERN_DEBUG "Somebody wants the port\n");
+                       printk(KERN_DEBUG "Somebody wants the port\n");
                        break;
                }
 
                /* update for possible DMA residue ! */
                buf  -= count;
                left += count;
-               if (dma_handle) dma_addr -= count;
+               if (dma_handle)
+                       dma_addr -= count;
        }
 
        /* Maybe got here through break, so adjust for DMA residue! */
@@ -723,12 +732,12 @@ dump_parport_state ("enter fifo_write_block_dma", port);
        release_dma_lock(dmaflag);
 
        /* Turn off DMA mode */
-       frob_econtrol (port, 1<<3, 0);
+       frob_econtrol(port, 1<<3, 0);
 
        if (dma_handle)
                dma_unmap_single(dev, dma_handle, length, DMA_TO_DEVICE);
 
-dump_parport_state ("leave fifo_write_block_dma", port);
+       dump_parport_state("leave fifo_write_block_dma", port);
        return length - left;
 }
 #endif
@@ -738,13 +747,13 @@ static inline size_t parport_pc_fifo_write_block(struct parport *port,
 {
 #ifdef HAS_DMA
        if (port->dma != PARPORT_DMA_NONE)
-               return parport_pc_fifo_write_block_dma (port, buf, length);
+               return parport_pc_fifo_write_block_dma(port, buf, length);
 #endif
-       return parport_pc_fifo_write_block_pio (port, buf, length);
+       return parport_pc_fifo_write_block_pio(port, buf, length);
 }
 
 /* Parallel Port FIFO mode (ECP chipsets) */
-static size_t parport_pc_compat_write_block_pio (struct parport *port,
+static size_t parport_pc_compat_write_block_pio(struct parport *port,
                                                 const void *buf, size_t length,
                                                 int flags)
 {
@@ -756,14 +765,16 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port,
        /* Special case: a timeout of zero means we cannot call schedule().
         * Also if O_NONBLOCK is set then use the default implementation. */
        if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-               return parport_ieee1284_write_compat (port, buf,
+               return parport_ieee1284_write_compat(port, buf,
                                                      length, flags);
 
        /* Set up parallel port FIFO mode.*/
-       parport_pc_data_forward (port); /* Must be in PS2 mode */
-       parport_pc_frob_control (port, PARPORT_CONTROL_STROBE, 0);
-       r = change_mode (port, ECR_PPF); /* Parallel port FIFO */
-       if (r)  printk (KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n", port->name);
+       parport_pc_data_forward(port); /* Must be in PS2 mode */
+       parport_pc_frob_control(port, PARPORT_CONTROL_STROBE, 0);
+       r = change_mode(port, ECR_PPF); /* Parallel port FIFO */
+       if (r)
+               printk(KERN_DEBUG "%s: Warning change_mode ECR_PPF failed\n",
+                                                               port->name);
 
        port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
 
@@ -775,40 +786,39 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port,
         * the FIFO is empty, so allow 4 seconds for each position
         * in the fifo.
         */
-        expire = jiffies + (priv->fifo_depth * HZ * 4);
+       expire = jiffies + (priv->fifo_depth * HZ * 4);
        do {
                /* Wait for the FIFO to empty */
-               r = change_mode (port, ECR_PS2);
-               if (r != -EBUSY) {
+               r = change_mode(port, ECR_PS2);
+               if (r != -EBUSY)
                        break;
-               }
-       } while (time_before (jiffies, expire));
+       } while (time_before(jiffies, expire));
        if (r == -EBUSY) {
 
-               printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name);
+               printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
 
                /* Prevent further data transfer. */
-               frob_set_mode (port, ECR_TST);
+               frob_set_mode(port, ECR_TST);
 
                /* Adjust for the contents of the FIFO. */
                for (written -= priv->fifo_depth; ; written++) {
-                       if (inb (ECONTROL (port)) & 0x2) {
+                       if (inb(ECONTROL(port)) & 0x2) {
                                /* Full up. */
                                break;
                        }
-                       outb (0, FIFO (port));
+                       outb(0, FIFO(port));
                }
 
                /* Reset the FIFO and return to PS2 mode. */
-               frob_set_mode (port, ECR_PS2);
+               frob_set_mode(port, ECR_PS2);
        }
 
-       r = parport_wait_peripheral (port,
+       r = parport_wait_peripheral(port,
                                     PARPORT_STATUS_BUSY,
                                     PARPORT_STATUS_BUSY);
        if (r)
-               printk (KERN_DEBUG
-                       "%s: BUSY timeout (%d) in compat_write_block_pio\n", 
+               printk(KERN_DEBUG
+                       "%s: BUSY timeout (%d) in compat_write_block_pio\n",
                        port->name, r);
 
        port->physport->ieee1284.phase = IEEE1284_PH_FWD_IDLE;
@@ -818,7 +828,7 @@ static size_t parport_pc_compat_write_block_pio (struct parport *port,
 
 /* ECP */
 #ifdef CONFIG_PARPORT_1284
-static size_t parport_pc_ecp_write_block_pio (struct parport *port,
+static size_t parport_pc_ecp_write_block_pio(struct parport *port,
                                              const void *buf, size_t length,
                                              int flags)
 {
@@ -830,36 +840,38 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port,
        /* Special case: a timeout of zero means we cannot call schedule().
         * Also if O_NONBLOCK is set then use the default implementation. */
        if (port->physport->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-               return parport_ieee1284_ecp_write_data (port, buf,
+               return parport_ieee1284_ecp_write_data(port, buf,
                                                        length, flags);
 
        /* Switch to forward mode if necessary. */
        if (port->physport->ieee1284.phase != IEEE1284_PH_FWD_IDLE) {
                /* Event 47: Set nInit high. */
-               parport_frob_control (port,
+               parport_frob_control(port,
                                      PARPORT_CONTROL_INIT
                                      | PARPORT_CONTROL_AUTOFD,
                                      PARPORT_CONTROL_INIT
                                      | PARPORT_CONTROL_AUTOFD);
 
                /* Event 49: PError goes high. */
-               r = parport_wait_peripheral (port,
+               r = parport_wait_peripheral(port,
                                             PARPORT_STATUS_PAPEROUT,
                                             PARPORT_STATUS_PAPEROUT);
                if (r) {
-                       printk (KERN_DEBUG "%s: PError timeout (%d) "
+                       printk(KERN_DEBUG "%s: PError timeout (%d) "
                                "in ecp_write_block_pio\n", port->name, r);
                }
        }
 
        /* Set up ECP parallel port mode.*/
-       parport_pc_data_forward (port); /* Must be in PS2 mode */
-       parport_pc_frob_control (port,
+       parport_pc_data_forward(port); /* Must be in PS2 mode */
+       parport_pc_frob_control(port,
                                 PARPORT_CONTROL_STROBE |
                                 PARPORT_CONTROL_AUTOFD,
                                 0);
-       r = change_mode (port, ECR_ECP); /* ECP FIFO */
-       if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name);
+       r = change_mode(port, ECR_ECP); /* ECP FIFO */
+       if (r)
+               printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
+                                                               port->name);
        port->physport->ieee1284.phase = IEEE1284_PH_FWD_DATA;
 
        /* Write the data to the FIFO. */
@@ -873,55 +885,54 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port,
        expire = jiffies + (priv->fifo_depth * (HZ * 4));
        do {
                /* Wait for the FIFO to empty */
-               r = change_mode (port, ECR_PS2);
-               if (r != -EBUSY) {
+               r = change_mode(port, ECR_PS2);
+               if (r != -EBUSY)
                        break;
-               }
-       } while (time_before (jiffies, expire));
+       } while (time_before(jiffies, expire));
        if (r == -EBUSY) {
 
-               printk (KERN_DEBUG "%s: FIFO is stuck\n", port->name);
+               printk(KERN_DEBUG "%s: FIFO is stuck\n", port->name);
 
                /* Prevent further data transfer. */
-               frob_set_mode (port, ECR_TST);
+               frob_set_mode(port, ECR_TST);
 
                /* Adjust for the contents of the FIFO. */
                for (written -= priv->fifo_depth; ; written++) {
-                       if (inb (ECONTROL (port)) & 0x2) {
+                       if (inb(ECONTROL(port)) & 0x2) {
                                /* Full up. */
                                break;
                        }
-                       outb (0, FIFO (port));
+                       outb(0, FIFO(port));
                }
 
                /* Reset the FIFO and return to PS2 mode. */
-               frob_set_mode (port, ECR_PS2);
+               frob_set_mode(port, ECR_PS2);
 
                /* Host transfer recovery. */
-               parport_pc_data_reverse (port); /* Must be in PS2 mode */
-               udelay (5);
-               parport_frob_control (port, PARPORT_CONTROL_INIT, 0);
-               r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0);
+               parport_pc_data_reverse(port); /* Must be in PS2 mode */
+               udelay(5);
+               parport_frob_control(port, PARPORT_CONTROL_INIT, 0);
+               r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
                if (r)
-                       printk (KERN_DEBUG "%s: PE,1 timeout (%d) "
+                       printk(KERN_DEBUG "%s: PE,1 timeout (%d) "
                                "in ecp_write_block_pio\n", port->name, r);
 
-               parport_frob_control (port,
+               parport_frob_control(port,
                                      PARPORT_CONTROL_INIT,
                                      PARPORT_CONTROL_INIT);
-               r = parport_wait_peripheral (port,
+               r = parport_wait_peripheral(port,
                                             PARPORT_STATUS_PAPEROUT,
                                             PARPORT_STATUS_PAPEROUT);
-                if (r)
-                        printk (KERN_DEBUG "%s: PE,2 timeout (%d) "
+               if (r)
+                       printk(KERN_DEBUG "%s: PE,2 timeout (%d) "
                                "in ecp_write_block_pio\n", port->name, r);
        }
 
-       r = parport_wait_peripheral (port,
-                                    PARPORT_STATUS_BUSY, 
+       r = parport_wait_peripheral(port,
+                                    PARPORT_STATUS_BUSY,
                                     PARPORT_STATUS_BUSY);
-       if(r)
-               printk (KERN_DEBUG
+       if (r)
+               printk(KERN_DEBUG
                        "%s: BUSY timeout (%d) in ecp_write_block_pio\n",
                        port->name, r);
 
@@ -931,7 +942,7 @@ static size_t parport_pc_ecp_write_block_pio (struct parport *port,
 }
 
 #if 0
-static size_t parport_pc_ecp_read_block_pio (struct parport *port,
+static size_t parport_pc_ecp_read_block_pio(struct parport *port,
                                             void *buf, size_t length,
                                             int flags)
 {
@@ -944,13 +955,13 @@ static size_t parport_pc_ecp_read_block_pio (struct parport *port,
        char *bufp = buf;
 
        port = port->physport;
-DPRINTK (KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
-dump_parport_state ("enter fcn", port);
+       DPRINTK(KERN_DEBUG "parport_pc: parport_pc_ecp_read_block_pio\n");
+       dump_parport_state("enter fcn", port);
 
        /* Special case: a timeout of zero means we cannot call schedule().
         * Also if O_NONBLOCK is set then use the default implementation. */
        if (port->cad->timeout <= PARPORT_INACTIVITY_O_NONBLOCK)
-               return parport_ieee1284_ecp_read_data (port, buf,
+               return parport_ieee1284_ecp_read_data(port, buf,
                                                       length, flags);
 
        if (port->ieee1284.mode == IEEE1284_MODE_ECPRLE) {
@@ -966,173 +977,178 @@ dump_parport_state ("enter fcn", port);
         * go through software emulation.  Otherwise we may have to throw
         * away data. */
        if (length < fifofull)
-               return parport_ieee1284_ecp_read_data (port, buf,
+               return parport_ieee1284_ecp_read_data(port, buf,
                                                       length, flags);
 
        if (port->ieee1284.phase != IEEE1284_PH_REV_IDLE) {
                /* change to reverse-idle phase (must be in forward-idle) */
 
                /* Event 38: Set nAutoFd low (also make sure nStrobe is high) */
-               parport_frob_control (port,
+               parport_frob_control(port,
                                      PARPORT_CONTROL_AUTOFD
                                      | PARPORT_CONTROL_STROBE,
                                      PARPORT_CONTROL_AUTOFD);
-               parport_pc_data_reverse (port); /* Must be in PS2 mode */
-               udelay (5);
+               parport_pc_data_reverse(port); /* Must be in PS2 mode */
+               udelay(5);
                /* Event 39: Set nInit low to initiate bus reversal */
-               parport_frob_control (port,
+               parport_frob_control(port,
                                      PARPORT_CONTROL_INIT,
                                      0);
                /* Event 40: Wait for  nAckReverse (PError) to go low */
-               r = parport_wait_peripheral (port, PARPORT_STATUS_PAPEROUT, 0);
-                if (r) {
-                        printk (KERN_DEBUG "%s: PE timeout Event 40 (%d) "
+               r = parport_wait_peripheral(port, PARPORT_STATUS_PAPEROUT, 0);
+               if (r) {
+                       printk(KERN_DEBUG "%s: PE timeout Event 40 (%d) "
                                "in ecp_read_block_pio\n", port->name, r);
                        return 0;
                }
        }
 
        /* Set up ECP FIFO mode.*/
-/*     parport_pc_frob_control (port,
+/*     parport_pc_frob_control(port,
                                 PARPORT_CONTROL_STROBE |
                                 PARPORT_CONTROL_AUTOFD,
                                 PARPORT_CONTROL_AUTOFD); */
-       r = change_mode (port, ECR_ECP); /* ECP FIFO */
-       if (r) printk (KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n", port->name);
+       r = change_mode(port, ECR_ECP); /* ECP FIFO */
+       if (r)
+               printk(KERN_DEBUG "%s: Warning change_mode ECR_ECP failed\n",
+                                                               port->name);
 
        port->ieee1284.phase = IEEE1284_PH_REV_DATA;
 
        /* the first byte must be collected manually */
-dump_parport_state ("pre 43", port);
+       dump_parport_state("pre 43", port);
        /* Event 43: Wait for nAck to go low */
-       r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, 0);
+       r = parport_wait_peripheral(port, PARPORT_STATUS_ACK, 0);
        if (r) {
                /* timed out while reading -- no data */
-               printk (KERN_DEBUG "PIO read timed out (initial byte)\n");
+               printk(KERN_DEBUG "PIO read timed out (initial byte)\n");
                goto out_no_data;
        }
        /* read byte */
-       *bufp++ = inb (DATA (port));
+       *bufp++ = inb(DATA(port));
        left--;
-dump_parport_state ("43-44", port);
+       dump_parport_state("43-44", port);
        /* Event 44: nAutoFd (HostAck) goes high to acknowledge */
-       parport_pc_frob_control (port,
+       parport_pc_frob_control(port,
                                 PARPORT_CONTROL_AUTOFD,
                                 0);
-dump_parport_state ("pre 45", port);
+       dump_parport_state("pre 45", port);
        /* Event 45: Wait for nAck to go high */
-/*     r = parport_wait_peripheral (port, PARPORT_STATUS_ACK, PARPORT_STATUS_ACK); */
-dump_parport_state ("post 45", port);
-r = 0;
+       /* r = parport_wait_peripheral(port, PARPORT_STATUS_ACK,
+                                               PARPORT_STATUS_ACK); */
+       dump_parport_state("post 45", port);
+       r = 0;
        if (r) {
                /* timed out while waiting for peripheral to respond to ack */
-               printk (KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n");
+               printk(KERN_DEBUG "ECP PIO read timed out (waiting for nAck)\n");
 
                /* keep hold of the byte we've got already */
                goto out_no_data;
        }
        /* Event 46: nAutoFd (HostAck) goes low to accept more data */
-       parport_pc_frob_control (port,
+       parport_pc_frob_control(port,
                                 PARPORT_CONTROL_AUTOFD,
                                 PARPORT_CONTROL_AUTOFD);
 
 
-dump_parport_state ("rev idle", port);
+       dump_parport_state("rev idle", port);
        /* Do the transfer. */
        while (left > fifofull) {
                int ret;
                unsigned long expire = jiffies + port->cad->timeout;
-               unsigned char ecrval = inb (ECONTROL (port));
+               unsigned char ecrval = inb(ECONTROL(port));
 
-               if (need_resched() && time_before (jiffies, expire))
+               if (need_resched() && time_before(jiffies, expire))
                        /* Can't yield the port. */
-                       schedule ();
+                       schedule();
 
                /* At this point, the FIFO may already be full. In
-                 * that case ECP is already holding back the
-                 * peripheral (assuming proper design) with a delayed
-                 * handshake.  Work fast to avoid a peripheral
-                 * timeout.  */
+                * that case ECP is already holding back the
+                * peripheral (assuming proper design) with a delayed
+                * handshake.  Work fast to avoid a peripheral
+                * timeout.  */
 
                if (ecrval & 0x01) {
                        /* FIFO is empty. Wait for interrupt. */
-dump_parport_state ("FIFO empty", port);
+                       dump_parport_state("FIFO empty", port);
 
                        /* Anyone else waiting for the port? */
                        if (port->waithead) {
-                               printk (KERN_DEBUG "Somebody wants the port\n");
+                               printk(KERN_DEBUG "Somebody wants the port\n");
                                break;
                        }
 
                        /* Clear serviceIntr */
-                       ECR_WRITE (port, ecrval & ~(1<<2));
-               false_alarm:
-dump_parport_state ("waiting", port);
-                       ret = parport_wait_event (port, HZ);
-DPRINTK (KERN_DEBUG "parport_wait_event returned %d\n", ret);
+                       ECR_WRITE(port, ecrval & ~(1<<2));
+false_alarm:
+                       dump_parport_state("waiting", port);
+                       ret = parport_wait_event(port, HZ);
+                       DPRINTK(KERN_DEBUG "parport_wait_event returned %d\n",
+                                                                       ret);
                        if (ret < 0)
                                break;
                        ret = 0;
-                       if (!time_before (jiffies, expire)) {
+                       if (!time_before(jiffies, expire)) {
                                /* Timed out. */
-dump_parport_state ("timeout", port);
-                               printk (KERN_DEBUG "PIO read timed out\n");
+                               dump_parport_state("timeout", port);
+                               printk(KERN_DEBUG "PIO read timed out\n");
                                break;
                        }
-                       ecrval = inb (ECONTROL (port));
+                       ecrval = inb(ECONTROL(port));
                        if (!(ecrval & (1<<2))) {
                                if (need_resched() &&
-                                   time_before (jiffies, expire)) {
-                                       schedule ();
+                                   time_before(jiffies, expire)) {
+                                       schedule();
                                }
                                goto false_alarm;
                        }
 
                        /* Depending on how the FIFO threshold was
-                         * set, how long interrupt service took, and
-                         * how fast the peripheral is, we might be
-                         * lucky and have a just filled FIFO. */
+                        * set, how long interrupt service took, and
+                        * how fast the peripheral is, we might be
+                        * lucky and have a just filled FIFO. */
                        continue;
                }
 
                if (ecrval & 0x02) {
                        /* FIFO is full. */
-dump_parport_state ("FIFO full", port);
-                       insb (fifo, bufp, fifo_depth);
+                       dump_parport_state("FIFO full", port);
+                       insb(fifo, bufp, fifo_depth);
                        bufp += fifo_depth;
                        left -= fifo_depth;
                        continue;
                }
 
-DPRINTK (KERN_DEBUG "*** ecp_read_block_pio: reading one byte from the FIFO\n");
+               DPRINTK(KERN_DEBUG
+                 "*** ecp_read_block_pio: reading one byte from the FIFO\n");
 
                /* FIFO not filled.  We will cycle this loop for a while
-                 * and either the peripheral will fill it faster,
-                 * tripping a fast empty with insb, or we empty it. */
-               *bufp++ = inb (fifo);
+                * and either the peripheral will fill it faster,
+                * tripping a fast empty with insb, or we empty it. */
+               *bufp++ = inb(fifo);
                left--;
        }
 
        /* scoop up anything left in the FIFO */
-       while (left && !(inb (ECONTROL (port) & 0x01))) {
-               *bufp++ = inb (fifo);
+       while (left && !(inb(ECONTROL(port) & 0x01))) {
+               *bufp++ = inb(fifo);
                left--;
        }
 
        port->ieee1284.phase = IEEE1284_PH_REV_IDLE;
-dump_parport_state ("rev idle2", port);
+       dump_parport_state("rev idle2", port);
 
 out_no_data:
 
        /* Go to forward idle mode to shut the peripheral up (event 47). */
-       parport_frob_control (port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT);
+       parport_frob_control(port, PARPORT_CONTROL_INIT, PARPORT_CONTROL_INIT);
 
        /* event 49: PError goes high */
-       r = parport_wait_peripheral (port,
+       r = parport_wait_peripheral(port,
                                     PARPORT_STATUS_PAPEROUT,
                                     PARPORT_STATUS_PAPEROUT);
        if (r) {
-               printk (KERN_DEBUG
+               printk(KERN_DEBUG
                        "%s: PE timeout FWDIDLE (%d) in ecp_read_block_pio\n",
                        port->name, r);
        }
@@ -1141,14 +1157,14 @@ out_no_data:
 
        /* Finish up. */
        {
-               int lost = get_fifo_residue (port);
+               int lost = get_fifo_residue(port);
                if (lost)
                        /* Shouldn't happen with compliant peripherals. */
-                       printk (KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n",
+                       printk(KERN_DEBUG "%s: DATA LOSS (%d bytes)!\n",
                                port->name, lost);
        }
 
-dump_parport_state ("fwd idle", port);
+       dump_parport_state("fwd idle", port);
        return length - left;
 }
 #endif  /*  0  */
@@ -1164,8 +1180,7 @@ dump_parport_state ("fwd idle", port);
 
 /* GCC is not inlining extern inline function later overwriten to non-inline,
    so we use outlined_ variants here.  */
-static const struct parport_operations parport_pc_ops =
-{
+static const struct parport_operations parport_pc_ops = {
        .write_data     = parport_pc_write_data,
        .read_data      = parport_pc_read_data,
 
@@ -1202,88 +1217,107 @@ static const struct parport_operations parport_pc_ops =
 };
 
 #ifdef CONFIG_PARPORT_PC_SUPERIO
+/* Return the first superios[] entry whose io is 0, or NULL if none. */
+static struct superio_struct *find_free_superio(void)
+{
+       int i;
+       for (i = 0; i < NR_SUPERIOS; i++)
+               if (superios[i].io == 0)
+                       return &superios[i];
+       return NULL;
+}
+
+
 /* Super-IO chipset detection, Winbond, SMSC */
 static void __devinit show_parconfig_smsc37c669(int io, int key)
 {
-       int cr1,cr4,cra,cr23,cr26,cr27,i=0;
-       static const char *const modes[]={
+       int cr1, cr4, cra, cr23, cr26, cr27;
+       struct superio_struct *s;
+
+       static const char *const modes[] = {
                "SPP and Bidirectional (PS/2)",
                "EPP and SPP",
                "ECP",
                "ECP and EPP" };
 
-       outb(key,io);
-       outb(key,io);
-       outb(1,io);
-       cr1=inb(io+1);
-       outb(4,io);
-       cr4=inb(io+1);
-       outb(0x0a,io);
-       cra=inb(io+1);
-       outb(0x23,io);
-       cr23=inb(io+1);
-       outb(0x26,io);
-       cr26=inb(io+1);
-       outb(0x27,io);
-       cr27=inb(io+1);
-       outb(0xaa,io);
+       outb(key, io);
+       outb(key, io);
+       outb(1, io);
+       cr1 = inb(io + 1);
+       outb(4, io);
+       cr4 = inb(io + 1);
+       outb(0x0a, io);
+       cra = inb(io + 1);
+       outb(0x23, io);
+       cr23 = inb(io + 1);
+       outb(0x26, io);
+       cr26 = inb(io + 1);
+       outb(0x27, io);
+       cr27 = inb(io + 1);
+       outb(0xaa, io);
 
        if (verbose_probing) {
-               printk (KERN_INFO "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, "
+               printk(KERN_INFO
+                       "SMSC 37c669 LPT Config: cr_1=0x%02x, 4=0x%02x, "
                        "A=0x%2x, 23=0x%02x, 26=0x%02x, 27=0x%02x\n",
-                       cr1,cr4,cra,cr23,cr26,cr27);
-               
+                       cr1, cr4, cra, cr23, cr26, cr27);
+
                /* The documentation calls DMA and IRQ-Lines by letters, so
                   the board maker can/will wire them
                   appropriately/randomly...  G=reserved H=IDE-irq, */
-               printk (KERN_INFO "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, "
-                       "fifo threshold=%d\n", cr23*4,
-                       (cr27 &0x0f) ? 'A'-1+(cr27 &0x0f): '-',
-                       (cr26 &0x0f) ? 'A'-1+(cr26 &0x0f): '-', cra & 0x0f);
+               printk(KERN_INFO
+       "SMSC LPT Config: io=0x%04x, irq=%c, dma=%c, fifo threshold=%d\n",
+                               cr23 * 4,
+                               (cr27 & 0x0f) ? 'A' - 1 + (cr27 & 0x0f) : '-',
+                               (cr26 & 0x0f) ? 'A' - 1 + (cr26 & 0x0f) : '-',
+                               cra & 0x0f);
                printk(KERN_INFO "SMSC LPT Config: enabled=%s power=%s\n",
-                      (cr23*4 >=0x100) ?"yes":"no", (cr1 & 4) ? "yes" : "no");
-               printk(KERN_INFO "SMSC LPT Config: Port mode=%s, EPP version =%s\n",
-                      (cr1 & 0x08 ) ? "Standard mode only (SPP)" : modes[cr4 & 0x03], 
-                      (cr4 & 0x40) ? "1.7" : "1.9");
+                      (cr23 * 4 >= 0x100) ? "yes" : "no",
+                      (cr1 & 4) ? "yes" : "no");
+               printk(KERN_INFO
+                       "SMSC LPT Config: Port mode=%s, EPP version =%s\n",
+                               (cr1 & 0x08) ? "Standard mode only (SPP)"
+                                             : modes[cr4 & 0x03],
+                               (cr4 & 0x40) ? "1.7" : "1.9");
        }
-               
+
        /* Heuristics !  BIOS setup for this mainboard device limits
           the choices to standard settings, i.e. io-address and IRQ
           are related, however DMA can be 1 or 3, assume DMA_A=DMA1,
           DMA_C=DMA3 (this is true e.g. for TYAN 1564D Tomcat IV) */
-       if(cr23*4 >=0x100) { /* if active */
-               while((superios[i].io!= 0) && (i<NR_SUPERIOS))
-                       i++;
-               if(i==NR_SUPERIOS)
+       if (cr23 * 4 >= 0x100) { /* if active */
+               s = find_free_superio();
+               if (s == NULL)
                        printk(KERN_INFO "Super-IO: too many chips!\n");
                else {
                        int d;
-                       switch (cr23*4) {
-                               case 0x3bc:
-                                       superios[i].io = 0x3bc;
-                                       superios[i].irq = 7;
-                                       break;
-                               case 0x378:
-                                       superios[i].io = 0x378;
-                                       superios[i].irq = 7;
-                                       break;
-                               case 0x278:
-                                       superios[i].io = 0x278;
-                                       superios[i].irq = 5;
+                       switch (cr23 * 4) {
+                       case 0x3bc:
+                               s->io = 0x3bc;
+                               s->irq = 7;
+                               break;
+                       case 0x378:
+                               s->io = 0x378;
+                               s->irq = 7;
+                               break;
+                       case 0x278:
+                               s->io = 0x278;
+                               s->irq = 5;
                        }
-                       d=(cr26 &0x0f);
-                       if((d==1) || (d==3)) 
-                               superios[i].dma= d;
+                       d = (cr26 & 0x0f);
+                       if (d == 1 || d == 3)
+                               s->dma = d;
                        else
-                               superios[i].dma= PARPORT_DMA_NONE;
+                               s->dma = PARPORT_DMA_NONE;
                }
-       }
+       }
 }
 
 
 static void __devinit show_parconfig_winbond(int io, int key)
 {
-       int cr30,cr60,cr61,cr70,cr74,crf0,i=0;
+       int cr30, cr60, cr61, cr70, cr74, crf0;
+       struct superio_struct *s;
        static const char *const modes[] = {
                "Standard (SPP) and Bidirectional(PS/2)", /* 0 */
                "EPP-1.9 and SPP",
@@ -1296,110 +1330,134 @@ static void __devinit show_parconfig_winbond(int io, int key)
        static char *const irqtypes[] = {
                "pulsed low, high-Z",
                "follows nACK" };
-               
+
        /* The registers are called compatible-PnP because the
-           register layout is modelled after ISA-PnP, the access
-           method is just another ... */
-       outb(key,io);
-       outb(key,io);
-       outb(0x07,io);   /* Register 7: Select Logical Device */
-       outb(0x01,io+1); /* LD1 is Parallel Port */
-       outb(0x30,io);
-       cr30=inb(io+1);
-       outb(0x60,io);
-       cr60=inb(io+1);
-       outb(0x61,io);
-       cr61=inb(io+1);
-       outb(0x70,io);
-       cr70=inb(io+1);
-       outb(0x74,io);
-       cr74=inb(io+1);
-       outb(0xf0,io);
-       crf0=inb(io+1);
-       outb(0xaa,io);
+          register layout is modelled after ISA-PnP, the access
+          method is just another ... */
+       outb(key, io);
+       outb(key, io);
+       outb(0x07, io);   /* Register 7: Select Logical Device */
+       outb(0x01, io + 1); /* LD1 is Parallel Port */
+       outb(0x30, io);
+       cr30 = inb(io + 1);
+       outb(0x60, io);
+       cr60 = inb(io + 1);
+       outb(0x61, io);
+       cr61 = inb(io + 1);
+       outb(0x70, io);
+       cr70 = inb(io + 1);
+       outb(0x74, io);
+       cr74 = inb(io + 1);
+       outb(0xf0, io);
+       crf0 = inb(io + 1);
+       outb(0xaa, io);
 
        if (verbose_probing) {
-               printk(KERN_INFO "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x "
-                      "70=%02x 74=%02x, f0=%02x\n", cr30,cr60,cr61,cr70,cr74,crf0);
-               printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ", 
-                      (cr30 & 0x01) ? "yes":"no", cr60,cr61,cr70&0x0f );
+               printk(KERN_INFO
+    "Winbond LPT Config: cr_30=%02x 60,61=%02x%02x 70=%02x 74=%02x, f0=%02x\n",
+                                       cr30, cr60, cr61, cr70, cr74, crf0);
+               printk(KERN_INFO "Winbond LPT Config: active=%s, io=0x%02x%02x irq=%d, ",
+                      (cr30 & 0x01) ? "yes" : "no", cr60, cr61, cr70 & 0x0f);
                if ((cr74 & 0x07) > 3)
                        printk("dma=none\n");
                else
-                       printk("dma=%d\n",cr74 & 0x07);
-               printk(KERN_INFO "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n",
-                      irqtypes[crf0>>7], (crf0>>3)&0x0f);
-               printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n", modes[crf0 & 0x07]);
+                       printk("dma=%d\n", cr74 & 0x07);
+               printk(KERN_INFO
+                   "Winbond LPT Config: irqtype=%s, ECP fifo threshold=%d\n",
+                                       irqtypes[crf0>>7], (crf0>>3)&0x0f);
+               printk(KERN_INFO "Winbond LPT Config: Port mode=%s\n",
+                                       modes[crf0 & 0x07]);
        }
 
-       if(cr30 & 0x01) { /* the settings can be interrogated later ... */
-               while((superios[i].io!= 0) && (i<NR_SUPERIOS))
-                       i++;
-               if(i==NR_SUPERIOS) 
+       if (cr30 & 0x01) { /* the settings can be interrogated later ... */
+               s = find_free_superio();
+               if (s == NULL)
                        printk(KERN_INFO "Super-IO: too many chips!\n");
                else {
-                       superios[i].io = (cr60<<8)|cr61;
-                       superios[i].irq = cr70&0x0f;
-                       superios[i].dma = (((cr74 & 0x07) > 3) ?
+                       s->io = (cr60 << 8) | cr61;
+                       s->irq = cr70 & 0x0f;
+                       s->dma = (((cr74 & 0x07) > 3) ?
                                           PARPORT_DMA_NONE : (cr74 & 0x07));
                }
        }
 }
 
-static void __devinit decode_winbond(int efer, int key, int devid, int devrev, int oldid)
+static void __devinit decode_winbond(int efer, int key, int devid,
+                                                       int devrev, int oldid)
 {
        const char *type = "unknown";
-       int id,progif=2;
+       int id, progif = 2;
 
        if (devid == devrev)
                /* simple heuristics, we happened to read some
-                   non-winbond register */
+                  non-winbond register */
                return;
 
-       id=(devid<<8) | devrev;
+       id = (devid << 8) | devrev;
 
        /* Values are from public data sheets pdf files, I can just
-           confirm 83977TF is correct :-) */
-       if      (id == 0x9771) type="83977F/AF";
-       else if (id == 0x9773) type="83977TF / SMSC 97w33x/97w34x";
-       else if (id == 0x9774) type="83977ATF";
-       else if ((id & ~0x0f) == 0x5270) type="83977CTF / SMSC 97w36x";
-       else if ((id & ~0x0f) == 0x52f0) type="83977EF / SMSC 97w35x";
-       else if ((id & ~0x0f) == 0x5210) type="83627";
-       else if ((id & ~0x0f) == 0x6010) type="83697HF";
-       else if ((oldid &0x0f ) == 0x0a) { type="83877F"; progif=1;}
-       else if ((oldid &0x0f ) == 0x0b) { type="83877AF"; progif=1;}
-       else if ((oldid &0x0f ) == 0x0c) { type="83877TF"; progif=1;}
-       else if ((oldid &0x0f ) == 0x0d) { type="83877ATF"; progif=1;}
-       else progif=0;
+          confirm 83977TF is correct :-) */
+       if (id == 0x9771)
+               type = "83977F/AF";
+       else if (id == 0x9773)
+               type = "83977TF / SMSC 97w33x/97w34x";
+       else if (id == 0x9774)
+               type = "83977ATF";
+       else if ((id & ~0x0f) == 0x5270)
+               type = "83977CTF / SMSC 97w36x";
+       else if ((id & ~0x0f) == 0x52f0)
+               type = "83977EF / SMSC 97w35x";
+       else if ((id & ~0x0f) == 0x5210)
+               type = "83627";
+       else if ((id & ~0x0f) == 0x6010)
+               type = "83697HF";
+       else if ((oldid & 0x0f) == 0x0a) {
+               type = "83877F";
+               progif = 1;
+       } else if ((oldid & 0x0f) == 0x0b) {
+               type = "83877AF";
+               progif = 1;
+       } else if ((oldid & 0x0f) == 0x0c) {
+               type = "83877TF";
+               progif = 1;
+       } else if ((oldid & 0x0f) == 0x0d) {
+               type = "83877ATF";
+               progif = 1;
+       } else
+               progif = 0;
 
        if (verbose_probing)
                printk(KERN_INFO "Winbond chip at EFER=0x%x key=0x%02x "
-                      "devid=%02x devrev=%02x oldid=%02x type=%s\n", 
+                      "devid=%02x devrev=%02x oldid=%02x type=%s\n",
                       efer, key, devid, devrev, oldid, type);
 
        if (progif == 2)
-               show_parconfig_winbond(efer,key);
+               show_parconfig_winbond(efer, key);
 }
 
 static void __devinit decode_smsc(int efer, int key, int devid, int devrev)
 {
-        const char *type = "unknown";
+       const char *type = "unknown";
        void (*func)(int io, int key);
-        int id;
+       int id;
 
-        if (devid == devrev)
+       if (devid == devrev)
                /* simple heuristics, we happened to read some
-                   non-smsc register */
+                  non-smsc register */
                return;
 
-       func=NULL;
-        id=(devid<<8) | devrev;
+       func = NULL;
+       id = (devid << 8) | devrev;
 
-       if      (id==0x0302) {type="37c669"; func=show_parconfig_smsc37c669;}
-       else if (id==0x6582) type="37c665IR";
-       else if (devid==0x65) type="37c665GT";
-       else if (devid==0x66) type="37c666GT";
+       if (id == 0x0302) {
+               type = "37c669";
+               func = show_parconfig_smsc37c669;
+       } else if (id == 0x6582)
+               type = "37c665IR";
+       else if (devid == 0x65)
+               type = "37c665GT";
+       else if (devid == 0x66)
+               type = "37c666GT";
 
        if (verbose_probing)
                printk(KERN_INFO "SMSC chip at EFER=0x%x "
@@ -1407,138 +1465,138 @@ static void __devinit decode_smsc(int efer, int key, int devid, int devrev)
                       efer, key, devid, devrev, type);
 
        if (func)
-               func(efer,key);
+               func(efer, key);
 }
 
 
 static void __devinit winbond_check(int io, int key)
 {
-       int devid,devrev,oldid,x_devid,x_devrev,x_oldid;
+       int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
 
        if (!request_region(io, 3, __func__))
                return;
 
        /* First probe without key */
-       outb(0x20,io);
-       x_devid=inb(io+1);
-       outb(0x21,io);
-       x_devrev=inb(io+1);
-       outb(0x09,io);
-       x_oldid=inb(io+1);
-
-       outb(key,io);
-       outb(key,io);     /* Write Magic Sequence to EFER, extended
-                             funtion enable register */
-       outb(0x20,io);    /* Write EFIR, extended function index register */
-       devid=inb(io+1);  /* Read EFDR, extended function data register */
-       outb(0x21,io);
-       devrev=inb(io+1);
-       outb(0x09,io);
-       oldid=inb(io+1);
-       outb(0xaa,io);    /* Magic Seal */
+       outb(0x20, io);
+       x_devid = inb(io + 1);
+       outb(0x21, io);
+       x_devrev = inb(io + 1);
+       outb(0x09, io);
+       x_oldid = inb(io + 1);
+
+       outb(key, io);
+       outb(key, io);     /* Write Magic Sequence to EFER, extended
+                             function enable register */
+       outb(0x20, io);    /* Write EFIR, extended function index register */
+       devid = inb(io + 1);  /* Read EFDR, extended function data register */
+       outb(0x21, io);
+       devrev = inb(io + 1);
+       outb(0x09, io);
+       oldid = inb(io + 1);
+       outb(0xaa, io);    /* Magic Seal */
 
        if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid))
                goto out; /* protection against false positives */
 
-       decode_winbond(io,key,devid,devrev,oldid);
+       decode_winbond(io, key, devid, devrev, oldid);
 out:
        release_region(io, 3);
 }
 
-static void __devinit winbond_check2(int io,int key)
+static void __devinit winbond_check2(int io, int key)
 {
-        int devid,devrev,oldid,x_devid,x_devrev,x_oldid;
+       int devid, devrev, oldid, x_devid, x_devrev, x_oldid;
 
        if (!request_region(io, 3, __func__))
                return;
 
        /* First probe without the key */
-       outb(0x20,io+2);
-       x_devid=inb(io+2);
-       outb(0x21,io+1);
-       x_devrev=inb(io+2);
-       outb(0x09,io+1);
-       x_oldid=inb(io+2);
-
-        outb(key,io);     /* Write Magic Byte to EFER, extended
-                             funtion enable register */
-        outb(0x20,io+2);  /* Write EFIR, extended function index register */
-        devid=inb(io+2);  /* Read EFDR, extended function data register */
-        outb(0x21,io+1);
-        devrev=inb(io+2);
-        outb(0x09,io+1);
-        oldid=inb(io+2);
-        outb(0xaa,io);    /* Magic Seal */
-
-       if ((x_devid == devid) && (x_devrev == devrev) && (x_oldid == oldid))
+       outb(0x20, io + 2);
+       x_devid = inb(io + 2);
+       outb(0x21, io + 1);
+       x_devrev = inb(io + 2);
+       outb(0x09, io + 1);
+       x_oldid = inb(io + 2);
+
+       outb(key, io);     /* Write Magic Byte to EFER, extended
+                             function enable register */
+       outb(0x20, io + 2);  /* Write EFIR, extended function index register */
+       devid = inb(io + 2);  /* Read EFDR, extended function data register */
+       outb(0x21, io + 1);
+       devrev = inb(io + 2);
+       outb(0x09, io + 1);
+       oldid = inb(io + 2);
+       outb(0xaa, io);    /* Magic Seal */
+
+       if (x_devid == devid && x_devrev == devrev && x_oldid == oldid)
                goto out; /* protection against false positives */
 
-       decode_winbond(io,key,devid,devrev,oldid);
+       decode_winbond(io, key, devid, devrev, oldid);
 out:
        release_region(io, 3);
 }
 
 static void __devinit smsc_check(int io, int key)
 {
-        int id,rev,oldid,oldrev,x_id,x_rev,x_oldid,x_oldrev;
+       int id, rev, oldid, oldrev, x_id, x_rev, x_oldid, x_oldrev;
 
        if (!request_region(io, 3, __func__))
                return;
 
        /* First probe without the key */
-       outb(0x0d,io);
-       x_oldid=inb(io+1);
-       outb(0x0e,io);
-       x_oldrev=inb(io+1);
-       outb(0x20,io);
-       x_id=inb(io+1);
-       outb(0x21,io);
-       x_rev=inb(io+1);
-
-        outb(key,io);
-        outb(key,io);     /* Write Magic Sequence to EFER, extended
-                             funtion enable register */
-        outb(0x0d,io);    /* Write EFIR, extended function index register */
-        oldid=inb(io+1);  /* Read EFDR, extended function data register */
-        outb(0x0e,io);
-        oldrev=inb(io+1);
-       outb(0x20,io);
-       id=inb(io+1);
-       outb(0x21,io);
-       rev=inb(io+1);
-        outb(0xaa,io);    /* Magic Seal */
-
-       if ((x_id == id) && (x_oldrev == oldrev) &&
-           (x_oldid == oldid) && (x_rev == rev))
+       outb(0x0d, io);
+       x_oldid = inb(io + 1);
+       outb(0x0e, io);
+       x_oldrev = inb(io + 1);
+       outb(0x20, io);
+       x_id = inb(io + 1);
+       outb(0x21, io);
+       x_rev = inb(io + 1);
+
+       outb(key, io);
+       outb(key, io);     /* Write Magic Sequence to EFER, extended
+                             function enable register */
+       outb(0x0d, io);    /* Write EFIR, extended function index register */
+       oldid = inb(io + 1);  /* Read EFDR, extended function data register */
+       outb(0x0e, io);
+       oldrev = inb(io + 1);
+       outb(0x20, io);
+       id = inb(io + 1);
+       outb(0x21, io);
+       rev = inb(io + 1);
+       outb(0xaa, io);    /* Magic Seal */
+
+       if (x_id == id && x_oldrev == oldrev &&
+           x_oldid == oldid && x_rev == rev)
                goto out; /* protection against false positives */
 
-        decode_smsc(io,key,oldid,oldrev);
+       decode_smsc(io, key, oldid, oldrev);
 out:
        release_region(io, 3);
 }
 
 
-static void __devinit detect_and_report_winbond (void)
-{ 
+static void __devinit detect_and_report_winbond(void)
+{
        if (verbose_probing)
                printk(KERN_DEBUG "Winbond Super-IO detection, now testing ports 3F0,370,250,4E,2E ...\n");
-       winbond_check(0x3f0,0x87);
-       winbond_check(0x370,0x87);
-       winbond_check(0x2e ,0x87);
-       winbond_check(0x4e ,0x87);
-       winbond_check(0x3f0,0x86);
-       winbond_check2(0x250,0x88); 
-       winbond_check2(0x250,0x89);
+       winbond_check(0x3f0, 0x87);
+       winbond_check(0x370, 0x87);
+       winbond_check(0x2e , 0x87);
+       winbond_check(0x4e , 0x87);
+       winbond_check(0x3f0, 0x86);
+       winbond_check2(0x250, 0x88);
+       winbond_check2(0x250, 0x89);
 }
 
-static void __devinit detect_and_report_smsc (void)
+static void __devinit detect_and_report_smsc(void)
 {
        if (verbose_probing)
                printk(KERN_DEBUG "SMSC Super-IO detection, now testing Ports 2F0, 370 ...\n");
-       smsc_check(0x3f0,0x55);
-       smsc_check(0x370,0x55);
-       smsc_check(0x3f0,0x44);
-       smsc_check(0x370,0x44);
+       smsc_check(0x3f0, 0x55);
+       smsc_check(0x370, 0x55);
+       smsc_check(0x3f0, 0x44);
+       smsc_check(0x370, 0x44);
 }
 
 static void __devinit detect_and_report_it87(void)
@@ -1573,34 +1631,39 @@ static void __devinit detect_and_report_it87(void)
 }
 #endif /* CONFIG_PARPORT_PC_SUPERIO */
 
-static int get_superio_dma (struct parport *p)
+static struct superio_struct *find_superio(struct parport *p)
 {
-       int i=0;
-       while( (superios[i].io != p->base) && (i<NR_SUPERIOS))
-               i++;
-       if (i!=NR_SUPERIOS)
-               return superios[i].dma;
+       int i;
+       for (i = 0; i < NR_SUPERIOS; i++)
+               if (superios[i].io == p->base)  /* entry for this port */
+                       return &superios[i];
+       return NULL;    /* no superios[] entry matches p->base */
+}
+
+static int get_superio_dma(struct parport *p)
+{
+       struct superio_struct *s = find_superio(p);
+       if (s)
+               return s->dma;
        return PARPORT_DMA_NONE;
 }
 
-static int get_superio_irq (struct parport *p)
+static int get_superio_irq(struct parport *p)
 {
-       int i=0;
-        while( (superios[i].io != p->base) && (i<NR_SUPERIOS))
-                i++;
-        if (i!=NR_SUPERIOS)
-                return superios[i].irq;
-        return PARPORT_IRQ_NONE;
+       struct superio_struct *s = find_superio(p);
+       if (s)
+               return s->irq;
+       return PARPORT_IRQ_NONE;
 }
-       
+
 
 /* --- Mode detection ------------------------------------- */
 
 /*
  * Checks for port existence, all ports support SPP MODE
- * Returns: 
+ * Returns:
  *         0           :  No parallel port at this address
- *  PARPORT_MODE_PCSPP :  SPP port detected 
+ *  PARPORT_MODE_PCSPP :  SPP port detected
  *                        (if the user specified an ioport himself,
  *                         this shall always be the case!)
  *
@@ -1610,7 +1673,7 @@ static int parport_SPP_supported(struct parport *pb)
        unsigned char r, w;
 
        /*
-        * first clear an eventually pending EPP timeout 
+        * first clear an eventually pending EPP timeout
         * I (sailer@ife.ee.ethz.ch) have an SMSC chipset
         * that does not even respond to SPP cycles if an EPP
         * timeout is pending
@@ -1619,19 +1682,19 @@ static int parport_SPP_supported(struct parport *pb)
 
        /* Do a simple read-write test to make sure the port exists. */
        w = 0xc;
-       outb (w, CONTROL (pb));
+       outb(w, CONTROL(pb));
 
        /* Is there a control register that we can read from?  Some
         * ports don't allow reads, so read_control just returns a
         * software copy. Some ports _do_ allow reads, so bypass the
         * software copy here.  In addition, some bits aren't
         * writable. */
-       r = inb (CONTROL (pb));
+       r = inb(CONTROL(pb));
        if ((r & 0xf) == w) {
                w = 0xe;
-               outb (w, CONTROL (pb));
-               r = inb (CONTROL (pb));
-               outb (0xc, CONTROL (pb));
+               outb(w, CONTROL(pb));
+               r = inb(CONTROL(pb));
+               outb(0xc, CONTROL(pb));
                if ((r & 0xf) == w)
                        return PARPORT_MODE_PCSPP;
        }
@@ -1639,18 +1702,18 @@ static int parport_SPP_supported(struct parport *pb)
        if (user_specified)
                /* That didn't work, but the user thinks there's a
                 * port here. */
-               printk (KERN_INFO "parport 0x%lx (WARNING): CTR: "
+               printk(KERN_INFO "parport 0x%lx (WARNING): CTR: "
                        "wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
 
        /* Try the data register.  The data lines aren't tri-stated at
         * this stage, so we expect back what we wrote. */
        w = 0xaa;
-       parport_pc_write_data (pb, w);
-       r = parport_pc_read_data (pb);
+       parport_pc_write_data(pb, w);
+       r = parport_pc_read_data(pb);
        if (r == w) {
                w = 0x55;
-               parport_pc_write_data (pb, w);
-               r = parport_pc_read_data (pb);
+               parport_pc_write_data(pb, w);
+               r = parport_pc_read_data(pb);
                if (r == w)
                        return PARPORT_MODE_PCSPP;
        }
@@ -1658,9 +1721,9 @@ static int parport_SPP_supported(struct parport *pb)
        if (user_specified) {
                /* Didn't work, but the user is convinced this is the
                 * place. */
-               printk (KERN_INFO "parport 0x%lx (WARNING): DATA: "
+               printk(KERN_INFO "parport 0x%lx (WARNING): DATA: "
                        "wrote 0x%02x, read 0x%02x\n", pb->base, w, r);
-               printk (KERN_INFO "parport 0x%lx: You gave this address, "
+               printk(KERN_INFO "parport 0x%lx: You gave this address, "
                        "but there is probably no parallel port there!\n",
                        pb->base);
        }
@@ -1691,33 +1754,33 @@ static int parport_ECR_present(struct parport *pb)
        struct parport_pc_private *priv = pb->private_data;
        unsigned char r = 0xc;
 
-       outb (r, CONTROL (pb));
-       if ((inb (ECONTROL (pb)) & 0x3) == (r & 0x3)) {
-               outb (r ^ 0x2, CONTROL (pb)); /* Toggle bit 1 */
+       outb(r, CONTROL(pb));
+       if ((inb(ECONTROL(pb)) & 0x3) == (r & 0x3)) {
+               outb(r ^ 0x2, CONTROL(pb)); /* Toggle bit 1 */
 
-               r = inb (CONTROL (pb));
-               if ((inb (ECONTROL (pb)) & 0x2) == (r & 0x2))
+               r = inb(CONTROL(pb));
+               if ((inb(ECONTROL(pb)) & 0x2) == (r & 0x2))
                        goto no_reg; /* Sure that no ECR register exists */
        }
-       
-       if ((inb (ECONTROL (pb)) & 0x3 ) != 0x1)
+
+       if ((inb(ECONTROL(pb)) & 0x3) != 0x1)
                goto no_reg;
 
-       ECR_WRITE (pb, 0x34);
-       if (inb (ECONTROL (pb)) != 0x35)
+       ECR_WRITE(pb, 0x34);
+       if (inb(ECONTROL(pb)) != 0x35)
                goto no_reg;
 
        priv->ecr = 1;
-       outb (0xc, CONTROL (pb));
-       
+       outb(0xc, CONTROL(pb));
+
        /* Go to mode 000 */
-       frob_set_mode (pb, ECR_SPP);
+       frob_set_mode(pb, ECR_SPP);
 
        return 1;
 
  no_reg:
-       outb (0xc, CONTROL (pb));
-       return 0; 
+       outb(0xc, CONTROL(pb));
+       return 0;
 }
 
 #ifdef CONFIG_PARPORT_1284
@@ -1727,7 +1790,7 @@ static int parport_ECR_present(struct parport *pb)
  * allows us to read data from the data lines.  In theory we would get back
  * 0xff but any peripheral attached to the port may drag some or all of the
  * lines down to zero.  So if we get back anything that isn't the contents
- * of the data register we deem PS/2 support to be present. 
+ * of the data register we deem PS/2 support to be present.
  *
  * Some SPP ports have "half PS/2" ability - you can't turn off the line
  * drivers, but an external peripheral with sufficiently beefy drivers of
@@ -1735,26 +1798,28 @@ static int parport_ECR_present(struct parport *pb)
  * where they can then be read back as normal.  Ports with this property
  * and the right type of device attached are likely to fail the SPP test,
  * (as they will appear to have stuck bits) and so the fact that they might
- * be misdetected here is rather academic. 
+ * be misdetected here is rather academic.
  */
 
 static int parport_PS2_supported(struct parport *pb)
 {
        int ok = 0;
-  
+
        clear_epp_timeout(pb);
 
        /* try to tri-state the buffer */
-       parport_pc_data_reverse (pb);
-       
+       parport_pc_data_reverse(pb);
+
        parport_pc_write_data(pb, 0x55);
-       if (parport_pc_read_data(pb) != 0x55) ok++;
+       if (parport_pc_read_data(pb) != 0x55)
+               ok++;
 
        parport_pc_write_data(pb, 0xaa);
-       if (parport_pc_read_data(pb) != 0xaa) ok++;
+       if (parport_pc_read_data(pb) != 0xaa)
+               ok++;
 
        /* cancel input mode */
-       parport_pc_data_forward (pb);
+       parport_pc_data_forward(pb);
 
        if (ok) {
                pb->modes |= PARPORT_MODE_TRISTATE;
@@ -1773,68 +1838,68 @@ static int parport_ECP_supported(struct parport *pb)
        int config, configb;
        int pword;
        struct parport_pc_private *priv = pb->private_data;
-       /* Translate ECP intrLine to ISA irq value */   
-       static const int intrline[]= { 0, 7, 9, 10, 11, 14, 15, 5 }; 
+       /* Translate ECP intrLine to ISA irq value */
+       static const int intrline[] = { 0, 7, 9, 10, 11, 14, 15, 5 };
 
        /* If there is no ECR, we have no hope of supporting ECP. */
        if (!priv->ecr)
                return 0;
 
        /* Find out FIFO depth */
-       ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-       ECR_WRITE (pb, ECR_TST << 5); /* TEST FIFO */
-       for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02); i++)
-               outb (0xaa, FIFO (pb));
+       ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+       ECR_WRITE(pb, ECR_TST << 5); /* TEST FIFO */
+       for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02); i++)
+               outb(0xaa, FIFO(pb));
 
        /*
         * Using LGS chipset it uses ECR register, but
         * it doesn't support ECP or FIFO MODE
         */
        if (i == 1024) {
-               ECR_WRITE (pb, ECR_SPP << 5);
+               ECR_WRITE(pb, ECR_SPP << 5);
                return 0;
        }
 
        priv->fifo_depth = i;
        if (verbose_probing)
-               printk (KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i);
+               printk(KERN_DEBUG "0x%lx: FIFO is %d bytes\n", pb->base, i);
 
        /* Find out writeIntrThreshold */
-       frob_econtrol (pb, 1<<2, 1<<2);
-       frob_econtrol (pb, 1<<2, 0);
+       frob_econtrol(pb, 1<<2, 1<<2);
+       frob_econtrol(pb, 1<<2, 0);
        for (i = 1; i <= priv->fifo_depth; i++) {
-               inb (FIFO (pb));
-               udelay (50);
-               if (inb (ECONTROL (pb)) & (1<<2))
+               inb(FIFO(pb));
+               udelay(50);
+               if (inb(ECONTROL(pb)) & (1<<2))
                        break;
        }
 
        if (i <= priv->fifo_depth) {
                if (verbose_probing)
-                       printk (KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n",
+                       printk(KERN_DEBUG "0x%lx: writeIntrThreshold is %d\n",
                                pb->base, i);
        } else
                /* Number of bytes we know we can write if we get an
-                   interrupt. */
+                  interrupt. */
                i = 0;
 
        priv->writeIntrThreshold = i;
 
        /* Find out readIntrThreshold */
-       frob_set_mode (pb, ECR_PS2); /* Reset FIFO and enable PS2 */
-       parport_pc_data_reverse (pb); /* Must be in PS2 mode */
-       frob_set_mode (pb, ECR_TST); /* Test FIFO */
-       frob_econtrol (pb, 1<<2, 1<<2);
-       frob_econtrol (pb, 1<<2, 0);
+       frob_set_mode(pb, ECR_PS2); /* Reset FIFO and enable PS2 */
+       parport_pc_data_reverse(pb); /* Must be in PS2 mode */
+       frob_set_mode(pb, ECR_TST); /* Test FIFO */
+       frob_econtrol(pb, 1<<2, 1<<2);
+       frob_econtrol(pb, 1<<2, 0);
        for (i = 1; i <= priv->fifo_depth; i++) {
-               outb (0xaa, FIFO (pb));
-               if (inb (ECONTROL (pb)) & (1<<2))
+               outb(0xaa, FIFO(pb));
+               if (inb(ECONTROL(pb)) & (1<<2))
                        break;
        }
 
        if (i <= priv->fifo_depth) {
                if (verbose_probing)
-                       printk (KERN_INFO "0x%lx: readIntrThreshold is %d\n",
+                       printk(KERN_INFO "0x%lx: readIntrThreshold is %d\n",
                                pb->base, i);
        } else
                /* Number of bytes we can read if we get an interrupt. */
@@ -1842,23 +1907,23 @@ static int parport_ECP_supported(struct parport *pb)
 
        priv->readIntrThreshold = i;
 
-       ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-       ECR_WRITE (pb, 0xf4); /* Configuration mode */
-       config = inb (CONFIGA (pb));
+       ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+       ECR_WRITE(pb, 0xf4); /* Configuration mode */
+       config = inb(CONFIGA(pb));
        pword = (config >> 4) & 0x7;
        switch (pword) {
        case 0:
                pword = 2;
-               printk (KERN_WARNING "0x%lx: Unsupported pword size!\n",
+               printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
                        pb->base);
                break;
        case 2:
                pword = 4;
-               printk (KERN_WARNING "0x%lx: Unsupported pword size!\n",
+               printk(KERN_WARNING "0x%lx: Unsupported pword size!\n",
                        pb->base);
                break;
        default:
-               printk (KERN_WARNING "0x%lx: Unknown implementation ID\n",
+               printk(KERN_WARNING "0x%lx: Unknown implementation ID\n",
                        pb->base);
                /* Assume 1 */
        case 1:
@@ -1867,28 +1932,29 @@ static int parport_ECP_supported(struct parport *pb)
        priv->pword = pword;
 
        if (verbose_probing) {
-               printk (KERN_DEBUG "0x%lx: PWord is %d bits\n", pb->base, 8 * pword);
-               
-               printk (KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base,
+               printk(KERN_DEBUG "0x%lx: PWord is %d bits\n",
+                       pb->base, 8 * pword);
+
+               printk(KERN_DEBUG "0x%lx: Interrupts are ISA-%s\n", pb->base,
                        config & 0x80 ? "Level" : "Pulses");
 
-               configb = inb (CONFIGB (pb));
-               printk (KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n",
+               configb = inb(CONFIGB(pb));
+               printk(KERN_DEBUG "0x%lx: ECP port cfgA=0x%02x cfgB=0x%02x\n",
                        pb->base, config, configb);
-               printk (KERN_DEBUG "0x%lx: ECP settings irq=", pb->base);
-               if ((configb >>3) & 0x07)
-                       printk("%d",intrline[(configb >>3) & 0x07]);
+               printk(KERN_DEBUG "0x%lx: ECP settings irq=", pb->base);
+               if ((configb >> 3) & 0x07)
+                       printk("%d", intrline[(configb >> 3) & 0x07]);
                else
                        printk("<none or set by other means>");
-               printk (" dma=");
-               if( (configb & 0x03 ) == 0x00)
+               printk(" dma=");
+               if ((configb & 0x03) == 0x00)
                        printk("<none or set by other means>\n");
                else
-                       printk("%d\n",configb & 0x07);
+                       printk("%d\n", configb & 0x07);
        }
 
        /* Go back to mode 000 */
-       frob_set_mode (pb, ECR_SPP);
+       frob_set_mode(pb, ECR_SPP);
 
        return 1;
 }
@@ -1903,10 +1969,10 @@ static int parport_ECPPS2_supported(struct parport *pb)
        if (!priv->ecr)
                return 0;
 
-       oecr = inb (ECONTROL (pb));
-       ECR_WRITE (pb, ECR_PS2 << 5);
+       oecr = inb(ECONTROL(pb));
+       ECR_WRITE(pb, ECR_PS2 << 5);
        result = parport_PS2_supported(pb);
-       ECR_WRITE (pb, oecr);
+       ECR_WRITE(pb, oecr);
        return result;
 }
 
@@ -1930,16 +1996,15 @@ static int parport_EPP_supported(struct parport *pb)
         */
 
        /* If EPP timeout bit clear then EPP available */
-       if (!clear_epp_timeout(pb)) {
+       if (!clear_epp_timeout(pb))
                return 0;  /* No way to clear timeout */
-       }
 
        /* Check for Intel bug. */
        if (priv->ecr) {
                unsigned char i;
                for (i = 0x00; i < 0x80; i += 0x20) {
-                       ECR_WRITE (pb, i);
-                       if (clear_epp_timeout (pb)) {
+                       ECR_WRITE(pb, i);
+                       if (clear_epp_timeout(pb)) {
                                /* Phony EPP in ECP. */
                                return 0;
                        }
@@ -1963,17 +2028,16 @@ static int parport_ECPEPP_supported(struct parport *pb)
        int result;
        unsigned char oecr;
 
-       if (!priv->ecr) {
+       if (!priv->ecr)
                return 0;
-       }
 
-       oecr = inb (ECONTROL (pb));
+       oecr = inb(ECONTROL(pb));
        /* Search for SMC style EPP+ECP mode */
-       ECR_WRITE (pb, 0x80);
-       outb (0x04, CONTROL (pb));
+       ECR_WRITE(pb, 0x80);
+       outb(0x04, CONTROL(pb));
        result = parport_EPP_supported(pb);
 
-       ECR_WRITE (pb, oecr);
+       ECR_WRITE(pb, oecr);
 
        if (result) {
                /* Set up access functions to use ECP+EPP hardware. */
@@ -1991,11 +2055,25 @@ static int parport_ECPEPP_supported(struct parport *pb)
 /* Don't bother probing for modes we know we won't use. */
 static int __devinit parport_PS2_supported(struct parport *pb) { return 0; }
 #ifdef CONFIG_PARPORT_PC_FIFO
-static int parport_ECP_supported(struct parport *pb) { return 0; }
+static int parport_ECP_supported(struct parport *pb)
+{
+       return 0;
+}
 #endif
-static int __devinit parport_EPP_supported(struct parport *pb) { return 0; }
-static int __devinit parport_ECPEPP_supported(struct parport *pb){return 0;}
-static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;}
+static int __devinit parport_EPP_supported(struct parport *pb)
+{
+       return 0;
+}
+
+static int __devinit parport_ECPEPP_supported(struct parport *pb)
+{
+       return 0;
+}
+
+static int __devinit parport_ECPPS2_supported(struct parport *pb)
+{
+       return 0;
+}
 
 #endif /* No IEEE 1284 support */
 
@@ -2005,17 +2083,17 @@ static int __devinit parport_ECPPS2_supported(struct parport *pb){return 0;}
 static int programmable_irq_support(struct parport *pb)
 {
        int irq, intrLine;
-       unsigned char oecr = inb (ECONTROL (pb));
+       unsigned char oecr = inb(ECONTROL(pb));
        static const int lookup[8] = {
                PARPORT_IRQ_NONE, 7, 9, 10, 11, 14, 15, 5
        };
 
-       ECR_WRITE (pb, ECR_CNF << 5); /* Configuration MODE */
+       ECR_WRITE(pb, ECR_CNF << 5); /* Configuration MODE */
 
-       intrLine = (inb (CONFIGB (pb)) >> 3) & 0x07;
+       intrLine = (inb(CONFIGB(pb)) >> 3) & 0x07;
        irq = lookup[intrLine];
 
-       ECR_WRITE (pb, oecr);
+       ECR_WRITE(pb, oecr);
        return irq;
 }
 
@@ -2025,17 +2103,17 @@ static int irq_probe_ECP(struct parport *pb)
        unsigned long irqs;
 
        irqs = probe_irq_on();
-               
-       ECR_WRITE (pb, ECR_SPP << 5); /* Reset FIFO */
-       ECR_WRITE (pb, (ECR_TST << 5) | 0x04);
-       ECR_WRITE (pb, ECR_TST << 5);
+
+       ECR_WRITE(pb, ECR_SPP << 5); /* Reset FIFO */
+       ECR_WRITE(pb, (ECR_TST << 5) | 0x04);
+       ECR_WRITE(pb, ECR_TST << 5);
 
        /* If Full FIFO sure that writeIntrThreshold is generated */
-       for (i=0; i < 1024 && !(inb (ECONTROL (pb)) & 0x02) ; i++) 
-               outb (0xaa, FIFO (pb));
-               
+       for (i = 0; i < 1024 && !(inb(ECONTROL(pb)) & 0x02) ; i++)
+               outb(0xaa, FIFO(pb));
+
        pb->irq = probe_irq_off(irqs);
-       ECR_WRITE (pb, ECR_SPP << 5);
+       ECR_WRITE(pb, ECR_SPP << 5);
 
        if (pb->irq <= 0)
                pb->irq = PARPORT_IRQ_NONE;
@@ -2045,7 +2123,7 @@ static int irq_probe_ECP(struct parport *pb)
 
 /*
  * This detection seems that only works in National Semiconductors
- * This doesn't work in SMC, LGS, and Winbond 
+ * This doesn't work in SMC, LGS, and Winbond
  */
 static int irq_probe_EPP(struct parport *pb)
 {
@@ -2056,16 +2134,16 @@ static int irq_probe_EPP(struct parport *pb)
        unsigned char oecr;
 
        if (pb->modes & PARPORT_MODE_PCECR)
-               oecr = inb (ECONTROL (pb));
+               oecr = inb(ECONTROL(pb));
 
        irqs = probe_irq_on();
 
        if (pb->modes & PARPORT_MODE_PCECR)
-               frob_econtrol (pb, 0x10, 0x10);
-       
+               frob_econtrol(pb, 0x10, 0x10);
+
        clear_epp_timeout(pb);
-       parport_pc_frob_control (pb, 0x20, 0x20);
-       parport_pc_frob_control (pb, 0x10, 0x10);
+       parport_pc_frob_control(pb, 0x20, 0x20);
+       parport_pc_frob_control(pb, 0x10, 0x10);
        clear_epp_timeout(pb);
 
        /* Device isn't expecting an EPP read
@@ -2074,9 +2152,9 @@ static int irq_probe_EPP(struct parport *pb)
        parport_pc_read_epp(pb);
        udelay(20);
 
-       pb->irq = probe_irq_off (irqs);
+       pb->irq = probe_irq_off(irqs);
        if (pb->modes & PARPORT_MODE_PCECR)
-               ECR_WRITE (pb, oecr);
+               ECR_WRITE(pb, oecr);
        parport_pc_write_control(pb, 0xc);
 
        if (pb->irq <= 0)
@@ -2133,28 +2211,28 @@ static int parport_irq_probe(struct parport *pb)
 /* --- DMA detection -------------------------------------- */
 
 /* Only if chipset conforms to ECP ISA Interface Standard */
-static int programmable_dma_support (struct parport *p)
+static int programmable_dma_support(struct parport *p)
 {
-       unsigned char oecr = inb (ECONTROL (p));
+       unsigned char oecr = inb(ECONTROL(p));
        int dma;
 
-       frob_set_mode (p, ECR_CNF);
-       
-       dma = inb (CONFIGB(p)) & 0x07;
+       frob_set_mode(p, ECR_CNF);
+
+       dma = inb(CONFIGB(p)) & 0x07;
        /* 000: Indicates jumpered 8-bit DMA if read-only.
           100: Indicates jumpered 16-bit DMA if read-only. */
        if ((dma & 0x03) == 0)
                dma = PARPORT_DMA_NONE;
 
-       ECR_WRITE (p, oecr);
+       ECR_WRITE(p, oecr);
        return dma;
 }
 
-static int parport_dma_probe (struct parport *p)
+static int parport_dma_probe(struct parport *p)
 {
        const struct parport_pc_private *priv = p->private_data;
-       if (priv->ecr)
-               p->dma = programmable_dma_support(p); /* ask ECP chipset first */
+       if (priv->ecr)          /* ask ECP chipset first */
+               p->dma = programmable_dma_support(p);
        if (p->dma == PARPORT_DMA_NONE) {
                /* ask known Super-IO chips proper, although these
                   claim ECP compatible, some don't report their DMA
@@ -2212,7 +2290,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
        if (!base_res)
                goto out4;
 
-       memcpy(ops, &parport_pc_ops, sizeof (struct parport_operations));
+       memcpy(ops, &parport_pc_ops, sizeof(struct parport_operations));
        priv->ctr = 0xc;
        priv->ctr_writable = ~0x10;
        priv->ecr = 0;
@@ -2239,7 +2317,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
                        if (!parport_EPP_supported(p))
                                parport_ECPEPP_supported(p);
        }
-       if (!parport_SPP_supported (p))
+       if (!parport_SPP_supported(p))
                /* No port. */
                goto out5;
        if (priv->ecr)
@@ -2247,7 +2325,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
        else
                parport_PS2_supported(p);
 
-       p->size = (p->modes & PARPORT_MODE_EPP)?8:3;
+       p->size = (p->modes & PARPORT_MODE_EPP) ? 8 : 3;
 
        printk(KERN_INFO "%s: PC-style at 0x%lx", p->name, p->base);
        if (p->base_hi && priv->ecr)
@@ -2271,7 +2349,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
                }
        }
        if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq
-                                           is mandatory (see above) */
+                                          is mandatory (see above) */
                p->dma = PARPORT_DMA_NONE;
 
 #ifdef CONFIG_PARPORT_PC_FIFO
@@ -2288,16 +2366,23 @@ struct parport *parport_pc_probe_port(unsigned long int base,
                if (p->dma != PARPORT_DMA_NONE) {
                        printk(", dma %d", p->dma);
                        p->modes |= PARPORT_MODE_DMA;
-               }
-               else printk(", using FIFO");
-       }
-       else
+               } else
+                       printk(", using FIFO");
+       } else
                /* We can't use the DMA channel after all. */
                p->dma = PARPORT_DMA_NONE;
 #endif /* Allowed to use FIFO/DMA */
 
        printk(" [");
-#define printmode(x) {if(p->modes&PARPORT_MODE_##x){printk("%s%s",f?",":"",#x);f++;}}
+
+#define printmode(x) \
+       {\
+               if (p->modes & PARPORT_MODE_##x) {\
+                       printk("%s%s", f ? "," : "", #x);\
+                       f++;\
+               } \
+       }
+
        {
                int f = 0;
                printmode(PCSPP);
@@ -2309,10 +2394,10 @@ struct parport *parport_pc_probe_port(unsigned long int base,
        }
 #undef printmode
 #ifndef CONFIG_PARPORT_1284
-       printk ("(,...)");
+       printk("(,...)");
 #endif /* CONFIG_PARPORT_1284 */
        printk("]\n");
-       if (probedirq != PARPORT_IRQ_NONE) 
+       if (probedirq != PARPORT_IRQ_NONE)
                printk(KERN_INFO "%s: irq %d detected\n", p->name, probedirq);
 
        /* If No ECP release the ports grabbed above. */
@@ -2328,7 +2413,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
        if (p->irq != PARPORT_IRQ_NONE) {
                if (request_irq(p->irq, parport_irq_handler,
                                 irqflags, p->name, p)) {
-                       printk (KERN_WARNING "%s: irq %d in use, "
+                       printk(KERN_WARNING "%s: irq %d in use, "
                                "resorting to polled operation\n",
                                p->name, p->irq);
                        p->irq = PARPORT_IRQ_NONE;
@@ -2338,8 +2423,8 @@ struct parport *parport_pc_probe_port(unsigned long int base,
 #ifdef CONFIG_PARPORT_PC_FIFO
 #ifdef HAS_DMA
                if (p->dma != PARPORT_DMA_NONE) {
-                       if (request_dma (p->dma, p->name)) {
-                               printk (KERN_WARNING "%s: dma %d in use, "
+                       if (request_dma(p->dma, p->name)) {
+                               printk(KERN_WARNING "%s: dma %d in use, "
                                        "resorting to PIO operation\n",
                                        p->name, p->dma);
                                p->dma = PARPORT_DMA_NONE;
@@ -2349,8 +2434,8 @@ struct parport *parport_pc_probe_port(unsigned long int base,
                                                       PAGE_SIZE,
                                                       &priv->dma_handle,
                                                       GFP_KERNEL);
-                               if (! priv->dma_buf) {
-                                       printk (KERN_WARNING "%s: "
+                               if (!priv->dma_buf) {
+                                       printk(KERN_WARNING "%s: "
                                                "cannot get buffer for DMA, "
                                                "resorting to PIO operation\n",
                                                p->name);
@@ -2369,10 +2454,10 @@ struct parport *parport_pc_probe_port(unsigned long int base,
                 * Put the ECP detected port in PS2 mode.
                 * Do this also for ports that have ECR but don't do ECP.
                 */
-               ECR_WRITE (p, 0x34);
+               ECR_WRITE(p, 0x34);
 
        parport_pc_write_data(p, 0);
-       parport_pc_data_forward (p);
+       parport_pc_data_forward(p);
 
        /* Now that we've told the sharing engine about the port, and
           found out its characteristics, let the high-level drivers
@@ -2380,7 +2465,7 @@ struct parport *parport_pc_probe_port(unsigned long int base,
        spin_lock(&ports_lock);
        list_add(&priv->list, &ports_list);
        spin_unlock(&ports_lock);
-       parport_announce_port (p);
+       parport_announce_port(p);
 
        return p;
 
@@ -2393,18 +2478,17 @@ out5:
 out4:
        parport_put_port(p);
 out3:
-       kfree (priv);
+       kfree(priv);
 out2:
-       kfree (ops);
+       kfree(ops);
 out1:
        if (pdev)
                platform_device_unregister(pdev);
        return NULL;
 }
+EXPORT_SYMBOL(parport_pc_probe_port);
 
-EXPORT_SYMBOL (parport_pc_probe_port);
-
-void parport_pc_unregister_port (struct parport *p)
+void parport_pc_unregister_port(struct parport *p)
 {
        struct parport_pc_private *priv = p->private_data;
        struct parport_operations *ops = p->ops;
@@ -2430,17 +2514,16 @@ void parport_pc_unregister_port (struct parport *p)
                                    priv->dma_buf,
                                    priv->dma_handle);
 #endif
-       kfree (p->private_data);
+       kfree(p->private_data);
        parport_put_port(p);
-       kfree (ops); /* hope no-one cached it */
+       kfree(ops); /* hope no-one cached it */
 }
-
-EXPORT_SYMBOL (parport_pc_unregister_port);
+EXPORT_SYMBOL(parport_pc_unregister_port);
 
 #ifdef CONFIG_PCI
 
 /* ITE support maintained by Rich Liu <richliu@poorman.org> */
-static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
+static int __devinit sio_ite_8872_probe(struct pci_dev *pdev, int autoirq,
                                         int autodma,
                                         const struct parport_pc_via_data *via)
 {
@@ -2452,73 +2535,74 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
        int irq;
        int i;
 
-       DPRINTK (KERN_DEBUG "sio_ite_8872_probe()\n");
-       
-       // make sure which one chip
-       for(i = 0; i < 5; i++) {
+       DPRINTK(KERN_DEBUG "sio_ite_8872_probe()\n");
+
+       /* make sure which one chip */
+       for (i = 0; i < 5; i++) {
                base_res = request_region(inta_addr[i], 32, "it887x");
                if (base_res) {
                        int test;
-                       pci_write_config_dword (pdev, 0x60,
+                       pci_write_config_dword(pdev, 0x60,
                                                0xe5000000 | inta_addr[i]);
-                       pci_write_config_dword (pdev, 0x78,
+                       pci_write_config_dword(pdev, 0x78,
                                                0x00000000 | inta_addr[i]);
-                       test = inb (inta_addr[i]);
-                       if (test != 0xff) break;
+                       test = inb(inta_addr[i]);
+                       if (test != 0xff)
+                               break;
                        release_region(inta_addr[i], 0x8);
                }
        }
-       if(i >= 5) {
-               printk (KERN_INFO "parport_pc: cannot find ITE8872 INTA\n");
+       if (i >= 5) {
+               printk(KERN_INFO "parport_pc: cannot find ITE8872 INTA\n");
                return 0;
        }
 
-       type = inb (inta_addr[i] + 0x18);
+       type = inb(inta_addr[i] + 0x18);
        type &= 0x0f;
 
        switch (type) {
        case 0x2:
-               printk (KERN_INFO "parport_pc: ITE8871 found (1P)\n");
+               printk(KERN_INFO "parport_pc: ITE8871 found (1P)\n");
                ite8872set = 0x64200000;
                break;
        case 0xa:
-               printk (KERN_INFO "parport_pc: ITE8875 found (1P)\n");
+               printk(KERN_INFO "parport_pc: ITE8875 found (1P)\n");
                ite8872set = 0x64200000;
                break;
        case 0xe:
-               printk (KERN_INFO "parport_pc: ITE8872 found (2S1P)\n");
+               printk(KERN_INFO "parport_pc: ITE8872 found (2S1P)\n");
                ite8872set = 0x64e00000;
                break;
        case 0x6:
-               printk (KERN_INFO "parport_pc: ITE8873 found (1S)\n");
+               printk(KERN_INFO "parport_pc: ITE8873 found (1S)\n");
                return 0;
        case 0x8:
-               DPRINTK (KERN_DEBUG "parport_pc: ITE8874 found (2S)\n");
+               DPRINTK(KERN_DEBUG "parport_pc: ITE8874 found (2S)\n");
                return 0;
        default:
-               printk (KERN_INFO "parport_pc: unknown ITE887x\n");
-               printk (KERN_INFO "parport_pc: please mail 'lspci -nvv' "
+               printk(KERN_INFO "parport_pc: unknown ITE887x\n");
+               printk(KERN_INFO "parport_pc: please mail 'lspci -nvv' "
                        "output to Rich.Liu@ite.com.tw\n");
                return 0;
        }
 
-       pci_read_config_byte (pdev, 0x3c, &ite8872_irq);
-       pci_read_config_dword (pdev, 0x1c, &ite8872_lpt);
+       pci_read_config_byte(pdev, 0x3c, &ite8872_irq);
+       pci_read_config_dword(pdev, 0x1c, &ite8872_lpt);
        ite8872_lpt &= 0x0000ff00;
-       pci_read_config_dword (pdev, 0x20, &ite8872_lpthi);
+       pci_read_config_dword(pdev, 0x20, &ite8872_lpthi);
        ite8872_lpthi &= 0x0000ff00;
-       pci_write_config_dword (pdev, 0x6c, 0xe3000000 | ite8872_lpt);
-       pci_write_config_dword (pdev, 0x70, 0xe3000000 | ite8872_lpthi);
-       pci_write_config_dword (pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt);
-       // SET SPP&EPP , Parallel Port NO DMA , Enable All Function
-       // SET Parallel IRQ
-       pci_write_config_dword (pdev, 0x9c,
+       pci_write_config_dword(pdev, 0x6c, 0xe3000000 | ite8872_lpt);
+       pci_write_config_dword(pdev, 0x70, 0xe3000000 | ite8872_lpthi);
+       pci_write_config_dword(pdev, 0x80, (ite8872_lpthi<<16) | ite8872_lpt);
+       /* SET SPP&EPP , Parallel Port NO DMA , Enable All Function */
+       /* SET Parallel IRQ */
+       pci_write_config_dword(pdev, 0x9c,
                                ite8872set | (ite8872_irq * 0x11111));
 
-       DPRINTK (KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq);
-       DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n",
+       DPRINTK(KERN_DEBUG "ITE887x: The IRQ is %d.\n", ite8872_irq);
+       DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O port is 0x%x.\n",
                 ite8872_lpt);
-       DPRINTK (KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n",
+       DPRINTK(KERN_DEBUG "ITE887x: The PARALLEL I/O porthi is 0x%x.\n",
                 ite8872_lpthi);
 
        /* Let the user (or defaults) steer us away from interrupts */
@@ -2530,14 +2614,14 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
         * Release the resource so that parport_pc_probe_port can get it.
         */
        release_resource(base_res);
-       if (parport_pc_probe_port (ite8872_lpt, ite8872_lpthi,
+       if (parport_pc_probe_port(ite8872_lpt, ite8872_lpthi,
                                   irq, PARPORT_DMA_NONE, &pdev->dev, 0)) {
-               printk (KERN_INFO
+               printk(KERN_INFO
                        "parport_pc: ITE 8872 parallel port: io=0x%X",
-                       ite8872_lpt);
+                                                               ite8872_lpt);
                if (irq != PARPORT_IRQ_NONE)
-                       printk (", irq=%d", irq);
-               printk ("\n");
+                       printk(", irq=%d", irq);
+               printk("\n");
                return 1;
        }
 
@@ -2546,7 +2630,7 @@ static int __devinit sio_ite_8872_probe (struct pci_dev *pdev, int autoirq,
 
 /* VIA 8231 support by Pavel Fedin <sonic_amiga@rambler.ru>
    based on VIA 686a support code by Jeff Garzik <jgarzik@pobox.com> */
-static int __devinitdata parport_init_mode = 0;
+static int __devinitdata parport_init_mode;
 
 /* Data for two known VIA chips */
 static struct parport_pc_via_data via_686a_data __devinitdata = {
@@ -2568,7 +2652,7 @@ static struct parport_pc_via_data via_8231_data __devinitdata = {
        0xF6
 };
 
-static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
+static int __devinit sio_via_probe(struct pci_dev *pdev, int autoirq,
                                    int autodma,
                                    const struct parport_pc_via_data *via)
 {
@@ -2580,38 +2664,38 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
 
        printk(KERN_DEBUG "parport_pc: VIA 686A/8231 detected\n");
 
-       switch(parport_init_mode)
-       {
+       switch (parport_init_mode) {
        case 1:
-           printk(KERN_DEBUG "parport_pc: setting SPP mode\n");
-           siofunc = VIA_FUNCTION_PARPORT_SPP;
-           break;
+               printk(KERN_DEBUG "parport_pc: setting SPP mode\n");
+               siofunc = VIA_FUNCTION_PARPORT_SPP;
+               break;
        case 2:
-           printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n");
-           siofunc = VIA_FUNCTION_PARPORT_SPP;
-           ppcontrol = VIA_PARPORT_BIDIR;
-           break;
+               printk(KERN_DEBUG "parport_pc: setting PS/2 mode\n");
+               siofunc = VIA_FUNCTION_PARPORT_SPP;
+               ppcontrol = VIA_PARPORT_BIDIR;
+               break;
        case 3:
-           printk(KERN_DEBUG "parport_pc: setting EPP mode\n");
-           siofunc = VIA_FUNCTION_PARPORT_EPP;
-           ppcontrol = VIA_PARPORT_BIDIR;
-           have_epp = 1;
-           break;
+               printk(KERN_DEBUG "parport_pc: setting EPP mode\n");
+               siofunc = VIA_FUNCTION_PARPORT_EPP;
+               ppcontrol = VIA_PARPORT_BIDIR;
+               have_epp = 1;
+               break;
        case 4:
-           printk(KERN_DEBUG "parport_pc: setting ECP mode\n");
-           siofunc = VIA_FUNCTION_PARPORT_ECP;
-           ppcontrol = VIA_PARPORT_BIDIR;
-           break;
+               printk(KERN_DEBUG "parport_pc: setting ECP mode\n");
+               siofunc = VIA_FUNCTION_PARPORT_ECP;
+               ppcontrol = VIA_PARPORT_BIDIR;
+               break;
        case 5:
-           printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n");
-           siofunc = VIA_FUNCTION_PARPORT_ECP;
-           ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP;
-           have_epp = 1;
-           break;
-        default:
-           printk(KERN_DEBUG "parport_pc: probing current configuration\n");
-           siofunc = VIA_FUNCTION_PROBE;
-           break;
+               printk(KERN_DEBUG "parport_pc: setting EPP+ECP mode\n");
+               siofunc = VIA_FUNCTION_PARPORT_ECP;
+               ppcontrol = VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP;
+               have_epp = 1;
+               break;
+       default:
+               printk(KERN_DEBUG
+                       "parport_pc: probing current configuration\n");
+               siofunc = VIA_FUNCTION_PROBE;
+               break;
        }
        /*
         * unlock super i/o configuration
@@ -2622,38 +2706,36 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
 
        /* Bits 1-0: Parallel Port Mode / Enable */
        outb(via->viacfg_function, VIA_CONFIG_INDEX);
-       tmp = inb (VIA_CONFIG_DATA);
+       tmp = inb(VIA_CONFIG_DATA);
        /* Bit 5: EPP+ECP enable; bit 7: PS/2 bidirectional port enable */
        outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
-       tmp2 = inb (VIA_CONFIG_DATA);
-       if (siofunc == VIA_FUNCTION_PROBE)
-       {
-           siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE;
-           ppcontrol = tmp2;
+       tmp2 = inb(VIA_CONFIG_DATA);
+       if (siofunc == VIA_FUNCTION_PROBE) {
+               siofunc = tmp & VIA_FUNCTION_PARPORT_DISABLE;
+               ppcontrol = tmp2;
+       } else {
+               tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
+               tmp |= siofunc;
+               outb(via->viacfg_function, VIA_CONFIG_INDEX);
+               outb(tmp, VIA_CONFIG_DATA);
+               tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
+               tmp2 |= ppcontrol;
+               outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
+               outb(tmp2, VIA_CONFIG_DATA);
        }
-       else
-       {
-           tmp &= ~VIA_FUNCTION_PARPORT_DISABLE;
-           tmp |= siofunc;
-           outb(via->viacfg_function, VIA_CONFIG_INDEX);
-           outb(tmp, VIA_CONFIG_DATA);
-           tmp2 &= ~(VIA_PARPORT_BIDIR|VIA_PARPORT_ECPEPP);
-           tmp2 |= ppcontrol;
-           outb(via->viacfg_parport_control, VIA_CONFIG_INDEX);
-           outb(tmp2, VIA_CONFIG_DATA);
-       }
-       
+
        /* Parallel Port I/O Base Address, bits 9-2 */
        outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
        port1 = inb(VIA_CONFIG_DATA) << 2;
-       
-       printk (KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",port1);
-       if ((port1 == 0x3BC) && have_epp)
-       {
-           outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
-           outb((0x378 >> 2), VIA_CONFIG_DATA);
-           printk(KERN_DEBUG "parport_pc: Parallel port base changed to 0x378\n");
-           port1 = 0x378;
+
+       printk(KERN_DEBUG "parport_pc: Current parallel port base: 0x%X\n",
+                                                                       port1);
+       if (port1 == 0x3BC && have_epp) {
+               outb(via->viacfg_parport_base, VIA_CONFIG_INDEX);
+               outb((0x378 >> 2), VIA_CONFIG_DATA);
+               printk(KERN_DEBUG
+                       "parport_pc: Parallel port base changed to 0x378\n");
+               port1 = 0x378;
        }
 
        /*
@@ -2667,36 +2749,39 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
                printk(KERN_INFO "parport_pc: VIA parallel port disabled in BIOS\n");
                return 0;
        }
-       
+
        /* Bits 7-4: PnP Routing for Parallel Port IRQ */
        pci_read_config_byte(pdev, via->via_pci_parport_irq_reg, &tmp);
        irq = ((tmp & VIA_IRQCONTROL_PARALLEL) >> 4);
 
-       if (siofunc == VIA_FUNCTION_PARPORT_ECP)
-       {
-           /* Bits 3-2: PnP Routing for Parallel Port DMA */
-           pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp);
-           dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2);
-       }
-       else
-           /* if ECP not enabled, DMA is not enabled, assumed bogus 'dma' value */
-           dma = PARPORT_DMA_NONE;
+       if (siofunc == VIA_FUNCTION_PARPORT_ECP) {
+               /* Bits 3-2: PnP Routing for Parallel Port DMA */
+               pci_read_config_byte(pdev, via->via_pci_parport_dma_reg, &tmp);
+               dma = ((tmp & VIA_DMACONTROL_PARALLEL) >> 2);
+       } else
+               /* if ECP not enabled, DMA is not enabled, assumed
+                  bogus 'dma' value */
+               dma = PARPORT_DMA_NONE;
 
        /* Let the user (or defaults) steer us away from interrupts and DMA */
        if (autoirq == PARPORT_IRQ_NONE) {
-           irq = PARPORT_IRQ_NONE;
-           dma = PARPORT_DMA_NONE;
+               irq = PARPORT_IRQ_NONE;
+               dma = PARPORT_DMA_NONE;
        }
        if (autodma == PARPORT_DMA_NONE)
-           dma = PARPORT_DMA_NONE;
+               dma = PARPORT_DMA_NONE;
 
        switch (port1) {
-       case 0x3bc: port2 = 0x7bc; break;
-       case 0x378: port2 = 0x778; break;
-       case 0x278: port2 = 0x678; break;
+       case 0x3bc:
+               port2 = 0x7bc; break;
+       case 0x378:
+               port2 = 0x778; break;
+       case 0x278:
+               port2 = 0x678; break;
        default:
-               printk(KERN_INFO "parport_pc: Weird VIA parport base 0x%X, ignoring\n",
-                       port1);
+               printk(KERN_INFO
+                       "parport_pc: Weird VIA parport base 0x%X, ignoring\n",
+                                                                       port1);
                return 0;
        }
 
@@ -2714,17 +2799,17 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
        }
 
        /* finally, do the probe with values obtained */
-       if (parport_pc_probe_port (port1, port2, irq, dma, &pdev->dev, 0)) {
-               printk (KERN_INFO
+       if (parport_pc_probe_port(port1, port2, irq, dma, &pdev->dev, 0)) {
+               printk(KERN_INFO
                        "parport_pc: VIA parallel port: io=0x%X", port1);
                if (irq != PARPORT_IRQ_NONE)
-                       printk (", irq=%d", irq);
+                       printk(", irq=%d", irq);
                if (dma != PARPORT_DMA_NONE)
-                       printk (", dma=%d", dma);
-               printk ("\n");
+                       printk(", dma=%d", dma);
+               printk("\n");
                return 1;
        }
-       
+
        printk(KERN_WARNING "parport_pc: Strange, can't probe VIA parallel port: io=0x%X, irq=%d, dma=%d\n",
                port1, irq, dma);
        return 0;
@@ -2732,8 +2817,8 @@ static int __devinit sio_via_probe (struct pci_dev *pdev, int autoirq,
 
 
 enum parport_pc_sio_types {
-       sio_via_686a = 0,       /* Via VT82C686A motherboard Super I/O */
-       sio_via_8231,           /* Via VT8231 south bridge integrated Super IO */
+       sio_via_686a = 0,   /* Via VT82C686A motherboard Super I/O */
+       sio_via_8231,       /* Via VT8231 south bridge integrated Super IO */
        sio_ite_8872,
        last_sio
 };
@@ -2804,15 +2889,15 @@ enum parport_pc_pci_cards {
 };
 
 
-/* each element directly indexed from enum list, above 
+/* each element directly indexed from enum list, above
  * (but offset by last_sio) */
 static struct parport_pc_pci {
        int numports;
        struct { /* BAR (base address registers) numbers in the config
-                    space header */
+                   space header */
                int lo;
-               int hi; /* -1 if not there, >6 for offset-method (max
-                           BAR is 6) */
+               int hi;
+               /* -1 if not there, >6 for offset-method (max BAR is 6) */
        } addr[4];
 
        /* If set, this is called immediately after pci_enable_device.
@@ -2857,7 +2942,7 @@ static struct parport_pc_pci {
        /* timedia_4018  */             { 2, { { 0, 1 }, { 2, 3 }, } },
        /* timedia_9018a */             { 2, { { 0, 1 }, { 2, 3 }, } },
                                        /* SYBA uses fixed offsets in
-                                           a 1K io window */
+                                          a 1K io window */
        /* syba_2p_epp AP138B */        { 2, { { 0, 0x078 }, { 0, 0x178 }, } },
        /* syba_1p_ecp W83787 */        { 1, { { 0, 0x078 }, } },
        /* titan_010l */                { 1, { { 3, -1 }, } },
@@ -2873,11 +2958,14 @@ static struct parport_pc_pci {
        /* oxsemi_pcie_pport */         { 1, { { 0, 1 }, } },
        /* aks_0100 */                  { 1, { { 0, -1 }, } },
        /* mobility_pp */               { 1, { { 0, 1 }, } },
-       /* netmos_9705 */               { 1, { { 0, -1 }, } }, /* untested */
-        /* netmos_9715 */               { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */
-        /* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} }, /* untested */
-       /* netmos_9805 */               { 1, { { 0, -1 }, } }, /* untested */
-       /* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } }, /* untested */
+
+       /* The netmos entries below are untested */
+       /* netmos_9705 */               { 1, { { 0, -1 }, } },
+       /* netmos_9715 */               { 2, { { 0, 1 }, { 2, 3 },} },
+       /* netmos_9755 */               { 2, { { 0, 1 }, { 2, 3 },} },
+       /* netmos_9805 */               { 1, { { 0, -1 }, } },
+       /* netmos_9815 */               { 2, { { 0, -1 }, { 2, -1 }, } },
+
        /* quatech_sppxp100 */          { 1, { { 0, 1 }, } },
 };
 
@@ -2906,7 +2994,7 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
        { PCI_VENDOR_ID_LAVA, PCI_DEVICE_ID_LAVA_BOCA_IOPPAR,
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, boca_ioppar },
        { PCI_VENDOR_ID_PLX, PCI_DEVICE_ID_PLX_9050,
-         PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0,0, plx_9050 },
+         PCI_SUBVENDOR_ID_EXSYS, PCI_SUBDEVICE_ID_EXSYS_4014, 0, 0, plx_9050 },
        /* PCI_VENDOR_ID_TIMEDIA/SUNIX has many differing cards ...*/
        { 0x1409, 0x7168, 0x1409, 0x4078, 0, 0, timedia_4078a },
        { 0x1409, 0x7168, 0x1409, 0x4079, 0, 0, timedia_4079h },
@@ -2940,7 +3028,8 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
        { 0x9710, 0x9805, 0x1000, 0x0010, 0, 0, titan_1284p1 },
        { 0x9710, 0x9815, 0x1000, 0x0020, 0, 0, titan_1284p2 },
        /* PCI_VENDOR_ID_AVLAB/Intek21 has another bunch of cards ...*/
-       { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p}, /* AFAVLAB_TK9902 */
+       /* AFAVLAB_TK9902 */
+       { 0x14db, 0x2120, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_1p},
        { 0x14db, 0x2121, PCI_ANY_ID, PCI_ANY_ID, 0, 0, avlab_2p},
        { PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI952PP,
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, oxsemi_952 },
@@ -2983,14 +3072,14 @@ static const struct pci_device_id parport_pc_pci_tbl[] = {
          PCI_ANY_ID, PCI_ANY_ID, 0, 0, quatech_sppxp100 },
        { 0, } /* terminate list */
 };
-MODULE_DEVICE_TABLE(pci,parport_pc_pci_tbl);
+MODULE_DEVICE_TABLE(pci, parport_pc_pci_tbl);
 
 struct pci_parport_data {
        int num;
        struct parport *ports[2];
 };
 
-static int parport_pc_pci_probe (struct pci_dev *dev,
+static int parport_pc_pci_probe(struct pci_dev *dev,
                                           const struct pci_device_id *id)
 {
        int err, count, n, i = id->driver_data;
@@ -3003,7 +3092,8 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
        /* This is a PCI card */
        i -= last_sio;
        count = 0;
-       if ((err = pci_enable_device (dev)) != 0)
+       err = pci_enable_device(dev);
+       if (err)
                return err;
 
        data = kmalloc(sizeof(struct pci_parport_data), GFP_KERNEL);
@@ -3011,7 +3101,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
                return -ENOMEM;
 
        if (cards[i].preinit_hook &&
-           cards[i].preinit_hook (dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) {
+           cards[i].preinit_hook(dev, PARPORT_IRQ_NONE, PARPORT_DMA_NONE)) {
                kfree(data);
                return -ENODEV;
        }
@@ -3021,25 +3111,25 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
                int hi = cards[i].addr[n].hi;
                int irq;
                unsigned long io_lo, io_hi;
-               io_lo = pci_resource_start (dev, lo);
+               io_lo = pci_resource_start(dev, lo);
                io_hi = 0;
                if ((hi >= 0) && (hi <= 6))
-                       io_hi = pci_resource_start (dev, hi);
+                       io_hi = pci_resource_start(dev, hi);
                else if (hi > 6)
                        io_lo += hi; /* Reinterpret the meaning of
-                                        "hi" as an offset (see SYBA
-                                        def.) */
+                                       "hi" as an offset (see SYBA
+                                       def.) */
                /* TODO: test if sharing interrupts works */
                irq = dev->irq;
                if (irq == IRQ_NONE) {
-                       printk (KERN_DEBUG
+                       printk(KERN_DEBUG
        "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx)\n",
                                parport_pc_pci_tbl[i + last_sio].vendor,
                                parport_pc_pci_tbl[i + last_sio].device,
                                io_lo, io_hi);
                        irq = PARPORT_IRQ_NONE;
                } else {
-                       printk (KERN_DEBUG
+                       printk(KERN_DEBUG
        "PCI parallel port detected: %04x:%04x, I/O at %#lx(%#lx), IRQ %d\n",
                                parport_pc_pci_tbl[i + last_sio].vendor,
                                parport_pc_pci_tbl[i + last_sio].device,
@@ -3056,7 +3146,7 @@ static int parport_pc_pci_probe (struct pci_dev *dev,
        data->num = count;
 
        if (cards[i].postinit_hook)
-               cards[i].postinit_hook (dev, count == 0);
+               cards[i].postinit_hook(dev, count == 0);
 
        if (count) {
                pci_set_drvdata(dev, data);
@@ -3090,7 +3180,7 @@ static struct pci_driver parport_pc_pci_driver = {
        .remove         = __devexit_p(parport_pc_pci_remove),
 };
 
-static int __init parport_pc_init_superio (int autoirq, int autodma)
+static int __init parport_pc_init_superio(int autoirq, int autodma)
 {
        const struct pci_device_id *id;
        struct pci_dev *pdev = NULL;
@@ -3101,8 +3191,9 @@ static int __init parport_pc_init_superio (int autoirq, int autodma)
                if (id == NULL || id->driver_data >= last_sio)
                        continue;
 
-               if (parport_pc_superio_info[id->driver_data].probe
-                       (pdev, autoirq, autodma,parport_pc_superio_info[id->driver_data].via)) {
+               if (parport_pc_superio_info[id->driver_data].probe(
+                       pdev, autoirq, autodma,
+                       parport_pc_superio_info[id->driver_data].via)) {
                        ret++;
                }
        }
@@ -3111,7 +3202,10 @@ static int __init parport_pc_init_superio (int autoirq, int autodma)
 }
 #else
 static struct pci_driver parport_pc_pci_driver;
-static int __init parport_pc_init_superio(int autoirq, int autodma) {return 0;}
+static int __init parport_pc_init_superio(int autoirq, int autodma)
+{
+       return 0;
+}
 #endif /* CONFIG_PCI */
 
 #ifdef CONFIG_PNP
@@ -3124,44 +3218,45 @@ static const struct pnp_device_id parport_pc_pnp_tbl[] = {
        { }
 };
 
-MODULE_DEVICE_TABLE(pnp,parport_pc_pnp_tbl);
+MODULE_DEVICE_TABLE(pnp, parport_pc_pnp_tbl);
 
-static int parport_pc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id)
+static int parport_pc_pnp_probe(struct pnp_dev *dev,
+                                               const struct pnp_device_id *id)
 {
        struct parport *pdata;
        unsigned long io_lo, io_hi;
        int dma, irq;
 
-       if (pnp_port_valid(dev,0) &&
-               !(pnp_port_flags(dev,0) & IORESOURCE_DISABLED)) {
-               io_lo = pnp_port_start(dev,0);
+       if (pnp_port_valid(dev, 0) &&
+               !(pnp_port_flags(dev, 0) & IORESOURCE_DISABLED)) {
+               io_lo = pnp_port_start(dev, 0);
        } else
                return -EINVAL;
 
-       if (pnp_port_valid(dev,1) &&
-               !(pnp_port_flags(dev,1) & IORESOURCE_DISABLED)) {
-               io_hi = pnp_port_start(dev,1);
+       if (pnp_port_valid(dev, 1) &&
+               !(pnp_port_flags(dev, 1) & IORESOURCE_DISABLED)) {
+               io_hi = pnp_port_start(dev, 1);
        } else
                io_hi = 0;
 
-       if (pnp_irq_valid(dev,0) &&
-               !(pnp_irq_flags(dev,0) & IORESOURCE_DISABLED)) {
-               irq = pnp_irq(dev,0);
+       if (pnp_irq_valid(dev, 0) &&
+               !(pnp_irq_flags(dev, 0) & IORESOURCE_DISABLED)) {
+               irq = pnp_irq(dev, 0);
        } else
                irq = PARPORT_IRQ_NONE;
 
-       if (pnp_dma_valid(dev,0) &&
-               !(pnp_dma_flags(dev,0) & IORESOURCE_DISABLED)) {
-               dma = pnp_dma(dev,0);
+       if (pnp_dma_valid(dev, 0) &&
+               !(pnp_dma_flags(dev, 0) & IORESOURCE_DISABLED)) {
+               dma = pnp_dma(dev, 0);
        } else
                dma = PARPORT_DMA_NONE;
 
        dev_info(&dev->dev, "reported by %s\n", dev->protocol->name);
-       if (!(pdata = parport_pc_probe_port(io_lo, io_hi,
-                                       irq, dma, &dev->dev, 0)))
+       pdata = parport_pc_probe_port(io_lo, io_hi, irq, dma, &dev->dev, 0);
+       if (pdata == NULL)
                return -ENODEV;
 
-       pnp_set_drvdata(dev,pdata);
+       pnp_set_drvdata(dev, pdata);
        return 0;
 }
 
@@ -3203,7 +3298,7 @@ static struct platform_driver parport_pc_platform_driver = {
 
 /* This is called by parport_pc_find_nonpci_ports (in asm/parport.h) */
 static int __devinit __attribute__((unused))
-parport_pc_find_isa_ports (int autoirq, int autodma)
+parport_pc_find_isa_ports(int autoirq, int autodma)
 {
        int count = 0;
 
@@ -3227,7 +3322,7 @@ parport_pc_find_isa_ports (int autoirq, int autodma)
  * autoirq is PARPORT_IRQ_NONE, PARPORT_IRQ_AUTO, or PARPORT_IRQ_PROBEONLY
  * autodma is PARPORT_DMA_NONE or PARPORT_DMA_AUTO
  */
-static void __init parport_pc_find_ports (int autoirq, int autodma)
+static void __init parport_pc_find_ports(int autoirq, int autodma)
 {
        int count = 0, err;
 
@@ -3261,11 +3356,18 @@ static void __init parport_pc_find_ports (int autoirq, int autodma)
  *     syntax and keep in mind that code below is a cleaned up version.
  */
 
-static int __initdata io[PARPORT_PC_MAX_PORTS+1] = { [0 ... PARPORT_PC_MAX_PORTS] = 0 };
-static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] =
-       { [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO };
-static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE };
-static int __initdata irqval[PARPORT_PC_MAX_PORTS] = { [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY };
+static int __initdata io[PARPORT_PC_MAX_PORTS+1] = {
+       [0 ... PARPORT_PC_MAX_PORTS] = 0
+};
+static int __initdata io_hi[PARPORT_PC_MAX_PORTS+1] = {
+       [0 ... PARPORT_PC_MAX_PORTS] = PARPORT_IOHI_AUTO
+};
+static int __initdata dmaval[PARPORT_PC_MAX_PORTS] = {
+       [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_DMA_NONE
+};
+static int __initdata irqval[PARPORT_PC_MAX_PORTS] = {
+       [0 ... PARPORT_PC_MAX_PORTS-1] = PARPORT_IRQ_PROBEONLY
+};
 
 static int __init parport_parse_param(const char *s, int *val,
                                int automatic, int none, int nofifo)
@@ -3306,18 +3408,19 @@ static int __init parport_parse_dma(const char *dmastr, int *val)
 #ifdef CONFIG_PCI
 static int __init parport_init_mode_setup(char *str)
 {
-       printk(KERN_DEBUG "parport_pc.c: Specified parameter parport_init_mode=%s\n", str);
-
-       if (!strcmp (str, "spp"))
-               parport_init_mode=1;
-       if (!strcmp (str, "ps2"))
-               parport_init_mode=2;
-       if (!strcmp (str, "epp"))
-               parport_init_mode=3;
-       if (!strcmp (str, "ecp"))
-               parport_init_mode=4;
-       if (!strcmp (str, "ecpepp"))
-               parport_init_mode=5;
+       printk(KERN_DEBUG
+            "parport_pc.c: Specified parameter parport_init_mode=%s\n", str);
+
+       if (!strcmp(str, "spp"))
+               parport_init_mode = 1;
+       if (!strcmp(str, "ps2"))
+               parport_init_mode = 2;
+       if (!strcmp(str, "epp"))
+               parport_init_mode = 3;
+       if (!strcmp(str, "ecp"))
+               parport_init_mode = 4;
+       if (!strcmp(str, "ecpepp"))
+               parport_init_mode = 5;
        return 1;
 }
 #endif
@@ -3341,7 +3444,8 @@ module_param(verbose_probing, int, 0644);
 #endif
 #ifdef CONFIG_PCI
 static char *init_mode;
-MODULE_PARM_DESC(init_mode, "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)");
+MODULE_PARM_DESC(init_mode,
+       "Initialise mode for VIA VT8231 port (spp, ps2, epp, ecp or ecpepp)");
 module_param(init_mode, charp, 0);
 #endif
 
@@ -3372,7 +3476,7 @@ static int __init parse_parport_params(void)
                                irqval[0] = val;
                                break;
                        default:
-                               printk (KERN_WARNING
+                               printk(KERN_WARNING
                                        "parport_pc: irq specified "
                                        "without base address.  Use 'io=' "
                                        "to specify one\n");
@@ -3385,7 +3489,7 @@ static int __init parse_parport_params(void)
                                dmaval[0] = val;
                                break;
                        default:
-                               printk (KERN_WARNING
+                               printk(KERN_WARNING
                                        "parport_pc: dma specified "
                                        "without base address.  Use 'io=' "
                                        "to specify one\n");
@@ -3396,7 +3500,7 @@ static int __init parse_parport_params(void)
 
 #else
 
-static int parport_setup_ptr __initdata = 0;
+static int parport_setup_ptr __initdata;
 
 /*
  * Acceptable parameters:
@@ -3407,7 +3511,7 @@ static int parport_setup_ptr __initdata = 0;
  *
  * IRQ/DMA may be numeric or 'auto' or 'none'
  */
-static int __init parport_setup (char *str)
+static int __init parport_setup(char *str)
 {
        char *endptr;
        char *sep;
@@ -3419,15 +3523,15 @@ static int __init parport_setup (char *str)
                return 1;
        }
 
-       if (!strncmp (str, "auto", 4)) {
+       if (!strncmp(str, "auto", 4)) {
                irqval[0] = PARPORT_IRQ_AUTO;
                dmaval[0] = PARPORT_DMA_AUTO;
                return 1;
        }
 
-       val = simple_strtoul (str, &endptr, 0);
+       val = simple_strtoul(str, &endptr, 0);
        if (endptr == str) {
-               printk (KERN_WARNING "parport=%s not understood\n", str);
+               printk(KERN_WARNING "parport=%s not understood\n", str);
                return 1;
        }
 
@@ -3461,7 +3565,7 @@ static int __init parse_parport_params(void)
        return io[0] == PARPORT_DISABLE;
 }
 
-__setup ("parport=", parport_setup);
+__setup("parport=", parport_setup);
 
 /*
  * Acceptable parameters:
@@ -3469,7 +3573,7 @@ __setup ("parport=", parport_setup);
  * parport_init_mode=[spp|ps2|epp|ecp|ecpepp]
  */
 #ifdef CONFIG_PCI
-__setup("parport_init_mode=",parport_init_mode_setup);
+__setup("parport_init_mode=", parport_init_mode_setup);
 #endif
 #endif
 
@@ -3493,13 +3597,13 @@ static int __init parport_pc_init(void)
                for (i = 0; i < PARPORT_PC_MAX_PORTS; i++) {
                        if (!io[i])
                                break;
-                       if ((io_hi[i]) == PARPORT_IOHI_AUTO)
-                              io_hi[i] = 0x400 + io[i];
+                       if (io_hi[i] == PARPORT_IOHI_AUTO)
+                               io_hi[i] = 0x400 + io[i];
                        parport_pc_probe_port(io[i], io_hi[i],
-                                         irqval[i], dmaval[i], NULL, 0);
+                                       irqval[i], dmaval[i], NULL, 0);
                }
        } else
-               parport_pc_find_ports (irqval[0], dmaval[0]);
+               parport_pc_find_ports(irqval[0], dmaval[0]);
 
        return 0;
 }
@@ -3507,9 +3611,9 @@ static int __init parport_pc_init(void)
 static void __exit parport_pc_exit(void)
 {
        if (pci_registered_parport)
-               pci_unregister_driver (&parport_pc_pci_driver);
+               pci_unregister_driver(&parport_pc_pci_driver);
        if (pnp_registered_parport)
-               pnp_unregister_driver (&parport_pc_pnp_driver);
+               pnp_unregister_driver(&parport_pc_pnp_driver);
        platform_driver_unregister(&parport_pc_platform_driver);
 
        while (!list_empty(&ports_list)) {
index dd18f85..42e4260 100644 (file)
@@ -153,45 +153,47 @@ int ibmphp_init_devno(struct slot **cur_slot)
                return -1;
        }
        for (loop = 0; loop < len; loop++) {
-               if ((*cur_slot)->number == rtable->slots[loop].slot) {
-               if ((*cur_slot)->bus == rtable->slots[loop].bus) {
+               if ((*cur_slot)->number == rtable->slots[loop].slot &&
+                   (*cur_slot)->bus == rtable->slots[loop].bus) {
+                       struct io_apic_irq_attr irq_attr;
+
                        (*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
                        for (i = 0; i < 4; i++)
                                (*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
-                                               (int) (*cur_slot)->device, i);
-
-                               debug("(*cur_slot)->irq[0] = %x\n",
-                                               (*cur_slot)->irq[0]);
-                               debug("(*cur_slot)->irq[1] = %x\n",
-                                               (*cur_slot)->irq[1]);
-                               debug("(*cur_slot)->irq[2] = %x\n",
-                                               (*cur_slot)->irq[2]);
-                               debug("(*cur_slot)->irq[3] = %x\n",
-                                               (*cur_slot)->irq[3]);
-
-                               debug("rtable->exlusive_irqs = %x\n",
+                                               (int) (*cur_slot)->device, i,
+                                               &irq_attr);
+
+                       debug("(*cur_slot)->irq[0] = %x\n",
+                                       (*cur_slot)->irq[0]);
+                       debug("(*cur_slot)->irq[1] = %x\n",
+                                       (*cur_slot)->irq[1]);
+                       debug("(*cur_slot)->irq[2] = %x\n",
+                                       (*cur_slot)->irq[2]);
+                       debug("(*cur_slot)->irq[3] = %x\n",
+                                       (*cur_slot)->irq[3]);
+
+                       debug("rtable->exlusive_irqs = %x\n",
                                        rtable->exclusive_irqs);
-                               debug("rtable->slots[loop].irq[0].bitmap = %x\n",
+                       debug("rtable->slots[loop].irq[0].bitmap = %x\n",
                                        rtable->slots[loop].irq[0].bitmap);
-                               debug("rtable->slots[loop].irq[1].bitmap = %x\n",
+                       debug("rtable->slots[loop].irq[1].bitmap = %x\n",
                                        rtable->slots[loop].irq[1].bitmap);
-                               debug("rtable->slots[loop].irq[2].bitmap = %x\n",
+                       debug("rtable->slots[loop].irq[2].bitmap = %x\n",
                                        rtable->slots[loop].irq[2].bitmap);
-                               debug("rtable->slots[loop].irq[3].bitmap = %x\n",
+                       debug("rtable->slots[loop].irq[3].bitmap = %x\n",
                                        rtable->slots[loop].irq[3].bitmap);
 
-                               debug("rtable->slots[loop].irq[0].link = %x\n",
+                       debug("rtable->slots[loop].irq[0].link = %x\n",
                                        rtable->slots[loop].irq[0].link);
-                               debug("rtable->slots[loop].irq[1].link = %x\n",
+                       debug("rtable->slots[loop].irq[1].link = %x\n",
                                        rtable->slots[loop].irq[1].link);
-                               debug("rtable->slots[loop].irq[2].link = %x\n",
+                       debug("rtable->slots[loop].irq[2].link = %x\n",
                                        rtable->slots[loop].irq[2].link);
-                               debug("rtable->slots[loop].irq[3].link = %x\n",
+                       debug("rtable->slots[loop].irq[3].link = %x\n",
                                        rtable->slots[loop].irq[3].link);
-                               debug("end of init_devno\n");
-                               kfree(rtable);
-                               return 0;
-                       }
+                       debug("end of init_devno\n");
+                       kfree(rtable);
+                       return 0;
                }
        }
 
index 6808d83..737a1c4 100644 (file)
@@ -98,6 +98,7 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
        int max_irq;
        int pos;
        int irq;
+       int node;
 
        pos = pci_find_ht_capability(dev, HT_CAPTYPE_IRQ);
        if (!pos)
@@ -125,7 +126,8 @@ int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update)
        cfg->msg.address_lo = 0xffffffff;
        cfg->msg.address_hi = 0xffffffff;
 
-       irq = create_irq();
+       node = dev_to_node(&dev->dev);
+       irq = create_irq_nr(0, node);
 
        if (irq <= 0) {
                kfree(cfg);
index a563fbe..cd38916 100644 (file)
@@ -1972,15 +1972,6 @@ static int __init init_dmars(void)
                }
        }
 
-#ifdef CONFIG_INTR_REMAP
-       if (!intr_remapping_enabled) {
-               ret = enable_intr_remapping(0);
-               if (ret)
-                       printk(KERN_ERR
-                              "IOMMU: enable interrupt remapping failed\n");
-       }
-#endif
-
        /*
         * For each rmrr
         *   for each dev attached to rmrr
index f5e0ea7..3a0cb0b 100644 (file)
@@ -15,6 +15,14 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
 static int ir_ioapic_num;
 int intr_remapping_enabled;
 
+static int disable_intremap;
+static __init int setup_nointremap(char *str)
+{
+       disable_intremap = 1;
+       return 0;
+}
+early_param("nointremap", setup_nointremap);
+
 struct irq_2_iommu {
        struct intel_iommu *iommu;
        u16 irte_index;
@@ -23,15 +31,12 @@ struct irq_2_iommu {
 };
 
 #ifdef CONFIG_GENERIC_HARDIRQS
-static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu)
+static struct irq_2_iommu *get_one_free_irq_2_iommu(int node)
 {
        struct irq_2_iommu *iommu;
-       int node;
-
-       node = cpu_to_node(cpu);
 
        iommu = kzalloc_node(sizeof(*iommu), GFP_ATOMIC, node);
-       printk(KERN_DEBUG "alloc irq_2_iommu on cpu %d node %d\n", cpu, node);
+       printk(KERN_DEBUG "alloc irq_2_iommu on node %d\n", node);
 
        return iommu;
 }
@@ -48,7 +53,7 @@ static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
        return desc->irq_2_iommu;
 }
 
-static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
+static struct irq_2_iommu *irq_2_iommu_alloc_node(unsigned int irq, int node)
 {
        struct irq_desc *desc;
        struct irq_2_iommu *irq_iommu;
@@ -56,7 +61,7 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
        /*
         * alloc irq desc if not allocated already.
         */
-       desc = irq_to_desc_alloc_cpu(irq, cpu);
+       desc = irq_to_desc_alloc_node(irq, node);
        if (!desc) {
                printk(KERN_INFO "can not get irq_desc for %d\n", irq);
                return NULL;
@@ -65,14 +70,14 @@ static struct irq_2_iommu *irq_2_iommu_alloc_cpu(unsigned int irq, int cpu)
        irq_iommu = desc->irq_2_iommu;
 
        if (!irq_iommu)
-               desc->irq_2_iommu = get_one_free_irq_2_iommu(cpu);
+               desc->irq_2_iommu = get_one_free_irq_2_iommu(node);
 
        return desc->irq_2_iommu;
 }
 
 static struct irq_2_iommu *irq_2_iommu_alloc(unsigned int irq)
 {
-       return irq_2_iommu_alloc_cpu(irq, boot_cpu_id);
+       return irq_2_iommu_alloc_node(irq, cpu_to_node(boot_cpu_id));
 }
 
 #else /* !CONFIG_SPARSE_IRQ */
@@ -423,20 +428,6 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
                      readl, (sts & DMA_GSTS_IRTPS), sts);
        spin_unlock_irqrestore(&iommu->register_lock, flags);
 
-       if (mode == 0) {
-               spin_lock_irqsave(&iommu->register_lock, flags);
-
-               /* enable comaptiblity format interrupt pass through */
-               cmd = iommu->gcmd | DMA_GCMD_CFI;
-               iommu->gcmd |= DMA_GCMD_CFI;
-               writel(cmd, iommu->reg + DMAR_GCMD_REG);
-
-               IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
-                             readl, (sts & DMA_GSTS_CFIS), sts);
-
-               spin_unlock_irqrestore(&iommu->register_lock, flags);
-       }
-
        /*
         * global invalidation of interrupt entry cache before enabling
         * interrupt-remapping.
@@ -516,6 +507,23 @@ end:
        spin_unlock_irqrestore(&iommu->register_lock, flags);
 }
 
+int __init intr_remapping_supported(void)
+{
+       struct dmar_drhd_unit *drhd;
+
+       if (disable_intremap)
+               return 0;
+
+       for_each_drhd_unit(drhd) {
+               struct intel_iommu *iommu = drhd->iommu;
+
+               if (!ecap_ir_support(iommu->ecap))
+                       return 0;
+       }
+
+       return 1;
+}
+
 int __init enable_intr_remapping(int eim)
 {
        struct dmar_drhd_unit *drhd;
index e3c3e08..f1ae247 100644 (file)
@@ -745,6 +745,8 @@ int pci_setup_device(struct pci_dev *dev)
 
        /* Early fixups, before probing the BARs */
        pci_fixup_device(pci_fixup_early, dev);
+       /* device class may be changed after fixup */
+       class = dev->class >> 8;
 
        switch (dev->hdr_type) {                    /* header type */
        case PCI_HEADER_TYPE_NORMAL:                /* standard header */
index adf1785..7f207f3 100644 (file)
@@ -123,7 +123,7 @@ static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev,
        }
 
        flags = irq_flags(triggering, polarity, shareable);
-       irq = acpi_register_gsi(gsi, triggering, polarity);
+       irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity);
        if (irq >= 0)
                pcibios_penalize_isa_irq(irq, 1);
        else
index e1716f1..91e316f 100644 (file)
@@ -1065,6 +1065,7 @@ sg_ioctl(struct inode *inode, struct file *filp,
                return blk_trace_setup(sdp->device->request_queue,
                                       sdp->disk->disk_name,
                                       MKDEV(SCSI_GENERIC_MAJOR, sdp->index),
+                                      NULL,
                                       (char *)arg);
        case BLKTRACESTART:
                return blk_trace_startstop(sdp->device->request_queue, 1);
index a0127e9..fb867a9 100644 (file)
@@ -287,6 +287,13 @@ static const struct serial8250_config uart_config[] = {
                .fcr            = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
                .flags          = UART_CAP_FIFO,
        },
+       [PORT_AR7] = {
+               .name           = "AR7",
+               .fifo_size      = 16,
+               .tx_loadsz      = 16,
+               .fcr            = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_00,
+               .flags          = UART_CAP_FIFO | UART_CAP_AFE,
+       },
 };
 
 #if defined (CONFIG_SERIAL_8250_AU1X00)
index 938bc1b..e371a9c 100644 (file)
@@ -2776,6 +2776,9 @@ static struct pci_device_id serial_pci_tbl[] = {
        {       PCI_VENDOR_ID_OXSEMI, 0x950a,
                PCI_ANY_ID, PCI_ANY_ID, 0, 0,
                pbn_b0_2_1130000 },
+       {       PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_C950,
+               PCI_VENDOR_ID_OXSEMI, PCI_SUBDEVICE_ID_OXSEMI_C950, 0, 0,
+               pbn_b0_1_921600 },
        {       PCI_VENDOR_ID_OXSEMI, PCI_DEVICE_ID_OXSEMI_16PCI954,
                PCI_ANY_ID, PCI_ANY_ID, 0, 0,
                pbn_b0_4_115200 },
index 343e3a3..641e800 100644 (file)
@@ -833,6 +833,7 @@ config SERIAL_IMX
        bool "IMX serial port support"
        depends on ARM && (ARCH_IMX || ARCH_MXC)
        select SERIAL_CORE
+       select RATIONAL
        help
          If you have a machine based on a Motorola IMX CPU you
          can enable its onboard serial port by enabling this option.
@@ -1433,4 +1434,11 @@ config SPORT_BAUD_RATE
        default 19200 if (SERIAL_SPORT_BAUD_RATE_19200)
        default 9600 if (SERIAL_SPORT_BAUD_RATE_9600)
 
+config SERIAL_TIMBERDALE
+       tristate "Support for timberdale UART"
+       depends on MFD_TIMBERDALE
+       select SERIAL_CORE
+       ---help---
+       Add support for UART controller on timberdale.
+
 endmenu
index d438eb2..45a8658 100644 (file)
@@ -77,3 +77,4 @@ obj-$(CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL) += nwpserial.o
 obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
 obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
 obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
+obj-$(CONFIG_SERIAL_TIMBERDALE)        += timbuart.o
index d86123e..e2f6b1b 100644 (file)
@@ -330,6 +330,11 @@ static void bfin_serial_tx_chars(struct bfin_serial_port *uart)
                /* Clear TFI bit */
                UART_PUT_LSR(uart, TFI);
 #endif
+               /* Anomaly notes:
+                *  05000215 -  we always clear ETBEI within last UART TX
+                *              interrupt to end a string. It is always set
+                *              when starting a new tx.
+                */
                UART_CLEAR_IER(uart, ETBEI);
                return;
        }
@@ -415,6 +420,7 @@ static void bfin_serial_dma_tx_chars(struct bfin_serial_port *uart)
        set_dma_start_addr(uart->tx_dma_channel, (unsigned long)(xmit->buf+xmit->tail));
        set_dma_x_count(uart->tx_dma_channel, uart->tx_count);
        set_dma_x_modify(uart->tx_dma_channel, 1);
+       SSYNC();
        enable_dma(uart->tx_dma_channel);
 
        UART_SET_IER(uart, ETBEI);
@@ -473,27 +479,41 @@ static void bfin_serial_dma_rx_chars(struct bfin_serial_port *uart)
 void bfin_serial_rx_dma_timeout(struct bfin_serial_port *uart)
 {
        int x_pos, pos;
-       unsigned long flags;
-
-       spin_lock_irqsave(&uart->port.lock, flags);
 
+       dma_disable_irq(uart->rx_dma_channel);
+       spin_lock_bh(&uart->port.lock);
+
+       /* A 2D DMA RX buffer ring is used. Because curr_y_count and
+        * curr_x_count can't be read as one atomic operation,
+        * curr_y_count should be read before curr_x_count. By the time
+        * curr_x_count is read, curr_y_count may already indicate the
+        * next buffer line, but the position calculated here still
+        * indicates the old line. That wrong position may be smaller
+        * than the current buffer tail, which would cause garbage to
+        * be received if it were not prohibited.
+        */
        uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
        x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
        uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
-       if (uart->rx_dma_nrows == DMA_RX_YCOUNT)
+       if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
                uart->rx_dma_nrows = 0;
        x_pos = DMA_RX_XCOUNT - x_pos;
        if (x_pos == DMA_RX_XCOUNT)
                x_pos = 0;
 
        pos = uart->rx_dma_nrows * DMA_RX_XCOUNT + x_pos;
-       if (pos != uart->rx_dma_buf.tail) {
+       /* Ignore the received data if the new position is on the same
+        * line as the current buffer tail and is smaller.
+        */
+       if (pos > uart->rx_dma_buf.tail ||
+               uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
                uart->rx_dma_buf.head = pos;
                bfin_serial_dma_rx_chars(uart);
                uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
        }
 
-       spin_unlock_irqrestore(&uart->port.lock, flags);
+       spin_unlock_bh(&uart->port.lock);
+       dma_enable_irq(uart->rx_dma_channel);
 
        mod_timer(&(uart->rx_dma_timer), jiffies + DMA_RX_FLUSH_JIFFIES);
 }
@@ -514,6 +534,11 @@ static irqreturn_t bfin_serial_dma_tx_int(int irq, void *dev_id)
        if (!(get_dma_curr_irqstat(uart->tx_dma_channel)&DMA_RUN)) {
                disable_dma(uart->tx_dma_channel);
                clear_dma_irqstat(uart->tx_dma_channel);
+               /* Anomaly notes:
+                *  05000215 -  we always clear ETBEI within last UART TX
+                *              interrupt to end a string. It is always set
+                *              when starting a new tx.
+                */
                UART_CLEAR_IER(uart, ETBEI);
                xmit->tail = (xmit->tail + uart->tx_count) & (UART_XMIT_SIZE - 1);
                uart->port.icount.tx += uart->tx_count;
@@ -532,11 +557,26 @@ static irqreturn_t bfin_serial_dma_rx_int(int irq, void *dev_id)
 {
        struct bfin_serial_port *uart = dev_id;
        unsigned short irqstat;
+       int x_pos, pos;
 
        spin_lock(&uart->port.lock);
        irqstat = get_dma_curr_irqstat(uart->rx_dma_channel);
        clear_dma_irqstat(uart->rx_dma_channel);
-       bfin_serial_dma_rx_chars(uart);
+
+       uart->rx_dma_nrows = get_dma_curr_ycount(uart->rx_dma_channel);
+       x_pos = get_dma_curr_xcount(uart->rx_dma_channel);
+       uart->rx_dma_nrows = DMA_RX_YCOUNT - uart->rx_dma_nrows;
+       if (uart->rx_dma_nrows == DMA_RX_YCOUNT || x_pos == 0)
+               uart->rx_dma_nrows = 0;
+
+       pos = uart->rx_dma_nrows * DMA_RX_XCOUNT;
+       if (pos > uart->rx_dma_buf.tail ||
+               uart->rx_dma_nrows < (uart->rx_dma_buf.tail/DMA_RX_XCOUNT)) {
+               uart->rx_dma_buf.head = pos;
+               bfin_serial_dma_rx_chars(uart);
+               uart->rx_dma_buf.tail = uart->rx_dma_buf.head;
+       }
+
        spin_unlock(&uart->port.lock);
 
        return IRQ_HANDLED;
@@ -789,8 +829,16 @@ bfin_serial_set_termios(struct uart_port *port, struct ktermios *termios,
                        __func__);
        }
 
-       if (termios->c_cflag & CSTOPB)
-               lcr |= STB;
+       /* Anomaly notes:
+        *  05000231 -  STOP bit is always set to 1 whatever the user sets.
+        */
+       if (termios->c_cflag & CSTOPB) {
+               if (ANOMALY_05000231)
+                       printk(KERN_WARNING "STOP bits other than 1 is not "
+                               "supported in case of anomaly 05000231.\n");
+               else
+                       lcr |= STB;
+       }
        if (termios->c_cflag & PARENB)
                lcr |= PEN;
        if (!(termios->c_cflag & PARODD))
@@ -940,6 +988,10 @@ static void bfin_serial_reset_irda(struct uart_port *port)
 }
 
 #ifdef CONFIG_CONSOLE_POLL
+/* Anomaly notes:
+ *  05000099 -  Because we only use THRE in poll_put and DR in poll_get,
+ *             losing other bits of UART_LSR is not a problem here.
+ */
 static void bfin_serial_poll_put_char(struct uart_port *port, unsigned char chr)
 {
        struct bfin_serial_port *uart = (struct bfin_serial_port *)port;
@@ -1245,12 +1297,17 @@ static __init void early_serial_write(struct console *con, const char *s,
        }
 }
 
+/*
+ * This should have a .setup or .early_setup in it, but then things get called
+ * without the command line options, and the baud rate gets messed up - so
+ * don't let the common infrastructure play with things. (see calls to setup
+ * & earlysetup in ./kernel/printk.c:register_console())
+ */
 static struct __initdata console bfin_early_serial_console = {
        .name = "early_BFuart",
        .write = early_serial_write,
        .device = uart_console_device,
        .flags = CON_PRINTBUFFER,
-       .setup = bfin_serial_console_setup,
        .index = -1,
        .data  = &bfin_serial_reg,
 };
index 529c0ff..34b4ae0 100644 (file)
@@ -101,15 +101,16 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value)
 {
        pr_debug("%s value:%x\n", __func__, value);
        /* Place a Start and Stop bit */
-       __asm__ volatile (
-               "R2 = b#01111111100;\n\t"
-               "R3 = b#10000000001;\n\t"
-               "%0 <<= 2;\n\t"
-               "%0 = %0 & R2;\n\t"
-               "%0 = %0 | R3;\n\t"
-               :"=r"(value)
-               :"0"(value)
-               :"R2", "R3");
+       __asm__ __volatile__ (
+               "R2 = b#01111111100;"
+               "R3 = b#10000000001;"
+               "%0 <<= 2;"
+               "%0 = %0 & R2;"
+               "%0 = %0 | R3;"
+               : "=d"(value)
+               : "d"(value)
+               : "ASTAT", "R2", "R3"
+       );
        pr_debug("%s value:%x\n", __func__, value);
 
        SPORT_PUT_TX(up, value);
@@ -118,27 +119,30 @@ static inline void tx_one_byte(struct sport_uart_port *up, unsigned int value)
 static inline unsigned int rx_one_byte(struct sport_uart_port *up)
 {
        unsigned int value, extract;
+       u32 tmp_mask1, tmp_mask2, tmp_shift, tmp;
 
        value = SPORT_GET_RX32(up);
        pr_debug("%s value:%x\n", __func__, value);
 
        /* Extract 8 bits data */
-       __asm__ volatile (
-               "R5 = 0;\n\t"
-               "P0 = 8;\n\t"
-               "R1 = 0x1801(Z);\n\t"
-               "R3 = 0x0300(Z);\n\t"
-               "R4 = 0;\n\t"
-               "LSETUP(loop_s, loop_e) LC0 = P0;\nloop_s:\t"
-               "R2 = extract(%1, R1.L)(Z);\n\t"
-               "R2 <<= R4;\n\t"
-               "R5 = R5 | R2;\n\t"
-               "R1 = R1 - R3;\nloop_e:\t"
-               "R4 += 1;\n\t"
-               "%0 = R5;\n\t"
-               :"=r"(extract)
-               :"r"(value)
-               :"P0", "R1", "R2","R3","R4", "R5");
+       __asm__ __volatile__ (
+               "%[extr] = 0;"
+               "%[mask1] = 0x1801(Z);"
+               "%[mask2] = 0x0300(Z);"
+               "%[shift] = 0;"
+               "LSETUP(.Lloop_s, .Lloop_e) LC0 = %[lc];"
+               ".Lloop_s:"
+               "%[tmp] = extract(%[val], %[mask1].L)(Z);"
+               "%[tmp] <<= %[shift];"
+               "%[extr] = %[extr] | %[tmp];"
+               "%[mask1] = %[mask1] - %[mask2];"
+               ".Lloop_e:"
+               "%[shift] += 1;"
+               : [val]"=d"(value), [extr]"=d"(extract), [shift]"=d"(tmp_shift), [tmp]"=d"(tmp),
+                 [mask1]"=d"(tmp_mask1), [mask2]"=d"(tmp_mask2)
+               : "d"(value), [lc]"a"(8)
+               : "ASTAT", "LB0", "LC0", "LT0"
+       );
 
        pr_debug("      extract:%x\n", extract);
        return extract;
@@ -149,7 +153,7 @@ static int sport_uart_setup(struct sport_uart_port *up, int sclk, int baud_rate)
        int tclkdiv, tfsdiv, rclkdiv;
 
        /* Set TCR1 and TCR2 */
-       SPORT_PUT_TCR1(up, (LTFS | ITFS | TFSR | TLSBIT | ITCLK));
+       SPORT_PUT_TCR1(up, (LATFS | ITFS | TFSR | TLSBIT | ITCLK));
        SPORT_PUT_TCR2(up, 10);
        pr_debug("%s TCR1:%x, TCR2:%x\n", __func__, SPORT_GET_TCR1(up), SPORT_GET_TCR2(up));
 
@@ -419,7 +423,7 @@ static void sport_shutdown(struct uart_port *port)
 }
 
 static void sport_set_termios(struct uart_port *port,
-               struct termios *termios, struct termios *old)
+               struct ktermios *termios, struct ktermios *old)
 {
        pr_debug("%s enter, c_cflag:%08x\n", __func__, termios->c_cflag);
        uart_update_timeout(port, CS8 ,port->uartclk);
index a461b3b..9f2891c 100644 (file)
@@ -137,7 +137,12 @@ static LIST_HEAD(icom_adapter_head);
 static spinlock_t icom_lock;
 
 #ifdef ICOM_TRACE
-static inline void trace(struct icom_port *, char *, unsigned long) {};
+static inline void trace(struct icom_port *icom_port, char *trace_pt,
+                       unsigned long trace_data)
+{
+       dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
+       icom_port->port, trace_pt, trace_data);
+}
 #else
 static inline void trace(struct icom_port *icom_port, char *trace_pt, unsigned long trace_data) {};
 #endif
@@ -408,7 +413,7 @@ static void load_code(struct icom_port *icom_port)
        release_firmware(fw);
 
        /* Set Hardware level */
-       if ((icom_port->adapter->version | ADAPTER_V2) == ADAPTER_V2)
+       if (icom_port->adapter->version == ADAPTER_V2)
                writeb(V2_HARDWARE, &(icom_port->dram->misc_flags));
 
        /* Start the processor in Adapter */
@@ -861,7 +866,7 @@ static irqreturn_t icom_interrupt(int irq, void *dev_id)
        /* find icom_port for this interrupt */
        icom_adapter = (struct icom_adapter *) dev_id;
 
-       if ((icom_adapter->version | ADAPTER_V2) == ADAPTER_V2) {
+       if (icom_adapter->version == ADAPTER_V2) {
                int_reg = icom_adapter->base_addr + 0x8024;
 
                adapter_interrupts = readl(int_reg);
@@ -1647,15 +1652,6 @@ static void __exit icom_exit(void)
 module_init(icom_init);
 module_exit(icom_exit);
 
-#ifdef ICOM_TRACE
-static inline void trace(struct icom_port *icom_port, char *trace_pt,
-                 unsigned long trace_data)
-{
-       dev_info(&icom_port->adapter->pci_dev->dev, ":%d:%s - %lx\n",
-                icom_port->port, trace_pt, trace_data);
-}
-#endif
-
 MODULE_AUTHOR("Michael Anderson <mjanders@us.ibm.com>");
 MODULE_DESCRIPTION("IBM iSeries Serial IOA driver");
 MODULE_SUPPORTED_DEVICE
index 9f460b1..7b5d1de 100644 (file)
@@ -8,6 +8,9 @@
  *  Author: Sascha Hauer <sascha@saschahauer.de>
  *  Copyright (C) 2004 Pengutronix
  *
+ *  Copyright (C) 2009 emlix GmbH
+ *  Author: Fabian Godehardt (added IrDA support for iMX)
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -41,6 +44,8 @@
 #include <linux/serial_core.h>
 #include <linux/serial.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/rational.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
 #define  UCR4_DREN      (1<<0)  /* Recv data ready interrupt enable */
 #define  UFCR_RXTL_SHF   0       /* Receiver trigger level shift */
 #define  UFCR_RFDIV      (7<<7)  /* Reference freq divider mask */
+#define  UFCR_RFDIV_REG(x)     (((x) < 7 ? 6 - (x) : 6) << 7)
 #define  UFCR_TXTL_SHF   10      /* Transmitter trigger level shift */
 #define  USR1_PARITYERR  (1<<15) /* Parity error interrupt flag */
 #define  USR1_RTSS      (1<<14) /* RTS pin status */
@@ -211,10 +217,20 @@ struct imx_port {
        struct timer_list       timer;
        unsigned int            old_status;
        int                     txirq,rxirq,rtsirq;
-       int                     have_rtscts:1;
+       unsigned int            have_rtscts:1;
+       unsigned int            use_irda:1;
+       unsigned int            irda_inv_rx:1;
+       unsigned int            irda_inv_tx:1;
+       unsigned short          trcv_delay; /* transceiver delay */
        struct clk              *clk;
 };
 
+#ifdef CONFIG_IRDA
+#define USE_IRDA(sport)        ((sport)->use_irda)
+#else
+#define USE_IRDA(sport)        (0)
+#endif
+
 /*
  * Handle any change of modem status signal since we were last called.
  */
@@ -268,6 +284,48 @@ static void imx_stop_tx(struct uart_port *port)
        struct imx_port *sport = (struct imx_port *)port;
        unsigned long temp;
 
+       if (USE_IRDA(sport)) {
+               /* half duplex - wait for end of transmission */
+               int n = 256;
+               while ((--n > 0) &&
+                     !(readl(sport->port.membase + USR2) & USR2_TXDC)) {
+                       udelay(5);
+                       barrier();
+               }
+               /*
+                * irda transceiver - wait a bit more to avoid
+                * cutoff, hardware dependent
+                */
+               udelay(sport->trcv_delay);
+
+               /*
+                * half duplex - reactivate receive mode,
+                * flush receive pipe echo crap
+                */
+               if (readl(sport->port.membase + USR2) & USR2_TXDC) {
+                       temp = readl(sport->port.membase + UCR1);
+                       temp &= ~(UCR1_TXMPTYEN | UCR1_TRDYEN);
+                       writel(temp, sport->port.membase + UCR1);
+
+                       temp = readl(sport->port.membase + UCR4);
+                       temp &= ~(UCR4_TCEN);
+                       writel(temp, sport->port.membase + UCR4);
+
+                       while (readl(sport->port.membase + URXD0) &
+                              URXD_CHARRDY)
+                               barrier();
+
+                       temp = readl(sport->port.membase + UCR1);
+                       temp |= UCR1_RRDYEN;
+                       writel(temp, sport->port.membase + UCR1);
+
+                       temp = readl(sport->port.membase + UCR4);
+                       temp |= UCR4_DREN;
+                       writel(temp, sport->port.membase + UCR4);
+               }
+               return;
+       }
+
        temp = readl(sport->port.membase + UCR1);
        writel(temp & ~UCR1_TXMPTYEN, sport->port.membase + UCR1);
 }
@@ -302,13 +360,15 @@ static inline void imx_transmit_buffer(struct imx_port *sport)
                /* send xmit->buf[xmit->tail]
                 * out the port here */
                writel(xmit->buf[xmit->tail], sport->port.membase + URTX0);
-               xmit->tail = (xmit->tail + 1) &
-                        (UART_XMIT_SIZE - 1);
+               xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
                sport->port.icount.tx++;
                if (uart_circ_empty(xmit))
                        break;
        }
 
+       if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+               uart_write_wakeup(&sport->port);
+
        if (uart_circ_empty(xmit))
                imx_stop_tx(&sport->port);
 }
@@ -321,9 +381,30 @@ static void imx_start_tx(struct uart_port *port)
        struct imx_port *sport = (struct imx_port *)port;
        unsigned long temp;
 
+       if (USE_IRDA(sport)) {
+               /* half duplex in IrDA mode; have to disable receive mode */
+               temp = readl(sport->port.membase + UCR4);
+               temp &= ~(UCR4_DREN);
+               writel(temp, sport->port.membase + UCR4);
+
+               temp = readl(sport->port.membase + UCR1);
+               temp &= ~(UCR1_RRDYEN);
+               writel(temp, sport->port.membase + UCR1);
+       }
+
        temp = readl(sport->port.membase + UCR1);
        writel(temp | UCR1_TXMPTYEN, sport->port.membase + UCR1);
 
+       if (USE_IRDA(sport)) {
+               temp = readl(sport->port.membase + UCR1);
+               temp |= UCR1_TRDYEN;
+               writel(temp, sport->port.membase + UCR1);
+
+               temp = readl(sport->port.membase + UCR4);
+               temp |= UCR4_TCEN;
+               writel(temp, sport->port.membase + UCR4);
+       }
+
        if (readl(sport->port.membase + UTS) & UTS_TXEMPTY)
                imx_transmit_buffer(sport);
 }
@@ -395,8 +476,7 @@ static irqreturn_t imx_rxint(int irq, void *dev_id)
                                continue;
                }
 
-               if (uart_handle_sysrq_char
-                           (&sport->port, (unsigned char)rx))
+               if (uart_handle_sysrq_char(&sport->port, (unsigned char)rx))
                        continue;
 
                if (rx & (URXD_PRERR | URXD_OVRRUN | URXD_FRMERR) ) {
@@ -471,26 +551,26 @@ static unsigned int imx_tx_empty(struct uart_port *port)
  */
 static unsigned int imx_get_mctrl(struct uart_port *port)
 {
-        struct imx_port *sport = (struct imx_port *)port;
-        unsigned int tmp = TIOCM_DSR | TIOCM_CAR;
+       struct imx_port *sport = (struct imx_port *)port;
+       unsigned int tmp = TIOCM_DSR | TIOCM_CAR;
 
-        if (readl(sport->port.membase + USR1) & USR1_RTSS)
-                tmp |= TIOCM_CTS;
+       if (readl(sport->port.membase + USR1) & USR1_RTSS)
+               tmp |= TIOCM_CTS;
 
-        if (readl(sport->port.membase + UCR2) & UCR2_CTS)
-                tmp |= TIOCM_RTS;
+       if (readl(sport->port.membase + UCR2) & UCR2_CTS)
+               tmp |= TIOCM_RTS;
 
-        return tmp;
+       return tmp;
 }
 
 static void imx_set_mctrl(struct uart_port *port, unsigned int mctrl)
 {
-        struct imx_port *sport = (struct imx_port *)port;
+       struct imx_port *sport = (struct imx_port *)port;
        unsigned long temp;
 
        temp = readl(sport->port.membase + UCR2) & ~UCR2_CTS;
 
-        if (mctrl & TIOCM_RTS)
+       if (mctrl & TIOCM_RTS)
                temp |= UCR2_CTS;
 
        writel(temp, sport->port.membase + UCR2);
@@ -534,12 +614,7 @@ static int imx_setup_ufcr(struct imx_port *sport, unsigned int mode)
        if(!ufcr_rfdiv)
                ufcr_rfdiv = 1;
 
-       if(ufcr_rfdiv >= 7)
-               ufcr_rfdiv = 6;
-       else
-               ufcr_rfdiv = 6 - ufcr_rfdiv;
-
-       val |= UFCR_RFDIV & (ufcr_rfdiv << 7);
+       val |= UFCR_RFDIV_REG(ufcr_rfdiv);
 
        writel(val, sport->port.membase + UFCR);
 
@@ -558,8 +633,24 @@ static int imx_startup(struct uart_port *port)
         * requesting IRQs
         */
        temp = readl(sport->port.membase + UCR4);
+
+       if (USE_IRDA(sport))
+               temp |= UCR4_IRSC;
+
        writel(temp & ~UCR4_DREN, sport->port.membase + UCR4);
 
+       if (USE_IRDA(sport)) {
+               /* reset FIFOs and state machines */
+               int i = 100;
+               temp = readl(sport->port.membase + UCR2);
+               temp &= ~UCR2_SRST;
+               writel(temp, sport->port.membase + UCR2);
+               while (!(readl(sport->port.membase + UCR2) & UCR2_SRST) &&
+                   (--i > 0)) {
+                       udelay(1);
+               }
+       }
+
        /*
         * Allocate the IRQ(s) i.MX1 has three interrupts whereas later
         * chips only have one interrupt.
@@ -575,12 +666,16 @@ static int imx_startup(struct uart_port *port)
                if (retval)
                        goto error_out2;
 
-               retval = request_irq(sport->rtsirq, imx_rtsint,
-                            (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 :
-                              IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
-                               DRIVER_NAME, sport);
-               if (retval)
-                       goto error_out3;
+               /* do not use RTS IRQ on IrDA */
+               if (!USE_IRDA(sport)) {
+                       retval = request_irq(sport->rtsirq, imx_rtsint,
+                                    (sport->rtsirq < MAX_INTERNAL_IRQ) ? 0 :
+                                      IRQF_TRIGGER_FALLING |
+                                      IRQF_TRIGGER_RISING,
+                                       DRIVER_NAME, sport);
+                       if (retval)
+                               goto error_out3;
+               }
        } else {
                retval = request_irq(sport->port.irq, imx_int, 0,
                                DRIVER_NAME, sport);
@@ -597,18 +692,49 @@ static int imx_startup(struct uart_port *port)
 
        temp = readl(sport->port.membase + UCR1);
        temp |= UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN;
+
+       if (USE_IRDA(sport)) {
+               temp |= UCR1_IREN;
+               temp &= ~(UCR1_RTSDEN);
+       }
+
        writel(temp, sport->port.membase + UCR1);
 
        temp = readl(sport->port.membase + UCR2);
        temp |= (UCR2_RXEN | UCR2_TXEN);
        writel(temp, sport->port.membase + UCR2);
 
+       if (USE_IRDA(sport)) {
+               /* clear RX-FIFO */
+               int i = 64;
+               while ((--i > 0) &&
+                       (readl(sport->port.membase + URXD0) & URXD_CHARRDY)) {
+                       barrier();
+               }
+       }
+
 #if defined CONFIG_ARCH_MX2 || defined CONFIG_ARCH_MX3
        temp = readl(sport->port.membase + UCR3);
        temp |= UCR3_RXDMUXSEL;
        writel(temp, sport->port.membase + UCR3);
 #endif
 
+       if (USE_IRDA(sport)) {
+               temp = readl(sport->port.membase + UCR4);
+               if (sport->irda_inv_rx)
+                       temp |= UCR4_INVR;
+               else
+                       temp &= ~(UCR4_INVR);
+               writel(temp | UCR4_DREN, sport->port.membase + UCR4);
+
+               temp = readl(sport->port.membase + UCR3);
+               if (sport->irda_inv_tx)
+                       temp |= UCR3_INVT;
+               else
+                       temp &= ~(UCR3_INVT);
+               writel(temp, sport->port.membase + UCR3);
+       }
+
        /*
         * Enable modem status interrupts
         */
@@ -616,6 +742,16 @@ static int imx_startup(struct uart_port *port)
        imx_enable_ms(&sport->port);
        spin_unlock_irqrestore(&sport->port.lock,flags);
 
+       if (USE_IRDA(sport)) {
+               struct imxuart_platform_data *pdata;
+               pdata = sport->port.dev->platform_data;
+               sport->irda_inv_rx = pdata->irda_inv_rx;
+               sport->irda_inv_tx = pdata->irda_inv_tx;
+               sport->trcv_delay = pdata->transceiver_delay;
+               if (pdata->irda_enable)
+                       pdata->irda_enable(1);
+       }
+
        return 0;
 
 error_out3:
@@ -633,6 +769,17 @@ static void imx_shutdown(struct uart_port *port)
        struct imx_port *sport = (struct imx_port *)port;
        unsigned long temp;
 
+       temp = readl(sport->port.membase + UCR2);
+       temp &= ~(UCR2_TXEN);
+       writel(temp, sport->port.membase + UCR2);
+
+       if (USE_IRDA(sport)) {
+               struct imxuart_platform_data *pdata;
+               pdata = sport->port.dev->platform_data;
+               if (pdata->irda_enable)
+                       pdata->irda_enable(0);
+       }
+
        /*
         * Stop our timer.
         */
@@ -642,7 +789,8 @@ static void imx_shutdown(struct uart_port *port)
         * Free the interrupts
         */
        if (sport->txirq > 0) {
-               free_irq(sport->rtsirq, sport);
+               if (!USE_IRDA(sport))
+                       free_irq(sport->rtsirq, sport);
                free_irq(sport->txirq, sport);
                free_irq(sport->rxirq, sport);
        } else
@@ -654,6 +802,9 @@ static void imx_shutdown(struct uart_port *port)
 
        temp = readl(sport->port.membase + UCR1);
        temp &= ~(UCR1_TXMPTYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN);
+       if (USE_IRDA(sport))
+               temp &= ~(UCR1_IREN);
+
        writel(temp, sport->port.membase + UCR1);
 }
 
@@ -665,7 +816,9 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
        unsigned long flags;
        unsigned int ucr2, old_ucr1, old_txrxen, baud, quot;
        unsigned int old_csize = old ? old->c_cflag & CSIZE : CS8;
-       unsigned int div, num, denom, ufcr;
+       unsigned int div, ufcr;
+       unsigned long num, denom;
+       uint64_t tdiv64;
 
        /*
         * If we don't support modem control lines, don't allow
@@ -761,38 +914,39 @@ imx_set_termios(struct uart_port *port, struct ktermios *termios,
                        sport->port.membase + UCR2);
        old_txrxen &= (UCR2_TXEN | UCR2_RXEN);
 
-       div = sport->port.uartclk / (baud * 16);
-       if (div > 7)
-               div = 7;
-       if (!div)
+       if (USE_IRDA(sport)) {
+               /*
+                * use maximum available submodule frequency to
+                * avoid missing short pulses due to low sampling rate
+                */
                div = 1;
-
-       num = baud;
-       denom = port->uartclk / div / 16;
-
-       /* shift num and denom right until they fit into 16 bits */
-       while (num > 0x10000 || denom > 0x10000) {
-               num >>= 1;
-               denom >>= 1;
+       } else {
+               div = sport->port.uartclk / (baud * 16);
+               if (div > 7)
+                       div = 7;
+               if (!div)
+                       div = 1;
        }
-       if (num > 0)
-               num -= 1;
-       if (denom > 0)
-               denom -= 1;
 
-       writel(num, sport->port.membase + UBIR);
-       writel(denom, sport->port.membase + UBMR);
+       rational_best_approximation(16 * div * baud, sport->port.uartclk,
+               1 << 16, 1 << 16, &num, &denom);
 
-       if (div == 7)
-               div = 6; /* 6 in RFDIV means divide by 7 */
-       else
-               div = 6 - div;
+       tdiv64 = sport->port.uartclk;
+       tdiv64 *= num;
+       do_div(tdiv64, denom * 16 * div);
+       tty_encode_baud_rate(sport->port.info->port.tty,
+               (speed_t)tdiv64, (speed_t)tdiv64);
+
+       num -= 1;
+       denom -= 1;
 
        ufcr = readl(sport->port.membase + UFCR);
-       ufcr = (ufcr & (~UFCR_RFDIV)) |
-           (div << 7);
+       ufcr = (ufcr & (~UFCR_RFDIV)) | UFCR_RFDIV_REG(div);
        writel(ufcr, sport->port.membase + UFCR);
 
+       writel(num, sport->port.membase + UBIR);
+       writel(denom, sport->port.membase + UBMR);
+
 #ifdef ONEMS
        writel(sport->port.uartclk / div / 1000, sport->port.membase + ONEMS);
 #endif
@@ -1031,6 +1185,8 @@ imx_console_setup(struct console *co, char *options)
        if (co->index == -1 || co->index >= ARRAY_SIZE(imx_ports))
                co->index = 0;
        sport = imx_ports[co->index];
+       if(sport == NULL)
+               return -ENODEV;
 
        if (options)
                uart_parse_options(options, &baud, &parity, &bits, &flow);
@@ -1070,22 +1226,22 @@ static struct uart_driver imx_reg = {
 
 static int serial_imx_suspend(struct platform_device *dev, pm_message_t state)
 {
-        struct imx_port *sport = platform_get_drvdata(dev);
+       struct imx_port *sport = platform_get_drvdata(dev);
 
-        if (sport)
-                uart_suspend_port(&imx_reg, &sport->port);
+       if (sport)
+               uart_suspend_port(&imx_reg, &sport->port);
 
-        return 0;
+       return 0;
 }
 
 static int serial_imx_resume(struct platform_device *dev)
 {
-        struct imx_port *sport = platform_get_drvdata(dev);
+       struct imx_port *sport = platform_get_drvdata(dev);
 
-        if (sport)
-                uart_resume_port(&imx_reg, &sport->port);
+       if (sport)
+               uart_resume_port(&imx_reg, &sport->port);
 
-        return 0;
+       return 0;
 }
 
 static int serial_imx_probe(struct platform_device *pdev)
@@ -1141,19 +1297,29 @@ static int serial_imx_probe(struct platform_device *pdev)
        imx_ports[pdev->id] = sport;
 
        pdata = pdev->dev.platform_data;
-       if(pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
+       if (pdata && (pdata->flags & IMXUART_HAVE_RTSCTS))
                sport->have_rtscts = 1;
 
+#ifdef CONFIG_IRDA
+       if (pdata && (pdata->flags & IMXUART_IRDA))
+               sport->use_irda = 1;
+#endif
+
        if (pdata->init) {
                ret = pdata->init(pdev);
                if (ret)
                        goto clkput;
        }
 
-       uart_add_one_port(&imx_reg, &sport->port);
+       ret = uart_add_one_port(&imx_reg, &sport->port);
+       if (ret)
+               goto deinit;
        platform_set_drvdata(pdev, &sport->port);
 
        return 0;
+deinit:
+       if (pdata->exit)
+               pdata->exit(pdev);
 clkput:
        clk_put(sport->clk);
        clk_disable(sport->clk);
@@ -1191,13 +1357,13 @@ static int serial_imx_remove(struct platform_device *pdev)
 }
 
 static struct platform_driver serial_imx_driver = {
-        .probe          = serial_imx_probe,
-        .remove         = serial_imx_remove,
+       .probe          = serial_imx_probe,
+       .remove         = serial_imx_remove,
 
        .suspend        = serial_imx_suspend,
        .resume         = serial_imx_resume,
        .driver         = {
-               .name   = "imx-uart",
+               .name   = "imx-uart",
                .owner  = THIS_MODULE,
        },
 };
index c0a3e27..4e5f3bd 100644 (file)
@@ -61,6 +61,7 @@ enum {
        if ((DBG_##nlevel & jsm_debug))                 \
        dev_printk(KERN_##klevel, pdev->dev, fmt, ## args)
 
+#define        MAXLINES        256
 #define MAXPORTS       8
 #define MAX_STOPS_SENT 5
 
index 31496dc..107ce2e 100644 (file)
@@ -33,6 +33,8 @@
 
 #include "jsm.h"
 
+static DECLARE_BITMAP(linemap, MAXLINES);
+
 static void jsm_carrier(struct jsm_channel *ch);
 
 static inline int jsm_get_mstat(struct jsm_channel *ch)
@@ -433,6 +435,7 @@ int __devinit jsm_tty_init(struct jsm_board *brd)
 int __devinit jsm_uart_port_init(struct jsm_board *brd)
 {
        int i;
+       unsigned int line;
        struct jsm_channel *ch;
 
        if (!brd)
@@ -459,9 +462,15 @@ int __devinit jsm_uart_port_init(struct jsm_board *brd)
                brd->channels[i]->uart_port.membase = brd->re_map_membase;
                brd->channels[i]->uart_port.fifosize = 16;
                brd->channels[i]->uart_port.ops = &jsm_ops;
-               brd->channels[i]->uart_port.line = brd->channels[i]->ch_portnum + brd->boardnum * 2;
+               line = find_first_zero_bit(linemap, MAXLINES);
+               if (line >= MAXLINES) {
+                       printk(KERN_INFO "jsm: linemap is full, added device failed\n");
+                       continue;
+               } else
+                       set_bit((int)line, linemap);
+               brd->channels[i]->uart_port.line = line;
                if (uart_add_one_port (&jsm_uart_driver, &brd->channels[i]->uart_port))
-                       printk(KERN_INFO "Added device failed\n");
+                       printk(KERN_INFO "jsm: add device failed\n");
                else
                        printk(KERN_INFO "Added device \n");
        }
@@ -494,6 +503,7 @@ int jsm_remove_uart_port(struct jsm_board *brd)
 
                ch = brd->channels[i];
 
+               clear_bit((int)(ch->uart_port.line), linemap);
                uart_remove_one_port(&jsm_uart_driver, &brd->channels[i]->uart_port);
        }
 
diff --git a/drivers/serial/timbuart.c b/drivers/serial/timbuart.c
new file mode 100644 (file)
index 0000000..ac9e5d5
--- /dev/null
@@ -0,0 +1,526 @@
+/*
+ * timbuart.c timberdale FPGA UART driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/serial_core.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+
+#include "timbuart.h"
+
+/* Per-device driver state, wrapping the serial core's uart_port. */
+struct timbuart_port {
+       /* embedded port; container_of() on it recovers this structure */
+       struct uart_port        port;
+       /* bottom half, scheduled from start_tx and from the IRQ handler */
+       struct tasklet_struct   tasklet;
+       /* set to 0 in probe; when non-zero the tasklet skips TX/RX handling */
+       int                     usedma;
+       /* IER value saved by the IRQ handler before it masks all interrupts */
+       u8                      last_ier;
+       /* NOTE(review): never assigned in the code visible in this file */
+       struct platform_device  *dev;
+};
+
+/*
+ * Supported baud rates in ascending order.  The index of the selected entry
+ * (see get_bindex()) is the value written to the TIMBUART_BAUDRATE register
+ * by timbuart_set_termios().
+ */
+static int baudrates[] = {9600, 19200, 38400, 57600, 115200, 230400, 460800,
+       921600, 1843200, 3250000};
+
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier);
+
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid);
+
+/* Mask every RX interrupt source in the interrupt enable register. */
+static void timbuart_stop_rx(struct uart_port *port)
+{
+       u8 enabled;
+
+       /* the upper layer already holds the port spin lock */
+       enabled = ioread8(port->membase + TIMBUART_IER);
+       enabled &= ~RXFLAGS;
+       iowrite8(enabled, port->membase + TIMBUART_IER);
+}
+
+/* Mask the TXBAE interrupt; all other enable bits are left untouched. */
+static void timbuart_stop_tx(struct uart_port *port)
+{
+       u8 enabled;
+
+       /* the upper layer already holds the port spin lock */
+       enabled = ioread8(port->membase + TIMBUART_IER);
+       enabled &= ~TXBAE;
+       iowrite8(enabled, port->membase + TIMBUART_IER);
+}
+
+/* Kick transmission by scheduling the tasklet; no FIFO access happens here. */
+static void timbuart_start_tx(struct uart_port *port)
+{
+       struct timbuart_port *tp;
+
+       tp = container_of(port, struct timbuart_port, port);
+
+       /* the actual transfer is done by the bottom half */
+       tasklet_schedule(&tp->tasklet);
+}
+
+/*
+ * Set the FLSHTX bit in the control register (keeping the other control
+ * bits as read) and then write TXBF to the interrupt status register.
+ */
+static void timbuart_flush_buffer(struct uart_port *port)
+{
+       u8 ctrl;
+
+       ctrl = ioread8(port->membase + TIMBUART_CTRL);
+       ctrl |= TIMBUART_CTRL_FLSHTX;
+       iowrite8(ctrl, port->membase + TIMBUART_CTRL);
+
+       iowrite8(TXBF, port->membase + TIMBUART_ISR);
+}
+
+/*
+ * Drain the RX FIFO into the tty flip buffer for as long as the status
+ * register reports RXDP, then push the buffer to the line discipline.
+ * Called with port->lock held; the lock is dropped around the push.
+ */
+static void timbuart_rx_chars(struct uart_port *port)
+{
+       struct tty_struct *tty = port->info->port.tty;
+
+       for (;;) {
+               u8 byte;
+
+               if (!(ioread8(port->membase + TIMBUART_ISR) & RXDP))
+                       break;
+
+               byte = ioread8(port->membase + TIMBUART_RXFIFO);
+               port->icount.rx++;
+               tty_insert_flip_char(tty, byte, TTY_NORMAL);
+       }
+
+       spin_unlock(&port->lock);
+       tty_flip_buffer_push(port->info->port.tty);
+       spin_lock(&port->lock);
+
+       dev_dbg(port->dev, "%s - total read %d bytes\n",
+               __func__, port->icount.rx);
+}
+
+/*
+ * Copy bytes from the circular transmit buffer into the TX FIFO until the
+ * status register reports TXBF or the buffer runs empty.
+ */
+static void timbuart_tx_chars(struct uart_port *port)
+{
+       struct circ_buf *ring = &port->info->xmit;
+
+       for (;;) {
+               /* check the hardware first, then the software buffer */
+               if (ioread8(port->membase + TIMBUART_ISR) & TXBF)
+                       break;
+               if (uart_circ_empty(ring))
+                       break;
+
+               iowrite8(ring->buf[ring->tail],
+                       port->membase + TIMBUART_TXFIFO);
+               ring->tail = (ring->tail + 1) & (UART_XMIT_SIZE - 1);
+               port->icount.tx++;
+       }
+
+       dev_dbg(port->dev,
+               "%s - total written %d bytes, CTL: %x, RTS: %x, baud: %x\n",
+                __func__,
+               port->icount.tx,
+               ioread8(port->membase + TIMBUART_CTRL),
+               port->mctrl & TIOCM_RTS,
+               ioread8(port->membase + TIMBUART_BAUDRATE));
+}
+
+/*
+ * TX half of the tasklet.
+ *
+ * @port: port being serviced
+ * @isr:  interrupt status value read by the tasklet
+ * @ier:  interrupt enable mask being built by the tasklet; TX bits are
+ *        OR-ed into it here
+ *
+ * Returns without touching *ier when there is nothing to send, when
+ * transmission is stopped, or when an x_char is pending.
+ */
+static void timbuart_handle_tx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+       struct timbuart_port *uart =
+               container_of(port, struct timbuart_port, port);
+       struct circ_buf *xmit = &port->info->xmit;
+
+       if (uart_circ_empty(xmit) || uart_tx_stopped(port))
+               return;
+
+       /* NOTE(review): x_char is never written to the FIFO in this file */
+       if (port->x_char)
+               return;
+
+       if (isr & TXFLAGS) {
+               timbuart_tx_chars(port);
+               /* clear all TX interrupts */
+               iowrite8(TXFLAGS, port->membase + TIMBUART_ISR);
+
+               /* let writers queue more once the buffer has drained enough */
+               if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+                       uart_write_wakeup(port);
+       } else
+               /* Re-enable any tx interrupt */
+               *ier |= uart->last_ier & TXFLAGS;
+
+       /* enable interrupts if there are chars in the transmit buffer,
+        * Or if we delivered some bytes and want the almost empty interrupt
+        * we wake up the upper layer later when we got the interrupt
+        * to give it some time to go out...
+        */
+       if (!uart_circ_empty(xmit))
+               *ier |= TXBAE;
+
+       dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+/*
+ * RX half of the tasklet.
+ *
+ * @port: port being serviced
+ * @isr:  interrupt status value read by the tasklet
+ * @ier:  interrupt enable mask being built by the tasklet; the RX enable
+ *        bits are always OR-ed into it
+ *
+ * When RXBF is set the RX FIFO is flushed and counted as an overrun;
+ * otherwise pending data (RXDP) is read.  Every RX status bit is then
+ * acknowledged.
+ */
+void timbuart_handle_rx_port(struct uart_port *port, u8 isr, u8 *ier)
+{
+       if (isr & RXFLAGS) {
+               /* Some RX status is set */
+               if (isr & RXBF) {
+                       /* flush the RX FIFO; its contents are discarded */
+                       u8 ctl = ioread8(port->membase + TIMBUART_CTRL) |
+                               TIMBUART_CTRL_FLSHRX;
+                       iowrite8(ctl, port->membase + TIMBUART_CTRL);
+                       port->icount.overrun++;
+               } else if (isr & (RXDP))
+                       timbuart_rx_chars(port);
+
+               /* ack all RX interrupts */
+               iowrite8(RXFLAGS, port->membase + TIMBUART_ISR);
+       }
+
+       /* always have the RX interrupts enabled */
+       *ier |= RXBAF | RXBF | RXTT;
+
+       dev_dbg(port->dev, "%s - leaving\n", __func__);
+}
+
+/*
+ * Bottom half, scheduled from the IRQ handler and from start_tx.
+ *
+ * @arg: the struct timbuart_port pointer passed to tasklet_init(), cast to
+ *       unsigned long
+ *
+ * Reads the interrupt status once, lets the TX, modem-control and RX
+ * helpers act on it while each contributes bits to a fresh enable mask,
+ * and finally writes that mask to the IER (the IRQ handler zeroed it).
+ */
+void timbuart_tasklet(unsigned long arg)
+{
+       struct timbuart_port *uart = (struct timbuart_port *)arg;
+       /* the enable mask is rebuilt from zero on every run */
+       u8 isr, ier = 0;
+
+       spin_lock(&uart->port.lock);
+
+       isr = ioread8(uart->port.membase + TIMBUART_ISR);
+       dev_dbg(uart->port.dev, "%s ISR: %x\n", __func__, isr);
+
+       if (!uart->usedma)
+               timbuart_handle_tx_port(&uart->port, isr, &ier);
+
+       timbuart_mctrl_check(&uart->port, isr, &ier);
+
+       if (!uart->usedma)
+               timbuart_handle_rx_port(&uart->port, isr, &ier);
+
+       iowrite8(ier, uart->port.membase + TIMBUART_IER);
+
+       spin_unlock(&uart->port.lock);
+       dev_dbg(uart->port.dev, "%s leaving\n", __func__);
+}
+
+/*
+ * Report TIOCSER_TEMT when the TXBAE status bit is set, 0 otherwise.
+ *
+ * NOTE(review): TXBAE and a separate TXBE bit both exist in timbuart.h;
+ * confirm that TXBAE is the intended "transmitter empty" indication.
+ */
+static unsigned int timbuart_tx_empty(struct uart_port *port)
+{
+       u8 status = ioread8(port->membase + TIMBUART_ISR);
+
+       if (status & TXBAE)
+               return TIOCSER_TEMT;
+
+       return 0;
+}
+
+/*
+ * Return the modem status lines.  DSR and CAR are always reported as
+ * asserted; CTS follows the CTS bit of the control register.
+ */
+static unsigned int timbuart_get_mctrl(struct uart_port *port)
+{
+       unsigned int lines = TIOCM_DSR | TIOCM_CAR;
+       u8 ctrl = ioread8(port->membase + TIMBUART_CTRL);
+
+       dev_dbg(port->dev, "%s - cts %x\n", __func__, ctrl);
+
+       if (ctrl & TIMBUART_CTRL_CTS)
+               lines |= TIOCM_CTS;
+
+       return lines;
+}
+
+/*
+ * Set the modem control lines.  RTS is the only line written here.
+ *
+ * @port:  port to update
+ * @mctrl: TIOCM_* bit mask requested by the serial core
+ *
+ * The control register is written as a whole: TIMBUART_CTRL_RTS when RTS
+ * is requested, 0 when it is not.
+ */
+static void timbuart_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+       dev_dbg(port->dev, "%s - %x\n", __func__, mctrl);
+
+       if (mctrl & TIOCM_RTS)
+               iowrite8(TIMBUART_CTRL_RTS, port->membase + TIMBUART_CTRL);
+       else
+               /* deassert RTS; writing the RTS bit here would keep it set */
+               iowrite8(0, port->membase + TIMBUART_CTRL);
+}
+
+/*
+ * Modem-control part of the tasklet: on a CTS change, acknowledge it,
+ * forward the new CTS state to the serial core and wake anyone waiting on
+ * delta_msr_wait.  The CTS_DELTA interrupt is always (re-)enabled in *ier.
+ */
+static void timbuart_mctrl_check(struct uart_port *port, u8 isr, u8 *ier)
+{
+       if (isr & CTS_DELTA) {
+               unsigned int status;
+
+               /* acknowledge before sampling the line */
+               iowrite8(CTS_DELTA, port->membase + TIMBUART_ISR);
+
+               status = timbuart_get_mctrl(port);
+               uart_handle_cts_change(port, status & TIOCM_CTS);
+               wake_up_interruptible(&port->info->delta_msr_wait);
+       }
+
+       *ier |= CTS_DELTA;
+}
+
+/* uart_ops.enable_ms hook: intentionally does nothing. */
+static void timbuart_enable_ms(struct uart_port *port)
+{
+       /* N/A */
+}
+
+/* uart_ops.break_ctl hook: intentionally does nothing; ctl is ignored. */
+static void timbuart_break_ctl(struct uart_port *port, int ctl)
+{
+       /* N/A */
+}
+
+/*
+ * uart_ops.startup hook: flush the RX FIFO, write 0xff to the interrupt
+ * status register, enable the RX and CTS-change interrupts and request the
+ * (shared) interrupt line with the timbuart_port as device id.
+ *
+ * Returns the result of request_irq().
+ */
+static int timbuart_startup(struct uart_port *port)
+{
+       struct timbuart_port *uart =
+               container_of(port, struct timbuart_port, port);
+
+       dev_dbg(port->dev, "%s\n", __func__);
+
+       iowrite8(TIMBUART_CTRL_FLSHRX, port->membase + TIMBUART_CTRL);
+       /* presumably acks every pending status bit, as the other ISR writes do */
+       iowrite8(0xff, port->membase + TIMBUART_ISR);
+       /* Enable all but TX interrupts */
+       iowrite8(RXBAF | RXBF | RXTT | CTS_DELTA,
+               port->membase + TIMBUART_IER);
+
+       return request_irq(port->irq, timbuart_handleinterrupt, IRQF_SHARED,
+               "timb-uart", uart);
+}
+
+/*
+ * uart_ops.shutdown hook: release the interrupt line requested in
+ * timbuart_startup(), then clear the interrupt enable register.
+ */
+static void timbuart_shutdown(struct uart_port *port)
+{
+       struct timbuart_port *uart =
+               container_of(port, struct timbuart_port, port);
+       dev_dbg(port->dev, "%s\n", __func__);
+       /* same device id as passed to request_irq() */
+       free_irq(port->irq, uart);
+       iowrite8(0, port->membase + TIMBUART_IER);
+}
+
+/*
+ * Map a baud rate to an index into baudrates[].
+ *
+ * Returns the index of the first table entry that is not smaller than
+ * @baud, or -1 when @baud is larger than every entry.
+ */
+static int get_bindex(int baud)
+{
+       int idx = 0;
+
+       while (idx < ARRAY_SIZE(baudrates)) {
+               if (baudrates[idx] >= baud)
+                       return idx;
+               idx++;
+       }
+
+       return -1;
+}
+
+/*
+ * uart_ops.set_termios hook.  Only the baud rate is programmed into the
+ * hardware.
+ *
+ * @port:    port to configure
+ * @termios: requested settings; updated to reflect what was applied
+ * @old:     previous settings, or NULL on the initial call
+ *
+ * The requested rate is mapped onto baudrates[] via get_bindex(); index 0 is
+ * used when get_bindex() finds no entry.  The table index is what gets
+ * written to the TIMBUART_BAUDRATE register.
+ */
+static void timbuart_set_termios(struct uart_port *port,
+       struct ktermios *termios,
+       struct ktermios *old)
+{
+       unsigned int baud;
+       short bindex;
+       unsigned long flags;
+
+       baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 16);
+       bindex = get_bindex(baud);
+       dev_dbg(port->dev, "%s - bindex %d\n", __func__, bindex);
+
+       if (bindex < 0)
+               bindex = 0;
+       /* from here on baud is the rate actually selected from the table */
+       baud = baudrates[bindex];
+
+       /* The serial layer calls into this once with old = NULL when setting
+          up initially */
+       if (old)
+               tty_termios_copy_hw(termios, old);
+       tty_termios_encode_baud_rate(termios, baud, baud);
+
+       spin_lock_irqsave(&port->lock, flags);
+       iowrite8((u8)bindex, port->membase + TIMBUART_BAUDRATE);
+       uart_update_timeout(port, termios->c_cflag, baud);
+       spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/*
+ * uart_ops.type hook: returns "timbuart" while the port type is
+ * PORT_UNKNOWN and NULL for any other type.
+ *
+ * NOTE(review): timbuart_config_port() sets the type to PORT_TIMBUART, for
+ * which this returns NULL - confirm the comparison is the intended one.
+ */
+static const char *timbuart_type(struct uart_port *port)
+{
+       if (port->type != PORT_UNKNOWN)
+               return NULL;
+
+       return "timbuart";
+}
+
+/* We do not request/release mappings of the registers here,
+ * currently it's done in the probe function.
+ */
+/*
+ * uart_ops.release_port hook: unmap the registers when the port was mapped
+ * via UPF_IOREMAP and release the memory region.  The region size is taken
+ * from the platform device's first IORESOURCE_MEM resource.
+ */
+static void timbuart_release_port(struct uart_port *port)
+{
+       struct platform_device *pdev = to_platform_device(port->dev);
+       /* the resource is assumed to exist; probe checks for it */
+       int size =
+               resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+       if (port->flags & UPF_IOREMAP) {
+               iounmap(port->membase);
+               port->membase = NULL;
+       }
+
+       release_mem_region(port->mapbase, size);
+}
+
+/*
+ * uart_ops.request_port hook: claim the register memory region and, when
+ * UPF_IOREMAP is set, map it into port->membase.
+ *
+ * Returns 0 on success, -EBUSY when the region cannot be claimed, or
+ * -ENOMEM when the mapping fails (the region is released again).
+ */
+static int timbuart_request_port(struct uart_port *port)
+{
+       struct platform_device *pdev = to_platform_device(port->dev);
+       /* the resource is assumed to exist; probe checks for it */
+       int size =
+               resource_size(platform_get_resource(pdev, IORESOURCE_MEM, 0));
+
+       if (!request_mem_region(port->mapbase, size, "timb-uart"))
+               return -EBUSY;
+
+       if (port->flags & UPF_IOREMAP) {
+               port->membase = ioremap(port->mapbase, size);
+               if (port->membase == NULL) {
+                       release_mem_region(port->mapbase, size);
+                       return -ENOMEM;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Top half.  When the interrupt pending register is non-zero, remember the
+ * current enable mask, mask everything and defer the work to the tasklet.
+ *
+ * @devid: the struct timbuart_port passed to request_irq()
+ *
+ * Returns IRQ_HANDLED when work was scheduled, IRQ_NONE otherwise.
+ */
+static irqreturn_t timbuart_handleinterrupt(int irq, void *devid)
+{
+       struct timbuart_port *tp = (struct timbuart_port *)devid;
+
+       /* nothing pending on this device */
+       if (!ioread8(tp->port.membase + TIMBUART_IPR))
+               return IRQ_NONE;
+
+       tp->last_ier = ioread8(tp->port.membase + TIMBUART_IER);
+
+       /* mask all sources; the tasklet writes a new enable mask */
+       iowrite8(0, tp->port.membase + TIMBUART_IER);
+
+       /* hand over to the bottom half */
+       tasklet_schedule(&tp->tasklet);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Configure/autoconfigure the port: when UART_CONFIG_TYPE is requested,
+ * mark the port as PORT_TIMBUART and request its resources.  The result of
+ * timbuart_request_port() is not checked.
+ */
+static void timbuart_config_port(struct uart_port *port, int flags)
+{
+       if (!(flags & UART_CONFIG_TYPE))
+               return;
+
+       port->type = PORT_TIMBUART;
+       timbuart_request_port(port);
+}
+
+/*
+ * uart_ops.verify_port hook: always rejects the request with -EINVAL;
+ * both arguments are ignored.
+ */
+static int timbuart_verify_port(struct uart_port *port,
+       struct serial_struct *ser)
+{
+       /* we don't want the core code to modify any port params */
+       return -EINVAL;
+}
+
+/* Serial core callbacks for this driver; installed on the port in probe. */
+static struct uart_ops timbuart_ops = {
+       .tx_empty = timbuart_tx_empty,
+       .set_mctrl = timbuart_set_mctrl,
+       .get_mctrl = timbuart_get_mctrl,
+       .stop_tx = timbuart_stop_tx,
+       .start_tx = timbuart_start_tx,
+       .flush_buffer = timbuart_flush_buffer,
+       .stop_rx = timbuart_stop_rx,
+       .enable_ms = timbuart_enable_ms,
+       .break_ctl = timbuart_break_ctl,
+       .startup = timbuart_startup,
+       .shutdown = timbuart_shutdown,
+       .set_termios = timbuart_set_termios,
+       .type = timbuart_type,
+       .release_port = timbuart_release_port,
+       .request_port = timbuart_request_port,
+       .config_port = timbuart_config_port,
+       .verify_port = timbuart_verify_port
+};
+
+/*
+ * Serial core driver description: a single port (nr = 1) with device name
+ * prefix "ttyTU".  Registered and unregistered from probe/remove.
+ */
+static struct uart_driver timbuart_driver = {
+       .owner = THIS_MODULE,
+       .driver_name = "timberdale_uart",
+       .dev_name = "ttyTU",
+       .major = TIMBUART_MAJOR,
+       .minor = TIMBUART_MINOR,
+       .nr = 1
+};
+
+/*
+ * Platform probe: allocate the per-device state, fill in the uart_port from
+ * the platform device's first MEM resource and first IRQ, register the uart
+ * driver and add the single port (line 0).
+ *
+ * Returns 0 on success or a negative errno.  On failure everything acquired
+ * so far is released and the error is logged.
+ */
+static int timbuart_probe(struct platform_device *dev)
+{
+       int err;
+       int irq;
+       struct timbuart_port *uart;
+       struct resource *iomem;
+
+       dev_dbg(&dev->dev, "%s\n", __func__);
+
+       uart = kzalloc(sizeof(*uart), GFP_KERNEL);
+       if (!uart) {
+               /* allocation failure, not an invalid argument */
+               err = -ENOMEM;
+               goto err_mem;
+       }
+
+       uart->usedma = 0;
+
+       /* highest supported rate is 3250000 baud, see baudrates[] */
+       uart->port.uartclk = 3250000 * 16;
+       uart->port.fifosize  = TIMBUART_FIFO_SIZE;
+       uart->port.regshift  = 2;
+       uart->port.iotype  = UPIO_MEM;
+       uart->port.ops = &timbuart_ops;
+       uart->port.irq = 0;
+       uart->port.flags = UPF_BOOT_AUTOCONF | UPF_IOREMAP;
+       uart->port.line  = 0;
+       uart->port.dev  = &dev->dev;
+
+       iomem = platform_get_resource(dev, IORESOURCE_MEM, 0);
+       if (!iomem) {
+               err = -ENOMEM;
+               goto err_register;
+       }
+       uart->port.mapbase = iomem->start;
+       /* mapped later by timbuart_request_port() (UPF_IOREMAP) */
+       uart->port.membase = NULL;
+
+       /*
+        * Check the signed return value before storing it: port.irq is an
+        * unsigned field, so testing it for "< 0" could never be true.
+        */
+       irq = platform_get_irq(dev, 0);
+       if (irq < 0) {
+               err = -EINVAL;
+               goto err_register;
+       }
+       uart->port.irq = irq;
+
+       tasklet_init(&uart->tasklet, timbuart_tasklet, (unsigned long)uart);
+
+       err = uart_register_driver(&timbuart_driver);
+       if (err)
+               goto err_register;
+
+       err = uart_add_one_port(&timbuart_driver, &uart->port);
+       if (err)
+               goto err_add_port;
+
+       platform_set_drvdata(dev, uart);
+
+       return 0;
+
+err_add_port:
+       uart_unregister_driver(&timbuart_driver);
+err_register:
+       kfree(uart);
+err_mem:
+       printk(KERN_ERR "timberdale: Failed to register Timberdale UART: %d\n",
+               err);
+
+       return err;
+}
+
+/*
+ * Platform remove: undo timbuart_probe().  Stops the tasklet, removes the
+ * port, unregisters the uart driver and frees the per-device state stored
+ * as driver data.  Always returns 0.
+ */
+static int timbuart_remove(struct platform_device *dev)
+{
+       struct timbuart_port *uart = platform_get_drvdata(dev);
+
+       tasklet_kill(&uart->tasklet);
+       uart_remove_one_port(&timbuart_driver, &uart->port);
+       uart_unregister_driver(&timbuart_driver);
+       kfree(uart);
+
+       return 0;
+}
+
+/* Platform driver glue; binds to platform devices named "timb-uart". */
+static struct platform_driver timbuart_platform_driver = {
+       .driver = {
+               .name   = "timb-uart",
+               .owner  = THIS_MODULE,
+       },
+       .probe          = timbuart_probe,
+       .remove         = timbuart_remove,
+};
+
+/*--------------------------------------------------------------------------*/
+
+/* Module entry point: registers the platform driver and returns its result. */
+static int __init timbuart_init(void)
+{
+       return platform_driver_register(&timbuart_platform_driver);
+}
+
+/* Module exit point: unregisters the platform driver. */
+static void __exit timbuart_exit(void)
+{
+       platform_driver_unregister(&timbuart_platform_driver);
+}
+
+module_init(timbuart_init);
+module_exit(timbuart_exit);
+
+MODULE_DESCRIPTION("Timberdale UART driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:timb-uart");
+
diff --git a/drivers/serial/timbuart.h b/drivers/serial/timbuart.h
new file mode 100644 (file)
index 0000000..7e56676
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * timbuart.h timberdale FPGA UART driver
+ * Copyright (c) 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/* Supports:
+ * Timberdale FPGA UART
+ */
+
+#ifndef _TIMBUART_H
+#define _TIMBUART_H
+
+/* value assigned to uart_port.fifosize by the driver */
+#define TIMBUART_FIFO_SIZE     2048
+
+/* register offsets, added to the port's membase by the driver */
+#define TIMBUART_RXFIFO                0x08
+#define TIMBUART_TXFIFO                0x0c
+#define TIMBUART_IER           0x10
+#define TIMBUART_IPR           0x14
+#define TIMBUART_ISR           0x18
+#define TIMBUART_CTRL          0x1c
+#define TIMBUART_BAUDRATE      0x20
+
+/* bits of the TIMBUART_CTRL register */
+#define TIMBUART_CTRL_RTS      0x01
+#define TIMBUART_CTRL_CTS      0x02
+#define TIMBUART_CTRL_FLSHTX   0x40
+#define TIMBUART_CTRL_FLSHRX   0x80
+
+/*
+ * Interrupt bits, used by the driver with the TIMBUART_IER and TIMBUART_ISR
+ * registers.
+ * NOTE(review): TXBE (0x100) does not fit into the 8-bit register accesses
+ * the driver performs and is not referenced by timbuart.c.
+ */
+#define TXBF           0x01
+#define TXBAE          0x02
+#define CTS_DELTA      0x04
+#define RXDP           0x08
+#define RXBAF          0x10
+#define RXBF           0x20
+#define RXTT           0x40
+#define RXBNAE         0x80
+#define TXBE           0x100
+
+/* convenience masks covering all RX / all TX interrupt bits above */
+#define RXFLAGS (RXDP | RXBAF | RXBF | RXTT | RXBNAE)
+#define TXFLAGS (TXBF | TXBAE)
+
+/* character device numbers used in struct uart_driver */
+#define TIMBUART_MAJOR 204
+#define TIMBUART_MINOR 192
+
+#endif /* _TIMBUART_H */
+
index 7dc3a6b..a0e0d24 100644 (file)
@@ -29,6 +29,7 @@ int ssb_watchdog_timer_set(struct ssb_bus *bus, u32 ticks)
        }
        return -ENODEV;
 }
+EXPORT_SYMBOL(ssb_watchdog_timer_set);
 
 u32 ssb_gpio_in(struct ssb_bus *bus, u32 mask)
 {
index 7a1164d..ddeb691 100644 (file)
@@ -16,7 +16,8 @@
  *     v0.9  - thorough cleaning, URBification, almost a rewrite
  *     v0.10 - some more cleanups
  *     v0.11 - fixed flow control, read error doesn't stop reads
- *     v0.12 - added TIOCM ioctls, added break handling, made struct acm kmalloced
+ *     v0.12 - added TIOCM ioctls, added break handling, made struct acm
+ *             kmalloced
  *     v0.13 - added termios, added hangup
  *     v0.14 - sized down struct acm
  *     v0.15 - fixed flow control again - characters could be lost
@@ -62,7 +63,7 @@
 #include <linux/tty_flip.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <linux/usb.h>
 #include <linux/usb/cdc.h>
 #include <asm/byteorder.h>
@@ -87,7 +88,10 @@ static struct acm *acm_table[ACM_TTY_MINORS];
 
 static DEFINE_MUTEX(open_mutex);
 
-#define ACM_READY(acm) (acm && acm->dev && acm->used)
+#define ACM_READY(acm) (acm && acm->dev && acm->port.count)
+
+static const struct tty_port_operations acm_port_ops = {
+};
 
 #ifdef VERBOSE_DEBUG
 #define verbose        1
@@ -99,13 +103,15 @@ static DEFINE_MUTEX(open_mutex);
  * Functions for ACM control messages.
  */
 
-static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len)
+static int acm_ctrl_msg(struct acm *acm, int request, int value,
+                                                       void *buf, int len)
 {
        int retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0),
                request, USB_RT_ACM, value,
                acm->control->altsetting[0].desc.bInterfaceNumber,
                buf, len, 5000);
-       dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d", request, value, len, retval);
+       dbg("acm_control_msg: rq: 0x%02x val: %#x len: %#x result: %d",
+                                               request, value, len, retval);
        return retval < 0 ? retval : 0;
 }
 
@@ -150,9 +156,8 @@ static int acm_wb_is_avail(struct acm *acm)
 
        n = ACM_NW;
        spin_lock_irqsave(&acm->write_lock, flags);
-       for (i = 0; i < ACM_NW; i++) {
+       for (i = 0; i < ACM_NW; i++)
                n -= acm->wb[i].use;
-       }
        spin_unlock_irqrestore(&acm->write_lock, flags);
        return n;
 }
@@ -183,7 +188,8 @@ static int acm_start_wb(struct acm *acm, struct acm_wb *wb)
        wb->urb->transfer_buffer_length = wb->len;
        wb->urb->dev = acm->dev;
 
-       if ((rc = usb_submit_urb(wb->urb, GFP_ATOMIC)) < 0) {
+       rc = usb_submit_urb(wb->urb, GFP_ATOMIC);
+       if (rc < 0) {
                dbg("usb_submit_urb(write bulk) failed: %d", rc);
                acm_write_done(acm, wb);
        }
@@ -262,6 +268,7 @@ static void acm_ctrl_irq(struct urb *urb)
 {
        struct acm *acm = urb->context;
        struct usb_cdc_notification *dr = urb->transfer_buffer;
+       struct tty_struct *tty;
        unsigned char *data;
        int newctrl;
        int retval;
@@ -287,40 +294,45 @@ static void acm_ctrl_irq(struct urb *urb)
 
        data = (unsigned char *)(dr + 1);
        switch (dr->bNotificationType) {
+       case USB_CDC_NOTIFY_NETWORK_CONNECTION:
+               dbg("%s network", dr->wValue ?
+                                       "connected to" : "disconnected from");
+               break;
 
-               case USB_CDC_NOTIFY_NETWORK_CONNECTION:
-
-                       dbg("%s network", dr->wValue ? "connected to" : "disconnected from");
-                       break;
-
-               case USB_CDC_NOTIFY_SERIAL_STATE:
-
-                       newctrl = get_unaligned_le16(data);
+       case USB_CDC_NOTIFY_SERIAL_STATE:
+               tty = tty_port_tty_get(&acm->port);
+               newctrl = get_unaligned_le16(data);
 
-                       if (acm->tty && !acm->clocal && (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
+               if (tty) {
+                       if (!acm->clocal &&
+                               (acm->ctrlin & ~newctrl & ACM_CTRL_DCD)) {
                                dbg("calling hangup");
-                               tty_hangup(acm->tty);
+                               tty_hangup(tty);
                        }
+                       tty_kref_put(tty);
+               }
 
-                       acm->ctrlin = newctrl;
-
-                       dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
-                               acm->ctrlin & ACM_CTRL_DCD ? '+' : '-', acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
-                               acm->ctrlin & ACM_CTRL_BRK ? '+' : '-', acm->ctrlin & ACM_CTRL_RI  ? '+' : '-',
-                               acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-',     acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
-                               acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
+               acm->ctrlin = newctrl;
 
+               dbg("input control lines: dcd%c dsr%c break%c ring%c framing%c parity%c overrun%c",
+                       acm->ctrlin & ACM_CTRL_DCD ? '+' : '-',
+                       acm->ctrlin & ACM_CTRL_DSR ? '+' : '-',
+                       acm->ctrlin & ACM_CTRL_BRK ? '+' : '-',
+                       acm->ctrlin & ACM_CTRL_RI  ? '+' : '-',
+                       acm->ctrlin & ACM_CTRL_FRAMING ? '+' : '-',
+                       acm->ctrlin & ACM_CTRL_PARITY ? '+' : '-',
+                       acm->ctrlin & ACM_CTRL_OVERRUN ? '+' : '-');
                        break;
 
-               default:
-                       dbg("unknown notification %d received: index %d len %d data0 %d data1 %d",
-                               dr->bNotificationType, dr->wIndex,
-                               dr->wLength, data[0], data[1]);
-                       break;
+       default:
+               dbg("unknown notification %d received: index %d len %d data0 %d data1 %d",
+                       dr->bNotificationType, dr->wIndex,
+                       dr->wLength, data[0], data[1]);
+               break;
        }
 exit:
        usb_mark_last_busy(acm->dev);
-       retval = usb_submit_urb (urb, GFP_ATOMIC);
+       retval = usb_submit_urb(urb, GFP_ATOMIC);
        if (retval)
                dev_err(&urb->dev->dev, "%s - usb_submit_urb failed with "
                        "result %d", __func__, retval);
@@ -371,15 +383,14 @@ static void acm_rx_tasklet(unsigned long _acm)
 {
        struct acm *acm = (void *)_acm;
        struct acm_rb *buf;
-       struct tty_struct *tty = acm->tty;
+       struct tty_struct *tty;
        struct acm_ru *rcv;
        unsigned long flags;
        unsigned char throttled;
 
        dbg("Entering acm_rx_tasklet");
 
-       if (!ACM_READY(acm))
-       {
+       if (!ACM_READY(acm)) {
                dbg("acm_rx_tasklet: ACM not ready");
                return;
        }
@@ -387,12 +398,13 @@ static void acm_rx_tasklet(unsigned long _acm)
        spin_lock_irqsave(&acm->throttle_lock, flags);
        throttled = acm->throttle;
        spin_unlock_irqrestore(&acm->throttle_lock, flags);
-       if (throttled)
-       {
+       if (throttled) {
                dbg("acm_rx_tasklet: throttled");
                return;
        }
 
+       tty = tty_port_tty_get(&acm->port);
+
 next_buffer:
        spin_lock_irqsave(&acm->read_lock, flags);
        if (list_empty(&acm->filled_read_bufs)) {
@@ -406,20 +418,22 @@ next_buffer:
 
        dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d", buf, buf->size);
 
-       tty_buffer_request_room(tty, buf->size);
-       spin_lock_irqsave(&acm->throttle_lock, flags);
-       throttled = acm->throttle;
-       spin_unlock_irqrestore(&acm->throttle_lock, flags);
-       if (!throttled)
-               tty_insert_flip_string(tty, buf->base, buf->size);
-       tty_flip_buffer_push(tty);
-
-       if (throttled) {
-               dbg("Throttling noticed");
-               spin_lock_irqsave(&acm->read_lock, flags);
-               list_add(&buf->list, &acm->filled_read_bufs);
-               spin_unlock_irqrestore(&acm->read_lock, flags);
-               return;
+       if (tty) {
+               spin_lock_irqsave(&acm->throttle_lock, flags);
+               throttled = acm->throttle;
+               spin_unlock_irqrestore(&acm->throttle_lock, flags);
+               if (!throttled) {
+                       tty_buffer_request_room(tty, buf->size);
+                       tty_insert_flip_string(tty, buf->base, buf->size);
+                       tty_flip_buffer_push(tty);
+               } else {
+                       tty_kref_put(tty);
+                       dbg("Throttling noticed");
+                       spin_lock_irqsave(&acm->read_lock, flags);
+                       list_add(&buf->list, &acm->filled_read_bufs);
+                       spin_unlock_irqrestore(&acm->read_lock, flags);
+                       return;
+               }
        }
 
        spin_lock_irqsave(&acm->read_lock, flags);
@@ -428,6 +442,8 @@ next_buffer:
        goto next_buffer;
 
 urbs:
+       tty_kref_put(tty);
+
        while (!list_empty(&acm->spare_read_bufs)) {
                spin_lock_irqsave(&acm->read_lock, flags);
                if (list_empty(&acm->spare_read_urbs)) {
@@ -454,10 +470,11 @@ urbs:
                rcv->urb->transfer_dma = buf->dma;
                rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 
-               /* This shouldn't kill the driver as unsuccessful URBs are returned to the
-                  free-urbs-pool and resubmited ASAP */
+               /* This shouldn't kill the driver as unsuccessful URBs are
+                  returned to the free-urbs-pool and resubmited ASAP */
                spin_lock_irqsave(&acm->read_lock, flags);
-               if (acm->susp_count || usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
+               if (acm->susp_count ||
+                               usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
                        list_add(&buf->list, &acm->spare_read_bufs);
                        list_add(&rcv->list, &acm->spare_read_urbs);
                        acm->processing = 0;
@@ -499,11 +516,14 @@ static void acm_write_bulk(struct urb *urb)
+/*
+ * Deferred "tx work" handler: wakes up the tty currently attached to the
+ * ACM port.  @work is the work_struct embedded in struct acm.
+ */
 static void acm_softint(struct work_struct *work)
 {
        struct acm *acm = container_of(work, struct acm, work);
+       struct tty_struct *tty;
 
        dev_vdbg(&acm->data->dev, "tx work\n");
        if (!ACM_READY(acm))
                return;
-       tty_wakeup(acm->tty);
+       /* hold a tty reference for the duration of the wakeup */
+       tty = tty_port_tty_get(&acm->port);
+       /*
+        * The port's tty is set to NULL on close and on a failed open, so
+        * there may be nothing to wake; the other tty_port_tty_get() users
+        * in this driver check for NULL as well.
+        */
+       if (!tty)
+               return;
+       tty_wakeup(tty);
+       tty_kref_put(tty);
 }
 
 static void acm_waker(struct work_struct *waker)
@@ -543,8 +563,9 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
                rv = 0;
 
        set_bit(TTY_NO_WRITE_SPLIT, &tty->flags);
+
        tty->driver_data = acm;
-       acm->tty = tty;
+       tty_port_tty_set(&acm->port, tty);
 
        if (usb_autopm_get_interface(acm->control) < 0)
                goto early_bail;
@@ -552,11 +573,10 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
                acm->control->needs_remote_wakeup = 1;
 
        mutex_lock(&acm->mutex);
-       if (acm->used++) {
+       if (acm->port.count++) {
                usb_autopm_put_interface(acm->control);
                goto done;
-        }
-
+       }
 
        acm->ctrlurb->dev = acm->dev;
        if (usb_submit_urb(acm->ctrlurb, GFP_KERNEL)) {
@@ -567,22 +587,22 @@ static int acm_tty_open(struct tty_struct *tty, struct file *filp)
        if (0 > acm_set_control(acm, acm->ctrlout = ACM_CTRL_DTR | ACM_CTRL_RTS) &&
            (acm->ctrl_caps & USB_CDC_CAP_LINE))
                goto full_bailout;
+
        usb_autopm_put_interface(acm->control);
 
        INIT_LIST_HEAD(&acm->spare_read_urbs);
        INIT_LIST_HEAD(&acm->spare_read_bufs);
        INIT_LIST_HEAD(&acm->filled_read_bufs);
-       for (i = 0; i < acm->rx_buflimit; i++) {
+
+       for (i = 0; i < acm->rx_buflimit; i++)
                list_add(&(acm->ru[i].list), &acm->spare_read_urbs);
-       }
-       for (i = 0; i < acm->rx_buflimit; i++) {
+       for (i = 0; i < acm->rx_buflimit; i++)
                list_add(&(acm->rb[i].list), &acm->spare_read_bufs);
-       }
 
        acm->throttle = 0;
 
        tasklet_schedule(&acm->urb_task);
-
+       rv = tty_port_block_til_ready(&acm->port, tty, filp);
 done:
        mutex_unlock(&acm->mutex);
 err_out:
@@ -593,16 +613,17 @@ full_bailout:
        usb_kill_urb(acm->ctrlurb);
 bail_out:
        usb_autopm_put_interface(acm->control);
-       acm->used--;
+       acm->port.count--;
        mutex_unlock(&acm->mutex);
 early_bail:
        mutex_unlock(&open_mutex);
+       tty_port_tty_set(&acm->port, NULL);
        return -EIO;
 }
 
 static void acm_tty_unregister(struct acm *acm)
 {
-       int i,nr;
+       int i, nr;
 
        nr = acm->rx_buflimit;
        tty_unregister_device(acm_tty_driver, acm->minor);
@@ -619,41 +640,56 @@ static void acm_tty_unregister(struct acm *acm)
 
 static int acm_tty_chars_in_buffer(struct tty_struct *tty);
 
+/*
+ * Shut down the USB side of an ACM port.
+ *
+ * Runs with open_mutex held and does nothing when acm->dev is NULL.
+ * Otherwise it clears the output control lines, optionally (when @drain is
+ * non-zero) waits up to ACM_CLOSE_TIMEOUT * HZ jiffies for
+ * acm_wb_is_avail() to report all ACM_NW write buffers, kills the control,
+ * write and read URBs, and clears needs_remote_wakeup on the control
+ * interface.
+ */
+static void acm_port_down(struct acm *acm, int drain)
+{
+       int rx_urbs = acm->rx_buflimit;
+       int n;
+
+       mutex_lock(&open_mutex);
+       if (!acm->dev)
+               goto out_unlock;
+
+       usb_autopm_get_interface(acm->control);
+       acm_set_control(acm, acm->ctrlout = 0);
+
+       /* try letting the last writes drain naturally */
+       if (drain) {
+               wait_event_interruptible_timeout(acm->drain_wait,
+                       acm_wb_is_avail(acm) == ACM_NW || !acm->dev,
+                       ACM_CLOSE_TIMEOUT * HZ);
+       }
+
+       usb_kill_urb(acm->ctrlurb);
+       for (n = 0; n < ACM_NW; n++)
+               usb_kill_urb(acm->wb[n].urb);
+       for (n = 0; n < rx_urbs; n++)
+               usb_kill_urb(acm->ru[n].urb);
+
+       acm->control->needs_remote_wakeup = 0;
+       usb_autopm_put_interface(acm->control);
+
+out_unlock:
+       mutex_unlock(&open_mutex);
+}
+
+/*
+ * tty hangup callback: hand the hangup to the tty_port layer, then shut
+ * the USB side of the port down without waiting for writes to drain.
+ */
+static void acm_tty_hangup(struct tty_struct *tty)
+{
+       struct acm *instance = tty->driver_data;
+
+       tty_port_hangup(&instance->port);
+       acm_port_down(instance, 0);
+}
+
+/*
+ * tty close callback.  Runs the tty_port close sequence, shuts the USB
+ * side down via acm_port_down(), detaches the tty from the port and, when
+ * acm->dev is NULL, unregisters the tty device.
+ */
 static void acm_tty_close(struct tty_struct *tty, struct file *filp)
 {
        struct acm *acm = tty->driver_data;
-       int i,nr;
 
-       if (!acm || !acm->used)
+       /*
+        * The pre-patch code tolerated a NULL driver_data; keep that guard
+        * so acm->port is never touched through a NULL pointer.  Then
+        * perform the closing process and see if we need to do the hardware
+        * shutdown.
+        */
+       if (!acm || tty_port_close_start(&acm->port, tty, filp) == 0)
                return;
-
-       nr = acm->rx_buflimit;
+       acm_port_down(acm, 0);
+       tty_port_close_end(&acm->port, tty);
        mutex_lock(&open_mutex);
-       if (!--acm->used) {
-               if (acm->dev) {
-                       usb_autopm_get_interface(acm->control);
-                       acm_set_control(acm, acm->ctrlout = 0);
-
-                       /* try letting the last writes drain naturally */
-                       wait_event_interruptible_timeout(acm->drain_wait,
-                                       (ACM_NW == acm_wb_is_avail(acm))
-                                               || !acm->dev,
-                                       ACM_CLOSE_TIMEOUT * HZ);
-
-                       usb_kill_urb(acm->ctrlurb);
-                       for (i = 0; i < ACM_NW; i++)
-                               usb_kill_urb(acm->wb[i].urb);
-                       for (i = 0; i < nr; i++)
-                               usb_kill_urb(acm->ru[i].urb);
-                       acm->control->needs_remote_wakeup = 0;
-                       usb_autopm_put_interface(acm->control);
-               } else
-                       acm_tty_unregister(acm);
-       }
+       tty_port_tty_set(&acm->port, NULL);
+       /* NOTE(review): acm->dev == NULL presumably means the USB device is
+          already gone, so the last close does the unregister - confirm */
+       if (!acm->dev)
+               acm_tty_unregister(acm);
        mutex_unlock(&open_mutex);
 }
 
-static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int count)
+static int acm_tty_write(struct tty_struct *tty,
+                                       const unsigned char *buf, int count)
 {
        struct acm *acm = tty->driver_data;
        int stat;
@@ -669,7 +705,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c
                return 0;
 
        spin_lock_irqsave(&acm->write_lock, flags);
-       if ((wbn = acm_wb_alloc(acm)) < 0) {
+       wbn = acm_wb_alloc(acm);
+       if (wbn < 0) {
                spin_unlock_irqrestore(&acm->write_lock, flags);
                return 0;
        }
@@ -681,7 +718,8 @@ static int acm_tty_write(struct tty_struct *tty, const unsigned char *buf, int c
        wb->len = count;
        spin_unlock_irqrestore(&acm->write_lock, flags);
 
-       if ((stat = acm_write_start(acm, wbn)) < 0)
+       stat = acm_write_start(acm, wbn);
+       if (stat < 0)
                return stat;
        return count;
 }
@@ -767,8 +805,10 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file,
                return -EINVAL;
 
        newctrl = acm->ctrlout;
-       set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (set & TIOCM_RTS ? ACM_CTRL_RTS : 0);
-       clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) | (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0);
+       set = (set & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
+                                       (set & TIOCM_RTS ? ACM_CTRL_RTS : 0);
+       clear = (clear & TIOCM_DTR ? ACM_CTRL_DTR : 0) |
+                                       (clear & TIOCM_RTS ? ACM_CTRL_RTS : 0);
 
        newctrl = (newctrl & ~clear) | set;
 
@@ -777,7 +817,8 @@ static int acm_tty_tiocmset(struct tty_struct *tty, struct file *file,
        return acm_set_control(acm, acm->ctrlout = newctrl);
 }
 
-static int acm_tty_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg)
+static int acm_tty_ioctl(struct tty_struct *tty, struct file *file,
+                                       unsigned int cmd, unsigned long arg)
 {
        struct acm *acm = tty->driver_data;
 
@@ -799,7 +840,8 @@ static const __u8 acm_tty_size[] = {
        5, 6, 7, 8
 };
 
-static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios_old)
+static void acm_tty_set_termios(struct tty_struct *tty,
+                                               struct ktermios *termios_old)
 {
        struct acm *acm = tty->driver_data;
        struct ktermios *termios = tty->termios;
@@ -809,19 +851,23 @@ static void acm_tty_set_termios(struct tty_struct *tty, struct ktermios *termios
        if (!ACM_READY(acm))
                return;
 
+       /* FIXME: Needs to support the tty_baud interface */
+       /* FIXME: Broken on sparc */
        newline.dwDTERate = cpu_to_le32p(acm_tty_speed +
                (termios->c_cflag & CBAUD & ~CBAUDEX) + (termios->c_cflag & CBAUDEX ? 15 : 0));
        newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0;
        newline.bParityType = termios->c_cflag & PARENB ?
-               (termios->c_cflag & PARODD ? 1 : 2) + (termios->c_cflag & CMSPAR ? 2 : 0) : 0;
+                               (termios->c_cflag & PARODD ? 1 : 2) +
+                               (termios->c_cflag & CMSPAR ? 2 : 0) : 0;
        newline.bDataBits = acm_tty_size[(termios->c_cflag & CSIZE) >> 4];
-
+       /* FIXME: Needs to clear unsupported bits in the termios */
        acm->clocal = ((termios->c_cflag & CLOCAL) != 0);
 
        if (!newline.dwDTERate) {
                newline.dwDTERate = acm->line.dwDTERate;
                newctrl &= ~ACM_CTRL_DTR;
-       } else  newctrl |=  ACM_CTRL_DTR;
+       } else
+               newctrl |=  ACM_CTRL_DTR;
 
        if (newctrl != acm->ctrlout)
                acm_set_control(acm, acm->ctrlout = newctrl);
@@ -846,9 +892,8 @@ static void acm_write_buffers_free(struct acm *acm)
        struct acm_wb *wb;
        struct usb_device *usb_dev = interface_to_usbdev(acm->control);
 
-       for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) {
+       for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++)
                usb_buffer_free(usb_dev, acm->writesize, wb->buf, wb->dmah);
-       }
 }
 
 static void acm_read_buffers_free(struct acm *acm)
@@ -857,7 +902,8 @@ static void acm_read_buffers_free(struct acm *acm)
        int i, n = acm->rx_buflimit;
 
        for (i = 0; i < n; i++)
-               usb_buffer_free(usb_dev, acm->readsize, acm->rb[i].base, acm->rb[i].dma);
+               usb_buffer_free(usb_dev, acm->readsize,
+                                       acm->rb[i].base, acm->rb[i].dma);
 }
 
 /* Little helper: write buffers allocate */
@@ -882,8 +928,8 @@ static int acm_write_buffers_alloc(struct acm *acm)
        return 0;
 }
 
-static int acm_probe (struct usb_interface *intf,
-                     const struct usb_device_id *id)
+static int acm_probe(struct usb_interface *intf,
+                    const struct usb_device_id *id)
 {
        struct usb_cdc_union_desc *union_header = NULL;
        struct usb_cdc_country_functional_desc *cfd = NULL;
@@ -897,7 +943,7 @@ static int acm_probe (struct usb_interface *intf,
        struct usb_device *usb_dev = interface_to_usbdev(intf);
        struct acm *acm;
        int minor;
-       int ctrlsize,readsize;
+       int ctrlsize, readsize;
        u8 *buf;
        u8 ac_management_function = 0;
        u8 call_management_function = 0;
@@ -917,7 +963,7 @@ static int acm_probe (struct usb_interface *intf,
                control_interface = usb_ifnum_to_if(usb_dev, 0);
                goto skip_normal_probe;
        }
-       
+
        /* normal probing*/
        if (!buffer) {
                dev_err(&intf->dev, "Weird descriptor references\n");
@@ -925,8 +971,10 @@ static int acm_probe (struct usb_interface *intf,
        }
 
        if (!buflen) {
-               if (intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) {
-                       dev_dbg(&intf->dev,"Seeking extra descriptors on endpoint\n");
+               if (intf->cur_altsetting->endpoint->extralen &&
+                               intf->cur_altsetting->endpoint->extra) {
+                       dev_dbg(&intf->dev,
+                               "Seeking extra descriptors on endpoint\n");
                        buflen = intf->cur_altsetting->endpoint->extralen;
                        buffer = intf->cur_altsetting->endpoint->extra;
                } else {
@@ -937,47 +985,43 @@ static int acm_probe (struct usb_interface *intf,
        }
 
        while (buflen > 0) {
-               if (buffer [1] != USB_DT_CS_INTERFACE) {
+               if (buffer[1] != USB_DT_CS_INTERFACE) {
                        dev_err(&intf->dev, "skipping garbage\n");
                        goto next_desc;
                }
 
-               switch (buffer [2]) {
-                       case USB_CDC_UNION_TYPE: /* we've found it */
-                               if (union_header) {
-                                       dev_err(&intf->dev, "More than one "
-                                               "union descriptor, "
-                                               "skipping ...\n");
-                                       goto next_desc;
-                               }
-                               union_header = (struct usb_cdc_union_desc *)
-                                                       buffer;
-                               break;
-                       case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
-                               cfd = (struct usb_cdc_country_functional_desc *)buffer;
-                               break;
-                       case USB_CDC_HEADER_TYPE: /* maybe check version */ 
-                               break; /* for now we ignore it */ 
-                       case USB_CDC_ACM_TYPE:
-                               ac_management_function = buffer[3];
-                               break;
-                       case USB_CDC_CALL_MANAGEMENT_TYPE:
-                               call_management_function = buffer[3];
-                               call_interface_num = buffer[4];
-                               if ((call_management_function & 3) != 3)
-                                       dev_err(&intf->dev, "This device "
-                                               "cannot do calls on its own. "
-                                               "It is no modem.\n");
-                               break;
-                       default:
-                               /* there are LOTS more CDC descriptors that
-                                * could legitimately be found here.
-                                */
-                               dev_dbg(&intf->dev, "Ignoring descriptor: "
-                                               "type %02x, length %d\n",
-                                               buffer[2], buffer[0]);
-                               break;
+               switch (buffer[2]) {
+               case USB_CDC_UNION_TYPE: /* we've found it */
+                       if (union_header) {
+                               dev_err(&intf->dev, "More than one "
+                                       "union descriptor, skipping ...\n");
+                               goto next_desc;
                        }
+                       union_header = (struct usb_cdc_union_desc *)buffer;
+                       break;
+               case USB_CDC_COUNTRY_TYPE: /* export through sysfs*/
+                       cfd = (struct usb_cdc_country_functional_desc *)buffer;
+                       break;
+               case USB_CDC_HEADER_TYPE: /* maybe check version */
+                       break; /* for now we ignore it */
+               case USB_CDC_ACM_TYPE:
+                       ac_management_function = buffer[3];
+                       break;
+               case USB_CDC_CALL_MANAGEMENT_TYPE:
+                       call_management_function = buffer[3];
+                       call_interface_num = buffer[4];
+                       if ((call_management_function & 3) != 3)
+                               dev_err(&intf->dev, "This device cannot do calls on its own. It is not a modem.\n");
+                       break;
+               default:
+                       /* there are LOTS more CDC descriptors that
+                        * could legitimately be found here.
+                        */
+                       dev_dbg(&intf->dev, "Ignoring descriptor: "
+                                       "type %02x, length %d\n",
+                                       buffer[2], buffer[0]);
+                       break;
+               }
 next_desc:
                buflen -= buffer[0];
                buffer += buffer[0];
@@ -985,33 +1029,36 @@ next_desc:
 
        if (!union_header) {
                if (call_interface_num > 0) {
-                       dev_dbg(&intf->dev,"No union descriptor, using call management descriptor\n");
+                       dev_dbg(&intf->dev, "No union descriptor, using call management descriptor\n");
                        data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = call_interface_num));
                        control_interface = intf;
                } else {
-                       dev_dbg(&intf->dev,"No union descriptor, giving up\n");
+                       dev_dbg(&intf->dev,
+                                       "No union descriptor, giving up\n");
                        return -ENODEV;
                }
        } else {
                control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0);
                data_interface = usb_ifnum_to_if(usb_dev, (data_interface_num = union_header->bSlaveInterface0));
                if (!control_interface || !data_interface) {
-                       dev_dbg(&intf->dev,"no interfaces\n");
+                       dev_dbg(&intf->dev, "no interfaces\n");
                        return -ENODEV;
                }
        }
-       
+
        if (data_interface_num != call_interface_num)
-               dev_dbg(&intf->dev,"Separate call control interface. That is not fully supported.\n");
+               dev_dbg(&intf->dev, "Separate call control interface. That is not fully supported.\n");
 
 skip_normal_probe:
 
        /*workaround for switched interfaces */
-       if (data_interface->cur_altsetting->desc.bInterfaceClass != CDC_DATA_INTERFACE_TYPE) {
-               if (control_interface->cur_altsetting->desc.bInterfaceClass == CDC_DATA_INTERFACE_TYPE) {
+       if (data_interface->cur_altsetting->desc.bInterfaceClass
+                                               != CDC_DATA_INTERFACE_TYPE) {
+               if (control_interface->cur_altsetting->desc.bInterfaceClass
+                                               == CDC_DATA_INTERFACE_TYPE) {
                        struct usb_interface *t;
-                       dev_dbg(&intf->dev,"Your device has switched interfaces.\n");
-
+                       dev_dbg(&intf->dev,
+                               "Your device has switched interfaces.\n");
                        t = control_interface;
                        control_interface = data_interface;
                        data_interface = t;
@@ -1023,9 +1070,9 @@ skip_normal_probe:
        /* Accept probe requests only for the control interface */
        if (intf != control_interface)
                return -ENODEV;
-       
+
        if (usb_interface_claimed(data_interface)) { /* valid in this context */
-               dev_dbg(&intf->dev,"The data interface isn't available\n");
+               dev_dbg(&intf->dev, "The data interface isn't available\n");
                return -EBUSY;
        }
 
@@ -1042,8 +1089,8 @@ skip_normal_probe:
        if (!usb_endpoint_dir_in(epread)) {
                /* descriptors are swapped */
                struct usb_endpoint_descriptor *t;
-               dev_dbg(&intf->dev,"The data interface has switched endpoints\n");
-               
+               dev_dbg(&intf->dev,
+                       "The data interface has switched endpoints\n");
                t = epread;
                epread = epwrite;
                epwrite = t;
@@ -1056,13 +1103,15 @@ skip_normal_probe:
                return -ENODEV;
        }
 
-       if (!(acm = kzalloc(sizeof(struct acm), GFP_KERNEL))) {
+       acm = kzalloc(sizeof(struct acm), GFP_KERNEL);
+       if (acm == NULL) {
                dev_dbg(&intf->dev, "out of memory (acm kzalloc)\n");
                goto alloc_fail;
        }
 
        ctrlsize = le16_to_cpu(epctrl->wMaxPacketSize);
-       readsize = le16_to_cpu(epread->wMaxPacketSize)* ( quirks == SINGLE_RX_URB ? 1 : 2);
+       readsize = le16_to_cpu(epread->wMaxPacketSize) *
+                               (quirks == SINGLE_RX_URB ? 1 : 2);
        acm->writesize = le16_to_cpu(epwrite->wMaxPacketSize) * 20;
        acm->control = control_interface;
        acm->data = data_interface;
@@ -1082,6 +1131,8 @@ skip_normal_probe:
        spin_lock_init(&acm->read_lock);
        mutex_init(&acm->mutex);
        acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
+       tty_port_init(&acm->port);
+       acm->port.ops = &acm_port_ops;
 
        buf = usb_buffer_alloc(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma);
        if (!buf) {
@@ -1103,8 +1154,10 @@ skip_normal_probe:
        for (i = 0; i < num_rx_buf; i++) {
                struct acm_ru *rcv = &(acm->ru[i]);
 
-               if (!(rcv->urb = usb_alloc_urb(0, GFP_KERNEL))) {
-                       dev_dbg(&intf->dev, "out of memory (read urbs usb_alloc_urb)\n");
+               rcv->urb = usb_alloc_urb(0, GFP_KERNEL);
+               if (rcv->urb == NULL) {
+                       dev_dbg(&intf->dev,
+                               "out of memory (read urbs usb_alloc_urb)\n");
                        goto alloc_fail7;
                }
 
@@ -1117,26 +1170,29 @@ skip_normal_probe:
                rb->base = usb_buffer_alloc(acm->dev, readsize,
                                GFP_KERNEL, &rb->dma);
                if (!rb->base) {
-                       dev_dbg(&intf->dev, "out of memory (read bufs usb_buffer_alloc)\n");
+                       dev_dbg(&intf->dev,
+                               "out of memory (read bufs usb_buffer_alloc)\n");
                        goto alloc_fail7;
                }
        }
-       for(i = 0; i < ACM_NW; i++)
-       {
+       for (i = 0; i < ACM_NW; i++) {
                struct acm_wb *snd = &(acm->wb[i]);
 
-               if (!(snd->urb = usb_alloc_urb(0, GFP_KERNEL))) {
-                       dev_dbg(&intf->dev, "out of memory (write urbs usb_alloc_urb)");
+               snd->urb = usb_alloc_urb(0, GFP_KERNEL);
+               if (snd->urb == NULL) {
+                       dev_dbg(&intf->dev,
+                               "out of memory (write urbs usb_alloc_urb)");
                        goto alloc_fail7;
                }
 
-               usb_fill_bulk_urb(snd->urb, usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
-                               NULL, acm->writesize, acm_write_bulk, snd);
+               usb_fill_bulk_urb(snd->urb, usb_dev,
+                       usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
+                       NULL, acm->writesize, acm_write_bulk, snd);
                snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
                snd->instance = acm;
        }
 
-       usb_set_intfdata (intf, acm);
+       usb_set_intfdata(intf, acm);
 
        i = device_create_file(&intf->dev, &dev_attr_bmCapabilities);
        if (i < 0)
@@ -1147,7 +1203,8 @@ skip_normal_probe:
                if (!acm->country_codes)
                        goto skip_countries;
                acm->country_code_size = cfd->bLength - 4;
-               memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, cfd->bLength - 4);
+               memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0,
+                                                       cfd->bLength - 4);
                acm->country_rel_date = cfd->iCountryCodeRelDate;
 
                i = device_create_file(&intf->dev, &dev_attr_wCountryCodes);
@@ -1156,7 +1213,8 @@ skip_normal_probe:
                        goto skip_countries;
                }
 
-               i = device_create_file(&intf->dev, &dev_attr_iCountryCodeRelDate);
+               i = device_create_file(&intf->dev,
+                                               &dev_attr_iCountryCodeRelDate);
                if (i < 0) {
                        kfree(acm->country_codes);
                        goto skip_countries;
@@ -1164,8 +1222,10 @@ skip_normal_probe:
        }
 
 skip_countries:
-       usb_fill_int_urb(acm->ctrlurb, usb_dev, usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress),
-                        acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, epctrl->bInterval);
+       usb_fill_int_urb(acm->ctrlurb, usb_dev,
+                       usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress),
+                       acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm,
+                       epctrl->bInterval);
        acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
        acm->ctrlurb->transfer_dma = acm->ctrl_dma;
 
@@ -1212,7 +1272,7 @@ static void stop_data_traffic(struct acm *acm)
        tasklet_disable(&acm->urb_task);
 
        usb_kill_urb(acm->ctrlurb);
-       for(i = 0; i < ACM_NW; i++)
+       for (i = 0; i < ACM_NW; i++)
                usb_kill_urb(acm->wb[i].urb);
        for (i = 0; i < acm->rx_buflimit; i++)
                usb_kill_urb(acm->ru[i].urb);
@@ -1227,13 +1287,14 @@ static void acm_disconnect(struct usb_interface *intf)
 {
        struct acm *acm = usb_get_intfdata(intf);
        struct usb_device *usb_dev = interface_to_usbdev(intf);
+       struct tty_struct *tty;
 
        /* sibling interface is already cleaning up */
        if (!acm)
                return;
 
        mutex_lock(&open_mutex);
-       if (acm->country_codes){
+       if (acm->country_codes) {
                device_remove_file(&acm->control->dev,
                                &dev_attr_wCountryCodes);
                device_remove_file(&acm->control->dev,
@@ -1247,22 +1308,25 @@ static void acm_disconnect(struct usb_interface *intf)
        stop_data_traffic(acm);
 
        acm_write_buffers_free(acm);
-       usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma);
+       usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer,
+                                                               acm->ctrl_dma);
        acm_read_buffers_free(acm);
 
        usb_driver_release_interface(&acm_driver, intf == acm->control ?
                                        acm->data : acm->control);
 
-       if (!acm->used) {
+       if (acm->port.count == 0) {
                acm_tty_unregister(acm);
                mutex_unlock(&open_mutex);
                return;
        }
 
        mutex_unlock(&open_mutex);
-
-       if (acm->tty)
-               tty_hangup(acm->tty);
+       tty = tty_port_tty_get(&acm->port);
+       if (tty) {
+               tty_hangup(tty);
+               tty_kref_put(tty);
+       }
 }
 
 #ifdef CONFIG_PM
@@ -1297,7 +1361,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
        */
        mutex_lock(&acm->mutex);
 
-       if (acm->used)
+       if (acm->port.count)
                stop_data_traffic(acm);
 
        mutex_unlock(&acm->mutex);
@@ -1319,7 +1383,7 @@ static int acm_resume(struct usb_interface *intf)
                return 0;
 
        mutex_lock(&acm->mutex);
-       if (acm->used) {
+       if (acm->port.count) {
                rv = usb_submit_urb(acm->ctrlurb, GFP_NOIO);
                if (rv < 0)
                        goto err_out;
@@ -1398,7 +1462,7 @@ static struct usb_device_id acm_ids[] = {
        { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
                USB_CDC_ACM_PROTO_AT_GSM) },
        { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
-               USB_CDC_ACM_PROTO_AT_3G ) },
+               USB_CDC_ACM_PROTO_AT_3G) },
        { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
                USB_CDC_ACM_PROTO_AT_CDMA) },
 
@@ -1406,7 +1470,7 @@ static struct usb_device_id acm_ids[] = {
        { }
 };
 
-MODULE_DEVICE_TABLE (usb, acm_ids);
+MODULE_DEVICE_TABLE(usb, acm_ids);
 
 static struct usb_driver acm_driver = {
        .name =         "cdc_acm",
@@ -1429,6 +1493,7 @@ static struct usb_driver acm_driver = {
 static const struct tty_operations acm_ops = {
        .open =                 acm_tty_open,
        .close =                acm_tty_close,
+       .hangup =               acm_tty_hangup,
        .write =                acm_tty_write,
        .write_room =           acm_tty_write_room,
        .ioctl =                acm_tty_ioctl,
@@ -1460,7 +1525,8 @@ static int __init acm_init(void)
        acm_tty_driver->subtype = SERIAL_TYPE_NORMAL,
        acm_tty_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
        acm_tty_driver->init_termios = tty_std_termios;
-       acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL;
+       acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD |
+                                                               HUPCL | CLOCAL;
        tty_set_operations(acm_tty_driver, &acm_ops);
 
        retval = tty_register_driver(acm_tty_driver);
@@ -1492,7 +1558,7 @@ static void __exit acm_exit(void)
 module_init(acm_init);
 module_exit(acm_exit);
 
-MODULE_AUTHOR( DRIVER_AUTHOR );
-MODULE_DESCRIPTION( DRIVER_DESC );
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR);
index 1f95e7a..4c38564 100644 (file)
@@ -89,8 +89,8 @@ struct acm {
        struct usb_device *dev;                         /* the corresponding usb device */
        struct usb_interface *control;                  /* control interface */
        struct usb_interface *data;                     /* data interface */
-       struct tty_struct *tty;                         /* the corresponding tty */
-       struct urb *ctrlurb;                    /* urbs */
+       struct tty_port port;                           /* our tty port data */
+       struct urb *ctrlurb;                            /* urbs */
        u8 *ctrl_buffer;                                /* buffers of urbs */
        dma_addr_t ctrl_dma;                            /* dma handles of buffers */
        u8 *country_codes;                              /* country codes from device */
@@ -120,7 +120,6 @@ struct acm {
        unsigned int ctrlout;                           /* output control lines (DTR, RTS) */
        unsigned int writesize;                         /* max packet size for the output bulk endpoint */
        unsigned int readsize,ctrlsize;                 /* buffer sizes for freeing */
-       unsigned int used;                              /* someone has this acm's device open */
        unsigned int minor;                             /* acm minor number */
        unsigned char throttle;                         /* throttled by tty layer */
        unsigned char clocal;                           /* termios CLOCAL */
index b7eacad..2bfd6dd 100644 (file)
@@ -93,8 +93,7 @@ static int  belkin_sa_startup(struct usb_serial *serial);
 static void belkin_sa_shutdown(struct usb_serial *serial);
 static int  belkin_sa_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void belkin_sa_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+static void belkin_sa_close(struct usb_serial_port *port);
 static void belkin_sa_read_int_callback(struct urb *urb);
 static void belkin_sa_set_termios(struct tty_struct *tty,
                        struct usb_serial_port *port, struct ktermios * old);
@@ -244,8 +243,7 @@ exit:
 } /* belkin_sa_open */
 
 
-static void belkin_sa_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void belkin_sa_close(struct usb_serial_port *port)
 {
        dbg("%s port %d", __func__, port->number);
 
index ab4cc27..2830766 100644 (file)
@@ -262,13 +262,33 @@ error:    kfree(priv);
        return r;
 }
 
-static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port,
-                               struct file *filp)
+static int ch341_carrier_raised(struct usb_serial_port *port)
+{
+       struct ch341_private *priv = usb_get_serial_port_data(port);
+       if (priv->line_status & CH341_BIT_DCD)
+               return 1;
+       return 0;
+}
+
+static void ch341_dtr_rts(struct usb_serial_port *port, int on)
 {
        struct ch341_private *priv = usb_get_serial_port_data(port);
        unsigned long flags;
-       unsigned int c_cflag;
 
+       dbg("%s - port %d", __func__, port->number);
+       /* raise or drop DTR and RTS as requested by 'on' */
+       spin_lock_irqsave(&priv->lock, flags);
+       if (on)
+               priv->line_control |= CH341_BIT_RTS | CH341_BIT_DTR;
+       else
+               priv->line_control &= ~(CH341_BIT_RTS | CH341_BIT_DTR);
+       spin_unlock_irqrestore(&priv->lock, flags);
+       ch341_set_handshake(port->serial->dev, priv->line_control);
+       wake_up_interruptible(&priv->delta_msr_wait);
+}
+
+static void ch341_close(struct usb_serial_port *port)
+{
        dbg("%s - port %d", __func__, port->number);
 
        /* shutdown our urbs */
@@ -276,18 +296,6 @@ static void ch341_close(struct tty_struct *tty, struct usb_serial_port *port,
        usb_kill_urb(port->write_urb);
        usb_kill_urb(port->read_urb);
        usb_kill_urb(port->interrupt_in_urb);
-
-       if (tty) {
-               c_cflag = tty->termios->c_cflag;
-               if (c_cflag & HUPCL) {
-                       /* drop DTR and RTS */
-                       spin_lock_irqsave(&priv->lock, flags);
-                       priv->line_control = 0;
-                       spin_unlock_irqrestore(&priv->lock, flags);
-                       ch341_set_handshake(port->serial->dev, 0);
-               }
-       }
-       wake_up_interruptible(&priv->delta_msr_wait);
 }
 
 
@@ -302,7 +310,6 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port,
        dbg("ch341_open()");
 
        priv->baud_rate = DEFAULT_BAUD_RATE;
-       priv->line_control = CH341_BIT_RTS | CH341_BIT_DTR;
 
        r = ch341_configure(serial->dev, priv);
        if (r)
@@ -322,7 +329,7 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port,
        if (r) {
                dev_err(&port->dev, "%s - failed submitting interrupt urb,"
                        " error %d\n", __func__, r);
-               ch341_close(tty, port, NULL);
+               ch341_close(port);
                return -EPROTO;
        }
 
@@ -343,9 +350,6 @@ static void ch341_set_termios(struct tty_struct *tty,
 
        dbg("ch341_set_termios()");
 
-       if (!tty || !tty->termios)
-               return;
-
        baud_rate = tty_get_baud_rate(tty);
 
        priv->baud_rate = baud_rate;
@@ -568,6 +572,8 @@ static struct usb_serial_driver ch341_device = {
        .usb_driver        = &ch341_driver,
        .num_ports         = 1,
        .open              = ch341_open,
+       .dtr_rts           = ch341_dtr_rts,
+       .carrier_raised    = ch341_carrier_raised,
        .close             = ch341_close,
        .ioctl             = ch341_ioctl,
        .set_termios       = ch341_set_termios,
index 19e2404..247b61b 100644 (file)
@@ -169,7 +169,9 @@ static int usb_console_setup(struct console *co, char *options)
                        kfree(tty);
                }
        }
-
+       /* Mark the port as a console so that a hangup on it does not
+          kill the hardware. The use count has also been bumped by one
+          so the port won't go idle. */
        port->console = 1;
        retval = 0;
 
@@ -182,7 +184,7 @@ free_tty:
        kfree(tty);
 reset_open_count:
        port->port.count = 0;
-goto out;
+       goto out;
 }
 
 static void usb_console_write(struct console *co,
index e8d5133..16a154d 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Silicon Laboratories CP2101/CP2102 USB to RS232 serial adaptor driver
+ * Silicon Laboratories CP210x USB to RS232 serial adaptor driver
  *
  * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk)
  *
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.08"
-#define DRIVER_DESC "Silicon Labs CP2101/CP2102 RS232 serial adaptor driver"
+#define DRIVER_VERSION "v0.09"
+#define DRIVER_DESC "Silicon Labs CP210x RS232 serial adaptor driver"
 
 /*
  * Function Prototypes
  */
-static int cp2101_open(struct tty_struct *, struct usb_serial_port *,
+static int cp210x_open(struct tty_struct *, struct usb_serial_port *,
                                                        struct file *);
-static void cp2101_cleanup(struct usb_serial_port *);
-static void cp2101_close(struct tty_struct *, struct usb_serial_port *,
-                                                       struct file*);
-static void cp2101_get_termios(struct tty_struct *,
+static void cp210x_cleanup(struct usb_serial_port *);
+static void cp210x_close(struct usb_serial_port *);
+static void cp210x_get_termios(struct tty_struct *,
        struct usb_serial_port *port);
-static void cp2101_get_termios_port(struct usb_serial_port *port,
+static void cp210x_get_termios_port(struct usb_serial_port *port,
        unsigned int *cflagp, unsigned int *baudp);
-static void cp2101_set_termios(struct tty_struct *, struct usb_serial_port *,
+static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *,
                                                        struct ktermios*);
-static int cp2101_tiocmget(struct tty_struct *, struct file *);
-static int cp2101_tiocmset(struct tty_struct *, struct file *,
+static int cp210x_tiocmget(struct tty_struct *, struct file *);
+static int cp210x_tiocmset(struct tty_struct *, struct file *,
                unsigned int, unsigned int);
-static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *,
+static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *,
                unsigned int, unsigned int);
-static void cp2101_break_ctl(struct tty_struct *, int);
-static int cp2101_startup(struct usb_serial *);
-static void cp2101_shutdown(struct usb_serial *);
+static void cp210x_break_ctl(struct tty_struct *, int);
+static int cp210x_startup(struct usb_serial *);
+static void cp210x_shutdown(struct usb_serial *);
 
 static int debug;
 
 static struct usb_device_id id_table [] = {
        { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */
        { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */
+       { USB_DEVICE(0x0745, 0x1000) }, /* CipherLab USB CCD Barcode Scanner 1000 */
        { USB_DEVICE(0x08e6, 0x5501) }, /* Gemalto Prox-PU/CU contactless smartcard reader */
+       { USB_DEVICE(0x08FD, 0x000A) }, /* Digianswer A/S , ZigBee/802.15.4 MAC Device */
        { USB_DEVICE(0x0FCF, 0x1003) }, /* Dynastream ANT development board */
        { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */
        { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */
        { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */
        { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */
        { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */
+       { USB_DEVICE(0x10C4, 0x0F91) }, /* Vstabi */
        { USB_DEVICE(0x10C4, 0x800A) }, /* SPORTident BSM7-D-USB main station */
        { USB_DEVICE(0x10C4, 0x803B) }, /* Pololu USB-serial converter */
        { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */
@@ -85,10 +87,12 @@ static struct usb_device_id id_table [] = {
        { USB_DEVICE(0x10C4, 0x81C8) }, /* Lipowsky Industrie Elektronik GmbH, Baby-JTAG */
        { USB_DEVICE(0x10C4, 0x81E2) }, /* Lipowsky Industrie Elektronik GmbH, Baby-LIN */
        { USB_DEVICE(0x10C4, 0x81E7) }, /* Aerocomm Radio */
+       { USB_DEVICE(0x10C4, 0x81F2) }, /* C1007 HF band RFID controller */
        { USB_DEVICE(0x10C4, 0x8218) }, /* Lipowsky Industrie Elektronik GmbH, HARP-1 */
        { USB_DEVICE(0x10C4, 0x822B) }, /* Modem EDGE(GSM) Comander 2 */
        { USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demostration module */
        { USB_DEVICE(0x10c4, 0x8293) }, /* Telegesys ETRX2USB */
+       { USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */
        { USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */
        { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */
        { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
@@ -99,7 +103,9 @@ static struct usb_device_id id_table [] = {
        { USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */
        { USB_DEVICE(0x10C4, 0xF004) }, /* Elan Digital Systems USBcount50 */
        { USB_DEVICE(0x10C5, 0xEA61) }, /* Silicon Labs MobiData GPRS USB Modem */
+       { USB_DEVICE(0x10CE, 0xEA6A) }, /* Silicon Labs MobiData GPRS USB Modem 100EU */
        { USB_DEVICE(0x13AD, 0x9999) }, /* Baltech card reader */
+       { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
        { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */
        { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */
        { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
@@ -108,53 +114,70 @@ static struct usb_device_id id_table [] = {
 
 MODULE_DEVICE_TABLE(usb, id_table);
 
-static struct usb_driver cp2101_driver = {
-       .name           = "cp2101",
+static struct usb_driver cp210x_driver = {
+       .name           = "cp210x",
        .probe          = usb_serial_probe,
        .disconnect     = usb_serial_disconnect,
        .id_table       = id_table,
        .no_dynamic_id  =       1,
 };
 
-static struct usb_serial_driver cp2101_device = {
+static struct usb_serial_driver cp210x_device = {
        .driver = {
                .owner =        THIS_MODULE,
-               .name =         "cp2101",
+               .name =         "cp210x",
        },
-       .usb_driver             = &cp2101_driver,
+       .usb_driver             = &cp210x_driver,
        .id_table               = id_table,
        .num_ports              = 1,
-       .open                   = cp2101_open,
-       .close                  = cp2101_close,
-       .break_ctl              = cp2101_break_ctl,
-       .set_termios            = cp2101_set_termios,
-       .tiocmget               = cp2101_tiocmget,
-       .tiocmset               = cp2101_tiocmset,
-       .attach                 = cp2101_startup,
-       .shutdown               = cp2101_shutdown,
+       .open                   = cp210x_open,
+       .close                  = cp210x_close,
+       .break_ctl              = cp210x_break_ctl,
+       .set_termios            = cp210x_set_termios,
+       .tiocmget               = cp210x_tiocmget,
+       .tiocmset               = cp210x_tiocmset,
+       .attach                 = cp210x_startup,
+       .shutdown               = cp210x_shutdown,
 };
 
 /* Config request types */
 #define REQTYPE_HOST_TO_DEVICE 0x41
 #define REQTYPE_DEVICE_TO_HOST 0xc1
 
-/* Config SET requests. To GET, add 1 to the request number */
-#define CP2101_UART            0x00    /* Enable / Disable */
-#define CP2101_BAUDRATE                0x01    /* (BAUD_RATE_GEN_FREQ / baudrate) */
-#define CP2101_BITS            0x03    /* 0x(0)(databits)(parity)(stopbits) */
-#define CP2101_BREAK           0x05    /* On / Off */
-#define CP2101_CONTROL         0x07    /* Flow control line states */
-#define CP2101_MODEMCTL                0x13    /* Modem controls */
-#define CP2101_CONFIG_6                0x19    /* 6 bytes of config data ??? */
-
-/* CP2101_UART */
+/* Config request codes */
+#define CP210X_IFC_ENABLE      0x00
+#define CP210X_SET_BAUDDIV     0x01
+#define CP210X_GET_BAUDDIV     0x02
+#define CP210X_SET_LINE_CTL    0x03
+#define CP210X_GET_LINE_CTL    0x04
+#define CP210X_SET_BREAK       0x05
+#define CP210X_IMM_CHAR                0x06
+#define CP210X_SET_MHS         0x07
+#define CP210X_GET_MDMSTS      0x08
+#define CP210X_SET_XON         0x09
+#define CP210X_SET_XOFF                0x0A
+#define CP210X_SET_EVENTMASK   0x0B
+#define CP210X_GET_EVENTMASK   0x0C
+#define CP210X_SET_CHAR                0x0D
+#define CP210X_GET_CHARS       0x0E
+#define CP210X_GET_PROPS       0x0F
+#define CP210X_GET_COMM_STATUS 0x10
+#define CP210X_RESET           0x11
+#define CP210X_PURGE           0x12
+#define CP210X_SET_FLOW                0x13
+#define CP210X_GET_FLOW                0x14
+#define CP210X_EMBED_EVENTS    0x15
+#define CP210X_GET_EVENTSTATE  0x16
+#define CP210X_SET_CHARS       0x19
+
+/* CP210X_IFC_ENABLE */
 #define UART_ENABLE            0x0001
 #define UART_DISABLE           0x0000
 
-/* CP2101_BAUDRATE */
+/* CP210X_(SET|GET)_BAUDDIV */
 #define BAUD_RATE_GEN_FREQ     0x384000
 
-/* CP2101_BITS */
+/* CP210X_(SET|GET)_LINE_CTL */
 #define BITS_DATA_MASK         0X0f00
 #define BITS_DATA_5            0X0500
 #define BITS_DATA_6            0X0600
@@ -174,11 +197,11 @@ static struct usb_serial_driver cp2101_device = {
 #define BITS_STOP_1_5          0x0001
 #define BITS_STOP_2            0x0002
 
-/* CP2101_BREAK */
+/* CP210X_SET_BREAK */
 #define BREAK_ON               0x0000
 #define BREAK_OFF              0x0001
 
-/* CP2101_CONTROL */
+/* CP210X_(SET_MHS|GET_MDMSTS) */
 #define CONTROL_DTR            0x0001
 #define CONTROL_RTS            0x0002
 #define CONTROL_CTS            0x0010
@@ -189,13 +212,13 @@ static struct usb_serial_driver cp2101_device = {
 #define CONTROL_WRITE_RTS      0x0200
 
 /*
- * cp2101_get_config
- * Reads from the CP2101 configuration registers
+ * cp210x_get_config
+ * Reads from the CP210x configuration registers
  * 'size' is specified in bytes.
  * 'data' is a pointer to a pre-allocated array of integers large
  * enough to hold 'size' bytes (with 4 bytes to each integer)
  */
-static int cp2101_get_config(struct usb_serial_port *port, u8 request,
+static int cp210x_get_config(struct usb_serial_port *port, u8 request,
                unsigned int *data, int size)
 {
        struct usb_serial *serial = port->serial;
@@ -211,9 +234,6 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request,
                return -ENOMEM;
        }
 
-       /* For get requests, the request number must be incremented */
-       request++;
-
        /* Issue the request, attempting to read 'size' bytes */
        result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0),
                                request, REQTYPE_DEVICE_TO_HOST, 0x0000,
@@ -236,12 +256,12 @@ static int cp2101_get_config(struct usb_serial_port *port, u8 request,
 }
 
 /*
- * cp2101_set_config
- * Writes to the CP2101 configuration registers
+ * cp210x_set_config
+ * Writes to the CP210x configuration registers
  * Values less than 16 bits wide are sent directly
  * 'size' is specified in bytes.
  */
-static int cp2101_set_config(struct usb_serial_port *port, u8 request,
+static int cp210x_set_config(struct usb_serial_port *port, u8 request,
                unsigned int *data, int size)
 {
        struct usb_serial *serial = port->serial;
@@ -292,21 +312,21 @@ static int cp2101_set_config(struct usb_serial_port *port, u8 request,
 }
 
 /*
- * cp2101_set_config_single
- * Convenience function for calling cp2101_set_config on single data values
+ * cp210x_set_config_single
+ * Convenience function for calling cp210x_set_config on single data values
  * without requiring an integer pointer
  */
-static inline int cp2101_set_config_single(struct usb_serial_port *port,
+static inline int cp210x_set_config_single(struct usb_serial_port *port,
                u8 request, unsigned int data)
 {
-       return cp2101_set_config(port, request, &data, 2);
+       return cp210x_set_config(port, request, &data, 2);
 }
 
 /*
- * cp2101_quantise_baudrate
+ * cp210x_quantise_baudrate
  * Quantises the baud rate as per AN205 Table 1
  */
-static unsigned int cp2101_quantise_baudrate(unsigned int baud) {
+static unsigned int cp210x_quantise_baudrate(unsigned int baud) {
        if      (baud <= 56)       baud = 0;
        else if (baud <= 300)      baud = 300;
        else if (baud <= 600)      baud = 600;
@@ -343,7 +363,7 @@ static unsigned int cp2101_quantise_baudrate(unsigned int baud) {
        return baud;
 }
 
-static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
+static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *port,
                                struct file *filp)
 {
        struct usb_serial *serial = port->serial;
@@ -351,7 +371,7 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
 
        dbg("%s - port %d", __func__, port->number);
 
-       if (cp2101_set_config_single(port, CP2101_UART, UART_ENABLE)) {
+       if (cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_ENABLE)) {
                dev_err(&port->dev, "%s - Unable to enable UART\n",
                                __func__);
                return -EPROTO;
@@ -373,17 +393,17 @@ static int cp2101_open(struct tty_struct *tty, struct usb_serial_port *port,
        }
 
        /* Configure the termios structure */
-       cp2101_get_termios(tty, port);
+       cp210x_get_termios(tty, port);
 
        /* Set the DTR and RTS pins low */
-       cp2101_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data
+       cp210x_tiocmset_port(tty ? (struct usb_serial_port *) tty->driver_data
                        : port,
                NULL, TIOCM_DTR | TIOCM_RTS, 0);
 
        return 0;
 }
 
-static void cp2101_cleanup(struct usb_serial_port *port)
+static void cp210x_cleanup(struct usb_serial_port *port)
 {
        struct usb_serial *serial = port->serial;
 
@@ -398,8 +418,7 @@ static void cp2101_cleanup(struct usb_serial_port *port)
        }
 }
 
-static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port,
-                                       struct file *filp)
+static void cp210x_close(struct usb_serial_port *port)
 {
        dbg("%s - port %d", __func__, port->number);
 
@@ -410,23 +429,23 @@ static void cp2101_close(struct tty_struct *tty, struct usb_serial_port *port,
 
        mutex_lock(&port->serial->disc_mutex);
        if (!port->serial->disconnected)
-               cp2101_set_config_single(port, CP2101_UART, UART_DISABLE);
+               cp210x_set_config_single(port, CP210X_IFC_ENABLE, UART_DISABLE);
        mutex_unlock(&port->serial->disc_mutex);
 }
 
 /*
- * cp2101_get_termios
+ * cp210x_get_termios
  * Reads the baud rate, data bits, parity, stop bits and flow control mode
  * from the device, corrects any unsupported values, and configures the
  * termios structure to reflect the state of the device
  */
-static void cp2101_get_termios(struct tty_struct *tty,
+static void cp210x_get_termios(struct tty_struct *tty,
        struct usb_serial_port *port)
 {
        unsigned int baud;
 
        if (tty) {
-               cp2101_get_termios_port(tty->driver_data,
+               cp210x_get_termios_port(tty->driver_data,
                        &tty->termios->c_cflag, &baud);
                tty_encode_baud_rate(tty, baud, baud);
        }
@@ -434,15 +453,15 @@ static void cp2101_get_termios(struct tty_struct *tty,
        else {
                unsigned int cflag;
                cflag = 0;
-               cp2101_get_termios_port(port, &cflag, &baud);
+               cp210x_get_termios_port(port, &cflag, &baud);
        }
 }
 
 /*
- * cp2101_get_termios_port
- * This is the heart of cp2101_get_termios which always uses a &usb_serial_port.
+ * cp210x_get_termios_port
+ * This is the heart of cp210x_get_termios which always uses a &usb_serial_port.
  */
-static void cp2101_get_termios_port(struct usb_serial_port *port,
+static void cp210x_get_termios_port(struct usb_serial_port *port,
        unsigned int *cflagp, unsigned int *baudp)
 {
        unsigned int cflag, modem_ctl[4];
@@ -451,17 +470,17 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
 
        dbg("%s - port %d", __func__, port->number);
 
-       cp2101_get_config(port, CP2101_BAUDRATE, &baud, 2);
+       cp210x_get_config(port, CP210X_GET_BAUDDIV, &baud, 2);
        /* Convert to baudrate */
        if (baud)
-               baud = cp2101_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
+               baud = cp210x_quantise_baudrate((BAUD_RATE_GEN_FREQ + baud/2)/ baud);
 
        dbg("%s - baud rate = %d", __func__, baud);
        *baudp = baud;
 
        cflag = *cflagp;
 
-       cp2101_get_config(port, CP2101_BITS, &bits, 2);
+       cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
        cflag &= ~CSIZE;
        switch (bits & BITS_DATA_MASK) {
        case BITS_DATA_5:
@@ -486,14 +505,14 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
                cflag |= CS8;
                bits &= ~BITS_DATA_MASK;
                bits |= BITS_DATA_8;
-               cp2101_set_config(port, CP2101_BITS, &bits, 2);
+               cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
                break;
        default:
                dbg("%s - Unknown number of data bits, using 8", __func__);
                cflag |= CS8;
                bits &= ~BITS_DATA_MASK;
                bits |= BITS_DATA_8;
-               cp2101_set_config(port, CP2101_BITS, &bits, 2);
+               cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
                break;
        }
 
@@ -516,20 +535,20 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
                                __func__);
                cflag &= ~PARENB;
                bits &= ~BITS_PARITY_MASK;
-               cp2101_set_config(port, CP2101_BITS, &bits, 2);
+               cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
                break;
        case BITS_PARITY_SPACE:
                dbg("%s - parity = SPACE (not supported, disabling parity)",
                                __func__);
                cflag &= ~PARENB;
                bits &= ~BITS_PARITY_MASK;
-               cp2101_set_config(port, CP2101_BITS, &bits, 2);
+               cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
                break;
        default:
                dbg("%s - Unknown parity mode, disabling parity", __func__);
                cflag &= ~PARENB;
                bits &= ~BITS_PARITY_MASK;
-               cp2101_set_config(port, CP2101_BITS, &bits, 2);
+               cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
                break;
        }
 
@@ -542,7 +561,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
                dbg("%s - stop bits = 1.5 (not supported, using 1 stop bit)",
                                                                __func__);
                bits &= ~BITS_STOP_MASK;
-               cp2101_set_config(port, CP2101_BITS, &bits, 2);
+               cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
                break;
        case BITS_STOP_2:
                dbg("%s - stop bits = 2", __func__);
@@ -552,11 +571,11 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
                dbg("%s - Unknown number of stop bits, using 1 stop bit",
                                                                __func__);
                bits &= ~BITS_STOP_MASK;
-               cp2101_set_config(port, CP2101_BITS, &bits, 2);
+               cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2);
                break;
        }
 
-       cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+       cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
        if (modem_ctl[0] & 0x0008) {
                dbg("%s - flow control = CRTSCTS", __func__);
                cflag |= CRTSCTS;
@@ -568,7 +587,7 @@ static void cp2101_get_termios_port(struct usb_serial_port *port,
        *cflagp = cflag;
 }
 
-static void cp2101_set_termios(struct tty_struct *tty,
+static void cp210x_set_termios(struct tty_struct *tty,
                struct usb_serial_port *port, struct ktermios *old_termios)
 {
        unsigned int cflag, old_cflag;
@@ -583,13 +602,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
        tty->termios->c_cflag &= ~CMSPAR;
        cflag = tty->termios->c_cflag;
        old_cflag = old_termios->c_cflag;
-       baud = cp2101_quantise_baudrate(tty_get_baud_rate(tty));
+       baud = cp210x_quantise_baudrate(tty_get_baud_rate(tty));
 
        /* If the baud rate is to be updated*/
        if (baud != tty_termios_baud_rate(old_termios) && baud != 0) {
                dbg("%s - Setting baud rate to %d baud", __func__,
                                baud);
-               if (cp2101_set_config_single(port, CP2101_BAUDRATE,
+               if (cp210x_set_config_single(port, CP210X_SET_BAUDDIV,
                                        ((BAUD_RATE_GEN_FREQ + baud/2) / baud))) {
                        dbg("Baud rate requested not supported by device\n");
                        baud = tty_termios_baud_rate(old_termios);
@@ -600,7 +619,7 @@ static void cp2101_set_termios(struct tty_struct *tty,
 
        /* If the number of data bits is to be updated */
        if ((cflag & CSIZE) != (old_cflag & CSIZE)) {
-               cp2101_get_config(port, CP2101_BITS, &bits, 2);
+               cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
                bits &= ~BITS_DATA_MASK;
                switch (cflag & CSIZE) {
                case CS5:
@@ -624,19 +643,19 @@ static void cp2101_set_termios(struct tty_struct *tty,
                        dbg("%s - data bits = 9", __func__);
                        break;*/
                default:
-                       dbg("cp2101 driver does not "
+                       dbg("cp210x driver does not "
                                        "support the number of bits requested,"
                                        " using 8 bit mode\n");
                                bits |= BITS_DATA_8;
                                break;
                }
-               if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+               if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
                        dbg("Number of data bits requested "
                                        "not supported by device\n");
        }
 
        if ((cflag & (PARENB|PARODD)) != (old_cflag & (PARENB|PARODD))) {
-               cp2101_get_config(port, CP2101_BITS, &bits, 2);
+               cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
                bits &= ~BITS_PARITY_MASK;
                if (cflag & PARENB) {
                        if (cflag & PARODD) {
@@ -647,13 +666,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
                                dbg("%s - parity = EVEN", __func__);
                        }
                }
-               if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+               if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
                        dbg("Parity mode not supported "
                                        "by device\n");
        }
 
        if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) {
-               cp2101_get_config(port, CP2101_BITS, &bits, 2);
+               cp210x_get_config(port, CP210X_GET_LINE_CTL, &bits, 2);
                bits &= ~BITS_STOP_MASK;
                if (cflag & CSTOPB) {
                        bits |= BITS_STOP_2;
@@ -662,13 +681,13 @@ static void cp2101_set_termios(struct tty_struct *tty,
                        bits |= BITS_STOP_1;
                        dbg("%s - stop bits = 1", __func__);
                }
-               if (cp2101_set_config(port, CP2101_BITS, &bits, 2))
+               if (cp210x_set_config(port, CP210X_SET_LINE_CTL, &bits, 2))
                        dbg("Number of stop bits requested "
                                        "not supported by device\n");
        }
 
        if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) {
-               cp2101_get_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+               cp210x_get_config(port, CP210X_GET_FLOW, modem_ctl, 16);
                dbg("%s - read modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
                                __func__, modem_ctl[0], modem_ctl[1],
                                modem_ctl[2], modem_ctl[3]);
@@ -688,19 +707,19 @@ static void cp2101_set_termios(struct tty_struct *tty,
                dbg("%s - write modem controls = 0x%.4x 0x%.4x 0x%.4x 0x%.4x",
                                __func__, modem_ctl[0], modem_ctl[1],
                                modem_ctl[2], modem_ctl[3]);
-               cp2101_set_config(port, CP2101_MODEMCTL, modem_ctl, 16);
+               cp210x_set_config(port, CP210X_SET_FLOW, modem_ctl, 16);
        }
 
 }
 
-static int cp2101_tiocmset (struct tty_struct *tty, struct file *file,
+static int cp210x_tiocmset (struct tty_struct *tty, struct file *file,
                unsigned int set, unsigned int clear)
 {
        struct usb_serial_port *port = tty->driver_data;
-       return cp2101_tiocmset_port(port, file, set, clear);
+       return cp210x_tiocmset_port(port, file, set, clear);
 }
 
-static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file,
+static int cp210x_tiocmset_port(struct usb_serial_port *port, struct file *file,
                unsigned int set, unsigned int clear)
 {
        unsigned int control = 0;
@@ -726,10 +745,10 @@ static int cp2101_tiocmset_port(struct usb_serial_port *port, struct file *file,
 
        dbg("%s - control = 0x%.4x", __func__, control);
 
-       return cp2101_set_config(port, CP2101_CONTROL, &control, 2);
+       return cp210x_set_config(port, CP210X_SET_MHS, &control, 2);
 }
 
-static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
+static int cp210x_tiocmget (struct tty_struct *tty, struct file *file)
 {
        struct usb_serial_port *port = tty->driver_data;
        unsigned int control;
@@ -737,7 +756,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
 
        dbg("%s - port %d", __func__, port->number);
 
-       cp2101_get_config(port, CP2101_CONTROL, &control, 1);
+       cp210x_get_config(port, CP210X_GET_MDMSTS, &control, 1);
 
        result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0)
                |((control & CONTROL_RTS) ? TIOCM_RTS : 0)
@@ -751,7 +770,7 @@ static int cp2101_tiocmget (struct tty_struct *tty, struct file *file)
        return result;
 }
 
-static void cp2101_break_ctl (struct tty_struct *tty, int break_state)
+static void cp210x_break_ctl (struct tty_struct *tty, int break_state)
 {
        struct usb_serial_port *port = tty->driver_data;
        unsigned int state;
@@ -763,17 +782,17 @@ static void cp2101_break_ctl (struct tty_struct *tty, int break_state)
                state = BREAK_ON;
        dbg("%s - turning break %s", __func__,
                        state == BREAK_OFF ? "off" : "on");
-       cp2101_set_config(port, CP2101_BREAK, &state, 2);
+       cp210x_set_config(port, CP210X_SET_BREAK, &state, 2);
 }
 
-static int cp2101_startup(struct usb_serial *serial)
+static int cp210x_startup(struct usb_serial *serial)
 {
-       /* CP2101 buffers behave strangely unless device is reset */
+       /* CP210x buffers behave strangely unless device is reset */
        usb_reset_device(serial->dev);
        return 0;
 }
 
-static void cp2101_shutdown(struct usb_serial *serial)
+static void cp210x_shutdown(struct usb_serial *serial)
 {
        int i;
 
@@ -781,21 +800,21 @@ static void cp2101_shutdown(struct usb_serial *serial)
 
        /* Stop reads and writes on all ports */
        for (i = 0; i < serial->num_ports; ++i)
-               cp2101_cleanup(serial->port[i]);
+               cp210x_cleanup(serial->port[i]);
 }
 
-static int __init cp2101_init(void)
+static int __init cp210x_init(void)
 {
        int retval;
 
-       retval = usb_serial_register(&cp2101_device);
+       retval = usb_serial_register(&cp210x_device);
        if (retval)
                return retval; /* Failed to register */
 
-       retval = usb_register(&cp2101_driver);
+       retval = usb_register(&cp210x_driver);
        if (retval) {
                /* Failed to register */
-               usb_serial_deregister(&cp2101_device);
+               usb_serial_deregister(&cp210x_device);
                return retval;
        }
 
@@ -805,14 +824,14 @@ static int __init cp2101_init(void)
        return 0;
 }
 
-static void __exit cp2101_exit(void)
+static void __exit cp210x_exit(void)
 {
-       usb_deregister(&cp2101_driver);
-       usb_serial_deregister(&cp2101_device);
+       usb_deregister(&cp210x_driver);
+       usb_serial_deregister(&cp210x_device);
 }
 
-module_init(cp2101_init);
-module_exit(cp2101_exit);
+module_init(cp210x_init);
+module_exit(cp210x_exit);
 
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_VERSION(DRIVER_VERSION);
index dd501bb..933ba91 100644 (file)
@@ -61,8 +61,7 @@ static int cyberjack_startup(struct usb_serial *serial);
 static void cyberjack_shutdown(struct usb_serial *serial);
 static int  cyberjack_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void cyberjack_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+static void cyberjack_close(struct usb_serial_port *port);
 static int cyberjack_write(struct tty_struct *tty,
        struct usb_serial_port *port, const unsigned char *buf, int count);
 static int cyberjack_write_room(struct tty_struct *tty);
@@ -185,8 +184,7 @@ static int  cyberjack_open(struct tty_struct *tty,
        return result;
 }
 
-static void cyberjack_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void cyberjack_close(struct usb_serial_port *port)
 {
        dbg("%s - port %d", __func__, port->number);
 
index e568710..669f938 100644 (file)
@@ -174,8 +174,8 @@ static int  cypress_ca42v2_startup(struct usb_serial *serial);
 static void cypress_shutdown(struct usb_serial *serial);
 static int  cypress_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void cypress_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+static void cypress_close(struct usb_serial_port *port);
+static void cypress_dtr_rts(struct usb_serial_port *port, int on);
 static int  cypress_write(struct tty_struct *tty, struct usb_serial_port *port,
                        const unsigned char *buf, int count);
 static void cypress_send(struct usb_serial_port *port);
@@ -218,6 +218,7 @@ static struct usb_serial_driver cypress_earthmate_device = {
        .shutdown =                     cypress_shutdown,
        .open =                         cypress_open,
        .close =                        cypress_close,
+       .dtr_rts =                      cypress_dtr_rts,
        .write =                        cypress_write,
        .write_room =                   cypress_write_room,
        .ioctl =                        cypress_ioctl,
@@ -244,6 +245,7 @@ static struct usb_serial_driver cypress_hidcom_device = {
        .shutdown =                     cypress_shutdown,
        .open =                         cypress_open,
        .close =                        cypress_close,
+       .dtr_rts =                      cypress_dtr_rts,
        .write =                        cypress_write,
        .write_room =                   cypress_write_room,
        .ioctl =                        cypress_ioctl,
@@ -270,6 +272,7 @@ static struct usb_serial_driver cypress_ca42v2_device = {
        .shutdown =                     cypress_shutdown,
        .open =                         cypress_open,
        .close =                        cypress_close,
+       .dtr_rts =                      cypress_dtr_rts,
        .write =                        cypress_write,
        .write_room =                   cypress_write_room,
        .ioctl =                        cypress_ioctl,
@@ -656,11 +659,7 @@ static int cypress_open(struct tty_struct *tty,
        priv->rx_flags = 0;
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       /* raise both lines and set termios */
-       spin_lock_irqsave(&priv->lock, flags);
-       priv->line_control = CONTROL_DTR | CONTROL_RTS;
-       priv->cmd_ctrl = 1;
-       spin_unlock_irqrestore(&priv->lock, flags);
+       /* Set termios */
        result = cypress_write(tty, port, NULL, 0);
 
        if (result) {
@@ -694,76 +693,42 @@ static int cypress_open(struct tty_struct *tty,
                                                        __func__, result);
                cypress_set_dead(port);
        }
-
+       port->port.drain_delay = 256;
        return result;
 } /* cypress_open */
 
+static void cypress_dtr_rts(struct usb_serial_port *port, int on)
+{
+       struct cypress_private *priv = usb_get_serial_port_data(port);
+
+       /* on == 0 drops DTR and RTS, any other value raises both */
+       spin_lock_irq(&priv->lock);
+       if (on == 0)
+               priv->line_control = 0;
+       else
+               priv->line_control = CONTROL_DTR | CONTROL_RTS;
+       priv->cmd_ctrl = 1;
+       spin_unlock_irq(&priv->lock);
+       cypress_write(NULL, port, NULL, 0);
+}
 
-static void cypress_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void cypress_close(struct usb_serial_port *port)
 {
        struct cypress_private *priv = usb_get_serial_port_data(port);
-       unsigned int c_cflag;
-       int bps;
-       long timeout;
-       wait_queue_t wait;
 
        dbg("%s - port %d", __func__, port->number);
 
-       /* wait for data to drain from buffer */
-       spin_lock_irq(&priv->lock);
-       timeout = CYPRESS_CLOSING_WAIT;
-       init_waitqueue_entry(&wait, current);
-       add_wait_queue(&tty->write_wait, &wait);
-       for (;;) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               if (cypress_buf_data_avail(priv->buf) == 0
-               || timeout == 0 || signal_pending(current)
-               /* without mutex, allowed due to harmless failure mode */
-               || port->serial->disconnected)
-                       break;
-               spin_unlock_irq(&priv->lock);
-               timeout = schedule_timeout(timeout);
-               spin_lock_irq(&priv->lock);
-       }
-       set_current_state(TASK_RUNNING);
-       remove_wait_queue(&tty->write_wait, &wait);
-       /* clear out any remaining data in the buffer */
-       cypress_buf_clear(priv->buf);
-       spin_unlock_irq(&priv->lock);
-
        /* writing is potentially harmful, lock must be taken */
        mutex_lock(&port->serial->disc_mutex);
        if (port->serial->disconnected) {
                mutex_unlock(&port->serial->disc_mutex);
                return;
        }
-       /* wait for characters to drain from device */
-       if (tty) {
-               bps = tty_get_baud_rate(tty);
-               if (bps > 1200)
-                       timeout = max((HZ * 2560) / bps, HZ / 10);
-               else
-                       timeout = 2 * HZ;
-               schedule_timeout_interruptible(timeout);
-       }
-
+       cypress_buf_clear(priv->buf);
        dbg("%s - stopping urbs", __func__);
        usb_kill_urb(port->interrupt_in_urb);
        usb_kill_urb(port->interrupt_out_urb);
 
-       if (tty) {
-               c_cflag = tty->termios->c_cflag;
-               if (c_cflag & HUPCL) {
-                       /* drop dtr and rts */
-                       priv = usb_get_serial_port_data(port);
-                       spin_lock_irq(&priv->lock);
-                       priv->line_control = 0;
-                       priv->cmd_ctrl = 1;
-                       spin_unlock_irq(&priv->lock);
-                       cypress_write(tty, port, NULL, 0);
-               }
-       }
 
        if (stats)
                dev_info(&port->dev, "Statistics: %d Bytes In | %d Bytes Out | %d Commands Issued\n",
index 38ba4ea..30f5140 100644 (file)
@@ -422,7 +422,6 @@ struct digi_port {
        int dp_throttled;
        int dp_throttle_restart;
        wait_queue_head_t dp_flush_wait;
-       int dp_in_close;                        /* close in progress */
        wait_queue_head_t dp_close_wait;        /* wait queue for close */
        struct work_struct dp_wakeup_work;
        struct usb_serial_port *dp_port;
@@ -456,8 +455,9 @@ static int digi_write_room(struct tty_struct *tty);
 static int digi_chars_in_buffer(struct tty_struct *tty);
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
        struct file *filp);
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-       struct file *filp);
+static void digi_close(struct usb_serial_port *port);
+static int digi_carrier_raised(struct usb_serial_port *port);
+static void digi_dtr_rts(struct usb_serial_port *port, int on);
 static int digi_startup_device(struct usb_serial *serial);
 static int digi_startup(struct usb_serial *serial);
 static void digi_shutdown(struct usb_serial *serial);
@@ -510,6 +510,8 @@ static struct usb_serial_driver digi_acceleport_2_device = {
        .num_ports =                    3,
        .open =                         digi_open,
        .close =                        digi_close,
+       .dtr_rts =                      digi_dtr_rts,
+       .carrier_raised =               digi_carrier_raised,
        .write =                        digi_write,
        .write_room =                   digi_write_room,
        .write_bulk_callback =          digi_write_bulk_callback,
@@ -1328,6 +1330,19 @@ static int digi_chars_in_buffer(struct tty_struct *tty)
 
 }
 
+static void digi_dtr_rts(struct usb_serial_port *port, int on)
+{
+       /* Raise DTR and RTS for any non-zero "on", drop both when it is 0 */
+       digi_set_modem_signals(port, on ? (TIOCM_DTR|TIOCM_RTS) : 0, 1);
+}
+
+static int digi_carrier_raised(struct usb_serial_port *port)
+{
+       struct digi_port *dp = usb_get_serial_port_data(port);
+
+       /* 1 while the port's stored modem signals include TIOCM_CD, else 0 */
+       return (dp->dp_modem_signals & TIOCM_CD) ? 1 : 0;
+}
 
 static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
                                struct file *filp)
@@ -1336,7 +1351,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
        unsigned char buf[32];
        struct digi_port *priv = usb_get_serial_port_data(port);
        struct ktermios not_termios;
-       unsigned long flags = 0;
 
        dbg("digi_open: TOP: port=%d, open_count=%d",
                priv->dp_port_num, port->port.count);
@@ -1345,26 +1359,6 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
        if (digi_startup_device(port->serial) != 0)
                return -ENXIO;
 
-       spin_lock_irqsave(&priv->dp_port_lock, flags);
-
-       /* don't wait on a close in progress for non-blocking opens */
-       if (priv->dp_in_close && (filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0) {
-               spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-               return -EAGAIN;
-       }
-
-       /* wait for a close in progress to finish */
-       while (priv->dp_in_close) {
-               cond_wait_interruptible_timeout_irqrestore(
-                       &priv->dp_close_wait, DIGI_RETRY_TIMEOUT,
-                       &priv->dp_port_lock, flags);
-               if (signal_pending(current))
-                       return -EINTR;
-               spin_lock_irqsave(&priv->dp_port_lock, flags);
-       }
-
-       spin_unlock_irqrestore(&priv->dp_port_lock, flags);
-
        /* read modem signals automatically whenever they change */
        buf[0] = DIGI_CMD_READ_INPUT_SIGNALS;
        buf[1] = priv->dp_port_num;
@@ -1387,16 +1381,11 @@ static int digi_open(struct tty_struct *tty, struct usb_serial_port *port,
                not_termios.c_iflag = ~tty->termios->c_iflag;
                digi_set_termios(tty, port, &not_termios);
        }
-
-       /* set DTR and RTS */
-       digi_set_modem_signals(port, TIOCM_DTR|TIOCM_RTS, 1);
-
        return 0;
 }
 
 
-static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
-                               struct file *filp)
+static void digi_close(struct usb_serial_port *port)
 {
        DEFINE_WAIT(wait);
        int ret;
@@ -1411,28 +1400,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
        if (port->serial->disconnected)
                goto exit;
 
-       /* do cleanup only after final close on this port */
-       spin_lock_irq(&priv->dp_port_lock);
-       priv->dp_in_close = 1;
-       spin_unlock_irq(&priv->dp_port_lock);
-
-       /* tell line discipline to process only XON/XOFF */
-       tty->closing = 1;
-
-       /* wait for output to drain */
-       if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-               tty_wait_until_sent(tty, DIGI_CLOSE_TIMEOUT);
-
-       /* flush driver and line discipline buffers */
-       tty_driver_flush_buffer(tty);
-       tty_ldisc_flush(tty);
-
        if (port->serial->dev) {
-               /* wait for transmit idle */
-               if ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) == 0)
-                       digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
-               /* drop DTR and RTS */
-               digi_set_modem_signals(port, 0, 0);
+               /* FIXME: Transmit idle belongs in the wait_until_sent path */
+               digi_transmit_idle(port, DIGI_CLOSE_TIMEOUT);
 
                /* disable input flow control */
                buf[0] = DIGI_CMD_SET_INPUT_FLOW_CONTROL;
@@ -1477,11 +1447,9 @@ static void digi_close(struct tty_struct *tty, struct usb_serial_port *port,
                /* shutdown any outstanding bulk writes */
                usb_kill_urb(port->write_urb);
        }
-       tty->closing = 0;
 exit:
        spin_lock_irq(&priv->dp_port_lock);
        priv->dp_write_urb_in_use = 0;
-       priv->dp_in_close = 0;
        wake_up_interruptible(&priv->dp_close_wait);
        spin_unlock_irq(&priv->dp_port_lock);
        mutex_unlock(&port->serial->disc_mutex);
@@ -1560,7 +1528,6 @@ static int digi_startup(struct usb_serial *serial)
                priv->dp_throttled = 0;
                priv->dp_throttle_restart = 0;
                init_waitqueue_head(&priv->dp_flush_wait);
-               priv->dp_in_close = 0;
                init_waitqueue_head(&priv->dp_close_wait);
                INIT_WORK(&priv->dp_wakeup_work, digi_wakeup_write_lock);
                priv->dp_port = serial->port[i];
index c709ec4..2b141cc 100644 (file)
@@ -81,8 +81,7 @@ static int debug;
 /* function prototypes for an empeg-car player */
 static int  empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
                                                struct file *filp);
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-                                               struct file *filp);
+static void empeg_close(struct usb_serial_port *port);
 static int  empeg_write(struct tty_struct *tty, struct usb_serial_port *port,
                                                const unsigned char *buf,
                                                int count);
@@ -181,8 +180,7 @@ static int empeg_open(struct tty_struct *tty, struct usb_serial_port *port,
 }
 
 
-static void empeg_close(struct tty_struct *tty, struct usb_serial_port *port,
-                               struct file *filp)
+static void empeg_close(struct usb_serial_port *port)
 {
        dbg("%s - port %d", __func__, port->number);
 
index d9fcdae..683304d 100644 (file)
@@ -89,6 +89,7 @@ struct ftdi_private {
        int force_rtscts;       /* if non-zero, force RTS-CTS to always
                                   be enabled */
 
+       unsigned int latency;           /* latency setting in use */
        spinlock_t tx_lock;     /* spinlock for transmit state */
        unsigned long tx_bytes;
        unsigned long tx_outstanding_bytes;
@@ -719,8 +720,8 @@ static int  ftdi_sio_port_probe(struct usb_serial_port *port);
 static int  ftdi_sio_port_remove(struct usb_serial_port *port);
 static int  ftdi_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void ftdi_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+static void ftdi_close(struct usb_serial_port *port);
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on);
 static int  ftdi_write(struct tty_struct *tty, struct usb_serial_port *port,
                        const unsigned char *buf, int count);
 static int  ftdi_write_room(struct tty_struct *tty);
@@ -758,6 +759,7 @@ static struct usb_serial_driver ftdi_sio_device = {
        .port_remove =          ftdi_sio_port_remove,
        .open =                 ftdi_open,
        .close =                ftdi_close,
+       .dtr_rts =              ftdi_dtr_rts,
        .throttle =             ftdi_throttle,
        .unthrottle =           ftdi_unthrottle,
        .write =                ftdi_write,
@@ -1037,7 +1039,54 @@ static int change_speed(struct tty_struct *tty, struct usb_serial_port *port)
        return rv;
 }
 
+static int write_latency_timer(struct usb_serial_port *port)
+{
+       struct ftdi_private *priv = usb_get_serial_port_data(port);
+       struct usb_device *udev = port->serial->dev;
+       char buf[1];
+       int status;
+       int timer;
+
+       /* ASYNC_LOW_LATENCY overrides the stored setting with 1 */
+       timer = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : priv->latency;
+
+       dbg("%s: setting latency timer = %i", __func__, timer);
+
+       /* the setting is passed as the request value; no data is sent */
+       status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0),
+                                FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
+                                FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
+                                timer, priv->interface,
+                                buf, 0, WDR_TIMEOUT);
+       if (status >= 0)
+               return status;
+       dev_err(&port->dev, "Unable to write latency timer: %i\n", status);
+       return status;
+}
+
+static int read_latency_timer(struct usb_serial_port *port)
+{
+       struct ftdi_private *priv = usb_get_serial_port_data(port);
+       struct usb_device *udev = port->serial->dev;
+       unsigned char latency = 0;      /* exactly one byte is requested */
+       int rv = 0;
 
+       dbg("%s", __func__);
+
+       rv = usb_control_msg(udev,
+                            usb_rcvctrlpipe(udev, 0),
+                            FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
+                            FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
+                            0, priv->interface,
+                            &latency, 1, WDR_TIMEOUT);
+       if (rv < 0) {
+               dev_err(&port->dev, "Unable to read latency timer: %i\n", rv);
+               return -EIO;
+       }
+       /* remember the value: priv->latency is what gets shown and rewritten */
+       priv->latency = latency;
+       return latency;
+}
 
 static int get_serial_info(struct usb_serial_port *port,
                                struct serial_struct __user *retinfo)
@@ -1097,6 +1146,7 @@ static int set_serial_info(struct tty_struct *tty,
        priv->custom_divisor = new_serial.custom_divisor;
 
        tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
+       write_latency_timer(port);
 
 check_and_exit:
        if ((old_priv.flags & ASYNC_SPD_MASK) !=
@@ -1192,27 +1242,13 @@ static ssize_t show_latency_timer(struct device *dev,
 {
        struct usb_serial_port *port = to_usb_serial_port(dev);
        struct ftdi_private *priv = usb_get_serial_port_data(port);
-       struct usb_device *udev = port->serial->dev;
-       unsigned short latency = 0;
-       int rv = 0;
-
-
-       dbg("%s", __func__);
-
-       rv = usb_control_msg(udev,
-                            usb_rcvctrlpipe(udev, 0),
-                            FTDI_SIO_GET_LATENCY_TIMER_REQUEST,
-                            FTDI_SIO_GET_LATENCY_TIMER_REQUEST_TYPE,
-                            0, priv->interface,
-                            (char *) &latency, 1, WDR_TIMEOUT);
-
-       if (rv < 0) {
-               dev_err(dev, "Unable to read latency timer: %i\n", rv);
-               return -EIO;
-       }
-       return sprintf(buf, "%i\n", latency);
+       if (priv->flags & ASYNC_LOW_LATENCY)
+               return sprintf(buf, "1\n");
+       else
+               return sprintf(buf, "%i\n", priv->latency);
 }
 
+
 /* Write a new value of the latency timer, in units of milliseconds. */
 static ssize_t store_latency_timer(struct device *dev,
                        struct device_attribute *attr, const char *valbuf,
@@ -1220,25 +1256,13 @@ static ssize_t store_latency_timer(struct device *dev,
 {
        struct usb_serial_port *port = to_usb_serial_port(dev);
        struct ftdi_private *priv = usb_get_serial_port_data(port);
-       struct usb_device *udev = port->serial->dev;
-       char buf[1];
        int v = simple_strtoul(valbuf, NULL, 10);
        int rv = 0;
 
-       dbg("%s: setting latency timer = %i", __func__, v);
-
-       rv = usb_control_msg(udev,
-                            usb_sndctrlpipe(udev, 0),
-                            FTDI_SIO_SET_LATENCY_TIMER_REQUEST,
-                            FTDI_SIO_SET_LATENCY_TIMER_REQUEST_TYPE,
-                            v, priv->interface,
-                            buf, 0, WDR_TIMEOUT);
-
-       if (rv < 0) {
-               dev_err(dev, "Unable to write latency timer: %i\n", rv);
+       priv->latency = v;
+       rv = write_latency_timer(port);
+       if (rv < 0)
                return -EIO;
-       }
-
        return count;
 }
 
@@ -1392,6 +1416,7 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port)
        usb_set_serial_port_data(port, priv);
 
        ftdi_determine_type(port);
+       read_latency_timer(port);
        create_sysfs_attrs(port);
        return 0;
 }
@@ -1514,6 +1539,8 @@ static int ftdi_open(struct tty_struct *tty,
        if (tty)
                tty->low_latency = (priv->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
 
+       write_latency_timer(port);
+
        /* No error checking for this (will get errors later anyway) */
        /* See ftdi_sio.h for description of what is reset */
        usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
@@ -1529,11 +1556,6 @@ static int ftdi_open(struct tty_struct *tty,
        if (tty)
                ftdi_set_termios(tty, port, tty->termios);
 
-       /* FIXME: Flow control might be enabled, so it should be checked -
-          we have no control of defaults! */
-       /* Turn on RTS and DTR since we are not flow controlling by default */
-       set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
-
        /* Not throttled */
        spin_lock_irqsave(&priv->rx_lock, flags);
        priv->rx_flags &= ~(THROTTLED | ACTUALLY_THROTTLED);
@@ -1558,6 +1580,30 @@ static int ftdi_open(struct tty_struct *tty,
 } /* ftdi_open */
 
 
+static void ftdi_dtr_rts(struct usb_serial_port *port, int on)
+{
+       struct ftdi_private *priv = usb_get_serial_port_data(port);
+       char buf[1];
+
+       mutex_lock(&port->serial->disc_mutex);
+       if (!port->serial->disconnected) {
+               /* Only when dropping the lines: disable flow control first */
+               if (!on && usb_control_msg(port->serial->dev,
+                           usb_sndctrlpipe(port->serial->dev, 0),
+                           FTDI_SIO_SET_FLOW_CTRL_REQUEST,
+                           FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
+                           0, priv->interface, buf, 0,
+                           WDR_TIMEOUT) < 0) {
+                           dev_err(&port->dev, "error from flowcontrol urb\n");
+               }
+               /* Raise RTS and DTR when on is non-zero, drop them otherwise */
+               if (on)
+                       set_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+               else
+                       clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
+       }
+       mutex_unlock(&port->serial->disc_mutex);
+}
 
 /*
  * usbserial:__serial_close  only calls ftdi_close if the point is open
@@ -1567,31 +1613,12 @@ static int ftdi_open(struct tty_struct *tty,
  *
  */
 
-static void ftdi_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void ftdi_close(struct usb_serial_port *port)
 { /* ftdi_close */
-       unsigned int c_cflag = tty->termios->c_cflag;
        struct ftdi_private *priv = usb_get_serial_port_data(port);
-       char buf[1];
 
        dbg("%s", __func__);
 
-       mutex_lock(&port->serial->disc_mutex);
-       if (c_cflag & HUPCL && !port->serial->disconnected) {
-               /* Disable flow control */
-               if (usb_control_msg(port->serial->dev,
-                                   usb_sndctrlpipe(port->serial->dev, 0),
-                                   FTDI_SIO_SET_FLOW_CTRL_REQUEST,
-                                   FTDI_SIO_SET_FLOW_CTRL_REQUEST_TYPE,
-                                   0, priv->interface, buf, 0,
-                                   WDR_TIMEOUT) < 0) {
-                       dev_err(&port->dev, "error from flowcontrol urb\n");
-               }
-
-               /* drop RTS and DTR */
-               clear_mctrl(port, TIOCM_DTR | TIOCM_RTS);
-       } /* Note change no line if hupcl is off */
-       mutex_unlock(&port->serial->disc_mutex);
 
        /* cancel any scheduled reading */
        cancel_delayed_work_sync(&priv->rx_work);
index 586d30f..ee25a3f 100644 (file)
@@ -993,8 +993,7 @@ static int garmin_open(struct tty_struct *tty,
 }
 
 
-static void garmin_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void garmin_close(struct usb_serial_port *port)
 {
        struct usb_serial *serial = port->serial;
        struct garmin_data *garmin_data_p = usb_get_serial_port_data(port);
index 4cec990..be82ea9 100644 (file)
@@ -184,8 +184,7 @@ int usb_serial_generic_resume(struct usb_serial *serial)
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_resume);
 
-void usb_serial_generic_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+void usb_serial_generic_close(struct usb_serial_port *port)
 {
        dbg("%s - port %d", __func__, port->number);
        generic_cleanup(port);
index fb4a73d..53ef599 100644 (file)
@@ -207,8 +207,7 @@ static void edge_bulk_out_cmd_callback(struct urb *urb);
 /* function prototypes for the usbserial callbacks */
 static int edge_open(struct tty_struct *tty, struct usb_serial_port *port,
                                        struct file *filp);
-static void edge_close(struct tty_struct *tty, struct usb_serial_port *port,
-                                       struct file *filp);
+static void edge_close(struct usb_serial_port *port);
 static int edge_write(struct tty_struct *tty, struct usb_serial_port *port,
                                        const unsigned char *buf, int count);
 static int edge_write_room(struct tty_struct *tty);
@@ -965,7 +964,7 @@ static int edge_open(struct tty_struct *tty,
 
        if (!edge_port->txfifo.fifo) {
                dbg("%s - no memory", __func__);
-               edge_close(tty, port, filp);
+               edge_close(port);
                return -ENOMEM;
        }
 
@@ -975,7 +974,7 @@ static int edge_open(struct tty_struct *tty,
 
        if (!edge_port->write_urb) {
                dbg("%s - no memory", __func__);
-               edge_close(tty, port, filp);
+               edge_close(port);
                return -ENOMEM;
        }
 
@@ -1099,8 +1098,7 @@ static void block_until_tx_empty(struct edgeport_port *edge_port)
  * edge_close
  *     this function is called by the tty driver when a port is closed
  *****************************************************************************/
-static void edge_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
        struct edgeport_serial *edge_serial;
        struct edgeport_port *edge_port;
index 513b25e..eabf20e 100644 (file)
@@ -2009,8 +2009,7 @@ release_es_lock:
        return status;
 }
 
-static void edge_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void edge_close(struct usb_serial_port *port)
 {
        struct edgeport_serial *edge_serial;
        struct edgeport_port *edge_port;
index cd62825..c610a99 100644 (file)
@@ -76,8 +76,7 @@ static int initial_wait;
 /* Function prototypes for an ipaq */
 static int  ipaq_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void ipaq_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+static void ipaq_close(struct usb_serial_port *port);
 static int  ipaq_calc_num_ports(struct usb_serial *serial);
 static int  ipaq_startup(struct usb_serial *serial);
 static void ipaq_shutdown(struct usb_serial *serial);
@@ -714,8 +713,7 @@ error:
 }
 
 
-static void ipaq_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void ipaq_close(struct usb_serial_port *port)
 {
        struct ipaq_private     *priv = usb_get_serial_port_data(port);
 
index da2a2b4..29ad038 100644 (file)
@@ -302,23 +302,17 @@ static int ipw_open(struct tty_struct *tty,
        return 0;
 }
 
-static void ipw_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void ipw_dtr_rts(struct usb_serial_port *port, int on)
 {
        struct usb_device *dev = port->serial->dev;
        int result;
 
-       if (tty_hung_up_p(filp)) {
-               dbg("%s: tty_hung_up_p ...", __func__);
-               return;
-       }
-
        /*--1: drop the dtr */
        dbg("%s:dropping dtr", __func__);
        result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
                         IPW_SIO_SET_PIN,
                         USB_TYPE_VENDOR | USB_RECIP_INTERFACE | USB_DIR_OUT,
-                        IPW_PIN_CLRDTR,
+                        on ? IPW_PIN_SETDTR : IPW_PIN_CLRDTR,
                         0,
                         NULL,
                         0,
@@ -332,7 +326,7 @@ static void ipw_close(struct tty_struct *tty,
        result = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
                         IPW_SIO_SET_PIN, USB_TYPE_VENDOR |
                                        USB_RECIP_INTERFACE | USB_DIR_OUT,
-                        IPW_PIN_CLRRTS,
+                        on ? IPW_PIN_SETRTS : IPW_PIN_CLRRTS,
                         0,
                         NULL,
                         0,
@@ -340,7 +334,12 @@ static void ipw_close(struct tty_struct *tty,
        if (result < 0)
                dev_err(&port->dev,
                                "dropping rts failed (error = %d)\n", result);
+}
 
+static void ipw_close(struct usb_serial_port *port)
+{
+       struct usb_device *dev = port->serial->dev;
+       int result;
 
        /*--3: purge */
        dbg("%s:sending purge", __func__);
@@ -461,6 +460,7 @@ static struct usb_serial_driver ipw_device = {
        .num_ports =            1,
        .open =                 ipw_open,
        .close =                ipw_close,
+       .dtr_rts =              ipw_dtr_rts,
        .port_probe =           ipw_probe,
        .port_remove =          ipw_disconnect,
        .write =                ipw_write,
index 4e2cda9..66009b6 100644 (file)
@@ -88,8 +88,7 @@ static int xbof = -1;
 static int  ir_startup (struct usb_serial *serial);
 static int  ir_open(struct tty_struct *tty, struct usb_serial_port *port,
                                        struct file *filep);
-static void ir_close(struct tty_struct *tty, struct usb_serial_port *port,
-                                       struct file *filep);
+static void ir_close(struct usb_serial_port *port);
 static int  ir_write(struct tty_struct *tty, struct usb_serial_port *port,
                                        const unsigned char *buf, int count);
 static void ir_write_bulk_callback (struct urb *urb);
@@ -346,8 +345,7 @@ static int ir_open(struct tty_struct *tty,
        return result;
 }
 
-static void ir_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file * filp)
+static void ir_close(struct usb_serial_port *port)
 {
        dbg("%s - port %d", __func__, port->number);
 
index 4473d44..76a3cc3 100644 (file)
@@ -40,7 +40,7 @@ static int debug;
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.5"
+#define DRIVER_VERSION "v0.10"
 #define DRIVER_DESC "Infinity USB Unlimited Phoenix driver"
 
 static struct usb_device_id id_table[] = {
@@ -70,7 +70,6 @@ static void read_rxcmd_callback(struct urb *urb);
 struct iuu_private {
        spinlock_t lock;        /* store irq state */
        wait_queue_head_t delta_msr_wait;
-       u8 line_control;
        u8 line_status;
        u8 termios_initialized;
        int tiostatus;          /* store IUART SIGNAL for tiocmget call */
@@ -651,32 +650,33 @@ static int iuu_bulk_write(struct usb_serial_port *port)
        unsigned long flags;
        int result;
        int i;
+       int buf_len;
        char *buf_ptr = port->write_urb->transfer_buffer;
        dbg("%s - enter", __func__);
 
+       spin_lock_irqsave(&priv->lock, flags);
        *buf_ptr++ = IUU_UART_ESC;
        *buf_ptr++ = IUU_UART_TX;
        *buf_ptr++ = priv->writelen;
 
-       memcpy(buf_ptr, priv->writebuf,
-              priv->writelen);
+       memcpy(buf_ptr, priv->writebuf, priv->writelen);
+       buf_len = priv->writelen;
+       priv->writelen = 0;
+       spin_unlock_irqrestore(&priv->lock, flags);
        if (debug == 1) {
-               for (i = 0; i < priv->writelen; i++)
+               for (i = 0; i < buf_len; i++)
                        sprintf(priv->dbgbuf + i*2 ,
                                "%02X", priv->writebuf[i]);
-               priv->dbgbuf[priv->writelen+i*2] = 0;
+               priv->dbgbuf[buf_len+i*2] = 0;
                dbg("%s - writing %i chars : %s", __func__,
-                   priv->writelen, priv->dbgbuf);
+                   buf_len, priv->dbgbuf);
        }
        usb_fill_bulk_urb(port->write_urb, port->serial->dev,
                          usb_sndbulkpipe(port->serial->dev,
                                          port->bulk_out_endpointAddress),
-                         port->write_urb->transfer_buffer, priv->writelen + 3,
+                         port->write_urb->transfer_buffer, buf_len + 3,
                          iuu_rxcmd, port);
        result = usb_submit_urb(port->write_urb, GFP_ATOMIC);
-       spin_lock_irqsave(&priv->lock, flags);
-       priv->writelen = 0;
-       spin_unlock_irqrestore(&priv->lock, flags);
        usb_serial_port_softint(port);
        return result;
 }
@@ -770,14 +770,10 @@ static int iuu_uart_write(struct tty_struct *tty, struct usb_serial_port *port,
                return -ENOMEM;
 
        spin_lock_irqsave(&priv->lock, flags);
-       if (priv->writelen > 0) {
-               /* buffer already filled but not commited */
-               spin_unlock_irqrestore(&priv->lock, flags);
-               return 0;
-       }
+
        /* fill the buffer */
-       memcpy(priv->writebuf, buf, count);
-       priv->writelen = count;
+       memcpy(priv->writebuf + priv->writelen, buf, count);
+       priv->writelen += count;
        spin_unlock_irqrestore(&priv->lock, flags);
 
        return count;
@@ -819,7 +815,7 @@ static int iuu_uart_on(struct usb_serial_port *port)
        buf[0] = IUU_UART_ENABLE;
        buf[1] = (u8) ((IUU_BAUD_9600 >> 8) & 0x00FF);
        buf[2] = (u8) (0x00FF & IUU_BAUD_9600);
-       buf[3] = (u8) (0x0F0 & IUU_TWO_STOP_BITS) | (0x07 & IUU_PARITY_EVEN);
+       buf[3] = (u8) (0x0F0 & IUU_ONE_STOP_BIT) | (0x07 & IUU_PARITY_EVEN);
 
        status = bulk_immediate(port, buf, 4);
        if (status != IUU_OPERATION_OK) {
@@ -946,19 +942,59 @@ static int iuu_uart_baud(struct usb_serial_port *port, u32 baud,
        return status;
 }
 
-static int set_control_lines(struct usb_device *dev, u8 value)
+static void iuu_set_termios(struct tty_struct *tty,
+               struct usb_serial_port *port, struct ktermios *old_termios)
 {
-       return 0;
+       const u32 supported_mask = CMSPAR|PARENB|PARODD;
+
+       unsigned int cflag = tty->termios->c_cflag;
+       int status;
+       u32 actual;
+       u32 parity;
+       int csize = CS7;
+       int baud = 9600;        /* Fixed for the moment */
+       u32 newval = cflag & supported_mask;
+
+       /* compute the parity parameter */
+       parity = 0;
+       if (cflag & CMSPAR) {   /* Using mark space */
+               if (cflag & PARODD)
+                       parity |= IUU_PARITY_SPACE;
+               else
+                       parity |= IUU_PARITY_MARK;
+       } else if (!(cflag & PARENB)) {
+               parity |= IUU_PARITY_NONE;
+               csize = CS8;
+       } else if (cflag & PARODD)
+               parity |= IUU_PARITY_ODD;
+       else
+               parity |= IUU_PARITY_EVEN;
+
+       parity |= (cflag & CSTOPB ? IUU_TWO_STOP_BITS : IUU_ONE_STOP_BIT);
+
+       /* set it */
+       status = iuu_uart_baud(port,
+                       (clockmode == 2) ? 16457 : 9600 * boost / 100,
+                       &actual, parity);
+
+       /* set the termios value to the real one, so the user knows what has
+        * changed. We support few fields so it's easiest to copy the old hw
+        * settings back over and then adjust them
+        */
+       if (old_termios)
+               tty_termios_copy_hw(tty->termios, old_termios);
+       if (status != 0)        /* Set failed - return old bits */
+               return;
+       /* Re-encode speed, parity and csize */
+       tty_encode_baud_rate(tty, baud, baud);
+       tty->termios->c_cflag &= ~(supported_mask|CSIZE);
+       tty->termios->c_cflag |= newval | csize;
 }
 
-static void iuu_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void iuu_close(struct usb_serial_port *port)
 {
        /* iuu_led (port,255,0,0,0); */
        struct usb_serial *serial;
-       struct iuu_private *priv = usb_get_serial_port_data(port);
-       unsigned long flags;
-       unsigned int c_cflag;
 
        serial = port->serial;
        if (!serial)
@@ -968,17 +1004,6 @@ static void iuu_close(struct tty_struct *tty,
 
        iuu_uart_off(port);
        if (serial->dev) {
-               if (tty) {
-                       c_cflag = tty->termios->c_cflag;
-                       if (c_cflag & HUPCL) {
-                               /* drop DTR and RTS */
-                               priv = usb_get_serial_port_data(port);
-                               spin_lock_irqsave(&priv->lock, flags);
-                               priv->line_control = 0;
-                               spin_unlock_irqrestore(&priv->lock, flags);
-                               set_control_lines(port->serial->dev, 0);
-                       }
-               }
                /* free writebuf */
                /* shutdown our urbs */
                dbg("%s - shutting down urbs", __func__);
@@ -1154,7 +1179,7 @@ static int iuu_open(struct tty_struct *tty,
        if (result) {
                dev_err(&port->dev, "%s - failed submitting read urb,"
                        " error %d\n", __func__, result);
-               iuu_close(tty, port, NULL);
+               iuu_close(port);
                return -EPROTO;
        } else {
                dbg("%s - rxcmd OK", __func__);
@@ -1175,6 +1200,7 @@ static struct usb_serial_driver iuu_device = {
        .read_bulk_callback = iuu_uart_read_callback,
        .tiocmget = iuu_tiocmget,
        .tiocmset = iuu_tiocmset,
+       .set_termios = iuu_set_termios,
        .attach = iuu_startup,
        .shutdown = iuu_shutdown,
 };
index 00daa8f..f1195a9 100644 (file)
@@ -1298,8 +1298,16 @@ static inline void stop_urb(struct urb *urb)
                usb_kill_urb(urb);
 }
 
-static void keyspan_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void keyspan_dtr_rts(struct usb_serial_port *port, int on)
+{
+       struct keyspan_port_private *p_priv = usb_get_serial_port_data(port);
+
+       p_priv->rts_state = on;
+       p_priv->dtr_state = on;
+       keyspan_send_setup(port, 0);
+}
+
+static void keyspan_close(struct usb_serial_port *port)
 {
        int                     i;
        struct usb_serial       *serial = port->serial;
@@ -1336,7 +1344,6 @@ static void keyspan_close(struct tty_struct *tty,
                        stop_urb(p_priv->out_urbs[i]);
                }
        }
-       tty_port_tty_set(&port->port, NULL);
 }
 
 /* download the firmware to a pre-renumeration device */
index 38b4582..0d4569b 100644 (file)
@@ -38,9 +38,8 @@
 static int  keyspan_open               (struct tty_struct *tty,
                                         struct usb_serial_port *port,
                                         struct file *filp);
-static void keyspan_close              (struct tty_struct *tty,
-                                        struct usb_serial_port *port,
-                                        struct file *filp);
+static void keyspan_close              (struct usb_serial_port *port);
+static void keyspan_dtr_rts            (struct usb_serial_port *port, int on);
 static int  keyspan_startup            (struct usb_serial *serial);
 static void keyspan_shutdown           (struct usb_serial *serial);
 static int  keyspan_write_room         (struct tty_struct *tty);
@@ -562,6 +561,7 @@ static struct usb_serial_driver keyspan_1port_device = {
        .num_ports              = 1,
        .open                   = keyspan_open,
        .close                  = keyspan_close,
+       .dtr_rts                = keyspan_dtr_rts,
        .write                  = keyspan_write,
        .write_room             = keyspan_write_room,
        .set_termios            = keyspan_set_termios,
@@ -582,6 +582,7 @@ static struct usb_serial_driver keyspan_2port_device = {
        .num_ports              = 2,
        .open                   = keyspan_open,
        .close                  = keyspan_close,
+       .dtr_rts                = keyspan_dtr_rts,
        .write                  = keyspan_write,
        .write_room             = keyspan_write_room,
        .set_termios            = keyspan_set_termios,
@@ -602,6 +603,7 @@ static struct usb_serial_driver keyspan_4port_device = {
        .num_ports              = 4,
        .open                   = keyspan_open,
        .close                  = keyspan_close,
+       .dtr_rts                = keyspan_dtr_rts,
        .write                  = keyspan_write,
        .write_room             = keyspan_write_room,
        .set_termios            = keyspan_set_termios,
index bf1ae24..ab769db 100644 (file)
@@ -651,6 +651,35 @@ static int keyspan_pda_chars_in_buffer(struct tty_struct *tty)
 }
 
 
+static void keyspan_pda_dtr_rts(struct usb_serial_port *port, int on)
+{
+       struct usb_serial *serial = port->serial;
+
+       if (serial->dev) {
+               if (on)
+                       keyspan_pda_set_modem_info(serial, (1<<7) | (1<< 2));
+               else
+                       keyspan_pda_set_modem_info(serial, 0);
+       }
+}
+
+static int keyspan_pda_carrier_raised(struct usb_serial_port *port)
+{
+       struct usb_serial *serial = port->serial;
+       unsigned char modembits;
+
+       /* If we can read the modem status and DCD (bit 6 of the status
+          byte) is low then carrier is not raised yet */
+       if (keyspan_pda_get_modem_info(serial, &modembits) >= 0) {
+               if (!(modembits & (1<<6)))
+                       return 0;
+       }
+       /* Carrier raised, or we failed (eg disconnected) so
+          progress accordingly */
+       return 1;
+}
+
+
 static int keyspan_pda_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp)
 {
@@ -682,13 +711,6 @@ static int keyspan_pda_open(struct tty_struct *tty,
        priv->tx_room = room;
        priv->tx_throttled = room ? 0 : 1;
 
-       /* the normal serial device seems to always turn on DTR and RTS here,
-          so do the same */
-       if (tty && (tty->termios->c_cflag & CBAUD))
-               keyspan_pda_set_modem_info(serial, (1<<7) | (1<<2));
-       else
-               keyspan_pda_set_modem_info(serial, 0);
-
        /*Start reading from the device*/
        port->interrupt_in_urb->dev = serial->dev;
        rc = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
@@ -700,19 +722,11 @@ static int keyspan_pda_open(struct tty_struct *tty,
 error:
        return rc;
 }
-
-
-static void keyspan_pda_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void keyspan_pda_close(struct usb_serial_port *port)
 {
        struct usb_serial *serial = port->serial;
 
        if (serial->dev) {
-               /* the normal serial device seems to always shut
-                  off DTR and RTS now */
-               if (tty->termios->c_cflag & HUPCL)
-                       keyspan_pda_set_modem_info(serial, 0);
-
                /* shutdown our bulk reads and writes */
                usb_kill_urb(port->write_urb);
                usb_kill_urb(port->interrupt_in_urb);
@@ -839,6 +853,8 @@ static struct usb_serial_driver keyspan_pda_device = {
        .usb_driver =           &keyspan_pda_driver,
        .id_table =             id_table_std,
        .num_ports =            1,
+       .dtr_rts =              keyspan_pda_dtr_rts,
+       .carrier_raised =       keyspan_pda_carrier_raised,
        .open =                 keyspan_pda_open,
        .close =                keyspan_pda_close,
        .write =                keyspan_pda_write,
index fcd9082..fa817c6 100644 (file)
@@ -76,8 +76,7 @@ static int  klsi_105_startup(struct usb_serial *serial);
 static void klsi_105_shutdown(struct usb_serial *serial);
 static int  klsi_105_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void klsi_105_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+static void klsi_105_close(struct usb_serial_port *port);
 static int  klsi_105_write(struct tty_struct *tty,
        struct usb_serial_port *port, const unsigned char *buf, int count);
 static void klsi_105_write_bulk_callback(struct urb *urb);
@@ -447,8 +446,7 @@ exit:
 } /* klsi_105_open */
 
 
-static void klsi_105_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void klsi_105_close(struct usb_serial_port *port)
 {
        struct klsi_105_private *priv = usb_get_serial_port_data(port);
        int rc;
index c148544..6b57049 100644 (file)
@@ -72,8 +72,7 @@ static int  kobil_startup(struct usb_serial *serial);
 static void kobil_shutdown(struct usb_serial *serial);
 static int  kobil_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void kobil_close(struct tty_struct *tty, struct usb_serial_port *port,
-                       struct file *filp);
+static void kobil_close(struct usb_serial_port *port);
 static int  kobil_write(struct tty_struct *tty, struct usb_serial_port *port,
                         const unsigned char *buf, int count);
 static int  kobil_write_room(struct tty_struct *tty);
@@ -209,7 +208,7 @@ static void kobil_shutdown(struct usb_serial *serial)
 
        for (i = 0; i < serial->num_ports; ++i) {
                while (serial->port[i]->port.count > 0)
-                       kobil_close(NULL, serial->port[i], NULL);
+                       kobil_close(serial->port[i]);
                kfree(usb_get_serial_port_data(serial->port[i]));
                usb_set_serial_port_data(serial->port[i], NULL);
        }
@@ -346,11 +345,11 @@ static int kobil_open(struct tty_struct *tty,
 }
 
 
-static void kobil_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void kobil_close(struct usb_serial_port *port)
 {
        dbg("%s - port %d", __func__, port->number);
 
+       /* FIXME: Add rts/dtr methods */
        if (port->write_urb) {
                usb_kill_urb(port->write_urb);
                usb_free_urb(port->write_urb);
index 82930a7..8737955 100644 (file)
@@ -95,8 +95,8 @@ static int  mct_u232_startup(struct usb_serial *serial);
 static void mct_u232_shutdown(struct usb_serial *serial);
 static int  mct_u232_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void mct_u232_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+static void mct_u232_close(struct usb_serial_port *port);
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on);
 static void mct_u232_read_int_callback(struct urb *urb);
 static void mct_u232_set_termios(struct tty_struct *tty,
                        struct usb_serial_port *port, struct ktermios *old);
@@ -140,6 +140,7 @@ static struct usb_serial_driver mct_u232_device = {
        .num_ports =         1,
        .open =              mct_u232_open,
        .close =             mct_u232_close,
+       .dtr_rts =           mct_u232_dtr_rts,
        .throttle =          mct_u232_throttle,
        .unthrottle =        mct_u232_unthrottle,
        .read_int_callback = mct_u232_read_int_callback,
@@ -496,29 +497,29 @@ error:
        return retval;
 } /* mct_u232_open */
 
-
-static void mct_u232_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void mct_u232_dtr_rts(struct usb_serial_port *port, int on)
 {
-       unsigned int c_cflag;
        unsigned int control_state;
        struct mct_u232_private *priv = usb_get_serial_port_data(port);
-       dbg("%s port %d", __func__, port->number);
 
-       if (tty) {
-               c_cflag = tty->termios->c_cflag;
-               mutex_lock(&port->serial->disc_mutex);
-               if (c_cflag & HUPCL && !port->serial->disconnected) {
-                       /* drop DTR and RTS */
-                       spin_lock_irq(&priv->lock);
+       mutex_lock(&port->serial->disc_mutex);
+       if (!port->serial->disconnected) {
+               /* raise or drop DTR and RTS */
+               spin_lock_irq(&priv->lock);
+               if (on)
+                       priv->control_state |= TIOCM_DTR | TIOCM_RTS;
+               else
                        priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS);
-                       control_state = priv->control_state;
-                       spin_unlock_irq(&priv->lock);
-                       mct_u232_set_modem_ctrl(port->serial, control_state);
-               }
-               mutex_unlock(&port->serial->disc_mutex);
+               control_state = priv->control_state;
+               spin_unlock_irq(&priv->lock);
+               mct_u232_set_modem_ctrl(port->serial, control_state);
        }
+       mutex_unlock(&port->serial->disc_mutex);
+}
 
+static void mct_u232_close(struct usb_serial_port *port)
+{
+       dbg("%s port %d", __func__, port->number);
 
        if (port->serial->dev) {
                /* shutdown our urbs */
index 24e3b5d..9e1a013 100644 (file)
@@ -533,8 +533,7 @@ static int mos7720_chars_in_buffer(struct tty_struct *tty)
        return chars;
 }
 
-static void mos7720_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void mos7720_close(struct usb_serial_port *port)
 {
        struct usb_serial *serial;
        struct moschip_port *mos7720_port;
index 84fb1dc..10b78a3 100644 (file)
@@ -1135,54 +1135,12 @@ static int mos7840_chars_in_buffer(struct tty_struct *tty)
 
 }
 
-/************************************************************************
- *
- * mos7840_block_until_tx_empty
- *
- *     This function will block the close until one of the following:
- *             1. TX count are 0
- *             2. The mos7840 has stopped
- *             3. A timeout of 3 seconds without activity has expired
- *
- ************************************************************************/
-static void mos7840_block_until_tx_empty(struct tty_struct *tty,
-                               struct moschip_port *mos7840_port)
-{
-       int timeout = HZ / 10;
-       int wait = 30;
-       int count;
-
-       while (1) {
-
-               count = mos7840_chars_in_buffer(tty);
-
-               /* Check for Buffer status */
-               if (count <= 0)
-                       return;
-
-               /* Block the thread for a while */
-               interruptible_sleep_on_timeout(&mos7840_port->wait_chase,
-                                              timeout);
-
-               /* No activity.. count down section */
-               wait--;
-               if (wait == 0) {
-                       dbg("%s - TIMEOUT", __func__);
-                       return;
-               } else {
-                       /* Reset timeout value back to seconds */
-                       wait = 30;
-               }
-       }
-}
-
 /*****************************************************************************
  * mos7840_close
  *     this function is called by the tty driver when a port is closed
  *****************************************************************************/
 
-static void mos7840_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void mos7840_close(struct usb_serial_port *port)
 {
        struct usb_serial *serial;
        struct moschip_port *mos7840_port;
@@ -1223,10 +1181,6 @@ static void mos7840_close(struct tty_struct *tty,
                }
        }
 
-       if (serial->dev)
-               /* flush and block until tx is empty */
-               mos7840_block_until_tx_empty(tty, mos7840_port);
-
        /* While closing port, shutdown all bulk read, write  *
         * and interrupt read if they exists                  */
        if (serial->dev) {
index bcdcbb8..f5f3751 100644 (file)
@@ -98,8 +98,7 @@ static int navman_open(struct tty_struct *tty,
        return result;
 }
 
-static void navman_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void navman_close(struct usb_serial_port *port)
 {
        dbg("%s - port %d", __func__, port->number);
 
index df65397..1104617 100644 (file)
@@ -66,8 +66,7 @@ static int debug;
 /* function prototypes */
 static int  omninet_open(struct tty_struct *tty, struct usb_serial_port *port,
                                                        struct file *filp);
-static void omninet_close(struct tty_struct *tty, struct usb_serial_port *port,
-                                                       struct file *filp);
+static void omninet_close(struct usb_serial_port *port);
 static void omninet_read_bulk_callback(struct urb *urb);
 static void omninet_write_bulk_callback(struct urb *urb);
 static int  omninet_write(struct tty_struct *tty, struct usb_serial_port *port,
@@ -189,8 +188,7 @@ static int omninet_open(struct tty_struct *tty,
        return result;
 }
 
-static void omninet_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void omninet_close(struct usb_serial_port *port)
 {
        dbg("%s - port %d", __func__, port->number);
        usb_kill_urb(port->read_urb);
index b500ad1..c20480a 100644 (file)
@@ -173,8 +173,7 @@ static int opticon_open(struct tty_struct *tty, struct usb_serial_port *port,
        return result;
 }
 
-static void opticon_close(struct tty_struct *tty, struct usb_serial_port *port,
-                         struct file *filp)
+static void opticon_close(struct usb_serial_port *port)
 {
        struct opticon_private *priv = usb_get_serial_data(port->serial);
 
index 7817b82..a16d69f 100644 (file)
@@ -45,8 +45,9 @@
 /* Function prototypes */
 static int  option_open(struct tty_struct *tty, struct usb_serial_port *port,
                                                        struct file *filp);
-static void option_close(struct tty_struct *tty, struct usb_serial_port *port,
-                                                       struct file *filp);
+static void option_close(struct usb_serial_port *port);
+static void option_dtr_rts(struct usb_serial_port *port, int on);
+
 static int  option_startup(struct usb_serial *serial);
 static void option_shutdown(struct usb_serial *serial);
 static int  option_write_room(struct tty_struct *tty);
@@ -61,7 +62,7 @@ static void option_set_termios(struct tty_struct *tty,
 static int  option_tiocmget(struct tty_struct *tty, struct file *file);
 static int  option_tiocmset(struct tty_struct *tty, struct file *file,
                                unsigned int set, unsigned int clear);
-static int  option_send_setup(struct tty_struct *tty, struct usb_serial_port *port);
+static int  option_send_setup(struct usb_serial_port *port);
 static int  option_suspend(struct usb_serial *serial, pm_message_t message);
 static int  option_resume(struct usb_serial *serial);
 
@@ -551,6 +552,7 @@ static struct usb_serial_driver option_1port_device = {
        .num_ports         = 1,
        .open              = option_open,
        .close             = option_close,
+       .dtr_rts           = option_dtr_rts,
        .write             = option_write,
        .write_room        = option_write_room,
        .chars_in_buffer   = option_chars_in_buffer,
@@ -630,7 +632,7 @@ static void option_set_termios(struct tty_struct *tty,
        dbg("%s", __func__);
        /* Doesn't support option setting */
        tty_termios_copy_hw(tty->termios, old_termios);
-       option_send_setup(tty, port);
+       option_send_setup(port);
 }
 
 static int option_tiocmget(struct tty_struct *tty, struct file *file)
@@ -669,7 +671,7 @@ static int option_tiocmset(struct tty_struct *tty, struct file *file,
                portdata->rts_state = 0;
        if (clear & TIOCM_DTR)
                portdata->dtr_state = 0;
-       return option_send_setup(tty, port);
+       return option_send_setup(port);
 }
 
 /* Write */
@@ -897,10 +899,6 @@ static int option_open(struct tty_struct *tty,
 
        dbg("%s", __func__);
 
-       /* Set some sane defaults */
-       portdata->rts_state = 1;
-       portdata->dtr_state = 1;
-
        /* Reset low level data toggle and start reading from endpoints */
        for (i = 0; i < N_IN_URB; i++) {
                urb = portdata->in_urbs[i];
@@ -936,37 +934,43 @@ static int option_open(struct tty_struct *tty,
                                usb_pipeout(urb->pipe), 0); */
        }
 
-       option_send_setup(tty, port);
+       option_send_setup(port);
 
        return 0;
 }
 
-static void option_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void option_dtr_rts(struct usb_serial_port *port, int on)
 {
-       int i;
        struct usb_serial *serial = port->serial;
        struct option_port_private *portdata;
 
        dbg("%s", __func__);
        portdata = usb_get_serial_port_data(port);
+       mutex_lock(&serial->disc_mutex);
+       portdata->rts_state = on;
+       portdata->dtr_state = on;
+       if (serial->dev)
+               option_send_setup(port);
+       mutex_unlock(&serial->disc_mutex);
+}
 
-       portdata->rts_state = 0;
-       portdata->dtr_state = 0;
 
-       if (serial->dev) {
-               mutex_lock(&serial->disc_mutex);
-               if (!serial->disconnected)
-                       option_send_setup(tty, port);
-               mutex_unlock(&serial->disc_mutex);
+static void option_close(struct usb_serial_port *port)
+{
+       int i;
+       struct usb_serial *serial = port->serial;
+       struct option_port_private *portdata;
+
+       dbg("%s", __func__);
+       portdata = usb_get_serial_port_data(port);
 
+       if (serial->dev) {
                /* Stop reading/writing urbs */
                for (i = 0; i < N_IN_URB; i++)
                        usb_kill_urb(portdata->in_urbs[i]);
                for (i = 0; i < N_OUT_URB; i++)
                        usb_kill_urb(portdata->out_urbs[i]);
        }
-       tty_port_tty_set(&port->port, NULL);
 }
 
 /* Helper functions used by option_setup_urbs */
@@ -1032,28 +1036,24 @@ static void option_setup_urbs(struct usb_serial *serial)
  * This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN
  * CDC.
 */
-static int option_send_setup(struct tty_struct *tty,
-                                               struct usb_serial_port *port)
+static int option_send_setup(struct usb_serial_port *port)
 {
        struct usb_serial *serial = port->serial;
        struct option_port_private *portdata;
        int ifNum = serial->interface->cur_altsetting->desc.bInterfaceNumber;
+       int val = 0;
        dbg("%s", __func__);
 
        portdata = usb_get_serial_port_data(port);
 
-       if (tty) {
-               int val = 0;
-               if (portdata->dtr_state)
-                       val |= 0x01;
-               if (portdata->rts_state)
-                       val |= 0x02;
+       if (portdata->dtr_state)
+               val |= 0x01;
+       if (portdata->rts_state)
+               val |= 0x02;
 
-               return usb_control_msg(serial->dev,
-                       usb_rcvctrlpipe(serial->dev, 0),
-                       0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
-       }
-       return 0;
+       return usb_control_msg(serial->dev,
+               usb_rcvctrlpipe(serial->dev, 0),
+               0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
 }
 
 static int option_startup(struct usb_serial *serial)
index ba551f0..7de5478 100644 (file)
@@ -143,8 +143,7 @@ struct oti6858_control_pkt {
 /* function prototypes */
 static int oti6858_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void oti6858_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+static void oti6858_close(struct usb_serial_port *port);
 static void oti6858_set_termios(struct tty_struct *tty,
                        struct usb_serial_port *port, struct ktermios *old);
 static int oti6858_ioctl(struct tty_struct *tty, struct file *file,
@@ -622,67 +621,30 @@ static int oti6858_open(struct tty_struct *tty,
        if (result != 0) {
                dev_err(&port->dev, "%s(): usb_submit_urb() failed"
                               " with error %d\n", __func__, result);
-               oti6858_close(tty, port, NULL);
+               oti6858_close(port);
                return -EPROTO;
        }
 
        /* setup termios */
        if (tty)
                oti6858_set_termios(tty, port, &tmp_termios);
-
+       port->port.drain_delay = 256;   /* FIXME: check the FIFO length */
        return 0;
 }
 
-static void oti6858_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void oti6858_close(struct usb_serial_port *port)
 {
        struct oti6858_private *priv = usb_get_serial_port_data(port);
        unsigned long flags;
-       long timeout;
-       wait_queue_t wait;
 
        dbg("%s(port = %d)", __func__, port->number);
 
-       /* wait for data to drain from the buffer */
        spin_lock_irqsave(&priv->lock, flags);
-       timeout = 30 * HZ;      /* PL2303_CLOSING_WAIT */
-       init_waitqueue_entry(&wait, current);
-       add_wait_queue(&tty->write_wait, &wait);
-       dbg("%s(): entering wait loop", __func__);
-       for (;;) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               if (oti6858_buf_data_avail(priv->buf) == 0
-               || timeout == 0 || signal_pending(current)
-               || port->serial->disconnected)
-                       break;
-               spin_unlock_irqrestore(&priv->lock, flags);
-               timeout = schedule_timeout(timeout);
-               spin_lock_irqsave(&priv->lock, flags);
-       }
-       set_current_state(TASK_RUNNING);
-       remove_wait_queue(&tty->write_wait, &wait);
-       dbg("%s(): after wait loop", __func__);
-
        /* clear out any remaining data in the buffer */
        oti6858_buf_clear(priv->buf);
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       /* wait for characters to drain from the device */
-       /* (this is long enough for the entire 256 byte */
-       /* pl2303 hardware buffer to drain with no flow */
-       /* control for data rates of 1200 bps or more, */
-       /* for lower rates we should really know how much */
-       /* data is in the buffer to compute a delay */
-       /* that is not unnecessarily long) */
-       /* FIXME
-       bps = tty_get_baud_rate(tty);
-       if (bps > 1200)
-               timeout = max((HZ*2560)/bps,HZ/10);
-       else
-       */
-               timeout = 2*HZ;
-       schedule_timeout_interruptible(timeout);
-       dbg("%s(): after schedule_timeout_interruptible()", __func__);
+       dbg("%s(): after buf_clear()", __func__);
 
        /* cancel scheduled setup */
        cancel_delayed_work(&priv->delayed_setup_work);
@@ -694,15 +656,6 @@ static void oti6858_close(struct tty_struct *tty,
        usb_kill_urb(port->write_urb);
        usb_kill_urb(port->read_urb);
        usb_kill_urb(port->interrupt_in_urb);
-
-       /*
-       if (tty && (tty->termios->c_cflag) & HUPCL) {
-               // drop DTR and RTS
-               spin_lock_irqsave(&priv->lock, flags);
-               priv->pending_setup.control &= ~CONTROL_MASK;
-               spin_unlock_irqrestore(&priv->lock, flags);
-       }
-       */
 }
 
 static int oti6858_tiocmset(struct tty_struct *tty, struct file *file,
index 751a533..e02dc3d 100644 (file)
@@ -652,69 +652,41 @@ static void pl2303_set_termios(struct tty_struct *tty,
        kfree(buf);
 }
 
-static void pl2303_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void pl2303_dtr_rts(struct usb_serial_port *port, int on)
+{
+       struct pl2303_private *priv = usb_get_serial_port_data(port);
+       unsigned long flags;
+       u8 control;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       /* Change DTR and RTS */
+       if (on)
+               priv->line_control |= (CONTROL_DTR | CONTROL_RTS);
+       else
+               priv->line_control &= ~(CONTROL_DTR | CONTROL_RTS);
+       control = priv->line_control;
+       spin_unlock_irqrestore(&priv->lock, flags);
+       set_control_lines(port->serial->dev, control);
+}
+
+static void pl2303_close(struct usb_serial_port *port)
 {
        struct pl2303_private *priv = usb_get_serial_port_data(port);
        unsigned long flags;
-       unsigned int c_cflag;
-       int bps;
-       long timeout;
-       wait_queue_t wait;
 
        dbg("%s - port %d", __func__, port->number);
 
-       /* wait for data to drain from the buffer */
        spin_lock_irqsave(&priv->lock, flags);
-       timeout = PL2303_CLOSING_WAIT;
-       init_waitqueue_entry(&wait, current);
-       add_wait_queue(&tty->write_wait, &wait);
-       for (;;) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               if (pl2303_buf_data_avail(priv->buf) == 0 ||
-                   timeout == 0 || signal_pending(current) ||
-                   port->serial->disconnected)
-                       break;
-               spin_unlock_irqrestore(&priv->lock, flags);
-               timeout = schedule_timeout(timeout);
-               spin_lock_irqsave(&priv->lock, flags);
-       }
-       set_current_state(TASK_RUNNING);
-       remove_wait_queue(&tty->write_wait, &wait);
        /* clear out any remaining data in the buffer */
        pl2303_buf_clear(priv->buf);
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       /* wait for characters to drain from the device */
-       /* (this is long enough for the entire 256 byte */
-       /* pl2303 hardware buffer to drain with no flow */
-       /* control for data rates of 1200 bps or more, */
-       /* for lower rates we should really know how much */
-       /* data is in the buffer to compute a delay */
-       /* that is not unnecessarily long) */
-       bps = tty_get_baud_rate(tty);
-       if (bps > 1200)
-               timeout = max((HZ*2560)/bps, HZ/10);
-       else
-               timeout = 2*HZ;
-       schedule_timeout_interruptible(timeout);
-
        /* shutdown our urbs */
        dbg("%s - shutting down urbs", __func__);
        usb_kill_urb(port->write_urb);
        usb_kill_urb(port->read_urb);
        usb_kill_urb(port->interrupt_in_urb);
 
-       if (tty) {
-               c_cflag = tty->termios->c_cflag;
-               if (c_cflag & HUPCL) {
-                       /* drop DTR and RTS */
-                       spin_lock_irqsave(&priv->lock, flags);
-                       priv->line_control = 0;
-                       spin_unlock_irqrestore(&priv->lock, flags);
-                       set_control_lines(port->serial->dev, 0);
-               }
-       }
 }
 
 static int pl2303_open(struct tty_struct *tty,
@@ -748,7 +720,7 @@ static int pl2303_open(struct tty_struct *tty,
        if (result) {
                dev_err(&port->dev, "%s - failed submitting read urb,"
                        " error %d\n", __func__, result);
-               pl2303_close(tty, port, NULL);
+               pl2303_close(port);
                return -EPROTO;
        }
 
@@ -758,9 +730,10 @@ static int pl2303_open(struct tty_struct *tty,
        if (result) {
                dev_err(&port->dev, "%s - failed submitting interrupt urb,"
                        " error %d\n", __func__, result);
-               pl2303_close(tty, port, NULL);
+               pl2303_close(port);
                return -EPROTO;
        }
+       port->port.drain_delay = 256;
        return 0;
 }
 
@@ -821,6 +794,14 @@ static int pl2303_tiocmget(struct tty_struct *tty, struct file *file)
        return result;
 }
 
+static int pl2303_carrier_raised(struct usb_serial_port *port)
+{
+       struct pl2303_private *priv = usb_get_serial_port_data(port);
+       if (priv->line_status & UART_DCD)
+               return 1;
+       return 0;
+}
+
 static int wait_modem_info(struct usb_serial_port *port, unsigned int arg)
 {
        struct pl2303_private *priv = usb_get_serial_port_data(port);
@@ -1125,6 +1106,8 @@ static struct usb_serial_driver pl2303_device = {
        .num_ports =            1,
        .open =                 pl2303_open,
        .close =                pl2303_close,
+       .dtr_rts =              pl2303_dtr_rts,
+       .carrier_raised =       pl2303_carrier_raised,
        .write =                pl2303_write,
        .ioctl =                pl2303_ioctl,
        .break_ctl =            pl2303_break_ctl,
index 913225c..17ac34f 100644 (file)
 #include <linux/module.h>
 #include <linux/usb.h>
 #include <linux/usb/serial.h>
-#include <linux/usb/ch9.h>
 
 #define SWIMS_USB_REQUEST_SetPower     0x00
 #define SWIMS_USB_REQUEST_SetNmea      0x07
 
-/* per port private data */
 #define N_IN_URB       4
 #define N_OUT_URB      4
 #define IN_BUFLEN      4096
 static int debug;
 static int nmea;
 
+/* Used in interface blacklisting */
+struct sierra_iface_info {
+       const u32 infolen;      /* number of interface numbers on blacklist */
+       const u8  *ifaceinfo;   /* pointer to the array holding the numbers */
+};
+
 static int sierra_set_power_state(struct usb_device *udev, __u16 swiState)
 {
        int result;
@@ -85,6 +89,23 @@ static int sierra_calc_num_ports(struct usb_serial *serial)
        return result;
 }
 
+static int is_blacklisted(const u8 ifnum,
+                               const struct sierra_iface_info *blacklist)
+{
+       const u8  *info;
+       int i;
+
+       if (blacklist) {
+               info = blacklist->ifaceinfo;
+
+               for (i = 0; i < blacklist->infolen; i++) {
+                       if (info[i] == ifnum)
+                               return 1;
+               }
+       }
+       return 0;
+}
+
 static int sierra_calc_interface(struct usb_serial *serial)
 {
        int interface;
@@ -153,9 +174,25 @@ static int sierra_probe(struct usb_serial *serial,
         */
        usb_set_serial_data(serial, (void *)num_ports);
 
+       /* ifnum could have changed - by calling usb_set_interface */
+       ifnum = sierra_calc_interface(serial);
+
+       if (is_blacklisted(ifnum,
+                               (struct sierra_iface_info *)id->driver_info)) {
+               dev_dbg(&serial->dev->dev,
+                       "Ignoring blacklisted interface #%d\n", ifnum);
+               return -ENODEV;
+       }
+
        return result;
 }
 
+static const u8 direct_ip_non_serial_ifaces[] = { 7, 8, 9, 10, 11 };
+static const struct sierra_iface_info direct_ip_interface_blacklist = {
+       .infolen = ARRAY_SIZE(direct_ip_non_serial_ifaces),
+       .ifaceinfo = direct_ip_non_serial_ifaces,
+};
+
 static struct usb_device_id id_table [] = {
        { USB_DEVICE(0x1199, 0x0017) }, /* Sierra Wireless EM5625 */
        { USB_DEVICE(0x1199, 0x0018) }, /* Sierra Wireless MC5720 */
@@ -188,9 +225,11 @@ static struct usb_device_id id_table [] = {
        { USB_DEVICE(0x1199, 0x6833) }, /* Sierra Wireless MC8781 */
        { USB_DEVICE(0x1199, 0x683A) }, /* Sierra Wireless MC8785 */
        { USB_DEVICE(0x1199, 0x683B) }, /* Sierra Wireless MC8785 Composite */
-       { USB_DEVICE(0x1199, 0x683C) }, /* Sierra Wireless MC8790 */
-       { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8790 */
-       { USB_DEVICE(0x1199, 0x683E) }, /* Sierra Wireless MC8790 */
+       /* Sierra Wireless MC8790, MC8791, MC8792 Composite */
+       { USB_DEVICE(0x1199, 0x683C) },
+       { USB_DEVICE(0x1199, 0x683D) }, /* Sierra Wireless MC8791 Composite */
+       /* Sierra Wireless MC8790, MC8791, MC8792 */
+       { USB_DEVICE(0x1199, 0x683E) },
        { USB_DEVICE(0x1199, 0x6850) }, /* Sierra Wireless AirCard 880 */
        { USB_DEVICE(0x1199, 0x6851) }, /* Sierra Wireless AirCard 881 */
        { USB_DEVICE(0x1199, 0x6852) }, /* Sierra Wireless AirCard 880 E */
@@ -211,6 +250,10 @@ static struct usb_device_id id_table [] = {
        { USB_DEVICE(0x1199, 0x0112) }, /* Sierra Wireless AirCard 580 */
        { USB_DEVICE(0x0F3D, 0x0112) }, /* Airprime/Sierra PC 5220 */
 
+       { USB_DEVICE(0x1199, 0x68A3),   /* Sierra Wireless Direct IP modems */
+         .driver_info = (kernel_ulong_t)&direct_ip_interface_blacklist
+       },
+
        { }
 };
 MODULE_DEVICE_TABLE(usb, id_table);
@@ -229,7 +272,6 @@ struct sierra_port_private {
 
        /* Input endpoints and buffers for this port */
        struct urb *in_urbs[N_IN_URB];
-       char *in_buffer[N_IN_URB];
 
        /* Settings for the port */
        int rts_state;  /* Handshaking pins (outputs) */
@@ -240,57 +282,50 @@ struct sierra_port_private {
        int ri_state;
 };
 
-static int sierra_send_setup(struct tty_struct *tty,
-                                               struct usb_serial_port *port)
+static int sierra_send_setup(struct usb_serial_port *port)
 {
        struct usb_serial *serial = port->serial;
        struct sierra_port_private *portdata;
        __u16 interface = 0;
+       int val = 0;
 
        dev_dbg(&port->dev, "%s", __func__);
 
        portdata = usb_get_serial_port_data(port);
 
-       if (tty) {
-               int val = 0;
-               if (portdata->dtr_state)
-                       val |= 0x01;
-               if (portdata->rts_state)
-                       val |= 0x02;
-
-               /* If composite device then properly report interface */
-               if (serial->num_ports == 1) {
-                       interface = sierra_calc_interface(serial);
-
-                       /* Control message is sent only to interfaces with
-                        * interrupt_in endpoints
-                        */
-                       if (port->interrupt_in_urb) {
-                               /* send control message */
-                               return usb_control_msg(serial->dev,
-                                       usb_rcvctrlpipe(serial->dev, 0),
-                                       0x22, 0x21, val, interface,
-                                       NULL, 0, USB_CTRL_SET_TIMEOUT);
-                       }
-               }
-
-               /* Otherwise the need to do non-composite mapping */
-               else {
-                       if (port->bulk_out_endpointAddress == 2)
-                               interface = 0;
-                       else if (port->bulk_out_endpointAddress == 4)
-                               interface = 1;
-                       else if (port->bulk_out_endpointAddress == 5)
-                               interface = 2;
+       if (portdata->dtr_state)
+               val |= 0x01;
+       if (portdata->rts_state)
+               val |= 0x02;
 
+       /* If composite device then properly report interface */
+       if (serial->num_ports == 1) {
+               interface = sierra_calc_interface(serial);
+               /* Control message is sent only to interfaces with
+                * interrupt_in endpoints
+                */
+               if (port->interrupt_in_urb) {
+                       /* send control message */
                        return usb_control_msg(serial->dev,
                                usb_rcvctrlpipe(serial->dev, 0),
                                0x22, 0x21, val, interface,
                                NULL, 0, USB_CTRL_SET_TIMEOUT);
-
                }
        }
 
+       /* Otherwise the need to do non-composite mapping */
+       else {
+               if (port->bulk_out_endpointAddress == 2)
+                       interface = 0;
+               else if (port->bulk_out_endpointAddress == 4)
+                       interface = 1;
+               else if (port->bulk_out_endpointAddress == 5)
+                       interface = 2;
+               return usb_control_msg(serial->dev,
+                       usb_rcvctrlpipe(serial->dev, 0),
+                       0x22, 0x21, val, interface,
+                       NULL, 0, USB_CTRL_SET_TIMEOUT);
+       }
        return 0;
 }
 
@@ -299,7 +334,7 @@ static void sierra_set_termios(struct tty_struct *tty,
 {
        dev_dbg(&port->dev, "%s", __func__);
        tty_termios_copy_hw(tty->termios, old_termios);
-       sierra_send_setup(tty, port);
+       sierra_send_setup(port);
 }
 
 static int sierra_tiocmget(struct tty_struct *tty, struct file *file)
@@ -338,7 +373,18 @@ static int sierra_tiocmset(struct tty_struct *tty, struct file *file,
                portdata->rts_state = 0;
        if (clear & TIOCM_DTR)
                portdata->dtr_state = 0;
-       return sierra_send_setup(tty, port);
+       return sierra_send_setup(port);
+}
+
+static void sierra_release_urb(struct urb *urb)
+{
+       struct usb_serial_port *port;
+       if (urb) {
+               port =  urb->context;
+               dev_dbg(&port->dev, "%s: %p\n", __func__, urb);
+               kfree(urb->transfer_buffer);
+               usb_free_urb(urb);
+       }
 }
 
 static void sierra_outdat_callback(struct urb *urb)
@@ -465,7 +511,7 @@ static void sierra_indat_callback(struct urb *urb)
                                " received", __func__);
 
                /* Resubmit urb so we continue receiving */
-               if (port->port.count && status != -ESHUTDOWN) {
+               if (port->port.count && status != -ESHUTDOWN && status != -EPERM) {
                        err = usb_submit_urb(urb, GFP_ATOMIC);
                        if (err)
                                dev_err(&port->dev, "resubmit read urb failed."
@@ -557,67 +603,99 @@ static int sierra_write_room(struct tty_struct *tty)
        return 2048;
 }
 
-static int sierra_open(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void sierra_stop_rx_urbs(struct usb_serial_port *port)
 {
-       struct sierra_port_private *portdata;
-       struct usb_serial *serial = port->serial;
        int i;
-       struct urb *urb;
-       int result;
+       struct sierra_port_private *portdata = usb_get_serial_port_data(port);
 
-       portdata = usb_get_serial_port_data(port);
+       for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++)
+               usb_kill_urb(portdata->in_urbs[i]);
 
-       dev_dbg(&port->dev, "%s", __func__);
+       usb_kill_urb(port->interrupt_in_urb);
+}
 
-       /* Set some sane defaults */
-       portdata->rts_state = 1;
-       portdata->dtr_state = 1;
+static int sierra_submit_rx_urbs(struct usb_serial_port *port, gfp_t mem_flags)
+{
+       int ok_cnt;
+       int err = -EINVAL;
+       int i;
+       struct urb *urb;
+       struct sierra_port_private *portdata = usb_get_serial_port_data(port);
 
-       /* Reset low level data toggle and start reading from endpoints */
-       for (i = 0; i < N_IN_URB; i++) {
+       ok_cnt = 0;
+       for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
                urb = portdata->in_urbs[i];
                if (!urb)
                        continue;
-               if (urb->dev != serial->dev) {
-                       dev_dbg(&port->dev, "%s: dev %p != %p",
-                                __func__, urb->dev, serial->dev);
-                       continue;
+               err = usb_submit_urb(urb, mem_flags);
+               if (err) {
+                       dev_err(&port->dev, "%s: submit urb failed: %d\n",
+                               __func__, err);
+               } else {
+                       ok_cnt++;
                }
+       }
 
-               /*
-                * make sure endpoint data toggle is synchronized with the
-                * device
-                */
-               usb_clear_halt(urb->dev, urb->pipe);
-
-               result = usb_submit_urb(urb, GFP_KERNEL);
-               if (result) {
-                       dev_err(&port->dev, "submit urb %d failed (%d) %d\n",
-                               i, result, urb->transfer_buffer_length);
+       if (ok_cnt && port->interrupt_in_urb) {
+               err = usb_submit_urb(port->interrupt_in_urb, mem_flags);
+               if (err) {
+                       dev_err(&port->dev, "%s: submit intr urb failed: %d\n",
+                               __func__, err);
                }
        }
 
-       sierra_send_setup(tty, port);
+       if (ok_cnt > 0) /* at least one rx urb submitted */
+               return 0;
+       else
+               return err;
+}
+
+static struct urb *sierra_setup_urb(struct usb_serial *serial, int endpoint,
+                                       int dir, void *ctx, int len,
+                                       gfp_t mem_flags,
+                                       usb_complete_t callback)
+{
+       struct urb      *urb;
+       u8              *buf;
+
+       if (endpoint == -1)
+               return NULL;
 
-       /* start up the interrupt endpoint if we have one */
-       if (port->interrupt_in_urb) {
-               result = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
-               if (result)
-                       dev_err(&port->dev, "submit irq_in urb failed %d\n",
-                               result);
+       urb = usb_alloc_urb(0, mem_flags);
+       if (urb == NULL) {
+               dev_dbg(&serial->dev->dev, "%s: alloc for endpoint %d failed\n",
+                       __func__, endpoint);
+               return NULL;
        }
-       return 0;
+
+       buf = kmalloc(len, mem_flags);
+       if (buf) {
+               /* Fill URB using supplied data */
+               usb_fill_bulk_urb(urb, serial->dev,
+                       usb_sndbulkpipe(serial->dev, endpoint) | dir,
+                       buf, len, callback, ctx);
+
+               /* debug */
+               dev_dbg(&serial->dev->dev, "%s %c u : %p d:%p\n", __func__,
+                               dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
+       } else {
+               dev_dbg(&serial->dev->dev, "%s %c u:%p d:%p\n", __func__,
+                               dir == USB_DIR_IN ? 'i' : 'o', urb, buf);
+
+               sierra_release_urb(urb);
+               urb = NULL;
+       }
+
+       return urb;
 }
 
-static void sierra_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void sierra_close(struct usb_serial_port *port)
 {
        int i;
        struct usb_serial *serial = port->serial;
        struct sierra_port_private *portdata;
 
-       dev_dbg(&port->dev, "%s", __func__);
+       dev_dbg(&port->dev, "%s\n", __func__);
        portdata = usb_get_serial_port_data(port);
 
        portdata->rts_state = 0;
@@ -626,25 +704,83 @@ static void sierra_close(struct tty_struct *tty,
        if (serial->dev) {
                mutex_lock(&serial->disc_mutex);
                if (!serial->disconnected)
-                       sierra_send_setup(tty, port);
+                       sierra_send_setup(port);
                mutex_unlock(&serial->disc_mutex);
 
-               /* Stop reading/writing urbs */
-               for (i = 0; i < N_IN_URB; i++)
-                       usb_kill_urb(portdata->in_urbs[i]);
+               /* Stop reading urbs */
+               sierra_stop_rx_urbs(port);
+               /* .. and release them */
+               for (i = 0; i < N_IN_URB; i++) {
+                       sierra_release_urb(portdata->in_urbs[i]);
+                       portdata->in_urbs[i] = NULL;
+               }
        }
+}
 
-       usb_kill_urb(port->interrupt_in_urb);
-       tty_port_tty_set(&port->port, NULL);
+static int sierra_open(struct tty_struct *tty,
+                       struct usb_serial_port *port, struct file *filp)
+{
+       struct sierra_port_private *portdata;
+       struct usb_serial *serial = port->serial;
+       int i;
+       int err;
+       int endpoint;
+       struct urb *urb;
+
+       portdata = usb_get_serial_port_data(port);
+
+       dev_dbg(&port->dev, "%s", __func__);
+
+       /* Set some sane defaults */
+       portdata->rts_state = 1;
+       portdata->dtr_state = 1;
+
+
+       endpoint = port->bulk_in_endpointAddress;
+       for (i = 0; i < ARRAY_SIZE(portdata->in_urbs); i++) {
+               urb = sierra_setup_urb(serial, endpoint, USB_DIR_IN, port,
+                                       IN_BUFLEN, GFP_KERNEL,
+                                       sierra_indat_callback);
+               portdata->in_urbs[i] = urb;
+       }
+       /* clear halt condition */
+       usb_clear_halt(serial->dev,
+                       usb_sndbulkpipe(serial->dev, endpoint) | USB_DIR_IN);
+
+       err = sierra_submit_rx_urbs(port, GFP_KERNEL);
+       if (err) {
+               /* get rid of everything as in close */
+               sierra_close(port);
+               return err;
+       }
+       sierra_send_setup(port);
+
+       return 0;
+}
+
+
+static void sierra_dtr_rts(struct usb_serial_port *port, int on)
+{
+       struct usb_serial *serial = port->serial;
+       struct sierra_port_private *portdata;
+
+       portdata = usb_get_serial_port_data(port);
+       portdata->rts_state = on;
+       portdata->dtr_state = on;
+
+       if (serial->dev) {
+               mutex_lock(&serial->disc_mutex);
+               if (!serial->disconnected)
+                       sierra_send_setup(port);
+               mutex_unlock(&serial->disc_mutex);
+       }
 }
 
 static int sierra_startup(struct usb_serial *serial)
 {
        struct usb_serial_port *port;
        struct sierra_port_private *portdata;
-       struct urb *urb;
        int i;
-       int j;
 
        dev_dbg(&serial->dev->dev, "%s", __func__);
 
@@ -666,34 +802,8 @@ static int sierra_startup(struct usb_serial *serial)
                        return -ENOMEM;
                }
                spin_lock_init(&portdata->lock);
-               for (j = 0; j < N_IN_URB; j++) {
-                       portdata->in_buffer[j] = kmalloc(IN_BUFLEN, GFP_KERNEL);
-                       if (!portdata->in_buffer[j]) {
-                               for (--j; j >= 0; j--)
-                                       kfree(portdata->in_buffer[j]);
-                               kfree(portdata);
-                               return -ENOMEM;
-                       }
-               }
-
+               /* Set the port private data pointer */
                usb_set_serial_port_data(port, portdata);
-
-               /* initialize the in urbs */
-               for (j = 0; j < N_IN_URB; ++j) {
-                       urb = usb_alloc_urb(0, GFP_KERNEL);
-                       if (urb == NULL) {
-                               dev_dbg(&port->dev, "%s: alloc for in "
-                                       "port failed.", __func__);
-                               continue;
-                       }
-                       /* Fill URB using supplied data. */
-                       usb_fill_bulk_urb(urb, serial->dev,
-                                         usb_rcvbulkpipe(serial->dev,
-                                               port->bulk_in_endpointAddress),
-                                         portdata->in_buffer[j], IN_BUFLEN,
-                                         sierra_indat_callback, port);
-                       portdata->in_urbs[j] = urb;
-               }
        }
 
        return 0;
@@ -701,7 +811,7 @@ static int sierra_startup(struct usb_serial *serial)
 
 static void sierra_shutdown(struct usb_serial *serial)
 {
-       int i, j;
+       int i;
        struct usb_serial_port *port;
        struct sierra_port_private *portdata;
 
@@ -714,12 +824,6 @@ static void sierra_shutdown(struct usb_serial *serial)
                portdata = usb_get_serial_port_data(port);
                if (!portdata)
                        continue;
-
-               for (j = 0; j < N_IN_URB; j++) {
-                       usb_kill_urb(portdata->in_urbs[j]);
-                       usb_free_urb(portdata->in_urbs[j]);
-                       kfree(portdata->in_buffer[j]);
-               }
                kfree(portdata);
                usb_set_serial_port_data(port, NULL);
        }
@@ -737,6 +841,7 @@ static struct usb_serial_driver sierra_device = {
        .probe             = sierra_probe,
        .open              = sierra_open,
        .close             = sierra_close,
+       .dtr_rts           = sierra_dtr_rts,
        .write             = sierra_write,
        .write_room        = sierra_write_room,
        .set_termios       = sierra_set_termios,
index 5e7528c..8f7ed8f 100644 (file)
@@ -446,66 +446,47 @@ static void spcp8x5_set_workMode(struct usb_device *dev, u16 value,
                        "RTSCTS usb_control_msg(enable flowctrl) = %d\n", ret);
 }
 
+static int spcp8x5_carrier_raised(struct usb_serial_port *port)
+{
+       struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+       if (priv->line_status & MSR_STATUS_LINE_DCD)
+               return 1;
+       return 0;
+}
+
+static void spcp8x5_dtr_rts(struct usb_serial_port *port, int on)
+{
+       struct spcp8x5_private *priv = usb_get_serial_port_data(port);
+       unsigned long flags;
+       u8 control;
+
+       spin_lock_irqsave(&priv->lock, flags);
+       if (on)
+               priv->line_control = MCR_CONTROL_LINE_DTR
+                                               | MCR_CONTROL_LINE_RTS;
+       else
+               priv->line_control &= ~ (MCR_CONTROL_LINE_DTR
+                                               | MCR_CONTROL_LINE_RTS);
+       control = priv->line_control;
+       spin_unlock_irqrestore(&priv->lock, flags);
+       spcp8x5_set_ctrlLine(port->serial->dev, control , priv->type);
+}
+
 /* close the serial port. We should wait for data sending to device 1st and
  * then kill all urb. */
-static void spcp8x5_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void spcp8x5_close(struct usb_serial_port *port)
 {
        struct spcp8x5_private *priv = usb_get_serial_port_data(port);
        unsigned long flags;
-       unsigned int c_cflag;
-       int bps;
-       long timeout;
-       wait_queue_t wait;
        int result;
 
        dbg("%s - port %d", __func__, port->number);
 
-       /* wait for data to drain from the buffer */
        spin_lock_irqsave(&priv->lock, flags);
-       timeout = SPCP8x5_CLOSING_WAIT;
-       init_waitqueue_entry(&wait, current);
-       add_wait_queue(&tty->write_wait, &wait);
-       for (;;) {
-               set_current_state(TASK_INTERRUPTIBLE);
-               if (ringbuf_avail_data(priv->buf) == 0 ||
-                   timeout == 0 || signal_pending(current))
-                       break;
-               spin_unlock_irqrestore(&priv->lock, flags);
-               timeout = schedule_timeout(timeout);
-               spin_lock_irqsave(&priv->lock, flags);
-       }
-       set_current_state(TASK_RUNNING);
-       remove_wait_queue(&tty->write_wait, &wait);
-
        /* clear out any remaining data in the buffer */
        clear_ringbuf(priv->buf);
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       /* wait for characters to drain from the device (this is long enough
-        * for the entire all byte spcp8x5 hardware buffer to drain with no
-        * flow control for data rates of 1200 bps or more, for lower rates we
-        * should really know how much data is in the buffer to compute a delay
-        * that is not unnecessarily long) */
-       bps = tty_get_baud_rate(tty);
-       if (bps > 1200)
-               timeout = max((HZ*2560) / bps, HZ/10);
-       else
-               timeout = 2*HZ;
-       set_current_state(TASK_INTERRUPTIBLE);
-       schedule_timeout(timeout);
-
-       /* clear control lines */
-       if (tty) {
-               c_cflag = tty->termios->c_cflag;
-               if (c_cflag & HUPCL) {
-                       spin_lock_irqsave(&priv->lock, flags);
-                       priv->line_control = 0;
-                       spin_unlock_irqrestore(&priv->lock, flags);
-                       spcp8x5_set_ctrlLine(port->serial->dev, 0 , priv->type);
-               }
-       }
-
        /* kill urb */
        if (port->write_urb != NULL) {
                result = usb_unlink_urb(port->write_urb);
@@ -665,13 +646,6 @@ static int spcp8x5_open(struct tty_struct *tty,
        if (ret)
                return ret;
 
-       spin_lock_irqsave(&priv->lock, flags);
-       if (tty && (tty->termios->c_cflag & CBAUD))
-               priv->line_control = MCR_DTR | MCR_RTS;
-       else
-               priv->line_control = 0;
-       spin_unlock_irqrestore(&priv->lock, flags);
-
        spcp8x5_set_ctrlLine(serial->dev, priv->line_control , priv->type);
 
        /* Setup termios */
@@ -691,9 +665,10 @@ static int spcp8x5_open(struct tty_struct *tty,
        port->read_urb->dev = serial->dev;
        ret = usb_submit_urb(port->read_urb, GFP_KERNEL);
        if (ret) {
-               spcp8x5_close(tty, port, NULL);
+               spcp8x5_close(port);
                return -EPROTO;
        }
+       port->port.drain_delay = 256;
        return 0;
 }
 
@@ -1033,6 +1008,8 @@ static struct usb_serial_driver spcp8x5_device = {
        .num_ports              = 1,
        .open                   = spcp8x5_open,
        .close                  = spcp8x5_close,
+       .dtr_rts                = spcp8x5_dtr_rts,
+       .carrier_raised         = spcp8x5_carrier_raised,
        .write                  = spcp8x5_write,
        .set_termios            = spcp8x5_set_termios,
        .ioctl                  = spcp8x5_ioctl,
index 69879e4..8b07ebc 100644 (file)
@@ -152,8 +152,7 @@ static int symbol_open(struct tty_struct *tty, struct usb_serial_port *port,
        return result;
 }
 
-static void symbol_close(struct tty_struct *tty, struct usb_serial_port *port,
-                         struct file *filp)
+static void symbol_close(struct usb_serial_port *port)
 {
        struct symbol_private *priv = usb_get_serial_data(port->serial);
 
index 0a64bac..42cb04c 100644 (file)
@@ -100,8 +100,7 @@ static int ti_startup(struct usb_serial *serial);
 static void ti_shutdown(struct usb_serial *serial);
 static int ti_open(struct tty_struct *tty, struct usb_serial_port *port,
                struct file *file);
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-               struct file *file);
+static void ti_close(struct usb_serial_port *port);
 static int ti_write(struct tty_struct *tty, struct usb_serial_port *port,
                const unsigned char *data, int count);
 static int ti_write_room(struct tty_struct *tty);
@@ -647,8 +646,7 @@ release_lock:
 }
 
 
-static void ti_close(struct tty_struct *tty, struct usb_serial_port *port,
-                                                       struct file *file)
+static void ti_close(struct usb_serial_port *port)
 {
        struct ti_device *tdev;
        struct ti_port *tport;
index f331e2b..1967a7e 100644 (file)
@@ -238,9 +238,11 @@ static int serial_open (struct tty_struct *tty, struct file *filp)
                        goto bailout_interface_put;
                mutex_unlock(&serial->disc_mutex);
        }
-
        mutex_unlock(&port->mutex);
-       return 0;
+       /* Now do the correct tty layer semantics */
+       retval = tty_port_block_til_ready(&port->port, tty, filp);
+       if (retval == 0)
+               return 0;
 
 bailout_interface_put:
        usb_autopm_put_interface(serial->interface);
@@ -259,64 +261,89 @@ bailout_serial_put:
        return retval;
 }
 
-static void serial_close(struct tty_struct *tty, struct file *filp)
+/**
+ *     serial_do_down          -       shut down hardware
+ *     @port: port to shut down
+ *
+ *     Shut down a USB port unless it is the console. We never shut down the
+ *     console hardware as it will always be in use.
+ *
+ *     Don't free any resources at this point
+ */
+static void serial_do_down(struct usb_serial_port *port)
 {
-       struct usb_serial_port *port = tty->driver_data;
+       struct usb_serial_driver *drv = port->serial->type;
        struct usb_serial *serial;
        struct module *owner;
-       int count;
 
-       if (!port)
+       /* The console is magical, do not hang up the console hardware
+          or there will be tears */
+       if (port->console)
                return;
 
-       dbg("%s - port %d", __func__, port->number);
-
        mutex_lock(&port->mutex);
        serial = port->serial;
        owner = serial->type->driver.owner;
 
-       if (port->port.count == 0) {
-               mutex_unlock(&port->mutex);
-               return;
-       }
-
-       if (port->port.count == 1)
-               /* only call the device specific close if this
-                * port is being closed by the last owner. Ensure we do
-                * this before we drop the port count. The call is protected
-                * by the port mutex
-                */
-               serial->type->close(tty, port, filp);
-
-       if (port->port.count == (port->console ? 2 : 1)) {
-               struct tty_struct *tty = tty_port_tty_get(&port->port);
-               if (tty) {
-                       /* We must do this before we drop the port count to
-                          zero. */
-                       if (tty->driver_data)
-                               tty->driver_data = NULL;
-                       tty_port_tty_set(&port->port, NULL);
-                       tty_kref_put(tty);
-               }
-       }
+       if (drv->close)
+               drv->close(port);
 
-       --port->port.count;
-       count = port->port.count;
        mutex_unlock(&port->mutex);
-       put_device(&port->dev);
+}
+
+/**
+ *     serial_do_free          -       free resources post close/hangup
+ *     @port: port to free up
+ *
+ *     Do the resource freeing and refcount dropping for the port. We must
+ *     be careful about ordering and we must avoid freeing up the console.
+ */
 
+static void serial_do_free(struct usb_serial_port *port)
+{
+       struct usb_serial *serial;
+       struct module *owner;
+
+       /* The console is magical, do not hang up the console hardware
+          or there will be tears */
+       if (port->console)
+               return;
+
+       serial = port->serial;
+       owner = serial->type->driver.owner;
+       put_device(&port->dev);
        /* Mustn't dereference port any more */
-       if (count == 0) {
-               mutex_lock(&serial->disc_mutex);
-               if (!serial->disconnected)
-                       usb_autopm_put_interface(serial->interface);
-               mutex_unlock(&serial->disc_mutex);
-       }
+       mutex_lock(&serial->disc_mutex);
+       if (!serial->disconnected)
+               usb_autopm_put_interface(serial->interface);
+       mutex_unlock(&serial->disc_mutex);
        usb_serial_put(serial);
-
        /* Mustn't dereference serial any more */
-       if (count == 0)
-               module_put(owner);
+       module_put(owner);
+}
+
+/*
+ * tty close handler.  When tty_port_close_start() reports that close
+ * processing should go ahead, shut the hardware down, finish the
+ * tty_port close, detach the tty from the port and release the port's
+ * resources through serial_do_free().
+ */
+static void serial_close(struct tty_struct *tty, struct file *filp)
+{
+       struct usb_serial_port *port = tty->driver_data;
+
+       /* No port attached to this tty means there is nothing to undo;
+          the port must not be dereferenced (not even for the debug
+          message below) in that case */
+       if (!port)
+               return;
+
+       dbg("%s - port %d", __func__, port->number);
+
+       /* A zero return tells us there is no close work to do here */
+       if (tty_port_close_start(&port->port, tty, filp) == 0)
+               return;
+
+       serial_do_down(port);
+       tty_port_close_end(&port->port, tty);
+       tty_port_tty_set(&port->port, NULL);
+       serial_do_free(port);
+}
+
+/* tty hangup handler: shut the hardware down, let the tty_port layer
+   process the hangup, then release resources via serial_do_free() */
+static void serial_hangup(struct tty_struct *tty)
+{
+       struct usb_serial_port *port = tty->driver_data;
+       serial_do_down(port);
+       tty_port_hangup(&port->port);
+       serial_do_free(port);
 }
 
 static int serial_write(struct tty_struct *tty, const unsigned char *buf,
@@ -648,6 +675,29 @@ static struct usb_serial_driver *search_serial_device(
        return NULL;
 }
 
+/* tty_port carrier_raised hook: ask the sub-driver when it provides a
+   carrier_raised method, otherwise report the carrier as raised */
+static int serial_carrier_raised(struct tty_port *port)
+{
+       struct usb_serial_port *usp;
+       struct usb_serial_driver *type;
+
+       usp = container_of(port, struct usb_serial_port, port);
+       type = usp->serial->type;
+
+       /* No carrier control - don't block */
+       if (!type->carrier_raised)
+               return 1;
+
+       return type->carrier_raised(usp);
+}
+
+/* tty_port dtr_rts hook: pass the request on to the sub-driver when it
+   provides a dtr_rts method, otherwise do nothing */
+static void serial_dtr_rts(struct tty_port *port, int on)
+{
+       struct usb_serial_port *usp;
+       struct usb_serial_driver *type;
+
+       usp = container_of(port, struct usb_serial_port, port);
+       type = usp->serial->type;
+
+       if (!type->dtr_rts)
+               return;
+
+       type->dtr_rts(usp, on);
+}
+
+/* tty_port callbacks installed on every usb serial port at probe time;
+   both entries forward to the optional sub-driver methods */
+static const struct tty_port_operations serial_port_ops = {
+       .carrier_raised = serial_carrier_raised,
+       .dtr_rts = serial_dtr_rts,
+};
+
 int usb_serial_probe(struct usb_interface *interface,
                               const struct usb_device_id *id)
 {
@@ -841,6 +891,7 @@ int usb_serial_probe(struct usb_interface *interface,
                if (!port)
                        goto probe_error;
                tty_port_init(&port->port);
+               port->port.ops = &serial_port_ops;
                port->serial = serial;
                spin_lock_init(&port->lock);
                mutex_init(&port->mutex);
@@ -1071,6 +1122,9 @@ void usb_serial_disconnect(struct usb_interface *interface)
                if (port) {
                        struct tty_struct *tty = tty_port_tty_get(&port->port);
                        if (tty) {
+                               /* The hangup will occur asynchronously but
+                                  the object refcounts will sort out all the
+                                  cleanup */
                                tty_hangup(tty);
                                tty_kref_put(tty);
                        }
@@ -1135,6 +1189,7 @@ static const struct tty_operations serial_ops = {
        .open =                 serial_open,
        .close =                serial_close,
        .write =                serial_write,
+       .hangup =               serial_hangup,
        .write_room =           serial_write_room,
        .ioctl =                serial_ioctl,
        .set_termios =          serial_set_termios,
@@ -1147,6 +1202,7 @@ static const struct tty_operations serial_ops = {
        .proc_fops =            &serial_proc_fops,
 };
 
+
 struct tty_driver *usb_serial_tty_driver;
 
 static int __init usb_serial_init(void)
index 5ac414b..b15f1c0 100644 (file)
@@ -38,8 +38,7 @@
 /* function prototypes for a handspring visor */
 static int  visor_open(struct tty_struct *tty, struct usb_serial_port *port,
                                        struct file *filp);
-static void visor_close(struct tty_struct *tty, struct usb_serial_port *port,
-                                       struct file *filp);
+static void visor_close(struct usb_serial_port *port);
 static int  visor_write(struct tty_struct *tty, struct usb_serial_port *port,
                                        const unsigned char *buf, int count);
 static int  visor_write_room(struct tty_struct *tty);
@@ -324,8 +323,7 @@ exit:
 }
 
 
-static void visor_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void visor_close(struct usb_serial_port *port)
 {
        struct visor_private *priv = usb_get_serial_port_data(port);
        unsigned char *transfer_buffer;
index 5335d32..7c7295d 100644 (file)
@@ -147,8 +147,7 @@ static int  whiteheat_attach(struct usb_serial *serial);
 static void whiteheat_shutdown(struct usb_serial *serial);
 static int  whiteheat_open(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-static void whiteheat_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+static void whiteheat_close(struct usb_serial_port *port);
 static int  whiteheat_write(struct tty_struct *tty,
                        struct usb_serial_port *port,
                        const unsigned char *buf, int count);
@@ -712,8 +711,7 @@ exit:
 }
 
 
-static void whiteheat_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp)
+static void whiteheat_close(struct usb_serial_port *port)
 {
        struct whiteheat_private *info = usb_get_serial_port_data(port);
        struct whiteheat_urb_wrap *wrap;
@@ -723,31 +721,7 @@ static void whiteheat_close(struct tty_struct *tty,
 
        dbg("%s - port %d", __func__, port->number);
 
-       mutex_lock(&port->serial->disc_mutex);
-       /* filp is NULL when called from usb_serial_disconnect */
-       if ((filp && (tty_hung_up_p(filp))) || port->serial->disconnected) {
-               mutex_unlock(&port->serial->disc_mutex);
-               return;
-       }
-       mutex_unlock(&port->serial->disc_mutex);
-
-       tty->closing = 1;
-
-/*
- * Not currently in use; tty_wait_until_sent() calls
- * serial_chars_in_buffer() which deadlocks on the second semaphore
- * acquisition. This should be fixed at some point. Greg's been
- * notified.
-       if ((filp->f_flags & (O_NDELAY | O_NONBLOCK)) == 0) {
-               tty_wait_until_sent(tty, CLOSING_DELAY);
-       }
-*/
-
-       tty_driver_flush_buffer(tty);
-       tty_ldisc_flush(tty);
-
        firm_report_tx_done(port);
-
        firm_close(port);
 
        /* shutdown our bulk reads and writes */
@@ -775,10 +749,7 @@ static void whiteheat_close(struct tty_struct *tty,
        }
        spin_unlock_irq(&info->lock);
        mutex_unlock(&info->deathwarrant);
-
        stop_command_port(port->serial);
-
-       tty->closing = 0;
 }
 
 
index 8ac9cdd..cab100a 100644 (file)
@@ -18,6 +18,16 @@ config XEN_SCRUB_PAGES
          secure, but slightly less efficient.
          If in doubt, say yes.
 
+config XEN_DEV_EVTCHN
+       tristate "Xen /dev/xen/evtchn device"
+       depends on XEN
+       default y
+       help
+         The evtchn driver allows a userspace process to trigger event
+         channels and to receive notification of an event channel
+         firing.
+         If in doubt, say yes.
+
 config XENFS
        tristate "Xen filesystem"
        depends on XEN
@@ -41,3 +51,13 @@ config XEN_COMPAT_XENFS
          a xen platform.
          If in doubt, say yes.
 
+config XEN_SYS_HYPERVISOR
+       bool "Create xen entries under /sys/hypervisor"
+       depends on XEN && SYSFS
+       select SYS_HYPERVISOR
+       default y
+       help
+         Create entries under /sys/hypervisor describing the Xen
+        hypervisor environment.  When running native or in another
+        virtual environment, /sys/hypervisor will still be present,
+        but will have no xen contents.
\ No newline at end of file
index ff8accc..ec2a39b 100644 (file)
@@ -4,4 +4,6 @@ obj-y   += xenbus/
 obj-$(CONFIG_HOTPLUG_CPU)      += cpu_hotplug.o
 obj-$(CONFIG_XEN_XENCOMM)      += xencomm.o
 obj-$(CONFIG_XEN_BALLOON)      += balloon.o
-obj-$(CONFIG_XENFS)            += xenfs/
\ No newline at end of file
+obj-$(CONFIG_XEN_DEV_EVTCHN)   += evtchn.o
+obj-$(CONFIG_XENFS)            += xenfs/
+obj-$(CONFIG_XEN_SYS_HYPERVISOR)       += sys-hypervisor.o
\ No newline at end of file
index 30963af..891d2e9 100644 (file)
@@ -151,6 +151,12 @@ static unsigned int evtchn_from_irq(unsigned irq)
        return info_for_irq(irq)->evtchn;
 }
 
+/*
+ * Return the irq recorded for an event channel in evtchn_to_irq[].
+ * The port is used as the array index without any range check, so
+ * callers must only pass in-range ports.
+ */
+unsigned irq_from_evtchn(unsigned int evtchn)
+{
+       return evtchn_to_irq[evtchn];
+}
+EXPORT_SYMBOL_GPL(irq_from_evtchn);
+
 static enum ipi_vector ipi_from_irq(unsigned irq)
 {
        struct irq_info *info = info_for_irq(irq);
@@ -335,7 +341,7 @@ static int find_unbound_irq(void)
        if (irq == nr_irqs)
                panic("No available IRQ to bind to: increase nr_irqs!\n");
 
-       desc = irq_to_desc_alloc_cpu(irq, 0);
+       desc = irq_to_desc_alloc_node(irq, 0);
        if (WARN_ON(desc == NULL))
                return -1;
 
@@ -688,13 +694,13 @@ void rebind_evtchn_irq(int evtchn, int irq)
 }
 
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
-static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+static int rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
 {
        struct evtchn_bind_vcpu bind_vcpu;
        int evtchn = evtchn_from_irq(irq);
 
        if (!VALID_EVTCHN(evtchn))
-               return;
+               return -1;
 
        /* Send future instances of this interrupt to other vcpu. */
        bind_vcpu.port = evtchn;
@@ -707,13 +713,15 @@ static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
         */
        if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
                bind_evtchn_to_cpu(evtchn, tcpu);
-}
 
+       return 0;
+}
 
-static void set_affinity_irq(unsigned irq, const struct cpumask *dest)
+static int set_affinity_irq(unsigned irq, const struct cpumask *dest)
 {
        unsigned tcpu = cpumask_first(dest);
-       rebind_irq_to_cpu(irq, tcpu);
+
+       return rebind_irq_to_cpu(irq, tcpu);
 }
 
 int resend_irq_on_evtchn(unsigned int irq)
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
new file mode 100644 (file)
index 0000000..af03195
--- /dev/null
@@ -0,0 +1,507 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * Driver for receiving and demuxing event-channel signals.
+ *
+ * Copyright (c) 2004-2005, K A Fraser
+ * Multi-process extensions Copyright (c) 2004, Steven Smith
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/errno.h>
+#include <linux/miscdevice.h>
+#include <linux/major.h>
+#include <linux/proc_fs.h>
+#include <linux/stat.h>
+#include <linux/poll.h>
+#include <linux/irq.h>
+#include <linux/init.h>
+#include <linux/gfp.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <xen/events.h>
+#include <xen/evtchn.h>
+#include <asm/xen/hypervisor.h>
+
+/*
+ * State for one open of the evtchn device: allocated in evtchn_open()
+ * and stored in file->private_data.
+ */
+struct per_user_data {
+       struct mutex bind_mutex; /* serialize bind/unbind operations */
+
+       /* Notification ring, accessed via /dev/xen/evtchn. */
+#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+       evtchn_port_t *ring;
+       /* Free-running consumer/producer indices (masked on use) and a
+          flag set when an event arrives while the ring is full */
+       unsigned int ring_cons, ring_prod, ring_overflow;
+       struct mutex ring_cons_mutex; /* protect against concurrent readers */
+
+       /* Processes wait on this queue when ring is empty. */
+       wait_queue_head_t evtchn_wait;
+       struct fasync_struct *evtchn_async_queue;
+       /* "evtchn:<comm>" label built at open; passed as the handler name */
+       const char *name;
+};
+
+/* Who's bound to each port?  Indexed by event-channel port number; an
+   entry is NULL while no user owns that port. */
+static struct per_user_data *port_user[NR_EVENT_CHANNELS];
+static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */
+
+/*
+ * Interrupt handler for a port bound to a user.  The port number comes
+ * in through @data.  The irq is disabled here and only re-enabled by
+ * evtchn_write().  The port is appended to the owner's ring; if the
+ * ring is full the overflow flag is set instead.  Waiters are woken
+ * only when the ring goes from empty to non-empty.
+ */
+irqreturn_t evtchn_interrupt(int irq, void *data)
+{
+       unsigned int port = (unsigned long)data;
+       struct per_user_data *user;
+
+       spin_lock(&port_user_lock);
+
+       user = port_user[port];
+
+       disable_irq_nosync(irq);
+
+       if ((user->ring_prod - user->ring_cons) >= EVTCHN_RING_SIZE) {
+               /* No free slot left: remember that an event was lost */
+               user->ring_overflow = 1;
+               goto out;
+       }
+
+       user->ring[EVTCHN_RING_MASK(user->ring_prod)] = port;
+       wmb(); /* Ensure ring contents visible */
+       if (user->ring_cons == user->ring_prod++) {
+               /* The ring was empty before this entry */
+               wake_up_interruptible(&user->evtchn_wait);
+               kill_fasync(&user->evtchn_async_queue, SIGIO, POLL_IN);
+       }
+
+ out:
+       spin_unlock(&port_user_lock);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * Copy pending port numbers from the notification ring to userspace.
+ *
+ * @count is rounded down to a whole number of ports and capped at
+ * PAGE_SIZE.  Returns the number of bytes copied, 0 for a request
+ * shorter than one port, -EFBIG once the ring has overflowed, -EAGAIN
+ * when the ring is empty and the file is O_NONBLOCK, -EFAULT when the
+ * copy to userspace fails, or the non-zero result of an interrupted
+ * wait.  The consumer index only advances after a successful copy.
+ */
+static ssize_t evtchn_read(struct file *file, char __user *buf,
+                          size_t count, loff_t *ppos)
+{
+       int rc;
+       unsigned int c, p, bytes1 = 0, bytes2 = 0;
+       struct per_user_data *u = file->private_data;
+
+       /* Whole number of ports. */
+       count &= ~(sizeof(evtchn_port_t)-1);
+
+       if (count == 0)
+               return 0;
+
+       if (count > PAGE_SIZE)
+               count = PAGE_SIZE;
+
+       /* Loop until there is data; leaves with ring_cons_mutex held */
+       for (;;) {
+               mutex_lock(&u->ring_cons_mutex);
+
+               rc = -EFBIG;
+               if (u->ring_overflow)
+                       goto unlock_out;
+
+               c = u->ring_cons;
+               p = u->ring_prod;
+               if (c != p)
+                       break;
+
+               mutex_unlock(&u->ring_cons_mutex);
+
+               if (file->f_flags & O_NONBLOCK)
+                       return -EAGAIN;
+
+               rc = wait_event_interruptible(u->evtchn_wait,
+                                             u->ring_cons != u->ring_prod);
+               if (rc)
+                       return rc;
+       }
+
+       /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+       if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
+               bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+                       sizeof(evtchn_port_t);
+               bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+       } else {
+               bytes1 = (p - c) * sizeof(evtchn_port_t);
+               bytes2 = 0;
+       }
+
+       /* Truncate chunks according to caller's maximum byte count. */
+       if (bytes1 > count) {
+               bytes1 = count;
+               bytes2 = 0;
+       } else if ((bytes1 + bytes2) > count) {
+               bytes2 = count - bytes1;
+       }
+
+       rc = -EFAULT;
+       rmb(); /* Ensure that we see the port before we copy it. */
+       if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+           ((bytes2 != 0) &&
+            copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
+               goto unlock_out;
+
+       u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+       rc = bytes1 + bytes2;
+
+ unlock_out:
+       mutex_unlock(&u->ring_cons_mutex);
+       return rc;
+}
+
+/*
+ * Re-enable the irqs of the ports listed in the user buffer.
+ *
+ * @count is rounded down to a whole number of ports and capped at
+ * PAGE_SIZE.  Entries that are out of range or not owned by this user
+ * are skipped silently.  Returns the (adjusted) byte count, 0 for a
+ * request shorter than one port, -ENOMEM if the bounce page cannot be
+ * allocated, or -EFAULT if the user buffer cannot be read.
+ */
+static ssize_t evtchn_write(struct file *file, const char __user *buf,
+                           size_t count, loff_t *ppos)
+{
+       struct per_user_data *u = file->private_data;
+       evtchn_port_t *ports;
+       unsigned int idx, nports;
+       int rc = 0;
+
+       ports = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+       if (!ports)
+               return -ENOMEM;
+
+       /* Whole number of ports. */
+       count &= ~(sizeof(evtchn_port_t)-1);
+       if (count > PAGE_SIZE)
+               count = PAGE_SIZE;
+
+       if (count == 0)
+               goto out;
+
+       if (copy_from_user(ports, buf, count) != 0) {
+               rc = -EFAULT;
+               goto out;
+       }
+
+       nports = count / sizeof(evtchn_port_t);
+
+       spin_lock_irq(&port_user_lock);
+       for (idx = 0; idx < nports; idx++) {
+               evtchn_port_t port = ports[idx];
+
+               /* Only touch ports this user actually owns */
+               if (port >= NR_EVENT_CHANNELS || port_user[port] != u)
+                       continue;
+
+               enable_irq(irq_from_evtchn(port));
+       }
+       spin_unlock_irq(&port_user_lock);
+
+       rc = count;
+
+ out:
+       free_page((unsigned long)ports);
+       return rc;
+}
+
+/*
+ * Record @u as the owner of @port and attach evtchn_interrupt() to it.
+ * Returns 0 on success, or the negative value reported by
+ * bind_evtchn_to_irqhandler(); in that case the port is left unowned.
+ */
+static int evtchn_bind_to_user(struct per_user_data *u, int port)
+{
+       int rc = 0;
+
+       /*
+        * Ports are never reused, so every caller should pass in a
+        * unique port.
+        *
+        * (Locking not necessary because we haven't registered the
+        * interrupt handler yet, and our caller has already
+        * serialized bind operations.)
+        */
+       BUG_ON(port_user[port] != NULL);
+       port_user[port] = u;
+
+       rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED,
+                                      u->name, (void *)(unsigned long)port);
+       if (rc >= 0)
+               return 0;
+
+       /*
+        * No handler was attached, so the port must not stay claimed.
+        * A stale entry would trip the BUG_ON above on a later bind of
+        * the same port, and would make unbind/release try to tear down
+        * a handler that was never installed.
+        */
+       port_user[port] = NULL;
+
+       return rc;
+}
+
+/*
+ * Detach the interrupt handler from @port and mark the port unowned.
+ * @u is not used; ownership is checked by the callers.
+ */
+static void evtchn_unbind_from_user(struct per_user_data *u, int port)
+{
+       int irq = irq_from_evtchn(port);
+
+       unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+
+       /* make sure we unbind the irq handler before clearing the port */
+       barrier();
+
+       port_user[port] = NULL;
+}
+
+/*
+ * ioctl entry point for the evtchn device.
+ *
+ * The BIND_* commands obtain a port from the hypervisor, attach it to
+ * this user and return the port number.  UNBIND detaches a port owned
+ * by this user.  NOTIFY signals the remote end of an owned port.  RESET
+ * empties the notification ring and clears the overflow flag.  Unknown
+ * commands return -ENOSYS.  The whole call runs under u->bind_mutex.
+ */
+static long evtchn_ioctl(struct file *file,
+                        unsigned int cmd, unsigned long arg)
+{
+       int rc;
+       struct per_user_data *u = file->private_data;
+       void __user *uarg = (void __user *) arg;
+
+       /* Prevent bind from racing with unbind */
+       mutex_lock(&u->bind_mutex);
+
+       switch (cmd) {
+       case IOCTL_EVTCHN_BIND_VIRQ: {
+               struct ioctl_evtchn_bind_virq bind;
+               struct evtchn_bind_virq bind_virq;
+
+               rc = -EFAULT;
+               if (copy_from_user(&bind, uarg, sizeof(bind)))
+                       break;
+
+               bind_virq.virq = bind.virq;
+               bind_virq.vcpu = 0;
+               rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+                                                &bind_virq);
+               if (rc != 0)
+                       break;
+
+               rc = evtchn_bind_to_user(u, bind_virq.port);
+               if (rc == 0)
+                       rc = bind_virq.port;
+               break;
+       }
+
+       case IOCTL_EVTCHN_BIND_INTERDOMAIN: {
+               struct ioctl_evtchn_bind_interdomain bind;
+               struct evtchn_bind_interdomain bind_interdomain;
+
+               rc = -EFAULT;
+               if (copy_from_user(&bind, uarg, sizeof(bind)))
+                       break;
+
+               bind_interdomain.remote_dom  = bind.remote_domain;
+               bind_interdomain.remote_port = bind.remote_port;
+               rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+                                                &bind_interdomain);
+               if (rc != 0)
+                       break;
+
+               rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
+               if (rc == 0)
+                       rc = bind_interdomain.local_port;
+               break;
+       }
+
+       case IOCTL_EVTCHN_BIND_UNBOUND_PORT: {
+               struct ioctl_evtchn_bind_unbound_port bind;
+               struct evtchn_alloc_unbound alloc_unbound;
+
+               rc = -EFAULT;
+               if (copy_from_user(&bind, uarg, sizeof(bind)))
+                       break;
+
+               alloc_unbound.dom        = DOMID_SELF;
+               alloc_unbound.remote_dom = bind.remote_domain;
+               rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+                                                &alloc_unbound);
+               if (rc != 0)
+                       break;
+
+               rc = evtchn_bind_to_user(u, alloc_unbound.port);
+               if (rc == 0)
+                       rc = alloc_unbound.port;
+               break;
+       }
+
+       case IOCTL_EVTCHN_UNBIND: {
+               struct ioctl_evtchn_unbind unbind;
+
+               rc = -EFAULT;
+               if (copy_from_user(&unbind, uarg, sizeof(unbind)))
+                       break;
+
+               rc = -EINVAL;
+               if (unbind.port >= NR_EVENT_CHANNELS)
+                       break;
+
+               /* Only the ownership check needs port_user_lock */
+               spin_lock_irq(&port_user_lock);
+               rc = -ENOTCONN;
+               if (port_user[unbind.port] == u)
+                       rc = 0;
+               spin_unlock_irq(&port_user_lock);
+
+               if (rc != 0)
+                       break;
+
+               /*
+                * Tear the handler down with the spinlock released:
+                * evtchn_interrupt() takes port_user_lock itself, so
+                * removing the handler while holding that lock with
+                * interrupts off is not safe.  bind_mutex, held for the
+                * whole ioctl, keeps this user's other bind/unbind
+                * calls out, so the ownership verified above still
+                * holds here.
+                */
+               evtchn_unbind_from_user(u, unbind.port);
+               break;
+       }
+
+       case IOCTL_EVTCHN_NOTIFY: {
+               struct ioctl_evtchn_notify notify;
+
+               rc = -EFAULT;
+               if (copy_from_user(&notify, uarg, sizeof(notify)))
+                       break;
+
+               if (notify.port >= NR_EVENT_CHANNELS) {
+                       rc = -EINVAL;
+               } else if (port_user[notify.port] != u) {
+                       rc = -ENOTCONN;
+               } else {
+                       notify_remote_via_evtchn(notify.port);
+                       rc = 0;
+               }
+               break;
+       }
+
+       case IOCTL_EVTCHN_RESET: {
+               /* Initialise the ring to empty. Clear errors. */
+               mutex_lock(&u->ring_cons_mutex);
+               spin_lock_irq(&port_user_lock);
+               u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+               spin_unlock_irq(&port_user_lock);
+               mutex_unlock(&u->ring_cons_mutex);
+               rc = 0;
+               break;
+       }
+
+       default:
+               rc = -ENOSYS;
+               break;
+       }
+       mutex_unlock(&u->bind_mutex);
+
+       return rc;
+}
+
+/*
+ * poll handler: always writable, readable while the ring holds
+ * entries; an overflowed ring reports POLLERR and nothing else.
+ */
+static unsigned int evtchn_poll(struct file *file, poll_table *wait)
+{
+       struct per_user_data *u = file->private_data;
+       unsigned int events;
+
+       poll_wait(file, &u->evtchn_wait, wait);
+
+       /* The error state replaces every other bit */
+       if (u->ring_overflow)
+               return POLLERR;
+
+       events = POLLOUT | POLLWRNORM;
+       if (u->ring_cons != u->ring_prod)
+               events |= POLLIN | POLLRDNORM;
+
+       return events;
+}
+
+/* fasync handler: hand the request to fasync_helper() using this
+   user's async queue, the one evtchn_interrupt() signals with SIGIO */
+static int evtchn_fasync(int fd, struct file *filp, int on)
+{
+       struct per_user_data *u = filp->private_data;
+       return fasync_helper(fd, filp, on, &u->evtchn_async_queue);
+}
+
+/*
+ * open handler: allocate the per-user state, its "evtchn:<comm>" name
+ * and its one-page notification ring, then attach it to the file.
+ * Returns 0, or -ENOMEM if any of the allocations fails.
+ */
+static int evtchn_open(struct inode *inode, struct file *filp)
+{
+       struct per_user_data *u = kzalloc(sizeof(*u), GFP_KERNEL);
+
+       if (!u)
+               return -ENOMEM;
+
+       u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
+       if (!u->name)
+               goto free_user;
+
+       init_waitqueue_head(&u->evtchn_wait);
+
+       u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
+       if (!u->ring)
+               goto free_name;
+
+       mutex_init(&u->bind_mutex);
+       mutex_init(&u->ring_cons_mutex);
+
+       filp->private_data = u;
+
+       return 0;
+
+ free_name:
+       kfree(u->name);
+ free_user:
+       kfree(u);
+       return -ENOMEM;
+}
+
+/*
+ * release handler: detach every port still owned by this user, then
+ * free the ring, the name and the per-user state.  Always returns 0.
+ */
+static int evtchn_release(struct inode *inode, struct file *filp)
+{
+       int i;
+       struct per_user_data *u = filp->private_data;
+
+       /*
+        * Unbind without holding port_user_lock: evtchn_interrupt()
+        * takes that lock itself, so removing a handler while holding
+        * it with interrupts off is not safe.  The file is going away,
+        * so nothing else can bind or unbind ports on behalf of this
+        * user while the table is scanned.
+        */
+       for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+               if (port_user[i] != u)
+                       continue;
+
+               evtchn_unbind_from_user(u, i);
+       }
+
+       /* Only now, with every handler detached, can the ring that the
+          handlers write into be given back */
+       free_page((unsigned long)u->ring);
+
+       kfree(u->name);
+       kfree(u);
+
+       return 0;
+}
+
+/* File operations of the evtchn misc device */
+static const struct file_operations evtchn_fops = {
+       .owner   = THIS_MODULE,
+       .read    = evtchn_read,
+       .write   = evtchn_write,
+       .unlocked_ioctl = evtchn_ioctl,
+       .poll    = evtchn_poll,
+       .fasync  = evtchn_fasync,
+       .open    = evtchn_open,
+       .release = evtchn_release,
+};
+
+/* Misc device named "evtchn" with a dynamically assigned minor */
+static struct miscdevice evtchn_miscdev = {
+       .minor        = MISC_DYNAMIC_MINOR,
+       .name         = "evtchn",
+       .fops         = &evtchn_fops,
+};
+/*
+ * Module init: refuse to load outside a Xen domain (-ENODEV), reset
+ * the port ownership table and register the misc device.  Returns 0
+ * or the error from misc_register().
+ */
+static int __init evtchn_init(void)
+{
+       int err;
+
+       if (!xen_domain())
+               return -ENODEV;
+
+       spin_lock_init(&port_user_lock);
+       memset(port_user, 0, sizeof(port_user));
+
+       /* Create '/dev/misc/evtchn'. */
+       err = misc_register(&evtchn_miscdev);
+       if (err != 0) {
+               printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+               return err;
+       }
+
+       printk(KERN_INFO "Event-channel device installed.\n");
+
+       return 0;
+}
+
+/* Module exit: unregister the misc device */
+static void __exit evtchn_cleanup(void)
+{
+       misc_deregister(&evtchn_miscdev);
+}
+
+module_init(evtchn_init);
+module_exit(evtchn_cleanup);
+
+MODULE_LICENSE("GPL");
index 4b5b848..fddc202 100644 (file)
@@ -98,9 +98,8 @@ static void do_suspend(void)
                goto out;
        }
 
-       printk("suspending xenbus...\n");
-       /* XXX use normal device tree? */
-       xenbus_suspend();
+       printk(KERN_DEBUG "suspending xenstore...\n");
+       xs_suspend();
 
        err = device_power_down(PMSG_SUSPEND);
        if (err) {
@@ -116,9 +115,9 @@ static void do_suspend(void)
 
        if (!cancelled) {
                xen_arch_resume();
-               xenbus_resume();
+               xs_resume();
        } else
-               xenbus_suspend_cancel();
+               xs_suspend_cancel();
 
        device_power_up(PMSG_RESUME);
 
diff --git a/drivers/xen/sys-hypervisor.c b/drivers/xen/sys-hypervisor.c
new file mode 100644 (file)
index 0000000..88a60e0
--- /dev/null
@@ -0,0 +1,445 @@
+/*
+ *  copyright (c) 2006 IBM Corporation
+ *  Authored by: Mike D. Day <ncmike@us.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 as
+ *  published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kobject.h>
+
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xenbus.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/version.h>
+
+/* Define a read-only hyp_sysfs_attr called <_name>_attr via __ATTR_RO(). */
+#define HYPERVISOR_ATTR_RO(_name) \
+static struct hyp_sysfs_attr  _name##_attr = __ATTR_RO(_name)
+
+/*
+ * Define a mode-0644 hyp_sysfs_attr called <_name>_attr, wired to the
+ * <_name>_show and <_name>_store callbacks.
+ */
+#define HYPERVISOR_ATTR_RW(_name) \
+static struct hyp_sysfs_attr _name##_attr = \
+       __ATTR(_name, 0644, _name##_show, _name##_store)
+
+/*
+ * Sysfs attribute wrapper: embeds the generic struct attribute and adds
+ * show/store callbacks that are handed this wrapper instead of a kobject.
+ * hyp_sysfs_show()/hyp_sysfs_store() recover it with container_of().
+ */
+struct hyp_sysfs_attr {
+       struct attribute attr;
+       ssize_t (*show)(struct hyp_sysfs_attr *, char *);
+       ssize_t (*store)(struct hyp_sysfs_attr *, const char *, size_t);
+       void *hyp_attr_data;
+};
+
+/* "type" attribute: always writes the fixed string "xen\n". */
+static ssize_t type_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       return sprintf(buffer, "xen\n");
+}
+
+HYPERVISOR_ATTR_RO(type);
+
+/* Create the "type" file directly under hypervisor_kobj. */
+static int __init xen_sysfs_type_init(void)
+{
+       return sysfs_create_file(hypervisor_kobj, &type_attr.attr);
+}
+
+/* Remove the "type" file created by xen_sysfs_type_init(). */
+static void xen_sysfs_type_destroy(void)
+{
+       sysfs_remove_file(hypervisor_kobj, &type_attr.attr);
+}
+
+/* xen version attributes */
+/*
+ * "major" attribute: prints the XENVER_version result shifted right by 16
+ * bits.  A zero result from the hypercall is reported as -ENODEV.
+ */
+static ssize_t major_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       int ver = HYPERVISOR_xen_version(XENVER_version, NULL);
+
+       if (!ver)
+               return -ENODEV;
+
+       return sprintf(buffer, "%d\n", ver >> 16);
+}
+
+HYPERVISOR_ATTR_RO(major);
+
+/*
+ * "minor" attribute: prints the Xen minor version.
+ *
+ * XENVER_version packs the version as major:minor in 16:16 bits, and
+ * major_show() takes the major from bit 16 upwards, so the minor is the
+ * whole low 16 bits.  Masking with 0xff would silently truncate any
+ * minor number above 255.  A zero result is reported as -ENODEV.
+ */
+static ssize_t minor_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       int version = HYPERVISOR_xen_version(XENVER_version, NULL);
+
+       if (!version)
+               return -ENODEV;
+
+       return sprintf(buffer, "%d\n", version & 0xffff);
+}
+
+HYPERVISOR_ATTR_RO(minor);
+
+/*
+ * "extra" attribute: fetches XENVER_extraversion into a temporary buffer
+ * and prints it.  Returns -ENOMEM if the buffer cannot be allocated, or
+ * the non-zero hypercall result if the query fails.
+ */
+static ssize_t extra_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       char *extra;
+       int ret;
+
+       extra = kmalloc(XEN_EXTRAVERSION_LEN, GFP_KERNEL);
+       if (!extra)
+               return -ENOMEM;
+
+       ret = HYPERVISOR_xen_version(XENVER_extraversion, extra);
+       if (ret == 0)
+               ret = sprintf(buffer, "%s\n", extra);
+
+       kfree(extra);
+       return ret;
+}
+
+HYPERVISOR_ATTR_RO(extra);
+
+/* NULL-terminated list of the attributes placed in the "version" group. */
+static struct attribute *version_attrs[] = {
+       &major_attr.attr,
+       &minor_attr.attr,
+       &extra_attr.attr,
+       NULL
+};
+
+/* Attribute group named "version" holding major, minor and extra. */
+static struct attribute_group version_group = {
+       .name = "version",
+       .attrs = version_attrs,
+};
+
+/* Create the "version" attribute group under hypervisor_kobj. */
+static int __init xen_sysfs_version_init(void)
+{
+       return sysfs_create_group(hypervisor_kobj, &version_group);
+}
+
+/* Remove the "version" attribute group again. */
+static void xen_sysfs_version_destroy(void)
+{
+       sysfs_remove_group(hypervisor_kobj, &version_group);
+}
+
+/* UUID */
+
+/*
+ * "uuid" attribute: reads the "vm" node with xenbus_read(), then reads
+ * "uuid" beneath the path that node contains, and prints the result.
+ * Returns -EBUSY while xenstored_ready is zero, or the error encoded in
+ * either xenbus_read() result.
+ */
+static ssize_t uuid_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       extern int xenstored_ready;
+       char *vm_path;
+       char *uuid;
+       int len;
+
+       if (!xenstored_ready)
+               return -EBUSY;
+
+       vm_path = xenbus_read(XBT_NIL, "vm", "", NULL);
+       if (IS_ERR(vm_path))
+               return PTR_ERR(vm_path);
+
+       uuid = xenbus_read(XBT_NIL, vm_path, "uuid", NULL);
+       /* The path is only needed for the lookup above. */
+       kfree(vm_path);
+       if (IS_ERR(uuid))
+               return PTR_ERR(uuid);
+
+       len = sprintf(buffer, "%s\n", uuid);
+       kfree(uuid);
+       return len;
+}
+
+HYPERVISOR_ATTR_RO(uuid);
+
+/* Create the "uuid" file directly under hypervisor_kobj. */
+static int __init xen_sysfs_uuid_init(void)
+{
+       return sysfs_create_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+/* Remove the "uuid" file created by xen_sysfs_uuid_init(). */
+static void xen_sysfs_uuid_destroy(void)
+{
+       sysfs_remove_file(hypervisor_kobj, &uuid_attr.attr);
+}
+
+/* xen compilation attributes */
+
+/*
+ * "compiler" attribute: queries XENVER_compile_info and prints its
+ * compiler field.  Returns -ENOMEM on allocation failure or the non-zero
+ * hypercall result if the query fails.
+ */
+static ssize_t compiler_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       struct xen_compile_info *info;
+       int ret;
+
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+       if (ret == 0)
+               ret = sprintf(buffer, "%s\n", info->compiler);
+
+       kfree(info);
+       return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiler);
+
+/*
+ * "compiled_by" attribute: queries XENVER_compile_info and prints its
+ * compile_by field.  Returns -ENOMEM on allocation failure or the
+ * non-zero hypercall result if the query fails.
+ */
+static ssize_t compiled_by_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       struct xen_compile_info *info;
+       int ret;
+
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+       if (ret == 0)
+               ret = sprintf(buffer, "%s\n", info->compile_by);
+
+       kfree(info);
+       return ret;
+}
+
+HYPERVISOR_ATTR_RO(compiled_by);
+
+/*
+ * "compile_date" attribute: queries XENVER_compile_info and prints its
+ * compile_date field.  Returns -ENOMEM on allocation failure or the
+ * non-zero hypercall result if the query fails.
+ */
+static ssize_t compile_date_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       struct xen_compile_info *info;
+       int ret;
+
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+
+       ret = HYPERVISOR_xen_version(XENVER_compile_info, info);
+       if (ret == 0)
+               ret = sprintf(buffer, "%s\n", info->compile_date);
+
+       kfree(info);
+       return ret;
+}
+
+HYPERVISOR_ATTR_RO(compile_date);
+
+/* NULL-terminated list of the attributes placed in the "compilation" group. */
+static struct attribute *xen_compile_attrs[] = {
+       &compiler_attr.attr,
+       &compiled_by_attr.attr,
+       &compile_date_attr.attr,
+       NULL
+};
+
+/* Attribute group named "compilation" holding the three entries above. */
+static struct attribute_group xen_compilation_group = {
+       .name = "compilation",
+       .attrs = xen_compile_attrs,
+};
+
+/*
+ * Create the "compilation" attribute group under hypervisor_kobj.
+ * Returns the sysfs_create_group() result.
+ *
+ * The storage class comes first ("static int __init"), matching the other
+ * init helpers in this file and avoiding GCC's "'static' is not at
+ * beginning of declaration" warning.
+ */
+static int __init xen_compilation_init(void)
+{
+       return sysfs_create_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+/* Remove the "compilation" attribute group again. */
+static void xen_compilation_destroy(void)
+{
+       sysfs_remove_group(hypervisor_kobj, &xen_compilation_group);
+}
+
+/* xen properties info */
+
+/*
+ * "capabilities" attribute: fetches XENVER_capabilities into a temporary
+ * buffer and prints it.  Returns -ENOMEM on allocation failure or the
+ * non-zero hypercall result if the query fails.
+ */
+static ssize_t capabilities_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       char *caps;
+       int ret;
+
+       caps = kmalloc(XEN_CAPABILITIES_INFO_LEN, GFP_KERNEL);
+       if (!caps)
+               return -ENOMEM;
+
+       ret = HYPERVISOR_xen_version(XENVER_capabilities, caps);
+       if (ret == 0)
+               ret = sprintf(buffer, "%s\n", caps);
+
+       kfree(caps);
+       return ret;
+}
+
+HYPERVISOR_ATTR_RO(capabilities);
+
+/*
+ * "changeset" attribute: fetches XENVER_changeset into a temporary buffer
+ * and prints it.  Returns -ENOMEM on allocation failure or the non-zero
+ * hypercall result if the query fails.
+ */
+static ssize_t changeset_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       char *cset;
+       int ret;
+
+       cset = kmalloc(XEN_CHANGESET_INFO_LEN, GFP_KERNEL);
+       if (!cset)
+               return -ENOMEM;
+
+       ret = HYPERVISOR_xen_version(XENVER_changeset, cset);
+       if (ret == 0)
+               ret = sprintf(buffer, "%s\n", cset);
+
+       kfree(cset);
+       return ret;
+}
+
+HYPERVISOR_ATTR_RO(changeset);
+
+/*
+ * "virtual_start" attribute: queries XENVER_platform_parameters and
+ * prints virt_start in hex.  Returns -ENOMEM on allocation failure or the
+ * non-zero hypercall result if the query fails.
+ */
+static ssize_t virtual_start_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       struct xen_platform_parameters *parms;
+       int ret;
+
+       parms = kmalloc(sizeof(*parms), GFP_KERNEL);
+       if (!parms)
+               return -ENOMEM;
+
+       ret = HYPERVISOR_xen_version(XENVER_platform_parameters, parms);
+       if (ret == 0)
+               ret = sprintf(buffer, "%lx\n", parms->virt_start);
+
+       kfree(parms);
+       return ret;
+}
+
+HYPERVISOR_ATTR_RO(virtual_start);
+
+/*
+ * "pagesize" attribute: prints the positive XENVER_pagesize result in hex.
+ * A zero or negative hypercall result is returned unchanged.
+ */
+static ssize_t pagesize_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       int size = HYPERVISOR_xen_version(XENVER_pagesize, NULL);
+
+       if (size <= 0)
+               return size;
+
+       return sprintf(buffer, "%x\n", size);
+}
+
+HYPERVISOR_ATTR_RO(pagesize);
+
+/*
+ * Query feature submap @index with XENVER_get_features and write it to
+ * @buffer as eight hex digits with no trailing newline.  Returns the
+ * number of characters written, or the non-zero hypercall result.
+ */
+static ssize_t xen_feature_show(int index, char *buffer)
+{
+       struct xen_feature_info info;
+       ssize_t rc;
+
+       info.submap_idx = index;
+       rc = HYPERVISOR_xen_version(XENVER_get_features, &info);
+       if (rc)
+               return rc;
+
+       return sprintf(buffer, "%08x", info.submap);
+}
+
+/*
+ * "features" attribute: concatenates the hex dump of every feature
+ * submap, highest index first, and ends the line with a newline.  If the
+ * very first lookup fails its error is returned; a later failure just
+ * ends the output early.
+ */
+static ssize_t features_show(struct hyp_sysfs_attr *attr, char *buffer)
+{
+       ssize_t len = 0;
+       int i = XENFEAT_NR_SUBMAPS;
+
+       while (--i >= 0) {
+               int ret = xen_feature_show(i, buffer + len);
+
+               if (ret < 0) {
+                       /* Only report the error if nothing was written. */
+                       if (!len)
+                               len = ret;
+                       break;
+               }
+               len += ret;
+       }
+
+       if (len > 0)
+               buffer[len++] = '\n';
+
+       return len;
+}
+
+HYPERVISOR_ATTR_RO(features);
+
+/* NULL-terminated list of the attributes placed in the "properties" group. */
+static struct attribute *xen_properties_attrs[] = {
+       &capabilities_attr.attr,
+       &changeset_attr.attr,
+       &virtual_start_attr.attr,
+       &pagesize_attr.attr,
+       &features_attr.attr,
+       NULL
+};
+
+/* Attribute group named "properties" holding the entries above. */
+static struct attribute_group xen_properties_group = {
+       .name = "properties",
+       .attrs = xen_properties_attrs,
+};
+
+/* Create the "properties" attribute group under hypervisor_kobj. */
+static int __init xen_properties_init(void)
+{
+       return sysfs_create_group(hypervisor_kobj, &xen_properties_group);
+}
+
+/* Remove the "properties" attribute group again. */
+static void xen_properties_destroy(void)
+{
+       sysfs_remove_group(hypervisor_kobj, &xen_properties_group);
+}
+
+/*
+ * Module init: on a Xen domain, create the type file, the version and
+ * compilation groups, the uuid file and the properties group, in that
+ * order.  If any step fails, everything created before it is removed in
+ * reverse order and the error is returned.  Returns -ENODEV when not
+ * running on Xen.
+ */
+static int __init hyper_sysfs_init(void)
+{
+       int err;
+
+       if (!xen_domain())
+               return -ENODEV;
+
+       err = xen_sysfs_type_init();
+       if (err)
+               return err;
+
+       err = xen_sysfs_version_init();
+       if (err)
+               goto undo_type;
+
+       err = xen_compilation_init();
+       if (err)
+               goto undo_version;
+
+       err = xen_sysfs_uuid_init();
+       if (err)
+               goto undo_compilation;
+
+       err = xen_properties_init();
+       if (err)
+               goto undo_uuid;
+
+       return 0;
+
+undo_uuid:
+       xen_sysfs_uuid_destroy();
+undo_compilation:
+       xen_compilation_destroy();
+undo_version:
+       xen_sysfs_version_destroy();
+undo_type:
+       xen_sysfs_type_destroy();
+       return err;
+}
+
+/*
+ * Module exit: remove every sysfs entry created by hyper_sysfs_init(),
+ * in the reverse of the order they were created.
+ */
+static void __exit hyper_sysfs_exit(void)
+{
+       xen_properties_destroy();
+       xen_sysfs_uuid_destroy();
+       xen_compilation_destroy();
+       xen_sysfs_version_destroy();
+       xen_sysfs_type_destroy();
+}
+module_init(hyper_sysfs_init);
+module_exit(hyper_sysfs_exit);
+
+/*
+ * sysfs_ops.show hook: recover the enclosing hyp_sysfs_attr from @attr
+ * and forward to its show callback.  Returns 0 when no callback is set.
+ */
+static ssize_t hyp_sysfs_show(struct kobject *kobj,
+                             struct attribute *attr,
+                             char *buffer)
+{
+       struct hyp_sysfs_attr *hyp_attr =
+               container_of(attr, struct hyp_sysfs_attr, attr);
+
+       if (!hyp_attr->show)
+               return 0;
+
+       return hyp_attr->show(hyp_attr, buffer);
+}
+
+/*
+ * sysfs_ops.store hook: recover the enclosing hyp_sysfs_attr from @attr
+ * and forward to its store callback.  Returns 0 when no callback is set.
+ */
+static ssize_t hyp_sysfs_store(struct kobject *kobj,
+                              struct attribute *attr,
+                              const char *buffer,
+                              size_t len)
+{
+       struct hyp_sysfs_attr *hyp_attr =
+               container_of(attr, struct hyp_sysfs_attr, attr);
+
+       if (!hyp_attr->store)
+               return 0;
+
+       return hyp_attr->store(hyp_attr, buffer, len);
+}
+
+/* Routes sysfs reads/writes through the hyp_sysfs_attr dispatchers. */
+static struct sysfs_ops hyp_sysfs_ops = {
+       .show = hyp_sysfs_show,
+       .store = hyp_sysfs_store,
+};
+
+/* kobj_type installed on hypervisor_kobj by hypervisor_subsys_init(). */
+static struct kobj_type hyp_sysfs_kobj_type = {
+       .sysfs_ops = &hyp_sysfs_ops,
+};
+
+/*
+ * On a Xen domain, point hypervisor_kobj at hyp_sysfs_kobj_type so that
+ * its attributes are served through hyp_sysfs_ops.  Returns -ENODEV when
+ * not running on Xen.
+ */
+static int __init hypervisor_subsys_init(void)
+{
+       if (xen_domain()) {
+               hypervisor_kobj->ktype = &hyp_sysfs_kobj_type;
+               return 0;
+       }
+
+       return -ENODEV;
+}
+device_initcall(hypervisor_subsys_init);
index 773d1cf..d42e25d 100644 (file)
@@ -71,6 +71,9 @@ static int xenbus_probe_frontend(const char *type, const char *name);
 
 static void xenbus_dev_shutdown(struct device *_dev);
 
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+static int xenbus_dev_resume(struct device *dev);
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -188,6 +191,9 @@ static struct xen_bus_type xenbus_frontend = {
                .remove    = xenbus_dev_remove,
                .shutdown  = xenbus_dev_shutdown,
                .dev_attrs = xenbus_dev_attrs,
+
+               .suspend   = xenbus_dev_suspend,
+               .resume    = xenbus_dev_resume,
        },
 };
 
@@ -654,6 +660,7 @@ void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
 
        kfree(root);
 }
+EXPORT_SYMBOL_GPL(xenbus_dev_changed);
 
 static void frontend_changed(struct xenbus_watch *watch,
                             const char **vec, unsigned int len)
@@ -669,7 +676,7 @@ static struct xenbus_watch fe_watch = {
        .callback = frontend_changed,
 };
 
-static int suspend_dev(struct device *dev, void *data)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
 {
        int err = 0;
        struct xenbus_driver *drv;
@@ -682,35 +689,14 @@ static int suspend_dev(struct device *dev, void *data)
        drv = to_xenbus_driver(dev->driver);
        xdev = container_of(dev, struct xenbus_device, dev);
        if (drv->suspend)
-               err = drv->suspend(xdev);
+               err = drv->suspend(xdev, state);
        if (err)
                printk(KERN_WARNING
                       "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
        return 0;
 }
 
-static int suspend_cancel_dev(struct device *dev, void *data)
-{
-       int err = 0;
-       struct xenbus_driver *drv;
-       struct xenbus_device *xdev;
-
-       DPRINTK("");
-
-       if (dev->driver == NULL)
-               return 0;
-       drv = to_xenbus_driver(dev->driver);
-       xdev = container_of(dev, struct xenbus_device, dev);
-       if (drv->suspend_cancel)
-               err = drv->suspend_cancel(xdev);
-       if (err)
-               printk(KERN_WARNING
-                      "xenbus: suspend_cancel %s failed: %i\n",
-                      dev_name(dev), err);
-       return 0;
-}
-
-static int resume_dev(struct device *dev, void *data)
+static int xenbus_dev_resume(struct device *dev)
 {
        int err;
        struct xenbus_driver *drv;
@@ -755,33 +741,6 @@ static int resume_dev(struct device *dev, void *data)
        return 0;
 }
 
-void xenbus_suspend(void)
-{
-       DPRINTK("");
-
-       bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
-       xenbus_backend_suspend(suspend_dev);
-       xs_suspend();
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend);
-
-void xenbus_resume(void)
-{
-       xb_init_comms();
-       xs_resume();
-       bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
-       xenbus_backend_resume(resume_dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_resume);
-
-void xenbus_suspend_cancel(void)
-{
-       xs_suspend_cancel();
-       bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
-       xenbus_backend_resume(suspend_cancel_dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
-
 /* A flag to determine if xenstored is 'ready' (i.e. has started) */
 int xenstored_ready = 0;
 
index e325eab..eab33f1 100644 (file)
@@ -673,6 +673,8 @@ void xs_resume(void)
        struct xenbus_watch *watch;
        char token[sizeof(watch) * 2 + 1];
 
+       xb_init_comms();
+
        mutex_unlock(&xs_state.response_mutex);
        mutex_unlock(&xs_state.request_mutex);
        up_write(&xs_state.transaction_mutex);
index 515741a..6559e0c 100644 (file)
 MODULE_DESCRIPTION("Xen filesystem");
 MODULE_LICENSE("GPL");
 
+/*
+ * read() handler for the xenfs "capabilities" file: yields "control_d\n"
+ * in the initial domain and an empty file otherwise.
+ *
+ * The text is held through a const pointer because it only ever refers to
+ * string literals, which must not be written to.
+ */
+static ssize_t capabilities_read(struct file *file, char __user *buf,
+                                size_t size, loff_t *off)
+{
+       const char *tmp = "";
+
+       if (xen_initial_domain())
+               tmp = "control_d\n";
+
+       return simple_read_from_buffer(buf, size, off, tmp, strlen(tmp));
+}
+
+/* File operations for the xenfs "capabilities" file; only read is set. */
+static const struct file_operations capabilities_file_ops = {
+       .read = capabilities_read,
+};
+
 static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
 {
        static struct tree_descr xenfs_files[] = {
-               [2] = {"xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR},
+               [1] = {},
+               { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
+               { "capabilities", &capabilities_file_ops, S_IRUGO },
                {""},
        };
 
index eeb2468..2341375 100644 (file)
@@ -297,20 +297,14 @@ static int validate_request(struct autofs_wait_queue **wait,
         */
        if (notify == NFY_MOUNT) {
                /*
-                * If the dentry isn't hashed just go ahead and try the
-                * mount again with a new wait (not much else we can do).
-               */
-               if (!d_unhashed(dentry)) {
-                       /*
-                        * But if the dentry is hashed, that means that we
-                        * got here through the revalidate path.  Thus, we
-                        * need to check if the dentry has been mounted
-                        * while we waited on the wq_mutex. If it has,
-                        * simply return success.
-                        */
-                       if (d_mountpoint(dentry))
-                               return 0;
-               }
+                * If the dentry was successfully mounted while we slept
+                * on the wait queue mutex we can return success. If it
+                * isn't mounted (doesn't have submounts for the case of
+                * a multi-mount with no mount at its base) we can
+                * continue on and create a new request.
+                */
+               if (have_submounts(dentry))
+                       return 0;
        }
 
        return 1;
index 9871164..740699c 100644 (file)
--- a/fs/bio.c
+++ b/fs/bio.c
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <scsi/sg.h>           /* for struct sg_iovec */
 
-DEFINE_TRACE(block_split);
+#include <trace/events/block.h>
 
 /*
  * Test patch to inline a certain number of bi_io_vec's inside the bio
index 3e2c7c7..35af933 100644 (file)
@@ -2622,7 +2622,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
                                                       search_start);
                if (block_group && block_group_bits(block_group, data)) {
                        down_read(&space_info->groups_sem);
-                       goto have_block_group;
+                       if (list_empty(&block_group->list) ||
+                           block_group->ro) {
+                               /*
+                                * someone is removing this block group,
+                                * we can't jump into the have_block_group
+                                * target because our list pointers are not
+                                * valid
+                                */
+                               btrfs_put_block_group(block_group);
+                               up_read(&space_info->groups_sem);
+                       } else
+                               goto have_block_group;
                } else if (block_group) {
                        btrfs_put_block_group(block_group);
                }
@@ -2656,6 +2667,13 @@ have_block_group:
                         * people trying to start a new cluster
                         */
                        spin_lock(&last_ptr->refill_lock);
+                       if (last_ptr->block_group &&
+                           (last_ptr->block_group->ro ||
+                           !block_group_bits(last_ptr->block_group, data))) {
+                               offset = 0;
+                               goto refill_cluster;
+                       }
+
                        offset = btrfs_alloc_from_cluster(block_group, last_ptr,
                                                 num_bytes, search_start);
                        if (offset) {
@@ -2681,10 +2699,17 @@ have_block_group:
 
                                last_ptr_loop = 1;
                                search_start = block_group->key.objectid;
+                               /*
+                                * we know this block group is properly
+                                * in the list because
+                                * btrfs_remove_block_group, drops the
+                                * cluster before it removes the block
+                                * group from the list
+                                */
                                goto have_block_group;
                        }
                        spin_unlock(&last_ptr->lock);
-
+refill_cluster:
                        /*
                         * this cluster didn't work out, free it and
                         * start over
@@ -5968,6 +5993,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 {
        struct btrfs_path *path;
        struct btrfs_block_group_cache *block_group;
+       struct btrfs_free_cluster *cluster;
        struct btrfs_key key;
        int ret;
 
@@ -5979,6 +6005,21 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 
        memcpy(&key, &block_group->key, sizeof(key));
 
+       /* make sure this block group isn't part of an allocation cluster */
+       cluster = &root->fs_info->data_alloc_cluster;
+       spin_lock(&cluster->refill_lock);
+       btrfs_return_cluster_to_free_space(block_group, cluster);
+       spin_unlock(&cluster->refill_lock);
+
+       /*
+        * make sure this block group isn't part of a metadata
+        * allocation cluster
+        */
+       cluster = &root->fs_info->meta_alloc_cluster;
+       spin_lock(&cluster->refill_lock);
+       btrfs_return_cluster_to_free_space(block_group, cluster);
+       spin_unlock(&cluster->refill_lock);
+
        path = btrfs_alloc_path();
        BUG_ON(!path);
 
@@ -5988,7 +6029,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
        spin_unlock(&root->fs_info->block_group_cache_lock);
        btrfs_remove_free_space_cache(block_group);
        down_write(&block_group->space_info->groups_sem);
-       list_del(&block_group->list);
+       /*
+        * we must use list_del_init so people can check to see if they
+        * are still on the list after taking the semaphore
+        */
+       list_del_init(&block_group->list);
        up_write(&block_group->space_info->groups_sem);
 
        spin_lock(&block_group->space_info->lock);
index 5f01dad..a6d35b0 100644 (file)
@@ -1440,6 +1440,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        device->io_align = root->sectorsize;
        device->sector_size = root->sectorsize;
        device->total_bytes = i_size_read(bdev->bd_inode);
+       device->disk_total_bytes = device->total_bytes;
        device->dev_root = root->fs_info->dev_root;
        device->bdev = bdev;
        device->in_fs_metadata = 1;
index ad01129..1864d0b 100644 (file)
@@ -2736,6 +2736,8 @@ has_buffers:
                pos += blocksize;
        }
 
+       map_bh.b_size = blocksize;
+       map_bh.b_state = 0;
        err = get_block(inode, iblock, &map_bh, 0);
        if (err)
                goto unlock;
index c68edb9..9b1d285 100644 (file)
@@ -557,8 +557,10 @@ static int __init init_devpts_fs(void)
        int err = register_filesystem(&devpts_fs_type);
        if (!err) {
                devpts_mnt = kern_mount(&devpts_fs_type);
-               if (IS_ERR(devpts_mnt))
+               if (IS_ERR(devpts_mnt)) {
                        err = PTR_ERR(devpts_mnt);
+                       unregister_filesystem(&devpts_fs_type);
+               }
        }
        return err;
 }
index 0571983..bca0c61 100644 (file)
@@ -219,6 +219,7 @@ static struct inode *alloc_inode(struct super_block *sb)
 void destroy_inode(struct inode *inode)
 {
        BUG_ON(inode_has_buffers(inode));
+       ima_inode_free(inode);
        security_inode_free(inode);
        if (inode->i_sb->s_op->destroy_inode)
                inode->i_sb->s_op->destroy_inode(inode);
@@ -1053,13 +1054,22 @@ int insert_inode_locked(struct inode *inode)
        struct super_block *sb = inode->i_sb;
        ino_t ino = inode->i_ino;
        struct hlist_head *head = inode_hashtable + hash(sb, ino);
-       struct inode *old;
 
        inode->i_state |= I_LOCK|I_NEW;
        while (1) {
+               struct hlist_node *node;
+               struct inode *old = NULL;
                spin_lock(&inode_lock);
-               old = find_inode_fast(sb, head, ino);
-               if (likely(!old)) {
+               hlist_for_each_entry(old, node, head, i_hash) {
+                       if (old->i_ino != ino)
+                               continue;
+                       if (old->i_sb != sb)
+                               continue;
+                       if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+                               continue;
+                       break;
+               }
+               if (likely(!node)) {
                        hlist_add_head(&inode->i_hash, head);
                        spin_unlock(&inode_lock);
                        return 0;
@@ -1081,14 +1091,24 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 {
        struct super_block *sb = inode->i_sb;
        struct hlist_head *head = inode_hashtable + hash(sb, hashval);
-       struct inode *old;
 
        inode->i_state |= I_LOCK|I_NEW;
 
        while (1) {
+               struct hlist_node *node;
+               struct inode *old = NULL;
+
                spin_lock(&inode_lock);
-               old = find_inode(sb, head, test, data);
-               if (likely(!old)) {
+               hlist_for_each_entry(old, node, head, i_hash) {
+                       if (old->i_sb != sb)
+                               continue;
+                       if (!test(old, data))
+                               continue;
+                       if (old->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE))
+                               continue;
+                       break;
+               }
+               if (likely(!node)) {
                        hlist_add_head(&inode->i_hash, head);
                        spin_unlock(&inode_lock);
                        return 0;
index 06560c5..618e21c 100644 (file)
@@ -241,7 +241,7 @@ write_out_data:
                        spin_lock(&journal->j_list_lock);
                }
                /* Someone already cleaned up the buffer? */
-               if (!buffer_jbd(bh)
+               if (!buffer_jbd(bh) || bh2jh(bh) != jh
                        || jh->b_transaction != commit_transaction
                        || jh->b_jlist != BJ_SyncData) {
                        jbd_unlock_bh_state(bh);
@@ -478,7 +478,9 @@ void journal_commit_transaction(journal_t *journal)
                        spin_lock(&journal->j_list_lock);
                        continue;
                }
-               if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
+               if (buffer_jbd(bh) && bh2jh(bh) == jh &&
+                   jh->b_transaction == commit_transaction &&
+                   jh->b_jlist == BJ_Locked) {
                        __journal_unfile_buffer(jh);
                        jbd_unlock_bh_state(bh);
                        journal_remove_journal_head(bh);
index 9bca39c..1afa4dd 100644 (file)
 
 static int loadavg_proc_show(struct seq_file *m, void *v)
 {
-       int a, b, c;
-       unsigned long seq;
+       unsigned long avnrun[3];
 
-       do {
-               seq = read_seqbegin(&xtime_lock);
-               a = avenrun[0] + (FIXED_1/200);
-               b = avenrun[1] + (FIXED_1/200);
-               c = avenrun[2] + (FIXED_1/200);
-       } while (read_seqretry(&xtime_lock, seq));
+       get_avenrun(avnrun, FIXED_1/200, 0);
 
-       seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
-               LOAD_INT(a), LOAD_FRAC(a),
-               LOAD_INT(b), LOAD_FRAC(b),
-               LOAD_INT(c), LOAD_FRAC(c),
+       seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n",
+               LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
+               LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
+               LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]),
                nr_running(), nr_threads,
                task_active_pid_ns(current)->last_pid);
        return 0;
index d8c3e3c..fe36acc 100644 (file)
@@ -8,3 +8,4 @@ header-y += mtd/
 header-y += rdma/
 header-y += video/
 header-y += drm/
+header-y += xen/
index 8e6d0ca..e410f60 100644 (file)
@@ -280,17 +280,18 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 #endif
 
 /*
- * A facility to provide batching of the reload of page tables with the
- * actual context switch code for paravirtualized guests.  By convention,
- * only one of the lazy modes (CPU, MMU) should be active at any given
- * time, entry should never be nested, and entry and exits should always
- * be paired.  This is for sanity of maintaining and reasoning about the
- * kernel code.
+ * A facility to provide batching of the reload of page tables and
+ * other process state with the actual context switch code for
+ * paravirtualized guests.  By convention, only one of the batched
+ * update (lazy) modes (CPU, MMU) should be active at any given time,
+ * entry should never be nested, and entry and exits should always be
+ * paired.  This is for sanity of maintaining and reasoning about the
+ * kernel code.  In this case, the exit (end of the context switch) is
+ * in architecture-specific code, and so doesn't need a generic
+ * definition.
  */
-#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-#define arch_enter_lazy_cpu_mode()     do {} while (0)
-#define arch_leave_lazy_cpu_mode()     do {} while (0)
-#define arch_flush_lazy_cpu_mode()     do {} while (0)
+#ifndef __HAVE_ARCH_START_CONTEXT_SWITCH
+#define arch_start_context_switch(prev)        do {} while (0)
 #endif
 
 #ifndef __HAVE_PFNMAP_TRACKING
index 89853bc..f1736ca 100644 (file)
@@ -63,7 +63,7 @@
 #define BRANCH_PROFILE()
 #endif
 
-#ifdef CONFIG_EVENT_TRACER
+#ifdef CONFIG_EVENT_TRACING
 #define FTRACE_EVENTS()        VMLINUX_SYMBOL(__start_ftrace_events) = .;      \
                        *(_ftrace_events)                               \
                        VMLINUX_SYMBOL(__stop_ftrace_events) = .;
index 3c1924c..7300fb8 100644 (file)
@@ -471,6 +471,9 @@ struct drm_connector {
        u32 property_ids[DRM_CONNECTOR_MAX_PROPERTY];
        uint64_t property_values[DRM_CONNECTOR_MAX_PROPERTY];
 
+       /* requested DPMS state */
+       int dpms;
+
        void *helper_private;
 
        uint32_t encoder_ids[DRM_CONNECTOR_MAX_ENCODER];
index ec073d8..6769ff6 100644 (file)
@@ -99,6 +99,8 @@ extern bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
                                     struct drm_framebuffer *old_fb);
 extern bool drm_helper_crtc_in_use(struct drm_crtc *crtc);
 
+extern void drm_helper_connector_dpms(struct drm_connector *connector, int mode);
+
 extern int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
                                          struct drm_mode_fb_cmd *mode_cmd);
 
index 88be890..51b4b0a 100644 (file)
@@ -119,7 +119,7 @@ extern int pci_mmcfg_config_num;
 extern int sbf_port;
 extern unsigned long acpi_realmode_flags;
 
-int acpi_register_gsi (u32 gsi, int triggering, int polarity);
+int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity);
 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 
 #ifdef CONFIG_X86_IO_APIC
index 48ee32a..64a982e 100644 (file)
 #define UART01x_FR_MODEM_ANY   (UART01x_FR_DCD|UART01x_FR_DSR|UART01x_FR_CTS)
 
 #ifndef __ASSEMBLY__
+struct amba_device; /* in uncompress this is included but amba/bus.h is not */
 struct amba_pl010_data {
        void (*set_mctrl)(struct amba_device *dev, void __iomem *base, unsigned int mctrl);
 };
index d960889..7e4350e 100644 (file)
@@ -116,9 +116,9 @@ struct blk_io_trace {
  * The remap event
  */
 struct blk_io_trace_remap {
-       __be32 device;
        __be32 device_from;
-       __be64 sector;
+       __be32 device_to;
+       __be64 sector_from;
 };
 
 enum {
@@ -165,8 +165,9 @@ struct blk_trace {
 
 extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *);
 extern void blk_trace_shutdown(struct request_queue *);
-extern int do_blk_trace_setup(struct request_queue *q,
-       char *name, dev_t dev, struct blk_user_trace_setup *buts);
+extern int do_blk_trace_setup(struct request_queue *q, char *name,
+                             dev_t dev, struct block_device *bdev,
+                             struct blk_user_trace_setup *buts);
 extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 
 /**
@@ -193,22 +194,42 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...);
 extern void blk_add_driver_data(struct request_queue *q, struct request *rq,
                                void *data, size_t len);
 extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+                          struct block_device *bdev,
                           char __user *arg);
 extern int blk_trace_startstop(struct request_queue *q, int start);
 extern int blk_trace_remove(struct request_queue *q);
+extern int blk_trace_init_sysfs(struct device *dev);
 
 extern struct attribute_group blk_trace_attr_group;
 
 #else /* !CONFIG_BLK_DEV_IO_TRACE */
-#define blk_trace_ioctl(bdev, cmd, arg)                (-ENOTTY)
-#define blk_trace_shutdown(q)                  do { } while (0)
-#define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY)
-#define blk_add_driver_data(q, rq, data, len)  do {} while (0)
-#define blk_trace_setup(q, name, dev, arg)     (-ENOTTY)
-#define blk_trace_startstop(q, start)          (-ENOTTY)
-#define blk_trace_remove(q)                    (-ENOTTY)
-#define blk_add_trace_msg(q, fmt, ...)         do { } while (0)
+# define blk_trace_ioctl(bdev, cmd, arg)               (-ENOTTY)
+# define blk_trace_shutdown(q)                         do { } while (0)
+# define do_blk_trace_setup(q, name, dev, bdev, buts)  (-ENOTTY)
+# define blk_add_driver_data(q, rq, data, len)         do {} while (0)
+# define blk_trace_setup(q, name, dev, bdev, arg)      (-ENOTTY)
+# define blk_trace_startstop(q, start)                 (-ENOTTY)
+# define blk_trace_remove(q)                           (-ENOTTY)
+# define blk_add_trace_msg(q, fmt, ...)                        do { } while (0)
+static inline int blk_trace_init_sysfs(struct device *dev)
+{
+       return 0;
+}
 
 #endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#if defined(CONFIG_EVENT_TRACING) && defined(CONFIG_BLOCK)
+
+static inline int blk_cmd_buf_len(struct request *rq)
+{
+       return blk_pc_request(rq) ? rq->cmd_len * 3 : 1;
+}
+
+extern void blk_dump_cmd(char *buf, struct request *rq);
+extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes);
+extern void blk_fill_rwbs_rq(char *rwbs, struct request *rq);
+
+#endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */
+
 #endif /* __KERNEL__ */
 #endif
index f2ded21..af931ee 100644 (file)
@@ -222,6 +222,8 @@ int copy_siginfo_from_user32(siginfo_t *to, struct compat_siginfo __user *from);
 int copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from);
 int get_compat_sigevent(struct sigevent *event,
                const struct compat_sigevent __user *u_event);
+long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+                                 struct compat_siginfo __user *uinfo);
 
 static inline int compat_timeval_compare(struct compat_timeval *lhs,
                                        struct compat_timeval *rhs)
index 9f31538..c5ac87c 100644 (file)
@@ -1022,6 +1022,8 @@ typedef struct cpumask *cpumask_var_t;
 
 bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
 bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
+bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node);
+bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags);
 void alloc_bootmem_cpumask_var(cpumask_var_t *mask);
 void free_cpumask_var(cpumask_var_t mask);
 void free_bootmem_cpumask_var(cpumask_var_t mask);
@@ -1040,6 +1042,19 @@ static inline bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
        return true;
 }
 
+static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+       cpumask_clear(*mask);
+       return true;
+}
+
+static inline bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags,
+                                         int node)
+{
+       cpumask_clear(*mask);
+       return true;
+}
+
 static inline void alloc_bootmem_cpumask_var(cpumask_var_t *mask)
 {
 }
index 788850b..1fbdea4 100644 (file)
@@ -142,19 +142,6 @@ struct CYZ_BOOT_CTRL {
 
 
 #ifndef DP_WINDOW_SIZE
-/* #include "cyclomz.h" */
-/****************** ****************** *******************/
-/*
- *     The data types defined below are used in all ZFIRM interface
- *     data structures. They accomodate differences between HW
- *     architectures and compilers.
- */
-
-typedef __u64  ucdouble;               /* 64 bits, unsigned */
-typedef __u32  uclong;                 /* 32 bits, unsigned */
-typedef __u16  ucshort;                /* 16 bits, unsigned */
-typedef __u8   ucchar;                 /* 8 bits, unsigned */
-
 /*
  *     Memory Window Sizes
  */
@@ -507,16 +494,20 @@ struct ZFW_CTRL {
 
 /* Per card data structure */
 struct cyclades_card {
-    void __iomem *base_addr;
-    void __iomem *ctl_addr;
-    int irq;
-    unsigned int num_chips;    /* 0 if card absent, -1 if Z/PCI, else Y */
-    unsigned int first_line;   /* minor number of first channel on card */
-    unsigned int nports;       /* Number of ports in the card */
-    int bus_index;             /* address shift - 0 for ISA, 1 for PCI */
-    int intr_enabled;          /* FW Interrupt flag - 0 disabled, 1 enabled */
-    spinlock_t card_lock;
-    struct cyclades_port *ports;
+       void __iomem *base_addr;
+       union {
+               void __iomem *p9050;
+               struct RUNTIME_9060 __iomem *p9060;
+       } ctl_addr;
+       int irq;
+       unsigned int num_chips; /* 0 if card absent, -1 if Z/PCI, else Y */
+       unsigned int first_line;        /* minor number of first channel on card */
+       unsigned int nports;    /* Number of ports in the card */
+       int bus_index;          /* address shift - 0 for ISA, 1 for PCI */
+       int intr_enabled;               /* FW Interrupt flag - 0 disabled, 1 enabled */
+       u32 hw_ver;
+       spinlock_t card_lock;
+       struct cyclades_port *ports;
 };
 
 /***************************************
index 28d53cb..171ad8a 100644 (file)
@@ -32,6 +32,8 @@ extern void dma_debug_add_bus(struct bus_type *bus);
 
 extern void dma_debug_init(u32 num_entries);
 
+extern int dma_debug_resize_entries(u32 num_entries);
+
 extern void debug_dma_map_page(struct device *dev, struct page *page,
                               size_t offset, size_t size,
                               int direction, dma_addr_t dma_addr,
@@ -91,6 +93,11 @@ static inline void dma_debug_init(u32 num_entries)
 {
 }
 
+static inline int dma_debug_resize_entries(u32 num_entries)
+{
+       return 0;
+}
+
 static inline void debug_dma_map_page(struct device *dev, struct page *page,
                                      size_t offset, size_t size,
                                      int direction, dma_addr_t dma_addr,
index e397dc3..10ff5c4 100644 (file)
@@ -108,6 +108,7 @@ struct irte {
 };
 #ifdef CONFIG_INTR_REMAP
 extern int intr_remapping_enabled;
+extern int intr_remapping_supported(void);
 extern int enable_intr_remapping(int);
 extern void disable_intr_remapping(void);
 extern int reenable_intr_remapping(int);
@@ -157,6 +158,8 @@ static inline struct intel_iommu *map_ioapic_to_ir(int apic)
 }
 #define irq_remapped(irq)              (0)
 #define enable_intr_remapping(mode)    (-1)
+#define disable_intr_remapping()       (0)
+#define reenable_intr_remapping(mode)  (0)
 #define intr_remapping_enabled         (0)
 #endif
 
index 8a0c2f2..39b95c5 100644 (file)
@@ -233,8 +233,6 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
 extern int skip_trace(unsigned long ip);
 
-extern void ftrace_release(void *start, unsigned long size);
-
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
 #else
@@ -325,13 +323,8 @@ static inline void __ftrace_enabled_restore(int enabled)
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 extern void ftrace_init(void);
-extern void ftrace_init_module(struct module *mod,
-                              unsigned long *start, unsigned long *end);
 #else
 static inline void ftrace_init(void) { }
-static inline void
-ftrace_init_module(struct module *mod,
-                  unsigned long *start, unsigned long *end) { }
 #endif
 
 /*
@@ -368,6 +361,7 @@ struct ftrace_ret_stack {
        unsigned long ret;
        unsigned long func;
        unsigned long long calltime;
+       unsigned long long subtime;
 };
 
 /*
@@ -379,8 +373,6 @@ extern void return_to_handler(void);
 
 extern int
 ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth);
-extern void
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret);
 
 /*
  * Sometimes we don't want to trace a function with the function
@@ -496,8 +488,15 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
 
 extern int ftrace_dump_on_oops;
 
+#ifdef CONFIG_PREEMPT
+#define INIT_TRACE_RECURSION           .trace_recursion = 0,
+#endif
+
 #endif /* CONFIG_TRACING */
 
+#ifndef INIT_TRACE_RECURSION
+#define INIT_TRACE_RECURSION
+#endif
 
 #ifdef CONFIG_HW_BRANCH_TRACER
 
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
new file mode 100644 (file)
index 0000000..5c093ff
--- /dev/null
@@ -0,0 +1,172 @@
+#ifndef _LINUX_FTRACE_EVENT_H
+#define _LINUX_FTRACE_EVENT_H
+
+#include <linux/trace_seq.h>
+#include <linux/ring_buffer.h>
+#include <linux/percpu.h>
+
+struct trace_array;
+struct tracer;
+struct dentry;
+
+DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
+
+struct trace_print_flags {
+       unsigned long           mask;
+       const char              *name;
+};
+
+const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+                                  unsigned long flags,
+                                  const struct trace_print_flags *flag_array);
+
+const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+                                    const struct trace_print_flags *symbol_array);
+
+/*
+ * The trace entry - the most basic unit of tracing. This is what
+ * is printed in the end as a single line in the trace output, such as:
+ *
+ *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
+ */
+struct trace_entry {
+       unsigned short          type;
+       unsigned char           flags;
+       unsigned char           preempt_count;
+       int                     pid;
+       int                     tgid;
+};
+
+#define FTRACE_MAX_EVENT                                               \
+       ((1 << (sizeof(((struct trace_entry *)0)->type) * 8)) - 1)
+
+/*
+ * Trace iterator - used by printout routines that present trace
+ * results to users; these routines might sleep, etc:
+ */
+struct trace_iterator {
+       struct trace_array      *tr;
+       struct tracer           *trace;
+       void                    *private;
+       int                     cpu_file;
+       struct mutex            mutex;
+       struct ring_buffer_iter *buffer_iter[NR_CPUS];
+       unsigned long           iter_flags;
+
+       /* The below is zeroed out in pipe_read */
+       struct trace_seq        seq;
+       struct trace_entry      *ent;
+       int                     cpu;
+       u64                     ts;
+
+       loff_t                  pos;
+       long                    idx;
+
+       cpumask_var_t           started;
+};
+
+
+typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
+                                             int flags);
+struct trace_event {
+       struct hlist_node       node;
+       struct list_head        list;
+       int                     type;
+       trace_print_func        trace;
+       trace_print_func        raw;
+       trace_print_func        hex;
+       trace_print_func        binary;
+};
+
+extern int register_ftrace_event(struct trace_event *event);
+extern int unregister_ftrace_event(struct trace_event *event);
+
+/* Return values for print_line callback */
+enum print_line_t {
+       TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
+       TRACE_TYPE_HANDLED      = 1,
+       TRACE_TYPE_UNHANDLED    = 2,    /* Relay to other output functions */
+       TRACE_TYPE_NO_CONSUME   = 3     /* Handled but ask to not consume */
+};
+
+
+struct ring_buffer_event *
+trace_current_buffer_lock_reserve(int type, unsigned long len,
+                                 unsigned long flags, int pc);
+void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
+                                       unsigned long flags, int pc);
+void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
+                                       unsigned long flags, int pc);
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event);
+
+void tracing_record_cmdline(struct task_struct *tsk);
+
+struct ftrace_event_call {
+       struct list_head        list;
+       char                    *name;
+       char                    *system;
+       struct dentry           *dir;
+       struct trace_event      *event;
+       int                     enabled;
+       int                     (*regfunc)(void);
+       void                    (*unregfunc)(void);
+       int                     id;
+       int                     (*raw_init)(void);
+       int                     (*show_format)(struct trace_seq *s);
+       int                     (*define_fields)(void);
+       struct list_head        fields;
+       int                     filter_active;
+       void                    *filter;
+       void                    *mod;
+
+#ifdef CONFIG_EVENT_PROFILE
+       atomic_t        profile_count;
+       int             (*profile_enable)(struct ftrace_event_call *);
+       void            (*profile_disable)(struct ftrace_event_call *);
+#endif
+};
+
+#define MAX_FILTER_PRED                32
+#define MAX_FILTER_STR_VAL     128
+
+extern int init_preds(struct ftrace_event_call *call);
+extern void destroy_preds(struct ftrace_event_call *call);
+extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
+extern int filter_current_check_discard(struct ftrace_event_call *call,
+                                       void *rec,
+                                       struct ring_buffer_event *event);
+
+extern int trace_define_field(struct ftrace_event_call *call, char *type,
+                             char *name, int offset, int size, int is_signed);
+
+#define is_signed_type(type)   (((type)(-1)) < 0)
+
+int trace_set_clr_event(const char *system, const char *event, int set);
+
+/*
+ * The double __builtin_constant_p is needed because gcc gives an error
+ * if we try to initialize the static variable with fmt when it is not a
+ * constant, even though the outer if statement is optimized out.
+ */
+#define event_trace_printk(ip, fmt, args...)                           \
+do {                                                                   \
+       __trace_printk_check_format(fmt, ##args);                       \
+       tracing_record_cmdline(current);                                \
+       if (__builtin_constant_p(fmt)) {                                \
+               static const char *trace_printk_fmt                     \
+                 __attribute__((section("__trace_printk_fmt"))) =      \
+                       __builtin_constant_p(fmt) ? fmt : NULL;         \
+                                                                       \
+               __trace_bprintk(ip, trace_printk_fmt, ##args);          \
+       } else                                                          \
+               __trace_printk(ip, fmt, ##args);                        \
+} while (0)
+
+#define __common_field(type, item, is_signed)                          \
+       ret = trace_define_field(event_call, #type, "common_" #item,    \
+                                offsetof(typeof(field.ent), item),     \
+                                sizeof(field.ent.item), is_signed);    \
+       if (ret)                                                        \
+               return ret;
+
+#endif /* _LINUX_FTRACE_EVENT_H */
index 3bf5bb5..34956c8 100644 (file)
@@ -23,6 +23,8 @@ union ktime;
 #define FUTEX_TRYLOCK_PI       8
 #define FUTEX_WAIT_BITSET      9
 #define FUTEX_WAKE_BITSET      10
+#define FUTEX_WAIT_REQUEUE_PI  11
+#define FUTEX_CMP_REQUEUE_PI   12
 
 #define FUTEX_PRIVATE_FLAG     128
 #define FUTEX_CLOCK_REALTIME   256
@@ -38,6 +40,10 @@ union ktime;
 #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAIT_BITSET_PRIVATE      (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG)
 #define FUTEX_WAKE_BITSET_PRIVATE      (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE  (FUTEX_WAIT_REQUEUE_PI | \
+                                        FUTEX_PRIVATE_FLAG)
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE   (FUTEX_CMP_REQUEUE_PI | \
+                                        FUTEX_PRIVATE_FLAG)
 
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
index 9fed365..867cb68 100644 (file)
@@ -26,6 +26,9 @@
 #include <asm/io.h>
 #include <asm/mutex.h>
 
+/* for request_sense */
+#include <linux/cdrom.h>
+
 #if defined(CONFIG_CRIS) || defined(CONFIG_FRV) || defined(CONFIG_MN10300)
 # define SUPPORT_VLB_SYNC 0
 #else
@@ -324,7 +327,6 @@ struct ide_cmd {
        unsigned int            cursg_ofs;
 
        struct request          *rq;            /* copy of request */
-       void                    *special;       /* valid_t generally */
 };
 
 /* ATAPI packet command flags */
@@ -360,11 +362,7 @@ struct ide_atapi_pc {
 
        /* data buffer */
        u8 *buf;
-       /* current buffer position */
-       u8 *cur_pos;
        int buf_size;
-       /* missing/available data on the current buffer */
-       int b_count;
 
        /* the corresponding request */
        struct request *rq;
@@ -377,10 +375,6 @@ struct ide_atapi_pc {
         */
        u8 pc_buf[IDE_PC_BUFFER_SIZE];
 
-       /* idetape only */
-       struct idetape_bh *bh;
-       char *b_data;
-
        unsigned long timeout;
 };
 
@@ -593,16 +587,16 @@ struct ide_drive_s {
        /* callback for packet commands */
        int  (*pc_callback)(struct ide_drive_s *, int);
 
-       void (*pc_update_buffers)(struct ide_drive_s *, struct ide_atapi_pc *);
-       int  (*pc_io_buffers)(struct ide_drive_s *, struct ide_atapi_pc *,
-                             unsigned int, int);
-
        ide_startstop_t (*irq_handler)(struct ide_drive_s *);
 
        unsigned long atapi_flags;
 
        struct ide_atapi_pc request_sense_pc;
-       struct request request_sense_rq;
+
+       /* current sense rq and buffer */
+       bool sense_rq_armed;
+       struct request sense_rq;
+       struct request_sense sense_data;
 };
 
 typedef struct ide_drive_s ide_drive_t;
@@ -1174,7 +1168,10 @@ int ide_do_test_unit_ready(ide_drive_t *, struct gendisk *);
 int ide_do_start_stop(ide_drive_t *, struct gendisk *, int);
 int ide_set_media_lock(ide_drive_t *, struct gendisk *, int);
 void ide_create_request_sense_cmd(ide_drive_t *, struct ide_atapi_pc *);
-void ide_retry_pc(ide_drive_t *, struct gendisk *);
+void ide_retry_pc(ide_drive_t *drive);
+
+void ide_prep_sense(ide_drive_t *drive, struct request *rq);
+int ide_queue_sense_rq(ide_drive_t *drive, void *special);
 
 int ide_cd_expiry(ide_drive_t *);
 
index d87247d..889bf99 100644 (file)
@@ -174,6 +174,7 @@ extern struct cred init_cred;
        INIT_TRACE_IRQFLAGS                                             \
        INIT_LOCKDEP                                                    \
        INIT_FTRACE_GRAPH                                               \
+       INIT_TRACE_RECURSION                                            \
 }
 
 
index 91bb76f..ff374ce 100644 (file)
@@ -566,6 +566,6 @@ struct irq_desc;
 extern int early_irq_init(void);
 extern int arch_probe_nr_irqs(void);
 extern int arch_early_irq_init(void);
-extern int arch_init_chip_data(struct irq_desc *desc, int cpu);
+extern int arch_init_chip_data(struct irq_desc *desc, int node);
 
 #endif
index b7cbeed..eedbb8e 100644 (file)
@@ -117,7 +117,7 @@ struct irq_chip {
        void            (*eoi)(unsigned int irq);
 
        void            (*end)(unsigned int irq);
-       void            (*set_affinity)(unsigned int irq,
+       int             (*set_affinity)(unsigned int irq,
                                        const struct cpumask *dest);
        int             (*retrigger)(unsigned int irq);
        int             (*set_type)(unsigned int irq, unsigned int flow_type);
@@ -187,7 +187,7 @@ struct irq_desc {
        spinlock_t              lock;
 #ifdef CONFIG_SMP
        cpumask_var_t           affinity;
-       unsigned int            cpu;
+       unsigned int            node;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
        cpumask_var_t           pending_mask;
 #endif
@@ -201,26 +201,23 @@ struct irq_desc {
 } ____cacheline_internodealigned_in_smp;
 
 extern void arch_init_copy_chip_data(struct irq_desc *old_desc,
-                                       struct irq_desc *desc, int cpu);
+                                       struct irq_desc *desc, int node);
 extern void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc);
 
 #ifndef CONFIG_SPARSE_IRQ
 extern struct irq_desc irq_desc[NR_IRQS];
-#else /* CONFIG_SPARSE_IRQ */
-extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu);
-#endif /* CONFIG_SPARSE_IRQ */
-
-extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu);
+#endif
 
-static inline struct irq_desc *
-irq_remap_to_desc(unsigned int irq, struct irq_desc *desc)
-{
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-       return irq_to_desc(irq);
+#ifdef CONFIG_NUMA_IRQ_DESC
+extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int node);
 #else
+static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
+{
        return desc;
-#endif
 }
+#endif
+
+extern struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node);
 
 /*
  * Migration helpers for obsolete names, they will go away:
@@ -386,7 +383,7 @@ extern void set_irq_noprobe(unsigned int irq);
 extern void set_irq_probe(unsigned int irq);
 
 /* Handle dynamic irq creation and destruction */
-extern unsigned int create_irq_nr(unsigned int irq_want);
+extern unsigned int create_irq_nr(unsigned int irq_want, int node);
 extern int create_irq(void);
 extern void destroy_irq(unsigned int irq);
 
@@ -424,47 +421,48 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry);
 
 #ifdef CONFIG_SMP
 /**
- * init_alloc_desc_masks - allocate cpumasks for irq_desc
+ * alloc_desc_masks - allocate cpumasks for irq_desc
  * @desc:      pointer to irq_desc struct
- * @cpu:       cpu which will be handling the cpumasks
+ * @node:      node which will be handling the cpumasks
  * @boot:      true if need bootmem
  *
  * Allocates affinity and pending_mask cpumask if required.
  * Returns true if successful (or not required).
- * Side effect: affinity has all bits set, pending_mask has all bits clear.
  */
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
                                                                bool boot)
 {
-       int node;
-
+#ifdef CONFIG_CPUMASK_OFFSTACK
        if (boot) {
                alloc_bootmem_cpumask_var(&desc->affinity);
-               cpumask_setall(desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
                alloc_bootmem_cpumask_var(&desc->pending_mask);
-               cpumask_clear(desc->pending_mask);
 #endif
                return true;
        }
 
-       node = cpu_to_node(cpu);
-
        if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node))
                return false;
-       cpumask_setall(desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
        if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) {
                free_cpumask_var(desc->affinity);
                return false;
        }
-       cpumask_clear(desc->pending_mask);
+#endif
 #endif
        return true;
 }
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+       cpumask_setall(desc->affinity);
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+       cpumask_clear(desc->pending_mask);
+#endif
+}
+
 /**
  * init_copy_desc_masks - copy cpumasks for irq_desc
  * @old_desc:  pointer to old irq_desc struct
@@ -478,7 +476,7 @@ static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
 static inline void init_copy_desc_masks(struct irq_desc *old_desc,
                                        struct irq_desc *new_desc)
 {
-#ifdef CONFIG_CPUMASKS_OFFSTACK
+#ifdef CONFIG_CPUMASK_OFFSTACK
        cpumask_copy(new_desc->affinity, old_desc->affinity);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -499,12 +497,16 @@ static inline void free_desc_masks(struct irq_desc *old_desc,
 
 #else /* !CONFIG_SMP */
 
-static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu,
+static inline bool alloc_desc_masks(struct irq_desc *desc, int node,
                                                                bool boot)
 {
        return true;
 }
 
+static inline void init_desc_masks(struct irq_desc *desc)
+{
+}
+
 static inline void init_copy_desc_masks(struct irq_desc *old_desc,
                                        struct irq_desc *new_desc)
 {
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
new file mode 100644 (file)
index 0000000..b616d39
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <trace/events/kmem.h>
+
+#ifdef CONFIG_KMEMTRACE
+extern void kmemtrace_init(void);
+#else
+static inline void kmemtrace_init(void)
+{
+}
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
index bff1f0d..9772d6c 100644 (file)
@@ -19,6 +19,7 @@ struct anon_vma;
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
+struct rlimit;
 
 #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
@@ -1031,8 +1032,6 @@ extern void add_active_range(unsigned int nid, unsigned long start_pfn,
                                        unsigned long end_pfn);
 extern void remove_active_range(unsigned int nid, unsigned long start_pfn,
                                        unsigned long end_pfn);
-extern void push_node_boundaries(unsigned int nid, unsigned long start_pfn,
-                                       unsigned long end_pfn);
 extern void remove_all_active_ranges(void);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
                                                unsigned long end_pfn);
@@ -1319,8 +1318,8 @@ int vmemmap_populate_basepages(struct page *start_page,
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 
-extern void *alloc_locked_buffer(size_t size);
-extern void free_locked_buffer(void *buffer, size_t size);
-extern void release_locked_buffer(void *buffer, size_t size);
+extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+                                size_t size);
+extern void refund_locked_memory(struct mm_struct *mm, size_t size);
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
index 3d1b7bd..97491f7 100644 (file)
@@ -30,6 +30,8 @@ extern unsigned int kmmio_count;
 
 extern int register_kmmio_probe(struct kmmio_probe *p);
 extern void unregister_kmmio_probe(struct kmmio_probe *p);
+extern int kmmio_init(void);
+extern void kmmio_cleanup(void);
 
 #ifdef CONFIG_MMIOTRACE
 /* kmmio is active by some kmmio_probes? */
index 627ac08..a8f2c0a 100644 (file)
@@ -337,6 +337,14 @@ struct module
        const char **trace_bprintk_fmt_start;
        unsigned int num_trace_bprintk_fmt;
 #endif
+#ifdef CONFIG_EVENT_TRACING
+       struct ftrace_event_call *trace_events;
+       unsigned int num_trace_events;
+#endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+       unsigned long *ftrace_callsites;
+       unsigned int num_ftrace_callsites;
+#endif
 
 #ifdef CONFIG_MODULE_UNLOAD
        /* What modules depend on me? */
index 3069ec7..878cab4 100644 (file)
@@ -150,5 +150,6 @@ extern int __must_check mutex_lock_killable(struct mutex *lock);
  */
 extern int mutex_trylock(struct mutex *lock);
 extern void mutex_unlock(struct mutex *lock);
+extern int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
 
 #endif
index 0f71812..d7d1c41 100644 (file)
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_U       0xC118
 #define PCI_DEVICE_ID_OXSEMI_PCIe952_1_GU      0xC11C
 #define PCI_DEVICE_ID_OXSEMI_16PCI954  0x9501
+#define PCI_DEVICE_ID_OXSEMI_C950      0x950B
 #define PCI_DEVICE_ID_OXSEMI_16PCI95N  0x9511
 #define PCI_DEVICE_ID_OXSEMI_16PCI954PP        0x9513
 #define PCI_DEVICE_ID_OXSEMI_16PCI952  0x9521
 #define PCI_DEVICE_ID_OXSEMI_16PCI952PP        0x9523
+#define PCI_SUBDEVICE_ID_OXSEMI_C950   0x0001
 
 #define PCI_VENDOR_ID_CHELSIO          0x1425
 
index 67c1565..59e133d 100644 (file)
@@ -95,7 +95,6 @@ extern void __ptrace_link(struct task_struct *child,
                          struct task_struct *new_parent);
 extern void __ptrace_unlink(struct task_struct *child);
 extern void exit_ptrace(struct task_struct *tracer);
-extern void ptrace_fork(struct task_struct *task, unsigned long clone_flags);
 #define PTRACE_MODE_READ   1
 #define PTRACE_MODE_ATTACH 2
 /* Returns 0 on success, -errno on denial. */
@@ -327,15 +326,6 @@ static inline void user_enable_block_step(struct task_struct *task)
 #define arch_ptrace_untrace(task)              do { } while (0)
 #endif
 
-#ifndef arch_ptrace_fork
-/*
- * Do machine-specific work to initialize a new task.
- *
- * This is called from copy_process().
- */
-#define arch_ptrace_fork(child, clone_flags)   do { } while (0)
-#endif
-
 extern int task_current_syscall(struct task_struct *target, long *callno,
                                unsigned long args[6], unsigned int maxargs,
                                unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/rational.h b/include/linux/rational.h
new file mode 100644 (file)
index 0000000..4f532fc
--- /dev/null
@@ -0,0 +1,19 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers,
+ * e.g. when calculating optimum numerator/denominator pairs for
+ * pll configuration taking into account restricted register size
+ */
+
+#ifndef _LINUX_RATIONAL_H
+#define _LINUX_RATIONAL_H
+
+void rational_best_approximation(
+       unsigned long given_numerator, unsigned long given_denominator,
+       unsigned long max_numerator, unsigned long max_denominator,
+       unsigned long *best_numerator, unsigned long *best_denominator);
+
+#endif /* _LINUX_RATIONAL_H */
index e649bd3..5710f43 100644 (file)
@@ -198,6 +198,32 @@ static inline void list_splice_init_rcu(struct list_head *list,
        at->prev = last;
 }
 
+/**
+ * list_entry_rcu - get the struct for this entry
+ * @ptr:        the &struct list_head pointer.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_entry_rcu(ptr, type, member) \
+       container_of(rcu_dereference(ptr), type, member)
+
+/**
+ * list_first_entry_rcu - get the first element from a list
+ * @ptr:        the list head to take the element from.
+ * @type:       the type of the struct this is embedded in.
+ * @member:     the name of the list_struct within the struct.
+ *
+ * Note, that list is expected to be not empty.
+ *
+ * This primitive may safely run concurrently with the _rcu list-mutation
+ * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock().
+ */
+#define list_first_entry_rcu(ptr, type, member) \
+       list_entry_rcu((ptr)->next, type, member)
+
 #define __list_for_each_rcu(pos, head) \
        for (pos = rcu_dereference((head)->next); \
                pos != (head); \
@@ -214,9 +240,9 @@ static inline void list_splice_init_rcu(struct list_head *list,
  * as long as the traversal is guarded by rcu_read_lock().
  */
 #define list_for_each_entry_rcu(pos, head, member) \
-       for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
+       for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
                prefetch(pos->member.next), &pos->member != (head); \
-               pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
+               pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
 
 
 /**
index 58b2aa5..5a51538 100644 (file)
@@ -161,8 +161,15 @@ struct rcu_data {
        unsigned long offline_fqs;      /* Kicked due to being offline. */
        unsigned long resched_ipi;      /* Sent a resched IPI. */
 
-       /* 5) For future __rcu_pending statistics. */
+       /* 5) __rcu_pending() statistics. */
        long n_rcu_pending;             /* rcu_pending() calls since boot. */
+       long n_rp_qs_pending;
+       long n_rp_cb_ready;
+       long n_rp_cpu_needs_gp;
+       long n_rp_gp_completed;
+       long n_rp_gp_started;
+       long n_rp_need_fqs;
+       long n_rp_need_nothing;
 
        int cpu;
 };
index e1b7b21..8670f15 100644 (file)
@@ -11,7 +11,7 @@ struct ring_buffer_iter;
  * Don't refer to this struct directly, use functions below.
  */
 struct ring_buffer_event {
-       u32             type:2, len:3, time_delta:27;
+       u32             type_len:5, time_delta:27;
        u32             array[];
 };
 
@@ -24,7 +24,8 @@ struct ring_buffer_event {
  *                               size is variable depending on how much
  *                               padding is needed
  *                              If time_delta is non zero:
- *                               everything else same as RINGBUF_TYPE_DATA
+ *                               array[0] holds the actual length
+ *                               size = 4 + length (bytes)
  *
  * @RINGBUF_TYPE_TIME_EXTEND:  Extend the time delta
  *                              array[0] = time delta (28 .. 59)
@@ -35,22 +36,23 @@ struct ring_buffer_event {
  *                              array[1..2] = tv_sec
  *                              size = 16 bytes
  *
- * @RINGBUF_TYPE_DATA:         Data record
- *                              If len is zero:
+ * <= @RINGBUF_TYPE_DATA_TYPE_LEN_MAX:
+ *                             Data record
+ *                              If type_len is zero:
  *                               array[0] holds the actual length
  *                               array[1..(length+3)/4] holds data
- *                               size = 4 + 4 + length (bytes)
+ *                               size = 4 + length (bytes)
  *                              else
- *                               length = len << 2
+ *                               length = type_len << 2
  *                               array[0..(length+3)/4-1] holds data
  *                               size = 4 + length (bytes)
  */
 enum ring_buffer_type {
+       RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28,
        RINGBUF_TYPE_PADDING,
        RINGBUF_TYPE_TIME_EXTEND,
        /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */
        RINGBUF_TYPE_TIME_STAMP,
-       RINGBUF_TYPE_DATA,
 };
 
 unsigned ring_buffer_event_length(struct ring_buffer_event *event);
@@ -68,13 +70,54 @@ ring_buffer_event_time_delta(struct ring_buffer_event *event)
        return event->time_delta;
 }
 
+/*
+ * ring_buffer_event_discard can discard any event in the ring buffer.
+ *   it is up to the caller to protect against a reader from
+ *   consuming it or a writer from wrapping and replacing it.
+ *
+ * No external protection is needed if this is called before
+ * the event is committed. But in that case it would be better to
+ * use ring_buffer_discard_commit.
+ *
+ * Note, if an event that has not been committed is discarded
+ * with ring_buffer_event_discard, it must still be committed.
+ */
 void ring_buffer_event_discard(struct ring_buffer_event *event);
 
+/*
+ * ring_buffer_discard_commit will remove an event that has not
+ *   been committed yet. If this is used, then ring_buffer_unlock_commit
+ *   must not be called on the discarded event. This function
+ *   will try to remove the event from the ring buffer completely
+ *   if another event has not been written after it.
+ *
+ * Example use:
+ *
+ *  if (some_condition)
+ *    ring_buffer_discard_commit(buffer, event);
+ *  else
+ *    ring_buffer_unlock_commit(buffer, event);
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+                               struct ring_buffer_event *event);
+
 /*
  * size is in bytes for each per CPU buffer.
  */
 struct ring_buffer *
-ring_buffer_alloc(unsigned long size, unsigned flags);
+__ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *key);
+
+/*
+ * Because the ring buffer is generic, if other users of the ring buffer get
+ * traced by ftrace, it can produce lockdep warnings. We need to keep each
+ * ring buffer's lock class separate.
+ */
+#define ring_buffer_alloc(size, flags)                 \
+({                                                     \
+       static struct lock_class_key __key;             \
+       __ring_buffer_alloc((size), (flags), &__key);   \
+})
+
 void ring_buffer_free(struct ring_buffer *buffer);
 
 int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size);
@@ -122,6 +165,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer);
 unsigned long ring_buffer_overruns(struct ring_buffer *buffer);
 unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu);
 unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu);
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu);
 
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu);
 void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer,
@@ -137,6 +182,11 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data);
 int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page,
                          size_t len, int cpu, int full);
 
+struct trace_seq;
+
+int ring_buffer_print_entry_header(struct trace_seq *s);
+int ring_buffer_print_page_header(struct trace_seq *s);
+
 enum ring_buffer_flags {
        RB_FL_OVERWRITE         = 1 << 0,
 };
index b4c38bc..d139966 100644 (file)
@@ -77,6 +77,7 @@ struct sched_param {
 #include <linux/proportions.h>
 #include <linux/seccomp.h>
 #include <linux/rcupdate.h>
+#include <linux/rculist.h>
 #include <linux/rtmutex.h>
 
 #include <linux/time.h>
@@ -96,8 +97,8 @@ struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
 struct bio;
-struct bts_tracer;
 struct fs_struct;
+struct bts_context;
 
 /*
  * List of flags we want to share for kernel threads,
@@ -116,6 +117,7 @@ struct fs_struct;
  *    11 bit fractions.
  */
 extern unsigned long avenrun[];                /* Load averages */
+extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift);
 
 #define FSHIFT         11              /* nr of bits of precision */
 #define FIXED_1                (1<<FSHIFT)     /* 1.0 as fixed-point */
@@ -135,8 +137,8 @@ DECLARE_PER_CPU(unsigned long, process_counts);
 extern int nr_processes(void);
 extern unsigned long nr_running(void);
 extern unsigned long nr_uninterruptible(void);
-extern unsigned long nr_active(void);
 extern unsigned long nr_iowait(void);
+extern void calc_global_load(void);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
@@ -838,7 +840,17 @@ struct sched_group {
         */
        u32 reciprocal_cpu_power;
 
-       unsigned long cpumask[];
+       /*
+        * The CPUs this group covers.
+        *
+        * NOTE: this field is variable length. (Allocated dynamically
+        * by attaching extra space to the end of the structure,
+        * depending on how many CPUs the kernel has booted up with)
+        *
+        * It is also embedded into static data structures at build
+        * time. (See 'struct static_sched_group' in kernel/sched.c)
+        */
+       unsigned long cpumask[0];
 };
 
 static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
@@ -924,8 +936,17 @@ struct sched_domain {
        char *name;
 #endif
 
-       /* span of all CPUs in this domain */
-       unsigned long span[];
+       /*
+        * Span of all CPUs in this domain.
+        *
+        * NOTE: this field is variable length. (Allocated dynamically
+        * by attaching extra space to the end of the structure,
+        * depending on how many CPUs the kernel has booted up with)
+        *
+        * It is also embedded into static data structures at build
+        * time. (See 'struct static_sched_domain' in kernel/sched.c)
+        */
+       unsigned long span[0];
 };
 
 static inline struct cpumask *sched_domain_span(struct sched_domain *sd)
@@ -1209,18 +1230,11 @@ struct task_struct {
        struct list_head ptraced;
        struct list_head ptrace_entry;
 
-#ifdef CONFIG_X86_PTRACE_BTS
        /*
         * This is the tracer handle for the ptrace BTS extension.
         * This field actually belongs to the ptracer task.
         */
-       struct bts_tracer *bts;
-       /*
-        * The buffer to hold the BTS data.
-        */
-       void *bts_buffer;
-       size_t bts_size;
-#endif /* CONFIG_X86_PTRACE_BTS */
+       struct bts_context *bts;
 
        /* PID/PID hash table linkage. */
        struct pid_link pids[PIDTYPE_MAX];
@@ -1428,7 +1442,9 @@ struct task_struct {
 #ifdef CONFIG_TRACING
        /* state flags for use by tracers */
        unsigned long trace;
-#endif
+       /* bitmask of trace recursion */
+       unsigned long trace_recursion;
+#endif /* CONFIG_TRACING */
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
@@ -2001,8 +2017,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+extern void wait_task_context_switch(struct task_struct *p);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void wait_task_context_switch(struct task_struct *p) {}
 static inline unsigned long wait_task_inactive(struct task_struct *p,
                                               long match_state)
 {
@@ -2010,7 +2028,8 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
 }
 #endif
 
-#define next_task(p)   list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks)
+#define next_task(p) \
+       list_entry_rcu((p)->tasks.next, struct task_struct, tasks)
 
 #define for_each_process(p) \
        for (p = &init_task ; (p = next_task(p)) != &init_task ; )
@@ -2049,8 +2068,8 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2)
 
 static inline struct task_struct *next_thread(const struct task_struct *p)
 {
-       return list_entry(rcu_dereference(p->thread_group.next),
-                         struct task_struct, thread_group);
+       return list_entry_rcu(p->thread_group.next,
+                             struct task_struct, thread_group);
 }
 
 static inline int thread_group_empty(struct task_struct *p)
index 9136cc5..e5bb75a 100644 (file)
@@ -96,54 +96,76 @@ struct serial_uart_config {
 
 /*
  * Definitions for async_struct (and serial_struct) flags field
+ *
+ * Define ASYNCB_* for convenient use with {test,set,clear}_bit.
  */
-#define ASYNC_HUP_NOTIFY 0x0001 /* Notify getty on hangups and closes 
-                                  on the callout port */
-#define ASYNC_FOURPORT  0x0002 /* Set OU1, OUT2 per AST Fourport settings */
-#define ASYNC_SAK      0x0004  /* Secure Attention Key (Orange book) */
-#define ASYNC_SPLIT_TERMIOS 0x0008 /* Separate termios for dialin/callout */
-
-#define ASYNC_SPD_MASK 0x1030
-#define ASYNC_SPD_HI   0x0010  /* Use 56000 instead of 38400 bps */
-
-#define ASYNC_SPD_VHI  0x0020  /* Use 115200 instead of 38400 bps */
-#define ASYNC_SPD_CUST 0x0030  /* Use user-specified divisor */
-
-#define ASYNC_SKIP_TEST        0x0040 /* Skip UART test during autoconfiguration */
-#define ASYNC_AUTO_IRQ  0x0080 /* Do automatic IRQ during autoconfiguration */
-#define ASYNC_SESSION_LOCKOUT 0x0100 /* Lock out cua opens based on session */
-#define ASYNC_PGRP_LOCKOUT    0x0200 /* Lock out cua opens based on pgrp */
-#define ASYNC_CALLOUT_NOHUP   0x0400 /* Don't do hangups for cua device */
-
-#define ASYNC_HARDPPS_CD       0x0800  /* Call hardpps when CD goes high  */
-
-#define ASYNC_SPD_SHI  0x1000  /* Use 230400 instead of 38400 bps */
-#define ASYNC_SPD_WARP 0x1010  /* Use 460800 instead of 38400 bps */
-
-#define ASYNC_LOW_LATENCY 0x2000 /* Request low latency behaviour */
-
-#define ASYNC_BUGGY_UART  0x4000 /* This is a buggy UART, skip some safety
-                                 * checks.  Note: can be dangerous! */
-
-#define ASYNC_AUTOPROBE         0x8000 /* Port was autoprobed by PCI or PNP code */
-
-#define ASYNC_FLAGS    0x7FFF  /* Possible legal async flags */
-#define ASYNC_USR_MASK 0x3430  /* Legal flags that non-privileged
-                                * users can set or reset */
-
-/* Internal flags used only by kernel/chr_drv/serial.c */
-#define ASYNC_INITIALIZED      0x80000000 /* Serial port was initialized */
-#define ASYNC_NORMAL_ACTIVE    0x20000000 /* Normal device is active */
-#define ASYNC_BOOT_AUTOCONF    0x10000000 /* Autoconfigure port on bootup */
-#define ASYNC_CLOSING          0x08000000 /* Serial port is closing */
-#define ASYNC_CTS_FLOW         0x04000000 /* Do CTS flow control */
-#define ASYNC_CHECK_CD         0x02000000 /* i.e., CLOCAL */
-#define ASYNC_SHARE_IRQ                0x01000000 /* for multifunction cards
-                                            --- no longer used */
-#define ASYNC_CONS_FLOW                0x00800000 /* flow control for console  */
-
-#define ASYNC_BOOT_ONLYMCA     0x00400000 /* Probe only if MCA bus */
-#define ASYNC_INTERNAL_FLAGS   0xFFC00000 /* Internal flags */
+#define ASYNCB_HUP_NOTIFY       0 /* Notify getty on hangups and closes
+                                   * on the callout port */
+#define ASYNCB_FOURPORT                 1 /* Set OUT1, OUT2 per AST Fourport settings */
+#define ASYNCB_SAK              2 /* Secure Attention Key (Orange book) */
+#define ASYNCB_SPLIT_TERMIOS    3 /* Separate termios for dialin/callout */
+#define ASYNCB_SPD_HI           4 /* Use 56000 instead of 38400 bps */
+#define ASYNCB_SPD_VHI          5 /* Use 115200 instead of 38400 bps */
+#define ASYNCB_SKIP_TEST        6 /* Skip UART test during autoconfiguration */
+#define ASYNCB_AUTO_IRQ                 7 /* Do automatic IRQ during
+                                   * autoconfiguration */
+#define ASYNCB_SESSION_LOCKOUT  8 /* Lock out cua opens based on session */
+#define ASYNCB_PGRP_LOCKOUT     9 /* Lock out cua opens based on pgrp */
+#define ASYNCB_CALLOUT_NOHUP   10 /* Don't do hangups for cua device */
+#define ASYNCB_HARDPPS_CD      11 /* Call hardpps when CD goes high  */
+#define ASYNCB_SPD_SHI         12 /* Use 230400 instead of 38400 bps */
+#define ASYNCB_LOW_LATENCY     13 /* Request low latency behaviour */
+#define ASYNCB_BUGGY_UART      14 /* This is a buggy UART, skip some safety
+                                   * checks.  Note: can be dangerous! */
+#define ASYNCB_AUTOPROBE       15 /* Port was autoprobed by PCI or PNP code */
+#define ASYNCB_LAST_USER       15
+
+/* Internal flags used only by kernel */
+#define ASYNCB_INITIALIZED     31 /* Serial port was initialized */
+#define ASYNCB_NORMAL_ACTIVE   29 /* Normal device is active */
+#define ASYNCB_BOOT_AUTOCONF   28 /* Autoconfigure port on bootup */
+#define ASYNCB_CLOSING         27 /* Serial port is closing */
+#define ASYNCB_CTS_FLOW                26 /* Do CTS flow control */
+#define ASYNCB_CHECK_CD                25 /* i.e., CLOCAL */
+#define ASYNCB_SHARE_IRQ       24 /* for multifunction cards, no longer used */
+#define ASYNCB_CONS_FLOW       23 /* flow control for console  */
+#define ASYNCB_BOOT_ONLYMCA    22 /* Probe only if MCA bus */
+#define ASYNCB_FIRST_KERNEL    22
+
+#define ASYNC_HUP_NOTIFY       (1U << ASYNCB_HUP_NOTIFY)
+#define ASYNC_FOURPORT         (1U << ASYNCB_FOURPORT)
+#define ASYNC_SAK              (1U << ASYNCB_SAK)
+#define ASYNC_SPLIT_TERMIOS    (1U << ASYNCB_SPLIT_TERMIOS)
+#define ASYNC_SPD_HI           (1U << ASYNCB_SPD_HI)
+#define ASYNC_SPD_VHI          (1U << ASYNCB_SPD_VHI)
+#define ASYNC_SKIP_TEST                (1U << ASYNCB_SKIP_TEST)
+#define ASYNC_AUTO_IRQ         (1U << ASYNCB_AUTO_IRQ)
+#define ASYNC_SESSION_LOCKOUT  (1U << ASYNCB_SESSION_LOCKOUT)
+#define ASYNC_PGRP_LOCKOUT     (1U << ASYNCB_PGRP_LOCKOUT)
+#define ASYNC_CALLOUT_NOHUP    (1U << ASYNCB_CALLOUT_NOHUP)
+#define ASYNC_HARDPPS_CD       (1U << ASYNCB_HARDPPS_CD)
+#define ASYNC_SPD_SHI          (1U << ASYNCB_SPD_SHI)
+#define ASYNC_LOW_LATENCY      (1U << ASYNCB_LOW_LATENCY)
+#define ASYNC_BUGGY_UART       (1U << ASYNCB_BUGGY_UART)
+#define ASYNC_AUTOPROBE                (1U << ASYNCB_AUTOPROBE)
+
+#define ASYNC_FLAGS            ((1U << ASYNCB_LAST_USER) - 1)
+#define ASYNC_USR_MASK         (ASYNC_SPD_HI|ASYNC_SPD_VHI| \
+               ASYNC_CALLOUT_NOHUP|ASYNC_SPD_SHI|ASYNC_LOW_LATENCY)
+#define ASYNC_SPD_CUST         (ASYNC_SPD_HI|ASYNC_SPD_VHI)
+#define ASYNC_SPD_WARP         (ASYNC_SPD_HI|ASYNC_SPD_SHI)
+#define ASYNC_SPD_MASK         (ASYNC_SPD_HI|ASYNC_SPD_VHI|ASYNC_SPD_SHI)
+
+#define ASYNC_INITIALIZED      (1U << ASYNCB_INITIALIZED)
+#define ASYNC_NORMAL_ACTIVE    (1U << ASYNCB_NORMAL_ACTIVE)
+#define ASYNC_BOOT_AUTOCONF    (1U << ASYNCB_BOOT_AUTOCONF)
+#define ASYNC_CLOSING          (1U << ASYNCB_CLOSING)
+#define ASYNC_CTS_FLOW         (1U << ASYNCB_CTS_FLOW)
+#define ASYNC_CHECK_CD         (1U << ASYNCB_CHECK_CD)
+#define ASYNC_SHARE_IRQ                (1U << ASYNCB_SHARE_IRQ)
+#define ASYNC_CONS_FLOW                (1U << ASYNCB_CONS_FLOW)
+#define ASYNC_BOOT_ONLYMCA     (1U << ASYNCB_BOOT_ONLYMCA)
+#define ASYNC_INTERNAL_FLAGS   (~((1U << ASYNCB_FIRST_KERNEL) - 1))
 
 /*
  * Multiport serial configuration structure --- external structure
index 57a97e5..6fd80c4 100644 (file)
@@ -41,7 +41,8 @@
 #define PORT_XSCALE    15
 #define PORT_RM9000    16      /* PMC-Sierra RM9xxx internal UART */
 #define PORT_OCTEON    17      /* Cavium OCTEON internal UART */
-#define PORT_MAX_8250  17      /* max port ID */
+#define PORT_AR7       18      /* Texas Instruments AR7 internal UART */
+#define PORT_MAX_8250  18      /* max port ID */
 
 /*
  * ARM specific type numbers.  These are not currently guaranteed
 /* MAX3100 */
 #define PORT_MAX3100    86
 
+/* Timberdale UART */
+#define PORT_TIMBUART  87
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
index 84f997f..c755283 100644 (file)
@@ -235,6 +235,8 @@ static inline int valid_signal(unsigned long sig)
 extern int next_signal(struct sigpending *pending, sigset_t *mask);
 extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p);
 extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *);
+extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig,
+                                siginfo_t *info);
 extern long do_sigpending(void __user *, unsigned long);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
 extern int show_unhandled_signals;
index 5ac9b0b..713f841 100644 (file)
@@ -14,7 +14,7 @@
 #include <asm/page.h>          /* kmalloc_sizes.h needs PAGE_SIZE */
 #include <asm/cache.h>         /* kmalloc_sizes.h needs L1_CACHE_BYTES */
 #include <linux/compiler.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 /* Size description struct for general caches. */
 struct cache_sizes {
index 5046f90..be5d40c 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/gfp.h>
 #include <linux/workqueue.h>
 #include <linux/kobject.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 enum stat_item {
        ALLOC_FASTPATH,         /* Allocation from cpu slab */
index 938234c..d4841ed 100644 (file)
@@ -60,6 +60,7 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock)
 #define __raw_spin_is_locked(lock)     ((void)(lock), 0)
 /* for sched.c and kernel_lock.c: */
 # define __raw_spin_lock(lock)         do { (void)(lock); } while (0)
+# define __raw_spin_lock_flags(lock, flags)    do { (void)(lock); } while (0)
 # define __raw_spin_unlock(lock)       do { (void)(lock); } while (0)
 # define __raw_spin_trylock(lock)      ({ (void)(lock); 1; })
 #endif /* DEBUG_SPINLOCK */
index ac9ff54..cb1a663 100644 (file)
@@ -29,7 +29,8 @@ extern void *swiotlb_alloc(unsigned order, unsigned long nslabs);
 
 extern dma_addr_t swiotlb_phys_to_bus(struct device *hwdev,
                                      phys_addr_t address);
-extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address);
+extern phys_addr_t swiotlb_bus_to_phys(struct device *hwdev,
+                                      dma_addr_t address);
 
 extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size);
 
index e6b820f..a8cc4e1 100644 (file)
@@ -21,13 +21,14 @@ struct restart_block {
                struct {
                        unsigned long arg0, arg1, arg2, arg3;
                };
-               /* For futex_wait */
+               /* For futex_wait and futex_wait_requeue_pi */
                struct {
                        u32 *uaddr;
                        u32 val;
                        u32 flags;
                        u32 bitset;
                        u64 time;
+                       u32 *uaddr2;
                } futex;
                /* For nanosleep */
                struct {
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
new file mode 100644 (file)
index 0000000..c68bccb
--- /dev/null
@@ -0,0 +1,92 @@
+#ifndef _LINUX_TRACE_SEQ_H
+#define _LINUX_TRACE_SEQ_H
+
+#include <linux/fs.h>
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE).
+ */
+
+struct trace_seq {
+       unsigned char           buffer[PAGE_SIZE];
+       unsigned int            len;
+       unsigned int            readpos;
+};
+
+static inline void
+trace_seq_init(struct trace_seq *s)
+{
+       s->len = 0;
+       s->readpos = 0;
+}
+
+/*
+ * Currently only defined when tracing is enabled.
+ */
+#ifdef CONFIG_TRACING
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+       __attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+       __attribute__ ((format (printf, 2, 0)));
+extern int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
+extern void trace_print_seq(struct seq_file *m, struct trace_seq *s);
+extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+                                size_t cnt);
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
+extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+                               size_t len);
+extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
+extern int trace_seq_path(struct trace_seq *s, struct path *path);
+
+#else /* CONFIG_TRACING */
+static inline int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+       return 0;
+}
+static inline int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+{
+       return 0;
+}
+
+static inline void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+}
+static inline ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
+                                size_t cnt)
+{
+       return 0;
+}
+static inline int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+       return 0;
+}
+static inline int trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+       return 0;
+}
+static inline int
+trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len)
+{
+       return 0;
+}
+static inline int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
+                                      size_t len)
+{
+       return 0;
+}
+static inline void *trace_seq_reserve(struct trace_seq *s, size_t len)
+{
+       return NULL;
+}
+static inline int trace_seq_path(struct trace_seq *s, struct path *path)
+{
+       return 0;
+}
+#endif /* CONFIG_TRACING */
+
+#endif /* _LINUX_TRACE_SEQ_H */
index c7aa154..eb96603 100644 (file)
@@ -259,14 +259,12 @@ static inline void tracehook_finish_clone(struct task_struct *child,
 
 /**
  * tracehook_report_clone - in parent, new child is about to start running
- * @trace:             return value from tracehook_prepare_clone()
  * @regs:              parent's user register state
  * @clone_flags:       flags from parent's system call
  * @pid:               new child's PID in the parent's namespace
  * @child:             new child task
  *
- * Called after a child is set up, but before it has been started
- * running.  @trace is the value returned by tracehook_prepare_clone().
+ * Called after a child is set up, but before it has been started running.
  * This is not a good place to block, because the child has not started
  * yet.  Suspend the child here if desired, and then block in
  * tracehook_report_clone_complete().  This must prevent the child from
@@ -276,13 +274,14 @@ static inline void tracehook_finish_clone(struct task_struct *child,
  *
  * Called with no locks held, but the child cannot run until this returns.
  */
-static inline void tracehook_report_clone(int trace, struct pt_regs *regs,
+static inline void tracehook_report_clone(struct pt_regs *regs,
                                          unsigned long clone_flags,
                                          pid_t pid, struct task_struct *child)
 {
-       if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) {
+       if (unlikely(task_ptrace(child))) {
                /*
-                * The child starts up with an immediate SIGSTOP.
+                * It doesn't matter who attached or is attaching to this
+                * task; the pending SIGSTOP is right in any case.
                 */
                sigaddset(&child->pending.signal, SIGSTOP);
                set_tsk_thread_flag(child, TIF_SIGPENDING);
index d35a7ee..14df7e6 100644 (file)
@@ -31,6 +31,8 @@ struct tracepoint {
                                         * Keep in sync with vmlinux.lds.h.
                                         */
 
+#ifndef DECLARE_TRACE
+
 #define TP_PROTO(args...)      args
 #define TP_ARGS(args...)               args
 
@@ -114,6 +116,7 @@ static inline void tracepoint_update_probe_range(struct tracepoint *begin,
        struct tracepoint *end)
 { }
 #endif /* CONFIG_TRACEPOINTS */
+#endif /* DECLARE_TRACE */
 
 /*
  * Connect a probe to a tracepoint.
@@ -154,10 +157,8 @@ static inline void tracepoint_synchronize_unregister(void)
 }
 
 #define PARAMS(args...) args
-#define TRACE_FORMAT(name, proto, args, fmt)           \
-       DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
-
 
+#ifndef TRACE_EVENT
 /*
  * For use with the TRACE_EVENT macro:
  *
@@ -262,5 +263,6 @@ static inline void tracepoint_synchronize_unregister(void)
 
 #define TRACE_EVENT(name, proto, args, struct, assign, print)  \
        DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+#endif
 
 #endif
index fc39db9..1488d8c 100644 (file)
@@ -185,7 +185,7 @@ struct tty_port;
 struct tty_port_operations {
        /* Return 1 if the carrier is raised */
        int (*carrier_raised)(struct tty_port *port);
-       void (*raise_dtr_rts)(struct tty_port *port);
+       void (*dtr_rts)(struct tty_port *port, int raise);
 };
        
 struct tty_port {
@@ -201,6 +201,9 @@ struct tty_port {
        unsigned char           *xmit_buf;      /* Optional buffer */
        int                     close_delay;    /* Close port delay */
        int                     closing_wait;   /* Delay for output */
+       int                     drain_delay;    /* Set to zero if no pure time
+                                                  based drain is needed else
+                                                  set to size of fifo */
 };
 
 /*
@@ -223,8 +226,11 @@ struct tty_struct {
        struct tty_driver *driver;
        const struct tty_operations *ops;
        int index;
-       /* The ldisc objects are protected by tty_ldisc_lock at the moment */
-       struct tty_ldisc ldisc;
+
+       /* Protects ldisc changes: Lock tty not pty */
+       struct mutex ldisc_mutex;
+       struct tty_ldisc *ldisc;
+
        struct mutex termios_mutex;
        spinlock_t ctrl_lock;
        /* Termios values are protected by the termios mutex */
@@ -311,6 +317,7 @@ struct tty_struct {
 #define TTY_CLOSING            7       /* ->close() in progress */
 #define TTY_LDISC              9       /* Line discipline attached */
 #define TTY_LDISC_CHANGING     10      /* Line discipline changing */
+#define TTY_LDISC_OPEN         11      /* Line discipline is open */
 #define TTY_HW_COOK_OUT        14      /* Hardware can do output cooking */
 #define TTY_HW_COOK_IN                 15      /* Hardware can do input cooking */
 #define TTY_PTY_LOCK           16      /* pty private */
@@ -403,6 +410,7 @@ extern int tty_termios_hw_change(struct ktermios *a, struct ktermios *b);
 extern struct tty_ldisc *tty_ldisc_ref(struct tty_struct *);
 extern void tty_ldisc_deref(struct tty_ldisc *);
 extern struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *);
+extern void tty_ldisc_hangup(struct tty_struct *tty);
 extern const struct file_operations tty_ldiscs_proc_fops;
 
 extern void tty_wakeup(struct tty_struct *tty);
@@ -425,6 +433,9 @@ extern struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx,
 extern void tty_release_dev(struct file *filp);
 extern int tty_init_termios(struct tty_struct *tty);
 
+extern struct tty_struct *tty_pair_get_tty(struct tty_struct *tty);
+extern struct tty_struct *tty_pair_get_pty(struct tty_struct *tty);
+
 extern struct mutex tty_mutex;
 
 extern void tty_write_unlock(struct tty_struct *tty);
@@ -438,6 +449,7 @@ extern struct tty_struct *tty_port_tty_get(struct tty_port *port);
 extern void tty_port_tty_set(struct tty_port *port, struct tty_struct *tty);
 extern int tty_port_carrier_raised(struct tty_port *port);
 extern void tty_port_raise_dtr_rts(struct tty_port *port);
+extern void tty_port_lower_dtr_rts(struct tty_port *port);
 extern void tty_port_hangup(struct tty_port *port);
 extern int tty_port_block_til_ready(struct tty_port *port,
                                struct tty_struct *tty, struct file *filp);
index bcba84e..3566129 100644 (file)
  *     the line discipline are close to full, and it should somehow
  *     signal that no more characters should be sent to the tty.
  *
- *     Optional: Always invoke via tty_throttle();
+ *     Optional: Always invoke via tty_throttle(), called under the
+ *     termios lock.
  * 
  * void (*unthrottle)(struct tty_struct * tty);
  *
  *     that characters can now be sent to the tty without fear of
  *     overrunning the input buffers of the line disciplines.
  * 
- *     Optional: Always invoke via tty_unthrottle();
+ *     Optional: Always invoke via tty_unthrottle(), called under the
+ *     termios lock.
  *
  * void (*stop)(struct tty_struct *tty);
  *
index 625e9e4..8cdfed7 100644 (file)
@@ -224,8 +224,7 @@ struct usb_serial_driver {
        /* Called by console with tty = NULL and by tty */
        int  (*open)(struct tty_struct *tty,
                        struct usb_serial_port *port, struct file *filp);
-       void (*close)(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+       void (*close)(struct usb_serial_port *port);
        int  (*write)(struct tty_struct *tty, struct usb_serial_port *port,
                        const unsigned char *buf, int count);
        /* Called only by the tty layer */
@@ -241,6 +240,10 @@ struct usb_serial_driver {
        int  (*tiocmget)(struct tty_struct *tty, struct file *file);
        int  (*tiocmset)(struct tty_struct *tty, struct file *file,
                         unsigned int set, unsigned int clear);
+       /* Called by the tty layer for port level work. There may or may not
+          be an attached tty at this point */
+       void (*dtr_rts)(struct usb_serial_port *port, int on);
+       int  (*carrier_raised)(struct usb_serial_port *port);
        /* USB events */
        void (*read_int_callback)(struct urb *urb);
        void (*write_int_callback)(struct urb *urb);
@@ -283,8 +286,7 @@ extern int usb_serial_generic_open(struct tty_struct *tty,
                struct usb_serial_port *port, struct file *filp);
 extern int usb_serial_generic_write(struct tty_struct *tty,
        struct usb_serial_port *port, const unsigned char *buf, int count);
-extern void usb_serial_generic_close(struct tty_struct *tty,
-                       struct usb_serial_port *port, struct file *filp);
+extern void usb_serial_generic_close(struct usb_serial_port *port);
 extern int usb_serial_generic_resume(struct usb_serial *serial);
 extern int usb_serial_generic_write_room(struct tty_struct *tty);
 extern int usb_serial_generic_chars_in_buffer(struct tty_struct *tty);
index bc02463..6788e1a 100644 (file)
@@ -132,8 +132,6 @@ static inline void __remove_wait_queue(wait_queue_head_t *head,
        list_del(&old->task_list);
 }
 
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-                       int nr_exclusive, int sync, void *key);
 void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key);
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key);
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr,
diff --git a/include/trace/block.h b/include/trace/block.h
deleted file mode 100644 (file)
index 25b7068..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-#ifndef _TRACE_BLOCK_H
-#define _TRACE_BLOCK_H
-
-#include <linux/blkdev.h>
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(block_rq_abort,
-       TP_PROTO(struct request_queue *q, struct request *rq),
-             TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_insert,
-       TP_PROTO(struct request_queue *q, struct request *rq),
-             TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_issue,
-       TP_PROTO(struct request_queue *q, struct request *rq),
-             TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_requeue,
-       TP_PROTO(struct request_queue *q, struct request *rq),
-             TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_rq_complete,
-       TP_PROTO(struct request_queue *q, struct request *rq),
-             TP_ARGS(q, rq));
-
-DECLARE_TRACE(block_bio_bounce,
-       TP_PROTO(struct request_queue *q, struct bio *bio),
-             TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_complete,
-       TP_PROTO(struct request_queue *q, struct bio *bio),
-             TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_backmerge,
-       TP_PROTO(struct request_queue *q, struct bio *bio),
-             TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_frontmerge,
-       TP_PROTO(struct request_queue *q, struct bio *bio),
-             TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_bio_queue,
-       TP_PROTO(struct request_queue *q, struct bio *bio),
-             TP_ARGS(q, bio));
-
-DECLARE_TRACE(block_getrq,
-       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-             TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_sleeprq,
-       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
-             TP_ARGS(q, bio, rw));
-
-DECLARE_TRACE(block_plug,
-       TP_PROTO(struct request_queue *q),
-             TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_timer,
-       TP_PROTO(struct request_queue *q),
-             TP_ARGS(q));
-
-DECLARE_TRACE(block_unplug_io,
-       TP_PROTO(struct request_queue *q),
-             TP_ARGS(q));
-
-DECLARE_TRACE(block_split,
-       TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu),
-             TP_ARGS(q, bio, pdu));
-
-DECLARE_TRACE(block_remap,
-       TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
-                sector_t from, sector_t to),
-             TP_ARGS(q, bio, dev, from, to));
-
-#endif
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
new file mode 100644 (file)
index 0000000..f7a7ae1
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Trace files that want to automate creation of all tracepoints defined
+ * in their file should include this file. The following are macros that the
+ * trace file may define:
+ *
+ * TRACE_SYSTEM defines the system the tracepoint is for
+ *
+ * TRACE_INCLUDE_FILE if the file name is something other than TRACE_SYSTEM.h
+ *     This macro may be defined to tell define_trace.h what file to include.
+ *     Note, leave off the ".h".
+ *
+ * TRACE_INCLUDE_PATH if the path is something other than core kernel include/trace
+ *     then this macro can define the path to use. Note, the path is relative to
+ *     define_trace.h, not the file including it. Full path names for out of tree
+ *     modules must be used.
+ */
+
+#ifdef CREATE_TRACE_POINTS
+
+/* Prevent recursion */
+#undef CREATE_TRACE_POINTS
+
+#include <linux/stringify.h>
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
+       DEFINE_TRACE(name)
+
+#undef DECLARE_TRACE
+#define DECLARE_TRACE(name, proto, args)       \
+       DEFINE_TRACE(name)
+
+#undef TRACE_INCLUDE
+#undef __TRACE_INCLUDE
+
+#ifndef TRACE_INCLUDE_FILE
+# define TRACE_INCLUDE_FILE TRACE_SYSTEM
+# define UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifndef TRACE_INCLUDE_PATH
+# define __TRACE_INCLUDE(system) <trace/events/system.h>
+# define UNDEF_TRACE_INCLUDE_PATH
+#else
+# define __TRACE_INCLUDE(system) __stringify(TRACE_INCLUDE_PATH/system.h)
+#endif
+
+# define TRACE_INCLUDE(system) __TRACE_INCLUDE(system)
+
+/* Let the trace headers be reread */
+#define TRACE_HEADER_MULTI_READ
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#ifdef CONFIG_EVENT_TRACING
+#include <trace/ftrace.h>
+#endif
+
+#undef TRACE_HEADER_MULTI_READ
+
+/* Only undef what we defined in this file */
+#ifdef UNDEF_TRACE_INCLUDE_FILE
+# undef TRACE_INCLUDE_FILE
+# undef UNDEF_TRACE_INCLUDE_FILE
+#endif
+
+#ifdef UNDEF_TRACE_INCLUDE_PATH
+# undef TRACE_INCLUDE_PATH
+# undef UNDEF_TRACE_INCLUDE_PATH
+#endif
+
+/* We may be processing more files */
+#define CREATE_TRACE_POINTS
+
+#endif /* CREATE_TRACE_POINTS */
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
new file mode 100644 (file)
index 0000000..53effd4
--- /dev/null
@@ -0,0 +1,498 @@
+#if !defined(_TRACE_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_BLOCK_H
+
+#include <linux/blktrace_api.h>
+#include <linux/blkdev.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM block
+
+TRACE_EVENT(block_rq_abort,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  int,          errors                  )
+               __array(  char,         rwbs,   6               )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+               __entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+               __entry->nr_sector = blk_pc_request(rq) ?
+                                               0 : rq->hard_nr_sectors;
+               __entry->errors    = rq->errors;
+
+               blk_fill_rwbs_rq(__entry->rwbs, rq);
+               blk_dump_cmd(__get_str(cmd), rq);
+       ),
+
+       TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, __get_str(cmd),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_insert,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  unsigned int, bytes                   )
+               __array(  char,         rwbs,   6               )
+               __array(  char,         comm,   TASK_COMM_LEN   )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+               __entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+               __entry->nr_sector = blk_pc_request(rq) ?
+                                               0 : rq->hard_nr_sectors;
+               __entry->bytes     = blk_pc_request(rq) ? rq->data_len : 0;
+
+               blk_fill_rwbs_rq(__entry->rwbs, rq);
+               blk_dump_cmd(__get_str(cmd), rq);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, __entry->bytes, __get_str(cmd),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_issue,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  unsigned int, bytes                   )
+               __array(  char,         rwbs,   6               )
+               __array(  char,         comm,   TASK_COMM_LEN   )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+               __entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+               __entry->nr_sector = blk_pc_request(rq) ?
+                                               0 : rq->hard_nr_sectors;
+               __entry->bytes     = blk_pc_request(rq) ? rq->data_len : 0;
+
+               blk_fill_rwbs_rq(__entry->rwbs, rq);
+               blk_dump_cmd(__get_str(cmd), rq);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, __entry->bytes, __get_str(cmd),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_rq_requeue,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  int,          errors                  )
+               __array(  char,         rwbs,   6               )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+               __entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+               __entry->nr_sector = blk_pc_request(rq) ?
+                                               0 : rq->hard_nr_sectors;
+               __entry->errors    = rq->errors;
+
+               blk_fill_rwbs_rq(__entry->rwbs, rq);
+               blk_dump_cmd(__get_str(cmd), rq);
+       ),
+
+       TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, __get_str(cmd),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->errors)
+);
+
+TRACE_EVENT(block_rq_complete,
+
+       TP_PROTO(struct request_queue *q, struct request *rq),
+
+       TP_ARGS(q, rq),
+
+       TP_STRUCT__entry(
+               __field(  dev_t,        dev                     )
+               __field(  sector_t,     sector                  )
+               __field(  unsigned int, nr_sector               )
+               __field(  int,          errors                  )
+               __array(  char,         rwbs,   6               )
+               __dynamic_array( char,  cmd,    blk_cmd_buf_len(rq)     )
+       ),
+
+       TP_fast_assign(
+               __entry->dev       = rq->rq_disk ? disk_devt(rq->rq_disk) : 0;
+               __entry->sector    = blk_pc_request(rq) ? 0 : rq->hard_sector;
+               __entry->nr_sector = blk_pc_request(rq) ?
+                                               0 : rq->hard_nr_sectors;
+               __entry->errors    = rq->errors;
+
+               blk_fill_rwbs_rq(__entry->rwbs, rq);
+               blk_dump_cmd(__get_str(cmd), rq);
+       ),
+
+       TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->rwbs, __get_str(cmd),
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->errors)
+);
+TRACE_EVENT(block_bio_bounce,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __array( char,          rwbs,   6               )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_complete,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned,      nr_sector       )
+               __field( int,           error           ) /* NOTE(review): printed by TP_printk but never assigned in TP_fast_assign; confirm intended source */
+               __array( char,          rwbs,   6       )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%d]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->error)
+);
+
+TRACE_EVENT(block_bio_backmerge,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __array( char,          rwbs,   6               )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_frontmerge,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned,      nr_sector               )
+               __array( char,          rwbs,   6               )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_bio_queue,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio),
+
+       TP_ARGS(q, bio),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __array( char,          rwbs,   6               )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_getrq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __array( char,          rwbs,   6               )
+               __array( char,          comm,   TASK_COMM_LEN   )
+        ),
+
+       TP_fast_assign(
+               __entry->dev            = bio ? bio->bi_bdev->bd_dev : 0;
+               __entry->sector         = bio ? bio->bi_sector : 0;
+               __entry->nr_sector      = bio ? bio->bi_size >> 9 : 0;
+               blk_fill_rwbs(__entry->rwbs,
+                             bio ? bio->bi_rw : 0, __entry->nr_sector);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+        ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_sleeprq,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, int rw),
+
+       TP_ARGS(q, bio, rw),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                     )
+               __field( sector_t,      sector                  )
+               __field( unsigned int,  nr_sector               )
+               __array( char,          rwbs,   6               )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio ? bio->bi_bdev->bd_dev : 0;
+               __entry->sector         = bio ? bio->bi_sector : 0;
+               __entry->nr_sector      = bio ? bio->bi_size >> 9 : 0;
+               blk_fill_rwbs(__entry->rwbs,
+                           bio ? bio->bi_rw : 0, __entry->nr_sector);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector, __entry->comm)
+);
+
+TRACE_EVENT(block_plug,
+
+       TP_PROTO(struct request_queue *q),
+
+       TP_ARGS(q),
+
+       TP_STRUCT__entry(
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("[%s]", __entry->comm)
+);
+
+TRACE_EVENT(block_unplug_timer,
+
+       TP_PROTO(struct request_queue *q),
+
+       TP_ARGS(q),
+
+       TP_STRUCT__entry(
+               __field( int,           nr_rq                   )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->nr_rq  = q->rq.count[READ] + q->rq.count[WRITE];
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_unplug_io,
+
+       TP_PROTO(struct request_queue *q),
+
+       TP_ARGS(q),
+
+       TP_STRUCT__entry(
+               __field( int,           nr_rq                   )
+               __array( char,          comm,   TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->nr_rq  = q->rq.count[READ] + q->rq.count[WRITE];
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("[%s] %d", __entry->comm, __entry->nr_rq)
+);
+
+TRACE_EVENT(block_split,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio,
+                unsigned int new_sector),
+
+       TP_ARGS(q, bio, new_sector),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev                             )
+               __field( sector_t,      sector                          )
+               __field( sector_t,      new_sector                      )
+               __array( char,          rwbs,           6               )
+               __array( char,          comm,           TASK_COMM_LEN   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->new_sector     = new_sector;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+               memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
+       ),
+
+       TP_printk("%d,%d %s %llu / %llu [%s]",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 (unsigned long long)__entry->new_sector,
+                 __entry->comm)
+);
+
+TRACE_EVENT(block_remap,
+
+       TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev,
+                sector_t from),
+
+       TP_ARGS(q, bio, dev, from),
+
+       TP_STRUCT__entry(
+               __field( dev_t,         dev             )
+               __field( sector_t,      sector          )
+               __field( unsigned int,  nr_sector       )
+               __field( dev_t,         old_dev         )
+               __field( sector_t,      old_sector      )
+               __array( char,          rwbs,   6       )
+       ),
+
+       TP_fast_assign(
+               __entry->dev            = bio->bi_bdev->bd_dev;
+               __entry->sector         = bio->bi_sector;
+               __entry->nr_sector      = bio->bi_size >> 9;
+               __entry->old_dev        = dev;
+               __entry->old_sector     = from;
+               blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size);
+       ),
+
+       TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
+                 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->rwbs,
+                 (unsigned long long)__entry->sector,
+                 __entry->nr_sector,
+                 MAJOR(__entry->old_dev), MINOR(__entry->old_dev),
+                 (unsigned long long)__entry->old_sector)
+);
+
+#endif /* _TRACE_BLOCK_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
+
diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h
new file mode 100644 (file)
index 0000000..b0c7ede
--- /dev/null
@@ -0,0 +1,145 @@
+#if !defined(_TRACE_IRQ_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_IRQ_H
+
+#include <linux/tracepoint.h>
+#include <linux/interrupt.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM irq
+
+#define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq }
+#define show_softirq_name(val)                 \
+       __print_symbolic(val,                   \
+                        softirq_name(HI),      \
+                        softirq_name(TIMER),   \
+                        softirq_name(NET_TX),  \
+                        softirq_name(NET_RX),  \
+                        softirq_name(BLOCK),   \
+                        softirq_name(TASKLET), \
+                        softirq_name(SCHED),   \
+                        softirq_name(HRTIMER), \
+                        softirq_name(RCU))
+
+/**
+ * irq_handler_entry - called immediately before the irq action handler
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ *
+ * The struct irqaction pointed to by @action contains various
+ * information about the handler, including the device name,
+ * @action->name, and the device id, @action->dev_id. When used in
+ * conjunction with the irq_handler_exit tracepoint, we can figure
+ * out irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_entry,
+
+       TP_PROTO(int irq, struct irqaction *action),
+
+       TP_ARGS(irq, action),
+
+       TP_STRUCT__entry(
+               __field(        int,    irq             )
+               __string(       name,   action->name    )
+       ),
+
+       TP_fast_assign(
+               __entry->irq = irq;
+               __assign_str(name, action->name);
+       ),
+
+       TP_printk("irq=%d handler=%s", __entry->irq, __get_str(name))
+);
+
+/**
+ * irq_handler_exit - called immediately after the irq action handler returns
+ * @irq: irq number
+ * @action: pointer to struct irqaction
+ * @ret: return value
+ *
+ * If the @ret value is set to IRQ_HANDLED, then we know that the corresponding
+ * @action->handler successfully handled this irq. Otherwise, the irq might be
+ * a shared irq line, or the irq was not handled successfully. Can be used in
+ * conjunction with the irq_handler_entry to understand irq handler latencies.
+ */
+TRACE_EVENT(irq_handler_exit,
+
+       TP_PROTO(int irq, struct irqaction *action, int ret),
+
+       TP_ARGS(irq, action, ret),
+
+       TP_STRUCT__entry(
+               __field(        int,    irq     )
+               __field(        int,    ret     )
+       ),
+
+       TP_fast_assign(
+               __entry->irq    = irq;
+               __entry->ret    = ret;
+       ),
+
+       TP_printk("irq=%d return=%s",
+                 __entry->irq, __entry->ret ? "handled" : "unhandled")
+);
+
+/**
+ * softirq_entry - called immediately before the softirq handler
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter contains a pointer to the struct softirq_action
+ * which has a pointer to the action handler that is called. By subtracting
+ * the @vec pointer from the @h pointer, we can determine the softirq
+ * number. Also, when used in combination with the softirq_exit tracepoint
+ * we can determine the softirq latency.
+ */
+TRACE_EVENT(softirq_entry,
+
+       TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+       TP_ARGS(h, vec),
+
+       TP_STRUCT__entry(
+               __field(        int,    vec                     )
+       ),
+
+       TP_fast_assign(
+               __entry->vec = (int)(h - vec);
+       ),
+
+       TP_printk("softirq=%d action=%s", __entry->vec,
+                 show_softirq_name(__entry->vec))
+);
+
+/**
+ * softirq_exit - called immediately after the softirq handler returns
+ * @h: pointer to struct softirq_action
+ * @vec: pointer to first struct softirq_action in softirq_vec array
+ *
+ * The @h parameter contains a pointer to the struct softirq_action
+ * that has handled the softirq. By subtracting the @vec pointer from
+ * the @h pointer, we can determine the softirq number. Also, when used in
+ * combination with the softirq_entry tracepoint we can determine the softirq
+ * latency.
+ */
+TRACE_EVENT(softirq_exit,
+
+       TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
+
+       TP_ARGS(h, vec),
+
+       TP_STRUCT__entry(
+               __field(        int,    vec                     )
+       ),
+
+       TP_fast_assign(
+               __entry->vec = (int)(h - vec);
+       ),
+
+       TP_printk("softirq=%d action=%s", __entry->vec,
+                 show_softirq_name(__entry->vec))
+);
+
+#endif /*  _TRACE_IRQ_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
new file mode 100644 (file)
index 0000000..9baba50
--- /dev/null
@@ -0,0 +1,231 @@
+#if !defined(_TRACE_KMEM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KMEM_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kmem
+
+/*
+ * The order of these masks is important. Matching masks will be seen
+ * first and the left over flags will end up showing by themselves.
+ *
+ * For example, if we have GFP_KERNEL before GFP_USER we will get:
+ *
+ *  GFP_KERNEL|GFP_HARDWALL
+ *
+ * Thus most bits set go first.
+ */
+#define show_gfp_flags(flags)                                          \
+       (flags) ? __print_flags(flags, "|",                             \
+       {(unsigned long)GFP_HIGHUSER_MOVABLE,   "GFP_HIGHUSER_MOVABLE"}, \
+       {(unsigned long)GFP_HIGHUSER,           "GFP_HIGHUSER"},        \
+       {(unsigned long)GFP_USER,               "GFP_USER"},            \
+       {(unsigned long)GFP_TEMPORARY,          "GFP_TEMPORARY"},       \
+       {(unsigned long)GFP_KERNEL,             "GFP_KERNEL"},          \
+       {(unsigned long)GFP_NOFS,               "GFP_NOFS"},            \
+       {(unsigned long)GFP_ATOMIC,             "GFP_ATOMIC"},          \
+       {(unsigned long)GFP_NOIO,               "GFP_NOIO"},            \
+       {(unsigned long)__GFP_HIGH,             "GFP_HIGH"},            \
+       {(unsigned long)__GFP_WAIT,             "GFP_WAIT"},            \
+       {(unsigned long)__GFP_IO,               "GFP_IO"},              \
+       {(unsigned long)__GFP_COLD,             "GFP_COLD"},            \
+       {(unsigned long)__GFP_NOWARN,           "GFP_NOWARN"},          \
+       {(unsigned long)__GFP_REPEAT,           "GFP_REPEAT"},          \
+       {(unsigned long)__GFP_NOFAIL,           "GFP_NOFAIL"},          \
+       {(unsigned long)__GFP_NORETRY,          "GFP_NORETRY"},         \
+       {(unsigned long)__GFP_COMP,             "GFP_COMP"},            \
+       {(unsigned long)__GFP_ZERO,             "GFP_ZERO"},            \
+       {(unsigned long)__GFP_NOMEMALLOC,       "GFP_NOMEMALLOC"},      \
+       {(unsigned long)__GFP_HARDWALL,         "GFP_HARDWALL"},        \
+       {(unsigned long)__GFP_THISNODE,         "GFP_THISNODE"},        \
+       {(unsigned long)__GFP_RECLAIMABLE,      "GFP_RECLAIMABLE"},     \
+       {(unsigned long)__GFP_MOVABLE,          "GFP_MOVABLE"}          \
+       ) : "GFP_NOWAIT"
+
+TRACE_EVENT(kmalloc,
+
+       TP_PROTO(unsigned long call_site,
+                const void *ptr,
+                size_t bytes_req,
+                size_t bytes_alloc,
+                gfp_t gfp_flags),
+
+       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  call_site       )
+               __field(        const void *,   ptr             )
+               __field(        size_t,         bytes_req       )
+               __field(        size_t,         bytes_alloc     )
+               __field(        gfp_t,          gfp_flags       )
+       ),
+
+       TP_fast_assign(
+               __entry->call_site      = call_site;
+               __entry->ptr            = ptr;
+               __entry->bytes_req      = bytes_req;
+               __entry->bytes_alloc    = bytes_alloc;
+               __entry->gfp_flags      = gfp_flags;
+       ),
+
+       TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+               __entry->call_site,
+               __entry->ptr,
+               __entry->bytes_req,
+               __entry->bytes_alloc,
+               show_gfp_flags(__entry->gfp_flags))
+);
+
+TRACE_EVENT(kmem_cache_alloc,
+
+       TP_PROTO(unsigned long call_site,
+                const void *ptr,
+                size_t bytes_req,
+                size_t bytes_alloc,
+                gfp_t gfp_flags),
+
+       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  call_site       )
+               __field(        const void *,   ptr             )
+               __field(        size_t,         bytes_req       )
+               __field(        size_t,         bytes_alloc     )
+               __field(        gfp_t,          gfp_flags       )
+       ),
+
+       TP_fast_assign(
+               __entry->call_site      = call_site;
+               __entry->ptr            = ptr;
+               __entry->bytes_req      = bytes_req;
+               __entry->bytes_alloc    = bytes_alloc;
+               __entry->gfp_flags      = gfp_flags;
+       ),
+
+       TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+               __entry->call_site,
+               __entry->ptr,
+               __entry->bytes_req,
+               __entry->bytes_alloc,
+               show_gfp_flags(__entry->gfp_flags))
+);
+
+TRACE_EVENT(kmalloc_node,
+
+       TP_PROTO(unsigned long call_site,
+                const void *ptr,
+                size_t bytes_req,
+                size_t bytes_alloc,
+                gfp_t gfp_flags,
+                int node),
+
+       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  call_site       )
+               __field(        const void *,   ptr             )
+               __field(        size_t,         bytes_req       )
+               __field(        size_t,         bytes_alloc     )
+               __field(        gfp_t,          gfp_flags       )
+               __field(        int,            node            )
+       ),
+
+       TP_fast_assign(
+               __entry->call_site      = call_site;
+               __entry->ptr            = ptr;
+               __entry->bytes_req      = bytes_req;
+               __entry->bytes_alloc    = bytes_alloc;
+               __entry->gfp_flags      = gfp_flags;
+               __entry->node           = node;
+       ),
+
+       TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+               __entry->call_site,
+               __entry->ptr,
+               __entry->bytes_req,
+               __entry->bytes_alloc,
+               show_gfp_flags(__entry->gfp_flags),
+               __entry->node)
+);
+
+TRACE_EVENT(kmem_cache_alloc_node,
+
+       TP_PROTO(unsigned long call_site,
+                const void *ptr,
+                size_t bytes_req,
+                size_t bytes_alloc,
+                gfp_t gfp_flags,
+                int node),
+
+       TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  call_site       )
+               __field(        const void *,   ptr             )
+               __field(        size_t,         bytes_req       )
+               __field(        size_t,         bytes_alloc     )
+               __field(        gfp_t,          gfp_flags       )
+               __field(        int,            node            )
+       ),
+
+       TP_fast_assign(
+               __entry->call_site      = call_site;
+               __entry->ptr            = ptr;
+               __entry->bytes_req      = bytes_req;
+               __entry->bytes_alloc    = bytes_alloc;
+               __entry->gfp_flags      = gfp_flags;
+               __entry->node           = node;
+       ),
+
+       TP_printk("call_site=%lx ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+               __entry->call_site,
+               __entry->ptr,
+               __entry->bytes_req,
+               __entry->bytes_alloc,
+               show_gfp_flags(__entry->gfp_flags),
+               __entry->node)
+);
+
+TRACE_EVENT(kfree,
+
+       TP_PROTO(unsigned long call_site, const void *ptr),
+
+       TP_ARGS(call_site, ptr),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  call_site       )
+               __field(        const void *,   ptr             )
+       ),
+
+       TP_fast_assign(
+               __entry->call_site      = call_site;
+               __entry->ptr            = ptr;
+       ),
+
+       TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+
+TRACE_EVENT(kmem_cache_free,
+
+       TP_PROTO(unsigned long call_site, const void *ptr),
+
+       TP_ARGS(call_site, ptr),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  call_site       )
+               __field(        const void *,   ptr             )
+       ),
+
+       TP_fast_assign(
+               __entry->call_site      = call_site;
+               __entry->ptr            = ptr;
+       ),
+
+       TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr)
+);
+#endif /* _TRACE_KMEM_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/lockdep.h b/include/trace/events/lockdep.h
new file mode 100644 (file)
index 0000000..0e956c9
--- /dev/null
@@ -0,0 +1,96 @@
+#if !defined(_TRACE_LOCKDEP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCKDEP_H
+
+#include <linux/lockdep.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lockdep
+
+#ifdef CONFIG_LOCKDEP
+
+TRACE_EVENT(lock_acquire,
+
+       TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
+               int trylock, int read, int check,
+               struct lockdep_map *next_lock, unsigned long ip),
+
+       TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
+
+       TP_STRUCT__entry(
+               __field(unsigned int, flags)
+               __string(name, lock->name)
+       ),
+
+       TP_fast_assign(
+               __entry->flags = (trylock ? 1 : 0) | (read ? 2 : 0);
+               __assign_str(name, lock->name);
+       ),
+
+       TP_printk("%s%s%s", (__entry->flags & 1) ? "try " : "",
+                 (__entry->flags & 2) ? "read " : "",
+                 __get_str(name))
+);
+
+TRACE_EVENT(lock_release,
+
+       TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
+
+       TP_ARGS(lock, nested, ip),
+
+       TP_STRUCT__entry(
+               __string(name, lock->name)
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, lock->name);
+       ),
+
+       TP_printk("%s", __get_str(name))
+);
+
+#ifdef CONFIG_LOCK_STAT
+
+TRACE_EVENT(lock_contended,
+
+       TP_PROTO(struct lockdep_map *lock, unsigned long ip),
+
+       TP_ARGS(lock, ip),
+
+       TP_STRUCT__entry(
+               __string(name, lock->name)
+       ),
+
+       TP_fast_assign(
+               __assign_str(name, lock->name);
+       ),
+
+       TP_printk("%s", __get_str(name))
+);
+
+TRACE_EVENT(lock_acquired,
+       TP_PROTO(struct lockdep_map *lock, unsigned long ip, s64 waittime),
+
+       TP_ARGS(lock, ip, waittime),
+
+       TP_STRUCT__entry(
+               __string(name, lock->name)
+               __field(unsigned long, wait_usec)
+               __field(unsigned long, wait_nsec_rem)
+       ),
+       TP_fast_assign(
+               __assign_str(name, lock->name);
+               __entry->wait_nsec_rem = do_div(waittime, NSEC_PER_USEC);
+               __entry->wait_usec = (unsigned long) waittime;
+       ),
+       TP_printk("%s (%lu.%03lu us)", __get_str(name), __entry->wait_usec,
+                                      __entry->wait_nsec_rem)
+);
+
+#endif
+#endif
+
+#endif /* _TRACE_LOCKDEP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
similarity index 91%
rename from include/trace/sched_event_types.h
rename to include/trace/events/sched.h
index 63547dc..24ab5bc 100644 (file)
@@ -1,9 +1,8 @@
+#if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SCHED_H
 
-/* use <trace/sched.h> instead */
-#ifndef TRACE_EVENT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM sched
@@ -157,6 +156,7 @@ TRACE_EVENT(sched_switch,
                __array(        char,   prev_comm,      TASK_COMM_LEN   )
                __field(        pid_t,  prev_pid                        )
                __field(        int,    prev_prio                       )
+               __field(        long,   prev_state                      )
                __array(        char,   next_comm,      TASK_COMM_LEN   )
                __field(        pid_t,  next_pid                        )
                __field(        int,    next_prio                       )
@@ -166,13 +166,19 @@ TRACE_EVENT(sched_switch,
                memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
                __entry->prev_pid       = prev->pid;
                __entry->prev_prio      = prev->prio;
+               __entry->prev_state     = prev->state;
                memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
                __entry->next_pid       = next->pid;
                __entry->next_prio      = next->prio;
        ),
 
-       TP_printk("task %s:%d [%d] ==> %s:%d [%d]",
+       TP_printk("task %s:%d [%d] (%s) ==> %s:%d [%d]",
                __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
+               __entry->prev_state ?
+                 __print_flags(__entry->prev_state, "|",
+                               { 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
+                               { 16, "Z" }, { 32, "X" }, { 64, "x" },
+                               { 128, "W" }) : "R",
                __entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
 
@@ -181,9 +187,9 @@ TRACE_EVENT(sched_switch,
  */
 TRACE_EVENT(sched_migrate_task,
 
-       TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu),
+       TP_PROTO(struct task_struct *p, int dest_cpu),
 
-       TP_ARGS(p, orig_cpu, dest_cpu),
+       TP_ARGS(p, dest_cpu),
 
        TP_STRUCT__entry(
                __array(        char,   comm,   TASK_COMM_LEN   )
@@ -197,7 +203,7 @@ TRACE_EVENT(sched_migrate_task,
                memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
                __entry->pid            = p->pid;
                __entry->prio           = p->prio;
-               __entry->orig_cpu       = orig_cpu;
+               __entry->orig_cpu       = task_cpu(p);
                __entry->dest_cpu       = dest_cpu;
        ),
 
@@ -334,4 +340,7 @@ TRACE_EVENT(sched_signal_send,
                  __entry->sig, __entry->comm, __entry->pid)
 );
 
-#undef TRACE_SYSTEM
+#endif /* _TRACE_SCHED_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h
new file mode 100644 (file)
index 0000000..1e8fabb
--- /dev/null
@@ -0,0 +1,40 @@
+#if !defined(_TRACE_SKB_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_SKB_H
+
+#include <linux/skbuff.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM skb
+
+/*
+ * Tracepoint for freeing an sk_buff: records the skb address, its protocol
+ * converted with ntohs() (0 when @skb is NULL) and the caller's @location.
+ */
+TRACE_EVENT(kfree_skb,
+
+       TP_PROTO(struct sk_buff *skb, void *location),
+
+       TP_ARGS(skb, location),
+
+       TP_STRUCT__entry(
+               __field(        void *,         skbaddr         )
+               __field(        unsigned short, protocol        )
+               __field(        void *,         location        )
+       ),
+
+       TP_fast_assign(
+               __entry->skbaddr = skb;
+               /* Always assign protocol so a NULL skb cannot leave it unset. */
+               __entry->protocol = skb ? ntohs(skb->protocol) : 0;
+               __entry->location = location;
+       ),
+
+       TP_printk("skbaddr=%p protocol=%u location=%p",
+               __entry->skbaddr, __entry->protocol, __entry->location)
+);
+
+#endif /* _TRACE_SKB_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/workqueue.h b/include/trace/events/workqueue.h
new file mode 100644 (file)
index 0000000..035f1bf
--- /dev/null
@@ -0,0 +1,100 @@
+#if !defined(_TRACE_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WORKQUEUE_H
+
+#include <linux/workqueue.h>
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM workqueue
+
+TRACE_EVENT(workqueue_insertion,
+
+       TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+       TP_ARGS(wq_thread, work),
+
+       TP_STRUCT__entry(
+               __array(char,           thread_comm,    TASK_COMM_LEN)
+               __field(pid_t,          thread_pid)
+               __field(work_func_t,    func)
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+               __entry->thread_pid     = wq_thread->pid;
+               __entry->func           = work->func;
+       ),
+
+       TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+               __entry->thread_pid, __entry->func)
+);
+
+TRACE_EVENT(workqueue_execution,
+
+       TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
+
+       TP_ARGS(wq_thread, work),
+
+       TP_STRUCT__entry(
+               __array(char,           thread_comm,    TASK_COMM_LEN)
+               __field(pid_t,          thread_pid)
+               __field(work_func_t,    func)
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+               __entry->thread_pid     = wq_thread->pid;
+               __entry->func           = work->func;
+       ),
+
+       TP_printk("thread=%s:%d func=%pF", __entry->thread_comm,
+               __entry->thread_pid, __entry->func)
+);
+
+/* Trace the creation of one workqueue thread on a cpu */
+TRACE_EVENT(workqueue_creation,
+
+       TP_PROTO(struct task_struct *wq_thread, int cpu),
+
+       TP_ARGS(wq_thread, cpu),
+
+       TP_STRUCT__entry(
+               __array(char,   thread_comm,    TASK_COMM_LEN)
+               __field(pid_t,  thread_pid)
+               __field(int,    cpu)
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+               __entry->thread_pid     = wq_thread->pid;
+               __entry->cpu            = cpu;
+       ),
+
+       TP_printk("thread=%s:%d cpu=%d", __entry->thread_comm,
+               __entry->thread_pid, __entry->cpu)
+);
+
+TRACE_EVENT(workqueue_destruction,
+
+       TP_PROTO(struct task_struct *wq_thread),
+
+       TP_ARGS(wq_thread),
+
+       TP_STRUCT__entry(
+               __array(char,   thread_comm,    TASK_COMM_LEN)
+               __field(pid_t,  thread_pid)
+       ),
+
+       TP_fast_assign(
+               memcpy(__entry->thread_comm, wq_thread->comm, TASK_COMM_LEN);
+               __entry->thread_pid     = wq_thread->pid;
+       ),
+
+       TP_printk("thread=%s:%d", __entry->thread_comm, __entry->thread_pid)
+);
+
+#endif /* _TRACE_WORKQUEUE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
new file mode 100644 (file)
index 0000000..1867553
--- /dev/null
@@ -0,0 +1,591 @@
+/*
+ * Stage 1 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * struct ftrace_raw_<call> {
+ *     struct trace_entry              ent;
+ *     <type>                          <item>;
+ *     <type2>                         <item2>[<len>];
+ *     [...]
+ * };
+ *
+ * The <type> <item> is created by the __field(type, item) macro or
+ * the __array(type2, item2, len) macro.
+ * We simply do "type item;", and that will create the fields
+ * in the structure.
+ */
+
+#include <linux/ftrace_event.h>
+
+#undef __field
+#define __field(type, item)            type    item;
+
+#undef __array
+#define __array(type, item, len)       type    item[len];
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len) unsigned short __data_loc_##item;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
+       struct ftrace_raw_##name {                              \
+               struct trace_entry      ent;                    \
+               tstruct                                         \
+               char                    __data[0];              \
+       };                                                      \
+       static struct ftrace_event_call event_##name
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+
+/*
+ * Stage 2 of the trace events.
+ *
+ * Include the following:
+ *
+ * struct ftrace_data_offsets_<call> {
+ *     int                             <item1>;
+ *     int                             <item2>;
+ *     [...]
+ * };
+ *
+ * The __dynamic_array() macro creates each int <item>, which holds the
+ * offset of that array from the beginning of the event.
+ */
+
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)       int item;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
+       struct ftrace_data_offsets_##call {                             \
+               tstruct;                                                \
+       };
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Setup the showing format of trace point.
+ *
+ * int
+ * ftrace_format_##call(struct trace_seq *s)
+ * {
+ *     struct ftrace_raw_##call field;
+ *     int ret;
+ *
+ *     ret = trace_seq_printf(s, #type " " #item ";"
+ *                            " offset:%u; size:%u;\n",
+ *                            offsetof(struct ftrace_raw_##call, item),
+ *                            sizeof(field.type));
+ *
+ * }
+ */
+
+#undef TP_STRUCT__entry
+#define TP_STRUCT__entry(args...) args
+
+#undef __field
+#define __field(type, item)                                    \
+       ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
+                              "offset:%u;\tsize:%u;\n",                \
+                              (unsigned int)offsetof(typeof(field), item), \
+                              (unsigned int)sizeof(field.item));       \
+       if (!ret)                                                       \
+               return 0;
+
+#undef __array
+#define __array(type, item, len)                                               \
+       ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"    \
+                              "offset:%u;\tsize:%u;\n",                \
+                              (unsigned int)offsetof(typeof(field), item), \
+                              (unsigned int)sizeof(field.item));       \
+       if (!ret)                                                       \
+               return 0;
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)                                      \
+       ret = trace_seq_printf(s, "\tfield:__data_loc " #item ";\t"            \
+                              "offset:%u;\tsize:%u;\n",                       \
+                              (unsigned int)offsetof(typeof(field),           \
+                                       __data_loc_##item),                    \
+                              (unsigned int)sizeof(field.__data_loc_##item)); \
+       if (!ret)                                                              \
+               return 0;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef __entry
+#define __entry REC
+
+#undef __print_symbolic
+#undef __get_dynamic_array
+#undef __get_str
+
+#undef TP_printk
+#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
+
+#undef TP_fast_assign
+#define TP_fast_assign(args...) args
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)           \
+static int                                                             \
+ftrace_format_##call(struct trace_seq *s)                              \
+{                                                                      \
+       struct ftrace_raw_##call field __attribute__((unused));         \
+       int ret = 0;                                                    \
+                                                                       \
+       tstruct;                                                        \
+       /* a zero result from the final write is a failure too */       \
+       ret = trace_seq_printf(s, "\nprint fmt: " print);               \
+                                                                       \
+       return ret;                                                     \
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 3 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * enum print_line_t
+ * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
+ * {
+ *     struct trace_seq *s = &iter->seq;
+ *     struct ftrace_raw_<call> *field; <-- defined in stage 1
+ *     struct trace_entry *entry;
+ *     struct trace_seq *p;
+ *     int ret;
+ *
+ *     entry = iter->ent;
+ *
+ *     if (entry->type != event_<call>.id) {
+ *             WARN_ON_ONCE(1);
+ *             return TRACE_TYPE_UNHANDLED;
+ *     }
+ *
+ *     field = (typeof(field))entry;
+ *
+ *     p = &get_cpu_var(ftrace_event_seq);
+ *     trace_seq_init(p);
+ *     ret = trace_seq_printf(s, <TP_printk> "\n");
+ *     put_cpu();
+ *     if (!ret)
+ *             return TRACE_TYPE_PARTIAL_LINE;
+ *
+ *     return TRACE_TYPE_HANDLED;
+ * }
+ *
+ * This is the method used to print the raw event to the trace
+ * output format. Note, this is not needed if the data is read
+ * in binary.
+ */
+
+#undef __entry
+#define __entry field
+
+#undef TP_printk
+#define TP_printk(fmt, args...) fmt "\n", args
+
+#undef __get_dynamic_array
+#define __get_dynamic_array(field)     \
+               ((void *)__entry + __entry->__data_loc_##field)
+
+#undef __get_str
+#define __get_str(field) (char *)__get_dynamic_array(field)
+
+#undef __print_flags
+#define __print_flags(flag, delim, flag_array...)                      \
+       ({                                                              \
+               static const struct trace_print_flags flags[] =         \
+                       { flag_array, { -1, NULL }};                    \
+               ftrace_print_flags_seq(p, delim, flag, flags);          \
+       })
+
+#undef __print_symbolic
+#define __print_symbolic(value, symbol_array...)                       \
+       ({                                                              \
+               static const struct trace_print_flags symbols[] =       \
+                       { symbol_array, { -1, NULL }};                  \
+               ftrace_print_symbols_seq(p, value, symbols);            \
+       })
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
+enum print_line_t                                                      \
+ftrace_raw_output_##call(struct trace_iterator *iter, int flags)       \
+{                                                                      \
+       struct trace_seq *s = &iter->seq;                               \
+       struct ftrace_raw_##call *field;                                \
+       struct trace_entry *entry;                                      \
+       struct trace_seq *p;                                            \
+       int ret;                                                        \
+                                                                       \
+       entry = iter->ent;                                              \
+                                                                       \
+       if (entry->type != event_##call.id) {                           \
+               WARN_ON_ONCE(1);                                        \
+               return TRACE_TYPE_UNHANDLED;                            \
+       }                                                               \
+                                                                       \
+       field = (typeof(field))entry;                                   \
+                                                                       \
+       p = &get_cpu_var(ftrace_event_seq);                             \
+       trace_seq_init(p);                                              \
+       ret = trace_seq_printf(s, #call ": " print);                    \
+       put_cpu();                                                      \
+       if (!ret)                                                       \
+               return TRACE_TYPE_PARTIAL_LINE;                         \
+                                                                       \
+       return TRACE_TYPE_HANDLED;                                      \
+}
+       
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef __field
+#define __field(type, item)                                            \
+       ret = trace_define_field(event_call, #type, #item,              \
+                                offsetof(typeof(field), item),         \
+                                sizeof(field.item), is_signed_type(type));     \
+       if (ret)                                                        \
+               return ret;
+
+#undef __array
+#define __array(type, item, len)                                       \
+       BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);                         \
+       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+                                offsetof(typeof(field), item),         \
+                                sizeof(field.item), 0);                \
+       if (ret)                                                        \
+               return ret;
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)                                      \
+       ret = trace_define_field(event_call, "__data_loc" "[" #type "]", #item,\
+                               offsetof(typeof(field), __data_loc_##item),    \
+                                sizeof(field.__data_loc_##item), 0);          \
+       if (ret)                                                               \
+               return ret;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, func, print)           \
+int                                                                    \
+ftrace_define_fields_##call(void)                                      \
+{                                                                      \
+       struct ftrace_raw_##call field;                                 \
+       struct ftrace_event_call *event_call = &event_##call;           \
+       int ret;                                                        \
+                                                                       \
+       __common_field(int, type, 1);                                   \
+       __common_field(unsigned char, flags, 0);                        \
+       __common_field(unsigned char, preempt_count, 0);                \
+       __common_field(int, pid, 1);                                    \
+       __common_field(int, tgid, 1);                                   \
+                                                                       \
+       tstruct;                                                        \
+                                                                       \
+       return ret;                                                     \
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * remember the offset of each array from the beginning of the event.
+ */
+
+#undef __entry
+#define __entry entry
+
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)                               \
+       __data_offsets->item = __data_size +                            \
+                              offsetof(typeof(*entry), __data);        \
+       __data_size += (len) * sizeof(type);
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, strlen(src) + 1)       \
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
+static inline int ftrace_get_offsets_##call(                           \
+       struct ftrace_data_offsets_##call *__data_offsets, proto)       \
+{                                                                      \
+       int __data_size = 0;                                            \
+       struct ftrace_raw_##call __maybe_unused *entry;                 \
+                                                                       \
+       tstruct;                                                        \
+                                                                       \
+       return __data_size;                                             \
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+/*
+ * Stage 4 of the trace events.
+ *
+ * Override the macros in <trace/trace_events.h> to include the following:
+ *
+ * static void ftrace_event_<call>(proto)
+ * {
+ *     event_trace_printk(_RET_IP_, "<call>: " <fmt>);
+ * }
+ *
+ * static int ftrace_reg_event_<call>(void)
+ * {
+ *     int ret;
+ *
+ *     ret = register_trace_<call>(ftrace_event_<call>);
+ *     if (ret)
+ *             pr_info("event trace: Could not activate trace point "
+ *                     "probe to  <call>");
+ *     return ret;
+ * }
+ *
+ * static void ftrace_unreg_event_<call>(void)
+ * {
+ *     unregister_trace_<call>(ftrace_event_<call>);
+ * }
+ *
+ *
+ * For those macros defined with TRACE_EVENT:
+ *
+ * static struct ftrace_event_call event_<call>;
+ *
+ * static void ftrace_raw_event_<call>(proto)
+ * {
+ *     struct ring_buffer_event *event;
+ *     struct ftrace_raw_<call> *entry; <-- defined in stage 1
+ *     unsigned long irq_flags;
+ *     int pc;
+ *
+ *     local_save_flags(irq_flags);
+ *     pc = preempt_count();
+ *
+ *     event = trace_current_buffer_lock_reserve(event_<call>.id,
+ *                               sizeof(struct ftrace_raw_<call>),
+ *                               irq_flags, pc);
+ *     if (!event)
+ *             return;
+ *     entry   = ring_buffer_event_data(event);
+ *
+ *     <assign>;  <-- Here we assign the entries by the __field and
+ *                     __array macros.
+ *
+ *     trace_current_buffer_unlock_commit(event, irq_flags, pc);
+ * }
+ *
+ * static int ftrace_raw_reg_event_<call>(void)
+ * {
+ *     int ret;
+ *
+ *     ret = register_trace_<call>(ftrace_raw_event_<call>);
+ *     if (ret)
+ *             pr_info("event trace: Could not activate trace point "
+ *                     "probe to <call>");
+ *     return ret;
+ * }
+ *
+ * static void ftrace_raw_unreg_event_<call>(void)
+ * {
+ *     unregister_trace_<call>(ftrace_raw_event_<call>);
+ * }
+ *
+ * static struct trace_event ftrace_event_type_<call> = {
+ *     .trace                  = ftrace_raw_output_<call>, <-- stage 2
+ * };
+ *
+ * static int ftrace_raw_init_event_<call>(void)
+ * {
+ *     int id;
+ *
+ *     id = register_ftrace_event(&ftrace_event_type_<call>);
+ *     if (!id)
+ *             return -ENODEV;
+ *     event_<call>.id = id;
+ *     return 0;
+ * }
+ *
+ * static struct ftrace_event_call __used
+ * __attribute__((__aligned__(4)))
+ * __attribute__((section("_ftrace_events"))) event_<call> = {
+ *     .name                   = "<call>",
+ *     .system                 = "<system>",
+ *     .raw_init               = ftrace_raw_init_event_<call>,
+ *     .regfunc                = ftrace_raw_reg_event_<call>,
+ *     .unregfunc              = ftrace_raw_unreg_event_<call>,
+ *     .show_format            = ftrace_format_<call>,
+ * }
+ *
+ */
+
+#undef TP_FMT
+#define TP_FMT(fmt, args...)   fmt "\n", ##args
+
+#ifdef CONFIG_EVENT_PROFILE
+#define _TRACE_PROFILE(call, proto, args)                              \
+static void ftrace_profile_##call(proto)                               \
+{                                                                      \
+       extern void perf_tpcounter_event(int);                          \
+       perf_tpcounter_event(event_##call.id);                          \
+}                                                                      \
+                                                                       \
+static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
+{                                                                      \
+       int ret = 0;                                                    \
+                                                                       \
+       if (!atomic_inc_return(&event_call->profile_count))             \
+               ret = register_trace_##call(ftrace_profile_##call);     \
+                                                                       \
+       return ret;                                                     \
+}                                                                      \
+                                                                       \
+static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
+{                                                                      \
+       if (atomic_add_negative(-1, &event_call->profile_count))        \
+               unregister_trace_##call(ftrace_profile_##call);         \
+}
+
+#define _TRACE_PROFILE_INIT(call)                                      \
+       .profile_count = ATOMIC_INIT(-1),                               \
+       .profile_enable = ftrace_profile_enable_##call,                 \
+       .profile_disable = ftrace_profile_disable_##call,
+
+#else
+#define _TRACE_PROFILE(call, proto, args)
+#define _TRACE_PROFILE_INIT(call)
+#endif
+
+#undef __entry
+#define __entry entry
+
+#undef __field
+#define __field(type, item)
+
+#undef __array
+#define __array(type, item, len)
+
+#undef __dynamic_array
+#define __dynamic_array(type, item, len)                               \
+       __entry->__data_loc_##item = __data_offsets.item;
+
+#undef __string
+#define __string(item, src) __dynamic_array(char, item, -1)            \
+
+#undef __assign_str
+#define __assign_str(dst, src)                                         \
+       strcpy(__get_str(dst), src);
+
+#undef TRACE_EVENT
+#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
+_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))                      \
+                                                                       \
+static struct ftrace_event_call event_##call;                          \
+                                                                       \
+static void ftrace_raw_event_##call(proto)                             \
+{                                                                      \
+       struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+       struct ftrace_event_call *event_call = &event_##call;           \
+       struct ring_buffer_event *event;                                \
+       struct ftrace_raw_##call *entry;                                \
+       unsigned long irq_flags;                                        \
+       int __data_size;                                                \
+       int pc;                                                         \
+                                                                       \
+       local_save_flags(irq_flags);                                    \
+       pc = preempt_count();                                           \
+                                                                       \
+       __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
+                                                                       \
+       event = trace_current_buffer_lock_reserve(event_##call.id,      \
+                                sizeof(*entry) + __data_size,          \
+                                irq_flags, pc);                        \
+       if (!event)                                                     \
+               return;                                                 \
+       entry   = ring_buffer_event_data(event);                        \
+                                                                       \
+                                                                       \
+       tstruct                                                         \
+                                                                       \
+       { assign; }                                                     \
+                                                                       \
+       if (!filter_current_check_discard(event_call, entry, event))    \
+               trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \
+}                                                                      \
+                                                                       \
+static int ftrace_raw_reg_event_##call(void)                           \
+{                                                                      \
+       int ret;                                                        \
+                                                                       \
+       ret = register_trace_##call(ftrace_raw_event_##call);           \
+       if (ret)                                                        \
+               pr_info("event trace: Could not activate trace point "  \
+                       "probe to " #call "\n");                        \
+       return ret;                                                     \
+}                                                                      \
+                                                                       \
+static void ftrace_raw_unreg_event_##call(void)                                \
+{                                                                      \
+       unregister_trace_##call(ftrace_raw_event_##call);               \
+}                                                                      \
+                                                                       \
+static struct trace_event ftrace_event_type_##call = {                 \
+       .trace                  = ftrace_raw_output_##call,             \
+};                                                                     \
+                                                                       \
+static int ftrace_raw_init_event_##call(void)                          \
+{                                                                      \
+       int id;                                                         \
+                                                                       \
+       id = register_ftrace_event(&ftrace_event_type_##call);          \
+       if (!id)                                                        \
+               return -ENODEV;                                         \
+       event_##call.id = id;                                           \
+       INIT_LIST_HEAD(&event_##call.fields);                           \
+       init_preds(&event_##call);                                      \
+       return 0;                                                       \
+}                                                                      \
+                                                                       \
+static struct ftrace_event_call __used                                 \
+__attribute__((__aligned__(4)))                                                \
+__attribute__((section("_ftrace_events"))) event_##call = {            \
+       .name                   = #call,                                \
+       .system                 = __stringify(TRACE_SYSTEM),            \
+       .event                  = &ftrace_event_type_##call,            \
+       .raw_init               = ftrace_raw_init_event_##call,         \
+       .regfunc                = ftrace_raw_reg_event_##call,          \
+       .unregfunc              = ftrace_raw_unreg_event_##call,        \
+       .show_format            = ftrace_format_##call,                 \
+       .define_fields          = ftrace_define_fields_##call,          \
+       _TRACE_PROFILE_INIT(call)                                       \
+}
+
+#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef _TRACE_PROFILE
+#undef _TRACE_PROFILE_INIT
+
diff --git a/include/trace/irq.h b/include/trace/irq.h
deleted file mode 100644 (file)
index ff5d449..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TRACE_IRQ_H
-#define _TRACE_IRQ_H
-
-#include <linux/interrupt.h>
-#include <linux/tracepoint.h>
-
-#include <trace/irq_event_types.h>
-
-#endif
diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h
deleted file mode 100644 (file)
index 85964eb..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-
-/* use <trace/irq.h> instead */
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM irq
-
-/*
- * Tracepoint for entry of interrupt handler:
- */
-TRACE_FORMAT(irq_handler_entry,
-       TP_PROTO(int irq, struct irqaction *action),
-       TP_ARGS(irq, action),
-       TP_FMT("irq=%d handler=%s", irq, action->name)
-       );
-
-/*
- * Tracepoint for return of an interrupt handler:
- */
-TRACE_EVENT(irq_handler_exit,
-
-       TP_PROTO(int irq, struct irqaction *action, int ret),
-
-       TP_ARGS(irq, action, ret),
-
-       TP_STRUCT__entry(
-               __field(        int,    irq     )
-               __field(        int,    ret     )
-       ),
-
-       TP_fast_assign(
-               __entry->irq    = irq;
-               __entry->ret    = ret;
-       ),
-
-       TP_printk("irq=%d return=%s",
-                 __entry->irq, __entry->ret ? "handled" : "unhandled")
-);
-
-TRACE_FORMAT(softirq_entry,
-       TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-       TP_ARGS(h, vec),
-       TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-       );
-
-TRACE_FORMAT(softirq_exit,
-       TP_PROTO(struct softirq_action *h, struct softirq_action *vec),
-       TP_ARGS(h, vec),
-       TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec])
-       );
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/kmemtrace.h b/include/trace/kmemtrace.h
deleted file mode 100644 (file)
index 28ee69f..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <linux/tracepoint.h>
-#include <linux/types.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-DECLARE_TRACE(kmalloc,
-             TP_PROTO(unsigned long call_site,
-                     const void *ptr,
-                     size_t bytes_req,
-                     size_t bytes_alloc,
-                     gfp_t gfp_flags),
-             TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmem_cache_alloc,
-             TP_PROTO(unsigned long call_site,
-                     const void *ptr,
-                     size_t bytes_req,
-                     size_t bytes_alloc,
-                     gfp_t gfp_flags),
-             TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags));
-DECLARE_TRACE(kmalloc_node,
-             TP_PROTO(unsigned long call_site,
-                     const void *ptr,
-                     size_t bytes_req,
-                     size_t bytes_alloc,
-                     gfp_t gfp_flags,
-                     int node),
-             TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kmem_cache_alloc_node,
-             TP_PROTO(unsigned long call_site,
-                     const void *ptr,
-                     size_t bytes_req,
-                     size_t bytes_alloc,
-                     gfp_t gfp_flags,
-                     int node),
-             TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node));
-DECLARE_TRACE(kfree,
-             TP_PROTO(unsigned long call_site, const void *ptr),
-             TP_ARGS(call_site, ptr));
-DECLARE_TRACE(kmem_cache_free,
-             TP_PROTO(unsigned long call_site, const void *ptr),
-             TP_ARGS(call_site, ptr));
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
deleted file mode 100644 (file)
index 5ca67df..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TRACE_LOCKDEP_H
-#define _TRACE_LOCKDEP_H
-
-#include <linux/lockdep.h>
-#include <linux/tracepoint.h>
-
-#include <trace/lockdep_event_types.h>
-
-#endif
diff --git a/include/trace/lockdep_event_types.h b/include/trace/lockdep_event_types.h
deleted file mode 100644 (file)
index adccfcd..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-
-#ifndef TRACE_FORMAT
-# error Do not include this file directly.
-# error Unless you know what you are doing.
-#endif
-
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM lock
-
-#ifdef CONFIG_LOCKDEP
-
-TRACE_FORMAT(lock_acquire,
-       TP_PROTO(struct lockdep_map *lock, unsigned int subclass,
-               int trylock, int read, int check,
-               struct lockdep_map *next_lock, unsigned long ip),
-       TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip),
-       TP_FMT("%s%s%s", trylock ? "try " : "",
-               read ? "read " : "", lock->name)
-       );
-
-TRACE_FORMAT(lock_release,
-       TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip),
-       TP_ARGS(lock, nested, ip),
-       TP_FMT("%s", lock->name)
-       );
-
-#ifdef CONFIG_LOCK_STAT
-
-TRACE_FORMAT(lock_contended,
-       TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-       TP_ARGS(lock, ip),
-       TP_FMT("%s", lock->name)
-       );
-
-TRACE_FORMAT(lock_acquired,
-       TP_PROTO(struct lockdep_map *lock, unsigned long ip),
-       TP_ARGS(lock, ip),
-       TP_FMT("%s", lock->name)
-       );
-
-#endif
-#endif
-
-#undef TRACE_SYSTEM
diff --git a/include/trace/sched.h b/include/trace/sched.h
deleted file mode 100644 (file)
index 4e372a1..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _TRACE_SCHED_H
-#define _TRACE_SCHED_H
-
-#include <linux/sched.h>
-#include <linux/tracepoint.h>
-
-#include <trace/sched_event_types.h>
-
-#endif
diff --git a/include/trace/skb.h b/include/trace/skb.h
deleted file mode 100644 (file)
index b66206d..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _TRACE_SKB_H_
-#define _TRACE_SKB_H_
-
-#include <linux/skbuff.h>
-#include <linux/tracepoint.h>
-
-DECLARE_TRACE(kfree_skb,
-       TP_PROTO(struct sk_buff *skb, void *location),
-       TP_ARGS(skb, location));
-
-#endif
diff --git a/include/trace/trace_event_types.h b/include/trace/trace_event_types.h
deleted file mode 100644 (file)
index df56f56..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-/* trace/<type>_event_types.h here */
-
-#include <trace/sched_event_types.h>
-#include <trace/irq_event_types.h>
-#include <trace/lockdep_event_types.h>
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
deleted file mode 100644 (file)
index fd13750..0000000
+++ /dev/null
@@ -1,5 +0,0 @@
-/* trace/<type>.h here */
-
-#include <trace/sched.h>
-#include <trace/irq.h>
-#include <trace/lockdep.h>
diff --git a/include/trace/workqueue.h b/include/trace/workqueue.h
deleted file mode 100644 (file)
index 7626523..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __TRACE_WORKQUEUE_H
-#define __TRACE_WORKQUEUE_H
-
-#include <linux/tracepoint.h>
-#include <linux/workqueue.h>
-#include <linux/sched.h>
-
-DECLARE_TRACE(workqueue_insertion,
-          TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-          TP_ARGS(wq_thread, work));
-
-DECLARE_TRACE(workqueue_execution,
-          TP_PROTO(struct task_struct *wq_thread, struct work_struct *work),
-          TP_ARGS(wq_thread, work));
-
-/* Trace the creation of one workqueue thread on a cpu */
-DECLARE_TRACE(workqueue_creation,
-          TP_PROTO(struct task_struct *wq_thread, int cpu),
-          TP_ARGS(wq_thread, cpu));
-
-DECLARE_TRACE(workqueue_destruction,
-          TP_PROTO(struct task_struct *wq_thread),
-          TP_ARGS(wq_thread));
-
-#endif /* __TRACE_WORKQUEUE_H */
diff --git a/include/xen/Kbuild b/include/xen/Kbuild
new file mode 100644 (file)
index 0000000..4e65c16
--- /dev/null
@@ -0,0 +1 @@
+header-y += evtchn.h
index 0d5f1ad..e68d59a 100644 (file)
@@ -53,4 +53,7 @@ bool xen_test_irq_pending(int irq);
    irq will be disabled so it won't deliver an interrupt. */
 void xen_poll_irq(int irq);
 
+/* Determine the IRQ which is bound to an event channel */
+unsigned irq_from_evtchn(unsigned int evtchn);
+
 #endif /* _XEN_EVENTS_H */
diff --git a/include/xen/evtchn.h b/include/xen/evtchn.h
new file mode 100644 (file)
index 0000000..14e833e
--- /dev/null
@@ -0,0 +1,88 @@
+/******************************************************************************
+ * evtchn.h
+ *
+ * Interface to /dev/xen/evtchn.
+ *
+ * Copyright (c) 2003-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_EVTCHN_H__
+#define __LINUX_PUBLIC_EVTCHN_H__
+
+/*
+ * Bind a fresh port to VIRQ @virq.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_VIRQ                         \
+       _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq))
+struct ioctl_evtchn_bind_virq {
+       unsigned int virq;
+};
+
+/*
+ * Bind a fresh port to remote <@remote_domain, @remote_port>.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_INTERDOMAIN                  \
+       _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain))
+struct ioctl_evtchn_bind_interdomain {
+       unsigned int remote_domain, remote_port;
+};
+
+/*
+ * Allocate a fresh port for binding to @remote_domain.
+ * Return allocated port.
+ */
+#define IOCTL_EVTCHN_BIND_UNBOUND_PORT                 \
+       _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port))
+struct ioctl_evtchn_bind_unbound_port {
+       unsigned int remote_domain;
+};
+
+/*
+ * Unbind previously allocated @port.
+ */
+#define IOCTL_EVTCHN_UNBIND                            \
+       _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind))
+struct ioctl_evtchn_unbind {
+       unsigned int port;
+};
+
+/*
+ * Notify previously allocated @port.
+ */
+#define IOCTL_EVTCHN_NOTIFY                            \
+       _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify))
+struct ioctl_evtchn_notify {
+       unsigned int port;
+};
+
+/* Clear and reinitialise the event buffer. Clear error condition. */
+#define IOCTL_EVTCHN_RESET                             \
+       _IOC(_IOC_NONE, 'E', 5, 0)
+
+#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
index 453235e..e8b6519 100644 (file)
@@ -57,4 +57,7 @@ struct xen_feature_info {
 /* Declares the features reported by XENVER_get_features. */
 #include "features.h"
 
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
 #endif /* __XEN_PUBLIC_VERSION_H__ */
index f87f961..b9763ba 100644 (file)
@@ -91,8 +91,7 @@ struct xenbus_driver {
        void (*otherend_changed)(struct xenbus_device *dev,
                                 enum xenbus_state backend_state);
        int (*remove)(struct xenbus_device *dev);
-       int (*suspend)(struct xenbus_device *dev);
-       int (*suspend_cancel)(struct xenbus_device *dev);
+       int (*suspend)(struct xenbus_device *dev, pm_message_t state);
        int (*resume)(struct xenbus_device *dev);
        int (*uevent)(struct xenbus_device *, char **, int, char *, int);
        struct device_driver driver;
index 7be4d38..d4e9671 100644 (file)
@@ -308,7 +308,7 @@ menu "RCU Subsystem"
 
 choice
        prompt "RCU Implementation"
-       default CLASSIC_RCU
+       default TREE_RCU
 
 config CLASSIC_RCU
        bool "Classic RCU"
index d721dad..bb7dc57 100644 (file)
@@ -64,6 +64,7 @@
 #include <linux/idr.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
+#include <linux/kmemtrace.h>
 #include <trace/boot.h>
 
 #include <asm/io.h>
@@ -71,7 +72,6 @@
 #include <asm/setup.h>
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
-#include <trace/kmemtrace.h>
 
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/smp.h>
index 16a2189..87c2b64 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1290,8 +1290,8 @@ void exit_sem(struct task_struct *tsk)
                int i;
 
                rcu_read_lock();
-               un = list_entry(rcu_dereference(ulp->list_proc.next),
-                                       struct sem_undo, list_proc);
+               un = list_entry_rcu(ulp->list_proc.next,
+                                   struct sem_undo, list_proc);
                if (&un->list_proc == &ulp->list_proc)
                        semid = -1;
                 else
index faa46da..4259716 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -969,10 +969,13 @@ SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
 SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 {
        struct mm_struct *mm = current->mm;
-       struct vm_area_struct *vma, *next;
+       struct vm_area_struct *vma;
        unsigned long addr = (unsigned long)shmaddr;
-       loff_t size = 0;
        int retval = -EINVAL;
+#ifdef CONFIG_MMU
+       loff_t size = 0;
+       struct vm_area_struct *next;
+#endif
 
        if (addr & ~PAGE_MASK)
                return retval;
index 4242366..a35eee3 100644 (file)
@@ -93,6 +93,7 @@ obj-$(CONFIG_LATENCYTOP) += latencytop.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace/
 obj-$(CONFIG_TRACING) += trace/
+obj-$(CONFIG_X86_DS) += trace/
 obj-$(CONFIG_SMP) += sched_cpupri.o
 obj-$(CONFIG_SLOW_WORK) += slow-work.o
 
index 5054030..27235f5 100644 (file)
@@ -92,23 +92,18 @@ extern int initcall_debug;
 static async_cookie_t  __lowest_in_progress(struct list_head *running)
 {
        struct async_entry *entry;
-       async_cookie_t ret = next_cookie; /* begin with "infinity" value */
 
        if (!list_empty(running)) {
                entry = list_first_entry(running,
                        struct async_entry, list);
-               ret = entry->cookie;
+               return entry->cookie;
        }
 
-       if (!list_empty(&async_pending)) {
-               list_for_each_entry(entry, &async_pending, list)
-                       if (entry->running == running) {
-                               ret = entry->cookie;
-                               break;
-                       }
-       }
+       list_for_each_entry(entry, &async_pending, list)
+               if (entry->running == running)
+                       return entry->cookie;
 
-       return ret;
+       return next_cookie;     /* "infinity" value */
 }
 
 static async_cookie_t  lowest_in_progress(struct list_head *running)
index 42d5654..f6c204f 100644 (file)
@@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigset_t __user *uthese,
 
 }
 
+asmlinkage long
+compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig,
+                            struct compat_siginfo __user *uinfo)
+{
+       siginfo_t info;
+
+       if (copy_siginfo_from_user32(&info, uinfo))
+               return -EFAULT;
+       return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 #ifdef __ARCH_WANT_COMPAT_SYS_TIME
 
 /* compat_time_t is a 32 bit "long" and needs to get converted. */
index abf9cf3..cab535c 100644 (file)
@@ -48,7 +48,7 @@
 #include <linux/tracehook.h>
 #include <linux/fs_struct.h>
 #include <linux/init_task.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 #include <asm/mmu_context.h>
 #include "cred-internals.h"
 
-DEFINE_TRACE(sched_process_free);
-DEFINE_TRACE(sched_process_exit);
-DEFINE_TRACE(sched_process_wait);
-
 static void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p)
index b9e2edd..bb762b4 100644 (file)
@@ -61,7 +61,6 @@
 #include <linux/proc_fs.h>
 #include <linux/blkdev.h>
 #include <linux/fs_struct.h>
-#include <trace/sched.h>
 #include <linux/magic.h>
 
 #include <asm/pgtable.h>
@@ -71,6 +70,8 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/sched.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
@@ -83,8 +84,6 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
-DEFINE_TRACE(sched_process_fork);
-
 int nr_processes(void)
 {
        int cpu;
@@ -982,6 +981,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (!p)
                goto fork_out;
 
+       ftrace_graph_init_task(p);
+
        rt_mutex_init_task(p);
 
 #ifdef CONFIG_PROVE_LOCKING
@@ -1089,8 +1090,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
 #endif
-       if (unlikely(current->ptrace))
-               ptrace_fork(p, clone_flags);
+
+       p->bts = NULL;
 
        /* Perform scheduler related setup. Assign this task to a CPU. */
        sched_fork(p, clone_flags);
@@ -1131,8 +1132,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                }
        }
 
-       ftrace_graph_init_task(p);
-
        p->pid = pid_nr(pid);
        p->tgid = p->pid;
        if (clone_flags & CLONE_THREAD)
@@ -1141,7 +1140,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        if (current->nsproxy != p->nsproxy) {
                retval = ns_cgroup_clone(p, pid);
                if (retval)
-                       goto bad_fork_free_graph;
+                       goto bad_fork_free_pid;
        }
 
        p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
@@ -1233,7 +1232,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                spin_unlock(&current->sighand->siglock);
                write_unlock_irq(&tasklist_lock);
                retval = -ERESTARTNOINTR;
-               goto bad_fork_free_graph;
+               goto bad_fork_free_pid;
        }
 
        if (clone_flags & CLONE_THREAD) {
@@ -1268,8 +1267,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        cgroup_post_fork(p);
        return p;
 
-bad_fork_free_graph:
-       ftrace_graph_exit_task(p);
 bad_fork_free_pid:
        if (pid != &init_struct_pid)
                free_pid(pid);
@@ -1409,7 +1406,7 @@ long do_fork(unsigned long clone_flags,
                }
 
                audit_finish_fork(p);
-               tracehook_report_clone(trace, regs, clone_flags, nr, p);
+               tracehook_report_clone(regs, clone_flags, nr, p);
 
                /*
                 * We set PF_STARTING at creation in case tracing wants to
index d546b2d..80b5ce7 100644 (file)
  *  PRIVATE futexes by Eric Dumazet
  *  Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
  *
+ *  Requeue-PI support by Darren Hart <dvhltc@us.ibm.com>
+ *  Copyright (C) IBM Corporation, 2009
+ *  Thanks to Thomas Gleixner for conceptual design and careful reviews.
+ *
  *  Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
  *  enough at me, Linus for the original (flawed) idea, Matthew
  *  Kirkwood for proof-of-concept implementation.
@@ -96,8 +100,8 @@ struct futex_pi_state {
  */
 struct futex_q {
        struct plist_node list;
-       /* There can only be a single waiter */
-       wait_queue_head_t waiter;
+       /* Waiter reference */
+       struct task_struct *task;
 
        /* Which hash list lock to use: */
        spinlock_t *lock_ptr;
@@ -107,7 +111,9 @@ struct futex_q {
 
        /* Optional priority inheritance state: */
        struct futex_pi_state *pi_state;
-       struct task_struct *task;
+
+       /* rt_waiter storage for requeue_pi: */
+       struct rt_mutex_waiter *rt_waiter;
 
        /* Bitset for the optional bitmasked wakeup */
        u32 bitset;
@@ -278,6 +284,25 @@ void put_futex_key(int fshared, union futex_key *key)
        drop_futex_key_refs(key);
 }
 
+/**
+ * futex_top_waiter() - Return the highest priority waiter on a futex
+ * @hb:     the hash bucket the futex_q's reside in
+ * @key:    the futex key (to distinguish it from other futex_q's)
+ *
+ * Must be called with the hb lock held.
+ */
+static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb,
+                                       union futex_key *key)
+{
+       struct futex_q *this;
+
+       plist_for_each_entry(this, &hb->chain, list) {
+               if (match_futex(&this->key, key))
+                       return this;
+       }
+       return NULL;
+}
+
 static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval)
 {
        u32 curval;
@@ -539,28 +564,160 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
        return 0;
 }
 
+/**
+ * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex
+ * @uaddr:             the pi futex user address
+ * @hb:                        the pi futex hash bucket
+ * @key:               the futex key associated with uaddr and hb
+ * @ps:                        the pi_state pointer where we store the result of the
+ *                     lookup
+ * @task:              the task to perform the atomic lock work for.  This will
+ *                     be "current" except in the case of requeue pi.
+ * @set_waiters:       force setting the FUTEX_WAITERS bit (1) or not (0)
+ *
+ * Returns:
+ *  0 - ready to wait
+ *  1 - acquired the lock
+ * <0 - error
+ *
+ * The hb->lock and futex_key refs shall be held by the caller.
+ */
+static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
+                               union futex_key *key,
+                               struct futex_pi_state **ps,
+                               struct task_struct *task, int set_waiters)
+{
+       int lock_taken, ret, ownerdied = 0;
+       u32 uval, newval, curval;
+
+retry:
+       ret = lock_taken = 0;
+
+       /*
+        * To avoid races, we attempt to take the lock here again
+        * (by doing a 0 -> TID atomic cmpxchg), while holding all
+        * the locks. It will most likely not succeed.
+        */
+       newval = task_pid_vnr(task);
+       if (set_waiters)
+               newval |= FUTEX_WAITERS;
+
+       curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
+
+       if (unlikely(curval == -EFAULT))
+               return -EFAULT;
+
+       /*
+        * Detect deadlocks.
+        */
+       if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task))))
+               return -EDEADLK;
+
+       /*
+        * Surprise - we got the lock. Just return to userspace:
+        */
+       if (unlikely(!curval))
+               return 1;
+
+       uval = curval;
+
+       /*
+        * Set the FUTEX_WAITERS flag, so the owner will know it has someone
+        * to wake at the next unlock.
+        */
+       newval = curval | FUTEX_WAITERS;
+
+       /*
+        * There are two cases, where a futex might have no owner (the
+        * owner TID is 0): OWNER_DIED. We take over the futex in this
+        * case. We also do an unconditional take over, when the owner
+        * of the futex died.
+        *
+        * This is safe as we are protected by the hash bucket lock !
+        */
+       if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
+               /* Keep the OWNER_DIED bit */
+               newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task);
+               ownerdied = 0;
+               lock_taken = 1;
+       }
+
+       curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
+
+       if (unlikely(curval == -EFAULT))
+               return -EFAULT;
+       if (unlikely(curval != uval))
+               goto retry;
+
+       /*
+        * We took the lock due to owner died take over.
+        */
+       if (unlikely(lock_taken))
+               return 1;
+
+       /*
+        * We don't have the lock. Look up the PI state (or create it if
+        * we are the first waiter):
+        */
+       ret = lookup_pi_state(uval, hb, key, ps);
+
+       if (unlikely(ret)) {
+               switch (ret) {
+               case -ESRCH:
+                       /*
+                        * No owner found for this futex. Check if the
+                        * OWNER_DIED bit is set to figure out whether
+                        * this is a robust futex or not.
+                        */
+                       if (get_futex_value_locked(&curval, uaddr))
+                               return -EFAULT;
+
+                       /*
+                        * We simply start over in case of a robust
+                        * futex. The code above will take the futex
+                        * and return happy.
+                        */
+                       if (curval & FUTEX_OWNER_DIED) {
+                               ownerdied = 1;
+                               goto retry;
+                       }
+               default:
+                       break;
+               }
+       }
+
+       return ret;
+}
+
 /*
  * The hash bucket lock must be held when this is called.
  * Afterwards, the futex_q must not be accessed.
  */
 static void wake_futex(struct futex_q *q)
 {
-       plist_del(&q->list, &q->list.plist);
+       struct task_struct *p = q->task;
+
        /*
-        * The lock in wake_up_all() is a crucial memory barrier after the
-        * plist_del() and also before assigning to q->lock_ptr.
+        * We set q->lock_ptr = NULL _before_ we wake up the task. If
+        * a non futex wake up happens on another CPU then the task
+        * might exit and p would dereference a non existing task
+        * struct. Prevent this by holding a reference on p across the
+        * wake up.
         */
-       wake_up(&q->waiter);
+       get_task_struct(p);
+
+       plist_del(&q->list, &q->list.plist);
        /*
-        * The waiting task can free the futex_q as soon as this is written,
-        * without taking any locks.  This must come last.
-        *
-        * A memory barrier is required here to prevent the following store to
-        * lock_ptr from getting ahead of the wakeup. Clearing the lock at the
-        * end of wake_up() does not prevent this store from moving.
+        * The waiting task can free the futex_q as soon as
+        * q->lock_ptr = NULL is written, without taking any locks. A
+        * memory barrier is required here to prevent the following
+        * store to lock_ptr from getting ahead of the plist_del.
         */
        smp_wmb();
        q->lock_ptr = NULL;
+
+       wake_up_state(p, TASK_NORMAL);
+       put_task_struct(p);
 }
 
 static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
@@ -689,7 +846,7 @@ static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset)
 
        plist_for_each_entry_safe(this, next, head, list) {
                if (match_futex (&this->key, &key)) {
-                       if (this->pi_state) {
+                       if (this->pi_state || this->rt_waiter) {
                                ret = -EINVAL;
                                break;
                        }
@@ -802,24 +959,185 @@ out:
        return ret;
 }
 
-/*
- * Requeue all waiters hashed on one physical page to another
- * physical page.
+/**
+ * requeue_futex() - Requeue a futex_q from one hb to another
+ * @q:         the futex_q to requeue
+ * @hb1:       the source hash_bucket
+ * @hb2:       the target hash_bucket
+ * @key2:      the new key for the requeued futex_q
+ */
+static inline
+void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
+                  struct futex_hash_bucket *hb2, union futex_key *key2)
+{
+
+       /*
+        * If key1 and key2 hash to the same bucket, no need to
+        * requeue.
+        */
+       if (likely(&hb1->chain != &hb2->chain)) {
+               plist_del(&q->list, &hb1->chain);
+               plist_add(&q->list, &hb2->chain);
+               q->lock_ptr = &hb2->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+               q->list.plist.lock = &hb2->lock;
+#endif
+       }
+       get_futex_key_refs(key2);
+       q->key = *key2;
+}
+
+/**
+ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
+ * @q: the futex_q
+ * @key:       the key of the requeue target futex
+ *
+ * During futex_requeue, with requeue_pi=1, it is possible to acquire the
+ * target futex if it is uncontended or via a lock steal.  Set the futex_q key
+ * to the requeue target futex so the waiter can detect the wakeup on the right
+ * futex, but remove it from the hb and NULL the rt_waiter so it can detect
+ * atomic lock acquisition.  Must be called with the q->lock_ptr held.
+ */
+static inline
+void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
+{
+       drop_futex_key_refs(&q->key);
+       get_futex_key_refs(key);
+       q->key = *key;
+
+       WARN_ON(plist_node_empty(&q->list));
+       plist_del(&q->list, &q->list.plist);
+
+       WARN_ON(!q->rt_waiter);
+       q->rt_waiter = NULL;
+
+       wake_up_state(q->task, TASK_NORMAL);
+}
+
+/**
+ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter
+ * @pifutex:           the user address of the to futex
+ * @hb1:               the from futex hash bucket, must be locked by the caller
+ * @hb2:               the to futex hash bucket, must be locked by the caller
+ * @key1:              the from futex key
+ * @key2:              the to futex key
+ * @ps:                        address to store the pi_state pointer
+ * @set_waiters:       force setting the FUTEX_WAITERS bit (1) or not (0)
+ *
+ * Try and get the lock on behalf of the top waiter if we can do it atomically.
+ * Wake the top waiter if we succeed.  If the caller specified set_waiters,
+ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit.
+ * hb1 and hb2 must be held by the caller.
+ *
+ * Returns:
+ *  0 - failed to acquire the lock atomically
+ *  1 - acquired the lock
+ * <0 - error
+ */
+static int futex_proxy_trylock_atomic(u32 __user *pifutex,
+                                struct futex_hash_bucket *hb1,
+                                struct futex_hash_bucket *hb2,
+                                union futex_key *key1, union futex_key *key2,
+                                struct futex_pi_state **ps, int set_waiters)
+{
+       struct futex_q *top_waiter = NULL;
+       u32 curval;
+       int ret;
+
+       if (get_futex_value_locked(&curval, pifutex))
+               return -EFAULT;
+
+       /*
+        * Find the top_waiter and determine if there are additional waiters.
+        * If the caller intends to requeue more than 1 waiter to pifutex,
+        * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now,
+        * as we have means to handle the possible fault.  If not, don't set
+        * the bit unnecessarily as it will force the subsequent unlock to enter
+        * the kernel.
+        */
+       top_waiter = futex_top_waiter(hb1, key1);
+
+       /* There are no waiters, nothing for us to do. */
+       if (!top_waiter)
+               return 0;
+
+       /*
+        * Try to take the lock for top_waiter.  Set the FUTEX_WAITERS bit in
+        * the contended case or if set_waiters is 1.  The pi_state is returned
+        * in ps in contended cases.
+        */
+       ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
+                                  set_waiters);
+       if (ret == 1)
+               requeue_pi_wake_futex(top_waiter, key2);
+
+       return ret;
+}
+
+/**
+ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
+ * @uaddr1:    source futex user address
+ * @uaddr2:    target futex user address
+ * @nr_wake:   number of waiters to wake (must be 1 for requeue_pi)
+ * @nr_requeue: number of waiters to requeue (0-INT_MAX)
+ * @requeue_pi: if we are attempting to requeue from a non-pi futex to a
+ *             pi futex (pi to pi requeue is not supported)
+ *
+ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire
+ * uaddr2 atomically on behalf of the top waiter.
+ *
+ * Returns:
+ * >=0 - on success, the number of tasks requeued or woken
+ *  <0 - on error
  */
 static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2,
-                        int nr_wake, int nr_requeue, u32 *cmpval)
+                        int nr_wake, int nr_requeue, u32 *cmpval,
+                        int requeue_pi)
 {
        union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;
+       int drop_count = 0, task_count = 0, ret;
+       struct futex_pi_state *pi_state = NULL;
        struct futex_hash_bucket *hb1, *hb2;
        struct plist_head *head1;
        struct futex_q *this, *next;
-       int ret, drop_count = 0;
+       u32 curval2;
+
+       if (requeue_pi) {
+               /*
+                * requeue_pi requires a pi_state, try to allocate it now
+                * without any locks in case it fails.
+                */
+               if (refill_pi_state_cache())
+                       return -ENOMEM;
+               /*
+                * requeue_pi must wake as many tasks as it can, up to nr_wake
+                * + nr_requeue, since it acquires the rt_mutex prior to
+                * returning to userspace, so as to not leave the rt_mutex with
+                * waiters and no owner.  However, second and third wake-ups
+                * cannot be predicted as they involve race conditions with the
+                * first wake and a fault while looking up the pi_state.  Both
+                * pthread_cond_signal() and pthread_cond_broadcast() should
+                * use nr_wake=1.
+                */
+               if (nr_wake != 1)
+                       return -EINVAL;
+       }
 
 retry:
+       if (pi_state != NULL) {
+               /*
+                * We will have to lookup the pi_state again, so free this one
+                * to keep the accounting correct.
+                */
+               free_pi_state(pi_state);
+               pi_state = NULL;
+       }
+
        ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ);
        if (unlikely(ret != 0))
                goto out;
-       ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_READ);
+       ret = get_futex_key(uaddr2, fshared, &key2,
+                           requeue_pi ? VERIFY_WRITE : VERIFY_READ);
        if (unlikely(ret != 0))
                goto out_put_key1;
 
@@ -854,32 +1172,99 @@ retry_private:
                }
        }
 
+       if (requeue_pi && (task_count - nr_wake < nr_requeue)) {
+               /*
+                * Attempt to acquire uaddr2 and wake the top waiter. If we
+                * intend to requeue waiters, force setting the FUTEX_WAITERS
+                * bit.  We force this here where we are able to easily handle
+                * faults rather than in the requeue loop below.
+                */
+               ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1,
+                                                &key2, &pi_state, nr_requeue);
+
+               /*
+                * At this point the top_waiter has either taken uaddr2 or is
+                * waiting on it.  If the former, then the pi_state will not
+                * exist yet, look it up one more time to ensure we have a
+                * reference to it.
+                */
+               if (ret == 1) {
+                       WARN_ON(pi_state);
+                       task_count++;
+                       ret = get_futex_value_locked(&curval2, uaddr2);
+                       if (!ret)
+                               ret = lookup_pi_state(curval2, hb2, &key2,
+                                                     &pi_state);
+               }
+
+               switch (ret) {
+               case 0:
+                       break;
+               case -EFAULT:
+                       double_unlock_hb(hb1, hb2);
+                       put_futex_key(fshared, &key2);
+                       put_futex_key(fshared, &key1);
+                       ret = get_user(curval2, uaddr2);
+                       if (!ret)
+                               goto retry;
+                       goto out;
+               case -EAGAIN:
+                       /* The owner was exiting, try again. */
+                       double_unlock_hb(hb1, hb2);
+                       put_futex_key(fshared, &key2);
+                       put_futex_key(fshared, &key1);
+                       cond_resched();
+                       goto retry;
+               default:
+                       goto out_unlock;
+               }
+       }
+
        head1 = &hb1->chain;
        plist_for_each_entry_safe(this, next, head1, list) {
-               if (!match_futex (&this->key, &key1))
+               if (task_count - nr_wake >= nr_requeue)
+                       break;
+
+               if (!match_futex(&this->key, &key1))
                        continue;
-               if (++ret <= nr_wake) {
+
+               WARN_ON(!requeue_pi && this->rt_waiter);
+               WARN_ON(requeue_pi && !this->rt_waiter);
+
+               /*
+                * Wake nr_wake waiters.  For requeue_pi, if we acquired the
+                * lock, we already woke the top_waiter.  If not, it will be
+                * woken by futex_unlock_pi().
+                */
+               if (++task_count <= nr_wake && !requeue_pi) {
                        wake_futex(this);
-               } else {
-                       /*
-                        * If key1 and key2 hash to the same bucket, no need to
-                        * requeue.
-                        */
-                       if (likely(head1 != &hb2->chain)) {
-                               plist_del(&this->list, &hb1->chain);
-                               plist_add(&this->list, &hb2->chain);
-                               this->lock_ptr = &hb2->lock;
-#ifdef CONFIG_DEBUG_PI_LIST
-                               this->list.plist.lock = &hb2->lock;
-#endif
-                       }
-                       this->key = key2;
-                       get_futex_key_refs(&key2);
-                       drop_count++;
+                       continue;
+               }
 
-                       if (ret - nr_wake >= nr_requeue)
-                               break;
+               /*
+                * Requeue nr_requeue waiters and possibly one more in the case
+                * of requeue_pi if we couldn't acquire the lock atomically.
+                */
+               if (requeue_pi) {
+                       /* Prepare the waiter to take the rt_mutex. */
+                       atomic_inc(&pi_state->refcount);
+                       this->pi_state = pi_state;
+                       ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex,
+                                                       this->rt_waiter,
+                                                       this->task, 1);
+                       if (ret == 1) {
+                               /* We got the lock. */
+                               requeue_pi_wake_futex(this, &key2);
+                               continue;
+                       } else if (ret) {
+                               /* -EDEADLK */
+                               this->pi_state = NULL;
+                               free_pi_state(pi_state);
+                               goto out_unlock;
+                       }
                }
+               requeue_futex(this, hb1, hb2, &key2);
+               drop_count++;
        }
 
 out_unlock:
@@ -899,7 +1284,9 @@ out_put_keys:
 out_put_key1:
        put_futex_key(fshared, &key1);
 out:
-       return ret;
+       if (pi_state != NULL)
+               free_pi_state(pi_state);
+       return ret ? ret : task_count;
 }
 
 /* The key must be already stored in q->key. */
@@ -907,8 +1294,6 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
 {
        struct futex_hash_bucket *hb;
 
-       init_waitqueue_head(&q->waiter);
-
        get_futex_key_refs(&q->key);
        hb = hash_futex(&q->key);
        q->lock_ptr = &hb->lock;
@@ -1119,39 +1504,153 @@ handle_fault:
  */
 #define FLAGS_SHARED           0x01
 #define FLAGS_CLOCKRT          0x02
+#define FLAGS_HAS_TIMEOUT      0x04
 
 static long futex_wait_restart(struct restart_block *restart);
 
-static int futex_wait(u32 __user *uaddr, int fshared,
-                     u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
+/**
+ * fixup_owner() - Post lock pi_state and corner case management
+ * @uaddr:     user address of the futex
+ * @fshared:   whether the futex is shared (1) or not (0)
+ * @q:         futex_q (contains pi_state and access to the rt_mutex)
+ * @locked:    if the attempt to take the rt_mutex succeeded (1) or not (0)
+ *
+ * After attempting to lock an rt_mutex, this function is called to cleanup
+ * the pi_state owner as well as handle race conditions that may allow us to
+ * acquire the lock. Must be called with the hb lock held.
+ *
+ * Returns:
+ *  1 - success, lock taken
+ *  0 - success, lock not taken
+ * <0 - on error (-EFAULT)
+ */
+static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q,
+                      int locked)
 {
-       struct task_struct *curr = current;
-       struct restart_block *restart;
-       DECLARE_WAITQUEUE(wait, curr);
-       struct futex_hash_bucket *hb;
-       struct futex_q q;
-       u32 uval;
-       int ret;
-       struct hrtimer_sleeper t;
-       int rem = 0;
-
-       if (!bitset)
-               return -EINVAL;
+       struct task_struct *owner;
+       int ret = 0;
 
-       q.pi_state = NULL;
-       q.bitset = bitset;
-retry:
-       q.key = FUTEX_KEY_INIT;
-       ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ);
-       if (unlikely(ret != 0))
+       if (locked) {
+               /*
+                * Got the lock. We might not be the anticipated owner if we
+                * did a lock-steal - fix up the PI-state in that case:
+                */
+               if (q->pi_state->owner != current)
+                       ret = fixup_pi_state_owner(uaddr, q, current, fshared);
                goto out;
+       }
 
-retry_private:
-       hb = queue_lock(&q);
+       /*
+        * Catch the rare case, where the lock was released when we were on the
+        * way back before we locked the hash bucket.
+        */
+       if (q->pi_state->owner == current) {
+               /*
+                * Try to get the rt_mutex now. This might fail as some other
+                * task acquired the rt_mutex after we removed ourself from the
+                * rt_mutex waiters list.
+                */
+               if (rt_mutex_trylock(&q->pi_state->pi_mutex)) {
+                       locked = 1;
+                       goto out;
+               }
+
+               /*
+                * pi_state is incorrect, some other task did a lock steal and
+                * we returned due to timeout or signal without taking the
+                * rt_mutex. Too late. We can access the rt_mutex_owner without
+                * locking, as the other task is now blocked on the hash bucket
+                * lock. Fix the state up.
+                */
+               owner = rt_mutex_owner(&q->pi_state->pi_mutex);
+               ret = fixup_pi_state_owner(uaddr, q, owner, fshared);
+               goto out;
+       }
 
        /*
-        * Access the page AFTER the hash-bucket is locked.
-        * Order is important:
+        * Paranoia check. If we did not take the lock, then we should not be
+        * the owner, nor the pending owner, of the rt_mutex.
+        */
+       if (rt_mutex_owner(&q->pi_state->pi_mutex) == current)
+               printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
+                               "pi-state %p\n", ret,
+                               q->pi_state->pi_mutex.owner,
+                               q->pi_state->owner);
+
+out:
+       return ret ? ret : locked;
+}
+
+/**
+ * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal
+ * @hb:                the futex hash bucket, must be locked by the caller
+ * @q:         the futex_q to queue up on
+ * @timeout:   the prepared hrtimer_sleeper, or null for no timeout
+ */
+static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
+                               struct hrtimer_sleeper *timeout)
+{
+       queue_me(q, hb);
+
+       /*
+        * There might have been scheduling since the queue_me(), as we
+        * cannot hold a spinlock across the get_user() in case it
+        * faults, and we cannot just set TASK_INTERRUPTIBLE state when
+        * queueing ourselves into the futex hash. This code thus has to
+        * rely on the futex_wake() code removing us from hash when it
+        * wakes us up.
+        */
+       set_current_state(TASK_INTERRUPTIBLE);
+
+       /* Arm the timer */
+       if (timeout) {
+               hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
+               if (!hrtimer_active(&timeout->timer))
+                       timeout->task = NULL;
+       }
+
+       /*
+        * !plist_node_empty() is safe here without any lock.
+        * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
+        */
+       if (likely(!plist_node_empty(&q->list))) {
+               /*
+                * If the timer has already expired, current will already be
+                * flagged for rescheduling. Only call schedule if there
+                * is no timeout, or if it has yet to expire.
+                */
+               if (!timeout || timeout->task)
+                       schedule();
+       }
+       __set_current_state(TASK_RUNNING);
+}
+
+/**
+ * futex_wait_setup() - Prepare to wait on a futex
+ * @uaddr:     the futex userspace address
+ * @val:       the expected value
+ * @fshared:   whether the futex is shared (1) or not (0)
+ * @q:         the associated futex_q
+ * @hb:                storage for hash_bucket pointer to be returned to caller
+ *
+ * Setup the futex_q and locate the hash_bucket.  Get the futex value and
+ * compare it with the expected value.  Handle atomic faults internally.
+ * Return with the hb lock held and a q.key reference on success, and unlocked
+ * with no q.key reference on failure.
+ *
+ * Returns:
+ *  0 - uaddr contains val and hb has been locked
+ * <0 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlocked
+ */
+static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared,
+                          struct futex_q *q, struct futex_hash_bucket **hb)
+{
+       u32 uval;
+       int ret;
+
+       /*
+        * Access the page AFTER the hash-bucket is locked.
+        * Order is important:
         *
         *   Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val);
         *   Userspace waker:  if (cond(var)) { var = new; futex_wake(&var); }
@@ -1165,95 +1664,83 @@ retry_private:
         * A consequence is that futex_wait() can return zero and absorb
         * a wakeup when *uaddr != val on entry to the syscall.  This is
         * rare, but normal.
-        *
-        * For shared futexes, we hold the mmap semaphore, so the mapping
-        * cannot have changed since we looked it up in get_futex_key.
         */
+retry:
+       q->key = FUTEX_KEY_INIT;
+       ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ);
+       if (unlikely(ret != 0))
+               return ret;
+
+retry_private:
+       *hb = queue_lock(q);
+
        ret = get_futex_value_locked(&uval, uaddr);
 
-       if (unlikely(ret)) {
-               queue_unlock(&q, hb);
+       if (ret) {
+               queue_unlock(q, *hb);
 
                ret = get_user(uval, uaddr);
                if (ret)
-                       goto out_put_key;
+                       goto out;
 
                if (!fshared)
                        goto retry_private;
 
-               put_futex_key(fshared, &q.key);
+               put_futex_key(fshared, &q->key);
                goto retry;
        }
-       ret = -EWOULDBLOCK;
-       if (unlikely(uval != val)) {
-               queue_unlock(&q, hb);
-               goto out_put_key;
-       }
 
-       /* Only actually queue if *uaddr contained val.  */
-       queue_me(&q, hb);
+       if (uval != val) {
+               queue_unlock(q, *hb);
+               ret = -EWOULDBLOCK;
+       }
 
-       /*
-        * There might have been scheduling since the queue_me(), as we
-        * cannot hold a spinlock across the get_user() in case it
-        * faults, and we cannot just set TASK_INTERRUPTIBLE state when
-        * queueing ourselves into the futex hash.  This code thus has to
-        * rely on the futex_wake() code removing us from hash when it
-        * wakes us up.
-        */
+out:
+       if (ret)
+               put_futex_key(fshared, &q->key);
+       return ret;
+}
 
-       /* add_wait_queue is the barrier after __set_current_state. */
-       __set_current_state(TASK_INTERRUPTIBLE);
-       add_wait_queue(&q.waiter, &wait);
-       /*
-        * !plist_node_empty() is safe here without any lock.
-        * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
-        */
-       if (likely(!plist_node_empty(&q.list))) {
-               if (!abs_time)
-                       schedule();
-               else {
-                       hrtimer_init_on_stack(&t.timer,
-                                             clockrt ? CLOCK_REALTIME :
-                                             CLOCK_MONOTONIC,
-                                             HRTIMER_MODE_ABS);
-                       hrtimer_init_sleeper(&t, current);
-                       hrtimer_set_expires_range_ns(&t.timer, *abs_time,
-                                                    current->timer_slack_ns);
-
-                       hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS);
-                       if (!hrtimer_active(&t.timer))
-                               t.task = NULL;
+static int futex_wait(u32 __user *uaddr, int fshared,
+                     u32 val, ktime_t *abs_time, u32 bitset, int clockrt)
+{
+       struct hrtimer_sleeper timeout, *to = NULL;
+       struct restart_block *restart;
+       struct futex_hash_bucket *hb;
+       struct futex_q q;
+       int ret;
 
-                       /*
-                        * the timer could have already expired, in which
-                        * case current would be flagged for rescheduling.
-                        * Don't bother calling schedule.
-                        */
-                       if (likely(t.task))
-                               schedule();
+       if (!bitset)
+               return -EINVAL;
 
-                       hrtimer_cancel(&t.timer);
+       q.pi_state = NULL;
+       q.bitset = bitset;
+       q.rt_waiter = NULL;
 
-                       /* Flag if a timeout occured */
-                       rem = (t.task == NULL);
+       if (abs_time) {
+               to = &timeout;
 
-                       destroy_hrtimer_on_stack(&t.timer);
-               }
+               hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
+                                     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+               hrtimer_init_sleeper(to, current);
+               hrtimer_set_expires_range_ns(&to->timer, *abs_time,
+                                            current->timer_slack_ns);
        }
-       __set_current_state(TASK_RUNNING);
 
-       /*
-        * NOTE: we don't remove ourselves from the waitqueue because
-        * we are the only user of it.
-        */
+       /* Prepare to wait on uaddr. */
+       ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+       if (ret)
+               goto out;
+
+       /* queue_me and wait for wakeup, timeout, or a signal. */
+       futex_wait_queue_me(hb, &q, to);
 
        /* If we were woken (and unqueued), we succeeded, whatever. */
        ret = 0;
        if (!unqueue_me(&q))
                goto out_put_key;
        ret = -ETIMEDOUT;
-       if (rem)
+       if (to && !to->task)
                goto out_put_key;
 
        /*
@@ -1270,7 +1757,7 @@ retry_private:
        restart->futex.val = val;
        restart->futex.time = abs_time->tv64;
        restart->futex.bitset = bitset;
-       restart->futex.flags = 0;
+       restart->futex.flags = FLAGS_HAS_TIMEOUT;
 
        if (fshared)
                restart->futex.flags |= FLAGS_SHARED;
@@ -1282,6 +1769,10 @@ retry_private:
 out_put_key:
        put_futex_key(fshared, &q.key);
 out:
+       if (to) {
+               hrtimer_cancel(&to->timer);
+               destroy_hrtimer_on_stack(&to->timer);
+       }
        return ret;
 }
 
@@ -1290,13 +1781,16 @@ static long futex_wait_restart(struct restart_block *restart)
 {
        u32 __user *uaddr = (u32 __user *)restart->futex.uaddr;
        int fshared = 0;
-       ktime_t t;
+       ktime_t t, *tp = NULL;
 
-       t.tv64 = restart->futex.time;
+       if (restart->futex.flags & FLAGS_HAS_TIMEOUT) {
+               t.tv64 = restart->futex.time;
+               tp = &t;
+       }
        restart->fn = do_no_restart_syscall;
        if (restart->futex.flags & FLAGS_SHARED)
                fshared = 1;
-       return (long)futex_wait(uaddr, fshared, restart->futex.val, &t,
+       return (long)futex_wait(uaddr, fshared, restart->futex.val, tp,
                                restart->futex.bitset,
                                restart->futex.flags & FLAGS_CLOCKRT);
 }
@@ -1312,11 +1806,10 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
                         int detect, ktime_t *time, int trylock)
 {
        struct hrtimer_sleeper timeout, *to = NULL;
-       struct task_struct *curr = current;
        struct futex_hash_bucket *hb;
-       u32 uval, newval, curval;
+       u32 uval;
        struct futex_q q;
-       int ret, lock_taken, ownerdied = 0;
+       int res, ret;
 
        if (refill_pi_state_cache())
                return -ENOMEM;
@@ -1330,6 +1823,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
        }
 
        q.pi_state = NULL;
+       q.rt_waiter = NULL;
 retry:
        q.key = FUTEX_KEY_INIT;
        ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
@@ -1339,81 +1833,15 @@ retry:
 retry_private:
        hb = queue_lock(&q);
 
-retry_locked:
-       ret = lock_taken = 0;
-
-       /*
-        * To avoid races, we attempt to take the lock here again
-        * (by doing a 0 -> TID atomic cmpxchg), while holding all
-        * the locks. It will most likely not succeed.
-        */
-       newval = task_pid_vnr(current);
-
-       curval = cmpxchg_futex_value_locked(uaddr, 0, newval);
-
-       if (unlikely(curval == -EFAULT))
-               goto uaddr_faulted;
-
-       /*
-        * Detect deadlocks. In case of REQUEUE_PI this is a valid
-        * situation and we return success to user space.
-        */
-       if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) {
-               ret = -EDEADLK;
-               goto out_unlock_put_key;
-       }
-
-       /*
-        * Surprise - we got the lock. Just return to userspace:
-        */
-       if (unlikely(!curval))
-               goto out_unlock_put_key;
-
-       uval = curval;
-
-       /*
-        * Set the WAITERS flag, so the owner will know it has someone
-        * to wake at next unlock
-        */
-       newval = curval | FUTEX_WAITERS;
-
-       /*
-        * There are two cases, where a futex might have no owner (the
-        * owner TID is 0): OWNER_DIED. We take over the futex in this
-        * case. We also do an unconditional take over, when the owner
-        * of the futex died.
-        *
-        * This is safe as we are protected by the hash bucket lock !
-        */
-       if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) {
-               /* Keep the OWNER_DIED bit */
-               newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current);
-               ownerdied = 0;
-               lock_taken = 1;
-       }
-
-       curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
-
-       if (unlikely(curval == -EFAULT))
-               goto uaddr_faulted;
-       if (unlikely(curval != uval))
-               goto retry_locked;
-
-       /*
-        * We took the lock due to owner died take over.
-        */
-       if (unlikely(lock_taken))
-               goto out_unlock_put_key;
-
-       /*
-        * We dont have the lock. Look up the PI state (or create it if
-        * we are the first waiter):
-        */
-       ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
-
+       ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0);
        if (unlikely(ret)) {
                switch (ret) {
-
+               case 1:
+                       /* We got the lock. */
+                       ret = 0;
+                       goto out_unlock_put_key;
+               case -EFAULT:
+                       goto uaddr_faulted;
                case -EAGAIN:
                        /*
                         * Task is exiting and we just wait for the
@@ -1423,25 +1851,6 @@ retry_locked:
                        put_futex_key(fshared, &q.key);
                        cond_resched();
                        goto retry;
-
-               case -ESRCH:
-                       /*
-                        * No owner found for this futex. Check if the
-                        * OWNER_DIED bit is set to figure out whether
-                        * this is a robust futex or not.
-                        */
-                       if (get_futex_value_locked(&curval, uaddr))
-                               goto uaddr_faulted;
-
-                       /*
-                        * We simply start over in case of a robust
-                        * futex. The code above will take the futex
-                        * and return happy.
-                        */
-                       if (curval & FUTEX_OWNER_DIED) {
-                               ownerdied = 1;
-                               goto retry_locked;
-                       }
                default:
                        goto out_unlock_put_key;
                }
@@ -1465,71 +1874,21 @@ retry_locked:
        }
 
        spin_lock(q.lock_ptr);
-
-       if (!ret) {
-               /*
-                * Got the lock. We might not be the anticipated owner
-                * if we did a lock-steal - fix up the PI-state in
-                * that case:
-                */
-               if (q.pi_state->owner != curr)
-                       ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
-       } else {
-               /*
-                * Catch the rare case, where the lock was released
-                * when we were on the way back before we locked the
-                * hash bucket.
-                */
-               if (q.pi_state->owner == curr) {
-                       /*
-                        * Try to get the rt_mutex now. This might
-                        * fail as some other task acquired the
-                        * rt_mutex after we removed ourself from the
-                        * rt_mutex waiters list.
-                        */
-                       if (rt_mutex_trylock(&q.pi_state->pi_mutex))
-                               ret = 0;
-                       else {
-                               /*
-                                * pi_state is incorrect, some other
-                                * task did a lock steal and we
-                                * returned due to timeout or signal
-                                * without taking the rt_mutex. Too
-                                * late. We can access the
-                                * rt_mutex_owner without locking, as
-                                * the other task is now blocked on
-                                * the hash bucket lock. Fix the state
-                                * up.
-                                */
-                               struct task_struct *owner;
-                               int res;
-
-                               owner = rt_mutex_owner(&q.pi_state->pi_mutex);
-                               res = fixup_pi_state_owner(uaddr, &q, owner,
-                                                          fshared);
-
-                               /* propagate -EFAULT, if the fixup failed */
-                               if (res)
-                                       ret = res;
-                       }
-               } else {
-                       /*
-                        * Paranoia check. If we did not take the lock
-                        * in the trylock above, then we should not be
-                        * the owner of the rtmutex, neither the real
-                        * nor the pending one:
-                        */
-                       if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr)
-                               printk(KERN_ERR "futex_lock_pi: ret = %d "
-                                      "pi-mutex: %p pi-state %p\n", ret,
-                                      q.pi_state->pi_mutex.owner,
-                                      q.pi_state->owner);
-               }
-       }
+       /*
+        * Fixup the pi_state owner and possibly acquire the lock if we
+        * haven't already.
+        */
+       res = fixup_owner(uaddr, fshared, &q, !ret);
+       /*
+        * If fixup_owner() returned an error, propagate that.  If it acquired
+        * the lock, clear our -ETIMEDOUT or -EINTR.
+        */
+       if (res)
+               ret = (res < 0) ? res : 0;
 
        /*
-        * If fixup_pi_state_owner() faulted and was unable to handle the
-        * fault, unlock it and return the fault to userspace.
+        * If fixup_owner() faulted and was unable to handle the fault, unlock
+        * it and return the fault to userspace.
         */
        if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current))
                rt_mutex_unlock(&q.pi_state->pi_mutex);
@@ -1537,9 +1896,7 @@ retry_locked:
        /* Unqueue and drop the lock */
        unqueue_me_pi(&q);
 
-       if (to)
-               destroy_hrtimer_on_stack(&to->timer);
-       return ret != -EINTR ? ret : -ERESTARTNOINTR;
+       goto out;
 
 out_unlock_put_key:
        queue_unlock(&q, hb);
@@ -1549,7 +1906,7 @@ out_put_key:
 out:
        if (to)
                destroy_hrtimer_on_stack(&to->timer);
-       return ret;
+       return ret != -EINTR ? ret : -ERESTARTNOINTR;
 
 uaddr_faulted:
        /*
@@ -1572,7 +1929,6 @@ uaddr_faulted:
        goto retry;
 }
 
-
 /*
  * Userspace attempted a TID -> 0 atomic transition, and failed.
  * This is the in-kernel slowpath: we look up the PI state (if any),
@@ -1674,6 +2030,229 @@ pi_faulted:
        return ret;
 }
 
+/**
+ * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex
+ * @hb:                the hash_bucket futex_q was originally enqueued on
+ * @q:         the futex_q woken while waiting to be requeued
+ * @key2:      the futex_key of the requeue target futex
+ * @timeout:   the timeout associated with the wait (NULL if none)
+ *
+ * Detect if the task was woken on the initial futex as opposed to the requeue
+ * target futex.  If so, determine if it was a timeout or a signal that caused
+ * the wakeup and return the appropriate error code to the caller.  Must be
+ * called with the hb lock held.
+ *
+ * Returns
+ *  0 - no early wakeup detected
+ * <0 - -ETIMEDOUT or -ERESTARTNOINTR
+ */
+static inline
+int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
+                                  struct futex_q *q, union futex_key *key2,
+                                  struct hrtimer_sleeper *timeout)
+{
+       int ret = 0;
+
+       /*
+        * With the hb lock held, we avoid races while we process the wakeup.
+        * We only need to hold hb (and not hb2) to ensure atomicity as the
+        * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb.
+        * It can't be requeued from uaddr2 to something else since we don't
+        * support a PI aware source futex for requeue.
+        */
+       if (!match_futex(&q->key, key2)) {
+               WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr));
+               /*
+                * We were woken prior to requeue by a timeout or a signal.
+                * Unqueue the futex_q and determine which it was.
+                */
+               plist_del(&q->list, &q->list.plist);
+               drop_futex_key_refs(&q->key);
+
+               if (timeout && !timeout->task)
+                       ret = -ETIMEDOUT;
+               else
+                       ret = -ERESTARTNOINTR;
+       }
+       return ret;
+}
+
+/**
+ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2
+ * @uaddr:     the futex we initially wait on (non-pi)
+ * @fshared:   whether the futexes are shared (1) or not (0).  They must be
+ *             the same type, no requeueing from private to shared, etc.
+ * @val:       the expected value of uaddr
+ * @abs_time:  absolute timeout
+ * @bitset:    32 bit wakeup bitset set by userspace, defaults to all.
+ * @clockrt:   whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0)
+ * @uaddr2:    the pi futex we will take prior to returning to user-space
+ *
+ * The caller will wait on uaddr and will be requeued by futex_requeue() to
+ * uaddr2 which must be PI aware.  Normal wakeup will wake on uaddr2 and
+ * complete the acquisition of the rt_mutex prior to returning to userspace.
+ * This ensures the rt_mutex maintains an owner when it has waiters; without
+ * one, the pi logic wouldn't know which task to boost/deboost, if there was a
+ * need to.
+ *
+ * We call schedule in futex_wait_queue_me() when we enqueue and return there
+ * via the following:
+ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue()
+ * 2) wakeup on uaddr2 after a requeue and subsequent unlock
+ * 3) signal (before or after requeue)
+ * 4) timeout (before or after requeue)
+ *
+ * If 3 happens before the requeue, cleanup and return -ERESTARTNOINTR.
+ *
+ * If 2, we may then block on trying to take the rt_mutex and return via:
+ * 5) successful lock
+ * 6) signal
+ * 7) timeout
+ * 8) other lock acquisition failure
+ *
+ * If 6, return -EWOULDBLOCK (restarting the syscall would do the same).
+ *
+ * If 4 or 7, we cleanup and return with -ETIMEDOUT.
+ *
+ * Returns:
+ *  0 - On success
+ * <0 - On error
+ */
+static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
+                                u32 val, ktime_t *abs_time, u32 bitset,
+                                int clockrt, u32 __user *uaddr2)
+{
+       struct hrtimer_sleeper timeout, *to = NULL;
+       struct rt_mutex_waiter rt_waiter;
+       struct rt_mutex *pi_mutex = NULL;
+       struct futex_hash_bucket *hb;
+       union futex_key key2;
+       struct futex_q q;
+       int res, ret;
+
+       if (!bitset)
+               return -EINVAL;
+
+       if (abs_time) {
+               to = &timeout;
+               hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME :
+                                     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+               hrtimer_init_sleeper(to, current);
+               hrtimer_set_expires_range_ns(&to->timer, *abs_time,
+                                            current->timer_slack_ns);
+       }
+
+       /*
+        * The waiter is allocated on our stack, manipulated by the requeue
+        * code while we sleep on uaddr.
+        */
+       debug_rt_mutex_init_waiter(&rt_waiter);
+       rt_waiter.task = NULL;
+
+       q.pi_state = NULL;
+       q.bitset = bitset;
+       q.rt_waiter = &rt_waiter;
+
+       key2 = FUTEX_KEY_INIT;
+       ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
+       if (unlikely(ret != 0))
+               goto out;
+
+       /* Prepare to wait on uaddr. */
+       ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
+       if (ret)
+               goto out_key2;
+
+       /* Queue the futex_q, drop the hb lock, wait for wakeup. */
+       futex_wait_queue_me(hb, &q, to);
+
+       spin_lock(&hb->lock);
+       ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
+       spin_unlock(&hb->lock);
+       if (ret)
+               goto out_put_keys;
+
+       /*
+        * In order for us to be here, we know our q.key == key2, and since
+        * we took the hb->lock above, we also know that futex_requeue() has
+        * completed and we no longer have to concern ourselves with a wakeup
+        * race with the atomic proxy lock acquisition by the requeue code.
+        */
+
+       /* Check if the requeue code acquired the second futex for us. */
+       if (!q.rt_waiter) {
+               /*
+                * Got the lock. We might not be the anticipated owner if we
+                * did a lock-steal - fix up the PI-state in that case.
+                */
+               if (q.pi_state && (q.pi_state->owner != current)) {
+                       spin_lock(q.lock_ptr);
+                       ret = fixup_pi_state_owner(uaddr2, &q, current,
+                                                  fshared);
+                       spin_unlock(q.lock_ptr);
+               }
+       } else {
+               /*
+                * We have been woken up by futex_unlock_pi(), a timeout, or a
+                * signal.  futex_unlock_pi() will not destroy the lock_ptr nor
+                * the pi_state.
+                */
+               WARN_ON(!q.pi_state);
+               pi_mutex = &q.pi_state->pi_mutex;
+               ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1);
+               debug_rt_mutex_free_waiter(&rt_waiter);
+
+               spin_lock(q.lock_ptr);
+               /*
+                * Fixup the pi_state owner and possibly acquire the lock if we
+                * haven't already.
+                */
+               res = fixup_owner(uaddr2, fshared, &q, !ret);
+               /*
+                * If fixup_owner() returned an error, propagate that.  If it
+                * acquired the lock, clear our -ETIMEDOUT or -EINTR.
+                */
+               if (res)
+                       ret = (res < 0) ? res : 0;
+
+               /* Unqueue and drop the lock. */
+               unqueue_me_pi(&q);
+       }
+
+       /*
+        * If the fixup faulted, drop the rt_mutex if we hold it and return the
+        * fault.  pi_mutex is only set on the proxy-lock path, so test it.
+        */
+       if (ret == -EFAULT) {
+               if (pi_mutex && rt_mutex_owner(pi_mutex) == current)
+                       rt_mutex_unlock(pi_mutex);
+       } else if (ret == -EINTR) {
+               /*
+                * We've already been requeued, but we have no way to
+                * restart by calling futex_lock_pi() directly. We
+                * could restart the syscall, but that will look at
+                * the user space value and return right away. So we
+                * drop back with EWOULDBLOCK to tell user space that
+                * "val" has been changed. That's the same what the
+                * restart of the syscall would do in
+                * futex_wait_setup().
+                */
+               ret = -EWOULDBLOCK;
+       }
+
+out_put_keys:
+       put_futex_key(fshared, &q.key);
+out_key2:
+       put_futex_key(fshared, &key2);
+
+out:
+       if (to) {
+               hrtimer_cancel(&to->timer);
+               destroy_hrtimer_on_stack(&to->timer);
+       }
+       return ret;
+}
+
 /*
  * Support for robust futexes: the kernel cleans up held futexes at
  * thread exit time.
@@ -1896,7 +2475,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                fshared = 1;
 
        clockrt = op & FUTEX_CLOCK_REALTIME;
-       if (clockrt && cmd != FUTEX_WAIT_BITSET)
+       if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
                return -ENOSYS;
 
        switch (cmd) {
@@ -1911,10 +2490,11 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                ret = futex_wake(uaddr, fshared, val, val3);
                break;
        case FUTEX_REQUEUE:
-               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
+               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
                break;
        case FUTEX_CMP_REQUEUE:
-               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
+               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
+                                   0);
                break;
        case FUTEX_WAKE_OP:
                ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
@@ -1931,6 +2511,15 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
                if (futex_cmpxchg_enabled)
                        ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
                break;
+       case FUTEX_WAIT_REQUEUE_PI:
+               val3 = FUTEX_BITSET_MATCH_ANY;
+               ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3,
+                                           clockrt, uaddr2);
+               break;
+       case FUTEX_CMP_REQUEUE_PI:
+               ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
+                                   1);
+               break;
        default:
                ret = -ENOSYS;
        }
@@ -1948,7 +2537,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
        int cmd = op & FUTEX_CMD_MASK;
 
        if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
-                     cmd == FUTEX_WAIT_BITSET)) {
+                     cmd == FUTEX_WAIT_BITSET ||
+                     cmd == FUTEX_WAIT_REQUEUE_PI)) {
                if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
                        return -EFAULT;
                if (!timespec_valid(&ts))
@@ -1960,11 +2550,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
                tp = &t;
        }
        /*
-        * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
+        * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
         * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
         */
        if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
-           cmd == FUTEX_WAKE_OP)
+           cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
                val2 = (u32) (unsigned long) utime;
 
        return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
index 3394f8f..7d04780 100644 (file)
@@ -3,5 +3,5 @@ obj-y := handle.o manage.o spurious.o resend.o chip.o devres.o
 obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
 obj-$(CONFIG_PROC_FS) += proc.o
 obj-$(CONFIG_GENERIC_PENDING_IRQ) += migration.o
-obj-$(CONFIG_NUMA_MIGRATE_IRQ_DESC) += numa_migrate.o
+obj-$(CONFIG_NUMA_IRQ_DESC) += numa_migrate.o
 obj-$(CONFIG_PM_SLEEP) += pm.o
index c687ba4..13c68e7 100644 (file)
@@ -359,7 +359,6 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
 
        spin_lock(&desc->lock);
        mask_ack_irq(desc, irq);
-       desc = irq_remap_to_desc(irq, desc);
 
        if (unlikely(desc->status & IRQ_INPROGRESS))
                goto out_unlock;
@@ -438,7 +437,6 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
        desc->status &= ~IRQ_INPROGRESS;
 out:
        desc->chip->eoi(irq);
-       desc = irq_remap_to_desc(irq, desc);
 
        spin_unlock(&desc->lock);
 }
@@ -475,7 +473,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
                    !desc->action)) {
                desc->status |= (IRQ_PENDING | IRQ_MASKED);
                mask_ack_irq(desc, irq);
-               desc = irq_remap_to_desc(irq, desc);
                goto out_unlock;
        }
        kstat_incr_irqs_this_cpu(irq, desc);
@@ -483,7 +480,6 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
        /* Start handling the irq */
        if (desc->chip->ack)
                desc->chip->ack(irq);
-       desc = irq_remap_to_desc(irq, desc);
 
        /* Mark the IRQ currently in progress.*/
        desc->status |= IRQ_INPROGRESS;
@@ -544,10 +540,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
        if (!noirqdebug)
                note_interrupt(irq, desc, action_ret);
 
-       if (desc->chip->eoi) {
+       if (desc->chip->eoi)
                desc->chip->eoi(irq);
-               desc = irq_remap_to_desc(irq, desc);
-       }
 }
 
 void
@@ -582,10 +576,8 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
 
        /* Uninstall? */
        if (handle == handle_bad_irq) {
-               if (desc->chip != &no_irq_chip) {
+               if (desc->chip != &no_irq_chip)
                        mask_ack_irq(desc, irq);
-                       desc = irq_remap_to_desc(irq, desc);
-               }
                desc->status |= IRQ_DISABLED;
                desc->depth = 1;
        }
index 26e0875..a600184 100644 (file)
  */
 
 #include <linux/irq.h>
+#include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/random.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/rculist.h>
 #include <linux/hash.h>
-#include <trace/irq.h>
 #include <linux/bootmem.h>
+#include <trace/events/irq.h>
 
 #include "internals.h"
 
@@ -81,45 +82,48 @@ static struct irq_desc irq_desc_init = {
        .lock       = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock),
 };
 
-void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr)
+void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr)
 {
-       int node;
        void *ptr;
 
-       node = cpu_to_node(cpu);
-       ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node);
+       if (slab_is_available())
+               ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs),
+                                  GFP_ATOMIC, node);
+       else
+               ptr = alloc_bootmem_node(NODE_DATA(node),
+                               nr * sizeof(*desc->kstat_irqs));
 
        /*
         * don't overwite if can not get new one
         * init_copy_kstat_irqs() could still use old one
         */
        if (ptr) {
-               printk(KERN_DEBUG "  alloc kstat_irqs on cpu %d node %d\n",
-                        cpu, node);
+               printk(KERN_DEBUG "  alloc kstat_irqs on node %d\n", node);
                desc->kstat_irqs = ptr;
        }
 }
 
-static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu)
+static void init_one_irq_desc(int irq, struct irq_desc *desc, int node)
 {
        memcpy(desc, &irq_desc_init, sizeof(struct irq_desc));
 
        spin_lock_init(&desc->lock);
        desc->irq = irq;
 #ifdef CONFIG_SMP
-       desc->cpu = cpu;
+       desc->node = node;
 #endif
        lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-       init_kstat_irqs(desc, cpu, nr_cpu_ids);
+       init_kstat_irqs(desc, node, nr_cpu_ids);
        if (!desc->kstat_irqs) {
                printk(KERN_ERR "can not alloc kstat_irqs\n");
                BUG_ON(1);
        }
-       if (!init_alloc_desc_masks(desc, cpu, false)) {
+       if (!alloc_desc_masks(desc, node, false)) {
                printk(KERN_ERR "can not alloc irq_desc cpumasks\n");
                BUG_ON(1);
        }
-       arch_init_chip_data(desc, cpu);
+       init_desc_masks(desc);
+       arch_init_chip_data(desc, node);
 }
 
 /*
@@ -169,7 +173,8 @@ int __init early_irq_init(void)
                desc[i].irq = i;
                desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids;
                lockdep_set_class(&desc[i].lock, &irq_desc_lock_class);
-               init_alloc_desc_masks(&desc[i], 0, true);
+               alloc_desc_masks(&desc[i], 0, true);
+               init_desc_masks(&desc[i]);
                irq_desc_ptrs[i] = desc + i;
        }
 
@@ -187,11 +192,10 @@ struct irq_desc *irq_to_desc(unsigned int irq)
        return NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
 {
        struct irq_desc *desc;
        unsigned long flags;
-       int node;
 
        if (irq >= nr_irqs) {
                WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n",
@@ -210,15 +214,17 @@ struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
        if (desc)
                goto out_unlock;
 
-       node = cpu_to_node(cpu);
-       desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
-       printk(KERN_DEBUG "  alloc irq_desc for %d on cpu %d node %d\n",
-                irq, cpu, node);
+       if (slab_is_available())
+               desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
+       else
+               desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc));
+
+       printk(KERN_DEBUG "  alloc irq_desc for %d on node %d\n", irq, node);
        if (!desc) {
                printk(KERN_ERR "can not alloc irq_desc\n");
                BUG_ON(1);
        }
-       init_one_irq_desc(irq, desc, cpu);
+       init_one_irq_desc(irq, desc, node);
 
        irq_desc_ptrs[irq] = desc;
 
@@ -256,7 +262,8 @@ int __init early_irq_init(void)
 
        for (i = 0; i < count; i++) {
                desc[i].irq = i;
-               init_alloc_desc_masks(&desc[i], 0, true);
+               alloc_desc_masks(&desc[i], 0, true);
+               init_desc_masks(&desc[i]);
                desc[i].kstat_irqs = kstat_irqs_all[i];
        }
        return arch_early_irq_init();
@@ -267,7 +274,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
        return (irq < NR_IRQS) ? irq_desc + irq : NULL;
 }
 
-struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu)
+struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
 {
        return irq_to_desc(irq);
 }
@@ -348,9 +355,6 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
               "but no thread function available.", irq, action->name);
 }
 
-DEFINE_TRACE(irq_handler_entry);
-DEFINE_TRACE(irq_handler_exit);
-
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:       the interrupt number
@@ -453,11 +457,8 @@ unsigned int __do_IRQ(unsigned int irq)
                /*
                 * No locking required for CPU-local interrupts:
                 */
-               if (desc->chip->ack) {
+               if (desc->chip->ack)
                        desc->chip->ack(irq);
-                       /* get new one */
-                       desc = irq_remap_to_desc(irq, desc);
-               }
                if (likely(!(desc->status & IRQ_DISABLED))) {
                        action_ret = handle_IRQ_event(irq, desc->action);
                        if (!noirqdebug)
@@ -468,10 +469,8 @@ unsigned int __do_IRQ(unsigned int irq)
        }
 
        spin_lock(&desc->lock);
-       if (desc->chip->ack) {
+       if (desc->chip->ack)
                desc->chip->ack(irq);
-               desc = irq_remap_to_desc(irq, desc);
-       }
        /*
         * REPLAY is when Linux resends an IRQ that was dropped earlier
         * WAITING is used by probe to mark irqs that are being tested
index 01ce20e..7346825 100644 (file)
@@ -16,7 +16,7 @@ extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
 extern void __enable_irq(struct irq_desc *desc, unsigned int irq, bool resume);
 
 extern struct lock_class_key irq_desc_lock_class;
-extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr);
+extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
 extern void clear_kstat_irqs(struct irq_desc *desc);
 extern spinlock_t sparse_irq_lock;
 
@@ -42,6 +42,9 @@ static inline void unregister_handler_proc(unsigned int irq,
 
 extern int irq_select_affinity_usr(unsigned int irq);
 
+extern void
+irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask);
+
 /*
  * Debugging printout:
  */
index 2734eca..aaf5c9d 100644 (file)
@@ -80,7 +80,7 @@ int irq_can_set_affinity(unsigned int irq)
        return 1;
 }
 
-static void
+void
 irq_set_thread_affinity(struct irq_desc *desc, const struct cpumask *cpumask)
 {
        struct irqaction *action = desc->action;
@@ -109,17 +109,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
        spin_lock_irqsave(&desc->lock, flags);
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
-       if (desc->status & IRQ_MOVE_PCNTXT)
-               desc->chip->set_affinity(irq, cpumask);
+       if (desc->status & IRQ_MOVE_PCNTXT) {
+               if (!desc->chip->set_affinity(irq, cpumask)) {
+                       cpumask_copy(desc->affinity, cpumask);
+                       irq_set_thread_affinity(desc, cpumask);
+               }
+       }
        else {
                desc->status |= IRQ_MOVE_PENDING;
                cpumask_copy(desc->pending_mask, cpumask);
        }
 #else
-       cpumask_copy(desc->affinity, cpumask);
-       desc->chip->set_affinity(irq, cpumask);
+       if (!desc->chip->set_affinity(irq, cpumask)) {
+               cpumask_copy(desc->affinity, cpumask);
+               irq_set_thread_affinity(desc, cpumask);
+       }
 #endif
-       irq_set_thread_affinity(desc, cpumask);
        desc->status |= IRQ_AFFINITY_SET;
        spin_unlock_irqrestore(&desc->lock, flags);
        return 0;
index e05ad9b..cfe767c 100644 (file)
@@ -1,5 +1,8 @@
 
 #include <linux/irq.h>
+#include <linux/interrupt.h>
+
+#include "internals.h"
 
 void move_masked_irq(int irq)
 {
@@ -39,11 +42,12 @@ void move_masked_irq(int irq)
         * masking the irqs.
         */
        if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask)
-                  < nr_cpu_ids)) {
-               cpumask_and(desc->affinity,
-                           desc->pending_mask, cpu_online_mask);
-               desc->chip->set_affinity(irq, desc->affinity);
-       }
+                  < nr_cpu_ids))
+               if (!desc->chip->set_affinity(irq, desc->pending_mask)) {
+                       cpumask_copy(desc->affinity, desc->pending_mask);
+                       irq_set_thread_affinity(desc, desc->pending_mask);
+               }
+
        cpumask_clear(desc->pending_mask);
 }
 
index 44bbdcb..2f69bee 100644 (file)
@@ -15,9 +15,9 @@
 
 static void init_copy_kstat_irqs(struct irq_desc *old_desc,
                                 struct irq_desc *desc,
-                                int cpu, int nr)
+                                int node, int nr)
 {
-       init_kstat_irqs(desc, cpu, nr);
+       init_kstat_irqs(desc, node, nr);
 
        if (desc->kstat_irqs != old_desc->kstat_irqs)
                memcpy(desc->kstat_irqs, old_desc->kstat_irqs,
@@ -34,20 +34,20 @@ static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc)
 }
 
 static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc,
-                struct irq_desc *desc, int cpu)
+                struct irq_desc *desc, int node)
 {
        memcpy(desc, old_desc, sizeof(struct irq_desc));
-       if (!init_alloc_desc_masks(desc, cpu, false)) {
+       if (!alloc_desc_masks(desc, node, false)) {
                printk(KERN_ERR "irq %d: can not get new irq_desc cpumask "
                                "for migration.\n", irq);
                return false;
        }
        spin_lock_init(&desc->lock);
-       desc->cpu = cpu;
+       desc->node = node;
        lockdep_set_class(&desc->lock, &irq_desc_lock_class);
-       init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids);
+       init_copy_kstat_irqs(old_desc, desc, node, nr_cpu_ids);
        init_copy_desc_masks(old_desc, desc);
-       arch_init_copy_chip_data(old_desc, desc, cpu);
+       arch_init_copy_chip_data(old_desc, desc, node);
        return true;
 }
 
@@ -59,12 +59,11 @@ static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc)
 }
 
 static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
-                                               int cpu)
+                                               int node)
 {
        struct irq_desc *desc;
        unsigned int irq;
        unsigned long flags;
-       int node;
 
        irq = old_desc->irq;
 
@@ -76,7 +75,6 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
        if (desc && old_desc != desc)
                goto out_unlock;
 
-       node = cpu_to_node(cpu);
        desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node);
        if (!desc) {
                printk(KERN_ERR "irq %d: can not get new irq_desc "
@@ -85,7 +83,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
                desc = old_desc;
                goto out_unlock;
        }
-       if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) {
+       if (!init_copy_one_irq_desc(irq, old_desc, desc, node)) {
                /* still use old one */
                kfree(desc);
                desc = old_desc;
@@ -97,9 +95,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc,
 
        /* free the old one */
        free_one_irq_desc(old_desc, desc);
-       spin_unlock(&old_desc->lock);
        kfree(old_desc);
-       spin_lock(&desc->lock);
 
        return desc;
 
@@ -109,24 +105,14 @@ out_unlock:
        return desc;
 }
 
-struct irq_desc *move_irq_desc(struct irq_desc *desc, int cpu)
+struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
 {
-       int old_cpu;
-       int node, old_node;
-
        /* those all static, do move them */
        if (desc->irq < NR_IRQS_LEGACY)
                return desc;
 
-       old_cpu = desc->cpu;
-       if (old_cpu != cpu) {
-               node = cpu_to_node(cpu);
-               old_node = cpu_to_node(old_cpu);
-               if (old_node != node)
-                       desc = __real_move_irq_desc(desc, cpu);
-               else
-                       desc->cpu = cpu;
-       }
+       if (desc->node != node)
+               desc = __real_move_irq_desc(desc, node);
 
        return desc;
 }
index 4ebaf85..41c88fe 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #define KTHREAD_NICE_LEVEL (-5)
 
@@ -21,9 +21,6 @@ static DEFINE_SPINLOCK(kthread_create_lock);
 static LIST_HEAD(kthread_create_list);
 struct task_struct *kthreadd_task;
 
-DEFINE_TRACE(sched_kthread_stop);
-DEFINE_TRACE(sched_kthread_stop_ret);
-
 struct kthread_create_info
 {
        /* Information passed to kthread() from kthreadd. */
index accb40c..8bbeef9 100644 (file)
 #include <linux/hash.h>
 #include <linux/ftrace.h>
 #include <linux/stringify.h>
-#include <trace/lockdep.h>
 
 #include <asm/sections.h>
 
 #include "lockdep_internals.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/lockdep.h>
+
 #ifdef CONFIG_PROVE_LOCKING
 int prove_locking = 1;
 module_param(prove_locking, int, 0644);
@@ -2935,8 +2937,6 @@ void lock_set_class(struct lockdep_map *lock, const char *name,
 }
 EXPORT_SYMBOL_GPL(lock_set_class);
 
-DEFINE_TRACE(lock_acquire);
-
 /*
  * We are not always called with irqs disabled - do that here,
  * and also avoid lockdep recursion:
@@ -2963,8 +2963,6 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 }
 EXPORT_SYMBOL_GPL(lock_acquire);
 
-DEFINE_TRACE(lock_release);
-
 void lock_release(struct lockdep_map *lock, int nested,
                          unsigned long ip)
 {
@@ -3105,6 +3103,8 @@ found_it:
                hlock->holdtime_stamp = now;
        }
 
+       trace_lock_acquired(lock, ip, waittime);
+
        stats = get_lock_stats(hlock_class(hlock));
        if (waittime) {
                if (hlock->read)
@@ -3120,8 +3120,6 @@ found_it:
        lock->ip = ip;
 }
 
-DEFINE_TRACE(lock_contended);
-
 void lock_contended(struct lockdep_map *lock, unsigned long ip)
 {
        unsigned long flags;
@@ -3143,14 +3141,10 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip)
 }
 EXPORT_SYMBOL_GPL(lock_contended);
 
-DEFINE_TRACE(lock_acquired);
-
 void lock_acquired(struct lockdep_map *lock, unsigned long ip)
 {
        unsigned long flags;
 
-       trace_lock_acquired(lock, ip);
-
        if (unlikely(!lock_stat))
                return;
 
index e797812..2383e60 100644 (file)
@@ -18,6 +18,7 @@
 */
 #include <linux/module.h>
 #include <linux/moduleloader.h>
+#include <linux/ftrace_event.h>
 #include <linux/init.h>
 #include <linux/kallsyms.h>
 #include <linux/fs.h>
@@ -1489,9 +1490,6 @@ static void free_module(struct module *mod)
        /* Free any allocated parameters. */
        destroy_params(mod->kp, mod->num_kp);
 
-       /* release any pointers to mcount in this module */
-       ftrace_release(mod->module_core, mod->core_size);
-
        /* This may be NULL, but that's OK */
        module_free(mod, mod->module_init);
        kfree(mod->args);
@@ -1892,11 +1890,9 @@ static noinline struct module *load_module(void __user *umod,
        unsigned int symindex = 0;
        unsigned int strindex = 0;
        unsigned int modindex, versindex, infoindex, pcpuindex;
-       unsigned int num_mcount;
        struct module *mod;
        long err = 0;
        void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
-       unsigned long *mseg;
        mm_segment_t old_fs;
 
        DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2172,7 +2168,19 @@ static noinline struct module *load_module(void __user *umod,
                                        sizeof(*mod->tracepoints),
                                        &mod->num_tracepoints);
 #endif
-
+#ifdef CONFIG_EVENT_TRACING
+       mod->trace_events = section_objs(hdr, sechdrs, secstrings,
+                                        "_ftrace_events",
+                                        sizeof(*mod->trace_events),
+                                        &mod->num_trace_events);
+#endif
+#ifdef CONFIG_FTRACE_MCOUNT_RECORD
+       /* sechdrs[0].sh_size is always zero */
+       mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
+                                            "__mcount_loc",
+                                            sizeof(*mod->ftrace_callsites),
+                                            &mod->num_ftrace_callsites);
+#endif
 #ifdef CONFIG_MODVERSIONS
        if ((mod->num_syms && !mod->crcs)
            || (mod->num_gpl_syms && !mod->gpl_crcs)
@@ -2237,11 +2245,6 @@ static noinline struct module *load_module(void __user *umod,
                        dynamic_debug_setup(debug, num_debug);
        }
 
-       /* sechdrs[0].sh_size is always zero */
-       mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc",
-                           sizeof(*mseg), &num_mcount);
-       ftrace_init_module(mod, mseg, mseg + num_mcount);
-
        err = module_finalize(hdr, sechdrs, mod);
        if (err < 0)
                goto cleanup;
@@ -2302,7 +2305,6 @@ static noinline struct module *load_module(void __user *umod,
  cleanup:
        kobject_del(&mod->mkobj.kobj);
        kobject_put(&mod->mkobj.kobj);
-       ftrace_release(mod->module_core, mod->core_size);
  free_unload:
        module_unload_free(mod);
 #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
index 507cf2b..e5cc0cd 100644 (file)
@@ -249,7 +249,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 
                /* didnt get the lock, go to sleep: */
                spin_unlock_mutex(&lock->wait_lock, flags);
-               __schedule();
+               preempt_enable_no_resched();
+               schedule();
+               preempt_disable();
                spin_lock_mutex(&lock->wait_lock, flags);
        }
 
@@ -471,5 +473,28 @@ int __sched mutex_trylock(struct mutex *lock)
 
        return ret;
 }
-
 EXPORT_SYMBOL(mutex_trylock);
+
+/**
+ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0
+ * @cnt: the atomic which we are to dec
+ * @lock: the mutex to return holding if we dec to 0
+ *
+ * Returns 1 with @lock held if the dec took @cnt to 0, returns 0 otherwise.
+ */
+int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock)
+{
+       /* lockless path: dec if we can't possibly hit 0 (@cnt is not 1) */
+       if (atomic_add_unless(cnt, -1, 1))
+               return 0;
+       /* we might hit 0, so take the lock before doing the real dec */
+       mutex_lock(lock);
+       if (!atomic_dec_and_test(cnt)) {
+               /* when we actually did the dec, we didn't hit 0: drop lock */
+               mutex_unlock(lock);
+               return 0;
+       }
+       /* we hit 0, and we return with the lock still held */
+       return 1;
+}
+EXPORT_SYMBOL(atomic_dec_and_mutex_lock);
index 0692ab5..2442d14 100644 (file)
 #include <linux/uaccess.h>
 
 
-/*
- * Initialize a new task whose father had been ptraced.
- *
- * Called from copy_process().
- */
-void ptrace_fork(struct task_struct *child, unsigned long clone_flags)
-{
-       arch_ptrace_fork(child, clone_flags);
-}
-
 /*
  * ptrace a task: make the debugger its new parent and
  * move it to the ptrace list.
@@ -304,6 +294,8 @@ int ptrace_detach(struct task_struct *child, unsigned int data)
        if (child->ptrace) {
                child->exit_code = data;
                dead = __ptrace_detach(current, child);
+               if (!child->exit_state)
+                       wake_up_process(child);
        }
        write_unlock_irq(&tasklist_lock);
 
index ce97a4d..beb0e65 100644 (file)
@@ -1356,17 +1356,11 @@ static int rcu_sched_grace_period(void *arg)
 
                rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
                spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
-               ret = 0;
+               ret = 0; /* unused */
                __wait_event_interruptible(rcu_ctrlblk.sched_wq,
                        rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
                        ret);
 
-               /*
-                * Signals would prevent us from sleeping, and we cannot
-                * do much with them in any case.  So flush them.
-                */
-               if (ret)
-                       flush_signals(current);
                couldsleepnext = 0;
 
        } while (!kthread_should_stop());
index d2a372f..0dccfbb 100644 (file)
@@ -1259,31 +1259,44 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
        check_cpu_stall(rsp, rdp);
 
        /* Is the RCU core waiting for a quiescent state from this CPU? */
-       if (rdp->qs_pending)
+       if (rdp->qs_pending) {
+               rdp->n_rp_qs_pending++;
                return 1;
+       }
 
        /* Does this CPU have callbacks ready to invoke? */
-       if (cpu_has_callbacks_ready_to_invoke(rdp))
+       if (cpu_has_callbacks_ready_to_invoke(rdp)) {
+               rdp->n_rp_cb_ready++;
                return 1;
+       }
 
        /* Has RCU gone idle with this CPU needing another grace period? */
-       if (cpu_needs_another_gp(rsp, rdp))
+       if (cpu_needs_another_gp(rsp, rdp)) {
+               rdp->n_rp_cpu_needs_gp++;
                return 1;
+       }
 
        /* Has another RCU grace period completed?  */
-       if (ACCESS_ONCE(rsp->completed) != rdp->completed) /* outside of lock */
+       if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */
+               rdp->n_rp_gp_completed++;
                return 1;
+       }
 
        /* Has a new RCU grace period started? */
-       if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) /* outside of lock */
+       if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */
+               rdp->n_rp_gp_started++;
                return 1;
+       }
 
        /* Has an RCU GP gone long enough to send resched IPIs &c? */
        if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
-           ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
+           ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) {
+               rdp->n_rp_need_fqs++;
                return 1;
+       }
 
        /* nothing to do */
+       rdp->n_rp_need_nothing++;
        return 0;
 }
 
index 4b1875b..fe1dcdb 100644 (file)
@@ -213,7 +213,63 @@ static struct file_operations rcugp_fops = {
        .release = single_release,
 };
 
-static struct dentry *rcudir, *datadir, *datadir_csv, *hierdir, *gpdir;
+/*
+ * Print one line of rcu_pending counters for @rdp; '!' marks an offline CPU.
+ */
+static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
+{
+       char offline_mark = cpu_is_offline(rdp->cpu) ? '!' : ' ';
+
+       seq_printf(m, "%3d%cnp=%ld "
+                  "qsp=%ld cbr=%ld cng=%ld gpc=%ld gps=%ld nf=%ld nn=%ld\n",
+                  rdp->cpu, offline_mark, rdp->n_rcu_pending,
+                  rdp->n_rp_qs_pending, rdp->n_rp_cb_ready,
+                  rdp->n_rp_cpu_needs_gp, rdp->n_rp_gp_completed,
+                  rdp->n_rp_gp_started, rdp->n_rp_need_fqs,
+                  rdp->n_rp_need_nothing);
+}
+
+static void print_rcu_pendings(struct seq_file *m, struct rcu_state *rsp)
+{
+       int i;
+
+       /* One line per possible CPU whose rcu_data has ->beenonline set. */
+       for_each_possible_cpu(i) {
+               struct rcu_data *cur = rsp->rda[i];
+               if (cur->beenonline)
+                       print_one_rcu_pending(m, cur);
+       }
+}
+
+static int show_rcu_pending(struct seq_file *m, void *unused)
+{
+       seq_puts(m, "rcu:\n");
+       print_rcu_pendings(m, &rcu_state);
+       seq_puts(m, "rcu_bh:\n");
+       print_rcu_pendings(m, &rcu_bh_state);
+       return 0;
+}
+
+static int rcu_pending_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, show_rcu_pending, NULL);
+}
+
+static struct file_operations rcu_pending_fops = {
+       .owner = THIS_MODULE,
+       .open = rcu_pending_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static struct dentry *rcudir;
+static struct dentry *datadir;
+static struct dentry *datadir_csv;
+static struct dentry *gpdir;
+static struct dentry *hierdir;
+static struct dentry *rcu_pendingdir;
+
 static int __init rcuclassic_trace_init(void)
 {
        rcudir = debugfs_create_dir("rcu", NULL);
@@ -238,6 +294,11 @@ static int __init rcuclassic_trace_init(void)
                                                NULL, &rcuhier_fops);
        if (!hierdir)
                goto free_out;
+
+       rcu_pendingdir = debugfs_create_file("rcu_pending", 0444, rcudir,
+                                               NULL, &rcu_pending_fops);
+       if (!rcu_pendingdir)
+               goto free_out;
        return 0;
 free_out:
        if (datadir)
@@ -257,6 +318,7 @@ static void __exit rcuclassic_trace_cleanup(void)
        debugfs_remove(datadir_csv);
        debugfs_remove(gpdir);
        debugfs_remove(hierdir);
+       debugfs_remove(rcu_pendingdir);
        debugfs_remove(rcudir);
 }
 
index 69d9cb9..820c5af 100644 (file)
@@ -300,7 +300,8 @@ static int rt_mutex_adjust_prio_chain(struct task_struct *task,
  * assigned pending owner [which might not have taken the
  * lock yet]:
  */
-static inline int try_to_steal_lock(struct rt_mutex *lock)
+static inline int try_to_steal_lock(struct rt_mutex *lock,
+                                   struct task_struct *task)
 {
        struct task_struct *pendowner = rt_mutex_owner(lock);
        struct rt_mutex_waiter *next;
@@ -309,11 +310,11 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
        if (!rt_mutex_owner_pending(lock))
                return 0;
 
-       if (pendowner == current)
+       if (pendowner == task)
                return 1;
 
        spin_lock_irqsave(&pendowner->pi_lock, flags);
-       if (current->prio >= pendowner->prio) {
+       if (task->prio >= pendowner->prio) {
                spin_unlock_irqrestore(&pendowner->pi_lock, flags);
                return 0;
        }
@@ -338,21 +339,21 @@ static inline int try_to_steal_lock(struct rt_mutex *lock)
         * We are going to steal the lock and a waiter was
         * enqueued on the pending owners pi_waiters queue. So
         * we have to enqueue this waiter into
-        * current->pi_waiters list. This covers the case,
-        * where current is boosted because it holds another
+        * task->pi_waiters list. This covers the case,
+        * where task is boosted because it holds another
         * lock and gets unboosted because the booster is
         * interrupted, so we would delay a waiter with higher
-        * priority as current->normal_prio.
+        * priority as task->normal_prio.
         *
         * Note: in the rare case of a SCHED_OTHER task changing
         * its priority and thus stealing the lock, next->task
-        * might be current:
+        * might be task:
         */
-       if (likely(next->task != current)) {
-               spin_lock_irqsave(&current->pi_lock, flags);
-               plist_add(&next->pi_list_entry, &current->pi_waiters);
-               __rt_mutex_adjust_prio(current);
-               spin_unlock_irqrestore(&current->pi_lock, flags);
+       if (likely(next->task != task)) {
+               spin_lock_irqsave(&task->pi_lock, flags);
+               plist_add(&next->pi_list_entry, &task->pi_waiters);
+               __rt_mutex_adjust_prio(task);
+               spin_unlock_irqrestore(&task->pi_lock, flags);
        }
        return 1;
 }
@@ -389,7 +390,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
         */
        mark_rt_mutex_waiters(lock);
 
-       if (rt_mutex_owner(lock) && !try_to_steal_lock(lock))
+       if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current))
                return 0;
 
        /* We got the lock. */
@@ -411,6 +412,7 @@ static int try_to_take_rt_mutex(struct rt_mutex *lock)
  */
 static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
                                   struct rt_mutex_waiter *waiter,
+                                  struct task_struct *task,
                                   int detect_deadlock)
 {
        struct task_struct *owner = rt_mutex_owner(lock);
@@ -418,21 +420,21 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        unsigned long flags;
        int chain_walk = 0, res;
 
-       spin_lock_irqsave(&current->pi_lock, flags);
-       __rt_mutex_adjust_prio(current);
-       waiter->task = current;
+       spin_lock_irqsave(&task->pi_lock, flags);
+       __rt_mutex_adjust_prio(task);
+       waiter->task = task;
        waiter->lock = lock;
-       plist_node_init(&waiter->list_entry, current->prio);
-       plist_node_init(&waiter->pi_list_entry, current->prio);
+       plist_node_init(&waiter->list_entry, task->prio);
+       plist_node_init(&waiter->pi_list_entry, task->prio);
 
        /* Get the top priority waiter on the lock */
        if (rt_mutex_has_waiters(lock))
                top_waiter = rt_mutex_top_waiter(lock);
        plist_add(&waiter->list_entry, &lock->wait_list);
 
-       current->pi_blocked_on = waiter;
+       task->pi_blocked_on = waiter;
 
-       spin_unlock_irqrestore(&current->pi_lock, flags);
+       spin_unlock_irqrestore(&task->pi_lock, flags);
 
        if (waiter == rt_mutex_top_waiter(lock)) {
                spin_lock_irqsave(&owner->pi_lock, flags);
@@ -460,7 +462,7 @@ static int task_blocks_on_rt_mutex(struct rt_mutex *lock,
        spin_unlock(&lock->wait_lock);
 
        res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter,
-                                        current);
+                                        task);
 
        spin_lock(&lock->wait_lock);
 
@@ -605,37 +607,25 @@ void rt_mutex_adjust_pi(struct task_struct *task)
        rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task);
 }
 
-/*
- * Slow path lock function:
+/**
+ * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop
+ * @lock:               the rt_mutex to take
+ * @state:              the state the task should block in (TASK_INTERRUPTIBLE
+ *                      or TASK_UNINTERRUPTIBLE)
+ * @timeout:            the pre-initialized and started timer, or NULL for none
+ * @waiter:             the pre-initialized rt_mutex_waiter
+ * @detect_deadlock:    passed to task_blocks_on_rt_mutex
+ *
+ * lock->wait_lock must be held by the caller.
  */
 static int __sched
-rt_mutex_slowlock(struct rt_mutex *lock, int state,
-                 struct hrtimer_sleeper *timeout,
-                 int detect_deadlock)
+__rt_mutex_slowlock(struct rt_mutex *lock, int state,
+                   struct hrtimer_sleeper *timeout,
+                   struct rt_mutex_waiter *waiter,
+                   int detect_deadlock)
 {
-       struct rt_mutex_waiter waiter;
        int ret = 0;
 
-       debug_rt_mutex_init_waiter(&waiter);
-       waiter.task = NULL;
-
-       spin_lock(&lock->wait_lock);
-
-       /* Try to acquire the lock again: */
-       if (try_to_take_rt_mutex(lock)) {
-               spin_unlock(&lock->wait_lock);
-               return 0;
-       }
-
-       set_current_state(state);
-
-       /* Setup the timer, when timeout != NULL */
-       if (unlikely(timeout)) {
-               hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
-               if (!hrtimer_active(&timeout->timer))
-                       timeout->task = NULL;
-       }
-
        for (;;) {
                /* Try to acquire the lock: */
                if (try_to_take_rt_mutex(lock))
@@ -656,19 +646,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
                }
 
                /*
-                * waiter.task is NULL the first time we come here and
+                * waiter->task is NULL the first time we come here and
                 * when we have been woken up by the previous owner
                 * but the lock got stolen by a higher prio task.
                 */
-               if (!waiter.task) {
-                       ret = task_blocks_on_rt_mutex(lock, &waiter,
+               if (!waiter->task) {
+                       ret = task_blocks_on_rt_mutex(lock, waiter, current,
                                                      detect_deadlock);
                        /*
                         * If we got woken up by the owner then start loop
                         * all over without going into schedule to try
                         * to get the lock now:
                         */
-                       if (unlikely(!waiter.task)) {
+                       if (unlikely(!waiter->task)) {
                                /*
                                 * Reset the return value. We might
                                 * have returned with -EDEADLK and the
@@ -684,15 +674,52 @@ rt_mutex_slowlock(struct rt_mutex *lock, int state,
 
                spin_unlock(&lock->wait_lock);
 
-               debug_rt_mutex_print_deadlock(&waiter);
+               debug_rt_mutex_print_deadlock(waiter);
 
-               if (waiter.task)
+               if (waiter->task)
                        schedule_rt_mutex(lock);
 
                spin_lock(&lock->wait_lock);
                set_current_state(state);
        }
 
+       return ret;
+}
+
+/*
+ * Slow path lock function:
+ */
+static int __sched
+rt_mutex_slowlock(struct rt_mutex *lock, int state,
+                 struct hrtimer_sleeper *timeout,
+                 int detect_deadlock)
+{
+       struct rt_mutex_waiter waiter;
+       int ret = 0;
+
+       debug_rt_mutex_init_waiter(&waiter);
+       waiter.task = NULL;
+
+       spin_lock(&lock->wait_lock);
+
+       /* Try to acquire the lock again: */
+       if (try_to_take_rt_mutex(lock)) {
+               spin_unlock(&lock->wait_lock);
+               return 0;
+       }
+
+       set_current_state(state);
+
+       /* Setup the timer, when timeout != NULL */
+       if (unlikely(timeout)) {
+               hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
+               if (!hrtimer_active(&timeout->timer))
+                       timeout->task = NULL;
+       }
+
+       ret = __rt_mutex_slowlock(lock, state, timeout, &waiter,
+                                 detect_deadlock);
+
        set_current_state(TASK_RUNNING);
 
        if (unlikely(waiter.task))
@@ -864,9 +891,9 @@ int __sched rt_mutex_lock_interruptible(struct rt_mutex *lock,
 EXPORT_SYMBOL_GPL(rt_mutex_lock_interruptible);
 
 /**
- * rt_mutex_lock_interruptible_ktime - lock a rt_mutex interruptible
- *                                    the timeout structure is provided
- *                                    by the caller
+ * rt_mutex_timed_lock - lock a rt_mutex interruptible
+ *                     the timeout structure is provided
+ *                     by the caller
  *
  * @lock:              the rt_mutex to be locked
  * @timeout:           timeout structure or NULL (no timeout)
@@ -913,7 +940,7 @@ void __sched rt_mutex_unlock(struct rt_mutex *lock)
 }
 EXPORT_SYMBOL_GPL(rt_mutex_unlock);
 
-/***
+/**
  * rt_mutex_destroy - mark a mutex unusable
  * @lock: the mutex to be destroyed
  *
@@ -985,6 +1012,59 @@ void rt_mutex_proxy_unlock(struct rt_mutex *lock,
        rt_mutex_deadlock_account_unlock(proxy_owner);
 }
 
+/**
+ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task
+ * @lock:              the rt_mutex to take
+ * @waiter:            the pre-initialized rt_mutex_waiter
+ * @task:              the task to prepare
+ * @detect_deadlock:   perform deadlock detection (1) or not (0)
+ *
+ * Returns:
+ *  0 - task blocked on lock
+ *  1 - acquired the lock for task, caller should wake it up
+ * <0 - error
+ *
+ * Special API call for FUTEX_REQUEUE_PI support.
+ */
+int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+                             struct rt_mutex_waiter *waiter,
+                             struct task_struct *task, int detect_deadlock)
+{
+       int ret;
+
+       spin_lock(&lock->wait_lock);
+
+       mark_rt_mutex_waiters(lock);
+
+       if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
+               /* We got the lock for task. */
+               debug_rt_mutex_lock(lock);
+
+               rt_mutex_set_owner(lock, task, 0);
+               rt_mutex_deadlock_account_lock(lock, task);
+               /* Drop wait_lock here too; every return path releases it. */
+               spin_unlock(&lock->wait_lock);
+               return 1;
+       }
+
+       ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
+
+       if (ret && !waiter->task) {
+               /*
+                * Reset the return value. We might have
+                * returned with -EDEADLK and the owner
+                * released the lock while we were walking the
+                * pi chain.  Let the waiter sort it out.
+                */
+               ret = 0;
+       }
+       spin_unlock(&lock->wait_lock);
+
+       debug_rt_mutex_print_deadlock(waiter);
+
+       return ret;
+}
+
 /**
  * rt_mutex_next_owner - return the next owner of the lock
  *
@@ -1004,3 +1084,57 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
 
        return rt_mutex_top_waiter(lock)->task;
 }
+
+/**
+ * rt_mutex_finish_proxy_lock() - Complete lock acquisition
+ * @lock:              the rt_mutex we were woken on
+ * @to:                        the timeout, NULL if none. hrtimer should already have
+ *                     been started.
+ * @waiter:            the pre-initialized rt_mutex_waiter
+ * @detect_deadlock:   perform deadlock detection (1) or not (0)
+ *
+ * Complete the lock acquisition started on our behalf by another thread.
+ *
+ * Returns:
+ *  0 - success
+ * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK
+ *
+ * Special API call for PI-futex requeue support
+ */
+int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
+                              struct hrtimer_sleeper *to,
+                              struct rt_mutex_waiter *waiter,
+                              int detect_deadlock)
+{
+       int ret;
+
+       spin_lock(&lock->wait_lock);
+
+       set_current_state(TASK_INTERRUPTIBLE);
+
+       ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter,
+                                 detect_deadlock);
+
+       set_current_state(TASK_RUNNING);
+
+       if (unlikely(waiter->task))
+               remove_waiter(lock, waiter);
+
+       /*
+        * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
+        * have to fix that up.
+        */
+       fixup_rt_mutex_waiters(lock);
+
+       spin_unlock(&lock->wait_lock);
+
+       /*
+        * Readjust priority when we did not get the lock. We might have been
+        * the pending owner and boosted. Since we did not take the lock, the
+        * PI boost has to go.
+        */
+       if (unlikely(ret))
+               rt_mutex_adjust_prio(current);
+
+       return ret;
+}
index e124bf5..97a2f81 100644 (file)
@@ -120,6 +120,14 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
                                       struct task_struct *proxy_owner);
 extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
                                  struct task_struct *proxy_owner);
+extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
+                                    struct rt_mutex_waiter *waiter,
+                                    struct task_struct *task,
+                                    int detect_deadlock);
+extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
+                                     struct hrtimer_sleeper *to,
+                                     struct rt_mutex_waiter *waiter,
+                                     int detect_deadlock);
 
 #ifdef CONFIG_DEBUG_RT_MUTEXES
 # include "rtmutex-debug.h"
index 26efa47..14c447a 100644 (file)
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
 
 #include "sched_cpupri.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/sched.h>
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
  */
 #define RUNTIME_INF    ((u64)~0ULL)
 
-DEFINE_TRACE(sched_wait_task);
-DEFINE_TRACE(sched_wakeup);
-DEFINE_TRACE(sched_wakeup_new);
-DEFINE_TRACE(sched_switch);
-DEFINE_TRACE(sched_migrate_task);
-
 #ifdef CONFIG_SMP
 
 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -630,6 +626,10 @@ struct rq {
        struct list_head migration_queue;
 #endif
 
+       /* calc_load related fields */
+       unsigned long calc_load_update;
+       long calc_load_active;
+
 #ifdef CONFIG_SCHED_HRTICK
 #ifdef CONFIG_SMP
        int hrtick_csd_pending;
@@ -1728,6 +1728,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 }
 #endif
 
+static void calc_load_account_active(struct rq *this_rq);
+
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -1958,7 +1960,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 
        clock_offset = old_rq->clock - new_rq->clock;
 
-       trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+       trace_sched_migrate_task(p, new_cpu);
 
 #ifdef CONFIG_SCHEDSTATS
        if (p->se.wait_start)
@@ -2014,6 +2016,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
        return 1;
 }
 
+/*
+ * wait_task_context_switch -  wait for a thread to complete at least one
+ *                             context switch.
+ *
+ * @p must not be current.
+ */
+void wait_task_context_switch(struct task_struct *p)
+{
+       unsigned long nvcsw, nivcsw, flags;
+       int running;
+       struct rq *rq;
+
+       nvcsw   = p->nvcsw;
+       nivcsw  = p->nivcsw;
+       for (;;) {
+               /*
+                * The runqueue is assigned before the actual context
+                * switch. We need to take the runqueue lock.
+                *
+                * We could check initially without the lock but it is
+                * very likely that we need to take the lock in every
+                * iteration.
+                */
+               rq = task_rq_lock(p, &flags);
+               running = task_running(rq, p);
+               task_rq_unlock(rq, &flags);
+
+               if (likely(!running))
+                       break;
+               /*
+                * The switch count is incremented before the actual
+                * context switch. We thus wait for two switches to be
+                * sure at least one completed.
+                */
+               if ((p->nvcsw - nvcsw) > 1)
+                       break;
+               if ((p->nivcsw - nivcsw) > 1)
+                       break;
+
+               cpu_relax();
+       }
+}
+
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
@@ -2458,6 +2503,17 @@ out:
        return success;
 }
 
+/**
+ * wake_up_process - Wake up a specific process
+ * @p: The process to be woken up.
+ *
+ * Attempt to wake up the nominated process and move it to the set of runnable
+ * processes.  Returns 1 if the process was woken up, 0 if it was already
+ * running.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
 int wake_up_process(struct task_struct *p)
 {
        return try_to_wake_up(p, TASK_ALL, 0);
@@ -2766,7 +2822,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
         * combine the page table reload and the switch backend into
         * one hypercall.
         */
-       arch_enter_lazy_cpu_mode();
+       arch_start_context_switch(prev);
 
        if (unlikely(!mm)) {
                next->active_mm = oldmm;
@@ -2856,19 +2912,72 @@ unsigned long nr_iowait(void)
        return sum;
 }
 
-unsigned long nr_active(void)
+/* Variables and functions for calc_load */
+static atomic_long_t calc_load_tasks;
+static unsigned long calc_load_update;
+unsigned long avenrun[3];
+EXPORT_SYMBOL(avenrun);
+
+/**
+ * get_avenrun - get the load average array
+ * @loads:     pointer to dest load array
+ * @offset:    offset to add
+ * @shift:     shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
+{
+       loads[0] = (avenrun[0] + offset) << shift;
+       loads[1] = (avenrun[1] + offset) << shift;
+       loads[2] = (avenrun[2] + offset) << shift;
+}
+
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
 {
-       unsigned long i, running = 0, uninterruptible = 0;
+       load *= exp;
+       load += active * (FIXED_1 - exp);
+       return load >> FSHIFT;
+}
 
-       for_each_online_cpu(i) {
-               running += cpu_rq(i)->nr_running;
-               uninterruptible += cpu_rq(i)->nr_uninterruptible;
-       }
+/*
+ * calc_global_load - update the avenrun load estimates 10 ticks after the
+ * CPUs have updated calc_load_tasks.
+ */
+void calc_global_load(void)
+{
+       unsigned long upd = calc_load_update + 10;
+       long active;
 
-       if (unlikely((long)uninterruptible < 0))
-               uninterruptible = 0;
+       if (time_before(jiffies, upd))
+               return;
 
-       return running + uninterruptible;
+       active = atomic_long_read(&calc_load_tasks);
+       active = active > 0 ? active * FIXED_1 : 0;
+
+       avenrun[0] = calc_load(avenrun[0], EXP_1, active);
+       avenrun[1] = calc_load(avenrun[1], EXP_5, active);
+       avenrun[2] = calc_load(avenrun[2], EXP_15, active);
+
+       calc_load_update += LOAD_FREQ;
+}
+
+/*
+ * Either called from update_cpu_load() or from a cpu going idle
+ */
+static void calc_load_account_active(struct rq *this_rq)
+{
+       long nr_active, delta;
+
+       nr_active = this_rq->nr_running;
+       nr_active += (long) this_rq->nr_uninterruptible;
+
+       if (nr_active != this_rq->calc_load_active) {
+               delta = nr_active - this_rq->calc_load_active;
+               this_rq->calc_load_active = nr_active;
+               atomic_long_add(delta, &calc_load_tasks);
+       }
 }
 
 /*
@@ -2899,6 +3008,11 @@ static void update_cpu_load(struct rq *this_rq)
                        new_load += scale-1;
                this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
        }
+
+       if (time_after_eq(jiffies, this_rq->calc_load_update)) {
+               this_rq->calc_load_update += LOAD_FREQ;
+               calc_load_account_active(this_rq);
+       }
 }
 
 #ifdef CONFIG_SMP
@@ -4240,10 +4354,126 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 static struct {
        atomic_t load_balancer;
        cpumask_var_t cpu_mask;
+       cpumask_var_t ilb_grp_nohz_mask;
 } nohz ____cacheline_aligned = {
        .load_balancer = ATOMIC_INIT(-1),
 };
 
+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+/**
+ * lowest_flag_domain - Return lowest sched_domain containing flag.
+ * @cpu:       The cpu whose lowest level of sched domain is to
+ *             be returned.
+ * @flag:      The flag to check for the lowest sched_domain
+ *             for the given cpu.
+ *
+ * Returns the lowest sched_domain of a cpu which contains the given flag.
+ */
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+       struct sched_domain *sd;
+
+       for_each_domain(cpu, sd)
+               if (sd && (sd->flags & flag))
+                       break;
+
+       return sd;
+}
+
+/**
+ * for_each_flag_domain - Iterates over sched_domains containing the flag.
+ * @cpu:       The cpu whose domains we're iterating over.
+ * @sd:                variable holding the value of the power_savings_sd
+ *             for cpu.
+ * @flag:      The flag to filter the sched_domains to be iterated.
+ *
+ * Iterates over all the scheduler domains for a given cpu that has the 'flag'
+ * set, starting from the lowest sched_domain to the highest.
+ */
+#define for_each_flag_domain(cpu, sd, flag) \
+       for (sd = lowest_flag_domain(cpu, flag); \
+               (sd && (sd->flags & flag)); sd = sd->parent)
+
+/**
+ * is_semi_idle_group - Checks if the given sched_group is semi-idle.
+ * @ilb_group: group to be checked for semi-idleness
+ *
+ * Returns:    1 if the group is semi-idle. 0 otherwise.
+ *
+ * We define a sched_group to be semi-idle if it has at least one idle CPU
+ * and at least one non-idle CPU. This helper function checks if the given
+ * sched_group is semi-idle or not.
+ */
+static inline int is_semi_idle_group(struct sched_group *ilb_group)
+{
+       cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
+                                       sched_group_cpus(ilb_group));
+
+       /*
+        * A sched_group is semi-idle when it has at least one busy cpu
+        * and at least one idle cpu.
+        */
+       if (cpumask_empty(nohz.ilb_grp_nohz_mask))
+               return 0;
+
+       if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
+               return 0;
+
+       return 1;
+}
+/**
+ * find_new_ilb - Finds the optimum idle load balancer for nomination.
+ * @cpu:       The cpu which is nominating a new idle_load_balancer.
+ *
+ * Returns:    The id of the idle load balancer if it exists,
+ *             else a value >= nr_cpu_ids.
+ *
+ * This algorithm picks the idle load balancer such that it belongs to a
+ * semi-idle powersavings sched_domain. The idea is to try and avoid
+ * completely idle packages/cores just for the purpose of idle load balancing
+ * when there are other idle cpus which are better suited for that job.
+ */
+static int find_new_ilb(int cpu)
+{
+       struct sched_domain *sd;
+       struct sched_group *ilb_group;
+
+       /*
+        * Have idle load balancer selection from semi-idle packages only
+        * when power-aware load balancing is enabled
+        */
+       if (!(sched_smt_power_savings || sched_mc_power_savings))
+               goto out_done;
+
+       /*
+        * Optimize for the case when we have no idle CPUs or only one
+        * idle CPU. Don't walk the sched_domain hierarchy in such cases
+        */
+       if (cpumask_weight(nohz.cpu_mask) < 2)
+               goto out_done;
+
+       for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
+               ilb_group = sd->groups;
+
+               do {
+                       if (is_semi_idle_group(ilb_group))
+                               return cpumask_first(nohz.ilb_grp_nohz_mask);
+
+                       ilb_group = ilb_group->next;
+
+               } while (ilb_group != sd->groups);
+       }
+
+out_done:
+       return cpumask_first(nohz.cpu_mask);
+}
+#else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
+static inline int find_new_ilb(int call_cpu)
+{
+       return cpumask_first(nohz.cpu_mask);
+}
+#endif
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
@@ -4298,8 +4528,24 @@ int select_nohz_load_balancer(int stop_tick)
                        /* make me the ilb owner */
                        if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
                                return 1;
-               } else if (atomic_read(&nohz.load_balancer) == cpu)
+               } else if (atomic_read(&nohz.load_balancer) == cpu) {
+                       int new_ilb;
+
+                       if (!(sched_smt_power_savings ||
+                                               sched_mc_power_savings))
+                               return 1;
+                       /*
+                        * Check to see if there is a more power-efficient
+                        * ilb.
+                        */
+                       new_ilb = find_new_ilb(cpu);
+                       if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
+                               atomic_set(&nohz.load_balancer, -1);
+                               resched_cpu(new_ilb);
+                               return 0;
+                       }
                        return 1;
+               }
        } else {
                if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
                        return 0;
@@ -4468,15 +4714,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
                }
 
                if (atomic_read(&nohz.load_balancer) == -1) {
-                       /*
-                        * simple selection for now: Nominate the
-                        * first cpu in the nohz list to be the next
-                        * ilb owner.
-                        *
-                        * TBD: Traverse the sched domains and nominate
-                        * the nearest cpu in the nohz.cpu_mask.
-                        */
-                       int ilb = cpumask_first(nohz.cpu_mask);
+                       int ilb = find_new_ilb(cpu);
 
                        if (ilb < nr_cpu_ids)
                                resched_cpu(ilb);
@@ -5007,13 +5245,15 @@ pick_next_task(struct rq *rq)
 /*
  * schedule() is the main scheduler function.
  */
-asmlinkage void __sched __schedule(void)
+asmlinkage void __sched schedule(void)
 {
        struct task_struct *prev, *next;
        unsigned long *switch_count;
        struct rq *rq;
        int cpu;
 
+need_resched:
+       preempt_disable();
        cpu = smp_processor_id();
        rq = cpu_rq(cpu);
        rcu_qsctr_inc(cpu);
@@ -5070,15 +5310,9 @@ need_resched_nonpreemptible:
 
        if (unlikely(reacquire_kernel_lock(current) < 0))
                goto need_resched_nonpreemptible;
-}
 
-asmlinkage void __sched schedule(void)
-{
-need_resched:
-       preempt_disable();
-       __schedule();
        preempt_enable_no_resched();
-       if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
+       if (need_resched())
                goto need_resched;
 }
 EXPORT_SYMBOL(schedule);
@@ -5221,7 +5455,7 @@ EXPORT_SYMBOL(default_wake_function);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
                        int nr_exclusive, int sync, void *key)
 {
        wait_queue_t *curr, *next;
@@ -5241,6 +5475,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up(wait_queue_head_t *q, unsigned int mode,
                        int nr_exclusive, void *key)
@@ -5279,6 +5516,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
  * with each other. This can prevent needless bouncing between CPUs.
  *
  * On UP it can prevent extra preemption.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
                        int nr_exclusive, void *key)
@@ -5315,6 +5555,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);        /* For internal use only */
  * awakened in the same order in which they were queued.
  *
  * See also complete_all(), wait_for_completion() and related routines.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void complete(struct completion *x)
 {
@@ -5332,6 +5575,9 @@ EXPORT_SYMBOL(complete);
  * @x:  holds the state of this particular completion
  *
  * This will wake up all threads waiting on this particular completion event.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void complete_all(struct completion *x)
 {
@@ -6490,8 +6736,9 @@ void sched_show_task(struct task_struct *p)
 #ifdef CONFIG_DEBUG_STACK_USAGE
        free = stack_not_used(p);
 #endif
-       printk(KERN_CONT "%5lu %5d %6d\n", free,
-               task_pid_nr(p), task_pid_nr(p->real_parent));
+       printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
+               task_pid_nr(p), task_pid_nr(p->real_parent),
+               (unsigned long)task_thread_info(p)->flags);
 
        show_stack(p, NULL);
 }
@@ -6970,6 +7217,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
 
        }
 }
+
+/*
+ * remove the tasks which were accounted by rq from calc_load_tasks.
+ */
+static void calc_global_load_remove(struct rq *rq)
+{
+       atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+}
 #endif /* CONFIG_HOTPLUG_CPU */
 
 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -7204,6 +7459,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                /* Update our root-domain */
                rq = cpu_rq(cpu);
                spin_lock_irqsave(&rq->lock, flags);
+               rq->calc_load_update = calc_load_update;
+               rq->calc_load_active = 0;
                if (rq->rd) {
                        BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
 
@@ -7243,7 +7500,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
                cpuset_unlock();
                migrate_nr_uninterruptible(rq);
                BUG_ON(rq->nr_running != 0);
-
+               calc_global_load_remove(rq);
                /*
                 * No need to migrate the tasks: it was best-effort if
                 * they didn't take sched_hotcpu_mutex. Just wake up
@@ -7753,8 +8010,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;
 
 /*
  * The cpus mask in sched_group and sched_domain hangs off the end.
- * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
- * for nr_cpu_ids < CONFIG_NR_CPUS.
+ *
+ * ( See the comments in include/linux/sched.h:struct sched_group
+ *   and struct sched_domain. )
  */
 struct static_sched_group {
        struct sched_group sg;
@@ -7875,7 +8133,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
                        struct sched_domain *sd;
 
                        sd = &per_cpu(phys_domains, j).sd;
-                       if (j != cpumask_first(sched_group_cpus(sd->groups))) {
+                       if (j != group_first_cpu(sd->groups)) {
                                /*
                                 * Only add "power" once for each
                                 * physical package.
@@ -7953,7 +8211,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 
        WARN_ON(!sd || !sd->groups);
 
-       if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
+       if (cpu != group_first_cpu(sd->groups))
                return;
 
        child = sd->child;
@@ -8938,6 +9196,8 @@ void __init sched_init(void)
                rq = cpu_rq(i);
                spin_lock_init(&rq->lock);
                rq->nr_running = 0;
+               rq->calc_load_active = 0;
+               rq->calc_load_update = jiffies + LOAD_FREQ;
                init_cfs_rq(&rq->cfs, rq);
                init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -9045,6 +9305,9 @@ void __init sched_init(void)
         * when this runqueue becomes "idle".
         */
        init_idle(current, smp_processor_id());
+
+       calc_load_update = jiffies + LOAD_FREQ;
+
        /*
         * During early bootup we pretend to be a normal task:
         */
@@ -9055,6 +9318,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
        alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+       alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask);
 #endif
        alloc_bootmem_cpumask_var(&cpu_isolated_map);
 #endif /* SMP */
@@ -9800,6 +10064,13 @@ static int sched_rt_global_constraints(void)
        if (sysctl_sched_rt_period <= 0)
                return -EINVAL;
 
+       /*
+        * There are always some RT tasks in the root group
+        * -- migration, kstopmachine etc..
+        */
+       if (sysctl_sched_rt_runtime == 0)
+               return -EBUSY;
+
        spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
        for_each_possible_cpu(i) {
                struct rt_rq *rt_rq = &cpu_rq(i)->rt;
index cdd3c89..344712a 100644 (file)
@@ -165,7 +165,7 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem)
                vec->count = 0;
                if (bootmem)
                        alloc_bootmem_cpumask_var(&vec->mask);
-               else if (!alloc_cpumask_var(&vec->mask, GFP_KERNEL))
+               else if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
                        goto cleanup;
        }
 
index 3816f21..5f9650e 100644 (file)
@@ -1487,17 +1487,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 
        find_matching_se(&se, &pse);
 
-       while (se) {
-               BUG_ON(!pse);
+       BUG_ON(!pse);
 
-               if (wakeup_preempt_entity(se, pse) == 1) {
-                       resched_task(curr);
-                       break;
-               }
-
-               se = parent_entity(se);
-               pse = parent_entity(pse);
-       }
+       if (wakeup_preempt_entity(se, pse) == 1)
+               resched_task(curr);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
index 8a21a2e..499672c 100644 (file)
@@ -22,7 +22,8 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int sy
 static struct task_struct *pick_next_task_idle(struct rq *rq)
 {
        schedstat_inc(rq, sched_goidle);
-
+       /* adjust the active tasks as we might go into a long sleep */
+       calc_load_account_active(rq);
        return rq->idle;
 }
 
index f2c66f8..9bf0d2a 100644 (file)
@@ -1591,7 +1591,7 @@ static inline void init_sched_rt_class(void)
        unsigned int i;
 
        for_each_possible_cpu(i)
-               alloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
+               zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
                                        GFP_KERNEL, cpu_to_node(i));
 }
 #endif /* CONFIG_SMP */
index d803473..dba6ae9 100644 (file)
@@ -27,7 +27,7 @@
 #include <linux/freezer.h>
 #include <linux/pid_namespace.h>
 #include <linux/nsproxy.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/param.h>
 #include <asm/uaccess.h>
@@ -41,8 +41,6 @@
 
 static struct kmem_cache *sigqueue_cachep;
 
-DEFINE_TRACE(sched_signal_send);
-
 static void __user *sig_handler(struct task_struct *t, int sig)
 {
        return t->sighand->action[sig - 1].sa.sa_handler;
@@ -2278,24 +2276,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
        return kill_something_info(sig, &info, pid);
 }
 
-static int do_tkill(pid_t tgid, pid_t pid, int sig)
+static int
+do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info)
 {
-       int error;
-       struct siginfo info;
        struct task_struct *p;
        unsigned long flags;
-
-       error = -ESRCH;
-       info.si_signo = sig;
-       info.si_errno = 0;
-       info.si_code = SI_TKILL;
-       info.si_pid = task_tgid_vnr(current);
-       info.si_uid = current_uid();
+       int error = -ESRCH;
 
        rcu_read_lock();
        p = find_task_by_vpid(pid);
        if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) {
-               error = check_kill_permission(sig, &info, p);
+               error = check_kill_permission(sig, info, p);
                /*
                 * The null signal is a permissions and process existence
                 * probe.  No signal is actually delivered.
@@ -2305,7 +2296,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
                 * signal is private anyway.
                 */
                if (!error && sig && lock_task_sighand(p, &flags)) {
-                       error = specific_send_sig_info(sig, &info, p);
+                       error = specific_send_sig_info(sig, info, p);
                        unlock_task_sighand(p, &flags);
                }
        }
@@ -2314,6 +2305,19 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig)
        return error;
 }
 
+static int do_tkill(pid_t tgid, pid_t pid, int sig)
+{
+       struct siginfo info;
+
+       info.si_signo = sig;
+       info.si_errno = 0;
+       info.si_code = SI_TKILL;
+       info.si_pid = task_tgid_vnr(current);
+       info.si_uid = current_uid();
+
+       return do_send_specific(tgid, pid, sig, &info);
+}
+
 /**
  *  sys_tgkill - send signal to one specific thread
  *  @tgid: the thread group ID of the thread
@@ -2363,6 +2367,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
        return kill_proc_info(sig, &info, pid);
 }
 
+long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
+{
+       /* This is only valid for single tasks */
+       if (pid <= 0 || tgid <= 0)
+               return -EINVAL;
+
+       /* Not even root can pretend to send signals from the kernel.
+          Nor can they impersonate a kill(), which adds source info.  */
+       if (info->si_code >= 0)
+               return -EPERM;
+       info->si_signo = sig;
+
+       return do_send_specific(tgid, pid, sig, info);
+}
+
+SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig,
+               siginfo_t __user *, uinfo)
+{
+       siginfo_t info;
+
+       if (copy_from_user(&info, uinfo, sizeof(siginfo_t)))
+               return -EFAULT;
+
+       return do_rt_tgsigqueueinfo(tgid, pid, sig, &info);
+}
+
 int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
        struct task_struct *t = current;
index 858baac..ad63d85 100644 (file)
@@ -52,7 +52,7 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
-               if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
+               if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
                                cpu_to_node(cpu)))
                        return NOTIFY_BAD;
                break;
index b525dd3..258885a 100644 (file)
@@ -24,7 +24,9 @@
 #include <linux/ftrace.h>
 #include <linux/smp.h>
 #include <linux/tick.h>
-#include <trace/irq.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/irq.h>
 
 #include <asm/irq.h>
 /*
@@ -186,9 +188,6 @@ EXPORT_SYMBOL(local_bh_enable_ip);
  */
 #define MAX_SOFTIRQ_RESTART 10
 
-DEFINE_TRACE(softirq_entry);
-DEFINE_TRACE(softirq_exit);
-
 asmlinkage void __do_softirq(void)
 {
        struct softirq_action *h;
@@ -828,7 +827,7 @@ int __init __weak arch_early_irq_init(void)
        return 0;
 }
 
-int __weak arch_init_chip_data(struct irq_desc *desc, int cpu)
+int __weak arch_init_chip_data(struct irq_desc *desc, int node)
 {
        return 0;
 }
index b2970d5..6a46371 100644 (file)
@@ -729,6 +729,14 @@ static struct ctl_table kern_table[] = {
                .mode           = 0444,
                .proc_handler   = &proc_dointvec,
        },
+       {
+               .ctl_name       = CTL_UNNUMBERED,
+               .procname       = "bootloader_version",
+               .data           = &bootloader_version,
+               .maxlen         = sizeof (int),
+               .mode           = 0444,
+               .proc_handler   = &proc_dointvec,
+       },
        {
                .ctl_name       = CTL_UNNUMBERED,
                .procname       = "kstack_depth_to_print",
index 687dff4..52a8bf8 100644 (file)
@@ -22,7 +22,7 @@
 
 /*
  * This read-write spinlock protects us from races in SMP while
- * playing with xtime and avenrun.
+ * playing with xtime.
  */
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock);
 
index cffffad..a26ed29 100644 (file)
@@ -1122,47 +1122,6 @@ void update_process_times(int user_tick)
        run_posix_cpu_timers(p);
 }
 
-/*
- * Nr of active tasks - counted in fixed-point numbers
- */
-static unsigned long count_active_tasks(void)
-{
-       return nr_active() * FIXED_1;
-}
-
-/*
- * Hmm.. Changed this, as the GNU make sources (load.c) seems to
- * imply that avenrun[] is the standard name for this kind of thing.
- * Nothing else seems to be standardized: the fractional size etc
- * all seem to differ on different machines.
- *
- * Requires xtime_lock to access.
- */
-unsigned long avenrun[3];
-
-EXPORT_SYMBOL(avenrun);
-
-/*
- * calc_load - given tick count, update the avenrun load estimates.
- * This is called while holding a write_lock on xtime_lock.
- */
-static inline void calc_load(unsigned long ticks)
-{
-       unsigned long active_tasks; /* fixed-point */
-       static int count = LOAD_FREQ;
-
-       count -= ticks;
-       if (unlikely(count < 0)) {
-               active_tasks = count_active_tasks();
-               do {
-                       CALC_LOAD(avenrun[0], EXP_1, active_tasks);
-                       CALC_LOAD(avenrun[1], EXP_5, active_tasks);
-                       CALC_LOAD(avenrun[2], EXP_15, active_tasks);
-                       count += LOAD_FREQ;
-               } while (count < 0);
-       }
-}
-
 /*
  * This function runs timers and the timer-tq in bottom half context.
  */
@@ -1186,16 +1145,6 @@ void run_local_timers(void)
        softlockup_tick();
 }
 
-/*
- * Called by the timer interrupt. xtime_lock must already be taken
- * by the timer IRQ!
- */
-static inline void update_times(unsigned long ticks)
-{
-       update_wall_time();
-       calc_load(ticks);
-}
-
 /*
  * The 64-bit jiffies value is not atomic - you MUST NOT read it
  * without sampling the sequence number in xtime_lock.
@@ -1205,7 +1154,8 @@ static inline void update_times(unsigned long ticks)
 void do_timer(unsigned long ticks)
 {
        jiffies_64 += ticks;
-       update_times(ticks);
+       update_wall_time();
+       calc_global_load();
 }
 
 #ifdef __ARCH_WANT_SYS_ALARM
@@ -1406,37 +1356,17 @@ int do_sysinfo(struct sysinfo *info)
 {
        unsigned long mem_total, sav_total;
        unsigned int mem_unit, bitcount;
-       unsigned long seq;
+       struct timespec tp;
 
        memset(info, 0, sizeof(struct sysinfo));
 
-       do {
-               struct timespec tp;
-               seq = read_seqbegin(&xtime_lock);
-
-               /*
-                * This is annoying.  The below is the same thing
-                * posix_get_clock_monotonic() does, but it wants to
-                * take the lock which we want to cover the loads stuff
-                * too.
-                */
-
-               getnstimeofday(&tp);
-               tp.tv_sec += wall_to_monotonic.tv_sec;
-               tp.tv_nsec += wall_to_monotonic.tv_nsec;
-               monotonic_to_bootbased(&tp);
-               if (tp.tv_nsec - NSEC_PER_SEC >= 0) {
-                       tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC;
-                       tp.tv_sec++;
-               }
-               info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
+       ktime_get_ts(&tp);
+       monotonic_to_bootbased(&tp);
+       info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0);
 
-               info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT);
-               info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT);
-               info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT);
+       get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT);
 
-               info->procs = nr_threads;
-       } while (read_seqretry(&xtime_lock, seq));
+       info->procs = nr_threads;
 
        si_meminfo(info);
        si_swapinfo(info);
index 417d198..4a13e5a 100644 (file)
@@ -48,6 +48,21 @@ config FTRACE_NMI_ENTER
        depends on HAVE_FTRACE_NMI_ENTER
        default y
 
+config EVENT_TRACING
+       select CONTEXT_SWITCH_TRACER
+       bool
+
+config CONTEXT_SWITCH_TRACER
+       select MARKERS
+       bool
+
+# All tracer options should select GENERIC_TRACER. For those options that are
+# enabled by all tracers (context switch and event tracer) they select TRACING.
+# This allows those options to appear when no other tracer is selected. But the
+# options do not appear when something else selects it. We need the two options
+# GENERIC_TRACER and TRACING to avoid circular dependencies to accomplish the
+# hiding of the automatic options.
+
 config TRACING
        bool
        select DEBUG_FS
@@ -56,6 +71,11 @@ config TRACING
        select TRACEPOINTS
        select NOP_TRACER
        select BINARY_PRINTF
+       select EVENT_TRACING
+
+config GENERIC_TRACER
+       bool
+       select TRACING
 
 #
 # Minimum requirements an architecture has to meet for us to
@@ -73,14 +93,20 @@ config TRACING_SUPPORT
 
 if TRACING_SUPPORT
 
-menu "Tracers"
+menuconfig FTRACE
+       bool "Tracers"
+       default y if DEBUG_KERNEL
+       help
+        Enable the kernel tracing infrastructure.
+
+if FTRACE
 
 config FUNCTION_TRACER
        bool "Kernel Function Tracer"
        depends on HAVE_FUNCTION_TRACER
        select FRAME_POINTER
        select KALLSYMS
-       select TRACING
+       select GENERIC_TRACER
        select CONTEXT_SWITCH_TRACER
        help
          Enable the kernel to trace every kernel function. This is done
@@ -104,13 +130,14 @@ config FUNCTION_GRAPH_TRACER
          the return value. This is done by setting the current return 
          address on the current task structure into a stack of calls.
 
+
 config IRQSOFF_TRACER
        bool "Interrupts-off Latency Tracer"
        default n
        depends on TRACE_IRQFLAGS_SUPPORT
        depends on GENERIC_TIME
        select TRACE_IRQFLAGS
-       select TRACING
+       select GENERIC_TRACER
        select TRACER_MAX_TRACE
        help
          This option measures the time spent in irqs-off critical
@@ -131,7 +158,7 @@ config PREEMPT_TRACER
        default n
        depends on GENERIC_TIME
        depends on PREEMPT
-       select TRACING
+       select GENERIC_TRACER
        select TRACER_MAX_TRACE
        help
          This option measures the time spent in preemption off critical
@@ -150,7 +177,7 @@ config PREEMPT_TRACER
 config SYSPROF_TRACER
        bool "Sysprof Tracer"
        depends on X86
-       select TRACING
+       select GENERIC_TRACER
        select CONTEXT_SWITCH_TRACER
        help
          This tracer provides the trace needed by the 'Sysprof' userspace
@@ -158,40 +185,33 @@ config SYSPROF_TRACER
 
 config SCHED_TRACER
        bool "Scheduling Latency Tracer"
-       select TRACING
+       select GENERIC_TRACER
        select CONTEXT_SWITCH_TRACER
        select TRACER_MAX_TRACE
        help
          This tracer tracks the latency of the highest priority task
          to be scheduled in, starting from the point it has woken up.
 
-config CONTEXT_SWITCH_TRACER
-       bool "Trace process context switches"
-       select TRACING
-       select MARKERS
-       help
-         This tracer gets called from the context switch and records
-         all switching of tasks.
-
-config EVENT_TRACER
-       bool "Trace various events in the kernel"
+config ENABLE_DEFAULT_TRACERS
+       bool "Trace process context switches and events"
+       depends on !GENERIC_TRACER
        select TRACING
        help
          This tracer hooks to various trace points in the kernel
          allowing the user to pick and choose which trace point they
-         want to trace.
+         want to trace. It also includes the sched_switch tracer plugin.
 
 config FTRACE_SYSCALLS
        bool "Trace syscalls"
        depends on HAVE_FTRACE_SYSCALLS
-       select TRACING
+       select GENERIC_TRACER
        select KALLSYMS
        help
          Basic tracer to catch the syscall entry and exit events.
 
 config BOOT_TRACER
        bool "Trace boot initcalls"
-       select TRACING
+       select GENERIC_TRACER
        select CONTEXT_SWITCH_TRACER
        help
          This tracer helps developers to optimize boot times: it records
@@ -207,8 +227,36 @@ config BOOT_TRACER
          to enable this on bootup.
 
 config TRACE_BRANCH_PROFILING
+       bool
+       select GENERIC_TRACER
+
+choice
+       prompt "Branch Profiling"
+       default BRANCH_PROFILE_NONE
+       help
+        The branch profiling is a software profiler. It will add hooks
+        into the C conditionals to test which path a branch takes.
+
+        The likely/unlikely profiler only looks at the conditions that
+        are annotated with a likely or unlikely macro.
+
+        The "all branch" profiler will profile every if statement in the
+        kernel. This profiler will also enable the likely/unlikely
+        profiler as well.
+
+        Either of the above profilers add a bit of overhead to the system.
+        If unsure choose "No branch profiling".
+
+config BRANCH_PROFILE_NONE
+       bool "No branch profiling"
+       help
+        No branch profiling. Branch profiling adds a bit of overhead.
+        Only enable it if you want to analyse the branching behavior.
+        Otherwise keep it disabled.
+
+config PROFILE_ANNOTATED_BRANCHES
        bool "Trace likely/unlikely profiler"
-       select TRACING
+       select TRACE_BRANCH_PROFILING
        help
          This tracer profiles all the the likely and unlikely macros
          in the kernel. It will display the results in:
@@ -218,11 +266,9 @@ config TRACE_BRANCH_PROFILING
          Note: this will add a significant overhead, only turn this
          on if you need to profile the system's use of these macros.
 
-         Say N if unsure.
-
 config PROFILE_ALL_BRANCHES
        bool "Profile all if conditionals"
-       depends on TRACE_BRANCH_PROFILING
+       select TRACE_BRANCH_PROFILING
        help
          This tracer profiles all branch conditions. Every if ()
          taken in the kernel is recorded whether it hit or miss.
@@ -230,11 +276,12 @@ config PROFILE_ALL_BRANCHES
 
          /debugfs/tracing/profile_branch
 
+         This option also enables the likely/unlikely profiler.
+
          This configuration, when enabled, will impose a great overhead
          on the system. This should only be enabled when the system
          is to be analyzed
-
-         Say N if unsure.
+endchoice
 
 config TRACING_BRANCHES
        bool
@@ -261,7 +308,7 @@ config BRANCH_TRACER
 config POWER_TRACER
        bool "Trace power consumption behavior"
        depends on X86
-       select TRACING
+       select GENERIC_TRACER
        help
          This tracer helps developers to analyze and optimize the kernels
          power management decisions, specifically the C-state and P-state
@@ -295,14 +342,14 @@ config STACK_TRACER
 config HW_BRANCH_TRACER
        depends on HAVE_HW_BRANCH_TRACER
        bool "Trace hw branches"
-       select TRACING
+       select GENERIC_TRACER
        help
          This tracer records all branches on the system in a circular
          buffer giving access to the last N branches for each cpu.
 
 config KMEMTRACE
        bool "Trace SLAB allocations"
-       select TRACING
+       select GENERIC_TRACER
        help
          kmemtrace provides tracing for slab allocator functions, such as
          kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
@@ -322,7 +369,7 @@ config KMEMTRACE
 
 config WORKQUEUE_TRACER
        bool "Trace workqueues"
-       select TRACING
+       select GENERIC_TRACER
        help
          The workqueue tracer provides some statistical informations
           about each cpu workqueue thread such as the number of the
@@ -338,7 +385,7 @@ config BLK_DEV_IO_TRACE
        select RELAY
        select DEBUG_FS
        select TRACEPOINTS
-       select TRACING
+       select GENERIC_TRACER
        select STACKTRACE
        help
          Say Y here if you want to be able to trace the block layer actions
@@ -375,6 +422,20 @@ config DYNAMIC_FTRACE
         were made. If so, it runs stop_machine (stops all CPUS)
         and modifies the code to jump over the call to ftrace.
 
+config FUNCTION_PROFILER
+       bool "Kernel function profiler"
+       depends on FUNCTION_TRACER
+       default n
+       help
+        This option enables the kernel function profiler. A file is created
+        in debugfs called function_profile_enabled which defaults to zero.
+        When a 1 is echoed into this file profiling begins, and when a
+        zero is entered, profiling stops. A file in the trace_stats
+        directory called functions, that show the list of functions that
+        have been hit and their counters.
+
+        If in doubt, say N
+
 config FTRACE_MCOUNT_RECORD
        def_bool y
        depends on DYNAMIC_FTRACE
@@ -385,7 +446,7 @@ config FTRACE_SELFTEST
 
 config FTRACE_STARTUP_TEST
        bool "Perform a startup test on ftrace"
-       depends on TRACING
+       depends on GENERIC_TRACER
        select FTRACE_SELFTEST
        help
          This option performs a series of startup tests on ftrace. On bootup
@@ -396,7 +457,7 @@ config FTRACE_STARTUP_TEST
 config MMIOTRACE
        bool "Memory mapped IO tracing"
        depends on HAVE_MMIOTRACE_SUPPORT && PCI
-       select TRACING
+       select GENERIC_TRACER
        help
          Mmiotrace traces Memory Mapped I/O access and is meant for
          debugging and reverse engineering. It is called from the ioremap
@@ -416,7 +477,23 @@ config MMIOTRACE_TEST
 
          Say N, unless you absolutely know what you are doing.
 
-endmenu
+config RING_BUFFER_BENCHMARK
+       tristate "Ring buffer benchmark stress tester"
+       depends on RING_BUFFER
+       help
+         This option creates a test to stress the ring buffer and benchmark it.
+         It creates its own ring buffer such that it will not interfere with
+         any other users of the ring buffer (such as ftrace). It then creates
+         a producer and consumer that will run for 10 seconds and sleep for
+         10 seconds. Each interval it will print out the number of events
+         it recorded and give a rough estimate of how long each iteration took.
+
+         It does not disable interrupts or raise its priority, so it may be
+         affected by processes that are running.
+
+         If unsure, say N
+
+endif # FTRACE
 
 endif # TRACING_SUPPORT
 
index 2630f51..844164d 100644 (file)
@@ -15,11 +15,17 @@ ifdef CONFIG_TRACING_BRANCHES
 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
 endif
 
+#
+# Make the trace clocks available generally: it's infrastructure
+# relied on by ptrace for example:
+#
+obj-y += trace_clock.o
+
 obj-$(CONFIG_FUNCTION_TRACER) += libftrace.o
 obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
+obj-$(CONFIG_RING_BUFFER_BENCHMARK) += ring_buffer_benchmark.o
 
 obj-$(CONFIG_TRACING) += trace.o
-obj-$(CONFIG_TRACING) += trace_clock.o
 obj-$(CONFIG_TRACING) += trace_output.o
 obj-$(CONFIG_TRACING) += trace_stat.o
 obj-$(CONFIG_TRACING) += trace_printk.o
@@ -39,12 +45,14 @@ obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_POWER_TRACER) += trace_power.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
-obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events.o
-obj-$(CONFIG_EVENT_TRACER) += events.o
-obj-$(CONFIG_EVENT_TRACER) += trace_export.o
+obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+ifeq ($(CONFIG_BLOCK),y)
+obj-$(CONFIG_EVENT_TRACING) += blktrace.o
+endif
+obj-$(CONFIG_EVENT_TRACING) += trace_events.o
+obj-$(CONFIG_EVENT_TRACING) += trace_export.o
 obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
 obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
-obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o
+obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 
 libftrace-y := ftrace.o
index 921ef5d..7bd6a98 100644 (file)
 #include <linux/mutex.h>
 #include <linux/debugfs.h>
 #include <linux/time.h>
-#include <trace/block.h>
 #include <linux/uaccess.h>
+
+#include <trace/events/block.h>
+
 #include "trace_output.h"
 
+#ifdef CONFIG_BLK_DEV_IO_TRACE
+
 static unsigned int blktrace_seq __read_mostly = 1;
 
 static struct trace_array *blk_tr;
@@ -147,7 +151,7 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
 {
        if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0)
                return 1;
-       if (sector < bt->start_lba || sector > bt->end_lba)
+       if (sector && (sector < bt->start_lba || sector > bt->end_lba))
                return 1;
        if (bt->pid && pid != bt->pid)
                return 1;
@@ -192,7 +196,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
        what |= MASK_TC_BIT(rw, DISCARD);
 
        pid = tsk->pid;
-       if (unlikely(act_log_check(bt, what, sector, pid)))
+       if (act_log_check(bt, what, sector, pid))
                return;
        cpu = raw_smp_processor_id();
 
@@ -262,6 +266,7 @@ static void blk_trace_free(struct blk_trace *bt)
 {
        debugfs_remove(bt->msg_file);
        debugfs_remove(bt->dropped_file);
+       debugfs_remove(bt->dir);
        relay_close(bt->rchan);
        free_percpu(bt->sequence);
        free_percpu(bt->msg_data);
@@ -403,11 +408,29 @@ static struct rchan_callbacks blk_relay_callbacks = {
        .remove_buf_file        = blk_remove_buf_file_callback,
 };
 
+/*
+ * Initialise the LBA window of @bt: the extent of the partition behind
+ * @bdev when there is one, otherwise the widest possible range
+ * (0 .. -1ULL).
+ */
+static void blk_trace_setup_lba(struct blk_trace *bt,
+                               struct block_device *bdev)
+{
+       struct hd_struct *hd = bdev ? bdev->bd_part : NULL;
+
+       if (!hd) {
+               /* no device or no partition info: leave the range open */
+               bt->start_lba = 0;
+               bt->end_lba = -1ULL;
+               return;
+       }
+
+       bt->start_lba = hd->start_sect;
+       bt->end_lba = hd->start_sect + hd->nr_sects;
+}
+
 /*
  * Setup everything required to start tracing
  */
 int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
-                       struct blk_user_trace_setup *buts)
+                      struct block_device *bdev,
+                      struct blk_user_trace_setup *buts)
 {
        struct blk_trace *old_bt, *bt = NULL;
        struct dentry *dir = NULL;
@@ -480,10 +503,13 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
        if (!bt->act_mask)
                bt->act_mask = (u16) -1;
 
-       bt->start_lba = buts->start_lba;
-       bt->end_lba = buts->end_lba;
-       if (!bt->end_lba)
-               bt->end_lba = -1ULL;
+       blk_trace_setup_lba(bt, bdev);
+
+       /* overwrite with user settings */
+       if (buts->start_lba)
+               bt->start_lba = buts->start_lba;
+       if (buts->end_lba)
+               bt->end_lba = buts->end_lba;
 
        bt->pid = buts->pid;
        bt->trace_state = Blktrace_setup;
@@ -505,6 +531,7 @@ err:
 }
 
 int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
+                   struct block_device *bdev,
                    char __user *arg)
 {
        struct blk_user_trace_setup buts;
@@ -514,7 +541,7 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
        if (ret)
                return -EFAULT;
 
-       ret = do_blk_trace_setup(q, name, dev, &buts);
+       ret = do_blk_trace_setup(q, name, dev, bdev, &buts);
        if (ret)
                return ret;
 
@@ -582,7 +609,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
        switch (cmd) {
        case BLKTRACESETUP:
                bdevname(bdev, b);
-               ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
+               ret = blk_trace_setup(q, b, bdev->bd_dev, bdev, arg);
                break;
        case BLKTRACESTART:
                start = 1;
@@ -809,7 +836,6 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  * @bio:       the source bio
  * @dev:       target device
  * @from:      source sector
- * @to:                target sector
  *
  * Description:
  *     Device mapper or raid target sometimes need to split a bio because
@@ -817,7 +843,7 @@ static void blk_add_trace_split(struct request_queue *q, struct bio *bio,
  *
  **/
 static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
-                                      dev_t dev, sector_t from, sector_t to)
+                                      dev_t dev, sector_t from)
 {
        struct blk_trace *bt = q->blk_trace;
        struct blk_io_trace_remap r;
@@ -825,12 +851,13 @@ static void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
        if (likely(!bt))
                return;
 
-       r.device = cpu_to_be32(dev);
-       r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
-       r.sector = cpu_to_be64(to);
+       r.device_from = cpu_to_be32(dev);
+       r.device_to   = cpu_to_be32(bio->bi_bdev->bd_dev);
+       r.sector_from = cpu_to_be64(from);
 
-       __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP,
-                       !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
+       __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw,
+                       BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE),
+                       sizeof(r), &r);
 }
 
 /**
@@ -971,6 +998,16 @@ static inline const void *pdu_start(const struct trace_entry *ent)
        return te_blk_io_trace(ent) + 1;
 }
 
+/* Return the ->action field of what te_blk_io_trace() yields for @ent. */
+static inline u32 t_action(const struct trace_entry *ent)
+{
+       return te_blk_io_trace(ent)->action;
+}
+
+/*
+ * Return the raw ->bytes field of what te_blk_io_trace() yields for @ent
+ * (unlike t_sec(), no shift is applied).
+ */
+static inline u32 t_bytes(const struct trace_entry *ent)
+{
+       return te_blk_io_trace(ent)->bytes;
+}
+
 static inline u32 t_sec(const struct trace_entry *ent)
 {
        return te_blk_io_trace(ent)->bytes >> 9;
@@ -996,11 +1033,11 @@ static void get_pdu_remap(const struct trace_entry *ent,
                          struct blk_io_trace_remap *r)
 {
        const struct blk_io_trace_remap *__r = pdu_start(ent);
-       __u64 sector = __r->sector;
+       __u64 sector_from = __r->sector_from;
 
-       r->device = be32_to_cpu(__r->device);
        r->device_from = be32_to_cpu(__r->device_from);
-       r->sector = be64_to_cpu(sector);
+       r->device_to   = be32_to_cpu(__r->device_to);
+       r->sector_from = be64_to_cpu(sector_from);
 }
 
 typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act);
@@ -1031,36 +1068,98 @@ static int blk_log_action(struct trace_iterator *iter, const char *act)
                                MAJOR(t->device), MINOR(t->device), act, rwbs);
 }
 
+/*
+ * Append the PDU bytes of @ent (the data pdu_start() points at, pdu_len
+ * bytes long) to @s as space-separated two-digit hex values wrapped in
+ * parentheses.  A run of trailing zero bytes is cut short and marked
+ * with "..".
+ *
+ * Returns 1 without writing anything when pdu_len is 0.  Otherwise the
+ * function bails out with 0 as soon as a trace_seq_*() call returns 0,
+ * and on completion returns the result of the final trace_seq_puts().
+ */
+static int blk_log_dump_pdu(struct trace_seq *s, const struct trace_entry *ent)
+{
+       const unsigned char *pdu_buf;
+       int pdu_len;
+       int i, end, ret;
+
+       pdu_buf = pdu_start(ent);
+       pdu_len = te_blk_io_trace(ent)->pdu_len;
+
+       if (!pdu_len)
+               return 1;
+
+       /*
+        * Walk back to the last non-zero byte; after the increment, end is
+        * the index just past it, i.e. the first of the trailing zeroes
+        * (or pdu_len when the buffer does not end in a zero).
+        */
+       for (end = pdu_len - 1; end >= 0; end--)
+               if (pdu_buf[end])
+                       break;
+       end++;
+
+       if (!trace_seq_putc(s, '('))
+               return 0;
+
+       for (i = 0; i < pdu_len; i++) {
+
+               /* every byte but the first is preceded by a space */
+               ret = trace_seq_printf(s, "%s%02x",
+                                      i == 0 ? "" : " ", pdu_buf[i]);
+               if (!ret)
+                       return ret;
+
+               /*
+                * stop when the rest is just zeroes and indicate so
+                * with a ".." appended; a single trailing zero
+                * (end == pdu_len - 1) is printed in full instead
+                */
+               if (i == end && end != pdu_len - 1)
+                       return trace_seq_puts(s, " ..) ");
+       }
+
+       return trace_seq_puts(s, ") ");
+}
+
 static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent)
 {
        char cmd[TASK_COMM_LEN];
 
        trace_find_cmdline(ent->pid, cmd);
 
-       if (t_sec(ent))
-               return trace_seq_printf(s, "%llu + %u [%s]\n",
-                                       t_sector(ent), t_sec(ent), cmd);
-       return trace_seq_printf(s, "[%s]\n", cmd);
+       if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
+               int ret;
+
+               ret = trace_seq_printf(s, "%u ", t_bytes(ent));
+               if (!ret)
+                       return 0;
+               ret = blk_log_dump_pdu(s, ent);
+               if (!ret)
+                       return 0;
+               return trace_seq_printf(s, "[%s]\n", cmd);
+       } else {
+               if (t_sec(ent))
+                       return trace_seq_printf(s, "%llu + %u [%s]\n",
+                                               t_sector(ent), t_sec(ent), cmd);
+               return trace_seq_printf(s, "[%s]\n", cmd);
+       }
 }
 
 static int blk_log_with_error(struct trace_seq *s,
                              const struct trace_entry *ent)
 {
-       if (t_sec(ent))
-               return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent),
-                                       t_sec(ent), t_error(ent));
-       return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent));
+       if (t_action(ent) & BLK_TC_ACT(BLK_TC_PC)) {
+               int ret;
+
+               ret = blk_log_dump_pdu(s, ent);
+               if (ret)
+                       return trace_seq_printf(s, "[%d]\n", t_error(ent));
+               return 0;
+       } else {
+               if (t_sec(ent))
+                       return trace_seq_printf(s, "%llu + %u [%d]\n",
+                                               t_sector(ent),
+                                               t_sec(ent), t_error(ent));
+               return trace_seq_printf(s, "%llu [%d]\n",
+                                       t_sector(ent), t_error(ent));
+       }
 }
 
 static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent)
 {
-       struct blk_io_trace_remap r = { .device = 0, };
+       struct blk_io_trace_remap r = { .device_from = 0, };
 
        get_pdu_remap(ent, &r);
        return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
-                              t_sector(ent),
-                              t_sec(ent), MAJOR(r.device), MINOR(r.device),
-                              (unsigned long long)r.sector);
+                               t_sector(ent), t_sec(ent),
+                               MAJOR(r.device_from), MINOR(r.device_from),
+                               (unsigned long long)r.sector_from);
 }
 
 static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent)
@@ -1117,7 +1216,6 @@ static void blk_tracer_print_header(struct seq_file *m)
 static void blk_tracer_start(struct trace_array *tr)
 {
        blk_tracer_enabled = true;
-       trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
 }
 
 static int blk_tracer_init(struct trace_array *tr)
@@ -1130,7 +1228,6 @@ static int blk_tracer_init(struct trace_array *tr)
 static void blk_tracer_stop(struct trace_array *tr)
 {
        blk_tracer_enabled = false;
-       trace_flags |= TRACE_ITER_CONTEXT_INFO;
 }
 
 static void blk_tracer_reset(struct trace_array *tr)
@@ -1182,7 +1279,7 @@ static enum print_line_t print_one_line(struct trace_iterator *iter,
        }
 
        if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act)))
-               ret = trace_seq_printf(s, "Bad pc action %x\n", what);
+               ret = trace_seq_printf(s, "Unknown action %x\n", what);
        else {
                ret = log_action(iter, what2act[what].act[long_act]);
                if (ret)
@@ -1195,9 +1292,6 @@ out:
 static enum print_line_t blk_trace_event_print(struct trace_iterator *iter,
                                               int flags)
 {
-       if (!trace_print_context(iter))
-               return TRACE_TYPE_PARTIAL_LINE;
-
        return print_one_line(iter, false);
 }
 
@@ -1232,6 +1326,18 @@ static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter)
        return print_one_line(iter, true);
 }
 
+/*
+ * Keep TRACE_ITER_CONTEXT_INFO in trace_flags in step with the
+ * TRACE_BLK_OPT_CLASSIC option: setting the option clears the flag,
+ * clearing the option sets it again.  Any other @bit is accepted without
+ * side effects.  Always returns 0.
+ */
+static int blk_tracer_set_flag(u32 old_flags, u32 bit, int set)
+{
+       if (bit != TRACE_BLK_OPT_CLASSIC)
+               return 0;
+
+       /* don't output context-info for blk_classic output */
+       if (set)
+               trace_flags &= ~TRACE_ITER_CONTEXT_INFO;
+       else
+               trace_flags |= TRACE_ITER_CONTEXT_INFO;
+
+       return 0;
+}
+
 static struct tracer blk_tracer __read_mostly = {
        .name           = "blk",
        .init           = blk_tracer_init,
@@ -1241,6 +1347,7 @@ static struct tracer blk_tracer __read_mostly = {
        .print_header   = blk_tracer_print_header,
        .print_line     = blk_tracer_print_line,
        .flags          = &blk_tracer_flags,
+       .set_flag       = blk_tracer_set_flag,
 };
 
 static struct trace_event trace_blk_event = {
@@ -1285,7 +1392,8 @@ static int blk_trace_remove_queue(struct request_queue *q)
 /*
  * Setup everything required to start tracing
  */
-static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
+static int blk_trace_setup_queue(struct request_queue *q,
+                                struct block_device *bdev)
 {
        struct blk_trace *old_bt, *bt = NULL;
        int ret = -ENOMEM;
@@ -1298,9 +1406,10 @@ static int blk_trace_setup_queue(struct request_queue *q, dev_t dev)
        if (!bt->msg_data)
                goto free_bt;
 
-       bt->dev = dev;
+       bt->dev = bdev->bd_dev;
        bt->act_mask = (u16)-1;
-       bt->end_lba = -1ULL;
+
+       blk_trace_setup_lba(bt, bdev);
 
        old_bt = xchg(&q->blk_trace, bt);
        if (old_bt != NULL) {
@@ -1517,7 +1626,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 
        if (attr == &dev_attr_enable) {
                if (value)
-                       ret = blk_trace_setup_queue(q, bdev->bd_dev);
+                       ret = blk_trace_setup_queue(q, bdev);
                else
                        ret = blk_trace_remove_queue(q);
                goto out_unlock_bdev;
@@ -1525,7 +1634,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 
        ret = 0;
        if (q->blk_trace == NULL)
-               ret = blk_trace_setup_queue(q, bdev->bd_dev);
+               ret = blk_trace_setup_queue(q, bdev);
 
        if (ret == 0) {
                if (attr == &dev_attr_act_mask)
@@ -1548,3 +1657,80 @@ out:
        return ret ? ret : count;
 }
 
+/*
+ * Create the blk_trace_attr_group attribute group on @dev's kobject.
+ * Returns whatever sysfs_create_group() returns.
+ */
+int blk_trace_init_sysfs(struct device *dev)
+{
+       return sysfs_create_group(&dev->kobj, &blk_trace_attr_group);
+}
+
+#endif /* CONFIG_BLK_DEV_IO_TRACE */
+
+#ifdef CONFIG_EVENT_TRACING
+
+/*
+ * Format the command bytes of @rq into @buf as space-separated two-digit
+ * hex values.  A run of trailing zero bytes is cut short: the first of
+ * them is printed, followed by " ..".
+ *
+ * @buf always ends up NUL-terminated; it is set to the empty string when
+ * @rq is not a PC request or when rq->cmd_len is 0.  The caller must
+ * provide at least 3 * rq->cmd_len bytes (and never less than 1): two hex
+ * digits per byte, one separator between bytes, and the terminator; the
+ * " .." marker only ever replaces bytes that would have needed as much.
+ */
+void blk_dump_cmd(char *buf, struct request *rq)
+{
+       int i, end;
+       int len = rq->cmd_len;
+       unsigned char *cmd = rq->cmd;
+
+       /*
+        * Terminate up front: with len == 0 the loop below never writes
+        * to buf, which used to leave the caller with an unterminated
+        * buffer.
+        */
+       buf[0] = '\0';
+
+       if (!blk_pc_request(rq))
+               return;
+
+       /* end becomes the index just past the last non-zero byte */
+       for (end = len - 1; end >= 0; end--)
+               if (cmd[end])
+                       break;
+       end++;
+
+       for (i = 0; i < len; i++) {
+               buf += sprintf(buf, "%s%02x", i == 0 ? "" : " ", cmd[i]);
+               /* only zeroes left (more than one): mark and stop */
+               if (i == end && end != len - 1) {
+                       sprintf(buf, " ..");
+                       break;
+               }
+       }
+}
+
+/*
+ * Write the short operation tag for @rw / @bytes into @rwbs as a
+ * NUL-terminated string.
+ *
+ * The first character is 'W' (WRITE bit set), else 'D' (discard bit set),
+ * else 'R' when @bytes is non-zero, else 'N'.  It is followed, in this
+ * order, by 'A', 'B', 'S' and 'M' for each of the AHEAD, BARRIER, SYNCIO
+ * and META bits that is set.  @rwbs therefore needs room for up to six
+ * bytes including the terminator.
+ */
+void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
+{
+       char *p = rwbs;
+       char op;
+
+       if (rw & WRITE)
+               op = 'W';
+       else if (rw & (1 << BIO_RW_DISCARD))
+               op = 'D';
+       else
+               op = bytes ? 'R' : 'N';
+       *p++ = op;
+
+       if (rw & (1 << BIO_RW_AHEAD))
+               *p++ = 'A';
+       if (rw & (1 << BIO_RW_BARRIER))
+               *p++ = 'B';
+       if (rw & (1 << BIO_RW_SYNCIO))
+               *p++ = 'S';
+       if (rw & (1 << BIO_RW_META))
+               *p++ = 'M';
+
+       *p = '\0';
+}
+
+/*
+ * Request flavour of blk_fill_rwbs(): derive the rw bits and the byte
+ * count from @rq, then let blk_fill_rwbs() build the tag in @rwbs.
+ */
+void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
+{
+       int bytes;
+       int rw;
+
+       /*
+        * PC requests take their size from data_len, everything else from
+        * hard_nr_sectors scaled by 512.
+        */
+       if (!blk_pc_request(rq))
+               bytes = rq->hard_nr_sectors << 9;
+       else
+               bytes = rq->data_len;
+
+       /* keep the two low cmd_flags bits, then fold in the discard bit */
+       rw = rq->cmd_flags & 0x03;
+       if (blk_discard_rq(rq))
+               rw |= (1 << BIO_RW_DISCARD);
+
+       blk_fill_rwbs(rwbs, rw, bytes);
+}
+
+#endif /* CONFIG_EVENT_TRACING */
+
diff --git a/kernel/trace/events.c b/kernel/trace/events.c
deleted file mode 100644 (file)
index 246f2aa..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * This is the place to register all trace points as events.
- */
-
-#include <linux/stringify.h>
-
-#include <trace/trace_events.h>
-
-#include "trace_output.h"
-
-#include "trace_events_stage_1.h"
-#include "trace_events_stage_2.h"
-#include "trace_events_stage_3.h"
-
index f1ed080..bb60732 100644 (file)
 #include <linux/list.h>
 #include <linux/hash.h>
 
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include <asm/ftrace.h>
+#include <asm/setup.h>
 
-#include "trace.h"
+#include "trace_output.h"
+#include "trace_stat.h"
 
 #define FTRACE_WARN_ON(cond)                   \
        do {                                    \
@@ -68,7 +70,7 @@ static DEFINE_MUTEX(ftrace_lock);
 
 static struct ftrace_ops ftrace_list_end __read_mostly =
 {
-       .func = ftrace_stub,
+       .func           = ftrace_stub,
 };
 
 static struct ftrace_ops *ftrace_list __read_mostly = &ftrace_list_end;
@@ -240,6 +242,580 @@ static void ftrace_update_pid_func(void)
 #endif
 }
 
+#ifdef CONFIG_FUNCTION_PROFILER
+/*
+ * One profiled function.
+ *
+ * @node:    links the record into a bucket of ftrace_profile_stat::hash
+ * @ip:      function address; also the hash key
+ * @counter: incremented by function_profile_call() on every hit
+ * @time:    accumulated by profile_graph_return() (graph tracer only)
+ */
+struct ftrace_profile {
+       struct hlist_node               node;
+       unsigned long                   ip;
+       unsigned long                   counter;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       unsigned long long              time;
+#endif
+};
+
+/*
+ * One page worth of profile records.
+ *
+ * @next:    following page in the chain, NULL on the last one
+ * @index:   number of entries of @records handed out so far
+ * @records: the records themselves, filling the rest of the page
+ */
+struct ftrace_profile_page {
+       struct ftrace_profile_page      *next;
+       unsigned long                   index;
+       struct ftrace_profile           records[];
+};
+
+/*
+ * Profiler state, one instance per cpu (see ftrace_profile_stats).
+ *
+ * @disabled: recursion guard used by ftrace_profile_alloc()
+ * @hash:     FTRACE_PROFILE_HASH_SIZE buckets of ftrace_profile records
+ * @pages:    page that new records are currently taken from
+ * @start:    first page of the chain
+ * @stat:     stat tracer instance registered for this cpu
+ */
+struct ftrace_profile_stat {
+       atomic_t                        disabled;
+       struct hlist_head               *hash;
+       struct ftrace_profile_page      *pages;
+       struct ftrace_profile_page      *start;
+       struct tracer_stat              stat;
+};
+
+#define PROFILE_RECORDS_SIZE                                           \
+       (PAGE_SIZE - offsetof(struct ftrace_profile_page, records))
+
+#define PROFILES_PER_PAGE                                      \
+       (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile))
+
+static int ftrace_profile_bits __read_mostly;
+static int ftrace_profile_enabled __read_mostly;
+
+/* ftrace_profile_lock - synchronize the enable and disable of the profiler */
+static DEFINE_MUTEX(ftrace_profile_lock);
+
+static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats);
+
+#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */
+
+/*
+ * Return the record to show after @v, or NULL once the page chain is
+ * exhausted.
+ *
+ * function_stat_start() calls this with @idx == 0 and @v pointing at the
+ * very first record. In that case @v itself is the first candidate and
+ * must not be stepped over, otherwise the first profiled function would
+ * never be reported. Any non-zero @idx advances to the next record.
+ * NOTE(review): this relies on the stat core passing a non-zero @idx for
+ * its own stat_next calls - confirm against trace_stat.c.
+ */
+static void *
+function_stat_next(void *v, int idx)
+{
+       struct ftrace_profile *rec = v;
+       struct ftrace_profile_page *pg;
+
+       /* Records live inside their page, so masking yields the page header */
+       pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK);
+
+ again:
+       if (idx != 0)
+               rec++;
+
+       /* Ran past the used part of this page: continue on the next one */
+       if ((void *)rec >= (void *)&pg->records[pg->index]) {
+               pg = pg->next;
+               if (!pg)
+                       return NULL;
+               rec = &pg->records[0];
+               if (!rec->counter)
+                       goto again;
+       }
+
+       return rec;
+}
+
+/*
+ * Begin iterating the records of the per-cpu profile that embeds @trace.
+ * Returns NULL when no record pages have been set up yet; otherwise defers
+ * to function_stat_next(), passing the first record of the first page and
+ * an index of 0.
+ */
+static void *function_stat_start(struct tracer_stat *trace)
+{
+       struct ftrace_profile_stat *stat =
+               container_of(trace, struct ftrace_profile_stat, stat);
+
+       if (!stat || !stat->start)
+               return NULL;
+
+       return function_stat_next(&stat->start->records[0], 0);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * With the function graph tracer, records are ordered by accumulated time.
+ * Returns -1, 0 or 1 for less than, equal and greater than.
+ */
+static int function_stat_cmp(void *p1, void *p2)
+{
+       struct ftrace_profile *lhs = p1;
+       struct ftrace_profile *rhs = p2;
+
+       if (lhs->time == rhs->time)
+               return 0;
+
+       return lhs->time < rhs->time ? -1 : 1;
+}
+#else
+/*
+ * Without the function graph tracer, records are ordered by hit count.
+ * Returns -1, 0 or 1 for less than, equal and greater than.
+ */
+static int function_stat_cmp(void *p1, void *p2)
+{
+       struct ftrace_profile *lhs = p1;
+       struct ftrace_profile *rhs = p2;
+
+       if (lhs->counter == rhs->counter)
+               return 0;
+
+       return lhs->counter < rhs->counter ? -1 : 1;
+}
+#endif
+
+/*
+ * Print the two header lines of a function stat file to @m. The graph
+ * tracer build adds the "Time" and "Avg" columns. Always returns 0.
+ */
+static int function_stat_headers(struct seq_file *m)
+{
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       seq_printf(m, "  Function                               "
+                  "Hit    Time            Avg\n"
+                     "  --------                               "
+                  "---    ----            ---\n");
+#else
+       seq_printf(m, "  Function                               Hit\n"
+                     "  --------                               ---\n");
+#endif
+       return 0;
+}
+
+/*
+ * Print one profile record @v to @m: symbol name and hit count, plus the
+ * total and average time when the function graph tracer is built in.
+ * Always returns 0.
+ */
+static int function_stat_show(struct seq_file *m, void *v)
+{
+       struct ftrace_profile *rec = v;
+       char str[KSYM_SYMBOL_LEN];
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       /* The trace_seq scratch buffer is static, so serialize its users */
+       static DEFINE_MUTEX(mutex);
+       static struct trace_seq s;
+       unsigned long long avg;
+#endif
+
+       kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+       seq_printf(m, "  %-30.30s  %10lu", str, rec->counter);
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       seq_printf(m, "    ");
+       avg = rec->time;
+       /* NOTE(review): rec->counter is not checked for zero before dividing */
+       do_div(avg, rec->counter);
+
+       mutex_lock(&mutex);
+       trace_seq_init(&s);
+       trace_print_graph_duration(rec->time, &s);
+       trace_seq_puts(&s, "    ");
+       trace_print_graph_duration(avg, &s);
+       trace_print_seq(m, &s);
+       mutex_unlock(&mutex);
+#endif
+       seq_putc(m, '\n');
+
+       return 0;
+}
+
+/*
+ * Wipe the profile of one cpu without freeing anything: rewind the fill
+ * cursor to the first page, clear every record page and empty the hash.
+ */
+static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
+{
+       struct ftrace_profile_page *page;
+
+       stat->pages = stat->start;
+
+       for (page = stat->start; page; page = page->next) {
+               memset(page->records, 0, PROFILE_RECORDS_SIZE);
+               page->index = 0;
+       }
+
+       memset(stat->hash, 0,
+              FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head));
+}
+
+/*
+ * Preallocate the chain of zeroed pages that hold the profile records of
+ * @stat. Does nothing if the pages already exist.
+ *
+ * Returns 0 on success or -ENOMEM, in which case every page allocated so
+ * far has been freed and stat->pages / stat->start are NULL again.
+ */
+int ftrace_profile_pages_init(struct ftrace_profile_stat *stat)
+{
+       struct ftrace_profile_page *pg;
+       int functions;
+       int pages;
+       int i;
+
+       /* If we already allocated, do nothing */
+       if (stat->pages)
+               return 0;
+
+       stat->pages = (void *)get_zeroed_page(GFP_KERNEL);
+       if (!stat->pages)
+               return -ENOMEM;
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+       functions = ftrace_update_tot_cnt;
+#else
+       /*
+        * We do not know the number of functions that exist because
+        * dynamic tracing is what counts them. With past experience
+        * we have around 20K functions. That should be more than enough.
+        * It is highly unlikely we will execute every function in
+        * the kernel.
+        */
+       functions = 20000;
+#endif
+
+       pg = stat->start = stat->pages;
+
+       pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE);
+
+       /* The first page was allocated above, so only pages - 1 are left */
+       for (i = 1; i < pages; i++) {
+               pg->next = (void *)get_zeroed_page(GFP_KERNEL);
+               if (!pg->next)
+                       goto out_free;
+               pg = pg->next;
+       }
+
+       return 0;
+
+ out_free:
+       /*
+        * stat->pages still points at stat->start here, so walking the
+        * chain from stat->start releases every page exactly once. Do not
+        * free stat->pages separately, that would be a double free.
+        */
+       pg = stat->start;
+       while (pg) {
+               unsigned long tmp = (unsigned long)pg;
+
+               pg = pg->next;
+               free_page(tmp);
+       }
+
+       stat->pages = NULL;
+       stat->start = NULL;
+
+       return -ENOMEM;
+}
+
+/*
+ * Prepare the profile of @cpu: reset it when it already exists, otherwise
+ * allocate its hash table and record pages. Returns 0 or -ENOMEM.
+ */
+static int ftrace_profile_init_cpu(int cpu)
+{
+       struct ftrace_profile_stat *stat = &per_cpu(ftrace_profile_stats, cpu);
+       int nr_buckets = FTRACE_PROFILE_HASH_SIZE;
+
+       /* If the profile is already created, simply reset it */
+       if (stat->hash) {
+               ftrace_profile_reset(stat);
+               return 0;
+       }
+
+       /*
+        * We are profiling all functions, but usually only a few thousand
+        * functions are hit. We'll make a hash of 1024 items.
+        */
+       stat->hash = kzalloc(sizeof(struct hlist_head) * nr_buckets,
+                            GFP_KERNEL);
+       if (!stat->hash)
+               return -ENOMEM;
+
+       /* Work out the number of hash bits once, from the table size */
+       if (!ftrace_profile_bits) {
+               int span = nr_buckets - 1;
+
+               while (span) {
+                       ftrace_profile_bits++;
+                       span >>= 1;
+               }
+       }
+
+       /* Preallocate the function profiling pages */
+       if (ftrace_profile_pages_init(stat) >= 0)
+               return 0;
+
+       kfree(stat->hash);
+       stat->hash = NULL;
+       return -ENOMEM;
+}
+
+/*
+ * Set up (or reset) the profile of every online cpu. Stops at the first
+ * failure and returns its error code; returns 0 when all cpus succeeded.
+ */
+static int ftrace_profile_init(void)
+{
+       int cpu;
+
+       for_each_online_cpu(cpu) {
+               int err = ftrace_profile_init_cpu(cpu);
+
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Look up the profile record for @ip in the hash of @stat.
+ * Returns the record, or NULL when @ip has not been recorded yet.
+ *
+ * interrupts must be disabled
+ */
+static struct ftrace_profile *
+ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip)
+{
+       struct ftrace_profile *rec;
+       struct hlist_head *hhd;
+       struct hlist_node *n;
+       unsigned long key;
+
+       key = hash_long(ip, ftrace_profile_bits);
+       hhd = &stat->hash[key];
+
+       /* Cheap early exit for an empty bucket */
+       if (hlist_empty(hhd))
+               return NULL;
+
+       hlist_for_each_entry_rcu(rec, n, hhd, node) {
+               if (rec->ip == ip)
+                       return rec;
+       }
+
+       return NULL;
+}
+
+/* Insert @rec at the head of the hash bucket selected by its ip. */
+static void ftrace_add_profile(struct ftrace_profile_stat *stat,
+                              struct ftrace_profile *rec)
+{
+       struct hlist_head *bucket;
+
+       bucket = &stat->hash[hash_long(rec->ip, ftrace_profile_bits)];
+       hlist_add_head_rcu(&rec->node, bucket);
+}
+
+/*
+ * The memory is already allocated, this simply finds a new record to use.
+ *
+ * Returns the record for @ip (a freshly claimed one, or an existing one
+ * found on the second lookup). Returns NULL when called recursively or
+ * when all preallocated pages are full.
+ */
+static struct ftrace_profile *
+ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
+{
+       struct ftrace_profile *rec = NULL;
+
+       /* prevent recursion (from NMIs) */
+       if (atomic_inc_return(&stat->disabled) != 1)
+               goto out;
+
+       /*
+        * Try to find the function again since an NMI
+        * could have added it
+        */
+       rec = ftrace_find_profiled_func(stat, ip);
+       if (rec)
+               goto out;
+
+       /* Current page is full: move on, or give up if it was the last */
+       if (stat->pages->index == PROFILES_PER_PAGE) {
+               if (!stat->pages->next)
+                       goto out;
+               stat->pages = stat->pages->next;
+       }
+
+       rec = &stat->pages->records[stat->pages->index++];
+       rec->ip = ip;
+       ftrace_add_profile(stat, rec);
+
+ out:
+       /* Balances the increment above on every path, including recursion */
+       atomic_dec(&stat->disabled);
+
+       return rec;
+}
+
+/*
+ * Profiling callback: count one hit for the function at @ip on the
+ * current cpu, creating its record on first use. @parent_ip is unused.
+ * Runs with interrupts disabled around the per-cpu access.
+ */
+static void
+function_profile_call(unsigned long ip, unsigned long parent_ip)
+{
+       struct ftrace_profile_stat *stat;
+       struct ftrace_profile *rec;
+       unsigned long flags;
+
+       if (!ftrace_profile_enabled)
+               return;
+
+       local_irq_save(flags);
+
+       stat = &__get_cpu_var(ftrace_profile_stats);
+       /* Re-check the enable flag now that interrupts are off */
+       if (!stat->hash || !ftrace_profile_enabled)
+               goto out;
+
+       rec = ftrace_find_profiled_func(stat, ip);
+       if (!rec) {
+               rec = ftrace_profile_alloc(stat, ip);
+               if (!rec)
+                       goto out;
+       }
+
+       rec->counter++;
+ out:
+       local_irq_restore(flags);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+/*
+ * Graph tracer entry hook: count a hit for trace->func.
+ * Always returns 1.
+ */
+static int profile_graph_entry(struct ftrace_graph_ent *trace)
+{
+       function_profile_call(trace->func, 0);
+       return 1;
+}
+
+/*
+ * Graph tracer return hook: add the duration of the call described by
+ * @trace to the record of trace->func on the current cpu. When the
+ * TRACE_ITER_GRAPH_TIME flag is clear, the time accounted to nested calls
+ * is subtracted first.
+ */
+static void profile_graph_return(struct ftrace_graph_ret *trace)
+{
+       struct ftrace_profile_stat *stat;
+       unsigned long long calltime;
+       struct ftrace_profile *rec;
+       unsigned long flags;
+
+       local_irq_save(flags);
+       stat = &__get_cpu_var(ftrace_profile_stats);
+       if (!stat->hash || !ftrace_profile_enabled)
+               goto out;
+
+       calltime = trace->rettime - trace->calltime;
+
+       if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) {
+               int index;
+
+               index = trace->depth;
+
+               /* Append this call time to the parent time to subtract */
+               if (index)
+                       current->ret_stack[index - 1].subtime += calltime;
+
+               /* Clamp at zero rather than letting the subtraction wrap */
+               if (current->ret_stack[index].subtime < calltime)
+                       calltime -= current->ret_stack[index].subtime;
+               else
+                       calltime = 0;
+       }
+
+       /* Only existing records are updated; none is created here */
+       rec = ftrace_find_profiled_func(stat, trace->func);
+       if (rec)
+               rec->time += calltime;
+
+ out:
+       local_irq_restore(flags);
+}
+
+/* Graph tracer build: hook the profiler in via register_ftrace_graph(). */
+static int register_ftrace_profiler(void)
+{
+       return register_ftrace_graph(&profile_graph_return,
+                                    &profile_graph_entry);
+}
+
+/* Graph tracer build: undo register_ftrace_profiler(). */
+static void unregister_ftrace_profiler(void)
+{
+       unregister_ftrace_graph();
+}
+#else
+/* Without the graph tracer the profiler is a plain function tracer callback */
+static struct ftrace_ops ftrace_profile_ops __read_mostly =
+{
+       .func           = function_profile_call,
+};
+
+/* Non-graph build: register ftrace_profile_ops as a function tracer. */
+static int register_ftrace_profiler(void)
+{
+       return register_ftrace_function(&ftrace_profile_ops);
+}
+
+/* Non-graph build: undo register_ftrace_profiler(). */
+static void unregister_ftrace_profiler(void)
+{
+       unregister_ftrace_function(&ftrace_profile_ops);
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+/*
+ * Write handler of "function_profile_enabled": parse a decimal number
+ * from user space and switch the profiler on (non-zero) or off (zero).
+ *
+ * Returns @cnt on success. Returns -EINVAL when the input does not fit
+ * the local buffer, -EFAULT when it cannot be copied, or the error from
+ * parsing, profile setup or tracer registration. The file position is
+ * only advanced on success.
+ */
+static ssize_t
+ftrace_profile_write(struct file *filp, const char __user *ubuf,
+                    size_t cnt, loff_t *ppos)
+{
+       unsigned long val;
+       char buf[64];           /* big enough to hold a number */
+       int ret;
+
+       if (cnt >= sizeof(buf))
+               return -EINVAL;
+
+       if (copy_from_user(buf, ubuf, cnt))
+               return -EFAULT;
+
+       buf[cnt] = 0;
+
+       ret = strict_strtoul(buf, 10, &val);
+       if (ret < 0)
+               return ret;
+
+       val = !!val;
+
+       mutex_lock(&ftrace_profile_lock);
+       if (ftrace_profile_enabled ^ val) {
+               if (val) {
+                       ret = ftrace_profile_init();
+                       if (ret < 0)
+                               goto out;
+
+                       ret = register_ftrace_profiler();
+                       if (ret < 0)
+                               goto out;
+                       ftrace_profile_enabled = 1;
+               } else {
+                       ftrace_profile_enabled = 0;
+                       /*
+                        * unregister_ftrace_profiler calls stop_machine
+                        * so this acts like a synchronize_sched.
+                        */
+                       unregister_ftrace_profiler();
+               }
+       }
+ out:
+       mutex_unlock(&ftrace_profile_lock);
+
+       /* Keep the error code out of the size_t count and the position */
+       if (ret < 0)
+               return ret;
+
+       *ppos += cnt;
+
+       return cnt;
+}
+
+/*
+ * Read handler of "function_profile_enabled": report the current value of
+ * ftrace_profile_enabled as a decimal number followed by a newline.
+ */
+static ssize_t
+ftrace_profile_read(struct file *filp, char __user *ubuf,
+                    size_t cnt, loff_t *ppos)
+{
+       char buf[64];           /* big enough to hold a number */
+       int r;
+
+       r = sprintf(buf, "%u\n", ftrace_profile_enabled);
+       return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+/* File operations of the "function_profile_enabled" debugfs file */
+static const struct file_operations ftrace_profile_fops = {
+       .open           = tracing_open_generic,
+       .read           = ftrace_profile_read,
+       .write          = ftrace_profile_write,
+};
+
+/*
+ * Template used to initialize the real stat files: it is copied into each
+ * per-cpu ftrace_profile_stat::stat, whose name is then replaced. The
+ * template itself is __initdata.
+ */
+static struct tracer_stat function_stats __initdata = {
+       .name           = "functions",
+       .stat_start     = function_stat_start,
+       .stat_next      = function_stat_next,
+       .stat_cmp       = function_stat_cmp,
+       .stat_headers   = function_stat_headers,
+       .stat_show      = function_stat_show
+};
+
+/*
+ * Register one "function<cpu>" stat tracer per possible cpu and create
+ * the "function_profile_enabled" control file under @d_tracer.
+ *
+ * On an allocation or registration failure a warning is emitted and the
+ * function returns early; stat tracers registered so far stay in place.
+ *
+ * Marked __init: it copies from the __initdata template function_stats
+ * and its only caller, ftrace_init_debugfs(), is __init as well.
+ */
+static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+       struct ftrace_profile_stat *stat;
+       struct dentry *entry;
+       char *name;
+       int ret;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               stat = &per_cpu(ftrace_profile_stats, cpu);
+
+               /* allocate enough for function name + cpu number */
+               name = kmalloc(32, GFP_KERNEL);
+               if (!name) {
+                       /*
+                        * The files created are permanent, if something happens
+                        * we still do not free memory. Note that stat is
+                        * per-cpu data, not a heap allocation, so it must
+                        * never be handed to kfree().
+                        */
+                       WARN(1,
+                            "Could not allocate stat file for cpu %d\n",
+                            cpu);
+                       return;
+               }
+               stat->stat = function_stats;
+               snprintf(name, 32, "function%d", cpu);
+               stat->stat.name = name;
+               ret = register_stat_tracer(&stat->stat);
+               if (ret) {
+                       WARN(1,
+                            "Could not register function stat for cpu %d\n",
+                            cpu);
+                       kfree(name);
+                       return;
+               }
+       }
+
+       entry = debugfs_create_file("function_profile_enabled", 0644,
+                                   d_tracer, NULL, &ftrace_profile_fops);
+       if (!entry)
+               pr_warning("Could not create debugfs "
+                          "'function_profile_enabled' entry\n");
+}
+
+#else /* CONFIG_FUNCTION_PROFILER */
+/* No function profiler configured: there are no debugfs files to create */
+static void ftrace_profile_debugfs(struct dentry *d_tracer)
+{
+}
+#endif /* CONFIG_FUNCTION_PROFILER */
+
 /* set when tracing only a pid */
 struct pid *ftrace_pid_trace;
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
@@ -261,7 +837,6 @@ struct ftrace_func_probe {
        struct rcu_head         rcu;
 };
 
-
 enum {
        FTRACE_ENABLE_CALLS             = (1 << 0),
        FTRACE_DISABLE_CALLS            = (1 << 1),
@@ -346,30 +921,6 @@ static void ftrace_free_rec(struct dyn_ftrace *rec)
        rec->flags |= FTRACE_FL_FREE;
 }
 
-void ftrace_release(void *start, unsigned long size)
-{
-       struct dyn_ftrace *rec;
-       struct ftrace_page *pg;
-       unsigned long s = (unsigned long)start;
-       unsigned long e = s + size;
-
-       if (ftrace_disabled || !start)
-               return;
-
-       mutex_lock(&ftrace_lock);
-       do_for_each_ftrace_rec(pg, rec) {
-               if ((rec->ip >= s) && (rec->ip < e)) {
-                       /*
-                        * rec->ip is changed in ftrace_free_rec()
-                        * It should not between s and e if record was freed.
-                        */
-                       FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
-                       ftrace_free_rec(rec);
-               }
-       } while_for_each_ftrace_rec();
-       mutex_unlock(&ftrace_lock);
-}
-
 static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip)
 {
        struct dyn_ftrace *rec;
@@ -1408,7 +1959,7 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
 
 static struct ftrace_ops trace_probe_ops __read_mostly =
 {
-       .func = function_trace_probe_call,
+       .func           = function_trace_probe_call,
 };
 
 static int ftrace_probe_registered;
@@ -1823,6 +2374,45 @@ void ftrace_set_notrace(unsigned char *buf, int len, int reset)
        ftrace_set_regex(buf, len, reset, 0);
 }
 
+/*
+ * command line interface to allow users to set filters on boot up.
+ */
+#define FTRACE_FILTER_SIZE             COMMAND_LINE_SIZE
+static char ftrace_notrace_buf[FTRACE_FILTER_SIZE] __initdata;
+static char ftrace_filter_buf[FTRACE_FILTER_SIZE] __initdata;
+
+/*
+ * "ftrace_notrace=" boot parameter: stash the list in ftrace_notrace_buf
+ * for set_ftrace_early_filters(). strlcpy() truncates an over-long
+ * argument but always leaves the buffer NUL-terminated, which strncpy()
+ * with the full buffer size does not guarantee. Returns 1.
+ */
+static int __init set_ftrace_notrace(char *str)
+{
+       strlcpy(ftrace_notrace_buf, str, FTRACE_FILTER_SIZE);
+       return 1;
+}
+__setup("ftrace_notrace=", set_ftrace_notrace);
+
+/*
+ * "ftrace_filter=" boot parameter: stash the list in ftrace_filter_buf
+ * for set_ftrace_early_filters(). strlcpy() truncates an over-long
+ * argument but always leaves the buffer NUL-terminated, which strncpy()
+ * with the full buffer size does not guarantee. Returns 1.
+ */
+static int __init set_ftrace_filter(char *str)
+{
+       strlcpy(ftrace_filter_buf, str, FTRACE_FILTER_SIZE);
+       return 1;
+}
+__setup("ftrace_filter=", set_ftrace_filter);
+
+/*
+ * Split @buf at commas (modifying it in place) and pass every piece to
+ * ftrace_set_regex() with the given @enable value and no reset.
+ */
+static void __init set_ftrace_early_filter(char *buf, int enable)
+{
+       char *token;
+
+       while ((token = strsep(&buf, ",")) != NULL)
+               ftrace_set_regex(token, strlen(token), 0, enable);
+}
+
+/*
+ * Apply the lists captured from the "ftrace_filter=" and "ftrace_notrace="
+ * boot parameters, skipping whichever buffer is empty.
+ */
+static void __init set_ftrace_early_filters(void)
+{
+       if (ftrace_filter_buf[0])
+               set_ftrace_early_filter(ftrace_filter_buf, 1);
+       if (ftrace_notrace_buf[0])
+               set_ftrace_early_filter(ftrace_notrace_buf, 0);
+}
+
 static int
 ftrace_regex_release(struct inode *inode, struct file *file, int enable)
 {
@@ -2128,38 +2718,23 @@ static const struct file_operations ftrace_graph_fops = {
 
 static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 {
-       struct dentry *entry;
 
-       entry = debugfs_create_file("available_filter_functions", 0444,
-                                   d_tracer, NULL, &ftrace_avail_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'available_filter_functions' entry\n");
+       trace_create_file("available_filter_functions", 0444,
+                       d_tracer, NULL, &ftrace_avail_fops);
 
-       entry = debugfs_create_file("failures", 0444,
-                                   d_tracer, NULL, &ftrace_failures_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'failures' entry\n");
+       trace_create_file("failures", 0444,
+                       d_tracer, NULL, &ftrace_failures_fops);
 
-       entry = debugfs_create_file("set_ftrace_filter", 0644, d_tracer,
-                                   NULL, &ftrace_filter_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'set_ftrace_filter' entry\n");
+       trace_create_file("set_ftrace_filter", 0644, d_tracer,
+                       NULL, &ftrace_filter_fops);
 
-       entry = debugfs_create_file("set_ftrace_notrace", 0644, d_tracer,
+       trace_create_file("set_ftrace_notrace", 0644, d_tracer,
                                    NULL, &ftrace_notrace_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'set_ftrace_notrace' entry\n");
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       entry = debugfs_create_file("set_graph_function", 0444, d_tracer,
+       trace_create_file("set_graph_function", 0444, d_tracer,
                                    NULL,
                                    &ftrace_graph_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'set_graph_function' entry\n");
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
        return 0;
@@ -2197,14 +2772,72 @@ static int ftrace_convert_nops(struct module *mod,
        return 0;
 }
 
-void ftrace_init_module(struct module *mod,
-                       unsigned long *start, unsigned long *end)
+#ifdef CONFIG_MODULES
+/*
+ * Free every dyn_ftrace record whose ip lies in [@start, @end).
+ * Does nothing when ftrace is disabled, @start is NULL or the range is
+ * empty. Takes ftrace_lock while walking the records.
+ */
+void ftrace_release(void *start, void *end)
+{
+       struct dyn_ftrace *rec;
+       struct ftrace_page *pg;
+       unsigned long s = (unsigned long)start;
+       unsigned long e = (unsigned long)end;
+
+       if (ftrace_disabled || !start || start == end)
+               return;
+
+       mutex_lock(&ftrace_lock);
+       do_for_each_ftrace_rec(pg, rec) {
+               if ((rec->ip >= s) && (rec->ip < e)) {
+                       /*
+                        * rec->ip is changed in ftrace_free_rec()
+                        * It should not be between s and e if the record
+                        * was already freed.
+                        */
+                       FTRACE_WARN_ON(rec->flags & FTRACE_FL_FREE);
+                       ftrace_free_rec(rec);
+               }
+       } while_for_each_ftrace_rec();
+       mutex_unlock(&ftrace_lock);
+}
+
+/*
+ * Run ftrace_convert_nops() over the call sites [@start, @end) of @mod.
+ * Skipped when ftrace is disabled or the range is empty.
+ */
+static void ftrace_init_module(struct module *mod,
+                              unsigned long *start, unsigned long *end)
 {
        if (ftrace_disabled || start == end)
                return;
        ftrace_convert_nops(mod, start, end);
 }
 
+/*
+ * Module notifier callback. @data is the struct module and @val its new
+ * state: a COMING module gets its ftrace call sites initialized, a GOING
+ * module gets them released. Other states are ignored. Always returns 0.
+ */
+static int ftrace_module_notify(struct notifier_block *self,
+                               unsigned long val, void *data)
+{
+       struct module *mod = data;
+
+       switch (val) {
+       case MODULE_STATE_COMING:
+               ftrace_init_module(mod, mod->ftrace_callsites,
+                                  mod->ftrace_callsites +
+                                  mod->num_ftrace_callsites);
+               break;
+       case MODULE_STATE_GOING:
+               ftrace_release(mod->ftrace_callsites,
+                              mod->ftrace_callsites +
+                              mod->num_ftrace_callsites);
+               break;
+       }
+
+       return 0;
+}
+#else
+/* Without CONFIG_MODULES the notifier callback has nothing to do */
+static int ftrace_module_notify(struct notifier_block *self,
+                               unsigned long val, void *data)
+{
+       return 0;
+}
+#endif /* CONFIG_MODULES */
+
+/* Notifier block handed to register_module_notifier() in ftrace_init() */
+struct notifier_block ftrace_module_nb = {
+       .notifier_call = ftrace_module_notify,
+       .priority = 0,
+};
+
 extern unsigned long __start_mcount_loc[];
 extern unsigned long __stop_mcount_loc[];
 
@@ -2236,6 +2869,12 @@ void __init ftrace_init(void)
                                  __start_mcount_loc,
                                  __stop_mcount_loc);
 
+       ret = register_module_notifier(&ftrace_module_nb);
+       if (ret)
+               pr_warning("Failed to register trace ftrace module notifier\n");
+
+       set_ftrace_early_filters();
+
        return;
  failed:
        ftrace_disabled = 1;
@@ -2417,7 +3056,6 @@ static const struct file_operations ftrace_pid_fops = {
 static __init int ftrace_init_debugfs(void)
 {
        struct dentry *d_tracer;
-       struct dentry *entry;
 
        d_tracer = tracing_init_dentry();
        if (!d_tracer)
@@ -2425,11 +3063,11 @@ static __init int ftrace_init_debugfs(void)
 
        ftrace_init_dyn_debugfs(d_tracer);
 
-       entry = debugfs_create_file("set_ftrace_pid", 0644, d_tracer,
-                                   NULL, &ftrace_pid_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'set_ftrace_pid' entry\n");
+       trace_create_file("set_ftrace_pid", 0644, d_tracer,
+                           NULL, &ftrace_pid_fops);
+
+       ftrace_profile_debugfs(d_tracer);
+
        return 0;
 }
 fs_initcall(ftrace_init_debugfs);
@@ -2538,7 +3176,7 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
-static atomic_t ftrace_graph_active;
+static int ftrace_graph_active;
 static struct notifier_block ftrace_suspend_notifier;
 
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
@@ -2580,12 +3218,12 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
                }
 
                if (t->ret_stack == NULL) {
-                       t->curr_ret_stack = -1;
-                       /* Make sure IRQs see the -1 first: */
-                       barrier();
-                       t->ret_stack = ret_stack_list[start++];
                        atomic_set(&t->tracing_graph_pause, 0);
                        atomic_set(&t->trace_overrun, 0);
+                       t->curr_ret_stack = -1;
+                       /* Make sure the tasks see the -1 first: */
+                       smp_wmb();
+                       t->ret_stack = ret_stack_list[start++];
                }
        } while_each_thread(g, t);
 
@@ -2643,8 +3281,10 @@ static int start_graph_tracing(void)
                return -ENOMEM;
 
        /* The cpu_boot init_task->ret_stack will never be freed */
-       for_each_online_cpu(cpu)
-               ftrace_graph_init_task(idle_task(cpu));
+       for_each_online_cpu(cpu) {
+               if (!idle_task(cpu)->ret_stack)
+                       ftrace_graph_init_task(idle_task(cpu));
+       }
 
        do {
                ret = alloc_retstack_tasklist(ret_stack_list);
@@ -2690,7 +3330,7 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
        mutex_lock(&ftrace_lock);
 
        /* we currently allow only one tracer registered at a time */
-       if (atomic_read(&ftrace_graph_active)) {
+       if (ftrace_graph_active) {
                ret = -EBUSY;
                goto out;
        }
@@ -2698,10 +3338,10 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
        ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call;
        register_pm_notifier(&ftrace_suspend_notifier);
 
-       atomic_inc(&ftrace_graph_active);
+       ftrace_graph_active++;
        ret = start_graph_tracing();
        if (ret) {
-               atomic_dec(&ftrace_graph_active);
+               ftrace_graph_active--;
                goto out;
        }
 
@@ -2719,10 +3359,10 @@ void unregister_ftrace_graph(void)
 {
        mutex_lock(&ftrace_lock);
 
-       if (!unlikely(atomic_read(&ftrace_graph_active)))
+       if (unlikely(!ftrace_graph_active))
                goto out;
 
-       atomic_dec(&ftrace_graph_active);
+       ftrace_graph_active--;
        unregister_trace_sched_switch(ftrace_graph_probe_sched_switch);
        ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
        ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -2736,18 +3376,25 @@ void unregister_ftrace_graph(void)
 /* Allocate a return stack for newly created task */
 void ftrace_graph_init_task(struct task_struct *t)
 {
-       if (atomic_read(&ftrace_graph_active)) {
-               t->ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+       /* Make sure we do not use the parent ret_stack */
+       t->ret_stack = NULL;
+
+       if (ftrace_graph_active) {
+               struct ftrace_ret_stack *ret_stack;
+
+               ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
                                * sizeof(struct ftrace_ret_stack),
                                GFP_KERNEL);
-               if (!t->ret_stack)
+               if (!ret_stack)
                        return;
                t->curr_ret_stack = -1;
                atomic_set(&t->tracing_graph_pause, 0);
                atomic_set(&t->trace_overrun, 0);
                t->ftrace_timestamp = 0;
-       } else
-               t->ret_stack = NULL;
+               /* make curr_ret_stack visible before we add the ret_stack */
+               smp_wmb();
+               t->ret_stack = ret_stack;
+       }
 }
 
 void ftrace_graph_exit_task(struct task_struct *t)
index 5011f4d..86cdf67 100644 (file)
@@ -12,7 +12,7 @@
 #include <linux/dcache.h>
 #include <linux/fs.h>
 
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 
 #include "trace_output.h"
 #include "trace.h"
@@ -42,6 +42,7 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
                                   gfp_t gfp_flags,
                                   int node)
 {
+       struct ftrace_event_call *call = &event_kmem_alloc;
        struct trace_array *tr = kmemtrace_array;
        struct kmemtrace_alloc_entry *entry;
        struct ring_buffer_event *event;
@@ -62,7 +63,8 @@ static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
        entry->gfp_flags        = gfp_flags;
        entry->node             = node;
 
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 
        trace_wake_up();
 }
@@ -71,6 +73,7 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
                                  unsigned long call_site,
                                  const void *ptr)
 {
+       struct ftrace_event_call *call = &event_kmem_free;
        struct trace_array *tr = kmemtrace_array;
        struct kmemtrace_free_entry *entry;
        struct ring_buffer_event *event;
@@ -86,7 +89,8 @@ static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
        entry->call_site        = call_site;
        entry->ptr              = ptr;
 
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 
        trace_wake_up();
 }
index 960cbf4..2e642b2 100644 (file)
 
 #include "trace.h"
 
+/*
+ * The ring buffer header is special. We must keep it up to date manually.
+ *
+ * Writes a textual description of the compressed event header layout to
+ * @s. Each trace_seq_printf() result overwrites the previous one, so only
+ * the result of the last call is returned.
+ */
+int ring_buffer_print_entry_header(struct trace_seq *s)
+{
+       int ret;
+
+       ret = trace_seq_printf(s, "# compressed entry header\n");
+       ret = trace_seq_printf(s, "\ttype_len    :    5 bits\n");
+       ret = trace_seq_printf(s, "\ttime_delta  :   27 bits\n");
+       ret = trace_seq_printf(s, "\tarray       :   32 bits\n");
+       ret = trace_seq_printf(s, "\n");
+       ret = trace_seq_printf(s, "\tpadding     : type == %d\n",
+                              RINGBUF_TYPE_PADDING);
+       ret = trace_seq_printf(s, "\ttime_extend : type == %d\n",
+                              RINGBUF_TYPE_TIME_EXTEND);
+       ret = trace_seq_printf(s, "\tdata max type_len  == %d\n",
+                              RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
+
+       return ret;
+}
+
 /*
  * The ring buffer is made up of a list of pages. A separate list of pages is
  * allocated for each CPU. A writer may only write to a buffer that is
@@ -182,7 +204,10 @@ EXPORT_SYMBOL_GPL(tracing_is_on);
 
 #define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array))
 #define RB_ALIGNMENT           4U
-#define RB_MAX_SMALL_DATA      28
+#define RB_MAX_SMALL_DATA      (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
+
+/* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
+#define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
 
 enum {
        RB_LEN_TIME_EXTEND = 8,
@@ -191,48 +216,28 @@ enum {
 
 static inline int rb_null_event(struct ring_buffer_event *event)
 {
-       return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0;
+       return event->type_len == RINGBUF_TYPE_PADDING
+                       && event->time_delta == 0;
 }
 
 static inline int rb_discarded_event(struct ring_buffer_event *event)
 {
-       return event->type == RINGBUF_TYPE_PADDING && event->time_delta;
+       return event->type_len == RINGBUF_TYPE_PADDING && event->time_delta;
 }
 
 static void rb_event_set_padding(struct ring_buffer_event *event)
 {
-       event->type = RINGBUF_TYPE_PADDING;
+       event->type_len = RINGBUF_TYPE_PADDING;
        event->time_delta = 0;
 }
 
-/**
- * ring_buffer_event_discard - discard an event in the ring buffer
- * @buffer: the ring buffer
- * @event: the event to discard
- *
- * Sometimes a event that is in the ring buffer needs to be ignored.
- * This function lets the user discard an event in the ring buffer
- * and then that event will not be read later.
- *
- * Note, it is up to the user to be careful with this, and protect
- * against races. If the user discards an event that has been consumed
- * it is possible that it could corrupt the ring buffer.
- */
-void ring_buffer_event_discard(struct ring_buffer_event *event)
-{
-       event->type = RINGBUF_TYPE_PADDING;
-       /* time delta must be non zero */
-       if (!event->time_delta)
-               event->time_delta = 1;
-}
-
 static unsigned
 rb_event_data_length(struct ring_buffer_event *event)
 {
        unsigned length;
 
-       if (event->len)
-               length = event->len * RB_ALIGNMENT;
+       if (event->type_len)
+               length = event->type_len * RB_ALIGNMENT;
        else
                length = event->array[0];
        return length + RB_EVNT_HDR_SIZE;
@@ -242,12 +247,12 @@ rb_event_data_length(struct ring_buffer_event *event)
 static unsigned
 rb_event_length(struct ring_buffer_event *event)
 {
-       switch (event->type) {
+       switch (event->type_len) {
        case RINGBUF_TYPE_PADDING:
                if (rb_null_event(event))
                        /* undefined */
                        return -1;
-               return rb_event_data_length(event);
+               return  event->array[0] + RB_EVNT_HDR_SIZE;
 
        case RINGBUF_TYPE_TIME_EXTEND:
                return RB_LEN_TIME_EXTEND;
@@ -271,7 +276,7 @@ rb_event_length(struct ring_buffer_event *event)
 unsigned ring_buffer_event_length(struct ring_buffer_event *event)
 {
        unsigned length = rb_event_length(event);
-       if (event->type != RINGBUF_TYPE_DATA)
+       if (event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
                return length;
        length -= RB_EVNT_HDR_SIZE;
        if (length > RB_MAX_SMALL_DATA + sizeof(event->array[0]))
@@ -284,9 +289,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_length);
 static void *
 rb_event_data(struct ring_buffer_event *event)
 {
-       BUG_ON(event->type != RINGBUF_TYPE_DATA);
+       BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX);
        /* If length is in len field, then array[0] has the data */
-       if (event->len)
+       if (event->type_len)
                return (void *)&event->array[0];
        /* Otherwise length is in array[0] and array[1] has the data */
        return (void *)&event->array[1];
@@ -316,9 +321,10 @@ struct buffer_data_page {
 };
 
 struct buffer_page {
+       struct list_head list;          /* list of buffer pages */
        local_t          write;         /* index for next write */
        unsigned         read;          /* index for next read */
-       struct list_head list;          /* list of free pages */
+       local_t          entries;       /* entries on this page */
        struct buffer_data_page *page;  /* Actual data page */
 };
 
@@ -361,6 +367,34 @@ static inline int test_time_stamp(u64 delta)
 
 #define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE)
 
+/* Max payload is BUF_PAGE_SIZE - header (8 bytes) */
+#define BUF_MAX_DATA_SIZE (BUF_PAGE_SIZE - (sizeof(u32) * 2))
+
+/* Max number of timestamps that can fit on a page */
+#define RB_TIMESTAMPS_PER_PAGE (BUF_PAGE_SIZE / RB_LEN_TIME_STAMP)
+
+int ring_buffer_print_page_header(struct trace_seq *s)
+{
+       struct buffer_data_page field;
+       int ret;
+
+       ret = trace_seq_printf(s, "\tfield: u64 timestamp;\t"
+                              "offset:0;\tsize:%u;\n",
+                              (unsigned int)sizeof(field.time_stamp));
+
+       ret = trace_seq_printf(s, "\tfield: local_t commit;\t"
+                              "offset:%u;\tsize:%u;\n",
+                              (unsigned int)offsetof(typeof(field), commit),
+                              (unsigned int)sizeof(field.commit));
+
+       ret = trace_seq_printf(s, "\tfield: char data;\t"
+                              "offset:%u;\tsize:%u;\n",
+                              (unsigned int)offsetof(typeof(field), data),
+                              (unsigned int)BUF_PAGE_SIZE);
+
+       return ret;
+}
+
 /*
  * head_page == tail_page && head == tail then buffer is empty.
  */
@@ -375,8 +409,11 @@ struct ring_buffer_per_cpu {
        struct buffer_page              *tail_page;     /* write to tail */
        struct buffer_page              *commit_page;   /* committed pages */
        struct buffer_page              *reader_page;
+       unsigned long                   nmi_dropped;
+       unsigned long                   commit_overrun;
        unsigned long                   overrun;
-       unsigned long                   entries;
+       unsigned long                   read;
+       local_t                         entries;
        u64                             write_stamp;
        u64                             read_stamp;
        atomic_t                        record_disabled;
@@ -389,6 +426,8 @@ struct ring_buffer {
        atomic_t                        record_disabled;
        cpumask_var_t                   cpumask;
 
+       struct lock_class_key           *reader_lock_key;
+
        struct mutex                    mutex;
 
        struct ring_buffer_per_cpu      **buffers;
@@ -420,13 +459,18 @@ struct ring_buffer_iter {
 /* Up this if you want to test the TIME_EXTENTS and normalization */
 #define DEBUG_SHIFT 0
 
+static inline u64 rb_time_stamp(struct ring_buffer *buffer, int cpu)
+{
+       /* shift to debug/test normalization and TIME_EXTENTS */
+       return buffer->clock() << DEBUG_SHIFT;
+}
+
 u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu)
 {
        u64 time;
 
        preempt_disable_notrace();
-       /* shift to debug/test normalization and TIME_EXTENTS */
-       time = buffer->clock() << DEBUG_SHIFT;
+       time = rb_time_stamp(buffer, cpu);
        preempt_enable_no_resched_notrace();
 
        return time;
@@ -523,6 +567,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
        cpu_buffer->cpu = cpu;
        cpu_buffer->buffer = buffer;
        spin_lock_init(&cpu_buffer->reader_lock);
+       lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
        cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        INIT_LIST_HEAD(&cpu_buffer->pages);
 
@@ -593,7 +638,8 @@ static int rb_cpu_notify(struct notifier_block *self,
  * when the buffer wraps. If this flag is not set, the buffer will
  * drop data when the tail hits the head.
  */
-struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
+struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags,
+                                       struct lock_class_key *key)
 {
        struct ring_buffer *buffer;
        int bsize;
@@ -616,6 +662,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
        buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
        buffer->flags = flags;
        buffer->clock = trace_clock_local;
+       buffer->reader_lock_key = key;
 
        /* need at least two pages */
        if (buffer->pages == 1)
@@ -673,7 +720,7 @@ struct ring_buffer *ring_buffer_alloc(unsigned long size, unsigned flags)
        kfree(buffer);
        return NULL;
 }
-EXPORT_SYMBOL_GPL(ring_buffer_alloc);
+EXPORT_SYMBOL_GPL(__ring_buffer_alloc);
 
 /**
  * ring_buffer_free - free a ring buffer.
@@ -947,31 +994,6 @@ static inline unsigned rb_head_size(struct ring_buffer_per_cpu *cpu_buffer)
        return rb_page_commit(cpu_buffer->head_page);
 }
 
-/*
- * When the tail hits the head and the buffer is in overwrite mode,
- * the head jumps to the next page and all content on the previous
- * page is discarded. But before doing so, we update the overrun
- * variable of the buffer.
- */
-static void rb_update_overflow(struct ring_buffer_per_cpu *cpu_buffer)
-{
-       struct ring_buffer_event *event;
-       unsigned long head;
-
-       for (head = 0; head < rb_head_size(cpu_buffer);
-            head += rb_event_length(event)) {
-
-               event = __rb_page_index(cpu_buffer->head_page, head);
-               if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-                       return;
-               /* Only count data entries */
-               if (event->type != RINGBUF_TYPE_DATA)
-                       continue;
-               cpu_buffer->overrun++;
-               cpu_buffer->entries--;
-       }
-}
-
 static inline void rb_inc_page(struct ring_buffer_per_cpu *cpu_buffer,
                               struct buffer_page **bpage)
 {
@@ -991,7 +1013,7 @@ rb_event_index(struct ring_buffer_event *event)
        return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE);
 }
 
-static int
+static inline int
 rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer,
             struct ring_buffer_event *event)
 {
@@ -1110,28 +1132,21 @@ static void
 rb_update_event(struct ring_buffer_event *event,
                         unsigned type, unsigned length)
 {
-       event->type = type;
+       event->type_len = type;
 
        switch (type) {
 
        case RINGBUF_TYPE_PADDING:
-               break;
-
        case RINGBUF_TYPE_TIME_EXTEND:
-               event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT);
-               break;
-
        case RINGBUF_TYPE_TIME_STAMP:
-               event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT);
                break;
 
-       case RINGBUF_TYPE_DATA:
+       case 0:
                length -= RB_EVNT_HDR_SIZE;
-               if (length > RB_MAX_SMALL_DATA) {
-                       event->len = 0;
+               if (length > RB_MAX_SMALL_DATA)
                        event->array[0] = length;
-               else
-                       event->len = DIV_ROUND_UP(length, RB_ALIGNMENT);
+               else
+                       event->type_len = DIV_ROUND_UP(length, RB_ALIGNMENT);
                break;
        default:
                BUG();
@@ -1155,131 +1170,156 @@ static unsigned rb_calculate_event_length(unsigned length)
        return length;
 }
 
+
 static struct ring_buffer_event *
-__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
-                 unsigned type, unsigned long length, u64 *ts)
+rb_move_tail(struct ring_buffer_per_cpu *cpu_buffer,
+            unsigned long length, unsigned long tail,
+            struct buffer_page *commit_page,
+            struct buffer_page *tail_page, u64 *ts)
 {
-       struct buffer_page *tail_page, *head_page, *reader_page, *commit_page;
-       unsigned long tail, write;
+       struct buffer_page *next_page, *head_page, *reader_page;
        struct ring_buffer *buffer = cpu_buffer->buffer;
        struct ring_buffer_event *event;
-       unsigned long flags;
        bool lock_taken = false;
+       unsigned long flags;
 
-       commit_page = cpu_buffer->commit_page;
-       /* we just need to protect against interrupts */
-       barrier();
-       tail_page = cpu_buffer->tail_page;
-       write = local_add_return(length, &tail_page->write);
-       tail = write - length;
+       next_page = tail_page;
 
-       /* See if we shot pass the end of this buffer page */
-       if (write > BUF_PAGE_SIZE) {
-               struct buffer_page *next_page = tail_page;
+       local_irq_save(flags);
+       /*
+        * Since the write to the buffer is still not
+        * fully lockless, we must be careful with NMIs.
+        * The locks in the writers are taken when a write
+        * crosses to a new page. The locks protect against
+        * races with the readers (this will soon be fixed
+        * with a lockless solution).
+        *
+        * Because we can not protect against NMIs, and we
+        * want to keep traces reentrant, we need to manage
+        * what happens when we are in an NMI.
+        *
+        * NMIs can happen after we take the lock.
+        * If we are in an NMI, only take the lock
+        * if it is not already taken. Otherwise
+        * simply fail.
+        */
+       if (unlikely(in_nmi())) {
+               if (!__raw_spin_trylock(&cpu_buffer->lock)) {
+                       cpu_buffer->nmi_dropped++;
+                       goto out_reset;
+               }
+       } else
+               __raw_spin_lock(&cpu_buffer->lock);
 
-               local_irq_save(flags);
-               /*
-                * Since the write to the buffer is still not
-                * fully lockless, we must be careful with NMIs.
-                * The locks in the writers are taken when a write
-                * crosses to a new page. The locks protect against
-                * races with the readers (this will soon be fixed
-                * with a lockless solution).
-                *
-                * Because we can not protect against NMIs, and we
-                * want to keep traces reentrant, we need to manage
-                * what happens when we are in an NMI.
-                *
-                * NMIs can happen after we take the lock.
-                * If we are in an NMI, only take the lock
-                * if it is not already taken. Otherwise
-                * simply fail.
-                */
-               if (unlikely(in_nmi())) {
-                       if (!__raw_spin_trylock(&cpu_buffer->lock))
-                               goto out_reset;
-               } else
-                       __raw_spin_lock(&cpu_buffer->lock);
+       lock_taken = true;
 
-               lock_taken = true;
+       rb_inc_page(cpu_buffer, &next_page);
 
-               rb_inc_page(cpu_buffer, &next_page);
+       head_page = cpu_buffer->head_page;
+       reader_page = cpu_buffer->reader_page;
 
-               head_page = cpu_buffer->head_page;
-               reader_page = cpu_buffer->reader_page;
+       /* we grabbed the lock before incrementing */
+       if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
+               goto out_reset;
 
-               /* we grabbed the lock before incrementing */
-               if (RB_WARN_ON(cpu_buffer, next_page == reader_page))
-                       goto out_reset;
+       /*
+        * If for some reason, we had an interrupt storm that made
+        * it all the way around the buffer, bail, and warn
+        * about it.
+        */
+       if (unlikely(next_page == commit_page)) {
+               cpu_buffer->commit_overrun++;
+               goto out_reset;
+       }
 
-               /*
-                * If for some reason, we had an interrupt storm that made
-                * it all the way around the buffer, bail, and warn
-                * about it.
-                */
-               if (unlikely(next_page == commit_page)) {
-                       WARN_ON_ONCE(1);
+       if (next_page == head_page) {
+               if (!(buffer->flags & RB_FL_OVERWRITE))
                        goto out_reset;
-               }
 
-               if (next_page == head_page) {
-                       if (!(buffer->flags & RB_FL_OVERWRITE))
-                               goto out_reset;
-
-                       /* tail_page has not moved yet? */
-                       if (tail_page == cpu_buffer->tail_page) {
-                               /* count overflows */
-                               rb_update_overflow(cpu_buffer);
+               /* tail_page has not moved yet? */
+               if (tail_page == cpu_buffer->tail_page) {
+                       /* count overflows */
+                       cpu_buffer->overrun +=
+                               local_read(&head_page->entries);
 
-                               rb_inc_page(cpu_buffer, &head_page);
-                               cpu_buffer->head_page = head_page;
-                               cpu_buffer->head_page->read = 0;
-                       }
+                       rb_inc_page(cpu_buffer, &head_page);
+                       cpu_buffer->head_page = head_page;
+                       cpu_buffer->head_page->read = 0;
                }
+       }
 
-               /*
-                * If the tail page is still the same as what we think
-                * it is, then it is up to us to update the tail
-                * pointer.
-                */
-               if (tail_page == cpu_buffer->tail_page) {
-                       local_set(&next_page->write, 0);
-                       local_set(&next_page->page->commit, 0);
-                       cpu_buffer->tail_page = next_page;
+       /*
+        * If the tail page is still the same as what we think
+        * it is, then it is up to us to update the tail
+        * pointer.
+        */
+       if (tail_page == cpu_buffer->tail_page) {
+               local_set(&next_page->write, 0);
+               local_set(&next_page->entries, 0);
+               local_set(&next_page->page->commit, 0);
+               cpu_buffer->tail_page = next_page;
+
+               /* reread the time stamp */
+               *ts = rb_time_stamp(buffer, cpu_buffer->cpu);
+               cpu_buffer->tail_page->page->time_stamp = *ts;
+       }
 
-                       /* reread the time stamp */
-                       *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu);
-                       cpu_buffer->tail_page->page->time_stamp = *ts;
-               }
+       /*
+        * The actual tail page has moved forward.
+        */
+       if (tail < BUF_PAGE_SIZE) {
+               /* Mark the rest of the page with padding */
+               event = __rb_page_index(tail_page, tail);
+               rb_event_set_padding(event);
+       }
 
-               /*
-                * The actual tail page has moved forward.
-                */
-               if (tail < BUF_PAGE_SIZE) {
-                       /* Mark the rest of the page with padding */
-                       event = __rb_page_index(tail_page, tail);
-                       rb_event_set_padding(event);
-               }
+       /* Set the write back to the previous setting */
+       local_sub(length, &tail_page->write);
 
-               if (tail <= BUF_PAGE_SIZE)
-                       /* Set the write back to the previous setting */
-                       local_set(&tail_page->write, tail);
+       /*
+        * If this was a commit entry that failed,
+        * increment that too
+        */
+       if (tail_page == cpu_buffer->commit_page &&
+           tail == rb_commit_index(cpu_buffer)) {
+               rb_set_commit_to_write(cpu_buffer);
+       }
 
-               /*
-                * If this was a commit entry that failed,
-                * increment that too
-                */
-               if (tail_page == cpu_buffer->commit_page &&
-                   tail == rb_commit_index(cpu_buffer)) {
-                       rb_set_commit_to_write(cpu_buffer);
-               }
+       __raw_spin_unlock(&cpu_buffer->lock);
+       local_irq_restore(flags);
+
+       /* fail and let the caller try again */
+       return ERR_PTR(-EAGAIN);
+
+ out_reset:
+       /* reset write */
+       local_sub(length, &tail_page->write);
 
+       if (likely(lock_taken))
                __raw_spin_unlock(&cpu_buffer->lock);
-               local_irq_restore(flags);
+       local_irq_restore(flags);
+       return NULL;
+}
 
-               /* fail and let the caller try again */
-               return ERR_PTR(-EAGAIN);
-       }
+static struct ring_buffer_event *
+__rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
+                 unsigned type, unsigned long length, u64 *ts)
+{
+       struct buffer_page *tail_page, *commit_page;
+       struct ring_buffer_event *event;
+       unsigned long tail, write;
+
+       commit_page = cpu_buffer->commit_page;
+       /* we just need to protect against interrupts */
+       barrier();
+       tail_page = cpu_buffer->tail_page;
+       write = local_add_return(length, &tail_page->write);
+       tail = write - length;
+
+       /* See if we shot pass the end of this buffer page */
+       if (write > BUF_PAGE_SIZE)
+               return rb_move_tail(cpu_buffer, length, tail,
+                                   commit_page, tail_page, ts);
 
        /* We reserved something on the buffer */
 
@@ -1289,6 +1329,10 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
        event = __rb_page_index(tail_page, tail);
        rb_update_event(event, type, length);
 
+       /* The passed in type is zero for DATA */
+       if (likely(!type))
+               local_inc(&tail_page->entries);
+
        /*
         * If this is a commit and the tail is zero, then update
         * this page's time stamp.
@@ -1297,16 +1341,38 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
                cpu_buffer->commit_page->page->time_stamp = *ts;
 
        return event;
+}
 
- out_reset:
-       /* reset write */
-       if (tail <= BUF_PAGE_SIZE)
-               local_set(&tail_page->write, tail);
+static inline int
+rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
+                 struct ring_buffer_event *event)
+{
+       unsigned long new_index, old_index;
+       struct buffer_page *bpage;
+       unsigned long index;
+       unsigned long addr;
 
-       if (likely(lock_taken))
-               __raw_spin_unlock(&cpu_buffer->lock);
-       local_irq_restore(flags);
-       return NULL;
+       new_index = rb_event_index(event);
+       old_index = new_index + rb_event_length(event);
+       addr = (unsigned long)event;
+       addr &= PAGE_MASK;
+
+       bpage = cpu_buffer->tail_page;
+
+       if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
+               /*
+                * This is on the tail page. It is possible that
+                * a write could come in and move the tail page
+                * and write to the next page. That is fine
+                * because we just shorten what is on this page.
+                */
+               index = local_cmpxchg(&bpage->write, old_index, new_index);
+               if (index == old_index)
+                       return 1;
+       }
+
+       /* could not discard */
+       return 0;
 }
 
 static int
@@ -1351,16 +1417,23 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
                        event->array[0] = *delta >> TS_SHIFT;
                } else {
                        cpu_buffer->commit_page->page->time_stamp = *ts;
-                       event->time_delta = 0;
-                       event->array[0] = 0;
+                       /* try to discard, since we do not need this */
+                       if (!rb_try_to_discard(cpu_buffer, event)) {
+                               /* nope, just zero it */
+                               event->time_delta = 0;
+                               event->array[0] = 0;
+                       }
                }
                cpu_buffer->write_stamp = *ts;
                /* let the caller know this was the commit */
                ret = 1;
        } else {
-               /* Darn, this is just wasted space */
-               event->time_delta = 0;
-               event->array[0] = 0;
+               /* Try to discard the event */
+               if (!rb_try_to_discard(cpu_buffer, event)) {
+                       /* Darn, this is just wasted space */
+                       event->time_delta = 0;
+                       event->array[0] = 0;
+               }
                ret = 0;
        }
 
@@ -1371,13 +1444,14 @@ rb_add_time_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 
 static struct ring_buffer_event *
 rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
-                     unsigned type, unsigned long length)
+                     unsigned long length)
 {
        struct ring_buffer_event *event;
-       u64 ts, delta;
+       u64 ts, delta = 0;
        int commit = 0;
        int nr_loops = 0;
 
+       length = rb_calculate_event_length(length);
  again:
        /*
         * We allow for interrupts to reenter here and do a trace.
@@ -1391,7 +1465,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
        if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
                return NULL;
 
-       ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
+       ts = rb_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu);
 
        /*
         * Only the first commit can update the timestamp.
@@ -1401,23 +1475,24 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
         * also be made. But only the entry that did the actual
         * commit will be something other than zero.
         */
-       if (cpu_buffer->tail_page == cpu_buffer->commit_page &&
-           rb_page_write(cpu_buffer->tail_page) ==
-           rb_commit_index(cpu_buffer)) {
+       if (likely(cpu_buffer->tail_page == cpu_buffer->commit_page &&
+                  rb_page_write(cpu_buffer->tail_page) ==
+                  rb_commit_index(cpu_buffer))) {
+               u64 diff;
 
-               delta = ts - cpu_buffer->write_stamp;
+               diff = ts - cpu_buffer->write_stamp;
 
-               /* make sure this delta is calculated here */
+               /* make sure this diff is calculated here */
                barrier();
 
                /* Did the write stamp get updated already? */
                if (unlikely(ts < cpu_buffer->write_stamp))
-                       delta = 0;
+                       goto get_event;
 
-               if (test_time_stamp(delta)) {
+               delta = diff;
+               if (unlikely(test_time_stamp(delta))) {
 
                        commit = rb_add_time_stamp(cpu_buffer, &ts, &delta);
-
                        if (commit == -EBUSY)
                                return NULL;
 
@@ -1426,12 +1501,11 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
 
                        RB_WARN_ON(cpu_buffer, commit < 0);
                }
-       } else
-               /* Non commits have zero deltas */
-               delta = 0;
+       }
 
-       event = __rb_reserve_next(cpu_buffer, type, length, &ts);
-       if (PTR_ERR(event) == -EAGAIN)
+ get_event:
+       event = __rb_reserve_next(cpu_buffer, 0, length, &ts);
+       if (unlikely(PTR_ERR(event) == -EAGAIN))
                goto again;
 
        if (!event) {
@@ -1448,7 +1522,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
         * If the timestamp was commited, make the commit our entry
         * now so that we will update it when needed.
         */
-       if (commit)
+       if (unlikely(commit))
                rb_set_commit_event(cpu_buffer, event);
        else if (!rb_is_commit(cpu_buffer, event))
                delta = 0;
@@ -1458,6 +1532,36 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer,
        return event;
 }
 
+#define TRACE_RECURSIVE_DEPTH 16
+
+static int trace_recursive_lock(void)
+{
+       current->trace_recursion++;
+
+       if (likely(current->trace_recursion < TRACE_RECURSIVE_DEPTH))
+               return 0;
+
+       /* Disable all tracing before we do anything else */
+       tracing_off_permanent();
+
+       printk_once(KERN_WARNING "Tracing recursion: depth[%ld]:"
+                   "HC[%lu]:SC[%lu]:NMI[%lu]\n",
+                   current->trace_recursion,
+                   hardirq_count() >> HARDIRQ_SHIFT,
+                   softirq_count() >> SOFTIRQ_SHIFT,
+                   in_nmi());
+
+       WARN_ON_ONCE(1);
+       return -1;
+}
+
+static void trace_recursive_unlock(void)
+{
+       WARN_ON_ONCE(!current->trace_recursion);
+
+       current->trace_recursion--;
+}
+
 static DEFINE_PER_CPU(int, rb_need_resched);
 
 /**
@@ -1491,6 +1595,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
        /* If we are tracing schedule, we don't want to recurse */
        resched = ftrace_preempt_disable();
 
+       if (trace_recursive_lock())
+               goto out_nocheck;
+
        cpu = raw_smp_processor_id();
 
        if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -1501,11 +1608,10 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
        if (atomic_read(&cpu_buffer->record_disabled))
                goto out;
 
-       length = rb_calculate_event_length(length);
-       if (length > BUF_PAGE_SIZE)
+       if (length > BUF_MAX_DATA_SIZE)
                goto out;
 
-       event = rb_reserve_next_event(cpu_buffer, RINGBUF_TYPE_DATA, length);
+       event = rb_reserve_next_event(cpu_buffer, length);
        if (!event)
                goto out;
 
@@ -1520,6 +1626,9 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
        return event;
 
  out:
+       trace_recursive_unlock();
+
+ out_nocheck:
        ftrace_preempt_enable(resched);
        return NULL;
 }
@@ -1528,7 +1637,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
 static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
                      struct ring_buffer_event *event)
 {
-       cpu_buffer->entries++;
+       local_inc(&cpu_buffer->entries);
 
        /* Only process further if we own the commit */
        if (!rb_is_commit(cpu_buffer, event))
@@ -1558,6 +1667,8 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 
        rb_commit(cpu_buffer, event);
 
+       trace_recursive_unlock();
+
        /*
         * Only the last preempt count needs to restore preemption.
         */
@@ -1570,6 +1681,99 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 }
 EXPORT_SYMBOL_GPL(ring_buffer_unlock_commit);
 
+/*
+ * Rewrite @event in place as a padding event: its length is recorded in
+ * array[0] first, then type_len is switched to RINGBUF_TYPE_PADDING and
+ * time_delta is forced to be non-zero.
+ */
+static inline void rb_event_discard(struct ring_buffer_event *event)
+{
+       /* array[0] holds the actual length for the discarded event */
+       event->array[0] = rb_event_data_length(event) - RB_EVNT_HDR_SIZE;
+       event->type_len = RINGBUF_TYPE_PADDING;
+       /* time delta must be non zero */
+       if (!event->time_delta)
+               event->time_delta = 1;
+}
+
+/**
+ * ring_buffer_event_discard - discard any event in the ring buffer
+ * @event: the event to discard
+ *
+ * Sometimes an event that is in the ring buffer needs to be ignored.
+ * This function lets the user discard an event in the ring buffer
+ * and then that event will not be read later.
+ *
+ * Note, it is up to the user to be careful with this, and protect
+ * against races. If the user discards an event that has been consumed
+ * it is possible that it could corrupt the ring buffer.
+ */
+void ring_buffer_event_discard(struct ring_buffer_event *event)
+{
+       /* Exported wrapper: all the work is done by rb_event_discard() */
+       rb_event_discard(event);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_event_discard);
+
+/**
+ * ring_buffer_discard_commit - discard an event that has not been committed
+ * @buffer: the ring buffer
+ * @event: non committed event to discard
+ *
+ * This is similar to ring_buffer_event_discard but must only be
+ * performed on an event that has not been committed yet. The difference
+ * is that this will also try to free the event from the ring buffer
+ * if another event has not been added behind it.
+ *
+ * If another event has been added behind it, it will set the event
+ * up as discarded, and perform the commit.
+ *
+ * If this function is called, do not call ring_buffer_unlock_commit on
+ * the event.
+ */
+void ring_buffer_discard_commit(struct ring_buffer *buffer,
+                               struct ring_buffer_event *event)
+{
+       struct ring_buffer_per_cpu *cpu_buffer;
+       int cpu;
+
+       /* The event is discarded regardless */
+       rb_event_discard(event);
+
+       /*
+        * This must only be called if the event has not been
+        * committed yet. Thus we can assume that preemption
+        * is still disabled.
+        */
+       RB_WARN_ON(buffer, preemptible());
+
+       cpu = smp_processor_id();
+       cpu_buffer = buffer->buffers[cpu];
+
+       /*
+        * NOTE(review): when rb_try_to_discard() returns 0 the entries
+        * increment below is skipped. The comment below reads as if the
+        * increment is meant for the case where the event could NOT be
+        * removed -- confirm the helper's return convention.
+        */
+       if (!rb_try_to_discard(cpu_buffer, event))
+               goto out;
+
+       /*
+        * The commit is still visible by the reader, so we
+        * must increment entries.
+        */
+       local_inc(&cpu_buffer->entries);
+ out:
+       /*
+        * If a write came in and pushed the tail page
+        * we still need to update the commit pointer
+        * if we were the commit.
+        */
+       if (rb_is_commit(cpu_buffer, event))
+               rb_set_commit_to_write(cpu_buffer);
+
+       /* Release the recursion count, as ring_buffer_unlock_commit() does */
+       trace_recursive_unlock();
+
+       /*
+        * Only the last preempt count needs to restore preemption.
+        */
+       if (preempt_count() == 1)
+               ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
+       else
+               preempt_enable_no_resched_notrace();
+
+}
+EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
+
 /**
  * ring_buffer_write - write data to the buffer without reserving
  * @buffer: The ring buffer to write to.
@@ -1589,7 +1793,6 @@ int ring_buffer_write(struct ring_buffer *buffer,
 {
        struct ring_buffer_per_cpu *cpu_buffer;
        struct ring_buffer_event *event;
-       unsigned long event_length;
        void *body;
        int ret = -EBUSY;
        int cpu, resched;
@@ -1612,9 +1815,10 @@ int ring_buffer_write(struct ring_buffer *buffer,
        if (atomic_read(&cpu_buffer->record_disabled))
                goto out;
 
-       event_length = rb_calculate_event_length(length);
-       event = rb_reserve_next_event(cpu_buffer,
-                                     RINGBUF_TYPE_DATA, event_length);
+       if (length > BUF_MAX_DATA_SIZE)
+               goto out;
+
+       event = rb_reserve_next_event(cpu_buffer, length);
        if (!event)
                goto out;
 
@@ -1728,7 +1932,8 @@ unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu)
                return 0;
 
        cpu_buffer = buffer->buffers[cpu];
-       ret = cpu_buffer->entries;
+       ret = (local_read(&cpu_buffer->entries) - cpu_buffer->overrun)
+               - cpu_buffer->read;
 
        return ret;
 }
@@ -1754,6 +1959,47 @@ unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu);
 
+/**
+ * ring_buffer_nmi_dropped_cpu - get the number of nmis that were dropped
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of dropped nmis from
+ *
+ * Returns 0 when @cpu is not set in the buffer's cpumask.
+ */
+unsigned long ring_buffer_nmi_dropped_cpu(struct ring_buffer *buffer, int cpu)
+{
+       if (cpumask_test_cpu(cpu, buffer->cpumask))
+               return buffer->buffers[cpu]->nmi_dropped;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_nmi_dropped_cpu);
+
+/**
+ * ring_buffer_commit_overrun_cpu - get the number of overruns caused by commits
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to get the number of overruns from
+ *
+ * Returns 0 when @cpu is not set in the buffer's cpumask.
+ */
+unsigned long
+ring_buffer_commit_overrun_cpu(struct ring_buffer *buffer, int cpu)
+{
+       if (cpumask_test_cpu(cpu, buffer->cpumask))
+               return buffer->buffers[cpu]->commit_overrun;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_commit_overrun_cpu);
+
 /**
  * ring_buffer_entries - get the number of entries in a buffer
  * @buffer: The ring buffer
@@ -1770,7 +2016,8 @@ unsigned long ring_buffer_entries(struct ring_buffer *buffer)
        /* if you care about this being correct, lock the buffer */
        for_each_buffer_cpu(buffer, cpu) {
                cpu_buffer = buffer->buffers[cpu];
-               entries += cpu_buffer->entries;
+               entries += (local_read(&cpu_buffer->entries) -
+                           cpu_buffer->overrun) - cpu_buffer->read;
        }
 
        return entries;
@@ -1862,7 +2109,7 @@ rb_update_read_stamp(struct ring_buffer_per_cpu *cpu_buffer,
 {
        u64 delta;
 
-       switch (event->type) {
+       switch (event->type_len) {
        case RINGBUF_TYPE_PADDING:
                return;
 
@@ -1893,7 +2140,7 @@ rb_update_iter_read_stamp(struct ring_buffer_iter *iter,
 {
        u64 delta;
 
-       switch (event->type) {
+       switch (event->type_len) {
        case RINGBUF_TYPE_PADDING:
                return;
 
@@ -1966,6 +2213,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->reader_page->list.prev = reader->list.prev;
 
        local_set(&cpu_buffer->reader_page->write, 0);
+       local_set(&cpu_buffer->reader_page->entries, 0);
        local_set(&cpu_buffer->reader_page->page->commit, 0);
 
        /* Make the reader page now replace the head */
@@ -2008,8 +2256,9 @@ static void rb_advance_reader(struct ring_buffer_per_cpu *cpu_buffer)
 
        event = rb_reader_event(cpu_buffer);
 
-       if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event))
-               cpu_buffer->entries--;
+       if (event->type_len <= RINGBUF_TYPE_DATA_TYPE_LEN_MAX
+                       || rb_discarded_event(event))
+               cpu_buffer->read++;
 
        rb_update_read_stamp(cpu_buffer, event);
 
@@ -2031,8 +2280,8 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
         * Check if we are at the end of the buffer.
         */
        if (iter->head >= rb_page_size(iter->head_page)) {
-               if (RB_WARN_ON(buffer,
-                              iter->head_page == cpu_buffer->commit_page))
+               /* discarded commits can make the page empty */
+               if (iter->head_page == cpu_buffer->commit_page)
                        return;
                rb_inc_iter(iter);
                return;
@@ -2075,12 +2324,10 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
        /*
         * We repeat when a timestamp is encountered. It is possible
         * to get multiple timestamps from an interrupt entering just
-        * as one timestamp is about to be written. The max times
-        * that this can happen is the number of nested interrupts we
-        * can have.  Nesting 10 deep of interrupts is clearly
-        * an anomaly.
+        * as one timestamp is about to be written, or from discarded
+        * commits. The most that we can have is the number on a single page.
         */
-       if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+       if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
                return NULL;
 
        reader = rb_get_reader_page(cpu_buffer);
@@ -2089,7 +2336,7 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 
        event = rb_reader_event(cpu_buffer);
 
-       switch (event->type) {
+       switch (event->type_len) {
        case RINGBUF_TYPE_PADDING:
                if (rb_null_event(event))
                        RB_WARN_ON(cpu_buffer, 1);
@@ -2146,14 +2393,14 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
  again:
        /*
-        * We repeat when a timestamp is encountered. It is possible
-        * to get multiple timestamps from an interrupt entering just
-        * as one timestamp is about to be written. The max times
-        * that this can happen is the number of nested interrupts we
-        * can have. Nesting 10 deep of interrupts is clearly
-        * an anomaly.
+        * We repeat when a timestamp is encountered.
+        * We can get multiple timestamps by nested interrupts or also
+        * if filtering is on (discarding commits). Since discarding
+        * commits can be frequent we can get a lot of timestamps.
+        * But we limit them by not adding timestamps if they begin
+        * at the start of a page.
         */
-       if (RB_WARN_ON(cpu_buffer, ++nr_loops > 10))
+       if (RB_WARN_ON(cpu_buffer, ++nr_loops > RB_TIMESTAMPS_PER_PAGE))
                return NULL;
 
        if (rb_per_cpu_empty(cpu_buffer))
@@ -2161,7 +2408,7 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 
        event = rb_iter_head_event(iter);
 
-       switch (event->type) {
+       switch (event->type_len) {
        case RINGBUF_TYPE_PADDING:
                if (rb_null_event(event)) {
                        rb_inc_iter(iter);
@@ -2220,7 +2467,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
        event = rb_buffer_peek(buffer, cpu, ts);
        spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-       if (event && event->type == RINGBUF_TYPE_PADDING) {
+       if (event && event->type_len == RINGBUF_TYPE_PADDING) {
                cpu_relax();
                goto again;
        }
@@ -2248,7 +2495,7 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
        event = rb_iter_peek(iter, ts);
        spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-       if (event && event->type == RINGBUF_TYPE_PADDING) {
+       if (event && event->type_len == RINGBUF_TYPE_PADDING) {
                cpu_relax();
                goto again;
        }
@@ -2293,7 +2540,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
  out:
        preempt_enable();
 
-       if (event && event->type == RINGBUF_TYPE_PADDING) {
+       if (event && event->type_len == RINGBUF_TYPE_PADDING) {
                cpu_relax();
                goto again;
        }
@@ -2386,7 +2633,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
  out:
        spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
-       if (event && event->type == RINGBUF_TYPE_PADDING) {
+       if (event && event->type_len == RINGBUF_TYPE_PADDING) {
                cpu_relax();
                goto again;
        }
@@ -2411,6 +2658,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
        cpu_buffer->head_page
                = list_entry(cpu_buffer->pages.next, struct buffer_page, list);
        local_set(&cpu_buffer->head_page->write, 0);
+       local_set(&cpu_buffer->head_page->entries, 0);
        local_set(&cpu_buffer->head_page->page->commit, 0);
 
        cpu_buffer->head_page->read = 0;
@@ -2420,11 +2668,15 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 
        INIT_LIST_HEAD(&cpu_buffer->reader_page->list);
        local_set(&cpu_buffer->reader_page->write, 0);
+       local_set(&cpu_buffer->reader_page->entries, 0);
        local_set(&cpu_buffer->reader_page->page->commit, 0);
        cpu_buffer->reader_page->read = 0;
 
+       cpu_buffer->nmi_dropped = 0;
+       cpu_buffer->commit_overrun = 0;
        cpu_buffer->overrun = 0;
-       cpu_buffer->entries = 0;
+       cpu_buffer->read = 0;
+       local_set(&cpu_buffer->entries, 0);
 
        cpu_buffer->write_stamp = 0;
        cpu_buffer->read_stamp = 0;
@@ -2443,6 +2695,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
        if (!cpumask_test_cpu(cpu, buffer->cpumask))
                return;
 
+       atomic_inc(&cpu_buffer->record_disabled);
+
        spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 
        __raw_spin_lock(&cpu_buffer->lock);
@@ -2452,6 +2706,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
        __raw_spin_unlock(&cpu_buffer->lock);
 
        spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+       atomic_dec(&cpu_buffer->record_disabled);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
 
@@ -2578,28 +2834,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu);
 
-static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer,
-                             struct buffer_data_page *bpage,
-                             unsigned int offset)
-{
-       struct ring_buffer_event *event;
-       unsigned long head;
-
-       __raw_spin_lock(&cpu_buffer->lock);
-       for (head = offset; head < local_read(&bpage->commit);
-            head += rb_event_length(event)) {
-
-               event = __rb_data_page_index(bpage, head);
-               if (RB_WARN_ON(cpu_buffer, rb_null_event(event)))
-                       return;
-               /* Only count data entries */
-               if (event->type != RINGBUF_TYPE_DATA)
-                       continue;
-               cpu_buffer->entries--;
-       }
-       __raw_spin_unlock(&cpu_buffer->lock);
-}
-
 /**
  * ring_buffer_alloc_read_page - allocate a page to read from buffer
  * @buffer: the buffer to allocate for.
@@ -2630,6 +2864,7 @@ void *ring_buffer_alloc_read_page(struct ring_buffer *buffer)
 
        return bpage;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page);
 
 /**
  * ring_buffer_free_read_page - free an allocated read page
@@ -2642,6 +2877,7 @@ void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data)
 {
        free_page((unsigned long)data);
 }
+EXPORT_SYMBOL_GPL(ring_buffer_free_read_page);
 
 /**
  * ring_buffer_read_page - extract a page from the ring buffer
@@ -2768,16 +3004,17 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
                /* we copied everything to the beginning */
                read = 0;
        } else {
+               /* update the entry counter */
+               cpu_buffer->read += local_read(&reader->entries);
+
                /* swap the pages */
                rb_init_page(bpage);
                bpage = reader->page;
                reader->page = *data_page;
                local_set(&reader->write, 0);
+               local_set(&reader->entries, 0);
                reader->read = 0;
                *data_page = bpage;
-
-               /* update the entry counter */
-               rb_remove_entries(cpu_buffer, bpage, read);
        }
        ret = read;
 
@@ -2787,6 +3024,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
  out:
        return ret;
 }
+EXPORT_SYMBOL_GPL(ring_buffer_read_page);
 
 static ssize_t
 rb_simple_read(struct file *filp, char __user *ubuf,
@@ -2845,14 +3083,11 @@ static const struct file_operations rb_simple_fops = {
 static __init int rb_init_debugfs(void)
 {
        struct dentry *d_tracer;
-       struct dentry *entry;
 
        d_tracer = tracing_init_dentry();
 
-       entry = debugfs_create_file("tracing_on", 0644, d_tracer,
-                                   &ring_buffer_flags, &rb_simple_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'tracing_on' entry\n");
+       trace_create_file("tracing_on", 0644, d_tracer,
+                           &ring_buffer_flags, &rb_simple_fops);
 
        return 0;
 }
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
new file mode 100644 (file)
index 0000000..8d68e14
--- /dev/null
@@ -0,0 +1,416 @@
+/*
+ * ring buffer tester and benchmark
+ *
+ * Copyright (C) 2009 Steven Rostedt <srostedt@redhat.com>
+ */
+#include <linux/ring_buffer.h>
+#include <linux/completion.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/time.h>
+
+/*
+ * Layout that read_page() overlays on the page handed back by
+ * ring_buffer_read_page() to reach the commit count and the event data.
+ * NOTE(review): presumably has to match the ring buffer's own data page
+ * layout -- confirm against ring_buffer.c.
+ */
+struct rb_page {
+       u64             ts;
+       local_t         commit;
+       char            data[4080];
+};
+
+/* run time and sleep time in seconds */
+#define RUN_TIME       10
+#define SLEEP_TIME     10
+
+/* number of events for writer to wake up the reader */
+static int wakeup_interval = 100;
+
+static int reader_finish;
+static struct completion read_start;
+static struct completion read_done;
+
+static struct ring_buffer *buffer;
+static struct task_struct *producer;
+static struct task_struct *consumer;
+static unsigned long read;
+
+/*
+ * When non-zero, no consumer thread is created and only the producer
+ * runs. Declared unsigned so the variable matches the "uint" type it is
+ * registered with in module_param().
+ */
+static unsigned int disable_reader;
+module_param(disable_reader, uint, 0644);
+MODULE_PARM_DESC(disable_reader, "only run producer");
+
+static int read_events;
+
+static int kill_test;
+
+/*
+ * Flag the test as failed. Only the first call sets kill_test and
+ * triggers the WARN_ON(); later calls do nothing.
+ */
+#define KILL_TEST()                            \
+       do {                                    \
+               if (!kill_test) {               \
+                       kill_test = 1;          \
+                       WARN_ON(1);             \
+               }                               \
+       } while (0)
+
+/* Result of one read attempt made by read_event() or read_page() */
+enum event_status {
+       EVENT_FOUND,
+       EVENT_DROPPED,
+};
+
+/*
+ * Consume a single event from @cpu's buffer and check that its payload
+ * holds @cpu. Returns EVENT_FOUND on a match; returns EVENT_DROPPED when
+ * nothing was consumed or when the payload did not match (the latter
+ * also kills the test).
+ */
+static enum event_status read_event(int cpu)
+{
+       struct ring_buffer_event *ev;
+       int *payload;
+       u64 stamp;
+
+       ev = ring_buffer_consume(buffer, cpu, &stamp);
+       if (ev) {
+               payload = ring_buffer_event_data(ev);
+               if (*payload == cpu) {
+                       read++;
+                       return EVENT_FOUND;
+               }
+               KILL_TEST();
+       }
+
+       return EVENT_DROPPED;
+}
+
+/*
+ * Pull a whole page out of @cpu's buffer with ring_buffer_read_page()
+ * and walk the events in it, checking that every data event holds @cpu.
+ * Returns EVENT_DROPPED if no read page could be allocated or the page
+ * read failed, EVENT_FOUND otherwise. Any inconsistency found while
+ * walking the page kills the test.
+ */
+static enum event_status read_page(int cpu)
+{
+       struct ring_buffer_event *event;
+       struct rb_page *rpage;
+       unsigned long commit;
+       void *bpage;
+       int *entry;
+       int ret;
+       int inc;
+       int i;
+
+       bpage = ring_buffer_alloc_read_page(buffer);
+       if (!bpage)
+               return EVENT_DROPPED;
+
+       ret = ring_buffer_read_page(buffer, &bpage, PAGE_SIZE, cpu, 1);
+       if (ret >= 0) {
+               rpage = bpage;
+               commit = local_read(&rpage->commit);
+               for (i = 0; i < commit && !kill_test; i += inc) {
+
+                       /* the offset must stay inside the data area of the page */
+                       if (i >= (PAGE_SIZE - offsetof(struct rb_page, data))) {
+                               KILL_TEST();
+                               break;
+                       }
+
+                       /* stays -1 unless a case below computes a real step */
+                       inc = -1;
+                       event = (void *)&rpage->data[i];
+                       switch (event->type_len) {
+                       case RINGBUF_TYPE_PADDING:
+                               /* We don't expect any padding */
+                               KILL_TEST();
+                               break;
+                       case RINGBUF_TYPE_TIME_EXTEND:
+                               inc = 8;
+                               break;
+                       case 0:
+                               /* type_len of 0: the length is taken from array[0] */
+                               entry = ring_buffer_event_data(event);
+                               if (*entry != cpu) {
+                                       KILL_TEST();
+                                       break;
+                               }
+                               read++;
+                               if (!event->array[0]) {
+                                       KILL_TEST();
+                                       break;
+                               }
+                               /*
+                                * NOTE(review): the default case below counts
+                                * one extra 4 byte word on top of type_len;
+                                * confirm whether array[0] already covers the
+                                * event header or whether it must be added here.
+                                */
+                               inc = event->array[0];
+                               break;
+                       default:
+                               /* any other type_len: step is (type_len + 1) words */
+                               entry = ring_buffer_event_data(event);
+                               if (*entry != cpu) {
+                                       KILL_TEST();
+                                       break;
+                               }
+                               read++;
+                               inc = ((event->type_len + 1) * 4);
+                       }
+                       if (kill_test)
+                               break;
+
+                       /* a non-positive step would never reach the commit offset */
+                       if (inc <= 0) {
+                               KILL_TEST();
+                               break;
+                       }
+               }
+       }
+       ring_buffer_free_read_page(buffer, bpage);
+
+       if (ret < 0)
+               return EVENT_DROPPED;
+       return EVENT_FOUND;
+}
+
+/*
+ * One reader run. Alternates between event mode and page mode on each
+ * call, then keeps draining every online CPU's buffer until the producer
+ * sets reader_finish or the test is killed. Sleeps whenever a full pass
+ * over the CPUs finds nothing, and signals read_done on the way out.
+ */
+static void ring_buffer_consumer(void)
+{
+       /* toggle between reading pages and events */
+       read_events ^= 1;
+
+       read = 0;
+       while (!reader_finish && !kill_test) {
+               int found;
+
+               /* keep sweeping the CPUs for as long as a sweep finds data */
+               do {
+                       int cpu;
+
+                       found = 0;
+                       for_each_online_cpu(cpu) {
+                               enum event_status stat;
+
+                               if (read_events)
+                                       stat = read_event(cpu);
+                               else
+                                       stat = read_page(cpu);
+
+                               if (kill_test)
+                                       break;
+                               if (stat == EVENT_FOUND)
+                                       found = 1;
+                       }
+               } while (found && !kill_test);
+
+               /*
+                * Set the sleeping state before re-checking reader_finish
+                * so that a wake up sent in between is not lost.
+                */
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (reader_finish)
+                       break;
+
+               schedule();
+       }
+       /*
+        * Restore the running state here rather than inside the loop:
+        * the break above leaves the loop with TASK_INTERRUPTIBLE set.
+        */
+       __set_current_state(TASK_RUNNING);
+       reader_finish = 0;
+       complete(&read_done);
+}
+
+/*
+ * One producer run: write events into the buffer for RUN_TIME seconds
+ * (or until the test is killed), waking the consumer every
+ * wakeup_interval iterations, then stop the consumer and print the
+ * statistics for the run.
+ */
+static void ring_buffer_producer(void)
+{
+       struct timeval start_tv;
+       struct timeval end_tv;
+       unsigned long long time;
+       unsigned long long entries;
+       unsigned long long overruns;
+       unsigned long missed = 0;
+       unsigned long hit = 0;
+       unsigned long avg;
+       int cnt = 0;
+
+       /*
+        * Hammer the buffer for 10 secs (this may
+        * make the system stall)
+        */
+       pr_info("Starting ring buffer hammer\n");
+       do_gettimeofday(&start_tv);
+       do {
+               struct ring_buffer_event *event;
+               int *entry;
+
+               /* each event carries the id of the CPU that wrote it */
+               event = ring_buffer_lock_reserve(buffer, 10);
+               if (!event) {
+                       missed++;
+               } else {
+                       hit++;
+                       entry = ring_buffer_event_data(event);
+                       *entry = smp_processor_id();
+                       ring_buffer_unlock_commit(buffer, event);
+               }
+               do_gettimeofday(&end_tv);
+
+               cnt++;
+               if (consumer && !(cnt % wakeup_interval))
+                       wake_up_process(consumer);
+
+#ifndef CONFIG_PREEMPT
+               /*
+                * If we are a non preempt kernel, the 10 second run will
+                * stop everything while it runs. Instead, we will call
+                * cond_resched and also add any time that was lost by a
+                * reschedule.
+                *
+                * Do a cond resched at the same frequency we would wake up
+                * the reader.
+                *
+                * NOTE(review): the test below is true on every iteration
+                * that is NOT a multiple of wakeup_interval, which is the
+                * opposite of the wake up test above -- confirm the intent.
+                */
+               if (cnt % wakeup_interval)
+                       cond_resched();
+#endif
+
+       } while (end_tv.tv_sec < (start_tv.tv_sec + RUN_TIME) && !kill_test);
+       pr_info("End ring buffer hammer\n");
+
+       if (consumer) {
+               /* Init both completions here to avoid races */
+               init_completion(&read_start);
+               init_completion(&read_done);
+               /* the completions must be visible before the finish var */
+               smp_wmb();
+               reader_finish = 1;
+               /* finish var visible before waking up the consumer */
+               smp_wmb();
+               wake_up_process(consumer);
+               wait_for_completion(&read_done);
+       }
+
+       /* elapsed time of the run in microseconds */
+       time = end_tv.tv_sec - start_tv.tv_sec;
+       time *= USEC_PER_SEC;
+       time += (long long)((long)end_tv.tv_usec - (long)start_tv.tv_usec);
+
+       entries = ring_buffer_entries(buffer);
+       overruns = ring_buffer_overruns(buffer);
+
+       if (kill_test)
+               pr_info("ERROR!\n");
+       pr_info("Time:     %lld (usecs)\n", time);
+       pr_info("Overruns: %lld\n", overruns);
+       if (disable_reader)
+               pr_info("Read:     (reader disabled)\n");
+       else
+               pr_info("Read:     %ld  (by %s)\n", read,
+                       read_events ? "events" : "pages");
+       pr_info("Entries:  %lld\n", entries);
+       pr_info("Total:    %lld\n", entries + overruns + read);
+       pr_info("Missed:   %ld\n", missed);
+       pr_info("Hit:      %ld\n", hit);
+
+       /* Convert time from usecs to millisecs */
+       do_div(time, USEC_PER_MSEC);
+       if (time)
+               hit /= (long)time;
+       else
+               pr_info("TIME IS ZERO??\n");
+
+       /* from here on hit is a per-millisecond rate, not a total */
+       pr_info("Entries per millisec: %ld\n", hit);
+
+       if (hit) {
+               /* Calculate the average time in nanosecs */
+               avg = NSEC_PER_MSEC / hit;
+               pr_info("%ld ns per entry\n", avg);
+       }
+
+       if (missed) {
+               if (time)
+                       missed /= (long)time;
+
+               pr_info("Total iterations per millisec: %ld\n", hit + missed);
+
+               /* it is possible that hit + missed will overflow and be zero */
+               if (!(hit + missed)) {
+                       pr_info("hit + missed overflowed and totalled zero!\n");
+                       hit--; /* make it non zero */
+               }
+
+               /* Calculate the average time in nanosecs */
+               avg = NSEC_PER_MSEC / (hit + missed);
+               pr_info("%ld ns per entry\n", avg);
+       }
+}
+
+/* Sleep interruptibly until kthread_should_stop() reports true */
+static void wait_to_die(void)
+{
+       for (;;) {
+               /* mark ourselves sleeping before testing the stop flag */
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (kthread_should_stop())
+                       break;
+               schedule();
+       }
+       __set_current_state(TASK_RUNNING);
+}
+
+/*
+ * Thread function of the consumer. Each round signals read_start, runs
+ * one ring_buffer_consumer() pass and then sleeps until it is woken
+ * again. Leaves the loop when asked to stop or when the test was killed.
+ * @arg is not used. Always returns 0.
+ */
+static int ring_buffer_consumer_thread(void *arg)
+{
+       while (!kthread_should_stop() && !kill_test) {
+               /* let the producer know the reader is about to start */
+               complete(&read_start);
+
+               ring_buffer_consumer();
+
+               /* set the state before the re-check so a wake up is not lost */
+               set_current_state(TASK_INTERRUPTIBLE);
+               if (kthread_should_stop() || kill_test)
+                       break;
+
+               schedule();
+               __set_current_state(TASK_RUNNING);
+       }
+       /* the break above leaves the loop with TASK_INTERRUPTIBLE set */
+       __set_current_state(TASK_RUNNING);
+
+       /* after a killed test, stay around until kthread_should_stop() */
+       if (kill_test)
+               wait_to_die();
+
+       return 0;
+}
+
+/*
+ * Thread function of the producer. Each round resets the buffer, wakes
+ * the consumer (if there is one) and waits for it to signal read_start,
+ * runs one ring_buffer_producer() pass and then sleeps for SLEEP_TIME
+ * seconds. Leaves the loop when asked to stop or when the test was
+ * killed. @arg is not used. Always returns 0.
+ */
+static int ring_buffer_producer_thread(void *arg)
+{
+       init_completion(&read_start);
+
+       while (!kthread_should_stop() && !kill_test) {
+               ring_buffer_reset(buffer);
+
+               if (consumer) {
+                       /* order the stores above before waking the consumer */
+                       smp_wmb();
+                       wake_up_process(consumer);
+                       wait_for_completion(&read_start);
+               }
+
+               ring_buffer_producer();
+
+               /* print the real sleep time instead of a hard coded "10" */
+               pr_info("Sleeping for %d secs\n", SLEEP_TIME);
+               set_current_state(TASK_INTERRUPTIBLE);
+               schedule_timeout(HZ * SLEEP_TIME);
+               __set_current_state(TASK_RUNNING);
+       }
+
+       /* after a killed test, stay around until kthread_should_stop() */
+       if (kill_test)
+               wait_to_die();
+
+       return 0;
+}
+
+/*
+ * Module init: allocate the test buffer, create the consumer thread
+ * (unless disable_reader is set) and start the producer thread.
+ * Returns 0 on success, -ENOMEM if the buffer could not be allocated,
+ * or the error carried by the thread pointer if a thread could not be
+ * created.
+ */
+static int __init ring_buffer_benchmark_init(void)
+{
+       int ret;
+
+       /* make a one meg buffer in overwrite mode */
+       buffer = ring_buffer_alloc(1000000, RB_FL_OVERWRITE);
+       if (!buffer)
+               return -ENOMEM;
+
+       /* the consumer is only created here; the producer wakes it up */
+       if (!disable_reader) {
+               consumer = kthread_create(ring_buffer_consumer_thread,
+                                         NULL, "rb_consumer");
+               if (IS_ERR(consumer)) {
+                       ret = PTR_ERR(consumer);
+                       goto out_fail;
+               }
+       }
+
+       producer = kthread_run(ring_buffer_producer_thread,
+                              NULL, "rb_producer");
+       if (IS_ERR(producer)) {
+               ret = PTR_ERR(producer);
+               goto out_kill;
+       }
+
+       return 0;
+
+ out_kill:
+       if (consumer)
+               kthread_stop(consumer);
+
+ out_fail:
+       ring_buffer_free(buffer);
+       return ret;
+}
+
+/*
+ * Module exit: stop the producer, then the consumer if one was created,
+ * and free the buffer once both threads have been stopped.
+ */
+static void __exit ring_buffer_benchmark_exit(void)
+{
+       kthread_stop(producer);
+       if (consumer)
+               kthread_stop(consumer);
+       ring_buffer_free(buffer);
+}
+
+module_init(ring_buffer_benchmark_init);
+module_exit(ring_buffer_benchmark_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("ring_buffer_benchmark");
+MODULE_LICENSE("GPL");
index cda81ec..8acd9b8 100644 (file)
@@ -171,6 +171,13 @@ static struct trace_array  global_trace;
 
 static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu);
 
+/*
+ * Wrapper around filter_check_discard() that always passes
+ * global_trace.buffer as the buffer argument.
+ */
+int filter_current_check_discard(struct ftrace_event_call *call, void *rec,
+                                struct ring_buffer_event *event)
+{
+       return filter_check_discard(call, rec, global_trace.buffer, event);
+}
+EXPORT_SYMBOL_GPL(filter_current_check_discard);
+
 cycle_t ftrace_now(int cpu)
 {
        u64 ts;
@@ -255,7 +262,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
 
 /* trace_flags holds trace_options default values */
 unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
-       TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME;
+       TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
+       TRACE_ITER_GRAPH_TIME;
 
 /**
  * trace_wake_up - wake up tasks waiting for trace input
@@ -317,6 +325,7 @@ static const char *trace_options[] = {
        "latency-format",
        "global-clock",
        "sleep-time",
+       "graph-time",
        NULL
 };
 
@@ -402,17 +411,6 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
        return cnt;
 }
 
-static void
-trace_print_seq(struct seq_file *m, struct trace_seq *s)
-{
-       int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len;
-
-       s->buffer[len] = 0;
-       seq_puts(m, s->buffer);
-
-       trace_seq_init(s);
-}
-
 /**
  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
  * @tr: tracer
@@ -641,6 +639,16 @@ void tracing_reset_online_cpus(struct trace_array *tr)
                tracing_reset(tr, cpu);
 }
 
+/* Call tracing_reset() for @cpu on global_trace */
+void tracing_reset_current(int cpu)
+{
+       tracing_reset(&global_trace, cpu);
+}
+
+/* Call tracing_reset_online_cpus() on global_trace */
+void tracing_reset_current_online_cpus(void)
+{
+       tracing_reset_online_cpus(&global_trace);
+}
+
 #define SAVED_CMDLINES 128
 #define NO_CMDLINE_MAP UINT_MAX
 static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
@@ -800,6 +808,7 @@ void trace_find_cmdline(int pid, char comm[])
                return;
        }
 
+       preempt_disable();
        __raw_spin_lock(&trace_cmdline_lock);
        map = map_pid_to_cmdline[pid];
        if (map != NO_CMDLINE_MAP)
@@ -808,6 +817,7 @@ void trace_find_cmdline(int pid, char comm[])
                strcpy(comm, "<...>");
 
        __raw_spin_unlock(&trace_cmdline_lock);
+       preempt_enable();
 }
 
 void tracing_record_cmdline(struct task_struct *tsk)
@@ -840,7 +850,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 }
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-                                                   unsigned char type,
+                                                   int type,
                                                    unsigned long len,
                                                    unsigned long flags, int pc)
 {
@@ -883,30 +893,40 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
 }
 
 struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
+trace_current_buffer_lock_reserve(int type, unsigned long len,
                                  unsigned long flags, int pc)
 {
        return trace_buffer_lock_reserve(&global_trace,
                                         type, len, flags, pc);
 }
+EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
 
 void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
                                        unsigned long flags, int pc)
 {
-       return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
+       __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1);
 }
+EXPORT_SYMBOL_GPL(trace_current_buffer_unlock_commit);
 
 void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
                                        unsigned long flags, int pc)
 {
-       return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
+       __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0);
+}
+EXPORT_SYMBOL_GPL(trace_nowake_buffer_unlock_commit);
+
+void trace_current_buffer_discard_commit(struct ring_buffer_event *event)
+{
+       ring_buffer_discard_commit(global_trace.buffer, event);
 }
+EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
 
 void
 trace_function(struct trace_array *tr,
               unsigned long ip, unsigned long parent_ip, unsigned long flags,
               int pc)
 {
+       struct ftrace_event_call *call = &event_function;
        struct ring_buffer_event *event;
        struct ftrace_entry *entry;
 
@@ -921,7 +941,9 @@ trace_function(struct trace_array *tr,
        entry   = ring_buffer_event_data(event);
        entry->ip                       = ip;
        entry->parent_ip                = parent_ip;
-       ring_buffer_unlock_commit(tr->buffer, event);
+
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 }
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -930,6 +952,7 @@ static int __trace_graph_entry(struct trace_array *tr,
                                unsigned long flags,
                                int pc)
 {
+       struct ftrace_event_call *call = &event_funcgraph_entry;
        struct ring_buffer_event *event;
        struct ftrace_graph_ent_entry *entry;
 
@@ -942,7 +965,8 @@ static int __trace_graph_entry(struct trace_array *tr,
                return 0;
        entry   = ring_buffer_event_data(event);
        entry->graph_ent                        = *trace;
-       ring_buffer_unlock_commit(global_trace.buffer, event);
+       if (!filter_current_check_discard(call, entry, event))
+               ring_buffer_unlock_commit(global_trace.buffer, event);
 
        return 1;
 }
@@ -952,6 +976,7 @@ static void __trace_graph_return(struct trace_array *tr,
                                unsigned long flags,
                                int pc)
 {
+       struct ftrace_event_call *call = &event_funcgraph_exit;
        struct ring_buffer_event *event;
        struct ftrace_graph_ret_entry *entry;
 
@@ -964,7 +989,8 @@ static void __trace_graph_return(struct trace_array *tr,
                return;
        entry   = ring_buffer_event_data(event);
        entry->ret                              = *trace;
-       ring_buffer_unlock_commit(global_trace.buffer, event);
+       if (!filter_current_check_discard(call, entry, event))
+               ring_buffer_unlock_commit(global_trace.buffer, event);
 }
 #endif
 
@@ -982,6 +1008,7 @@ static void __ftrace_trace_stack(struct trace_array *tr,
                                 int skip, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+       struct ftrace_event_call *call = &event_kernel_stack;
        struct ring_buffer_event *event;
        struct stack_entry *entry;
        struct stack_trace trace;
@@ -999,7 +1026,8 @@ static void __ftrace_trace_stack(struct trace_array *tr,
        trace.entries           = entry->caller;
 
        save_stack_trace(&trace);
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
 
@@ -1024,6 +1052,7 @@ static void ftrace_trace_userstack(struct trace_array *tr,
                                   unsigned long flags, int pc)
 {
 #ifdef CONFIG_STACKTRACE
+       struct ftrace_event_call *call = &event_user_stack;
        struct ring_buffer_event *event;
        struct userstack_entry *entry;
        struct stack_trace trace;
@@ -1045,7 +1074,8 @@ static void ftrace_trace_userstack(struct trace_array *tr,
        trace.entries           = entry->caller;
 
        save_stack_trace_user(&trace);
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 #endif
 }
 
@@ -1089,6 +1119,7 @@ tracing_sched_switch_trace(struct trace_array *tr,
                           struct task_struct *next,
                           unsigned long flags, int pc)
 {
+       struct ftrace_event_call *call = &event_context_switch;
        struct ring_buffer_event *event;
        struct ctx_switch_entry *entry;
 
@@ -1104,7 +1135,9 @@ tracing_sched_switch_trace(struct trace_array *tr,
        entry->next_prio                = next->prio;
        entry->next_state               = next->state;
        entry->next_cpu = task_cpu(next);
-       trace_buffer_unlock_commit(tr, event, flags, pc);
+
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               trace_buffer_unlock_commit(tr, event, flags, pc);
 }
 
 void
@@ -1113,6 +1146,7 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
                           struct task_struct *curr,
                           unsigned long flags, int pc)
 {
+       struct ftrace_event_call *call = &event_wakeup;
        struct ring_buffer_event *event;
        struct ctx_switch_entry *entry;
 
@@ -1129,7 +1163,8 @@ tracing_sched_wakeup_trace(struct trace_array *tr,
        entry->next_state               = wakee->state;
        entry->next_cpu                 = task_cpu(wakee);
 
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
        ftrace_trace_stack(tr, flags, 6, pc);
        ftrace_trace_userstack(tr, flags, pc);
 }
@@ -1230,11 +1265,13 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
                (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        static u32 trace_buf[TRACE_BUF_SIZE];
 
+       struct ftrace_event_call *call = &event_bprint;
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        struct bprint_entry *entry;
        unsigned long flags;
+       int disable;
        int resched;
        int cpu, len = 0, size, pc;
 
@@ -1249,7 +1286,8 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
 
-       if (unlikely(atomic_read(&data->disabled)))
+       disable = atomic_inc_return(&data->disabled);
+       if (unlikely(disable != 1))
                goto out;
 
        /* Lockdep uses trace_printk for lock tracing */
@@ -1269,13 +1307,15 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
        entry->fmt                      = fmt;
 
        memcpy(entry->buf, trace_buf, sizeof(u32) * len);
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 
 out_unlock:
        __raw_spin_unlock(&trace_buf_lock);
        local_irq_restore(flags);
 
 out:
+       atomic_dec_return(&data->disabled);
        ftrace_preempt_enable(resched);
        unpause_graph_tracing();
 
@@ -1288,12 +1328,14 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
        static raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED;
        static char trace_buf[TRACE_BUF_SIZE];
 
+       struct ftrace_event_call *call = &event_print;
        struct ring_buffer_event *event;
        struct trace_array *tr = &global_trace;
        struct trace_array_cpu *data;
        int cpu, len = 0, size, pc;
        struct print_entry *entry;
        unsigned long irq_flags;
+       int disable;
 
        if (tracing_disabled || tracing_selftest_running)
                return 0;
@@ -1303,7 +1345,8 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
        cpu = raw_smp_processor_id();
        data = tr->data[cpu];
 
-       if (unlikely(atomic_read(&data->disabled)))
+       disable = atomic_inc_return(&data->disabled);
+       if (unlikely(disable != 1))
                goto out;
 
        pause_graph_tracing();
@@ -1323,13 +1366,15 @@ int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
 
        memcpy(&entry->buf, trace_buf, len);
        entry->buf[len] = 0;
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 
  out_unlock:
        __raw_spin_unlock(&trace_buf_lock);
        raw_local_irq_restore(irq_flags);
        unpause_graph_tracing();
  out:
+       atomic_dec_return(&data->disabled);
        preempt_enable_notrace();
 
        return len;
@@ -1526,12 +1571,14 @@ static void *s_start(struct seq_file *m, loff_t *pos)
                p = s_next(m, p, &l);
        }
 
+       trace_event_read_lock();
        return p;
 }
 
 static void s_stop(struct seq_file *m, void *p)
 {
        atomic_dec(&trace_record_cmdline_disabled);
+       trace_event_read_unlock();
 }
 
 static void print_lat_help_header(struct seq_file *m)
@@ -1774,6 +1821,7 @@ static int trace_empty(struct trace_iterator *iter)
        return 1;
 }
 
+/*  Called with trace_event_read_lock() held. */
 static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
        enum print_line_t ret;
@@ -2396,6 +2444,56 @@ static const struct file_operations tracing_readme_fops = {
        .read           = tracing_readme_read,
 };
 
+/*
+ * tracing_saved_cmdlines_read - read handler that lists saved pid/comm pairs
+ * @file: opened file (not used by this function)
+ * @ubuf: user buffer to copy into
+ * @cnt:  maximum number of bytes to copy
+ * @ppos: file position, advanced by simple_read_from_buffer()
+ *
+ * Builds the complete text ("<pid> <comm>\n" per used slot of
+ * map_cmdline_to_pid[]) in a temporary kernel buffer on every call, then
+ * hands the window selected by @ppos/@cnt to simple_read_from_buffer().
+ *
+ * Returns what simple_read_from_buffer() returns, or -ENOMEM if either
+ * temporary buffer cannot be allocated.
+ */
+static ssize_t
+tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
+                               size_t cnt, loff_t *ppos)
+{
+       char *buf_comm;
+       char *file_buf;
+       char *buf;
+       int len = 0;
+       int pid;
+       int i;
+
+       /*
+        * One (16 + TASK_COMM_LEN)-byte slot per saved cmdline; the 16
+        * presumably covers the pid digits, the space and the newline.
+        */
+       file_buf = kmalloc(SAVED_CMDLINES*(16+TASK_COMM_LEN), GFP_KERNEL);
+       if (!file_buf)
+               return -ENOMEM;
+
+       /* Scratch space that receives one command name at a time. */
+       buf_comm = kmalloc(TASK_COMM_LEN, GFP_KERNEL);
+       if (!buf_comm) {
+               kfree(file_buf);
+               return -ENOMEM;
+       }
+
+       /* buf is the write cursor into file_buf. */
+       buf = file_buf;
+
+       for (i = 0; i < SAVED_CMDLINES; i++) {
+               int r;
+
+               /* Skip slots that hold no pid. */
+               pid = map_cmdline_to_pid[i];
+               if (pid == -1 || pid == NO_CMDLINE_MAP)
+                       continue;
+
+               trace_find_cmdline(pid, buf_comm);
+               r = sprintf(buf, "%d %s\n", pid, buf_comm);
+               buf += r;
+               len += r;
+       }
+
+       /* len is reused: from here on it holds the read result. */
+       len = simple_read_from_buffer(ubuf, cnt, ppos,
+                                     file_buf, len);
+
+       kfree(file_buf);
+       kfree(buf_comm);
+
+       return len;
+}
+
+/* File operations whose read side is tracing_saved_cmdlines_read(). */
+static const struct file_operations tracing_saved_cmdlines_fops = {
+    .open       = tracing_open_generic,
+    .read       = tracing_saved_cmdlines_read,
+};
+
 static ssize_t
 tracing_ctrl_read(struct file *filp, char __user *ubuf,
                  size_t cnt, loff_t *ppos)
@@ -2728,6 +2826,9 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
        /* trace pipe does not show start of buffer */
        cpumask_setall(iter->started);
 
+       if (trace_flags & TRACE_ITER_LATENCY_FMT)
+               iter->iter_flags |= TRACE_FILE_LAT_FMT;
+
        iter->cpu_file = cpu_file;
        iter->tr = &global_trace;
        mutex_init(&iter->mutex);
@@ -2915,6 +3016,7 @@ waitagain:
               offsetof(struct trace_iterator, seq));
        iter->pos = -1;
 
+       trace_event_read_lock();
        while (find_next_entry_inc(iter) != NULL) {
                enum print_line_t ret;
                int len = iter->seq.len;
@@ -2931,6 +3033,7 @@ waitagain:
                if (iter->seq.len >= cnt)
                        break;
        }
+       trace_event_read_unlock();
 
        /* Now copy what we have to the user */
        sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
@@ -3053,6 +3156,8 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
                goto out_err;
        }
 
+       trace_event_read_lock();
+
        /* Fill as many pages as possible. */
        for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) {
                pages[i] = alloc_page(GFP_KERNEL);
@@ -3075,6 +3180,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp,
                trace_seq_init(&iter->seq);
        }
 
+       trace_event_read_unlock();
        mutex_unlock(&iter->mutex);
 
        spd.nr_pages = i;
@@ -3425,7 +3531,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                .spd_release    = buffer_spd_release,
        };
        struct buffer_ref *ref;
-       int size, i;
+       int entries, size, i;
        size_t ret;
 
        if (*ppos & (PAGE_SIZE - 1)) {
@@ -3440,7 +3546,9 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                len &= PAGE_MASK;
        }
 
-       for (i = 0; i < PIPE_BUFFERS && len; i++, len -= PAGE_SIZE) {
+       entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
+
+       for (i = 0; i < PIPE_BUFFERS && len && entries; i++, len -= PAGE_SIZE) {
                struct page *page;
                int r;
 
@@ -3457,7 +3565,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                }
 
                r = ring_buffer_read_page(ref->buffer, &ref->page,
-                                         len, info->cpu, 0);
+                                         len, info->cpu, 1);
                if (r < 0) {
                        ring_buffer_free_read_page(ref->buffer,
                                                   ref->page);
@@ -3481,6 +3589,8 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
                spd.partial[i].private = (unsigned long)ref;
                spd.nr_pages++;
                *ppos += PAGE_SIZE;
+
+               entries = ring_buffer_entries_cpu(info->tr->buffer, info->cpu);
        }
 
        spd.nr_pages = i;
@@ -3508,6 +3618,45 @@ static const struct file_operations tracing_buffers_fops = {
        .llseek         = no_llseek,
 };
 
+/*
+ * tracing_stats_read - read handler reporting per-cpu ring buffer counters
+ * @filp:  opened file; filp->private_data is interpreted as the CPU number
+ * @ubuf:  user buffer to copy into
+ * @count: maximum number of bytes to copy
+ * @ppos:  file position, advanced by simple_read_from_buffer()
+ *
+ * Formats the entries, overrun, commit overrun and nmi dropped counters of
+ * global_trace's ring buffer for that CPU into a temporary trace_seq and
+ * copies the window selected by @ppos/@count to user space.
+ *
+ * Returns what simple_read_from_buffer() returns, or -ENOMEM if the
+ * temporary trace_seq cannot be allocated.
+ */
+static ssize_t
+tracing_stats_read(struct file *filp, char __user *ubuf,
+                  size_t count, loff_t *ppos)
+{
+       unsigned long cpu = (unsigned long)filp->private_data;
+       struct trace_array *tr = &global_trace;
+       struct trace_seq *s;
+       unsigned long cnt;
+
+       /*
+        * Nothing in this handler takes a lock or disables preemption
+        * before allocating, so a sleeping allocation is fine here.
+        */
+       s = kmalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
+               return -ENOMEM; /* errors must be negative for read() */
+
+       trace_seq_init(s);
+
+       cnt = ring_buffer_entries_cpu(tr->buffer, cpu);
+       trace_seq_printf(s, "entries: %ld\n", cnt);
+
+       cnt = ring_buffer_overrun_cpu(tr->buffer, cpu);
+       trace_seq_printf(s, "overrun: %ld\n", cnt);
+
+       cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
+       trace_seq_printf(s, "commit overrun: %ld\n", cnt);
+
+       cnt = ring_buffer_nmi_dropped_cpu(tr->buffer, cpu);
+       trace_seq_printf(s, "nmi dropped: %ld\n", cnt);
+
+       /* count is reused: from here on it holds the read result. */
+       count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
+
+       kfree(s);
+
+       return count;
+}
+
+/* File operations whose read side is tracing_stats_read(). */
+static const struct file_operations tracing_stats_fops = {
+       .open           = tracing_open_generic,
+       .read           = tracing_stats_read,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 int __weak ftrace_arch_read_dyn_info(char *buf, int size)
@@ -3597,7 +3746,7 @@ struct dentry *tracing_dentry_percpu(void)
 static void tracing_init_debugfs_percpu(long cpu)
 {
        struct dentry *d_percpu = tracing_dentry_percpu();
-       struct dentry *entry, *d_cpu;
+       struct dentry *d_cpu;
        /* strlen(cpu) + MAX(log10(cpu)) + '\0' */
        char cpu_dir[7];
 
@@ -3612,21 +3761,18 @@ static void tracing_init_debugfs_percpu(long cpu)
        }
 
        /* per cpu trace_pipe */
-       entry = debugfs_create_file("trace_pipe", 0444, d_cpu,
-                               (void *) cpu, &tracing_pipe_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace_pipe' entry\n");
+       trace_create_file("trace_pipe", 0444, d_cpu,
+                       (void *) cpu, &tracing_pipe_fops);
 
        /* per cpu trace */
-       entry = debugfs_create_file("trace", 0644, d_cpu,
-                               (void *) cpu, &tracing_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace' entry\n");
+       trace_create_file("trace", 0644, d_cpu,
+                       (void *) cpu, &tracing_fops);
+
+       trace_create_file("trace_pipe_raw", 0444, d_cpu,
+                       (void *) cpu, &tracing_buffers_fops);
 
-       entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu,
-                                   (void *) cpu, &tracing_buffers_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n");
+       trace_create_file("stats", 0444, d_cpu,
+                       (void *) cpu, &tracing_stats_fops);
 }
 
 #ifdef CONFIG_FTRACE_SELFTEST
@@ -3782,6 +3928,22 @@ static const struct file_operations trace_options_core_fops = {
        .write = trace_options_core_write,
 };
 
+/*
+ * trace_create_file - debugfs_create_file() plus a warning on failure
+ * @name:   file name, also printed in the warning
+ * @mode:   permission bits passed to debugfs_create_file()
+ * @parent: parent directory dentry passed to debugfs_create_file()
+ * @data:   opaque pointer passed to debugfs_create_file()
+ * @fops:   file operations passed to debugfs_create_file()
+ *
+ * All arguments are forwarded unchanged. If debugfs_create_file() returns
+ * NULL, a "Could not create debugfs '<name>' entry" warning is logged.
+ *
+ * Returns whatever debugfs_create_file() returned, so callers that need
+ * the dentry must still check it themselves.
+ */
+struct dentry *trace_create_file(const char *name,
+                                mode_t mode,
+                                struct dentry *parent,
+                                void *data,
+                                const struct file_operations *fops)
+{
+       struct dentry *ret;
+
+       ret = debugfs_create_file(name, mode, parent, data, fops);
+       if (!ret)
+               pr_warning("Could not create debugfs '%s' entry\n", name);
+
+       return ret;
+}
+
+
 static struct dentry *trace_options_init_dentry(void)
 {
        struct dentry *d_tracer;
@@ -3809,7 +3971,6 @@ create_trace_option_file(struct trace_option_dentry *topt,
                         struct tracer_opt *opt)
 {
        struct dentry *t_options;
-       struct dentry *entry;
 
        t_options = trace_options_init_dentry();
        if (!t_options)
@@ -3818,11 +3979,9 @@ create_trace_option_file(struct trace_option_dentry *topt,
        topt->flags = flags;
        topt->opt = opt;
 
-       entry = debugfs_create_file(opt->name, 0644, t_options, topt,
+       topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
                                    &trace_options_fops);
 
-       topt->entry = entry;
-
 }
 
 static struct trace_option_dentry *
@@ -3877,123 +4036,84 @@ static struct dentry *
 create_trace_option_core_file(const char *option, long index)
 {
        struct dentry *t_options;
-       struct dentry *entry;
 
        t_options = trace_options_init_dentry();
        if (!t_options)
                return NULL;
 
-       entry = debugfs_create_file(option, 0644, t_options, (void *)index,
+       return trace_create_file(option, 0644, t_options, (void *)index,
                                    &trace_options_core_fops);
-
-       return entry;
 }
 
 static __init void create_trace_options_dir(void)
 {
        struct dentry *t_options;
-       struct dentry *entry;
        int i;
 
        t_options = trace_options_init_dentry();
        if (!t_options)
                return;
 
-       for (i = 0; trace_options[i]; i++) {
-               entry = create_trace_option_core_file(trace_options[i], i);
-               if (!entry)
-                       pr_warning("Could not create debugfs %s entry\n",
-                                  trace_options[i]);
-       }
+       for (i = 0; trace_options[i]; i++)
+               create_trace_option_core_file(trace_options[i], i);
 }
 
 static __init int tracer_init_debugfs(void)
 {
        struct dentry *d_tracer;
-       struct dentry *entry;
        int cpu;
 
        d_tracer = tracing_init_dentry();
 
-       entry = debugfs_create_file("tracing_enabled", 0644, d_tracer,
-                                   &global_trace, &tracing_ctrl_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'tracing_enabled' entry\n");
+       trace_create_file("tracing_enabled", 0644, d_tracer,
+                       &global_trace, &tracing_ctrl_fops);
 
-       entry = debugfs_create_file("trace_options", 0644, d_tracer,
-                                   NULL, &tracing_iter_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace_options' entry\n");
+       trace_create_file("trace_options", 0644, d_tracer,
+                       NULL, &tracing_iter_fops);
 
-       create_trace_options_dir();
+       trace_create_file("tracing_cpumask", 0644, d_tracer,
+                       NULL, &tracing_cpumask_fops);
+
+       trace_create_file("trace", 0644, d_tracer,
+                       (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
 
-       entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer,
-                                   NULL, &tracing_cpumask_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'tracing_cpumask' entry\n");
-
-       entry = debugfs_create_file("trace", 0644, d_tracer,
-                                (void *) TRACE_PIPE_ALL_CPU, &tracing_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'trace' entry\n");
-
-       entry = debugfs_create_file("available_tracers", 0444, d_tracer,
-                                   &global_trace, &show_traces_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'available_tracers' entry\n");
-
-       entry = debugfs_create_file("current_tracer", 0444, d_tracer,
-                                   &global_trace, &set_tracer_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'current_tracer' entry\n");
-
-       entry = debugfs_create_file("tracing_max_latency", 0644, d_tracer,
-                                   &tracing_max_latency,
-                                   &tracing_max_lat_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'tracing_max_latency' entry\n");
-
-       entry = debugfs_create_file("tracing_thresh", 0644, d_tracer,
-                                   &tracing_thresh, &tracing_max_lat_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'tracing_thresh' entry\n");
-       entry = debugfs_create_file("README", 0644, d_tracer,
-                                   NULL, &tracing_readme_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'README' entry\n");
-
-       entry = debugfs_create_file("trace_pipe", 0444, d_tracer,
+       trace_create_file("available_tracers", 0444, d_tracer,
+                       &global_trace, &show_traces_fops);
+
+       trace_create_file("current_tracer", 0644, d_tracer,
+                       &global_trace, &set_tracer_fops);
+
+       trace_create_file("tracing_max_latency", 0644, d_tracer,
+                       &tracing_max_latency, &tracing_max_lat_fops);
+
+       trace_create_file("tracing_thresh", 0644, d_tracer,
+                       &tracing_thresh, &tracing_max_lat_fops);
+
+       trace_create_file("README", 0444, d_tracer,
+                       NULL, &tracing_readme_fops);
+
+       trace_create_file("trace_pipe", 0444, d_tracer,
                        (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'trace_pipe' entry\n");
-
-       entry = debugfs_create_file("buffer_size_kb", 0644, d_tracer,
-                                   &global_trace, &tracing_entries_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'buffer_size_kb' entry\n");
-
-       entry = debugfs_create_file("trace_marker", 0220, d_tracer,
-                                   NULL, &tracing_mark_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'trace_marker' entry\n");
+
+       trace_create_file("buffer_size_kb", 0644, d_tracer,
+                       &global_trace, &tracing_entries_fops);
+
+       trace_create_file("trace_marker", 0220, d_tracer,
+                       NULL, &tracing_mark_fops);
+
+       trace_create_file("saved_cmdlines", 0444, d_tracer,
+                       NULL, &tracing_saved_cmdlines_fops);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
-       entry = debugfs_create_file("dyn_ftrace_total_info", 0444, d_tracer,
-                                   &ftrace_update_tot_cnt,
-                                   &tracing_dyn_info_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'dyn_ftrace_total_info' entry\n");
+       trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
+                       &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
 #endif
 #ifdef CONFIG_SYSPROF_TRACER
        init_tracer_sysprof_debugfs(d_tracer);
 #endif
 
+       create_trace_options_dir();
+
        for_each_tracing_cpu(cpu)
                tracing_init_debugfs_percpu(cpu);
 
@@ -4064,7 +4184,8 @@ trace_printk_seq(struct trace_seq *s)
 
 static void __ftrace_dump(bool disable_tracing)
 {
-       static DEFINE_SPINLOCK(ftrace_dump_lock);
+       static raw_spinlock_t ftrace_dump_lock =
+               (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
        /* use static because iter can be a bit big for the stack */
        static struct trace_iterator iter;
        unsigned int old_userobj;
@@ -4073,7 +4194,8 @@ static void __ftrace_dump(bool disable_tracing)
        int cnt = 0, cpu;
 
        /* only one dump */
-       spin_lock_irqsave(&ftrace_dump_lock, flags);
+       local_irq_save(flags);
+       __raw_spin_lock(&ftrace_dump_lock);
        if (dump_ran)
                goto out;
 
@@ -4145,7 +4267,8 @@ static void __ftrace_dump(bool disable_tracing)
        }
 
  out:
-       spin_unlock_irqrestore(&ftrace_dump_lock, flags);
+       __raw_spin_unlock(&ftrace_dump_lock);
+       local_irq_restore(flags);
 }
 
 /* By default: disable tracing after the dump */
index e685ac2..6e735d4 100644 (file)
@@ -9,9 +9,12 @@
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
 #include <trace/boot.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <trace/power.h>
 
+#include <linux/trace_seq.h>
+#include <linux/ftrace_event.h>
+
 enum trace_type {
        __TRACE_FIRST_TYPE = 0,
 
@@ -41,20 +44,6 @@ enum trace_type {
        __TRACE_LAST_TYPE,
 };
 
-/*
- * The trace entry - the most basic unit of tracing. This is what
- * is printed in the end as a single line in the trace output, such as:
- *
- *     bash-15816 [01]   235.197585: idle_cpu <- irq_enter
- */
-struct trace_entry {
-       unsigned char           type;
-       unsigned char           flags;
-       unsigned char           preempt_count;
-       int                     pid;
-       int                     tgid;
-};
-
 /*
  * Function trace entry - function address and parent function addres:
  */
@@ -263,8 +252,6 @@ struct trace_array_cpu {
        char                    comm[TASK_COMM_LEN];
 };
 
-struct trace_iterator;
-
 /*
  * The trace array - an array of per-CPU trace arrays. This is the
  * highest level data structure that individual tracers deal with.
@@ -339,15 +326,6 @@ extern void __ftrace_bad_type(void);
                __ftrace_bad_type();                                    \
        } while (0)
 
-/* Return values for print_line callback */
-enum print_line_t {
-       TRACE_TYPE_PARTIAL_LINE = 0,    /* Retry after flushing the seq */
-       TRACE_TYPE_HANDLED      = 1,
-       TRACE_TYPE_UNHANDLED    = 2,    /* Relay to other output functions */
-       TRACE_TYPE_NO_CONSUME   = 3     /* Handled but ask to not consume */
-};
-
-
 /*
  * An option specific to a tracer. This is a boolean value.
  * The bit is the bit index that sets its value on the
@@ -423,60 +401,30 @@ struct tracer {
        struct tracer_stat      *stats;
 };
 
-struct trace_seq {
-       unsigned char           buffer[PAGE_SIZE];
-       unsigned int            len;
-       unsigned int            readpos;
-};
-
-static inline void
-trace_seq_init(struct trace_seq *s)
-{
-       s->len = 0;
-       s->readpos = 0;
-}
-
 
 #define TRACE_PIPE_ALL_CPU     -1
 
-/*
- * Trace iterator - used by printout routines who present trace
- * results to users and which routines might sleep, etc:
- */
-struct trace_iterator {
-       struct trace_array      *tr;
-       struct tracer           *trace;
-       void                    *private;
-       int                     cpu_file;
-       struct mutex            mutex;
-       struct ring_buffer_iter *buffer_iter[NR_CPUS];
-
-       /* The below is zeroed out in pipe_read */
-       struct trace_seq        seq;
-       struct trace_entry      *ent;
-       int                     cpu;
-       u64                     ts;
-
-       unsigned long           iter_flags;
-       loff_t                  pos;
-       long                    idx;
-
-       cpumask_var_t           started;
-};
-
 int tracer_init(struct tracer *t, struct trace_array *tr);
 int tracing_is_enabled(void);
 void trace_wake_up(void);
 void tracing_reset(struct trace_array *tr, int cpu);
 void tracing_reset_online_cpus(struct trace_array *tr);
+void tracing_reset_current(int cpu);
+void tracing_reset_current_online_cpus(void);
 int tracing_open_generic(struct inode *inode, struct file *filp);
+struct dentry *trace_create_file(const char *name,
+                                mode_t mode,
+                                struct dentry *parent,
+                                void *data,
+                                const struct file_operations *fops);
+
 struct dentry *tracing_init_dentry(void);
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
 
 struct ring_buffer_event;
 
 struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
-                                                   unsigned char type,
+                                                   int type,
                                                    unsigned long len,
                                                    unsigned long flags,
                                                    int pc);
@@ -484,14 +432,6 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
                                struct ring_buffer_event *event,
                                unsigned long flags, int pc);
 
-struct ring_buffer_event *
-trace_current_buffer_lock_reserve(unsigned char type, unsigned long len,
-                                 unsigned long flags, int pc);
-void trace_current_buffer_unlock_commit(struct ring_buffer_event *event,
-                                       unsigned long flags, int pc);
-void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event,
-                                       unsigned long flags, int pc);
-
 struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
                                                struct trace_array_cpu *data);
 
@@ -514,7 +454,6 @@ void tracing_sched_switch_trace(struct trace_array *tr,
                                struct task_struct *prev,
                                struct task_struct *next,
                                unsigned long flags, int pc);
-void tracing_record_cmdline(struct task_struct *tsk);
 
 void tracing_sched_wakeup_trace(struct trace_array *tr,
                                struct task_struct *wakee,
@@ -599,6 +538,8 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
                                               struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
                                         struct trace_array *tr);
+extern int trace_selftest_startup_hw_branches(struct tracer *trace,
+                                             struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
 
 extern void *head_page(struct trace_array_cpu *data);
@@ -613,6 +554,8 @@ extern unsigned long trace_flags;
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern enum print_line_t print_graph_function(struct trace_iterator *iter);
+extern enum print_line_t
+trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* TODO: make this variable */
@@ -644,7 +587,6 @@ static inline int ftrace_graph_addr(unsigned long addr)
        return 1;
 }
 #endif /* CONFIG_DYNAMIC_FTRACE */
-
 #else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
 print_graph_function(struct trace_iterator *iter)
@@ -692,6 +634,7 @@ enum trace_iterator_flags {
        TRACE_ITER_LATENCY_FMT          = 0x40000,
        TRACE_ITER_GLOBAL_CLK           = 0x80000,
        TRACE_ITER_SLEEP_TIME           = 0x100000,
+       TRACE_ITER_GRAPH_TIME           = 0x200000,
 };
 
 /*
@@ -790,103 +733,113 @@ struct ftrace_event_field {
        char                    *type;
        int                     offset;
        int                     size;
+       int                     is_signed;
 };
 
-struct ftrace_event_call {
-       char                    *name;
-       char                    *system;
-       struct dentry           *dir;
-       int                     enabled;
-       int                     (*regfunc)(void);
-       void                    (*unregfunc)(void);
-       int                     id;
-       int                     (*raw_init)(void);
-       int                     (*show_format)(struct trace_seq *s);
-       int                     (*define_fields)(void);
-       struct list_head        fields;
+struct event_filter {
+       int                     n_preds;
        struct filter_pred      **preds;
-
-#ifdef CONFIG_EVENT_PROFILE
-       atomic_t        profile_count;
-       int             (*profile_enable)(struct ftrace_event_call *);
-       void            (*profile_disable)(struct ftrace_event_call *);
-#endif
+       char                    *filter_string;
 };
 
 struct event_subsystem {
        struct list_head        list;
        const char              *name;
        struct dentry           *entry;
-       struct filter_pred      **preds;
+       void                    *filter;
 };
 
-#define events_for_each(event)                                         \
-       for (event = __start_ftrace_events;                             \
-            (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-            event++)
-
-#define MAX_FILTER_PRED 8
-
 struct filter_pred;
 
-typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
+                                int val1, int val2);
 
 struct filter_pred {
        filter_pred_fn_t fn;
        u64 val;
-       char *str_val;
+       char str_val[MAX_FILTER_STR_VAL];
        int str_len;
        char *field_name;
        int offset;
        int not;
-       int or;
-       int compound;
-       int clear;
+       int op;
+       int pop_n;
 };
 
-int trace_define_field(struct ftrace_event_call *call, char *type,
-                      char *name, int offset, int size);
-extern void filter_free_pred(struct filter_pred *pred);
-extern void filter_print_preds(struct filter_pred **preds,
+extern void print_event_filter(struct ftrace_event_call *call,
                               struct trace_seq *s);
-extern int filter_parse(char **pbuf, struct filter_pred *pred);
-extern int filter_add_pred(struct ftrace_event_call *call,
-                          struct filter_pred *pred);
-extern void filter_free_preds(struct ftrace_event_call *call);
-extern int filter_match_preds(struct ftrace_event_call *call, void *rec);
-extern void filter_free_subsystem_preds(struct event_subsystem *system);
-extern int filter_add_subsystem_pred(struct event_subsystem *system,
-                                    struct filter_pred *pred);
-
-void event_trace_printk(unsigned long ip, const char *fmt, ...);
-extern struct ftrace_event_call __start_ftrace_events[];
-extern struct ftrace_event_call __stop_ftrace_events[];
-
-#define for_each_event(event)                                          \
-       for (event = __start_ftrace_events;                             \
-            (unsigned long)event < (unsigned long)__stop_ftrace_events; \
-            event++)
+extern int apply_event_filter(struct ftrace_event_call *call,
+                             char *filter_string);
+extern int apply_subsystem_event_filter(struct event_subsystem *system,
+                                       char *filter_string);
+extern void print_subsystem_event_filter(struct event_subsystem *system,
+                                        struct trace_seq *s);
+
+static inline int
+filter_check_discard(struct ftrace_event_call *call, void *rec,
+                    struct ring_buffer *buffer,
+                    struct ring_buffer_event *event)
+{
+       if (unlikely(call->filter_active) && !filter_match_preds(call, rec)) {
+               ring_buffer_discard_commit(buffer, event);
+               return 1;
+       }
+
+       return 0;
+}
+
+#define DEFINE_COMPARISON_PRED(type)                                   \
+static int filter_pred_##type(struct filter_pred *pred, void *event,   \
+                             int val1, int val2)                       \
+{                                                                      \
+       type *addr = (type *)(event + pred->offset);                    \
+       type val = (type)pred->val;                                     \
+       int match = 0;                                                  \
+                                                                       \
+       switch (pred->op) {                                             \
+       case OP_LT:                                                     \
+               match = (*addr < val);                                  \
+               break;                                                  \
+       case OP_LE:                                                     \
+               match = (*addr <= val);                                 \
+               break;                                                  \
+       case OP_GT:                                                     \
+               match = (*addr > val);                                  \
+               break;                                                  \
+       case OP_GE:                                                     \
+               match = (*addr >= val);                                 \
+               break;                                                  \
+       default:                                                        \
+               break;                                                  \
+       }                                                               \
+                                                                       \
+       return match;                                                   \
+}
+
+#define DEFINE_EQUALITY_PRED(size)                                     \
+static int filter_pred_##size(struct filter_pred *pred, void *event,   \
+                             int val1, int val2)                       \
+{                                                                      \
+       u##size *addr = (u##size *)(event + pred->offset);              \
+       u##size val = (u##size)pred->val;                               \
+       int match;                                                      \
+                                                                       \
+       match = (val == *addr) ^ pred->not;                             \
+                                                                       \
+       return match;                                                   \
+}
+
+extern struct mutex event_mutex;
+extern struct list_head ftrace_events;
 
 extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
-/*
- * The double __builtin_constant_p is because gcc will give us an error
- * if we try to allocate the static variable to fmt if it is not a
- * constant. Even with the outer if statement optimizing out.
- */
-#define event_trace_printk(ip, fmt, args...)                           \
-do {                                                                   \
-       __trace_printk_check_format(fmt, ##args);                       \
-       tracing_record_cmdline(current);                                \
-       if (__builtin_constant_p(fmt)) {                                \
-               static const char *trace_printk_fmt                     \
-                 __attribute__((section("__trace_printk_fmt"))) =      \
-                       __builtin_constant_p(fmt) ? fmt : NULL;         \
-                                                                       \
-               __trace_bprintk(ip, trace_printk_fmt, ##args);          \
-       } else                                                          \
-               __trace_printk(ip, fmt, ##args);                        \
-} while (0)
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)     \
+       extern struct ftrace_event_call event_##call;
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct, tpfmt)
+#include "trace_event_types.h"
 
 #endif /* _LINUX_KERNEL_TRACE_H */
index 7a30fc4..a29ef23 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
 #include <linux/kallsyms.h>
+#include <linux/time.h>
 
 #include "trace.h"
 #include "trace_output.h"
@@ -67,7 +68,7 @@ initcall_call_print_line(struct trace_iterator *iter)
        trace_assign_type(field, entry);
        call = &field->boot_call;
        ts = iter->ts;
-       nsec_rem = do_div(ts, 1000000000);
+       nsec_rem = do_div(ts, NSEC_PER_SEC);
 
        ret = trace_seq_printf(s, "[%5ld.%09ld] calling  %s @ %i\n",
                        (unsigned long)ts, nsec_rem, call->func, call->caller);
@@ -92,7 +93,7 @@ initcall_ret_print_line(struct trace_iterator *iter)
        trace_assign_type(field, entry);
        init_ret = &field->boot_ret;
        ts = iter->ts;
-       nsec_rem = do_div(ts, 1000000000);
+       nsec_rem = do_div(ts, NSEC_PER_SEC);
 
        ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
                        "returned %d after %llu msecs\n",
index 8333715..7a7a9fd 100644 (file)
@@ -30,6 +30,7 @@ static struct trace_array *branch_tracer;
 static void
 probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
 {
+       struct ftrace_event_call *call = &event_branch;
        struct trace_array *tr = branch_tracer;
        struct ring_buffer_event *event;
        struct trace_branch *entry;
@@ -73,7 +74,8 @@ probe_likely_condition(struct ftrace_branch_data *f, int val, int expect)
        entry->line = f->line;
        entry->correct = val == expect;
 
-       ring_buffer_unlock_commit(tr->buffer, event);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               ring_buffer_unlock_commit(tr->buffer, event);
 
  out:
        atomic_dec(&tr->data[cpu]->disabled);
@@ -271,7 +273,7 @@ static int branch_stat_show(struct seq_file *m, void *v)
        return 0;
 }
 
-static void *annotated_branch_stat_start(void)
+static void *annotated_branch_stat_start(struct tracer_stat *trace)
 {
        return __start_annotated_branch_profile;
 }
@@ -346,7 +348,7 @@ static int all_branch_stat_headers(struct seq_file *m)
        return 0;
 }
 
-static void *all_branch_stat_start(void)
+static void *all_branch_stat_start(struct tracer_stat *trace)
 {
        return __start_branch_profile;
 }
index 22cba99..5b5895a 100644 (file)
 int ftrace_profile_enable(int event_id)
 {
        struct ftrace_event_call *event;
+       int ret = -EINVAL;
 
-       for_each_event(event) {
-               if (event->id == event_id)
-                       return event->profile_enable(event);
+       mutex_lock(&event_mutex);
+       list_for_each_entry(event, &ftrace_events, list) {
+               if (event->id == event_id) {
+                       ret = event->profile_enable(event);
+                       break;
+               }
        }
+       mutex_unlock(&event_mutex);
 
-       return -EINVAL;
+       return ret;
 }
 
 void ftrace_profile_disable(int event_id)
 {
        struct ftrace_event_call *event;
 
-       for_each_event(event) {
-               if (event->id == event_id)
-                       return event->profile_disable(event);
+       mutex_lock(&event_mutex);
+       list_for_each_entry(event, &ftrace_events, list) {
+               if (event->id == event_id) {
+                       event->profile_disable(event);
+                       break;
+               }
        }
+       mutex_unlock(&event_mutex);
 }
-
index fd78bee..5e32e37 100644 (file)
@@ -57,7 +57,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore,
        TP_RAW_FMT("%u:%u:%u  ==+ %u:%u:%u [%03u]")
 );
 
-TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore,
+TRACE_EVENT_FORMAT_NOFILTER(special, TRACE_SPECIAL, special_entry, ignore,
        TRACE_STRUCT(
                TRACE_FIELD(unsigned long, arg1, arg1)
                TRACE_FIELD(unsigned long, arg2, arg2)
@@ -122,8 +122,10 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore,
 TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore,
        TRACE_STRUCT(
                TRACE_FIELD(unsigned int, line, line)
-               TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func)
-               TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file)
+               TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func,
+                                   TRACE_FUNC_SIZE+1, func)
+               TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file,
+                                   TRACE_FUNC_SIZE+1, file)
                TRACE_FIELD(char, correct, correct)
        ),
        TP_RAW_FMT("%u:%s:%s (%u)")
@@ -139,8 +141,8 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore,
 
 TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore,
        TRACE_STRUCT(
-               TRACE_FIELD(ktime_t, state_data.stamp, stamp)
-               TRACE_FIELD(ktime_t, state_data.end, end)
+               TRACE_FIELD_SIGN(ktime_t, state_data.stamp, stamp, 1)
+               TRACE_FIELD_SIGN(ktime_t, state_data.end, end, 1)
                TRACE_FIELD(int, state_data.type, type)
                TRACE_FIELD(int, state_data.state, state)
        ),
index 576f4fa..aa08be6 100644 (file)
@@ -8,19 +8,25 @@
  *
  */
 
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
+#include <linux/delay.h>
 
 #include "trace_output.h"
 
 #define TRACE_SYSTEM "TRACE_SYSTEM"
 
-static DEFINE_MUTEX(event_mutex);
+DEFINE_MUTEX(event_mutex);
+
+LIST_HEAD(ftrace_events);
 
 int trace_define_field(struct ftrace_event_call *call, char *type,
-                      char *name, int offset, int size)
+                      char *name, int offset, int size, int is_signed)
 {
        struct ftrace_event_field *field;
 
@@ -38,6 +44,7 @@ int trace_define_field(struct ftrace_event_call *call, char *type,
 
        field->offset = offset;
        field->size = size;
+       field->is_signed = is_signed;
        list_add(&field->link, &call->fields);
 
        return 0;
@@ -51,47 +58,94 @@ err:
 
        return -ENOMEM;
 }
+EXPORT_SYMBOL_GPL(trace_define_field);
 
-static void ftrace_clear_events(void)
-{
-       struct ftrace_event_call *call = (void *)__start_ftrace_events;
-
+#ifdef CONFIG_MODULES
 
-       while ((unsigned long)call < (unsigned long)__stop_ftrace_events) {
+static void trace_destroy_fields(struct ftrace_event_call *call)
+{
+       struct ftrace_event_field *field, *next;
 
-               if (call->enabled) {
-                       call->enabled = 0;
-                       call->unregfunc();
-               }
-               call++;
+       list_for_each_entry_safe(field, next, &call->fields, link) {
+               list_del(&field->link);
+               kfree(field->type);
+               kfree(field->name);
+               kfree(field);
        }
 }
 
+#endif /* CONFIG_MODULES */
+
 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
                                        int enable)
 {
-
        switch (enable) {
        case 0:
                if (call->enabled) {
                        call->enabled = 0;
+                       tracing_stop_cmdline_record();
                        call->unregfunc();
                }
                break;
        case 1:
                if (!call->enabled) {
                        call->enabled = 1;
+                       tracing_start_cmdline_record();
                        call->regfunc();
                }
                break;
        }
 }
 
+static void ftrace_clear_events(void)
+{
+       struct ftrace_event_call *call;
+
+       mutex_lock(&event_mutex);
+       list_for_each_entry(call, &ftrace_events, list) {
+               ftrace_event_enable_disable(call, 0);
+       }
+       mutex_unlock(&event_mutex);
+}
+
+/*
+ * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
+ */
+static int __ftrace_set_clr_event(const char *match, const char *sub,
+                                 const char *event, int set)
+{
+       struct ftrace_event_call *call;
+       int ret = -EINVAL;
+
+       mutex_lock(&event_mutex);
+       list_for_each_entry(call, &ftrace_events, list) {
+
+               if (!call->name || !call->regfunc)
+                       continue;
+
+               if (match &&
+                   strcmp(match, call->name) != 0 &&
+                   strcmp(match, call->system) != 0)
+                       continue;
+
+               if (sub && strcmp(sub, call->system) != 0)
+                       continue;
+
+               if (event && strcmp(event, call->name) != 0)
+                       continue;
+
+               ftrace_event_enable_disable(call, set);
+
+               ret = 0;
+       }
+       mutex_unlock(&event_mutex);
+
+       return ret;
+}
+
 static int ftrace_set_clr_event(char *buf, int set)
 {
-       struct ftrace_event_call *call = __start_ftrace_events;
        char *event = NULL, *sub = NULL, *match;
-       int ret = -EINVAL;
 
        /*
         * The buf format can be <subsystem>:<event-name>
@@ -117,30 +171,24 @@ static int ftrace_set_clr_event(char *buf, int set)
                        event = NULL;
        }
 
-       mutex_lock(&event_mutex);
-       for_each_event(call) {
-
-               if (!call->name || !call->regfunc)
-                       continue;
-
-               if (match &&
-                   strcmp(match, call->name) != 0 &&
-                   strcmp(match, call->system) != 0)
-                       continue;
-
-               if (sub && strcmp(sub, call->system) != 0)
-                       continue;
-
-               if (event && strcmp(event, call->name) != 0)
-                       continue;
-
-               ftrace_event_enable_disable(call, set);
-
-               ret = 0;
-       }
-       mutex_unlock(&event_mutex);
+       return __ftrace_set_clr_event(match, sub, event, set);
+}
 
-       return ret;
+/**
+ * trace_set_clr_event - enable or disable an event
+ * @system: system name to match (NULL for any system)
+ * @event: event name to match (NULL for all events, within system)
+ * @set: 1 to enable, 0 to disable
+ *
+ * This is a way for other parts of the kernel to enable or disable
+ * event recording.
+ *
+ * Returns 0 on success, -EINVAL if the parameters do not match any
+ * registered events.
+ */
+int trace_set_clr_event(const char *system, const char *event, int set)
+{
+       return __ftrace_set_clr_event(NULL, system, event, set);
 }
 
 /* 128 should be much more than enough */
@@ -224,15 +272,17 @@ ftrace_event_write(struct file *file, const char __user *ubuf,
 static void *
 t_next(struct seq_file *m, void *v, loff_t *pos)
 {
-       struct ftrace_event_call *call = m->private;
-       struct ftrace_event_call *next = call;
+       struct list_head *list = m->private;
+       struct ftrace_event_call *call;
 
        (*pos)++;
 
        for (;;) {
-               if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+               if (list == &ftrace_events)
                        return NULL;
 
+               call = list_entry(list, struct ftrace_event_call, list);
+
                /*
                 * The ftrace subsystem is for showing formats only.
                 * They can not be enabled or disabled via the event files.
@@ -240,45 +290,51 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
                if (call->regfunc)
                        break;
 
-               call++;
-               next = call;
+               list = list->next;
        }
 
-       m->private = ++next;
+       m->private = list->next;
 
        return call;
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
 {
+       mutex_lock(&event_mutex);
+       if (*pos == 0)
+               m->private = ftrace_events.next;
        return t_next(m, NULL, pos);
 }
 
 static void *
 s_next(struct seq_file *m, void *v, loff_t *pos)
 {
-       struct ftrace_event_call *call = m->private;
-       struct ftrace_event_call *next;
+       struct list_head *list = m->private;
+       struct ftrace_event_call *call;
 
        (*pos)++;
 
  retry:
-       if ((unsigned long)call >= (unsigned long)__stop_ftrace_events)
+       if (list == &ftrace_events)
                return NULL;
 
+       call = list_entry(list, struct ftrace_event_call, list);
+
        if (!call->enabled) {
-               call++;
+               list = list->next;
                goto retry;
        }
 
-       next = call;
-       m->private = ++next;
+       m->private = list->next;
 
        return call;
 }
 
 static void *s_start(struct seq_file *m, loff_t *pos)
 {
+       mutex_lock(&event_mutex);
+       if (*pos == 0)
+               m->private = ftrace_events.next;
        return s_next(m, NULL, pos);
 }
 
@@ -295,12 +351,12 @@ static int t_show(struct seq_file *m, void *v)
 
 static void t_stop(struct seq_file *m, void *p)
 {
+       mutex_unlock(&event_mutex);
 }
 
 static int
 ftrace_event_seq_open(struct inode *inode, struct file *file)
 {
-       int ret;
        const struct seq_operations *seq_ops;
 
        if ((file->f_mode & FMODE_WRITE) &&
@@ -308,13 +364,7 @@ ftrace_event_seq_open(struct inode *inode, struct file *file)
                ftrace_clear_events();
 
        seq_ops = inode->i_private;
-       ret = seq_open(file, seq_ops);
-       if (!ret) {
-               struct seq_file *m = file->private_data;
-
-               m->private = __start_ftrace_events;
-       }
-       return ret;
+       return seq_open(file, seq_ops);
 }
 
 static ssize_t
@@ -374,8 +424,93 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
        return cnt;
 }
 
+static ssize_t
+system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
+                  loff_t *ppos)
+{
+       const char set_to_char[4] = { '?', '0', '1', 'X' };
+       const char *system = filp->private_data;
+       struct ftrace_event_call *call;
+       char buf[2];
+       int set = 0;
+       int ret;
+
+       mutex_lock(&event_mutex);
+       list_for_each_entry(call, &ftrace_events, list) {
+               if (!call->name || !call->regfunc)
+                       continue;
+
+               if (system && strcmp(call->system, system) != 0)
+                       continue;
+
+               /*
+                * We need to find out if all the events are set
+                * or if all events are cleared, or if we have
+                * a mixture.
+                */
+               set |= (1 << !!call->enabled);
+
+               /*
+                * If we have a mixture, no need to look further.
+                */
+               if (set == 3)
+                       break;
+       }
+       mutex_unlock(&event_mutex);
+
+       buf[0] = set_to_char[set];
+       buf[1] = '\n';
+
+       ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
+
+       return ret;
+}
+
+static ssize_t
+system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
+                   loff_t *ppos)
+{
+       const char *system = filp->private_data;
+       unsigned long val;
+       char buf[64];
+       ssize_t ret;
+
+       if (cnt >= sizeof(buf))
+               return -EINVAL;
+
+       if (copy_from_user(&buf, ubuf, cnt))
+               return -EFAULT;
+
+       buf[cnt] = 0;
+
+       ret = strict_strtoul(buf, 10, &val);
+       if (ret < 0)
+               return ret;
+
+       ret = tracing_update_buffers();
+       if (ret < 0)
+               return ret;
+
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
+       ret = __ftrace_set_clr_event(NULL, system, NULL, val);
+       if (ret)
+               goto out;
+
+       ret = cnt;
+
+out:
+       *ppos += cnt;
+
+       return ret;
+}
+
+extern char *__bad_type_size(void);
+
 #undef FIELD
 #define FIELD(type, name)                                              \
+       sizeof(type) != sizeof(field.name) ? __bad_type_size() :        \
        #type, "common_" #name, offsetof(typeof(field), name),          \
                sizeof(field.name)
 
@@ -391,7 +526,7 @@ static int trace_write_header(struct trace_seq *s)
                                "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
                                "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
                                "\n",
-                               FIELD(unsigned char, type),
+                               FIELD(unsigned short, type),
                                FIELD(unsigned char, flags),
                                FIELD(unsigned char, preempt_count),
                                FIELD(int, pid),
@@ -481,7 +616,7 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
        trace_seq_init(s);
 
-       filter_print_preds(call->preds, s);
+       print_event_filter(call, s);
        r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
        kfree(s);
@@ -494,38 +629,26 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
                   loff_t *ppos)
 {
        struct ftrace_event_call *call = filp->private_data;
-       char buf[64], *pbuf = buf;
-       struct filter_pred *pred;
+       char *buf;
        int err;
 
-       if (cnt >= sizeof(buf))
+       if (cnt >= PAGE_SIZE)
                return -EINVAL;
 
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-       buf[cnt] = '\0';
-
-       pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-       if (!pred)
+       buf = (char *)__get_free_page(GFP_TEMPORARY);
+       if (!buf)
                return -ENOMEM;
 
-       err = filter_parse(&pbuf, pred);
-       if (err < 0) {
-               filter_free_pred(pred);
-               return err;
-       }
-
-       if (pred->clear) {
-               filter_free_preds(call);
-               filter_free_pred(pred);
-               return cnt;
+       if (copy_from_user(buf, ubuf, cnt)) {
+               free_page((unsigned long) buf);
+               return -EFAULT;
        }
+       buf[cnt] = '\0';
 
-       err = filter_add_pred(call, pred);
-       if (err < 0) {
-               filter_free_pred(pred);
+       err = apply_event_filter(call, buf);
+       free_page((unsigned long) buf);
+       if (err < 0)
                return err;
-       }
 
        *ppos += cnt;
 
@@ -549,7 +672,7 @@ subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
 
        trace_seq_init(s);
 
-       filter_print_preds(system->preds, s);
+       print_subsystem_event_filter(system, s);
        r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
 
        kfree(s);
@@ -562,45 +685,56 @@ subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
                       loff_t *ppos)
 {
        struct event_subsystem *system = filp->private_data;
-       char buf[64], *pbuf = buf;
-       struct filter_pred *pred;
+       char *buf;
        int err;
 
-       if (cnt >= sizeof(buf))
+       if (cnt >= PAGE_SIZE)
                return -EINVAL;
 
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-       buf[cnt] = '\0';
-
-       pred = kzalloc(sizeof(*pred), GFP_KERNEL);
-       if (!pred)
+       buf = (char *)__get_free_page(GFP_TEMPORARY);
+       if (!buf)
                return -ENOMEM;
 
-       err = filter_parse(&pbuf, pred);
-       if (err < 0) {
-               filter_free_pred(pred);
-               return err;
-       }
-
-       if (pred->clear) {
-               filter_free_subsystem_preds(system);
-               filter_free_pred(pred);
-               return cnt;
+       if (copy_from_user(buf, ubuf, cnt)) {
+               free_page((unsigned long) buf);
+               return -EFAULT;
        }
+       buf[cnt] = '\0';
 
-       err = filter_add_subsystem_pred(system, pred);
-       if (err < 0) {
-               filter_free_subsystem_preds(system);
-               filter_free_pred(pred);
+       err = apply_subsystem_event_filter(system, buf);
+       free_page((unsigned long) buf);
+       if (err < 0)
                return err;
-       }
 
        *ppos += cnt;
 
        return cnt;
 }
 
+static ssize_t
+show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
+{
+       int (*func)(struct trace_seq *s) = filp->private_data;
+       struct trace_seq *s;
+       int r;
+
+       if (*ppos)
+               return 0;
+
+       s = kmalloc(sizeof(*s), GFP_KERNEL);
+       if (!s)
+               return -ENOMEM;
+
+       trace_seq_init(s);
+
+       func(s);
+       r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
+
+       kfree(s);
+
+       return r;
+}
+
 static const struct seq_operations show_event_seq_ops = {
        .start = t_start,
        .next = t_next,
@@ -658,6 +792,17 @@ static const struct file_operations ftrace_subsystem_filter_fops = {
        .write = subsystem_filter_write,
 };
 
+/*
+ * File operations for the "enable" files that are created with a
+ * subsystem name (or NULL for the top-level events directory) as
+ * their data argument.
+ */
+static const struct file_operations ftrace_system_enable_fops = {
+       .open = tracing_open_generic,
+       .read = system_enable_read,
+       .write = system_enable_write,
+};
+
+/*
+ * Read-only file operations; the content comes from show_header(),
+ * which calls the print callback passed as the file's data argument.
+ */
+static const struct file_operations ftrace_show_header_fops = {
+       .open = tracing_open_generic,
+       .read = show_header,
+};
+
 static struct dentry *event_trace_events_dir(void)
 {
        static struct dentry *d_tracer;
@@ -684,6 +829,7 @@ static struct dentry *
 event_subsystem_dir(const char *name, struct dentry *d_events)
 {
        struct event_subsystem *system;
+       struct dentry *entry;
 
        /* First see if we did not already create this dir */
        list_for_each_entry(system, &event_subsystems, list) {
@@ -707,16 +853,46 @@ event_subsystem_dir(const char *name, struct dentry *d_events)
                return d_events;
        }
 
-       system->name = name;
+       system->name = kstrdup(name, GFP_KERNEL);
+       if (!system->name) {
+               debugfs_remove(system->entry);
+               kfree(system);
+               return d_events;
+       }
+
        list_add(&system->list, &event_subsystems);
 
-       system->preds = NULL;
+       system->filter = NULL;
+
+       system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
+       if (!system->filter) {
+               pr_warning("Could not allocate filter for subsystem "
+                          "'%s'\n", name);
+               return system->entry;
+       }
+
+       entry = debugfs_create_file("filter", 0644, system->entry, system,
+                                   &ftrace_subsystem_filter_fops);
+       if (!entry) {
+               kfree(system->filter);
+               system->filter = NULL;
+               pr_warning("Could not create debugfs "
+                          "'%s/filter' entry\n", name);
+       }
+
+       entry = trace_create_file("enable", 0644, system->entry,
+                                 (void *)system->name,
+                                 &ftrace_system_enable_fops);
 
        return system->entry;
 }
 
 static int
-event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
+event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
+                const struct file_operations *id,
+                const struct file_operations *enable,
+                const struct file_operations *filter,
+                const struct file_operations *format)
 {
        struct dentry *entry;
        int ret;
@@ -725,7 +901,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
         * If the trace point header did not define TRACE_SYSTEM
         * then the system would be called "TRACE_SYSTEM".
         */
-       if (strcmp(call->system, "TRACE_SYSTEM") != 0)
+       if (strcmp(call->system, TRACE_SYSTEM) != 0)
                d_events = event_subsystem_dir(call->system, d_events);
 
        if (call->raw_init) {
@@ -744,21 +920,13 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
                return -1;
        }
 
-       if (call->regfunc) {
-               entry = debugfs_create_file("enable", 0644, call->dir, call,
-                                           &ftrace_enable_fops);
-               if (!entry)
-                       pr_warning("Could not create debugfs "
-                                  "'%s/enable' entry\n", call->name);
-       }
+       if (call->regfunc)
+               entry = trace_create_file("enable", 0644, call->dir, call,
+                                         enable);
 
-       if (call->id) {
-               entry = debugfs_create_file("id", 0444, call->dir, call,
-                               &ftrace_event_id_fops);
-               if (!entry)
-                       pr_warning("Could not create debugfs '%s/id' entry\n",
-                                       call->name);
-       }
+       if (call->id)
+               entry = trace_create_file("id", 0444, call->dir, call,
+                                         id);
 
        if (call->define_fields) {
                ret = call->define_fields();
@@ -767,32 +935,195 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events)
                                   " events/%s\n", call->name);
                        return ret;
                }
-               entry = debugfs_create_file("filter", 0644, call->dir, call,
-                                           &ftrace_event_filter_fops);
-               if (!entry)
-                       pr_warning("Could not create debugfs "
-                                  "'%s/filter' entry\n", call->name);
+               entry = trace_create_file("filter", 0644, call->dir, call,
+                                         filter);
        }
 
        /* A trace may not want to export its format */
        if (!call->show_format)
                return 0;
 
-       entry = debugfs_create_file("format", 0444, call->dir, call,
-                                   &ftrace_event_format_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'%s/format' entry\n", call->name);
+       entry = trace_create_file("format", 0444, call->dir, call,
+                                 format);
+
+       return 0;
+}
+
+/*
+ * Iterate @event over the array elements in [@start, @end).
+ *
+ * The bounds are compared as unsigned long addresses.  Every argument
+ * is parenthesized so that an expression such as "base + count" can be
+ * passed as @end: without the parentheses the cast would bind to "base"
+ * alone and "count" would be added as a byte offset.
+ */
+#define for_each_event(event, start, end)                      \
+       for ((event) = (start);                                 \
+            (unsigned long)(event) < (unsigned long)(end);     \
+            (event)++)
+
+#ifdef CONFIG_MODULES
+
+static LIST_HEAD(ftrace_module_file_list);
+
+/*
+ * Modules must own their file_operations to keep up with
+ * reference counting.
+ *
+ * One instance is allocated per module by trace_create_file_ops():
+ *   list   - link in ftrace_module_file_list
+ *   mod    - the owning module; used to find the entry again on unload
+ *   id, enable, format, filter
+ *          - copies of the built-in event file_operations with .owner
+ *            set to @mod
+ */
+struct ftrace_module_file_ops {
+       struct list_head                list;
+       struct module                   *mod;
+       struct file_operations          id;
+       struct file_operations          enable;
+       struct file_operations          format;
+       struct file_operations          filter;
+};
+
+/*
+ * Allocate the set of file_operations owned by @mod and link it into
+ * ftrace_module_file_list.
+ *
+ * To allow for correct reference counting, a module must "own" the
+ * file_operations used in its event directories, so the built-in ones
+ * are copied and the copies get @mod as owner.
+ *
+ * Returns the new set, or NULL if the allocation fails.
+ */
+static struct ftrace_module_file_ops *
+trace_create_file_ops(struct module *mod)
+{
+       struct ftrace_module_file_ops *ops;
+
+       ops = kmalloc(sizeof(*ops), GFP_KERNEL);
+       if (ops == NULL)
+               return NULL;
+
+       /* Start from the built-in operations ... */
+       ops->id = ftrace_event_id_fops;
+       ops->enable = ftrace_enable_fops;
+       ops->filter = ftrace_event_filter_fops;
+       ops->format = ftrace_event_format_fops;
+
+       /* ... and hand ownership of every copy to the module. */
+       ops->id.owner = mod;
+       ops->enable.owner = mod;
+       ops->filter.owner = mod;
+       ops->format.owner = mod;
+
+       ops->mod = mod;
+       list_add(&ops->list, &ftrace_module_file_list);
+
+       return ops;
+}
+
+/*
+ * Register every trace event that @mod carries: tag the event with its
+ * module, add it to ftrace_events and create its debugfs directory using
+ * file_operations owned by the module.
+ *
+ * Nothing is done when the module has no events or the events directory
+ * is unavailable.  The per-module file_operations are allocated when the
+ * first named event is seen; if that allocation fails the walk stops.
+ */
+static void trace_module_add_events(struct module *mod)
+{
+       struct ftrace_event_call *first = mod->trace_events;
+       struct ftrace_event_call *last = first + mod->num_trace_events;
+       struct ftrace_module_file_ops *ops = NULL;
+       struct ftrace_event_call *call;
+       struct dentry *d_events;
+
+       if (first == last)
+               return;
+
+       d_events = event_trace_events_dir();
+       if (d_events == NULL)
+               return;
+
+       for_each_event(call, first, last) {
+               /* The linker may leave blanks */
+               if (call->name == NULL)
+                       continue;
+
+               /* Create the module's file ops on first use. */
+               if (ops == NULL)
+                       ops = trace_create_file_ops(mod);
+               if (ops == NULL)
+                       return;
+
+               call->mod = mod;
+               list_add(&call->list, &ftrace_events);
+               event_create_dir(call, d_events,
+                                &ops->id, &ops->enable,
+                                &ops->filter, &ops->format);
+       }
+}
+
+/*
+ * Undo trace_module_add_events() for @mod, with trace_event_mutex held
+ * for writing: disable and unregister each of the module's events,
+ * remove its debugfs directory, unlink it from ftrace_events and free
+ * its fields and predicates.  The module's file_operations set is then
+ * unlinked and freed.
+ */
+static void trace_module_remove_events(struct module *mod)
+{
+       struct ftrace_module_file_ops *ops;
+       struct ftrace_event_call *call, *next;
+       bool had_events = false;
+
+       down_write(&trace_event_mutex);
+
+       list_for_each_entry_safe(call, next, &ftrace_events, list) {
+               if (call->mod != mod)
+                       continue;
+
+               had_events = true;
+               ftrace_event_enable_disable(call, 0);
+               if (call->event)
+                       __unregister_ftrace_event(call->event);
+               debugfs_remove_recursive(call->dir);
+               list_del(&call->list);
+               trace_destroy_fields(call);
+               destroy_preds(call);
+       }
+
+       /* Now free the file_operations (first entry owned by @mod) */
+       list_for_each_entry(ops, &ftrace_module_file_list, list) {
+               if (ops->mod != mod)
+                       continue;
+               list_del(&ops->list);
+               kfree(ops);
+               break;
+       }
+
+       /*
+        * It is safest to reset the ring buffer if the module being unloaded
+        * registered any events.
+        */
+       if (had_events)
+               tracing_reset_current_online_cpus();
+
+       up_write(&trace_event_mutex);
+}
+
+/*
+ * Module notifier callback: @data is the struct module changing state
+ * and @val the new state.  Events are added when a module is coming and
+ * removed when it is going; other states are ignored.  event_mutex is
+ * held around the update.  Always returns 0.
+ */
+static int trace_module_notify(struct notifier_block *self,
+                              unsigned long val, void *data)
+{
+       struct module *mod = data;
+
+       mutex_lock(&event_mutex);
+       switch (val) {
+       case MODULE_STATE_COMING:
+               trace_module_add_events(mod);
+               break;
+       case MODULE_STATE_GOING:
+               trace_module_remove_events(mod);
+               break;
+       }
+       mutex_unlock(&event_mutex);
 
        return 0;
 }
+#else
+/* Without CONFIG_MODULES there is nothing to track; always returns 0. */
+static int trace_module_notify(struct notifier_block *self,
+                              unsigned long val, void *data)
+{
+       return 0;
+}
+#endif /* CONFIG_MODULES */
+
+/* Registered with register_module_notifier() from event_trace_init(). */
+struct notifier_block trace_module_nb = {
+       .notifier_call = trace_module_notify,
+       .priority = 0,
+};
+
+extern struct ftrace_event_call __start_ftrace_events[];
+extern struct ftrace_event_call __stop_ftrace_events[];
 
 static __init int event_trace_init(void)
 {
-       struct ftrace_event_call *call = __start_ftrace_events;
+       struct ftrace_event_call *call;
        struct dentry *d_tracer;
        struct dentry *entry;
        struct dentry *d_events;
+       int ret;
 
        d_tracer = tracing_init_dentry();
        if (!d_tracer)
@@ -816,13 +1147,243 @@ static __init int event_trace_init(void)
        if (!d_events)
                return 0;
 
-       for_each_event(call) {
+       /* ring buffer internal formats */
+       trace_create_file("header_page", 0444, d_events,
+                         ring_buffer_print_page_header,
+                         &ftrace_show_header_fops);
+
+       trace_create_file("header_event", 0444, d_events,
+                         ring_buffer_print_entry_header,
+                         &ftrace_show_header_fops);
+
+       trace_create_file("enable", 0644, d_events,
+                         NULL, &ftrace_system_enable_fops);
+
+       for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
                /* The linker may leave blanks */
                if (!call->name)
                        continue;
-               event_create_dir(call, d_events);
+               list_add(&call->list, &ftrace_events);
+               event_create_dir(call, d_events, &ftrace_event_id_fops,
+                                &ftrace_enable_fops, &ftrace_event_filter_fops,
+                                &ftrace_event_format_fops);
        }
 
+       ret = register_module_notifier(&trace_module_nb);
+       if (ret)
+               pr_warning("Failed to register trace events module notifier\n");
+
        return 0;
 }
 fs_initcall(event_trace_init);
+
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+
+static DEFINE_SPINLOCK(test_spinlock);
+static DEFINE_SPINLOCK(test_spinlock_irq);
+static DEFINE_MUTEX(test_mutex);
+
+/*
+ * Work item for the self tests: nests a plain spinlock around an
+ * irq-disabling spinlock with a short busy delay inside, then sleeps
+ * briefly while holding a mutex.
+ */
+static __init void test_work(struct work_struct *dummy)
+{
+       spin_lock(&test_spinlock);
+       spin_lock_irq(&test_spinlock_irq);
+       udelay(1);
+       spin_unlock_irq(&test_spinlock_irq);
+       spin_unlock(&test_spinlock);
+
+       mutex_lock(&test_mutex);
+       msleep(1);
+       mutex_unlock(&test_mutex);
+}
+
+/*
+ * Body of the "test-events" kernel thread: does an allocation, runs
+ * test_work() on every CPU, frees the allocation and then sleeps until
+ * kthread_stop() is called on it.  Always returns 0.
+ */
+static __init int event_test_thread(void *unused)
+{
+       void *test_malloc;
+
+       test_malloc = kmalloc(1234, GFP_KERNEL);
+       if (!test_malloc)
+               pr_info("failed to kmalloc\n");
+
+       schedule_on_each_cpu(test_work);
+
+       kfree(test_malloc);
+
+       /*
+        * The task state must be set again after every wakeup: schedule()
+        * returns with the task runnable, and calling it in that state
+        * would turn the loop into a busy spin until the stop request.
+        */
+       set_current_state(TASK_INTERRUPTIBLE);
+       while (!kthread_should_stop()) {
+               schedule();
+               set_current_state(TASK_INTERRUPTIBLE);
+       }
+       /* Do not exit while still marked as sleeping. */
+       __set_current_state(TASK_RUNNING);
+
+       return 0;
+}
+
+/*
+ * Do various things that may trigger events.
+ *
+ * Starts the "test-events" thread, gives it a moment to run and stops
+ * it again.  If the thread cannot be created a warning is printed and
+ * nothing else is done; kthread_run() returns an ERR_PTR in that case,
+ * which must not be passed on to kthread_stop().
+ */
+static __init void event_test_stuff(void)
+{
+       struct task_struct *test_thread;
+
+       test_thread = kthread_run(event_test_thread, NULL, "test-events");
+       if (IS_ERR(test_thread)) {
+               pr_warning("failed to create test-events thread\n");
+               return;
+       }
+
+       msleep(1);
+       kthread_stop(test_thread);
+}
+
+/*
+ * For every trace event defined, we will test each trace point separately,
+ * and then by groups, and finally all trace points.
+ *
+ * Each step enables the event(s), runs event_test_stuff() and disables
+ * them again.  A failure to switch all events on or off ends the run.
+ */
+static __init void event_trace_self_tests(void)
+{
+       struct ftrace_event_call *call;
+       struct event_subsystem *system;
+       int ret;
+
+       pr_info("Running tests on trace events:\n");
+
+       list_for_each_entry(call, &ftrace_events, list) {
+
+               /* Only test those that have a regfunc */
+               if (!call->regfunc)
+                       continue;
+
+               pr_info("Testing event %s: ", call->name);
+
+               /*
+                * If an event is already enabled, someone is using
+                * it and the self test should not be on.
+                */
+               if (call->enabled) {
+                       pr_warning("Enabled event during self test!\n");
+                       WARN_ON_ONCE(1);
+                       continue;
+               }
+
+               ftrace_event_enable_disable(call, 1);
+               event_test_stuff();
+               ftrace_event_enable_disable(call, 0);
+
+               pr_cont("OK\n");
+       }
+
+       /* Now test at the sub system level */
+
+       pr_info("Running tests on trace event systems:\n");
+
+       list_for_each_entry(system, &event_subsystems, list) {
+
+               /* the ftrace system is special, skip it */
+               if (strcmp(system->name, "ftrace") == 0)
+                       continue;
+
+               pr_info("Testing event system %s: ", system->name);
+
+               ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
+               if (WARN_ON_ONCE(ret)) {
+                       pr_warning("error enabling system %s\n",
+                                  system->name);
+                       continue;
+               }
+
+               event_test_stuff();
+
+               /* A failed disable is reported but does not stop the run */
+               ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
+               if (WARN_ON_ONCE(ret))
+                       pr_warning("error disabling system %s\n",
+                                  system->name);
+
+               pr_cont("OK\n");
+       }
+
+       /* Test with all events enabled */
+
+       pr_info("Running tests on all trace events:\n");
+       pr_info("Testing all events: ");
+
+       ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
+       if (WARN_ON_ONCE(ret)) {
+               pr_warning("error enabling all events\n");
+               return;
+       }
+
+       event_test_stuff();
+
+       /* Disable all events again */
+       ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
+       if (WARN_ON_ONCE(ret)) {
+               pr_warning("error disabling all events\n");
+               return;
+       }
+
+       pr_cont("OK\n");
+}
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+static DEFINE_PER_CPU(atomic_t, test_event_disable);
+
+/*
+ * Function tracer callback used while the event self tests run.
+ *
+ * Records a TRACE_FN entry holding @ip and @parent_ip.  The per-cpu
+ * counter test_event_disable is incremented on entry and decremented on
+ * exit; the entry is only written when the increment yields 1, so a
+ * nested invocation on the same CPU records nothing.
+ */
+static void
+function_test_events_call(unsigned long ip, unsigned long parent_ip)
+{
+       struct ring_buffer_event *event;
+       struct ftrace_entry *entry;
+       unsigned long flags;
+       long disabled;
+       int resched;
+       int cpu;
+       int pc;
+
+       pc = preempt_count();
+       resched = ftrace_preempt_disable();
+       cpu = raw_smp_processor_id();
+       disabled = atomic_inc_return(&per_cpu(test_event_disable, cpu));
+
+       /* Someone else on this CPU is already inside the callback */
+       if (disabled != 1)
+               goto out;
+
+       local_save_flags(flags);
+
+       event = trace_current_buffer_lock_reserve(TRACE_FN, sizeof(*entry),
+                                                 flags, pc);
+       if (!event)
+               goto out;
+       entry   = ring_buffer_event_data(event);
+       entry->ip                       = ip;
+       entry->parent_ip                = parent_ip;
+
+       trace_nowake_buffer_unlock_commit(event, flags, pc);
+
+ out:
+       atomic_dec(&per_cpu(test_event_disable, cpu));
+       ftrace_preempt_enable(resched);
+}
+
+/* Registered only for the duration of the self tests. */
+static struct ftrace_ops trace_ops __initdata  =
+{
+       .func = function_test_events_call,
+};
+
+/*
+ * Run the event self tests a second time with function_test_events_call()
+ * registered as a function tracer callback.
+ *
+ * If the callback cannot be registered the second run is skipped, so the
+ * tests never run without it and unregister is never called for ops that
+ * were not registered.
+ */
+static __init void event_trace_self_test_with_function(void)
+{
+       int ret;
+
+       ret = register_ftrace_function(&trace_ops);
+       if (WARN_ON(ret < 0)) {
+               pr_info("Failed to enable function tracer for event tests\n");
+               return;
+       }
+       pr_info("Running tests again, along with the function tracer\n");
+       event_trace_self_tests();
+       unregister_ftrace_function(&trace_ops);
+}
+#else
+/* No function tracer configured: there is no second test run. */
+static __init void event_trace_self_test_with_function(void)
+{
+}
+#endif
+
+/*
+ * Late initcall: run the event self tests once on their own and once
+ * more through event_trace_self_test_with_function().  Always returns 0.
+ */
+static __init int event_trace_self_tests_init(void)
+{
+
+       event_trace_self_tests();
+
+       event_trace_self_test_with_function();
+
+       return 0;
+}
+
+late_initcall(event_trace_self_tests_init);
+
+#endif
index e03cbf1..db6e54b 100644 (file)
 #include <linux/uaccess.h>
 #include <linux/module.h>
 #include <linux/ctype.h>
+#include <linux/mutex.h>
 
 #include "trace.h"
 #include "trace_output.h"
 
-static int filter_pred_64(struct filter_pred *pred, void *event)
+static DEFINE_MUTEX(filter_mutex);
+
+/*
+ * Identifiers of the operators known to the filter parser.  The values
+ * follow the order of the rows in filter_ops[].
+ */
+enum filter_op_ids
+{
+       OP_OR,
+       OP_AND,
+       OP_NE,
+       OP_EQ,
+       OP_LT,
+       OP_LE,
+       OP_GT,
+       OP_GE,
+       OP_NONE,
+       OP_OPEN_PAREN,
+};
+
+struct filter_op {
+       int id;
+       char *string;
+       int precedence;
+};
+
+/*
+ * Operator table: { id, textual form, precedence }.
+ * NOTE(review): presumably a larger precedence binds tighter and the
+ * rows are looked up by id - confirm against the parser.
+ */
+static struct filter_op filter_ops[] = {
+       { OP_OR, "||", 1 },
+       { OP_AND, "&&", 2 },
+       { OP_NE, "!=", 4 },
+       { OP_EQ, "==", 4 },
+       { OP_LT, "<", 5 },
+       { OP_LE, "<=", 5 },
+       { OP_GT, ">", 5 },
+       { OP_GE, ">=", 5 },
+       { OP_NONE, "OP_NONE", 0 },
+       { OP_OPEN_PAREN, "(", 0 },
+};
+
+enum {
+       FILT_ERR_NONE,
+       FILT_ERR_INVALID_OP,
+       FILT_ERR_UNBALANCED_PAREN,
+       FILT_ERR_TOO_MANY_OPERANDS,
+       FILT_ERR_OPERAND_TOO_LONG,
+       FILT_ERR_FIELD_NOT_FOUND,
+       FILT_ERR_ILLEGAL_FIELD_OP,
+       FILT_ERR_ILLEGAL_INTVAL,
+       FILT_ERR_BAD_SUBSYS_FILTER,
+       FILT_ERR_TOO_MANY_PREDS,
+       FILT_ERR_MISSING_FIELD,
+       FILT_ERR_INVALID_FILTER,
+};
+
+/*
+ * Human readable text for each FILT_ERR_* code, indexed by the error
+ * code (append_filter_err() uses err_text[ps->lasterr]).  The entries
+ * must stay in the same order as the FILT_ERR_* enum.
+ */
+static char *err_text[] = {
+       "No error",
+       "Invalid operator",
+       "Unbalanced parens",
+       "Too many operands",
+       "Operand too long",
+       "Field not found",
+       "Illegal operation for field type",
+       "Illegal integer value",
+       "Couldn't find or set field in one of a subsystem's events",
+       "Too many terms in predicate expression",
+       "Missing field name and/or value",
+       "Meaningless filter expression",
+};
+
+struct opstack_op {
+       int op;
+       struct list_head list;
+};
+
+struct postfix_elt {
+       int op;
+       char *operand;
+       struct list_head list;
+};
+
+/*
+ * State carried through one parse of a filter expression.
+ *
+ * lasterr and lasterr_pos are set by parse_error(): a FILT_ERR_* code
+ * and the position that append_filter_err() marks with a caret.
+ * NOTE(review): opstack/postfix presumably hold struct opstack_op and
+ * struct postfix_elt entries, and infix/operand track the input string
+ * and the operand being collected - confirm against the parser.
+ */
+struct filter_parse_state {
+       struct filter_op *ops;
+       struct list_head opstack;
+       struct list_head postfix;
+       int lasterr;
+       int lasterr_pos;
+
+       struct {
+               char *string;
+               unsigned int cnt;
+               unsigned int tail;
+       } infix;
+
+       struct {
+               char string[MAX_FILTER_STR_VAL];
+               int pos;
+               unsigned int tail;
+       } operand;
+};
+
+DEFINE_COMPARISON_PRED(s64);
+DEFINE_COMPARISON_PRED(u64);
+DEFINE_COMPARISON_PRED(s32);
+DEFINE_COMPARISON_PRED(u32);
+DEFINE_COMPARISON_PRED(s16);
+DEFINE_COMPARISON_PRED(u16);
+DEFINE_COMPARISON_PRED(s8);
+DEFINE_COMPARISON_PRED(u8);
+
+DEFINE_EQUALITY_PRED(64);
+DEFINE_EQUALITY_PRED(32);
+DEFINE_EQUALITY_PRED(16);
+DEFINE_EQUALITY_PRED(8);
+
+/*
+ * Logical AND of two earlier predicate results.  @pred and @event are
+ * not used.  Returns 1 when both @val1 and @val2 are non-zero, else 0.
+ */
+static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
+                          void *event __attribute((unused)),
+                          int val1, int val2)
+{
+       if (!val1)
+               return 0;
+
+       return val2 != 0;
+}
+
+/*
+ * Logical OR of two earlier predicate results.  @pred and @event are
+ * not used.  Returns 1 when @val1 or @val2 is non-zero, else 0.
+ */
+static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
+                         void *event __attribute((unused)),
+                         int val1, int val2)
+{
+       if (val1)
+               return 1;
+
+       return val2 != 0;
+}
+
+/*
+ * Filter predicate for fixed sized arrays of characters.
+ *
+ * Compares at most pred->str_len bytes of the field at pred->offset in
+ * @event with pred->str_val; the outcome is inverted when pred->not is
+ * set.  @val1 and @val2 are not used.
+ */
+static int filter_pred_string(struct filter_pred *pred, void *event,
+                             int val1, int val2)
 {
-       u64 *addr = (u64 *)(event + pred->offset);
-       u64 val = (u64)pred->val;
-       int match;
+       char *addr = (char *)(event + pred->offset);
+       int cmp, match;
+
+       cmp = strncmp(addr, pred->str_val, pred->str_len);
 
-       match = (val == *addr) ^ pred->not;
+       match = (!cmp) ^ pred->not;
 
        return match;
 }
 
-static int filter_pred_32(struct filter_pred *pred, void *event)
+/*
+ * Filter predicate for dynamic sized arrays of characters.
+ * These are implemented through a list of strings at the end
+ * of the entry.
+ * Also each of these strings have a field in the entry which
+ * contains its offset from the beginning of the entry.
+ * We have then first to get this field, dereference it
+ * and add it to the address of the entry, and at last we have
+ * the address of the string.
+ *
+ * The comparison itself is the same as in filter_pred_string();
+ * @val1 and @val2 are not used.
+ */
+static int filter_pred_strloc(struct filter_pred *pred, void *event,
+                             int val1, int val2)
 {
-       u32 *addr = (u32 *)(event + pred->offset);
-       u32 val = (u32)pred->val;
-       int match;
+       int str_loc = *(int *)(event + pred->offset);
+       char *addr = (char *)(event + str_loc);
+       int cmp, match;
+
+       cmp = strncmp(addr, pred->str_val, pred->str_len);
 
-       match = (val == *addr) ^ pred->not;
+       match = (!cmp) ^ pred->not;
 
        return match;
 }
 
-static int filter_pred_16(struct filter_pred *pred, void *event)
+/*
+ * Placeholder predicate that never matches; installed in every slot by
+ * filter_disable_preds().
+ */
+static int filter_pred_none(struct filter_pred *pred, void *event,
+                           int val1, int val2)
+{
+       return 0;
+}
+
+/*
+ * return 1 if event matches, 0 otherwise (discard)
+ *
+ * The predicates of call->filter are evaluated in order against @rec,
+ * using a small value stack: a predicate with pop_n == 0 pushes its own
+ * result, any other predicate pops two values, combines them through
+ * its ->fn and pushes the outcome.  The value left on top of the stack
+ * is the result.
+ *
+ * NOTE(review): with n_preds == 0 the final stack[--top] reads below
+ * the array; presumably callers only get here while a filter is active
+ * - confirm.
+ */
+int filter_match_preds(struct ftrace_event_call *call, void *rec)
 {
-       u16 *addr = (u16 *)(event + pred->offset);
-       u16 val = (u16)pred->val;
-       int match;
+       struct event_filter *filter = call->filter;
+       int match, top = 0, val1 = 0, val2 = 0;
+       int stack[MAX_FILTER_PRED];
+       struct filter_pred *pred;
+       int i;
+
+       for (i = 0; i < filter->n_preds; i++) {
+               pred = filter->preds[i];
+               if (!pred->pop_n) {
+                       match = pred->fn(pred, rec, val1, val2);
+                       stack[top++] = match;
+                       continue;
+               }
+               /* Not enough operands on the stack: discard the event */
+               if (pred->pop_n > top) {
+                       WARN_ON_ONCE(1);
+                       return 0;
+               }
+               /* NOTE(review): always pops two; assumes pop_n is 0 or 2 */
+               val1 = stack[--top];
+               val2 = stack[--top];
+               match = pred->fn(pred, rec, val1, val2);
+               stack[top++] = match;
+       }
 
-       match = (val == *addr) ^ pred->not;
+       return stack[--top];
+}
+EXPORT_SYMBOL_GPL(filter_match_preds);
 
-       return match;
+/* Remember error code @err and input position @pos in the parse state. */
+static void parse_error(struct filter_parse_state *ps, int err, int pos)
+{
+       ps->lasterr = err;
+       ps->lasterr_pos = pos;
 }
 
-static int filter_pred_8(struct filter_pred *pred, void *event)
+/* Free the filter's string form and mark it as absent. */
+static void remove_filter_string(struct event_filter *filter)
 {
-       u8 *addr = (u8 *)(event + pred->offset);
-       u8 val = (u8)pred->val;
-       int match;
+       kfree(filter->filter_string);
+       filter->filter_string = NULL;
+}
 
-       match = (val == *addr) ^ pred->not;
+/*
+ * Replace the filter's string form with a copy of @filter_string.
+ * The old string is freed before the copy is made, so on -ENOMEM the
+ * filter is left without a string.  Returns 0 on success.
+ */
+static int replace_filter_string(struct event_filter *filter,
+                                char *filter_string)
+{
+       kfree(filter->filter_string);
+       filter->filter_string = kstrdup(filter_string, GFP_KERNEL);
+       if (!filter->filter_string)
+               return -ENOMEM;
 
-       return match;
+       return 0;
 }
 
-static int filter_pred_string(struct filter_pred *pred, void *event)
+/*
+ * Append @string to the filter's string form.  A new buffer large
+ * enough for both parts is allocated and the old one is freed.  The
+ * filter must already have a string (BUG otherwise).  Returns 0 on
+ * success or -ENOMEM, in which case the old string is left untouched.
+ */
+static int append_filter_string(struct event_filter *filter,
+                               char *string)
 {
-       char *addr = (char *)(event + pred->offset);
-       int cmp, match;
+       int newlen;
+       char *new_filter_string;
 
-       cmp = strncmp(addr, pred->str_val, pred->str_len);
+       BUG_ON(!filter->filter_string);
+       /* +1 for the terminating NUL */
+       newlen = strlen(filter->filter_string) + strlen(string) + 1;
+       new_filter_string = kmalloc(newlen, GFP_KERNEL);
+       if (!new_filter_string)
+               return -ENOMEM;
 
-       match = (!cmp) ^ pred->not;
+       strcpy(new_filter_string, filter->filter_string);
+       strcat(new_filter_string, string);
+       kfree(filter->filter_string);
+       filter->filter_string = new_filter_string;
 
-       return match;
+       return 0;
 }
 
-/* return 1 if event matches, 0 otherwise (discard) */
-int filter_match_preds(struct ftrace_event_call *call, void *rec)
+/*
+ * Append a description of the last parse error to the filter's string
+ * form: a newline, a line of blanks with a '^' at ps->lasterr_pos, and
+ * "parse_error: <text>" with the text taken from err_text[].
+ *
+ * Nothing is appended if the scratch page cannot be allocated, and the
+ * results of append_filter_string() are not checked.
+ */
+static void append_filter_err(struct filter_parse_state *ps,
+                             struct event_filter *filter)
 {
-       int i, matched, and_failed = 0;
-       struct filter_pred *pred;
+       int pos = ps->lasterr_pos;
+       char *buf, *pbuf;
 
-       for (i = 0; i < MAX_FILTER_PRED; i++) {
-               if (call->preds[i]) {
-                       pred = call->preds[i];
-                       if (and_failed && !pred->or)
-                               continue;
-                       matched = pred->fn(pred, rec);
-                       if (!matched && !pred->or) {
-                               and_failed = 1;
-                               continue;
-                       } else if (matched && pred->or)
-                               return 1;
-               } else
-                       break;
-       }
+       buf = (char *)__get_free_page(GFP_TEMPORARY);
+       if (!buf)
+               return;
 
-       if (and_failed)
-               return 0;
+       append_filter_string(filter, "\n");
+       memset(buf, ' ', PAGE_SIZE);
+       /* Keep room behind the caret for the message text */
+       if (pos > PAGE_SIZE - 128)
+               pos = 0;
+       buf[pos] = '^';
+       pbuf = &buf[pos] + 1;
 
-       return 1;
+       /* sprintf() also terminates the blank-filled page */
+       sprintf(pbuf, "\nparse_error: %s\n", err_text[ps->lasterr]);
+       append_filter_string(filter, buf);
+       free_page((unsigned long) buf);
 }
 
-void filter_print_preds(struct filter_pred **preds, struct trace_seq *s)
+/*
+ * Print the string form of @call's filter into @s, or "none" when no
+ * string is set.  filter_mutex is held while the string is read.
+ */
+void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
 {
-       char *field_name;
-       struct filter_pred *pred;
-       int i;
+       struct event_filter *filter = call->filter;
 
-       if (!preds) {
+       mutex_lock(&filter_mutex);
+       if (filter->filter_string)
+               trace_seq_printf(s, "%s\n", filter->filter_string);
+       else
                trace_seq_printf(s, "none\n");
-               return;
-       }
+       mutex_unlock(&filter_mutex);
+}
 
-       for (i = 0; i < MAX_FILTER_PRED; i++) {
-               if (preds[i]) {
-                       pred = preds[i];
-                       field_name = pred->field_name;
-                       if (i)
-                               trace_seq_printf(s, pred->or ? "|| " : "&& ");
-                       trace_seq_printf(s, "%s ", field_name);
-                       trace_seq_printf(s, pred->not ? "!= " : "== ");
-                       if (pred->str_val)
-                               trace_seq_printf(s, "%s\n", pred->str_val);
-                       else
-                               trace_seq_printf(s, "%llu\n", pred->val);
-               } else
-                       break;
-       }
+/*
+ * Print the string form of @system's filter into @s, or "none" when no
+ * string is set.  filter_mutex is held while the string is read.
+ */
+void print_subsystem_event_filter(struct event_subsystem *system,
+                                 struct trace_seq *s)
+{
+       struct event_filter *filter = system->filter;
+
+       mutex_lock(&filter_mutex);
+       if (filter->filter_string)
+               trace_seq_printf(s, "%s\n", filter->filter_string);
+       else
+               trace_seq_printf(s, "none\n");
+       mutex_unlock(&filter_mutex);
 }
 
 static struct ftrace_event_field *
@@ -150,284 +328,828 @@ find_event_field(struct ftrace_event_call *call, char *name)
        return NULL;
 }
 
-void filter_free_pred(struct filter_pred *pred)
+static void filter_free_pred(struct filter_pred *pred)
 {
        if (!pred)
                return;
 
        kfree(pred->field_name);
-       kfree(pred->str_val);
        kfree(pred);
 }
 
-void filter_free_preds(struct ftrace_event_call *call)
+static void filter_clear_pred(struct filter_pred *pred)
 {
-       int i;
+       kfree(pred->field_name);
+       pred->field_name = NULL;
+       pred->str_len = 0;
+}
 
-       if (call->preds) {
-               for (i = 0; i < MAX_FILTER_PRED; i++)
-                       filter_free_pred(call->preds[i]);
-               kfree(call->preds);
-               call->preds = NULL;
+/*
+ * Overwrite @dest with a copy of @src and install @fn as its callback.
+ * A non-NULL field name is duplicated so that @dest owns its own string.
+ * Returns 0, or -ENOMEM when the name cannot be duplicated.
+ */
+static int filter_set_pred(struct filter_pred *dest,
+                          struct filter_pred *src,
+                          filter_pred_fn_t fn)
+{
+       *dest = *src;
+       if (src->field_name) {
+               char *name_copy = kstrdup(src->field_name, GFP_KERNEL);
+
+               dest->field_name = name_copy;
+               if (!name_copy)
+                       return -ENOMEM;
        }
+       dest->fn = fn;
+
+       return 0;
 }
 
-void filter_free_subsystem_preds(struct event_subsystem *system)
+static void filter_disable_preds(struct ftrace_event_call *call)
 {
-       struct ftrace_event_call *call = __start_ftrace_events;
+       struct event_filter *filter = call->filter;
        int i;
 
-       if (system->preds) {
-               for (i = 0; i < MAX_FILTER_PRED; i++)
-                       filter_free_pred(system->preds[i]);
-               kfree(system->preds);
-               system->preds = NULL;
-       }
+       call->filter_active = 0;
+       filter->n_preds = 0;
 
-       events_for_each(call) {
-               if (!call->name || !call->regfunc)
-                       continue;
+       for (i = 0; i < MAX_FILTER_PRED; i++)
+               filter->preds[i]->fn = filter_pred_none;
+}
+
+/*
+ * Release everything init_preds() set up for @call: every predicate slot,
+ * the slot array and the filter itself, then clear call->filter.
+ *
+ * Must cope with a partially built filter, because init_preds() calls this
+ * from its out-of-memory path.
+ */
+void destroy_preds(struct ftrace_event_call *call)
+{
+       struct event_filter *filter = call->filter;
+       int i;
 
-               if (!strcmp(call->system, system->name))
-                       filter_free_preds(call);
+       if (!filter)
+               return;
+
+       /* preds is still NULL when the slot array allocation itself failed */
+       for (i = 0; filter->preds && i < MAX_FILTER_PRED; i++) {
+               if (filter->preds[i])
+                       filter_free_pred(filter->preds[i]);
        }
+       kfree(filter->preds);
+       kfree(filter);
+       call->filter = NULL;
 }
 
-static int __filter_add_pred(struct ftrace_event_call *call,
-                            struct filter_pred *pred)
+int init_preds(struct ftrace_event_call *call)
 {
+       struct event_filter *filter;
+       struct filter_pred *pred;
        int i;
 
-       if (call->preds && !pred->compound)
-               filter_free_preds(call);
+       filter = call->filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+       if (!call->filter)
+               return -ENOMEM;
 
-       if (!call->preds) {
-               call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
-                                     GFP_KERNEL);
-               if (!call->preds)
-                       return -ENOMEM;
-       }
+       call->filter_active = 0;
+       filter->n_preds = 0;
+
+       filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
+       if (!filter->preds)
+               goto oom;
 
        for (i = 0; i < MAX_FILTER_PRED; i++) {
-               if (!call->preds[i]) {
-                       call->preds[i] = pred;
-                       return 0;
+               pred = kzalloc(sizeof(*pred), GFP_KERNEL);
+               if (!pred)
+                       goto oom;
+               pred->fn = filter_pred_none;
+               filter->preds[i] = pred;
+       }
+
+       return 0;
+
+oom:
+       destroy_preds(call);
+
+       return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(init_preds);
+
+/*
+ * Drop the predicates recorded on @system, then disable the predicates
+ * and remove the filter string of every event that has define_fields set
+ * and belongs to that subsystem.  The event list is walked with
+ * event_mutex held.
+ */
+static void filter_free_subsystem_preds(struct event_subsystem *system)
+{
+       struct event_filter *filter = system->filter;
+       struct ftrace_event_call *call;
+       int idx = 0;
+
+       if (filter->n_preds) {
+               while (idx < filter->n_preds) {
+                       filter_free_pred(filter->preds[idx]);
+                       idx++;
+               }
+               kfree(filter->preds);
+               filter->preds = NULL;
+               filter->n_preds = 0;
+       }
+
+       mutex_lock(&event_mutex);
+       list_for_each_entry(call, &ftrace_events, list) {
+               if (call->define_fields &&
+                   strcmp(call->system, system->name) == 0) {
+                       filter_disable_preds(call);
+                       remove_filter_string(call->filter);
                }
        }
+       mutex_unlock(&event_mutex);
+}
+
+static int filter_add_pred_fn(struct filter_parse_state *ps,
+                             struct ftrace_event_call *call,
+                             struct filter_pred *pred,
+                             filter_pred_fn_t fn)
+{
+       struct event_filter *filter = call->filter;
+       int idx, err;
+
+       if (filter->n_preds == MAX_FILTER_PRED) {
+               parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
+               return -ENOSPC;
+       }
+
+       idx = filter->n_preds;
+       filter_clear_pred(filter->preds[idx]);
+       err = filter_set_pred(filter->preds[idx], pred, fn);
+       if (err)
+               return err;
 
-       return -ENOSPC;
+       filter->n_preds++;
+       call->filter_active = 1;
+
+       return 0;
 }
 
+enum {
+       FILTER_STATIC_STRING = 1,
+       FILTER_DYN_STRING
+};
+
 static int is_string_field(const char *type)
 {
+       if (strstr(type, "__data_loc") && strstr(type, "char"))
+               return FILTER_DYN_STRING;
+
        if (strchr(type, '[') && strstr(type, "char"))
-               return 1;
+               return FILTER_STATIC_STRING;
 
        return 0;
 }
 
-int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred)
+static int is_legal_op(struct ftrace_event_field *field, int op)
 {
-       struct ftrace_event_field *field;
-
-       field = find_event_field(call, pred->field_name);
-       if (!field)
-               return -EINVAL;
+       if (is_string_field(field->type) && (op != OP_EQ && op != OP_NE))
+               return 0;
 
-       pred->offset = field->offset;
+       return 1;
+}
 
-       if (is_string_field(field->type)) {
-               if (!pred->str_val)
-                       return -EINVAL;
-               pred->fn = filter_pred_string;
-               pred->str_len = field->size;
-               return __filter_add_pred(call, pred);
-       } else {
-               if (pred->str_val)
-                       return -EINVAL;
-       }
+static filter_pred_fn_t select_comparison_fn(int op, int field_size,
+                                            int field_is_signed)
+{
+       filter_pred_fn_t fn = NULL;
 
-       switch (field->size) {
+       switch (field_size) {
        case 8:
-               pred->fn = filter_pred_64;
+               if (op == OP_EQ || op == OP_NE)
+                       fn = filter_pred_64;
+               else if (field_is_signed)
+                       fn = filter_pred_s64;
+               else
+                       fn = filter_pred_u64;
                break;
        case 4:
-               pred->fn = filter_pred_32;
+               if (op == OP_EQ || op == OP_NE)
+                       fn = filter_pred_32;
+               else if (field_is_signed)
+                       fn = filter_pred_s32;
+               else
+                       fn = filter_pred_u32;
                break;
        case 2:
-               pred->fn = filter_pred_16;
+               if (op == OP_EQ || op == OP_NE)
+                       fn = filter_pred_16;
+               else if (field_is_signed)
+                       fn = filter_pred_s16;
+               else
+                       fn = filter_pred_u16;
                break;
        case 1:
-               pred->fn = filter_pred_8;
+               if (op == OP_EQ || op == OP_NE)
+                       fn = filter_pred_8;
+               else if (field_is_signed)
+                       fn = filter_pred_s8;
+               else
+                       fn = filter_pred_u8;
                break;
-       default:
-               return -EINVAL;
        }
 
-       return __filter_add_pred(call, pred);
+       return fn;
 }
 
-static struct filter_pred *copy_pred(struct filter_pred *pred)
+static int filter_add_pred(struct filter_parse_state *ps,
+                          struct ftrace_event_call *call,
+                          struct filter_pred *pred)
 {
-       struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL);
-       if (!new_pred)
-               return NULL;
+       struct ftrace_event_field *field;
+       filter_pred_fn_t fn;
+       unsigned long long val;
+       int string_type;
+
+       pred->fn = filter_pred_none;
+
+       if (pred->op == OP_AND) {
+               pred->pop_n = 2;
+               return filter_add_pred_fn(ps, call, pred, filter_pred_and);
+       } else if (pred->op == OP_OR) {
+               pred->pop_n = 2;
+               return filter_add_pred_fn(ps, call, pred, filter_pred_or);
+       }
+
+       field = find_event_field(call, pred->field_name);
+       if (!field) {
+               parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
+               return -EINVAL;
+       }
 
-       memcpy(new_pred, pred, sizeof(*pred));
+       pred->offset = field->offset;
 
-       if (pred->field_name) {
-               new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
-               if (!new_pred->field_name) {
-                       kfree(new_pred);
-                       return NULL;
-               }
+       if (!is_legal_op(field, pred->op)) {
+               parse_error(ps, FILT_ERR_ILLEGAL_FIELD_OP, 0);
+               return -EINVAL;
        }
 
-       if (pred->str_val) {
-               new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL);
-               if (!new_pred->str_val) {
-                       filter_free_pred(new_pred);
-                       return NULL;
+       string_type = is_string_field(field->type);
+       if (string_type) {
+               if (string_type == FILTER_STATIC_STRING)
+                       fn = filter_pred_string;
+               else
+                       fn = filter_pred_strloc;
+               pred->str_len = field->size;
+               if (pred->op == OP_NE)
+                       pred->not = 1;
+               return filter_add_pred_fn(ps, call, pred, fn);
+       } else {
+               if (strict_strtoull(pred->str_val, 0, &val)) {
+                       parse_error(ps, FILT_ERR_ILLEGAL_INTVAL, 0);
+                       return -EINVAL;
                }
+               pred->val = val;
+       }
+
+       fn = select_comparison_fn(pred->op, field->size, field->is_signed);
+       if (!fn) {
+               parse_error(ps, FILT_ERR_INVALID_OP, 0);
+               return -EINVAL;
        }
 
-       return new_pred;
+       if (pred->op == OP_NE)
+               pred->not = 1;
+
+       return filter_add_pred_fn(ps, call, pred, fn);
 }
 
-int filter_add_subsystem_pred(struct event_subsystem *system,
-                             struct filter_pred *pred)
+static int filter_add_subsystem_pred(struct filter_parse_state *ps,
+                                    struct event_subsystem *system,
+                                    struct filter_pred *pred,
+                                    char *filter_string)
 {
-       struct ftrace_event_call *call = __start_ftrace_events;
-       struct filter_pred *event_pred;
-       int i;
-
-       if (system->preds && !pred->compound)
-               filter_free_subsystem_preds(system);
+       struct event_filter *filter = system->filter;
+       struct ftrace_event_call *call;
+       int err = 0;
 
-       if (!system->preds) {
-               system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
+       if (!filter->preds) {
+               filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred),
                                        GFP_KERNEL);
-               if (!system->preds)
+
+               if (!filter->preds)
                        return -ENOMEM;
        }
 
-       for (i = 0; i < MAX_FILTER_PRED; i++) {
-               if (!system->preds[i]) {
-                       system->preds[i] = pred;
-                       break;
-               }
+       if (filter->n_preds == MAX_FILTER_PRED) {
+               parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
+               return -ENOSPC;
        }
 
-       if (i == MAX_FILTER_PRED)
-               return -ENOSPC;
+       filter->preds[filter->n_preds] = pred;
+       filter->n_preds++;
 
-       events_for_each(call) {
-               int err;
+       mutex_lock(&event_mutex);
+       list_for_each_entry(call, &ftrace_events, list) {
 
-               if (!call->name || !call->regfunc)
+               if (!call->define_fields)
                        continue;
 
                if (strcmp(call->system, system->name))
                        continue;
 
-               if (!find_event_field(call, pred->field_name))
-                       continue;
+               err = filter_add_pred(ps, call, pred);
+               if (err) {
+                       mutex_unlock(&event_mutex);
+                       filter_free_subsystem_preds(system);
+                       parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+                       goto out;
+               }
+               replace_filter_string(call->filter, filter_string);
+       }
+       mutex_unlock(&event_mutex);
+out:
+       return err;
+}
 
-               event_pred = copy_pred(pred);
-               if (!event_pred)
-                       goto oom;
+static void parse_init(struct filter_parse_state *ps,
+                      struct filter_op *ops,
+                      char *infix_string)
+{
+       memset(ps, '\0', sizeof(*ps));
 
-               err = filter_add_pred(call, event_pred);
-               if (err)
-                       filter_free_pred(event_pred);
-               if (err == -ENOMEM)
-                       goto oom;
+       ps->infix.string = infix_string;
+       ps->infix.cnt = strlen(infix_string);
+       ps->ops = ops;
+
+       INIT_LIST_HEAD(&ps->opstack);
+       INIT_LIST_HEAD(&ps->postfix);
+}
+
+/*
+ * Consume and return the character under the infix cursor, moving the
+ * cursor forward by one and decrementing the remaining count.
+ */
+static char infix_next(struct filter_parse_state *ps)
+{
+       char ch = ps->infix.string[ps->infix.tail];
+
+       ps->infix.tail++;
+       ps->infix.cnt--;
+
+       return ch;
+}
+
+/*
+ * Return the character under the infix cursor without consuming it, or 0
+ * when the cursor sits exactly at the end of the string.
+ */
+static char infix_peek(struct filter_parse_state *ps)
+{
+       char *str = ps->infix.string;
+
+       return ps->infix.tail == strlen(str) ? 0 : str[ps->infix.tail];
+}
+
+/*
+ * Skip the character under the infix cursor.
+ *
+ * Never steps over the terminating NUL: infix_get_op() calls this after
+ * matching an operator against the current character plus the peeked one,
+ * and the peeked one is NUL when a one-character operator ends the input.
+ * Moving past the terminator would make the next infix_next() read beyond
+ * the end of the string.
+ */
+static void infix_advance(struct filter_parse_state *ps)
+{
+       if (!ps->infix.string[ps->infix.tail])
+               return;
+
+       ps->infix.cnt--;
+       ps->infix.tail++;
+}
+
+/*
+ * Non-zero when the operator at index @a of the parser's op table has a
+ * strictly smaller precedence value than the one at index @b.
+ */
+static inline int is_precedence_lower(struct filter_parse_state *ps,
+                                     int a, int b)
+{
+       struct filter_op *lhs = &ps->ops[a];
+       struct filter_op *rhs = &ps->ops[b];
+
+       return lhs->precedence < rhs->precedence;
+}
+
+/*
+ * Return 1 when @c is the first character of any operator string in the
+ * parser's op table.  The scan stops at the "OP_NONE" entry.
+ */
+static inline int is_op_char(struct filter_parse_state *ps, char c)
+{
+       struct filter_op *op;
+
+       for (op = ps->ops; strcmp(op->string, "OP_NONE"); op++) {
+               if (op->string[0] == c)
+                       return 1;
        }
 
        return 0;
+}
 
-oom:
-       system->preds[i] = NULL;
-       return -ENOMEM;
+/*
+ * Identify the operator that starts with @firstc, which the caller has
+ * already consumed.  The two-character candidate (@firstc plus the peeked
+ * next character) is tried first and, on a match, the second character is
+ * consumed as well; otherwise @firstc alone is looked up.
+ * Returns the operator id, or OP_NONE when nothing matches.
+ */
+static int infix_get_op(struct filter_parse_state *ps, char firstc)
+{
+       struct filter_op *op;
+       char opstr[3];
+
+       opstr[0] = firstc;
+       opstr[1] = infix_peek(ps);
+       opstr[2] = '\0';
+
+       for (op = ps->ops; strcmp(op->string, "OP_NONE"); op++) {
+               if (strcmp(opstr, op->string) == 0) {
+                       infix_advance(ps);
+                       return op->id;
+               }
+       }
+
+       /* no match with the lookahead character: retry with @firstc only */
+       opstr[1] = '\0';
+
+       for (op = ps->ops; strcmp(op->string, "OP_NONE"); op++) {
+               if (strcmp(opstr, op->string) == 0)
+                       return op->id;
+       }
+
+       return OP_NONE;
 }
 
-int filter_parse(char **pbuf, struct filter_pred *pred)
+static inline void clear_operand_string(struct filter_parse_state *ps)
 {
-       char *tmp, *tok, *val_str = NULL;
-       int tok_n = 0;
+       memset(ps->operand.string, '\0', MAX_FILTER_STR_VAL);
+       ps->operand.tail = 0;
+}
 
-       /* field ==/!= number, or/and field ==/!= number, number */
-       while ((tok = strsep(pbuf, " \n"))) {
-               if (tok_n == 0) {
-                       if (!strcmp(tok, "0")) {
-                               pred->clear = 1;
-                               return 0;
-                       } else if (!strcmp(tok, "&&")) {
-                               pred->or = 0;
-                               pred->compound = 1;
-                       } else if (!strcmp(tok, "||")) {
-                               pred->or = 1;
-                               pred->compound = 1;
-                       } else
-                               pred->field_name = tok;
-                       tok_n = 1;
+/*
+ * Append @c to the operand being accumulated.
+ * Returns 0, or -EINVAL once the operand has reached
+ * MAX_FILTER_STR_VAL - 1 characters.
+ */
+static inline int append_operand_char(struct filter_parse_state *ps, char c)
+{
+       if (ps->operand.tail != MAX_FILTER_STR_VAL - 1) {
+               ps->operand.string[ps->operand.tail] = c;
+               ps->operand.tail++;
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
+/*
+ * Push operator @op onto the parser's operator stack.
+ * Returns 0, or -ENOMEM when the stack entry cannot be allocated.
+ */
+static int filter_opstack_push(struct filter_parse_state *ps, int op)
+{
+       struct opstack_op *entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+
+       if (entry) {
+               entry->op = op;
+               list_add(&entry->list, &ps->opstack);
+               return 0;
+       }
+
+       return -ENOMEM;
+}
+
+/* Non-zero when no operator is pending on the parser's operator stack. */
+static int filter_opstack_empty(struct filter_parse_state *ps)
+{
+       struct list_head *stack = &ps->opstack;
+
+       return list_empty(stack);
+}
+
+/*
+ * Return the operator on top of the stack without removing it, or
+ * OP_NONE when the stack is empty.
+ */
+static int filter_opstack_top(struct filter_parse_state *ps)
+{
+       struct opstack_op *top;
+
+       if (!filter_opstack_empty(ps)) {
+               top = list_first_entry(&ps->opstack, struct opstack_op, list);
+               return top->op;
+       }
+
+       return OP_NONE;
+}
+
+/*
+ * Remove the top entry of the operator stack, free it and return its
+ * operator.  Returns OP_NONE when the stack is empty.
+ */
+static int filter_opstack_pop(struct filter_parse_state *ps)
+{
+       struct opstack_op *top;
+       int popped = OP_NONE;
+
+       if (!filter_opstack_empty(ps)) {
+               top = list_first_entry(&ps->opstack, struct opstack_op, list);
+               popped = top->op;
+               list_del(&top->list);
+               kfree(top);
+       }
+
+       return popped;
+}
+
+/* Pop (and thereby free) every entry left on the operator stack. */
+static void filter_opstack_clear(struct filter_parse_state *ps)
+{
+       for (;;) {
+               if (filter_opstack_empty(ps))
+                       break;
+               filter_opstack_pop(ps);
+       }
+}
+
+/* Return the buffer holding the operand currently being accumulated. */
+static char *curr_operand(struct filter_parse_state *ps)
+{
+       char *operand = ps->operand.string;
+
+       return operand;
+}
+
+/*
+ * Append an operand element, holding a private copy of @operand, to the
+ * tail of the postfix list.
+ * Returns 0, or -ENOMEM when either allocation fails.
+ */
+static int postfix_append_operand(struct filter_parse_state *ps, char *operand)
+{
+       struct postfix_elt *node = kmalloc(sizeof(*node), GFP_KERNEL);
+
+       if (!node)
+               return -ENOMEM;
+
+       node->op = OP_NONE;
+       node->operand = kstrdup(operand, GFP_KERNEL);
+       if (!node->operand)
+               goto free_node;
+
+       list_add_tail(&node->list, &ps->postfix);
+
+       return 0;
+
+free_node:
+       kfree(node);
+       return -ENOMEM;
+}
+
+/*
+ * Append an operator element for @op (with no operand string) to the tail
+ * of the postfix list.
+ * Returns 0, or -ENOMEM when the element cannot be allocated.
+ */
+static int postfix_append_op(struct filter_parse_state *ps, int op)
+{
+       struct postfix_elt *node = kmalloc(sizeof(*node), GFP_KERNEL);
+
+       if (node) {
+               node->operand = NULL;
+               node->op = op;
+               list_add_tail(&node->list, &ps->postfix);
+               return 0;
+       }
+
+       return -ENOMEM;
+}
+
+/*
+ * Tear down the postfix list: unlink every element and release both its
+ * operand string and the element itself.  Operator elements carry a NULL
+ * operand, which kfree() accepts.
+ */
+static void postfix_clear(struct filter_parse_state *ps)
+{
+       struct postfix_elt *elt;
+
+       while (!list_empty(&ps->postfix)) {
+               elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
+               list_del(&elt->list);
+               kfree(elt->operand);
+               /* the element was kmalloc'ed by postfix_append_operand/op() */
+               kfree(elt);
+       }
+}
+
+static int filter_parse(struct filter_parse_state *ps)
+{
+       int in_string = 0;
+       int op, top_op;
+       char ch;
+
+       while ((ch = infix_next(ps))) {
+               if (ch == '"') {
+                       in_string ^= 1;
                        continue;
                }
-               if (tok_n == 1) {
-                       if (!pred->field_name)
-                               pred->field_name = tok;
-                       else if (!strcmp(tok, "!="))
-                               pred->not = 1;
-                       else if (!strcmp(tok, "=="))
-                               pred->not = 0;
-                       else {
-                               pred->field_name = NULL;
+
+               if (in_string)
+                       goto parse_operand;
+
+               if (isspace(ch))
+                       continue;
+
+               if (is_op_char(ps, ch)) {
+                       op = infix_get_op(ps, ch);
+                       if (op == OP_NONE) {
+                               parse_error(ps, FILT_ERR_INVALID_OP, 0);
                                return -EINVAL;
                        }
-                       tok_n = 2;
+
+                       if (strlen(curr_operand(ps))) {
+                               postfix_append_operand(ps, curr_operand(ps));
+                               clear_operand_string(ps);
+                       }
+
+                       while (!filter_opstack_empty(ps)) {
+                               top_op = filter_opstack_top(ps);
+                               if (!is_precedence_lower(ps, top_op, op)) {
+                                       top_op = filter_opstack_pop(ps);
+                                       postfix_append_op(ps, top_op);
+                                       continue;
+                               }
+                               break;
+                       }
+
+                       filter_opstack_push(ps, op);
                        continue;
                }
-               if (tok_n == 2) {
-                       if (pred->compound) {
-                               if (!strcmp(tok, "!="))
-                                       pred->not = 1;
-                               else if (!strcmp(tok, "=="))
-                                       pred->not = 0;
-                               else {
-                                       pred->field_name = NULL;
-                                       return -EINVAL;
-                               }
-                       } else {
-                               val_str = tok;
-                               break; /* done */
+
+               if (ch == '(') {
+                       filter_opstack_push(ps, OP_OPEN_PAREN);
+                       continue;
+               }
+
+               if (ch == ')') {
+                       if (strlen(curr_operand(ps))) {
+                               postfix_append_operand(ps, curr_operand(ps));
+                               clear_operand_string(ps);
+                       }
+
+                       top_op = filter_opstack_pop(ps);
+                       while (top_op != OP_NONE) {
+                               if (top_op == OP_OPEN_PAREN)
+                                       break;
+                               postfix_append_op(ps, top_op);
+                               top_op = filter_opstack_pop(ps);
+                       }
+                       if (top_op == OP_NONE) {
+                               parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+                               return -EINVAL;
                        }
-                       tok_n = 3;
                        continue;
                }
-               if (tok_n == 3) {
-                       val_str = tok;
-                       break; /* done */
+parse_operand:
+               if (append_operand_char(ps, ch)) {
+                       parse_error(ps, FILT_ERR_OPERAND_TOO_LONG, 0);
+                       return -EINVAL;
                }
        }
 
-       if (!val_str) {
-               pred->field_name = NULL;
-               return -EINVAL;
+       if (strlen(curr_operand(ps)))
+               postfix_append_operand(ps, curr_operand(ps));
+
+       while (!filter_opstack_empty(ps)) {
+               top_op = filter_opstack_pop(ps);
+               if (top_op == OP_NONE)
+                       break;
+               if (top_op == OP_OPEN_PAREN) {
+                       parse_error(ps, FILT_ERR_UNBALANCED_PAREN, 0);
+                       return -EINVAL;
+               }
+               postfix_append_op(ps, top_op);
        }
 
-       pred->field_name = kstrdup(pred->field_name, GFP_KERNEL);
-       if (!pred->field_name)
-               return -ENOMEM;
+       return 0;
+}
 
-       pred->val = simple_strtoull(val_str, &tmp, 0);
-       if (tmp == val_str) {
-               pred->str_val = kstrdup(val_str, GFP_KERNEL);
-               if (!pred->str_val)
-                       return -ENOMEM;
-       } else if (*tmp != '\0')
+/*
+ * Allocate a comparison predicate for operator @op.  @operand1 is
+ * duplicated as the field name; @operand2 is copied into str_val and its
+ * length recorded in str_len.
+ * Returns the new predicate, or NULL when an allocation fails.
+ */
+static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
+{
+       struct filter_pred *new_pred = kzalloc(sizeof(*new_pred), GFP_KERNEL);
+
+       if (!new_pred)
+               return NULL;
+
+       new_pred->field_name = kstrdup(operand1, GFP_KERNEL);
+       if (new_pred->field_name) {
+               strcpy(new_pred->str_val, operand2);
+               new_pred->str_len = strlen(operand2);
+               new_pred->op = op;
+               return new_pred;
+       }
+
+       kfree(new_pred);
+       return NULL;
+}
+
+/*
+ * Allocate a zeroed predicate that only carries the logical operator @op.
+ * Returns the new predicate, or NULL when the allocation fails.
+ */
+static struct filter_pred *create_logical_pred(int op)
+{
+       struct filter_pred *new_pred = kzalloc(sizeof(*new_pred), GFP_KERNEL);
+
+       if (new_pred)
+               new_pred->op = op;
+
+       return new_pred;
+}
+
+/*
+ * Sanity-check the postfix expression: it must contain at least one
+ * comparison operator, and fewer logical (AND/OR) operators than
+ * comparison operators.  Operand elements are ignored.
+ * Returns 0, or -EINVAL after recording FILT_ERR_INVALID_FILTER.
+ */
+static int check_preds(struct filter_parse_state *ps)
+{
+       struct postfix_elt *elt;
+       int n_compare = 0;
+       int n_logical = 0;
+
+       list_for_each_entry(elt, &ps->postfix, list) {
+               switch (elt->op) {
+               case OP_NONE:
+                       break;
+               case OP_AND:
+               case OP_OR:
+                       n_logical++;
+                       break;
+               default:
+                       n_compare++;
+                       break;
+               }
+       }
+
+       if (!n_compare || n_logical >= n_compare) {
+               parse_error(ps, FILT_ERR_INVALID_FILTER, 0);
                return -EINVAL;
+       }
 
        return 0;
 }
 
+/*
+ * Walk the postfix expression held in @ps and install one predicate per
+ * operator: on @call when it is non-NULL, otherwise on @system.
+ *
+ * Operand elements are collected two at a time and consumed by the next
+ * comparison operator; AND/OR elements produce a logical predicate.
+ *
+ * On the per-event path the temporary predicate is freed right after
+ * filter_add_pred(), which installs a copy of it.  On the subsystem path
+ * the predicate is handed to filter_add_subsystem_pred(), which stores the
+ * pointer itself.
+ *
+ * Returns 0 on success, -EINVAL for a malformed expression, -ENOMEM when a
+ * predicate cannot be allocated, or the error from the add helper.
+ */
+static int replace_preds(struct event_subsystem *system,
+                        struct ftrace_event_call *call,
+                        struct filter_parse_state *ps,
+                        char *filter_string)
+{
+       char *operand1 = NULL, *operand2 = NULL;
+       struct filter_pred *pred;
+       struct postfix_elt *elt;
+       int err;
+
+       err = check_preds(ps);
+       if (err)
+               return err;
+
+       list_for_each_entry(elt, &ps->postfix, list) {
+               if (elt->op == OP_NONE) {
+                       if (!operand1)
+                               operand1 = elt->operand;
+                       else if (!operand2)
+                               operand2 = elt->operand;
+                       else {
+                               parse_error(ps, FILT_ERR_TOO_MANY_OPERANDS, 0);
+                               return -EINVAL;
+                       }
+                       continue;
+               }
+
+               if (elt->op == OP_AND || elt->op == OP_OR) {
+                       pred = create_logical_pred(elt->op);
+                       /* both add helpers dereference pred unconditionally */
+                       if (!pred)
+                               return -ENOMEM;
+                       if (call) {
+                               err = filter_add_pred(ps, call, pred);
+                               filter_free_pred(pred);
+                       } else
+                               err = filter_add_subsystem_pred(ps, system,
+                                                       pred, filter_string);
+                       if (err)
+                               return err;
+
+                       operand1 = operand2 = NULL;
+                       continue;
+               }
+
+               if (!operand1 || !operand2) {
+                       parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
+                       return -EINVAL;
+               }
+
+               pred = create_pred(elt->op, operand1, operand2);
+               /* both add helpers dereference pred unconditionally */
+               if (!pred)
+                       return -ENOMEM;
+               if (call) {
+                       err = filter_add_pred(ps, call, pred);
+                       filter_free_pred(pred);
+               } else
+                       err = filter_add_subsystem_pred(ps, system, pred,
+                                                       filter_string);
+               if (err)
+                       return err;
+
+               operand1 = operand2 = NULL;
+       }
+
+       return 0;
+}
+
+/*
+ * Replace the filter of @call with the one described by @filter_string.
+ *
+ * The string "0" (after stripping whitespace) just disables the event's
+ * predicates and removes its filter string.  Any other string is parsed
+ * and installed; if parsing or installation fails, the error is appended
+ * to the event's filter string.  Everything runs under filter_mutex.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
+{
+       struct filter_parse_state *ps;
+       int err = 0;
+
+       mutex_lock(&filter_mutex);
+
+       if (strcmp(strstrip(filter_string), "0") == 0) {
+               filter_disable_preds(call);
+               remove_filter_string(call->filter);
+               goto out_unlock;
+       }
+
+       ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+       if (!ps) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       filter_disable_preds(call);
+       replace_filter_string(call->filter, filter_string);
+
+       parse_init(ps, filter_ops, filter_string);
+       err = filter_parse(ps);
+       if (!err)
+               err = replace_preds(NULL, call, ps, filter_string);
+       if (err)
+               append_filter_err(ps, call->filter);
+
+       filter_opstack_clear(ps);
+       postfix_clear(ps);
+       kfree(ps);
+out_unlock:
+       mutex_unlock(&filter_mutex);
+
+       return err;
+}
+
+/*
+ * Replace the filter of @system with the one described by @filter_string.
+ *
+ * The string "0" (after stripping whitespace) just frees the subsystem's
+ * predicates and removes its filter string.  Any other string is parsed
+ * and installed; if parsing or installation fails, the error is appended
+ * to the subsystem's filter string.  Everything runs under filter_mutex.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int apply_subsystem_event_filter(struct event_subsystem *system,
+                                char *filter_string)
+{
+       struct filter_parse_state *ps;
+       int err = 0;
+
+       mutex_lock(&filter_mutex);
+
+       if (strcmp(strstrip(filter_string), "0") == 0) {
+               filter_free_subsystem_preds(system);
+               remove_filter_string(system->filter);
+               goto out_unlock;
+       }
+
+       ps = kzalloc(sizeof(*ps), GFP_KERNEL);
+       if (!ps) {
+               err = -ENOMEM;
+               goto out_unlock;
+       }
+
+       filter_free_subsystem_preds(system);
+       replace_filter_string(system->filter, filter_string);
+
+       parse_init(ps, filter_ops, filter_string);
+       err = filter_parse(ps);
+       if (!err)
+               err = replace_preds(system, NULL, ps, filter_string);
+       if (err)
+               append_filter_err(ps, system->filter);
+
+       filter_opstack_clear(ps);
+       postfix_clear(ps);
+       kfree(ps);
+out_unlock:
+       mutex_unlock(&filter_mutex);
+
+       return err;
+}
 
diff --git a/kernel/trace/trace_events_stage_1.h b/kernel/trace/trace_events_stage_1.h
deleted file mode 100644 (file)
index 38985f9..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Stage 1 of the trace events.
- *
- * Override the macros in <trace/trace_event_types.h> to include the following:
- *
- * struct ftrace_raw_<call> {
- *     struct trace_entry              ent;
- *     <type>                          <item>;
- *     <type2>                         <item2>[<len>];
- *     [...]
- * };
- *
- * The <type> <item> is created by the __field(type, item) macro or
- * the __array(type2, item2, len) macro.
- * We simply do "type item;", and that will create the fields
- * in the structure.
- */
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)
-
-#undef __array
-#define __array(type, item, len)       type    item[len];
-
-#undef __field
-#define __field(type, item)            type    item;
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \
-       struct ftrace_raw_##name {                              \
-               struct trace_entry      ent;                    \
-               tstruct                                         \
-       };                                                      \
-       static struct ftrace_event_call event_##name
-
-#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_2.h b/kernel/trace/trace_events_stage_2.h
deleted file mode 100644 (file)
index d363c66..0000000
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Stage 2 of the trace events.
- *
- * Override the macros in <trace/trace_event_types.h> to include the following:
- *
- * enum print_line_t
- * ftrace_raw_output_<call>(struct trace_iterator *iter, int flags)
- * {
- *     struct trace_seq *s = &iter->seq;
- *     struct ftrace_raw_<call> *field; <-- defined in stage 1
- *     struct trace_entry *entry;
- *     int ret;
- *
- *     entry = iter->ent;
- *
- *     if (entry->type != event_<call>.id) {
- *             WARN_ON_ONCE(1);
- *             return TRACE_TYPE_UNHANDLED;
- *     }
- *
- *     field = (typeof(field))entry;
- *
- *     ret = trace_seq_printf(s, <TP_printk> "\n");
- *     if (!ret)
- *             return TRACE_TYPE_PARTIAL_LINE;
- *
- *     return TRACE_TYPE_HANDLED;
- * }
- *
- * This is the method used to print the raw event to the trace
- * output format. Note, this is not needed if the data is read
- * in binary.
- */
-
-#undef __entry
-#define __entry field
-
-#undef TP_printk
-#define TP_printk(fmt, args...) fmt "\n", args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
-enum print_line_t                                                      \
-ftrace_raw_output_##call(struct trace_iterator *iter, int flags)       \
-{                                                                      \
-       struct trace_seq *s = &iter->seq;                               \
-       struct ftrace_raw_##call *field;                                \
-       struct trace_entry *entry;                                      \
-       int ret;                                                        \
-                                                                       \
-       entry = iter->ent;                                              \
-                                                                       \
-       if (entry->type != event_##call.id) {                           \
-               WARN_ON_ONCE(1);                                        \
-               return TRACE_TYPE_UNHANDLED;                            \
-       }                                                               \
-                                                                       \
-       field = (typeof(field))entry;                                   \
-                                                                       \
-       ret = trace_seq_printf(s, #call ": " print);                    \
-       if (!ret)                                                       \
-               return TRACE_TYPE_PARTIAL_LINE;                         \
-                                                                       \
-       return TRACE_TYPE_HANDLED;                                      \
-}
-       
-#include <trace/trace_event_types.h>
-
-/*
- * Setup the showing format of trace point.
- *
- * int
- * ftrace_format_##call(struct trace_seq *s)
- * {
- *     struct ftrace_raw_##call field;
- *     int ret;
- *
- *     ret = trace_seq_printf(s, #type " " #item ";"
- *                            " offset:%u; size:%u;\n",
- *                            offsetof(struct ftrace_raw_##call, item),
- *                            sizeof(field.type));
- *
- * }
- */
-
-#undef TP_STRUCT__entry
-#define TP_STRUCT__entry(args...) args
-
-#undef __field
-#define __field(type, item)                                    \
-       ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
-                              "offset:%u;\tsize:%u;\n",                \
-                              (unsigned int)offsetof(typeof(field), item), \
-                              (unsigned int)sizeof(field.item));       \
-       if (!ret)                                                       \
-               return 0;
-
-#undef __array
-#define __array(type, item, len)                                               \
-       ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t"    \
-                              "offset:%u;\tsize:%u;\n",                \
-                              (unsigned int)offsetof(typeof(field), item), \
-                              (unsigned int)sizeof(field.item));       \
-       if (!ret)                                                       \
-               return 0;
-
-#undef __entry
-#define __entry REC
-
-#undef TP_printk
-#define TP_printk(fmt, args...) "%s, %s\n", #fmt, __stringify(args)
-
-#undef TP_fast_assign
-#define TP_fast_assign(args...) args
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)           \
-static int                                                             \
-ftrace_format_##call(struct trace_seq *s)                              \
-{                                                                      \
-       struct ftrace_raw_##call field;                                 \
-       int ret;                                                        \
-                                                                       \
-       tstruct;                                                        \
-                                                                       \
-       trace_seq_printf(s, "\nprint fmt: " print);                     \
-                                                                       \
-       return ret;                                                     \
-}
-
-#include <trace/trace_event_types.h>
-
-#undef __field
-#define __field(type, item)                                            \
-       ret = trace_define_field(event_call, #type, #item,              \
-                                offsetof(typeof(field), item),         \
-                                sizeof(field.item));                   \
-       if (ret)                                                        \
-               return ret;
-
-#undef __array
-#define __array(type, item, len)                                       \
-       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
-                                offsetof(typeof(field), item),         \
-                                sizeof(field.item));                   \
-       if (ret)                                                        \
-               return ret;
-
-#define __common_field(type, item)                                     \
-       ret = trace_define_field(event_call, #type, "common_" #item,    \
-                                offsetof(typeof(field.ent), item),     \
-                                sizeof(field.ent.item));               \
-       if (ret)                                                        \
-               return ret;
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, func, print)           \
-int                                                                    \
-ftrace_define_fields_##call(void)                                      \
-{                                                                      \
-       struct ftrace_raw_##call field;                                 \
-       struct ftrace_event_call *event_call = &event_##call;           \
-       int ret;                                                        \
-                                                                       \
-       __common_field(unsigned char, type);                            \
-       __common_field(unsigned char, flags);                           \
-       __common_field(unsigned char, preempt_count);                   \
-       __common_field(int, pid);                                       \
-       __common_field(int, tgid);                                      \
-                                                                       \
-       tstruct;                                                        \
-                                                                       \
-       return ret;                                                     \
-}
-
-#include <trace/trace_event_types.h>
diff --git a/kernel/trace/trace_events_stage_3.h b/kernel/trace/trace_events_stage_3.h
deleted file mode 100644 (file)
index 9d2fa78..0000000
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Stage 3 of the trace events.
- *
- * Override the macros in <trace/trace_event_types.h> to include the following:
- *
- * static void ftrace_event_<call>(proto)
- * {
- *     event_trace_printk(_RET_IP_, "<call>: " <fmt>);
- * }
- *
- * static int ftrace_reg_event_<call>(void)
- * {
- *     int ret;
- *
- *     ret = register_trace_<call>(ftrace_event_<call>);
- *     if (!ret)
- *             pr_info("event trace: Could not activate trace point "
- *                     "probe to  <call>");
- *     return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *     unregister_trace_<call>(ftrace_event_<call>);
- * }
- *
- * For those macros defined with TRACE_FORMAT:
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *     .name                   = "<call>",
- *     .regfunc                = ftrace_reg_event_<call>,
- *     .unregfunc              = ftrace_unreg_event_<call>,
- * }
- *
- *
- * For those macros defined with TRACE_EVENT:
- *
- * static struct ftrace_event_call event_<call>;
- *
- * static void ftrace_raw_event_<call>(proto)
- * {
- *     struct ring_buffer_event *event;
- *     struct ftrace_raw_<call> *entry; <-- defined in stage 1
- *     unsigned long irq_flags;
- *     int pc;
- *
- *     local_save_flags(irq_flags);
- *     pc = preempt_count();
- *
- *     event = trace_current_buffer_lock_reserve(event_<call>.id,
- *                               sizeof(struct ftrace_raw_<call>),
- *                               irq_flags, pc);
- *     if (!event)
- *             return;
- *     entry   = ring_buffer_event_data(event);
- *
- *     <assign>;  <-- Here we assign the entries by the __field and
- *                     __array macros.
- *
- *     trace_current_buffer_unlock_commit(event, irq_flags, pc);
- * }
- *
- * static int ftrace_raw_reg_event_<call>(void)
- * {
- *     int ret;
- *
- *     ret = register_trace_<call>(ftrace_raw_event_<call>);
- *     if (!ret)
- *             pr_info("event trace: Could not activate trace point "
- *                     "probe to <call>");
- *     return ret;
- * }
- *
- * static void ftrace_unreg_event_<call>(void)
- * {
- *     unregister_trace_<call>(ftrace_raw_event_<call>);
- * }
- *
- * static struct trace_event ftrace_event_type_<call> = {
- *     .trace                  = ftrace_raw_output_<call>, <-- stage 2
- * };
- *
- * static int ftrace_raw_init_event_<call>(void)
- * {
- *     int id;
- *
- *     id = register_ftrace_event(&ftrace_event_type_<call>);
- *     if (!id)
- *             return -ENODEV;
- *     event_<call>.id = id;
- *     return 0;
- * }
- *
- * static struct ftrace_event_call __used
- * __attribute__((__aligned__(4)))
- * __attribute__((section("_ftrace_events"))) event_<call> = {
- *     .name                   = "<call>",
- *     .system                 = "<system>",
- *     .raw_init               = ftrace_raw_init_event_<call>,
- *     .regfunc                = ftrace_reg_event_<call>,
- *     .unregfunc              = ftrace_unreg_event_<call>,
- *     .show_format            = ftrace_format_<call>,
- * }
- *
- */
-
-#undef TP_FMT
-#define TP_FMT(fmt, args...)   fmt "\n", ##args
-
-#ifdef CONFIG_EVENT_PROFILE
-#define _TRACE_PROFILE(call, proto, args)                              \
-static void ftrace_profile_##call(proto)                               \
-{                                                                      \
-       extern void perf_tpcounter_event(int);                          \
-       perf_tpcounter_event(event_##call.id);                          \
-}                                                                      \
-                                                                       \
-static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \
-{                                                                      \
-       int ret = 0;                                                    \
-                                                                       \
-       if (!atomic_inc_return(&call->profile_count))                   \
-               ret = register_trace_##call(ftrace_profile_##call);     \
-                                                                       \
-       return ret;                                                     \
-}                                                                      \
-                                                                       \
-static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \
-{                                                                      \
-       if (atomic_add_negative(-1, &call->profile_count))              \
-               unregister_trace_##call(ftrace_profile_##call);         \
-}
-
-#define _TRACE_PROFILE_INIT(call)                                      \
-       .profile_count = ATOMIC_INIT(-1),                               \
-       .profile_enable = ftrace_profile_enable_##call,                 \
-       .profile_disable = ftrace_profile_disable_##call,
-
-#else
-#define _TRACE_PROFILE(call, proto, args)
-#define _TRACE_PROFILE_INIT(call)
-#endif
-
-#define _TRACE_FORMAT(call, proto, args, fmt)                          \
-static void ftrace_event_##call(proto)                                 \
-{                                                                      \
-       event_trace_printk(_RET_IP_, #call ": " fmt);                   \
-}                                                                      \
-                                                                       \
-static int ftrace_reg_event_##call(void)                               \
-{                                                                      \
-       int ret;                                                        \
-                                                                       \
-       ret = register_trace_##call(ftrace_event_##call);               \
-       if (ret)                                                        \
-               pr_info("event trace: Could not activate trace point "  \
-                       "probe to " #call "\n");                        \
-       return ret;                                                     \
-}                                                                      \
-                                                                       \
-static void ftrace_unreg_event_##call(void)                            \
-{                                                                      \
-       unregister_trace_##call(ftrace_event_##call);                   \
-}                                                                      \
-                                                                       \
-static struct ftrace_event_call event_##call;                          \
-                                                                       \
-static int ftrace_init_event_##call(void)                              \
-{                                                                      \
-       int id;                                                         \
-                                                                       \
-       id = register_ftrace_event(NULL);                               \
-       if (!id)                                                        \
-               return -ENODEV;                                         \
-       event_##call.id = id;                                           \
-       return 0;                                                       \
-}
-
-#undef TRACE_FORMAT
-#define TRACE_FORMAT(call, proto, args, fmt)                           \
-_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt))          \
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))                      \
-static struct ftrace_event_call __used                                 \
-__attribute__((__aligned__(4)))                                                \
-__attribute__((section("_ftrace_events"))) event_##call = {            \
-       .name                   = #call,                                \
-       .system                 = __stringify(TRACE_SYSTEM),            \
-       .raw_init               = ftrace_init_event_##call,             \
-       .regfunc                = ftrace_reg_event_##call,              \
-       .unregfunc              = ftrace_unreg_event_##call,            \
-       _TRACE_PROFILE_INIT(call)                                       \
-}
-
-#undef __entry
-#define __entry entry
-
-#undef TRACE_EVENT
-#define TRACE_EVENT(call, proto, args, tstruct, assign, print)         \
-_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args))                      \
-                                                                       \
-static struct ftrace_event_call event_##call;                          \
-                                                                       \
-static void ftrace_raw_event_##call(proto)                             \
-{                                                                      \
-       struct ftrace_event_call *call = &event_##call;                 \
-       struct ring_buffer_event *event;                                \
-       struct ftrace_raw_##call *entry;                                \
-       unsigned long irq_flags;                                        \
-       int pc;                                                         \
-                                                                       \
-       local_save_flags(irq_flags);                                    \
-       pc = preempt_count();                                           \
-                                                                       \
-       event = trace_current_buffer_lock_reserve(event_##call.id,      \
-                                 sizeof(struct ftrace_raw_##call),     \
-                                 irq_flags, pc);                       \
-       if (!event)                                                     \
-               return;                                                 \
-       entry   = ring_buffer_event_data(event);                        \
-                                                                       \
-       assign;                                                         \
-                                                                       \
-       if (call->preds && !filter_match_preds(call, entry))            \
-               ring_buffer_event_discard(event);                       \
-                                                                       \
-       trace_nowake_buffer_unlock_commit(event, irq_flags, pc);        \
-                                                                       \
-}                                                                      \
-                                                                       \
-static int ftrace_raw_reg_event_##call(void)                           \
-{                                                                      \
-       int ret;                                                        \
-                                                                       \
-       ret = register_trace_##call(ftrace_raw_event_##call);           \
-       if (ret)                                                        \
-               pr_info("event trace: Could not activate trace point "  \
-                       "probe to " #call "\n");                        \
-       return ret;                                                     \
-}                                                                      \
-                                                                       \
-static void ftrace_raw_unreg_event_##call(void)                                \
-{                                                                      \
-       unregister_trace_##call(ftrace_raw_event_##call);               \
-}                                                                      \
-                                                                       \
-static struct trace_event ftrace_event_type_##call = {                 \
-       .trace                  = ftrace_raw_output_##call,             \
-};                                                                     \
-                                                                       \
-static int ftrace_raw_init_event_##call(void)                          \
-{                                                                      \
-       int id;                                                         \
-                                                                       \
-       id = register_ftrace_event(&ftrace_event_type_##call);          \
-       if (!id)                                                        \
-               return -ENODEV;                                         \
-       event_##call.id = id;                                           \
-       INIT_LIST_HEAD(&event_##call.fields);                           \
-       return 0;                                                       \
-}                                                                      \
-                                                                       \
-static struct ftrace_event_call __used                                 \
-__attribute__((__aligned__(4)))                                                \
-__attribute__((section("_ftrace_events"))) event_##call = {            \
-       .name                   = #call,                                \
-       .system                 = __stringify(TRACE_SYSTEM),            \
-       .raw_init               = ftrace_raw_init_event_##call,         \
-       .regfunc                = ftrace_raw_reg_event_##call,          \
-       .unregfunc              = ftrace_raw_unreg_event_##call,        \
-       .show_format            = ftrace_format_##call,                 \
-       .define_fields          = ftrace_define_fields_##call,          \
-       _TRACE_PROFILE_INIT(call)                                       \
-}
-
-#include <trace/trace_event_types.h>
-
-#undef _TRACE_PROFILE
-#undef _TRACE_PROFILE_INIT
-
index 07a22c3..d06cf89 100644 (file)
 #undef TRACE_STRUCT
 #define TRACE_STRUCT(args...) args
 
+extern void __bad_type_size(void);
+
 #undef TRACE_FIELD
 #define TRACE_FIELD(type, item, assign)                                        \
+       if (sizeof(type) != sizeof(field.item))                         \
+               __bad_type_size();                                      \
        ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t"      \
                               "offset:%u;\tsize:%u;\n",                \
                               (unsigned int)offsetof(typeof(field), item), \
@@ -30,7 +34,7 @@
 
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd)                      \
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd)                 \
        ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t"   \
                               "offset:%u;\tsize:%u;\n",                \
                               (unsigned int)offsetof(typeof(field), item), \
@@ -46,6 +50,9 @@
        if (!ret)                                                       \
                return 0;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)        \
+       TRACE_FIELD(type, item, assign)
 
 #undef TP_RAW_FMT
 #define TP_RAW_FMT(args...) args
@@ -65,6 +72,22 @@ ftrace_format_##call(struct trace_seq *s)                            \
        return ret;                                                     \
 }
 
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,   \
+                                   tpfmt)                              \
+static int                                                             \
+ftrace_format_##call(struct trace_seq *s)                              \
+{                                                                      \
+       struct args field;                                              \
+       int ret;                                                        \
+                                                                       \
+       tstruct;                                                        \
+                                                                       \
+       trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt);            \
+                                                                       \
+       return ret;                                                     \
+}
+
 #include "trace_event_types.h"
 
 #undef TRACE_ZERO_CHAR
@@ -78,6 +101,10 @@ ftrace_format_##call(struct trace_seq *s)                           \
 #define TRACE_FIELD(type, item, assign)\
        entry->item = assign;
 
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)        \
+       TRACE_FIELD(type, item, assign)
+
 #undef TP_CMD
 #define TP_CMD(cmd...) cmd
 
@@ -85,18 +112,95 @@ ftrace_format_##call(struct trace_seq *s)                          \
 #define TRACE_ENTRY    entry
 
 #undef TRACE_FIELD_SPECIAL
-#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \
+#define TRACE_FIELD_SPECIAL(type_item, item, len, cmd) \
        cmd;
 
 #undef TRACE_EVENT_FORMAT
 #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)     \
+int ftrace_define_fields_##call(void);                                 \
+static int ftrace_raw_init_event_##call(void);                         \
+                                                                       \
+struct ftrace_event_call __used                                                \
+__attribute__((__aligned__(4)))                                                \
+__attribute__((section("_ftrace_events"))) event_##call = {            \
+       .name                   = #call,                                \
+       .id                     = proto,                                \
+       .system                 = __stringify(TRACE_SYSTEM),            \
+       .raw_init               = ftrace_raw_init_event_##call,         \
+       .show_format            = ftrace_format_##call,                 \
+       .define_fields          = ftrace_define_fields_##call,          \
+};                                                                     \
+static int ftrace_raw_init_event_##call(void)                          \
+{                                                                      \
+       INIT_LIST_HEAD(&event_##call.fields);                           \
+       init_preds(&event_##call);                                      \
+       return 0;                                                       \
+}                                                                      \
+
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,   \
+                                   tpfmt)                              \
                                                                        \
-static struct ftrace_event_call __used                                 \
+struct ftrace_event_call __used                                                \
 __attribute__((__aligned__(4)))                                                \
 __attribute__((section("_ftrace_events"))) event_##call = {            \
        .name                   = #call,                                \
        .id                     = proto,                                \
        .system                 = __stringify(TRACE_SYSTEM),            \
        .show_format            = ftrace_format_##call,                 \
+};
+
+#include "trace_event_types.h"
+
+#undef TRACE_FIELD
+#define TRACE_FIELD(type, item, assign)                                        \
+       ret = trace_define_field(event_call, #type, #item,              \
+                                offsetof(typeof(field), item),         \
+                                sizeof(field.item), is_signed_type(type));     \
+       if (ret)                                                        \
+               return ret;
+
+#undef TRACE_FIELD_SPECIAL
+#define TRACE_FIELD_SPECIAL(type, item, len, cmd)                      \
+       ret = trace_define_field(event_call, #type "[" #len "]", #item, \
+                                offsetof(typeof(field), item),         \
+                                sizeof(field.item), 0);                \
+       if (ret)                                                        \
+               return ret;
+
+#undef TRACE_FIELD_SIGN
+#define TRACE_FIELD_SIGN(type, item, assign, is_signed)                        \
+       ret = trace_define_field(event_call, #type, #item,              \
+                                offsetof(typeof(field), item),         \
+                                sizeof(field.item), is_signed);        \
+       if (ret)                                                        \
+               return ret;
+
+#undef TRACE_FIELD_ZERO_CHAR
+#define TRACE_FIELD_ZERO_CHAR(item)
+
+#undef TRACE_EVENT_FORMAT
+#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt)     \
+int                                                                    \
+ftrace_define_fields_##call(void)                                      \
+{                                                                      \
+       struct ftrace_event_call *event_call = &event_##call;           \
+       struct args field;                                              \
+       int ret;                                                        \
+                                                                       \
+       __common_field(unsigned char, type, 0);                         \
+       __common_field(unsigned char, flags, 0);                        \
+       __common_field(unsigned char, preempt_count, 0);                \
+       __common_field(int, pid, 1);                                    \
+       __common_field(int, tgid, 1);                                   \
+                                                                       \
+       tstruct;                                                        \
+                                                                       \
+       return ret;                                                     \
 }
+
+#undef TRACE_EVENT_FORMAT_NOFILTER
+#define TRACE_EVENT_FORMAT_NOFILTER(call, proto, args, fmt, tstruct,   \
+                                   tpfmt)
+
 #include "trace_event_types.h"
index d28687e..8b59241 100644 (file)
@@ -65,6 +65,12 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
        if (!current->ret_stack)
                return -EBUSY;
 
+       /*
+        * We must make sure the ret_stack is tested before we read
+        * anything else.
+        */
+       smp_rmb();
+
        /* The return trace stack is full */
        if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
                atomic_inc(&current->trace_overrun);
@@ -78,13 +84,14 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth)
        current->ret_stack[index].ret = ret;
        current->ret_stack[index].func = func;
        current->ret_stack[index].calltime = calltime;
+       current->ret_stack[index].subtime = 0;
        *depth = index;
 
        return 0;
 }
 
 /* Retrieve a function return address to the trace stack on thread info.*/
-void
+static void
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
 {
        int index;
@@ -104,9 +111,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret)
        trace->calltime = current->ret_stack[index].calltime;
        trace->overrun = atomic_read(&current->trace_overrun);
        trace->depth = index;
-       barrier();
-       current->curr_ret_stack--;
-
 }
 
 /*
@@ -121,6 +125,8 @@ unsigned long ftrace_return_to_handler(void)
        ftrace_pop_return_trace(&trace, &ret);
        trace.rettime = trace_clock_local();
        ftrace_graph_return(&trace);
+       barrier();
+       current->curr_ret_stack--;
 
        if (unlikely(!ret)) {
                ftrace_graph_stop();
@@ -426,8 +432,8 @@ print_graph_irq(struct trace_iterator *iter, unsigned long addr,
        return TRACE_TYPE_HANDLED;
 }
 
-static enum print_line_t
-print_graph_duration(unsigned long long duration, struct trace_seq *s)
+enum print_line_t
+trace_print_graph_duration(unsigned long long duration, struct trace_seq *s)
 {
        unsigned long nsecs_rem = do_div(duration, 1000);
        /* log10(ULONG_MAX) + '\0' */
@@ -464,12 +470,23 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
                if (!ret)
                        return TRACE_TYPE_PARTIAL_LINE;
        }
+       return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t
+print_graph_duration(unsigned long long duration, struct trace_seq *s)
+{
+       int ret;
+
+       ret = trace_print_graph_duration(duration, s);
+       if (ret != TRACE_TYPE_HANDLED)
+               return ret;
 
        ret = trace_seq_printf(s, "|  ");
        if (!ret)
                return TRACE_TYPE_PARTIAL_LINE;
-       return TRACE_TYPE_HANDLED;
 
+       return TRACE_TYPE_HANDLED;
 }
 
 /* Case of a leaf function on its call entry */
index 7bfdf4c..ca7d7c4 100644 (file)
@@ -1,10 +1,9 @@
 /*
- * h/w branch tracer for x86 based on bts
+ * h/w branch tracer for x86 based on BTS
  *
  * Copyright (C) 2008-2009 Intel Corporation.
  * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
  */
-#include <linux/spinlock.h>
 #include <linux/kallsyms.h>
 #include <linux/debugfs.h>
 #include <linux/ftrace.h>
 
 #include <asm/ds.h>
 
-#include "trace.h"
 #include "trace_output.h"
+#include "trace.h"
 
 
-#define SIZEOF_BTS (1 << 13)
+#define BTS_BUFFER_SIZE (1 << 13)
 
-/*
- * The tracer lock protects the below per-cpu tracer array.
- * It needs to be held to:
- * - start tracing on all cpus
- * - stop tracing on all cpus
- * - start tracing on a single hotplug cpu
- * - stop tracing on a single hotplug cpu
- * - read the trace from all cpus
- * - read the trace from a single cpu
- */
-static DEFINE_SPINLOCK(bts_tracer_lock);
 static DEFINE_PER_CPU(struct bts_tracer *, tracer);
-static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer);
+static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], buffer);
 
 #define this_tracer per_cpu(tracer, smp_processor_id())
-#define this_buffer per_cpu(buffer, smp_processor_id())
 
-static int __read_mostly trace_hw_branches_enabled;
+static int trace_hw_branches_enabled __read_mostly;
+static int trace_hw_branches_suspended __read_mostly;
 static struct trace_array *hw_branch_trace __read_mostly;
 
 
-/*
- * Start tracing on the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_start_cpu(void *arg)
+static void bts_trace_init_cpu(int cpu)
 {
-       if (this_tracer)
-               ds_release_bts(this_tracer);
-
-       this_tracer =
-               ds_request_bts(/* task = */ NULL, this_buffer, SIZEOF_BTS,
-                              /* ovfl = */ NULL, /* th = */ (size_t)-1,
-                              BTS_KERNEL);
-       if (IS_ERR(this_tracer)) {
-               this_tracer = NULL;
-               return;
-       }
+       per_cpu(tracer, cpu) =
+               ds_request_bts_cpu(cpu, per_cpu(buffer, cpu), BTS_BUFFER_SIZE,
+                                  NULL, (size_t)-1, BTS_KERNEL);
+
+       if (IS_ERR(per_cpu(tracer, cpu)))
+               per_cpu(tracer, cpu) = NULL;
 }
 
-static void bts_trace_start(struct trace_array *tr)
+static int bts_trace_init(struct trace_array *tr)
 {
-       spin_lock(&bts_tracer_lock);
+       int cpu;
+
+       hw_branch_trace = tr;
+       trace_hw_branches_enabled = 0;
 
-       on_each_cpu(bts_trace_start_cpu, NULL, 1);
-       trace_hw_branches_enabled = 1;
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               bts_trace_init_cpu(cpu);
 
-       spin_unlock(&bts_tracer_lock);
+               if (likely(per_cpu(tracer, cpu)))
+                       trace_hw_branches_enabled = 1;
+       }
+       trace_hw_branches_suspended = 0;
+       put_online_cpus();
+
+       /* If we could not enable tracing on a single cpu, we fail. */
+       return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
 }
 
-/*
- * Stop tracing on the current cpu.
- * The argument is ignored.
- *
- * pre: bts_tracer_lock must be locked.
- */
-static void bts_trace_stop_cpu(void *arg)
+static void bts_trace_reset(struct trace_array *tr)
 {
-       if (this_tracer) {
-               ds_release_bts(this_tracer);
-               this_tracer = NULL;
+       int cpu;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu) {
+               if (likely(per_cpu(tracer, cpu))) {
+                       ds_release_bts(per_cpu(tracer, cpu));
+                       per_cpu(tracer, cpu) = NULL;
+               }
        }
+       trace_hw_branches_enabled = 0;
+       trace_hw_branches_suspended = 0;
+       put_online_cpus();
 }
 
-static void bts_trace_stop(struct trace_array *tr)
+static void bts_trace_start(struct trace_array *tr)
 {
-       spin_lock(&bts_tracer_lock);
+       int cpu;
 
-       trace_hw_branches_enabled = 0;
-       on_each_cpu(bts_trace_stop_cpu, NULL, 1);
+       get_online_cpus();
+       for_each_online_cpu(cpu)
+               if (likely(per_cpu(tracer, cpu)))
+                       ds_resume_bts(per_cpu(tracer, cpu));
+       trace_hw_branches_suspended = 0;
+       put_online_cpus();
+}
 
-       spin_unlock(&bts_tracer_lock);
+static void bts_trace_stop(struct trace_array *tr)
+{
+       int cpu;
+
+       get_online_cpus();
+       for_each_online_cpu(cpu)
+               if (likely(per_cpu(tracer, cpu)))
+                       ds_suspend_bts(per_cpu(tracer, cpu));
+       trace_hw_branches_suspended = 1;
+       put_online_cpus();
 }
 
 static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
                                     unsigned long action, void *hcpu)
 {
-       unsigned int cpu = (unsigned long)hcpu;
-
-       spin_lock(&bts_tracer_lock);
-
-       if (!trace_hw_branches_enabled)
-               goto out;
+       int cpu = (long)hcpu;
 
        switch (action) {
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
-               smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1);
+               /* The notification is sent with interrupts enabled. */
+               if (trace_hw_branches_enabled) {
+                       bts_trace_init_cpu(cpu);
+
+                       if (trace_hw_branches_suspended &&
+                           likely(per_cpu(tracer, cpu)))
+                               ds_suspend_bts(per_cpu(tracer, cpu));
+               }
                break;
+
        case CPU_DOWN_PREPARE:
-               smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1);
-               break;
+               /* The notification is sent with interrupts enabled. */
+               if (likely(per_cpu(tracer, cpu))) {
+                       ds_release_bts(per_cpu(tracer, cpu));
+                       per_cpu(tracer, cpu) = NULL;
+               }
        }
 
- out:
-       spin_unlock(&bts_tracer_lock);
        return NOTIFY_DONE;
 }
 
@@ -126,20 +134,6 @@ static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
        .notifier_call = bts_hotcpu_handler
 };
 
-static int bts_trace_init(struct trace_array *tr)
-{
-       hw_branch_trace = tr;
-
-       bts_trace_start(tr);
-
-       return 0;
-}
-
-static void bts_trace_reset(struct trace_array *tr)
-{
-       bts_trace_stop(tr);
-}
-
 static void bts_trace_print_header(struct seq_file *m)
 {
        seq_puts(m, "# CPU#        TO  <-  FROM\n");
@@ -147,10 +141,10 @@ static void bts_trace_print_header(struct seq_file *m)
 
 static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
 {
+       unsigned long symflags = TRACE_ITER_SYM_OFFSET;
        struct trace_entry *entry = iter->ent;
        struct trace_seq *seq = &iter->seq;
        struct hw_branch_entry *it;
-       unsigned long symflags = TRACE_ITER_SYM_OFFSET;
 
        trace_assign_type(it, entry);
 
@@ -168,6 +162,7 @@ static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
 
 void trace_hw_branch(u64 from, u64 to)
 {
+       struct ftrace_event_call *call = &event_hw_branch;
        struct trace_array *tr = hw_branch_trace;
        struct ring_buffer_event *event;
        struct hw_branch_entry *entry;
@@ -194,7 +189,8 @@ void trace_hw_branch(u64 from, u64 to)
        entry->ent.type = TRACE_HW_BRANCHES;
        entry->from = from;
        entry->to   = to;
-       trace_buffer_unlock_commit(tr, event, 0, 0);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               trace_buffer_unlock_commit(tr, event, 0, 0);
 
  out:
        atomic_dec(&tr->data[cpu]->disabled);
@@ -224,11 +220,11 @@ static void trace_bts_at(const struct bts_trace *trace, void *at)
 /*
  * Collect the trace on the current cpu and write it into the ftrace buffer.
  *
- * pre: bts_tracer_lock must be locked
+ * pre: tracing must be suspended on the current cpu
  */
 static void trace_bts_cpu(void *arg)
 {
-       struct trace_array *tr = (struct trace_array *) arg;
+       struct trace_array *tr = (struct trace_array *)arg;
        const struct bts_trace *trace;
        unsigned char *at;
 
@@ -241,10 +237,9 @@ static void trace_bts_cpu(void *arg)
        if (unlikely(!this_tracer))
                return;
 
-       ds_suspend_bts(this_tracer);
        trace = ds_read_bts(this_tracer);
        if (!trace)
-               goto out;
+               return;
 
        for (at = trace->ds.top; (void *)at < trace->ds.end;
             at += trace->ds.size)
@@ -253,18 +248,27 @@ static void trace_bts_cpu(void *arg)
        for (at = trace->ds.begin; (void *)at < trace->ds.top;
             at += trace->ds.size)
                trace_bts_at(trace, at);
-
-out:
-       ds_resume_bts(this_tracer);
 }
 
 static void trace_bts_prepare(struct trace_iterator *iter)
 {
-       spin_lock(&bts_tracer_lock);
+       int cpu;
 
+       get_online_cpus();
+       for_each_online_cpu(cpu)
+               if (likely(per_cpu(tracer, cpu)))
+                       ds_suspend_bts(per_cpu(tracer, cpu));
+       /*
+        * We need to collect the trace on the respective cpu since ftrace
+        * implicitly adds the record for the current cpu.
+        * Once that is more flexible, we could collect the data from any cpu.
+        */
        on_each_cpu(trace_bts_cpu, iter->tr, 1);
 
-       spin_unlock(&bts_tracer_lock);
+       for_each_online_cpu(cpu)
+               if (likely(per_cpu(tracer, cpu)))
+                       ds_resume_bts(per_cpu(tracer, cpu));
+       put_online_cpus();
 }
 
 static void trace_bts_close(struct trace_iterator *iter)
@@ -274,11 +278,11 @@ static void trace_bts_close(struct trace_iterator *iter)
 
 void trace_hw_branch_oops(void)
 {
-       spin_lock(&bts_tracer_lock);
-
-       trace_bts_cpu(hw_branch_trace);
-
-       spin_unlock(&bts_tracer_lock);
+       if (this_tracer) {
+               ds_suspend_bts_noirq(this_tracer);
+               trace_bts_cpu(hw_branch_trace);
+               ds_resume_bts_noirq(this_tracer);
+       }
 }
 
 struct tracer bts_tracer __read_mostly =
@@ -291,7 +295,10 @@ struct tracer bts_tracer __read_mostly =
        .start          = bts_trace_start,
        .stop           = bts_trace_stop,
        .open           = trace_bts_prepare,
-       .close          = trace_bts_close
+       .close          = trace_bts_close,
+#ifdef CONFIG_FTRACE_SELFTEST
+       .selftest       = trace_selftest_startup_hw_branches,
+#endif /* CONFIG_FTRACE_SELFTEST */
 };
 
 __init static int init_bts_trace(void)
index 8e37fcd..d53b45e 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/kernel.h>
 #include <linux/mmiotrace.h>
 #include <linux/pci.h>
+#include <linux/time.h>
+
 #include <asm/atomic.h>
 
 #include "trace.h"
@@ -174,7 +176,7 @@ static enum print_line_t mmio_print_rw(struct trace_iterator *iter)
        struct mmiotrace_rw *rw;
        struct trace_seq *s     = &iter->seq;
        unsigned long long t    = ns2usecs(iter->ts);
-       unsigned long usec_rem  = do_div(t, 1000000ULL);
+       unsigned long usec_rem  = do_div(t, USEC_PER_SEC);
        unsigned secs           = (unsigned long)t;
        int ret = 1;
 
@@ -221,7 +223,7 @@ static enum print_line_t mmio_print_map(struct trace_iterator *iter)
        struct mmiotrace_map *m;
        struct trace_seq *s     = &iter->seq;
        unsigned long long t    = ns2usecs(iter->ts);
-       unsigned long usec_rem  = do_div(t, 1000000ULL);
+       unsigned long usec_rem  = do_div(t, USEC_PER_SEC);
        unsigned secs           = (unsigned long)t;
        int ret;
 
index 64b54a5..7938f3a 100644 (file)
 /* must be a power of 2 */
 #define EVENT_HASHSIZE 128
 
-static DEFINE_MUTEX(trace_event_mutex);
+DECLARE_RWSEM(trace_event_mutex);
+
+DEFINE_PER_CPU(struct trace_seq, ftrace_event_seq);
+EXPORT_PER_CPU_SYMBOL(ftrace_event_seq);
+
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
 static int next_event_type = __TRACE_LAST_TYPE + 1;
 
+/*
+ * Hand the text accumulated in @s to the seq_file @m, then re-initialise
+ * @s with trace_seq_init() so it can be filled again.
+ */
+void trace_print_seq(struct seq_file *m, struct trace_seq *s)
+{
+       int end;
+
+       /* Never place the terminator beyond index PAGE_SIZE - 1. */
+       if (s->len >= PAGE_SIZE)
+               end = PAGE_SIZE - 1;
+       else
+               end = s->len;
+
+       /* seq_puts() is given the buffer as a NUL-terminated string. */
+       s->buffer[end] = 0;
+       seq_puts(m, s->buffer);
+
+       trace_seq_init(s);
+}
+
 enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter)
 {
        struct trace_seq *s = &iter->seq;
@@ -84,6 +98,39 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 
        return len;
 }
+EXPORT_SYMBOL_GPL(trace_seq_printf);
+
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ * @args: va_list holding the arguments consumed by @fmt
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formatting of a trace,
+ * trace_seq_vprintf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ *
+ * Returns 0 if @s has no room left or the formatted text does not
+ * fit in full. Otherwise returns the room that was available before
+ * the write, i.e. (PAGE_SIZE - 1) - s->len, which is non-zero; note
+ * that this is not the number of bytes written.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+       int len = (PAGE_SIZE - 1) - s->len;
+       int ret;
+
+       if (!len)
+               return 0;
+
+       ret = vsnprintf(s->buffer + s->len, len, fmt, args);
+
+       /*
+        * If we can't write it all, don't bother writing anything:
+        * s->len is not advanced, so the truncated text never becomes
+        * part of the sequence.
+        */
+       if (ret >= len)
+               return 0;
+
+       s->len += ret;
+
+       return len;
+}
+EXPORT_SYMBOL_GPL(trace_seq_vprintf);
 
 int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
 {
@@ -201,6 +248,67 @@ int trace_seq_path(struct trace_seq *s, struct path *path)
        return 0;
 }
 
+/**
+ * ftrace_print_flags_seq - decode a flags word into delimiter-joined names
+ * @p: trace sequence that receives the text
+ * @delim: string written between two items, or NULL for no separator
+ * @flags: bit mask to decode
+ * @flag_array: table of { mask, name } pairs, terminated by a NULL name
+ *
+ * Every table entry whose mask is fully contained in @flags has its name
+ * appended to @p and its bits cleared from @flags. Bits that match no
+ * entry are appended as a single hexadecimal number. The text is
+ * terminated with a NUL byte written into @p.
+ *
+ * The separator is decided by what this call has written so far, not by
+ * how much text @p already held on entry, so the helper can be used more
+ * than once on the same @p without emitting a leading delimiter.
+ *
+ * Returns a pointer to where this call's text starts inside @p's buffer.
+ */
+const char *
+ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
+                      unsigned long flags,
+                      const struct trace_print_flags *flag_array)
+{
+       unsigned long mask;
+       const char *str;
+       const char *ret = p->buffer + p->len;
+       int first = 1;
+       int i;
+
+       for (i = 0;  flag_array[i].name && flags; i++) {
+
+               mask = flag_array[i].mask;
+               if ((flags & mask) != mask)
+                       continue;
+
+               str = flag_array[i].name;
+               flags &= ~mask;
+               /* Separate from the previous item written by this call. */
+               if (!first && delim)
+                       trace_seq_puts(p, delim);
+               first = 0;
+               trace_seq_puts(p, str);
+       }
+
+       /* check for left over flags */
+       if (flags) {
+               if (!first && delim)
+                       trace_seq_puts(p, delim);
+               trace_seq_printf(p, "0x%lx", flags);
+       }
+
+       trace_seq_putc(p, 0);
+
+       return ret;
+}
+EXPORT_SYMBOL(ftrace_print_flags_seq);
+
+/**
+ * ftrace_print_symbols_seq - translate a value into its symbolic name
+ * @p: trace sequence that receives the text
+ * @val: value to look up
+ * @symbol_array: table of { mask, name } pairs, terminated by a NULL name;
+ *                the mask member is compared against @val for equality
+ *
+ * Appends the name of the first table entry equal to @val. If this call
+ * appended nothing, @val is written as a hexadecimal number instead.
+ * The text is terminated with a NUL byte written into @p.
+ *
+ * Returns a pointer to where this call's text starts inside @p's buffer.
+ */
+const char *
+ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
+                        const struct trace_print_flags *symbol_array)
+{
+       int i;
+       const char *ret = p->buffer + p->len;
+
+       for (i = 0;  symbol_array[i].name; i++) {
+
+               if (val != symbol_array[i].mask)
+                       continue;
+
+               trace_seq_puts(p, symbol_array[i].name);
+               break;
+       }
+
+       /*
+        * Fall back to hex when nothing was appended by this call.
+        * Compare against the start position rather than testing
+        * p->len, which is also non-zero when @p merely holds earlier
+        * output.
+        */
+       if (ret == (const char *)(p->buffer + p->len))
+               trace_seq_printf(p, "0x%lx", val);
+
+       trace_seq_putc(p, 0);
+
+       return ret;
+}
+EXPORT_SYMBOL(ftrace_print_symbols_seq);
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
@@ -311,17 +419,20 @@ seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s,
 
                if (ip == ULONG_MAX || !ret)
                        break;
-               if (i && ret)
-                       ret = trace_seq_puts(s, " <- ");
+               if (ret)
+                       ret = trace_seq_puts(s, " => ");
                if (!ip) {
                        if (ret)
                                ret = trace_seq_puts(s, "??");
+                       if (ret)
+                               ret = trace_seq_puts(s, "\n");
                        continue;
                }
                if (!ret)
                        break;
                if (ret)
                        ret = seq_print_user_ip(s, mm, ip, sym_flags);
+               ret = trace_seq_puts(s, "\n");
        }
 
        if (mm)
@@ -455,6 +566,7 @@ static int task_state_char(unsigned long state)
  * @type: the type of event to look for
  *
  * Returns an event of type @type otherwise NULL
+ * Called with trace_event_read_lock() held.
  */
 struct trace_event *ftrace_find_event(int type)
 {
@@ -464,7 +576,7 @@ struct trace_event *ftrace_find_event(int type)
 
        key = type & (EVENT_HASHSIZE - 1);
 
-       hlist_for_each_entry_rcu(event, n, &event_hash[key], node) {
+       hlist_for_each_entry(event, n, &event_hash[key], node) {
                if (event->type == type)
                        return event;
        }
@@ -472,6 +584,46 @@ struct trace_event *ftrace_find_event(int type)
        return NULL;
 }
 
+/* Events whose type number was assigned dynamically at registration. */
+static LIST_HEAD(ftrace_event_list);
+
+/*
+ * Look for a free dynamic event type by walking ftrace_event_list.
+ *
+ * The walk starts at __TRACE_LAST_TYPE + 1 and advances while the list
+ * entries carry exactly the expected consecutive type, so it stops at
+ * the first gap (or at the end of the list).
+ *
+ * On success *list is set to the node the new event has to be added in
+ * front of (the list head itself when the list is empty or has no gap)
+ * and the free type is returned. Returns 0, leaving *list untouched,
+ * when the candidate would exceed FTRACE_MAX_EVENT.
+ */
+static int trace_search_list(struct list_head **list)
+{
+       struct trace_event *ev;
+       int next = __TRACE_LAST_TYPE + 1;
+
+       /* Nothing registered dynamically yet: the first type is free. */
+       if (list_empty(&ftrace_event_list)) {
+               *list = &ftrace_event_list;
+               return next;
+       }
+
+       /*
+        * All type numbers have been handed out at least once;
+        * see whether an earlier one has been released.
+        */
+       list_for_each_entry(ev, &ftrace_event_list, list) {
+               if (ev->type != next)
+                       break;
+               next++;
+       }
+
+       /* Every type up to FTRACE_MAX_EVENT is still in use. */
+       if (next > FTRACE_MAX_EVENT)
+               return 0;
+
+       *list = &ev->list;
+       return next;
+}
+
+/* Take trace_event_mutex (an rw_semaphore) for reading. */
+void trace_event_read_lock(void)
+{
+       down_read(&trace_event_mutex);
+}
+
+/* Drop the read side of trace_event_mutex taken by trace_event_read_lock(). */
+void trace_event_read_unlock(void)
+{
+       up_read(&trace_event_mutex);
+}
+
 /**
  * register_ftrace_event - register output for an event type
  * @event: the event type to register
@@ -492,22 +644,42 @@ int register_ftrace_event(struct trace_event *event)
        unsigned key;
        int ret = 0;
 
-       mutex_lock(&trace_event_mutex);
+       down_write(&trace_event_mutex);
 
-       if (!event) {
-               ret = next_event_type++;
+       if (WARN_ON(!event))
                goto out;
-       }
 
-       if (!event->type)
-               event->type = next_event_type++;
-       else if (event->type > __TRACE_LAST_TYPE) {
+       INIT_LIST_HEAD(&event->list);
+
+       if (!event->type) {
+               struct list_head *list = NULL;
+
+               if (next_event_type > FTRACE_MAX_EVENT) {
+
+                       event->type = trace_search_list(&list);
+                       if (!event->type)
+                               goto out;
+
+               } else {
+                       
+                       event->type = next_event_type++;
+                       list = &ftrace_event_list;
+               }
+
+               if (WARN_ON(ftrace_find_event(event->type)))
+                       goto out;
+
+               list_add_tail(&event->list, list);
+
+       } else if (event->type > __TRACE_LAST_TYPE) {
                printk(KERN_WARNING "Need to add type to trace.h\n");
                WARN_ON(1);
-       }
-
-       if (ftrace_find_event(event->type))
                goto out;
+       } else {
+               /* Is this event already used */
+               if (ftrace_find_event(event->type))
+                       goto out;
+       }
 
        if (event->trace == NULL)
                event->trace = trace_nop_print;
@@ -520,14 +692,25 @@ int register_ftrace_event(struct trace_event *event)
 
        key = event->type & (EVENT_HASHSIZE - 1);
 
-       hlist_add_head_rcu(&event->node, &event_hash[key]);
+       hlist_add_head(&event->node, &event_hash[key]);
 
        ret = event->type;
  out:
-       mutex_unlock(&trace_event_mutex);
+       up_write(&trace_event_mutex);
 
        return ret;
 }
+EXPORT_SYMBOL_GPL(register_ftrace_event);
+
+/*
+ * Used by module code with the trace_event_mutex held for write.
+ *
+ * Unlinks @event from both its hash chain (event->node) and the list
+ * it is linked on through event->list. No locking is done here; that
+ * is the caller's job. Always returns 0.
+ */
+int __unregister_ftrace_event(struct trace_event *event)
+{
+       hlist_del(&event->node);
+       list_del(&event->list);
+       return 0;
+}
 
 /**
  * unregister_ftrace_event - remove a no longer used event
@@ -535,12 +718,13 @@ int register_ftrace_event(struct trace_event *event)
  */
 int unregister_ftrace_event(struct trace_event *event)
 {
-       mutex_lock(&trace_event_mutex);
-       hlist_del(&event->node);
-       mutex_unlock(&trace_event_mutex);
+       down_write(&trace_event_mutex);
+       __unregister_ftrace_event(event);
+       up_write(&trace_event_mutex);
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(unregister_ftrace_event);
 
 /*
  * Standard events
@@ -833,14 +1017,16 @@ static enum print_line_t trace_stack_print(struct trace_iterator *iter,
 
        trace_assign_type(field, iter->ent);
 
+       if (!trace_seq_puts(s, "<stack trace>\n"))
+               goto partial;
        for (i = 0; i < FTRACE_STACK_ENTRIES; i++) {
-               if (i) {
-                       if (!trace_seq_puts(s, " <= "))
-                               goto partial;
+               if (!field->caller[i] || (field->caller[i] == ULONG_MAX))
+                       break;
+               if (!trace_seq_puts(s, " => "))
+                       goto partial;
 
-                       if (!seq_print_ip_sym(s, field->caller[i], flags))
-                               goto partial;
-               }
+               if (!seq_print_ip_sym(s, field->caller[i], flags))
+                       goto partial;
                if (!trace_seq_puts(s, "\n"))
                        goto partial;
        }
@@ -868,10 +1054,10 @@ static enum print_line_t trace_user_stack_print(struct trace_iterator *iter,
 
        trace_assign_type(field, iter->ent);
 
-       if (!seq_print_userip_objs(field, s, flags))
+       if (!trace_seq_puts(s, "<user stack trace>\n"))
                goto partial;
 
-       if (!trace_seq_putc(s, '\n'))
+       if (!seq_print_userip_objs(field, s, flags))
                goto partial;
 
        return TRACE_TYPE_HANDLED;
index e0bde39..d38bec4 100644 (file)
@@ -1,41 +1,17 @@
 #ifndef __TRACE_EVENTS_H
 #define __TRACE_EVENTS_H
 
+#include <linux/trace_seq.h>
 #include "trace.h"
 
-typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter,
-                                             int flags);
-
-struct trace_event {
-       struct hlist_node       node;
-       int                     type;
-       trace_print_func        trace;
-       trace_print_func        raw;
-       trace_print_func        hex;
-       trace_print_func        binary;
-};
-
 extern enum print_line_t
 trace_print_bprintk_msg_only(struct trace_iterator *iter);
 extern enum print_line_t
 trace_print_printk_msg_only(struct trace_iterator *iter);
 
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-       __attribute__ ((format (printf, 2, 3)));
-extern int
-trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary);
 extern int
 seq_print_ip_sym(struct trace_seq *s, unsigned long ip,
                unsigned long sym_flags);
-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf,
-                                size_t cnt);
-extern int trace_seq_puts(struct trace_seq *s, const char *str);
-extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
-extern int trace_seq_putmem(struct trace_seq *s, const void *mem, size_t len);
-extern int trace_seq_putmem_hex(struct trace_seq *s, const void *mem,
-                               size_t len);
-extern void *trace_seq_reserve(struct trace_seq *s, size_t len);
-extern int trace_seq_path(struct trace_seq *s, struct path *path);
 extern int seq_print_userip_objs(const struct userstack_entry *entry,
                                 struct trace_seq *s, unsigned long sym_flags);
 extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
@@ -44,13 +20,17 @@ extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm,
 extern int trace_print_context(struct trace_iterator *iter);
 extern int trace_print_lat_context(struct trace_iterator *iter);
 
+extern void trace_event_read_lock(void);
+extern void trace_event_read_unlock(void);
 extern struct trace_event *ftrace_find_event(int type);
-extern int register_ftrace_event(struct trace_event *event);
-extern int unregister_ftrace_event(struct trace_event *event);
 
 extern enum print_line_t trace_nop_print(struct trace_iterator *iter,
                                         int flags);
 
+/* used by module unregistering */
+extern int __unregister_ftrace_event(struct trace_event *event);
+extern struct rw_semaphore trace_event_mutex;
+
 #define MAX_MEMHEX_BYTES       8
 #define HEX_CHARS              (MAX_MEMHEX_BYTES*2 + 1)
 
index 1184397..8a30d98 100644 (file)
@@ -36,6 +36,7 @@ static void probe_power_start(struct power_trace *it, unsigned int type,
 
 static void probe_power_end(struct power_trace *it)
 {
+       struct ftrace_event_call *call = &event_power;
        struct ring_buffer_event *event;
        struct trace_power *entry;
        struct trace_array_cpu *data;
@@ -54,7 +55,8 @@ static void probe_power_end(struct power_trace *it)
                goto out;
        entry   = ring_buffer_event_data(event);
        entry->state_data = *it;
-       trace_buffer_unlock_commit(tr, event, 0, 0);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
        preempt_enable();
 }
@@ -62,6 +64,7 @@ static void probe_power_end(struct power_trace *it)
 static void probe_power_mark(struct power_trace *it, unsigned int type,
                                unsigned int level)
 {
+       struct ftrace_event_call *call = &event_power;
        struct ring_buffer_event *event;
        struct trace_power *entry;
        struct trace_array_cpu *data;
@@ -84,7 +87,8 @@ static void probe_power_mark(struct power_trace *it, unsigned int type,
                goto out;
        entry   = ring_buffer_event_data(event);
        entry->state_data = *it;
-       trace_buffer_unlock_commit(tr, event, 0, 0);
+       if (!filter_check_discard(call, entry, tr->buffer, event))
+               trace_buffer_unlock_commit(tr, event, 0, 0);
  out:
        preempt_enable();
 }
index eb81556..9bece96 100644 (file)
@@ -245,17 +245,13 @@ static const struct file_operations ftrace_formats_fops = {
 static __init int init_trace_printk_function_export(void)
 {
        struct dentry *d_tracer;
-       struct dentry *entry;
 
        d_tracer = tracing_init_dentry();
        if (!d_tracer)
                return 0;
 
-       entry = debugfs_create_file("printk_formats", 0444, d_tracer,
+       trace_create_file("printk_formats", 0444, d_tracer,
                                    NULL, &ftrace_formats_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs "
-                          "'printk_formats' entry\n");
 
        return 0;
 }
index 9117cea..a98106d 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
@@ -29,13 +29,13 @@ probe_sched_switch(struct rq *__rq, struct task_struct *prev,
        int cpu;
        int pc;
 
-       if (!sched_ref || sched_stopped)
+       if (unlikely(!sched_ref))
                return;
 
        tracing_record_cmdline(prev);
        tracing_record_cmdline(next);
 
-       if (!tracer_enabled)
+       if (!tracer_enabled || sched_stopped)
                return;
 
        pc = preempt_count();
@@ -56,15 +56,15 @@ probe_sched_wakeup(struct rq *__rq, struct task_struct *wakee, int success)
        unsigned long flags;
        int cpu, pc;
 
-       if (!likely(tracer_enabled))
+       if (unlikely(!sched_ref))
                return;
 
-       pc = preempt_count();
        tracing_record_cmdline(current);
 
-       if (sched_stopped)
+       if (!tracer_enabled || sched_stopped)
                return;
 
+       pc = preempt_count();
        local_irq_save(flags);
        cpu = raw_smp_processor_id();
        data = ctx_trace->data[cpu];
index 5bc00e8..eacb272 100644 (file)
@@ -15,7 +15,7 @@
 #include <linux/kallsyms.h>
 #include <linux/uaccess.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>
+#include <trace/events/sched.h>
 
 #include "trace.h"
 
@@ -138,9 +138,6 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
 
        pc = preempt_count();
 
-       /* The task we are waiting for is waking up */
-       data = wakeup_trace->data[wakeup_cpu];
-
        /* disable local data, not wakeup_cpu data */
        cpu = raw_smp_processor_id();
        disabled = atomic_inc_return(&wakeup_trace->data[cpu]->disabled);
@@ -154,6 +151,9 @@ probe_wakeup_sched_switch(struct rq *rq, struct task_struct *prev,
        if (unlikely(!tracer_enabled || next != wakeup_task))
                goto out_unlock;
 
+       /* The task we are waiting for is waking up */
+       data = wakeup_trace->data[wakeup_cpu];
+
        trace_function(wakeup_trace, CALLER_ADDR0, CALLER_ADDR1, flags, pc);
        tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc);
 
index 08f4eb2..00dd648 100644 (file)
@@ -16,6 +16,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
        case TRACE_BRANCH:
        case TRACE_GRAPH_ENT:
        case TRACE_GRAPH_RET:
+       case TRACE_HW_BRANCHES:
                return 1;
        }
        return 0;
@@ -188,6 +189,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace,
 #else
 # define trace_selftest_startup_dynamic_tracing(trace, tr, func) ({ 0; })
 #endif /* CONFIG_DYNAMIC_FTRACE */
+
 /*
  * Simple verification test of ftrace function tracer.
  * Enable ftrace, sleep 1/10 second, and then read the trace
@@ -749,3 +751,59 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
        return ret;
 }
 #endif /* CONFIG_BRANCH_TRACER */
+
+#ifdef CONFIG_HW_BRANCH_TRACER
+int
+trace_selftest_startup_hw_branches(struct tracer *trace,
+                                  struct trace_array *tr)
+{
+       struct trace_iterator *iter;
+       struct tracer tracer;
+       unsigned long count;
+       int ret;
+
+       if (!trace->open) {
+               printk(KERN_CONT "missing open function...");
+               return -1;
+       }
+
+       ret = tracer_init(trace, tr);
+       if (ret) {
+               warn_failed_init_tracer(trace, ret);
+               return ret;
+       }
+
+       /*
+        * The hw-branch tracer needs to collect the trace from the various
+        * cpu trace buffers - before tracing is stopped.
+        */
+       iter = kzalloc(sizeof(*iter), GFP_KERNEL);
+       if (!iter)
+               return -ENOMEM;
+
+       memcpy(&tracer, trace, sizeof(tracer));
+
+       iter->trace = &tracer;
+       iter->tr = tr;
+       iter->pos = -1;
+       mutex_init(&iter->mutex);
+
+       trace->open(iter);
+
+       mutex_destroy(&iter->mutex);
+       kfree(iter);
+
+       tracing_stop();
+
+       ret = trace_test_buffer(tr, &count);
+       trace->reset(tr);
+       tracing_start();
+
+       if (!ret && !count) {
+               printk(KERN_CONT "no entries found..");
+               ret = -1;
+       }
+
+       return ret;
+}
+#endif /* CONFIG_HW_BRANCH_TRACER */
index c750f65..2d7aebd 100644 (file)
@@ -265,7 +265,7 @@ static int t_show(struct seq_file *m, void *v)
                seq_printf(m, "        Depth    Size   Location"
                           "    (%d entries)\n"
                           "        -----    ----   --------\n",
-                          max_stack_trace.nr_entries);
+                          max_stack_trace.nr_entries - 1);
 
                if (!stack_tracer_enabled && !max_stack_size)
                        print_disabled(m);
@@ -352,19 +352,14 @@ __setup("stacktrace", enable_stacktrace);
 static __init int stack_trace_init(void)
 {
        struct dentry *d_tracer;
-       struct dentry *entry;
 
        d_tracer = tracing_init_dentry();
 
-       entry = debugfs_create_file("stack_max_size", 0644, d_tracer,
-                                   &max_stack_size, &stack_max_size_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'stack_max_size' entry\n");
+       trace_create_file("stack_max_size", 0644, d_tracer,
+                       &max_stack_size, &stack_max_size_fops);
 
-       entry = debugfs_create_file("stack_trace", 0444, d_tracer,
-                                   NULL, &stack_trace_fops);
-       if (!entry)
-               pr_warning("Could not create debugfs 'stack_trace' entry\n");
+       trace_create_file("stack_trace", 0444, d_tracer,
+                       NULL, &stack_trace_fops);
 
        if (stack_tracer_enabled)
                register_ftrace_function(&trace_ops);
index acdebd7..c006437 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * Infrastructure for statistic tracing (histogram output).
  *
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
  *
  * Based on the code from trace_branch.c which is
  * Copyright (C) 2008 Steven Rostedt <srostedt@redhat.com>
 
 
 #include <linux/list.h>
+#include <linux/rbtree.h>
 #include <linux/debugfs.h>
 #include "trace_stat.h"
 #include "trace.h"
 
 
-/* List of stat entries from a tracer */
-struct trace_stat_list {
-       struct list_head        list;
+/*
+ * Red-black tree node wrapping one stat entry from a tracer.
+ * We use such a tree to quickly sort the stat
+ * entries from the tracer.
+ */
+struct stat_node {
+       struct rb_node          node;
        void                    *stat;
 };
 
 /* A stat session is the stats output in one file */
-struct tracer_stat_session {
+struct stat_session {
        struct list_head        session_list;
        struct tracer_stat      *ts;
-       struct list_head        stat_list;
+       struct rb_root          stat_root;
        struct mutex            stat_mutex;
        struct dentry           *file;
 };
@@ -37,18 +42,48 @@ static DEFINE_MUTEX(all_stat_sessions_mutex);
 /* The root directory for all stat files */
 static struct dentry           *stat_dir;
 
+/*
+ * Iterate through the rbtree using a post order traversal path
+ * to release the next node.
+ * It won't necessarily release one at each iteration
+ * but it will at least advance closer to the next one
+ * to be released.
+ */
+static struct rb_node *release_next(struct rb_node *node)
+{
+       struct stat_node *snode;
+       struct rb_node *parent = rb_parent(node);
+
+       if (node->rb_left)
+               return node->rb_left;
+       else if (node->rb_right)
+               return node->rb_right;
+       else {
+               if (!parent)
+                       ;
+               else if (parent->rb_left == node)
+                       parent->rb_left = NULL;
+               else
+                       parent->rb_right = NULL;
+
+               snode = container_of(node, struct stat_node, node);
+               kfree(snode);
+
+               return parent;
+       }
+}
 
-static void reset_stat_session(struct tracer_stat_session *session)
+static void reset_stat_session(struct stat_session *session)
 {
-       struct trace_stat_list *node, *next;
+       struct rb_node *node = session->stat_root.rb_node;
 
-       list_for_each_entry_safe(node, next, &session->stat_list, list)
-               kfree(node);
+       while (node)
+               node = release_next(node);
 
-       INIT_LIST_HEAD(&session->stat_list);
+       session->stat_root = RB_ROOT;
 }
 
-static void destroy_session(struct tracer_stat_session *session)
+static void destroy_session(struct stat_session *session)
 {
        debugfs_remove(session->file);
        reset_stat_session(session);
@@ -56,25 +91,60 @@ static void destroy_session(struct tracer_stat_session *session)
        kfree(session);
 }
 
+typedef int (*cmp_stat_t)(void *, void *);
+
+static int insert_stat(struct rb_root *root, void *stat, cmp_stat_t cmp)
+{
+       struct rb_node **new = &(root->rb_node), *parent = NULL;
+       struct stat_node *data;
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+       data->stat = stat;
+
+       /*
+        * Figure out where to put new node
+        * This is a descending sort
+        */
+       while (*new) {
+               struct stat_node *this;
+               int result;
+
+               this = container_of(*new, struct stat_node, node);
+               result = cmp(data->stat, this->stat);
+
+               parent = *new;
+               if (result >= 0)
+                       new = &((*new)->rb_left);
+               else
+                       new = &((*new)->rb_right);
+       }
+
+       rb_link_node(&data->node, parent, new);
+       rb_insert_color(&data->node, root);
+       return 0;
+}
+
 /*
  * For tracers that don't provide a stat_cmp callback.
- * This one will force an immediate insertion on tail of
- * the list.
+ * This one will force an insertion as right-most node
+ * in the rbtree.
  */
 static int dummy_cmp(void *p1, void *p2)
 {
-       return 1;
+       return -1;
 }
 
 /*
- * Initialize the stat list at each trace_stat file opening.
+ * Initialize the stat rbtree at each trace_stat file opening.
  * All of these copies and sorting are required on all opening
  * since the stats could have changed between two file sessions.
  */
-static int stat_seq_init(struct tracer_stat_session *session)
+static int stat_seq_init(struct stat_session *session)
 {
-       struct trace_stat_list *iter_entry, *new_entry;
        struct tracer_stat *ts = session->ts;
+       struct rb_root *root = &session->stat_root;
        void *stat;
        int ret = 0;
        int i;
@@ -85,29 +155,16 @@ static int stat_seq_init(struct tracer_stat_session *session)
        if (!ts->stat_cmp)
                ts->stat_cmp = dummy_cmp;
 
-       stat = ts->stat_start();
+       stat = ts->stat_start(ts);
        if (!stat)
                goto exit;
 
-       /*
-        * The first entry. Actually this is the second, but the first
-        * one (the stat_list head) is pointless.
-        */
-       new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
-       if (!new_entry) {
-               ret = -ENOMEM;
+       ret = insert_stat(root, stat, ts->stat_cmp);
+       if (ret)
                goto exit;
-       }
-
-       INIT_LIST_HEAD(&new_entry->list);
-
-       list_add(&new_entry->list, &session->stat_list);
-
-       new_entry->stat = stat;
 
        /*
-        * Iterate over the tracer stat entries and store them in a sorted
-        * list.
+        * Iterate over the tracer stat entries and store them in an rbtree.
         */
        for (i = 1; ; i++) {
                stat = ts->stat_next(stat, i);
@@ -116,36 +173,16 @@ static int stat_seq_init(struct tracer_stat_session *session)
                if (!stat)
                        break;
 
-               new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL);
-               if (!new_entry) {
-                       ret = -ENOMEM;
-                       goto exit_free_list;
-               }
-
-               INIT_LIST_HEAD(&new_entry->list);
-               new_entry->stat = stat;
-
-               list_for_each_entry_reverse(iter_entry, &session->stat_list,
-                               list) {
-
-                       /* Insertion with a descendent sorting */
-                       if (ts->stat_cmp(iter_entry->stat,
-                                       new_entry->stat) >= 0) {
-
-                               list_add(&new_entry->list, &iter_entry->list);
-                               break;
-                       }
-               }
-
-               /* The current larger value */
-               if (list_empty(&new_entry->list))
-                       list_add(&new_entry->list, &session->stat_list);
+               ret = insert_stat(root, stat, ts->stat_cmp);
+               if (ret)
+                       goto exit_free_rbtree;
        }
+
 exit:
        mutex_unlock(&session->stat_mutex);
        return ret;
 
-exit_free_list:
+exit_free_rbtree:
        reset_stat_session(session);
        mutex_unlock(&session->stat_mutex);
        return ret;
@@ -154,38 +191,51 @@ exit_free_list:
 
 static void *stat_seq_start(struct seq_file *s, loff_t *pos)
 {
-       struct tracer_stat_session *session = s->private;
+       struct stat_session *session = s->private;
+       struct rb_node *node;
+       int i;
 
-       /* Prevent from tracer switch or stat_list modification */
+       /* Prevent tracer switch or rbtree modification */
        mutex_lock(&session->stat_mutex);
 
        /* If we are in the beginning of the file, print the headers */
-       if (!*pos && session->ts->stat_headers)
+       if (!*pos && session->ts->stat_headers) {
+               (*pos)++;
                return SEQ_START_TOKEN;
+       }
 
-       return seq_list_start(&session->stat_list, *pos);
+       node = rb_first(&session->stat_root);
+       for (i = 0; node && i < *pos; i++)
+               node = rb_next(node);
+
+       (*pos)++;
+
+       return node;
 }
 
 static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos)
 {
-       struct tracer_stat_session *session = s->private;
+       struct stat_session *session = s->private;
+       struct rb_node *node = p;
+
+       (*pos)++;
 
        if (p == SEQ_START_TOKEN)
-               return seq_list_start(&session->stat_list, *pos);
+               return rb_first(&session->stat_root);
 
-       return seq_list_next(p, &session->stat_list, pos);
+       return rb_next(node);
 }
 
 static void stat_seq_stop(struct seq_file *s, void *p)
 {
-       struct tracer_stat_session *session = s->private;
+       struct stat_session *session = s->private;
        mutex_unlock(&session->stat_mutex);
 }
 
 static int stat_seq_show(struct seq_file *s, void *v)
 {
-       struct tracer_stat_session *session = s->private;
-       struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list);
+       struct stat_session *session = s->private;
+       struct stat_node *l = container_of(v, struct stat_node, node);
 
        if (v == SEQ_START_TOKEN)
                return session->ts->stat_headers(s);
@@ -205,7 +255,7 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
 {
        int ret;
 
-       struct tracer_stat_session *session = inode->i_private;
+       struct stat_session *session = inode->i_private;
 
        ret = seq_open(file, &trace_stat_seq_ops);
        if (!ret) {
@@ -218,11 +268,11 @@ static int tracing_stat_open(struct inode *inode, struct file *file)
 }
 
 /*
- * Avoid consuming memory with our now useless list.
+ * Avoid consuming memory with our now useless rbtree.
  */
 static int tracing_stat_release(struct inode *i, struct file *f)
 {
-       struct tracer_stat_session *session = i->i_private;
+       struct stat_session *session = i->i_private;
 
        mutex_lock(&session->stat_mutex);
        reset_stat_session(session);
@@ -251,7 +301,7 @@ static int tracing_stat_init(void)
        return 0;
 }
 
-static int init_stat_file(struct tracer_stat_session *session)
+static int init_stat_file(struct stat_session *session)
 {
        if (!stat_dir && tracing_stat_init())
                return -ENODEV;
@@ -266,7 +316,7 @@ static int init_stat_file(struct tracer_stat_session *session)
 
 int register_stat_tracer(struct tracer_stat *trace)
 {
-       struct tracer_stat_session *session, *node, *tmp;
+       struct stat_session *session, *node;
        int ret;
 
        if (!trace)
@@ -277,7 +327,7 @@ int register_stat_tracer(struct tracer_stat *trace)
 
        /* Already registered? */
        mutex_lock(&all_stat_sessions_mutex);
-       list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
+       list_for_each_entry(node, &all_stat_sessions, session_list) {
                if (node->ts == trace) {
                        mutex_unlock(&all_stat_sessions_mutex);
                        return -EINVAL;
@@ -286,15 +336,13 @@ int register_stat_tracer(struct tracer_stat *trace)
        mutex_unlock(&all_stat_sessions_mutex);
 
        /* Init the session */
-       session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL);
+       session = kzalloc(sizeof(*session), GFP_KERNEL);
        if (!session)
                return -ENOMEM;
 
        session->ts = trace;
        INIT_LIST_HEAD(&session->session_list);
-       INIT_LIST_HEAD(&session->stat_list);
        mutex_init(&session->stat_mutex);
-       session->file = NULL;
 
        ret = init_stat_file(session);
        if (ret) {
@@ -312,7 +360,7 @@ int register_stat_tracer(struct tracer_stat *trace)
 
 void unregister_stat_tracer(struct tracer_stat *trace)
 {
-       struct tracer_stat_session *node, *tmp;
+       struct stat_session *node, *tmp;
 
        mutex_lock(&all_stat_sessions_mutex);
        list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) {
index 202274c..f3546a2 100644 (file)
@@ -12,7 +12,7 @@ struct tracer_stat {
        /* The name of your stat file */
        const char              *name;
        /* Iteration over statistic entries */
-       void                    *(*stat_start)(void);
+       void                    *(*stat_start)(struct tracer_stat *trace);
        void                    *(*stat_next)(void *prev, int idx);
        /* Compare two entries for stats sorting */
        int                     (*stat_cmp)(void *p1, void *p2);
index 91fd19c..e04b76c 100644 (file)
@@ -321,11 +321,7 @@ static const struct file_operations sysprof_sample_fops = {
 
 void init_tracer_sysprof_debugfs(struct dentry *d_tracer)
 {
-       struct dentry *entry;
 
-       entry = debugfs_create_file("sysprof_sample_period", 0644,
+       trace_create_file("sysprof_sample_period", 0644,
                        d_tracer, NULL, &sysprof_sample_fops);
-       if (entry)
-               return;
-       pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n");
 }
index 797201e..97fcea4 100644 (file)
@@ -6,7 +6,7 @@
  */
 
 
-#include <trace/workqueue.h>
+#include <trace/events/workqueue.h>
 #include <linux/list.h>
 #include <linux/percpu.h>
 #include "trace_stat.h"
@@ -16,8 +16,6 @@
 /* A cpu workqueue thread */
 struct cpu_workqueue_stats {
        struct list_head            list;
-/* Useful to know if we print the cpu headers */
-       bool                        first_entry;
        int                         cpu;
        pid_t                       pid;
 /* Can be inserted from interrupt or user context, need to be atomic */
@@ -47,12 +45,11 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
                          struct work_struct *work)
 {
        int cpu = cpumask_first(&wq_thread->cpus_allowed);
-       struct cpu_workqueue_stats *node, *next;
+       struct cpu_workqueue_stats *node;
        unsigned long flags;
 
        spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-       list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-                                                       list) {
+       list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
                if (node->pid == wq_thread->pid) {
                        atomic_inc(&node->inserted);
                        goto found;
@@ -69,12 +66,11 @@ probe_workqueue_execution(struct task_struct *wq_thread,
                          struct work_struct *work)
 {
        int cpu = cpumask_first(&wq_thread->cpus_allowed);
-       struct cpu_workqueue_stats *node, *next;
+       struct cpu_workqueue_stats *node;
        unsigned long flags;
 
        spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-       list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
-                                                       list) {
+       list_for_each_entry(node, &workqueue_cpu_stat(cpu)->list, list) {
                if (node->pid == wq_thread->pid) {
                        node->executed++;
                        goto found;
@@ -105,8 +101,6 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
        cws->pid = wq_thread->pid;
 
        spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-       if (list_empty(&workqueue_cpu_stat(cpu)->list))
-               cws->first_entry = true;
        list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
        spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
@@ -152,7 +146,7 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
        return ret;
 }
 
-static void *workqueue_stat_start(void)
+static void *workqueue_stat_start(struct tracer_stat *trace)
 {
        int cpu;
        void *ret = NULL;
@@ -191,16 +185,9 @@ static void *workqueue_stat_next(void *prev, int idx)
 static int workqueue_stat_show(struct seq_file *s, void *p)
 {
        struct cpu_workqueue_stats *cws = p;
-       unsigned long flags;
-       int cpu = cws->cpu;
        struct pid *pid;
        struct task_struct *tsk;
 
-       spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
-       if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
-               seq_printf(s, "\n");
-       spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
-
        pid = find_get_pid(cws->pid);
        if (pid) {
                tsk = get_pid_task(pid, PIDTYPE_PID);
index 42a2dbc..ea7c3b4 100644 (file)
@@ -154,7 +154,7 @@ void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait,
        if (!list_empty(&wait->task_list))
                list_del_init(&wait->task_list);
        else if (waitqueue_active(q))
-               __wake_up_common(q, mode, 1, 0, key);
+               __wake_up_locked_key(q, mode, key);
        spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL(abort_exclusive_wait);
index f71fb2a..0668795 100644 (file)
@@ -33,7 +33,8 @@
 #include <linux/kallsyms.h>
 #include <linux/debug_locks.h>
 #include <linux/lockdep.h>
-#include <trace/workqueue.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/workqueue.h>
 
 /*
  * The per-CPU workqueue (if single thread, we always use the first
@@ -124,8 +125,6 @@ struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
        return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK);
 }
 
-DEFINE_TRACE(workqueue_insertion);
-
 static void insert_work(struct cpu_workqueue_struct *cwq,
                        struct work_struct *work, struct list_head *head)
 {
@@ -262,8 +261,6 @@ int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
 
-DEFINE_TRACE(workqueue_execution);
-
 static void run_workqueue(struct cpu_workqueue_struct *cwq)
 {
        spin_lock_irq(&cwq->lock);
@@ -753,8 +750,6 @@ init_cpu_workqueue(struct workqueue_struct *wq, int cpu)
        return cwq;
 }
 
-DEFINE_TRACE(workqueue_creation);
-
 static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu)
 {
        struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
@@ -860,8 +855,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name,
 }
 EXPORT_SYMBOL_GPL(__create_workqueue_key);
 
-DEFINE_TRACE(workqueue_destruction);
-
 static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq)
 {
        /*
index 8ade0a7..9960be0 100644 (file)
@@ -10,6 +10,9 @@ menu "Library routines"
 config BITREVERSE
        tristate
 
+config RATIONAL
+       boolean
+
 config GENERIC_FIND_FIRST_BIT
        bool
 
index 33a40e4..1f6edef 100644 (file)
@@ -50,6 +50,7 @@ ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
 endif
 
 obj-$(CONFIG_BITREVERSE) += bitrev.o
+obj-$(CONFIG_RATIONAL) += rational.o
 obj-$(CONFIG_CRC_CCITT)        += crc-ccitt.o
 obj-$(CONFIG_CRC16)    += crc16.o
 obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o
index 1f71b97..eb23aaa 100644 (file)
@@ -119,6 +119,12 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
 }
 EXPORT_SYMBOL(alloc_cpumask_var_node);
 
+bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node)
+{
+       return alloc_cpumask_var_node(mask, flags | __GFP_ZERO, node);
+}
+EXPORT_SYMBOL(zalloc_cpumask_var_node);
+
 /**
  * alloc_cpumask_var - allocate a struct cpumask
  * @mask: pointer to cpumask_var_t where the cpumask is returned
@@ -135,6 +141,12 @@ bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
 }
 EXPORT_SYMBOL(alloc_cpumask_var);
 
+bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+       return alloc_cpumask_var(mask, flags | __GFP_ZERO);
+}
+EXPORT_SYMBOL(zalloc_cpumask_var);
+
 /**
  * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena.
  * @mask: pointer to cpumask_var_t where the cpumask is returned
index 69da09a..ad65fc0 100644 (file)
 #include <linux/dma-debug.h>
 #include <linux/spinlock.h>
 #include <linux/debugfs.h>
+#include <linux/uaccess.h>
 #include <linux/device.h>
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <linux/ctype.h>
 #include <linux/list.h>
 #include <linux/slab.h>
 
@@ -85,6 +87,7 @@ static u32 show_num_errors = 1;
 
 static u32 num_free_entries;
 static u32 min_free_entries;
+static u32 nr_total_entries;
 
 /* number of preallocated entries requested by kernel cmdline */
 static u32 req_entries;
@@ -97,6 +100,16 @@ static struct dentry *show_all_errors_dent  __read_mostly;
 static struct dentry *show_num_errors_dent  __read_mostly;
 static struct dentry *num_free_entries_dent __read_mostly;
 static struct dentry *min_free_entries_dent __read_mostly;
+static struct dentry *filter_dent           __read_mostly;
+
+/* per-driver filter related state */
+
+#define NAME_MAX_LEN   64
+
+static char                  current_driver_name[NAME_MAX_LEN] __read_mostly;
+static struct device_driver *current_driver                    __read_mostly;
+
+static DEFINE_RWLOCK(driver_name_lock);
 
 static const char *type2name[4] = { "single", "page",
                                    "scather-gather", "coherent" };
@@ -104,6 +117,11 @@ static const char *type2name[4] = { "single", "page",
 static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
                                   "DMA_FROM_DEVICE", "DMA_NONE" };
 
+/* little merge helper - remove it after the merge window */
+#ifndef BUS_NOTIFY_UNBOUND_DRIVER
+#define BUS_NOTIFY_UNBOUND_DRIVER 0x0005
+#endif
+
 /*
  * The access to some variables in this macro is racy. We can't use atomic_t
  * here because all these variables are exported to debugfs. Some of them even
@@ -121,15 +139,54 @@ static inline void dump_entry_trace(struct dma_debug_entry *entry)
 {
 #ifdef CONFIG_STACKTRACE
        if (entry) {
-               printk(KERN_WARNING "Mapped at:\n");
+               pr_warning("Mapped at:\n");
                print_stack_trace(&entry->stacktrace, 0);
        }
 #endif
 }
 
+static bool driver_filter(struct device *dev)
+{
+       struct device_driver *drv;
+       unsigned long flags;
+       bool ret;
+
+       /* driver filter off */
+       if (likely(!current_driver_name[0]))
+               return true;
+
+       /* driver filter on and initialized */
+       if (current_driver && dev->driver == current_driver)
+               return true;
+
+       if (current_driver || !current_driver_name[0])
+               return false;
+
+       /* driver filter on but not yet initialized */
+       drv = get_driver(dev->driver);
+       if (!drv)
+               return false;
+
+       /* lock to protect against change of current_driver_name */
+       read_lock_irqsave(&driver_name_lock, flags);
+
+       ret = false;
+       if (drv->name &&
+           strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
+               current_driver = drv;
+               ret = true;
+       }
+
+       read_unlock_irqrestore(&driver_name_lock, flags);
+       put_driver(drv);
+
+       return ret;
+}
+
 #define err_printk(dev, entry, format, arg...) do {            \
                error_count += 1;                               \
-               if (show_all_errors || show_num_errors > 0) {   \
+               if (driver_filter(dev) &&                       \
+                   (show_all_errors || show_num_errors > 0)) { \
                        WARN(1, "%s %s: " format,               \
                             dev_driver_string(dev),            \
                             dev_name(dev) , ## arg);           \
@@ -185,15 +242,50 @@ static void put_hash_bucket(struct hash_bucket *bucket,
 static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket,
                                                struct dma_debug_entry *ref)
 {
-       struct dma_debug_entry *entry;
+       struct dma_debug_entry *entry, *ret = NULL;
+       int matches = 0, match_lvl, last_lvl = 0;
 
        list_for_each_entry(entry, &bucket->list, list) {
-               if ((entry->dev_addr == ref->dev_addr) &&
-                   (entry->dev == ref->dev))
+               if ((entry->dev_addr != ref->dev_addr) ||
+                   (entry->dev != ref->dev))
+                       continue;
+
+               /*
+                * Some drivers map the same physical address multiple
+                * times. Without a hardware IOMMU this results in the
+                * same device addresses being put into the dma-debug
+                * hash multiple times too. This can result in false
+                * positives being reported. Therfore we implement a
+                * best-fit algorithm here which returns the entry from
+                * the hash which fits best to the reference value
+                * instead of the first-fit.
+                */
+               matches += 1;
+               match_lvl = 0;
+               entry->size      == ref->size      ? ++match_lvl : match_lvl;
+               entry->type      == ref->type      ? ++match_lvl : match_lvl;
+               entry->direction == ref->direction ? ++match_lvl : match_lvl;
+
+               if (match_lvl == 3) {
+                       /* perfect-fit - return the result */
                        return entry;
+               } else if (match_lvl > last_lvl) {
+                       /*
+                        * We found an entry that fits better then the
+                        * previous one
+                        */
+                       last_lvl = match_lvl;
+                       ret      = entry;
+               }
        }
 
-       return NULL;
+       /*
+        * If we have multiple matches but no perfect-fit, just return
+        * NULL.
+        */
+       ret = (matches == 1) ? ret : NULL;
+
+       return ret;
 }
 
 /*
@@ -257,6 +349,21 @@ static void add_dma_entry(struct dma_debug_entry *entry)
        put_hash_bucket(bucket, &flags);
 }
 
+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+       struct dma_debug_entry *entry;
+
+       entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+       list_del(&entry->list);
+       memset(entry, 0, sizeof(*entry));
+
+       num_free_entries -= 1;
+       if (num_free_entries < min_free_entries)
+               min_free_entries = num_free_entries;
+
+       return entry;
+}
+
 /* struct dma_entry allocator
  *
  * The next two functions implement the allocator for
@@ -270,15 +377,12 @@ static struct dma_debug_entry *dma_entry_alloc(void)
        spin_lock_irqsave(&free_entries_lock, flags);
 
        if (list_empty(&free_entries)) {
-               printk(KERN_ERR "DMA-API: debugging out of memory "
-                               "- disabling\n");
+               pr_err("DMA-API: debugging out of memory - disabling\n");
                global_disable = true;
                goto out;
        }
 
-       entry = list_entry(free_entries.next, struct dma_debug_entry, list);
-       list_del(&entry->list);
-       memset(entry, 0, sizeof(*entry));
+       entry = __dma_entry_alloc();
 
 #ifdef CONFIG_STACKTRACE
        entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
@@ -286,9 +390,6 @@ static struct dma_debug_entry *dma_entry_alloc(void)
        entry->stacktrace.skip = 2;
        save_stack_trace(&entry->stacktrace);
 #endif
-       num_free_entries -= 1;
-       if (num_free_entries < min_free_entries)
-               min_free_entries = num_free_entries;
 
 out:
        spin_unlock_irqrestore(&free_entries_lock, flags);
@@ -310,6 +411,53 @@ static void dma_entry_free(struct dma_debug_entry *entry)
        spin_unlock_irqrestore(&free_entries_lock, flags);
 }
 
+int dma_debug_resize_entries(u32 num_entries)
+{
+       int i, delta, ret = 0;
+       unsigned long flags;
+       struct dma_debug_entry *entry;
+       LIST_HEAD(tmp);
+
+       spin_lock_irqsave(&free_entries_lock, flags);
+
+       if (nr_total_entries < num_entries) {
+               delta = num_entries - nr_total_entries;
+
+               spin_unlock_irqrestore(&free_entries_lock, flags);
+
+               for (i = 0; i < delta; i++) {
+                       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+                       if (!entry)
+                               break;
+
+                       list_add_tail(&entry->list, &tmp);
+               }
+
+               spin_lock_irqsave(&free_entries_lock, flags);
+
+               list_splice(&tmp, &free_entries);
+               nr_total_entries += i;
+               num_free_entries += i;
+       } else {
+               delta = nr_total_entries - num_entries;
+
+               for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+                       entry = __dma_entry_alloc();
+                       kfree(entry);
+               }
+
+               nr_total_entries -= i;
+       }
+
+       if (nr_total_entries != num_entries)
+               ret = 1;
+
+       spin_unlock_irqrestore(&free_entries_lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(dma_debug_resize_entries);
+
 /*
  * DMA-API debugging init code
  *
@@ -334,8 +482,7 @@ static int prealloc_memory(u32 num_entries)
        num_free_entries = num_entries;
        min_free_entries = num_entries;
 
-       printk(KERN_INFO "DMA-API: preallocated %d debug entries\n",
-                       num_entries);
+       pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
 
        return 0;
 
@@ -349,11 +496,102 @@ out_err:
        return -ENOMEM;
 }
 
+/*
+ * Read handler for the driver filter file: hands the current filter name
+ * followed by a newline to userspace, or returns 0 when no filter is set.
+ */
+static ssize_t filter_read(struct file *file, char __user *user_buf,
+                          size_t count, loff_t *ppos)
+{
+       char buf[NAME_MAX_LEN + 1];
+       unsigned long flags;
+       int len;
+
+       /*
+        * NOTE(review): this emptiness check reads current_driver_name
+        * without holding driver_name_lock, unlike the copy below.
+        */
+       if (!current_driver_name[0])
+               return 0;
+
+       /*
+        * We can't copy to userspace directly because current_driver_name can
+        * only be read under the driver_name_lock with irqs disabled. So
+        * create a temporary copy first.
+        */
+       read_lock_irqsave(&driver_name_lock, flags);
+       len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
+       read_unlock_irqrestore(&driver_name_lock, flags);
+
+       return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+/*
+ * Write handler for the driver filter file.
+ *
+ * At most NAME_MAX_LEN - 1 bytes of the user data are examined. If the
+ * first byte is not alphanumeric the filter is switched off; otherwise the
+ * first whitespace-delimited token becomes the new current_driver_name.
+ * current_driver is reset to NULL in both cases.
+ *
+ * Returns count (even when the input was truncated), or -EFAULT if the
+ * user buffer could not be copied.
+ */
+static ssize_t filter_write(struct file *file, const char __user *userbuf,
+                           size_t count, loff_t *ppos)
+{
+       char buf[NAME_MAX_LEN];
+       unsigned long flags;
+       size_t len;
+       int i;
+
+       /*
+        * We can't copy from userspace directly. Access to
+        * current_driver_name is protected with a write_lock with irqs
+        * disabled. Since copy_from_user can fault and may sleep we
+        * need to copy to temporary buffer first
+        */
+       len = min(count, (size_t)(NAME_MAX_LEN - 1));
+       if (copy_from_user(buf, userbuf, len))
+               return -EFAULT;
+
+       /* len <= NAME_MAX_LEN - 1, so the terminator always fits in buf */
+       buf[len] = 0;
+
+       write_lock_irqsave(&driver_name_lock, flags);
+
+       /*
+        * Now handle the string we got from userspace very carefully.
+        * The rules are:
+        *         - only use the first token we got
+        *         - token delimiter is everything looking like a space
+        *           character (' ', '\n', '\t' ...)
+        *
+        */
+       if (!isalnum(buf[0])) {
+               /*
+                * If the first character userspace gave us is not
+                * alphanumerical then assume the filter should be
+                * switched off.
+                */
+               if (current_driver_name[0])
+                       pr_info("DMA-API: switching off dma-debug driver filter\n");
+               current_driver_name[0] = 0;
+               current_driver = NULL;
+               goto out_unlock;
+       }
+
+       /*
+        * Now parse out the first token and use it as the name for the
+        * driver to filter for.
+        *
+        * The delimiter itself is copied before the loop breaks and is
+        * then overwritten by the terminator stored after the loop. The
+        * loop always breaks at or before buf[len], which is 0.
+        *
+        * NOTE(review): isspace() should already cover ' ', which would
+        * make the explicit comparison redundant - confirm.
+        */
+       for (i = 0; i < NAME_MAX_LEN; ++i) {
+               current_driver_name[i] = buf[i];
+               if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
+                       break;
+       }
+       current_driver_name[i] = 0;
+       current_driver = NULL;
+
+       pr_info("DMA-API: enable driver filter for driver [%s]\n",
+               current_driver_name);
+
+out_unlock:
+       write_unlock_irqrestore(&driver_name_lock, flags);
+
+       return count;
+}
+
+/*
+ * File operations wiring filter_read() and filter_write() together.
+ * NOTE(review): not declared static; the only use visible here is the
+ * debugfs_create_file() call in dma_debug_fs_init() - confirm whether
+ * external linkage is intended.
+ */
+const struct file_operations filter_fops = {
+       .read  = filter_read,
+       .write = filter_write,
+};
+
 static int dma_debug_fs_init(void)
 {
        dma_debug_dent = debugfs_create_dir("dma-api", NULL);
        if (!dma_debug_dent) {
-               printk(KERN_ERR "DMA-API: can not create debugfs directory\n");
+               pr_err("DMA-API: can not create debugfs directory\n");
                return -ENOMEM;
        }
 
@@ -392,6 +630,11 @@ static int dma_debug_fs_init(void)
        if (!min_free_entries_dent)
                goto out_err;
 
+       filter_dent = debugfs_create_file("driver_filter", 0644,
+                                         dma_debug_dent, NULL, &filter_fops);
+       if (!filter_dent)
+               goto out_err;
+
        return 0;
 
 out_err:
@@ -400,9 +643,64 @@ out_err:
        return -ENOMEM;
 }
 
+/*
+ * Count the entries in dma_entry_hash whose dev field equals @dev.
+ *
+ * All HASH_SIZE buckets are walked, each under its own bucket lock, with
+ * local interrupts disabled for the duration of the whole walk.
+ *
+ * Returns the number of matching entries.
+ */
+static int device_dma_allocations(struct device *dev)
+{
+       struct dma_debug_entry *entry;
+       unsigned long flags;
+       int count = 0, i;
+
+       /* irqs are disabled once here, so plain spin_lock() is used below */
+       local_irq_save(flags);
+
+       for (i = 0; i < HASH_SIZE; ++i) {
+               spin_lock(&dma_entry_hash[i].lock);
+               list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
+                       if (entry->dev == dev)
+                               count += 1;
+               }
+               spin_unlock(&dma_entry_hash[i].lock);
+       }
+
+       local_irq_restore(flags);
+
+       return count;
+}
+
+/*
+ * Notifier callback; @data is used as the struct device the event is for.
+ *
+ * On BUS_NOTIFY_UNBOUND_DRIVER an error is printed through err_printk()
+ * if device_dma_allocations() still finds entries for the device. Every
+ * other action is ignored. Always returns 0.
+ */
+static int dma_debug_device_change(struct notifier_block *nb,
+                                   unsigned long action, void *data)
+{
+       struct device *dev = data;
+       int count;
+
+
+       switch (action) {
+       case BUS_NOTIFY_UNBOUND_DRIVER:
+               count = device_dma_allocations(dev);
+               /* nothing left behind - nothing to report */
+               if (count == 0)
+                       break;
+               err_printk(dev, NULL, "DMA-API: device driver has pending "
+                               "DMA allocations while released from device "
+                               "[count=%d]\n", count);
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
 void dma_debug_add_bus(struct bus_type *bus)
 {
-       /* FIXME: register notifier */
+       struct notifier_block *nb;
+
+       nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
+       if (nb == NULL) {
+               pr_err("dma_debug_add_bus: out of memory\n");
+               return;
+       }
+
+       nb->notifier_call = dma_debug_device_change;
+
+       bus_register_notifier(bus, nb);
 }
 
 /*
@@ -421,8 +719,7 @@ void dma_debug_init(u32 num_entries)
        }
 
        if (dma_debug_fs_init() != 0) {
-               printk(KERN_ERR "DMA-API: error creating debugfs entries "
-                               "- disabling\n");
+               pr_err("DMA-API: error creating debugfs entries - disabling\n");
                global_disable = true;
 
                return;
@@ -432,14 +729,15 @@ void dma_debug_init(u32 num_entries)
                num_entries = req_entries;
 
        if (prealloc_memory(num_entries) != 0) {
-               printk(KERN_ERR "DMA-API: debugging out of memory error "
-                               "- disabled\n");
+               pr_err("DMA-API: debugging out of memory error - disabled\n");
                global_disable = true;
 
                return;
        }
 
-       printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n");
+       nr_total_entries = num_free_entries;
+
+       pr_info("DMA-API: debugging enabled by kernel config\n");
 }
 
 static __init int dma_debug_cmdline(char *str)
@@ -448,8 +746,7 @@ static __init int dma_debug_cmdline(char *str)
                return -EINVAL;
 
        if (strncmp(str, "off", 3) == 0) {
-               printk(KERN_INFO "DMA-API: debugging disabled on kernel "
-                                "command line\n");
+               pr_info("DMA-API: debugging disabled on kernel command line\n");
                global_disable = true;
        }
 
@@ -723,15 +1020,15 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
                entry->type           = dma_debug_sg;
                entry->dev            = dev;
                entry->paddr          = sg_phys(s);
-               entry->size           = s->length;
-               entry->dev_addr       = s->dma_address;
+               entry->size           = sg_dma_len(s);
+               entry->dev_addr       = sg_dma_address(s);
                entry->direction      = direction;
                entry->sg_call_ents   = nents;
                entry->sg_mapped_ents = mapped_ents;
 
                if (!PageHighMem(sg_page(s))) {
                        check_for_stack(dev, sg_virt(s));
-                       check_for_illegal_area(dev, sg_virt(s), s->length);
+                       check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
                }
 
                add_dma_entry(entry);
@@ -739,13 +1036,33 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
 }
 EXPORT_SYMBOL(debug_dma_map_sg);
 
+/*
+ * Look up the dma-debug entry for scatterlist element @s of @dev, using
+ * the element's DMA address and DMA length as the reference, and return
+ * the sg_mapped_ents value recorded in it.
+ *
+ * Returns 0 when hash_bucket_find() finds no entry.
+ */
+static int get_nr_mapped_entries(struct device *dev, struct scatterlist *s)
+{
+       struct dma_debug_entry *entry, ref;
+       struct hash_bucket *bucket;
+       unsigned long flags;
+       int mapped_ents;
+
+       /*
+        * NOTE(review): only dev, dev_addr and size of ref are set; the
+        * remaining fields stay uninitialized - confirm that
+        * get_hash_bucket()/hash_bucket_find() do not read them.
+        */
+       ref.dev      = dev;
+       ref.dev_addr = sg_dma_address(s);
+       ref.size     = sg_dma_len(s);
+
+       bucket       = get_hash_bucket(&ref, &flags);
+       entry        = hash_bucket_find(bucket, &ref);
+       mapped_ents  = 0;
+
+       if (entry)
+               mapped_ents = entry->sg_mapped_ents;
+       put_hash_bucket(bucket, &flags);
+
+       return mapped_ents;
+}
+
 void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
                        int nelems, int dir)
 {
-       struct dma_debug_entry *entry;
        struct scatterlist *s;
        int mapped_ents = 0, i;
-       unsigned long flags;
 
        if (unlikely(global_disable))
                return;
@@ -756,8 +1073,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
                        .type           = dma_debug_sg,
                        .dev            = dev,
                        .paddr          = sg_phys(s),
-                       .dev_addr       = s->dma_address,
-                       .size           = s->length,
+                       .dev_addr       = sg_dma_address(s),
+                       .size           = sg_dma_len(s),
                        .direction      = dir,
                        .sg_call_ents   = 0,
                };
@@ -765,14 +1082,9 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
                if (mapped_ents && i >= mapped_ents)
                        break;
 
-               if (mapped_ents == 0) {
-                       struct hash_bucket *bucket;
+               if (!i) {
                        ref.sg_call_ents = nelems;
-                       bucket = get_hash_bucket(&ref, &flags);
-                       entry = hash_bucket_find(bucket, &ref);
-                       if (entry)
-                               mapped_ents = entry->sg_mapped_ents;
-                       put_hash_bucket(bucket, &flags);
+                       mapped_ents = get_nr_mapped_entries(dev, s);
                }
 
                check_unmap(&ref);
@@ -874,14 +1186,20 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
                               int nelems, int direction)
 {
        struct scatterlist *s;
-       int i;
+       int mapped_ents = 0, i;
 
        if (unlikely(global_disable))
                return;
 
        for_each_sg(sg, s, nelems, i) {
-               check_sync(dev, s->dma_address, s->dma_length, 0,
-                               direction, true);
+               if (!i)
+                       mapped_ents = get_nr_mapped_entries(dev, s);
+
+               if (i >= mapped_ents)
+                       break;
+
+               check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
+                          direction, true);
        }
 }
 EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
@@ -890,15 +1208,39 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
                                  int nelems, int direction)
 {
        struct scatterlist *s;
-       int i;
+       int mapped_ents = 0, i;
 
        if (unlikely(global_disable))
                return;
 
        for_each_sg(sg, s, nelems, i) {
-               check_sync(dev, s->dma_address, s->dma_length, 0,
-                               direction, false);
+               if (!i)
+                       mapped_ents = get_nr_mapped_entries(dev, s);
+
+               if (i >= mapped_ents)
+                       break;
+
+               check_sync(dev, sg_dma_address(s), sg_dma_len(s), 0,
+                          direction, false);
        }
 }
 EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
 
+/*
+ * Handler for the "dma_debug_driver=" kernel command line option: copies
+ * up to NAME_MAX_LEN - 1 characters of @str into current_driver_name and
+ * logs the filter if the resulting name is non-empty. Always returns 1.
+ */
+static int __init dma_debug_driver_setup(char *str)
+{
+       int i;
+
+       /*
+        * The terminating 0 is copied as well when it is reached in time.
+        * NOTE(review): for longer strings the loop ends without storing a
+        * terminator; presumably current_driver_name is a zero-initialized
+        * array of NAME_MAX_LEN bytes so the last byte is still 0 - confirm.
+        */
+       for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
+               current_driver_name[i] = *str;
+               if (*str == 0)
+                       break;
+       }
+
+       if (current_driver_name[0])
+               pr_info("DMA-API: enable driver filter for driver [%s]\n",
+                       current_driver_name);
+
+
+       return 1;
+}
+__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/lib/rational.c b/lib/rational.c
new file mode 100644 (file)
index 0000000..b3c099b
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * rational fractions
+ *
+ * Copyright (C) 2009 emlix GmbH, Oskar Schirmer <os@emlix.com>
+ *
+ * helper functions when coping with rational numbers
+ */
+
+#include <linux/rational.h>
+
+/*
+ * calculate best rational approximation for a given fraction
+ * taking into account restricted register size, e.g. to find
+ * appropriate values for a pll with 5 bit denominator and
+ * 8 bit numerator register fields, trying to set up with a
+ * frequency ratio of 3.1415, one would say:
+ *
+ * rational_best_approximation(31415, 10000,
+ *             (1 << 8) - 1, (1 << 5) - 1, &n, &d);
+ *
+ * you may look at given_numerator as a fixed point number,
+ * with the fractional part size described in given_denominator.
+ *
+ * the result is stored in *best_numerator and *best_denominator.
+ * it is the last continued fraction convergent of
+ * given_numerator / given_denominator whose numerator and
+ * denominator do not exceed max_numerator and max_denominator.
+ *
+ * for theoretical background, see:
+ * http://en.wikipedia.org/wiki/Continued_fraction
+ */
+
+void rational_best_approximation(
+       unsigned long given_numerator, unsigned long given_denominator,
+       unsigned long max_numerator, unsigned long max_denominator,
+       unsigned long *best_numerator, unsigned long *best_denominator)
+{
+       unsigned long n, d, n0, d0, n1, d1;
+       n = given_numerator;
+       d = given_denominator;
+       /* n0/d0 is the previous convergent, n1/d1 the current one */
+       n0 = d1 = 0;
+       n1 = d0 = 1;
+       for (;;) {
+               unsigned long t, a;
+               /* current convergent exceeds a limit: use the previous one */
+               if ((n1 > max_numerator) || (d1 > max_denominator)) {
+                       n1 = n0;
+                       d1 = d0;
+                       break;
+               }
+               /* no remainder left: the current convergent is the result */
+               if (d == 0)
+                       break;
+               /* a is the next continued fraction term of n / d */
+               t = d;
+               a = n / d;
+               d = n % d;
+               n = t;
+               /* next convergent = previous + a * current */
+               t = n0 + a * n1;
+               n0 = n1;
+               n1 = t;
+               t = d0 + a * d1;
+               d0 = d1;
+               d1 = t;
+       }
+       *best_numerator = n1;
+       *best_denominator = d1;
+}
+
+EXPORT_SYMBOL(rational_best_approximation);
index 2b0b5a7..bffe6d7 100644 (file)
@@ -60,8 +60,8 @@ enum dma_sync_target {
 int swiotlb_force;
 
 /*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in unmap_single and
+ * sync_single_*, to see if the memory was in fact allocated by this
  * API.
  */
 static char *io_tlb_start, *io_tlb_end;
@@ -129,7 +129,7 @@ dma_addr_t __weak swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
        return paddr;
 }
 
-phys_addr_t __weak swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t __weak swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
 {
        return baddr;
 }
@@ -140,9 +140,15 @@ static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
        return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
 }
 
-static void *swiotlb_bus_to_virt(dma_addr_t address)
+void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
 {
-       return phys_to_virt(swiotlb_bus_to_phys(address));
+       return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
+}
+
+int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
+                                              dma_addr_t addr, size_t size)
+{
+       return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
 int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -309,10 +315,10 @@ cleanup1:
        return -ENOMEM;
 }
 
-static int
+static inline int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 {
-       return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
+       return swiotlb_arch_address_needs_mapping(hwdev, addr, size);
 }
 
 static inline int range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -341,7 +347,7 @@ static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
                unsigned long flags;
 
                while (size) {
-                       sz = min(PAGE_SIZE - offset, size);
+                       sz = min_t(size_t, PAGE_SIZE - offset, size);
 
                        local_irq_save(flags);
                        buffer = kmap_atomic(pfn_to_page(pfn),
@@ -476,7 +482,7 @@ found:
  * dma_addr is the kernel virtual address of the bounce buffer to unmap.
  */
 static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
 {
        unsigned long flags;
        int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -560,7 +566,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                                   size)) {
                /*
                 * The allocated memory isn't reachable by the device.
-                * Fall back on swiotlb_map_single().
                 */
                free_pages((unsigned long) ret, order);
                ret = NULL;
@@ -568,9 +573,8 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
        if (!ret) {
                /*
                 * We are either out of memory or the device can't DMA
-                * to GFP_DMA memory; fall back on
-                * swiotlb_map_single(), which will grab memory from
-                * the lowest available address range.
+                * to GFP_DMA memory; fall back on map_single(), which
+                * will grab memory from the lowest available address range.
                 */
                ret = map_single(hwdev, 0, size, DMA_FROM_DEVICE);
                if (!ret)
@@ -587,7 +591,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                       (unsigned long long)dev_addr);
 
                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-               unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
+               do_unmap_single(hwdev, ret, size, DMA_TO_DEVICE);
                return NULL;
        }
        *dma_handle = dev_addr;
@@ -604,7 +608,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
                free_pages((unsigned long) vaddr, get_order(size));
        else
                /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-               unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
+               do_unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL(swiotlb_free_coherent);
 
@@ -634,7 +638,7 @@ swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
  * physical address to use is returned.
  *
  * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
  */
 dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                            unsigned long offset, size_t size,
@@ -642,18 +646,17 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
                            struct dma_attrs *attrs)
 {
        phys_addr_t phys = page_to_phys(page) + offset;
-       void *ptr = page_address(page) + offset;
        dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys);
        void *map;
 
        BUG_ON(dir == DMA_NONE);
        /*
-        * If the pointer passed in happens to be in the device's DMA window,
+        * If the address happens to be in the device's DMA window,
         * we can safely return the device addr and not worry about bounce
         * buffering it.
         */
        if (!address_needs_mapping(dev, dev_addr, size) &&
-           !range_needs_mapping(virt_to_phys(ptr), size))
+           !range_needs_mapping(phys, size))
                return dev_addr;
 
        /*
@@ -679,23 +682,35 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
 
 /*
  * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call.  All
+ * match what was provided for in a previous swiotlb_map_page call.  All
  * other usages are undefined.
  *
  * After this call, reads by the cpu to the buffer are guaranteed to see
  * whatever the device wrote there.
  */
+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+                        size_t size, int dir)
+{
+       /* translate the bus address to the kernel virtual address */
+       char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+
+       BUG_ON(dir == DMA_NONE);
+
+       /* address is a swiotlb buffer: hand it to do_unmap_single() */
+       if (is_swiotlb_buffer(dma_addr)) {
+               do_unmap_single(hwdev, dma_addr, size, dir);
+               return;
+       }
+
+       /* otherwise only DMA_FROM_DEVICE mappings get dma_mark_clean() */
+       if (dir != DMA_FROM_DEVICE)
+               return;
+
+       dma_mark_clean(dma_addr, size);
+}
+
 void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
                        size_t size, enum dma_data_direction dir,
                        struct dma_attrs *attrs)
 {
-       char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
-       BUG_ON(dir == DMA_NONE);
-       if (is_swiotlb_buffer(dma_addr))
-               unmap_single(hwdev, dma_addr, size, dir);
-       else if (dir == DMA_FROM_DEVICE)
-               dma_mark_clean(dma_addr, size);
+       unmap_single(hwdev, dev_addr, size, dir);
 }
 EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
 
@@ -703,7 +718,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
  * Make physical memory consistent for a single streaming mode DMA translation
  * after a transfer.
  *
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * If you perform a swiotlb_map_page() but wish to interrogate the buffer
  * using the cpu, yet do not wish to teardown the dma mapping, you must
  * call this function before doing so.  At the next point you give the dma
  * address back to the card, you must first perform a
@@ -713,13 +728,19 @@ static void
 swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
                    size_t size, int dir, int target)
 {
-       char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+       char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
 
        BUG_ON(dir == DMA_NONE);
-       if (is_swiotlb_buffer(dma_addr))
+
+       if (is_swiotlb_buffer(dma_addr)) {
                sync_single(hwdev, dma_addr, size, dir, target);
-       else if (dir == DMA_FROM_DEVICE)
-               dma_mark_clean(dma_addr, size);
+               return;
+       }
+
+       if (dir != DMA_FROM_DEVICE)
+               return;
+
+       dma_mark_clean(dma_addr, size);
 }
 
 void
@@ -746,13 +767,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
                          unsigned long offset, size_t size,
                          int dir, int target)
 {
-       char *dma_addr = swiotlb_bus_to_virt(dev_addr) + offset;
-
-       BUG_ON(dir == DMA_NONE);
-       if (is_swiotlb_buffer(dma_addr))
-               sync_single(hwdev, dma_addr, size, dir, target);
-       else if (dir == DMA_FROM_DEVICE)
-               dma_mark_clean(dma_addr, size);
+       swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
 }
 
 void
@@ -777,7 +792,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
 
 /*
  * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
+ * This is the scatter-gather version of the above swiotlb_map_page
  * interface.  Here the scatter gather list elements are each tagged with the
  * appropriate dma address and length.  They are obtained via
  * sg_dma_{address,length}(SG).
@@ -788,7 +803,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
  *       The routine returns the number of addr/length pairs actually
  *       used, at most nents.
  *
- * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * Device ownership issues as mentioned above for swiotlb_map_page are the
  * same here.
  */
 int
@@ -836,7 +851,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
 
 /*
  * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
+ * concerning calls here are the same as for swiotlb_unmap_page() above.
  */
 void
 swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
@@ -847,13 +862,9 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
 
        BUG_ON(dir == DMA_NONE);
 
-       for_each_sg(sgl, sg, nelems, i) {
-               if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
-                       unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
-                                    sg->dma_length, dir);
-               else if (dir == DMA_FROM_DEVICE)
-                       dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
-       }
+       for_each_sg(sgl, sg, nelems, i)
+               unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+
 }
 EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
 
@@ -879,15 +890,9 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
        struct scatterlist *sg;
        int i;
 
-       BUG_ON(dir == DMA_NONE);
-
-       for_each_sg(sgl, sg, nelems, i) {
-               if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg)))
-                       sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
+       for_each_sg(sgl, sg, nelems, i)
+               swiotlb_sync_single(hwdev, sg->dma_address,
                                    sg->dma_length, dir, target);
-               else if (dir == DMA_FROM_DEVICE)
-                       dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length);
-       }
 }
 
 void
index 7536ace..756ccaf 100644 (file)
@@ -408,6 +408,8 @@ enum format_type {
        FORMAT_TYPE_LONG_LONG,
        FORMAT_TYPE_ULONG,
        FORMAT_TYPE_LONG,
+       FORMAT_TYPE_UBYTE,
+       FORMAT_TYPE_BYTE,
        FORMAT_TYPE_USHORT,
        FORMAT_TYPE_SHORT,
        FORMAT_TYPE_UINT,
@@ -573,12 +575,15 @@ static char *string(char *buf, char *end, char *s, struct printf_spec spec)
 }
 
 static char *symbol_string(char *buf, char *end, void *ptr,
-                               struct printf_spec spec)
+                               struct printf_spec spec, char ext)
 {
        unsigned long value = (unsigned long) ptr;
 #ifdef CONFIG_KALLSYMS
        char sym[KSYM_SYMBOL_LEN];
-       sprint_symbol(sym, value);
+       if (ext != 'f')
+               sprint_symbol(sym, value);
+       else
+               kallsyms_lookup(value, NULL, NULL, NULL, sym);
        return string(buf, end, sym, spec);
 #else
        spec.field_width = 2*sizeof(void *);
@@ -690,7 +695,8 @@ static char *ip4_addr_string(char *buf, char *end, u8 *addr,
  *
  * Right now we handle:
  *
- * - 'F' For symbolic function descriptor pointers
+ * - 'F' For symbolic function descriptor pointers with offset
+ * - 'f' For simple symbolic function names without offset
  * - 'S' For symbolic direct pointers
  * - 'R' For a struct resource pointer, it prints the range of
  *       addresses (not the name nor the flags)
@@ -713,10 +719,11 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 
        switch (*fmt) {
        case 'F':
+       case 'f':
                ptr = dereference_function_descriptor(ptr);
                /* Fallthrough */
        case 'S':
-               return symbol_string(buf, end, ptr, spec);
+               return symbol_string(buf, end, ptr, spec, *fmt);
        case 'R':
                return resource_string(buf, end, ptr, spec);
        case 'm':
@@ -853,11 +860,15 @@ qualifier:
        spec->qualifier = -1;
        if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' ||
            *fmt == 'Z' || *fmt == 'z' || *fmt == 't') {
-               spec->qualifier = *fmt;
-               ++fmt;
-               if (spec->qualifier == 'l' && *fmt == 'l') {
-                       spec->qualifier = 'L';
-                       ++fmt;
+               spec->qualifier = *fmt++;
+               if (unlikely(spec->qualifier == *fmt)) {
+                       if (spec->qualifier == 'l') {
+                               spec->qualifier = 'L';
+                               ++fmt;
+                       } else if (spec->qualifier == 'h') {
+                               spec->qualifier = 'H';
+                               ++fmt;
+                       }
                }
        }
 
@@ -919,6 +930,11 @@ qualifier:
                spec->type = FORMAT_TYPE_SIZE_T;
        } else if (spec->qualifier == 't') {
                spec->type = FORMAT_TYPE_PTRDIFF;
+       } else if (spec->qualifier == 'H') {
+               if (spec->flags & SIGN)
+                       spec->type = FORMAT_TYPE_BYTE;
+               else
+                       spec->type = FORMAT_TYPE_UBYTE;
        } else if (spec->qualifier == 'h') {
                if (spec->flags & SIGN)
                        spec->type = FORMAT_TYPE_SHORT;
@@ -943,7 +959,8 @@ qualifier:
  *
  * This function follows C99 vsnprintf, but has some extensions:
  * %pS output the name of a text symbol
- * %pF output the name of a function pointer
+ * %pF output the name of a function pointer with its offset
+ * %pf output the name of a function pointer without its offset
  * %pR output the address range in a struct resource
  *
  * The return value is the number of characters which would
@@ -1087,6 +1104,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
                        case FORMAT_TYPE_PTRDIFF:
                                num = va_arg(args, ptrdiff_t);
                                break;
+                       case FORMAT_TYPE_UBYTE:
+                               num = (unsigned char) va_arg(args, int);
+                               break;
+                       case FORMAT_TYPE_BYTE:
+                               num = (signed char) va_arg(args, int);
+                               break;
                        case FORMAT_TYPE_USHORT:
                                num = (unsigned short) va_arg(args, int);
                                break;
@@ -1363,6 +1386,10 @@ do {                                                                     \
                        case FORMAT_TYPE_PTRDIFF:
                                save_arg(ptrdiff_t);
                                break;
+                       case FORMAT_TYPE_UBYTE:
+                       case FORMAT_TYPE_BYTE:
+                               save_arg(char);
+                               break;
                        case FORMAT_TYPE_USHORT:
                        case FORMAT_TYPE_SHORT:
                                save_arg(short);
@@ -1391,7 +1418,8 @@ EXPORT_SYMBOL_GPL(vbin_printf);
  *
  * The format follows C99 vsnprintf, but has some extensions:
  * %pS output the name of a text symbol
- * %pF output the name of a function pointer
+ * %pF output the name of a function pointer with its offset
+ * %pf output the name of a function pointer without its offset
  * %pR output the address range in a struct resource
  * %n is ignored
  *
@@ -1538,6 +1566,12 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
                        case FORMAT_TYPE_PTRDIFF:
                                num = get_arg(ptrdiff_t);
                                break;
+                       case FORMAT_TYPE_UBYTE:
+                               num = get_arg(unsigned char);
+                               break;
+                       case FORMAT_TYPE_BYTE:
+                               num = get_arg(signed char);
+                               break;
                        case FORMAT_TYPE_USHORT:
                                num = get_arg(unsigned short);
                                break;
index e590272..65f5e17 100644 (file)
 #include <linux/hash.h>
 #include <linux/highmem.h>
 #include <linux/blktrace_api.h>
-#include <trace/block.h>
 #include <asm/tlbflush.h>
 
+#include <trace/events/block.h>
+
 #define POOL_SIZE      64
 #define ISA_POOL_SIZE  16
 
 static mempool_t *page_pool, *isa_page_pool;
 
-DEFINE_TRACE(block_bio_bounce);
-
 #ifdef CONFIG_HIGHMEM
 static __init int init_emergency_pool(void)
 {
index cbe9e05..ac13043 100644 (file)
@@ -629,52 +629,43 @@ void user_shm_unlock(size_t size, struct user_struct *user)
        free_uid(user);
 }
 
-void *alloc_locked_buffer(size_t size)
+int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
+                         size_t size)
 {
-       unsigned long rlim, vm, pgsz;
-       void *buffer = NULL;
+       unsigned long lim, vm, pgsz;
+       int error = -ENOMEM;
 
        pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-       down_write(&current->mm->mmap_sem);
-
-       rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
-       vm   = current->mm->total_vm + pgsz;
-       if (rlim < vm)
-               goto out;
+       down_write(&mm->mmap_sem);
 
-       rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
-       vm   = current->mm->locked_vm + pgsz;
-       if (rlim < vm)
+       lim = rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+       vm   = mm->total_vm + pgsz;
+       if (lim < vm)
                goto out;
 
-       buffer = kzalloc(size, GFP_KERNEL);
-       if (!buffer)
+       lim = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+       vm   = mm->locked_vm + pgsz;
+       if (lim < vm)
                goto out;
 
-       current->mm->total_vm  += pgsz;
-       current->mm->locked_vm += pgsz;
+       mm->total_vm  += pgsz;
+       mm->locked_vm += pgsz;
 
+       error = 0;
  out:
-       up_write(&current->mm->mmap_sem);
-       return buffer;
+       up_write(&mm->mmap_sem);
+       return error;
 }
 
-void release_locked_buffer(void *buffer, size_t size)
+void refund_locked_memory(struct mm_struct *mm, size_t size)
 {
        unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-       down_write(&current->mm->mmap_sem);
-
-       current->mm->total_vm  -= pgsz;
-       current->mm->locked_vm -= pgsz;
-
-       up_write(&current->mm->mmap_sem);
-}
+       down_write(&mm->mmap_sem);
 
-void free_locked_buffer(void *buffer, size_t size)
-{
-       release_locked_buffer(buffer, size);
+       mm->total_vm  -= pgsz;
+       mm->locked_vm -= pgsz;
 
-       kfree(buffer);
+       up_write(&mm->mmap_sem);
 }
index fe753ec..474c7e9 100644 (file)
@@ -149,10 +149,6 @@ static unsigned long __meminitdata dma_reserve;
   static int __meminitdata nr_nodemap_entries;
   static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
   static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-  static unsigned long __meminitdata node_boundary_start_pfn[MAX_NUMNODES];
-  static unsigned long __meminitdata node_boundary_end_pfn[MAX_NUMNODES];
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
   static unsigned long __initdata required_kernelcore;
   static unsigned long __initdata required_movablecore;
   static unsigned long __meminitdata zone_movable_pfn[MAX_NUMNODES];
@@ -3102,64 +3098,6 @@ void __init sparse_memory_present_with_active_regions(int nid)
                                early_node_map[i].end_pfn);
 }
 
-/**
- * push_node_boundaries - Push node boundaries to at least the requested boundary
- * @nid: The nid of the node to push the boundary for
- * @start_pfn: The start pfn of the node
- * @end_pfn: The end pfn of the node
- *
- * In reserve-based hot-add, mem_map is allocated that is unused until hotadd
- * time. Specifically, on x86_64, SRAT will report ranges that can potentially
- * be hotplugged even though no physical memory exists. This function allows
- * an arch to push out the node boundaries so mem_map is allocated that can
- * be used later.
- */
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-void __init push_node_boundaries(unsigned int nid,
-               unsigned long start_pfn, unsigned long end_pfn)
-{
-       mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-                       "Entering push_node_boundaries(%u, %lu, %lu)\n",
-                       nid, start_pfn, end_pfn);
-
-       /* Initialise the boundary for this node if necessary */
-       if (node_boundary_end_pfn[nid] == 0)
-               node_boundary_start_pfn[nid] = -1UL;
-
-       /* Update the boundaries */
-       if (node_boundary_start_pfn[nid] > start_pfn)
-               node_boundary_start_pfn[nid] = start_pfn;
-       if (node_boundary_end_pfn[nid] < end_pfn)
-               node_boundary_end_pfn[nid] = end_pfn;
-}
-
-/* If necessary, push the node boundary out for reserve hotadd */
-static void __meminit account_node_boundary(unsigned int nid,
-               unsigned long *start_pfn, unsigned long *end_pfn)
-{
-       mminit_dprintk(MMINIT_TRACE, "zoneboundary",
-                       "Entering account_node_boundary(%u, %lu, %lu)\n",
-                       nid, *start_pfn, *end_pfn);
-
-       /* Return if boundary information has not been provided */
-       if (node_boundary_end_pfn[nid] == 0)
-               return;
-
-       /* Check the boundaries and update if necessary */
-       if (node_boundary_start_pfn[nid] < *start_pfn)
-               *start_pfn = node_boundary_start_pfn[nid];
-       if (node_boundary_end_pfn[nid] > *end_pfn)
-               *end_pfn = node_boundary_end_pfn[nid];
-}
-#else
-void __init push_node_boundaries(unsigned int nid,
-               unsigned long start_pfn, unsigned long end_pfn) {}
-
-static void __meminit account_node_boundary(unsigned int nid,
-               unsigned long *start_pfn, unsigned long *end_pfn) {}
-#endif
-
-
 /**
  * get_pfn_range_for_nid - Return the start and end page frames for a node
  * @nid: The nid to return the range for. If MAX_NUMNODES, the min and max PFN are returned.
@@ -3185,9 +3123,6 @@ void __meminit get_pfn_range_for_nid(unsigned int nid,
 
        if (*start_pfn == -1UL)
                *start_pfn = 0;
-
-       /* Push the node boundaries out if requested */
-       account_node_boundary(nid, start_pfn, end_pfn);
 }
 
 /*
@@ -3793,10 +3728,6 @@ void __init remove_all_active_ranges(void)
 {
        memset(early_node_map, 0, sizeof(early_node_map));
        nr_nodemap_entries = 0;
-#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
-       memset(node_boundary_start_pfn, 0, sizeof(node_boundary_start_pfn));
-       memset(node_boundary_end_pfn, 0, sizeof(node_boundary_end_pfn));
-#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
 }
 
 /* Compare two active node_active_regions */
index 1aa5d8f..c0b2c1a 100644 (file)
@@ -23,7 +23,7 @@
  * Allocation is done in offset-size areas of single unit space.  Ie,
  * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0,
  * c1:u1, c1:u2 and c1:u3.  Percpu access can be done by configuring
- * percpu base registers UNIT_SIZE apart.
+ * percpu base registers pcpu_unit_size apart.
  *
  * There are usually many small percpu allocations many of them as
  * small as 4 bytes.  The allocator organizes chunks into lists
@@ -38,8 +38,8 @@
  * region and negative allocated.  Allocation inside a chunk is done
  * by scanning this map sequentially and serving the first matching
  * entry.  This is mostly copied from the percpu_modalloc() allocator.
- * Chunks are also linked into a rb tree to ease address to chunk
- * mapping during free.
+ * Chunks can be determined from the address using the index field
+ * in the page struct. The index field contains a pointer to the chunk.
  *
  * To use this allocator, arch code should do the followings.
  *
@@ -61,7 +61,6 @@
 #include <linux/mutex.h>
 #include <linux/percpu.h>
 #include <linux/pfn.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
@@ -88,7 +87,6 @@
 
 struct pcpu_chunk {
        struct list_head        list;           /* linked to pcpu_slot lists */
-       struct rb_node          rb_node;        /* key is chunk->vm->addr */
        int                     free_size;      /* free bytes in the chunk */
        int                     contig_hint;    /* max contiguous size hint */
        struct vm_struct        *vm;            /* mapped vmalloc region */
@@ -110,9 +108,21 @@ static size_t pcpu_chunk_struct_size __read_mostly;
 void *pcpu_base_addr __read_mostly;
 EXPORT_SYMBOL_GPL(pcpu_base_addr);
 
-/* optional reserved chunk, only accessible for reserved allocations */
+/*
+ * The first chunk which always exists.  Note that unlike other
+ * chunks, this one can be allocated and mapped in several different
+ * ways and thus often doesn't live in the vmalloc area.
+ */
+static struct pcpu_chunk *pcpu_first_chunk;
+
+/*
+ * Optional reserved chunk.  This chunk reserves part of the first
+ * chunk and serves it for reserved allocations.  The amount of
+ * reserved offset is in pcpu_reserved_chunk_limit.  When reserved
+ * area doesn't exist, the following variables contain NULL and 0
+ * respectively.
+ */
 static struct pcpu_chunk *pcpu_reserved_chunk;
-/* offset limit of the reserved chunk */
 static int pcpu_reserved_chunk_limit;
 
 /*
@@ -121,7 +131,7 @@ static int pcpu_reserved_chunk_limit;
  * There are two locks - pcpu_alloc_mutex and pcpu_lock.  The former
  * protects allocation/reclaim paths, chunks and chunk->page arrays.
  * The latter is a spinlock and protects the index data structures -
- * chunk slots, rbtree, chunks and area maps in chunks.
+ * chunk slots, chunks and area maps in chunks.
  *
  * During allocation, pcpu_alloc_mutex is kept locked all the time and
  * pcpu_lock is grabbed and released as necessary.  All actual memory
@@ -140,7 +150,6 @@ static DEFINE_MUTEX(pcpu_alloc_mutex);      /* protects whole alloc and reclaim */
 static DEFINE_SPINLOCK(pcpu_lock);     /* protects index data structures */
 
 static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
-static struct rb_root pcpu_addr_root = RB_ROOT;        /* chunks by address */
 
 /* reclaim work to release fully free chunks, scheduled from free path */
 static void pcpu_reclaim(struct work_struct *work);
@@ -191,6 +200,18 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk,
        return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL;
 }
 
+/*
+ * Record @pcpu as the chunk that @page belongs to by storing the chunk
+ * pointer, cast to unsigned long, in page->index.
+ */
+static void pcpu_set_page_chunk(struct page *page, struct pcpu_chunk *pcpu)
+{
+       page->index = (unsigned long)pcpu;
+}
+
+/*
+ * Return the chunk pointer previously stored in page->index of @page,
+ * converting the unsigned long value back to a struct pcpu_chunk pointer.
+ */
+static struct pcpu_chunk *pcpu_get_page_chunk(struct page *page)
+{
+       return (struct pcpu_chunk *)page->index;
+}
+
 /**
  * pcpu_mem_alloc - allocate memory
  * @size: bytes to allocate
@@ -257,93 +278,26 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
        }
 }
 
-static struct rb_node **pcpu_chunk_rb_search(void *addr,
-                                            struct rb_node **parentp)
-{
-       struct rb_node **p = &pcpu_addr_root.rb_node;
-       struct rb_node *parent = NULL;
-       struct pcpu_chunk *chunk;
-
-       while (*p) {
-               parent = *p;
-               chunk = rb_entry(parent, struct pcpu_chunk, rb_node);
-
-               if (addr < chunk->vm->addr)
-                       p = &(*p)->rb_left;
-               else if (addr > chunk->vm->addr)
-                       p = &(*p)->rb_right;
-               else
-                       break;
-       }
-
-       if (parentp)
-               *parentp = parent;
-       return p;
-}
-
 /**
- * pcpu_chunk_addr_search - search for chunk containing specified address
- * @addr: address to search for
- *
- * Look for chunk which might contain @addr.  More specifically, it
- * searchs for the chunk with the highest start address which isn't
- * beyond @addr.
- *
- * CONTEXT:
- * pcpu_lock.
+ * pcpu_chunk_addr_search - determine chunk containing specified address
+ * @addr: address for which the chunk needs to be determined.
  *
  * RETURNS:
  * The address of the found chunk.
  */
 static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 {
-       struct rb_node *n, *parent;
-       struct pcpu_chunk *chunk;
+       void *first_start = pcpu_first_chunk->vm->addr;
 
-       /* is it in the reserved chunk? */
-       if (pcpu_reserved_chunk) {
-               void *start = pcpu_reserved_chunk->vm->addr;
-
-               if (addr >= start && addr < start + pcpu_reserved_chunk_limit)
+       /* is it in the first chunk? */
+       if (addr >= first_start && addr < first_start + pcpu_chunk_size) {
+               /* is it in the reserved area? */
+               if (addr < first_start + pcpu_reserved_chunk_limit)
                        return pcpu_reserved_chunk;
+               return pcpu_first_chunk;
        }
 
-       /* nah... search the regular ones */
-       n = *pcpu_chunk_rb_search(addr, &parent);
-       if (!n) {
-               /* no exactly matching chunk, the parent is the closest */
-               n = parent;
-               BUG_ON(!n);
-       }
-       chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-
-       if (addr < chunk->vm->addr) {
-               /* the parent was the next one, look for the previous one */
-               n = rb_prev(n);
-               BUG_ON(!n);
-               chunk = rb_entry(n, struct pcpu_chunk, rb_node);
-       }
-
-       return chunk;
-}
-
-/**
- * pcpu_chunk_addr_insert - insert chunk into address rb tree
- * @new: chunk to insert
- *
- * Insert @new into address rb tree.
- *
- * CONTEXT:
- * pcpu_lock.
- */
-static void pcpu_chunk_addr_insert(struct pcpu_chunk *new)
-{
-       struct rb_node **p, *parent;
-
-       p = pcpu_chunk_rb_search(new->vm->addr, &parent);
-       BUG_ON(*p);
-       rb_link_node(&new->rb_node, parent, p);
-       rb_insert_color(&new->rb_node, &pcpu_addr_root);
+       return pcpu_get_page_chunk(vmalloc_to_page(addr));
 }
 
 /**
@@ -755,6 +709,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size)
                                                  alloc_mask, 0);
                        if (!*pagep)
                                goto err;
+                       pcpu_set_page_chunk(*pagep, chunk);
                }
        }
 
@@ -879,7 +834,6 @@ restart:
 
        spin_lock_irq(&pcpu_lock);
        pcpu_chunk_relocate(chunk, -1);
-       pcpu_chunk_addr_insert(chunk);
        goto restart;
 
 area_found:
@@ -968,7 +922,6 @@ static void pcpu_reclaim(struct work_struct *work)
                if (chunk == list_first_entry(head, struct pcpu_chunk, list))
                        continue;
 
-               rb_erase(&chunk->rb_node, &pcpu_addr_root);
                list_move(&chunk->list, &todo);
        }
 
@@ -1147,7 +1100,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
 
        if (reserved_size) {
                schunk->free_size = reserved_size;
-               pcpu_reserved_chunk = schunk;   /* not for dynamic alloc */
+               pcpu_reserved_chunk = schunk;
+               pcpu_reserved_chunk_limit = static_size + reserved_size;
        } else {
                schunk->free_size = dyn_size;
                dyn_size = 0;                   /* dynamic area covered */
@@ -1158,8 +1112,6 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
        if (schunk->free_size)
                schunk->map[schunk->map_used++] = schunk->free_size;
 
-       pcpu_reserved_chunk_limit = static_size + schunk->free_size;
-
        /* init dynamic chunk if necessary */
        if (dyn_size) {
                dchunk = alloc_bootmem(sizeof(struct pcpu_chunk));
@@ -1226,13 +1178,8 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn,
        }
 
        /* link the first chunk in */
-       if (!dchunk) {
-               pcpu_chunk_relocate(schunk, -1);
-               pcpu_chunk_addr_insert(schunk);
-       } else {
-               pcpu_chunk_relocate(dchunk, -1);
-               pcpu_chunk_addr_insert(dchunk);
-       }
+       pcpu_first_chunk = dchunk ?: schunk;
+       pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
        /* we're done */
        pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0);
index 9a90b00..f85831d 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
 #include       <linux/cpu.h>
 #include       <linux/sysctl.h>
 #include       <linux/module.h>
-#include       <trace/kmemtrace.h>
+#include       <linux/kmemtrace.h>
 #include       <linux/rcupdate.h>
 #include       <linux/string.h>
 #include       <linux/uaccess.h>
index f92e66d..9b1737b 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -66,7 +66,7 @@
 #include <linux/module.h>
 #include <linux/rcupdate.h>
 #include <linux/list.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <asm/atomic.h>
 
 /*
index 65ffda5..5e805a6 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -17,7 +17,7 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/mempolicy.h>
index 55bef16..abc65aa 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,9 +4,11 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
-#include <linux/tracepoint.h>
 #include <asm/uaccess.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 /**
  * kstrdup - allocate space for and copy an existing string
  * @s: the string to duplicate
@@ -255,13 +257,6 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
 /* Tracepoints definitions. */
-DEFINE_TRACE(kmalloc);
-DEFINE_TRACE(kmem_cache_alloc);
-DEFINE_TRACE(kmalloc_node);
-DEFINE_TRACE(kmem_cache_alloc_node);
-DEFINE_TRACE(kfree);
-DEFINE_TRACE(kmem_cache_free);
-
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
index 9fd0dc3..b75b6ce 100644 (file)
@@ -23,7 +23,7 @@
 #include <linux/bitops.h>
 #include <net/genetlink.h>
 
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include <asm/unaligned.h>
 
index c8fb456..499a67e 100644 (file)
 #include <linux/workqueue.h>
 #include <linux/netlink.h>
 #include <linux/net_dropmon.h>
-#include <trace/skb.h>
 
 #include <asm/unaligned.h>
 #include <asm/bitops.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/skb.h>
 
-DEFINE_TRACE(kfree_skb);
 EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
index e505b53..c2e4fb8 100644 (file)
@@ -65,7 +65,7 @@
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
-#include <trace/skb.h>
+#include <trace/events/skb.h>
 
 #include "kmap_skb.h"
 
index cc29b44..e5becb9 100644 (file)
@@ -167,6 +167,9 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
        struct tcf_exts e;
        int err;
 
+       if (!tca[TCA_OPTIONS])
+               return -EINVAL;
+
        if (head == NULL) {
                if (!handle)
                        return -EINVAL;
index 4b02f5a..b75d28c 100644 (file)
@@ -19,6 +19,12 @@ config SAMPLE_TRACEPOINTS
        help
          This build tracepoints example modules.
 
+config SAMPLE_TRACE_EVENTS
+       tristate "Build trace_events examples -- loadable modules only"
+       depends on EVENT_TRACING && m
+       help
+         This builds the trace event example modules.
+
 config SAMPLE_KOBJECT
        tristate "Build kobject examples"
        help
index 10eaca8..13e4b47 100644 (file)
@@ -1,3 +1,3 @@
 # Makefile for Linux samples code
 
-obj-$(CONFIG_SAMPLES)  += markers/ kobject/ kprobes/ tracepoints/
+obj-$(CONFIG_SAMPLES)  += markers/ kobject/ kprobes/ tracepoints/ trace_events/
diff --git a/samples/trace_events/Makefile b/samples/trace_events/Makefile
new file mode 100644 (file)
index 0000000..0d428dc
--- /dev/null
@@ -0,0 +1,6 @@
+# builds the trace events example kernel modules;
+# then to use one (as root):  insmod <module_name.ko>
+
+CFLAGS_trace-events-sample.o := -I$(src)
+
+obj-$(CONFIG_SAMPLE_TRACE_EVENTS) += trace-events-sample.o
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
new file mode 100644 (file)
index 0000000..aabc4e9
--- /dev/null
@@ -0,0 +1,52 @@
+#include <linux/module.h>
+#include <linux/kthread.h>
+
+/*
+ * Any file that uses trace points must include the header.
+ * Exactly one file must also define CREATE_TRACE_POINTS before
+ * including it.  That makes the header emit the C code that
+ * creates the handles for the trace points.
+ */
+#define CREATE_TRACE_POINTS
+#include "trace-events-sample.h"
+
+
+/*
+ * One iteration of the sample thread: mark the task interruptible,
+ * sleep via schedule_timeout(HZ), then fire the foo_bar trace event
+ * with the string "hello" and the caller-supplied counter @cnt.
+ */
+static void simple_thread_func(int cnt)
+{
+       set_current_state(TASK_INTERRUPTIBLE);
+       schedule_timeout(HZ);
+       trace_foo_bar("hello", cnt);
+}
+
+/*
+ * Thread function handed to kthread_run(): calls simple_thread_func()
+ * with a counter that starts at 0 and increments on every pass, until
+ * kthread_should_stop() returns true.  @arg is not used.
+ *
+ * Always returns 0.
+ */
+static int simple_thread(void *arg)
+{
+       int cnt = 0;
+
+       while (!kthread_should_stop())
+               simple_thread_func(cnt++);
+
+       return 0;
+}
+
+static struct task_struct *simple_tsk;
+
+/*
+ * Module entry point: start the "event-sample" kernel thread running
+ * simple_thread() and keep its task pointer in simple_tsk.
+ *
+ * Returns 0 on success.  If kthread_run() fails, the errno encoded in
+ * the returned error pointer is propagated instead of a bare -1, so
+ * the module loader reports the real cause of the failure.
+ */
+static int __init trace_event_init(void)
+{
+       simple_tsk = kthread_run(simple_thread, NULL, "event-sample");
+       if (IS_ERR(simple_tsk))
+               return PTR_ERR(simple_tsk);
+
+       return 0;
+}
+
+/*
+ * Module exit point: calls kthread_stop() on the task stored in
+ * simple_tsk by trace_event_init().
+ */
+static void __exit trace_event_exit(void)
+{
+       kthread_stop(simple_tsk);
+}
+
+module_init(trace_event_init);
+module_exit(trace_event_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("trace-events-sample");
+MODULE_LICENSE("GPL");
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
new file mode 100644 (file)
index 0000000..128a897
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Notice that this file is not protected like a normal header.
+ * We also must allow for rereading of this file. The
+ *
+ *  || defined(TRACE_HEADER_MULTI_READ)
+ *
+ * serves this purpose.
+ */
+#if !defined(_TRACE_EVENT_SAMPLE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EVENT_SAMPLE_H
+
+/*
+ * All trace headers should include tracepoint.h, until we finally
+ * make it into a standard header.
+ */
+#include <linux/tracepoint.h>
+
+/*
+ * If TRACE_SYSTEM is defined, that will be the directory created
+ * in the ftrace directory under /debugfs/tracing/events/<system>
+ *
+ * The define_trace.h below will also look for a file name of
+ * TRACE_SYSTEM.h where TRACE_SYSTEM is what is defined here.
+ *
+ * If you want a different system than file name, you can override
+ * the header name by defining TRACE_INCLUDE_FILE
+ *
+ * If this file was called, goofy.h, then we would define:
+ *
+ * #define TRACE_INCLUDE_FILE goofy
+ *
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM sample
+
+/*
+ * The TRACE_EVENT macro is broken up into 6 parts.
+ *
+ * name: name of the trace point. This is also how to enable the tracepoint.
+ *   A function called trace_foo_bar() will be created.
+ *
+ * proto: the prototype of the function trace_foo_bar()
+ *   Here it is trace_foo_bar(char *foo, int bar).
+ *
+ * args:  must match the arguments in the prototype.
+ *    Here it is simply "foo, bar".
+ *
+ * struct:  This defines the way the data will be stored in the ring buffer.
+ *    There are currently two types of elements. __field and __array.
+ *    a __field is broken up into (type, name). Where type can be any
+ *    type but an array.
+ *    For an array. there are three fields. (type, name, size). The
+ *    type of elements in the array, the name of the field and the size
+ *    of the array.
+ *
+ *    __array( char, foo, 10) is the same as saying   char foo[10].
+ *
+ * fast_assign: This is a C like function that is used to store the items
+ *    into the ring buffer.
+ *
+ * printk: This is a way to print out the data in pretty print. This is
+ *    useful if the system crashes and you are logging via a serial line,
+ *    the data can be printed to the console using this "printk" method.
+ *
+ * Note, that for both the assign and the printk, __entry is the handler
+ * to the data structure in the ring buffer, and is defined by the
+ * TP_STRUCT__entry.
+ */
+/*
+ * foo_bar: sample event that records a short string and an integer.
+ *
+ * The string is copied into a fixed 10-byte array, so longer input is
+ * truncated.  The last byte is always forced to NUL because TP_printk
+ * prints the array with %s and strncpy() alone leaves it unterminated
+ * when the source is 10 characters or longer.
+ */
+TRACE_EVENT(foo_bar,
+
+       TP_PROTO(char *foo, int bar),
+
+       TP_ARGS(foo, bar),
+
+       TP_STRUCT__entry(
+               __array(        char,   foo,    10              )
+               __field(        int,    bar                     )
+       ),
+
+       TP_fast_assign(
+               strncpy(__entry->foo, foo, sizeof(__entry->foo));
+               /* guarantee termination for the %s in TP_printk */
+               __entry->foo[sizeof(__entry->foo) - 1] = '\0';
+               __entry->bar    = bar;
+       ),
+
+       TP_printk("foo %s %d", __entry->foo, __entry->bar)
+);
+#endif
+
+/***** NOTICE! The #if protection ends here. *****/
+
+
+/*
+ * There are several ways I could have done this. If I left out the
+ * TRACE_INCLUDE_PATH, then it would default to the kernel source
+ * include/trace/events directory.
+ *
+ * I could specify a path from the define_trace.h file back to this
+ * file.
+ *
+ * #define TRACE_INCLUDE_PATH ../../samples/trace_events
+ *
+ * But I chose to simply make it use the current directory and then in
+ * the Makefile I added:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(src)
+ *
+ * This will make sure the current path is part of the include
+ * structure for our file so that we can find it.
+ *
+ * I could have made only the top level directory the include:
+ *
+ * CFLAGS_trace-events-sample.o := -I$(PWD)
+ *
+ * And then let the path to this directory be the TRACE_INCLUDE_PATH:
+ *
+ * #define TRACE_INCLUDE_PATH samples/trace_events
+ *
+ * But then if something defines "samples" or "trace_events" then we
+ * could risk that being converted too, and give us an unexpected
+ * result.
+ */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+/*
+ * TRACE_INCLUDE_FILE is not needed if the filename and TRACE_SYSTEM are equal
+ */
+#define TRACE_INCLUDE_FILE trace-events-sample
+#include <trace/define_trace.h>
index cba61ca..2b70661 100644 (file)
@@ -188,20 +188,34 @@ cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@
 # ---------------------------------------------------------------------------
 
 quiet_cmd_gzip = GZIP    $@
-cmd_gzip = gzip -f -9 < $< > $@
+cmd_gzip = (cat $(filter-out FORCE,$^) | gzip -f -9 > $@) || \
+       (rm -f $@ ; false)
 
 
 # Bzip2
 # ---------------------------------------------------------------------------
 
-# Bzip2 does not include size in file... so we have to fake that
-size_append=$(CONFIG_SHELL) $(srctree)/scripts/bin_size
-
-quiet_cmd_bzip2 = BZIP2    $@
-cmd_bzip2 = (bzip2 -9 < $< && $(size_append) $<) > $@ || (rm -f $@ ; false)
+# Bzip2 and LZMA do not include size in file... so we have to fake that;
+# append the size as a 32-bit littleendian number as gzip does.
+size_append = echo -ne $(shell                                         \
+dec_size=0;                                                            \
+for F in $1; do                                                                \
+       fsize=$$(stat -c "%s" $$F);                                     \
+       dec_size=$$(expr $$dec_size + $$fsize);                         \
+done;                                                                  \
+printf "%08x" $$dec_size |                                             \
+       sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g' \
+)
+
+quiet_cmd_bzip2 = BZIP2   $@
+cmd_bzip2 = (cat $(filter-out FORCE,$^) | \
+       bzip2 -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+       (rm -f $@ ; false)
 
 # Lzma
 # ---------------------------------------------------------------------------
 
 quiet_cmd_lzma = LZMA    $@
-cmd_lzma = (lzma -9 -c $< && $(size_append) $<) >$@ || (rm -f $@ ; false)
+cmd_lzma = (cat $(filter-out FORCE,$^) | \
+       lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+       (rm -f $@ ; false)
diff --git a/scripts/bin_size b/scripts/bin_size
deleted file mode 100644 (file)
index 43e1b36..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-
-if [ $# = 0 ] ; then
-   echo Usage: $0 file
-fi
-
-size_dec=`stat -c "%s" $1`
-size_hex_echo_string=`printf "%08x" $size_dec |
-     sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'`
-/bin/echo -ne $size_hex_echo_string
index 3208a3a..acd8c4a 100755 (executable)
@@ -1828,6 +1828,25 @@ sub reset_state {
     $state = 0;
 }
 
+# Rewrite a TRACE_EVENT() declaration held in the global $prototype into
+# the prototype of the matching trace_<name>() function.  The name is the
+# first TRACE_EVENT() argument and the parameter list is the contents of
+# TP_PROTO().  $file is only used in the warning message.
+#
+# If either piece cannot be extracted, a warning is printed to STDERR and
+# $prototype is left unchanged.
+sub tracepoint_munge($) {
+       my $file = shift;
+       # 0 is the "not found" sentinel checked with 'eq 0' below
+       my $tracepointname = 0;
+       my $tracepointargs = 0;
+
+       if($prototype =~ m/TRACE_EVENT\((.*?),/) {
+               $tracepointname = $1;
+       }
+       # NOTE(review): the non-greedy match ends at the first ')', so a
+       # TP_PROTO() containing nested parentheses would be cut short.
+       if($prototype =~ m/TP_PROTO\((.*?)\)/) {
+               $tracepointargs = $1;
+       }
+       if (($tracepointname eq 0) || ($tracepointargs eq 0)) {
+               print STDERR "Warning(${file}:$.): Unrecognized tracepoint format: \n".
+                            "$prototype\n";
+       } else {
+               $prototype = "static inline void trace_$tracepointname($tracepointargs)";
+       }
+}
+
 sub syscall_munge() {
        my $void = 0;
 
@@ -1882,6 +1901,9 @@ sub process_state3_function($$) {
        if ($prototype =~ /SYSCALL_DEFINE/) {
                syscall_munge();
        }
+       if ($prototype =~ /TRACE_EVENT/) {
+               tracepoint_munge($file);
+       }
        dump_function($prototype, $file);
        reset_state();
     }
index 409596e..0fae7da 100755 (executable)
@@ -26,7 +26,7 @@
 # which will also be the location of that section after final link.
 # e.g.
 #
-#  .section ".text.sched"
+#  .section ".sched.text", "ax"
 #  .globl my_func
 #  my_func:
 #        [...]
@@ -39,7 +39,7 @@
 #        [...]
 #
 # Both relocation offsets for the mcounts in the above example will be
-# offset from .text.sched. If we make another file called tmp.s with:
+# offset from .sched.text. If we make another file called tmp.s with:
 #
 #  .section __mcount_loc
 #  .quad  my_func + 0x5
@@ -51,7 +51,7 @@
 # But this gets hard if my_func is not globl (a static function).
 # In such a case we have:
 #
-#  .section ".text.sched"
+#  .section ".sched.text", "ax"
 #  my_func:
 #        [...]
 #        call mcount  (offset: 0x5)
index ffbe259..510186f 100644 (file)
@@ -84,8 +84,8 @@ static void *ima_measurements_next(struct seq_file *m, void *v, loff_t *pos)
         * against concurrent list-extension
         */
        rcu_read_lock();
-       qe = list_entry(rcu_dereference(qe->later.next),
-                       struct ima_queue_entry, later);
+       qe = list_entry_rcu(qe->later.next,
+                           struct ima_queue_entry, later);
        rcu_read_unlock();
        (*pos)++;
 
index e03a7e1..11d2cb1 100644 (file)
@@ -734,8 +734,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new)
                return;
        }
 
-       m = list_entry(rcu_dereference(smk_netlbladdr_list.next),
-                        struct smk_netlbladdr, list);
+       m = list_entry_rcu(smk_netlbladdr_list.next,
+                          struct smk_netlbladdr, list);
 
        /* the comparison '>' is a bit hacky, but works */
        if (new->smk_mask.s_addr > m->smk_mask.s_addr) {
@@ -748,8 +748,8 @@ static void smk_netlbladdr_insert(struct smk_netlbladdr *new)
                        list_add_rcu(&new->list, &m->list);
                        return;
                }
-               m_next = list_entry(rcu_dereference(m->list.next),
-                                struct smk_netlbladdr, list);
+               m_next = list_entry_rcu(m->list.next,
+                                       struct smk_netlbladdr, list);
                if (new->smk_mask.s_addr > m_next->smk_mask.s_addr) {
                        list_add_rcu(&new->list, &m->list);
                        return;
index 1ecbe23..4d0dd39 100644 (file)
@@ -2301,10 +2301,11 @@ int kvm_init(void *opaque, unsigned int vcpu_size,
 
        bad_pfn = page_to_pfn(bad_page);
 
-       if (!alloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
+       if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
                r = -ENOMEM;
                goto out_free_0;
        }
+       cpumask_clear(cpus_hardware_enabled);
 
        r = kvm_arch_hardware_setup();
        if (r < 0)