Merge branch 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
author: Linus Torvalds <torvalds@linux-foundation.org>
Mon, 13 Apr 2015 20:21:51 +0000 (13:21 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Mon, 13 Apr 2015 20:21:51 +0000 (13:21 -0700)
Pull x86 cacheinfo sysfs changes from Ingo Molnar:
 "This tree converts the x86 cacheinfo sysfs code to use the generic
  code in drivers/base/cacheinfo.c.

  It's not intended to change the sysfs ABI:

      'This patch neither alters any existing sysfs entries nor their
       formatting, however since the generic cacheinfo has switched to
       use the device attributes instead of the traditional raw
       kobjects, a directory named 'power' along with its standard
       attributes are added similar to any other device'"

* 'x86-cpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/cpu/cacheinfo: Fix cache_get_priv_group() for Intel processors
  x86/cacheinfo: Move cacheinfo sysfs code to generic infrastructure

1284 files changed:
Documentation/CodeOfConflict [new file with mode: 0644]
Documentation/devicetree/bindings/arm/exynos/power_domain.txt
Documentation/devicetree/bindings/arm/sti.txt
Documentation/devicetree/bindings/net/apm-xgene-enet.txt
Documentation/devicetree/bindings/net/dsa/dsa.txt
Documentation/devicetree/bindings/power/power_domain.txt
Documentation/devicetree/bindings/serial/8250.txt [moved from Documentation/devicetree/bindings/serial/of-serial.txt with 100% similarity]
Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt [new file with mode: 0644]
Documentation/devicetree/bindings/serial/snps-dw-apb-uart.txt
Documentation/devicetree/bindings/submitting-patches.txt
Documentation/devicetree/bindings/thermal/rcar-thermal.txt
Documentation/devicetree/bindings/vendor-prefixes.txt
Documentation/devicetree/bindings/watchdog/atmel-wdt.txt
Documentation/input/alps.txt
Documentation/input/event-codes.txt
Documentation/input/multi-touch-protocol.txt
Documentation/kernel-parameters.txt
Documentation/rtc.txt
Documentation/virtual/kvm/api.txt
Documentation/virtual/kvm/devices/s390_flic.txt
Documentation/x86/boot.txt
MAINTAINERS
Makefile
arch/alpha/kernel/rtc.c
arch/arc/kernel/signal.c
arch/arm/Kconfig
arch/arm/Makefile
arch/arm/boot/dts/am335x-bone-common.dtsi
arch/arm/boot/dts/am335x-bone.dts
arch/arm/boot/dts/am335x-lxm.dts
arch/arm/boot/dts/am33xx-clocks.dtsi
arch/arm/boot/dts/am43xx-clocks.dtsi
arch/arm/boot/dts/at91sam9260.dtsi
arch/arm/boot/dts/at91sam9261.dtsi
arch/arm/boot/dts/at91sam9263.dtsi
arch/arm/boot/dts/at91sam9g45.dtsi
arch/arm/boot/dts/at91sam9n12.dtsi
arch/arm/boot/dts/at91sam9x5.dtsi
arch/arm/boot/dts/dm8168-evm.dts
arch/arm/boot/dts/dm816x.dtsi
arch/arm/boot/dts/dra7-evm.dts
arch/arm/boot/dts/dra7.dtsi
arch/arm/boot/dts/dra72-evm.dts
arch/arm/boot/dts/dra7xx-clocks.dtsi
arch/arm/boot/dts/exynos3250.dtsi
arch/arm/boot/dts/exynos4-cpu-thermal.dtsi [new file with mode: 0644]
arch/arm/boot/dts/exynos4.dtsi
arch/arm/boot/dts/exynos4210-trats.dts
arch/arm/boot/dts/exynos4210-universal_c210.dts
arch/arm/boot/dts/exynos4210.dtsi
arch/arm/boot/dts/exynos4212.dtsi
arch/arm/boot/dts/exynos4412-odroid-common.dtsi
arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi [new file with mode: 0644]
arch/arm/boot/dts/exynos4412-trats2.dts
arch/arm/boot/dts/exynos4412.dtsi
arch/arm/boot/dts/exynos4x12.dtsi
arch/arm/boot/dts/exynos5250.dtsi
arch/arm/boot/dts/exynos5420-trip-points.dtsi [new file with mode: 0644]
arch/arm/boot/dts/exynos5420.dtsi
arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi [new file with mode: 0644]
arch/arm/boot/dts/exynos5440-trip-points.dtsi [new file with mode: 0644]
arch/arm/boot/dts/exynos5440.dtsi
arch/arm/boot/dts/imx6qdl-sabresd.dtsi
arch/arm/boot/dts/imx6sl-evk.dts
arch/arm/boot/dts/omap3.dtsi
arch/arm/boot/dts/omap5-core-thermal.dtsi
arch/arm/boot/dts/omap5-gpu-thermal.dtsi
arch/arm/boot/dts/omap5.dtsi
arch/arm/boot/dts/omap54xx-clocks.dtsi
arch/arm/boot/dts/rk3288.dtsi
arch/arm/boot/dts/sama5d3.dtsi
arch/arm/boot/dts/sama5d4.dtsi
arch/arm/boot/dts/socfpga.dtsi
arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts
arch/arm/boot/dts/sun4i-a10.dtsi
arch/arm/boot/dts/sun5i-a13.dtsi
arch/arm/boot/dts/sun7i-a20.dtsi
arch/arm/common/bL_switcher.c
arch/arm/configs/at91_dt_defconfig
arch/arm/configs/multi_v7_defconfig
arch/arm/configs/omap2plus_defconfig
arch/arm/configs/sama5_defconfig
arch/arm/configs/sunxi_defconfig
arch/arm/configs/vexpress_defconfig
arch/arm/crypto/aesbs-core.S_shipped
arch/arm/crypto/bsaes-armv7.pl
arch/arm/include/asm/jump_label.h
arch/arm/include/asm/kvm_arm.h
arch/arm/include/asm/kvm_host.h
arch/arm/include/asm/kvm_mmio.h
arch/arm/include/asm/kvm_mmu.h
arch/arm/include/asm/mach/time.h
arch/arm/include/debug/at91.S
arch/arm/include/uapi/asm/kvm.h
arch/arm/kernel/asm-offsets.c
arch/arm/kernel/setup.c
arch/arm/kernel/time.c
arch/arm/kvm/Kconfig
arch/arm/kvm/Makefile
arch/arm/kvm/arm.c
arch/arm/kvm/guest.c
arch/arm/kvm/interrupts_head.S
arch/arm/kvm/mmio.c
arch/arm/kvm/mmu.c
arch/arm/kvm/trace.h
arch/arm/mach-at91/pm.c
arch/arm/mach-at91/pm.h
arch/arm/mach-at91/pm_slowclock.S
arch/arm/mach-exynos/platsmp.c
arch/arm/mach-exynos/pm_domains.c
arch/arm/mach-exynos/suspend.c
arch/arm/mach-imx/mach-imx6q.c
arch/arm/mach-omap2/cpuidle44xx.c
arch/arm/mach-omap2/id.c
arch/arm/mach-omap2/omap_hwmod.c
arch/arm/mach-omap2/omap_hwmod.h
arch/arm/mach-omap2/omap_hwmod_7xx_data.c
arch/arm/mach-omap2/pdata-quirks.c
arch/arm/mach-omap2/prm44xx.c
arch/arm/mach-pxa/idp.c
arch/arm/mach-pxa/irq.c
arch/arm/mach-pxa/lpd270.c
arch/arm/mach-pxa/zeus.c
arch/arm/mach-sa1100/neponset.c
arch/arm/mach-sa1100/pleb.c
arch/arm/mach-socfpga/core.h
arch/arm/mach-socfpga/socfpga.c
arch/arm/mach-sti/board-dt.c
arch/arm/mach-sunxi/Kconfig
arch/arm/mach-tegra/cpuidle-tegra114.c
arch/arm/mach-tegra/cpuidle-tegra20.c
arch/arm/mach-tegra/cpuidle-tegra30.c
arch/arm/mm/cache-l2x0.c
arch/arm/mm/dma-mapping.c
arch/arm/mm/fault.c
arch/arm/mm/pageattr.c
arch/arm/plat-omap/counter_32k.c
arch/arm/plat-omap/dmtimer.c
arch/arm64/boot/dts/apm/apm-storm.dtsi
arch/arm64/boot/dts/arm/juno-clocks.dtsi
arch/arm64/include/asm/cmpxchg.h
arch/arm64/include/asm/esr.h
arch/arm64/include/asm/jump_label.h
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/kvm_mmio.h
arch/arm64/include/asm/kvm_mmu.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/percpu.h
arch/arm64/include/asm/proc-fns.h
arch/arm64/include/asm/tlb.h
arch/arm64/include/asm/tlbflush.h
arch/arm64/include/uapi/asm/kvm.h
arch/arm64/kernel/efi.c
arch/arm64/kernel/head.S
arch/arm64/kernel/process.c
arch/arm64/kernel/vdso.c
arch/arm64/kvm/Kconfig
arch/arm64/kvm/Makefile
arch/arm64/mm/dma-mapping.c
arch/c6x/include/asm/pgtable.h
arch/metag/include/asm/io.h
arch/metag/include/asm/pgtable-bits.h [new file with mode: 0644]
arch/metag/include/asm/pgtable.h
arch/microblaze/kernel/entry.S
arch/mips/include/asm/asmmacro-32.h
arch/mips/include/asm/asmmacro.h
arch/mips/include/asm/fpu.h
arch/mips/include/asm/jump_label.h
arch/mips/include/asm/kdebug.h
arch/mips/include/asm/kvm_host.h
arch/mips/include/asm/processor.h
arch/mips/include/uapi/asm/kvm.h
arch/mips/kernel/asm-offsets.c
arch/mips/kernel/genex.S
arch/mips/kernel/ptrace.c
arch/mips/kernel/r4k_fpu.S
arch/mips/kernel/traps.c
arch/mips/kvm/Makefile
arch/mips/kvm/emulate.c
arch/mips/kvm/fpu.S [new file with mode: 0644]
arch/mips/kvm/locore.S
arch/mips/kvm/mips.c
arch/mips/kvm/msa.S [new file with mode: 0644]
arch/mips/kvm/stats.c
arch/mips/kvm/tlb.c
arch/mips/kvm/trap_emul.c
arch/mips/lasat/sysctl.c
arch/nios2/include/asm/ptrace.h
arch/nios2/include/asm/thread_info.h
arch/nios2/include/asm/ucontext.h [deleted file]
arch/nios2/include/uapi/asm/Kbuild
arch/nios2/include/uapi/asm/elf.h
arch/nios2/include/uapi/asm/ptrace.h
arch/nios2/include/uapi/asm/sigcontext.h
arch/nios2/kernel/entry.S
arch/nios2/kernel/signal.c
arch/nios2/mm/cacheflush.c
arch/nios2/mm/fault.c
arch/parisc/include/asm/pgalloc.h
arch/parisc/kernel/syscall_table.S
arch/powerpc/include/asm/cputhreads.h
arch/powerpc/include/asm/ppc-opcode.h
arch/powerpc/include/asm/reg.h
arch/powerpc/kernel/cputable.c
arch/powerpc/kernel/dbell.c
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S
arch/powerpc/kvm/mpic.c
arch/powerpc/kvm/powerpc.c
arch/powerpc/platforms/powernv/opal-wrappers.S
arch/powerpc/platforms/powernv/smp.c
arch/powerpc/platforms/pseries/hvCall.S
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/mobility.c
arch/s390/include/asm/elf.h
arch/s390/include/asm/jump_label.h
arch/s390/include/asm/kvm_host.h
arch/s390/include/asm/mmu_context.h
arch/s390/include/asm/page.h
arch/s390/include/uapi/asm/kvm.h
arch/s390/include/uapi/asm/sie.h
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/ftrace.c
arch/s390/kernel/jump_label.c
arch/s390/kernel/module.c
arch/s390/kernel/perf_cpum_sf.c
arch/s390/kernel/processor.c
arch/s390/kernel/swsusp_asm64.S
arch/s390/kernel/time.c
arch/s390/kvm/diag.c
arch/s390/kvm/gaccess.c
arch/s390/kvm/gaccess.h
arch/s390/kvm/guestdbg.c
arch/s390/kvm/intercept.c
arch/s390/kvm/interrupt.c
arch/s390/kvm/kvm-s390.c
arch/s390/kvm/kvm-s390.h
arch/s390/kvm/priv.c
arch/s390/kvm/sigp.c
arch/s390/pci/pci.c
arch/s390/pci/pci_mmio.c
arch/sparc/Kconfig
arch/sparc/include/asm/hypervisor.h
arch/sparc/include/asm/io_64.h
arch/sparc/include/asm/jump_label.h
arch/sparc/include/asm/starfire.h
arch/sparc/kernel/entry.h
arch/sparc/kernel/hvapi.c
arch/sparc/kernel/hvcalls.S
arch/sparc/kernel/pci.c
arch/sparc/kernel/pcr.c
arch/sparc/kernel/perf_event.c
arch/sparc/kernel/process_64.c
arch/sparc/kernel/smp_64.c
arch/sparc/kernel/starfire.c
arch/sparc/kernel/sys_sparc_64.c
arch/sparc/kernel/time_32.c
arch/sparc/kernel/traps_64.c
arch/sparc/lib/memmove.S
arch/sparc/mm/init_64.c
arch/tile/kernel/time.c
arch/x86/Kconfig
arch/x86/boot/compressed/aslr.c
arch/x86/boot/compressed/head_32.S
arch/x86/boot/compressed/head_64.S
arch/x86/boot/compressed/misc.c
arch/x86/boot/compressed/misc.h
arch/x86/boot/string.c
arch/x86/boot/video-mode.c
arch/x86/boot/video.c
arch/x86/boot/video.h
arch/x86/configs/i386_defconfig
arch/x86/configs/x86_64_defconfig
arch/x86/crypto/aesni-intel_glue.c
arch/x86/crypto/crc32c-pcl-intel-asm_64.S
arch/x86/crypto/twofish-x86_64-asm_64.S
arch/x86/ia32/Makefile
arch/x86/ia32/ia32_signal.c
arch/x86/ia32/ia32entry.S
arch/x86/ia32/nosyscall.c [deleted file]
arch/x86/ia32/sys_ia32.c
arch/x86/ia32/syscall_ia32.c [deleted file]
arch/x86/include/asm/alternative-asm.h
arch/x86/include/asm/alternative.h
arch/x86/include/asm/apic.h
arch/x86/include/asm/barrier.h
arch/x86/include/asm/calling.h
arch/x86/include/asm/compat.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/asm/desc.h
arch/x86/include/asm/dwarf2.h
arch/x86/include/asm/efi.h
arch/x86/include/asm/elf.h
arch/x86/include/asm/fpu-internal.h
arch/x86/include/asm/hw_irq.h
arch/x86/include/asm/insn.h
arch/x86/include/asm/iommu_table.h
arch/x86/include/asm/irqflags.h
arch/x86/include/asm/jump_label.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_para.h
arch/x86/include/asm/mwait.h
arch/x86/include/asm/page_types.h
arch/x86/include/asm/paravirt.h
arch/x86/include/asm/pci_x86.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/pvclock.h
arch/x86/include/asm/segment.h
arch/x86/include/asm/setup.h
arch/x86/include/asm/sigcontext.h
arch/x86/include/asm/sighandling.h
arch/x86/include/asm/smap.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/special_insns.h
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/uaccess_64.h
arch/x86/include/uapi/asm/bootparam.h
arch/x86/include/uapi/asm/ptrace-abi.h
arch/x86/include/uapi/asm/ptrace.h
arch/x86/include/uapi/asm/sigcontext.h
arch/x86/include/uapi/asm/vmx.h
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/alternative.c
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/apic_numachip.c
arch/x86/kernel/apic/x2apic_cluster.c
arch/x86/kernel/apic/x2apic_uv_x.c
arch/x86/kernel/asm-offsets_32.c
arch/x86/kernel/asm-offsets_64.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/mkcapflags.sh
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/crash.c
arch/x86/kernel/dumpstack.c
arch/x86/kernel/dumpstack_32.c
arch/x86/kernel/early_printk.c
arch/x86/kernel/entry_32.S
arch/x86/kernel/entry_64.S
arch/x86/kernel/head64.c
arch/x86/kernel/head_32.S
arch/x86/kernel/head_64.S
arch/x86/kernel/i387.c
arch/x86/kernel/ioport.c
arch/x86/kernel/irq.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/irqinit.c
arch/x86/kernel/kgdb.c
arch/x86/kernel/kprobes/core.c
arch/x86/kernel/module.c
arch/x86/kernel/perf_regs.c
arch/x86/kernel/process.c
arch/x86/kernel/process_32.c
arch/x86/kernel/process_64.c
arch/x86/kernel/ptrace.c
arch/x86/kernel/pvclock.c
arch/x86/kernel/reboot.c
arch/x86/kernel/relocate_kernel_32.S
arch/x86/kernel/relocate_kernel_64.S
arch/x86/kernel/setup.c
arch/x86/kernel/signal.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/syscall_32.c
arch/x86/kernel/time.c
arch/x86/kernel/traps.c
arch/x86/kernel/uprobes.c
arch/x86/kernel/vm86_32.c
arch/x86/kernel/vsyscall_gtod.c
arch/x86/kernel/xsave.c
arch/x86/kvm/Makefile
arch/x86/kvm/cpuid.c
arch/x86/kvm/cpuid.h
arch/x86/kvm/emulate.c
arch/x86/kvm/i8254.c
arch/x86/kvm/i8254.h
arch/x86/kvm/i8259.c
arch/x86/kvm/ioapic.c
arch/x86/kvm/ioapic.h
arch/x86/kvm/irq.h
arch/x86/kvm/lapic.c
arch/x86/kvm/lapic.h
arch/x86/kvm/mmu.c
arch/x86/kvm/pmu.c
arch/x86/kvm/svm.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.c
arch/x86/lguest/boot.c
arch/x86/lib/atomic64_cx8_32.S
arch/x86/lib/checksum_32.S
arch/x86/lib/clear_page_64.S
arch/x86/lib/copy_page_64.S
arch/x86/lib/copy_user_64.S
arch/x86/lib/csum-copy_64.S
arch/x86/lib/insn.c
arch/x86/lib/memcpy_64.S
arch/x86/lib/memmove_64.S
arch/x86/lib/memset_64.S
arch/x86/lib/msr-reg.S
arch/x86/lib/rwsem.S
arch/x86/lib/thunk_32.S
arch/x86/lib/thunk_64.S
arch/x86/lib/usercopy_64.c
arch/x86/lib/x86-opcode-map.txt
arch/x86/mm/fault.c
arch/x86/mm/init.c
arch/x86/oprofile/backtrace.c
arch/x86/pci/common.c
arch/x86/pci/intel_mid_pci.c
arch/x86/pci/irq.c
arch/x86/platform/efi/efi.c
arch/x86/platform/efi/efi_32.c
arch/x86/platform/efi/efi_64.c
arch/x86/platform/uv/tlb_uv.c
arch/x86/power/cpu.c
arch/x86/syscalls/syscall_32.tbl
arch/x86/syscalls/syscall_64.tbl
arch/x86/um/asm/barrier.h
arch/x86/um/sys_call_table_64.c
arch/x86/vdso/vclock_gettime.c
arch/x86/vdso/vdso32/sigreturn.S
arch/x86/xen/enlighten.c
arch/x86/xen/p2m.c
arch/x86/xen/smp.c
arch/x86/xen/suspend.c
arch/x86/xen/xen-asm_64.S
block/blk-merge.c
block/blk-mq-tag.c
block/blk-mq.c
block/blk-settings.c
drivers/acpi/acpi_lpss.c
drivers/acpi/acpi_pad.c
drivers/acpi/pci_irq.c
drivers/acpi/processor_idle.c
drivers/android/binder.c
drivers/ata/libata-core.c
drivers/ata/sata_fsl.c
drivers/base/regmap/internal.h
drivers/base/regmap/regcache-rbtree.c
drivers/base/regmap/regcache.c
drivers/base/regmap/regmap-irq.c
drivers/base/regmap/regmap.c
drivers/block/nbd.c
drivers/block/nvme-core.c
drivers/char/ipmi/ipmi_powernv.c
drivers/char/ipmi/ipmi_si_intf.c
drivers/char/ipmi/ipmi_ssif.c
drivers/char/tpm/tpm-chip.c
drivers/char/tpm/tpm_ibmvtpm.c
drivers/char/tpm/tpm_ibmvtpm.h
drivers/char/virtio_console.c
drivers/clk/clk-divider.c
drivers/clk/clk.c
drivers/clk/qcom/gcc-msm8960.c
drivers/clk/qcom/lcc-ipq806x.c
drivers/clk/qcom/lcc-msm8960.c
drivers/clk/ti/fapll.c
drivers/clocksource/Kconfig
drivers/clocksource/arm_arch_timer.c
drivers/clocksource/dw_apb_timer_of.c
drivers/clocksource/em_sti.c
drivers/clocksource/sh_cmt.c
drivers/clocksource/sh_tmu.c
drivers/clocksource/sun4i_timer.c
drivers/clocksource/tegra20_timer.c
drivers/clocksource/time-efm32.c
drivers/clocksource/timer-atmel-pit.c
drivers/clocksource/timer-sun5i.c
drivers/cpufreq/cpufreq.c
drivers/cpuidle/cpuidle-mvebu-v7.c
drivers/cpuidle/cpuidle.c
drivers/cpuidle/driver.c
drivers/cpuidle/sysfs.c
drivers/dma/amba-pl08x.c
drivers/dma/at_hdmac.c
drivers/dma/at_hdmac_regs.h
drivers/dma/bcm2835-dma.c
drivers/dma/cppi41.c
drivers/dma/dma-jz4740.c
drivers/dma/dmaengine.c
drivers/dma/dw/platform.c
drivers/dma/edma.c
drivers/dma/imx-sdma.c
drivers/dma/moxart-dma.c
drivers/dma/omap-dma.c
drivers/firmware/dmi_scan.c
drivers/firmware/efi/libstub/arm-stub.c
drivers/firmware/efi/libstub/efistub.h
drivers/firmware/efi/libstub/fdt.c
drivers/gpio/gpio-mpc8xxx.c
drivers/gpio/gpio-syscon.c
drivers/gpio/gpiolib-acpi.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
drivers/gpu/drm/drm_crtc.c
drivers/gpu/drm/drm_dp_mst_topology.c
drivers/gpu/drm/drm_edid_load.c
drivers/gpu/drm/drm_mm.c
drivers/gpu/drm/drm_probe_helper.c
drivers/gpu/drm/exynos/Kconfig
drivers/gpu/drm/exynos/exynos7_drm_decon.c
drivers/gpu/drm/exynos/exynos_drm_connector.c [deleted file]
drivers/gpu/drm/exynos/exynos_drm_connector.h [deleted file]
drivers/gpu/drm/exynos/exynos_drm_fimd.c
drivers/gpu/drm/exynos/exynos_drm_plane.c
drivers/gpu/drm/exynos/exynos_mixer.c
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_sprite.c
drivers/gpu/drm/i915/intel_uncore.c
drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
drivers/gpu/drm/nouveau/nvkm/engine/device/gm100.c
drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
drivers/gpu/drm/nouveau/nvkm/subdev/bios/i2c.c
drivers/gpu/drm/radeon/cikd.h
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_bios.c
drivers/gpu/drm/radeon/radeon_fence.c
drivers/gpu/drm/radeon/radeon_kfd.c
drivers/gpu/drm/radeon/radeon_mn.c
drivers/gpu/drm/radeon/radeon_object.c
drivers/gpu/drm/radeon/radeon_pm.c
drivers/gpu/drm/radeon/radeon_ring.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/radeon/si.c
drivers/gpu/drm/radeon/vce_v2_0.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/hid/hid-core.c
drivers/hid/hid-ids.h
drivers/hid/hid-tivo.c
drivers/hid/usbhid/hid-quirks.c
drivers/hid/wacom_wac.c
drivers/i2c/i2c-core.c
drivers/ide/ide-tape.c
drivers/idle/intel_idle.c
drivers/iio/accel/bma180.c
drivers/iio/accel/bmc150-accel.c
drivers/iio/accel/kxcjk-1013.c
drivers/iio/adc/Kconfig
drivers/iio/adc/at91_adc.c
drivers/iio/adc/mcp3422.c
drivers/iio/adc/qcom-spmi-iadc.c
drivers/iio/adc/ti_am335x_adc.c
drivers/iio/adc/vf610_adc.c
drivers/iio/common/ssp_sensors/ssp_dev.c
drivers/iio/dac/ad5686.c
drivers/iio/gyro/bmg160.c
drivers/iio/humidity/dht11.c
drivers/iio/humidity/si7020.c
drivers/iio/imu/adis16400_core.c
drivers/iio/imu/adis_trigger.c
drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
drivers/iio/imu/inv_mpu6050/inv_mpu_ring.c
drivers/iio/imu/kmx61.c
drivers/iio/industrialio-core.c
drivers/iio/industrialio-event.c
drivers/iio/light/Kconfig
drivers/iio/magnetometer/Kconfig
drivers/iio/proximity/sx9500.c
drivers/infiniband/core/umem.c
drivers/infiniband/hw/mlx4/mad.c
drivers/infiniband/hw/mlx4/main.c
drivers/input/keyboard/tc3589x-keypad.c
drivers/input/misc/mma8450.c
drivers/input/mouse/alps.c
drivers/input/mouse/cyapa_gen3.c
drivers/input/mouse/cyapa_gen5.c
drivers/input/mouse/focaltech.c
drivers/input/mouse/psmouse-base.c
drivers/input/mouse/psmouse.h
drivers/input/mouse/synaptics.c
drivers/input/mouse/synaptics.h
drivers/input/touchscreen/Kconfig
drivers/iommu/Kconfig
drivers/iommu/arm-smmu.c
drivers/iommu/exynos-iommu.c
drivers/iommu/intel-iommu.c
drivers/iommu/io-pgtable-arm.c
drivers/iommu/ipmmu-vmsa.c
drivers/iommu/omap-iommu.c
drivers/iommu/rockchip-iommu.c
drivers/irqchip/irq-armada-370-xp.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-gic-v3.c
drivers/irqchip/irq-gic.c
drivers/isdn/icn/icn.c
drivers/lguest/Kconfig
drivers/md/dm-io.c
drivers/md/dm-snap.c
drivers/md/dm-thin.c
drivers/md/dm.c
drivers/md/md.c
drivers/md/raid0.c
drivers/media/dvb-frontends/rtl2832.c
drivers/media/pci/cx23885/cx23885-417.c
drivers/media/platform/s5p-jpeg/jpeg-core.c
drivers/media/platform/s5p-jpeg/jpeg-hw-exynos3250.c
drivers/media/platform/s5p-mfc/s5p_mfc.c
drivers/media/platform/s5p-mfc/s5p_mfc_common.h
drivers/media/platform/s5p-mfc/s5p_mfc_opr.h
drivers/media/platform/s5p-mfc/s5p_mfc_opr_v5.c
drivers/media/platform/s5p-mfc/s5p_mfc_opr_v6.c
drivers/media/platform/s5p-tv/Kconfig
drivers/media/platform/sh_veu.c
drivers/media/platform/soc_camera/atmel-isi.c
drivers/media/platform/soc_camera/soc_camera.c
drivers/media/usb/dvb-usb-v2/rtl28xxu.c
drivers/media/usb/gspca/Kconfig
drivers/media/v4l2-core/videobuf2-core.c
drivers/media/v4l2-core/videobuf2-dma-contig.c
drivers/mfd/kempld-core.c
drivers/mfd/rtsx_usb.c
drivers/misc/enclosure.c
drivers/misc/mei/init.c
drivers/misc/sgi-xp/xpc_main.c
drivers/mmc/core/pwrseq_simple.c
drivers/mtd/nand/Kconfig
drivers/mtd/nand/pxa3xx_nand.c
drivers/mtd/ubi/eba.c
drivers/net/bonding/bond_main.c
drivers/net/can/Kconfig
drivers/net/can/dev.c
drivers/net/can/flexcan.c
drivers/net/can/usb/gs_usb.c
drivers/net/can/usb/kvaser_usb.c
drivers/net/can/usb/peak_usb/pcan_ucan.h
drivers/net/can/usb/peak_usb/pcan_usb_fd.c
drivers/net/ethernet/amd/pcnet32.c
drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
drivers/net/ethernet/apm/xgene/xgene_enet_main.c
drivers/net/ethernet/broadcom/bcm63xx_enet.c
drivers/net/ethernet/broadcom/bgmac.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_stats.h
drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
drivers/net/ethernet/cadence/macb.c
drivers/net/ethernet/cadence/macb.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
drivers/net/ethernet/chelsio/cxgb4/sge.c
drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
drivers/net/ethernet/chelsio/cxgb4/t4fw_version.h
drivers/net/ethernet/chelsio/cxgb4vf/sge.c
drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
drivers/net/ethernet/dec/tulip/tulip_core.c
drivers/net/ethernet/emulex/benet/be.h
drivers/net/ethernet/emulex/benet/be_cmds.c
drivers/net/ethernet/emulex/benet/be_cmds.h
drivers/net/ethernet/emulex/benet/be_main.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/freescale/ucc_geth.c
drivers/net/ethernet/ibm/ibmveth.c
drivers/net/ethernet/marvell/mvneta.c
drivers/net/ethernet/mellanox/mlx4/cmd.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/eq.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/rocker/rocker.c
drivers/net/ethernet/smsc/smc91x.c
drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
drivers/net/ethernet/wiznet/w5100.c
drivers/net/ethernet/wiznet/w5300.c
drivers/net/ipvlan/ipvlan.h
drivers/net/ipvlan/ipvlan_core.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/team/team.c
drivers/net/usb/asix_common.c
drivers/net/usb/cdc_ether.c
drivers/net/usb/cdc_ncm.c
drivers/net/usb/cx82310_eth.c
drivers/net/usb/r8152.c
drivers/net/usb/sr9800.c
drivers/net/usb/usbnet.c
drivers/net/virtio_net.c
drivers/net/vxlan.c
drivers/net/wireless/ath/ath9k/beacon.c
drivers/net/wireless/ath/ath9k/common.h
drivers/net/wireless/ath/ath9k/hw.c
drivers/net/wireless/b43/main.c
drivers/net/wireless/brcm80211/brcmfmac/feature.c
drivers/net/wireless/brcm80211/brcmfmac/vendor.c
drivers/net/wireless/iwlwifi/dvm/dev.h
drivers/net/wireless/iwlwifi/dvm/mac80211.c
drivers/net/wireless/iwlwifi/dvm/ucode.c
drivers/net/wireless/iwlwifi/iwl-1000.c
drivers/net/wireless/iwlwifi/iwl-2000.c
drivers/net/wireless/iwlwifi/iwl-5000.c
drivers/net/wireless/iwlwifi/iwl-6000.c
drivers/net/wireless/iwlwifi/iwl-drv.c
drivers/net/wireless/iwlwifi/mvm/coex.c
drivers/net/wireless/iwlwifi/mvm/coex_legacy.c
drivers/net/wireless/iwlwifi/mvm/mac80211.c
drivers/net/wireless/iwlwifi/mvm/rs.c
drivers/net/wireless/iwlwifi/mvm/scan.c
drivers/net/wireless/iwlwifi/mvm/time-event.c
drivers/net/wireless/iwlwifi/mvm/tx.c
drivers/net/wireless/iwlwifi/pcie/drv.c
drivers/net/wireless/rtlwifi/base.c
drivers/net/wireless/rtlwifi/pci.c
drivers/net/xen-netback/interface.c
drivers/net/xen-netback/netback.c
drivers/net/xen-netfront.c
drivers/of/Kconfig
drivers/of/address.c
drivers/of/base.c
drivers/of/irq.c
drivers/of/overlay.c
drivers/of/unittest.c
drivers/pci/host/pci-xgene.c
drivers/pci/host/pcie-designware.c
drivers/pci/host/pcie-spear13xx.c
drivers/pci/hotplug/cpci_hotplug_pci.c
drivers/pci/pci-acpi.c
drivers/pci/pci-sysfs.c
drivers/pci/pcie/aer/aerdrv_errprint.c
drivers/pcmcia/Kconfig
drivers/pcmcia/Makefile
drivers/pcmcia/rsrc_pci.c [deleted file]
drivers/phy/phy-armada375-usb2.c
drivers/phy/phy-core.c
drivers/phy/phy-exynos-dp-video.c
drivers/phy/phy-exynos-mipi-video.c
drivers/phy/phy-exynos4210-usb2.c
drivers/phy/phy-exynos4x12-usb2.c
drivers/phy/phy-exynos5-usbdrd.c
drivers/phy/phy-exynos5250-usb2.c
drivers/phy/phy-hix5hd2-sata.c
drivers/phy/phy-miphy28lp.c
drivers/phy/phy-miphy365x.c
drivers/phy/phy-omap-control.c
drivers/phy/phy-omap-usb2.c
drivers/phy/phy-rockchip-usb.c
drivers/phy/phy-ti-pipe3.c
drivers/phy/phy-twl4030-usb.c
drivers/phy/phy-xgene.c
drivers/pinctrl/intel/pinctrl-baytrail.c
drivers/pinctrl/intel/pinctrl-cherryview.c
drivers/pinctrl/pinctrl-at91.c
drivers/pinctrl/sunxi/pinctrl-sun4i-a10.c
drivers/pinctrl/sunxi/pinctrl-sunxi.c
drivers/pinctrl/sunxi/pinctrl-sunxi.h
drivers/powercap/intel_rapl.c
drivers/regulator/core.c
drivers/regulator/da9210-regulator.c
drivers/regulator/palmas-regulator.c
drivers/regulator/rk808-regulator.c
drivers/regulator/tps65910-regulator.c
drivers/rpmsg/virtio_rpmsg_bus.c
drivers/rtc/class.c
drivers/rtc/interface.c
drivers/rtc/rtc-ab3100.c
drivers/rtc/rtc-at91rm9200.c
drivers/rtc/rtc-mc13xxx.c
drivers/rtc/rtc-mrst.c
drivers/rtc/rtc-mxc.c
drivers/rtc/rtc-s3c.c
drivers/rtc/rtc-test.c
drivers/rtc/systohc.c
drivers/s390/block/dcssblk.c
drivers/s390/block/scm_blk_cluster.c
drivers/scsi/be2iscsi/be_main.c
drivers/scsi/ipr.c
drivers/scsi/libsas/sas_ata.c
drivers/scsi/libsas/sas_discover.c
drivers/scsi/qla2xxx/tcm_qla2xxx.c
drivers/scsi/scsi_lib.c
drivers/spi/spi-atmel.c
drivers/spi/spi-dw-mid.c
drivers/spi/spi-dw-pci.c
drivers/spi/spi-dw.c
drivers/spi/spi-img-spfi.c
drivers/spi/spi-pl022.c
drivers/spi/spi-qup.c
drivers/spi/spi-ti-qspi.c
drivers/spi/spi.c
drivers/staging/comedi/drivers/adv_pci1710.c
drivers/staging/comedi/drivers/comedi_isadma.c
drivers/staging/comedi/drivers/vmk80xx.c
drivers/staging/iio/Kconfig
drivers/staging/iio/adc/mxs-lradc.c
drivers/staging/iio/magnetometer/hmc5843_core.c
drivers/staging/iio/resolver/ad2s1200.c
drivers/staging/vt6655/device_main.c
drivers/staging/vt6655/rf.c
drivers/staging/vt6656/rf.c
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target_erl0.c
drivers/target/loopback/tcm_loop.c
drivers/target/target_core_device.c
drivers/target/target_core_pscsi.c
drivers/target/target_core_sbc.c
drivers/target/target_core_spc.c
drivers/target/target_core_transport.c
drivers/target/tcm_fc/tfc_io.c
drivers/thermal/st/st_thermal.c
drivers/thermal/st/st_thermal_memmap.c
drivers/thermal/st/st_thermal_syscfg.c
drivers/thermal/thermal_core.c
drivers/tty/bfin_jtag_comm.c
drivers/tty/serial/8250/8250_core.c
drivers/tty/serial/8250/8250_dw.c
drivers/tty/serial/8250/8250_pci.c
drivers/tty/serial/fsl_lpuart.c
drivers/tty/serial/of_serial.c
drivers/tty/serial/samsung.c
drivers/tty/serial/sprd_serial.c
drivers/tty/tty_io.c
drivers/tty/tty_ioctl.c
drivers/usb/chipidea/udc.c
drivers/usb/class/cdc-acm.c
drivers/usb/common/usb-otg-fsm.c
drivers/usb/core/devio.c
drivers/usb/dwc2/core_intr.c
drivers/usb/dwc3/dwc3-omap.c
drivers/usb/gadget/configfs.c
drivers/usb/gadget/function/f_fs.c
drivers/usb/gadget/function/f_hid.c
drivers/usb/gadget/function/f_loopback.c
drivers/usb/gadget/function/f_phonet.c
drivers/usb/gadget/function/f_sourcesink.c
drivers/usb/gadget/function/f_uac2.c
drivers/usb/gadget/function/g_zero.h
drivers/usb/gadget/function/uvc_v4l2.c
drivers/usb/gadget/function/uvc_video.c
drivers/usb/gadget/legacy/g_ffs.c
drivers/usb/gadget/legacy/inode.c
drivers/usb/gadget/legacy/tcm_usb_gadget.c
drivers/usb/gadget/legacy/zero.c
drivers/usb/host/ehci-atmel.c
drivers/usb/host/xhci-hub.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-plat.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.h
drivers/usb/isp1760/isp1760-core.c
drivers/usb/isp1760/isp1760-hcd.c
drivers/usb/isp1760/isp1760-udc.c
drivers/usb/musb/Kconfig
drivers/usb/musb/musb_core.c
drivers/usb/musb/musb_dsps.c
drivers/usb/musb/musb_host.c
drivers/usb/musb/omap2430.c
drivers/usb/phy/phy-am335x-control.c
drivers/usb/renesas_usbhs/Kconfig
drivers/usb/serial/bus.c
drivers/usb/serial/ch341.c
drivers/usb/serial/console.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/ftdi_sio.c
drivers/usb/serial/ftdi_sio_ids.h
drivers/usb/serial/generic.c
drivers/usb/serial/keyspan_pda.c
drivers/usb/serial/mxuport.c
drivers/usb/serial/pl2303.c
drivers/usb/serial/usb-serial.c
drivers/usb/storage/unusual_uas.h
drivers/usb/storage/usb.c
drivers/vfio/pci/vfio_pci_intrs.c
drivers/vhost/scsi.c
drivers/video/fbdev/amba-clcd.c
drivers/video/fbdev/core/fbmon.c
drivers/video/fbdev/omap2/dss/display-sysfs.c
drivers/virtio/virtio_balloon.c
drivers/virtio/virtio_mmio.c
drivers/watchdog/imgpdc_wdt.c
drivers/watchdog/mtk_wdt.c
drivers/xen/Kconfig
drivers/xen/balloon.c
drivers/xen/events/events_base.c
drivers/xen/xen-pciback/conf_space.c
drivers/xen/xen-pciback/conf_space.h
drivers/xen/xen-pciback/conf_space_header.c
drivers/xen/xen-scsiback.c
fs/affs/file.c
fs/aio.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/qgroup.c
fs/btrfs/tests/inode-tests.c
fs/btrfs/transaction.c
fs/cifs/cifsencrypt.c
fs/cifs/connect.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/fs-writeback.c
fs/fuse/dev.c
fs/hfsplus/brec.c
fs/kernfs/file.c
fs/locks.c
fs/nfsd/blocklayout.c
fs/nfsd/blocklayoutxdr.c
fs/nfsd/nfs4layouts.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfscache.c
fs/nilfs2/segment.c
fs/notify/fanotify/fanotify.c
fs/ocfs2/file.c
fs/ocfs2/ocfs2.h
fs/ocfs2/ocfs2_fs.h
fs/overlayfs/super.c
fs/proc/task_mmu.c
include/dt-bindings/pinctrl/am33xx.h
include/dt-bindings/pinctrl/am43xx.h
include/kvm/arm_arch_timer.h
include/kvm/arm_vgic.h
include/kvm/iodev.h [moved from virt/kvm/iodev.h with 66% similarity]
include/linux/blk_types.h
include/linux/clk.h
include/linux/clockchips.h
include/linux/clocksource.h
include/linux/compiler.h
include/linux/cpuidle.h
include/linux/device-mapper.h
include/linux/dmapool.h
include/linux/efi.h
include/linux/fs.h
include/linux/irq_work.h
include/linux/irqchip/arm-gic-v3.h
include/linux/jump_label.h
include/linux/kasan.h
include/linux/kvm_host.h
include/linux/lcm.h
include/linux/libata.h
include/linux/mfd/palmas.h
include/linux/mmzone.h
include/linux/module.h
include/linux/moduleloader.h
include/linux/netdevice.h
include/linux/of_platform.h
include/linux/pinctrl/consumer.h
include/linux/regulator/driver.h
include/linux/rtc.h
include/linux/sched.h
include/linux/seqlock.h
include/linux/serial_core.h
include/linux/skbuff.h
include/linux/spi/spi.h
include/linux/stddef.h
include/linux/sunrpc/debug.h
include/linux/tick.h
include/linux/timekeeper_internal.h
include/linux/timekeeping.h
include/linux/uio.h
include/linux/usb/serial.h
include/linux/usb/usbnet.h
include/linux/vfio.h
include/linux/vmalloc.h
include/linux/workqueue.h
include/linux/writeback.h
include/media/atmel-isi.h
include/net/dst.h
include/net/ip.h
include/net/ip6_route.h
include/net/netfilter/nf_log.h
include/net/netfilter/nf_tables.h
include/net/sock.h
include/net/vxlan.h
include/soc/at91/at91sam9_ddrsdr.h
include/target/target_core_backend.h
include/trace/events/regmap.h
include/uapi/linux/input.h
include/uapi/linux/kvm.h
include/uapi/linux/nfsd/export.h
include/uapi/linux/serial.h
include/uapi/linux/virtio_blk.h
include/uapi/linux/virtio_scsi.h
include/video/omapdss.h
include/xen/xenbus.h
kernel/cpu.c
kernel/cpuset.c
kernel/events/core.c
kernel/futex.c
kernel/livepatch/core.c
kernel/locking/lockdep.c
kernel/locking/mcs_spinlock.h
kernel/locking/mutex.c
kernel/locking/osq_lock.c
kernel/locking/rtmutex.c
kernel/locking/rwsem-spinlock.c
kernel/locking/rwsem-xadd.c
kernel/locking/rwsem.c
kernel/locking/rwsem.h [new file with mode: 0644]
kernel/module.c
kernel/power/snapshot.c
kernel/printk/console_cmdline.h
kernel/printk/printk.c
kernel/sched/core.c
kernel/sched/deadline.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/features.h
kernel/sched/idle.c
kernel/sched/rt.c
kernel/sched/sched.h
kernel/sysctl.c
kernel/time/Kconfig
kernel/time/Makefile
kernel/time/clockevents.c
kernel/time/clocksource.c
kernel/time/hrtimer.c
kernel/time/jiffies.c
kernel/time/ntp.c
kernel/time/sched_clock.c
kernel/time/tick-broadcast-hrtimer.c
kernel/time/tick-broadcast.c
kernel/time/tick-common.c
kernel/time/tick-internal.h
kernel/time/tick-oneshot.c
kernel/time/tick-sched.c
kernel/time/tick-sched.h [new file with mode: 0644]
kernel/time/timekeeping.c
kernel/time/timekeeping.h
kernel/time/timer.c
kernel/time/timer_list.c
kernel/trace/ftrace.c
kernel/workqueue.c
lib/Kconfig.debug
lib/Makefile
lib/iov_iter.c [moved from mm/iov_iter.c with 97% similarity]
lib/lcm.c
lib/lockref.c
lib/lz4/lz4_decompress.c
lib/nlattr.c
lib/seq_buf.c
mm/Makefile
mm/cma.c
mm/huge_memory.c
mm/hugetlb.c
mm/kasan/kasan.c
mm/memcontrol.c
mm/memory.c
mm/memory_hotplug.c
mm/mlock.c
mm/mmap.c
mm/mprotect.c
mm/mremap.c
mm/nommu.c
mm/page-writeback.c
mm/page_alloc.c
mm/page_isolation.c
mm/pagewalk.c
mm/rmap.c
mm/slub.c
mm/vmalloc.c
net/9p/trans_virtio.c
net/bridge/br_if.c
net/caif/caif_socket.c
net/can/af_can.c
net/ceph/messenger.c
net/compat.c
net/core/dev.c
net/core/fib_rules.c
net/core/net_namespace.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock.c
net/core/sysctl_net_core.c
net/decnet/dn_rules.c
net/dsa/dsa.c
net/ipv4/fib_frontend.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_diag.c
net/ipv4/ip_forward.c
net/ipv4/ip_fragment.c
net/ipv4/ip_sockglue.c
net/ipv4/ipmr.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/ping.c
net/ipv4/tcp.c
net/ipv4/tcp_cong.c
net/ipv4/tcp_cubic.c
net/ipv4/tcp_input.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_output.c
net/ipv4/xfrm4_output.c
net/ipv6/datagram.c
net/ipv6/fib6_rules.c
net/ipv6/ip6_output.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6mr.c
net/ipv6/ndisc.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/ping.c
net/ipv6/tcp_ipv6.c
net/ipv6/udp_offload.c
net/ipv6/xfrm6_output.c
net/ipv6/xfrm6_policy.c
net/irda/ircomm/ircomm_tty.c
net/iucv/af_iucv.c
net/l2tp/l2tp_core.c
net/mac80211/agg-rx.c
net/mac80211/ieee80211_i.h
net/mac80211/mlme.c
net/mac80211/rx.c
net/mac80211/sta_info.h
net/mac80211/util.c
net/netfilter/ipvs/ip_vs_sync.c
net/netfilter/nf_log.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_core.c
net/netfilter/nfnetlink_cthelper.c
net/netfilter/nft_compat.c
net/netfilter/nft_hash.c
net/netfilter/xt_TPROXY.c
net/openvswitch/vport.c
net/packet/af_packet.c
net/rds/iw_rdma.c
net/rxrpc/ar-error.c
net/rxrpc/ar-recvmsg.c
net/sched/act_bpf.c
net/sched/cls_u32.c
net/socket.c
net/sunrpc/cache.c
net/sunrpc/clnt.c
net/sunrpc/debugfs.c
net/sunrpc/sunrpc_syms.c
net/sunrpc/xprt.c
net/tipc/core.c
net/tipc/link.c
net/wireless/nl80211.c
net/xfrm/xfrm_policy.c
security/selinux/selinuxfs.c
sound/core/control.c
sound/firewire/bebob/bebob_maudio.c
sound/firewire/dice/dice-interface.h
sound/firewire/dice/dice-proc.c
sound/firewire/iso-resources.c
sound/pci/hda/hda_controller.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_intel.c
sound/pci/hda/hda_proc.c
sound/pci/hda/patch_cirrus.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_realtek.c
sound/soc/codecs/adav80x.c
sound/soc/codecs/ak4641.c
sound/soc/codecs/ak4671.c
sound/soc/codecs/cs4271.c
sound/soc/codecs/da732x.c
sound/soc/codecs/es8328.c
sound/soc/codecs/pcm1681.c
sound/soc/codecs/pcm512x.c
sound/soc/codecs/rt286.c
sound/soc/codecs/sgtl5000.c
sound/soc/codecs/sn95031.c
sound/soc/codecs/tas5086.c
sound/soc/codecs/wm2000.c
sound/soc/codecs/wm8731.c
sound/soc/codecs/wm8903.c
sound/soc/codecs/wm8904.c
sound/soc/codecs/wm8955.c
sound/soc/codecs/wm8960.c
sound/soc/codecs/wm9712.c
sound/soc/codecs/wm9713.c
sound/soc/fsl/fsl_spdif.c
sound/soc/fsl/fsl_ssi.c
sound/soc/intel/sst-haswell-dsp.c
sound/soc/intel/sst-haswell-ipc.c
sound/soc/kirkwood/kirkwood-i2s.c
sound/soc/soc-core.c
sound/usb/mixer_quirks.c
sound/usb/quirks-table.h
sound/usb/quirks.c
tools/perf/bench/mem-memcpy-x86-64-asm-def.h
tools/perf/bench/mem-memcpy-x86-64-asm.S
tools/perf/bench/mem-memcpy.c
tools/perf/bench/mem-memset-x86-64-asm-def.h
tools/perf/bench/mem-memset-x86-64-asm.S
tools/perf/util/annotate.c
tools/perf/util/include/asm/alternative-asm.h
tools/power/cpupower/Makefile
tools/testing/selftests/Makefile
tools/testing/selftests/breakpoints/Makefile
tools/testing/selftests/cpu-hotplug/Makefile
tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh [moved from tools/testing/selftests/cpu-hotplug/on-off-test.sh with 100% similarity, mode: 0755]
tools/testing/selftests/efivarfs/Makefile
tools/testing/selftests/efivarfs/efivarfs.sh [changed mode: 0644->0755]
tools/testing/selftests/exec/Makefile
tools/testing/selftests/exec/execveat.c
tools/testing/selftests/firmware/Makefile
tools/testing/selftests/firmware/fw_filesystem.sh [changed mode: 0644->0755]
tools/testing/selftests/firmware/fw_userhelper.sh [changed mode: 0644->0755]
tools/testing/selftests/ftrace/Makefile
tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
tools/testing/selftests/ftrace/test.d/event/event-enable.tc
tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
tools/testing/selftests/gen_kselftest_tar.sh [new file with mode: 0755]
tools/testing/selftests/ipc/Makefile
tools/testing/selftests/kcmp/Makefile
tools/testing/selftests/kselftest_install.sh [new file with mode: 0755]
tools/testing/selftests/lib.mk [new file with mode: 0644]
tools/testing/selftests/memfd/Makefile
tools/testing/selftests/memory-hotplug/Makefile
tools/testing/selftests/memory-hotplug/mem-on-off-test.sh [moved from tools/testing/selftests/memory-hotplug/on-off-test.sh with 100% similarity, mode: 0755]
tools/testing/selftests/mount/.gitignore [new file with mode: 0644]
tools/testing/selftests/mount/Makefile
tools/testing/selftests/mqueue/Makefile
tools/testing/selftests/net/Makefile
tools/testing/selftests/net/run_afpackettests [changed mode: 0644->0755]
tools/testing/selftests/net/run_netsocktests [changed mode: 0644->0755]
tools/testing/selftests/powerpc/Makefile
tools/testing/selftests/powerpc/copyloops/Makefile
tools/testing/selftests/powerpc/mm/Makefile
tools/testing/selftests/powerpc/pmu/Makefile
tools/testing/selftests/powerpc/pmu/ebb/Makefile
tools/testing/selftests/powerpc/primitives/Makefile
tools/testing/selftests/powerpc/stringloops/Makefile
tools/testing/selftests/powerpc/tm/Makefile
tools/testing/selftests/ptrace/Makefile
tools/testing/selftests/size/Makefile
tools/testing/selftests/sysctl/Makefile
tools/testing/selftests/sysctl/run_numerictests [changed mode: 0644->0755]
tools/testing/selftests/sysctl/run_stringtests [changed mode: 0644->0755]
tools/testing/selftests/timers/Makefile
tools/testing/selftests/timers/alarmtimer-suspend.c [new file with mode: 0644]
tools/testing/selftests/timers/change_skew.c [new file with mode: 0644]
tools/testing/selftests/timers/clocksource-switch.c [new file with mode: 0644]
tools/testing/selftests/timers/inconsistency-check.c [new file with mode: 0644]
tools/testing/selftests/timers/leap-a-day.c [new file with mode: 0644]
tools/testing/selftests/timers/leapcrash.c [new file with mode: 0644]
tools/testing/selftests/timers/mqueue-lat.c [new file with mode: 0644]
tools/testing/selftests/timers/nanosleep.c [new file with mode: 0644]
tools/testing/selftests/timers/nsleep-lat.c [new file with mode: 0644]
tools/testing/selftests/timers/posix_timers.c
tools/testing/selftests/timers/raw_skew.c [new file with mode: 0644]
tools/testing/selftests/timers/rtctest.c [new file with mode: 0644]
tools/testing/selftests/timers/set-2038.c [new file with mode: 0644]
tools/testing/selftests/timers/set-tai.c [new file with mode: 0644]
tools/testing/selftests/timers/set-timer-lat.c [new file with mode: 0644]
tools/testing/selftests/timers/skew_consistency.c [new file with mode: 0644]
tools/testing/selftests/timers/threadtest.c [new file with mode: 0644]
tools/testing/selftests/timers/valid-adjtimex.c [new file with mode: 0644]
tools/testing/selftests/user/Makefile
tools/testing/selftests/vm/Makefile
tools/testing/selftests/vm/run_vmtests [changed mode: 0644->0755]
tools/testing/selftests/x86/.gitignore [new file with mode: 0644]
tools/testing/selftests/x86/Makefile [new file with mode: 0644]
tools/testing/selftests/x86/run_x86_tests.sh [new file with mode: 0644]
tools/testing/selftests/x86/sigreturn.c [new file with mode: 0644]
tools/testing/selftests/x86/trivial_32bit_program.c [new file with mode: 0644]
virt/kvm/arm/arch_timer.c
virt/kvm/arm/vgic-v2-emul.c
virt/kvm/arm/vgic-v2.c
virt/kvm/arm/vgic-v3-emul.c
virt/kvm/arm/vgic-v3.c
virt/kvm/arm/vgic.c
virt/kvm/arm/vgic.h
virt/kvm/coalesced_mmio.c
virt/kvm/eventfd.c
virt/kvm/irqchip.c
virt/kvm/kvm_main.c

diff --git a/Documentation/CodeOfConflict b/Documentation/CodeOfConflict
new file mode 100644 (file)
index 0000000..1684d0b
--- /dev/null
@@ -0,0 +1,27 @@
+Code of Conflict
+----------------
+
+The Linux kernel development effort is a very personal process compared
+to "traditional" ways of developing software.  Your code and ideas
+behind it will be carefully reviewed, often resulting in critique and
+criticism.  The review will almost always require improvements to the
+code before it can be included in the kernel.  Know that this happens
+because everyone involved wants to see the best possible solution for
+the overall success of Linux.  This development process has been proven
+to create the most robust operating system kernel ever, and we do not
+want to do anything to cause the quality of submission and eventual
+result to ever decrease.
+
+If, however, anyone feels personally abused, threatened, or otherwise
+uncomfortable due to this process, that is not acceptable.  If so,
+please contact the Linux Foundation's Technical Advisory Board at
+<tab@lists.linux-foundation.org>, or the individual members, and they
+will work to resolve the issue to the best of their ability.  For more
+information on who is on the Technical Advisory Board and what their
+role is, please see:
+       http://www.linuxfoundation.org/programs/advisory-councils/tab
+
+As a reviewer of code, please strive to keep things civil and focused on
+the technical issues involved.  We are all humans, and frustrations can
+be high on both sides of the process.  Try to keep in mind the immortal
+words of Bill and Ted, "Be excellent to each other."
index f4445e5..1e09703 100644 (file)
@@ -22,6 +22,8 @@ Optional Properties:
        - pclkN, clkN: Pairs of parent of input clock and input clock to the
                devices in this power domain. Maximum of 4 pairs (N = 0 to 3)
                are supported currently.
+- power-domains: phandle pointing to the parent power domain, for more details
+                see Documentation/devicetree/bindings/power/power_domain.txt
 
 Node of a device using power domains must have a power-domains property
 defined with a phandle to respective power domain.
index d70ec35..8d27f6b 100644 (file)
@@ -13,6 +13,10 @@ Boards with the ST STiH407 SoC shall have the following properties:
 Required root node property:
 compatible = "st,stih407";
 
+Boards with the ST STiH410 SoC shall have the following properties:
+Required root node property:
+compatible = "st,stih410";
+
 Boards with the ST STiH418 SoC shall have the following properties:
 Required root node property:
 compatible = "st,stih418";
index cfcc527..6151999 100644 (file)
@@ -4,7 +4,10 @@ Ethernet nodes are defined to describe on-chip ethernet interfaces in
 APM X-Gene SoC.
 
 Required properties for all the ethernet interfaces:
-- compatible: Should be "apm,xgene-enet"
+- compatible: Should state binding information from the following list,
+  - "apm,xgene-enet":    RGMII based 1G interface
+  - "apm,xgene1-sgenet": SGMII based 1G interface
+  - "apm,xgene1-xgenet": XFI based 10G interface
 - reg: Address and length of the register set for the device. It contains the
   information of registers in the same order as described by reg-names
 - reg-names: Should contain the register set names
index e124847..f0b4cd7 100644 (file)
@@ -19,7 +19,9 @@ the parent DSA node. The maximum number of allowed child nodes is 4
 (DSA_MAX_SWITCHES).
 Each of these switch child nodes should have the following required properties:
 
-- reg                  : Describes the switch address on the MII bus
+- reg                  : Contains two fields. The first one describes the
+                         address on the MII bus. The second is the switch
+                         number that must be unique in cascaded configurations
 - #address-cells       : Must be 1
 - #size-cells          : Must be 0
 
index 98c1667..0f8ed37 100644 (file)
@@ -19,6 +19,16 @@ Required properties:
    providing multiple PM domains (e.g. power controllers), but can be any value
    as specified by device tree binding documentation of particular provider.
 
+Optional properties:
+ - power-domains : A phandle and PM domain specifier as defined by bindings of
+                   the power controller specified by phandle.
+   Some power domains might be powered from another power domain (or have
+   other hardware specific dependencies). For representing such dependency
+   a standard PM domain consumer binding is used. When provided, all domains
+   created by the given provider should be subdomains of the domain
+   specified by this binding. More details about power domain specifier are
+   available in the next section.
+
 Example:
 
        power: power-controller@12340000 {
@@ -30,6 +40,25 @@ Example:
 The node above defines a power controller that is a PM domain provider and
 expects one cell as its phandle argument.
 
+Example 2:
+
+       parent: power-controller@12340000 {
+               compatible = "foo,power-controller";
+               reg = <0x12340000 0x1000>;
+               #power-domain-cells = <1>;
+       };
+
+       child: power-controller@12341000 {
+               compatible = "foo,power-controller";
+               reg = <0x12341000 0x1000>;
+               power-domains = <&parent 0>;
+               #power-domain-cells = <1>;
+       };
+
+The nodes above define two power controllers: 'parent' and 'child'.
+Domains created by the 'child' power controller are subdomains of '0' power
+domain provided by the 'parent' power controller.
+
 ==PM domain consumers==
 
 Required properties:
diff --git a/Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt b/Documentation/devicetree/bindings/serial/axis,etraxfs-uart.txt
new file mode 100644 (file)
index 0000000..ebcbb62
--- /dev/null
@@ -0,0 +1,19 @@
+ETRAX FS UART
+
+Required properties:
+- compatible : "axis,etraxfs-uart"
+- reg: offset and length of the register set for the device.
+- interrupts: device interrupt
+
+Optional properties:
+- {dtr,dsr,ri,cd}-gpios: specify a GPIO for DTR/DSR/RI/CD
+  line respectively.
+
+Example:
+
+serial@b0026000 {
+       compatible = "axis,etraxfs-uart";
+       reg = <0xb0026000 0x1000>;
+       interrupts = <68>;
+       status = "disabled";
+};
index 7f76214..289c40e 100644 (file)
@@ -21,6 +21,18 @@ Optional properties:
 - reg-io-width : the size (in bytes) of the IO accesses that should be
   performed on the device.  If this property is not present then single byte
   accesses are used.
+- dcd-override : Override the DCD modem status signal. This signal will always
+  be reported as active instead of being obtained from the modem status
+  register. Define this if your serial port does not use this pin.
+- dsr-override : Override the DSR modem status signal. This signal will always
+  be reported as active instead of being obtained from the modem status
+  register. Define this if your serial port does not use this pin.
+- cts-override : Override the CTS modem status signal. This signal will always
+  be reported as active instead of being obtained from the modem status
+  register. Define this if your serial port does not use this pin.
+- ri-override : Override the RI modem status signal. This signal will always be
+  reported as inactive instead of being obtained from the modem status register.
+  Define this if your serial port does not use this pin.
 
 Example:
 
@@ -31,6 +43,10 @@ Example:
                interrupts = <10>;
                reg-shift = <2>;
                reg-io-width = <4>;
+               dcd-override;
+               dsr-override;
+               cts-override;
+               ri-override;
        };
 
 Example with one clock:
index 56742bc..7d44eae 100644 (file)
@@ -12,6 +12,9 @@ I. For patch submitters
 
        devicetree@vger.kernel.org
 
+     and Cc: the DT maintainers. Use scripts/get_maintainer.pl to identify
+     all of the DT maintainers.
+
   3) The Documentation/ portion of the patch should come in the series before
      the code implementing the binding.
 
index 43404b1..332e625 100644 (file)
@@ -4,7 +4,7 @@ Required properties:
 - compatible           : "renesas,thermal-<soctype>", "renesas,rcar-thermal"
                          as fallback.
                          Examples with soctypes are:
-                           - "renesas,thermal-r8a73a4" (R-Mobile AP6)
+                           - "renesas,thermal-r8a73a4" (R-Mobile APE6)
                            - "renesas,thermal-r8a7779" (R-Car H1)
                            - "renesas,thermal-r8a7790" (R-Car H2)
                            - "renesas,thermal-r8a7791" (R-Car M2-W)
index 389ca13..fae26d0 100644 (file)
@@ -20,6 +20,7 @@ amlogic       Amlogic, Inc.
 ams    AMS AG
 amstaos        AMS-Taos Inc.
 apm    Applied Micro Circuits Corporation (APM)
+arasan Arasan Chip Systems
 arm    ARM Ltd.
 armadeus       ARMadeus Systems SARL
 asahi-kasei    Asahi Kasei Corp.
@@ -27,6 +28,7 @@ atmel Atmel Corporation
 auo    AU Optronics Corporation
 avago  Avago Technologies
 avic   Shanghai AVIC Optoelectronics Co., Ltd.
+axis   Axis Communications AB
 bosch  Bosch Sensortec GmbH
 brcm   Broadcom Corporation
 buffalo        Buffalo, Inc.
index f90e294..a4d8697 100644 (file)
@@ -26,6 +26,11 @@ Optional properties:
 - atmel,disable : Should be present if you want to disable the watchdog.
 - atmel,idle-halt : Should be present if you want to stop the watchdog when
        entering idle state.
+       CAUTION: This property should be used with care: it stops the
+       watchdog from counting while the CPU is in idle state, so the
+       watchdog reset time depends on mean CPU usage and the watchdog will
+       not reset at all if the CPU stops working while it is in idle state,
+       which is probably not what you want.
 - atmel,dbg-halt : Should be present if you want to stop the watchdog when
        entering debug state.
 
index a63e5e0..92ae734 100644 (file)
@@ -114,6 +114,9 @@ ALPS Absolute Mode - Protocol Version 2
  byte 4:  0   y6   y5   y4   y3   y2   y1   y0
  byte 5:  0   z6   z5   z4   z3   z2   z1   z0
 
+Protocol Version 2 DualPoint devices send standard PS/2 mouse packets for
+the DualPoint Stick.
+
 Dualpoint device -- interleaved packet format
 ---------------------------------------------
 
@@ -127,6 +130,11 @@ Dualpoint device -- interleaved packet format
  byte 7:    0   y6   y5   y4   y3   y2   y1   y0
  byte 8:    0   z6   z5   z4   z3   z2   z1   z0
 
+Devices which use the interleaving format normally send standard PS/2 mouse
+packets for the DualPoint Stick + ALPS Absolute Mode packets for the
+touchpad, switching to the interleaved packet format when both the stick and
+the touchpad are used at the same time.
+
 ALPS Absolute Mode - Protocol Version 3
 ---------------------------------------
 
index c587a96..9670561 100644 (file)
@@ -294,6 +294,12 @@ accordingly. This property does not affect kernel behavior.
 The kernel does not provide button emulation for such devices but treats
 them as any other INPUT_PROP_BUTTONPAD device.
 
+INPUT_PROP_ACCELEROMETER
+-------------------------
+Directional axes on this device (absolute and/or relative x, y, z) represent
+accelerometer data. All other axes retain their meaning. A device must not mix
+regular directional axes and accelerometer axes on the same event node.
+
 Guidelines:
 ==========
 The guidelines below ensure proper single-touch and multi-finger functionality.
index 7b4f59c..b85d000 100644 (file)
@@ -312,9 +312,12 @@ ABS_MT_TOOL_TYPE
 
 The type of approaching tool. A lot of kernel drivers cannot distinguish
 between different tool types, such as a finger or a pen. In such cases, the
-event should be omitted. The protocol currently supports MT_TOOL_FINGER and
-MT_TOOL_PEN [2]. For type B devices, this event is handled by input core;
-drivers should instead use input_mt_report_slot_state().
+event should be omitted. The protocol currently supports MT_TOOL_FINGER,
+MT_TOOL_PEN, and MT_TOOL_PALM [2]. For type B devices, this event is handled
+by input core; drivers should instead use input_mt_report_slot_state().
+A contact's ABS_MT_TOOL_TYPE may change over time while still touching the
+device, because the firmware may not be able to determine which tool is being
+used when it first appears.
 
 ABS_MT_BLOB_ID
 
index bfcb1a6..01aa47d 100644 (file)
@@ -1036,7 +1036,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        Format: {"off" | "on" | "skip[mbr]"}
 
        efi=            [EFI]
-                       Format: { "old_map", "nochunk", "noruntime" }
+                       Format: { "old_map", "nochunk", "noruntime", "debug" }
                        old_map [X86-64]: switch to the old ioremap-based EFI
                        runtime services mapping. 32-bit still uses this one by
                        default.
@@ -1044,6 +1044,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        boot stub, as chunking can cause problems with some
                        firmware implementations.
                        noruntime : disable EFI runtime services support
+                       debug: enable misc debug output
 
        efi_no_storage_paranoia [EFI; X86]
                        Using this parameter you can use more than 50% of
index 596b60c..8446f1e 100644 (file)
@@ -204,266 +204,4 @@ Some common examples:
 
     *  RTC_PIE_ON, RTC_PIE_OFF: These are also emulated by the generic code.
 
-If all else fails, check out the rtc-test.c driver!
-
-
--------------------- 8< ---------------- 8< -----------------------------
-
-/*
- *      Real Time Clock Driver Test/Example Program
- *
- *      Compile with:
- *                  gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
- *
- *      Copyright (C) 1996, Paul Gortmaker.
- *
- *      Released under the GNU General Public License, version 2,
- *      included herein by reference.
- *
- */
-
-#include <stdio.h>
-#include <linux/rtc.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <errno.h>
-
-
-/*
- * This expects the new RTC class driver framework, working with
- * clocks that will often not be clones of what the PC-AT had.
- * Use the command line to specify another RTC if you need one.
- */
-static const char default_rtc[] = "/dev/rtc0";
-
-
-int main(int argc, char **argv)
-{
-       int i, fd, retval, irqcount = 0;
-       unsigned long tmp, data;
-       struct rtc_time rtc_tm;
-       const char *rtc = default_rtc;
-
-       switch (argc) {
-       case 2:
-               rtc = argv[1];
-               /* FALLTHROUGH */
-       case 1:
-               break;
-       default:
-               fprintf(stderr, "usage:  rtctest [rtcdev]\n");
-               return 1;
-       }
-
-       fd = open(rtc, O_RDONLY);
-
-       if (fd ==  -1) {
-               perror(rtc);
-               exit(errno);
-       }
-
-       fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
-
-       /* Turn on update interrupts (one per second) */
-       retval = ioctl(fd, RTC_UIE_ON, 0);
-       if (retval == -1) {
-               if (errno == ENOTTY) {
-                       fprintf(stderr,
-                               "\n...Update IRQs not supported.\n");
-                       goto test_READ;
-               }
-               perror("RTC_UIE_ON ioctl");
-               exit(errno);
-       }
-
-       fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
-                       rtc);
-       fflush(stderr);
-       for (i=1; i<6; i++) {
-               /* This read will block */
-               retval = read(fd, &data, sizeof(unsigned long));
-               if (retval == -1) {
-                       perror("read");
-                       exit(errno);
-               }
-               fprintf(stderr, " %d",i);
-               fflush(stderr);
-               irqcount++;
-       }
-
-       fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
-       fflush(stderr);
-       for (i=1; i<6; i++) {
-               struct timeval tv = {5, 0};     /* 5 second timeout on select */
-               fd_set readfds;
-
-               FD_ZERO(&readfds);
-               FD_SET(fd, &readfds);
-               /* The select will wait until an RTC interrupt happens. */
-               retval = select(fd+1, &readfds, NULL, NULL, &tv);
-               if (retval == -1) {
-                       perror("select");
-                       exit(errno);
-               }
-               /* This read won't block unlike the select-less case above. */
-               retval = read(fd, &data, sizeof(unsigned long));
-               if (retval == -1) {
-                       perror("read");
-                       exit(errno);
-               }
-               fprintf(stderr, " %d",i);
-               fflush(stderr);
-               irqcount++;
-       }
-
-       /* Turn off update interrupts */
-       retval = ioctl(fd, RTC_UIE_OFF, 0);
-       if (retval == -1) {
-               perror("RTC_UIE_OFF ioctl");
-               exit(errno);
-       }
-
-test_READ:
-       /* Read the RTC time/date */
-       retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
-       if (retval == -1) {
-               perror("RTC_RD_TIME ioctl");
-               exit(errno);
-       }
-
-       fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
-               rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
-               rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-       /* Set the alarm to 5 sec in the future, and check for rollover */
-       rtc_tm.tm_sec += 5;
-       if (rtc_tm.tm_sec >= 60) {
-               rtc_tm.tm_sec %= 60;
-               rtc_tm.tm_min++;
-       }
-       if (rtc_tm.tm_min == 60) {
-               rtc_tm.tm_min = 0;
-               rtc_tm.tm_hour++;
-       }
-       if (rtc_tm.tm_hour == 24)
-               rtc_tm.tm_hour = 0;
-
-       retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
-       if (retval == -1) {
-               if (errno == ENOTTY) {
-                       fprintf(stderr,
-                               "\n...Alarm IRQs not supported.\n");
-                       goto test_PIE;
-               }
-               perror("RTC_ALM_SET ioctl");
-               exit(errno);
-       }
-
-       /* Read the current alarm settings */
-       retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
-       if (retval == -1) {
-               perror("RTC_ALM_READ ioctl");
-               exit(errno);
-       }
-
-       fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
-               rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
-
-       /* Enable alarm interrupts */
-       retval = ioctl(fd, RTC_AIE_ON, 0);
-       if (retval == -1) {
-               perror("RTC_AIE_ON ioctl");
-               exit(errno);
-       }
-
-       fprintf(stderr, "Waiting 5 seconds for alarm...");
-       fflush(stderr);
-       /* This blocks until the alarm ring causes an interrupt */
-       retval = read(fd, &data, sizeof(unsigned long));
-       if (retval == -1) {
-               perror("read");
-               exit(errno);
-       }
-       irqcount++;
-       fprintf(stderr, " okay. Alarm rang.\n");
-
-       /* Disable alarm interrupts */
-       retval = ioctl(fd, RTC_AIE_OFF, 0);
-       if (retval == -1) {
-               perror("RTC_AIE_OFF ioctl");
-               exit(errno);
-       }
-
-test_PIE:
-       /* Read periodic IRQ rate */
-       retval = ioctl(fd, RTC_IRQP_READ, &tmp);
-       if (retval == -1) {
-               /* not all RTCs support periodic IRQs */
-               if (errno == ENOTTY) {
-                       fprintf(stderr, "\nNo periodic IRQ support\n");
-                       goto done;
-               }
-               perror("RTC_IRQP_READ ioctl");
-               exit(errno);
-       }
-       fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp);
-
-       fprintf(stderr, "Counting 20 interrupts at:");
-       fflush(stderr);
-
-       /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
-       for (tmp=2; tmp<=64; tmp*=2) {
-
-               retval = ioctl(fd, RTC_IRQP_SET, tmp);
-               if (retval == -1) {
-                       /* not all RTCs can change their periodic IRQ rate */
-                       if (errno == ENOTTY) {
-                               fprintf(stderr,
-                                       "\n...Periodic IRQ rate is fixed\n");
-                               goto done;
-                       }
-                       perror("RTC_IRQP_SET ioctl");
-                       exit(errno);
-               }
-
-               fprintf(stderr, "\n%ldHz:\t", tmp);
-               fflush(stderr);
-
-               /* Enable periodic interrupts */
-               retval = ioctl(fd, RTC_PIE_ON, 0);
-               if (retval == -1) {
-                       perror("RTC_PIE_ON ioctl");
-                       exit(errno);
-               }
-
-               for (i=1; i<21; i++) {
-                       /* This blocks */
-                       retval = read(fd, &data, sizeof(unsigned long));
-                       if (retval == -1) {
-                               perror("read");
-                               exit(errno);
-                       }
-                       fprintf(stderr, " %d",i);
-                       fflush(stderr);
-                       irqcount++;
-               }
-
-               /* Disable periodic interrupts */
-               retval = ioctl(fd, RTC_PIE_OFF, 0);
-               if (retval == -1) {
-                       perror("RTC_PIE_OFF ioctl");
-                       exit(errno);
-               }
-       }
-
-done:
-       fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
-
-       close(fd);
-
-       return 0;
-}
+If all else fails, check out the tools/testing/selftests/timers/rtctest.c test!
index b112efc..bc9f6fe 100644 (file)
@@ -997,7 +997,7 @@ for vm-wide capabilities.
 4.38 KVM_GET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390
+Architectures: x86, s390, arm, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (out)
 Returns: 0 on success; -1 on error
@@ -1011,7 +1011,7 @@ uniprocessor guests).
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86]
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86,arm/arm64]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
                                  which has not yet received an INIT signal [x86]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
@@ -1020,7 +1020,7 @@ Possible values are:
                                  is waiting for an interrupt [x86]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
                                  accessible via KVM_GET_VCPU_EVENTS) [x86]
- - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390]
+ - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390,arm/arm64]
  - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
  - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
                                  [s390]
@@ -1031,11 +1031,15 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not.
 
 4.39 KVM_SET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, s390
+Architectures: x86, s390, arm, arm64
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (in)
 Returns: 0 on success; -1 on error
@@ -1047,6 +1051,10 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
+For arm/arm64:
+
+The only states that are valid are KVM_MP_STATE_STOPPED and
+KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not.
 
 4.40 KVM_SET_IDENTITY_MAP_ADDR
 
@@ -1967,15 +1975,25 @@ registers, find a list below:
   MIPS  | KVM_REG_MIPS_CP0_STATUS       | 32
   MIPS  | KVM_REG_MIPS_CP0_CAUSE        | 32
   MIPS  | KVM_REG_MIPS_CP0_EPC          | 64
+  MIPS  | KVM_REG_MIPS_CP0_PRID         | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG       | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG1      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG2      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG3      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG4      | 32
+  MIPS  | KVM_REG_MIPS_CP0_CONFIG5      | 32
   MIPS  | KVM_REG_MIPS_CP0_CONFIG7      | 32
   MIPS  | KVM_REG_MIPS_CP0_ERROREPC     | 64
   MIPS  | KVM_REG_MIPS_COUNT_CTL        | 64
   MIPS  | KVM_REG_MIPS_COUNT_RESUME     | 64
   MIPS  | KVM_REG_MIPS_COUNT_HZ         | 64
+  MIPS  | KVM_REG_MIPS_FPR_32(0..31)    | 32
+  MIPS  | KVM_REG_MIPS_FPR_64(0..31)    | 64
+  MIPS  | KVM_REG_MIPS_VEC_128(0..31)   | 128
+  MIPS  | KVM_REG_MIPS_FCR_IR           | 32
+  MIPS  | KVM_REG_MIPS_FCR_CSR          | 32
+  MIPS  | KVM_REG_MIPS_MSA_IR           | 32
+  MIPS  | KVM_REG_MIPS_MSA_CSR          | 32
 
 ARM registers are mapped using the lower 32 bits.  The upper 16 of that
 is the register group type, or coprocessor number:
@@ -2029,6 +2047,25 @@ patterns depending on whether they're 32-bit or 64-bit registers:
 MIPS KVM control registers (see above) have the following id bit patterns:
   0x7030 0000 0002 <reg:16>
 
+MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following
+id bit patterns depending on the size of the register being accessed. They are
+always accessed according to the current guest FPU mode (Status.FR and
+Config5.FRE), i.e. as the guest would see them, and they become unpredictable
+if the guest FPU mode is changed. MIPS SIMD Architecture (MSA) vector
+registers (see KVM_REG_MIPS_VEC_128() above) have similar patterns as they
+overlap the FPU registers:
+  0x7020 0000 0003 00 <0:3> <reg:5> (32-bit FPU registers)
+  0x7030 0000 0003 00 <0:3> <reg:5> (64-bit FPU registers)
+  0x7040 0000 0003 00 <0:3> <reg:5> (128-bit MSA vector registers)
+
+MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the
+following id bit patterns:
+  0x7020 0000 0003 01 <0:3> <reg:5>
+
+MIPS MSA control registers (see KVM_REG_MIPS_MSA_{IR,CSR} above) have the
+following id bit patterns:
+  0x7020 0000 0003 02 <0:3> <reg:5>
+
 
 4.69 KVM_GET_ONE_REG
 
@@ -2234,7 +2271,7 @@ into the hash PTE second double word).
 4.75 KVM_IRQFD
 
 Capability: KVM_CAP_IRQFD
-Architectures: x86 s390
+Architectures: x86 s390 arm arm64
 Type: vm ioctl
 Parameters: struct kvm_irqfd (in)
 Returns: 0 on success, -1 on error
@@ -2260,6 +2297,10 @@ Note that closing the resamplefd is not sufficient to disable the
 irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
 and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
 
+On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared
+Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is
+given by gsi + 32.
+
 4.76 KVM_PPC_ALLOCATE_HTAB
 
 Capability: KVM_CAP_PPC_ALLOC_HTAB
@@ -2716,6 +2757,227 @@ The fields in each entry are defined as follows:
    eax, ebx, ecx, edx: the values returned by the cpuid instruction for
          this function/index combination
 
+4.89 KVM_S390_MEM_OP
+
+Capability: KVM_CAP_S390_MEM_OP
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_mem_op (in)
+Returns: = 0 on success,
+         < 0 on generic error (e.g. -EFAULT or -ENOMEM),
+         > 0 if an exception occurred while walking the page tables
+
+Read or write data from/to the logical (virtual) memory of a VCPU.
+
+Parameters are specified via the following structure:
+
+struct kvm_s390_mem_op {
+       __u64 gaddr;            /* the guest address */
+       __u64 flags;            /* flags */
+       __u32 size;             /* amount of bytes */
+       __u32 op;               /* type of operation */
+       __u64 buf;              /* buffer in userspace */
+       __u8 ar;                /* the access register number */
+       __u8 reserved[31];      /* should be set to 0 */
+};
+
+The type of operation is specified in the "op" field. It is either
+KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or
+KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The
+KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check
+whether the corresponding memory access would create an access exception
+(without touching the data in the memory at the destination). In case an
+access exception occurred while walking the MMU tables of the guest, the
+ioctl returns a positive error number to indicate the type of exception.
+This exception is also raised directly at the corresponding VCPU if the
+flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field.
+
+The start address of the memory region has to be specified in the "gaddr"
+field, and the length of the region in the "size" field. "buf" is the buffer
+supplied by the userspace application where the read data should be written
+to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written
+is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL
+when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access
+register number to be used.
+
+The "reserved" field is meant for future extensions. It is not used by
+KVM with the currently defined set of flags.
+
+4.90 KVM_S390_GET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, KVM_S390_GET_SKEYS_NONE if guest is not using storage
+         keys, negative value on error
+
+This ioctl is used to get guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+
+struct kvm_s390_skeys {
+       __u64 start_gfn;
+       __u64 count;
+       __u64 skeydata_addr;
+       __u32 flags;
+       __u32 reserved[9];
+};
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to get.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer large enough to hold count
+bytes. This buffer will be filled with storage key data by the ioctl.
+
+4.91 KVM_S390_SET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, negative value on error
+
+This ioctl is used to set guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+See section on KVM_S390_GET_SKEYS for struct definition.
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to set.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to set. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer containing count bytes of
+storage keys. Each byte in the buffer will be set as the storage key for a
+single frame starting at start_gfn for count frames.
+
+Note: If any architecturally invalid key value is found in the given data then
+the ioctl will return -EINVAL.
+
+4.92 KVM_S390_IRQ
+
+Capability: KVM_CAP_S390_INJECT_IRQ
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq (in)
+Returns: 0 on success, -1 on error
+Errors:
+  EINVAL: interrupt type is invalid
+          type is KVM_S390_SIGP_STOP and flag parameter is invalid value
+          type is KVM_S390_INT_EXTERNAL_CALL and code is bigger
+            than the maximum of VCPUs
+  EBUSY:  type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped
+          type is KVM_S390_SIGP_STOP and a stop irq is already pending
+          type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt
+            is already pending
+
+Allows injecting an interrupt into the guest.
+
+Using struct kvm_s390_irq as a parameter allows
+injecting an additional payload, which is not
+possible via KVM_S390_INTERRUPT.
+
+Interrupt parameters are passed via kvm_s390_irq:
+
+struct kvm_s390_irq {
+       __u64 type;
+       union {
+               struct kvm_s390_io_info io;
+               struct kvm_s390_ext_info ext;
+               struct kvm_s390_pgm_info pgm;
+               struct kvm_s390_emerg_info emerg;
+               struct kvm_s390_extcall_info extcall;
+               struct kvm_s390_prefix_info prefix;
+               struct kvm_s390_stop_info stop;
+               struct kvm_s390_mchk_info mchk;
+               char reserved[64];
+       } u;
+};
+
+type can be one of the following:
+
+KVM_S390_SIGP_STOP - sigp stop; parameter in .stop
+KVM_S390_PROGRAM_INT - program check; parameters in .pgm
+KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix
+KVM_S390_RESTART - restart; no parameters
+KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters
+KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters
+KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
+KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
+KVM_S390_MCHK - machine check interrupt; parameters in .mchk
+
+
+Note that the vcpu ioctl is asynchronous to vcpu execution.
+
+4.94 KVM_S390_GET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (out)
+Returns: number of bytes copied into buffer (>= 0),
+         -EINVAL if buffer size is 0,
+         -ENOBUFS if buffer size is too small to fit all pending interrupts,
+         -EFAULT if the buffer address was invalid
+
+This ioctl allows userspace to retrieve the complete state of all currently
+pending interrupts in a single buffer. Use cases include migration
+and introspection. The parameter structure contains the address of a
+userspace buffer and its length:
+
+struct kvm_s390_irq_state {
+       __u64 buf;
+       __u32 flags;
+       __u32 len;
+       __u32 reserved[4];
+};
+
+Userspace passes in the above struct and for each pending interrupt a
+struct kvm_s390_irq is copied to the provided buffer.
+
+If -ENOBUFS is returned the buffer provided was too small and userspace
+may retry with a bigger buffer.
+
+4.95 KVM_S390_SET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (in)
+Returns: 0 on success,
+         -EFAULT if the buffer address was invalid,
+         -EINVAL for an invalid buffer length (see below),
+         -EBUSY if there were already interrupts pending,
+         errors occurring when actually injecting the
+          interrupt. See KVM_S390_IRQ.
+
+This ioctl allows userspace to set the complete state of all cpu-local
+interrupts currently pending for the vcpu. It is intended for restoring
+interrupt state after a migration. The input parameter is a userspace buffer
+containing a struct kvm_s390_irq_state:
+
+struct kvm_s390_irq_state {
+       __u64 buf;
+       __u32 len;
+       __u32 pad;
+};
+
+The userspace memory referenced by buf contains a struct kvm_s390_irq
+for each interrupt to be injected into the guest.
+If one of the interrupts could not be injected for some reason the
+ioctl aborts.
+
+len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
+and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
+which is the maximum number of possibly pending cpu-local interrupts.
+
 5. The kvm_run structure
 ------------------------
 
@@ -3189,6 +3451,31 @@ Parameters: none
 This capability enables the in-kernel irqchip for s390. Please refer to
 "4.24 KVM_CREATE_IRQCHIP" for details.
 
+6.9 KVM_CAP_MIPS_FPU
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the host Floating Point Unit by the guest. It
+allows the Config1.FP bit to be set to enable the FPU in the guest. Once this is
+done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed
+(depending on the current guest FPU register mode), and the Status.FR,
+Config5.FRE bits are accessible via the KVM API and also from the guest,
+depending on them being supported by the FPU.
+
+6.10 KVM_CAP_MIPS_MSA
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest.
+It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest.
+Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
+accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
+the guest.
+
 7. Capabilities that can be enabled on VMs
 ------------------------------------------
 
@@ -3248,3 +3535,41 @@ All other orders will be handled completely in user space.
 Only privileged operation exceptions will be checked for in the kernel (or even
 in the hardware prior to interception). If this capability is not enabled, the
 old way of handling SIGP orders is used (partially in kernel and user space).
+
+7.3 KVM_CAP_S390_VECTOR_REGISTERS
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success, negative value on error
+
+Allows use of the vector registers introduced with z13 processor, and
+provides for the synchronization between host and user space.  Will
+return -EINVAL if the machine does not support vectors.
+
+7.4 KVM_CAP_S390_USER_STSI
+
+Architectures: s390
+Parameters: none
+
+This capability allows post-handlers for the STSI instruction. After
+initial handling in the kernel, KVM exits to user space with
+KVM_EXIT_S390_STSI to allow user space to insert further data.
+
+Before exiting to userspace, kvm handlers should fill in s390_stsi field of
+vcpu->run:
+struct {
+       __u64 addr;
+       __u8 ar;
+       __u8 reserved;
+       __u8 fc;
+       __u8 sel1;
+       __u16 sel2;
+} s390_stsi;
+
+@addr - guest address of STSI SYSIB
+@fc   - function code
+@sel1 - selector 1
+@sel2 - selector 2
+@ar   - access register number
+
+KVM handlers should exit to userspace with rc = -EREMOTE.
index 4ceef53..d1ad9d5 100644 (file)
@@ -27,6 +27,9 @@ Groups:
     Copies all floating interrupts into a buffer provided by userspace.
     When the buffer is too small it returns -ENOMEM, which is the indication
     for userspace to try again with a bigger buffer.
+    -ENOBUFS is returned when the allocation of a kernelspace buffer has
+    failed.
+    -EFAULT is returned when copying data to userspace failed.
     All interrupts remain pending, i.e. are not deleted from the list of
     currently pending interrupts.
     attr->addr contains the userspace address of the buffer into which all
index a75e3ad..88b8589 100644 (file)
@@ -406,6 +406,12 @@ Protocol:  2.00+
        - If 0, the protected-mode code is loaded at 0x10000.
        - If 1, the protected-mode code is loaded at 0x100000.
 
+  Bit 1 (kernel internal): KASLR_FLAG
+       - Used internally by the compressed kernel to communicate
+         KASLR status to kernel proper.
+         If 1, KASLR enabled.
+         If 0, KASLR disabled.
+
   Bit 5 (write): QUIET_FLAG
        - If 0, print early messages.
        - If 1, suppress early messages.
index 6239a30..b846868 100644 (file)
@@ -637,8 +637,7 @@ F:      drivers/gpu/drm/radeon/radeon_kfd.h
 F:      include/uapi/linux/kfd_ioctl.h
 
 AMD MICROCODE UPDATE SUPPORT
-M:     Andreas Herrmann <herrmann.der.user@googlemail.com>
-L:     amd64-microcode@amd64.org
+M:     Borislav Petkov <bp@alien8.de>
 S:     Maintained
 F:     arch/x86/kernel/cpu/microcode/amd*
 
@@ -1030,6 +1029,16 @@ F:       arch/arm/mach-mxs/
 F:     arch/arm/boot/dts/imx*
 F:     arch/arm/configs/imx*_defconfig
 
+ARM/FREESCALE VYBRID ARM ARCHITECTURE
+M:     Shawn Guo <shawn.guo@linaro.org>
+M:     Sascha Hauer <kernel@pengutronix.de>
+R:     Stefan Agner <stefan@agner.ch>
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+S:     Maintained
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
+F:     arch/arm/mach-imx/*vf610*
+F:     arch/arm/boot/dts/vf*
+
 ARM/GLOMATION GESBC9312SX MACHINE SUPPORT
 M:     Lennert Buytenhek <kernel@wantstofly.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -1176,7 +1185,7 @@ M:        Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-mvebu/
-F:     drivers/rtc/armada38x-rtc
+F:     drivers/rtc/rtc-armada38x.c
 
 ARM/Marvell Berlin SoC support
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
@@ -1188,6 +1197,7 @@ ARM/Marvell Dove/MV78xx0/Orion SOC support
 M:     Jason Cooper <jason@lakedaemon.net>
 M:     Andrew Lunn <andrew@lunn.ch>
 M:     Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
+M:     Gregory Clement <gregory.clement@free-electrons.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-dove/
@@ -1351,6 +1361,7 @@ F:        drivers/i2c/busses/i2c-rk3x.c
 F:     drivers/*/*rockchip*
 F:     drivers/*/*/*rockchip*
 F:     sound/soc/rockchip/
+N:     rockchip
 
 ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
 M:     Kukjin Kim <kgene@kernel.org>
@@ -1664,8 +1675,8 @@ F:        drivers/misc/eeprom/at24.c
 F:     include/linux/platform_data/at24.h
 
 ATA OVER ETHERNET (AOE) DRIVER
-M:     "Ed L. Cashin" <ecashin@coraid.com>
-W:     http://support.coraid.com/support/linux
+M:     "Ed L. Cashin" <ed.cashin@acm.org>
+W:     http://www.openaoe.org/
 S:     Supported
 F:     Documentation/aoe/
 F:     drivers/block/aoe/
@@ -1730,7 +1741,7 @@ S:        Maintained
 F:     drivers/net/ethernet/atheros/
 
 ATM
-M:     Chas Williams <chas@cmf.nrl.navy.mil>
+M:     Chas Williams <3chas3@gmail.com>
 L:     linux-atm-general@lists.sourceforge.net (moderated for non-subscribers)
 L:     netdev@vger.kernel.org
 W:     http://linux-atm.sourceforge.net
@@ -2107,7 +2118,6 @@ F:        drivers/net/ethernet/broadcom/bnx2x/
 
 BROADCOM BCM281XX/BCM11XXX/BCM216XX ARM ARCHITECTURE
 M:     Christian Daudt <bcm@fixthebug.org>
-M:     Matt Porter <mporter@linaro.org>
 M:     Florian Fainelli <f.fainelli@gmail.com>
 L:     bcm-kernel-feedback-list@broadcom.com
 T:     git git://github.com/broadcom/mach-bcm
@@ -2369,8 +2379,9 @@ F:        arch/x86/include/asm/tce.h
 
 CAN NETWORK LAYER
 M:     Oliver Hartkopp <socketcan@hartkopp.net>
+M:     Marc Kleine-Budde <mkl@pengutronix.de>
 L:     linux-can@vger.kernel.org
-W:     http://gitorious.org/linux-can
+W:     https://github.com/linux-can
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
 S:     Maintained
@@ -2386,7 +2397,7 @@ CAN NETWORK DRIVERS
 M:     Wolfgang Grandegger <wg@grandegger.com>
 M:     Marc Kleine-Budde <mkl@pengutronix.de>
 L:     linux-can@vger.kernel.org
-W:     http://gitorious.org/linux-can
+W:     https://github.com/linux-can
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can-next.git
 S:     Maintained
@@ -3241,6 +3252,13 @@ S:       Maintained
 F:     Documentation/hwmon/dme1737
 F:     drivers/hwmon/dme1737.c
 
+DMI/SMBIOS SUPPORT
+M:     Jean Delvare <jdelvare@suse.de>
+S:     Maintained
+F:     drivers/firmware/dmi-id.c
+F:     drivers/firmware/dmi_scan.c
+F:     include/linux/dmi.h
+
 DOCKING STATION DRIVER
 M:     Shaohua Li <shaohua.li@intel.com>
 L:     linux-acpi@vger.kernel.org
@@ -5076,7 +5094,7 @@ S:        Supported
 F:     drivers/platform/x86/intel_menlow.c
 
 INTEL IA32 MICROCODE UPDATE SUPPORT
-M:     Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+M:     Borislav Petkov <bp@alien8.de>
 S:     Maintained
 F:     arch/x86/kernel/cpu/microcode/core*
 F:     arch/x86/kernel/cpu/microcode/intel*
@@ -5117,22 +5135,21 @@ M:      Deepak Saxena <dsaxena@plexity.net>
 S:     Maintained
 F:     drivers/char/hw_random/ixp4xx-rng.c
 
-INTEL ETHERNET DRIVERS (e100/e1000/e1000e/fm10k/igb/igbvf/ixgb/ixgbe/ixgbevf/i40e/i40evf)
+INTEL ETHERNET DRIVERS
 M:     Jeff Kirsher <jeffrey.t.kirsher@intel.com>
-M:     Jesse Brandeburg <jesse.brandeburg@intel.com>
-M:     Bruce Allan <bruce.w.allan@intel.com>
-M:     Carolyn Wyborny <carolyn.wyborny@intel.com>
-M:     Don Skidmore <donald.c.skidmore@intel.com>
-M:     Greg Rose <gregory.v.rose@intel.com>
-M:     Matthew Vick <matthew.vick@intel.com>
-M:     John Ronciak <john.ronciak@intel.com>
-M:     Mitch Williams <mitch.a.williams@intel.com>
-M:     Linux NICS <linux.nics@intel.com>
-L:     e1000-devel@lists.sourceforge.net
+R:     Jesse Brandeburg <jesse.brandeburg@intel.com>
+R:     Shannon Nelson <shannon.nelson@intel.com>
+R:     Carolyn Wyborny <carolyn.wyborny@intel.com>
+R:     Don Skidmore <donald.c.skidmore@intel.com>
+R:     Matthew Vick <matthew.vick@intel.com>
+R:     John Ronciak <john.ronciak@intel.com>
+R:     Mitch Williams <mitch.a.williams@intel.com>
+L:     intel-wired-lan@lists.osuosl.org
 W:     http://www.intel.com/support/feedback.htm
 W:     http://e1000.sourceforge.net/
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net.git
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next.git
+Q:     http://patchwork.ozlabs.org/project/intel-wired-lan/list/
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-queue.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue.git
 S:     Supported
 F:     Documentation/networking/e100.txt
 F:     Documentation/networking/e1000.txt
@@ -5574,6 +5591,8 @@ S:        Supported
 F:     Documentation/*/kvm*.txt
 F:     Documentation/virtual/kvm/
 F:     arch/*/kvm/
+F:     arch/x86/kernel/kvm.c
+F:     arch/x86/kernel/kvmclock.c
 F:     arch/*/include/asm/kvm*
 F:     include/linux/kvm*
 F:     include/uapi/linux/kvm*
@@ -8542,6 +8561,7 @@ F:        include/uapi/linux/timex.h
 F:     kernel/time/clocksource.c
 F:     kernel/time/time*.c
 F:     kernel/time/ntp.c
+F:     tools/testing/selftests/timers/
 
 SC1200 WDT DRIVER
 M:     Zwane Mwaikambo <zwanem@gmail.com>
@@ -10196,6 +10216,13 @@ S:     Maintained
 F:     Documentation/usb/ohci.txt
 F:     drivers/usb/host/ohci*
 
+USB OTG FSM (Finite State Machine)
+M:     Peter Chen <Peter.Chen@freescale.com>
+T:     git git://github.com/hzpeterchen/linux-usb.git
+L:     linux-usb@vger.kernel.org
+S:     Maintained
+F:     drivers/usb/common/usb-otg-fsm.c
+
 USB OVER IP DRIVER
 M:     Valentina Manea <valentina.manea.m@gmail.com>
 M:     Shuah Khan <shuah.kh@samsung.com>
index e6a9b1b..9b76ce1 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 4
 PATCHLEVEL = 0
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION =
 NAME = Hurr durr I'ma sheep
 
 # *DOCUMENTATION*
@@ -779,6 +779,7 @@ KBUILD_ARFLAGS := $(call ar-option,D)
 # check for 'asm goto'
 ifeq ($(shell $(CONFIG_SHELL) $(srctree)/scripts/gcc-goto.sh $(CC)), y)
        KBUILD_CFLAGS += -DCC_HAVE_ASM_GOTO
+       KBUILD_AFLAGS += -DCC_HAVE_ASM_GOTO
 endif
 
 include $(srctree)/scripts/Makefile.kasan
index c8d284d..f535a3f 100644 (file)
@@ -116,7 +116,7 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
 }
 
 static int
-alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime)
+alpha_rtc_set_mmss(struct device *dev, time64_t nowtime)
 {
        int retval = 0;
        int real_seconds, real_minutes, cmos_minutes;
@@ -211,7 +211,7 @@ alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
 static const struct rtc_class_ops alpha_rtc_ops = {
        .read_time = alpha_rtc_read_time,
        .set_time = alpha_rtc_set_time,
-       .set_mmss = alpha_rtc_set_mmss,
+       .set_mmss64 = alpha_rtc_set_mmss,
        .ioctl = alpha_rtc_ioctl,
 };
 
@@ -276,7 +276,7 @@ do_remote_mmss(void *data)
 }
 
 static int
-remote_set_mmss(struct device *dev, unsigned long now)
+remote_set_mmss(struct device *dev, time64_t now)
 {
        union remote_data x;
        if (smp_processor_id() != boot_cpuid) {
@@ -290,7 +290,7 @@ remote_set_mmss(struct device *dev, unsigned long now)
 static const struct rtc_class_ops remote_rtc_ops = {
        .read_time = remote_read_time,
        .set_time = remote_set_time,
-       .set_mmss = remote_set_mmss,
+       .set_mmss64 = remote_set_mmss,
        .ioctl = alpha_rtc_ioctl,
 };
 #endif
index 114234e..edda76f 100644 (file)
@@ -67,7 +67,7 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
               sigset_t *set)
 {
        int err;
-       err = __copy_to_user(&(sf->uc.uc_mcontext.regs), regs,
+       err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs,
                             sizeof(sf->uc.uc_mcontext.regs.scratch));
        err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
 
@@ -83,7 +83,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
        if (!err)
                set_current_blocked(&set);
 
-       err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs),
+       err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch),
                                sizeof(sf->uc.uc_mcontext.regs.scratch));
 
        return err;
@@ -131,6 +131,15 @@ SYSCALL_DEFINE0(rt_sigreturn)
        /* Don't restart from sigreturn */
        syscall_wont_restart(regs);
 
+       /*
+        * Ensure that sigreturn always returns to user mode (in case the
+        * regs saved on user stack got fudged between save and sigreturn)
+        * Otherwise it is easy to panic the kernel with a custom
+        * signal handler and/or restorer which clobbers the status32/ret
+        * to return to a bogus location in kernel mode.
+        */
+       regs->status32 |= STATUS_U_MASK;
+
        return regs->r0;
 
 badframe:
@@ -229,8 +238,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 
        /*
         * handler returns using sigreturn stub provided already by userpsace
+        * If not, nuke the process right away
         */
-       BUG_ON(!(ksig->ka.sa.sa_flags & SA_RESTORER));
+       if(!(ksig->ka.sa.sa_flags & SA_RESTORER))
+               return 1;
+
        regs->blink = (unsigned long)ksig->ka.sa.sa_restorer;
 
        /* User Stack for signal handler will be above the frame just carved */
@@ -296,12 +308,12 @@ static void
 handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 {
        sigset_t *oldset = sigmask_to_save();
-       int ret;
+       int failed;
 
        /* Set up the stack frame */
-       ret = setup_rt_frame(ksig, oldset, regs);
+       failed = setup_rt_frame(ksig, oldset, regs);
 
-       signal_setup_done(ret, ksig, 0);
+       signal_setup_done(failed, ksig, 0);
 }
 
 void do_signal(struct pt_regs *regs)
index 9f1f09a..cf4c0c9 100644 (file)
@@ -619,6 +619,7 @@ config ARCH_PXA
        select GENERIC_CLOCKEVENTS
        select GPIO_PXA
        select HAVE_IDE
+       select IRQ_DOMAIN
        select MULTI_IRQ_HANDLER
        select PLAT_PXA
        select SPARSE_IRQ
index 7f99cd6..eb7bb51 100644 (file)
@@ -150,6 +150,7 @@ machine-$(CONFIG_ARCH_BERLIN)               += berlin
 machine-$(CONFIG_ARCH_CLPS711X)                += clps711x
 machine-$(CONFIG_ARCH_CNS3XXX)         += cns3xxx
 machine-$(CONFIG_ARCH_DAVINCI)         += davinci
+machine-$(CONFIG_ARCH_DIGICOLOR)       += digicolor
 machine-$(CONFIG_ARCH_DOVE)            += dove
 machine-$(CONFIG_ARCH_EBSA110)         += ebsa110
 machine-$(CONFIG_ARCH_EFM32)           += efm32
index 2c6248d..c3255e0 100644 (file)
        cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
        cd-inverted;
 };
+
+&aes {
+       status = "okay";
+};
+
+&sham {
+       status = "okay";
+};
index 83d40f7..6b84937 100644 (file)
 &mmc1 {
        vmmc-supply = <&ldo3_reg>;
 };
-
-&sham {
-       status = "okay";
-};
-
-&aes {
-       status = "okay";
-};
index 7266a00..5c5667a 100644 (file)
        dual_emac_res_vlan = <3>;
 };
 
+&phy_sel {
+       rmii-clock-ext;
+};
+
 &mac {
        pinctrl-names = "default", "sleep";
        pinctrl-0 = <&cpsw_default>;
index 712edce..071b56a 100644 (file)
@@ -99,7 +99,7 @@
        ehrpwm0_tbclk: ehrpwm0_tbclk@44e10664 {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
-               clocks = <&dpll_per_m2_ck>;
+               clocks = <&l4ls_gclk>;
                ti,bit-shift = <0>;
                reg = <0x0664>;
        };
        ehrpwm1_tbclk: ehrpwm1_tbclk@44e10664 {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
-               clocks = <&dpll_per_m2_ck>;
+               clocks = <&l4ls_gclk>;
                ti,bit-shift = <1>;
                reg = <0x0664>;
        };
        ehrpwm2_tbclk: ehrpwm2_tbclk@44e10664 {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
-               clocks = <&dpll_per_m2_ck>;
+               clocks = <&l4ls_gclk>;
                ti,bit-shift = <2>;
                reg = <0x0664>;
        };
index c7dc9da..cfb4968 100644 (file)
        ehrpwm0_tbclk: ehrpwm0_tbclk {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
-               clocks = <&dpll_per_m2_ck>;
+               clocks = <&l4ls_gclk>;
                ti,bit-shift = <0>;
                reg = <0x0664>;
        };
        ehrpwm1_tbclk: ehrpwm1_tbclk {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
-               clocks = <&dpll_per_m2_ck>;
+               clocks = <&l4ls_gclk>;
                ti,bit-shift = <1>;
                reg = <0x0664>;
        };
        ehrpwm2_tbclk: ehrpwm2_tbclk {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
-               clocks = <&dpll_per_m2_ck>;
+               clocks = <&l4ls_gclk>;
                ti,bit-shift = <2>;
                reg = <0x0664>;
        };
        ehrpwm3_tbclk: ehrpwm3_tbclk {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
-               clocks = <&dpll_per_m2_ck>;
+               clocks = <&l4ls_gclk>;
                ti,bit-shift = <4>;
                reg = <0x0664>;
        };
        ehrpwm4_tbclk: ehrpwm4_tbclk {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
-               clocks = <&dpll_per_m2_ck>;
+               clocks = <&l4ls_gclk>;
                ti,bit-shift = <5>;
                reg = <0x0664>;
        };
        ehrpwm5_tbclk: ehrpwm5_tbclk {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
-               clocks = <&dpll_per_m2_ck>;
+               clocks = <&l4ls_gclk>;
                ti,bit-shift = <6>;
                reg = <0x0664>;
        };
index fff0ee6..e7f0a4a 100644 (file)
 
                                        pinctrl_usart3_rts: usart3_rts-0 {
                                                atmel,pins =
-                                                       <AT91_PIOB 8 AT91_PERIPH_B AT91_PINCTRL_NONE>;  /* PC8 periph B */
+                                                       <AT91_PIOC 8 AT91_PERIPH_B AT91_PINCTRL_NONE>;
                                        };
 
                                        pinctrl_usart3_cts: usart3_cts-0 {
                                                atmel,pins =
-                                                       <AT91_PIOB 10 AT91_PERIPH_B AT91_PINCTRL_NONE>; /* PC10 periph B */
+                                                       <AT91_PIOC 10 AT91_PERIPH_B AT91_PINCTRL_NONE>;
                                        };
                                };
 
                        };
 
                        usb1: gadget@fffa4000 {
-                               compatible = "atmel,at91rm9200-udc";
+                               compatible = "atmel,at91sam9260-udc";
                                reg = <0xfffa4000 0x4000>;
                                interrupts = <10 IRQ_TYPE_LEVEL_HIGH 2>;
                                clocks = <&udc_clk>, <&udpck>;
                                atmel,watchdog-type = "hardware";
                                atmel,reset-type = "all";
                                atmel,dbg-halt;
-                               atmel,idle-halt;
                                status = "disabled";
                        };
 
index e247b0b..d55fdf2 100644 (file)
                        };
 
                        usb1: gadget@fffa4000 {
-                               compatible = "atmel,at91rm9200-udc";
+                               compatible = "atmel,at91sam9261-udc";
                                reg = <0xfffa4000 0x4000>;
                                interrupts = <10 IRQ_TYPE_LEVEL_HIGH 2>;
-                               clocks = <&usb>, <&udc_clk>, <&udpck>;
-                               clock-names = "usb_clk", "udc_clk", "udpck";
+                               clocks = <&udc_clk>, <&udpck>;
+                               clock-names = "pclk", "hclk";
+                               atmel,matrix = <&matrix>;
                                status = "disabled";
                        };
 
                        };
 
                        matrix: matrix@ffffee00 {
-                               compatible = "atmel,at91sam9260-bus-matrix";
+                               compatible = "atmel,at91sam9260-bus-matrix", "syscon";
                                reg = <0xffffee00 0x200>;
                        };
 
index 1f67bb4..fce301c 100644 (file)
@@ -69,7 +69,7 @@
 
        sram1: sram@00500000 {
                compatible = "mmio-sram";
-               reg = <0x00300000 0x4000>;
+               reg = <0x00500000 0x4000>;
        };
 
        ahb {
                        };
 
                        usb1: gadget@fff78000 {
-                               compatible = "atmel,at91rm9200-udc";
+                               compatible = "atmel,at91sam9263-udc";
                                reg = <0xfff78000 0x4000>;
                                interrupts = <24 IRQ_TYPE_LEVEL_HIGH 2>;
                                clocks = <&udc_clk>, <&udpck>;
                                atmel,watchdog-type = "hardware";
                                atmel,reset-type = "all";
                                atmel,dbg-halt;
-                               atmel,idle-halt;
                                status = "disabled";
                        };
 
index ee80aa9..488af63 100644 (file)
                                atmel,watchdog-type = "hardware";
                                atmel,reset-type = "all";
                                atmel,dbg-halt;
-                               atmel,idle-halt;
                                status = "disabled";
                        };
 
                        compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
                        reg = <0x00800000 0x100000>;
                        interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>;
-                       clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>;
+                       clocks = <&utmi>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>;
                        clock-names = "usb_clk", "ehci_clk", "hclk", "uhpck";
                        status = "disabled";
                };
index c2666a7..0c53a37 100644 (file)
                                atmel,watchdog-type = "hardware";
                                atmel,reset-type = "all";
                                atmel,dbg-halt;
-                               atmel,idle-halt;
                                status = "disabled";
                        };
 
index 818dabd..d221179 100644 (file)
                                reg = <0x00500000 0x80000
                                       0xf803c000 0x400>;
                                interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>;
-                               clocks = <&usb>, <&udphs_clk>;
+                               clocks = <&utmi>, <&udphs_clk>;
                                clock-names = "hclk", "pclk";
                                status = "disabled";
 
                                atmel,watchdog-type = "hardware";
                                atmel,reset-type = "all";
                                atmel,dbg-halt;
-                               atmel,idle-halt;
                                status = "disabled";
                        };
 
                        compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
                        reg = <0x00700000 0x100000>;
                        interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>;
-                       clocks = <&usb>, <&uhphs_clk>, <&uhpck>;
+                       clocks = <&utmi>, <&uhphs_clk>, <&uhpck>;
                        clock-names = "usb_clk", "ehci_clk", "uhpck";
                        status = "disabled";
                };
index d3a29c1..afe678f 100644 (file)
                >;
        };
 
+       mmc_pins: pinmux_mmc_pins {
+               pinctrl-single,pins = <
+                       DM816X_IOPAD(0x0a70, MUX_MODE0)                 /* SD_POW */
+                       DM816X_IOPAD(0x0a74, MUX_MODE0)                 /* SD_CLK */
+                       DM816X_IOPAD(0x0a78, MUX_MODE0)                 /* SD_CMD */
+                       DM816X_IOPAD(0x0a7C, MUX_MODE0)                 /* SD_DAT0 */
+                       DM816X_IOPAD(0x0a80, MUX_MODE0)                 /* SD_DAT1 */
+                       DM816X_IOPAD(0x0a84, MUX_MODE0)                 /* SD_DAT2 */
+                       DM816X_IOPAD(0x0a88, MUX_MODE0)                 /* SD_DAT3 */
+                       DM816X_IOPAD(0x0a8c, MUX_MODE2)                 /* GP1[7] */
+                       DM816X_IOPAD(0x0a90, MUX_MODE2)                 /* GP1[8] */
+               >;
+       };
+
        usb0_pins: pinmux_usb0_pins {
                pinctrl-single,pins = <
                        DM816X_IOPAD(0x0d00, MUX_MODE0)                 /* USB0_DRVVBUS */
 };
 
 &mmc1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&mmc_pins>;
        vmmc-supply = <&vmmcsd_fixed>;
+       bus-width = <4>;
+       cd-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>;
+       wp-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>;
 };
 
 /* At least dm8168-evm rev c won't support multipoint, later may */
index 3c97b5f..f35715b 100644 (file)
                };
 
                gpio1: gpio@48032000 {
-                       compatible = "ti,omap3-gpio";
+                       compatible = "ti,omap4-gpio";
                        ti,hwmods = "gpio1";
+                       ti,gpio-always-on;
                        reg = <0x48032000 0x1000>;
-                       interrupts = <97>;
+                       interrupts = <96>;
+                       gpio-controller;
+                       #gpio-cells = <2>;
+                       interrupt-controller;
+                       #interrupt-cells = <2>;
                };
 
                gpio2: gpio@4804c000 {
-                       compatible = "ti,omap3-gpio";
+                       compatible = "ti,omap4-gpio";
                        ti,hwmods = "gpio2";
+                       ti,gpio-always-on;
                        reg = <0x4804c000 0x1000>;
-                       interrupts = <99>;
+                       interrupts = <98>;
+                       gpio-controller;
+                       #gpio-cells = <2>;
+                       interrupt-controller;
+                       #interrupt-cells = <2>;
                };
 
                gpmc: gpmc@50000000 {
index 3290a96..7563d7c 100644 (file)
 
        dcan1_pins_default: dcan1_pins_default {
                pinctrl-single,pins = <
-                       0x3d0   (PIN_OUTPUT | MUX_MODE0) /* dcan1_tx */
-                       0x3d4   (MUX_MODE15)            /* dcan1_rx.off */
-                       0x418   (PULL_DIS | MUX_MODE1) /* wakeup0.dcan1_rx */
+                       0x3d0   (PIN_OUTPUT_PULLUP | MUX_MODE0) /* dcan1_tx */
+                       0x418   (PULL_UP | MUX_MODE1) /* wakeup0.dcan1_rx */
                >;
        };
 
        dcan1_pins_sleep: dcan1_pins_sleep {
                pinctrl-single,pins = <
-                       0x3d0   (MUX_MODE15)    /* dcan1_tx.off */
-                       0x3d4   (MUX_MODE15)    /* dcan1_rx.off */
-                       0x418   (MUX_MODE15)    /* wakeup0.off */
+                       0x3d0   (MUX_MODE15 | PULL_UP)  /* dcan1_tx.off */
+                       0x418   (MUX_MODE15 | PULL_UP)  /* wakeup0.off */
                >;
        };
 };
index 127608d..c4659a9 100644 (file)
                                              "wkupclk", "refclk",
                                              "div-clk", "phy-div";
                                #phy-cells = <0>;
-                               ti,hwmods = "pcie1-phy";
                        };
 
                        pcie2_phy: pciephy@4a095000 {
                                              "wkupclk", "refclk",
                                              "div-clk", "phy-div";
                                #phy-cells = <0>;
-                               ti,hwmods = "pcie2-phy";
                                status = "disabled";
                        };
                };
index e0264d0..40ed539 100644 (file)
 
        dcan1_pins_default: dcan1_pins_default {
                pinctrl-single,pins = <
-                       0x3d0   (PIN_OUTPUT | MUX_MODE0) /* dcan1_tx */
-                       0x3d4   (MUX_MODE15)            /* dcan1_rx.off */
-                       0x418   (PULL_DIS | MUX_MODE1) /* wakeup0.dcan1_rx */
+                       0x3d0   (PIN_OUTPUT_PULLUP | MUX_MODE0) /* dcan1_tx */
+                       0x418   (PULL_UP | MUX_MODE1)   /* wakeup0.dcan1_rx */
                >;
        };
 
        dcan1_pins_sleep: dcan1_pins_sleep {
                pinctrl-single,pins = <
-                       0x3d0   (MUX_MODE15)    /* dcan1_tx.off */
-                       0x3d4   (MUX_MODE15)    /* dcan1_rx.off */
-                       0x418   (MUX_MODE15)    /* wakeup0.off */
+                       0x3d0   (MUX_MODE15 | PULL_UP)  /* dcan1_tx.off */
+                       0x418   (MUX_MODE15 | PULL_UP)  /* wakeup0.off */
                >;
        };
 
index 4bdcbd6..99b09a4 100644 (file)
                ti,invert-autoidle-bit;
        };
 
+       dpll_core_byp_mux: dpll_core_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+               ti,bit-shift = <23>;
+               reg = <0x012c>;
+       };
+
        dpll_core_ck: dpll_core_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-core-clock";
-               clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+               clocks = <&sys_clkin1>, <&dpll_core_byp_mux>;
                reg = <0x0120>, <0x0124>, <0x012c>, <0x0128>;
        };
 
                clock-div = <1>;
        };
 
+       dpll_dsp_byp_mux: dpll_dsp_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>;
+               ti,bit-shift = <23>;
+               reg = <0x0240>;
+       };
+
        dpll_dsp_ck: dpll_dsp_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-clock";
-               clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>;
+               clocks = <&sys_clkin1>, <&dpll_dsp_byp_mux>;
                reg = <0x0234>, <0x0238>, <0x0240>, <0x023c>;
        };
 
                clock-div = <1>;
        };
 
+       dpll_iva_byp_mux: dpll_iva_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>;
+               ti,bit-shift = <23>;
+               reg = <0x01ac>;
+       };
+
        dpll_iva_ck: dpll_iva_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-clock";
-               clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>;
+               clocks = <&sys_clkin1>, <&dpll_iva_byp_mux>;
                reg = <0x01a0>, <0x01a4>, <0x01ac>, <0x01a8>;
        };
 
                clock-div = <1>;
        };
 
+       dpll_gpu_byp_mux: dpll_gpu_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+               ti,bit-shift = <23>;
+               reg = <0x02e4>;
+       };
+
        dpll_gpu_ck: dpll_gpu_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-clock";
-               clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+               clocks = <&sys_clkin1>, <&dpll_gpu_byp_mux>;
                reg = <0x02d8>, <0x02dc>, <0x02e4>, <0x02e0>;
        };
 
                clock-div = <1>;
        };
 
+       dpll_ddr_byp_mux: dpll_ddr_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+               ti,bit-shift = <23>;
+               reg = <0x021c>;
+       };
+
        dpll_ddr_ck: dpll_ddr_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-clock";
-               clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+               clocks = <&sys_clkin1>, <&dpll_ddr_byp_mux>;
                reg = <0x0210>, <0x0214>, <0x021c>, <0x0218>;
        };
 
                ti,invert-autoidle-bit;
        };
 
+       dpll_gmac_byp_mux: dpll_gmac_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+               ti,bit-shift = <23>;
+               reg = <0x02b4>;
+       };
+
        dpll_gmac_ck: dpll_gmac_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-clock";
-               clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+               clocks = <&sys_clkin1>, <&dpll_gmac_byp_mux>;
                reg = <0x02a8>, <0x02ac>, <0x02b4>, <0x02b0>;
        };
 
                clock-div = <1>;
        };
 
+       dpll_eve_byp_mux: dpll_eve_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>;
+               ti,bit-shift = <23>;
+               reg = <0x0290>;
+       };
+
        dpll_eve_ck: dpll_eve_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-clock";
-               clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>;
+               clocks = <&sys_clkin1>, <&dpll_eve_byp_mux>;
                reg = <0x0284>, <0x0288>, <0x0290>, <0x028c>;
        };
 
                clock-div = <1>;
        };
 
+       dpll_per_byp_mux: dpll_per_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>;
+               ti,bit-shift = <23>;
+               reg = <0x014c>;
+       };
+
        dpll_per_ck: dpll_per_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-clock";
-               clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>;
+               clocks = <&sys_clkin1>, <&dpll_per_byp_mux>;
                reg = <0x0140>, <0x0144>, <0x014c>, <0x0148>;
        };
 
                clock-div = <1>;
        };
 
+       dpll_usb_byp_mux: dpll_usb_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>;
+               ti,bit-shift = <23>;
+               reg = <0x018c>;
+       };
+
        dpll_usb_ck: dpll_usb_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-j-type-clock";
-               clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>;
+               clocks = <&sys_clkin1>, <&dpll_usb_byp_mux>;
                reg = <0x0180>, <0x0184>, <0x018c>, <0x0188>;
        };
 
index 277b48b..ac6b0ae 100644 (file)
@@ -18,6 +18,7 @@
  */
 
 #include "skeleton.dtsi"
+#include "exynos4-cpu-thermal.dtsi"
 #include <dt-bindings/clock/exynos3250.h>
 
 / {
                        interrupts = <0 216 0>;
                        clocks = <&cmu CLK_TMU_APBIF>;
                        clock-names = "tmu_apbif";
+                       #include "exynos4412-tmu-sensor-conf.dtsi"
                        status = "disabled";
                };
 
diff --git a/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi b/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi
new file mode 100644 (file)
index 0000000..735cb2f
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Device tree sources for Exynos4 thermal zone
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/thermal/thermal.h>
+
+/ {
+thermal-zones {
+       cpu_thermal: cpu-thermal {
+               thermal-sensors = <&tmu 0>;
+               polling-delay-passive = <0>;
+               polling-delay = <0>;
+               trips {
+                       cpu_alert0: cpu-alert-0 {
+                               temperature = <70000>; /* millicelsius */
+                               hysteresis = <10000>; /* millicelsius */
+                               type = "active";
+                       };
+                       cpu_alert1: cpu-alert-1 {
+                               temperature = <95000>; /* millicelsius */
+                               hysteresis = <10000>; /* millicelsius */
+                               type = "active";
+                       };
+                       cpu_alert2: cpu-alert-2 {
+                               temperature = <110000>; /* millicelsius */
+                               hysteresis = <10000>; /* millicelsius */
+                               type = "active";
+                       };
+                       cpu_crit0: cpu-crit-0 {
+                               temperature = <120000>; /* millicelsius */
+                               hysteresis = <0>; /* millicelsius */
+                               type = "critical";
+                       };
+               };
+               cooling-maps {
+                       map0 {
+                               trip = <&cpu_alert0>;
+                       };
+                       map1 {
+                               trip = <&cpu_alert1>;
+                       };
+               };
+       };
+};
+};
index 76173ca..77ea547 100644 (file)
@@ -38,6 +38,7 @@
                i2c5 = &i2c_5;
                i2c6 = &i2c_6;
                i2c7 = &i2c_7;
+               i2c8 = &i2c_8;
                csis0 = &csis_0;
                csis1 = &csis_1;
                fimc0 = &fimc_0;
                compatible = "samsung,exynos4210-pd";
                reg = <0x10023C20 0x20>;
                #power-domain-cells = <0>;
+               power-domains = <&pd_lcd0>;
        };
 
        pd_cam: cam-power-domain@10023C00 {
                status = "disabled";
        };
 
+       i2c_8: i2c@138E0000 {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               compatible = "samsung,s3c2440-hdmiphy-i2c";
+               reg = <0x138E0000 0x100>;
+               interrupts = <0 93 0>;
+               clocks = <&clock CLK_I2C_HDMI>;
+               clock-names = "i2c";
+               status = "disabled";
+
+               hdmi_i2c_phy: hdmiphy@38 {
+                       compatible = "exynos4210-hdmiphy";
+                       reg = <0x38>;
+               };
+       };
+
        spi_0: spi@13920000 {
                compatible = "samsung,exynos4210-spi";
                reg = <0x13920000 0x100>;
                status = "disabled";
        };
 
+       tmu: tmu@100C0000 {
+               #include "exynos4412-tmu-sensor-conf.dtsi"
+       };
+
+       hdmi: hdmi@12D00000 {
+               compatible = "samsung,exynos4210-hdmi";
+               reg = <0x12D00000 0x70000>;
+               interrupts = <0 92 0>;
+               clock-names = "hdmi", "sclk_hdmi", "sclk_pixel", "sclk_hdmiphy",
+                       "mout_hdmi";
+               clocks = <&clock CLK_HDMI>, <&clock CLK_SCLK_HDMI>,
+                       <&clock CLK_SCLK_PIXEL>, <&clock CLK_SCLK_HDMIPHY>,
+                       <&clock CLK_MOUT_HDMI>;
+               phy = <&hdmi_i2c_phy>;
+               power-domains = <&pd_tv>;
+               samsung,syscon-phandle = <&pmu_system_controller>;
+               status = "disabled";
+       };
+
+       mixer: mixer@12C10000 {
+               compatible = "samsung,exynos4210-mixer";
+               interrupts = <0 91 0>;
+               reg = <0x12C10000 0x2100>, <0x12c00000 0x300>;
+               power-domains = <&pd_tv>;
+               status = "disabled";
+       };
+
        ppmu_dmc0: ppmu_dmc0@106a0000 {
                compatible = "samsung,exynos-ppmu";
                reg = <0x106a0000 0x2000>;
index 3d6652a..32c5fd8 100644 (file)
                status = "okay";
        };
 
+       tmu@100C0000 {
+               status = "okay";
+       };
+
+       thermal-zones {
+               cpu_thermal: cpu-thermal {
+                       cooling-maps {
+                               map0 {
+                                    /* Corresponds to 800MHz at freq_table */
+                                    cooling-device = <&cpu0 2 2>;
+                               };
+                               map1 {
+                                    /* Corresponds to 200MHz at freq_table */
+                                    cooling-device = <&cpu0 4 4>;
+                              };
+                      };
+               };
+       };
+
        camera {
                pinctrl-names = "default";
                pinctrl-0 = <>;
index b57e6b8..d4f2b11 100644 (file)
                        assigned-clock-rates = <0>, <160000000>;
                };
        };
+
+       hdmi_en: voltage-regulator-hdmi-5v {
+               compatible = "regulator-fixed";
+               regulator-name = "HDMI_5V";
+               regulator-min-microvolt = <5000000>;
+               regulator-max-microvolt = <5000000>;
+               gpio = <&gpe0 1 0>;
+               enable-active-high;
+       };
+
+       hdmi_ddc: i2c-ddc {
+               compatible = "i2c-gpio";
+               gpios = <&gpe4 2 0 &gpe4 3 0>;
+               i2c-gpio,delay-us = <100>;
+               #address-cells = <1>;
+               #size-cells = <0>;
+
+               pinctrl-0 = <&i2c_ddc_bus>;
+               pinctrl-names = "default";
+               status = "okay";
+       };
+
+       mixer@12C10000 {
+               status = "okay";
+       };
+
+       hdmi@12D00000 {
+               hpd-gpio = <&gpx3 7 0>;
+               pinctrl-names = "default";
+               pinctrl-0 = <&hdmi_hpd>;
+               hdmi-en-supply = <&hdmi_en>;
+               vdd-supply = <&ldo3_reg>;
+               vdd_osc-supply = <&ldo4_reg>;
+               vdd_pll-supply = <&ldo3_reg>;
+               ddc = <&hdmi_ddc>;
+               status = "okay";
+       };
+
+       i2c@138E0000 {
+               status = "okay";
+       };
+};
+
+&pinctrl_1 {
+       hdmi_hpd: hdmi-hpd {
+               samsung,pins = "gpx3-7";
+               samsung,pin-pud = <0>;
+       };
+};
+
+&pinctrl_0 {
+       i2c_ddc_bus: i2c-ddc-bus {
+               samsung,pins = "gpe4-2", "gpe4-3";
+               samsung,pin-function = <2>;
+               samsung,pin-pud = <3>;
+               samsung,pin-drv = <0>;
+       };
 };
 
 &mdma1 {
index 67c832c..be89f83 100644 (file)
@@ -21,6 +21,7 @@
 
 #include "exynos4.dtsi"
 #include "exynos4210-pinctrl.dtsi"
+#include "exynos4-cpu-thermal.dtsi"
 
 / {
        compatible = "samsung,exynos4210", "samsung,exynos4";
                #address-cells = <1>;
                #size-cells = <0>;
 
-               cpu@900 {
+               cpu0: cpu@900 {
                        device_type = "cpu";
                        compatible = "arm,cortex-a9";
                        reg = <0x900>;
+                       cooling-min-level = <4>;
+                       cooling-max-level = <2>;
+                       #cooling-cells = <2>; /* min followed by max */
                };
 
                cpu@901 {
                reg = <0x03860000 0x1000>;
        };
 
-       tmu@100C0000 {
+       tmu: tmu@100C0000 {
                compatible = "samsung,exynos4210-tmu";
                interrupt-parent = <&combiner>;
                reg = <0x100C0000 0x100>;
                interrupts = <2 4>;
                clocks = <&clock CLK_TMU_APBIF>;
                clock-names = "tmu_apbif";
+               samsung,tmu_gain = <15>;
+               samsung,tmu_reference_voltage = <7>;
                status = "disabled";
        };
 
+       thermal-zones {
+               cpu_thermal: cpu-thermal {
+                       polling-delay-passive = <0>;
+                       polling-delay = <0>;
+                       thermal-sensors = <&tmu 0>;
+
+                       trips {
+                             cpu_alert0: cpu-alert-0 {
+                                     temperature = <85000>; /* millicelsius */
+                             };
+                             cpu_alert1: cpu-alert-1 {
+                                     temperature = <100000>; /* millicelsius */
+                             };
+                             cpu_alert2: cpu-alert-2 {
+                                     temperature = <110000>; /* millicelsius */
+                             };
+                       };
+               };
+       };
+
        g2d@12800000 {
                compatible = "samsung,s5pv210-g2d";
                reg = <0x12800000 0x1000>;
                };
        };
 
+       mixer: mixer@12C10000 {
+               clock-names = "mixer", "hdmi", "sclk_hdmi", "vp", "mout_mixer",
+                       "sclk_mixer";
+               clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+                       <&clock CLK_SCLK_HDMI>, <&clock CLK_VP>,
+                       <&clock CLK_MOUT_MIXER>, <&clock CLK_SCLK_MIXER>;
+       };
+
        ppmu_lcd1: ppmu_lcd1@12240000 {
                compatible = "samsung,exynos-ppmu";
                reg = <0x12240000 0x2000>;
index dd0a43e..5be0328 100644 (file)
                #address-cells = <1>;
                #size-cells = <0>;
 
-               cpu@A00 {
+               cpu0: cpu@A00 {
                        device_type = "cpu";
                        compatible = "arm,cortex-a9";
                        reg = <0xA00>;
+                       cooling-min-level = <13>;
+                       cooling-max-level = <7>;
+                       #cooling-cells = <2>; /* min followed by max */
                };
 
                cpu@A01 {
index de80b5b..adb4f6a 100644 (file)
                                        regulator-always-on;
                                };
 
+                               ldo8_reg: ldo@8 {
+                                       regulator-compatible = "LDO8";
+                                       regulator-name = "VDD10_HDMI_1.0V";
+                                       regulator-min-microvolt = <1000000>;
+                                       regulator-max-microvolt = <1000000>;
+                               };
+
+                               ldo10_reg: ldo@10 {
+                                       regulator-compatible = "LDO10";
+                                       regulator-name = "VDDQ_MIPIHSI_1.8V";
+                                       regulator-min-microvolt = <1800000>;
+                                       regulator-max-microvolt = <1800000>;
+                               };
+
                                ldo11_reg: LDO11 {
                                        regulator-name = "VDD18_ABB1_1.8V";
                                        regulator-min-microvolt = <1800000>;
        ehci: ehci@12580000 {
                status = "okay";
        };
+
+       tmu@100C0000 {
+               vtmu-supply = <&ldo10_reg>;
+               status = "okay";
+       };
+
+       thermal-zones {
+               cpu_thermal: cpu-thermal {
+                       cooling-maps {
+                               map0 {
+                                    /* Corresponds to 800MHz at freq_table */
+                                    cooling-device = <&cpu0 7 7>;
+                               };
+                               map1 {
+                                    /* Corresponds to 200MHz at freq_table */
+                                    cooling-device = <&cpu0 13 13>;
+                              };
+                      };
+               };
+       };
+
+       mixer: mixer@12C10000 {
+               status = "okay";
+       };
+
+       hdmi@12D00000 {
+               hpd-gpio = <&gpx3 7 0>;
+               pinctrl-names = "default";
+               pinctrl-0 = <&hdmi_hpd>;
+               vdd-supply = <&ldo8_reg>;
+               vdd_osc-supply = <&ldo10_reg>;
+               vdd_pll-supply = <&ldo8_reg>;
+               ddc = <&hdmi_ddc>;
+               status = "okay";
+       };
+
+       hdmi_ddc: i2c@13880000 {
+               status = "okay";
+               pinctrl-names = "default";
+               pinctrl-0 = <&i2c2_bus>;
+       };
+
+       i2c@138E0000 {
+               status = "okay";
+       };
 };
 
 &pinctrl_1 {
                samsung,pin-pud = <0>;
                samsung,pin-drv = <0>;
        };
+
+       hdmi_hpd: hdmi-hpd {
+               samsung,pins = "gpx3-7";
+               samsung,pin-pud = <1>;
+       };
 };
diff --git a/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi b/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi
new file mode 100644 (file)
index 0000000..e3f7934
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Device tree sources for Exynos4412 TMU sensor configuration
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/thermal/thermal_exynos.h>
+
+#thermal-sensor-cells = <0>; /* no specifier cells: zones use the bare phandle, e.g. <&tmu_cpu1> */
+samsung,tmu_gain = <8>;
+samsung,tmu_reference_voltage = <16>;
+samsung,tmu_noise_cancel_mode = <4>;
+samsung,tmu_efuse_value = <55>;
+samsung,tmu_min_efuse_value = <40>;
+samsung,tmu_max_efuse_value = <100>;
+samsung,tmu_first_point_trim = <25>;
+samsung,tmu_second_point_trim = <85>;
+samsung,tmu_default_temp_offset = <50>;
+samsung,tmu_cal_type = <TYPE_ONE_POINT_TRIMMING>;
index 21f7480..173ffa4 100644 (file)
                pulldown-ohm = <100000>; /* 100K */
                io-channels = <&adc 2>;  /* Battery temperature */
        };
+
+       thermal-zones {
+               cpu_thermal: cpu-thermal {
+                       cooling-maps {
+                               map0 {
+                                    /* Corresponds to 800MHz at freq_table */
+                                    cooling-device = <&cpu0 7 7>;
+                               };
+                               map1 {
+                                    /* Corresponds to 200MHz at freq_table */
+                                    cooling-device = <&cpu0 13 13>;
+                              };
+                      };
+               };
+       };
 };
 
 &pmu_system_controller {
index 0f6ec93..68ad43b 100644 (file)
                #address-cells = <1>;
                #size-cells = <0>;
 
-               cpu@A00 {
+               cpu0: cpu@A00 {
                        device_type = "cpu";
                        compatible = "arm,cortex-a9";
                        reg = <0xA00>;
+                       cooling-min-level = <13>;
+                       cooling-max-level = <7>;
+                       #cooling-cells = <2>; /* min followed by max */
                };
 
                cpu@A01 {
index f5e0ae7..6a6abe1 100644 (file)
@@ -19,6 +19,7 @@
 
 #include "exynos4.dtsi"
 #include "exynos4x12-pinctrl.dtsi"
+#include "exynos4-cpu-thermal.dtsi"
 
 / {
        aliases {
                clock-names = "tmu_apbif";
                status = "disabled";
        };
+
+       hdmi: hdmi@12D00000 {
+               compatible = "samsung,exynos4212-hdmi";
+       };
+
+       mixer: mixer@12C10000 {
+               compatible = "samsung,exynos4212-mixer";
+               clock-names = "mixer", "hdmi", "sclk_hdmi", "vp";
+               clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+                        <&clock CLK_SCLK_HDMI>, <&clock CLK_VP>;
+       };
 };
index 9bb1b0b..adbde1a 100644 (file)
@@ -20,7 +20,7 @@
 #include <dt-bindings/clock/exynos5250.h>
 #include "exynos5.dtsi"
 #include "exynos5250-pinctrl.dtsi"
-
+#include "exynos4-cpu-thermal.dtsi"
 #include <dt-bindings/clock/exynos-audss-clk.h>
 
 / {
                #address-cells = <1>;
                #size-cells = <0>;
 
-               cpu@0 {
+               cpu0: cpu@0 {
                        device_type = "cpu";
                        compatible = "arm,cortex-a15";
                        reg = <0>;
                        clock-frequency = <1700000000>;
+                       cooling-min-level = <15>;
+                       cooling-max-level = <9>;
+                       #cooling-cells = <2>; /* min followed by max */
                };
                cpu@1 {
                        device_type = "cpu";
                #power-domain-cells = <0>;
        };
 
+       pd_disp1: disp1-power-domain@100440A0 {
+               compatible = "samsung,exynos4210-pd";
+               reg = <0x100440A0 0x20>;
+               #power-domain-cells = <0>;
+       };
+
        clock: clock-controller@10010000 {
                compatible = "samsung,exynos5250-clock";
                reg = <0x10010000 0x30000>;
                status = "disabled";
        };
 
-       tmu@10060000 {
+       tmu: tmu@10060000 {
                compatible = "samsung,exynos5250-tmu";
                reg = <0x10060000 0x100>;
                interrupts = <0 65 0>;
                clocks = <&clock CLK_TMU>;
                clock-names = "tmu_apbif";
+               #include "exynos4412-tmu-sensor-conf.dtsi"
+       };
+
+       thermal-zones {
+               cpu_thermal: cpu-thermal {
+                       polling-delay-passive = <0>;
+                       polling-delay = <0>;
+                       thermal-sensors = <&tmu>; /* tmu has #thermal-sensor-cells = <0> */
+
+                       cooling-maps {
+                               map0 {
+                                    /* Corresponds to 800MHz at freq_table */
+                                    cooling-device = <&cpu0 9 9>;
+                               };
+                               map1 {
+                                    /* Corresponds to 200MHz at freq_table */
+                                    cooling-device = <&cpu0 15 15>;
+                              };
+                      };
+               };
        };
 
        serial@12C00000 {
        hdmi: hdmi {
                compatible = "samsung,exynos4212-hdmi";
                reg = <0x14530000 0x70000>;
+               power-domains = <&pd_disp1>;
                interrupts = <0 95 0>;
                clocks = <&clock CLK_HDMI>, <&clock CLK_SCLK_HDMI>,
                         <&clock CLK_SCLK_PIXEL>, <&clock CLK_SCLK_HDMIPHY>,
        mixer {
                compatible = "samsung,exynos5250-mixer";
                reg = <0x14450000 0x10000>;
+               power-domains = <&pd_disp1>;
                interrupts = <0 94 0>;
-               clocks = <&clock CLK_MIXER>, <&clock CLK_SCLK_HDMI>;
-               clock-names = "mixer", "sclk_hdmi";
+               clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+                        <&clock CLK_SCLK_HDMI>;
+               clock-names = "mixer", "hdmi", "sclk_hdmi";
        };
 
        dp_phy: video-phy@10040720 {
        };
 
        dp: dp-controller@145B0000 {
+               power-domains = <&pd_disp1>;
                clocks = <&clock CLK_DP>;
                clock-names = "dp";
                phys = <&dp_phy>;
        };
 
        fimd: fimd@14400000 {
+               power-domains = <&pd_disp1>;
                clocks = <&clock CLK_SCLK_FIMD1>, <&clock CLK_FIMD1>;
                clock-names = "sclk_fimd", "fimd";
        };
diff --git a/arch/arm/boot/dts/exynos5420-trip-points.dtsi b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
new file mode 100644 (file)
index 0000000..5d31fc1
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Device tree sources for default Exynos5420 thermal zone definition
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+polling-delay-passive = <0>;
+polling-delay = <0>;
+trips {
+       cpu-alert-0 {
+               temperature = <85000>; /* millicelsius */
+               hysteresis = <10000>; /* millicelsius */
+               type = "active";
+       };
+       cpu-alert-1 {
+               temperature = <103000>; /* millicelsius */
+               hysteresis = <10000>; /* millicelsius */
+               type = "active";
+       };
+       cpu-alert-2 {
+               temperature = <110000>; /* millicelsius */
+               hysteresis = <10000>; /* millicelsius */
+               type = "active";
+       };
+       cpu-crit-0 {
+               temperature = <120000>; /* millicelsius (120 C) */
+               hysteresis = <0>; /* millicelsius */
+               type = "critical";
+       };
+};
index 9dc2e97..c0e98cf 100644 (file)
                compatible = "samsung,exynos5420-mixer";
                reg = <0x14450000 0x10000>;
                interrupts = <0 94 0>;
-               clocks = <&clock CLK_MIXER>, <&clock CLK_SCLK_HDMI>;
-               clock-names = "mixer", "sclk_hdmi";
+               clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+                        <&clock CLK_SCLK_HDMI>;
+               clock-names = "mixer", "hdmi", "sclk_hdmi";
                power-domains = <&disp_pd>;
        };
 
                interrupts = <0 65 0>;
                clocks = <&clock CLK_TMU>;
                clock-names = "tmu_apbif";
+               #include "exynos4412-tmu-sensor-conf.dtsi"
        };
 
        tmu_cpu1: tmu@10064000 {
                interrupts = <0 183 0>;
                clocks = <&clock CLK_TMU>;
                clock-names = "tmu_apbif";
+               #include "exynos4412-tmu-sensor-conf.dtsi"
        };
 
        tmu_cpu2: tmu@10068000 {
                interrupts = <0 184 0>;
                clocks = <&clock CLK_TMU>, <&clock CLK_TMU>;
                clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+               #include "exynos4412-tmu-sensor-conf.dtsi"
        };
 
        tmu_cpu3: tmu@1006c000 {
                interrupts = <0 185 0>;
                clocks = <&clock CLK_TMU>, <&clock CLK_TMU_GPU>;
                clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+               #include "exynos4412-tmu-sensor-conf.dtsi"
        };
 
        tmu_gpu: tmu@100a0000 {
                interrupts = <0 215 0>;
                clocks = <&clock CLK_TMU_GPU>, <&clock CLK_TMU>;
                clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+               #include "exynos4412-tmu-sensor-conf.dtsi"
+       };
+
+       thermal-zones {
+               cpu0_thermal: cpu0-thermal {
+                       thermal-sensors = <&tmu_cpu0>;
+                       #include "exynos5420-trip-points.dtsi"
+               };
+               cpu1_thermal: cpu1-thermal {
+                      thermal-sensors = <&tmu_cpu1>;
+                      #include "exynos5420-trip-points.dtsi"
+               };
+               cpu2_thermal: cpu2-thermal {
+                      thermal-sensors = <&tmu_cpu2>;
+                      #include "exynos5420-trip-points.dtsi"
+               };
+               cpu3_thermal: cpu3-thermal {
+                      thermal-sensors = <&tmu_cpu3>;
+                      #include "exynos5420-trip-points.dtsi"
+               };
+               gpu_thermal: gpu-thermal {
+                      thermal-sensors = <&tmu_gpu>;
+                      #include "exynos5420-trip-points.dtsi"
+               };
        };
 
         watchdog: watchdog@101D0000 {
diff --git a/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi b/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi
new file mode 100644 (file)
index 0000000..7b2fba0
--- /dev/null
@@ -0,0 +1,24 @@
+/*
+ * Device tree sources for Exynos5440 TMU sensor configuration
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/thermal/thermal_exynos.h>
+
+#thermal-sensor-cells = <0>; /* no specifier cells: zones use the bare phandle, e.g. <&tmuctrl_1> */
+samsung,tmu_gain = <5>;
+samsung,tmu_reference_voltage = <16>;
+samsung,tmu_noise_cancel_mode = <4>;
+samsung,tmu_efuse_value = <0x5d2d>;
+samsung,tmu_min_efuse_value = <16>;
+samsung,tmu_max_efuse_value = <76>;
+samsung,tmu_first_point_trim = <25>;
+samsung,tmu_second_point_trim = <70>;
+samsung,tmu_default_temp_offset = <25>;
+samsung,tmu_cal_type = <TYPE_ONE_POINT_TRIMMING>;
diff --git a/arch/arm/boot/dts/exynos5440-trip-points.dtsi b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
new file mode 100644 (file)
index 0000000..48adfa8
--- /dev/null
@@ -0,0 +1,25 @@
+/*
+ * Device tree sources for default Exynos5440 thermal zone definition
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+polling-delay-passive = <0>;
+polling-delay = <0>;
+trips {
+       cpu-alert-0 {
+               temperature = <100000>; /* millicelsius */
+               hysteresis = <0>; /* millicelsius */
+               type = "active";
+       };
+       cpu-crit-0 {
+               temperature = <105000>; /* millicelsius (105 C) */
+               hysteresis = <0>; /* millicelsius */
+               type = "critical";
+       };
+};
index 8f3373c..59d9416 100644 (file)
                interrupts = <0 58 0>;
                clocks = <&clock CLK_B_125>;
                clock-names = "tmu_apbif";
+               #include "exynos5440-tmu-sensor-conf.dtsi"
        };
 
        tmuctrl_1: tmuctrl@16011C {
                interrupts = <0 58 0>;
                clocks = <&clock CLK_B_125>;
                clock-names = "tmu_apbif";
+               #include "exynos5440-tmu-sensor-conf.dtsi"
        };
 
        tmuctrl_2: tmuctrl@160120 {
                interrupts = <0 58 0>;
                clocks = <&clock CLK_B_125>;
                clock-names = "tmu_apbif";
+               #include "exynos5440-tmu-sensor-conf.dtsi"
+       };
+
+       thermal-zones {
+               cpu0_thermal: cpu0-thermal {
+                       thermal-sensors = <&tmuctrl_0>;
+                       #include "exynos5440-trip-points.dtsi"
+               };
+               cpu1_thermal: cpu1-thermal {
+                      thermal-sensors = <&tmuctrl_1>;
+                      #include "exynos5440-trip-points.dtsi"
+               };
+               cpu2_thermal: cpu2-thermal {
+                      thermal-sensors = <&tmuctrl_2>;
+                      #include "exynos5440-trip-points.dtsi"
+               };
        };
 
        sata@210000 {
index f1cd214..a626e6d 100644 (file)
@@ -35,6 +35,7 @@
                        regulator-max-microvolt = <5000000>;
                        gpio = <&gpio3 22 0>;
                        enable-active-high;
+                       vin-supply = <&swbst_reg>;
                };
 
                reg_usb_h1_vbus: regulator@1 {
@@ -45,6 +46,7 @@
                        regulator-max-microvolt = <5000000>;
                        gpio = <&gpio1 29 0>;
                        enable-active-high;
+                       vin-supply = <&swbst_reg>;
                };
 
                reg_audio: regulator@2 {
index fda4932..945887d 100644 (file)
@@ -52,6 +52,7 @@
                        regulator-max-microvolt = <5000000>;
                        gpio = <&gpio4 0 0>;
                        enable-active-high;
+                       vin-supply = <&swbst_reg>;
                };
 
                reg_usb_otg2_vbus: regulator@1 {
@@ -62,6 +63,7 @@
                        regulator-max-microvolt = <5000000>;
                        gpio = <&gpio4 2 0>;
                        enable-active-high;
+                       vin-supply = <&swbst_reg>;
                };
 
                reg_aud3v: regulator@2 {
index f4f78c4..3fdc84f 100644 (file)
@@ -92,6 +92,8 @@
                        ti,hwmods = "aes";
                        reg = <0x480c5000 0x50>;
                        interrupts = <0>;
+                       dmas = <&sdma 65 &sdma 66>;
+                       dma-names = "tx", "rx";
                };
 
                prm: prm@48306000 {
                        ti,hwmods = "sham";
                        reg = <0x480c3000 0x64>;
                        interrupts = <49>;
+                       dmas = <&sdma 69>;
+                       dma-names = "rx";
                };
 
                smartreflex_core: smartreflex@480cb000 {
index 19212ac..de8a3d4 100644 (file)
@@ -13,7 +13,7 @@
 
 core_thermal: core_thermal {
        polling-delay-passive = <250>; /* milliseconds */
-       polling-delay = <1000>; /* milliseconds */
+       polling-delay = <500>; /* milliseconds */
 
                        /* sensor       ID */
        thermal-sensors = <&bandgap     2>;
index 1b87aca..bc3090f 100644 (file)
@@ -13,7 +13,7 @@
 
 gpu_thermal: gpu_thermal {
        polling-delay-passive = <250>; /* milliseconds */
-       polling-delay = <1000>; /* milliseconds */
+       polling-delay = <500>; /* milliseconds */
 
                        /* sensor       ID */
        thermal-sensors = <&bandgap     1>;
index ddff674..4a485b6 100644 (file)
        };
 };
 
+&cpu_thermal {
+       polling-delay = <500>; /* milliseconds */
+};
+
 /include/ "omap54xx-clocks.dtsi"
index 58c2746..83b425f 100644 (file)
                ti,index-starts-at-one;
        };
 
+       dpll_core_byp_mux: dpll_core_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin>, <&dpll_abe_m3x2_ck>;
+               ti,bit-shift = <23>;
+               reg = <0x012c>;
+       };
+
        dpll_core_ck: dpll_core_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-core-clock";
-               clocks = <&sys_clkin>, <&dpll_abe_m3x2_ck>;
+               clocks = <&sys_clkin>, <&dpll_core_byp_mux>;
                reg = <0x0120>, <0x0124>, <0x012c>, <0x0128>;
        };
 
                clock-div = <1>;
        };
 
+       dpll_iva_byp_mux: dpll_iva_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin>, <&iva_dpll_hs_clk_div>;
+               ti,bit-shift = <23>;
+               reg = <0x01ac>;
+       };
+
        dpll_iva_ck: dpll_iva_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-clock";
-               clocks = <&sys_clkin>, <&iva_dpll_hs_clk_div>;
+               clocks = <&sys_clkin>, <&dpll_iva_byp_mux>;
                reg = <0x01a0>, <0x01a4>, <0x01ac>, <0x01a8>;
        };
 
        };
 };
 &cm_core_clocks {
+
+       dpll_per_byp_mux: dpll_per_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin>, <&per_dpll_hs_clk_div>;
+               ti,bit-shift = <23>;
+               reg = <0x014c>;
+       };
+
        dpll_per_ck: dpll_per_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-clock";
-               clocks = <&sys_clkin>, <&per_dpll_hs_clk_div>;
+               clocks = <&sys_clkin>, <&dpll_per_byp_mux>;
                reg = <0x0140>, <0x0144>, <0x014c>, <0x0148>;
        };
 
                ti,index-starts-at-one;
        };
 
+       dpll_usb_byp_mux: dpll_usb_byp_mux {
+               #clock-cells = <0>;
+               compatible = "ti,mux-clock";
+               clocks = <&sys_clkin>, <&usb_dpll_hs_clk_div>;
+               ti,bit-shift = <23>;
+               reg = <0x018c>;
+       };
+
        dpll_usb_ck: dpll_usb_ck {
                #clock-cells = <0>;
                compatible = "ti,omap4-dpll-j-type-clock";
-               clocks = <&sys_clkin>, <&usb_dpll_hs_clk_div>;
+               clocks = <&sys_clkin>, <&dpll_usb_byp_mux>;
                reg = <0x0180>, <0x0184>, <0x018c>, <0x0188>;
        };
 
index d771f68..eccc78d 100644 (file)
                        "mac_clk_rx", "mac_clk_tx",
                        "clk_mac_ref", "clk_mac_refout",
                        "aclk_mac", "pclk_mac";
+               status = "disabled";
        };
 
        usb_host0_ehci: usb@ff500000 {
index 261311b..367af53 100644 (file)
                                atmel,watchdog-type = "hardware";
                                atmel,reset-type = "all";
                                atmel,dbg-halt;
-                               atmel,idle-halt;
                                status = "disabled";
                        };
 
                        compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
                        reg = <0x00700000 0x100000>;
                        interrupts = <32 IRQ_TYPE_LEVEL_HIGH 2>;
-                       clocks = <&usb>, <&uhphs_clk>, <&uhpck>;
+                       clocks = <&utmi>, <&uhphs_clk>, <&uhpck>;
                        clock-names = "usb_clk", "ehci_clk", "uhpck";
                        status = "disabled";
                };
index d986b41..4303874 100644 (file)
@@ -66,6 +66,7 @@
                gpio4 = &pioE;
                tcb0 = &tcb0;
                tcb1 = &tcb1;
+               i2c0 = &i2c0;
                i2c2 = &i2c2;
        };
        cpus {
                        compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
                        reg = <0x00600000 0x100000>;
                        interrupts = <46 IRQ_TYPE_LEVEL_HIGH 2>;
-                       clocks = <&usb>, <&uhphs_clk>, <&uhpck>;
+                       clocks = <&utmi>, <&uhphs_clk>, <&uhpck>;
                        clock-names = "usb_clk", "ehci_clk", "uhpck";
                        status = "disabled";
                };
 
                                        lcdck: lcdck {
                                                #clock-cells = <0>;
-                                               reg = <4>;
-                                               clocks = <&smd>;
+                                               reg = <3>;
+                                               clocks = <&mck>;
                                        };
 
                                        smdck: smdck {
                                                reg = <50>;
                                        };
 
-                                       lcd_clk: lcd_clk {
+                                       lcdc_clk: lcdc_clk {
                                                #clock-cells = <0>;
                                                reg = <51>;
                                        };
index 252c3d1..d9176e6 100644 (file)
                        #address-cells = <1>;
                        #size-cells = <0>;
                        reg = <0xfff01000 0x1000>;
-                       interrupts = <0 156 4>;
+                       interrupts = <0 155 4>;
                        num-cs = <4>;
                        clocks = <&spi_m_clk>;
                        status = "disabled";
                        reg-shift = <2>;
                        reg-io-width = <4>;
                        clocks = <&l4_sp_clk>;
+                       dmas = <&pdma 28>,
+                              <&pdma 29>;
+                       dma-names = "tx", "rx";
                };
 
                uart1: serial1@ffc03000 {
                        reg-shift = <2>;
                        reg-io-width = <4>;
                        clocks = <&l4_sp_clk>;
+                       dmas = <&pdma 30>,
+                              <&pdma 31>;
+                       dma-names = "tx", "rx";
                };
 
                rst: rstmgr@ffd05000 {
index ab7891c..75742f8 100644 (file)
        model = "Olimex A10-OLinuXino-LIME";
        compatible = "olimex,a10-olinuxino-lime", "allwinner,sun4i-a10";
 
+       cpus {
+               cpu0: cpu@0 {
+                       /*
+                        * The A10-Lime is known to be unstable
+                        * when running at 1008 MHz
+                        */
+                       operating-points = <
+                               /* kHz    uV */
+                               912000  1350000
+                               864000  1300000
+                               624000  1250000
+                               >;
+                       cooling-max-level = <2>;
+               };
+       };
+
        soc@01c00000 {
                emac: ethernet@01c0b000 {
                        pinctrl-names = "default";
index 5c29258..eebb785 100644 (file)
@@ -75,7 +75,6 @@
                        clock-latency = <244144>; /* 8 32k periods */
                        operating-points = <
                                /* kHz    uV */
-                               1056000 1500000
                                1008000 1400000
                                912000  1350000
                                864000  1300000
@@ -83,7 +82,7 @@
                                >;
                        #cooling-cells = <2>;
                        cooling-min-level = <0>;
-                       cooling-max-level = <4>;
+                       cooling-max-level = <3>;
                };
        };
 
index f8818f1..883cb48 100644 (file)
@@ -47,7 +47,6 @@
                        clock-latency = <244144>; /* 8 32k periods */
                        operating-points = <
                                /* kHz    uV */
-                               1104000 1500000
                                1008000 1400000
                                912000  1350000
                                864000  1300000
@@ -57,7 +56,7 @@
                                >;
                        #cooling-cells = <2>;
                        cooling-min-level = <0>;
-                       cooling-max-level = <6>;
+                       cooling-max-level = <5>;
                };
        };
 
index 3a8530b..fdd1817 100644 (file)
                        clock-latency = <244144>; /* 8 32k periods */
                        operating-points = <
                                /* kHz    uV */
-                               1008000 1450000
                                960000  1400000
                                912000  1400000
                                864000  1300000
                                >;
                        #cooling-cells = <2>;
                        cooling-min-level = <0>;
-                       cooling-max-level = <7>;
+                       cooling-max-level = <6>;
                };
 
                cpu@1 {
index 6eaddc4..37dc0fe 100644 (file)
@@ -151,8 +151,6 @@ static int bL_switch_to(unsigned int new_cluster_id)
        unsigned int mpidr, this_cpu, that_cpu;
        unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
        struct completion inbound_alive;
-       struct tick_device *tdev;
-       enum clock_event_mode tdev_mode;
        long volatile *handshake_ptr;
        int ipi_nr, ret;
 
@@ -219,13 +217,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
        /* redirect GIC's SGIs to our counterpart */
        gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
 
-       tdev = tick_get_device(this_cpu);
-       if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
-               tdev = NULL;
-       if (tdev) {
-               tdev_mode = tdev->evtdev->mode;
-               clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
-       }
+       tick_suspend_local();
 
        ret = cpu_pm_enter();
 
@@ -251,11 +243,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
 
        ret = cpu_pm_exit();
 
-       if (tdev) {
-               clockevents_set_mode(tdev->evtdev, tdev_mode);
-               clockevents_program_event(tdev->evtdev,
-                                         tdev->evtdev->next_event, 1);
-       }
+       tick_resume_local();
 
        trace_cpu_migrate_finish(ktime_get_real_ns(), ib_mpidr);
        local_fiq_enable();
index f2670f6..811e72b 100644 (file)
@@ -70,6 +70,7 @@ CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
 # CONFIG_SCSI_LOWLEVEL is not set
 CONFIG_NETDEVICES=y
+CONFIG_ARM_AT91_ETHER=y
 CONFIG_MACB=y
 # CONFIG_NET_VENDOR_BROADCOM is not set
 CONFIG_DM9000=y
index b7e6b6f..06075b6 100644 (file)
@@ -99,7 +99,7 @@ CONFIG_PCI_RCAR_GEN2=y
 CONFIG_PCI_RCAR_GEN2_PCIE=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_SMP=y
-CONFIG_NR_CPUS=8
+CONFIG_NR_CPUS=16
 CONFIG_HIGHPTE=y
 CONFIG_CMA=y
 CONFIG_ARM_APPENDED_DTB=y
index a097cff..8e10859 100644 (file)
@@ -377,6 +377,7 @@ CONFIG_PWM_TWL=m
 CONFIG_PWM_TWL_LED=m
 CONFIG_OMAP_USB2=m
 CONFIG_TI_PIPE3=y
+CONFIG_TWL4030_USB=m
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_FS_XATTR is not set
index 41d856e..510c747 100644 (file)
@@ -3,8 +3,6 @@
 CONFIG_SYSVIPC=y
 CONFIG_IRQ_DOMAIN_DEBUG=y
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED=y
-CONFIG_SYSFS_DEPRECATED_V2=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EMBEDDED=y
 CONFIG_SLAB=y
index 38840a8..8f6a570 100644 (file)
@@ -4,6 +4,7 @@ CONFIG_BLK_DEV_INITRD=y
 CONFIG_PERF_EVENTS=y
 CONFIG_ARCH_SUNXI=y
 CONFIG_SMP=y
+CONFIG_NR_CPUS=8
 CONFIG_AEABI=y
 CONFIG_HIGHMEM=y
 CONFIG_HIGHPTE=y
index f489fda..37fe607 100644 (file)
@@ -118,8 +118,8 @@ CONFIG_HID_ZEROPLUS=y
 CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
-CONFIG_USB_ISP1760_HCD=y
 CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
 CONFIG_MMC=y
 CONFIG_MMC_ARMMMCI=y
 CONFIG_NEW_LEDS=y
index 71e5fc7..1d1800f 100644 (file)
 # define VFP_ABI_FRAME 0
 # define BSAES_ASM_EXTENDED_KEY
 # define XTS_CHAIN_TWEAK
-# define __ARM_ARCH__  7
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
 #endif
 
 #ifdef __thumb__
 # define adrl adr
 #endif
 
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
 .text
 .syntax        unified         @ ARMv7-capable assembler is expected to handle this
 #ifdef __thumb2__
@@ -74,8 +78,6 @@
 .code   32
 #endif
 
-.fpu   neon
-
 .type  _bsaes_decrypt8,%function
 .align 4
 _bsaes_decrypt8:
@@ -2095,9 +2097,11 @@ bsaes_xts_decrypt:
        vld1.8  {q8}, [r0]                      @ initial tweak
        adr     r2, .Lxts_magic
 
+#ifndef        XTS_CHAIN_TWEAK
        tst     r9, #0xf                        @ if not multiple of 16
        it      ne                              @ Thumb2 thing, sanity check in ARM
        subne   r9, #0x10                       @ subtract another 16 bytes
+#endif
        subs    r9, #0x80
 
        blo     .Lxts_dec_short
index be068db..a4d3856 100644 (file)
@@ -701,14 +701,18 @@ $code.=<<___;
 # define VFP_ABI_FRAME 0
 # define BSAES_ASM_EXTENDED_KEY
 # define XTS_CHAIN_TWEAK
-# define __ARM_ARCH__  7
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
 #endif
 
 #ifdef __thumb__
 # define adrl adr
 #endif
 
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch  armv7-a
+.fpu   neon
+
 .text
 .syntax        unified         @ ARMv7-capable assembler is expected to handle this
 #ifdef __thumb2__
@@ -717,8 +721,6 @@ $code.=<<___;
 .code   32
 #endif
 
-.fpu   neon
-
 .type  _bsaes_decrypt8,%function
 .align 4
 _bsaes_decrypt8:
@@ -2076,9 +2078,11 @@ bsaes_xts_decrypt:
        vld1.8  {@XMM[8]}, [r0]                 @ initial tweak
        adr     $magic, .Lxts_magic
 
+#ifndef        XTS_CHAIN_TWEAK
        tst     $len, #0xf                      @ if not multiple of 16
        it      ne                              @ Thumb2 thing, sanity check in ARM
        subne   $len, #0x10                     @ subtract another 16 bytes
+#endif
        subs    $len, #0x80
 
        blo     .Lxts_dec_short
index 70f9b9b..5f337dc 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _ASM_ARM_JUMP_LABEL_H
 #define _ASM_ARM_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/types.h>
 
@@ -27,8 +27,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u32 jump_label_t;
 
 struct jump_entry {
@@ -37,4 +35,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
index 816db0b..d995821 100644 (file)
 #define HSR_COND       (0xfU << HSR_COND_SHIFT)
 
 #define FSC_FAULT      (0x04)
+#define FSC_ACCESS     (0x08)
 #define FSC_PERM       (0x0c)
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
index 41008cd..d71607c 100644 (file)
@@ -27,6 +27,8 @@
 #include <asm/fpstate.h>
 #include <kvm/arm_arch_timer.h>
 
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
 #if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #else
@@ -165,19 +167,10 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
-                             unsigned long end)
-{
-       return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
-       return 0;
-}
-
 static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
                                                         unsigned long address)
 {
index 3f83db2..d8e90c8 100644 (file)
@@ -28,28 +28,6 @@ struct kvm_decode {
        bool sign_extend;
 };
 
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
-       phys_addr_t     phys_addr;
-       u8              data[8];
-       u32             len;
-       bool            is_write;
-       void            *private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
-                                   struct kvm_exit_mmio *mmio)
-{
-       run->mmio.phys_addr     = mmio->phys_addr;
-       run->mmio.len           = mmio->len;
-       run->mmio.is_write      = mmio->is_write;
-       memcpy(run->mmio.data, mmio->data, mmio->len);
-       run->exit_reason        = KVM_EXIT_MMIO;
-}
-
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 phys_addr_t fault_ipa);
index bf0fe99..4cf48c3 100644 (file)
@@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
        (__boundary - 1 < (end) - 1)? __boundary: (end);                \
 })
 
+#define kvm_pgd_index(addr)                    pgd_index(addr)
+
 static inline bool kvm_page_empty(void *ptr)
 {
        struct page *ptr_page = virt_to_page(ptr);
        return page_count(ptr_page) == 1;
 }
 
-
 #define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
 #define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
 #define kvm_pud_table_empty(kvm, pudp) (0)
 
 #define KVM_PREALLOC_LEVEL     0
 
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
-       return 0;
+       return kvm->arch.pgd;
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-       return kvm->arch.pgd;
+       return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 struct kvm;
index 90c12e1..0f79e4d 100644 (file)
@@ -12,8 +12,7 @@
 
 extern void timer_tick(void);
 
-struct timespec;
-typedef void (*clock_access_fn)(struct timespec *);
+typedef void (*clock_access_fn)(struct timespec64 *);
 extern int register_persistent_clock(clock_access_fn read_boot,
                                     clock_access_fn read_persistent);
 
index 80a6501..c3c45e6 100644 (file)
 #define AT91_DBGU 0xfc00c000 /* SAMA5D4_BASE_USART3 */
 #endif
 
-/* Keep in sync with mach-at91/include/mach/hardware.h */
+#ifdef CONFIG_MMU
 #define AT91_IO_P2V(x) ((x) - 0x01000000)
+#else
+#define AT91_IO_P2V(x) (x)
+#endif
 
 #define AT91_DBGU_SR           (0x14)  /* Status Register */
 #define AT91_DBGU_THR          (0x1c)  /* Transmitter Holding Register */
index 0db25bc..2499867 100644 (file)
@@ -198,6 +198,9 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX            127
 
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS          1
+
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE               0x95c1ba5e
 #define KVM_PSCI_FN(n)                 (KVM_PSCI_FN_BASE + (n))
index 2d2d608..488eaac 100644 (file)
@@ -190,7 +190,6 @@ int main(void)
   DEFINE(VCPU_HxFAR,           offsetof(struct kvm_vcpu, arch.fault.hxfar));
   DEFINE(VCPU_HPFAR,           offsetof(struct kvm_vcpu, arch.fault.hpfar));
   DEFINE(VCPU_HYP_PC,          offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
-#ifdef CONFIG_KVM_ARM_VGIC
   DEFINE(VCPU_VGIC_CPU,                offsetof(struct kvm_vcpu, arch.vgic_cpu));
   DEFINE(VGIC_V2_CPU_HCR,      offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
   DEFINE(VGIC_V2_CPU_VMCR,     offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
@@ -200,14 +199,11 @@ int main(void)
   DEFINE(VGIC_V2_CPU_APR,      offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
   DEFINE(VGIC_V2_CPU_LR,       offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
   DEFINE(VGIC_CPU_NR_LR,       offsetof(struct vgic_cpu, nr_lr));
-#ifdef CONFIG_KVM_ARM_TIMER
   DEFINE(VCPU_TIMER_CNTV_CTL,  offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
   DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
   DEFINE(KVM_TIMER_CNTVOFF,    offsetof(struct kvm, arch.timer.cntvoff));
   DEFINE(KVM_TIMER_ENABLED,    offsetof(struct kvm, arch.timer.enabled));
-#endif
   DEFINE(KVM_VGIC_VCTRL,       offsetof(struct kvm, arch.vgic.vctrl_base));
-#endif
   DEFINE(KVM_VTTBR,            offsetof(struct kvm, arch.vttbr));
 #endif
   return 0; 
index e55408e..1d60beb 100644 (file)
@@ -246,12 +246,9 @@ static int __get_cpu_architecture(void)
                if (cpu_arch)
                        cpu_arch += CPU_ARCH_ARMv3;
        } else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
-               unsigned int mmfr0;
-
                /* Revised CPUID format. Read the Memory Model Feature
                 * Register 0 and check for VMSAv7 or PMSAv7 */
-               asm("mrc        p15, 0, %0, c0, c1, 4"
-                   : "=r" (mmfr0));
+               unsigned int mmfr0 = read_cpuid_ext(CPUID_EXT_MMFR0);
                if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
                    (mmfr0 & 0x000000f0) >= 0x00000030)
                        cpu_arch = CPU_ARCH_ARMv7;
index 0cc7e58..a66e37e 100644 (file)
@@ -76,7 +76,7 @@ void timer_tick(void)
 }
 #endif
 
-static void dummy_clock_access(struct timespec *ts)
+static void dummy_clock_access(struct timespec64 *ts)
 {
        ts->tv_sec = 0;
        ts->tv_nsec = 0;
@@ -85,12 +85,12 @@ static void dummy_clock_access(struct timespec *ts)
 static clock_access_fn __read_persistent_clock = dummy_clock_access;
 static clock_access_fn __read_boot_clock = dummy_clock_access;;
 
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
 {
        __read_persistent_clock(ts);
 }
 
-void read_boot_clock(struct timespec *ts)
+void read_boot_clock64(struct timespec64 *ts)
 {
        __read_boot_clock(ts);
 }
index 338ace7..f1f79d1 100644 (file)
@@ -18,6 +18,7 @@ if VIRTUALIZATION
 
 config KVM
        bool "Kernel-based Virtual Machine (KVM) support"
+       depends on MMU && OF
        select PREEMPT_NOTIFIERS
        select ANON_INODES
        select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -26,10 +27,12 @@ config KVM
        select KVM_ARM_HOST
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select SRCU
-       depends on ARM_VIRT_EXT && ARM_LPAE
+       select MMU_NOTIFIER
+       select HAVE_KVM_EVENTFD
+       select HAVE_KVM_IRQFD
+       depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
        ---help---
-         Support hosting virtualized guest machines. You will also
-         need to select one or more of the processor modules below.
+         Support hosting virtualized guest machines.
 
          This module provides access to the hardware capabilities through
          a character device node named /dev/kvm.
@@ -37,10 +40,7 @@ config KVM
          If unsure, say N.
 
 config KVM_ARM_HOST
-       bool "KVM host support for ARM cpus."
-       depends on KVM
-       depends on MMU
-       select  MMU_NOTIFIER
+       bool
        ---help---
          Provides host support for ARM processors.
 
@@ -55,20 +55,4 @@ config KVM_ARM_MAX_VCPUS
          large, so only choose a reasonable number that you expect to
          actually use.
 
-config KVM_ARM_VGIC
-       bool "KVM support for Virtual GIC"
-       depends on KVM_ARM_HOST && OF
-       select HAVE_KVM_IRQCHIP
-       default y
-       ---help---
-         Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
-       bool "KVM support for Architected Timers"
-       depends on KVM_ARM_VGIC && ARM_ARCH_TIMER
-       select HAVE_KVM_IRQCHIP
-       default y
-       ---help---
-         Adds support for the Architected Timers in virtual machines
-
 endif # VIRTUALIZATION
index 443b8be..139e46c 100644 (file)
@@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt)
        plus_virt_def := -DREQUIRES_VIRT=1
 endif
 
-ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
+ccflags-y += -Iarch/arm/kvm
 CFLAGS_arm.o := -I. $(plus_virt_def)
 CFLAGS_mmu.o := -I.
 
@@ -15,12 +15,12 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
 AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
 
 KVM := ../../../virt/kvm
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 
 obj-y += kvm-arm.o init.o interrupts.o
 obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
 obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+obj-y += $(KVM)/arm/vgic.o
+obj-y += $(KVM)/arm/vgic-v2.o
+obj-y += $(KVM)/arm/vgic-v2-emul.o
+obj-y += $(KVM)/arm/arch_timer.o
index 5560f74..6f53645 100644 (file)
@@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
 static u8 kvm_next_vmid;
 static DEFINE_SPINLOCK(kvm_vmid_lock);
 
-static bool vgic_present;
-
 static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
 {
        BUG_ON(preemptible());
@@ -173,8 +171,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        int r;
        switch (ext) {
        case KVM_CAP_IRQCHIP:
-               r = vgic_present;
-               break;
+       case KVM_CAP_IRQFD:
+       case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_USER_MEMORY:
        case KVM_CAP_SYNC_MMU:
@@ -183,6 +181,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ARM_PSCI:
        case KVM_CAP_ARM_PSCI_0_2:
        case KVM_CAP_READONLY_MEM:
+       case KVM_CAP_MP_STATE:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
@@ -268,7 +267,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-       return 0;
+       return kvm_timer_should_fire(vcpu);
 }
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -313,13 +312,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
-       return -EINVAL;
+       if (vcpu->arch.pause)
+               mp_state->mp_state = KVM_MP_STATE_STOPPED;
+       else
+               mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+
+       return 0;
 }
 
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
 {
-       return -EINVAL;
+       switch (mp_state->mp_state) {
+       case KVM_MP_STATE_RUNNABLE:
+               vcpu->arch.pause = false;
+               break;
+       case KVM_MP_STATE_STOPPED:
+               vcpu->arch.pause = true;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
 }
 
 /**
@@ -452,6 +467,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+       return vgic_initialized(kvm);
+}
+
 static void vcpu_pause(struct kvm_vcpu *vcpu)
 {
        wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
@@ -831,8 +851,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 
        switch (dev_id) {
        case KVM_ARM_DEVICE_VGIC_V2:
-               if (!vgic_present)
-                       return -ENXIO;
                return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
        default:
                return -ENODEV;
@@ -847,10 +865,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
 
        switch (ioctl) {
        case KVM_CREATE_IRQCHIP: {
-               if (vgic_present)
-                       return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
-               else
-                       return -ENXIO;
+               return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
        }
        case KVM_ARM_SET_DEVICE_ADDR: {
                struct kvm_arm_device_addr dev_addr;
@@ -1035,10 +1050,6 @@ static int init_hyp_mode(void)
        if (err)
                goto out_free_context;
 
-#ifdef CONFIG_KVM_ARM_VGIC
-               vgic_present = true;
-#endif
-
        /*
         * Init HYP architected timer support
         */
index 384bab6..d503fbb 100644 (file)
@@ -109,22 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
        return -EINVAL;
 }
 
-#ifndef CONFIG_KVM_ARM_TIMER
-
-#define NUM_TIMER_REGS 0
-
-static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
-       return 0;
-}
-
-static bool is_timer_reg(u64 index)
-{
-       return false;
-}
-
-#else
-
 #define NUM_TIMER_REGS 3
 
 static bool is_timer_reg(u64 index)
@@ -152,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
        return 0;
 }
 
-#endif
-
 static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 {
        void __user *uaddr = (void __user *)(long)reg->addr;
index 14d4883..35e4a3a 100644 (file)
@@ -402,7 +402,6 @@ vcpu        .req    r0              @ vcpu pointer always in r0
  * Assumes vcpu pointer in vcpu reg
  */
 .macro save_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
        /* Get VGIC VCTRL base into r2 */
        ldr     r2, [vcpu, #VCPU_KVM]
        ldr     r2, [r2, #KVM_VGIC_VCTRL]
@@ -460,7 +459,6 @@ ARM_BE8(rev r6, r6  )
        subs    r4, r4, #1
        bne     1b
 2:
-#endif
 .endm
 
 /*
@@ -469,7 +467,6 @@ ARM_BE8(rev r6, r6  )
  * Assumes vcpu pointer in vcpu reg
  */
 .macro restore_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
        /* Get VGIC VCTRL base into r2 */
        ldr     r2, [vcpu, #VCPU_KVM]
        ldr     r2, [r2, #KVM_VGIC_VCTRL]
@@ -501,7 +498,6 @@ ARM_BE8(rev r6, r6  )
        subs    r4, r4, #1
        bne     1b
 2:
-#endif
 .endm
 
 #define CNTHCTL_PL1PCTEN       (1 << 0)
@@ -515,7 +511,6 @@ ARM_BE8(rev r6, r6  )
  * Clobbers r2-r5
  */
 .macro save_timer_state
-#ifdef CONFIG_KVM_ARM_TIMER
        ldr     r4, [vcpu, #VCPU_KVM]
        ldr     r2, [r4, #KVM_TIMER_ENABLED]
        cmp     r2, #0
@@ -537,7 +532,6 @@ ARM_BE8(rev r6, r6  )
        mcrr    p15, 4, r2, r2, c14     @ CNTVOFF
 
 1:
-#endif
        @ Allow physical timer/counter access for the host
        mrc     p15, 4, r2, c14, c1, 0  @ CNTHCTL
        orr     r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
@@ -559,7 +553,6 @@ ARM_BE8(rev r6, r6  )
        bic     r2, r2, #CNTHCTL_PL1PCEN
        mcr     p15, 4, r2, c14, c1, 0  @ CNTHCTL
 
-#ifdef CONFIG_KVM_ARM_TIMER
        ldr     r4, [vcpu, #VCPU_KVM]
        ldr     r2, [r4, #KVM_TIMER_ENABLED]
        cmp     r2, #0
@@ -579,7 +572,6 @@ ARM_BE8(rev r6, r6  )
        and     r2, r2, #3
        mcr     p15, 0, r2, c14, c3, 1  @ CNTV_CTL
 1:
-#endif
 .endm
 
 .equ vmentry,  0
index 5d3bfc0..974b1c6 100644 (file)
@@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
        return 0;
 }
 
-static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
-                     struct kvm_exit_mmio *mmio)
+static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
 {
        unsigned long rt;
-       int len;
-       bool is_write, sign_extend;
+       int access_size;
+       bool sign_extend;
 
        if (kvm_vcpu_dabt_isextabt(vcpu)) {
                /* cache operation on I/O addr, tell guest unsupported */
@@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
                return 1;
        }
 
-       len = kvm_vcpu_dabt_get_as(vcpu);
-       if (unlikely(len < 0))
-               return len;
+       access_size = kvm_vcpu_dabt_get_as(vcpu);
+       if (unlikely(access_size < 0))
+               return access_size;
 
-       is_write = kvm_vcpu_dabt_iswrite(vcpu);
+       *is_write = kvm_vcpu_dabt_iswrite(vcpu);
        sign_extend = kvm_vcpu_dabt_issext(vcpu);
        rt = kvm_vcpu_dabt_get_rd(vcpu);
 
-       mmio->is_write = is_write;
-       mmio->phys_addr = fault_ipa;
-       mmio->len = len;
+       *len = access_size;
        vcpu->arch.mmio_decode.sign_extend = sign_extend;
        vcpu->arch.mmio_decode.rt = rt;
 
@@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 phys_addr_t fault_ipa)
 {
-       struct kvm_exit_mmio mmio;
        unsigned long data;
        unsigned long rt;
        int ret;
+       bool is_write;
+       int len;
+       u8 data_buf[8];
 
        /*
-        * Prepare MMIO operation. First stash it in a private
-        * structure that we can use for in-kernel emulation. If the
-        * kernel can't handle it, copy it into run->mmio and let user
-        * space do its magic.
+        * Prepare MMIO operation. First decode the syndrome data we get
+        * from the CPU. Then try if some in-kernel emulation feels
+        * responsible, otherwise let user space do its magic.
         */
-
        if (kvm_vcpu_dabt_isvalid(vcpu)) {
-               ret = decode_hsr(vcpu, fault_ipa, &mmio);
+               ret = decode_hsr(vcpu, &is_write, &len);
                if (ret)
                        return ret;
        } else {
@@ -188,21 +185,34 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 
        rt = vcpu->arch.mmio_decode.rt;
 
-       if (mmio.is_write) {
-               data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
-                                              mmio.len);
+       if (is_write) {
+               data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
+
+               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+               mmio_write_buf(data_buf, len, data);
 
-               trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
-                              fault_ipa, data);
-               mmio_write_buf(mmio.data, mmio.len, data);
+               ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+                                      data_buf);
        } else {
-               trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+               trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
                               fault_ipa, 0);
+
+               ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+                                     data_buf);
        }
 
-       if (vgic_handle_mmio(vcpu, run, &mmio))
+       /* Now prepare kvm_run for the potential return to userland. */
+       run->mmio.is_write      = is_write;
+       run->mmio.phys_addr     = fault_ipa;
+       run->mmio.len           = len;
+       memcpy(run->mmio.data, data_buf, len);
+
+       if (!ret) {
+               /* We handled the access successfully in the kernel. */
+               kvm_handle_mmio_return(vcpu, run);
                return 1;
+       }
 
-       kvm_prepare_mmio(run, &mmio);
+       run->exit_reason        = KVM_EXIT_MMIO;
        return 0;
 }
index 3e6859b..15b050d 100644 (file)
@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
        phys_addr_t addr = start, end = start + size;
        phys_addr_t next;
 
-       pgd = pgdp + pgd_index(addr);
+       pgd = pgdp + kvm_pgd_index(addr);
        do {
                next = kvm_pgd_addr_end(addr, end);
                if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
        phys_addr_t next;
        pgd_t *pgd;
 
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
        do {
                next = kvm_pgd_addr_end(addr, end);
                stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
                                     __phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
 }
 
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+       free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+       unsigned int size = kvm_get_hwpgd_size();
+
+       return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
 /**
  * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
  * @kvm:       The KVM struct pointer for the VM.
@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
  */
 int kvm_alloc_stage2_pgd(struct kvm *kvm)
 {
-       int ret;
        pgd_t *pgd;
+       void *hwpgd;
 
        if (kvm->arch.pgd != NULL) {
                kvm_err("kvm_arch already initialized?\n");
                return -EINVAL;
        }
 
+       hwpgd = kvm_alloc_hwpgd();
+       if (!hwpgd)
+               return -ENOMEM;
+
+       /* When the kernel uses more levels of page tables than the
+        * guest, we allocate a fake PGD and pre-populate it to point
+        * to the next-level page table, which will be the real
+        * initial page table pointed to by the VTTBR.
+        *
+        * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+        * the PMD and the kernel will use folded pud.
+        * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+        * pages.
+        */
        if (KVM_PREALLOC_LEVEL > 0) {
+               int i;
+
                /*
                 * Allocate fake pgd for the page table manipulation macros to
                 * work.  This is not used by the hardware and we have no
@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
                 */
                pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
                                       GFP_KERNEL | __GFP_ZERO);
+
+               if (!pgd) {
+                       kvm_free_hwpgd(hwpgd);
+                       return -ENOMEM;
+               }
+
+               /* Plug the HW PGD into the fake one. */
+               for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+                       if (KVM_PREALLOC_LEVEL == 1)
+                               pgd_populate(NULL, pgd + i,
+                                            (pud_t *)hwpgd + i * PTRS_PER_PUD);
+                       else if (KVM_PREALLOC_LEVEL == 2)
+                               pud_populate(NULL, pud_offset(pgd, 0) + i,
+                                            (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+               }
        } else {
                /*
                 * Allocate actual first-level Stage-2 page table used by the
                 * hardware for Stage-2 page table walks.
                 */
-               pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+               pgd = (pgd_t *)hwpgd;
        }
 
-       if (!pgd)
-               return -ENOMEM;
-
-       ret = kvm_prealloc_hwpgd(kvm, pgd);
-       if (ret)
-               goto out_err;
-
        kvm_clean_pgd(pgd);
        kvm->arch.pgd = pgd;
        return 0;
-out_err:
-       if (KVM_PREALLOC_LEVEL > 0)
-               kfree(pgd);
-       else
-               free_pages((unsigned long)pgd, S2_PGD_ORDER);
-       return ret;
 }
 
 /**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
                return;
 
        unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
-       kvm_free_hwpgd(kvm);
+       kvm_free_hwpgd(kvm_get_hwpgd(kvm));
        if (KVM_PREALLOC_LEVEL > 0)
                kfree(kvm->arch.pgd);
-       else
-               free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
        kvm->arch.pgd = NULL;
 }
 
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
        pgd_t *pgd;
        pud_t *pud;
 
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
        if (WARN_ON(pgd_none(*pgd))) {
                if (!cache)
                        return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
        pgd_t *pgd;
        phys_addr_t next;
 
-       pgd = kvm->arch.pgd + pgd_index(addr);
+       pgd = kvm->arch.pgd + kvm_pgd_index(addr);
        do {
                /*
                 * Release kvm_mmu_lock periodically if the memory region is
@@ -1299,10 +1330,51 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 out_unlock:
        spin_unlock(&kvm->mmu_lock);
+       kvm_set_pfn_accessed(pfn);
        kvm_release_pfn_clean(pfn);
        return ret;
 }
 
+/*
+ * Resolve the access fault by making the page young again.
+ * Note that because the faulting entry is guaranteed not to be
+ * cached in the TLB, we don't need to invalidate anything.
+ */
+static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+       pmd_t *pmd;
+       pte_t *pte;
+       pfn_t pfn;
+       bool pfn_valid = false;
+
+       trace_kvm_access_fault(fault_ipa);
+
+       spin_lock(&vcpu->kvm->mmu_lock);
+
+       pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+       if (!pmd || pmd_none(*pmd))     /* Nothing there */
+               goto out;
+
+       if (kvm_pmd_huge(*pmd)) {       /* THP, HugeTLB */
+               *pmd = pmd_mkyoung(*pmd);
+               pfn = pmd_pfn(*pmd);
+               pfn_valid = true;
+               goto out;
+       }
+
+       pte = pte_offset_kernel(pmd, fault_ipa);
+       if (pte_none(*pte))             /* Nothing there either */
+               goto out;
+
+       *pte = pte_mkyoung(*pte);       /* Just a page... */
+       pfn = pte_pfn(*pte);
+       pfn_valid = true;
+out:
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       if (pfn_valid)
+               kvm_set_pfn_accessed(pfn);
+}
+
 /**
  * kvm_handle_guest_abort - handles all 2nd stage aborts
  * @vcpu:      the VCPU pointer
@@ -1333,7 +1405,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
        /* Check the stage-2 fault is trans. fault or write fault */
        fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
-       if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
+       if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
+           fault_status != FSC_ACCESS) {
                kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
                        kvm_vcpu_trap_get_class(vcpu),
                        (unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1369,6 +1442,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
        /* Userspace should not be able to register out-of-bounds IPAs */
        VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
 
+       if (fault_status == FSC_ACCESS) {
+               handle_access_fault(vcpu, fault_ipa);
+               ret = 1;
+               goto out_unlock;
+       }
+
        ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
        if (ret == 0)
                ret = 1;
@@ -1377,15 +1456,16 @@ out_unlock:
        return ret;
 }
 
-static void handle_hva_to_gpa(struct kvm *kvm,
-                             unsigned long start,
-                             unsigned long end,
-                             void (*handler)(struct kvm *kvm,
-                                             gpa_t gpa, void *data),
-                             void *data)
+static int handle_hva_to_gpa(struct kvm *kvm,
+                            unsigned long start,
+                            unsigned long end,
+                            int (*handler)(struct kvm *kvm,
+                                           gpa_t gpa, void *data),
+                            void *data)
 {
        struct kvm_memslots *slots;
        struct kvm_memory_slot *memslot;
+       int ret = 0;
 
        slots = kvm_memslots(kvm);
 
@@ -1409,14 +1489,17 @@ static void handle_hva_to_gpa(struct kvm *kvm,
 
                for (; gfn < gfn_end; ++gfn) {
                        gpa_t gpa = gfn << PAGE_SHIFT;
-                       handler(kvm, gpa, data);
+                       ret |= handler(kvm, gpa, data);
                }
        }
+
+       return ret;
 }
 
-static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
        unmap_stage2_range(kvm, gpa, PAGE_SIZE);
+       return 0;
 }
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -1442,7 +1525,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
        return 0;
 }
 
-static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
 {
        pte_t *pte = (pte_t *)data;
 
@@ -1454,6 +1537,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
         * through this calling path.
         */
        stage2_set_pte(kvm, NULL, gpa, pte, 0);
+       return 0;
 }
 
 
@@ -1470,6 +1554,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
        handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 }
 
+static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+       pmd_t *pmd;
+       pte_t *pte;
+
+       pmd = stage2_get_pmd(kvm, NULL, gpa);
+       if (!pmd || pmd_none(*pmd))     /* Nothing there */
+               return 0;
+
+       if (kvm_pmd_huge(*pmd)) {       /* THP, HugeTLB */
+               if (pmd_young(*pmd)) {
+                       *pmd = pmd_mkold(*pmd);
+                       return 1;
+               }
+
+               return 0;
+       }
+
+       pte = pte_offset_kernel(pmd, gpa);
+       if (pte_none(*pte))
+               return 0;
+
+       if (pte_young(*pte)) {
+               *pte = pte_mkold(*pte); /* Just a page... */
+               return 1;
+       }
+
+       return 0;
+}
+
+static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+       pmd_t *pmd;
+       pte_t *pte;
+
+       pmd = stage2_get_pmd(kvm, NULL, gpa);
+       if (!pmd || pmd_none(*pmd))     /* Nothing there */
+               return 0;
+
+       if (kvm_pmd_huge(*pmd))         /* THP, HugeTLB */
+               return pmd_young(*pmd);
+
+       pte = pte_offset_kernel(pmd, gpa);
+       if (!pte_none(*pte))            /* Just a page... */
+               return pte_young(*pte);
+
+       return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+       trace_kvm_age_hva(start, end);
+       return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+       trace_kvm_test_age_hva(hva);
+       return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
+}
+
 void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
        mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
index 6817664..0ec3539 100644 (file)
@@ -68,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault,
                  __entry->hxfar, __entry->vcpu_pc)
 );
 
+TRACE_EVENT(kvm_access_fault,
+       TP_PROTO(unsigned long ipa),
+       TP_ARGS(ipa),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  ipa             )
+       ),
+
+       TP_fast_assign(
+               __entry->ipa            = ipa;
+       ),
+
+       TP_printk("IPA: %lx", __entry->ipa)
+);
+
 TRACE_EVENT(kvm_irq_line,
        TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
        TP_ARGS(type, vcpu_idx, irq_num, level),
@@ -210,6 +225,39 @@ TRACE_EVENT(kvm_set_spte_hva,
        TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
 );
 
+TRACE_EVENT(kvm_age_hva,
+       TP_PROTO(unsigned long start, unsigned long end),
+       TP_ARGS(start, end),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  start           )
+               __field(        unsigned long,  end             )
+       ),
+
+       TP_fast_assign(
+               __entry->start          = start;
+               __entry->end            = end;
+       ),
+
+       TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+                 __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_test_age_hva,
+       TP_PROTO(unsigned long hva),
+       TP_ARGS(hva),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  hva             )
+       ),
+
+       TP_fast_assign(
+               __entry->hva            = hva;
+       ),
+
+       TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+);
+
 TRACE_EVENT(kvm_hvc,
        TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
        TP_ARGS(vcpu_pc, r0, imm),
index 5e34fb1..aa4116e 100644 (file)
@@ -270,37 +270,35 @@ static void __init at91_pm_sram_init(void)
        phys_addr_t sram_pbase;
        unsigned long sram_base;
        struct device_node *node;
-       struct platform_device *pdev;
+       struct platform_device *pdev = NULL;
 
-       node = of_find_compatible_node(NULL, NULL, "mmio-sram");
-       if (!node) {
-               pr_warn("%s: failed to find sram node!\n", __func__);
-               return;
+       for_each_compatible_node(node, NULL, "mmio-sram") {
+               pdev = of_find_device_by_node(node);
+               if (pdev) {
+                       of_node_put(node);
+                       break;
+               }
        }
 
-       pdev = of_find_device_by_node(node);
        if (!pdev) {
                pr_warn("%s: failed to find sram device!\n", __func__);
-               goto put_node;
+               return;
        }
 
        sram_pool = dev_get_gen_pool(&pdev->dev);
        if (!sram_pool) {
                pr_warn("%s: sram pool unavailable!\n", __func__);
-               goto put_node;
+               return;
        }
 
        sram_base = gen_pool_alloc(sram_pool, at91_slow_clock_sz);
        if (!sram_base) {
                pr_warn("%s: unable to alloc ocram!\n", __func__);
-               goto put_node;
+               return;
        }
 
        sram_pbase = gen_pool_virt_to_phys(sram_pool, sram_base);
        slow_clock = __arm_ioremap_exec(sram_pbase, at91_slow_clock_sz, false);
-
-put_node:
-       of_node_put(node);
 }
 #endif
 
index d2c8996..86c0aa8 100644 (file)
@@ -44,7 +44,7 @@ static inline void at91rm9200_standby(void)
                "    mcr    p15, 0, %0, c7, c0, 4\n\t"
                "    str    %5, [%1, %2]"
                :
-               : "r" (0), "r" (AT91_BASE_SYS), "r" (AT91RM9200_SDRAMC_LPR),
+               : "r" (0), "r" (at91_ramc_base[0]), "r" (AT91RM9200_SDRAMC_LPR),
                  "r" (1), "r" (AT91RM9200_SDRAMC_SRR),
                  "r" (lpr));
 }
index 556151e..931f0e3 100644 (file)
  */
 #undef SLOWDOWN_MASTER_CLOCK
 
-#define MCKRDY_TIMEOUT         1000
-#define MOSCRDY_TIMEOUT        1000
-#define PLLALOCK_TIMEOUT       1000
-#define PLLBLOCK_TIMEOUT       1000
-
 pmc    .req    r0
 sdramc .req    r1
 ramc1  .req    r2
@@ -41,60 +36,42 @@ tmp2        .req    r5
  * Wait until master clock is ready (after switching master clock source)
  */
        .macro wait_mckrdy
-       mov     tmp2, #MCKRDY_TIMEOUT
-1:     sub     tmp2, tmp2, #1
-       cmp     tmp2, #0
-       beq     2f
-       ldr     tmp1, [pmc, #AT91_PMC_SR]
+1:     ldr     tmp1, [pmc, #AT91_PMC_SR]
        tst     tmp1, #AT91_PMC_MCKRDY
        beq     1b
-2:
        .endm
 
 /*
  * Wait until master oscillator has stabilized.
  */
        .macro wait_moscrdy
-       mov     tmp2, #MOSCRDY_TIMEOUT
-1:     sub     tmp2, tmp2, #1
-       cmp     tmp2, #0
-       beq     2f
-       ldr     tmp1, [pmc, #AT91_PMC_SR]
+1:     ldr     tmp1, [pmc, #AT91_PMC_SR]
        tst     tmp1, #AT91_PMC_MOSCS
        beq     1b
-2:
        .endm
 
 /*
  * Wait until PLLA has locked.
  */
        .macro wait_pllalock
-       mov     tmp2, #PLLALOCK_TIMEOUT
-1:     sub     tmp2, tmp2, #1
-       cmp     tmp2, #0
-       beq     2f
-       ldr     tmp1, [pmc, #AT91_PMC_SR]
+1:     ldr     tmp1, [pmc, #AT91_PMC_SR]
        tst     tmp1, #AT91_PMC_LOCKA
        beq     1b
-2:
        .endm
 
 /*
  * Wait until PLLB has locked.
  */
        .macro wait_pllblock
-       mov     tmp2, #PLLBLOCK_TIMEOUT
-1:     sub     tmp2, tmp2, #1
-       cmp     tmp2, #0
-       beq     2f
-       ldr     tmp1, [pmc, #AT91_PMC_SR]
+1:     ldr     tmp1, [pmc, #AT91_PMC_SR]
        tst     tmp1, #AT91_PMC_LOCKB
        beq     1b
-2:
        .endm
 
        .text
 
+       .arm
+
 /* void at91_slow_clock(void __iomem *pmc, void __iomem *sdramc,
  *                     void __iomem *ramc1, int memctrl)
  */
@@ -134,6 +111,16 @@ ddr_sr_enable:
        cmp     memctrl, #AT91_MEMCTRL_DDRSDR
        bne     sdr_sr_enable
 
+       /* LPDDR1 --> force DDR2 mode during self-refresh */
+       ldr     tmp1, [sdramc, #AT91_DDRSDRC_MDR]
+       str     tmp1, .saved_sam9_mdr
+       bic     tmp1, tmp1, #~AT91_DDRSDRC_MD
+       cmp     tmp1, #AT91_DDRSDRC_MD_LOW_POWER_DDR
+       ldreq   tmp1, [sdramc, #AT91_DDRSDRC_MDR]
+       biceq   tmp1, tmp1, #AT91_DDRSDRC_MD
+       orreq   tmp1, tmp1, #AT91_DDRSDRC_MD_DDR2
+       streq   tmp1, [sdramc, #AT91_DDRSDRC_MDR]
+
        /* prepare for DDRAM self-refresh mode */
        ldr     tmp1, [sdramc, #AT91_DDRSDRC_LPR]
        str     tmp1, .saved_sam9_lpr
@@ -142,14 +129,26 @@ ddr_sr_enable:
 
        /* figure out if we use the second ram controller */
        cmp     ramc1, #0
-       ldrne   tmp2, [ramc1, #AT91_DDRSDRC_LPR]
-       strne   tmp2, .saved_sam9_lpr1
-       bicne   tmp2, #AT91_DDRSDRC_LPCB
-       orrne   tmp2, #AT91_DDRSDRC_LPCB_SELF_REFRESH
+       beq     ddr_no_2nd_ctrl
+
+       ldr     tmp2, [ramc1, #AT91_DDRSDRC_MDR]
+       str     tmp2, .saved_sam9_mdr1
+       bic     tmp2, tmp2, #~AT91_DDRSDRC_MD
+       cmp     tmp2, #AT91_DDRSDRC_MD_LOW_POWER_DDR
+       ldreq   tmp2, [ramc1, #AT91_DDRSDRC_MDR]
+       biceq   tmp2, tmp2, #AT91_DDRSDRC_MD
+       orreq   tmp2, tmp2, #AT91_DDRSDRC_MD_DDR2
+       streq   tmp2, [ramc1, #AT91_DDRSDRC_MDR]
+
+       ldr     tmp2, [ramc1, #AT91_DDRSDRC_LPR]
+       str     tmp2, .saved_sam9_lpr1
+       bic     tmp2, #AT91_DDRSDRC_LPCB
+       orr     tmp2, #AT91_DDRSDRC_LPCB_SELF_REFRESH
 
        /* Enable DDRAM self-refresh mode */
+       str     tmp2, [ramc1, #AT91_DDRSDRC_LPR]
+ddr_no_2nd_ctrl:
        str     tmp1, [sdramc, #AT91_DDRSDRC_LPR]
-       strne   tmp2, [ramc1, #AT91_DDRSDRC_LPR]
 
        b       sdr_sr_done
 
@@ -208,6 +207,7 @@ sdr_sr_done:
        /* Turn off the main oscillator */
        ldr     tmp1, [pmc, #AT91_CKGR_MOR]
        bic     tmp1, tmp1, #AT91_PMC_MOSCEN
+       orr     tmp1, tmp1, #AT91_PMC_KEY
        str     tmp1, [pmc, #AT91_CKGR_MOR]
 
        /* Wait for interrupt */
@@ -216,6 +216,7 @@ sdr_sr_done:
        /* Turn on the main oscillator */
        ldr     tmp1, [pmc, #AT91_CKGR_MOR]
        orr     tmp1, tmp1, #AT91_PMC_MOSCEN
+       orr     tmp1, tmp1, #AT91_PMC_KEY
        str     tmp1, [pmc, #AT91_CKGR_MOR]
 
        wait_moscrdy
@@ -280,12 +281,17 @@ sdr_sr_done:
         */
        cmp     memctrl, #AT91_MEMCTRL_DDRSDR
        bne     sdr_en_restore
+       /* Restore MDR in case of LPDDR1 */
+       ldr     tmp1, .saved_sam9_mdr
+       str     tmp1, [sdramc, #AT91_DDRSDRC_MDR]
        /* Restore LPR on AT91 with DDRAM */
        ldr     tmp1, .saved_sam9_lpr
        str     tmp1, [sdramc, #AT91_DDRSDRC_LPR]
 
        /* if we use the second ram controller */
        cmp     ramc1, #0
+       ldrne   tmp2, .saved_sam9_mdr1
+       strne   tmp2, [ramc1, #AT91_DDRSDRC_MDR]
        ldrne   tmp2, .saved_sam9_lpr1
        strne   tmp2, [ramc1, #AT91_DDRSDRC_LPR]
 
@@ -319,5 +325,11 @@ ram_restored:
 .saved_sam9_lpr1:
        .word 0
 
+.saved_sam9_mdr:
+       .word 0
+
+.saved_sam9_mdr1:
+       .word 0
+
 ENTRY(at91_slow_clock_sz)
        .word .-at91_slow_clock
index 3f32c47..d2e9f12 100644 (file)
@@ -126,8 +126,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
  */
 void exynos_cpu_power_down(int cpu)
 {
-       if (cpu == 0 && (of_machine_is_compatible("samsung,exynos5420") ||
-               of_machine_is_compatible("samsung,exynos5800"))) {
+       if (cpu == 0 && (soc_is_exynos5420() || soc_is_exynos5800())) {
                /*
                 * Bypass power down for CPU0 during suspend. Check for
                 * the SYS_PWR_REG value to decide if we are suspending
index 20f2671..37266a8 100644 (file)
@@ -161,6 +161,34 @@ no_clk:
                of_genpd_add_provider_simple(np, &pd->pd);
        }
 
+       /* Assign the child power domains to their parents */
+       for_each_compatible_node(np, NULL, "samsung,exynos4210-pd") {
+               struct generic_pm_domain *child_domain, *parent_domain;
+               struct of_phandle_args args;
+
+               args.np = np;
+               args.args_count = 0;
+               child_domain = of_genpd_get_from_provider(&args);
+               if (!child_domain)
+                       continue;
+
+               if (of_parse_phandle_with_args(np, "power-domains",
+                                        "#power-domain-cells", 0, &args) != 0)
+                       continue;
+
+               parent_domain = of_genpd_get_from_provider(&args);
+               if (!parent_domain)
+                       continue;
+
+               if (pm_genpd_add_subdomain(parent_domain, child_domain))
+                       pr_warn("%s failed to add subdomain: %s\n",
+                               parent_domain->name, child_domain->name);
+               else
+                       pr_info("%s has as child subdomain: %s.\n",
+                               parent_domain->name, child_domain->name);
+               of_node_put(np);
+       }
+
        return 0;
 }
 arch_initcall(exynos4_pm_init_power_domain);
index 52e2b1a..318d127 100644 (file)
@@ -87,8 +87,8 @@ static unsigned int exynos_pmu_spare3;
 static u32 exynos_irqwake_intmask = 0xffffffff;
 
 static const struct exynos_wkup_irq exynos3250_wkup_irq[] = {
-       { 73, BIT(1) }, /* RTC alarm */
-       { 74, BIT(2) }, /* RTC tick */
+       { 105, BIT(1) }, /* RTC alarm */
+       { 106, BIT(2) }, /* RTC tick */
        { /* sentinel */ },
 };
 
index 4ad6e47..9de3412 100644 (file)
@@ -211,8 +211,9 @@ static void __init imx6q_1588_init(void)
         * set bit IOMUXC_GPR1[21].  Or the PTP clock must be from pad
         * (external OSC), and we need to clear the bit.
         */
-       clksel = ptp_clk == enet_ref ? IMX6Q_GPR1_ENET_CLK_SEL_ANATOP :
-                                      IMX6Q_GPR1_ENET_CLK_SEL_PAD;
+       clksel = clk_is_match(ptp_clk, enet_ref) ?
+                               IMX6Q_GPR1_ENET_CLK_SEL_ANATOP :
+                               IMX6Q_GPR1_ENET_CLK_SEL_PAD;
        gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
        if (!IS_ERR(gpr))
                regmap_update_bits(gpr, IOMUXC_GPR1,
index 01e398a..57d4298 100644 (file)
@@ -14,7 +14,7 @@
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/export.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <asm/cpuidle.h>
 #include <asm/proc-fns.h>
@@ -84,7 +84,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
 {
        struct idle_statedata *cx = state_ptr + index;
        u32 mpuss_can_lose_context = 0;
-       int cpu_id = smp_processor_id();
 
        /*
         * CPU0 has to wait and stay ON until CPU1 is OFF state.
@@ -112,7 +111,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
        mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) &&
                                 (cx->mpu_logic_state == PWRDM_POWER_OFF);
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id);
+       tick_broadcast_enter();
 
        /*
         * Call idle CPU PM enter notifier chain so that
@@ -169,7 +168,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
        if (dev->cpu == 0 && mpuss_can_lose_context)
                cpu_cluster_pm_exit();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id);
+       tick_broadcast_exit();
 
 fail:
        cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
@@ -184,8 +183,7 @@ fail:
  */
 static void omap_setup_broadcast_timer(void *arg)
 {
-       int cpu = smp_processor_id();
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu);
+       tick_broadcast_enable();
 }
 
 static struct cpuidle_driver omap4_idle_driver = {
index 2a2f4d5..25f1bee 100644 (file)
@@ -720,6 +720,8 @@ static const char * __init omap_get_family(void)
                return kasprintf(GFP_KERNEL, "OMAP4");
        else if (soc_is_omap54xx())
                return kasprintf(GFP_KERNEL, "OMAP5");
+       else if (soc_is_am33xx() || soc_is_am335x())
+               return kasprintf(GFP_KERNEL, "AM33xx");
        else if (soc_is_am43xx())
                return kasprintf(GFP_KERNEL, "AM43xx");
        else if (soc_is_dra7xx())
index 92afb72..355b089 100644 (file)
@@ -1692,16 +1692,15 @@ static int _deassert_hardreset(struct omap_hwmod *oh, const char *name)
        if (ret == -EBUSY)
                pr_warn("omap_hwmod: %s: failed to hardreset\n", oh->name);
 
-       if (!ret) {
+       if (oh->clkdm) {
                /*
                 * Set the clockdomain to HW_AUTO, assuming that the
                 * previous state was HW_AUTO.
                 */
-               if (oh->clkdm && hwsup)
+               if (hwsup)
                        clkdm_allow_idle(oh->clkdm);
-       } else {
-               if (oh->clkdm)
-                       clkdm_hwmod_disable(oh->clkdm, oh);
+
+               clkdm_hwmod_disable(oh->clkdm, oh);
        }
 
        return ret;
@@ -2698,6 +2697,7 @@ static int __init _register(struct omap_hwmod *oh)
        INIT_LIST_HEAD(&oh->master_ports);
        INIT_LIST_HEAD(&oh->slave_ports);
        spin_lock_init(&oh->_lock);
+       lockdep_set_class(&oh->_lock, &oh->hwmod_key);
 
        oh->_state = _HWMOD_STATE_REGISTERED;
 
index 9d4bec6..9611c91 100644 (file)
@@ -674,6 +674,7 @@ struct omap_hwmod {
        u32                             _sysc_cache;
        void __iomem                    *_mpu_rt_va;
        spinlock_t                      _lock;
+       struct lock_class_key           hwmod_key; /* unique lock class */
        struct list_head                node;
        struct omap_hwmod_ocp_if        *_mpu_port;
        unsigned int                    (*xlate_irq)(unsigned int);
index e8692e7..16fe7a1 100644 (file)
@@ -1466,53 +1466,16 @@ static struct omap_hwmod dra7xx_ocp2scp3_hwmod = {
  *
  */
 
-static struct omap_hwmod_class dra7xx_pcie_hwmod_class = {
+static struct omap_hwmod_class dra7xx_pciess_hwmod_class = {
        .name   = "pcie",
 };
 
 /* pcie1 */
-static struct omap_hwmod dra7xx_pcie1_hwmod = {
+static struct omap_hwmod dra7xx_pciess1_hwmod = {
        .name           = "pcie1",
-       .class          = &dra7xx_pcie_hwmod_class,
+       .class          = &dra7xx_pciess_hwmod_class,
        .clkdm_name     = "pcie_clkdm",
        .main_clk       = "l4_root_clk_div",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs   = DRA7XX_CM_PCIE_CLKSTCTRL_OFFSET,
-                       .modulemode     = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/* pcie2 */
-static struct omap_hwmod dra7xx_pcie2_hwmod = {
-       .name           = "pcie2",
-       .class          = &dra7xx_pcie_hwmod_class,
-       .clkdm_name     = "pcie_clkdm",
-       .main_clk       = "l4_root_clk_div",
-       .prcm = {
-               .omap4 = {
-                       .clkctrl_offs = DRA7XX_CM_PCIE_CLKSTCTRL_OFFSET,
-                       .modulemode   = MODULEMODE_SWCTRL,
-               },
-       },
-};
-
-/*
- * 'PCIE PHY' class
- *
- */
-
-static struct omap_hwmod_class dra7xx_pcie_phy_hwmod_class = {
-       .name   = "pcie-phy",
-};
-
-/* pcie1 phy */
-static struct omap_hwmod dra7xx_pcie1_phy_hwmod = {
-       .name           = "pcie1-phy",
-       .class          = &dra7xx_pcie_phy_hwmod_class,
-       .clkdm_name     = "l3init_clkdm",
-       .main_clk       = "l4_root_clk_div",
        .prcm = {
                .omap4 = {
                        .clkctrl_offs = DRA7XX_CM_L3INIT_PCIESS1_CLKCTRL_OFFSET,
@@ -1522,11 +1485,11 @@ static struct omap_hwmod dra7xx_pcie1_phy_hwmod = {
        },
 };
 
-/* pcie2 phy */
-static struct omap_hwmod dra7xx_pcie2_phy_hwmod = {
-       .name           = "pcie2-phy",
-       .class          = &dra7xx_pcie_phy_hwmod_class,
-       .clkdm_name     = "l3init_clkdm",
+/* pcie2 */
+static struct omap_hwmod dra7xx_pciess2_hwmod = {
+       .name           = "pcie2",
+       .class          = &dra7xx_pciess_hwmod_class,
+       .clkdm_name     = "pcie_clkdm",
        .main_clk       = "l4_root_clk_div",
        .prcm = {
                .omap4 = {
@@ -2877,50 +2840,34 @@ static struct omap_hwmod_ocp_if dra7xx_l4_cfg__ocp2scp3 = {
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l3_main_1 -> pcie1 */
-static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pcie1 = {
+/* l3_main_1 -> pciess1 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pciess1 = {
        .master         = &dra7xx_l3_main_1_hwmod,
-       .slave          = &dra7xx_pcie1_hwmod,
+       .slave          = &dra7xx_pciess1_hwmod,
        .clk            = "l3_iclk_div",
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_cfg -> pcie1 */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie1 = {
+/* l4_cfg -> pciess1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pciess1 = {
        .master         = &dra7xx_l4_cfg_hwmod,
-       .slave          = &dra7xx_pcie1_hwmod,
+       .slave          = &dra7xx_pciess1_hwmod,
        .clk            = "l4_root_clk_div",
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l3_main_1 -> pcie2 */
-static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pcie2 = {
+/* l3_main_1 -> pciess2 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pciess2 = {
        .master         = &dra7xx_l3_main_1_hwmod,
-       .slave          = &dra7xx_pcie2_hwmod,
+       .slave          = &dra7xx_pciess2_hwmod,
        .clk            = "l3_iclk_div",
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
 
-/* l4_cfg -> pcie2 */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie2 = {
-       .master         = &dra7xx_l4_cfg_hwmod,
-       .slave          = &dra7xx_pcie2_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_cfg -> pcie1 phy */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie1_phy = {
-       .master         = &dra7xx_l4_cfg_hwmod,
-       .slave          = &dra7xx_pcie1_phy_hwmod,
-       .clk            = "l4_root_clk_div",
-       .user           = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_cfg -> pcie2 phy */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie2_phy = {
+/* l4_cfg -> pciess2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pciess2 = {
        .master         = &dra7xx_l4_cfg_hwmod,
-       .slave          = &dra7xx_pcie2_phy_hwmod,
+       .slave          = &dra7xx_pciess2_hwmod,
        .clk            = "l4_root_clk_div",
        .user           = OCP_USER_MPU | OCP_USER_SDMA,
 };
@@ -3327,12 +3274,10 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
        &dra7xx_l4_cfg__mpu,
        &dra7xx_l4_cfg__ocp2scp1,
        &dra7xx_l4_cfg__ocp2scp3,
-       &dra7xx_l3_main_1__pcie1,
-       &dra7xx_l4_cfg__pcie1,
-       &dra7xx_l3_main_1__pcie2,
-       &dra7xx_l4_cfg__pcie2,
-       &dra7xx_l4_cfg__pcie1_phy,
-       &dra7xx_l4_cfg__pcie2_phy,
+       &dra7xx_l3_main_1__pciess1,
+       &dra7xx_l4_cfg__pciess1,
+       &dra7xx_l3_main_1__pciess2,
+       &dra7xx_l4_cfg__pciess2,
        &dra7xx_l3_main_1__qspi,
        &dra7xx_l4_per3__rtcss,
        &dra7xx_l4_cfg__sata,
index 190fa43..e642b07 100644 (file)
@@ -173,6 +173,7 @@ static void __init omap3_igep0030_rev_g_legacy_init(void)
 
 static void __init omap3_evm_legacy_init(void)
 {
+       hsmmc2_internal_input_clk();
        legacy_init_wl12xx(WL12XX_REFCLOCK_38, 0, 149);
 }
 
index a08a617..d6d6bc3 100644 (file)
@@ -252,10 +252,10 @@ static void omap44xx_prm_save_and_clear_irqen(u32 *saved_mask)
 {
        saved_mask[0] =
                omap4_prm_read_inst_reg(OMAP4430_PRM_OCP_SOCKET_INST,
-                                       OMAP4_PRM_IRQSTATUS_MPU_OFFSET);
+                                       OMAP4_PRM_IRQENABLE_MPU_OFFSET);
        saved_mask[1] =
                omap4_prm_read_inst_reg(OMAP4430_PRM_OCP_SOCKET_INST,
-                                       OMAP4_PRM_IRQSTATUS_MPU_2_OFFSET);
+                                       OMAP4_PRM_IRQENABLE_MPU_2_OFFSET);
 
        omap4_prm_write_inst_reg(0, OMAP4430_PRM_OCP_SOCKET_INST,
                                 OMAP4_PRM_IRQENABLE_MPU_OFFSET);
index 7d8eab8..f6d02e4 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/platform_data/video-pxafb.h>
 #include <mach/bitfield.h>
 #include <linux/platform_data/mmc-pxamci.h>
+#include <linux/smc91x.h>
 
 #include "generic.h"
 #include "devices.h"
index 0eecd83..89a7c06 100644 (file)
@@ -11,6 +11,7 @@
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  */
+#include <linux/bitops.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/interrupt.h>
@@ -40,7 +41,6 @@
 #define ICHP_VAL_IRQ           (1 << 31)
 #define ICHP_IRQ(i)            (((i) >> 16) & 0x7fff)
 #define IPR_VALID              (1 << 31)
-#define IRQ_BIT(n)             (((n) - PXA_IRQ(0)) & 0x1f)
 
 #define MAX_INTERNAL_IRQS      128
 
@@ -51,6 +51,7 @@
 static void __iomem *pxa_irq_base;
 static int pxa_internal_irq_nr;
 static bool cpu_has_ipr;
+static struct irq_domain *pxa_irq_domain;
 
 static inline void __iomem *irq_base(int i)
 {
@@ -66,18 +67,20 @@ static inline void __iomem *irq_base(int i)
 void pxa_mask_irq(struct irq_data *d)
 {
        void __iomem *base = irq_data_get_irq_chip_data(d);
+       irq_hw_number_t irq = irqd_to_hwirq(d);
        uint32_t icmr = __raw_readl(base + ICMR);
 
-       icmr &= ~(1 << IRQ_BIT(d->irq));
+       icmr &= ~BIT(irq & 0x1f);
        __raw_writel(icmr, base + ICMR);
 }
 
 void pxa_unmask_irq(struct irq_data *d)
 {
        void __iomem *base = irq_data_get_irq_chip_data(d);
+       irq_hw_number_t irq = irqd_to_hwirq(d);
        uint32_t icmr = __raw_readl(base + ICMR);
 
-       icmr |= 1 << IRQ_BIT(d->irq);
+       icmr |= BIT(irq & 0x1f);
        __raw_writel(icmr, base + ICMR);
 }
 
@@ -118,40 +121,63 @@ asmlinkage void __exception_irq_entry ichp_handle_irq(struct pt_regs *regs)
        } while (1);
 }
 
-void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int))
+static int pxa_irq_map(struct irq_domain *h, unsigned int virq,
+                      irq_hw_number_t hw)
 {
-       int irq, i, n;
+       void __iomem *base = irq_base(hw / 32);
 
-       BUG_ON(irq_nr > MAX_INTERNAL_IRQS);
+       /* initialize interrupt priority */
+       if (cpu_has_ipr)
+               __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw));
+
+       irq_set_chip_and_handler(virq, &pxa_internal_irq_chip,
+                                handle_level_irq);
+       irq_set_chip_data(virq, base);
+       set_irq_flags(virq, IRQF_VALID);
+
+       return 0;
+}
+
+static struct irq_domain_ops pxa_irq_ops = {
+       .map    = pxa_irq_map,
+       .xlate  = irq_domain_xlate_onecell,
+};
+
+static __init void
+pxa_init_irq_common(struct device_node *node, int irq_nr,
+                   int (*fn)(struct irq_data *, unsigned int))
+{
+       int n;
 
        pxa_internal_irq_nr = irq_nr;
-       cpu_has_ipr = !cpu_is_pxa25x();
-       pxa_irq_base = io_p2v(0x40d00000);
+       pxa_irq_domain = irq_domain_add_legacy(node, irq_nr,
+                                              PXA_IRQ(0), 0,
+                                              &pxa_irq_ops, NULL);
+       if (!pxa_irq_domain)
+               panic("Unable to add PXA IRQ domain\n");
+       irq_set_default_host(pxa_irq_domain);
 
        for (n = 0; n < irq_nr; n += 32) {
                void __iomem *base = irq_base(n >> 5);
 
                __raw_writel(0, base + ICMR);   /* disable all IRQs */
                __raw_writel(0, base + ICLR);   /* all IRQs are IRQ, not FIQ */
-               for (i = n; (i < (n + 32)) && (i < irq_nr); i++) {
-                       /* initialize interrupt priority */
-                       if (cpu_has_ipr)
-                               __raw_writel(i | IPR_VALID, pxa_irq_base + IPR(i));
-
-                       irq = PXA_IRQ(i);
-                       irq_set_chip_and_handler(irq, &pxa_internal_irq_chip,
-                                                handle_level_irq);
-                       irq_set_chip_data(irq, base);
-                       set_irq_flags(irq, IRQF_VALID);
-               }
        }
-
        /* only unmasked interrupts kick us out of idle */
        __raw_writel(1, irq_base(0) + ICCR);
 
        pxa_internal_irq_chip.irq_set_wake = fn;
 }
 
+void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int))
+{
+       BUG_ON(irq_nr > MAX_INTERNAL_IRQS);
+
+       pxa_irq_base = io_p2v(0x40d00000);
+       cpu_has_ipr = !cpu_is_pxa25x();
+       pxa_init_irq_common(NULL, irq_nr, fn);
+}
+
 #ifdef CONFIG_PM
 static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32];
 static unsigned long saved_ipr[MAX_INTERNAL_IRQS];
@@ -203,30 +229,6 @@ struct syscore_ops pxa_irq_syscore_ops = {
 };
 
 #ifdef CONFIG_OF
-static struct irq_domain *pxa_irq_domain;
-
-static int pxa_irq_map(struct irq_domain *h, unsigned int virq,
-                      irq_hw_number_t hw)
-{
-       void __iomem *base = irq_base(hw / 32);
-
-       /* initialize interrupt priority */
-       if (cpu_has_ipr)
-               __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw));
-
-       irq_set_chip_and_handler(hw, &pxa_internal_irq_chip,
-                                handle_level_irq);
-       irq_set_chip_data(hw, base);
-       set_irq_flags(hw, IRQF_VALID);
-
-       return 0;
-}
-
-static struct irq_domain_ops pxa_irq_ops = {
-       .map    = pxa_irq_map,
-       .xlate  = irq_domain_xlate_onecell,
-};
-
 static const struct of_device_id intc_ids[] __initconst = {
        { .compatible = "marvell,pxa-intc", },
        {}
@@ -236,7 +238,7 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int))
 {
        struct device_node *node;
        struct resource res;
-       int n, ret;
+       int ret;
 
        node = of_find_matching_node(NULL, intc_ids);
        if (!node) {
@@ -267,23 +269,6 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int))
                return;
        }
 
-       pxa_irq_domain = irq_domain_add_legacy(node, pxa_internal_irq_nr, 0, 0,
-                                              &pxa_irq_ops, NULL);
-       if (!pxa_irq_domain)
-               panic("Unable to add PXA IRQ domain\n");
-
-       irq_set_default_host(pxa_irq_domain);
-
-       for (n = 0; n < pxa_internal_irq_nr; n += 32) {
-               void __iomem *base = irq_base(n >> 5);
-
-               __raw_writel(0, base + ICMR);   /* disable all IRQs */
-               __raw_writel(0, base + ICLR);   /* all IRQs are IRQ, not FIQ */
-       }
-
-       /* only unmasked interrupts kick us out of idle */
-       __raw_writel(1, irq_base(0) + ICCR);
-
-       pxa_internal_irq_chip.irq_set_wake = fn;
+       pxa_init_irq_common(node, pxa_internal_irq_nr, fn);
 }
 #endif /* CONFIG_OF */
index 28da319..eaee2c2 100644 (file)
@@ -195,7 +195,7 @@ static struct resource smc91x_resources[] = {
 };
 
 struct smc91x_platdata smc91x_platdata = {
-       .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT;
+       .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
 };
 
 static struct platform_device smc91x_device = {
index 205f9bf..ac2ae5c 100644 (file)
@@ -412,7 +412,7 @@ static struct fixed_voltage_config can_regulator_pdata = {
 };
 
 static struct platform_device can_regulator_device = {
-       .name   = "reg-fixed-volage",
+       .name   = "reg-fixed-voltage",
        .id     = 0,
        .dev    = {
                .platform_data  = &can_regulator_pdata,
index 7b0cd31..af868d2 100644 (file)
@@ -268,8 +268,8 @@ static int neponset_probe(struct platform_device *dev)
                .id = 0,
                .res = smc91x_resources,
                .num_res = ARRAY_SIZE(smc91x_resources),
-               .data = &smc91c_platdata,
-               .size_data = sizeof(smc91c_platdata),
+               .data = &smc91x_platdata,
+               .size_data = sizeof(smc91x_platdata),
        };
        int ret, irq;
 
index 696fd0f..1525d7b 100644 (file)
@@ -54,7 +54,7 @@ static struct platform_device smc91x_device = {
        .num_resources  = ARRAY_SIZE(smc91x_resources),
        .resource       = smc91x_resources,
        .dev = {
-               .platform_data  = &smc91c_platdata,
+               .platform_data  = &smc91x_platdata,
        },
 };
 
index 483cb46..a0f3b1c 100644 (file)
@@ -45,6 +45,6 @@ extern char secondary_trampoline, secondary_trampoline_end;
 
 extern unsigned long socfpga_cpu1start_addr;
 
-#define SOCFPGA_SCU_VIRT_BASE   0xfffec000
+#define SOCFPGA_SCU_VIRT_BASE   0xfee00000
 
 #endif
index 383d61e..f5e597c 100644 (file)
@@ -23,6 +23,7 @@
 #include <asm/hardware/cache-l2x0.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
+#include <asm/cacheflush.h>
 
 #include "core.h"
 
@@ -73,6 +74,10 @@ void __init socfpga_sysmgr_init(void)
                        (u32 *) &socfpga_cpu1start_addr))
                pr_err("SMP: Need cpu1-start-addr in device tree.\n");
 
+       /* Ensure that socfpga_cpu1start_addr is visible to other CPUs */
+       smp_wmb();
+       sync_cache_w(&socfpga_cpu1start_addr);
+
        sys_manager_base_addr = of_iomap(np, 0);
 
        np = of_find_compatible_node(NULL, NULL, "altr,rst-mgr");
index b067390..b373aca 100644 (file)
@@ -18,6 +18,7 @@ static const char *stih41x_dt_match[] __initdata = {
        "st,stih415",
        "st,stih416",
        "st,stih407",
+       "st,stih410",
        "st,stih418",
        NULL
 };
index a77604f..81502b9 100644 (file)
@@ -1,10 +1,12 @@
 menuconfig ARCH_SUNXI
        bool "Allwinner SoCs" if ARCH_MULTI_V7
        select ARCH_REQUIRE_GPIOLIB
+       select ARCH_HAS_RESET_CONTROLLER
        select CLKSRC_MMIO
        select GENERIC_IRQ_CHIP
        select PINCTRL
        select SUN4I_TIMER
+       select RESET_CONTROLLER
 
 if ARCH_SUNXI
 
@@ -20,10 +22,8 @@ config MACH_SUN5I
 config MACH_SUN6I
        bool "Allwinner A31 (sun6i) SoCs support"
        default ARCH_SUNXI
-       select ARCH_HAS_RESET_CONTROLLER
        select ARM_GIC
        select MFD_SUN6I_PRCM
-       select RESET_CONTROLLER
        select SUN5I_HSTIMER
 
 config MACH_SUN7I
@@ -37,16 +37,12 @@ config MACH_SUN7I
 config MACH_SUN8I
        bool "Allwinner A23 (sun8i) SoCs support"
        default ARCH_SUNXI
-       select ARCH_HAS_RESET_CONTROLLER
        select ARM_GIC
        select MFD_SUN6I_PRCM
-       select RESET_CONTROLLER
 
 config MACH_SUN9I
        bool "Allwinner (sun9i) SoCs support"
        default ARCH_SUNXI
-       select ARCH_HAS_RESET_CONTROLLER
        select ARM_GIC
-       select RESET_CONTROLLER
 
 endif
index f2b586d..155807f 100644 (file)
@@ -15,7 +15,7 @@
  */
 
 #include <asm/firmware.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -44,7 +44,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
        tegra_set_cpu_in_lp2();
        cpu_pm_enter();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        call_firmware_op(prepare_idle);
 
@@ -52,7 +52,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
        if (call_firmware_op(do_idle, 0) == -ENOSYS)
                cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        cpu_pm_exit();
        tegra_clear_cpu_in_lp2();
index 4f25a7c..48844ae 100644 (file)
@@ -20,7 +20,7 @@
  */
 
 #include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -136,11 +136,11 @@ static bool tegra20_cpu_cluster_power_down(struct cpuidle_device *dev,
        if (tegra20_reset_cpu_1() || !tegra_cpu_rail_off_ready())
                return false;
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        tegra_idle_lp2_last();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        if (cpu_online(1))
                tegra20_wake_cpu1_from_reset();
@@ -153,13 +153,13 @@ static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev,
                                         struct cpuidle_driver *drv,
                                         int index)
 {
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        cpu_suspend(0, tegra20_sleep_cpu_secondary_finish);
 
        tegra20_cpu_clear_resettable();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        return true;
 }
index f8815ed..84d809a 100644 (file)
@@ -20,7 +20,7 @@
  */
 
 #include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/cpu_pm.h>
 #include <linux/kernel.h>
@@ -76,11 +76,11 @@ static bool tegra30_cpu_cluster_power_down(struct cpuidle_device *dev,
                return false;
        }
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        tegra_idle_lp2_last();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        return true;
 }
@@ -90,13 +90,13 @@ static bool tegra30_cpu_core_power_down(struct cpuidle_device *dev,
                                        struct cpuidle_driver *drv,
                                        int index)
 {
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+       tick_broadcast_enter();
 
        smp_wmb();
 
        cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+       tick_broadcast_exit();
 
        return true;
 }
index c6c7696..8f15f70 100644 (file)
@@ -1131,23 +1131,22 @@ static void __init l2c310_of_parse(const struct device_node *np,
        }
 
        ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
-       if (ret)
-               return;
-
-       switch (assoc) {
-       case 16:
-               *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
-               *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
-               *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
-               break;
-       case 8:
-               *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
-               *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
-               break;
-       default:
-               pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
-                      assoc);
-               break;
+       if (!ret) {
+               switch (assoc) {
+               case 16:
+                       *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+                       *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
+                       *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+                       break;
+               case 8:
+                       *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+                       *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+                       break;
+               default:
+                       pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
+                              assoc);
+                       break;
+               }
        }
 
        prefetch = l2x0_saved_regs.prefetch_ctrl;
index 170a116..c274476 100644 (file)
@@ -171,7 +171,7 @@ static int __dma_supported(struct device *dev, u64 mask, bool warn)
         */
        if (sizeof(mask) != sizeof(dma_addr_t) &&
            mask > (dma_addr_t)~0 &&
-           dma_to_pfn(dev, ~0) < max_pfn) {
+           dma_to_pfn(dev, ~0) < max_pfn - 1) {
                if (warn) {
                        dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
                                 mask);
index a982dc3..6333d9c 100644 (file)
@@ -552,6 +552,7 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 
        pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
                inf->name, fsr, addr);
+       show_pte(current->mm, addr);
 
        info.si_signo = inf->sig;
        info.si_errno = 0;
index 004e35c..cf30daf 100644 (file)
@@ -49,7 +49,10 @@ static int change_memory_common(unsigned long addr, int numpages,
                WARN_ON_ONCE(1);
        }
 
-       if (!is_module_address(start) || !is_module_address(end - 1))
+       if (start < MODULES_VADDR || start >= MODULES_END)
+               return -EINVAL;
+
+       if (end < MODULES_VADDR || end >= MODULES_END)
                return -EINVAL;
 
        data.set_mask = set_mask;
index 61b4d70..2438b96 100644 (file)
@@ -44,24 +44,20 @@ static u64 notrace omap_32k_read_sched_clock(void)
 }
 
 /**
- * omap_read_persistent_clock -  Return time from a persistent clock.
+ * omap_read_persistent_clock64 -  Return time from a persistent clock.
  *
  * Reads the time from a source which isn't disabled during PM, the
  * 32k sync timer.  Convert the cycles elapsed since last read into
- * nsecs and adds to a monotonically increasing timespec.
+ * nsecs and adds to a monotonically increasing timespec64.
  */
-static struct timespec persistent_ts;
+static struct timespec64 persistent_ts;
 static cycles_t cycles;
 static unsigned int persistent_mult, persistent_shift;
-static DEFINE_SPINLOCK(read_persistent_clock_lock);
 
-static void omap_read_persistent_clock(struct timespec *ts)
+static void omap_read_persistent_clock64(struct timespec64 *ts)
 {
        unsigned long long nsecs;
        cycles_t last_cycles;
-       unsigned long flags;
-
-       spin_lock_irqsave(&read_persistent_clock_lock, flags);
 
        last_cycles = cycles;
        cycles = sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0;
@@ -69,11 +65,9 @@ static void omap_read_persistent_clock(struct timespec *ts)
        nsecs = clocksource_cyc2ns(cycles - last_cycles,
                                        persistent_mult, persistent_shift);
 
-       timespec_add_ns(&persistent_ts, nsecs);
+       timespec64_add_ns(&persistent_ts, nsecs);
 
        *ts = persistent_ts;
-
-       spin_unlock_irqrestore(&read_persistent_clock_lock, flags);
 }
 
 /**
@@ -103,7 +97,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
 
        /*
         * 120000 rough estimate from the calculations in
-        * __clocksource_updatefreq_scale.
+        * __clocksource_update_freq_scale.
         */
        clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
                        32768, NSEC_PER_SEC, 120000);
@@ -116,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
        }
 
        sched_clock_register(omap_32k_read_sched_clock, 32, 32768);
-       register_persistent_clock(NULL, omap_read_persistent_clock);
+       register_persistent_clock(NULL, omap_read_persistent_clock64);
        pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
 
        return 0;
index db10169..8ca94d3 100644 (file)
@@ -799,6 +799,7 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        const struct of_device_id *match;
        const struct dmtimer_platform_data *pdata;
+       int ret;
 
        match = of_match_device(of_match_ptr(omap_timer_match), dev);
        pdata = match ? match->data : dev->platform_data;
@@ -860,7 +861,12 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
        }
 
        if (!timer->reserved) {
-               pm_runtime_get_sync(dev);
+               ret = pm_runtime_get_sync(dev);
+               if (ret < 0) {
+                       dev_err(dev, "%s: pm_runtime_get_sync failed!\n",
+                               __func__);
+                       goto err_get_sync;
+               }
                __omap_dm_timer_init_regs(timer);
                pm_runtime_put(dev);
        }
@@ -873,6 +879,11 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
        dev_dbg(dev, "Device Probed.\n");
 
        return 0;
+
+err_get_sync:
+       pm_runtime_put_noidle(dev);
+       pm_runtime_disable(dev);
+       return ret;
 }
 
 /**
@@ -899,6 +910,8 @@ static int omap_dm_timer_remove(struct platform_device *pdev)
                }
        spin_unlock_irqrestore(&dm_timer_lock, flags);
 
+       pm_runtime_disable(&pdev->dev);
+
        return ret;
 }
 
index f1ad9c2..a857794 100644 (file)
                };
 
                sgenet0: ethernet@1f210000 {
-                       compatible = "apm,xgene-enet";
+                       compatible = "apm,xgene1-sgenet";
                        status = "disabled";
                        reg = <0x0 0x1f210000 0x0 0xd100>,
                              <0x0 0x1f200000 0x0 0Xc300>,
                };
 
                xgenet: ethernet@1f610000 {
-                       compatible = "apm,xgene-enet";
+                       compatible = "apm,xgene1-xgenet";
                        status = "disabled";
                        reg = <0x0 0x1f610000 0x0 0xd100>,
                              <0x0 0x1f600000 0x0 0Xc300>,
index ea2b566..c9b89ef 100644 (file)
@@ -8,7 +8,7 @@
  */
 
        /* SoC fixed clocks */
-       soc_uartclk: refclk72738khz {
+       soc_uartclk: refclk7273800hz {
                compatible = "fixed-clock";
                #clock-cells = <0>;
                clock-frequency = <7273800>;
index cb95930..d8c25b7 100644 (file)
@@ -246,14 +246,30 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
        __ret; \
 })
 
-#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-
-#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
-       cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \
-                               o1, o2, n1, n2)
+#define _protect_cmpxchg_local(pcp, o, n)                      \
+({                                                             \
+       typeof(*raw_cpu_ptr(&(pcp))) __ret;                     \
+       preempt_disable();                                      \
+       __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n);       \
+       preempt_enable();                                       \
+       __ret;                                                  \
+})
+
+#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+
+#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2)          \
+({                                                                     \
+       int __ret;                                                      \
+       preempt_disable();                                              \
+       __ret = cmpxchg_double_local(   raw_cpu_ptr(&(ptr1)),           \
+                                       raw_cpu_ptr(&(ptr2)),           \
+                                       o1, o2, n1, n2);                \
+       preempt_enable();                                               \
+       __ret;                                                          \
+})
 
 #define cmpxchg64(ptr,o,n)             cmpxchg((ptr),(o),(n))
 #define cmpxchg64_local(ptr,o,n)       cmpxchg_local((ptr),(o),(n))
index 92bbae3..7052245 100644 (file)
@@ -90,6 +90,7 @@
 #define ESR_ELx_FSC            (0x3F)
 #define ESR_ELx_FSC_TYPE       (0x3C)
 #define ESR_ELx_FSC_EXTABT     (0x10)
+#define ESR_ELx_FSC_ACCESS     (0x08)
 #define ESR_ELx_FSC_FAULT      (0x04)
 #define ESR_ELx_FSC_PERM       (0x0C)
 #define ESR_ELx_CV             (UL(1) << 24)
index 076a1c7..c0e5165 100644 (file)
  */
 #ifndef __ASM_JUMP_LABEL_H
 #define __ASM_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <asm/insn.h>
 
-#ifdef __KERNEL__
-
 #define JUMP_LABEL_NOP_SIZE            AARCH64_INSN_SIZE
 
 static __always_inline bool arch_static_branch(struct static_key *key)
@@ -39,8 +40,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u64 jump_label_t;
 
 struct jump_entry {
@@ -49,4 +48,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif /* __ASM_JUMP_LABEL_H */
index 94674eb..ac6fafb 100644 (file)
  * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
  * not known to exist and will break with this configuration.
  *
+ * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
+ * (see hyp-init.S).
+ *
  * Note that when using 4K pages, we concatenate two first level page tables
  * together.
  *
 #ifdef CONFIG_ARM64_64K_PAGES
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
  * 40bits input  (T0SZ = 24)
  * 64kB pages (TG0 = 1)
  * 2 level page tables (SL = 1)
 #else
 /*
  * Stage2 translation configuration:
- * 40bits output (PS = 2)
  * 40bits input  (T0SZ = 24)
  * 4kB pages (TG0 = 0)
  * 3 level page tables (SL = 1)
 
 /* For compatibility with fault code shared with 32-bit */
 #define FSC_FAULT      ESR_ELx_FSC_FAULT
+#define FSC_ACCESS     ESR_ELx_FSC_ACCESS
 #define FSC_PERM       ESR_ELx_FSC_PERM
 
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
index 8ac3c70..f0f58c9 100644 (file)
@@ -28,6 +28,8 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
 #if defined(CONFIG_KVM_ARM_MAX_VCPUS)
 #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
 #else
@@ -177,19 +179,10 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
                        unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 /* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
-                             unsigned long end)
-{
-       return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
-       return 0;
-}
-
 static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
                                                         unsigned long address)
 {
index 9f52beb..889c908 100644 (file)
@@ -31,28 +31,6 @@ struct kvm_decode {
        bool sign_extend;
 };
 
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
-       phys_addr_t     phys_addr;
-       u8              data[8];
-       u32             len;
-       bool            is_write;
-       void            *private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
-                                   struct kvm_exit_mmio *mmio)
-{
-       run->mmio.phys_addr     = mmio->phys_addr;
-       run->mmio.len           = mmio->len;
-       run->mmio.is_write      = mmio->is_write;
-       memcpy(run->mmio.data, mmio->data, mmio->len);
-       run->exit_reason        = KVM_EXIT_MMIO;
-}
-
 int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
 int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
                 phys_addr_t fault_ipa);
index 6458b53..bbfb600 100644 (file)
@@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 #define PTRS_PER_S2_PGD                (1 << PTRS_PER_S2_PGD_SHIFT)
 #define S2_PGD_ORDER           get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
 
+#define kvm_pgd_index(addr)    (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
 /*
  * If we are concatenating first level stage-2 page tables, we would have less
  * than or equal to 16 pointers in the fake PGD, because that's what the
@@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
 #define KVM_PREALLOC_LEVEL     (0)
 #endif
 
-/**
- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
- * @kvm:       The KVM struct pointer for the VM.
- * @pgd:       The kernel pseudo pgd
- *
- * When the kernel uses more levels of page tables than the guest, we allocate
- * a fake PGD and pre-populate it to point to the next-level page table, which
- * will be the real initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
- * the kernel will use folded pud.  When KVM_PREALLOC_LEVEL==1, we
- * allocate 2 consecutive PUD pages.
- */
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
-{
-       unsigned int i;
-       unsigned long hwpgd;
-
-       if (KVM_PREALLOC_LEVEL == 0)
-               return 0;
-
-       hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
-       if (!hwpgd)
-               return -ENOMEM;
-
-       for (i = 0; i < PTRS_PER_S2_PGD; i++) {
-               if (KVM_PREALLOC_LEVEL == 1)
-                       pgd_populate(NULL, pgd + i,
-                                    (pud_t *)hwpgd + i * PTRS_PER_PUD);
-               else if (KVM_PREALLOC_LEVEL == 2)
-                       pud_populate(NULL, pud_offset(pgd, 0) + i,
-                                    (pmd_t *)hwpgd + i * PTRS_PER_PMD);
-       }
-
-       return 0;
-}
-
 static inline void *kvm_get_hwpgd(struct kvm *kvm)
 {
        pgd_t *pgd = kvm->arch.pgd;
@@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
        return pmd_offset(pud, 0);
 }
 
-static inline void kvm_free_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
 {
-       if (KVM_PREALLOC_LEVEL > 0) {
-               unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
-               free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
-       }
+       if (KVM_PREALLOC_LEVEL > 0)
+               return PTRS_PER_S2_PGD * PAGE_SIZE;
+       return PTRS_PER_S2_PGD * sizeof(pgd_t);
 }
 
 static inline bool kvm_page_empty(void *ptr)
index a9eee33..101a42b 100644 (file)
@@ -151,6 +151,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
        unsigned int cpu = smp_processor_id();
 
+       /*
+        * init_mm.pgd does not contain any user mappings and it is always
+        * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
+        */
+       if (next == &init_mm) {
+               cpu_set_reserved_ttbr0();
+               return;
+       }
+
        if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
                check_and_switch_context(next, tsk);
 }
index 09da25b..4fde8c1 100644 (file)
@@ -204,25 +204,47 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
        return ret;
 }
 
+#define _percpu_read(pcp)                                              \
+({                                                                     \
+       typeof(pcp) __retval;                                           \
+       preempt_disable();                                              \
+       __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)),      \
+                                             sizeof(pcp));             \
+       preempt_enable();                                               \
+       __retval;                                                       \
+})
+
+#define _percpu_write(pcp, val)                                                \
+do {                                                                   \
+       preempt_disable();                                              \
+       __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val),       \
+                               sizeof(pcp));                           \
+       preempt_enable();                                               \
+} while (0)
+
+#define _pcp_protect(operation, pcp, val)                      \
+({                                                             \
+       typeof(pcp) __retval;                                   \
+       preempt_disable();                                      \
+       __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)),  \
+                                         (val), sizeof(pcp));  \
+       preempt_enable();                                       \
+       __retval;                                               \
+})
+
 #define _percpu_add(pcp, val) \
-       __percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+       _pcp_protect(__percpu_add, pcp, val)
 
-#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val))
+#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
 
 #define _percpu_and(pcp, val) \
-       __percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+       _pcp_protect(__percpu_and, pcp, val)
 
 #define _percpu_or(pcp, val) \
-       __percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
-
-#define _percpu_read(pcp) (typeof(pcp))        \
-       (__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp)))
-
-#define _percpu_write(pcp, val) \
-       __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))
+       _pcp_protect(__percpu_or, pcp, val)
 
 #define _percpu_xchg(pcp, val) (typeof(pcp)) \
-       (__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)))
+       _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))
 
 #define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
 #define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)
index 9a8fd84..941c375 100644 (file)
@@ -39,7 +39,11 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
 
 #include <asm/memory.h>
 
-#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)
+#define cpu_switch_mm(pgd,mm)                          \
+do {                                                   \
+       BUG_ON(pgd == swapper_pg_dir);                  \
+       cpu_do_switch_mm(virt_to_phys(pgd),mm);         \
+} while (0)
 
 #define cpu_get_pgd()                                  \
 ({                                                     \
index c028fe3..53d9c35 100644 (file)
@@ -48,6 +48,7 @@ static inline void tlb_flush(struct mmu_gather *tlb)
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
                                  unsigned long addr)
 {
+       __flush_tlb_pgtable(tlb->mm, addr);
        pgtable_page_dtor(pte);
        tlb_remove_entry(tlb, pte);
 }
@@ -56,6 +57,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
                                  unsigned long addr)
 {
+       __flush_tlb_pgtable(tlb->mm, addr);
        tlb_remove_entry(tlb, virt_to_page(pmdp));
 }
 #endif
@@ -64,6 +66,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
                                  unsigned long addr)
 {
+       __flush_tlb_pgtable(tlb->mm, addr);
        tlb_remove_entry(tlb, virt_to_page(pudp));
 }
 #endif
index 4abe9b9..c3bb05b 100644 (file)
@@ -143,6 +143,19 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
                flush_tlb_all();
 }
 
+/*
+ * Used to invalidate the TLB (walk caches) corresponding to intermediate page
+ * table levels (pgd/pud/pmd).
+ */
+static inline void __flush_tlb_pgtable(struct mm_struct *mm,
+                                      unsigned long uaddr)
+{
+       unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48);
+
+       dsb(ishst);
+       asm("tlbi       vae1is, %0" : : "r" (addr));
+       dsb(ish);
+}
 /*
  * On AArch64, the cache coherency is handled via the set_pte_at() function.
  */
index 3ef77a4..c154c0b 100644 (file)
@@ -191,6 +191,9 @@ struct kvm_arch_memory_slot {
 /* Highest supported SPI, from VGIC_NR_IRQS */
 #define KVM_ARM_IRQ_GIC_MAX            127
 
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS          1
+
 /* PSCI interface */
 #define KVM_PSCI_FN_BASE               0x95c1ba5e
 #define KVM_PSCI_FN(n)                 (KVM_PSCI_FN_BASE + (n))
index b42c7b4..ab21e0d 100644 (file)
@@ -337,7 +337,11 @@ core_initcall(arm64_dmi_init);
 
 static void efi_set_pgd(struct mm_struct *mm)
 {
-       cpu_switch_mm(mm->pgd, mm);
+       if (mm == &init_mm)
+               cpu_set_reserved_ttbr0();
+       else
+               cpu_switch_mm(mm->pgd, mm);
+
        flush_tlb_all();
        if (icache_is_aivivt())
                __flush_icache_all();
@@ -354,3 +358,12 @@ void efi_virtmap_unload(void)
        efi_set_pgd(current->active_mm);
        preempt_enable();
 }
+
+/*
+ * UpdateCapsule() depends on the system being shutdown via
+ * ResetSystem().
+ */
+bool efi_poweroff_required(void)
+{
+       return efi_enabled(EFI_RUNTIME_SERVICES);
+}
index 8ce88e0..07f9305 100644 (file)
@@ -585,8 +585,8 @@ ENDPROC(set_cpu_boot_mode_flag)
  * zeroing of .bss would clobber it.
  */
        .pushsection    .data..cacheline_aligned
-ENTRY(__boot_cpu_mode)
        .align  L1_CACHE_SHIFT
+ENTRY(__boot_cpu_mode)
        .long   BOOT_CPU_MODE_EL2
        .long   0
        .popsection
index fde9923..c6b1f3b 100644 (file)
@@ -21,6 +21,7 @@
 #include <stdarg.h>
 
 #include <linux/compat.h>
+#include <linux/efi.h>
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
@@ -150,6 +151,13 @@ void machine_restart(char *cmd)
        local_irq_disable();
        smp_send_stop();
 
+       /*
+        * UpdateCapsule() depends on the system being reset via
+        * ResetSystem().
+        */
+       if (efi_enabled(EFI_RUNTIME_SERVICES))
+               efi_reboot(reboot_mode, NULL);
+
        /* Now call the architecture specific reboot code. */
        if (arm_pm_restart)
                arm_pm_restart(reboot_mode, cmd);
index 32aeea0..ec37ab3 100644 (file)
@@ -200,7 +200,7 @@ up_fail:
 void update_vsyscall(struct timekeeper *tk)
 {
        struct timespec xtime_coarse;
-       u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
+       u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
 
        ++vdso_data->tb_seq_count;
        smp_wmb();
@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
        vdso_data->wtm_clock_nsec               = tk->wall_to_monotonic.tv_nsec;
 
        if (!use_syscall) {
-               vdso_data->cs_cycle_last        = tk->tkr.cycle_last;
+               vdso_data->cs_cycle_last        = tk->tkr_mono.cycle_last;
                vdso_data->xtime_clock_sec      = tk->xtime_sec;
-               vdso_data->xtime_clock_nsec     = tk->tkr.xtime_nsec;
-               vdso_data->cs_mult              = tk->tkr.mult;
-               vdso_data->cs_shift             = tk->tkr.shift;
+               vdso_data->xtime_clock_nsec     = tk->tkr_mono.xtime_nsec;
+               vdso_data->cs_mult              = tk->tkr_mono.mult;
+               vdso_data->cs_shift             = tk->tkr_mono.shift;
        }
 
        smp_wmb();
index f5590c8..5105e29 100644 (file)
@@ -18,6 +18,7 @@ if VIRTUALIZATION
 
 config KVM
        bool "Kernel-based Virtual Machine (KVM) support"
+       depends on OF
        select MMU_NOTIFIER
        select PREEMPT_NOTIFIERS
        select ANON_INODES
@@ -25,10 +26,10 @@ config KVM
        select HAVE_KVM_ARCH_TLB_FLUSH_ALL
        select KVM_MMIO
        select KVM_ARM_HOST
-       select KVM_ARM_VGIC
-       select KVM_ARM_TIMER
        select KVM_GENERIC_DIRTYLOG_READ_PROTECT
        select SRCU
+       select HAVE_KVM_EVENTFD
+       select HAVE_KVM_IRQFD
        ---help---
          Support hosting virtualized guest machines.
 
@@ -50,17 +51,4 @@ config KVM_ARM_MAX_VCPUS
          large, so only choose a reasonable number that you expect to
          actually use.
 
-config KVM_ARM_VGIC
-       bool
-       depends on KVM_ARM_HOST && OF
-       select HAVE_KVM_IRQCHIP
-       ---help---
-         Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
-       bool
-       depends on KVM_ARM_VGIC
-       ---help---
-         Adds support for the Architected Timers in virtual machines.
-
 endif # VIRTUALIZATION
index 4e6e09e..d5904f8 100644 (file)
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+ccflags-y += -Iarch/arm64/kvm
 CFLAGS_arm.o := -I.
 CFLAGS_mmu.o := -I.
 
@@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm
 
 obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
 
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
 
@@ -19,11 +19,11 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
 kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
 kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
 
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
-kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
index 58e0c2b..ef7d112 100644 (file)
@@ -51,7 +51,7 @@ static int __init early_coherent_pool(char *p)
 }
 early_param("coherent_pool", early_coherent_pool);
 
-static void *__alloc_from_pool(size_t size, struct page **ret_page)
+static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
 {
        unsigned long val;
        void *ptr = NULL;
@@ -67,6 +67,8 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
 
                *ret_page = phys_to_page(phys);
                ptr = (void *)val;
+               if (flags & __GFP_ZERO)
+                       memset(ptr, 0, size);
        }
 
        return ptr;
@@ -101,6 +103,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
                flags |= GFP_DMA;
        if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
                struct page *page;
+               void *addr;
 
                size = PAGE_ALIGN(size);
                page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
@@ -109,7 +112,10 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
                        return NULL;
 
                *dma_handle = phys_to_dma(dev, page_to_phys(page));
-               return page_address(page);
+               addr = page_address(page);
+               if (flags & __GFP_ZERO)
+                       memset(addr, 0, size);
+               return addr;
        } else {
                return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
        }
@@ -146,7 +152,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
 
        if (!coherent && !(flags & __GFP_WAIT)) {
                struct page *page = NULL;
-               void *addr = __alloc_from_pool(size, &page);
+               void *addr = __alloc_from_pool(size, &page, flags);
 
                if (addr)
                        *dma_handle = phys_to_dma(dev, page_to_phys(page));
index 78d4483..ec4db6d 100644 (file)
@@ -67,6 +67,11 @@ extern unsigned long empty_zero_page;
  */
 #define pgtable_cache_init()   do { } while (0)
 
+/*
+ * c6x is !MMU, so define the simplest implementation
+ */
+#define pgprot_writecombine pgprot_noncached
+
 #include <asm-generic/pgtable.h>
 
 #endif /* _ASM_C6X_PGTABLE_H */
index 9359e50..d5779b0 100644 (file)
@@ -2,6 +2,7 @@
 #define _ASM_METAG_IO_H
 
 #include <linux/types.h>
+#include <asm/pgtable-bits.h>
 
 #define IO_SPACE_LIMIT  0
 
diff --git a/arch/metag/include/asm/pgtable-bits.h b/arch/metag/include/asm/pgtable-bits.h
new file mode 100644 (file)
index 0000000..25ba672
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Meta page table definitions.
+ */
+
+#ifndef _METAG_PGTABLE_BITS_H
+#define _METAG_PGTABLE_BITS_H
+
+#include <asm/metag_mem.h>
+
+/*
+ * Definitions for MMU descriptors
+ *
+ * These are the hardware bits in the MMCU pte entries.
+ * Derived from the Meta toolkit headers.
+ */
+#define _PAGE_PRESENT          MMCU_ENTRY_VAL_BIT
+#define _PAGE_WRITE            MMCU_ENTRY_WR_BIT
+#define _PAGE_PRIV             MMCU_ENTRY_PRIV_BIT
+/* Write combine bit - this can cause writes to occur out of order */
+#define _PAGE_WR_COMBINE       MMCU_ENTRY_WRC_BIT
+/* Sys coherent bit - this bit is never used by Linux */
+#define _PAGE_SYS_COHERENT     MMCU_ENTRY_SYS_BIT
+#define _PAGE_ALWAYS_ZERO_1    0x020
+#define _PAGE_CACHE_CTRL0      0x040
+#define _PAGE_CACHE_CTRL1      0x080
+#define _PAGE_ALWAYS_ZERO_2    0x100
+#define _PAGE_ALWAYS_ZERO_3    0x200
+#define _PAGE_ALWAYS_ZERO_4    0x400
+#define _PAGE_ALWAYS_ZERO_5    0x800
+
+/* These are software bits that we stuff into the gaps in the hardware
+ * pte entries that are not used.  Note, these DO get stored in the actual
+ * hardware, but the hardware just does not use them.
+ */
+#define _PAGE_ACCESSED         _PAGE_ALWAYS_ZERO_1
+#define _PAGE_DIRTY            _PAGE_ALWAYS_ZERO_2
+
+/* Pages owned, and protected by, the kernel. */
+#define _PAGE_KERNEL           _PAGE_PRIV
+
+/* No cacheing of this page */
+#define _PAGE_CACHE_WIN0       (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
+/* burst cacheing - good for data streaming */
+#define _PAGE_CACHE_WIN1       (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
+/* One cache way per thread */
+#define _PAGE_CACHE_WIN2       (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
+/* Full on cacheing */
+#define _PAGE_CACHE_WIN3       (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
+
+#define _PAGE_CACHEABLE                (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
+
+/* which bits are used for cache control ... */
+#define _PAGE_CACHE_MASK       (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
+                                _PAGE_WR_COMBINE)
+
+/* This is a mask of the bits that pte_modify is allowed to change. */
+#define _PAGE_CHG_MASK         (PAGE_MASK)
+
+#define _PAGE_SZ_SHIFT         1
+#define _PAGE_SZ_4K            (0x0)
+#define _PAGE_SZ_8K            (0x1 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_16K           (0x2 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_32K           (0x3 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_64K           (0x4 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_128K          (0x5 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_256K          (0x6 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_512K          (0x7 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_1M            (0x8 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_2M            (0x9 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_4M            (0xa << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_MASK          (0xf << _PAGE_SZ_SHIFT)
+
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define _PAGE_SZ               (_PAGE_SZ_4K)
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define _PAGE_SZ               (_PAGE_SZ_8K)
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define _PAGE_SZ               (_PAGE_SZ_16K)
+#endif
+#define _PAGE_TABLE            (_PAGE_SZ | _PAGE_PRESENT)
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_8K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_16K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_32K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_64K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_128K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_256K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define _PAGE_SZHUGE          (_PAGE_SZ_512K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define _PAGE_SZHUGE          (_PAGE_SZ_1M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define _PAGE_SZHUGE          (_PAGE_SZ_2M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define _PAGE_SZHUGE          (_PAGE_SZ_4M)
+#endif
+
+#endif /* _METAG_PGTABLE_BITS_H */
index d0604c0..ffa3a3a 100644 (file)
@@ -5,6 +5,7 @@
 #ifndef _METAG_PGTABLE_H
 #define _METAG_PGTABLE_H
 
+#include <asm/pgtable-bits.h>
 #include <asm-generic/pgtable-nopmd.h>
 
 /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
 #define VMALLOC_END            0x7FFFFFFF
 #endif
 
-/*
- * Definitions for MMU descriptors
- *
- * These are the hardware bits in the MMCU pte entries.
- * Derived from the Meta toolkit headers.
- */
-#define _PAGE_PRESENT          MMCU_ENTRY_VAL_BIT
-#define _PAGE_WRITE            MMCU_ENTRY_WR_BIT
-#define _PAGE_PRIV             MMCU_ENTRY_PRIV_BIT
-/* Write combine bit - this can cause writes to occur out of order */
-#define _PAGE_WR_COMBINE       MMCU_ENTRY_WRC_BIT
-/* Sys coherent bit - this bit is never used by Linux */
-#define _PAGE_SYS_COHERENT     MMCU_ENTRY_SYS_BIT
-#define _PAGE_ALWAYS_ZERO_1    0x020
-#define _PAGE_CACHE_CTRL0      0x040
-#define _PAGE_CACHE_CTRL1      0x080
-#define _PAGE_ALWAYS_ZERO_2    0x100
-#define _PAGE_ALWAYS_ZERO_3    0x200
-#define _PAGE_ALWAYS_ZERO_4    0x400
-#define _PAGE_ALWAYS_ZERO_5    0x800
-
-/* These are software bits that we stuff into the gaps in the hardware
- * pte entries that are not used.  Note, these DO get stored in the actual
- * hardware, but the hardware just does not use them.
- */
-#define _PAGE_ACCESSED         _PAGE_ALWAYS_ZERO_1
-#define _PAGE_DIRTY            _PAGE_ALWAYS_ZERO_2
-
-/* Pages owned, and protected by, the kernel. */
-#define _PAGE_KERNEL           _PAGE_PRIV
-
-/* No cacheing of this page */
-#define _PAGE_CACHE_WIN0       (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
-/* burst cacheing - good for data streaming */
-#define _PAGE_CACHE_WIN1       (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
-/* One cache way per thread */
-#define _PAGE_CACHE_WIN2       (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
-/* Full on cacheing */
-#define _PAGE_CACHE_WIN3       (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
-
-#define _PAGE_CACHEABLE                (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
-
-/* which bits are used for cache control ... */
-#define _PAGE_CACHE_MASK       (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
-                                _PAGE_WR_COMBINE)
-
-/* This is a mask of the bits that pte_modify is allowed to change. */
-#define _PAGE_CHG_MASK         (PAGE_MASK)
-
-#define _PAGE_SZ_SHIFT         1
-#define _PAGE_SZ_4K            (0x0)
-#define _PAGE_SZ_8K            (0x1 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_16K           (0x2 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_32K           (0x3 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_64K           (0x4 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_128K          (0x5 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_256K          (0x6 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_512K          (0x7 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_1M            (0x8 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_2M            (0x9 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_4M            (0xa << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_MASK          (0xf << _PAGE_SZ_SHIFT)
-
-#if defined(CONFIG_PAGE_SIZE_4K)
-#define _PAGE_SZ               (_PAGE_SZ_4K)
-#elif defined(CONFIG_PAGE_SIZE_8K)
-#define _PAGE_SZ               (_PAGE_SZ_8K)
-#elif defined(CONFIG_PAGE_SIZE_16K)
-#define _PAGE_SZ               (_PAGE_SZ_16K)
-#endif
-#define _PAGE_TABLE            (_PAGE_SZ | _PAGE_PRESENT)
-
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
-# define _PAGE_SZHUGE          (_PAGE_SZ_8K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
-# define _PAGE_SZHUGE          (_PAGE_SZ_16K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
-# define _PAGE_SZHUGE          (_PAGE_SZ_32K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-# define _PAGE_SZHUGE          (_PAGE_SZ_64K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
-# define _PAGE_SZHUGE          (_PAGE_SZ_128K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
-# define _PAGE_SZHUGE          (_PAGE_SZ_256K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-# define _PAGE_SZHUGE          (_PAGE_SZ_512K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
-# define _PAGE_SZHUGE          (_PAGE_SZ_1M)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
-# define _PAGE_SZHUGE          (_PAGE_SZ_2M)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
-# define _PAGE_SZHUGE          (_PAGE_SZ_4M)
-#endif
-
 /*
  * The Linux memory management assumes a three-level page table setup. On
  * Meta, we use that, but "fold" the mid level into the top-level page
index 0536bc0..ef54851 100644 (file)
@@ -348,8 +348,9 @@ C_ENTRY(_user_exception):
  * The LP register should point to the location where the called function
  * should return.  [note that MAKE_SYS_CALL uses label 1] */
        /* See if the system call number is valid */
+       blti    r12, 5f
        addi    r11, r12, -__NR_syscalls;
-       bgei    r11,5f;
+       bgei    r11, 5f;
        /* Figure out which function to use for this system call.  */
        /* Note Microblaze barrel shift is optional, so don't rely on it */
        add     r12, r12, r12;                  /* convert num -> ptr */
@@ -375,7 +376,7 @@ C_ENTRY(_user_exception):
 
        /* The syscall number is invalid, return an error.  */
 5:
-       rtsd    r15, 8;         /* looks like a normal subroutine return */
+       braid   ret_from_trap
        addi    r3, r0, -ENOSYS;
 
 /* Entry point used to return from a syscall/trap */
@@ -411,7 +412,7 @@ C_ENTRY(ret_from_trap):
        bri     1b
 
        /* Maybe handle a signal */
-5:     
+5:
        andi    r11, r19, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME;
        beqi    r11, 4f;                /* Signals to handle, handle them */
 
index cdac7b3..8038647 100644 (file)
        .set push
        SET_HARDFLOAT
        cfc1    \tmp,  fcr31
-       swc1    $f0,  THREAD_FPR0_LS64(\thread)
-       swc1    $f1,  THREAD_FPR1_LS64(\thread)
-       swc1    $f2,  THREAD_FPR2_LS64(\thread)
-       swc1    $f3,  THREAD_FPR3_LS64(\thread)
-       swc1    $f4,  THREAD_FPR4_LS64(\thread)
-       swc1    $f5,  THREAD_FPR5_LS64(\thread)
-       swc1    $f6,  THREAD_FPR6_LS64(\thread)
-       swc1    $f7,  THREAD_FPR7_LS64(\thread)
-       swc1    $f8,  THREAD_FPR8_LS64(\thread)
-       swc1    $f9,  THREAD_FPR9_LS64(\thread)
-       swc1    $f10, THREAD_FPR10_LS64(\thread)
-       swc1    $f11, THREAD_FPR11_LS64(\thread)
-       swc1    $f12, THREAD_FPR12_LS64(\thread)
-       swc1    $f13, THREAD_FPR13_LS64(\thread)
-       swc1    $f14, THREAD_FPR14_LS64(\thread)
-       swc1    $f15, THREAD_FPR15_LS64(\thread)
-       swc1    $f16, THREAD_FPR16_LS64(\thread)
-       swc1    $f17, THREAD_FPR17_LS64(\thread)
-       swc1    $f18, THREAD_FPR18_LS64(\thread)
-       swc1    $f19, THREAD_FPR19_LS64(\thread)
-       swc1    $f20, THREAD_FPR20_LS64(\thread)
-       swc1    $f21, THREAD_FPR21_LS64(\thread)
-       swc1    $f22, THREAD_FPR22_LS64(\thread)
-       swc1    $f23, THREAD_FPR23_LS64(\thread)
-       swc1    $f24, THREAD_FPR24_LS64(\thread)
-       swc1    $f25, THREAD_FPR25_LS64(\thread)
-       swc1    $f26, THREAD_FPR26_LS64(\thread)
-       swc1    $f27, THREAD_FPR27_LS64(\thread)
-       swc1    $f28, THREAD_FPR28_LS64(\thread)
-       swc1    $f29, THREAD_FPR29_LS64(\thread)
-       swc1    $f30, THREAD_FPR30_LS64(\thread)
-       swc1    $f31, THREAD_FPR31_LS64(\thread)
+       swc1    $f0,  THREAD_FPR0(\thread)
+       swc1    $f1,  THREAD_FPR1(\thread)
+       swc1    $f2,  THREAD_FPR2(\thread)
+       swc1    $f3,  THREAD_FPR3(\thread)
+       swc1    $f4,  THREAD_FPR4(\thread)
+       swc1    $f5,  THREAD_FPR5(\thread)
+       swc1    $f6,  THREAD_FPR6(\thread)
+       swc1    $f7,  THREAD_FPR7(\thread)
+       swc1    $f8,  THREAD_FPR8(\thread)
+       swc1    $f9,  THREAD_FPR9(\thread)
+       swc1    $f10, THREAD_FPR10(\thread)
+       swc1    $f11, THREAD_FPR11(\thread)
+       swc1    $f12, THREAD_FPR12(\thread)
+       swc1    $f13, THREAD_FPR13(\thread)
+       swc1    $f14, THREAD_FPR14(\thread)
+       swc1    $f15, THREAD_FPR15(\thread)
+       swc1    $f16, THREAD_FPR16(\thread)
+       swc1    $f17, THREAD_FPR17(\thread)
+       swc1    $f18, THREAD_FPR18(\thread)
+       swc1    $f19, THREAD_FPR19(\thread)
+       swc1    $f20, THREAD_FPR20(\thread)
+       swc1    $f21, THREAD_FPR21(\thread)
+       swc1    $f22, THREAD_FPR22(\thread)
+       swc1    $f23, THREAD_FPR23(\thread)
+       swc1    $f24, THREAD_FPR24(\thread)
+       swc1    $f25, THREAD_FPR25(\thread)
+       swc1    $f26, THREAD_FPR26(\thread)
+       swc1    $f27, THREAD_FPR27(\thread)
+       swc1    $f28, THREAD_FPR28(\thread)
+       swc1    $f29, THREAD_FPR29(\thread)
+       swc1    $f30, THREAD_FPR30(\thread)
+       swc1    $f31, THREAD_FPR31(\thread)
        sw      \tmp, THREAD_FCR31(\thread)
        .set pop
        .endm
        .set push
        SET_HARDFLOAT
        lw      \tmp, THREAD_FCR31(\thread)
-       lwc1    $f0,  THREAD_FPR0_LS64(\thread)
-       lwc1    $f1,  THREAD_FPR1_LS64(\thread)
-       lwc1    $f2,  THREAD_FPR2_LS64(\thread)
-       lwc1    $f3,  THREAD_FPR3_LS64(\thread)
-       lwc1    $f4,  THREAD_FPR4_LS64(\thread)
-       lwc1    $f5,  THREAD_FPR5_LS64(\thread)
-       lwc1    $f6,  THREAD_FPR6_LS64(\thread)
-       lwc1    $f7,  THREAD_FPR7_LS64(\thread)
-       lwc1    $f8,  THREAD_FPR8_LS64(\thread)
-       lwc1    $f9,  THREAD_FPR9_LS64(\thread)
-       lwc1    $f10, THREAD_FPR10_LS64(\thread)
-       lwc1    $f11, THREAD_FPR11_LS64(\thread)
-       lwc1    $f12, THREAD_FPR12_LS64(\thread)
-       lwc1    $f13, THREAD_FPR13_LS64(\thread)
-       lwc1    $f14, THREAD_FPR14_LS64(\thread)
-       lwc1    $f15, THREAD_FPR15_LS64(\thread)
-       lwc1    $f16, THREAD_FPR16_LS64(\thread)
-       lwc1    $f17, THREAD_FPR17_LS64(\thread)
-       lwc1    $f18, THREAD_FPR18_LS64(\thread)
-       lwc1    $f19, THREAD_FPR19_LS64(\thread)
-       lwc1    $f20, THREAD_FPR20_LS64(\thread)
-       lwc1    $f21, THREAD_FPR21_LS64(\thread)
-       lwc1    $f22, THREAD_FPR22_LS64(\thread)
-       lwc1    $f23, THREAD_FPR23_LS64(\thread)
-       lwc1    $f24, THREAD_FPR24_LS64(\thread)
-       lwc1    $f25, THREAD_FPR25_LS64(\thread)
-       lwc1    $f26, THREAD_FPR26_LS64(\thread)
-       lwc1    $f27, THREAD_FPR27_LS64(\thread)
-       lwc1    $f28, THREAD_FPR28_LS64(\thread)
-       lwc1    $f29, THREAD_FPR29_LS64(\thread)
-       lwc1    $f30, THREAD_FPR30_LS64(\thread)
-       lwc1    $f31, THREAD_FPR31_LS64(\thread)
+       lwc1    $f0,  THREAD_FPR0(\thread)
+       lwc1    $f1,  THREAD_FPR1(\thread)
+       lwc1    $f2,  THREAD_FPR2(\thread)
+       lwc1    $f3,  THREAD_FPR3(\thread)
+       lwc1    $f4,  THREAD_FPR4(\thread)
+       lwc1    $f5,  THREAD_FPR5(\thread)
+       lwc1    $f6,  THREAD_FPR6(\thread)
+       lwc1    $f7,  THREAD_FPR7(\thread)
+       lwc1    $f8,  THREAD_FPR8(\thread)
+       lwc1    $f9,  THREAD_FPR9(\thread)
+       lwc1    $f10, THREAD_FPR10(\thread)
+       lwc1    $f11, THREAD_FPR11(\thread)
+       lwc1    $f12, THREAD_FPR12(\thread)
+       lwc1    $f13, THREAD_FPR13(\thread)
+       lwc1    $f14, THREAD_FPR14(\thread)
+       lwc1    $f15, THREAD_FPR15(\thread)
+       lwc1    $f16, THREAD_FPR16(\thread)
+       lwc1    $f17, THREAD_FPR17(\thread)
+       lwc1    $f18, THREAD_FPR18(\thread)
+       lwc1    $f19, THREAD_FPR19(\thread)
+       lwc1    $f20, THREAD_FPR20(\thread)
+       lwc1    $f21, THREAD_FPR21(\thread)
+       lwc1    $f22, THREAD_FPR22(\thread)
+       lwc1    $f23, THREAD_FPR23(\thread)
+       lwc1    $f24, THREAD_FPR24(\thread)
+       lwc1    $f25, THREAD_FPR25(\thread)
+       lwc1    $f26, THREAD_FPR26(\thread)
+       lwc1    $f27, THREAD_FPR27(\thread)
+       lwc1    $f28, THREAD_FPR28(\thread)
+       lwc1    $f29, THREAD_FPR29(\thread)
+       lwc1    $f30, THREAD_FPR30(\thread)
+       lwc1    $f31, THREAD_FPR31(\thread)
        ctc1    \tmp, fcr31
        .set pop
        .endm
index 0cae459..6156ac8 100644 (file)
        .set    push
        SET_HARDFLOAT
        cfc1    \tmp, fcr31
-       sdc1    $f0,  THREAD_FPR0_LS64(\thread)
-       sdc1    $f2,  THREAD_FPR2_LS64(\thread)
-       sdc1    $f4,  THREAD_FPR4_LS64(\thread)
-       sdc1    $f6,  THREAD_FPR6_LS64(\thread)
-       sdc1    $f8,  THREAD_FPR8_LS64(\thread)
-       sdc1    $f10, THREAD_FPR10_LS64(\thread)
-       sdc1    $f12, THREAD_FPR12_LS64(\thread)
-       sdc1    $f14, THREAD_FPR14_LS64(\thread)
-       sdc1    $f16, THREAD_FPR16_LS64(\thread)
-       sdc1    $f18, THREAD_FPR18_LS64(\thread)
-       sdc1    $f20, THREAD_FPR20_LS64(\thread)
-       sdc1    $f22, THREAD_FPR22_LS64(\thread)
-       sdc1    $f24, THREAD_FPR24_LS64(\thread)
-       sdc1    $f26, THREAD_FPR26_LS64(\thread)
-       sdc1    $f28, THREAD_FPR28_LS64(\thread)
-       sdc1    $f30, THREAD_FPR30_LS64(\thread)
+       sdc1    $f0,  THREAD_FPR0(\thread)
+       sdc1    $f2,  THREAD_FPR2(\thread)
+       sdc1    $f4,  THREAD_FPR4(\thread)
+       sdc1    $f6,  THREAD_FPR6(\thread)
+       sdc1    $f8,  THREAD_FPR8(\thread)
+       sdc1    $f10, THREAD_FPR10(\thread)
+       sdc1    $f12, THREAD_FPR12(\thread)
+       sdc1    $f14, THREAD_FPR14(\thread)
+       sdc1    $f16, THREAD_FPR16(\thread)
+       sdc1    $f18, THREAD_FPR18(\thread)
+       sdc1    $f20, THREAD_FPR20(\thread)
+       sdc1    $f22, THREAD_FPR22(\thread)
+       sdc1    $f24, THREAD_FPR24(\thread)
+       sdc1    $f26, THREAD_FPR26(\thread)
+       sdc1    $f28, THREAD_FPR28(\thread)
+       sdc1    $f30, THREAD_FPR30(\thread)
        sw      \tmp, THREAD_FCR31(\thread)
        .set    pop
        .endm
        .set    push
        .set    mips64r2
        SET_HARDFLOAT
-       sdc1    $f1,  THREAD_FPR1_LS64(\thread)
-       sdc1    $f3,  THREAD_FPR3_LS64(\thread)
-       sdc1    $f5,  THREAD_FPR5_LS64(\thread)
-       sdc1    $f7,  THREAD_FPR7_LS64(\thread)
-       sdc1    $f9,  THREAD_FPR9_LS64(\thread)
-       sdc1    $f11, THREAD_FPR11_LS64(\thread)
-       sdc1    $f13, THREAD_FPR13_LS64(\thread)
-       sdc1    $f15, THREAD_FPR15_LS64(\thread)
-       sdc1    $f17, THREAD_FPR17_LS64(\thread)
-       sdc1    $f19, THREAD_FPR19_LS64(\thread)
-       sdc1    $f21, THREAD_FPR21_LS64(\thread)
-       sdc1    $f23, THREAD_FPR23_LS64(\thread)
-       sdc1    $f25, THREAD_FPR25_LS64(\thread)
-       sdc1    $f27, THREAD_FPR27_LS64(\thread)
-       sdc1    $f29, THREAD_FPR29_LS64(\thread)
-       sdc1    $f31, THREAD_FPR31_LS64(\thread)
+       sdc1    $f1,  THREAD_FPR1(\thread)
+       sdc1    $f3,  THREAD_FPR3(\thread)
+       sdc1    $f5,  THREAD_FPR5(\thread)
+       sdc1    $f7,  THREAD_FPR7(\thread)
+       sdc1    $f9,  THREAD_FPR9(\thread)
+       sdc1    $f11, THREAD_FPR11(\thread)
+       sdc1    $f13, THREAD_FPR13(\thread)
+       sdc1    $f15, THREAD_FPR15(\thread)
+       sdc1    $f17, THREAD_FPR17(\thread)
+       sdc1    $f19, THREAD_FPR19(\thread)
+       sdc1    $f21, THREAD_FPR21(\thread)
+       sdc1    $f23, THREAD_FPR23(\thread)
+       sdc1    $f25, THREAD_FPR25(\thread)
+       sdc1    $f27, THREAD_FPR27(\thread)
+       sdc1    $f29, THREAD_FPR29(\thread)
+       sdc1    $f31, THREAD_FPR31(\thread)
        .set    pop
        .endm
 
        .set    push
        SET_HARDFLOAT
        lw      \tmp, THREAD_FCR31(\thread)
-       ldc1    $f0,  THREAD_FPR0_LS64(\thread)
-       ldc1    $f2,  THREAD_FPR2_LS64(\thread)
-       ldc1    $f4,  THREAD_FPR4_LS64(\thread)
-       ldc1    $f6,  THREAD_FPR6_LS64(\thread)
-       ldc1    $f8,  THREAD_FPR8_LS64(\thread)
-       ldc1    $f10, THREAD_FPR10_LS64(\thread)
-       ldc1    $f12, THREAD_FPR12_LS64(\thread)
-       ldc1    $f14, THREAD_FPR14_LS64(\thread)
-       ldc1    $f16, THREAD_FPR16_LS64(\thread)
-       ldc1    $f18, THREAD_FPR18_LS64(\thread)
-       ldc1    $f20, THREAD_FPR20_LS64(\thread)
-       ldc1    $f22, THREAD_FPR22_LS64(\thread)
-       ldc1    $f24, THREAD_FPR24_LS64(\thread)
-       ldc1    $f26, THREAD_FPR26_LS64(\thread)
-       ldc1    $f28, THREAD_FPR28_LS64(\thread)
-       ldc1    $f30, THREAD_FPR30_LS64(\thread)
+       ldc1    $f0,  THREAD_FPR0(\thread)
+       ldc1    $f2,  THREAD_FPR2(\thread)
+       ldc1    $f4,  THREAD_FPR4(\thread)
+       ldc1    $f6,  THREAD_FPR6(\thread)
+       ldc1    $f8,  THREAD_FPR8(\thread)
+       ldc1    $f10, THREAD_FPR10(\thread)
+       ldc1    $f12, THREAD_FPR12(\thread)
+       ldc1    $f14, THREAD_FPR14(\thread)
+       ldc1    $f16, THREAD_FPR16(\thread)
+       ldc1    $f18, THREAD_FPR18(\thread)
+       ldc1    $f20, THREAD_FPR20(\thread)
+       ldc1    $f22, THREAD_FPR22(\thread)
+       ldc1    $f24, THREAD_FPR24(\thread)
+       ldc1    $f26, THREAD_FPR26(\thread)
+       ldc1    $f28, THREAD_FPR28(\thread)
+       ldc1    $f30, THREAD_FPR30(\thread)
        ctc1    \tmp, fcr31
        .endm
 
        .set    push
        .set    mips64r2
        SET_HARDFLOAT
-       ldc1    $f1,  THREAD_FPR1_LS64(\thread)
-       ldc1    $f3,  THREAD_FPR3_LS64(\thread)
-       ldc1    $f5,  THREAD_FPR5_LS64(\thread)
-       ldc1    $f7,  THREAD_FPR7_LS64(\thread)
-       ldc1    $f9,  THREAD_FPR9_LS64(\thread)
-       ldc1    $f11, THREAD_FPR11_LS64(\thread)
-       ldc1    $f13, THREAD_FPR13_LS64(\thread)
-       ldc1    $f15, THREAD_FPR15_LS64(\thread)
-       ldc1    $f17, THREAD_FPR17_LS64(\thread)
-       ldc1    $f19, THREAD_FPR19_LS64(\thread)
-       ldc1    $f21, THREAD_FPR21_LS64(\thread)
-       ldc1    $f23, THREAD_FPR23_LS64(\thread)
-       ldc1    $f25, THREAD_FPR25_LS64(\thread)
-       ldc1    $f27, THREAD_FPR27_LS64(\thread)
-       ldc1    $f29, THREAD_FPR29_LS64(\thread)
-       ldc1    $f31, THREAD_FPR31_LS64(\thread)
+       ldc1    $f1,  THREAD_FPR1(\thread)
+       ldc1    $f3,  THREAD_FPR3(\thread)
+       ldc1    $f5,  THREAD_FPR5(\thread)
+       ldc1    $f7,  THREAD_FPR7(\thread)
+       ldc1    $f9,  THREAD_FPR9(\thread)
+       ldc1    $f11, THREAD_FPR11(\thread)
+       ldc1    $f13, THREAD_FPR13(\thread)
+       ldc1    $f15, THREAD_FPR15(\thread)
+       ldc1    $f17, THREAD_FPR17(\thread)
+       ldc1    $f19, THREAD_FPR19(\thread)
+       ldc1    $f21, THREAD_FPR21(\thread)
+       ldc1    $f23, THREAD_FPR23(\thread)
+       ldc1    $f25, THREAD_FPR25(\thread)
+       ldc1    $f27, THREAD_FPR27(\thread)
+       ldc1    $f29, THREAD_FPR29(\thread)
+       ldc1    $f31, THREAD_FPR31(\thread)
        .set    pop
        .endm
 
        .endm
 
 #ifdef TOOLCHAIN_SUPPORTS_MSA
+       .macro  _cfcmsa rd, cs
+       .set    push
+       .set    mips32r2
+       .set    msa
+       cfcmsa  \rd, $\cs
+       .set    pop
+       .endm
+
+       .macro  _ctcmsa cd, rs
+       .set    push
+       .set    mips32r2
+       .set    msa
+       ctcmsa  $\cd, \rs
+       .set    pop
+       .endm
+
        .macro  ld_d    wd, off, base
        .set    push
        .set    mips32r2
        .set    pop
        .endm
 
-       .macro  copy_u_w        rd, ws, n
+       .macro  copy_u_w        ws, n
        .set    push
        .set    mips32r2
        .set    msa
-       copy_u.w \rd, $w\ws[\n]
+       copy_u.w $1, $w\ws[\n]
        .set    pop
        .endm
 
-       .macro  copy_u_d        rd, ws, n
+       .macro  copy_u_d        ws, n
        .set    push
        .set    mips64r2
        .set    msa
-       copy_u.d \rd, $w\ws[\n]
+       copy_u.d $1, $w\ws[\n]
        .set    pop
        .endm
 
-       .macro  insert_w        wd, n, rs
+       .macro  insert_w        wd, n
        .set    push
        .set    mips32r2
        .set    msa
-       insert.w $w\wd[\n], \rs
+       insert.w $w\wd[\n], $1
        .set    pop
        .endm
 
-       .macro  insert_d        wd, n, rs
+       .macro  insert_d        wd, n
        .set    push
        .set    mips64r2
        .set    msa
-       insert.d $w\wd[\n], \rs
+       insert.d $w\wd[\n], $1
        .set    pop
        .endm
 #else
        /*
         * Temporary until all toolchains in use include MSA support.
         */
-       .macro  cfcmsa  rd, cs
+       .macro  _cfcmsa rd, cs
        .set    push
        .set    noat
        SET_HARDFLOAT
        .set    pop
        .endm
 
-       .macro  ctcmsa  cd, rs
+       .macro  _ctcmsa cd, rs
        .set    push
        .set    noat
        SET_HARDFLOAT
        .set    pop
        .endm
 
-       .macro  copy_u_w        rd, ws, n
+       .macro  copy_u_w        ws, n
        .set    push
        .set    noat
        SET_HARDFLOAT
        .insn
        .word   COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
-       /* move triggers an assembler bug... */
-       or      \rd, $1, zero
        .set    pop
        .endm
 
-       .macro  copy_u_d        rd, ws, n
+       .macro  copy_u_d        ws, n
        .set    push
        .set    noat
        SET_HARDFLOAT
        .insn
        .word   COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
-       /* move triggers an assembler bug... */
-       or      \rd, $1, zero
        .set    pop
        .endm
 
-       .macro  insert_w        wd, n, rs
+       .macro  insert_w        wd, n
        .set    push
        .set    noat
        SET_HARDFLOAT
-       /* move triggers an assembler bug... */
-       or      $1, \rs, zero
        .word   INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6)
        .set    pop
        .endm
 
-       .macro  insert_d        wd, n, rs
+       .macro  insert_d        wd, n
        .set    push
        .set    noat
        SET_HARDFLOAT
-       /* move triggers an assembler bug... */
-       or      $1, \rs, zero
        .word   INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6)
        .set    pop
        .endm
        .set    push
        .set    noat
        SET_HARDFLOAT
-       cfcmsa  $1, MSA_CSR
+       _cfcmsa $1, MSA_CSR
        sw      $1, THREAD_MSA_CSR(\thread)
        .set    pop
        .endm
        .set    noat
        SET_HARDFLOAT
        lw      $1, THREAD_MSA_CSR(\thread)
-       ctcmsa  MSA_CSR, $1
+       _ctcmsa MSA_CSR, $1
        .set    pop
        ld_d    0, THREAD_FPR0, \thread
        ld_d    1, THREAD_FPR1, \thread
        insert_w \wd, 2
        insert_w \wd, 3
 #endif
-       .if     31-\wd
-       msa_init_upper  (\wd+1)
-       .endif
        .endm
 
        .macro  msa_init_all_upper
        SET_HARDFLOAT
        not     $1, zero
        msa_init_upper  0
+       msa_init_upper  1
+       msa_init_upper  2
+       msa_init_upper  3
+       msa_init_upper  4
+       msa_init_upper  5
+       msa_init_upper  6
+       msa_init_upper  7
+       msa_init_upper  8
+       msa_init_upper  9
+       msa_init_upper  10
+       msa_init_upper  11
+       msa_init_upper  12
+       msa_init_upper  13
+       msa_init_upper  14
+       msa_init_upper  15
+       msa_init_upper  16
+       msa_init_upper  17
+       msa_init_upper  18
+       msa_init_upper  19
+       msa_init_upper  20
+       msa_init_upper  21
+       msa_init_upper  22
+       msa_init_upper  23
+       msa_init_upper  24
+       msa_init_upper  25
+       msa_init_upper  26
+       msa_init_upper  27
+       msa_init_upper  28
+       msa_init_upper  29
+       msa_init_upper  30
+       msa_init_upper  31
        .set    pop
        .endm
 
index dd083e9..b104ad9 100644 (file)
@@ -48,6 +48,12 @@ enum fpu_mode {
 #define FPU_FR_MASK            0x1
 };
 
+#define __disable_fpu()                                                        \
+do {                                                                   \
+       clear_c0_status(ST0_CU1);                                       \
+       disable_fpu_hazard();                                           \
+} while (0)
+
 static inline int __enable_fpu(enum fpu_mode mode)
 {
        int fr;
@@ -86,7 +92,12 @@ fr_common:
                enable_fpu_hazard();
 
                /* check FR has the desired value */
-               return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE;
+               if (!!(read_c0_status() & ST0_FR) == !!fr)
+                       return 0;
+
+               /* unsupported FR value */
+               __disable_fpu();
+               return SIGFPE;
 
        default:
                BUG();
@@ -95,12 +106,6 @@ fr_common:
        return SIGFPE;
 }
 
-#define __disable_fpu()                                                        \
-do {                                                                   \
-       clear_c0_status(ST0_CU1);                                       \
-       disable_fpu_hazard();                                           \
-} while (0)
-
 #define clear_fpu_owner()      clear_thread_flag(TIF_USEDFPU)
 
 static inline int __is_fpu_owner(void)
@@ -170,6 +175,7 @@ static inline void lose_fpu(int save)
                }
                disable_msa();
                clear_thread_flag(TIF_USEDMSA);
+               __disable_fpu();
        } else if (is_fpu_owner()) {
                if (save)
                        _save_fp(current);
index fdbff44..608aa57 100644 (file)
@@ -8,9 +8,9 @@
 #ifndef _ASM_MIPS_JUMP_LABEL_H
 #define _ASM_MIPS_JUMP_LABEL_H
 
-#include <linux/types.h>
+#ifndef __ASSEMBLY__
 
-#ifdef __KERNEL__
+#include <linux/types.h>
 
 #define JUMP_LABEL_NOP_SIZE 4
 
@@ -39,8 +39,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 #ifdef CONFIG_64BIT
 typedef u64 jump_label_t;
 #else
@@ -53,4 +51,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif /* _ASM_MIPS_JUMP_LABEL_H */
index 6a9af5f..cba22ab 100644 (file)
@@ -10,7 +10,8 @@ enum die_val {
        DIE_RI,
        DIE_PAGE_FAULT,
        DIE_BREAK,
-       DIE_SSTEPBP
+       DIE_SSTEPBP,
+       DIE_MSAFP
 };
 
 #endif /* _ASM_MIPS_KDEBUG_H */
index ac4fc71..4c25823 100644 (file)
 
 /* MIPS KVM register ids */
 #define MIPS_CP0_32(_R, _S)                                    \
-       (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
+       (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S)))
 
 #define MIPS_CP0_64(_R, _S)                                    \
-       (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
+       (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U64 | (8 * (_R) + (_S)))
 
 #define KVM_REG_MIPS_CP0_INDEX         MIPS_CP0_32(0, 0)
 #define KVM_REG_MIPS_CP0_ENTRYLO0      MIPS_CP0_64(2, 0)
 #define KVM_REG_MIPS_CP0_STATUS                MIPS_CP0_32(12, 0)
 #define KVM_REG_MIPS_CP0_CAUSE         MIPS_CP0_32(13, 0)
 #define KVM_REG_MIPS_CP0_EPC           MIPS_CP0_64(14, 0)
+#define KVM_REG_MIPS_CP0_PRID          MIPS_CP0_32(15, 0)
 #define KVM_REG_MIPS_CP0_EBASE         MIPS_CP0_64(15, 1)
 #define KVM_REG_MIPS_CP0_CONFIG                MIPS_CP0_32(16, 0)
 #define KVM_REG_MIPS_CP0_CONFIG1       MIPS_CP0_32(16, 1)
 #define KVM_REG_MIPS_CP0_CONFIG2       MIPS_CP0_32(16, 2)
 #define KVM_REG_MIPS_CP0_CONFIG3       MIPS_CP0_32(16, 3)
+#define KVM_REG_MIPS_CP0_CONFIG4       MIPS_CP0_32(16, 4)
+#define KVM_REG_MIPS_CP0_CONFIG5       MIPS_CP0_32(16, 5)
 #define KVM_REG_MIPS_CP0_CONFIG7       MIPS_CP0_32(16, 7)
 #define KVM_REG_MIPS_CP0_XCONTEXT      MIPS_CP0_64(20, 0)
 #define KVM_REG_MIPS_CP0_ERROREPC      MIPS_CP0_64(30, 0)
@@ -119,6 +122,10 @@ struct kvm_vcpu_stat {
        u32 syscall_exits;
        u32 resvd_inst_exits;
        u32 break_inst_exits;
+       u32 trap_inst_exits;
+       u32 msa_fpe_exits;
+       u32 fpe_exits;
+       u32 msa_disabled_exits;
        u32 flush_dcache_exits;
        u32 halt_successful_poll;
        u32 halt_wakeup;
@@ -138,6 +145,10 @@ enum kvm_mips_exit_types {
        SYSCALL_EXITS,
        RESVD_INST_EXITS,
        BREAK_INST_EXITS,
+       TRAP_INST_EXITS,
+       MSA_FPE_EXITS,
+       FPE_EXITS,
+       MSA_DISABLED_EXITS,
        FLUSH_DCACHE_EXITS,
        MAX_KVM_MIPS_EXIT_TYPES
 };
@@ -206,6 +217,8 @@ struct mips_coproc {
 #define MIPS_CP0_CONFIG1_SEL   1
 #define MIPS_CP0_CONFIG2_SEL   2
 #define MIPS_CP0_CONFIG3_SEL   3
+#define MIPS_CP0_CONFIG4_SEL   4
+#define MIPS_CP0_CONFIG5_SEL   5
 
 /* Config0 register bits */
 #define CP0C0_M                        31
@@ -262,31 +275,6 @@ struct mips_coproc {
 #define CP0C3_SM               1
 #define CP0C3_TL               0
 
-/* Have config1, Cacheable, noncoherent, write-back, write allocate*/
-#define MIPS_CONFIG0                                           \
-  ((1 << CP0C0_M) | (0x3 << CP0C0_K0))
-
-/* Have config2, no coprocessor2 attached, no MDMX support attached,
-   no performance counters, watch registers present,
-   no code compression, EJTAG present, no FPU, no watch registers */
-#define MIPS_CONFIG1                                           \
-((1 << CP0C1_M) |                                              \
- (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) |         \
- (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) |         \
- (0 << CP0C1_FP))
-
-/* Have config3, no tertiary/secondary caches implemented */
-#define MIPS_CONFIG2                                           \
-((1 << CP0C2_M))
-
-/* No config4, no DSP ASE, no large physaddr (PABITS),
-   no external interrupt controller, no vectored interrupts,
-   no 1kb pages, no SmartMIPS ASE, no trace logic */
-#define MIPS_CONFIG3                                           \
-((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) |       \
- (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) |     \
- (0 << CP0C3_SM) | (0 << CP0C3_TL))
-
 /* MMU types, the first four entries have the same layout as the
    CP0C0_MT field.  */
 enum mips_mmu_types {
@@ -321,7 +309,9 @@ enum mips_mmu_types {
  */
 #define T_TRAP                 13      /* Trap instruction */
 #define T_VCEI                 14      /* Virtual coherency exception */
+#define T_MSAFPE               14      /* MSA floating point exception */
 #define T_FPE                  15      /* Floating point exception */
+#define T_MSADIS               21      /* MSA disabled exception */
 #define T_WATCH                        23      /* Watch address reference */
 #define T_VCED                 31      /* Virtual coherency data */
 
@@ -374,6 +364,9 @@ struct kvm_mips_tlb {
        long tlb_lo1;
 };
 
+#define KVM_MIPS_FPU_FPU       0x1
+#define KVM_MIPS_FPU_MSA       0x2
+
 #define KVM_MIPS_GUEST_TLB_SIZE        64
 struct kvm_vcpu_arch {
        void *host_ebase, *guest_ebase;
@@ -395,6 +388,8 @@ struct kvm_vcpu_arch {
 
        /* FPU State */
        struct mips_fpu_struct fpu;
+       /* Which FPU state is loaded (KVM_MIPS_FPU_*) */
+       unsigned int fpu_inuse;
 
        /* COP0 State */
        struct mips_coproc *cop0;
@@ -441,6 +436,9 @@ struct kvm_vcpu_arch {
 
        /* WAIT executed */
        int wait;
+
+       u8 fpu_enabled;
+       u8 msa_enabled;
 };
 
 
@@ -482,11 +480,15 @@ struct kvm_vcpu_arch {
 #define kvm_read_c0_guest_config1(cop0)                (cop0->reg[MIPS_CP0_CONFIG][1])
 #define kvm_read_c0_guest_config2(cop0)                (cop0->reg[MIPS_CP0_CONFIG][2])
 #define kvm_read_c0_guest_config3(cop0)                (cop0->reg[MIPS_CP0_CONFIG][3])
+#define kvm_read_c0_guest_config4(cop0)                (cop0->reg[MIPS_CP0_CONFIG][4])
+#define kvm_read_c0_guest_config5(cop0)                (cop0->reg[MIPS_CP0_CONFIG][5])
 #define kvm_read_c0_guest_config7(cop0)                (cop0->reg[MIPS_CP0_CONFIG][7])
 #define kvm_write_c0_guest_config(cop0, val)   (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
 #define kvm_write_c0_guest_config1(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
 #define kvm_write_c0_guest_config2(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
 #define kvm_write_c0_guest_config3(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
+#define kvm_write_c0_guest_config4(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][4] = (val))
+#define kvm_write_c0_guest_config5(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][5] = (val))
 #define kvm_write_c0_guest_config7(cop0, val)  (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
 #define kvm_read_c0_guest_errorepc(cop0)       (cop0->reg[MIPS_CP0_ERROR_PC][0])
 #define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
@@ -567,6 +569,31 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
        kvm_set_c0_guest_ebase(cop0, ((val) & (change)));               \
 }
 
+/* Helpers */
+
+static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
+{
+       return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) &&
+               vcpu->fpu_enabled;
+}
+
+static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu)
+{
+       return kvm_mips_guest_can_have_fpu(vcpu) &&
+               kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP;
+}
+
+static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu)
+{
+       return (!__builtin_constant_p(cpu_has_msa) || cpu_has_msa) &&
+               vcpu->msa_enabled;
+}
+
+static inline bool kvm_mips_guest_has_msa(struct kvm_vcpu_arch *vcpu)
+{
+       return kvm_mips_guest_can_have_msa(vcpu) &&
+               kvm_read_c0_guest_config3(vcpu->cop0) & MIPS_CONF3_MSA;
+}
 
 struct kvm_mips_callbacks {
        int (*handle_cop_unusable)(struct kvm_vcpu *vcpu);
@@ -578,6 +605,10 @@ struct kvm_mips_callbacks {
        int (*handle_syscall)(struct kvm_vcpu *vcpu);
        int (*handle_res_inst)(struct kvm_vcpu *vcpu);
        int (*handle_break)(struct kvm_vcpu *vcpu);
+       int (*handle_trap)(struct kvm_vcpu *vcpu);
+       int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
+       int (*handle_fpe)(struct kvm_vcpu *vcpu);
+       int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
        int (*vm_init)(struct kvm *kvm);
        int (*vcpu_init)(struct kvm_vcpu *vcpu);
        int (*vcpu_setup)(struct kvm_vcpu *vcpu);
@@ -596,6 +627,8 @@ struct kvm_mips_callbacks {
                           const struct kvm_one_reg *reg, s64 *v);
        int (*set_one_reg)(struct kvm_vcpu *vcpu,
                           const struct kvm_one_reg *reg, s64 v);
+       int (*vcpu_get_regs)(struct kvm_vcpu *vcpu);
+       int (*vcpu_set_regs)(struct kvm_vcpu *vcpu);
 };
 extern struct kvm_mips_callbacks *kvm_mips_callbacks;
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@@ -606,6 +639,19 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
 /* Trampoline ASM routine to start running in "Guest" context */
 extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
 
+/* FPU/MSA context management */
+void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fcsr(struct kvm_vcpu_arch *vcpu);
+void __kvm_save_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa_upper(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msacsr(struct kvm_vcpu_arch *vcpu);
+void kvm_own_fpu(struct kvm_vcpu *vcpu);
+void kvm_own_msa(struct kvm_vcpu *vcpu);
+void kvm_drop_fpu(struct kvm_vcpu *vcpu);
+void kvm_lose_fpu(struct kvm_vcpu *vcpu);
+
 /* TLB handling */
 uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu);
 
@@ -711,6 +757,26 @@ extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
                                                     struct kvm_run *run,
                                                     struct kvm_vcpu *vcpu);
 
+extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+                                                      uint32_t *opc,
+                                                      struct kvm_run *run,
+                                                      struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+                                                        uint32_t *opc,
+                                                        struct kvm_run *run,
+                                                        struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+                                                     uint32_t *opc,
+                                                     struct kvm_run *run,
+                                                     struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+                                                        uint32_t *opc,
+                                                        struct kvm_run *run,
+                                                        struct kvm_vcpu *vcpu);
+
 extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
                                                         struct kvm_run *run);
 
@@ -749,6 +815,11 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst,
                                            struct kvm_run *run,
                                            struct kvm_vcpu *vcpu);
 
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu);
+
 /* Dynamic binary translation */
 extern int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
                                      struct kvm_vcpu *vcpu);
index b5dcbee..9b3b48e 100644 (file)
@@ -105,7 +105,7 @@ union fpureg {
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
 # define FPR_IDX(width, idx)   (idx)
 #else
-# define FPR_IDX(width, idx)   ((FPU_REG_WIDTH / (width)) - 1 - (idx))
+# define FPR_IDX(width, idx)   ((idx) ^ ((64 / (width)) - 1))
 #endif
 
 #define BUILD_FPR_ACCESS(width) \
index 2c04b6d..6985eb5 100644 (file)
@@ -36,77 +36,85 @@ struct kvm_regs {
 
 /*
  * for KVM_GET_FPU and KVM_SET_FPU
- *
- * If Status[FR] is zero (32-bit FPU), the upper 32-bits of the FPRs
- * are zero filled.
  */
 struct kvm_fpu {
-       __u64 fpr[32];
-       __u32 fir;
-       __u32 fccr;
-       __u32 fexr;
-       __u32 fenr;
-       __u32 fcsr;
-       __u32 pad;
 };
 
 
 /*
- * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access CP0
+ * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various
  * registers.  The id field is broken down as follows:
  *
- *  bits[2..0]   - Register 'sel' index.
- *  bits[7..3]   - Register 'rd'  index.
- *  bits[15..8]  - Must be zero.
- *  bits[31..16] - 1 -> CP0 registers.
- *  bits[51..32] - Must be zero.
  *  bits[63..52] - As per linux/kvm.h
+ *  bits[51..32] - Must be zero.
+ *  bits[31..16] - Register set.
+ *
+ * Register set = 0: GP registers from kvm_regs (see definitions below).
+ *
+ * Register set = 1: CP0 registers.
+ *  bits[15..8]  - Must be zero.
+ *  bits[7..3]   - Register 'rd'  index.
+ *  bits[2..0]   - Register 'sel' index.
+ *
+ * Register set = 2: KVM specific registers (see definitions below).
+ *
+ * Register set = 3: FPU / MSA registers (see definitions below).
  *
  * Other sets registers may be added in the future.  Each set would
  * have its own identifier in bits[31..16].
- *
- * The registers defined in struct kvm_regs are also accessible, the
- * id values for these are below.
  */
 
-#define KVM_REG_MIPS_R0 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0)
-#define KVM_REG_MIPS_R1 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 1)
-#define KVM_REG_MIPS_R2 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 2)
-#define KVM_REG_MIPS_R3 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 3)
-#define KVM_REG_MIPS_R4 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 4)
-#define KVM_REG_MIPS_R5 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 5)
-#define KVM_REG_MIPS_R6 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 6)
-#define KVM_REG_MIPS_R7 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 7)
-#define KVM_REG_MIPS_R8 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 8)
-#define KVM_REG_MIPS_R9 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 9)
-#define KVM_REG_MIPS_R10 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 10)
-#define KVM_REG_MIPS_R11 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 11)
-#define KVM_REG_MIPS_R12 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 12)
-#define KVM_REG_MIPS_R13 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 13)
-#define KVM_REG_MIPS_R14 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 14)
-#define KVM_REG_MIPS_R15 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 15)
-#define KVM_REG_MIPS_R16 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 16)
-#define KVM_REG_MIPS_R17 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 17)
-#define KVM_REG_MIPS_R18 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 18)
-#define KVM_REG_MIPS_R19 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 19)
-#define KVM_REG_MIPS_R20 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 20)
-#define KVM_REG_MIPS_R21 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 21)
-#define KVM_REG_MIPS_R22 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 22)
-#define KVM_REG_MIPS_R23 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 23)
-#define KVM_REG_MIPS_R24 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 24)
-#define KVM_REG_MIPS_R25 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 25)
-#define KVM_REG_MIPS_R26 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 26)
-#define KVM_REG_MIPS_R27 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 27)
-#define KVM_REG_MIPS_R28 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 28)
-#define KVM_REG_MIPS_R29 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 29)
-#define KVM_REG_MIPS_R30 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 30)
-#define KVM_REG_MIPS_R31 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 31)
-
-#define KVM_REG_MIPS_HI (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 32)
-#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33)
-#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34)
-
-/* KVM specific control registers */
+#define KVM_REG_MIPS_GP                (KVM_REG_MIPS | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_CP0       (KVM_REG_MIPS | 0x0000000000010000ULL)
+#define KVM_REG_MIPS_KVM       (KVM_REG_MIPS | 0x0000000000020000ULL)
+#define KVM_REG_MIPS_FPU       (KVM_REG_MIPS | 0x0000000000030000ULL)
+
+
+/*
+ * KVM_REG_MIPS_GP - General purpose registers from kvm_regs.
+ */
+
+#define KVM_REG_MIPS_R0                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  0)
+#define KVM_REG_MIPS_R1                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  1)
+#define KVM_REG_MIPS_R2                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  2)
+#define KVM_REG_MIPS_R3                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  3)
+#define KVM_REG_MIPS_R4                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  4)
+#define KVM_REG_MIPS_R5                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  5)
+#define KVM_REG_MIPS_R6                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  6)
+#define KVM_REG_MIPS_R7                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  7)
+#define KVM_REG_MIPS_R8                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  8)
+#define KVM_REG_MIPS_R9                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 |  9)
+#define KVM_REG_MIPS_R10       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 10)
+#define KVM_REG_MIPS_R11       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 11)
+#define KVM_REG_MIPS_R12       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 12)
+#define KVM_REG_MIPS_R13       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 13)
+#define KVM_REG_MIPS_R14       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 14)
+#define KVM_REG_MIPS_R15       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 15)
+#define KVM_REG_MIPS_R16       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 16)
+#define KVM_REG_MIPS_R17       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 17)
+#define KVM_REG_MIPS_R18       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 18)
+#define KVM_REG_MIPS_R19       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 19)
+#define KVM_REG_MIPS_R20       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 20)
+#define KVM_REG_MIPS_R21       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 21)
+#define KVM_REG_MIPS_R22       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 22)
+#define KVM_REG_MIPS_R23       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 23)
+#define KVM_REG_MIPS_R24       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 24)
+#define KVM_REG_MIPS_R25       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 25)
+#define KVM_REG_MIPS_R26       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 26)
+#define KVM_REG_MIPS_R27       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 27)
+#define KVM_REG_MIPS_R28       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 28)
+#define KVM_REG_MIPS_R29       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 29)
+#define KVM_REG_MIPS_R30       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 30)
+#define KVM_REG_MIPS_R31       (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 31)
+
+#define KVM_REG_MIPS_HI                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 32)
+#define KVM_REG_MIPS_LO                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 33)
+#define KVM_REG_MIPS_PC                (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 34)
+
+
+/*
+ * KVM_REG_MIPS_KVM - KVM specific control registers.
+ */
 
 /*
  * CP0_Count control
@@ -118,8 +126,7 @@ struct kvm_fpu {
  *        safely without losing time or guest timer interrupts.
  * Other: Reserved, do not change.
  */
-#define KVM_REG_MIPS_COUNT_CTL         (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-                                        0x20000 | 0)
+#define KVM_REG_MIPS_COUNT_CTL     (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 0)
 #define KVM_REG_MIPS_COUNT_CTL_DC      0x00000001
 
 /*
@@ -131,15 +138,46 @@ struct kvm_fpu {
  * emulated.
  * Modifications to times in the future are rejected.
  */
-#define KVM_REG_MIPS_COUNT_RESUME      (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-                                        0x20000 | 1)
+#define KVM_REG_MIPS_COUNT_RESUME   (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 1)
 /*
  * CP0_Count rate in Hz
  * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without
  * discontinuities in CP0_Count.
  */
-#define KVM_REG_MIPS_COUNT_HZ          (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
-                                        0x20000 | 2)
+#define KVM_REG_MIPS_COUNT_HZ      (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 2)
+
+
+/*
+ * KVM_REG_MIPS_FPU - Floating Point and MIPS SIMD Architecture (MSA) registers.
+ *
+ *  bits[15..8]  - Register subset (see definitions below).
+ *  bits[7..5]   - Must be zero.
+ *  bits[4..0]   - Register number within register subset.
+ */
+
+#define KVM_REG_MIPS_FPR       (KVM_REG_MIPS_FPU | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_FCR       (KVM_REG_MIPS_FPU | 0x0000000000000100ULL)
+#define KVM_REG_MIPS_MSACR     (KVM_REG_MIPS_FPU | 0x0000000000000200ULL)
+
+/*
+ * KVM_REG_MIPS_FPR - Floating point / Vector registers.
+ */
+#define KVM_REG_MIPS_FPR_32(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U32  | (n))
+#define KVM_REG_MIPS_FPR_64(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U64  | (n))
+#define KVM_REG_MIPS_VEC_128(n)        (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U128 | (n))
+
+/*
+ * KVM_REG_MIPS_FCR - Floating point control registers.
+ */
+#define KVM_REG_MIPS_FCR_IR    (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 |  0)
+#define KVM_REG_MIPS_FCR_CSR   (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 31)
+
+/*
+ * KVM_REG_MIPS_MSACR - MIPS SIMD Architecture (MSA) control registers.
+ */
+#define KVM_REG_MIPS_MSA_IR     (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 |  0)
+#define KVM_REG_MIPS_MSA_CSR    (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 |  1)
+
 
 /*
  * KVM MIPS specific structures and definitions
index 750d67a..e59fd7c 100644 (file)
@@ -167,72 +167,6 @@ void output_thread_fpu_defines(void)
        OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]);
        OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]);
 
-       /* the least significant 64 bits of each FP register */
-       OFFSET(THREAD_FPR0_LS64, task_struct,
-              thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR1_LS64, task_struct,
-              thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR2_LS64, task_struct,
-              thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR3_LS64, task_struct,
-              thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR4_LS64, task_struct,
-              thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR5_LS64, task_struct,
-              thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR6_LS64, task_struct,
-              thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR7_LS64, task_struct,
-              thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR8_LS64, task_struct,
-              thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR9_LS64, task_struct,
-              thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR10_LS64, task_struct,
-              thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR11_LS64, task_struct,
-              thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR12_LS64, task_struct,
-              thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR13_LS64, task_struct,
-              thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR14_LS64, task_struct,
-              thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR15_LS64, task_struct,
-              thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR16_LS64, task_struct,
-              thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR17_LS64, task_struct,
-              thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR18_LS64, task_struct,
-              thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR19_LS64, task_struct,
-              thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR20_LS64, task_struct,
-              thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR21_LS64, task_struct,
-              thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR22_LS64, task_struct,
-              thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR23_LS64, task_struct,
-              thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR24_LS64, task_struct,
-              thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR25_LS64, task_struct,
-              thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR26_LS64, task_struct,
-              thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR27_LS64, task_struct,
-              thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR28_LS64, task_struct,
-              thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR29_LS64, task_struct,
-              thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR30_LS64, task_struct,
-              thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]);
-       OFFSET(THREAD_FPR31_LS64, task_struct,
-              thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]);
-
        OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31);
        OFFSET(THREAD_MSA_CSR, task_struct, thread.fpu.msacsr);
        BLANK();
@@ -470,6 +404,45 @@ void output_kvm_defines(void)
        OFFSET(VCPU_LO, kvm_vcpu_arch, lo);
        OFFSET(VCPU_HI, kvm_vcpu_arch, hi);
        OFFSET(VCPU_PC, kvm_vcpu_arch, pc);
+       BLANK();
+
+       OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]);
+       OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]);
+       OFFSET(VCPU_FPR2, kvm_vcpu_arch, fpu.fpr[2]);
+       OFFSET(VCPU_FPR3, kvm_vcpu_arch, fpu.fpr[3]);
+       OFFSET(VCPU_FPR4, kvm_vcpu_arch, fpu.fpr[4]);
+       OFFSET(VCPU_FPR5, kvm_vcpu_arch, fpu.fpr[5]);
+       OFFSET(VCPU_FPR6, kvm_vcpu_arch, fpu.fpr[6]);
+       OFFSET(VCPU_FPR7, kvm_vcpu_arch, fpu.fpr[7]);
+       OFFSET(VCPU_FPR8, kvm_vcpu_arch, fpu.fpr[8]);
+       OFFSET(VCPU_FPR9, kvm_vcpu_arch, fpu.fpr[9]);
+       OFFSET(VCPU_FPR10, kvm_vcpu_arch, fpu.fpr[10]);
+       OFFSET(VCPU_FPR11, kvm_vcpu_arch, fpu.fpr[11]);
+       OFFSET(VCPU_FPR12, kvm_vcpu_arch, fpu.fpr[12]);
+       OFFSET(VCPU_FPR13, kvm_vcpu_arch, fpu.fpr[13]);
+       OFFSET(VCPU_FPR14, kvm_vcpu_arch, fpu.fpr[14]);
+       OFFSET(VCPU_FPR15, kvm_vcpu_arch, fpu.fpr[15]);
+       OFFSET(VCPU_FPR16, kvm_vcpu_arch, fpu.fpr[16]);
+       OFFSET(VCPU_FPR17, kvm_vcpu_arch, fpu.fpr[17]);
+       OFFSET(VCPU_FPR18, kvm_vcpu_arch, fpu.fpr[18]);
+       OFFSET(VCPU_FPR19, kvm_vcpu_arch, fpu.fpr[19]);
+       OFFSET(VCPU_FPR20, kvm_vcpu_arch, fpu.fpr[20]);
+       OFFSET(VCPU_FPR21, kvm_vcpu_arch, fpu.fpr[21]);
+       OFFSET(VCPU_FPR22, kvm_vcpu_arch, fpu.fpr[22]);
+       OFFSET(VCPU_FPR23, kvm_vcpu_arch, fpu.fpr[23]);
+       OFFSET(VCPU_FPR24, kvm_vcpu_arch, fpu.fpr[24]);
+       OFFSET(VCPU_FPR25, kvm_vcpu_arch, fpu.fpr[25]);
+       OFFSET(VCPU_FPR26, kvm_vcpu_arch, fpu.fpr[26]);
+       OFFSET(VCPU_FPR27, kvm_vcpu_arch, fpu.fpr[27]);
+       OFFSET(VCPU_FPR28, kvm_vcpu_arch, fpu.fpr[28]);
+       OFFSET(VCPU_FPR29, kvm_vcpu_arch, fpu.fpr[29]);
+       OFFSET(VCPU_FPR30, kvm_vcpu_arch, fpu.fpr[30]);
+       OFFSET(VCPU_FPR31, kvm_vcpu_arch, fpu.fpr[31]);
+
+       OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31);
+       OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr);
+       BLANK();
+
        OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0);
        OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid);
        OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid);
index 2ebaabe..af42e70 100644 (file)
@@ -360,12 +360,15 @@ NESTED(nmi_handler, PT_SIZE, sp)
        .set    mips1
        SET_HARDFLOAT
        cfc1    a1, fcr31
-       li      a2, ~(0x3f << 12)
-       and     a2, a1
-       ctc1    a2, fcr31
        .set    pop
-       TRACE_IRQS_ON
-       STI
+       CLI
+       TRACE_IRQS_OFF
+       .endm
+
+       .macro  __build_clear_msa_fpe
+       _cfcmsa a1, MSA_CSR
+       CLI
+       TRACE_IRQS_OFF
        .endm
 
        .macro  __build_clear_ade
@@ -426,7 +429,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
        BUILD_HANDLER cpu cpu sti silent                /* #11 */
        BUILD_HANDLER ov ov sti silent                  /* #12 */
        BUILD_HANDLER tr tr sti silent                  /* #13 */
-       BUILD_HANDLER msa_fpe msa_fpe sti silent        /* #14 */
+       BUILD_HANDLER msa_fpe msa_fpe msa_fpe silent    /* #14 */
        BUILD_HANDLER fpe fpe fpe silent                /* #15 */
        BUILD_HANDLER ftlb ftlb none silent             /* #16 */
        BUILD_HANDLER msa msa sti silent                /* #21 */
index 5104528..7da6e32 100644 (file)
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
+static void init_fp_ctx(struct task_struct *target)
+{
+       /* If FP has been used then the target already has context */
+       if (tsk_used_math(target))
+               return;
+
+       /* Begin with data registers set to all 1s... */
+       memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));
+
+       /* ...and FCSR zeroed */
+       target->thread.fpu.fcr31 = 0;
+
+       /*
+        * Record that the target has "used" math, such that the context
+        * just initialised, and any modifications made by the caller,
+        * aren't discarded.
+        */
+       set_stopped_child_used_math(target);
+}
+
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -142,6 +162,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
        if (!access_ok(VERIFY_READ, data, 33 * 8))
                return -EIO;
 
+       init_fp_ctx(child);
        fregs = get_fpu_regs(child);
 
        for (i = 0; i < 32; i++) {
@@ -439,6 +460,8 @@ static int fpr_set(struct task_struct *target,
 
        /* XXX fcr31  */
 
+       init_fp_ctx(target);
+
        if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
                return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
                                          &target->thread.fpu,
@@ -660,12 +683,7 @@ long arch_ptrace(struct task_struct *child, long request,
                case FPR_BASE ... FPR_BASE + 31: {
                        union fpureg *fregs = get_fpu_regs(child);
 
-                       if (!tsk_used_math(child)) {
-                               /* FP not yet used  */
-                               memset(&child->thread.fpu, ~0,
-                                      sizeof(child->thread.fpu));
-                               child->thread.fpu.fcr31 = 0;
-                       }
+                       init_fp_ctx(child);
 #ifdef CONFIG_32BIT
                        if (test_thread_flag(TIF_32BIT_FPREGS)) {
                                /*
index 676c503..1d88af2 100644 (file)
@@ -34,7 +34,6 @@
        .endm
 
        .set    noreorder
-       .set    MIPS_ISA_ARCH_LEVEL_RAW
 
 LEAF(_save_fp_context)
        .set    push
@@ -103,6 +102,7 @@ LEAF(_save_fp_context)
        /* Save 32-bit process floating point context */
 LEAF(_save_fp_context32)
        .set push
+       .set MIPS_ISA_ARCH_LEVEL_RAW
        SET_HARDFLOAT
        cfc1    t1, fcr31
 
index 33984c0..5b4d711 100644 (file)
@@ -701,6 +701,13 @@ asmlinkage void do_ov(struct pt_regs *regs)
 
 int process_fpemu_return(int sig, void __user *fault_addr)
 {
+       /*
+        * We can't allow the emulated instruction to leave any of the cause
+        * bits set in FCSR. If they were then the kernel would take an FP
+        * exception when restoring FP context.
+        */
+       current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+
        if (sig == SIGSEGV || sig == SIGBUS) {
                struct siginfo si = {0};
                si.si_addr = fault_addr;
@@ -781,6 +788,11 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
        if (notify_die(DIE_FP, "FP exception", regs, 0, regs_to_trapnr(regs),
                       SIGFPE) == NOTIFY_STOP)
                goto out;
+
+       /* Clear FCSR.Cause before enabling interrupts */
+       write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X);
+       local_irq_enable();
+
        die_if_kernel("FP exception in kernel code", regs);
 
        if (fcr31 & FPU_CSR_UNI_X) {
@@ -804,18 +816,12 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
                sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
                                               &fault_addr);
 
-               /*
-                * We can't allow the emulated instruction to leave any of
-                * the cause bit set in $fcr31.
-                */
-               current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+               /* If something went wrong, signal */
+               process_fpemu_return(sig, fault_addr);
 
                /* Restore the hardware register state */
                own_fpu(1);     /* Using the FPU again.  */
 
-               /* If something went wrong, signal */
-               process_fpemu_return(sig, fault_addr);
-
                goto out;
        } else if (fcr31 & FPU_CSR_INV_X)
                info.si_code = FPE_FLTINV;
@@ -1392,13 +1398,22 @@ out:
        exception_exit(prev_state);
 }
 
-asmlinkage void do_msa_fpe(struct pt_regs *regs)
+asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr)
 {
        enum ctx_state prev_state;
 
        prev_state = exception_enter();
+       if (notify_die(DIE_MSAFP, "MSA FP exception", regs, 0,
+                      regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP)
+               goto out;
+
+       /* Clear MSACSR.Cause before enabling interrupts */
+       write_msa_csr(msacsr & ~MSA_CSR_CAUSEF);
+       local_irq_enable();
+
        die_if_kernel("do_msa_fpe invoked from kernel context!", regs);
        force_sig(SIGFPE, current);
+out:
        exception_exit(prev_state);
 }
 
index 401fe02..637ebbe 100644 (file)
@@ -1,13 +1,15 @@
 # Makefile for KVM support for MIPS
 #
 
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
 
-kvm-objs := $(common-objs) mips.o emulate.o locore.o \
+common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
+
+kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \
            interrupt.o stats.o commpage.o \
-           dyntrans.o trap_emul.o
+           dyntrans.o trap_emul.o fpu.o
 
 obj-$(CONFIG_KVM)      += kvm.o
 obj-y                  += callback.o tlb.o
index fb3e8df..6230f37 100644 (file)
@@ -884,6 +884,84 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu)
        return EMULATE_DONE;
 }
 
+/**
+ * kvm_mips_config1_wrmask() - Find mask of writable bits in guest Config1
+ * @vcpu:      Virtual CPU.
+ *
+ * Return: userland-writable Config1 bits (currently read-only to the guest):
+ * MIPS_CONF1_FP when kvm_mips_guest_can_have_fpu() is true, otherwise 0.
+ */
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu)
+{
+       unsigned int mask = 0;
+
+       /* Permit FPU to be present if FPU is supported */
+       if (kvm_mips_guest_can_have_fpu(&vcpu->arch))
+               mask |= MIPS_CONF1_FP;
+
+       return mask;
+}
+
+/**
+ * kvm_mips_config3_wrmask() - Find mask of writable bits in guest Config3
+ * @vcpu:      Virtual CPU.
+ *
+ * Return: userland-writable Config3 bits (currently read-only to the guest):
+ * MIPS_CONF_M, plus MIPS_CONF3_MSA when kvm_mips_guest_can_have_msa() is true.
+ */
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu)
+{
+       /* Config4 is optional, so MIPS_CONF_M is always in the mask */
+       unsigned int mask = MIPS_CONF_M;
+
+       /* Permit MSA to be present if MSA is supported */
+       if (kvm_mips_guest_can_have_msa(&vcpu->arch))
+               mask |= MIPS_CONF3_MSA;
+
+       return mask;
+}
+
+/**
+ * kvm_mips_config4_wrmask() - Find mask of writable bits in guest Config4
+ * @vcpu:      Virtual CPU.
+ *
+ * Return: userland-writable Config4 bits (currently read-only to the guest):
+ * always exactly MIPS_CONF_M; @vcpu is not consulted.
+ */
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu)
+{
+       /* Config5 is optional, so MIPS_CONF_M is the only writable bit */
+       return MIPS_CONF_M;
+}
+
+/**
+ * kvm_mips_config5_wrmask() - Find mask of writable bits in guest Config5
+ * @vcpu:      Virtual CPU.
+ *
+ * Return: Config5 bits writable by the guest itself: MIPS_CONF5_MSAEN when
+ * kvm_mips_guest_has_msa(); MIPS_CONF5_FRE when it has an FPU and cpu_has_fre.
+ */
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu)
+{
+       unsigned int mask = 0;
+
+       /* Permit MSAEn changes if MSA supported and enabled */
+       if (kvm_mips_guest_has_msa(&vcpu->arch))
+               mask |= MIPS_CONF5_MSAEN;
+
+       /*
+        * Permit guest FPU mode changes if FPU is enabled and the relevant
+        * feature exists according to FIR register.
+        */
+       if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+               if (cpu_has_fre)
+                       mask |= MIPS_CONF5_FRE;
+               /* We don't support UFR or UFE */
+       }
+
+       return mask;
+}
+
 enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
                                           uint32_t cause, struct kvm_run *run,
                                           struct kvm_vcpu *vcpu)
@@ -1021,18 +1099,114 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
                                kvm_mips_write_compare(vcpu,
                                                       vcpu->arch.gprs[rt]);
                        } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
-                               kvm_write_c0_guest_status(cop0,
-                                                         vcpu->arch.gprs[rt]);
+                               unsigned int old_val, val, change;
+
+                               old_val = kvm_read_c0_guest_status(cop0);
+                               val = vcpu->arch.gprs[rt];
+                               change = val ^ old_val;
+
+                               /* Make sure that the NMI bit is never set */
+                               val &= ~ST0_NMI;
+
+                               /*
+                                * Don't allow CU1 or FR to be set unless FPU
+                                * capability enabled and exists in guest
+                                * configuration.
+                                */
+                               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                                       val &= ~(ST0_CU1 | ST0_FR);
+
+                               /*
+                                * Also don't allow FR to be set if host doesn't
+                                * support it.
+                                */
+                               if (!(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+                                       val &= ~ST0_FR;
+
+
+                               /* Handle changes in FPU mode */
+                               preempt_disable();
+
+                               /*
+                                * FPU and Vector register state is made
+                                * UNPREDICTABLE by a change of FR, so don't
+                                * even bother saving it.
+                                */
+                               if (change & ST0_FR)
+                                       kvm_drop_fpu(vcpu);
+
+                               /*
+                                * If MSA state is already live, it is undefined
+                                * how it interacts with FR=0 FPU state, and we
+                                * don't want to hit reserved instruction
+                                * exceptions trying to save the MSA state later
+                                * when CU=1 && FR=1, so play it safe and save
+                                * it first.
+                                */
+                               if (change & ST0_CU1 && !(val & ST0_FR) &&
+                                   vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+                                       kvm_lose_fpu(vcpu);
+
                                /*
-                                * Make sure that CU1 and NMI bits are
-                                * never set
+                                * Propagate CU1 (FPU enable) changes
+                                * immediately if the FPU context is already
+                                * loaded. When disabling we leave the context
+                                * loaded so it can be quickly enabled again in
+                                * the near future.
                                 */
-                               kvm_clear_c0_guest_status(cop0,
-                                                         (ST0_CU1 | ST0_NMI));
+                               if (change & ST0_CU1 &&
+                                   vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+                                       change_c0_status(ST0_CU1, val);
+
+                               preempt_enable();
+
+                               kvm_write_c0_guest_status(cop0, val);
 
 #ifdef CONFIG_KVM_MIPS_DYN_TRANS
-                               kvm_mips_trans_mtc0(inst, opc, vcpu);
+                               /*
+                                * If FPU present, we need CU1/FR bits to take
+                                * effect fairly soon.
+                                */
+                               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                                       kvm_mips_trans_mtc0(inst, opc, vcpu);
 #endif
+                       } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) {
+                               unsigned int old_val, val, change, wrmask;
+
+                               old_val = kvm_read_c0_guest_config5(cop0);
+                               val = vcpu->arch.gprs[rt];
+
+                               /* Only a few bits are writable in Config5 */
+                               wrmask = kvm_mips_config5_wrmask(vcpu);
+                               change = (val ^ old_val) & wrmask;
+                               val = old_val ^ change;
+
+
+                               /* Handle changes in FPU/MSA modes */
+                               preempt_disable();
+
+                               /*
+                                * Propagate FRE changes immediately if the FPU
+                                * context is already loaded.
+                                */
+                               if (change & MIPS_CONF5_FRE &&
+                                   vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+                                       change_c0_config5(MIPS_CONF5_FRE, val);
+
+                               /*
+                                * Propagate MSAEn changes immediately if the
+                                * MSA context is already loaded. When disabling
+                                * we leave the context loaded so it can be
+                                * quickly enabled again in the near future.
+                                */
+                               if (change & MIPS_CONF5_MSAEN &&
+                                   vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+                                       change_c0_config5(MIPS_CONF5_MSAEN,
+                                                         val);
+
+                               preempt_enable();
+
+                               kvm_write_c0_guest_config5(cop0, val);
                        } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
                                uint32_t old_cause, new_cause;
 
@@ -1970,6 +2144,146 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
        return er;
 }
 
+enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+                                               uint32_t *opc,
+                                               struct kvm_run *run,
+                                               struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_vcpu_arch *arch = &vcpu->arch;
+       enum emulation_result er = EMULATE_DONE;
+
+       if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+               /* EXL clear: save PC in guest EPC, then set EXL */
+               kvm_write_c0_guest_epc(cop0, arch->pc);
+               kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+               if (cause & CAUSEF_BD)
+                       kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+               else
+                       kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+               kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc);
+
+               kvm_change_c0_guest_cause(cop0, (0xff),
+                                         (T_TRAP << CAUSEB_EXCCODE));
+
+               /* Set PC to the exception entry point */
+               arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+       } else {
+               kvm_err("Trying to deliver TRAP when EXL is already set\n");
+               er = EMULATE_FAIL;      /* EXL already set: not delivered */
+       }
+
+       return er;
+}
+
+enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+                                                 uint32_t *opc,
+                                                 struct kvm_run *run,
+                                                 struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_vcpu_arch *arch = &vcpu->arch;
+       enum emulation_result er = EMULATE_DONE;
+
+       if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+               /* EXL clear: save PC in guest EPC, then set EXL */
+               kvm_write_c0_guest_epc(cop0, arch->pc);
+               kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+               if (cause & CAUSEF_BD)
+                       kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+               else
+                       kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+               kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc);
+
+               kvm_change_c0_guest_cause(cop0, (0xff),
+                                         (T_MSAFPE << CAUSEB_EXCCODE));
+
+               /* Set PC to the exception entry point */
+               arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+       } else {
+               kvm_err("Trying to deliver MSAFPE when EXL is already set\n");
+               er = EMULATE_FAIL;      /* EXL already set: not delivered */
+       }
+
+       return er;
+}
+
+enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+                                              uint32_t *opc,
+                                              struct kvm_run *run,
+                                              struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_vcpu_arch *arch = &vcpu->arch;
+       enum emulation_result er = EMULATE_DONE;
+
+       if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+               /* EXL clear: save PC in guest EPC, then set EXL */
+               kvm_write_c0_guest_epc(cop0, arch->pc);
+               kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+               if (cause & CAUSEF_BD)
+                       kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+               else
+                       kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+               kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc);
+
+               kvm_change_c0_guest_cause(cop0, (0xff),
+                                         (T_FPE << CAUSEB_EXCCODE));
+
+               /* Set PC to the exception entry point */
+               arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+       } else {
+               kvm_err("Trying to deliver FPE when EXL is already set\n");
+               er = EMULATE_FAIL;      /* EXL already set: not delivered */
+       }
+
+       return er;
+}
+
+enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+                                                 uint32_t *opc,
+                                                 struct kvm_run *run,
+                                                 struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_vcpu_arch *arch = &vcpu->arch;
+       enum emulation_result er = EMULATE_DONE;
+
+       if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+               /* EXL clear: save PC in guest EPC, then set EXL */
+               kvm_write_c0_guest_epc(cop0, arch->pc);
+               kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+               if (cause & CAUSEF_BD)
+                       kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+               else
+                       kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+               kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc);
+
+               kvm_change_c0_guest_cause(cop0, (0xff),
+                                         (T_MSADIS << CAUSEB_EXCCODE));
+
+               /* Set PC to the exception entry point */
+               arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+       } else {
+               kvm_err("Trying to deliver MSADIS when EXL is already set\n");
+               er = EMULATE_FAIL;      /* EXL already set: not delivered */
+       }
+
+       return er;
+}
+
 /* ll/sc, rdhwr, sync emulation */
 
 #define OPCODE 0xfc000000
@@ -2176,6 +2490,10 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
                case T_SYSCALL:
                case T_BREAK:
                case T_RES_INST:
+               case T_TRAP:
+               case T_MSAFPE:
+               case T_FPE:
+               case T_MSADIS:
                        break;
 
                case T_COP_UNUSABLE:
diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S
new file mode 100644 (file)
index 0000000..531fbf5
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * FPU context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/fpregdef.h>
+#include <asm/mipsregs.h>
+#include <asm/regdef.h>
+
+       .set    noreorder
+       .set    noat
+
+LEAF(__kvm_save_fpu)                   /* store FP regs at VCPU_FPRn(a0) */
+       .set    push
+       .set    mips64r2
+       SET_HARDFLOAT
+       mfc0    t0, CP0_STATUS                  # t0 = CP0 Status
+       sll     t0, t0, 5                       # is Status.FR set?
+       bgez    t0, 1f                          # no: skip odd doubles
+        nop                                    # branch delay slot
+       sdc1    $f1,  VCPU_FPR1(a0)             # odd doubles: FR=1 only
+       sdc1    $f3,  VCPU_FPR3(a0)
+       sdc1    $f5,  VCPU_FPR5(a0)
+       sdc1    $f7,  VCPU_FPR7(a0)
+       sdc1    $f9,  VCPU_FPR9(a0)
+       sdc1    $f11, VCPU_FPR11(a0)
+       sdc1    $f13, VCPU_FPR13(a0)
+       sdc1    $f15, VCPU_FPR15(a0)
+       sdc1    $f17, VCPU_FPR17(a0)
+       sdc1    $f19, VCPU_FPR19(a0)
+       sdc1    $f21, VCPU_FPR21(a0)
+       sdc1    $f23, VCPU_FPR23(a0)
+       sdc1    $f25, VCPU_FPR25(a0)
+       sdc1    $f27, VCPU_FPR27(a0)
+       sdc1    $f29, VCPU_FPR29(a0)
+       sdc1    $f31, VCPU_FPR31(a0)
+1:     sdc1    $f0,  VCPU_FPR0(a0)             # even doubles: always stored
+       sdc1    $f2,  VCPU_FPR2(a0)
+       sdc1    $f4,  VCPU_FPR4(a0)
+       sdc1    $f6,  VCPU_FPR6(a0)
+       sdc1    $f8,  VCPU_FPR8(a0)
+       sdc1    $f10, VCPU_FPR10(a0)
+       sdc1    $f12, VCPU_FPR12(a0)
+       sdc1    $f14, VCPU_FPR14(a0)
+       sdc1    $f16, VCPU_FPR16(a0)
+       sdc1    $f18, VCPU_FPR18(a0)
+       sdc1    $f20, VCPU_FPR20(a0)
+       sdc1    $f22, VCPU_FPR22(a0)
+       sdc1    $f24, VCPU_FPR24(a0)
+       sdc1    $f26, VCPU_FPR26(a0)
+       sdc1    $f28, VCPU_FPR28(a0)
+       jr      ra
+        sdc1   $f30, VCPU_FPR30(a0)            # last store, in jr delay slot
+       .set    pop
+       END(__kvm_save_fpu)
+
+LEAF(__kvm_restore_fpu)                        /* load FP regs from VCPU_FPRn(a0) */
+       .set    push
+       .set    mips64r2
+       SET_HARDFLOAT
+       mfc0    t0, CP0_STATUS                  # t0 = CP0 Status
+       sll     t0, t0, 5                       # is Status.FR set?
+       bgez    t0, 1f                          # no: skip odd doubles
+        nop                                    # branch delay slot
+       ldc1    $f1,  VCPU_FPR1(a0)             # odd doubles: FR=1 only
+       ldc1    $f3,  VCPU_FPR3(a0)
+       ldc1    $f5,  VCPU_FPR5(a0)
+       ldc1    $f7,  VCPU_FPR7(a0)
+       ldc1    $f9,  VCPU_FPR9(a0)
+       ldc1    $f11, VCPU_FPR11(a0)
+       ldc1    $f13, VCPU_FPR13(a0)
+       ldc1    $f15, VCPU_FPR15(a0)
+       ldc1    $f17, VCPU_FPR17(a0)
+       ldc1    $f19, VCPU_FPR19(a0)
+       ldc1    $f21, VCPU_FPR21(a0)
+       ldc1    $f23, VCPU_FPR23(a0)
+       ldc1    $f25, VCPU_FPR25(a0)
+       ldc1    $f27, VCPU_FPR27(a0)
+       ldc1    $f29, VCPU_FPR29(a0)
+       ldc1    $f31, VCPU_FPR31(a0)
+1:     ldc1    $f0,  VCPU_FPR0(a0)             # even doubles: always loaded
+       ldc1    $f2,  VCPU_FPR2(a0)
+       ldc1    $f4,  VCPU_FPR4(a0)
+       ldc1    $f6,  VCPU_FPR6(a0)
+       ldc1    $f8,  VCPU_FPR8(a0)
+       ldc1    $f10, VCPU_FPR10(a0)
+       ldc1    $f12, VCPU_FPR12(a0)
+       ldc1    $f14, VCPU_FPR14(a0)
+       ldc1    $f16, VCPU_FPR16(a0)
+       ldc1    $f18, VCPU_FPR18(a0)
+       ldc1    $f20, VCPU_FPR20(a0)
+       ldc1    $f22, VCPU_FPR22(a0)
+       ldc1    $f24, VCPU_FPR24(a0)
+       ldc1    $f26, VCPU_FPR26(a0)
+       ldc1    $f28, VCPU_FPR28(a0)
+       jr      ra
+        ldc1   $f30, VCPU_FPR30(a0)            # last load, in jr delay slot
+       .set    pop
+       END(__kvm_restore_fpu)
+
+LEAF(__kvm_restore_fcsr)               /* write VCPU_FCR31(a0) to fcr31 */
+       .set    push
+       SET_HARDFLOAT
+       lw      t0, VCPU_FCR31(a0)              # t0 = FCSR value to restore
+       /*
+        * The ctc1 must stay at this offset in __kvm_restore_fcsr.
+        * See kvm_mips_csr_die_notify() which handles t0 containing a value
+        * which triggers an FP Exception, which must be stepped over and
+        * ignored since the set cause bits must remain there for the guest.
+        */
+       ctc1    t0, fcr31
+       jr      ra
+        nop                                    # branch delay slot
+       .set    pop
+       END(__kvm_restore_fcsr)
index 4a68b17..c567240 100644 (file)
@@ -36,6 +36,8 @@
 #define PT_HOST_USERLOCAL   PT_EPC
 
 #define CP0_DDATA_LO        $28,3
+#define CP0_CONFIG3         $16,3
+#define CP0_CONFIG5         $16,5
 #define CP0_EBASE           $15,1
 
 #define CP0_INTCTL          $12,1
@@ -353,6 +355,42 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
        LONG_L  k0, VCPU_HOST_EBASE(k1)
        mtc0    k0,CP0_EBASE
 
+       /*
+        * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't
+        * trigger FPE for pending exceptions.
+        */
+       .set    at
+       and     v1, v0, ST0_CU1
+       beqz    v1, 1f
+        nop
+       .set    push
+       SET_HARDFLOAT
+       cfc1    t0, fcr31
+       sw      t0, VCPU_FCR31(k1)
+       ctc1    zero,fcr31
+       .set    pop
+       .set    noat
+1:
+
+#ifdef CONFIG_CPU_HAS_MSA
+       /*
+        * If MSA is enabled, save MSACSR and clear it so that later
+        * instructions don't trigger MSAFPE for pending exceptions.
+        */
+       mfc0    t0, CP0_CONFIG3
+       ext     t0, t0, 28, 1 /* MIPS_CONF3_MSAP */
+       beqz    t0, 1f
+        nop
+       mfc0    t0, CP0_CONFIG5
+       ext     t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */
+       beqz    t0, 1f
+        nop
+       _cfcmsa t0, MSA_CSR
+       sw      t0, VCPU_MSA_CSR(k1)
+       _ctcmsa MSA_CSR, zero
+1:
+#endif
+
        /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
        .set    at
        and     v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
index c9eccf5..bb68e8d 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/kdebug.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/fs.h>
@@ -48,6 +49,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "syscall",      VCPU_STAT(syscall_exits),      KVM_STAT_VCPU },
        { "resvd_inst",   VCPU_STAT(resvd_inst_exits),   KVM_STAT_VCPU },
        { "break_inst",   VCPU_STAT(break_inst_exits),   KVM_STAT_VCPU },
+       { "trap_inst",    VCPU_STAT(trap_inst_exits),    KVM_STAT_VCPU },
+       { "msa_fpe",      VCPU_STAT(msa_fpe_exits),      KVM_STAT_VCPU },
+       { "fpe",          VCPU_STAT(fpe_exits),          KVM_STAT_VCPU },
+       { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
        { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
        { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
        { "halt_wakeup",  VCPU_STAT(halt_wakeup),        KVM_STAT_VCPU },
@@ -504,10 +509,13 @@ static u64 kvm_mips_get_one_regs[] = {
        KVM_REG_MIPS_CP0_STATUS,
        KVM_REG_MIPS_CP0_CAUSE,
        KVM_REG_MIPS_CP0_EPC,
+       KVM_REG_MIPS_CP0_PRID,
        KVM_REG_MIPS_CP0_CONFIG,
        KVM_REG_MIPS_CP0_CONFIG1,
        KVM_REG_MIPS_CP0_CONFIG2,
        KVM_REG_MIPS_CP0_CONFIG3,
+       KVM_REG_MIPS_CP0_CONFIG4,
+       KVM_REG_MIPS_CP0_CONFIG5,
        KVM_REG_MIPS_CP0_CONFIG7,
        KVM_REG_MIPS_CP0_ERROREPC,
 
@@ -520,10 +528,14 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
                            const struct kvm_one_reg *reg)
 {
        struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
        int ret;
        s64 v;
+       s64 vs[2];
+       unsigned int idx;
 
        switch (reg->id) {
+       /* General purpose registers */
        case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31:
                v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0];
                break;
@@ -537,6 +549,67 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
                v = (long)vcpu->arch.pc;
                break;
 
+       /* Floating point registers */
+       case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+               /* Odd singles in top of even double when FR=0 */
+               if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+                       v = get_fpr32(&fpu->fpr[idx], 0);
+               else
+                       v = get_fpr32(&fpu->fpr[idx & ~1], idx & 1);
+               break;
+       case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+               /* Can't access odd doubles in FR=0 mode */
+               if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+                       return -EINVAL;
+               v = get_fpr64(&fpu->fpr[idx], 0);
+               break;
+       case KVM_REG_MIPS_FCR_IR:
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               v = boot_cpu_data.fpu_id;
+               break;
+       case KVM_REG_MIPS_FCR_CSR:
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               v = fpu->fcr31;
+               break;
+
+       /* MIPS SIMD Architecture (MSA) registers */
+       case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               /* Can't access MSA registers in FR=0 mode */
+               if (!(kvm_read_c0_guest_status(cop0) & ST0_FR))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+               /* least significant byte first */
+               vs[0] = get_fpr64(&fpu->fpr[idx], 0);
+               vs[1] = get_fpr64(&fpu->fpr[idx], 1);
+#else
+               /* most significant byte first */
+               vs[0] = get_fpr64(&fpu->fpr[idx], 1);
+               vs[1] = get_fpr64(&fpu->fpr[idx], 0);
+#endif
+               break;
+       case KVM_REG_MIPS_MSA_IR:
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               v = boot_cpu_data.msa_id;
+               break;
+       case KVM_REG_MIPS_MSA_CSR:
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               v = fpu->msacsr;
+               break;
+
+       /* Co-processor 0 registers */
        case KVM_REG_MIPS_CP0_INDEX:
                v = (long)kvm_read_c0_guest_index(cop0);
                break;
@@ -573,8 +646,8 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_CP0_EPC:
                v = (long)kvm_read_c0_guest_epc(cop0);
                break;
-       case KVM_REG_MIPS_CP0_ERROREPC:
-               v = (long)kvm_read_c0_guest_errorepc(cop0);
+       case KVM_REG_MIPS_CP0_PRID:
+               v = (long)kvm_read_c0_guest_prid(cop0);
                break;
        case KVM_REG_MIPS_CP0_CONFIG:
                v = (long)kvm_read_c0_guest_config(cop0);
@@ -588,9 +661,18 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_CP0_CONFIG3:
                v = (long)kvm_read_c0_guest_config3(cop0);
                break;
+       case KVM_REG_MIPS_CP0_CONFIG4:
+               v = (long)kvm_read_c0_guest_config4(cop0);
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG5:
+               v = (long)kvm_read_c0_guest_config5(cop0);
+               break;
        case KVM_REG_MIPS_CP0_CONFIG7:
                v = (long)kvm_read_c0_guest_config7(cop0);
                break;
+       case KVM_REG_MIPS_CP0_ERROREPC:
+               v = (long)kvm_read_c0_guest_errorepc(cop0);
+               break;
        /* registers to be handled specially */
        case KVM_REG_MIPS_CP0_COUNT:
        case KVM_REG_MIPS_COUNT_CTL:
@@ -612,6 +694,10 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
                u32 v32 = (u32)v;
 
                return put_user(v32, uaddr32);
+       } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+               void __user *uaddr = (void __user *)(long)reg->addr;
+
+               return copy_to_user(uaddr, vs, 16);
        } else {
                return -EINVAL;
        }
@@ -621,7 +707,10 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
                            const struct kvm_one_reg *reg)
 {
        struct mips_coproc *cop0 = vcpu->arch.cop0;
-       u64 v;
+       struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
+       s64 v;
+       s64 vs[2];
+       unsigned int idx;
 
        if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
                u64 __user *uaddr64 = (u64 __user *)(long)reg->addr;
@@ -635,11 +724,16 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
                if (get_user(v32, uaddr32) != 0)
                        return -EFAULT;
                v = (s64)v32;
+       } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+               void __user *uaddr = (void __user *)(long)reg->addr;
+
+               return copy_from_user(vs, uaddr, 16);
        } else {
                return -EINVAL;
        }
 
        switch (reg->id) {
+       /* General purpose registers */
        case KVM_REG_MIPS_R0:
                /* Silently ignore requests to set $0 */
                break;
@@ -656,6 +750,64 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
                vcpu->arch.pc = v;
                break;
 
+       /* Floating point registers */
+       case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+               /* Odd singles in top of even double when FR=0 */
+               if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+                       set_fpr32(&fpu->fpr[idx], 0, v);
+               else
+                       set_fpr32(&fpu->fpr[idx & ~1], idx & 1, v);
+               break;
+       case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+               /* Can't access odd doubles in FR=0 mode */
+               if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+                       return -EINVAL;
+               set_fpr64(&fpu->fpr[idx], 0, v);
+               break;
+       case KVM_REG_MIPS_FCR_IR:
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               /* Read-only */
+               break;
+       case KVM_REG_MIPS_FCR_CSR:
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+                       return -EINVAL;
+               fpu->fcr31 = v;
+               break;
+
+       /* MIPS SIMD Architecture (MSA) registers */
+       case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+               /* least significant byte first */
+               set_fpr64(&fpu->fpr[idx], 0, vs[0]);
+               set_fpr64(&fpu->fpr[idx], 1, vs[1]);
+#else
+               /* most significant byte first */
+               set_fpr64(&fpu->fpr[idx], 1, vs[0]);
+               set_fpr64(&fpu->fpr[idx], 0, vs[1]);
+#endif
+               break;
+       case KVM_REG_MIPS_MSA_IR:
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               /* Read-only */
+               break;
+       case KVM_REG_MIPS_MSA_CSR:
+               if (!kvm_mips_guest_has_msa(&vcpu->arch))
+                       return -EINVAL;
+               fpu->msacsr = v;
+               break;
+
+       /* Co-processor 0 registers */
        case KVM_REG_MIPS_CP0_INDEX:
                kvm_write_c0_guest_index(cop0, v);
                break;
@@ -686,6 +838,9 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_CP0_EPC:
                kvm_write_c0_guest_epc(cop0, v);
                break;
+       case KVM_REG_MIPS_CP0_PRID:
+               kvm_write_c0_guest_prid(cop0, v);
+               break;
        case KVM_REG_MIPS_CP0_ERROREPC:
                kvm_write_c0_guest_errorepc(cop0, v);
                break;
@@ -693,6 +848,12 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
        case KVM_REG_MIPS_CP0_COUNT:
        case KVM_REG_MIPS_CP0_COMPARE:
        case KVM_REG_MIPS_CP0_CAUSE:
+       case KVM_REG_MIPS_CP0_CONFIG:
+       case KVM_REG_MIPS_CP0_CONFIG1:
+       case KVM_REG_MIPS_CP0_CONFIG2:
+       case KVM_REG_MIPS_CP0_CONFIG3:
+       case KVM_REG_MIPS_CP0_CONFIG4:
+       case KVM_REG_MIPS_CP0_CONFIG5:
        case KVM_REG_MIPS_COUNT_CTL:
        case KVM_REG_MIPS_COUNT_RESUME:
        case KVM_REG_MIPS_COUNT_HZ:
@@ -703,6 +864,33 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
        return 0;
 }
 
+/**
+ * kvm_vcpu_ioctl_enable_cap() - Enable an optional capability on a VCPU.
+ * @vcpu:      Virtual CPU context.
+ * @cap:       Capability request copied in from userland.
+ *
+ * The capability must be one that kvm_vm_ioctl_check_extension() reports as
+ * available, and the request must carry no flags and a zero first argument.
+ *
+ * Return: 0 once the FPU or MSA capability has been marked enabled in
+ *        vcpu->arch, -EINVAL for anything else.
+ */
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+                                    struct kvm_enable_cap *cap)
+{
+       /* Unavailable capability, or unexpected flags / argument */
+       if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap) ||
+           cap->flags || cap->args[0])
+               return -EINVAL;
+
+       switch (cap->cap) {
+       case KVM_CAP_MIPS_FPU:
+               vcpu->arch.fpu_enabled = true;
+               return 0;
+       case KVM_CAP_MIPS_MSA:
+               vcpu->arch.msa_enabled = true;
+               return 0;
+       default:
+               /* Available, but not something that can be enabled here */
+               return -EINVAL;
+       }
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
                         unsigned long arg)
 {
@@ -760,6 +948,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
                        r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
                        break;
                }
+       case KVM_ENABLE_CAP: {
+               struct kvm_enable_cap cap;
+
+               r = -EFAULT;
+               if (copy_from_user(&cap, argp, sizeof(cap)))
+                       goto out;
+               r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+               break;
+       }
        default:
                r = -ENOIOCTLCMD;
        }
@@ -868,11 +1065,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 
        switch (ext) {
        case KVM_CAP_ONE_REG:
+       case KVM_CAP_ENABLE_CAP:
                r = 1;
                break;
        case KVM_CAP_COALESCED_MMIO:
                r = KVM_COALESCED_MMIO_PAGE_OFFSET;
                break;
+       case KVM_CAP_MIPS_FPU:
+               r = !!cpu_has_fpu;
+               break;
+       case KVM_CAP_MIPS_MSA:
+               /*
+                * We don't support MSA vector partitioning yet:
+                * 1) It would require explicit support which can't be tested
+                *    yet due to lack of support in current hardware.
+                * 2) It extends the state that would need to be saved/restored
+                *    by e.g. QEMU for migration.
+                *
+                * When vector partitioning hardware becomes available, support
+                * could be added by requiring a flag when enabling
+                * KVM_CAP_MIPS_MSA capability to indicate that userland knows
+                * to save/restore the appropriate extra state.
+                */
+               r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF);
+               break;
        default:
                r = 0;
                break;
@@ -1119,6 +1335,30 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
                ret = kvm_mips_callbacks->handle_break(vcpu);
                break;
 
+       case T_TRAP:
+               ++vcpu->stat.trap_inst_exits;
+               trace_kvm_exit(vcpu, TRAP_INST_EXITS);
+               ret = kvm_mips_callbacks->handle_trap(vcpu);
+               break;
+
+       case T_MSAFPE:
+               ++vcpu->stat.msa_fpe_exits;
+               trace_kvm_exit(vcpu, MSA_FPE_EXITS);
+               ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
+               break;
+
+       case T_FPE:
+               ++vcpu->stat.fpe_exits;
+               trace_kvm_exit(vcpu, FPE_EXITS);
+               ret = kvm_mips_callbacks->handle_fpe(vcpu);
+               break;
+
+       case T_MSADIS:
+               ++vcpu->stat.msa_disabled_exits;
+               trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
+               ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
+               break;
+
        default:
                kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x  BadVaddr: %#lx Status: %#lx\n",
                        exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
@@ -1146,12 +1386,233 @@ skip_emul:
                }
        }
 
+       if (ret == RESUME_GUEST) {
+               /*
+                * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context
+                * is live), restore FCR31 / MSACSR.
+                *
+                * This should be before returning to the guest exception
+                * vector, as it may well cause an [MSA] FP exception if there
+                * are pending exception bits unmasked. (see
+                * kvm_mips_csr_die_notify() for how that is handled).
+                */
+               if (kvm_mips_guest_has_fpu(&vcpu->arch) &&
+                   read_c0_status() & ST0_CU1)
+                       __kvm_restore_fcsr(&vcpu->arch);
+
+               if (kvm_mips_guest_has_msa(&vcpu->arch) &&
+                   read_c0_config5() & MIPS_CONF5_MSAEN)
+                       __kvm_restore_msacsr(&vcpu->arch);
+       }
+
        /* Disable HTW before returning to guest or host */
        htw_stop();
 
        return ret;
 }
 
+/*
+ * Enable FPU for guest and restore context.
+ *
+ * Runs with preemption disabled. Any live MSA context is saved first (via
+ * kvm_lose_fpu()) when the guest Status has CU1 set and FR clear. The
+ * hardware Status CU1/FR bits, and Config5.FRE when cpu_has_fre, are then
+ * updated using the guest's Status / Config5 values, and the guest FPU
+ * registers are restored unless KVM_MIPS_FPU_FPU is already set in
+ * vcpu->arch.fpu_inuse.
+ */
+void kvm_own_fpu(struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       unsigned int sr, cfg5;
+
+       preempt_disable();
+
+       sr = kvm_read_c0_guest_status(cop0);
+
+       /*
+        * If MSA state is already live, it is undefined how it interacts with
+        * FR=0 FPU state, and we don't want to hit reserved instruction
+        * exceptions trying to save the MSA state later when CU=1 && FR=1, so
+        * play it safe and save it first.
+        *
+        * In theory we shouldn't ever hit this case since kvm_lose_fpu() should
+        * get called when guest CU1 is set, however we can't trust the guest
+        * not to clobber the status register directly via the commpage.
+        */
+       if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) &&
+           vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+               kvm_lose_fpu(vcpu);
+
+       /*
+        * Enable FPU for guest
+        * We set FR and FRE according to guest context
+        */
+       change_c0_status(ST0_CU1 | ST0_FR, sr);
+       if (cpu_has_fre) {
+               cfg5 = kvm_read_c0_guest_config5(cop0);
+               change_c0_config5(MIPS_CONF5_FRE, cfg5);
+       }
+       enable_fpu_hazard();
+
+       /* If guest FPU state not active, restore it now */
+       if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) {
+               __kvm_restore_fpu(&vcpu->arch);
+               vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+       }
+
+       preempt_enable();
+}
+
+#ifdef CONFIG_CPU_HAS_MSA
+/*
+ * Enable MSA for guest and restore context.
+ *
+ * Runs with preemption disabled. When the guest has an FPU, an FPU-only live
+ * context is saved first if the guest is in FR=0 mode, and the hardware
+ * Status CU1/FR bits (plus Config5.FRE when guest CU1 is set and cpu_has_fre)
+ * are updated from guest context. Config5.MSAEN is then set and, depending on
+ * vcpu->arch.fpu_inuse, either only the upper MSA state or the full MSA state
+ * is restored. Nothing is restored when MSA is already marked in use.
+ */
+void kvm_own_msa(struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       unsigned int sr, cfg5;
+
+       preempt_disable();
+
+       /*
+        * Enable FPU if enabled in guest, since we're restoring FPU context
+        * anyway. We set FR and FRE according to guest context.
+        */
+       if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+               sr = kvm_read_c0_guest_status(cop0);
+
+               /*
+                * If FR=0 FPU state is already live, it is undefined how it
+                * interacts with MSA state, so play it safe and save it first.
+                */
+               if (!(sr & ST0_FR) &&
+                   (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU |
+                               KVM_MIPS_FPU_MSA)) == KVM_MIPS_FPU_FPU)
+                       kvm_lose_fpu(vcpu);
+
+               change_c0_status(ST0_CU1 | ST0_FR, sr);
+               if (sr & ST0_CU1 && cpu_has_fre) {
+                       cfg5 = kvm_read_c0_guest_config5(cop0);
+                       change_c0_config5(MIPS_CONF5_FRE, cfg5);
+               }
+       }
+
+       /* Enable MSA for guest */
+       set_c0_config5(MIPS_CONF5_MSAEN);
+       enable_fpu_hazard();
+
+       switch (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA)) {
+       case KVM_MIPS_FPU_FPU:
+               /*
+                * Guest FPU state already loaded, only restore upper MSA state
+                */
+               __kvm_restore_msa_upper(&vcpu->arch);
+               vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+               break;
+       case 0:
+               /* Neither FPU nor MSA already active, restore full MSA state */
+               __kvm_restore_msa(&vcpu->arch);
+               vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+               if (kvm_mips_guest_has_fpu(&vcpu->arch))
+                       vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+               break;
+       default:
+               break;
+       }
+
+       preempt_enable();
+}
+#endif
+
+/*
+ * Drop FPU & MSA without saving it.
+ *
+ * For each of MSA and FPU marked live in vcpu->arch.fpu_inuse, the unit is
+ * turned off (disable_msa() / clearing Status CU1 and FR) and its fpu_inuse
+ * flag is cleared. No register contents are written back to the vcpu.
+ */
+void kvm_drop_fpu(struct kvm_vcpu *vcpu)
+{
+       preempt_disable();
+       if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+               disable_msa();
+               vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_MSA;
+       }
+       if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+               clear_c0_status(ST0_CU1 | ST0_FR);
+               vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+       }
+       preempt_enable();
+}
+
+/*
+ * Save and disable FPU & MSA.
+ *
+ * If MSA context is live, the state is saved with __kvm_save_msa() only (no
+ * separate __kvm_save_fpu() call on this path) and both the MSA and FPU
+ * in-use flags are cleared. Otherwise, if FPU context is live, it is saved
+ * with __kvm_save_fpu(). When neither flag is set in vcpu->arch.fpu_inuse
+ * nothing is saved or disabled.
+ */
+void kvm_lose_fpu(struct kvm_vcpu *vcpu)
+{
+       /*
+        * FPU & MSA get disabled in root context (hardware) when it is disabled
+        * in guest context (software), but the register state in the hardware
+        * may still be in use. This is why we explicitly re-enable the hardware
+        * before saving.
+        */
+
+       preempt_disable();
+       if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+               set_c0_config5(MIPS_CONF5_MSAEN);
+               enable_fpu_hazard();
+
+               __kvm_save_msa(&vcpu->arch);
+
+               /* Disable MSA & FPU */
+               disable_msa();
+               if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+                       clear_c0_status(ST0_CU1 | ST0_FR);
+               vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA);
+       } else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+               set_c0_status(ST0_CU1);
+               enable_fpu_hazard();
+
+               __kvm_save_fpu(&vcpu->arch);
+               vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+
+               /* Disable FPU */
+               clear_c0_status(ST0_CU1 | ST0_FR);
+       }
+       preempt_enable();
+}
+
+/*
+ * Die notifier callback for FP and MSA FP exceptions taken while guest
+ * context is active.
+ *
+ * Restoring guest FCSR/MSACSR uses one particular ctc1 (in
+ * __kvm_restore_fcsr) and one particular ctcmsa (in __kvm_restore_msacsr),
+ * either of which may raise a "harmless" FP/MSAFP exception when cause bits
+ * are set in the value being written. Such an exception is stepped over by
+ * advancing the PC by one instruction; everything else is reported as
+ * NOTIFY_DONE so that it is handled as usual.
+ */
+static int kvm_mips_csr_die_notify(struct notifier_block *self,
+                                  unsigned long cmd, void *ptr)
+{
+       struct die_args *die = ptr;
+       struct pt_regs *regs = die->regs;
+       unsigned long epc;
+
+       /* Only interested in FPE and MSAFPE */
+       if (cmd != DIE_FP && cmd != DIE_MSAFP)
+               return NOTIFY_DONE;
+
+       /* Nothing to do unless guest context is active */
+       if (!(current->flags & PF_VCPU))
+               return NOTIFY_DONE;
+
+       /* Should never get here from user mode */
+       BUG_ON(user_mode(regs));
+
+       epc = instruction_pointer(regs);
+       if (cmd == DIE_FP) {
+               /* must be the 2nd instruction in __kvm_restore_fcsr */
+               if (epc != (unsigned long)&__kvm_restore_fcsr + 4)
+                       return NOTIFY_DONE;
+       } else if (!cpu_has_msa ||
+                  epc < (unsigned long)&__kvm_restore_msacsr + 4 ||
+                  epc > (unsigned long)&__kvm_restore_msacsr + 8) {
+               /* DIE_MSAFP outside 2nd/3rd instruction in __kvm_restore_msacsr */
+               return NOTIFY_DONE;
+       }
+
+       /* Skip the faulting instruction and carry on */
+       instruction_pointer(regs) += 4;
+
+       return NOTIFY_STOP;
+}
+
+static struct notifier_block kvm_mips_csr_die_notifier = {
+       .notifier_call = kvm_mips_csr_die_notify, /* steps over FCSR/MSACSR restore faults */
+};
+
 int __init kvm_mips_init(void)
 {
        int ret;
@@ -1161,6 +1622,8 @@ int __init kvm_mips_init(void)
        if (ret)
                return ret;
 
+       register_die_notifier(&kvm_mips_csr_die_notifier);
+
        /*
         * On MIPS, kernel modules are executed from "mapped space", which
         * requires TLBs. The TLB handling code is statically linked with
@@ -1173,7 +1636,6 @@ int __init kvm_mips_init(void)
        kvm_mips_release_pfn_clean = kvm_release_pfn_clean;
        kvm_mips_is_error_pfn = is_error_pfn;
 
-       pr_info("KVM/MIPS Initialized\n");
        return 0;
 }
 
@@ -1185,7 +1647,7 @@ void __exit kvm_mips_exit(void)
        kvm_mips_release_pfn_clean = NULL;
        kvm_mips_is_error_pfn = NULL;
 
-       pr_info("KVM/MIPS unloaded\n");
+       unregister_die_notifier(&kvm_mips_csr_die_notifier);
 }
 
 module_init(kvm_mips_init);
diff --git a/arch/mips/kvm/msa.S b/arch/mips/kvm/msa.S
new file mode 100644 (file)
index 0000000..d02f0c6
--- /dev/null
@@ -0,0 +1,161 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * MIPS SIMD Architecture (MSA) context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
+#include <asm/regdef.h>
+
+       .set    noreorder
+       .set    noat
+
+LEAF(__kvm_save_msa)   /* C callers pass &vcpu->arch in a0 */
+       st_d    0,  VCPU_FPR0,  a0      /* vector reg n -> VCPU_FPRn slot off a0 */
+       st_d    1,  VCPU_FPR1,  a0
+       st_d    2,  VCPU_FPR2,  a0
+       st_d    3,  VCPU_FPR3,  a0
+       st_d    4,  VCPU_FPR4,  a0
+       st_d    5,  VCPU_FPR5,  a0
+       st_d    6,  VCPU_FPR6,  a0
+       st_d    7,  VCPU_FPR7,  a0
+       st_d    8,  VCPU_FPR8,  a0
+       st_d    9,  VCPU_FPR9,  a0
+       st_d    10, VCPU_FPR10, a0
+       st_d    11, VCPU_FPR11, a0
+       st_d    12, VCPU_FPR12, a0
+       st_d    13, VCPU_FPR13, a0
+       st_d    14, VCPU_FPR14, a0
+       st_d    15, VCPU_FPR15, a0
+       st_d    16, VCPU_FPR16, a0
+       st_d    17, VCPU_FPR17, a0
+       st_d    18, VCPU_FPR18, a0
+       st_d    19, VCPU_FPR19, a0
+       st_d    20, VCPU_FPR20, a0
+       st_d    21, VCPU_FPR21, a0
+       st_d    22, VCPU_FPR22, a0
+       st_d    23, VCPU_FPR23, a0
+       st_d    24, VCPU_FPR24, a0
+       st_d    25, VCPU_FPR25, a0
+       st_d    26, VCPU_FPR26, a0
+       st_d    27, VCPU_FPR27, a0
+       st_d    28, VCPU_FPR28, a0
+       st_d    29, VCPU_FPR29, a0
+       st_d    30, VCPU_FPR30, a0
+       st_d    31, VCPU_FPR31, a0
+       jr      ra
+        nop                            /* branch delay slot (.set noreorder) */
+       END(__kvm_save_msa)
+
+LEAF(__kvm_restore_msa)        /* C callers pass &vcpu->arch in a0 */
+       ld_d    0,  VCPU_FPR0,  a0      /* VCPU_FPRn slot off a0 -> vector reg n */
+       ld_d    1,  VCPU_FPR1,  a0
+       ld_d    2,  VCPU_FPR2,  a0
+       ld_d    3,  VCPU_FPR3,  a0
+       ld_d    4,  VCPU_FPR4,  a0
+       ld_d    5,  VCPU_FPR5,  a0
+       ld_d    6,  VCPU_FPR6,  a0
+       ld_d    7,  VCPU_FPR7,  a0
+       ld_d    8,  VCPU_FPR8,  a0
+       ld_d    9,  VCPU_FPR9,  a0
+       ld_d    10, VCPU_FPR10, a0
+       ld_d    11, VCPU_FPR11, a0
+       ld_d    12, VCPU_FPR12, a0
+       ld_d    13, VCPU_FPR13, a0
+       ld_d    14, VCPU_FPR14, a0
+       ld_d    15, VCPU_FPR15, a0
+       ld_d    16, VCPU_FPR16, a0
+       ld_d    17, VCPU_FPR17, a0
+       ld_d    18, VCPU_FPR18, a0
+       ld_d    19, VCPU_FPR19, a0
+       ld_d    20, VCPU_FPR20, a0
+       ld_d    21, VCPU_FPR21, a0
+       ld_d    22, VCPU_FPR22, a0
+       ld_d    23, VCPU_FPR23, a0
+       ld_d    24, VCPU_FPR24, a0
+       ld_d    25, VCPU_FPR25, a0
+       ld_d    26, VCPU_FPR26, a0
+       ld_d    27, VCPU_FPR27, a0
+       ld_d    28, VCPU_FPR28, a0
+       ld_d    29, VCPU_FPR29, a0
+       ld_d    30, VCPU_FPR30, a0
+       ld_d    31, VCPU_FPR31, a0
+       jr      ra
+        nop                            /* branch delay slot (.set noreorder) */
+       END(__kvm_restore_msa)
+
+       .macro  kvm_restore_msa_upper   wr, off, base
+       .set    push
+       .set    noat                    /* $1 is used as scratch below */
+#ifdef CONFIG_64BIT
+       ld      $1, \off(\base)         /* one 64-bit load of the doubleword at \off */
+       insert_d \wr, 1                 /* presumably $1 -> 64-bit element 1 of \wr */
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+       lw      $1, \off(\base)         /* word at \off goes to element 2 */
+       insert_w \wr, 2
+       lw      $1, (\off+4)(\base)     /* word at \off+4 goes to element 3 */
+       insert_w \wr, 3
+#else /* CONFIG_CPU_BIG_ENDIAN */
+       lw      $1, (\off+4)(\base)     /* word order swapped: \off+4 goes to element 2 */
+       insert_w \wr, 2
+       lw      $1, \off(\base)         /* word at \off goes to element 3 */
+       insert_w \wr, 3
+#endif
+       .set    pop
+       .endm
+
+LEAF(__kvm_restore_msa_upper)  /* C callers pass &vcpu->arch in a0 */
+       kvm_restore_msa_upper   0,  VCPU_FPR0 +8, a0    /* +8: second doubleword of the slot */
+       kvm_restore_msa_upper   1,  VCPU_FPR1 +8, a0
+       kvm_restore_msa_upper   2,  VCPU_FPR2 +8, a0
+       kvm_restore_msa_upper   3,  VCPU_FPR3 +8, a0
+       kvm_restore_msa_upper   4,  VCPU_FPR4 +8, a0
+       kvm_restore_msa_upper   5,  VCPU_FPR5 +8, a0
+       kvm_restore_msa_upper   6,  VCPU_FPR6 +8, a0
+       kvm_restore_msa_upper   7,  VCPU_FPR7 +8, a0
+       kvm_restore_msa_upper   8,  VCPU_FPR8 +8, a0
+       kvm_restore_msa_upper   9,  VCPU_FPR9 +8, a0
+       kvm_restore_msa_upper   10, VCPU_FPR10+8, a0
+       kvm_restore_msa_upper   11, VCPU_FPR11+8, a0
+       kvm_restore_msa_upper   12, VCPU_FPR12+8, a0
+       kvm_restore_msa_upper   13, VCPU_FPR13+8, a0
+       kvm_restore_msa_upper   14, VCPU_FPR14+8, a0
+       kvm_restore_msa_upper   15, VCPU_FPR15+8, a0
+       kvm_restore_msa_upper   16, VCPU_FPR16+8, a0
+       kvm_restore_msa_upper   17, VCPU_FPR17+8, a0
+       kvm_restore_msa_upper   18, VCPU_FPR18+8, a0
+       kvm_restore_msa_upper   19, VCPU_FPR19+8, a0
+       kvm_restore_msa_upper   20, VCPU_FPR20+8, a0
+       kvm_restore_msa_upper   21, VCPU_FPR21+8, a0
+       kvm_restore_msa_upper   22, VCPU_FPR22+8, a0
+       kvm_restore_msa_upper   23, VCPU_FPR23+8, a0
+       kvm_restore_msa_upper   24, VCPU_FPR24+8, a0
+       kvm_restore_msa_upper   25, VCPU_FPR25+8, a0
+       kvm_restore_msa_upper   26, VCPU_FPR26+8, a0
+       kvm_restore_msa_upper   27, VCPU_FPR27+8, a0
+       kvm_restore_msa_upper   28, VCPU_FPR28+8, a0
+       kvm_restore_msa_upper   29, VCPU_FPR29+8, a0
+       kvm_restore_msa_upper   30, VCPU_FPR30+8, a0
+       kvm_restore_msa_upper   31, VCPU_FPR31+8, a0
+       jr      ra
+        nop                            /* branch delay slot (.set noreorder) */
+       END(__kvm_restore_msa_upper)
+
+LEAF(__kvm_restore_msacsr)
+       lw      t0, VCPU_MSA_CSR(a0)    /* t0 = value held in the VCPU_MSA_CSR slot off a0 */
+       /*
+        * The ctcmsa must stay at this offset in __kvm_restore_msacsr.
+        * See kvm_mips_csr_die_notify() which handles t0 containing a value
+        * which triggers an MSA FP Exception, which must be stepped over and
+        * ignored since the set cause bits must remain there for the guest.
+        *
+        * The notifier only matches faulting PCs in the range
+        * __kvm_restore_msacsr + 4 .. __kvm_restore_msacsr + 8, so adding
+        * instructions ahead of the ctcmsa would move it out of that range.
+        */
+       _ctcmsa MSA_CSR, t0
+       jr      ra
+        nop
+       END(__kvm_restore_msacsr)
index a74d602..888bb67 100644 (file)
@@ -25,6 +25,10 @@ char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = {
        "System Call",
        "Reserved Inst",
        "Break Inst",
+       "Trap Inst",
+       "MSA FPE",
+       "FPE",
+       "MSA Disabled",
        "D-Cache Flushes",
 };
 
index b6beb0e..aed0ac2 100644 (file)
@@ -733,6 +733,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                }
        }
 
+       /* restore guest state to registers */
+       kvm_mips_callbacks->vcpu_set_regs(vcpu);
+
        local_irq_restore(flags);
 
 }
@@ -751,6 +754,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        vcpu->arch.preempt_entryhi = read_c0_entryhi();
        vcpu->arch.last_sched_cpu = cpu;
 
+       /* save guest state in registers */
+       kvm_mips_callbacks->vcpu_get_regs(vcpu);
+
        if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
             ASID_VERSION_MASK)) {
                kvm_debug("%s: Dropping MMU Context:  %#lx\n", __func__,
index fd7257b..d836ed5 100644 (file)
@@ -39,16 +39,30 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
 
 static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
 {
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
        struct kvm_run *run = vcpu->run;
        uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
        unsigned long cause = vcpu->arch.host_cp0_cause;
        enum emulation_result er = EMULATE_DONE;
        int ret = RESUME_GUEST;
 
-       if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1)
-               er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
-       else
+       if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) {
+               /* FPU Unusable */
+               if (!kvm_mips_guest_has_fpu(&vcpu->arch) ||
+                   (kvm_read_c0_guest_status(cop0) & ST0_CU1) == 0) {
+                       /*
+                        * Unusable/no FPU in guest:
+                        * deliver guest COP1 Unusable Exception
+                        */
+                       er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
+               } else {
+                       /* Restore FPU state */
+                       kvm_own_fpu(vcpu);
+                       er = EMULATE_DONE;
+               }
+       } else {
                er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
+       }
 
        switch (er) {
        case EMULATE_DONE:
@@ -330,6 +344,107 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu)
        return ret;
 }
 
+/*
+ * Exit handler for T_TRAP: pass the exception to kvm_mips_emulate_trap_exc()
+ * and resume the guest if that succeeds, otherwise exit to the host with
+ * KVM_EXIT_INTERNAL_ERROR.
+ */
+static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+       unsigned long cause = vcpu->arch.host_cp0_cause;
+
+       if (kvm_mips_emulate_trap_exc(cause, opc, run, vcpu) == EMULATE_DONE)
+               return RESUME_GUEST;
+
+       run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       return RESUME_HOST;
+}
+
+/*
+ * Exit handler for T_MSAFPE: pass the exception to
+ * kvm_mips_emulate_msafpe_exc() and resume the guest if that succeeds,
+ * otherwise exit to the host with KVM_EXIT_INTERNAL_ERROR.
+ */
+static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+       unsigned long cause = vcpu->arch.host_cp0_cause;
+
+       if (kvm_mips_emulate_msafpe_exc(cause, opc, run, vcpu) == EMULATE_DONE)
+               return RESUME_GUEST;
+
+       run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       return RESUME_HOST;
+}
+
+/*
+ * Exit handler for T_FPE: pass the exception to kvm_mips_emulate_fpe_exc()
+ * and resume the guest if that succeeds, otherwise exit to the host with
+ * KVM_EXIT_INTERNAL_ERROR.
+ */
+static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu)
+{
+       struct kvm_run *run = vcpu->run;
+       uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+       unsigned long cause = vcpu->arch.host_cp0_cause;
+
+       if (kvm_mips_emulate_fpe_exc(cause, opc, run, vcpu) == EMULATE_DONE)
+               return RESUME_GUEST;
+
+       run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       return RESUME_HOST;
+}
+
+/**
+ * kvm_trap_emul_handle_msa_disabled() - Guest used MSA while disabled in root.
+ * @vcpu:      Virtual CPU context.
+ *
+ * Handle when the guest attempts to use MSA when it is disabled. Depending on
+ * guest state this becomes a guest reserved instruction exception, a guest
+ * MSA disabled exception, or a restore of the guest MSA/FPU context.
+ *
+ * Return: RESUME_GUEST when handled, RESUME_HOST (with exit_reason set to
+ *        KVM_EXIT_INTERNAL_ERROR) when emulation failed.
+ */
+static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
+{
+       struct mips_coproc *cop0 = vcpu->arch.cop0;
+       struct kvm_run *run = vcpu->run;
+       uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+       unsigned long cause = vcpu->arch.host_cp0_cause;
+       enum emulation_result er;
+
+       if (!kvm_mips_guest_has_msa(&vcpu->arch) ||
+           (kvm_read_c0_guest_status(cop0) & (ST0_CU1 | ST0_FR)) == ST0_CU1) {
+               /*
+                * No MSA in guest, or FPU enabled and not in FR=1 mode,
+                * guest reserved instruction exception
+                */
+               er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
+       } else if (kvm_read_c0_guest_config5(cop0) & MIPS_CONF5_MSAEN) {
+               /* MSA enabled by guest: restore MSA/FPU state */
+               kvm_own_msa(vcpu);
+               er = EMULATE_DONE;
+       } else {
+               /* MSA disabled by guest, guest MSA disabled exception */
+               er = kvm_mips_emulate_msadis_exc(cause, opc, run, vcpu);
+       }
+
+       if (er == EMULATE_DONE)
+               return RESUME_GUEST;
+
+       /* Only EMULATE_DONE and EMULATE_FAIL are expected here */
+       if (er != EMULATE_FAIL)
+               BUG();
+
+       run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+       return RESUME_HOST;
+}
+
 static int kvm_trap_emul_vm_init(struct kvm *kvm)
 {
        return 0;
@@ -351,8 +466,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
         * guest will come up as expected, for now we simulate a MIPS 24kc
         */
        kvm_write_c0_guest_prid(cop0, 0x00019300);
-       kvm_write_c0_guest_config(cop0,
-                                 MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+       /* Have config1, Cacheable, noncoherent, write-back, write allocate */
+       kvm_write_c0_guest_config(cop0, MIPS_CONF_M | (0x3 << CP0C0_K0) |
+                                 (0x1 << CP0C0_AR) |
                                  (MMU_TYPE_R4000 << CP0C0_MT));
 
        /* Read the cache characteristics from the host Config1 Register */
@@ -368,10 +484,18 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
              (1 << CP0C1_WR) | (1 << CP0C1_CA));
        kvm_write_c0_guest_config1(cop0, config1);
 
-       kvm_write_c0_guest_config2(cop0, MIPS_CONFIG2);
-       /* MIPS_CONFIG2 | (read_c0_config2() & 0xfff) */
-       kvm_write_c0_guest_config3(cop0, MIPS_CONFIG3 | (0 << CP0C3_VInt) |
-                                        (1 << CP0C3_ULRI));
+       /* Have config3, no tertiary/secondary caches implemented */
+       kvm_write_c0_guest_config2(cop0, MIPS_CONF_M);
+       /* MIPS_CONF_M | (read_c0_config2() & 0xfff) */
+
+       /* Have config4, UserLocal */
+       kvm_write_c0_guest_config3(cop0, MIPS_CONF_M | MIPS_CONF3_ULRI);
+
+       /* Have config5 */
+       kvm_write_c0_guest_config4(cop0, MIPS_CONF_M);
+
+       /* No config6 */
+       kvm_write_c0_guest_config5(cop0, 0);
 
        /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */
        kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10));
@@ -416,6 +540,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
 {
        struct mips_coproc *cop0 = vcpu->arch.cop0;
        int ret = 0;
+       unsigned int cur, change;
 
        switch (reg->id) {
        case KVM_REG_MIPS_CP0_COUNT:
@@ -444,6 +569,44 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
                        kvm_write_c0_guest_cause(cop0, v);
                }
                break;
+       case KVM_REG_MIPS_CP0_CONFIG:
+               /* read-only for now */
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG1:
+               cur = kvm_read_c0_guest_config1(cop0);
+               change = (cur ^ v) & kvm_mips_config1_wrmask(vcpu);
+               if (change) {
+                       v = cur ^ change;
+                       kvm_write_c0_guest_config1(cop0, v);
+               }
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG2:
+               /* read-only for now */
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG3:
+               cur = kvm_read_c0_guest_config3(cop0);
+               change = (cur ^ v) & kvm_mips_config3_wrmask(vcpu);
+               if (change) {
+                       v = cur ^ change;
+                       kvm_write_c0_guest_config3(cop0, v);
+               }
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG4:
+               cur = kvm_read_c0_guest_config4(cop0);
+               change = (cur ^ v) & kvm_mips_config4_wrmask(vcpu);
+               if (change) {
+                       v = cur ^ change;
+                       kvm_write_c0_guest_config4(cop0, v);
+               }
+               break;
+       case KVM_REG_MIPS_CP0_CONFIG5:
+               cur = kvm_read_c0_guest_config5(cop0);
+               change = (cur ^ v) & kvm_mips_config5_wrmask(vcpu);
+               if (change) {
+                       v = cur ^ change;
+                       kvm_write_c0_guest_config5(cop0, v);
+               }
+               break;
        case KVM_REG_MIPS_COUNT_CTL:
                ret = kvm_mips_set_count_ctl(vcpu, v);
                break;
@@ -459,6 +622,18 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
        return ret;
 }
 
+static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu)
+{
+       kvm_lose_fpu(vcpu);     /* NOTE(review): presumably saves live guest FPU state first -- confirm */
+
+       return 0;       /* always reports success */
+}
+
+static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu)
+{
+       return 0;       /* no work done here; always reports success */
+}
+
 static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
        /* exit handlers */
        .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,
@@ -470,6 +645,10 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
        .handle_syscall = kvm_trap_emul_handle_syscall,
        .handle_res_inst = kvm_trap_emul_handle_res_inst,
        .handle_break = kvm_trap_emul_handle_break,
+       .handle_trap = kvm_trap_emul_handle_trap,
+       .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe,
+       .handle_fpe = kvm_trap_emul_handle_fpe,
+       .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
 
        .vm_init = kvm_trap_emul_vm_init,
        .vcpu_init = kvm_trap_emul_vcpu_init,
@@ -483,6 +662,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
        .irq_clear = kvm_mips_irq_clear_cb,
        .get_one_reg = kvm_trap_emul_get_one_reg,
        .set_one_reg = kvm_trap_emul_set_one_reg,
+       .vcpu_get_regs = kvm_trap_emul_vcpu_get_regs,
+       .vcpu_set_regs = kvm_trap_emul_vcpu_set_regs,
 };
 
 int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
index 3b7f65c..cf9b463 100644 (file)
@@ -75,11 +75,11 @@ static int rtctmp;
 int proc_dolasatrtc(struct ctl_table *table, int write,
                       void *buffer, size_t *lenp, loff_t *ppos)
 {
-       struct timespec ts;
+       struct timespec64 ts;
        int r;
 
        if (!write) {
-               read_persistent_clock(&ts);
+               read_persistent_clock64(&ts);
                rtctmp = ts.tv_sec;
                /* check for time < 0 and set to 0 */
                if (rtctmp < 0)
index 20fb1cf..6424621 100644 (file)
 
 #include <uapi/asm/ptrace.h>
 
+/* This struct defines the way the registers are stored on the
+   stack during a system call.  */
+
 #ifndef __ASSEMBLY__
+struct pt_regs {
+       unsigned long  r8;      /* r8-r15 Caller-saved GP registers */
+       unsigned long  r9;
+       unsigned long  r10;
+       unsigned long  r11;
+       unsigned long  r12;
+       unsigned long  r13;
+       unsigned long  r14;
+       unsigned long  r15;
+       unsigned long  r1;      /* Assembler temporary */
+       unsigned long  r2;      /* Return value, least-significant 32 bits */
+       unsigned long  r3;      /* Return value, most-significant 32 bits */
+       unsigned long  r4;      /* r4-r7 Register arguments */
+       unsigned long  r5;
+       unsigned long  r6;
+       unsigned long  r7;
+       unsigned long  orig_r2; /* Copy of r2 -- NOTE(review): purpose not evident here, confirm */
+       unsigned long  ra;      /* Return address */
+       unsigned long  fp;      /* Frame pointer */
+       unsigned long  sp;      /* Stack pointer */
+       unsigned long  gp;      /* Global pointer */
+       unsigned long  estatus; /* user_mode() tests the ESTATUS_EU bit of this word */
+       unsigned long  ea;      /* Exception return address (pc) */
+       unsigned long  orig_r7; /* NOTE(review): presumably a saved copy of r7 -- confirm */
+};
+
+/*
+ * This is the extended stack used by signal handlers and the context
+ * switcher: it's pushed after the normal "struct pt_regs".
+ */
+struct switch_stack {
+       unsigned long  r16;     /* r16-r23 Callee-saved GP registers */
+       unsigned long  r17;
+       unsigned long  r18;
+       unsigned long  r19;
+       unsigned long  r20;
+       unsigned long  r21;
+       unsigned long  r22;
+       unsigned long  r23;
+       unsigned long  fp;      /* Frame pointer */
+       unsigned long  gp;      /* Global pointer */
+       unsigned long  ra;      /* Return address */
+};
+
 #define user_mode(regs)        (((regs)->estatus & ESTATUS_EU))
 
 #define instruction_pointer(regs)      ((regs)->ra)
index 1f26657..a16e55c 100644 (file)
@@ -47,7 +47,6 @@ struct thread_info {
                                                  0-0x7FFFFFFF for user-thead
                                                  0-0xFFFFFFFF for kernel-thread
                                                */
-       struct restart_block    restart_block;
        struct pt_regs          *regs;
 };
 
@@ -64,9 +63,6 @@ struct thread_info {
        .cpu            = 0,                    \
        .preempt_count  = INIT_PREEMPT_COUNT,   \
        .addr_limit     = KERNEL_DS,            \
-       .restart_block  = {                     \
-               .fn = do_no_restart_syscall,    \
-       },                                      \
 }
 
 #define init_thread_info       (init_thread_union.thread_info)
diff --git a/arch/nios2/include/asm/ucontext.h b/arch/nios2/include/asm/ucontext.h
deleted file mode 100644 (file)
index 2c87614..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2010 Tobias Klauser <tklauser@distanz.ch>
- * Copyright (C) 2004 Microtronix Datacom Ltd
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_NIOS2_UCONTEXT_H
-#define _ASM_NIOS2_UCONTEXT_H
-
-typedef int greg_t;
-#define NGREG 32
-typedef greg_t gregset_t[NGREG];
-
-struct mcontext {
-       int version;
-       gregset_t gregs;
-};
-
-#define MCONTEXT_VERSION 2
-
-struct ucontext {
-       unsigned long     uc_flags;
-       struct ucontext  *uc_link;
-       stack_t           uc_stack;
-       struct mcontext   uc_mcontext;
-       sigset_t          uc_sigmask;   /* mask last for extensibility */
-};
-
-#endif
index 4f07ca3..e0bb972 100644 (file)
@@ -1,4 +1,5 @@
 include include/uapi/asm-generic/Kbuild.asm
 
 header-y += elf.h
-header-y += ucontext.h
+
+generic-y += ucontext.h
index a5b91ae..6f06d3b 100644 (file)
@@ -50,9 +50,7 @@
 
 typedef unsigned long elf_greg_t;
 
-#define ELF_NGREG      \
-       ((sizeof(struct pt_regs) + sizeof(struct switch_stack)) /       \
-               sizeof(elf_greg_t))
+#define ELF_NGREG              49
 typedef elf_greg_t elf_gregset_t[ELF_NGREG];
 
 typedef unsigned long elf_fpregset_t;
index e83a7c9..eff00e6 100644 (file)
 #define PTR_IPENDING   37
 #define PTR_CPUID      38
 #define PTR_CTL6       39
-#define PTR_CTL7       40
+#define PTR_EXCEPTION  40
 #define PTR_PTEADDR    41
 #define PTR_TLBACC     42
 #define PTR_TLBMISC    43
+#define PTR_ECCINJ     44
+#define PTR_BADADDR    45
+#define PTR_CONFIG     46
+#define PTR_MPUBASE    47
+#define PTR_MPUACC     48
 
-#define NUM_PTRACE_REG (PTR_TLBMISC + 1)
+#define NUM_PTRACE_REG (PTR_MPUACC + 1)
 
-/* this struct defines the way the registers are stored on the
-   stack during a system call.
-
-   There is a fake_regs in setup.c that has to match pt_regs.*/
-
-struct pt_regs {
-       unsigned long  r8;              /* r8-r15 Caller-saved GP registers */
-       unsigned long  r9;
-       unsigned long  r10;
-       unsigned long  r11;
-       unsigned long  r12;
-       unsigned long  r13;
-       unsigned long  r14;
-       unsigned long  r15;
-       unsigned long  r1;              /* Assembler temporary */
-       unsigned long  r2;              /* Retval LS 32bits */
-       unsigned long  r3;              /* Retval MS 32bits */
-       unsigned long  r4;              /* r4-r7 Register arguments */
-       unsigned long  r5;
-       unsigned long  r6;
-       unsigned long  r7;
-       unsigned long  orig_r2;         /* Copy of r2 ?? */
-       unsigned long  ra;              /* Return address */
-       unsigned long  fp;              /* Frame pointer */
-       unsigned long  sp;              /* Stack pointer */
-       unsigned long  gp;              /* Global pointer */
-       unsigned long  estatus;
-       unsigned long  ea;              /* Exception return address (pc) */
-       unsigned long  orig_r7;
-};
-
-/*
- * This is the extended stack used by signal handlers and the context
- * switcher: it's pushed after the normal "struct pt_regs".
- */
-struct switch_stack {
-       unsigned long  r16;             /* r16-r23 Callee-saved GP registers */
-       unsigned long  r17;
-       unsigned long  r18;
-       unsigned long  r19;
-       unsigned long  r20;
-       unsigned long  r21;
-       unsigned long  r22;
-       unsigned long  r23;
-       unsigned long  fp;
-       unsigned long  gp;
-       unsigned long  ra;
+/* User structures for general purpose registers.  */
+struct user_pt_regs {
+       __u32           regs[49];       /* 49 == NUM_PTRACE_REG, i.e. PTR_MPUACC + 1 */
 };
 
 #endif /* __ASSEMBLY__ */
index 7b8bb41..b67944a 100644 (file)
  * details.
  */
 
-#ifndef _ASM_NIOS2_SIGCONTEXT_H
-#define _ASM_NIOS2_SIGCONTEXT_H
+#ifndef _UAPI__ASM_SIGCONTEXT_H
+#define _UAPI__ASM_SIGCONTEXT_H
 
-#include <asm/ptrace.h>
+#include <linux/types.h>
+
+#define MCONTEXT_VERSION 2
 
 struct sigcontext {
-       struct pt_regs regs;
-       unsigned long  sc_mask; /* old sigmask */
+       int version;
+       unsigned long gregs[32];
 };
 
 #endif
index 7729bd3..27b006c 100644 (file)
@@ -161,7 +161,7 @@ ENTRY(inthandler)
  ***********************************************************************
  */
 ENTRY(handle_trap)
-       ldw     r24, -4(ea)     /* instruction that caused the exception */
+       ldwio   r24, -4(ea)     /* instruction that caused the exception */
        srli    r24, r24, 4
        andi    r24, r24, 0x7c
        movia   r9,trap_table
index 2d0ea25..20662b0 100644 (file)
@@ -39,11 +39,11 @@ static inline int rt_restore_ucontext(struct pt_regs *regs,
                                        struct ucontext *uc, int *pr2)
 {
        int temp;
-       greg_t *gregs = uc->uc_mcontext.gregs;
+       unsigned long *gregs = uc->uc_mcontext.gregs;
        int err;
 
        /* Always make any pending restarted system calls return -EINTR */
-       current_thread_info()->restart_block.fn = do_no_restart_syscall;
+       current->restart_block.fn = do_no_restart_syscall;
 
        err = __get_user(temp, &uc->uc_mcontext.version);
        if (temp != MCONTEXT_VERSION)
@@ -127,7 +127,7 @@ badframe:
 static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs)
 {
        struct switch_stack *sw = (struct switch_stack *)regs - 1;
-       greg_t *gregs = uc->uc_mcontext.gregs;
+       unsigned long *gregs = uc->uc_mcontext.gregs;
        int err = 0;
 
        err |= __put_user(MCONTEXT_VERSION, &uc->uc_mcontext.version);
index 2ae482b..7966429 100644 (file)
@@ -23,9 +23,6 @@ static void __flush_dcache(unsigned long start, unsigned long end)
        end += (cpuinfo.dcache_line_size - 1);
        end &= ~(cpuinfo.dcache_line_size - 1);
 
-       if (end > start + cpuinfo.dcache_size)
-               end = start + cpuinfo.dcache_size;
-
        for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
                __asm__ __volatile__ ("   flushda 0(%0)\n"
                                        : /* Outputs */
index 0d231ad..0c9b6af 100644 (file)
@@ -126,7 +126,6 @@ good_area:
                break;
        }
 
-survive:
        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
@@ -220,11 +219,6 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_global_init(tsk)) {
-               yield();
-               down_read(&mm->mmap_sem);
-               goto survive;
-       }
        if (!user_mode(regs))
                goto no_context;
        pagefault_out_of_memory();
index f213f5b..d174372 100644 (file)
@@ -26,7 +26,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
        if (likely(pgd != NULL)) {
                memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER);
-#ifdef CONFIG_64BIT
+#if PT_NLEVELS == 3
                actual_pgd += PTRS_PER_PGD;
                /* Populate first pmd with allocated memory.  We mark it
                 * with PxD_FLAG_ATTACHED as a signal to the system that this
@@ -45,7 +45,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
-#ifdef CONFIG_64BIT
+#if PT_NLEVELS == 3
        pgd -= PTRS_PER_PGD;
 #endif
        free_pages((unsigned long)pgd, PGD_ALLOC_ORDER);
@@ -72,12 +72,20 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 {
-#ifdef CONFIG_64BIT
-       if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
-               /* This is the permanent pmd attached to the pgd;
-                * cannot free it */
+       if (pmd_flag(*pmd) & PxD_FLAG_ATTACHED) {
+               /*
+                * This is the permanent pmd attached to the pgd;
+                * cannot free it.
+                * Increment the counter to compensate for the decrement
+                * done by generic mm code.
+                *
+                * Keep the braces: the return must stay inside this
+                * branch, otherwise it runs unconditionally and
+                * free_pages() below is never reached (every pmd leaks).
+                */
+               mm_inc_nr_pmds(mm);
                return;
-#endif
+       }
        free_pages((unsigned long)pmd, PMD_ORDER);
 }
 
@@ -99,7 +107,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 static inline void
 pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
 {
-#ifdef CONFIG_64BIT
+#if PT_NLEVELS == 3
        /* preserve the gateway marker if this is the beginning of
         * the permanent pmd */
        if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
index 5a8997d..8eefb12 100644 (file)
@@ -55,8 +55,8 @@
 #define ENTRY_COMP(_name_) .word sys_##_name_
 #endif
 
-       ENTRY_SAME(restart_syscall)     /* 0 */
-       ENTRY_SAME(exit)
+90:    ENTRY_SAME(restart_syscall)     /* 0 */
+91:    ENTRY_SAME(exit)
        ENTRY_SAME(fork_wrapper)
        ENTRY_SAME(read)
        ENTRY_SAME(write)
        ENTRY_SAME(bpf)
        ENTRY_COMP(execveat)
 
-       /* Nothing yet */
+
+.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
+.error "size of syscall table does not fit value of __NR_Linux_syscalls"
+.endif
 
 #undef ENTRY_SAME
 #undef ENTRY_DIFF
index 2bf8e93..4c8ad59 100644 (file)
@@ -55,7 +55,7 @@ static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
 
 static inline int cpu_nr_cores(void)
 {
-       return NR_CPUS >> threads_shift;
+       return nr_cpu_ids >> threads_shift;     /* nr_cpu_ids (was NR_CPUS) scaled down by 2^threads_shift */
 }
 
 static inline cpumask_t cpu_online_cores_map(void)
index 03cd858..4cbe23a 100644 (file)
 #define PPC_INST_MFSPR_PVR_MASK                0xfc1fffff
 #define PPC_INST_MFTMR                 0x7c0002dc
 #define PPC_INST_MSGSND                        0x7c00019c
+#define PPC_INST_MSGCLR                        0x7c0001dc
 #define PPC_INST_MSGSNDP               0x7c00011c
 #define PPC_INST_MTTMR                 0x7c0003dc
 #define PPC_INST_NOP                   0x60000000
                                        ___PPC_RB(b) | __PPC_EH(eh))
 #define PPC_MSGSND(b)          stringify_in_c(.long PPC_INST_MSGSND | \
                                        ___PPC_RB(b))
+#define PPC_MSGCLR(b)          stringify_in_c(.long PPC_INST_MSGCLR | \
+                                       ___PPC_RB(b))
 #define PPC_MSGSNDP(b)         stringify_in_c(.long PPC_INST_MSGSNDP | \
                                        ___PPC_RB(b))
 #define PPC_POPCNTB(a, s)      stringify_in_c(.long PPC_INST_POPCNTB | \
index 1c874fb..af56b5c 100644 (file)
 #define   SRR1_ISI_N_OR_G      0x10000000 /* ISI: Access is no-exec or G */
 #define   SRR1_ISI_PROT                0x08000000 /* ISI: Other protection fault */
 #define   SRR1_WAKEMASK                0x00380000 /* reason for wakeup */
+#define   SRR1_WAKEMASK_P8     0x003c0000 /* reason for wakeup on POWER8 */
 #define   SRR1_WAKESYSERR      0x00300000 /* System error */
 #define   SRR1_WAKEEE          0x00200000 /* External interrupt */
 #define   SRR1_WAKEMT          0x00280000 /* mtctrl */
 #define          SRR1_WAKEHMI          0x00280000 /* Hypervisor maintenance */
 #define   SRR1_WAKEDEC         0x00180000 /* Decrementer interrupt */
+#define   SRR1_WAKEDBELL       0x00140000 /* Privileged doorbell on P8 */
 #define   SRR1_WAKETHERM       0x00100000 /* Thermal management interrupt */
 #define          SRR1_WAKERESET        0x00100000 /* System reset */
+#define   SRR1_WAKEHDBELL      0x000c0000 /* Hypervisor doorbell on P8 */
 #define          SRR1_WAKESTATE        0x00030000 /* Powersave exit mask [46:47] */
 #define          SRR1_WS_DEEPEST       0x00030000 /* Some resources not maintained,
                                          * may not be recoverable */
index f337666..f830468 100644 (file)
@@ -437,6 +437,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
                .machine_check_early    = __machine_check_early_realmode_p8,
                .platform               = "power8",
        },
+       {       /* Power8NVL */
+               .pvr_mask               = 0xffff0000,
+               .pvr_value              = 0x004c0000,
+               .cpu_name               = "POWER8NVL (raw)",
+               .cpu_features           = CPU_FTRS_POWER8,
+               .cpu_user_features      = COMMON_USER_POWER8,
+               .cpu_user_features2     = COMMON_USER2_POWER8,
+               .mmu_features           = MMU_FTRS_POWER8,
+               .icache_bsize           = 128,
+               .dcache_bsize           = 128,
+               .num_pmcs               = 6,
+               .pmc_type               = PPC_PMC_IBM,
+               .oprofile_cpu_type      = "ppc64/power8",
+               .oprofile_type          = PPC_OPROFILE_INVALID,
+               .cpu_setup              = __setup_cpu_power8,
+               .cpu_restore            = __restore_cpu_power8,
+               .flush_tlb              = __flush_tlb_power8,
+               .machine_check_early    = __machine_check_early_realmode_p8,
+               .platform               = "power8",
+       },
        {       /* Power8 DD1: Does not support doorbell IPIs */
                .pvr_mask               = 0xffffff00,
                .pvr_value              = 0x004d0100,
index f421781..2128f3a 100644 (file)
@@ -17,6 +17,7 @@
 
 #include <asm/dbell.h>
 #include <asm/irq_regs.h>
+#include <asm/kvm_ppc.h>
 
 #ifdef CONFIG_SMP
 void doorbell_setup_this_cpu(void)
@@ -41,6 +42,7 @@ void doorbell_exception(struct pt_regs *regs)
 
        may_hard_irq_enable();
 
+       kvmppc_set_host_ipi(smp_processor_id(), 0);
        __this_cpu_inc(irq_stat.doorbell_irqs);
 
        smp_ipi_demux();
index c2df815..9519e6b 100644 (file)
@@ -1408,7 +1408,7 @@ machine_check_handle_early:
        bne     9f                      /* continue in V mode if we are. */
 
 5:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
        /*
         * We are coming from kernel context. Check if we are coming from
         * guest. if yes, then we can continue. We will fall through
index de4018a..de74756 100644 (file)
@@ -636,7 +636,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
        spin_lock(&vcpu->arch.vpa_update_lock);
        lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
        if (lppaca)
-               yield_count = lppaca->yield_count;
+               yield_count = be32_to_cpu(lppaca->yield_count);
        spin_unlock(&vcpu->arch.vpa_update_lock);
        return yield_count;
 }
@@ -942,20 +942,20 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
 static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
                bool preserve_top32)
 {
+       struct kvm *kvm = vcpu->kvm;
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
        u64 mask;
 
+       mutex_lock(&kvm->lock);
        spin_lock(&vc->lock);
        /*
         * If ILE (interrupt little-endian) has changed, update the
         * MSR_LE bit in the intr_msr for each vcpu in this vcore.
         */
        if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
-               struct kvm *kvm = vcpu->kvm;
                struct kvm_vcpu *vcpu;
                int i;
 
-               mutex_lock(&kvm->lock);
                kvm_for_each_vcpu(i, vcpu, kvm) {
                        if (vcpu->arch.vcore != vc)
                                continue;
@@ -964,7 +964,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
                        else
                                vcpu->arch.intr_msr &= ~MSR_LE;
                }
-               mutex_unlock(&kvm->lock);
        }
 
        /*
@@ -981,6 +980,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
                mask &= 0xFFFFFFFF;
        vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
        spin_unlock(&vc->lock);
+       mutex_unlock(&kvm->lock);
 }
 
 static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
index bb94e6f..6cbf163 100644 (file)
@@ -1005,6 +1005,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
        /* Save HEIR (HV emulation assist reg) in emul_inst
           if this is an HEI (HV emulation interrupt, e40) */
        li      r3,KVM_INST_FETCH_FAILED
+       stw     r3,VCPU_LAST_INST(r9)
        cmpwi   r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
        bne     11f
        mfspr   r3,SPRN_HEIR
index 39b3a8f..6249cdc 100644 (file)
@@ -34,7 +34,7 @@
 #include <asm/kvm_para.h>
 #include <asm/kvm_host.h>
 #include <asm/kvm_ppc.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #define MAX_CPU     32
 #define MAX_SRC     256
@@ -289,11 +289,6 @@ static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
        clear_bit(n_IRQ, q->queue);
 }
 
-static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
-{
-       return test_bit(n_IRQ, q->queue);
-}
-
 static void IRQ_check(struct openpic *opp, struct irq_queue *q)
 {
        int irq = -1;
@@ -1374,8 +1369,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
        return -ENXIO;
 }
 
-static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
-                        int len, void *ptr)
+static int kvm_mpic_read(struct kvm_vcpu *vcpu,
+                        struct kvm_io_device *this,
+                        gpa_t addr, int len, void *ptr)
 {
        struct openpic *opp = container_of(this, struct openpic, mmio);
        int ret;
@@ -1415,8 +1411,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
        return ret;
 }
 
-static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
-                         int len, const void *ptr)
+static int kvm_mpic_write(struct kvm_vcpu *vcpu,
+                         struct kvm_io_device *this,
+                         gpa_t addr, int len, const void *ptr)
 {
        struct openpic *opp = container_of(this, struct openpic, mmio);
        int ret;
index 27c0fac..24bfe40 100644 (file)
@@ -807,7 +807,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-       ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+       ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
                              bytes, &run->mmio.data);
 
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -880,7 +880,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
        idx = srcu_read_lock(&vcpu->kvm->srcu);
 
-       ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+       ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
                               bytes, &run->mmio.data);
 
        srcu_read_unlock(&vcpu->kvm->srcu, idx);
index 0509bca..fcbe899 100644 (file)
@@ -9,11 +9,11 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#include <linux/jump_label.h>
 #include <asm/ppc_asm.h>
 #include <asm/hvcall.h>
 #include <asm/asm-offsets.h>
 #include <asm/opal.h>
-#include <asm/jump_label.h>
 
        .section        ".text"
 
index fc34025..38a4508 100644 (file)
@@ -33,6 +33,8 @@
 #include <asm/runlatch.h>
 #include <asm/code-patching.h>
 #include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
 
 #include "powernv.h"
 
@@ -149,7 +151,7 @@ static int pnv_smp_cpu_disable(void)
 static void pnv_smp_cpu_kill_self(void)
 {
        unsigned int cpu;
-       unsigned long srr1;
+       unsigned long srr1, wmask;
        u32 idle_states;
 
        /* Standard hot unplug procedure */
@@ -161,6 +163,10 @@ static void pnv_smp_cpu_kill_self(void)
        generic_set_cpu_dead(cpu);
        smp_wmb();
 
+       wmask = SRR1_WAKEMASK;
+       if (cpu_has_feature(CPU_FTR_ARCH_207S))
+               wmask = SRR1_WAKEMASK_P8;
+
        idle_states = pnv_get_supported_cpuidle_states();
        /* We don't want to take decrementer interrupts while we are offline,
         * so clear LPCR:PECE1. We keep PECE2 enabled.
@@ -191,10 +197,14 @@ static void pnv_smp_cpu_kill_self(void)
                 * having finished executing in a KVM guest, then srr1
                 * contains 0.
                 */
-               if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) {
+               if ((srr1 & wmask) == SRR1_WAKEEE) {
                        icp_native_flush_interrupt();
                        local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
                        smp_mb();
+               } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
+                       unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+                       asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+                       kvmppc_set_host_ipi(cpu, 0);
                }
 
                if (cpu_core_split_required())
index ccd53f9..74b5b8e 100644 (file)
@@ -7,12 +7,12 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
+#include <linux/jump_label.h>
 #include <asm/hvcall.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
-#include <asm/jump_label.h>
 
        .section        ".text"
        
index b5682fd..b7a67e3 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/console.h>
 #include <linux/export.h>
-#include <linux/static_key.h>
+#include <linux/jump_label.h>
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/page.h>
index 90cf3dc..8f35d52 100644 (file)
 static struct kobject *mobility_kobj;
 
 struct update_props_workarea {
-       u32 phandle;
-       u32 state;
-       u64 reserved;
-       u32 nprops;
+       __be32 phandle;
+       __be32 state;
+       __be64 reserved;
+       __be32 nprops;  /* big-endian; update_dt_node() converts it with be32_to_cpu() before use */
 } __packed;
 
 #define NODE_ACTION_MASK       0xff000000
@@ -54,11 +54,11 @@ static int mobility_rtas_call(int token, char *buf, s32 scope)
        return rc;
 }
 
-static int delete_dt_node(u32 phandle)
+static int delete_dt_node(__be32 phandle)
 {
        struct device_node *dn;
 
-       dn = of_find_node_by_phandle(phandle);
+       dn = of_find_node_by_phandle(be32_to_cpu(phandle));
        if (!dn)
                return -ENOENT;
 
@@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
        return 0;
 }
 
-static int update_dt_node(u32 phandle, s32 scope)
+static int update_dt_node(__be32 phandle, s32 scope)
 {
        struct update_props_workarea *upwa;
        struct device_node *dn;
@@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope)
        char *prop_data;
        char *rtas_buf;
        int update_properties_token;
+       u32 nprops;
        u32 vd;
 
        update_properties_token = rtas_token("ibm,update-properties");
@@ -146,7 +147,7 @@ static int update_dt_node(u32 phandle, s32 scope)
        if (!rtas_buf)
                return -ENOMEM;
 
-       dn = of_find_node_by_phandle(phandle);
+       dn = of_find_node_by_phandle(be32_to_cpu(phandle));
        if (!dn) {
                kfree(rtas_buf);
                return -ENOENT;
@@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope)
                        break;
 
                prop_data = rtas_buf + sizeof(*upwa);
+               nprops = be32_to_cpu(upwa->nprops);
 
                /* On the first call to ibm,update-properties for a node the
                 * the first property value descriptor contains an empty
@@ -170,17 +172,17 @@ static int update_dt_node(u32 phandle, s32 scope)
                 */
                if (*prop_data == 0) {
                        prop_data++;
-                       vd = *(u32 *)prop_data;
+                       vd = be32_to_cpu(*(__be32 *)prop_data);
                        prop_data += vd + sizeof(vd);
-                       upwa->nprops--;
+                       nprops--;
                }
 
-               for (i = 0; i < upwa->nprops; i++) {
+               for (i = 0; i < nprops; i++) {
                        char *prop_name;
 
                        prop_name = prop_data;
                        prop_data += strlen(prop_name) + 1;
-                       vd = *(u32 *)prop_data;
+                       vd = be32_to_cpu(*(__be32 *)prop_data);
                        prop_data += sizeof(vd);
 
                        switch (vd) {
@@ -212,13 +214,13 @@ static int update_dt_node(u32 phandle, s32 scope)
        return 0;
 }
 
-static int add_dt_node(u32 parent_phandle, u32 drc_index)
+static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
 {
        struct device_node *dn;
        struct device_node *parent_dn;
        int rc;
 
-       parent_dn = of_find_node_by_phandle(parent_phandle);
+       parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));
        if (!parent_dn)
                return -ENOENT;
 
@@ -237,7 +239,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index)
 int pseries_devicetree_update(s32 scope)
 {
        char *rtas_buf;
-       u32 *data;
+       __be32 *data;
        int update_nodes_token;
        int rc;
 
@@ -254,17 +256,17 @@ int pseries_devicetree_update(s32 scope)
                if (rc && rc != 1)
                        break;
 
-               data = (u32 *)rtas_buf + 4;
-               while (*data & NODE_ACTION_MASK) {
+               data = (__be32 *)rtas_buf + 4;
+               while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
                        int i;
-                       u32 action = *data & NODE_ACTION_MASK;
-                       int node_count = *data & NODE_COUNT_MASK;
+                       u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
+                       u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
 
                        data++;
 
                        for (i = 0; i < node_count; i++) {
-                               u32 phandle = *data++;
-                               u32 drc_index;
+                               __be32 phandle = *data++;
+                               __be32 drc_index;
 
                                switch (action) {
                                case DELETE_DT_NODE:
index c9df40b..c9c875d 100644 (file)
@@ -211,7 +211,7 @@ do {                                                                \
 
 extern unsigned long mmap_rnd_mask;
 
-#define STACK_RND_MASK (mmap_rnd_mask)
+#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
 
 #define ARCH_DLINFO                                                        \
 do {                                                                       \
index 58642fd..2b77e23 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _ASM_S390_JUMP_LABEL_H
 #define _ASM_S390_JUMP_LABEL_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 
 #define JUMP_LABEL_NOP_SIZE 6
@@ -39,4 +41,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
index d84559e..d01fc58 100644 (file)
@@ -172,7 +172,9 @@ struct kvm_s390_sie_block {
        __u32   fac;                    /* 0x01a0 */
        __u8    reserved1a4[20];        /* 0x01a4 */
        __u64   cbrlo;                  /* 0x01b8 */
-       __u8    reserved1c0[30];        /* 0x01c0 */
+       __u8    reserved1c0[8];         /* 0x01c0 */
+       __u32   ecd;                    /* 0x01c8 */
+       __u8    reserved1cc[18];        /* 0x01cc */
        __u64   pp;                     /* 0x01de */
        __u8    reserved1e6[2];         /* 0x01e6 */
        __u64   itdba;                  /* 0x01e8 */
@@ -183,11 +185,17 @@ struct kvm_s390_itdb {
        __u8    data[256];
 } __packed;
 
+struct kvm_s390_vregs {
+       __vector128 vrs[32];
+       __u8    reserved200[512];       /* for future vector expansion */
+} __packed;
+
 struct sie_page {
        struct kvm_s390_sie_block sie_block;
        __u8 reserved200[1024];         /* 0x0200 */
        struct kvm_s390_itdb itdb;      /* 0x0600 */
-       __u8 reserved700[2304];         /* 0x0700 */
+       __u8 reserved700[1280];         /* 0x0700 */
+       struct kvm_s390_vregs vregs;    /* 0x0c00 */
 } __packed;
 
 struct kvm_vcpu_stat {
@@ -238,6 +246,7 @@ struct kvm_vcpu_stat {
        u32 instruction_sigp_stop;
        u32 instruction_sigp_stop_store_status;
        u32 instruction_sigp_store_status;
+       u32 instruction_sigp_store_adtl_status;
        u32 instruction_sigp_arch;
        u32 instruction_sigp_prefix;
        u32 instruction_sigp_restart;
@@ -270,6 +279,7 @@ struct kvm_vcpu_stat {
 #define PGM_SPECIAL_OPERATION          0x13
 #define PGM_OPERAND                    0x15
 #define PGM_TRACE_TABEL                        0x16
+#define PGM_VECTOR_PROCESSING          0x1b
 #define PGM_SPACE_SWITCH               0x1c
 #define PGM_HFP_SQUARE_ROOT            0x1d
 #define PGM_PC_TRANSLATION_SPEC                0x1f
@@ -334,6 +344,11 @@ enum irq_types {
        IRQ_PEND_COUNT
 };
 
+/* We have 2M for virtio device descriptor pages. Smallest amount of
+ * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381
+ */
+#define KVM_S390_MAX_VIRTIO_IRQS 87381
+
 /*
  * Repressible (non-floating) machine check interrupts
  * subclass bits in MCIC
@@ -411,13 +426,32 @@ struct kvm_s390_local_interrupt {
        unsigned long pending_irqs;
 };
 
+#define FIRQ_LIST_IO_ISC_0 0
+#define FIRQ_LIST_IO_ISC_1 1
+#define FIRQ_LIST_IO_ISC_2 2
+#define FIRQ_LIST_IO_ISC_3 3
+#define FIRQ_LIST_IO_ISC_4 4
+#define FIRQ_LIST_IO_ISC_5 5
+#define FIRQ_LIST_IO_ISC_6 6
+#define FIRQ_LIST_IO_ISC_7 7
+#define FIRQ_LIST_PFAULT   8
+#define FIRQ_LIST_VIRTIO   9
+#define FIRQ_LIST_COUNT   10
+#define FIRQ_CNTR_IO       0
+#define FIRQ_CNTR_SERVICE  1
+#define FIRQ_CNTR_VIRTIO   2
+#define FIRQ_CNTR_PFAULT   3
+#define FIRQ_MAX_COUNT     4
+
 struct kvm_s390_float_interrupt {
+       unsigned long pending_irqs;
        spinlock_t lock;
-       struct list_head list;
-       atomic_t active;
+       struct list_head lists[FIRQ_LIST_COUNT];
+       int counters[FIRQ_MAX_COUNT];
+       struct kvm_s390_mchk_info mchk;
+       struct kvm_s390_ext_info srv_signal;
        int next_rr_cpu;
        unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
-       unsigned int irq_count;
 };
 
 struct kvm_hw_wp_info_arch {
@@ -465,6 +499,7 @@ struct kvm_vcpu_arch {
        s390_fp_regs      host_fpregs;
        unsigned int      host_acrs[NUM_ACRS];
        s390_fp_regs      guest_fpregs;
+       struct kvm_s390_vregs   *host_vregs;
        struct kvm_s390_local_interrupt local_int;
        struct hrtimer    ckc_timer;
        struct kvm_s390_pgm_info pgm;
@@ -515,15 +550,15 @@ struct s390_io_adapter {
 #define S390_ARCH_FAC_MASK_SIZE_U64 \
        (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
 
-struct s390_model_fac {
-       /* facilities used in SIE context */
-       __u64 sie[S390_ARCH_FAC_LIST_SIZE_U64];
-       /* subset enabled by kvm */
-       __u64 kvm[S390_ARCH_FAC_LIST_SIZE_U64];
+struct kvm_s390_fac {
+       /* facility list requested by guest */
+       __u64 list[S390_ARCH_FAC_LIST_SIZE_U64];
+       /* facility mask supported by kvm & hosting machine */
+       __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64];
 };
 
 struct kvm_s390_cpu_model {
-       struct s390_model_fac *fac;
+       struct kvm_s390_fac *fac;
        struct cpuid cpu_id;
        unsigned short ibc;
 };
@@ -553,6 +588,7 @@ struct kvm_arch{
        int use_cmma;
        int user_cpu_state_ctrl;
        int user_sigp;
+       int user_stsi;
        struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
        wait_queue_head_t ipte_wq;
        int ipte_lock_count;
index f49b719..8fb3802 100644 (file)
@@ -62,6 +62,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 {
        int cpu = smp_processor_id();
 
+       S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
        if (prev == next)
                return;
        if (MACHINE_HAS_TLB_LC)
@@ -73,7 +74,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
        atomic_dec(&prev->context.attach_count);
        if (MACHINE_HAS_TLB_LC)
                cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
-       S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
 }
 
 #define finish_arch_post_lock_switch finish_arch_post_lock_switch
index 7b2ac6e..53eacbd 100644 (file)
@@ -37,16 +37,7 @@ static inline void storage_key_init_range(unsigned long start, unsigned long end
 #endif
 }
 
-static inline void clear_page(void *page)
-{
-       register unsigned long reg1 asm ("1") = 0;
-       register void *reg2 asm ("2") = page;
-       register unsigned long reg3 asm ("3") = 4096;
-       asm volatile(
-               "       mvcl    2,0"
-               : "+d" (reg2), "+d" (reg3) : "d" (reg1)
-               : "memory", "cc");
-}
+#define clear_page(page)       memset((page), 0, PAGE_SIZE)
 
 /*
  * copy_page uses the mvcl instruction with 0xb0 padding byte in order to
index 9c77e60..ef1a5fc 100644 (file)
@@ -150,6 +150,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_CRS    (1UL << 3)
 #define KVM_SYNC_ARCH0  (1UL << 4)
 #define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS    (1UL << 6)
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
        __u64 prefix;   /* prefix register */
@@ -164,6 +165,9 @@ struct kvm_sync_regs {
        __u64 pft;      /* pfault token [PFAULT] */
        __u64 pfs;      /* pfault select [PFAULT] */
        __u64 pfc;      /* pfault compare [PFAULT] */
+       __u64 vrs[32][2];       /* vector registers */
+       __u8  reserved[512];    /* for future vector expansion */
+       __u32 fpc;      /* only valid with vector registers */
 };
 
 #define KVM_REG_S390_TODPR     (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
index d4096fd..ee69c08 100644 (file)
  * and returns a key, which can be used to find a mnemonic name
  * of the instruction in the icpt_insn_codes table.
  */
-#define icpt_insn_decoder(insn)                        \
+#define icpt_insn_decoder(insn) (              \
        INSN_DECODE_IPA0(0x01, insn, 48, 0xff)  \
        INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f)  \
        INSN_DECODE_IPA0(0xb2, insn, 48, 0xff)  \
        INSN_DECODE_IPA0(0xe5, insn, 48, 0xff)  \
        INSN_DECODE_IPA0(0xeb, insn, 16, 0xff)  \
        INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f)  \
-       INSN_DECODE(insn)
+       INSN_DECODE(insn))
 
 #endif /* _UAPI_ASM_S390_SIE_H */
index e07e916..8dc4db1 100644 (file)
@@ -171,6 +171,7 @@ int main(void)
 #else /* CONFIG_32BIT */
        DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
        DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
+       DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
        DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
        DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
        DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
index 82c1989..6c79f1b 100644 (file)
 
 unsigned long ftrace_plt;
 
+static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
+{
+#ifdef CC_USING_HOTPATCH
+       /* brcl 0,0 */
+       insn->opc = 0xc004;
+       insn->disp = 0;
+#else
+       /* stg r14,8(r15) */
+       insn->opc = 0xe3e0;
+       insn->disp = 0xf0080024;
+#endif
+}
+
+static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+       if (insn->opc == BREAKPOINT_INSTRUCTION)
+               return 1;
+#endif
+       return 0;
+}
+
+static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+       insn->opc = BREAKPOINT_INSTRUCTION;
+       insn->disp = KPROBE_ON_FTRACE_NOP;
+#endif
+}
+
+static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+       insn->opc = BREAKPOINT_INSTRUCTION;
+       insn->disp = KPROBE_ON_FTRACE_CALL;
+#endif
+}
+
 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
                       unsigned long addr)
 {
@@ -72,16 +110,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
                return -EFAULT;
        if (addr == MCOUNT_ADDR) {
                /* Initial code replacement */
-#ifdef CC_USING_HOTPATCH
-               /* We expect to see brcl 0,0 */
-               ftrace_generate_nop_insn(&orig);
-#else
-               /* We expect to see stg r14,8(r15) */
-               orig.opc = 0xe3e0;
-               orig.disp = 0xf0080024;
-#endif
+               ftrace_generate_orig_insn(&orig);
                ftrace_generate_nop_insn(&new);
-       } else if (old.opc == BREAKPOINT_INSTRUCTION) {
+       } else if (is_kprobe_on_ftrace(&old)) {
                /*
                 * If we find a breakpoint instruction, a kprobe has been
                 * placed at the beginning of the function. We write the
@@ -89,9 +120,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
                 * bytes of the original instruction so that the kprobes
                 * handler can execute a nop, if it reaches this breakpoint.
                 */
-               new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
-               orig.disp = KPROBE_ON_FTRACE_CALL;
-               new.disp = KPROBE_ON_FTRACE_NOP;
+               ftrace_generate_kprobe_call_insn(&orig);
+               ftrace_generate_kprobe_nop_insn(&new);
        } else {
                /* Replace ftrace call with a nop. */
                ftrace_generate_call_insn(&orig, rec->ip);
@@ -111,7 +141,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
        if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
                return -EFAULT;
-       if (old.opc == BREAKPOINT_INSTRUCTION) {
+       if (is_kprobe_on_ftrace(&old)) {
                /*
                 * If we find a breakpoint instruction, a kprobe has been
                 * placed at the beginning of the function. We write the
@@ -119,9 +149,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
                 * bytes of the original instruction so that the kprobes
                 * handler can execute a brasl if it reaches this breakpoint.
                 */
-               new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
-               orig.disp = KPROBE_ON_FTRACE_NOP;
-               new.disp = KPROBE_ON_FTRACE_CALL;
+               ftrace_generate_kprobe_nop_insn(&orig);
+               ftrace_generate_kprobe_call_insn(&new);
        } else {
                /* Replace nop with an ftrace call. */
                ftrace_generate_nop_insn(&orig);
index cb2d51e..830066f 100644 (file)
@@ -36,16 +36,20 @@ static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
        insn->offset = (entry->target - entry->code) >> 1;
 }
 
-static void jump_label_bug(struct jump_entry *entry, struct insn *insn)
+static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
+                          struct insn *new)
 {
        unsigned char *ipc = (unsigned char *)entry->code;
-       unsigned char *ipe = (unsigned char *)insn;
+       unsigned char *ipe = (unsigned char *)expected;
+       unsigned char *ipn = (unsigned char *)new;
 
        pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
        pr_emerg("Found:    %02x %02x %02x %02x %02x %02x\n",
                 ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
        pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
                 ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
+       pr_emerg("New:      %02x %02x %02x %02x %02x %02x\n",
+                ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
        panic("Corrupted kernel text");
 }
 
@@ -69,10 +73,10 @@ static void __jump_label_transform(struct jump_entry *entry,
        }
        if (init) {
                if (memcmp((void *)entry->code, &orignop, sizeof(orignop)))
-                       jump_label_bug(entry, &old);
+                       jump_label_bug(entry, &orignop, &new);
        } else {
                if (memcmp((void *)entry->code, &old, sizeof(old)))
-                       jump_label_bug(entry, &old);
+                       jump_label_bug(entry, &old, &new);
        }
        probe_kernel_write((void *)entry->code, &new, sizeof(new));
 }
index 36154a2..2ca9586 100644 (file)
@@ -436,6 +436,7 @@ int module_finalize(const Elf_Ehdr *hdr,
                    const Elf_Shdr *sechdrs,
                    struct module *me)
 {
+       jump_label_apply_nops(me);
        vfree(me->arch.syminfo);
        me->arch.syminfo = NULL;
        return 0;
index c3f8d15..e6a1578 100644 (file)
@@ -1415,7 +1415,7 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
 
 static struct attribute *cpumsf_pmu_events_attr[] = {
        CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
-       CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
+       NULL,
        NULL,
 };
 
@@ -1606,8 +1606,11 @@ static int __init init_cpum_sampling_pmu(void)
                return -EINVAL;
        }
 
-       if (si.ad)
+       if (si.ad) {
                sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+               cpumsf_pmu_events_attr[1] =
+                       CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
+       }
 
        sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
        if (!sfdbg)
index 2610823..dc488e1 100644 (file)
@@ -18,7 +18,7 @@
 
 static DEFINE_PER_CPU(struct cpuid, cpu_id);
 
-void cpu_relax(void)
+void notrace cpu_relax(void)
 {
        if (!smp_cpu_mtid && MACHINE_HAS_DIAG44)
                asm volatile("diag 0,0,0x44");
index 6b09fdf..ca62946 100644 (file)
@@ -177,6 +177,17 @@ restart_entry:
        lhi     %r1,1
        sigp    %r1,%r0,SIGP_SET_ARCHITECTURE
        sam64
+#ifdef CONFIG_SMP
+       larl    %r1,smp_cpu_mt_shift
+       icm     %r1,15,0(%r1)
+       jz      smt_done
+       llgfr   %r1,%r1
+smt_loop:
+       sigp    %r1,%r0,SIGP_SET_MULTI_THREADING
+       brc     8,smt_done                      /* accepted */
+       brc     2,smt_loop                      /* busy, try again */
+smt_done:
+#endif
        larl    %r1,.Lnew_pgm_check_psw
        lpswe   0(%r1)
 pgm_check_entry:
index 20660dd..170ddd2 100644 (file)
@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
 {
        u64 nsecps;
 
-       if (tk->tkr.clock != &clocksource_tod)
+       if (tk->tkr_mono.clock != &clocksource_tod)
                return;
 
        /* Make userspace gettimeofday spin until we're done. */
        ++vdso_data->tb_update_count;
        smp_wmb();
-       vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
+       vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
        vdso_data->xtime_clock_sec = tk->xtime_sec;
-       vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
+       vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
        vdso_data->wtom_clock_sec =
                tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
-       vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
-               + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
-       nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
+       vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+               + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+       nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
        while (vdso_data->wtom_clock_nsec >= nsecps) {
                vdso_data->wtom_clock_nsec -= nsecps;
                vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
 
        vdso_data->xtime_coarse_sec = tk->xtime_sec;
        vdso_data->xtime_coarse_nsec =
-               (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+               (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
        vdso_data->wtom_coarse_sec =
                vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
        vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
                vdso_data->wtom_coarse_sec++;
        }
 
-       vdso_data->tk_mult = tk->tkr.mult;
-       vdso_data->tk_shift = tk->tkr.shift;
+       vdso_data->tk_mult = tk->tkr_mono.mult;
+       vdso_data->tk_shift = tk->tkr_mono.shift;
        smp_wmb();
        ++vdso_data->tb_update_count;
 }
@@ -283,7 +283,7 @@ void __init time_init(void)
        if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
                panic("Couldn't request external interrupt 0x1406");
 
-       if (clocksource_register(&clocksource_tod) != 0)
+       if (__clocksource_register(&clocksource_tod) != 0)
                panic("Could not register TOD clock source");
 
        /* Enable TOD clock interrupts on the boot cpu. */
index 9254aff..fc7ec95 100644 (file)
@@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
 
        if (vcpu->run->s.regs.gprs[rx] & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+       rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
@@ -213,7 +213,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
         * - gpr 3 contains the virtqueue index (passed as datamatch)
         * - gpr 4 contains the index on the bus (optionally)
         */
-       ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+       ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS,
                                      vcpu->run->s.regs.gprs[2] & 0xffffffff,
                                      8, &vcpu->run->s.regs.gprs[3],
                                      vcpu->run->s.regs.gprs[4]);
@@ -230,7 +230,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
-       int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
+       int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
index 267523c..a7559f7 100644 (file)
@@ -10,6 +10,7 @@
 #include <asm/pgtable.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
+#include <asm/switch_to.h>
 
 union asce {
        unsigned long val;
@@ -207,6 +208,54 @@ union raddress {
        unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
+union alet {
+       u32 val;
+       struct {
+               u32 reserved : 7;
+               u32 p        : 1;
+               u32 alesn    : 8;
+               u32 alen     : 16;
+       };
+};
+
+union ald {
+       u32 val;
+       struct {
+               u32     : 1;
+               u32 alo : 24;
+               u32 all : 7;
+       };
+};
+
+struct ale {
+       unsigned long i      : 1; /* ALEN-Invalid Bit */
+       unsigned long        : 5;
+       unsigned long fo     : 1; /* Fetch-Only Bit */
+       unsigned long p      : 1; /* Private Bit */
+       unsigned long alesn  : 8; /* Access-List-Entry Sequence Number */
+       unsigned long aleax  : 16; /* Access-List-Entry Authorization Index */
+       unsigned long        : 32;
+       unsigned long        : 1;
+       unsigned long asteo  : 25; /* ASN-Second-Table-Entry Origin */
+       unsigned long        : 6;
+       unsigned long astesn : 32; /* ASTE Sequence Number */
+} __packed;
+
+struct aste {
+       unsigned long i      : 1; /* ASX-Invalid Bit */
+       unsigned long ato    : 29; /* Authority-Table Origin */
+       unsigned long        : 1;
+       unsigned long b      : 1; /* Base-Space Bit */
+       unsigned long ax     : 16; /* Authorization Index */
+       unsigned long atl    : 12; /* Authority-Table Length */
+       unsigned long        : 2;
+       unsigned long ca     : 1; /* Controlled-ASN Bit */
+       unsigned long ra     : 1; /* Reusable-ASN Bit */
+       unsigned long asce   : 64; /* Address-Space-Control Element */
+       unsigned long ald    : 32;
+       unsigned long astesn : 32;
+       /* .. more fields there */
+} __packed;
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
@@ -307,15 +356,157 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
                ipte_unlock_simple(vcpu);
 }
 
-static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
+                         int write)
+{
+       union alet alet;
+       struct ale ale;
+       struct aste aste;
+       unsigned long ald_addr, authority_table_addr;
+       union ald ald;
+       int eax, rc;
+       u8 authority_table;
+
+       if (ar >= NUM_ACRS)
+               return -EINVAL;
+
+       save_access_regs(vcpu->run->s.regs.acrs);
+       alet.val = vcpu->run->s.regs.acrs[ar];
+
+       if (ar == 0 || alet.val == 0) {
+               asce->val = vcpu->arch.sie_block->gcr[1];
+               return 0;
+       } else if (alet.val == 1) {
+               asce->val = vcpu->arch.sie_block->gcr[7];
+               return 0;
+       }
+
+       if (alet.reserved)
+               return PGM_ALET_SPECIFICATION;
+
+       if (alet.p)
+               ald_addr = vcpu->arch.sie_block->gcr[5];
+       else
+               ald_addr = vcpu->arch.sie_block->gcr[2];
+       ald_addr &= 0x7fffffc0;
+
+       rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
+       if (rc)
+               return rc;
+
+       if (alet.alen / 8 > ald.all)
+               return PGM_ALEN_TRANSLATION;
+
+       if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
+               return PGM_ADDRESSING;
+
+       rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
+                            sizeof(struct ale));
+       if (rc)
+               return rc;
+
+       if (ale.i == 1)
+               return PGM_ALEN_TRANSLATION;
+       if (ale.alesn != alet.alesn)
+               return PGM_ALE_SEQUENCE;
+
+       rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
+       if (rc)
+               return rc;
+
+       if (aste.i)
+               return PGM_ASTE_VALIDITY;
+       if (aste.astesn != ale.astesn)
+               return PGM_ASTE_SEQUENCE;
+
+       if (ale.p == 1) {
+               eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
+               if (ale.aleax != eax) {
+                       if (eax / 16 > aste.atl)
+                               return PGM_EXTENDED_AUTHORITY;
+
+                       authority_table_addr = aste.ato * 4 + eax / 4;
+
+                       rc = read_guest_real(vcpu, authority_table_addr,
+                                            &authority_table,
+                                            sizeof(u8));
+                       if (rc)
+                               return rc;
+
+                       if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
+                               return PGM_EXTENDED_AUTHORITY;
+               }
+       }
+
+       if (ale.fo == 1 && write)
+               return PGM_PROTECTION;
+
+       asce->val = aste.asce;
+       return 0;
+}
+
+struct trans_exc_code_bits {
+       unsigned long addr : 52; /* Translation-exception Address */
+       unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
+       unsigned long      : 6;
+       unsigned long b60  : 1;
+       unsigned long b61  : 1;
+       unsigned long as   : 2;  /* ASCE Identifier */
+};
+
+enum {
+       FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
+       FSI_STORE   = 1, /* Exception was due to store operation */
+       FSI_FETCH   = 2  /* Exception was due to fetch operation */
+};
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+                        ar_t ar, int write)
 {
+       int rc;
+       psw_t *psw = &vcpu->arch.sie_block->gpsw;
+       struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+       struct trans_exc_code_bits *tec_bits;
+
+       memset(pgm, 0, sizeof(*pgm));
+       tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+       tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+       tec_bits->as = psw_bits(*psw).as;
+
+       if (!psw_bits(*psw).t) {
+               asce->val = 0;
+               asce->r = 1;
+               return 0;
+       }
+
        switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
        case PSW_AS_PRIMARY:
-               return vcpu->arch.sie_block->gcr[1];
+               asce->val = vcpu->arch.sie_block->gcr[1];
+               return 0;
        case PSW_AS_SECONDARY:
-               return vcpu->arch.sie_block->gcr[7];
+               asce->val = vcpu->arch.sie_block->gcr[7];
+               return 0;
        case PSW_AS_HOME:
-               return vcpu->arch.sie_block->gcr[13];
+               asce->val = vcpu->arch.sie_block->gcr[13];
+               return 0;
+       case PSW_AS_ACCREG:
+               rc = ar_translation(vcpu, asce, ar, write);
+               switch (rc) {
+               case PGM_ALEN_TRANSLATION:
+               case PGM_ALE_SEQUENCE:
+               case PGM_ASTE_VALIDITY:
+               case PGM_ASTE_SEQUENCE:
+               case PGM_EXTENDED_AUTHORITY:
+                       vcpu->arch.pgm.exc_access_id = ar;
+                       break;
+               case PGM_PROTECTION:
+                       tec_bits->b60 = 1;
+                       tec_bits->b61 = 1;
+                       break;
+               }
+               if (rc > 0)
+                       pgm->code = rc;
+               return rc;
        }
        return 0;
 }
@@ -330,10 +521,11 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
  * @vcpu: virtual cpu
  * @gva: guest virtual address
  * @gpa: points to where guest physical (absolute) address should be stored
+ * @asce: effective asce
  * @write: indicates if access is a write access
  *
  * Translate a guest virtual address into a guest absolute address by means
- * of dynamic address translation as specified by the architecuture.
+ * of dynamic address translation as specified by the architecture.
  * If the resulting absolute address is not available in the configuration
  * an addressing exception is indicated and @gpa will not be changed.
  *
@@ -345,7 +537,8 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
  *           by the architecture
  */
 static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
-                                    unsigned long *gpa, int write)
+                                    unsigned long *gpa, const union asce asce,
+                                    int write)
 {
        union vaddress vaddr = {.addr = gva};
        union raddress raddr = {.addr = gva};
@@ -354,12 +547,10 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
        union ctlreg0 ctlreg0;
        unsigned long ptr;
        int edat1, edat2;
-       union asce asce;
 
        ctlreg0.val = vcpu->arch.sie_block->gcr[0];
        edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
        edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
-       asce.val = get_vcpu_asce(vcpu);
        if (asce.r)
                goto real_address;
        ptr = asce.origin * 4096;
@@ -506,48 +697,30 @@ static inline int is_low_address(unsigned long ga)
        return (ga & ~0x11fful) == 0;
 }
 
-static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
+                                         const union asce asce)
 {
        union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
-       union asce asce;
 
        if (!ctlreg0.lap)
                return 0;
-       asce.val = get_vcpu_asce(vcpu);
        if (psw_bits(*psw).t && asce.p)
                return 0;
        return 1;
 }
 
-struct trans_exc_code_bits {
-       unsigned long addr : 52; /* Translation-exception Address */
-       unsigned long fsi  : 2;  /* Access Exception Fetch/Store Indication */
-       unsigned long      : 7;
-       unsigned long b61  : 1;
-       unsigned long as   : 2;  /* ASCE Identifier */
-};
-
-enum {
-       FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
-       FSI_STORE   = 1, /* Exception was due to store operation */
-       FSI_FETCH   = 2  /* Exception was due to fetch operation */
-};
-
 static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
                            unsigned long *pages, unsigned long nr_pages,
-                           int write)
+                           const union asce asce, int write)
 {
        struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
        struct trans_exc_code_bits *tec_bits;
        int lap_enabled, rc;
 
-       memset(pgm, 0, sizeof(*pgm));
        tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-       tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
-       tec_bits->as = psw_bits(*psw).as;
-       lap_enabled = low_address_protection_enabled(vcpu);
+       lap_enabled = low_address_protection_enabled(vcpu, asce);
        while (nr_pages) {
                ga = kvm_s390_logical_to_effective(vcpu, ga);
                tec_bits->addr = ga >> PAGE_SHIFT;
@@ -557,7 +730,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
                }
                ga &= PAGE_MASK;
                if (psw_bits(*psw).t) {
-                       rc = guest_translate(vcpu, ga, pages, write);
+                       rc = guest_translate(vcpu, ga, pages, asce, write);
                        if (rc < 0)
                                return rc;
                        if (rc == PGM_PROTECTION)
@@ -578,7 +751,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
        return 0;
 }
 
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
                 unsigned long len, int write)
 {
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -591,20 +764,19 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
 
        if (!len)
                return 0;
-       /* Access register mode is not supported yet. */
-       if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
-               return -EOPNOTSUPP;
+       rc = get_vcpu_asce(vcpu, &asce, ar, write);
+       if (rc)
+               return rc;
        nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
        pages = pages_array;
        if (nr_pages > ARRAY_SIZE(pages_array))
                pages = vmalloc(nr_pages * sizeof(unsigned long));
        if (!pages)
                return -ENOMEM;
-       asce.val = get_vcpu_asce(vcpu);
        need_ipte_lock = psw_bits(*psw).t && !asce.r;
        if (need_ipte_lock)
                ipte_lock(vcpu);
-       rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+       rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
        for (idx = 0; idx < nr_pages && !rc; idx++) {
                gpa = *(pages + idx) + (ga & ~PAGE_MASK);
                _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
@@ -652,7 +824,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  * Note: The IPTE lock is not taken during this function, so the caller
  * has to take care of this.
  */
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
                            unsigned long *gpa, int write)
 {
        struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
@@ -661,26 +833,21 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
        union asce asce;
        int rc;
 
-       /* Access register mode is not supported yet. */
-       if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
-               return -EOPNOTSUPP;
-
        gva = kvm_s390_logical_to_effective(vcpu, gva);
-       memset(pgm, 0, sizeof(*pgm));
        tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
-       tec->as = psw_bits(*psw).as;
-       tec->fsi = write ? FSI_STORE : FSI_FETCH;
+       rc = get_vcpu_asce(vcpu, &asce, ar, write);
        tec->addr = gva >> PAGE_SHIFT;
-       if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+       if (rc)
+               return rc;
+       if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
                if (write) {
                        rc = pgm->code = PGM_PROTECTION;
                        return rc;
                }
        }
 
-       asce.val = get_vcpu_asce(vcpu);
        if (psw_bits(*psw).t && !asce.r) {      /* Use DAT? */
-               rc = guest_translate(vcpu, gva, gpa, write);
+               rc = guest_translate(vcpu, gva, gpa, asce, write);
                if (rc > 0) {
                        if (rc == PGM_PROTECTION)
                                tec->b61 = 1;
@@ -697,28 +864,51 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
 }
 
 /**
- * kvm_s390_check_low_addr_protection - check for low-address protection
- * @ga: Guest address
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+                   unsigned long length, int is_write)
+{
+       unsigned long gpa;
+       unsigned long currlen;
+       int rc = 0;
+
+       ipte_lock(vcpu);
+       while (length > 0 && !rc) {
+               currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
+               rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+               gva += currlen;
+               length -= currlen;
+       }
+       ipte_unlock(vcpu);
+
+       return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @gra: Guest real address
  *
  * Checks whether an address is subject to low-address protection and sets
  * up vcpu->arch.pgm accordingly if necessary.
  *
  * Return: 0 if no protection exception, or PGM_PROTECTION if protected.
  */
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
 {
        struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
        psw_t *psw = &vcpu->arch.sie_block->gpsw;
        struct trans_exc_code_bits *tec_bits;
+       union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
 
-       if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
+       if (!ctlreg0.lap || !is_low_address(gra))
                return 0;
 
        memset(pgm, 0, sizeof(*pgm));
        tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
        tec_bits->fsi = FSI_STORE;
        tec_bits->as = psw_bits(*psw).as;
-       tec_bits->addr = ga >> PAGE_SHIFT;
+       tec_bits->addr = gra >> PAGE_SHIFT;
        pgm->code = PGM_PROTECTION;
 
        return pgm->code;
index 0149cf1..ef03726 100644 (file)
@@ -156,9 +156,11 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 }
 
 int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
-                           unsigned long *gpa, int write);
+                           ar_t ar, unsigned long *gpa, int write);
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+                   unsigned long length, int is_write);
 
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
                 unsigned long len, int write);
 
 int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
@@ -168,6 +170,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  * write_guest - copy data from kernel space to guest space
  * @vcpu: virtual cpu
  * @ga: guest address
+ * @ar: access register
  * @data: source address in kernel space
  * @len: number of bytes to copy
  *
@@ -176,8 +179,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  * If DAT is off data will be copied to guest real or absolute memory.
  * If DAT is on data will be copied to the address space as specified by
  * the address space bits of the PSW:
- * Primary, secondory or home space (access register mode is currently not
- * implemented).
+ * Primary, secondary, home space or access register mode.
  * The addressing mode of the PSW is also inspected, so that address wrap
  * around is taken into account for 24-, 31- and 64-bit addressing mode,
  * if the to be copied data crosses page boundaries in guest address space.
@@ -210,16 +212,17 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
  *      if data has been changed in guest space in case of an exception.
  */
 static inline __must_check
-int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
                unsigned long len)
 {
-       return access_guest(vcpu, ga, data, len, 1);
+       return access_guest(vcpu, ga, ar, data, len, 1);
 }
 
 /**
  * read_guest - copy data from guest space to kernel space
  * @vcpu: virtual cpu
  * @ga: guest address
+ * @ar: access register
  * @data: destination address in kernel space
  * @len: number of bytes to copy
  *
@@ -229,10 +232,10 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
  * data will be copied from guest space to kernel space.
  */
 static inline __must_check
-int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
               unsigned long len)
 {
-       return access_guest(vcpu, ga, data, len, 0);
+       return access_guest(vcpu, ga, ar, data, len, 0);
 }
 
 /**
@@ -330,6 +333,6 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 void ipte_lock(struct kvm_vcpu *vcpu);
 void ipte_unlock(struct kvm_vcpu *vcpu);
 int ipte_lock_held(struct kvm_vcpu *vcpu);
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
 
 #endif /* __KVM_S390_GACCESS_H */
index 3e8d409..e97b345 100644 (file)
@@ -191,8 +191,8 @@ static int __import_wp_info(struct kvm_vcpu *vcpu,
        if (!wp_info->old_data)
                return -ENOMEM;
        /* try to backup the original value */
-       ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
-                        wp_info->len);
+       ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data,
+                            wp_info->len);
        if (ret) {
                kfree(wp_info->old_data);
                wp_info->old_data = NULL;
@@ -362,8 +362,8 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
                        continue;
 
                /* refetch the wp data and compare it to the old value */
-               if (!read_guest(vcpu, wp_info->phys_addr, temp,
-                               wp_info->len)) {
+               if (!read_guest_abs(vcpu, wp_info->phys_addr, temp,
+                                   wp_info->len)) {
                        if (memcmp(temp, wp_info->old_data, wp_info->len)) {
                                kfree(temp);
                                return wp_info;
index bebd215..9e3779e 100644 (file)
@@ -165,6 +165,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
                pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
                pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
                break;
+       case PGM_VECTOR_PROCESSING:
        case PGM_DATA:
                pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
                break;
@@ -319,7 +320,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 
        /* Make sure that the source is paged-in */
        rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
-                                    &srcaddr, 0);
+                                    reg2, &srcaddr, 0);
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@@ -328,7 +329,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 
        /* Make sure that the destination is paged-in */
        rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
-                                    &dstaddr, 1);
+                                    reg1, &dstaddr, 1);
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
index 073b5f3..9de4726 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * handling kvm guest interrupts
  *
- * Copyright IBM Corp. 2008,2014
+ * Copyright IBM Corp. 2008, 2015
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
 #include <linux/signal.h>
 #include <linux/slab.h>
 #include <linux/bitmap.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
+#include <asm/dis.h>
 #include <asm/uaccess.h>
 #include <asm/sclp.h>
+#include <asm/isc.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace-s390.h"
 #define PFAULT_DONE 0x0680
 #define VIRTIO_PARAM 0x0d00
 
-static int is_ioint(u64 type)
-{
-       return ((type & 0xfffe0000u) != 0xfffe0000u);
-}
-
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
 {
        return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
@@ -72,70 +70,45 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
        return 1;
 }
 
-static u64 int_word_to_isc_bits(u32 int_word)
+static int ckc_irq_pending(struct kvm_vcpu *vcpu)
+{
+       if (!(vcpu->arch.sie_block->ckc <
+             get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+               return 0;
+       return ckc_interrupts_enabled(vcpu);
+}
+
+static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+       return !psw_extint_disabled(vcpu) &&
+              (vcpu->arch.sie_block->gcr[0] & 0x400ul);
+}
+
+static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
+{
+       return (vcpu->arch.sie_block->cputm >> 63) &&
+              cpu_timer_interrupts_enabled(vcpu);
+}
+
+static inline int is_ioirq(unsigned long irq_type)
 {
-       u8 isc = (int_word & 0x38000000) >> 27;
+       return ((irq_type >= IRQ_PEND_IO_ISC_0) &&
+               (irq_type <= IRQ_PEND_IO_ISC_7));
+}
 
+static uint64_t isc_to_isc_bits(int isc)
+{
        return (0x80 >> isc) << 24;
 }
 
-static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
-                                     struct kvm_s390_interrupt_info *inti)
+static inline u8 int_word_to_isc(u32 int_word)
 {
-       switch (inti->type) {
-       case KVM_S390_INT_EXTERNAL_CALL:
-               if (psw_extint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
-                       return 1;
-               return 0;
-       case KVM_S390_INT_EMERGENCY:
-               if (psw_extint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
-                       return 1;
-               return 0;
-       case KVM_S390_INT_CLOCK_COMP:
-               return ckc_interrupts_enabled(vcpu);
-       case KVM_S390_INT_CPU_TIMER:
-               if (psw_extint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
-                       return 1;
-               return 0;
-       case KVM_S390_INT_SERVICE:
-       case KVM_S390_INT_PFAULT_INIT:
-       case KVM_S390_INT_PFAULT_DONE:
-       case KVM_S390_INT_VIRTIO:
-               if (psw_extint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
-                       return 1;
-               return 0;
-       case KVM_S390_PROGRAM_INT:
-       case KVM_S390_SIGP_STOP:
-       case KVM_S390_SIGP_SET_PREFIX:
-       case KVM_S390_RESTART:
-               return 1;
-       case KVM_S390_MCHK:
-               if (psw_mchk_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14)
-                       return 1;
-               return 0;
-       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               if (psw_ioint_disabled(vcpu))
-                       return 0;
-               if (vcpu->arch.sie_block->gcr[6] &
-                   int_word_to_isc_bits(inti->io.io_int_word))
-                       return 1;
-               return 0;
-       default:
-               printk(KERN_WARNING "illegal interrupt type %llx\n",
-                      inti->type);
-               BUG();
-       }
-       return 0;
+       return (int_word & 0x38000000) >> 27;
+}
+
+static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
+{
+       return vcpu->kvm->arch.float_int.pending_irqs;
 }
 
 static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
@@ -143,12 +116,31 @@ static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
        return vcpu->arch.local_int.pending_irqs;
 }
 
-static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
+                                  unsigned long active_mask)
+{
+       int i;
+
+       for (i = 0; i <= MAX_ISC; i++)
+               if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
+                       active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i));
+
+       return active_mask;
+}
+
+static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
 {
-       unsigned long active_mask = pending_local_irqs(vcpu);
+       unsigned long active_mask;
+
+       active_mask = pending_local_irqs(vcpu);
+       active_mask |= pending_floating_irqs(vcpu);
 
        if (psw_extint_disabled(vcpu))
                active_mask &= ~IRQ_PEND_EXT_MASK;
+       if (psw_ioint_disabled(vcpu))
+               active_mask &= ~IRQ_PEND_IO_MASK;
+       else
+               active_mask = disable_iscs(vcpu, active_mask);
        if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
                __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
        if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
@@ -157,8 +149,13 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
                __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
        if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
                __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+       if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+               __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
        if (psw_mchk_disabled(vcpu))
                active_mask &= ~IRQ_PEND_MCHK_MASK;
+       if (!(vcpu->arch.sie_block->gcr[14] &
+             vcpu->kvm->arch.float_int.mchk.cr14))
+               __clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
 
        /*
         * STOP irqs will never be actively delivered. They are triggered via
@@ -200,6 +197,16 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
        atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
+static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
+{
+       if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
+               return;
+       else if (psw_ioint_disabled(vcpu))
+               __set_cpuflag(vcpu, CPUSTAT_IO_INT);
+       else
+               vcpu->arch.sie_block->lctl |= LCTL_CR6;
+}
+
 static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
 {
        if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
@@ -226,47 +233,17 @@ static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
                __set_cpuflag(vcpu, CPUSTAT_STOP_INT);
 }
 
-/* Set interception request for non-deliverable local interrupts */
-static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+/* Set interception request for non-deliverable interrupts */
+static void set_intercept_indicators(struct kvm_vcpu *vcpu)
 {
+       set_intercept_indicators_io(vcpu);
        set_intercept_indicators_ext(vcpu);
        set_intercept_indicators_mchk(vcpu);
        set_intercept_indicators_stop(vcpu);
 }
 
-static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
-                                     struct kvm_s390_interrupt_info *inti)
-{
-       switch (inti->type) {
-       case KVM_S390_INT_SERVICE:
-       case KVM_S390_INT_PFAULT_DONE:
-       case KVM_S390_INT_VIRTIO:
-               if (psw_extint_disabled(vcpu))
-                       __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
-               else
-                       vcpu->arch.sie_block->lctl |= LCTL_CR0;
-               break;
-       case KVM_S390_MCHK:
-               if (psw_mchk_disabled(vcpu))
-                       vcpu->arch.sie_block->ictl |= ICTL_LPSW;
-               else
-                       vcpu->arch.sie_block->lctl |= LCTL_CR14;
-               break;
-       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               if (psw_ioint_disabled(vcpu))
-                       __set_cpuflag(vcpu, CPUSTAT_IO_INT);
-               else
-                       vcpu->arch.sie_block->lctl |= LCTL_CR6;
-               break;
-       default:
-               BUG();
-       }
-}
-
 static u16 get_ilc(struct kvm_vcpu *vcpu)
 {
-       const unsigned short table[] = { 2, 4, 4, 6 };
-
        switch (vcpu->arch.sie_block->icptcode) {
        case ICPT_INST:
        case ICPT_INSTPROGI:
@@ -274,7 +251,7 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
        case ICPT_PARTEXEC:
        case ICPT_IOINST:
                /* last instruction only stored for these icptcodes */
-               return table[vcpu->arch.sie_block->ipa >> 14];
+               return insn_length(vcpu->arch.sie_block->ipa >> 8);
        case ICPT_PROGI:
                return vcpu->arch.sie_block->pgmilc;
        default:
@@ -350,38 +327,72 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
 
 static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
 {
+       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_mchk_info mchk;
-       int rc;
+       struct kvm_s390_mchk_info mchk = {};
+       unsigned long adtl_status_addr;
+       int deliver = 0;
+       int rc = 0;
 
+       spin_lock(&fi->lock);
        spin_lock(&li->lock);
-       mchk = li->irq.mchk;
+       if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
+           test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) {
+               /*
+                * If there was an exigent machine check pending, then any
+                * repressible machine checks that might have been pending
+                * are indicated along with it, so always clear bits for
+                * repressible and exigent interrupts
+                */
+               mchk = li->irq.mchk;
+               clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+               clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+               memset(&li->irq.mchk, 0, sizeof(mchk));
+               deliver = 1;
+       }
        /*
-        * If there was an exigent machine check pending, then any repressible
-        * machine checks that might have been pending are indicated along
-        * with it, so always clear both bits
+        * We indicate floating repressible conditions along with
+        * other pending conditions. Channel Report Pending and Channel
+        * Subsystem damage are the only two and are indicated by
+        * bits in mcic and masked in cr14.
         */
-       clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
-       clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
-       memset(&li->irq.mchk, 0, sizeof(mchk));
+       if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+               mchk.mcic |= fi->mchk.mcic;
+               mchk.cr14 |= fi->mchk.cr14;
+               memset(&fi->mchk, 0, sizeof(mchk));
+               deliver = 1;
+       }
        spin_unlock(&li->lock);
+       spin_unlock(&fi->lock);
 
-       VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-                  mchk.mcic);
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
-                                        mchk.cr14, mchk.mcic);
-
-       rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
-       rc |= put_guest_lc(vcpu, mchk.mcic,
-                          (u64 __user *) __LC_MCCK_CODE);
-       rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
-                          (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
-       rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
-                            &mchk.fixed_logout, sizeof(mchk.fixed_logout));
-       rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+       if (deliver) {
+               VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+                          mchk.mcic);
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                                                KVM_S390_MCHK,
+                                                mchk.cr14, mchk.mcic);
+
+               rc  = kvm_s390_vcpu_store_status(vcpu,
+                                                KVM_S390_STORE_STATUS_PREFIXED);
+               rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR,
+                                   &adtl_status_addr,
+                                   sizeof(unsigned long));
+               rc |= kvm_s390_vcpu_store_adtl_status(vcpu,
+                                                     adtl_status_addr);
+               rc |= put_guest_lc(vcpu, mchk.mcic,
+                                  (u64 __user *) __LC_MCCK_CODE);
+               rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+                                  (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+                                    &mchk.fixed_logout,
+                                    sizeof(mchk.fixed_logout));
+               rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+                                    &vcpu->arch.sie_block->gpsw,
+                                    sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+                                   &vcpu->arch.sie_block->gpsw,
+                                   sizeof(psw_t));
+       }
        return rc ? -EFAULT : 0;
 }
 
@@ -484,7 +495,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        struct kvm_s390_pgm_info pgm_info;
-       int rc = 0;
+       int rc = 0, nullifying = false;
        u16 ilc = get_ilc(vcpu);
 
        spin_lock(&li->lock);
@@ -509,6 +520,8 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
        case PGM_LX_TRANSLATION:
        case PGM_PRIMARY_AUTHORITY:
        case PGM_SECONDARY_AUTHORITY:
+               nullifying = true;
+               /* fall through */
        case PGM_SPACE_SWITCH:
                rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
                                  (u64 *)__LC_TRANS_EXC_CODE);
@@ -521,6 +534,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
        case PGM_EXTENDED_AUTHORITY:
                rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
                                  (u8 *)__LC_EXC_ACCESS_ID);
+               nullifying = true;
                break;
        case PGM_ASCE_TYPE:
        case PGM_PAGE_TRANSLATION:
@@ -534,6 +548,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
                                   (u8 *)__LC_EXC_ACCESS_ID);
                rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
                                   (u8 *)__LC_OP_ACCESS_ID);
+               nullifying = true;
                break;
        case PGM_MONITOR:
                rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
@@ -541,6 +556,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
                rc |= put_guest_lc(vcpu, pgm_info.mon_code,
                                   (u64 *)__LC_MON_CODE);
                break;
+       case PGM_VECTOR_PROCESSING:
        case PGM_DATA:
                rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
                                  (u32 *)__LC_DATA_EXC_CODE);
@@ -551,6 +567,15 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
                rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
                                   (u8 *)__LC_EXC_ACCESS_ID);
                break;
+       case PGM_STACK_FULL:
+       case PGM_STACK_EMPTY:
+       case PGM_STACK_SPECIFICATION:
+       case PGM_STACK_TYPE:
+       case PGM_STACK_OPERATION:
+       case PGM_TRACE_TABEL:
+       case PGM_CRYPTO_OPERATION:
+               nullifying = true;
+               break;
        }
 
        if (pgm_info.code & PGM_PER) {
@@ -564,7 +589,12 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
                                   (u8 *) __LC_PER_ACCESS_ID);
        }
 
+       if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
+               kvm_s390_rewind_psw(vcpu, ilc);
+
        rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
+       rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
+                                (u64 *) __LC_LAST_BREAK);
        rc |= put_guest_lc(vcpu, pgm_info.code,
                           (u16 *)__LC_PGM_INT_CODE);
        rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -574,16 +604,27 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
        return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
-                                         struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
 {
-       int rc;
+       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+       struct kvm_s390_ext_info ext;
+       int rc = 0;
+
+       spin_lock(&fi->lock);
+       if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+               spin_unlock(&fi->lock);
+               return 0;
+       }
+       ext = fi->srv_signal;
+       memset(&fi->srv_signal, 0, sizeof(ext));
+       clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
 
        VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
-                  inti->ext.ext_params);
+                  ext.ext_params);
        vcpu->stat.deliver_service_signal++;
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                        inti->ext.ext_params, 0);
+       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+                                        ext.ext_params, 0);
 
        rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
        rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
@@ -591,106 +632,146 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
                             &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
        rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+       rc |= put_guest_lc(vcpu, ext.ext_params,
                           (u32 *)__LC_EXT_PARAMS);
+
        return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
-                                          struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
 {
-       int rc;
+       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+       struct kvm_s390_interrupt_info *inti;
+       int rc = 0;
 
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
-                                        KVM_S390_INT_PFAULT_DONE, 0,
-                                        inti->ext.ext_params2);
+       spin_lock(&fi->lock);
+       inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT],
+                                       struct kvm_s390_interrupt_info,
+                                       list);
+       if (inti) {
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                               KVM_S390_INT_PFAULT_DONE, 0,
+                               inti->ext.ext_params2);
+               list_del(&inti->list);
+               fi->counters[FIRQ_CNTR_PFAULT] -= 1;
+       }
+       if (list_empty(&fi->lists[FIRQ_LIST_PFAULT]))
+               clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
 
-       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
-       rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
-       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                          (u64 *)__LC_EXT_PARAMS2);
+       if (inti) {
+               rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+                               (u16 *)__LC_EXT_INT_CODE);
+               rc |= put_guest_lc(vcpu, PFAULT_DONE,
+                               (u16 *)__LC_EXT_CPU_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                               (u64 *)__LC_EXT_PARAMS2);
+               kfree(inti);
+       }
        return rc ? -EFAULT : 0;
 }
 
-static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
-                                        struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
 {
-       int rc;
+       struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+       struct kvm_s390_interrupt_info *inti;
+       int rc = 0;
 
-       VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
-                  inti->ext.ext_params, inti->ext.ext_params2);
-       vcpu->stat.deliver_virtio_interrupt++;
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                        inti->ext.ext_params,
-                                        inti->ext.ext_params2);
+       spin_lock(&fi->lock);
+       inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO],
+                                       struct kvm_s390_interrupt_info,
+                                       list);
+       if (inti) {
+               VCPU_EVENT(vcpu, 4,
+                          "interrupt: virtio parm:%x,parm64:%llx",
+                          inti->ext.ext_params, inti->ext.ext_params2);
+               vcpu->stat.deliver_virtio_interrupt++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                               inti->type,
+                               inti->ext.ext_params,
+                               inti->ext.ext_params2);
+               list_del(&inti->list);
+               fi->counters[FIRQ_CNTR_VIRTIO] -= 1;
+       }
+       if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO]))
+               clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
 
-       rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
-       rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
-       rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-                          (u32 *)__LC_EXT_PARAMS);
-       rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-                          (u64 *)__LC_EXT_PARAMS2);
+       if (inti) {
+               rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+                               (u16 *)__LC_EXT_INT_CODE);
+               rc |= put_guest_lc(vcpu, VIRTIO_PARAM,
+                               (u16 *)__LC_EXT_CPU_ADDR);
+               rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+                               (u32 *)__LC_EXT_PARAMS);
+               rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+                               (u64 *)__LC_EXT_PARAMS2);
+               kfree(inti);
+       }
        return rc ? -EFAULT : 0;
 }
 
 static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
-                                    struct kvm_s390_interrupt_info *inti)
+                                    unsigned long irq_type)
 {
-       int rc;
+       struct list_head *isc_list;
+       struct kvm_s390_float_interrupt *fi;
+       struct kvm_s390_interrupt_info *inti = NULL;
+       int rc = 0;
 
-       VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
-       vcpu->stat.deliver_io_int++;
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-                                        ((__u32)inti->io.subchannel_id << 16) |
-                                               inti->io.subchannel_nr,
-                                        ((__u64)inti->io.io_int_parm << 32) |
-                                               inti->io.io_int_word);
-
-       rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
-                          (u16 *)__LC_SUBCHANNEL_ID);
-       rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
-                          (u16 *)__LC_SUBCHANNEL_NR);
-       rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
-                          (u32 *)__LC_IO_INT_PARM);
-       rc |= put_guest_lc(vcpu, inti->io.io_int_word,
-                          (u32 *)__LC_IO_INT_WORD);
-       rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       return rc ? -EFAULT : 0;
-}
+       fi = &vcpu->kvm->arch.float_int;
 
-static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
-                                          struct kvm_s390_interrupt_info *inti)
-{
-       struct kvm_s390_mchk_info *mchk = &inti->mchk;
-       int rc;
+       spin_lock(&fi->lock);
+       isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0];
+       inti = list_first_entry_or_null(isc_list,
+                                       struct kvm_s390_interrupt_info,
+                                       list);
+       if (inti) {
+               VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+               vcpu->stat.deliver_io_int++;
+               trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+                               inti->type,
+                               ((__u32)inti->io.subchannel_id << 16) |
+                               inti->io.subchannel_nr,
+                               ((__u64)inti->io.io_int_parm << 32) |
+                               inti->io.io_int_word);
+               list_del(&inti->list);
+               fi->counters[FIRQ_CNTR_IO] -= 1;
+       }
+       if (list_empty(isc_list))
+               clear_bit(irq_type, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
+
+       if (inti) {
+               rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+                               (u16 *)__LC_SUBCHANNEL_ID);
+               rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+                               (u16 *)__LC_SUBCHANNEL_NR);
+               rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+                               (u32 *)__LC_IO_INT_PARM);
+               rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+                               (u32 *)__LC_IO_INT_WORD);
+               rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+                               &vcpu->arch.sie_block->gpsw,
+                               sizeof(psw_t));
+               kfree(inti);
+       }
 
-       VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-                  mchk->mcic);
-       trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
-                                        mchk->cr14, mchk->mcic);
-
-       rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
-       rc |= put_guest_lc(vcpu, mchk->mcic,
-                       (u64 __user *) __LC_MCCK_CODE);
-       rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
-                       (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
-       rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
-                            &mchk->fixed_logout, sizeof(mchk->fixed_logout));
-       rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-                            &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-       rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-                           &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
        return rc ? -EFAULT : 0;
 }
 
@@ -698,6 +779,7 @@ typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
 
 static const deliver_irq_t deliver_irq_funcs[] = {
        [IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+       [IRQ_PEND_MCHK_REP]       = __deliver_machine_check,
        [IRQ_PEND_PROG]           = __deliver_prog,
        [IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
        [IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
@@ -706,36 +788,11 @@ static const deliver_irq_t deliver_irq_funcs[] = {
        [IRQ_PEND_RESTART]        = __deliver_restart,
        [IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
        [IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+       [IRQ_PEND_EXT_SERVICE]    = __deliver_service,
+       [IRQ_PEND_PFAULT_DONE]    = __deliver_pfault_done,
+       [IRQ_PEND_VIRTIO]         = __deliver_virtio,
 };
 
-static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
-                                          struct kvm_s390_interrupt_info *inti)
-{
-       int rc;
-
-       switch (inti->type) {
-       case KVM_S390_INT_SERVICE:
-               rc = __deliver_service(vcpu, inti);
-               break;
-       case KVM_S390_INT_PFAULT_DONE:
-               rc = __deliver_pfault_done(vcpu, inti);
-               break;
-       case KVM_S390_INT_VIRTIO:
-               rc = __deliver_virtio(vcpu, inti);
-               break;
-       case KVM_S390_MCHK:
-               rc = __deliver_mchk_floating(vcpu, inti);
-               break;
-       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               rc = __deliver_io(vcpu, inti);
-               break;
-       default:
-               BUG();
-       }
-
-       return rc;
-}
-
 /* Check whether an external call is pending (deliverable or not) */
 int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -751,21 +808,9 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
 
 int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 {
-       struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
-       struct kvm_s390_interrupt_info  *inti;
        int rc;
 
-       rc = !!deliverable_local_irqs(vcpu);
-
-       if ((!rc) && atomic_read(&fi->active)) {
-               spin_lock(&fi->lock);
-               list_for_each_entry(inti, &fi->list, list)
-                       if (__interrupt_is_deliverable(vcpu, inti)) {
-                               rc = 1;
-                               break;
-                       }
-               spin_unlock(&fi->lock);
-       }
+       rc = !!deliverable_irqs(vcpu);
 
        if (!rc && kvm_cpu_has_pending_timer(vcpu))
                rc = 1;
@@ -784,12 +829,7 @@ int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
 
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-       if (!(vcpu->arch.sie_block->ckc <
-             get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
-               return 0;
-       if (!ckc_interrupts_enabled(vcpu))
-               return 0;
-       return 1;
+       return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
 }
 
 int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -884,60 +924,45 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
 {
        struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-       struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
-       struct kvm_s390_interrupt_info  *n, *inti = NULL;
        deliver_irq_t func;
-       int deliver;
        int rc = 0;
        unsigned long irq_type;
-       unsigned long deliverable_irqs;
+       unsigned long irqs;
 
        __reset_intercept_indicators(vcpu);
 
        /* pending ckc conditions might have been invalidated */
        clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
-       if (kvm_cpu_has_pending_timer(vcpu))
+       if (ckc_irq_pending(vcpu))
                set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
 
+       /* pending cpu timer conditions might have been invalidated */
+       clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+       if (cpu_timer_irq_pending(vcpu))
+               set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+
        do {
-               deliverable_irqs = deliverable_local_irqs(vcpu);
+               irqs = deliverable_irqs(vcpu);
                /* bits are in the order of interrupt priority */
-               irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+               irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
                if (irq_type == IRQ_PEND_COUNT)
                        break;
-               func = deliver_irq_funcs[irq_type];
-               if (!func) {
-                       WARN_ON_ONCE(func == NULL);
-                       clear_bit(irq_type, &li->pending_irqs);
-                       continue;
+               if (is_ioirq(irq_type)) {
+                       rc = __deliver_io(vcpu, irq_type);
+               } else {
+                       func = deliver_irq_funcs[irq_type];
+                       if (!func) {
+                               WARN_ON_ONCE(func == NULL);
+                               clear_bit(irq_type, &li->pending_irqs);
+                               continue;
+                       }
+                       rc = func(vcpu);
                }
-               rc = func(vcpu);
-       } while (!rc && irq_type != IRQ_PEND_COUNT);
+               if (rc)
+                       break;
+       } while (!rc);
 
-       set_intercept_indicators_local(vcpu);
-
-       if (!rc && atomic_read(&fi->active)) {
-               do {
-                       deliver = 0;
-                       spin_lock(&fi->lock);
-                       list_for_each_entry_safe(inti, n, &fi->list, list) {
-                               if (__interrupt_is_deliverable(vcpu, inti)) {
-                                       list_del(&inti->list);
-                                       fi->irq_count--;
-                                       deliver = 1;
-                                       break;
-                               }
-                               __set_intercept_indicator(vcpu, inti);
-                       }
-                       if (list_empty(&fi->list))
-                               atomic_set(&fi->active, 0);
-                       spin_unlock(&fi->lock);
-                       if (deliver) {
-                               rc = __deliver_floating_interrupt(vcpu, inti);
-                               kfree(inti);
-                       }
-               } while (!rc && deliver);
-       }
+       set_intercept_indicators(vcpu);
 
        return rc;
 }
@@ -1172,80 +1197,182 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
+                                                 int isc, u32 schid)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+       struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+       struct kvm_s390_interrupt_info *iter;
+       u16 id = (schid & 0xffff0000U) >> 16;
+       u16 nr = schid & 0x0000ffffU;
 
+       spin_lock(&fi->lock);
+       list_for_each_entry(iter, isc_list, list) {
+               if (schid && (id != iter->io.subchannel_id ||
+                             nr != iter->io.subchannel_nr))
+                       continue;
+               /* found an appropriate entry */
+               list_del_init(&iter->list);
+               fi->counters[FIRQ_CNTR_IO] -= 1;
+               if (list_empty(isc_list))
+                       clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+               spin_unlock(&fi->lock);
+               return iter;
+       }
+       spin_unlock(&fi->lock);
+       return NULL;
+}
+
+/*
+ * Dequeue and return an I/O interrupt matching any of the interruption
+ * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ */
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
-                                                   u64 cr6, u64 schid)
+                                                   u64 isc_mask, u32 schid)
+{
+       struct kvm_s390_interrupt_info *inti = NULL;
+       int isc;
+
+       for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+               if (isc_mask & isc_to_isc_bits(isc))
+                       inti = get_io_int(kvm, isc, schid);
+       }
+       return inti;
+}
+
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int __inject_service(struct kvm *kvm,
+                            struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+       spin_lock(&fi->lock);
+       fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+       /*
+        * Early versions of the QEMU s390 bios will inject several
+        * service interrupts one after another without handling a
+        * condition code indicating busy.
+        * We will silently ignore those superfluous sccb values.
+        * A future version of QEMU will take care of serialization
+        * of servc requests.
+        */
+       if (fi->srv_signal.ext_params & SCCB_MASK)
+               goto out;
+       fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
+       set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+out:
+       spin_unlock(&fi->lock);
+       kfree(inti);
+       return 0;
+}
+
+static int __inject_virtio(struct kvm *kvm,
+                           struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+       spin_lock(&fi->lock);
+       if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) {
+               spin_unlock(&fi->lock);
+               return -EBUSY;
+       }
+       fi->counters[FIRQ_CNTR_VIRTIO] += 1;
+       list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]);
+       set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
+       return 0;
+}
+
+static int __inject_pfault_done(struct kvm *kvm,
+                                struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+       spin_lock(&fi->lock);
+       if (fi->counters[FIRQ_CNTR_PFAULT] >=
+               (ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
+               spin_unlock(&fi->lock);
+               return -EBUSY;
+       }
+       fi->counters[FIRQ_CNTR_PFAULT] += 1;
+       list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]);
+       set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
+       return 0;
+}
+
+#define CR_PENDING_SUBCLASS 28
+static int __inject_float_mchk(struct kvm *kvm,
+                               struct kvm_s390_interrupt_info *inti)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+       spin_lock(&fi->lock);
+       fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
+       fi->mchk.mcic |= inti->mchk.mcic;
+       set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs);
+       spin_unlock(&fi->lock);
+       kfree(inti);
+       return 0;
+}
+
+static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
        struct kvm_s390_float_interrupt *fi;
-       struct kvm_s390_interrupt_info *inti, *iter;
+       struct list_head *list;
+       int isc;
 
-       if ((!schid && !cr6) || (schid && cr6))
-               return NULL;
        fi = &kvm->arch.float_int;
        spin_lock(&fi->lock);
-       inti = NULL;
-       list_for_each_entry(iter, &fi->list, list) {
-               if (!is_ioint(iter->type))
-                       continue;
-               if (cr6 &&
-                   ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0))
-                       continue;
-               if (schid) {
-                       if (((schid & 0x00000000ffff0000) >> 16) !=
-                           iter->io.subchannel_id)
-                               continue;
-                       if ((schid & 0x000000000000ffff) !=
-                           iter->io.subchannel_nr)
-                               continue;
-               }
-               inti = iter;
-               break;
-       }
-       if (inti) {
-               list_del_init(&inti->list);
-               fi->irq_count--;
+       if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
+               spin_unlock(&fi->lock);
+               return -EBUSY;
        }
-       if (list_empty(&fi->list))
-               atomic_set(&fi->active, 0);
+       fi->counters[FIRQ_CNTR_IO] += 1;
+
+       isc = int_word_to_isc(inti->io.io_int_word);
+       list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+       list_add_tail(&inti->list, list);
+       set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
        spin_unlock(&fi->lock);
-       return inti;
+       return 0;
 }
 
 static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
 {
        struct kvm_s390_local_interrupt *li;
        struct kvm_s390_float_interrupt *fi;
-       struct kvm_s390_interrupt_info *iter;
        struct kvm_vcpu *dst_vcpu = NULL;
        int sigcpu;
-       int rc = 0;
+       u64 type = READ_ONCE(inti->type);
+       int rc;
 
        fi = &kvm->arch.float_int;
-       spin_lock(&fi->lock);
-       if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
+
+       switch (type) {
+       case KVM_S390_MCHK:
+               rc = __inject_float_mchk(kvm, inti);
+               break;
+       case KVM_S390_INT_VIRTIO:
+               rc = __inject_virtio(kvm, inti);
+               break;
+       case KVM_S390_INT_SERVICE:
+               rc = __inject_service(kvm, inti);
+               break;
+       case KVM_S390_INT_PFAULT_DONE:
+               rc = __inject_pfault_done(kvm, inti);
+               break;
+       case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+               rc = __inject_io(kvm, inti);
+               break;
+       default:
                rc = -EINVAL;
-               goto unlock_fi;
        }
-       fi->irq_count++;
-       if (!is_ioint(inti->type)) {
-               list_add_tail(&inti->list, &fi->list);
-       } else {
-               u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+       if (rc)
+               return rc;
 
-               /* Keep I/O interrupts sorted in isc order. */
-               list_for_each_entry(iter, &fi->list, list) {
-                       if (!is_ioint(iter->type))
-                               continue;
-                       if (int_word_to_isc_bits(iter->io.io_int_word)
-                           <= isc_bits)
-                               continue;
-                       break;
-               }
-               list_add_tail(&inti->list, &iter->list);
-       }
-       atomic_set(&fi->active, 1);
-       if (atomic_read(&kvm->online_vcpus) == 0)
-               goto unlock_fi;
        sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
        if (sigcpu == KVM_MAX_VCPUS) {
                do {
@@ -1257,7 +1384,7 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
        dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
        li = &dst_vcpu->arch.local_int;
        spin_lock(&li->lock);
-       switch (inti->type) {
+       switch (type) {
        case KVM_S390_MCHK:
                atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
                break;
@@ -1270,9 +1397,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
        }
        spin_unlock(&li->lock);
        kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
-unlock_fi:
-       spin_unlock(&fi->lock);
-       return rc;
+       return 0;
+
 }
 
 int kvm_s390_inject_vm(struct kvm *kvm,
@@ -1332,10 +1458,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
        return rc;
 }
 
-void kvm_s390_reinject_io_int(struct kvm *kvm,
+int kvm_s390_reinject_io_int(struct kvm *kvm,
                              struct kvm_s390_interrupt_info *inti)
 {
-       __inject_vm(kvm, inti);
+       return __inject_vm(kvm, inti);
 }
 
 int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
@@ -1388,12 +1514,10 @@ void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
        spin_unlock(&li->lock);
 }
 
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 {
-       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
        int rc;
 
-       spin_lock(&li->lock);
        switch (irq->type) {
        case KVM_S390_PROGRAM_INT:
                VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
@@ -1433,83 +1557,130 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
        default:
                rc = -EINVAL;
        }
+
+       return rc;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       int rc;
+
+       spin_lock(&li->lock);
+       rc = do_inject_vcpu(vcpu, irq);
        spin_unlock(&li->lock);
        if (!rc)
                kvm_s390_vcpu_wakeup(vcpu);
        return rc;
 }
 
-void kvm_s390_clear_float_irqs(struct kvm *kvm)
+static inline void clear_irq_list(struct list_head *_list)
 {
-       struct kvm_s390_float_interrupt *fi;
-       struct kvm_s390_interrupt_info  *n, *inti = NULL;
+       struct kvm_s390_interrupt_info *inti, *n;
 
-       fi = &kvm->arch.float_int;
-       spin_lock(&fi->lock);
-       list_for_each_entry_safe(inti, n, &fi->list, list) {
+       list_for_each_entry_safe(inti, n, _list, list) {
                list_del(&inti->list);
                kfree(inti);
        }
-       fi->irq_count = 0;
-       atomic_set(&fi->active, 0);
-       spin_unlock(&fi->lock);
 }
 
-static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
-                                  u8 *addr)
+static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
+                      struct kvm_s390_irq *irq)
 {
-       struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
-       struct kvm_s390_irq irq = {0};
-
-       irq.type = inti->type;
+       irq->type = inti->type;
        switch (inti->type) {
        case KVM_S390_INT_PFAULT_INIT:
        case KVM_S390_INT_PFAULT_DONE:
        case KVM_S390_INT_VIRTIO:
-       case KVM_S390_INT_SERVICE:
-               irq.u.ext = inti->ext;
+               irq->u.ext = inti->ext;
                break;
        case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-               irq.u.io = inti->io;
+               irq->u.io = inti->io;
                break;
-       case KVM_S390_MCHK:
-               irq.u.mchk = inti->mchk;
-               break;
-       default:
-               return -EINVAL;
        }
+}
 
-       if (copy_to_user(uptr, &irq, sizeof(irq)))
-               return -EFAULT;
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+       struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+       int i;
 
-       return 0;
-}
+       spin_lock(&fi->lock);
+       for (i = 0; i < FIRQ_LIST_COUNT; i++)
+               clear_irq_list(&fi->lists[i]);
+       for (i = 0; i < FIRQ_MAX_COUNT; i++)
+               fi->counters[i] = 0;
+       spin_unlock(&fi->lock);
+};
 
-static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
 {
        struct kvm_s390_interrupt_info *inti;
        struct kvm_s390_float_interrupt *fi;
+       struct kvm_s390_irq *buf;
+       struct kvm_s390_irq *irq;
+       int max_irqs;
        int ret = 0;
        int n = 0;
+       int i;
+
+       if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0)
+               return -EINVAL;
+
+       /*
+        * We are already using -ENOMEM to signal
+        * userspace it may retry with a bigger buffer,
+        * so an allocation failure is reported as -ENOBUFS instead.
+        */
+       buf = vzalloc(len);
+       if (!buf)
+               return -ENOBUFS;
+
+       max_irqs = len / sizeof(struct kvm_s390_irq);
 
        fi = &kvm->arch.float_int;
        spin_lock(&fi->lock);
-
-       list_for_each_entry(inti, &fi->list, list) {
-               if (len < sizeof(struct kvm_s390_irq)) {
+       for (i = 0; i < FIRQ_LIST_COUNT; i++) {
+               list_for_each_entry(inti, &fi->lists[i], list) {
+                       if (n == max_irqs) {
+                               /* signal userspace to try again */
+                               ret = -ENOMEM;
+                               goto out;
+                       }
+                       inti_to_irq(inti, &buf[n]);
+                       n++;
+               }
+       }
+       if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+               if (n == max_irqs) {
                        /* signal userspace to try again */
                        ret = -ENOMEM;
-                       break;
+                       goto out;
                }
-               ret = copy_irq_to_user(inti, buf);
-               if (ret)
-                       break;
-               buf += sizeof(struct kvm_s390_irq);
-               len -= sizeof(struct kvm_s390_irq);
+               irq = (struct kvm_s390_irq *) &buf[n];
+               irq->type = KVM_S390_INT_SERVICE;
+               irq->u.ext = fi->srv_signal;
                n++;
        }
+       if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+               if (n == max_irqs) {
+                               /* signal userspace to try again */
+                               ret = -ENOMEM;
+                               goto out;
+               }
+               irq = (struct kvm_s390_irq *) &buf[n];
+               irq->type = KVM_S390_MCHK;
+               irq->u.mchk = fi->mchk;
+               n++;
+}
 
+out:
        spin_unlock(&fi->lock);
+       if (!ret && n > 0) {
+               if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
+                       ret = -EFAULT;
+       }
+       vfree(buf);
 
        return ret < 0 ? ret : n;
 }
@@ -1520,7 +1691,7 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
 
        switch (attr->group) {
        case KVM_DEV_FLIC_GET_ALL_IRQS:
-               r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+               r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
                                          attr->attr);
                break;
        default:
@@ -1952,3 +2123,143 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
 {
        return -EINVAL;
 }
+
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
+{
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       struct kvm_s390_irq *buf;
+       int r = 0;
+       int n;
+
+       buf = vmalloc(len);
+       if (!buf)
+               return -ENOMEM;
+
+       if (copy_from_user((void *) buf, irqstate, len)) {
+               r = -EFAULT;
+               goto out_free;
+       }
+
+       /*
+        * Don't allow setting the interrupt state
+        * when there are already interrupts pending
+        */
+       spin_lock(&li->lock);
+       if (li->pending_irqs) {
+               r = -EBUSY;
+               goto out_unlock;
+       }
+
+       for (n = 0; n < len / sizeof(*buf); n++) {
+               r = do_inject_vcpu(vcpu, &buf[n]);
+               if (r)
+                       break;
+       }
+
+out_unlock:
+       spin_unlock(&li->lock);
+out_free:
+       vfree(buf);
+
+       return r;
+}
+
+static void store_local_irq(struct kvm_s390_local_interrupt *li,
+                           struct kvm_s390_irq *irq,
+                           unsigned long irq_type)
+{
+       switch (irq_type) {
+       case IRQ_PEND_MCHK_EX:
+       case IRQ_PEND_MCHK_REP:
+               irq->type = KVM_S390_MCHK;
+               irq->u.mchk = li->irq.mchk;
+               break;
+       case IRQ_PEND_PROG:
+               irq->type = KVM_S390_PROGRAM_INT;
+               irq->u.pgm = li->irq.pgm;
+               break;
+       case IRQ_PEND_PFAULT_INIT:
+               irq->type = KVM_S390_INT_PFAULT_INIT;
+               irq->u.ext = li->irq.ext;
+               break;
+       case IRQ_PEND_EXT_EXTERNAL:
+               irq->type = KVM_S390_INT_EXTERNAL_CALL;
+               irq->u.extcall = li->irq.extcall;
+               break;
+       case IRQ_PEND_EXT_CLOCK_COMP:
+               irq->type = KVM_S390_INT_CLOCK_COMP;
+               break;
+       case IRQ_PEND_EXT_CPU_TIMER:
+               irq->type = KVM_S390_INT_CPU_TIMER;
+               break;
+       case IRQ_PEND_SIGP_STOP:
+               irq->type = KVM_S390_SIGP_STOP;
+               irq->u.stop = li->irq.stop;
+               break;
+       case IRQ_PEND_RESTART:
+               irq->type = KVM_S390_RESTART;
+               break;
+       case IRQ_PEND_SET_PREFIX:
+               irq->type = KVM_S390_SIGP_SET_PREFIX;
+               irq->u.prefix = li->irq.prefix;
+               break;
+       }
+}
+
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
+{
+       uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+       unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+       struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+       unsigned long pending_irqs;
+       struct kvm_s390_irq irq;
+       unsigned long irq_type;
+       int cpuaddr;
+       int n = 0;
+
+       spin_lock(&li->lock);
+       pending_irqs = li->pending_irqs;
+       memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending,
+              sizeof(sigp_emerg_pending));
+       spin_unlock(&li->lock);
+
+       for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) {
+               memset(&irq, 0, sizeof(irq));
+               if (irq_type == IRQ_PEND_EXT_EMERGENCY)
+                       continue;
+               if (n + sizeof(irq) > len)
+                       return -ENOBUFS;
+               store_local_irq(&vcpu->arch.local_int, &irq, irq_type);
+               if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+                       return -EFAULT;
+               n += sizeof(irq);
+       }
+
+       if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) {
+               for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) {
+                       memset(&irq, 0, sizeof(irq));
+                       if (n + sizeof(irq) > len)
+                               return -ENOBUFS;
+                       irq.type = KVM_S390_INT_EMERGENCY;
+                       irq.u.emerg.code = cpuaddr;
+                       if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+                               return -EFAULT;
+                       n += sizeof(irq);
+               }
+       }
+
+       if ((sigp_ctrl & SIGP_CTRL_C) &&
+           (atomic_read(&vcpu->arch.sie_block->cpuflags) &
+            CPUSTAT_ECALL_PEND)) {
+               if (n + sizeof(irq) > len)
+                       return -ENOBUFS;
+               memset(&irq, 0, sizeof(irq));
+               irq.type = KVM_S390_INT_EXTERNAL_CALL;
+               irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
+               if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+                       return -EFAULT;
+               n += sizeof(irq);
+       }
+
+       return n;
+}
index 0c36239..afa2bd7 100644 (file)
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
 #include <asm/pgtable.h>
 #include <asm/nmi.h>
 #include <asm/switch_to.h>
+#include <asm/isc.h>
 #include <asm/sclp.h>
 #include "kvm-s390.h"
 #include "gaccess.h"
 #include "trace.h"
 #include "trace-s390.h"
 
+#define MEM_OP_MAX_SIZE 65536  /* Maximum transfer size for KVM_S390_MEM_OP */
+#define LOCAL_IRQS 32
+#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
+                          (KVM_MAX_VCPUS + LOCAL_IRQS))
+
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -87,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
        { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
+       { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
@@ -101,8 +109,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 /* upper facilities limit for kvm */
 unsigned long kvm_s390_fac_list_mask[] = {
-       0xff82fffbf4fc2000UL,
-       0x005c000000000000UL,
+       0xffe6fffbfcfdfc40UL,
+       0x205c800000000000UL,
 };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
@@ -165,16 +173,22 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
-       case KVM_CAP_IRQFD:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_DEVICE_CTRL:
        case KVM_CAP_ENABLE_CAP_VM:
        case KVM_CAP_S390_IRQCHIP:
        case KVM_CAP_VM_ATTRIBUTES:
        case KVM_CAP_MP_STATE:
+       case KVM_CAP_S390_INJECT_IRQ:
        case KVM_CAP_S390_USER_SIGP:
+       case KVM_CAP_S390_USER_STSI:
+       case KVM_CAP_S390_SKEYS:
+       case KVM_CAP_S390_IRQ_STATE:
                r = 1;
                break;
+       case KVM_CAP_S390_MEM_OP:
+               r = MEM_OP_MAX_SIZE;
+               break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
@@ -185,6 +199,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
+       case KVM_CAP_S390_VECTOR_REGISTERS:
+               r = MACHINE_HAS_VX;
+               break;
        default:
                r = 0;
        }
@@ -265,6 +282,18 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
                kvm->arch.user_sigp = 1;
                r = 0;
                break;
+       case KVM_CAP_S390_VECTOR_REGISTERS:
+               if (MACHINE_HAS_VX) {
+                       set_kvm_facility(kvm->arch.model.fac->mask, 129);
+                       set_kvm_facility(kvm->arch.model.fac->list, 129);
+                       r = 0;
+               } else
+                       r = -EINVAL;
+               break;
+       case KVM_CAP_S390_USER_STSI:
+               kvm->arch.user_stsi = 1;
+               r = 0;
+               break;
        default:
                r = -EINVAL;
                break;
@@ -522,7 +551,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
                memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
                       sizeof(struct cpuid));
                kvm->arch.model.ibc = proc->ibc;
-               memcpy(kvm->arch.model.fac->kvm, proc->fac_list,
+               memcpy(kvm->arch.model.fac->list, proc->fac_list,
                       S390_ARCH_FAC_LIST_SIZE_BYTE);
        } else
                ret = -EFAULT;
@@ -556,7 +585,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
        }
        memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
        proc->ibc = kvm->arch.model.ibc;
-       memcpy(&proc->fac_list, kvm->arch.model.fac->kvm, S390_ARCH_FAC_LIST_SIZE_BYTE);
+       memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
        if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
                ret = -EFAULT;
        kfree(proc);
@@ -576,10 +605,10 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
        }
        get_cpu_id((struct cpuid *) &mach->cpuid);
        mach->ibc = sclp_get_ibc();
-       memcpy(&mach->fac_mask, kvm_s390_fac_list_mask,
-              kvm_s390_fac_list_mask_size() * sizeof(u64));
+       memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
+              S390_ARCH_FAC_LIST_SIZE_BYTE);
        memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
-              S390_ARCH_FAC_LIST_SIZE_U64);
+              S390_ARCH_FAC_LIST_SIZE_BYTE);
        if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
                ret = -EFAULT;
        kfree(mach);
@@ -709,6 +738,108 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
        return ret;
 }
 
+static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+       uint8_t *keys;
+       uint64_t hva;
+       unsigned long curkey;
+       int i, r = 0;
+
+       if (args->flags != 0)
+               return -EINVAL;
+
+       /* Is this guest using storage keys? */
+       if (!mm_use_skey(current->mm))
+               return KVM_S390_GET_SKEYS_NONE;
+
+       /* Enforce sane limit on memory allocation */
+       if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+               return -EINVAL;
+
+       keys = kmalloc_array(args->count, sizeof(uint8_t),
+                            GFP_KERNEL | __GFP_NOWARN);
+       if (!keys)
+               keys = vmalloc(sizeof(uint8_t) * args->count);
+       if (!keys)
+               return -ENOMEM;
+
+       for (i = 0; i < args->count; i++) {
+               hva = gfn_to_hva(kvm, args->start_gfn + i);
+               if (kvm_is_error_hva(hva)) {
+                       r = -EFAULT;
+                       goto out;
+               }
+
+               curkey = get_guest_storage_key(current->mm, hva);
+               if (IS_ERR_VALUE(curkey)) {
+                       r = curkey;
+                       goto out;
+               }
+               keys[i] = curkey;
+       }
+
+       r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+                        sizeof(uint8_t) * args->count);
+       if (r)
+               r = -EFAULT;
+out:
+       kvfree(keys);
+       return r;
+}
+
+static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+       uint8_t *keys;
+       uint64_t hva;
+       int i, r = 0;
+
+       if (args->flags != 0)
+               return -EINVAL;
+
+       /* Enforce sane limit on memory allocation */
+       if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+               return -EINVAL;
+
+       keys = kmalloc_array(args->count, sizeof(uint8_t),
+                            GFP_KERNEL | __GFP_NOWARN);
+       if (!keys)
+               keys = vmalloc(sizeof(uint8_t) * args->count);
+       if (!keys)
+               return -ENOMEM;
+
+       r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
+                          sizeof(uint8_t) * args->count);
+       if (r) {
+               r = -EFAULT;
+               goto out;
+       }
+
+       /* Enable storage key handling for the guest */
+       s390_enable_skey();
+
+       for (i = 0; i < args->count; i++) {
+               hva = gfn_to_hva(kvm, args->start_gfn + i);
+               if (kvm_is_error_hva(hva)) {
+                       r = -EFAULT;
+                       goto out;
+               }
+
+               /* Lowest order bit is reserved */
+               if (keys[i] & 0x01) {
+                       r = -EINVAL;
+                       goto out;
+               }
+
+               r = set_guest_storage_key(current->mm, hva,
+                                         (unsigned long)keys[i], 0);
+               if (r)
+                       goto out;
+       }
+out:
+       kvfree(keys);
+       return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
 {
@@ -768,6 +899,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
                r = kvm_s390_vm_has_attr(kvm, &attr);
                break;
        }
+       case KVM_S390_GET_SKEYS: {
+               struct kvm_s390_skeys args;
+
+               r = -EFAULT;
+               if (copy_from_user(&args, argp,
+                                  sizeof(struct kvm_s390_skeys)))
+                       break;
+               r = kvm_s390_get_skeys(kvm, &args);
+               break;
+       }
+       case KVM_S390_SET_SKEYS: {
+               struct kvm_s390_skeys args;
+
+               r = -EFAULT;
+               if (copy_from_user(&args, argp,
+                                  sizeof(struct kvm_s390_skeys)))
+                       break;
+               r = kvm_s390_set_skeys(kvm, &args);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
@@ -778,15 +929,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
 static int kvm_s390_query_ap_config(u8 *config)
 {
        u32 fcn_code = 0x04000000UL;
-       u32 cc;
+       u32 cc = 0;
 
+       memset(config, 0, 128);
        asm volatile(
                "lgr 0,%1\n"
                "lgr 2,%2\n"
                ".long 0xb2af0000\n"            /* PQAP(QCI) */
-               "ipm %0\n"
+               "0: ipm %0\n"
                "srl %0,28\n"
-               : "=r" (cc)
+               "1:\n"
+               EX_TABLE(0b, 1b)
+               : "+r" (cc)
                : "r" (fcn_code), "r" (config)
                : "cc", "0", "2", "memory"
        );
@@ -839,9 +993,13 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
 
        kvm_s390_set_crycb_format(kvm);
 
-       /* Disable AES/DEA protected key functions by default */
-       kvm->arch.crypto.aes_kw = 0;
-       kvm->arch.crypto.dea_kw = 0;
+       /* Enable AES/DEA protected key functions by default */
+       kvm->arch.crypto.aes_kw = 1;
+       kvm->arch.crypto.dea_kw = 1;
+       get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+                        sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+       get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+                        sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
 
        return 0;
 }
@@ -881,53 +1039,43 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
        kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
        if (!kvm->arch.dbf)
-               goto out_nodbf;
+               goto out_err;
 
        /*
         * The architectural maximum amount of facilities is 16 kbit. To store
         * this amount, 2 kbyte of memory is required. Thus we need a full
-        * page to hold the active copy (arch.model.fac->sie) and the current
-        * facilities set (arch.model.fac->kvm). Its address size has to be
+        * page to hold the guest facility list (arch.model.fac->list) and the
+        * facility mask (arch.model.fac->mask). Its address size has to be
         * 31 bits and word aligned.
         */
        kvm->arch.model.fac =
-               (struct s390_model_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+               (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
        if (!kvm->arch.model.fac)
-               goto out_nofac;
-
-       memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list,
-              S390_ARCH_FAC_LIST_SIZE_U64);
-
-       /*
-        * If this KVM host runs *not* in a LPAR, relax the facility bits
-        * of the kvm facility mask by all missing facilities. This will allow
-        * to determine the right CPU model by means of the remaining facilities.
-        * Live guest migration must prohibit the migration of KVMs running in
-        * a LPAR to non LPAR hosts.
-        */
-       if (!MACHINE_IS_LPAR)
-               for (i = 0; i < kvm_s390_fac_list_mask_size(); i++)
-                       kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->kvm[i];
+               goto out_err;
 
-       /*
-        * Apply the kvm facility mask to limit the kvm supported/tolerated
-        * facility list.
-        */
+       /* Populate the facility mask initially. */
+       memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
+              S390_ARCH_FAC_LIST_SIZE_BYTE);
        for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
                if (i < kvm_s390_fac_list_mask_size())
-                       kvm->arch.model.fac->kvm[i] &= kvm_s390_fac_list_mask[i];
+                       kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
                else
-                       kvm->arch.model.fac->kvm[i] = 0UL;
+                       kvm->arch.model.fac->mask[i] = 0UL;
        }
 
+       /* Populate the facility list initially. */
+       memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
+              S390_ARCH_FAC_LIST_SIZE_BYTE);
+
        kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
        kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
 
        if (kvm_s390_crypto_init(kvm) < 0)
-               goto out_crypto;
+               goto out_err;
 
        spin_lock_init(&kvm->arch.float_int.lock);
-       INIT_LIST_HEAD(&kvm->arch.float_int.list);
+       for (i = 0; i < FIRQ_LIST_COUNT; i++)
+               INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
        init_waitqueue_head(&kvm->arch.ipte_wq);
        mutex_init(&kvm->arch.ipte_mutex);
 
@@ -939,7 +1087,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        } else {
                kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
                if (!kvm->arch.gmap)
-                       goto out_nogmap;
+                       goto out_err;
                kvm->arch.gmap->private = kvm;
                kvm->arch.gmap->pfault_enabled = 0;
        }
@@ -951,15 +1099,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
        spin_lock_init(&kvm->arch.start_stop_lock);
 
        return 0;
-out_nogmap:
+out_err:
        kfree(kvm->arch.crypto.crycb);
-out_crypto:
        free_page((unsigned long)kvm->arch.model.fac);
-out_nofac:
        debug_unregister(kvm->arch.dbf);
-out_nodbf:
        free_page((unsigned long)(kvm->arch.sca));
-out_err:
        return rc;
 }
 
@@ -1039,6 +1183,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
                                    KVM_SYNC_CRS |
                                    KVM_SYNC_ARCH0 |
                                    KVM_SYNC_PFAULT;
+       if (test_kvm_facility(vcpu->kvm, 129))
+               vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
 
        if (kvm_is_ucontrol(vcpu->kvm))
                return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1049,10 +1195,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
        save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-       save_fp_regs(vcpu->arch.host_fpregs.fprs);
+       if (test_kvm_facility(vcpu->kvm, 129))
+               save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+       else
+               save_fp_regs(vcpu->arch.host_fpregs.fprs);
        save_access_regs(vcpu->arch.host_acrs);
-       restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-       restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+       if (test_kvm_facility(vcpu->kvm, 129)) {
+               restore_fp_ctl(&vcpu->run->s.regs.fpc);
+               restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+       } else {
+               restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+               restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+       }
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.gmap);
        atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1062,11 +1216,19 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
        atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
-       save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-       save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+       if (test_kvm_facility(vcpu->kvm, 129)) {
+               save_fp_ctl(&vcpu->run->s.regs.fpc);
+               save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+       } else {
+               save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+               save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+       }
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-       restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+       if (test_kvm_facility(vcpu->kvm, 129))
+               restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+       else
+               restore_fp_regs(vcpu->arch.host_fpregs.fprs);
        restore_access_regs(vcpu->arch.host_acrs);
 }
 
@@ -1134,6 +1296,15 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
+{
+       struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
+
+       vcpu->arch.cpu_id = model->cpu_id;
+       vcpu->arch.sie_block->ibc = model->ibc;
+       vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
+}
+
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
        int rc = 0;
@@ -1142,6 +1313,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
                                                    CPUSTAT_SM |
                                                    CPUSTAT_STOPPED |
                                                    CPUSTAT_GED);
+       kvm_s390_vcpu_setup_model(vcpu);
+
        vcpu->arch.sie_block->ecb   = 6;
        if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
                vcpu->arch.sie_block->ecb |= 0x10;
@@ -1152,8 +1325,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
                vcpu->arch.sie_block->eca |= 1;
        if (sclp_has_sigpif())
                vcpu->arch.sie_block->eca |= 0x10000000U;
-       vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
-                                     ICTL_TPROT;
+       if (test_kvm_facility(vcpu->kvm, 129)) {
+               vcpu->arch.sie_block->eca |= 0x00020000;
+               vcpu->arch.sie_block->ecd |= 0x20000000;
+       }
+       vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
 
        if (kvm_s390_cmma_enabled(vcpu->kvm)) {
                rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -1163,13 +1339,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
 
-       mutex_lock(&vcpu->kvm->lock);
-       vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id;
-       memcpy(vcpu->kvm->arch.model.fac->sie, vcpu->kvm->arch.model.fac->kvm,
-              S390_ARCH_FAC_LIST_SIZE_BYTE);
-       vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc;
-       mutex_unlock(&vcpu->kvm->lock);
-
        kvm_s390_vcpu_crypto_setup(vcpu);
 
        return rc;
@@ -1197,6 +1366,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
        vcpu->arch.sie_block = &sie_page->sie_block;
        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+       vcpu->arch.host_vregs = &sie_page->vregs;
 
        vcpu->arch.sie_block->icpua = id;
        if (!kvm_is_ucontrol(kvm)) {
@@ -1212,7 +1382,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
                set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
        }
-       vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->sie;
 
        spin_lock_init(&vcpu->arch.local_int.lock);
        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
@@ -1732,6 +1901,31 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+{
+       psw_t *psw = &vcpu->arch.sie_block->gpsw;
+       u8 opcode;
+       int rc;
+
+       VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+       trace_kvm_s390_sie_fault(vcpu);
+
+       /*
+        * We want to inject an addressing exception, which is defined as a
+        * suppressing or terminating exception. However, since we came here
+        * by a DAT access exception, the PSW still points to the faulting
+        * instruction since DAT exceptions are nullifying. So we've got
+        * to look up the current opcode to get the length of the instruction
+        * to be able to forward the PSW.
+        */
+       rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
+       if (rc)
+               return kvm_s390_inject_prog_cond(vcpu, rc);
+       psw->addr = __rewind_psw(*psw, -insn_length(opcode));
+
+       return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+}
+
 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 {
        int rc = -1;
@@ -1763,11 +1957,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
                }
        }
 
-       if (rc == -1) {
-               VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
-               trace_kvm_s390_sie_fault(vcpu);
-               rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-       }
+       if (rc == -1)
+               rc = vcpu_post_run_fault_in_sie(vcpu);
 
        memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
 
@@ -1983,6 +2174,35 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
        return kvm_s390_store_status_unloaded(vcpu, addr);
 }
 
+/*
+ * store additional status at address
+ */
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+                                       unsigned long gpa)
+{
+       /* Only bits 0-53 are used for address formation */
+       if (!(gpa & ~0x3ff))
+               return 0;
+
+       return write_guest_abs(vcpu, gpa & ~0x3ff,
+                              (void *)&vcpu->run->s.regs.vrs, 512);
+}
+
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+       if (!test_kvm_facility(vcpu->kvm, 129))
+               return 0;
+
+       /*
+        * The guest VXRS are in the host VXRs due to the lazy
+        * copying in vcpu load/put. Let's update our copies before we save
+        * it into the save area.
+        */
+       save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+
+       return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
+}
+
 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
 {
        kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
@@ -2107,6 +2327,65 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
        return r;
 }
 
+static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
+                                 struct kvm_s390_mem_op *mop)
+{
+       void __user *uaddr = (void __user *)mop->buf;
+       void *tmpbuf = NULL;
+       int r, srcu_idx;
+       const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
+                                   | KVM_S390_MEMOP_F_CHECK_ONLY;
+
+       if (mop->flags & ~supported_flags)
+               return -EINVAL;
+
+       if (mop->size > MEM_OP_MAX_SIZE)
+               return -E2BIG;
+
+       if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+               tmpbuf = vmalloc(mop->size);
+               if (!tmpbuf)
+                       return -ENOMEM;
+       }
+
+       srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+       switch (mop->op) {
+       case KVM_S390_MEMOP_LOGICAL_READ:
+               if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+                       r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+                       break;
+               }
+               r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+               if (r == 0) {
+                       if (copy_to_user(uaddr, tmpbuf, mop->size))
+                               r = -EFAULT;
+               }
+               break;
+       case KVM_S390_MEMOP_LOGICAL_WRITE:
+               if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+                       r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+                       break;
+               }
+               if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+                       r = -EFAULT;
+                       break;
+               }
+               r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+               break;
+       default:
+               r = -EINVAL;
+       }
+
+       srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+       if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
+               kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+
+       vfree(tmpbuf);
+       return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
 {
@@ -2116,6 +2395,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        long r;
 
        switch (ioctl) {
+       case KVM_S390_IRQ: {
+               struct kvm_s390_irq s390irq;
+
+               r = -EFAULT;
+               if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
+                       break;
+               r = kvm_s390_inject_vcpu(vcpu, &s390irq);
+               break;
+       }
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;
                struct kvm_s390_irq s390irq;
@@ -2206,6 +2494,47 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
                break;
        }
+       case KVM_S390_MEM_OP: {
+               struct kvm_s390_mem_op mem_op;
+
+               if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+                       r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+               else
+                       r = -EFAULT;
+               break;
+       }
+       case KVM_S390_SET_IRQ_STATE: {
+               struct kvm_s390_irq_state irq_state;
+
+               r = -EFAULT;
+               if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+                       break;
+               if (irq_state.len > VCPU_IRQS_MAX_BUF ||
+                   irq_state.len == 0 ||
+                   irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
+                       r = -EINVAL;
+                       break;
+               }
+               r = kvm_s390_set_irq_state(vcpu,
+                                          (void __user *) irq_state.buf,
+                                          irq_state.len);
+               break;
+       }
+       case KVM_S390_GET_IRQ_STATE: {
+               struct kvm_s390_irq_state irq_state;
+
+               r = -EFAULT;
+               if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+                       break;
+               if (irq_state.len == 0) {
+                       r = -EINVAL;
+                       break;
+               }
+               r = kvm_s390_get_irq_state(vcpu,
+                                          (__u8 __user *)  irq_state.buf,
+                                          irq_state.len);
+               break;
+       }
        default:
                r = -ENOTTY;
        }
index 985c211..ca108b9 100644 (file)
@@ -70,16 +70,22 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
 }
 
-static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
+typedef u8 __bitwise ar_t;
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar)
 {
        u32 base2 = vcpu->arch.sie_block->ipb >> 28;
        u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
+       if (ar)
+               *ar = base2;
+
        return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
 static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
-                                             u64 *address1, u64 *address2)
+                                             u64 *address1, u64 *address2,
+                                             ar_t *ar_b1, ar_t *ar_b2)
 {
        u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
        u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
@@ -88,6 +94,11 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
 
        *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
        *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+
+       if (ar_b1)
+               *ar_b1 = base1;
+       if (ar_b2)
+               *ar_b2 = base2;
 }
 
 static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
@@ -98,7 +109,7 @@ static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2
                *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
 }
 
-static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar)
 {
        u32 base2 = vcpu->arch.sie_block->ipb >> 28;
        u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
@@ -107,14 +118,20 @@ static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
        if (disp2 & 0x80000)
                disp2+=0xfff00000;
 
+       if (ar)
+               *ar = base2;
+
        return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
 }
 
-static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar)
 {
        u32 base2 = vcpu->arch.sie_block->ipb >> 28;
        u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
 
+       if (ar)
+               *ar = base2;
+
        return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
 }
 
@@ -125,10 +142,22 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
        vcpu->arch.sie_block->gpsw.mask |= cc << 44;
 }
 
-/* test availability of facility in a kvm intance */
+/* test availability of facility in a kvm instance */
 static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
 {
-       return __test_facility(nr, kvm->arch.model.fac->kvm);
+       return __test_facility(nr, kvm->arch.model.fac->mask) &&
+               __test_facility(nr, kvm->arch.model.fac->list);
+}
+
+static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
+{
+       unsigned char *ptr;
+
+       if (nr >= MAX_FACILITY_BIT)
+               return -EINVAL;
+       ptr = (unsigned char *) fac_list + (nr >> 3);
+       *ptr |= (0x80UL >> (nr & 7));
+       return 0;
 }
 
 /* are cpu states controlled by user space */
@@ -149,9 +178,9 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
                                      struct kvm_s390_irq *irq);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
-                                                   u64 cr6, u64 schid);
-void kvm_s390_reinject_io_int(struct kvm *kvm,
-                             struct kvm_s390_interrupt_info *inti);
+                                                   u64 isc_mask, u32 schid);
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+                            struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
 /* implemented in intercept.c */
@@ -176,7 +205,10 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 /* implemented in kvm-s390.c */
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+                                       unsigned long addr);
 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
 void s390_vcpu_block(struct kvm_vcpu *vcpu);
@@ -240,6 +272,10 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
 extern struct kvm_device_ops kvm_flic_ops;
 int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+                          void __user *buf, int len);
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
+                          __u8 __user *buf, int len);
 
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
index bdd9b5b..d22d8ee 100644 (file)
@@ -36,15 +36,16 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
        struct kvm_vcpu *cpup;
        s64 hostclk, val;
        int i, rc;
+       ar_t ar;
        u64 op2;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       op2 = kvm_s390_get_base_disp_s(vcpu);
+       op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (op2 & 7)    /* Operand must be on a doubleword boundary */
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       rc = read_guest(vcpu, op2, &val, sizeof(val));
+       rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -68,20 +69,21 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
        u64 operand2;
        u32 address;
        int rc;
+       ar_t ar;
 
        vcpu->stat.instruction_spx++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       operand2 = kvm_s390_get_base_disp_s(vcpu);
+       operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        /* must be word boundary */
        if (operand2 & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
        /* get the value */
-       rc = read_guest(vcpu, operand2, &address, sizeof(address));
+       rc = read_guest(vcpu, operand2, ar, &address, sizeof(address));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -107,13 +109,14 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
        u64 operand2;
        u32 address;
        int rc;
+       ar_t ar;
 
        vcpu->stat.instruction_stpx++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       operand2 = kvm_s390_get_base_disp_s(vcpu);
+       operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        /* must be word boundary */
        if (operand2 & 3)
@@ -122,7 +125,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
        address = kvm_s390_get_prefix(vcpu);
 
        /* get the value */
-       rc = write_guest(vcpu, operand2, &address, sizeof(address));
+       rc = write_guest(vcpu, operand2, ar, &address, sizeof(address));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -136,18 +139,19 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
        u16 vcpu_id = vcpu->vcpu_id;
        u64 ga;
        int rc;
+       ar_t ar;
 
        vcpu->stat.instruction_stap++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_s(vcpu);
+       ga = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        if (ga & 1)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
+       rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -207,7 +211,7 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
        kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
        addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
        addr = kvm_s390_logical_to_effective(vcpu, addr);
-       if (kvm_s390_check_low_addr_protection(vcpu, addr))
+       if (kvm_s390_check_low_addr_prot_real(vcpu, addr))
                return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
        addr = kvm_s390_real_to_abs(vcpu, addr);
 
@@ -229,18 +233,20 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
        struct kvm_s390_interrupt_info *inti;
        unsigned long len;
        u32 tpi_data[3];
-       int cc, rc;
+       int rc;
        u64 addr;
+       ar_t ar;
 
-       rc = 0;
-       addr = kvm_s390_get_base_disp_s(vcpu);
+       addr = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (addr & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       cc = 0;
+
        inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
-       if (!inti)
-               goto no_interrupt;
-       cc = 1;
+       if (!inti) {
+               kvm_s390_set_psw_cc(vcpu, 0);
+               return 0;
+       }
+
        tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
        tpi_data[1] = inti->io.io_int_parm;
        tpi_data[2] = inti->io.io_int_word;
@@ -250,40 +256,51 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
                 * provided area.
                 */
                len = sizeof(tpi_data) - 4;
-               rc = write_guest(vcpu, addr, &tpi_data, len);
-               if (rc)
-                       return kvm_s390_inject_prog_cond(vcpu, rc);
+               rc = write_guest(vcpu, addr, ar, &tpi_data, len);
+               if (rc) {
+                       rc = kvm_s390_inject_prog_cond(vcpu, rc);
+                       goto reinject_interrupt;
+               }
        } else {
                /*
                 * Store the three-word I/O interruption code into
                 * the appropriate lowcore area.
                 */
                len = sizeof(tpi_data);
-               if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+               if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
+                       /* failed writes to the low core are not recoverable */
                        rc = -EFAULT;
+                       goto reinject_interrupt;
+               }
        }
+
+       /* irq was successfully handed to the guest */
+       kfree(inti);
+       kvm_s390_set_psw_cc(vcpu, 1);
+       return 0;
+reinject_interrupt:
        /*
         * If we encounter a problem storing the interruption code, the
         * instruction is suppressed from the guest's view: reinject the
         * interrupt.
         */
-       if (!rc)
+       if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
                kfree(inti);
-       else
-               kvm_s390_reinject_io_int(vcpu->kvm, inti);
-no_interrupt:
-       /* Set condition code and we're done. */
-       if (!rc)
-               kvm_s390_set_psw_cc(vcpu, cc);
+               rc = -EFAULT;
+       }
+       /* don't set the cc, a pgm irq was injected or we drop to user space */
        return rc ? -EFAULT : 0;
 }
 
 static int handle_tsch(struct kvm_vcpu *vcpu)
 {
-       struct kvm_s390_interrupt_info *inti;
+       struct kvm_s390_interrupt_info *inti = NULL;
+       const u64 isc_mask = 0xffUL << 24; /* all iscs set */
 
-       inti = kvm_s390_get_io_int(vcpu->kvm, 0,
-                                  vcpu->run->s.regs.gprs[1]);
+       /* a valid schid has at least one bit set */
+       if (vcpu->run->s.regs.gprs[1])
+               inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
+                                          vcpu->run->s.regs.gprs[1]);
 
        /*
         * Prepare exit to userspace.
@@ -348,7 +365,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
         * We need to shift the lower 32 facility bits (bit 0-31) from a u64
         * into a u32 memory representation. They will remain bits 0-31.
         */
-       fac = *vcpu->kvm->arch.model.fac->sie >> 32;
+       fac = *vcpu->kvm->arch.model.fac->list >> 32;
        rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
                            &fac, sizeof(fac));
        if (rc)
@@ -386,15 +403,16 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
        psw_compat_t new_psw;
        u64 addr;
        int rc;
+       ar_t ar;
 
        if (gpsw->mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       addr = kvm_s390_get_base_disp_s(vcpu);
+       addr = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (addr & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+       rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        if (!(new_psw.mask & PSW32_MASK_BASE))
@@ -412,14 +430,15 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
        psw_t new_psw;
        u64 addr;
        int rc;
+       ar_t ar;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       addr = kvm_s390_get_base_disp_s(vcpu);
+       addr = kvm_s390_get_base_disp_s(vcpu, &ar);
        if (addr & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
-       rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+       rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        vcpu->arch.sie_block->gpsw = new_psw;
@@ -433,18 +452,19 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
        u64 stidp_data = vcpu->arch.stidp_data;
        u64 operand2;
        int rc;
+       ar_t ar;
 
        vcpu->stat.instruction_stidp++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       operand2 = kvm_s390_get_base_disp_s(vcpu);
+       operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        if (operand2 & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-       rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
+       rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
 
@@ -467,6 +487,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
        for (n = mem->count - 1; n > 0 ; n--)
                memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
 
+       memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
        mem->vm[0].cpus_total = cpus;
        mem->vm[0].cpus_configured = cpus;
        mem->vm[0].cpus_standby = 0;
@@ -478,6 +499,17 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
        ASCEBC(mem->vm[0].cpi, 16);
 }
 
+static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar,
+                                u8 fc, u8 sel1, u16 sel2)
+{
+       vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
+       vcpu->run->s390_stsi.addr = addr;
+       vcpu->run->s390_stsi.ar = ar;
+       vcpu->run->s390_stsi.fc = fc;
+       vcpu->run->s390_stsi.sel1 = sel1;
+       vcpu->run->s390_stsi.sel2 = sel2;
+}
+
 static int handle_stsi(struct kvm_vcpu *vcpu)
 {
        int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
@@ -486,6 +518,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
        unsigned long mem = 0;
        u64 operand2;
        int rc = 0;
+       ar_t ar;
 
        vcpu->stat.instruction_stsi++;
        VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@@ -508,7 +541,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
                return 0;
        }
 
-       operand2 = kvm_s390_get_base_disp_s(vcpu);
+       operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
 
        if (operand2 & 0xfff)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -532,16 +565,20 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
                break;
        }
 
-       rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
+       rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
        if (rc) {
                rc = kvm_s390_inject_prog_cond(vcpu, rc);
                goto out;
        }
+       if (vcpu->kvm->arch.user_stsi) {
+               insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+               rc = -EREMOTE;
+       }
        trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
        free_page(mem);
        kvm_s390_set_psw_cc(vcpu, 0);
        vcpu->run->s.regs.gprs[0] = 0;
-       return 0;
+       return rc;
 out_no_data:
        kvm_s390_set_psw_cc(vcpu, 3);
 out:
@@ -670,7 +707,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
        }
 
        if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-               if (kvm_s390_check_low_addr_protection(vcpu, start))
+               if (kvm_s390_check_low_addr_prot_real(vcpu, start))
                        return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
        }
 
@@ -776,13 +813,14 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
        int reg, rc, nr_regs;
        u32 ctl_array[16];
        u64 ga;
+       ar_t ar;
 
        vcpu->stat.instruction_lctl++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_rs(vcpu);
+       ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
 
        if (ga & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -791,7 +829,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
        trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
        nr_regs = ((reg3 - reg1) & 0xf) + 1;
-       rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+       rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        reg = reg1;
@@ -814,13 +852,14 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
        int reg, rc, nr_regs;
        u32 ctl_array[16];
        u64 ga;
+       ar_t ar;
 
        vcpu->stat.instruction_stctl++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_rs(vcpu);
+       ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
 
        if (ga & 3)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -836,7 +875,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
                        break;
                reg = (reg + 1) % 16;
        } while (1);
-       rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+       rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
        return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
@@ -847,13 +886,14 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
        int reg, rc, nr_regs;
        u64 ctl_array[16];
        u64 ga;
+       ar_t ar;
 
        vcpu->stat.instruction_lctlg++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_rsy(vcpu);
+       ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
 
        if (ga & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -862,7 +902,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
        trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
        nr_regs = ((reg3 - reg1) & 0xf) + 1;
-       rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+       rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
        if (rc)
                return kvm_s390_inject_prog_cond(vcpu, rc);
        reg = reg1;
@@ -884,13 +924,14 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
        int reg, rc, nr_regs;
        u64 ctl_array[16];
        u64 ga;
+       ar_t ar;
 
        vcpu->stat.instruction_stctg++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       ga = kvm_s390_get_base_disp_rsy(vcpu);
+       ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
 
        if (ga & 7)
                return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -906,7 +947,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
                        break;
                reg = (reg + 1) % 16;
        } while (1);
-       rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+       rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
        return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
@@ -931,13 +972,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
        unsigned long hva, gpa;
        int ret = 0, cc = 0;
        bool writable;
+       ar_t ar;
 
        vcpu->stat.instruction_tprot++;
 
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
+       kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL);
 
        /* we only handle the Linux memory detection case:
         * access key == 0
@@ -946,11 +988,11 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
                return -EOPNOTSUPP;
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
                ipte_lock(vcpu);
-       ret = guest_translate_address(vcpu, address1, &gpa, 1);
+       ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
        if (ret == PGM_PROTECTION) {
                /* Write protected? Try again with read-only... */
                cc = 1;
-               ret = guest_translate_address(vcpu, address1, &gpa, 0);
+               ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
        }
        if (ret) {
                if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
index 23b1e86..72e58bd 100644 (file)
@@ -393,6 +393,9 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
        case SIGP_STORE_STATUS_AT_ADDRESS:
                vcpu->stat.instruction_sigp_store_status++;
                break;
+       case SIGP_STORE_ADDITIONAL_STATUS:
+               vcpu->stat.instruction_sigp_store_adtl_status++;
+               break;
        case SIGP_SET_PREFIX:
                vcpu->stat.instruction_sigp_prefix++;
                break;
@@ -431,7 +434,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
        if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
                return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-       order_code = kvm_s390_get_base_disp_rs(vcpu);
+       order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
        if (handle_sigp_order_in_user_space(vcpu, order_code))
                return -EOPNOTSUPP;
 
@@ -473,7 +476,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
        int r3 = vcpu->arch.sie_block->ipa & 0x000f;
        u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
        struct kvm_vcpu *dest_vcpu;
-       u8 order_code = kvm_s390_get_base_disp_rs(vcpu);
+       u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
 
        trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
 
index 753a567..f0b8544 100644 (file)
@@ -287,7 +287,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
        addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
        return (void __iomem *) addr + offset;
 }
-EXPORT_SYMBOL_GPL(pci_iomap_range);
+EXPORT_SYMBOL(pci_iomap_range);
 
 void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
 {
@@ -309,7 +309,7 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
        }
        spin_unlock(&zpci_iomap_lock);
 }
-EXPORT_SYMBOL_GPL(pci_iounmap);
+EXPORT_SYMBOL(pci_iounmap);
 
 static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
                    int size, u32 *val)
@@ -483,9 +483,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
        airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
 }
 
-static void zpci_map_resources(struct zpci_dev *zdev)
+static void zpci_map_resources(struct pci_dev *pdev)
 {
-       struct pci_dev *pdev = zdev->pdev;
        resource_size_t len;
        int i;
 
@@ -499,9 +498,8 @@ static void zpci_map_resources(struct zpci_dev *zdev)
        }
 }
 
-static void zpci_unmap_resources(struct zpci_dev *zdev)
+static void zpci_unmap_resources(struct pci_dev *pdev)
 {
-       struct pci_dev *pdev = zdev->pdev;
        resource_size_t len;
        int i;
 
@@ -651,7 +649,7 @@ int pcibios_add_device(struct pci_dev *pdev)
 
        zdev->pdev = pdev;
        pdev->dev.groups = zpci_attr_groups;
-       zpci_map_resources(zdev);
+       zpci_map_resources(pdev);
 
        for (i = 0; i < PCI_BAR_COUNT; i++) {
                res = &pdev->resource[i];
@@ -663,6 +661,11 @@ int pcibios_add_device(struct pci_dev *pdev)
        return 0;
 }
 
+void pcibios_release_device(struct pci_dev *pdev)
+{
+       zpci_unmap_resources(pdev);
+}
+
 int pcibios_enable_device(struct pci_dev *pdev, int mask)
 {
        struct zpci_dev *zdev = get_zdev(pdev);
@@ -670,7 +673,6 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
        zdev->pdev = pdev;
        zpci_debug_init_device(zdev);
        zpci_fmb_enable_device(zdev);
-       zpci_map_resources(zdev);
 
        return pci_enable_resources(pdev, mask);
 }
@@ -679,7 +681,6 @@ void pcibios_disable_device(struct pci_dev *pdev)
 {
        struct zpci_dev *zdev = get_zdev(pdev);
 
-       zpci_unmap_resources(zdev);
        zpci_fmb_disable_device(zdev);
        zpci_debug_exit_device(zdev);
        zdev->pdev = NULL;
@@ -688,7 +689,8 @@ void pcibios_disable_device(struct pci_dev *pdev)
 #ifdef CONFIG_HIBERNATE_CALLBACKS
 static int zpci_restore(struct device *dev)
 {
-       struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct zpci_dev *zdev = get_zdev(pdev);
        int ret = 0;
 
        if (zdev->state != ZPCI_FN_STATE_ONLINE)
@@ -698,7 +700,7 @@ static int zpci_restore(struct device *dev)
        if (ret)
                goto out;
 
-       zpci_map_resources(zdev);
+       zpci_map_resources(pdev);
        zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
                           zdev->start_dma + zdev->iommu_size - 1,
                           (u64) zdev->dma_table);
@@ -709,12 +711,14 @@ out:
 
 static int zpci_freeze(struct device *dev)
 {
-       struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct zpci_dev *zdev = get_zdev(pdev);
 
        if (zdev->state != ZPCI_FN_STATE_ONLINE)
                return 0;
 
        zpci_unregister_ioat(zdev, 0);
+       zpci_unmap_resources(pdev);
        return clp_disable_fh(zdev);
 }
 
index 8aa271b..b1bb2b7 100644 (file)
@@ -64,8 +64,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
        if (copy_from_user(buf, user_buffer, length))
                goto out;
 
-       memcpy_toio(io_addr, buf, length);
-       ret = 0;
+       ret = zpci_memcpy_toio(io_addr, buf, length);
 out:
        if (buf != local_buf)
                kfree(buf);
@@ -98,16 +97,16 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
                goto out;
        io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
 
-       ret = -EFAULT;
-       if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+       if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
+               ret = -EFAULT;
                goto out;
-
-       memcpy_fromio(buf, io_addr, length);
-
-       if (copy_to_user(user_buffer, buf, length))
+       }
+       ret = zpci_memcpy_fromio(buf, io_addr, length);
+       if (ret)
                goto out;
+       if (copy_to_user(user_buffer, buf, length))
+               ret = -EFAULT;
 
-       ret = 0;
 out:
        if (buf != local_buf)
                kfree(buf);
index 96ac69c..efb00ec 100644 (file)
@@ -86,6 +86,9 @@ config ARCH_DEFCONFIG
        default "arch/sparc/configs/sparc32_defconfig" if SPARC32
        default "arch/sparc/configs/sparc64_defconfig" if SPARC64
 
+config ARCH_PROC_KCORE_TEXT
+       def_bool y
+
 config IOMMU_HELPER
        bool
        default y if SPARC64
index 4f6725f..f5b6537 100644 (file)
@@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
                                   unsigned long reg_val);
 #endif
 
+
+#define HV_FAST_M7_GET_PERFREG 0x43
+#define HV_FAST_M7_SET_PERFREG 0x44
+
+#ifndef        __ASSEMBLY__
+unsigned long sun4v_m7_get_perfreg(unsigned long reg_num,
+                                     unsigned long *reg_val);
+unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
+                                     unsigned long reg_val);
+#endif
+
 /* Function numbers for HV_CORE_TRAP.  */
 #define HV_CORE_SET_VER                        0x00
 #define HV_CORE_PUTCHAR                        0x01
@@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
 #define HV_GRP_SDIO                    0x0108
 #define HV_GRP_SDIO_ERR                        0x0109
 #define HV_GRP_REBOOT_DATA             0x0110
+#define HV_GRP_M7_PERF                 0x0114
 #define HV_GRP_NIAG_PERF               0x0200
 #define HV_GRP_FIRE_PERF               0x0201
 #define HV_GRP_N2_CPU                  0x0202
index 9b672be..50d4840 100644 (file)
@@ -407,16 +407,16 @@ static inline void iounmap(volatile void __iomem *addr)
 {
 }
 
-#define ioread8(X)                     readb(X)
-#define ioread16(X)                    readw(X)
-#define ioread16be(X)                  __raw_readw(X)
-#define ioread32(X)                    readl(X)
-#define ioread32be(X)                  __raw_readl(X)
-#define iowrite8(val,X)                        writeb(val,X)
-#define iowrite16(val,X)               writew(val,X)
-#define iowrite16be(val,X)             __raw_writew(val,X)
-#define iowrite32(val,X)               writel(val,X)
-#define iowrite32be(val,X)             __raw_writel(val,X)
+#define ioread8                        readb
+#define ioread16               readw
+#define ioread16be             __raw_readw
+#define ioread32               readl
+#define ioread32be             __raw_readl
+#define iowrite8               writeb
+#define iowrite16              writew
+#define iowrite16be            __raw_writew
+#define iowrite32              writel
+#define iowrite32be            __raw_writel
 
 /* Create a virtual mapping cookie for an IO port range */
 void __iomem *ioport_map(unsigned long port, unsigned int nr);
index ec2e2e2..cc9b04a 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _ASM_SPARC_JUMP_LABEL_H
 #define _ASM_SPARC_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/types.h>
 
@@ -22,8 +22,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 typedef u32 jump_label_t;
 
 struct jump_entry {
@@ -32,4 +30,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
index c100dc2..176fa0a 100644 (file)
@@ -12,7 +12,6 @@
 extern int this_is_starfire;
 
 void check_if_starfire(void);
-int starfire_hard_smp_processor_id(void);
 void starfire_hookup(int);
 unsigned int starfire_translate(unsigned long imap, unsigned int upaid);
 
index 88d322b..07cc49e 100644 (file)
@@ -98,11 +98,7 @@ void sun4v_do_mna(struct pt_regs *regs,
 void do_privop(struct pt_regs *regs);
 void do_privact(struct pt_regs *regs);
 void do_cee(struct pt_regs *regs);
-void do_cee_tl1(struct pt_regs *regs);
-void do_dae_tl1(struct pt_regs *regs);
-void do_iae_tl1(struct pt_regs *regs);
 void do_div0_tl1(struct pt_regs *regs);
-void do_fpdis_tl1(struct pt_regs *regs);
 void do_fpieee_tl1(struct pt_regs *regs);
 void do_fpother_tl1(struct pt_regs *regs);
 void do_ill_tl1(struct pt_regs *regs);
index 5c55145..662500f 100644 (file)
@@ -48,6 +48,7 @@ static struct api_info api_table[] = {
        { .group = HV_GRP_VT_CPU,                               },
        { .group = HV_GRP_T5_CPU,                               },
        { .group = HV_GRP_DIAG,         .flags = FLAG_PRE_API   },
+       { .group = HV_GRP_M7_PERF,                              },
 };
 
 static DEFINE_SPINLOCK(hvapi_lock);
index caedf83..afbaba5 100644 (file)
@@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg)
        retl
         nop
 ENDPROC(sun4v_t5_set_perfreg)
+
+ENTRY(sun4v_m7_get_perfreg)
+       mov     %o1, %o4
+       mov     HV_FAST_M7_GET_PERFREG, %o5
+       ta      HV_FAST_TRAP
+       stx     %o1, [%o4]
+       retl
+       nop
+ENDPROC(sun4v_m7_get_perfreg)
+
+ENTRY(sun4v_m7_set_perfreg)
+       mov     HV_FAST_M7_SET_PERFREG, %o5
+       ta      HV_FAST_TRAP
+       retl
+       nop
+ENDPROC(sun4v_m7_set_perfreg)
index 9ce5afe..b36365f 100644 (file)
@@ -639,10 +639,7 @@ static void pci_claim_bus_resources(struct pci_bus *bus)
                                       (unsigned long long)r->end,
                                       (unsigned int)r->flags);
 
-                       if (pci_claim_resource(dev, i) == 0)
-                               continue;
-
-                       pci_claim_bridge_resource(dev, i);
+                       pci_claim_resource(dev, i);
                }
        }
 
index 7e967c8..eb978c7 100644 (file)
@@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = {
        .pcr_nmi_disable        = PCR_N4_PICNPT,
 };
 
+static u64 m7_pcr_read(unsigned long reg_num)
+{
+       unsigned long val;
+
+       (void) sun4v_m7_get_perfreg(reg_num, &val);
+
+       return val;
+}
+
+static void m7_pcr_write(unsigned long reg_num, u64 val)
+{
+       (void) sun4v_m7_set_perfreg(reg_num, val);
+}
+
+static const struct pcr_ops m7_pcr_ops = {
+       .read_pcr               = m7_pcr_read,
+       .write_pcr              = m7_pcr_write,
+       .read_pic               = n4_pic_read,
+       .write_pic              = n4_pic_write,
+       .nmi_picl_value         = n4_picl_value,
+       .pcr_nmi_enable         = (PCR_N4_PICNPT | PCR_N4_STRACE |
+                                  PCR_N4_UTRACE | PCR_N4_TOE |
+                                  (26 << PCR_N4_SL_SHIFT)),
+       .pcr_nmi_disable        = PCR_N4_PICNPT,
+};
 
 static unsigned long perf_hsvc_group;
 static unsigned long perf_hsvc_major;
@@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void)
                        perf_hsvc_group = HV_GRP_T5_CPU;
                        break;
 
+               case SUN4V_CHIP_SPARC_M7:
+                       perf_hsvc_group = HV_GRP_M7_PERF;
+                       break;
+
                default:
                        return -ENODEV;
                }
@@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void)
                pcr_ops = &n5_pcr_ops;
                break;
 
+       case SUN4V_CHIP_SPARC_M7:
+               pcr_ops = &m7_pcr_ops;
+               break;
+
        default:
                ret = -ENODEV;
                break;
index 46a5e45..86eebfa 100644 (file)
@@ -792,6 +792,42 @@ static const struct sparc_pmu niagara4_pmu = {
        .num_pic_regs   = 4,
 };
 
+static void sparc_m7_write_pmc(int idx, u64 val)
+{
+       u64 pcr;
+
+       pcr = pcr_ops->read_pcr(idx);
+       /* ensure ov and ntc are reset */
+       pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
+
+       pcr_ops->write_pic(idx, val & 0xffffffff);
+
+       pcr_ops->write_pcr(idx, pcr);
+}
+
+static const struct sparc_pmu sparc_m7_pmu = {
+       .event_map      = niagara4_event_map,
+       .cache_map      = &niagara4_cache_map,
+       .max_events     = ARRAY_SIZE(niagara4_perfmon_event_map),
+       .read_pmc       = sparc_vt_read_pmc,
+       .write_pmc      = sparc_m7_write_pmc,
+       .upper_shift    = 5,
+       .lower_shift    = 5,
+       .event_mask     = 0x7ff,
+       .user_bit       = PCR_N4_UTRACE,
+       .priv_bit       = PCR_N4_STRACE,
+
+       /* We explicitly don't support hypervisor tracing. */
+       .hv_bit         = 0,
+
+       .irq_bit        = PCR_N4_TOE,
+       .upper_nop      = 0,
+       .lower_nop      = 0,
+       .flags          = 0,
+       .max_hw_events  = 4,
+       .num_pcrs       = 4,
+       .num_pic_regs   = 4,
+};
 static const struct sparc_pmu *sparc_pmu __read_mostly;
 
 static u64 event_encoding(u64 event_id, int idx)
@@ -960,6 +996,8 @@ out:
        cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
 }
 
+static void sparc_pmu_start(struct perf_event *event, int flags);
+
 /* On this PMU each PIC has it's own PCR control register.  */
 static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
 {
@@ -972,20 +1010,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
                struct perf_event *cp = cpuc->event[i];
                struct hw_perf_event *hwc = &cp->hw;
                int idx = hwc->idx;
-               u64 enc;
 
                if (cpuc->current_idx[i] != PIC_NO_INDEX)
                        continue;
 
-               sparc_perf_event_set_period(cp, hwc, idx);
                cpuc->current_idx[i] = idx;
 
-               enc = perf_event_get_enc(cpuc->events[i]);
-               cpuc->pcr[idx] &= ~mask_for_index(idx);
-               if (hwc->state & PERF_HES_STOPPED)
-                       cpuc->pcr[idx] |= nop_for_index(idx);
-               else
-                       cpuc->pcr[idx] |= event_encoding(enc, idx);
+               sparc_pmu_start(cp, PERF_EF_RELOAD);
        }
 out:
        for (i = 0; i < cpuc->n_events; i++) {
@@ -1101,7 +1132,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
        int i;
 
        local_irq_save(flags);
-       perf_pmu_disable(event->pmu);
 
        for (i = 0; i < cpuc->n_events; i++) {
                if (event == cpuc->event[i]) {
@@ -1127,7 +1157,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
                }
        }
 
-       perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
 }
 
@@ -1361,7 +1390,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
        unsigned long flags;
 
        local_irq_save(flags);
-       perf_pmu_disable(event->pmu);
 
        n0 = cpuc->n_events;
        if (n0 >= sparc_pmu->max_hw_events)
@@ -1394,7 +1422,6 @@ nocheck:
 
        ret = 0;
 out:
-       perf_pmu_enable(event->pmu);
        local_irq_restore(flags);
        return ret;
 }
@@ -1667,6 +1694,10 @@ static bool __init supported_pmu(void)
                sparc_pmu = &niagara4_pmu;
                return true;
        }
+       if (!strcmp(sparc_pmu_type, "sparc-m7")) {
+               sparc_pmu = &sparc_m7_pmu;
+               return true;
+       }
        return false;
 }
 
index 0be7bf9..46a5964 100644 (file)
@@ -287,6 +287,8 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
                        printk("             TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n",
                               gp->tpc, gp->o7, gp->i7, gp->rpc);
                }
+
+               touch_nmi_watchdog();
        }
 
        memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
@@ -362,6 +364,8 @@ static void pmu_snapshot_all_cpus(void)
                       (cpu == this_cpu ? '*' : ' '), cpu,
                       pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3],
                       pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]);
+
+               touch_nmi_watchdog();
        }
 
        memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
index da6f1a7..61139d9 100644 (file)
@@ -1406,11 +1406,32 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
        scheduler_ipi();
 }
 
-/* This is a nop because we capture all other cpus
- * anyways when making the PROM active.
- */
+static void stop_this_cpu(void *dummy)
+{
+       prom_stopself();
+}
+
 void smp_send_stop(void)
 {
+       int cpu;
+
+       if (tlb_type == hypervisor) {
+               for_each_online_cpu(cpu) {
+                       if (cpu == smp_processor_id())
+                               continue;
+#ifdef CONFIG_SUN_LDOMS
+                       if (ldom_domaining_enabled) {
+                               unsigned long hv_err;
+                               hv_err = sun4v_cpu_stop(cpu);
+                               if (hv_err)
+                                       printk(KERN_ERR "sun4v_cpu_stop() "
+                                              "failed err=%lu\n", hv_err);
+                       } else
+#endif
+                               prom_stopcpu_cpuid(cpu);
+               }
+       } else
+               smp_call_function(stop_this_cpu, NULL, 0);
 }
 
 /**
index 82281a5..167fdfd 100644 (file)
@@ -28,11 +28,6 @@ void check_if_starfire(void)
                this_is_starfire = 1;
 }
 
-int starfire_hard_smp_processor_id(void)
-{
-       return upa_readl(0x1fff40000d0UL);
-}
-
 /*
  * Each Starfire board has 32 registers which perform translation
  * and delivery of traditional interrupt packets into the extended
index c85403d..30e7ddb 100644 (file)
@@ -333,7 +333,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second
        long err;
 
        /* No need for backward compatibility. We can start fresh... */
-       if (call <= SEMCTL) {
+       if (call <= SEMTIMEDOP) {
                switch (call) {
                case SEMOP:
                        err = sys_semtimedop(first, ptr,
index 2f80d23..18147a5 100644 (file)
@@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
        .rating = 100,
        .read   = timer_cs_read,
        .mask   = CLOCKSOURCE_MASK(64),
-       .shift  = 2,
        .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
 static __init int setup_timer_cs(void)
 {
        timer_cs_enabled = 1;
-       timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
-                                           timer_cs.shift);
-
-       return clocksource_register(&timer_cs);
+       return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
 }
 
 #ifdef CONFIG_SMP
index a27651e..0e69974 100644 (file)
@@ -2427,6 +2427,8 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
                }
                user_instruction_dump ((unsigned int __user *) regs->tpc);
        }
+       if (panic_on_oops)
+               panic("Fatal exception");
        if (regs->tstate & TSTATE_PRIV)
                do_exit(SIGKILL);
        do_exit(SIGSEGV);
@@ -2564,27 +2566,6 @@ void do_cee(struct pt_regs *regs)
        die_if_kernel("TL0: Cache Error Exception", regs);
 }
 
-void do_cee_tl1(struct pt_regs *regs)
-{
-       exception_enter();
-       dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-       die_if_kernel("TL1: Cache Error Exception", regs);
-}
-
-void do_dae_tl1(struct pt_regs *regs)
-{
-       exception_enter();
-       dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-       die_if_kernel("TL1: Data Access Exception", regs);
-}
-
-void do_iae_tl1(struct pt_regs *regs)
-{
-       exception_enter();
-       dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-       die_if_kernel("TL1: Instruction Access Exception", regs);
-}
-
 void do_div0_tl1(struct pt_regs *regs)
 {
        exception_enter();
@@ -2592,13 +2573,6 @@ void do_div0_tl1(struct pt_regs *regs)
        die_if_kernel("TL1: DIV0 Exception", regs);
 }
 
-void do_fpdis_tl1(struct pt_regs *regs)
-{
-       exception_enter();
-       dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
-       die_if_kernel("TL1: FPU Disabled", regs);
-}
-
 void do_fpieee_tl1(struct pt_regs *regs)
 {
        exception_enter();
index b7f6334..857ad4f 100644 (file)
@@ -8,9 +8,11 @@
 
        .text
 ENTRY(memmove) /* o0=dst o1=src o2=len */
-       mov             %o0, %g1
+       brz,pn          %o2, 99f
+        mov            %o0, %g1
+
        cmp             %o0, %o1
-       bleu,pt         %xcc, memcpy
+       bleu,pt         %xcc, 2f
         add            %o1, %o2, %g7
        cmp             %g7, %o0
        bleu,pt         %xcc, memcpy
@@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */
        stb             %g7, [%o0]
        bne,pt          %icc, 1b
         sub            %o0, 1, %o0
-
+99:
        retl
         mov            %g1, %o0
+
+       /* We can't just call memcpy for these memmove cases.  On some
+        * chips the memcpy uses cache initializing stores and when dst
+        * and src are close enough, those can clobber the source data
+        * before we've loaded it in.
+        */
+2:     or              %o0, %o1, %g7
+       or              %o2, %g7, %g7
+       andcc           %g7, 0x7, %g0
+       bne,pn          %xcc, 4f
+        nop
+
+3:     ldx             [%o1], %g7
+       add             %o1, 8, %o1
+       subcc           %o2, 8, %o2
+       add             %o0, 8, %o0
+       bne,pt          %icc, 3b
+        stx            %g7, [%o0 - 0x8]
+       ba,a,pt         %xcc, 99b
+
+4:     ldub            [%o1], %g7
+       add             %o1, 1, %o1
+       subcc           %o2, 1, %o2
+       add             %o0, 1, %o0
+       bne,pt          %icc, 4b
+        stb            %g7, [%o0 - 0x1]
+       ba,a,pt         %xcc, 99b
 ENDPROC(memmove)
index 3ea267c..4ca0d6b 100644 (file)
@@ -2820,7 +2820,7 @@ static int __init report_memory(void)
 
        return 0;
 }
-device_initcall(report_memory);
+arch_initcall(report_memory);
 
 #ifdef CONFIG_SMP
 #define do_flush_tlb_kernel_range      smp_flush_tlb_kernel_range
index d412b08..00178ec 100644 (file)
@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
 
 void update_vsyscall(struct timekeeper *tk)
 {
-       if (tk->tkr.clock != &cycle_counter_cs)
+       if (tk->tkr_mono.clock != &cycle_counter_cs)
                return;
 
        write_seqcount_begin(&vdso_data->tb_seq);
 
-       vdso_data->cycle_last           = tk->tkr.cycle_last;
-       vdso_data->mask                 = tk->tkr.mask;
-       vdso_data->mult                 = tk->tkr.mult;
-       vdso_data->shift                = tk->tkr.shift;
+       vdso_data->cycle_last           = tk->tkr_mono.cycle_last;
+       vdso_data->mask                 = tk->tkr_mono.mask;
+       vdso_data->mult                 = tk->tkr_mono.mult;
+       vdso_data->shift                = tk->tkr_mono.shift;
 
        vdso_data->wall_time_sec        = tk->xtime_sec;
-       vdso_data->wall_time_snsec      = tk->tkr.xtime_nsec;
+       vdso_data->wall_time_snsec      = tk->tkr_mono.xtime_nsec;
 
        vdso_data->monotonic_time_sec   = tk->xtime_sec
                                        + tk->wall_to_monotonic.tv_sec;
-       vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
+       vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
                                        + ((u64)tk->wall_to_monotonic.tv_nsec
-                                               << tk->tkr.shift);
+                                               << tk->tkr_mono.shift);
        while (vdso_data->monotonic_time_snsec >=
-                                       (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+                                       (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
                vdso_data->monotonic_time_snsec -=
-                                       ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+                                       ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
                vdso_data->monotonic_time_sec++;
        }
 
        vdso_data->wall_time_coarse_sec = tk->xtime_sec;
-       vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
-                                                tk->tkr.shift);
+       vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
+                                                tk->tkr_mono.shift);
 
        vdso_data->monotonic_time_coarse_sec =
                vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
index b7d31ca..867bc5b 100644 (file)
@@ -235,12 +235,10 @@ config ARCH_WANT_GENERAL_HUGETLB
        def_bool y
 
 config ZONE_DMA32
-       bool
-       default X86_64
+       def_bool y if X86_64
 
 config AUDIT_ARCH
-       bool
-       default X86_64
+       def_bool y if X86_64
 
 config ARCH_SUPPORTS_OPTIMIZED_INLINING
        def_bool y
@@ -891,7 +889,8 @@ config UP_LATE_INIT
        depends on !SMP && X86_LOCAL_APIC
 
 config X86_UP_APIC
-       bool "Local APIC support on uniprocessors"
+       bool "Local APIC support on uniprocessors" if !PCI_MSI
+       default PCI_MSI
        depends on X86_32 && !SMP && !X86_32_NON_STANDARD
        ---help---
          A local APIC (Advanced Programmable Interrupt Controller) is an
@@ -903,10 +902,6 @@ config X86_UP_APIC
          performance counters), and the NMI watchdog which detects hard
          lockups.
 
-config X86_UP_APIC_MSI
-       def_bool y
-       select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI
-
 config X86_UP_IOAPIC
        bool "IO-APIC support on uniprocessors"
        depends on X86_UP_APIC
@@ -925,8 +920,8 @@ config X86_LOCAL_APIC
        select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 
 config X86_IO_APIC
-       def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
-       depends on X86_LOCAL_APIC
+       def_bool y
+       depends on X86_LOCAL_APIC || X86_UP_IOAPIC
        select IRQ_DOMAIN
 
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
@@ -1145,10 +1140,10 @@ config MICROCODE_OLD_INTERFACE
        depends on MICROCODE
 
 config MICROCODE_INTEL_EARLY
-       def_bool n
+       bool
 
 config MICROCODE_AMD_EARLY
-       def_bool n
+       bool
 
 config MICROCODE_EARLY
        bool "Early load microcode"
@@ -1747,14 +1742,11 @@ config KEXEC_VERIFY_SIG
        depends on KEXEC_FILE
        ---help---
          This option makes kernel signature verification mandatory for
-         kexec_file_load() syscall. If kernel is signature can not be
-         verified, kexec_file_load() will fail.
-
-         This option enforces signature verification at generic level.
-         One needs to enable signature verification for type of kernel
-         image being loaded to make sure it works. For example, enable
-         bzImage signature verification option to be able to load and
-         verify signatures of bzImage. Otherwise kernel loading will fail.
+         the kexec_file_load() syscall.
+
+         In addition to that option, you need to enable signature
+         verification for the corresponding kernel image type being
+         loaded in order for this to work.
 
 config KEXEC_BZIMAGE_VERIFY_SIG
        bool "Enable bzImage signature verification support"
index 7083c16..d7b1f65 100644 (file)
 static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
                LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
 
-struct kaslr_setup_data {
-       __u64 next;
-       __u32 type;
-       __u32 len;
-       __u8 data[1];
-} kaslr_setup_data;
-
 #define I8254_PORT_CONTROL     0x43
 #define I8254_PORT_COUNTER0    0x40
 #define I8254_CMD_READBACK     0xC0
@@ -302,28 +295,7 @@ static unsigned long find_random_addr(unsigned long minimum,
        return slots_fetch_random();
 }
 
-static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
-{
-       struct setup_data *data;
-
-       kaslr_setup_data.type = SETUP_KASLR;
-       kaslr_setup_data.len = 1;
-       kaslr_setup_data.next = 0;
-       kaslr_setup_data.data[0] = enabled;
-
-       data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
-       while (data && data->next)
-               data = (struct setup_data *)(unsigned long)data->next;
-
-       if (data)
-               data->next = (unsigned long)&kaslr_setup_data;
-       else
-               params->hdr.setup_data = (unsigned long)&kaslr_setup_data;
-
-}
-
-unsigned char *choose_kernel_location(struct boot_params *params,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
                                      unsigned char *input,
                                      unsigned long input_size,
                                      unsigned char *output,
@@ -335,17 +307,16 @@ unsigned char *choose_kernel_location(struct boot_params *params,
 #ifdef CONFIG_HIBERNATION
        if (!cmdline_find_option_bool("kaslr")) {
                debug_putstr("KASLR disabled by default...\n");
-               add_kaslr_setup_data(params, 0);
                goto out;
        }
 #else
        if (cmdline_find_option_bool("nokaslr")) {
                debug_putstr("KASLR disabled by cmdline...\n");
-               add_kaslr_setup_data(params, 0);
                goto out;
        }
 #endif
-       add_kaslr_setup_data(params, 1);
+
+       boot_params->hdr.loadflags |= KASLR_FLAG;
 
        /* Record the various known unsafe memory ranges. */
        mem_avoid_init((unsigned long)input, input_size,
index 1d7fbbc..8ef964d 100644 (file)
@@ -29,6 +29,7 @@
 #include <asm/page_types.h>
 #include <asm/boot.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
        __HEAD
 ENTRY(startup_32)
@@ -102,7 +103,7 @@ preferred_addr:
         * Test KEEP_SEGMENTS flag to see if the bootloader is asking
         * us to not reload segments
         */
-       testb   $(1<<6), BP_loadflags(%esi)
+       testb   $KEEP_SEGMENTS, BP_loadflags(%esi)
        jnz     1f
 
        cli
index 6b1766c..b0c0d16 100644 (file)
@@ -31,6 +31,7 @@
 #include <asm/msr.h>
 #include <asm/processor-flags.h>
 #include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
 
        __HEAD
        .code32
@@ -46,7 +47,7 @@ ENTRY(startup_32)
         * Test KEEP_SEGMENTS flag to see if the bootloader is asking
         * us to not reload segments
         */
-       testb $(1<<6), BP_loadflags(%esi)
+       testb $KEEP_SEGMENTS, BP_loadflags(%esi)
        jnz 1f
 
        cli
@@ -164,7 +165,7 @@ ENTRY(startup_32)
        /* After gdt is loaded */
        xorl    %eax, %eax
        lldt    %ax
-       movl    $0x20, %eax
+       movl    $__BOOT_TSS, %eax
        ltr     %ax
 
        /*
index 5903089..a107b93 100644 (file)
@@ -377,6 +377,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
 
        real_mode = rmode;
 
+       /* Clear it for solely in-kernel use */
+       real_mode->hdr.loadflags &= ~KASLR_FLAG;
+
        sanitize_boot_params(real_mode);
 
        if (real_mode->screen_info.orig_video_mode == 7) {
@@ -401,8 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
         * the entire decompressed kernel plus relocation table, or the
         * entire decompressed kernel plus .bss and .brk sections.
         */
-       output = choose_kernel_location(real_mode, input_data, input_len,
-                                       output,
+       output = choose_kernel_location(real_mode, input_data, input_len, output,
                                        output_len > run_size ? output_len
                                                              : run_size);
 
index ee3576b..89dd0d7 100644 (file)
@@ -57,7 +57,7 @@ int cmdline_find_option_bool(const char *option);
 
 #if CONFIG_RANDOMIZE_BASE
 /* aslr.c */
-unsigned char *choose_kernel_location(struct boot_params *params,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
                                      unsigned char *input,
                                      unsigned long input_size,
                                      unsigned char *output,
@@ -66,7 +66,7 @@ unsigned char *choose_kernel_location(struct boot_params *params,
 bool has_cpuflag(int flag);
 #else
 static inline
-unsigned char *choose_kernel_location(struct boot_params *params,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
                                      unsigned char *input,
                                      unsigned long input_size,
                                      unsigned char *output,
index 493f3fd..318b846 100644 (file)
@@ -30,7 +30,7 @@ int strcmp(const char *str1, const char *str2)
        int delta = 0;
 
        while (*s1 || *s2) {
-               delta = *s2 - *s1;
+               delta = *s1 - *s2;
                if (delta)
                        return delta;
                s1++;
index 748e8d0..aa8a96b 100644 (file)
 /*
  * Common variables
  */
-int adapter;                   /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
-u16 video_segment;
+int adapter;           /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
 int force_x, force_y;  /* Don't query the BIOS for cols/rows */
-
 int do_restore;                /* Screen contents changed during mode flip */
 int graphic_mode;      /* Graphic mode with linear frame buffer */
 
index 43eda28..05111bb 100644 (file)
@@ -17,6 +17,8 @@
 #include "video.h"
 #include "vesa.h"
 
+static u16 video_segment;
+
 static void store_cursor_position(void)
 {
        struct biosregs ireg, oreg;
index 0bb2549..b54e032 100644 (file)
@@ -91,7 +91,6 @@ int mode_defined(u16 mode);   /* video.c */
 #define ADAPTER_VGA    2
 
 extern int adapter;
-extern u16 video_segment;
 extern int force_x, force_y;   /* Don't query the BIOS for cols/rows */
 extern int do_restore;         /* Restore screen contents */
 extern int graphic_mode;       /* Graphics mode with linear frame buffer */
index 419819d..aaa1118 100644 (file)
@@ -248,7 +248,7 @@ CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
index 4c311dd..315b861 100644 (file)
@@ -243,7 +243,7 @@ CONFIG_USB=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
 CONFIG_USB_OHCI_HCD=y
 CONFIG_USB_UHCI_HCD=y
 CONFIG_USB_PRINTER=y
index 947c6bf..54f60ab 100644 (file)
@@ -1155,7 +1155,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
                src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
                if (!src)
                        return -ENOMEM;
-               assoc = (src + req->cryptlen + auth_tag_len);
+               assoc = (src + req->cryptlen);
                scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
                scatterwalk_map_and_copy(assoc, req->assoc, 0,
                        req->assoclen, 0);
@@ -1180,7 +1180,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
                scatterwalk_done(&src_sg_walk, 0, 0);
                scatterwalk_done(&assoc_sg_walk, 0, 0);
        } else {
-               scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1);
+               scatterwalk_map_and_copy(dst, req->dst, 0, tempCipherLen, 1);
                kfree(src);
        }
        return retval;
index 26d49eb..225be06 100644 (file)
@@ -178,7 +178,7 @@ continue_block:
        ## 2a) PROCESS FULL BLOCKS:
        ################################################################
 full_block:
-       movq    $128,%rax
+       movl    $128,%eax
        lea     128*8*2(block_0), block_1
        lea     128*8*3(block_0), block_2
        add     $128*8*1, block_0
index a039d21..a350c99 100644 (file)
@@ -264,7 +264,7 @@ ENTRY(twofish_enc_blk)
        movq    R1,     8(%rsi)
 
        popq    R1
-       movq    $1,%rax
+       movl    $1,%eax
        ret
 ENDPROC(twofish_enc_blk)
 
@@ -316,6 +316,6 @@ ENTRY(twofish_dec_blk)
        movq    R1,     8(%rsi)
 
        popq    R1
-       movq    $1,%rax
+       movl    $1,%eax
        ret
 ENDPROC(twofish_dec_blk)
index e785b42..bb635c6 100644 (file)
@@ -3,7 +3,6 @@
 #
 
 obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
-obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
 
 obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
 
index d0165c9..c81d35e 100644 (file)
@@ -161,8 +161,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
 }
 
 static int ia32_restore_sigcontext(struct pt_regs *regs,
-                                  struct sigcontext_ia32 __user *sc,
-                                  unsigned int *pax)
+                                  struct sigcontext_ia32 __user *sc)
 {
        unsigned int tmpflags, err = 0;
        void __user *buf;
@@ -184,7 +183,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
                RELOAD_SEG(es);
 
                COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-               COPY(dx); COPY(cx); COPY(ip);
+               COPY(dx); COPY(cx); COPY(ip); COPY(ax);
                /* Don't touch extended registers */
 
                COPY_SEG_CPL3(cs);
@@ -197,12 +196,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
 
                get_user_ex(tmp, &sc->fpstate);
                buf = compat_ptr(tmp);
-
-               get_user_ex(*pax, &sc->ax);
        } get_user_catch(err);
 
        err |= restore_xstate_sig(buf, 1);
 
+       force_iret();
+
        return err;
 }
 
@@ -211,7 +210,6 @@ asmlinkage long sys32_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
        sigset_t set;
-       unsigned int ax;
 
        if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
                goto badframe;
@@ -224,9 +222,9 @@ asmlinkage long sys32_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
+       if (ia32_restore_sigcontext(regs, &frame->sc))
                goto badframe;
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "32bit sigreturn");
@@ -238,7 +236,6 @@ asmlinkage long sys32_rt_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe_ia32 __user *frame;
        sigset_t set;
-       unsigned int ax;
 
        frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
 
@@ -249,13 +246,13 @@ asmlinkage long sys32_rt_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+       if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
                goto badframe;
 
        if (compat_restore_altstack(&frame->uc.uc_stack))
                goto badframe;
 
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "32bit rt sigreturn");
index 156ebca..a821b1c 100644 (file)
 
        .section .entry.text, "ax"
 
-       .macro IA32_ARG_FIXUP noebp=0
-       movl    %edi,%r8d
-       .if \noebp
-       .else
-       movl    %ebp,%r9d
-       .endif
-       xchg    %ecx,%esi
-       movl    %ebx,%edi
-       movl    %edx,%edx       /* zero extension */
-       .endm 
-
-       /* clobbers %eax */     
-       .macro  CLEAR_RREGS offset=0, _r9=rax
+       /* clobbers %rax */
+       .macro  CLEAR_RREGS _r9=rax
        xorl    %eax,%eax
-       movq    %rax,\offset+R11(%rsp)
-       movq    %rax,\offset+R10(%rsp)
-       movq    %\_r9,\offset+R9(%rsp)
-       movq    %rax,\offset+R8(%rsp)
+       movq    %rax,R11(%rsp)
+       movq    %rax,R10(%rsp)
+       movq    %\_r9,R9(%rsp)
+       movq    %rax,R8(%rsp)
        .endm
 
        /*
         * If it's -1 to make us punt the syscall, then (u32)-1 is still
         * an appropriately invalid value.
         */
-       .macro LOAD_ARGS32 offset, _r9=0
+       .macro LOAD_ARGS32 _r9=0
        .if \_r9
-       movl \offset+16(%rsp),%r9d
+       movl R9(%rsp),%r9d
        .endif
-       movl \offset+40(%rsp),%ecx
-       movl \offset+48(%rsp),%edx
-       movl \offset+56(%rsp),%esi
-       movl \offset+64(%rsp),%edi
+       movl RCX(%rsp),%ecx
+       movl RDX(%rsp),%edx
+       movl RSI(%rsp),%esi
+       movl RDI(%rsp),%edi
        movl %eax,%eax                  /* zero extension */
        .endm
        
@@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit)
 /*
  * 32bit SYSENTER instruction entry.
  *
+ * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
+ * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old rip (!!!) and rflags.
+ *
  * Arguments:
- * %eax        System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp user stack
- * 0(%ebp) Arg6        
- *     
- * Interrupts off.
- *     
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  user stack
+ * 0(%ebp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. Set up a complete hardware stack frame to share code
+ * path below. We set up a complete hardware stack frame to share code
  * with the int 0x80 path.
- */    
+ */
 ENTRY(ia32_sysenter_target)
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
        CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rsp,rbp
-       SWAPGS_UNSAFE_STACK
-       movq    PER_CPU_VAR(kernel_stack), %rsp
-       addq    $(KERNEL_STACK_OFFSET),%rsp
+
        /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs, here we enable it straight after entry:
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
         */
+       SWAPGS_UNSAFE_STACK
+       movq    PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
        ENABLE_INTERRUPTS(CLBR_NONE)
-       movl    %ebp,%ebp               /* zero extension */
-       pushq_cfi $__USER32_DS
-       /*CFI_REL_OFFSET ss,0*/
-       pushq_cfi %rbp
-       CFI_REL_OFFSET rsp,0
-       pushfq_cfi
-       /*CFI_REL_OFFSET rflags,0*/
-       movl    TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
-       CFI_REGISTER rip,r10
-       pushq_cfi $__USER32_CS
-       /*CFI_REL_OFFSET cs,0*/
+
+       /* Zero-extending 32-bit regs, do not remove */
+       movl    %ebp, %ebp
        movl    %eax, %eax
-       pushq_cfi %r10
-       CFI_REL_OFFSET rip,0
-       pushq_cfi %rax
+
+       movl    ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
+       CFI_REGISTER rip,r10
+
+       /* Construct struct pt_regs on stack */
+       pushq_cfi       $__USER32_DS            /* pt_regs->ss */
+       pushq_cfi       %rbp                    /* pt_regs->sp */
+       CFI_REL_OFFSET  rsp,0
+       pushfq_cfi                              /* pt_regs->flags */
+       pushq_cfi       $__USER32_CS            /* pt_regs->cs */
+       pushq_cfi       %r10 /* pt_regs->ip = thread_info->sysenter_return */
+       CFI_REL_OFFSET  rip,0
+       pushq_cfi_reg   rax                     /* pt_regs->orig_ax */
+       pushq_cfi_reg   rdi                     /* pt_regs->di */
+       pushq_cfi_reg   rsi                     /* pt_regs->si */
+       pushq_cfi_reg   rdx                     /* pt_regs->dx */
+       pushq_cfi_reg   rcx                     /* pt_regs->cx */
+       pushq_cfi_reg   rax                     /* pt_regs->ax */
        cld
-       SAVE_ARGS 0,1,0
-       /* no need to do an access_ok check here because rbp has been
-          32bit zero extended */ 
+       sub     $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+       CFI_ADJUST_CFA_OFFSET 10*8
+
+       /*
+        * no need to do an access_ok check here because rbp has been
+        * 32bit zero extended
+        */
        ASM_STAC
 1:     movl    (%rbp),%ebp
        _ASM_EXTABLE(1b,ia32_badarg)
@@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target)
         * ourselves.  To save a few cycles, we can check whether
         * NT was set instead of doing an unconditional popfq.
         */
-       testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
+       testl $X86_EFLAGS_NT,EFLAGS(%rsp)
        jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
-       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        CFI_REMEMBER_STATE
        jnz  sysenter_tracesys
        cmpq    $(IA32_NR_syscalls-1),%rax
        ja      ia32_badsys
 sysenter_do_call:
-       IA32_ARG_FIXUP
+       /* 32bit syscall -> 64bit C ABI argument conversion */
+       movl    %edi,%r8d       /* arg5 */
+       movl    %ebp,%r9d       /* arg6 */
+       xchg    %ecx,%esi       /* rsi:arg2, rcx:arg4 */
+       movl    %ebx,%edi       /* arg1 */
+       movl    %edx,%edx       /* arg3 (zero extension) */
 sysenter_dispatch:
        call    *ia32_sys_call_table(,%rax,8)
-       movq    %rax,RAX-ARGOFFSET(%rsp)
+       movq    %rax,RAX(%rsp)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl   $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl   $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz     sysexit_audit
 sysexit_from_sys_call:
-       andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       /* clear IF, that popfq doesn't enable interrupts early */
-       andl    $~0x200,EFLAGS-ARGOFFSET(%rsp)
-       movl    RIP-ARGOFFSET(%rsp),%edx                /* User %eip */
-       CFI_REGISTER rip,rdx
-       RESTORE_ARGS 0,24,0,0,0,0
+       /*
+        * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
+        * NMI between STI and SYSEXIT has poorly specified behavior,
+        * and an NMI followed by an IRQ with usergs is fatal.  So
+        * we just pretend we're using SYSEXIT but we really use
+        * SYSRETL instead.
+        *
+        * This code path is still called 'sysexit' because it pairs
+        * with 'sysenter' and it uses the SYSENTER calling convention.
+        */
+       andl    $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       movl    RIP(%rsp),%ecx          /* User %eip */
+       CFI_REGISTER rip,rcx
+       RESTORE_RSI_RDI
+       xorl    %edx,%edx               /* avoid info leaks */
        xorq    %r8,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
-       xorq    %r11,%r11
-       popfq_cfi
+       movl    EFLAGS(%rsp),%r11d      /* User eflags */
        /*CFI_RESTORE rflags*/
-       popq_cfi %rcx                           /* User %esp */
-       CFI_REGISTER rsp,rcx
        TRACE_IRQS_ON
-       ENABLE_INTERRUPTS_SYSEXIT32
+
+       /*
+        * SYSRETL works even on Intel CPUs.  Use it in preference to SYSEXIT,
+        * since it avoids a dicey window with interrupts enabled.
+        */
+       movl    RSP(%rsp),%esp
+
+       /*
+        * USERGS_SYSRET32 does:
+        *  gsbase = user's gs base
+        *  eip = ecx
+        *  rflags = r11
+        *  cs = __USER32_CS
+        *  ss = __USER_DS
+        *
+        * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
+        *
+        *  pop %ebp
+        *  pop %edx
+        *  pop %ecx
+        *
+        * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
+        * avoid info leaks.  ECX holds VDSO32_SYSENTER_RETURN's address
+        * and R11 the user's eflags (both already known to user code),
+        * and R12-R15 are callee-saved and therefore don't contain any
+        * interesting kernel data.
+        */
+       USERGS_SYSRET32
 
        CFI_RESTORE_STATE
 
@@ -205,18 +247,18 @@ sysexit_from_sys_call:
        movl %ebx,%esi                  /* 2nd arg: 1st syscall arg */
        movl %eax,%edi                  /* 1st arg: syscall number */
        call __audit_syscall_entry
-       movl RAX-ARGOFFSET(%rsp),%eax   /* reload syscall number */
+       movl RAX(%rsp),%eax     /* reload syscall number */
        cmpq $(IA32_NR_syscalls-1),%rax
        ja ia32_badsys
        movl %ebx,%edi                  /* reload 1st syscall arg */
-       movl RCX-ARGOFFSET(%rsp),%esi   /* reload 2nd syscall arg */
-       movl RDX-ARGOFFSET(%rsp),%edx   /* reload 3rd syscall arg */
-       movl RSI-ARGOFFSET(%rsp),%ecx   /* reload 4th syscall arg */
-       movl RDI-ARGOFFSET(%rsp),%r8d   /* reload 5th syscall arg */
+       movl RCX(%rsp),%esi     /* reload 2nd syscall arg */
+       movl RDX(%rsp),%edx     /* reload 3rd syscall arg */
+       movl RSI(%rsp),%ecx     /* reload 4th syscall arg */
+       movl RDI(%rsp),%r8d     /* reload 5th syscall arg */
        .endm
 
        .macro auditsys_exit exit
-       testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz ia32_ret_from_sys_call
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
@@ -227,13 +269,13 @@ sysexit_from_sys_call:
 1:     setbe %al               /* 1 if error, 0 if not */
        movzbl %al,%edi         /* zero-extend that into %edi */
        call __audit_syscall_exit
-       movq RAX-ARGOFFSET(%rsp),%rax   /* reload syscall return value */
+       movq RAX(%rsp),%rax     /* reload syscall return value */
        movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jz \exit
-       CLEAR_RREGS -ARGOFFSET
+       CLEAR_RREGS
        jmp int_with_check
        .endm
 
@@ -253,16 +295,16 @@ sysenter_fix_flags:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl   $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl   $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jz      sysenter_auditsys
 #endif
-       SAVE_REST
+       SAVE_EXTRA_REGS
        CLEAR_RREGS
        movq    $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
        movq    %rsp,%rdi        /* &pt_regs -> arg1 */
        call    syscall_trace_enter
-       LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-       RESTORE_REST
+       LOAD_ARGS32  /* reload args from stack in case ptrace changed it */
+       RESTORE_EXTRA_REGS
        cmpq    $(IA32_NR_syscalls-1),%rax
        ja      int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
        jmp     sysenter_do_call
@@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target)
 /*
  * 32bit SYSCALL instruction entry.
  *
+ * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
+ * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
+ * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
+ *
  * Arguments:
- * %eax        System call number.
- * %ebx Arg1
- * %ecx return EIP 
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg2    [note: not saved in the stack frame, should not be touched]
- * %esp user stack 
- * 0(%esp) Arg6
- *     
- * Interrupts off.
- *     
+ * eax  system call number
+ * ecx  return address
+ * ebx  arg1
+ * ebp  arg2   (note: not saved in the stack frame, should not be touched)
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * esp  user stack
+ * 0(%esp) arg6
+ *
  * This is purely a fast path. For anything complicated we use the int 0x80
- * path below. Set up a complete hardware stack frame to share code
- * with the int 0x80 path.     
- */    
+ * path below. We set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
 ENTRY(ia32_cstar_target)
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
+       CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
+
+       /*
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
+        */
        SWAPGS_UNSAFE_STACK
        movl    %esp,%r8d
        CFI_REGISTER    rsp,r8
        movq    PER_CPU_VAR(kernel_stack),%rsp
-       /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs and here we enable it straight after entry:
-        */
        ENABLE_INTERRUPTS(CLBR_NONE)
-       SAVE_ARGS 8,0,0
-       movl    %eax,%eax       /* zero extension */
-       movq    %rax,ORIG_RAX-ARGOFFSET(%rsp)
-       movq    %rcx,RIP-ARGOFFSET(%rsp)
-       CFI_REL_OFFSET rip,RIP-ARGOFFSET
-       movq    %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+
+       /* Zero-extending 32-bit regs, do not remove */
+       movl    %eax,%eax
+
+       /* Construct struct pt_regs on stack */
+       pushq_cfi       $__USER32_DS            /* pt_regs->ss */
+       pushq_cfi       %r8                     /* pt_regs->sp */
+       CFI_REL_OFFSET rsp,0
+       pushq_cfi       %r11                    /* pt_regs->flags */
+       pushq_cfi       $__USER32_CS            /* pt_regs->cs */
+       pushq_cfi       %rcx                    /* pt_regs->ip */
+       CFI_REL_OFFSET rip,0
+       pushq_cfi_reg   rax                     /* pt_regs->orig_ax */
+       pushq_cfi_reg   rdi                     /* pt_regs->di */
+       pushq_cfi_reg   rsi                     /* pt_regs->si */
+       pushq_cfi_reg   rdx                     /* pt_regs->dx */
+       pushq_cfi_reg   rbp                     /* pt_regs->cx */
        movl    %ebp,%ecx
-       movq    $__USER32_CS,CS-ARGOFFSET(%rsp)
-       movq    $__USER32_DS,SS-ARGOFFSET(%rsp)
-       movq    %r11,EFLAGS-ARGOFFSET(%rsp)
-       /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
-       movq    %r8,RSP-ARGOFFSET(%rsp) 
-       CFI_REL_OFFSET rsp,RSP-ARGOFFSET
-       /* no need to do an access_ok check here because r8 has been
-          32bit zero extended */ 
-       /* hardware stack frame is complete now */      
+       pushq_cfi_reg   rax                     /* pt_regs->ax */
+       sub     $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+       CFI_ADJUST_CFA_OFFSET 10*8
+
+       /*
+        * no need to do an access_ok check here because r8 has been
+        * 32bit zero extended
+        */
        ASM_STAC
 1:     movl    (%r8),%r9d
        _ASM_EXTABLE(1b,ia32_badarg)
        ASM_CLAC
-       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       orl     $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       testl   $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        CFI_REMEMBER_STATE
        jnz   cstar_tracesys
        cmpq $IA32_NR_syscalls-1,%rax
        ja  ia32_badsys
 cstar_do_call:
-       IA32_ARG_FIXUP 1
+       /* 32bit syscall -> 64bit C ABI argument conversion */
+       movl    %edi,%r8d       /* arg5 */
+       /* r9 already loaded */ /* arg6 */
+       xchg    %ecx,%esi       /* rsi:arg2, rcx:arg4 */
+       movl    %ebx,%edi       /* arg1 */
+       movl    %edx,%edx       /* arg3 (zero extension) */
 cstar_dispatch:
        call *ia32_sys_call_table(,%rax,8)
-       movq %rax,RAX-ARGOFFSET(%rsp)
+       movq %rax,RAX(%rsp)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz sysretl_audit
 sysretl_from_sys_call:
-       andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       RESTORE_ARGS 0,-ARG_SKIP,0,0,0
-       movl RIP-ARGOFFSET(%rsp),%ecx
+       andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       RESTORE_RSI_RDI_RDX
+       movl RIP(%rsp),%ecx
        CFI_REGISTER rip,rcx
-       movl EFLAGS-ARGOFFSET(%rsp),%r11d       
+       movl EFLAGS(%rsp),%r11d
        /*CFI_REGISTER rflags,r11*/
        xorq    %r10,%r10
        xorq    %r9,%r9
        xorq    %r8,%r8
        TRACE_IRQS_ON
-       movl RSP-ARGOFFSET(%rsp),%esp
+       movl RSP(%rsp),%esp
        CFI_RESTORE rsp
+       /*
+        * 64bit->32bit SYSRET restores eip from ecx,
+        * eflags from r11 (but RF and VM bits are forced to 0),
+        * cs and ss are loaded from MSRs.
+        * (Note: 32bit->32bit SYSRET is different: since r11
+        * does not exist, it merely sets eflags.IF=1).
+        */
        USERGS_SYSRET32
-       
+
 #ifdef CONFIG_AUDITSYSCALL
 cstar_auditsys:
        CFI_RESTORE_STATE
-       movl %r9d,R9-ARGOFFSET(%rsp)    /* register to be clobbered by call */
+       movl %r9d,R9(%rsp)      /* register to be clobbered by call */
        auditsys_entry_common
-       movl R9-ARGOFFSET(%rsp),%r9d    /* reload 6th syscall arg */
+       movl R9(%rsp),%r9d      /* reload 6th syscall arg */
        jmp cstar_dispatch
 
 sysretl_audit:
@@ -368,17 +444,17 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jz cstar_auditsys
 #endif
        xchgl %r9d,%ebp
-       SAVE_REST
-       CLEAR_RREGS 0, r9
+       SAVE_EXTRA_REGS
+       CLEAR_RREGS r9
        movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
        movq %rsp,%rdi        /* &pt_regs -> arg1 */
        call syscall_trace_enter
-       LOAD_ARGS32 ARGOFFSET, 1  /* reload args from stack in case ptrace changed it */
-       RESTORE_REST
+       LOAD_ARGS32   /* reload args from stack in case ptrace changed it */
+       RESTORE_EXTRA_REGS
        xchgl %ebp,%r9d
        cmpq $(IA32_NR_syscalls-1),%rax
        ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@@ -391,78 +467,94 @@ ia32_badarg:
        jmp ia32_sysret
        CFI_ENDPROC
 
-/* 
- * Emulated IA32 system calls via int 0x80. 
+/*
+ * Emulated IA32 system calls via int 0x80.
  *
- * Arguments:   
- * %eax        System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg6    [note: not saved in the stack frame, should not be touched]
+ * Arguments:
+ * eax  system call number
+ * ebx  arg1
+ * ecx  arg2
+ * edx  arg3
+ * esi  arg4
+ * edi  arg5
+ * ebp  arg6   (note: not saved in the stack frame, should not be touched)
  *
  * Notes:
- * Uses the same stack frame as the x86-64 version.    
- * All registers except %eax must be saved (but ptrace may violate that)
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except eax must be saved (but ptrace may violate that).
  * Arguments are zero extended. For system calls that want sign extension and
  * take long arguments a wrapper is needed. Most calls can just be called
  * directly.
- * Assumes it is only called from user space and entered with interrupts off.  
- */                            
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
 
 ENTRY(ia32_syscall)
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,SS+8-RIP
-       /*CFI_REL_OFFSET        ss,SS-RIP*/
-       CFI_REL_OFFSET  rsp,RSP-RIP
-       /*CFI_REL_OFFSET        rflags,EFLAGS-RIP*/
-       /*CFI_REL_OFFSET        cs,CS-RIP*/
-       CFI_REL_OFFSET  rip,RIP-RIP
-       PARAVIRT_ADJUST_EXCEPTION_FRAME
-       SWAPGS
+       CFI_DEF_CFA     rsp,5*8
+       /*CFI_REL_OFFSET        ss,4*8 */
+       CFI_REL_OFFSET  rsp,3*8
+       /*CFI_REL_OFFSET        rflags,2*8 */
+       /*CFI_REL_OFFSET        cs,1*8 */
+       CFI_REL_OFFSET  rip,0*8
+
        /*
-        * No need to follow this irqs on/off section: the syscall
-        * disabled irqs and here we enable it straight after entry:
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
         */
+       PARAVIRT_ADJUST_EXCEPTION_FRAME
+       SWAPGS
        ENABLE_INTERRUPTS(CLBR_NONE)
-       movl %eax,%eax
-       pushq_cfi %rax
+
+       /* Zero-extending 32-bit regs, do not remove */
+       movl    %eax,%eax
+
+       /* Construct struct pt_regs on stack (iret frame is already on stack) */
+       pushq_cfi_reg   rax                     /* pt_regs->orig_ax */
+       pushq_cfi_reg   rdi                     /* pt_regs->di */
+       pushq_cfi_reg   rsi                     /* pt_regs->si */
+       pushq_cfi_reg   rdx                     /* pt_regs->dx */
+       pushq_cfi_reg   rcx                     /* pt_regs->cx */
+       pushq_cfi_reg   rax                     /* pt_regs->ax */
        cld
-       /* note the registers are not zero extended to the sf.
-          this could be a problem. */
-       SAVE_ARGS 0,1,0
-       orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       sub     $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+       CFI_ADJUST_CFA_OFFSET 10*8
+
+       orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+       testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz ia32_tracesys
        cmpq $(IA32_NR_syscalls-1),%rax
        ja ia32_badsys
 ia32_do_call:
-       IA32_ARG_FIXUP
+       /* 32bit syscall -> 64bit C ABI argument conversion */
+       movl %edi,%r8d  /* arg5 */
+       movl %ebp,%r9d  /* arg6 */
+       xchg %ecx,%esi  /* rsi:arg2, rcx:arg4 */
+       movl %ebx,%edi  /* arg1 */
+       movl %edx,%edx  /* arg3 (zero extension) */
        call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
 ia32_sysret:
-       movq %rax,RAX-ARGOFFSET(%rsp)
+       movq %rax,RAX(%rsp)
 ia32_ret_from_sys_call:
-       CLEAR_RREGS -ARGOFFSET
-       jmp int_ret_from_sys_call 
+       CLEAR_RREGS
+       jmp int_ret_from_sys_call
 
-ia32_tracesys:                  
-       SAVE_REST
+ia32_tracesys:
+       SAVE_EXTRA_REGS
        CLEAR_RREGS
        movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
        movq %rsp,%rdi        /* &pt_regs -> arg1 */
        call syscall_trace_enter
-       LOAD_ARGS32 ARGOFFSET  /* reload args from stack in case ptrace changed it */
-       RESTORE_REST
+       LOAD_ARGS32     /* reload args from stack in case ptrace changed it */
+       RESTORE_EXTRA_REGS
        cmpq $(IA32_NR_syscalls-1),%rax
        ja  int_ret_from_sys_call       /* ia32_tracesys has set RAX(%rsp) */
        jmp ia32_do_call
 END(ia32_syscall)
 
 ia32_badsys:
-       movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+       movq $0,ORIG_RAX(%rsp)
        movq $-ENOSYS,%rax
        jmp ia32_sysret
 
@@ -479,8 +571,6 @@ GLOBAL(\label)
 
        PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
        PTREGSCALL stub32_sigreturn, sys32_sigreturn
-       PTREGSCALL stub32_execve, compat_sys_execve
-       PTREGSCALL stub32_execveat, compat_sys_execveat
        PTREGSCALL stub32_fork, sys_fork
        PTREGSCALL stub32_vfork, sys_vfork
 
@@ -492,24 +582,23 @@ GLOBAL(stub32_clone)
 
        ALIGN
 ia32_ptregs_common:
-       popq %r11
        CFI_ENDPROC
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,SS+8-ARGOFFSET
-       CFI_REL_OFFSET  rax,RAX-ARGOFFSET
-       CFI_REL_OFFSET  rcx,RCX-ARGOFFSET
-       CFI_REL_OFFSET  rdx,RDX-ARGOFFSET
-       CFI_REL_OFFSET  rsi,RSI-ARGOFFSET
-       CFI_REL_OFFSET  rdi,RDI-ARGOFFSET
-       CFI_REL_OFFSET  rip,RIP-ARGOFFSET
-/*     CFI_REL_OFFSET  cs,CS-ARGOFFSET*/
-/*     CFI_REL_OFFSET  rflags,EFLAGS-ARGOFFSET*/
-       CFI_REL_OFFSET  rsp,RSP-ARGOFFSET
-/*     CFI_REL_OFFSET  ss,SS-ARGOFFSET*/
-       SAVE_REST
+       CFI_DEF_CFA     rsp,SIZEOF_PTREGS
+       CFI_REL_OFFSET  rax,RAX
+       CFI_REL_OFFSET  rcx,RCX
+       CFI_REL_OFFSET  rdx,RDX
+       CFI_REL_OFFSET  rsi,RSI
+       CFI_REL_OFFSET  rdi,RDI
+       CFI_REL_OFFSET  rip,RIP
+/*     CFI_REL_OFFSET  cs,CS*/
+/*     CFI_REL_OFFSET  rflags,EFLAGS*/
+       CFI_REL_OFFSET  rsp,RSP
+/*     CFI_REL_OFFSET  ss,SS*/
+       SAVE_EXTRA_REGS 8
        call *%rax
-       RESTORE_REST
-       jmp  ia32_sysret        /* misbalances the return cache */
+       RESTORE_EXTRA_REGS 8
+       ret
        CFI_ENDPROC
 END(ia32_ptregs_common)
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c
deleted file mode 100644 (file)
index 51ecd5b..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-
-long compat_ni_syscall(void)
-{
-       return -ENOSYS;
-}
index 8e0ceec..719cd70 100644 (file)
@@ -201,20 +201,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
                                advice);
 }
 
-long sys32_vm86_warning(void)
-{
-       struct task_struct *me = current;
-       static char lastcomm[sizeof(me->comm)];
-
-       if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
-               compat_printk(KERN_INFO
-                             "%s: vm86 mode not supported on 64 bit kernel\n",
-                             me->comm);
-               strncpy(lastcomm, me->comm, sizeof(lastcomm));
-       }
-       return -ENOSYS;
-}
-
 asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
                                   size_t count)
 {
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c
deleted file mode 100644 (file)
index 4754ba0..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-/* System call table for ia32 emulation. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <asm/asm-offsets.h>
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
-#include <asm/syscalls_32.h>
-#undef __SYSCALL_I386
-
-#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
-
-typedef void (*sys_call_ptr_t)(void);
-
-extern void compat_ni_syscall(void);
-
-const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
-       /*
-        * Smells like a compiler bug -- it doesn't work
-        * when the & below is removed.
-        */
-       [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
-#include <asm/syscalls_32.h>
-};
index 372231c..bdf02ee 100644 (file)
        .endm
 #endif
 
-.macro altinstruction_entry orig alt feature orig_len alt_len
+.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
        .long \orig - .
        .long \alt - .
        .word \feature
        .byte \orig_len
        .byte \alt_len
+       .byte \pad_len
+.endm
+
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+       \oldinstr
+141:
+       .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+
+       .pushsection .altinstructions,"a"
+       altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+       .popsection
+
+       .pushsection .altinstr_replacement,"ax"
+143:
+       \newinstr
+144:
+       .popsection
+.endm
+
+#define old_len                        141b-140b
+#define new_len1               144f-143f
+#define new_len2               145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b)    ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+       \oldinstr
+141:
+       .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+               (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+
+       .pushsection .altinstructions,"a"
+       altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+       altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+       .popsection
+
+       .pushsection .altinstr_replacement,"ax"
+143:
+       \newinstr1
+144:
+       \newinstr2
+145:
+       .popsection
 .endm
 
 #endif  /*  __ASSEMBLY__  */
index 473bdbe..ba32af0 100644 (file)
@@ -48,8 +48,9 @@ struct alt_instr {
        s32 repl_offset;        /* offset to replacement instruction */
        u16 cpuid;              /* cpuid bit set for replacement */
        u8  instrlen;           /* length of original instruction */
-       u8  replacementlen;     /* length of new instruction, <= instrlen */
-};
+       u8  replacementlen;     /* length of new instruction */
+       u8  padlen;             /* length of build-time padding */
+} __packed;
 
 extern void alternative_instructions(void);
 extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
 }
 #endif /* CONFIG_SMP */
 
-#define OLDINSTR(oldinstr)     "661:\n\t" oldinstr "\n662:\n"
+#define b_replacement(num)     "664"#num
+#define e_replacement(num)     "665"#num
 
-#define b_replacement(number)  "663"#number
-#define e_replacement(number)  "664"#number
+#define alt_end_marker         "663"
+#define alt_slen               "662b-661b"
+#define alt_pad_len            alt_end_marker"b-662b"
+#define alt_total_slen         alt_end_marker"b-661b"
+#define alt_rlen(num)          e_replacement(num)"f-"b_replacement(num)"f"
 
-#define alt_slen "662b-661b"
-#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+#define __OLDINSTR(oldinstr, num)                                      \
+       "661:\n\t" oldinstr "\n662:\n"                                  \
+       ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * "          \
+               "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
 
-#define ALTINSTR_ENTRY(feature, number)                                              \
+#define OLDINSTR(oldinstr, num)                                                \
+       __OLDINSTR(oldinstr, num)                                       \
+       alt_end_marker ":\n"
+
+/*
+ * max(a, b) of two assembler expressions without conditionals. Adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ * gas yields -1 (all ones) for a true comparison and 0 for a false one, so
+ * the mask -(-(a < b)) selects b when a < b and leaves a otherwise.
+ */
+#define alt_max_short(a, b)    "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
+
+/*
+ * Pad the second replacement alternative with additional NOPs if it is
+ * additionally longer than the first replacement alternative.
+ */
+#define OLDINSTR_2(oldinstr, num1, num2) \
+       "661:\n\t" oldinstr "\n662:\n"                                                          \
+       ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * "  \
+               "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n"  \
+       alt_end_marker ":\n"
+
+#define ALTINSTR_ENTRY(feature, num)                                         \
        " .long 661b - .\n"                             /* label           */ \
-       " .long " b_replacement(number)"f - .\n"        /* new instruction */ \
+       " .long " b_replacement(num)"f - .\n"           /* new instruction */ \
        " .word " __stringify(feature) "\n"             /* feature bit     */ \
-       " .byte " alt_slen "\n"                         /* source len      */ \
-       " .byte " alt_rlen(number) "\n"                 /* replacement len */
-
-#define DISCARD_ENTRY(number)                          /* rlen <= slen */    \
-       " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+       " .byte " alt_total_slen "\n"                   /* source len      */ \
+       " .byte " alt_rlen(num) "\n"                    /* replacement len */ \
+       " .byte " alt_pad_len "\n"                      /* pad len */
 
-#define ALTINSTR_REPLACEMENT(newinstr, feature, number)        /* replacement */     \
-       b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num)   /* replacement */     \
+       b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
 
 /* alternative assembly primitive: */
 #define ALTERNATIVE(oldinstr, newinstr, feature)                       \
-       OLDINSTR(oldinstr)                                              \
+       OLDINSTR(oldinstr, 1)                                           \
        ".pushsection .altinstructions,\"a\"\n"                         \
        ALTINSTR_ENTRY(feature, 1)                                      \
        ".popsection\n"                                                 \
-       ".pushsection .discard,\"aw\",@progbits\n"                      \
-       DISCARD_ENTRY(1)                                                \
-       ".popsection\n"                                                 \
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
        ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
        ".popsection"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
-       OLDINSTR(oldinstr)                                              \
+       OLDINSTR_2(oldinstr, 1, 2)                                      \
        ".pushsection .altinstructions,\"a\"\n"                         \
        ALTINSTR_ENTRY(feature1, 1)                                     \
        ALTINSTR_ENTRY(feature2, 2)                                     \
        ".popsection\n"                                                 \
-       ".pushsection .discard,\"aw\",@progbits\n"                      \
-       DISCARD_ENTRY(1)                                                \
-       DISCARD_ENTRY(2)                                                \
-       ".popsection\n"                                                 \
        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
        ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)                    \
        ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)                    \
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define alternative(oldinstr, newinstr, feature)                       \
        asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
 
+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+       asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+
 /*
  * Alternative inline assembly with input.
  *
index efc3b22..976b86a 100644 (file)
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
 {
        volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
 
-       alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
+       alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
                       ASM_OUTPUT2("=r" (v), "=m" (*addr)),
                       ASM_OUTPUT2("0" (v), "m" (*addr)));
 }
@@ -204,7 +204,6 @@ extern void clear_local_APIC(void);
 extern void disconnect_bsp_APIC(int virt_wire_setup);
 extern void disable_local_APIC(void);
 extern void lapic_shutdown(void);
-extern int verify_local_APIC(void);
 extern void sync_Arb_IDs(void);
 extern void init_bsp_APIC(void);
 extern void setup_local_APIC(void);
index 2ab1eb3..959e45b 100644 (file)
@@ -95,13 +95,11 @@ do {                                                                        \
  * Stop RDTSC speculation. This is needed when you need to use RDTSC
  * (or get_cycles or vread that possibly accesses the TSC) in a defined
  * code region.
- *
- * (Could use an alternative three way for this if there was one.)
  */
 static __always_inline void rdtsc_barrier(void)
 {
-       alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-       alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+       alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+                         "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
 #endif /* _ASM_X86_BARRIER_H */
index 1f1297b..1c8b50e 100644 (file)
@@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
  * for assembly code:
  */
 
-#define R15              0
-#define R14              8
-#define R13             16
-#define R12             24
-#define RBP             32
-#define RBX             40
-
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11             48
-#define R10             56
-#define R9              64
-#define R8              72
-#define RAX             80
-#define RCX             88
-#define RDX             96
-#define RSI            104
-#define RDI            112
-#define ORIG_RAX       120       /* + error_code */
-/* end of arguments */
-
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP            128
-#define CS             136
-#define EFLAGS         144
-#define RSP            152
-#define SS             160
-
-#define ARGOFFSET      R11
-
-       .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
-       subq  $9*8+\addskip, %rsp
-       CFI_ADJUST_CFA_OFFSET   9*8+\addskip
-       movq_cfi rdi, 8*8
-       movq_cfi rsi, 7*8
-       movq_cfi rdx, 6*8
-
-       .if \save_rcx
-       movq_cfi rcx, 5*8
-       .endif
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15            0*8
+#define R14            1*8
+#define R13            2*8
+#define R12            3*8
+#define RBP            4*8
+#define RBX            5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11            6*8
+#define R10            7*8
+#define R9             8*8
+#define R8             9*8
+#define RAX            10*8
+#define RCX            11*8
+#define RDX            12*8
+#define RSI            13*8
+#define RDI            14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX       15*8
+/* Return frame for iretq */
+#define RIP            16*8
+#define CS             17*8
+#define EFLAGS         18*8
+#define RSP            19*8
+#define SS             20*8
+
+#define SIZEOF_PTREGS  21*8
+
+       .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+       subq    $15*8+\addskip, %rsp
+       CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+       .endm
 
-       .if \rax_enosys
-       movq $-ENOSYS, 4*8(%rsp)
-       .else
-       movq_cfi rax, 4*8
+       .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
+       .if \r11
+       movq_cfi r11, 6*8+\offset
        .endif
-
-       .if \save_r891011
-       movq_cfi r8,  3*8
-       movq_cfi r9,  2*8
-       movq_cfi r10, 1*8
-       movq_cfi r11, 0*8
+       .if \r8910
+       movq_cfi r10, 7*8+\offset
+       movq_cfi r9,  8*8+\offset
+       movq_cfi r8,  9*8+\offset
+       .endif
+       .if \rax
+       movq_cfi rax, 10*8+\offset
+       .endif
+       .if \rcx
+       movq_cfi rcx, 11*8+\offset
        .endif
+       movq_cfi rdx, 12*8+\offset
+       movq_cfi rsi, 13*8+\offset
+       movq_cfi rdi, 14*8+\offset
+       .endm
+       .macro SAVE_C_REGS offset=0
+       SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
+       .endm
+       .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+       SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
+       .endm
+       .macro SAVE_C_REGS_EXCEPT_R891011
+       SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
+       .endm
+       .macro SAVE_C_REGS_EXCEPT_RCX_R891011
+       SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
+       .endm
+       .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
+       SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
+       .endm
+
+       .macro SAVE_EXTRA_REGS offset=0
+       movq_cfi r15, 0*8+\offset
+       movq_cfi r14, 1*8+\offset
+       movq_cfi r13, 2*8+\offset
+       movq_cfi r12, 3*8+\offset
+       movq_cfi rbp, 4*8+\offset
+       movq_cfi rbx, 5*8+\offset
+       .endm
+       .macro SAVE_EXTRA_REGS_RBP offset=0
+       movq_cfi rbp, 4*8+\offset
+       .endm
 
+       .macro RESTORE_EXTRA_REGS offset=0
+       movq_cfi_restore 0*8+\offset, r15
+       movq_cfi_restore 1*8+\offset, r14
+       movq_cfi_restore 2*8+\offset, r13
+       movq_cfi_restore 3*8+\offset, r12
+       movq_cfi_restore 4*8+\offset, rbp
+       movq_cfi_restore 5*8+\offset, rbx
        .endm
 
-#define ARG_SKIP       (9*8)
+       .macro ZERO_EXTRA_REGS
+       xorl    %r15d, %r15d
+       xorl    %r14d, %r14d
+       xorl    %r13d, %r13d
+       xorl    %r12d, %r12d
+       xorl    %ebp, %ebp
+       xorl    %ebx, %ebx
+       .endm
 
-       .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
-                           rstor_r8910=1, rstor_rdx=1
+       .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
        .if \rstor_r11
-       movq_cfi_restore 0*8, r11
+       movq_cfi_restore 6*8, r11
        .endif
-
        .if \rstor_r8910
-       movq_cfi_restore 1*8, r10
-       movq_cfi_restore 2*8, r9
-       movq_cfi_restore 3*8, r8
+       movq_cfi_restore 7*8, r10
+       movq_cfi_restore 8*8, r9
+       movq_cfi_restore 9*8, r8
        .endif
-
        .if \rstor_rax
-       movq_cfi_restore 4*8, rax
+       movq_cfi_restore 10*8, rax
        .endif
-
        .if \rstor_rcx
-       movq_cfi_restore 5*8, rcx
+       movq_cfi_restore 11*8, rcx
        .endif
-
        .if \rstor_rdx
-       movq_cfi_restore 6*8, rdx
-       .endif
-
-       movq_cfi_restore 7*8, rsi
-       movq_cfi_restore 8*8, rdi
-
-       .if ARG_SKIP+\addskip > 0
-       addq $ARG_SKIP+\addskip, %rsp
-       CFI_ADJUST_CFA_OFFSET   -(ARG_SKIP+\addskip)
+       movq_cfi_restore 12*8, rdx
        .endif
+       movq_cfi_restore 13*8, rsi
+       movq_cfi_restore 14*8, rdi
        .endm
-
-       .macro LOAD_ARGS offset, skiprax=0
-       movq \offset(%rsp),    %r11
-       movq \offset+8(%rsp),  %r10
-       movq \offset+16(%rsp), %r9
-       movq \offset+24(%rsp), %r8
-       movq \offset+40(%rsp), %rcx
-       movq \offset+48(%rsp), %rdx
-       movq \offset+56(%rsp), %rsi
-       movq \offset+64(%rsp), %rdi
-       .if \skiprax
-       .else
-       movq \offset+72(%rsp), %rax
-       .endif
+       .macro RESTORE_C_REGS
+       RESTORE_C_REGS_HELPER 1,1,1,1,1
        .endm
-
-#define REST_SKIP      (6*8)
-
-       .macro SAVE_REST
-       subq $REST_SKIP, %rsp
-       CFI_ADJUST_CFA_OFFSET   REST_SKIP
-       movq_cfi rbx, 5*8
-       movq_cfi rbp, 4*8
-       movq_cfi r12, 3*8
-       movq_cfi r13, 2*8
-       movq_cfi r14, 1*8
-       movq_cfi r15, 0*8
+       .macro RESTORE_C_REGS_EXCEPT_RAX
+       RESTORE_C_REGS_HELPER 0,1,1,1,1
        .endm
-
-       .macro RESTORE_REST
-       movq_cfi_restore 0*8, r15
-       movq_cfi_restore 1*8, r14
-       movq_cfi_restore 2*8, r13
-       movq_cfi_restore 3*8, r12
-       movq_cfi_restore 4*8, rbp
-       movq_cfi_restore 5*8, rbx
-       addq $REST_SKIP, %rsp
-       CFI_ADJUST_CFA_OFFSET   -(REST_SKIP)
+       .macro RESTORE_C_REGS_EXCEPT_RCX
+       RESTORE_C_REGS_HELPER 1,0,1,1,1
        .endm
-
-       .macro SAVE_ALL
-       SAVE_ARGS
-       SAVE_REST
+       .macro RESTORE_C_REGS_EXCEPT_R11
+       RESTORE_C_REGS_HELPER 1,1,0,1,1
+       .endm
+       .macro RESTORE_C_REGS_EXCEPT_RCX_R11
+       RESTORE_C_REGS_HELPER 1,0,0,1,1
+       .endm
+       .macro RESTORE_RSI_RDI
+       RESTORE_C_REGS_HELPER 0,0,0,0,0
+       .endm
+       .macro RESTORE_RSI_RDI_RDX
+       RESTORE_C_REGS_HELPER 0,0,0,0,1
        .endm
 
-       .macro RESTORE_ALL addskip=0
-       RESTORE_REST
-       RESTORE_ARGS 1, \addskip
+       .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+       addq $15*8+\addskip, %rsp
+       CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
        .endm
 
        .macro icebp
@@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
  */
 
        .macro SAVE_ALL
-       pushl_cfi %eax
-       CFI_REL_OFFSET eax, 0
-       pushl_cfi %ebp
-       CFI_REL_OFFSET ebp, 0
-       pushl_cfi %edi
-       CFI_REL_OFFSET edi, 0
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %edx
-       CFI_REL_OFFSET edx, 0
-       pushl_cfi %ecx
-       CFI_REL_OFFSET ecx, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg eax
+       pushl_cfi_reg ebp
+       pushl_cfi_reg edi
+       pushl_cfi_reg esi
+       pushl_cfi_reg edx
+       pushl_cfi_reg ecx
+       pushl_cfi_reg ebx
        .endm
 
        .macro RESTORE_ALL
-       popl_cfi %ebx
-       CFI_RESTORE ebx
-       popl_cfi %ecx
-       CFI_RESTORE ecx
-       popl_cfi %edx
-       CFI_RESTORE edx
-       popl_cfi %esi
-       CFI_RESTORE esi
-       popl_cfi %edi
-       CFI_RESTORE edi
-       popl_cfi %ebp
-       CFI_RESTORE ebp
-       popl_cfi %eax
-       CFI_RESTORE eax
+       popl_cfi_reg ebx
+       popl_cfi_reg ecx
+       popl_cfi_reg edx
+       popl_cfi_reg esi
+       popl_cfi_reg edi
+       popl_cfi_reg ebp
+       popl_cfi_reg eax
        .endm
 
 #endif /* CONFIG_X86_64 */
index 59c6c40..acdee09 100644 (file)
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
                sp = task_pt_regs(current)->sp;
        } else {
                /* -128 for the x32 ABI redzone */
-               sp = this_cpu_read(old_rsp) - 128;
+               sp = task_pt_regs(current)->sp - 128;
        }
 
        return (void __user *)round_down(sp - len, 16);
index 90a5485..854c04b 100644 (file)
 #define X86_FEATURE_RDSEED     ( 9*32+18) /* The RDSEED instruction */
 #define X86_FEATURE_ADX                ( 9*32+19) /* The ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP       ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT    ( 9*32+22) /* PCOMMIT instruction */
 #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB       ( 9*32+24) /* CLWB instruction */
 #define X86_FEATURE_AVX512PF   ( 9*32+26) /* AVX-512 Prefetch */
 #define X86_FEATURE_AVX512ER   ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
 #define X86_FEATURE_AVX512CD   ( 9*32+28) /* AVX-512 Conflict Detection */
@@ -418,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
                         " .word %P0\n"         /* 1: do replace */
                         " .byte 2b - 1b\n"     /* source len */
                         " .byte 0\n"           /* replacement len */
+                        " .byte 0\n"           /* pad len */
                         ".previous\n"
                         /* skipping size check since replacement size = 0 */
                         : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@@ -432,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
                         " .word %P0\n"         /* feature bit */
                         " .byte 2b - 1b\n"     /* source len */
                         " .byte 0\n"           /* replacement len */
+                        " .byte 0\n"           /* pad len */
                         ".previous\n"
                         /* skipping size check since replacement size = 0 */
                         : : "i" (bit) : : t_no);
@@ -457,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
                             " .word %P1\n"             /* feature bit */
                             " .byte 2b - 1b\n"         /* source len */
                             " .byte 4f - 3f\n"         /* replacement len */
+                            " .byte 0\n"               /* pad len */
                             ".previous\n"
                             ".section .discard,\"aw\",@progbits\n"
                             " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -483,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
 static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
 {
 #ifdef CC_HAVE_ASM_GOTO
-/*
- * We need to spell the jumps to the compiler because, depending on the offset,
- * the replacement jump can be bigger than the original jump, and this we cannot
- * have. Thus, we force the jump to the widest, 4-byte, signed relative
- * offset even though the last would often fit in less bytes.
- */
-               asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+               asm_volatile_goto("1: jmp %l[t_dynamic]\n"
                         "2:\n"
+                        ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+                                "((5f-4f) - (2b-1b)),0x90\n"
+                        "3:\n"
                         ".section .altinstructions,\"a\"\n"
                         " .long 1b - .\n"              /* src offset */
-                        " .long 3f - .\n"              /* repl offset */
+                        " .long 4f - .\n"              /* repl offset */
                         " .word %P1\n"                 /* always replace */
-                        " .byte 2b - 1b\n"             /* src len */
-                        " .byte 4f - 3f\n"             /* repl len */
+                        " .byte 3b - 1b\n"             /* src len */
+                        " .byte 5f - 4f\n"             /* repl len */
+                        " .byte 3b - 2b\n"             /* pad len */
                         ".previous\n"
                         ".section .altinstr_replacement,\"ax\"\n"
-                        "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
-                        "4:\n"
+                        "4: jmp %l[t_no]\n"
+                        "5:\n"
                         ".previous\n"
                         ".section .altinstructions,\"a\"\n"
                         " .long 1b - .\n"              /* src offset */
                         " .long 0\n"                   /* no replacement */
                         " .word %P0\n"                 /* feature bit */
-                        " .byte 2b - 1b\n"             /* src len */
+                        " .byte 3b - 1b\n"             /* src len */
                         " .byte 0\n"                   /* repl len */
+                        " .byte 0\n"                   /* pad len */
                         ".previous\n"
                         : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
                         : : t_dynamic, t_no);
@@ -527,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
                             " .word %P2\n"             /* always replace */
                             " .byte 2b - 1b\n"         /* source len */
                             " .byte 4f - 3f\n"         /* replacement len */
+                            " .byte 0\n"               /* pad len */
                             ".previous\n"
                             ".section .discard,\"aw\",@progbits\n"
                             " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -541,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
                             " .word %P1\n"             /* feature bit */
                             " .byte 4b - 3b\n"         /* src len */
                             " .byte 6f - 5f\n"         /* repl len */
+                            " .byte 0\n"               /* pad len */
                             ".previous\n"
                             ".section .discard,\"aw\",@progbits\n"
                             " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
index a94b82e..a0bf89f 100644 (file)
@@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
  * Pentium F0 0F bugfix can have resulted in the mapped
  * IDT being write-protected.
  */
-#define set_intr_gate(n, addr)                                         \
+#define set_intr_gate_notrace(n, addr)                                 \
        do {                                                            \
                BUG_ON((unsigned)n > 0xFF);                             \
                _set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0,        \
                          __KERNEL_CS);                                 \
+       } while (0)
+
+#define set_intr_gate(n, addr)                                         \
+       do {                                                            \
+               set_intr_gate_notrace(n, addr);                         \
                _trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
                                0, 0, __KERNEL_CS);                     \
        } while (0)
index f6f1598..de1cdaf 100644 (file)
        CFI_ADJUST_CFA_OFFSET 8
        .endm
 
+       .macro pushq_cfi_reg reg
+       pushq %\reg
+       CFI_ADJUST_CFA_OFFSET 8
+       CFI_REL_OFFSET \reg, 0
+       .endm
+
        .macro popq_cfi reg
        popq \reg
        CFI_ADJUST_CFA_OFFSET -8
        .endm
 
+       .macro popq_cfi_reg reg
+       popq %\reg
+       CFI_ADJUST_CFA_OFFSET -8
+       CFI_RESTORE \reg
+       .endm
+
        .macro pushfq_cfi
        pushfq
        CFI_ADJUST_CFA_OFFSET 8
        CFI_ADJUST_CFA_OFFSET 4
        .endm
 
+       .macro pushl_cfi_reg reg
+       pushl %\reg
+       CFI_ADJUST_CFA_OFFSET 4
+       CFI_REL_OFFSET \reg, 0
+       .endm
+
        .macro popl_cfi reg
        popl \reg
        CFI_ADJUST_CFA_OFFSET -4
        .endm
 
+       .macro popl_cfi_reg reg
+       popl %\reg
+       CFI_ADJUST_CFA_OFFSET -4
+       CFI_RESTORE \reg
+       .endm
+
        .macro pushfl_cfi
        pushfl
        CFI_ADJUST_CFA_OFFSET 4
index 25bce45..3738b13 100644 (file)
@@ -2,6 +2,8 @@
 #define _ASM_X86_EFI_H
 
 #include <asm/i387.h>
+#include <asm/pgtable.h>
+
 /*
  * We map the EFI regions needed for runtime services non-contiguously,
  * with preserved alignment on virtual addresses starting from -4G down
@@ -89,8 +91,8 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
 extern struct efi_scratch efi_scratch;
 extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
 extern int __init efi_memblock_x86_reserve_range(void);
-extern void __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(void);
+extern pgd_t * __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
 extern void __init efi_unmap_memmap(void);
 extern void __init efi_memory_uc(u64 addr, unsigned long size);
 extern void __init efi_map_region(efi_memory_desc_t *md);
index ca3347a..3563107 100644 (file)
@@ -171,10 +171,11 @@ do {                                              \
 static inline void elf_common_init(struct thread_struct *t,
                                   struct pt_regs *regs, const u16 ds)
 {
-       regs->ax = regs->bx = regs->cx = regs->dx = 0;
-       regs->si = regs->di = regs->bp = 0;
+       /* Commented-out registers are cleared in stub_execve */
+       /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
+       regs->si = regs->di /*= regs->bp*/ = 0;
        regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
-       regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
+       /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
        t->fs = t->gs = 0;
        t->fsindex = t->gsindex = 0;
        t->ds = t->es = ds;
index 0dbc082..72ba21a 100644 (file)
@@ -370,7 +370,7 @@ static inline void drop_fpu(struct task_struct *tsk)
        preempt_disable();
        tsk->thread.fpu_counter = 0;
        __drop_fpu(tsk);
-       clear_used_math();
+       clear_stopped_child_used_math(tsk);
        preempt_enable();
 }
 
index 9662290..e9571dd 100644 (file)
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
-                                   - FIRST_EXTERNAL_VECTOR])(void);
+extern char irq_entries_start[];
 #ifdef CONFIG_TRACING
-#define trace_interrupt interrupt
+#define trace_irq_entries_start irq_entries_start
 #endif
 
 #define VECTOR_UNDEFINED       (-1)
index 47f29b1..e7814b7 100644 (file)
@@ -69,7 +69,7 @@ struct insn {
        const insn_byte_t *next_byte;
 };
 
-#define MAX_INSN_SIZE  16
+#define MAX_INSN_SIZE  15
 
 #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
 #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
index f42a047..e37d6b3 100644 (file)
@@ -79,11 +79,12 @@ struct iommu_table_entry {
  *  d). Similar to the 'init', except that this gets called from pci_iommu_init
  *      where we do have a memory allocator.
  *
- * The standard vs the _FINISH differs in that the _FINISH variant will
- * continue detecting other IOMMUs in the call list after the
- * the detection routine returns a positive number. The _FINISH will
- * stop the execution chain. Both will still call the 'init' and
- * 'late_init' functions if they are set.
+ * The standard IOMMU_INIT differs from the IOMMU_INIT_FINISH variant
+ * in that the former will continue detecting other IOMMUs in the call
+ * list after the detection routine returns a positive number, while the
+ * latter will stop the execution chain upon first successful detection.
+ * Both variants will still call the 'init' and 'late_init' functions if
+ * they are set.
  */
 #define IOMMU_INIT_FINISH(_detect, _depend, _init, _late_init)         \
        __IOMMU_INIT(_detect, _depend, _init, _late_init, 1)
index 0a8b519..b77f5ed 100644 (file)
@@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
 #define USERGS_SYSRET32                                \
        swapgs;                                 \
        sysretl
-#define ENABLE_INTERRUPTS_SYSEXIT32            \
-       swapgs;                                 \
-       sti;                                    \
-       sysexit
 
 #else
 #define INTERRUPT_RETURN               iret
@@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void)
 
        return arch_irqs_disabled_flags(flags);
 }
+#endif /* !__ASSEMBLY__ */
 
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_TRACE_IRQFLAGS
+#  define TRACE_IRQS_ON                call trace_hardirqs_on_thunk;
+#  define TRACE_IRQS_OFF       call trace_hardirqs_off_thunk;
 #else
-
-#ifdef CONFIG_X86_64
-#define ARCH_LOCKDEP_SYS_EXIT          call lockdep_sys_exit_thunk
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ      \
+#  define TRACE_IRQS_ON
+#  define TRACE_IRQS_OFF
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#  ifdef CONFIG_X86_64
+#    define LOCKDEP_SYS_EXIT           call lockdep_sys_exit_thunk
+#    define LOCKDEP_SYS_EXIT_IRQ \
        TRACE_IRQS_ON; \
        sti; \
-       SAVE_REST; \
-       LOCKDEP_SYS_EXIT; \
-       RESTORE_REST; \
+       call lockdep_sys_exit_thunk; \
        cli; \
        TRACE_IRQS_OFF;
-
-#else
-#define ARCH_LOCKDEP_SYS_EXIT                  \
+#  else
+#    define LOCKDEP_SYS_EXIT \
        pushl %eax;                             \
        pushl %ecx;                             \
        pushl %edx;                             \
@@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void)
        popl %edx;                              \
        popl %ecx;                              \
        popl %eax;
-
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-#  define TRACE_IRQS_ON                call trace_hardirqs_on_thunk;
-#  define TRACE_IRQS_OFF       call trace_hardirqs_off_thunk;
+#    define LOCKDEP_SYS_EXIT_IRQ
+#  endif
 #else
-#  define TRACE_IRQS_ON
-#  define TRACE_IRQS_OFF
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-#  define LOCKDEP_SYS_EXIT     ARCH_LOCKDEP_SYS_EXIT
-#  define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
-# else
 #  define LOCKDEP_SYS_EXIT
 #  define LOCKDEP_SYS_EXIT_IRQ
-# endif
-
+#endif
 #endif /* __ASSEMBLY__ */
+
 #endif
index 6a2cefb..a4c1cf7 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _ASM_X86_JUMP_LABEL_H
 #define _ASM_X86_JUMP_LABEL_H
 
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
 
 #include <linux/stringify.h>
 #include <linux/types.h>
@@ -30,8 +30,6 @@ l_yes:
        return true;
 }
 
-#endif /* __KERNEL__ */
-
 #ifdef CONFIG_X86_64
 typedef u64 jump_label_t;
 #else
@@ -44,4 +42,5 @@ struct jump_entry {
        jump_label_t key;
 };
 
+#endif  /* __ASSEMBLY__ */
 #endif
index a236e39..dea2e7e 100644 (file)
@@ -81,11 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
                (base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
 
-#define SELECTOR_TI_MASK (1 << 2)
-#define SELECTOR_RPL_MASK 0x03
-
-#define IOPL_SHIFT 12
-
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64
 #define KVM_MMU_HASH_SHIFT 10
@@ -345,6 +340,7 @@ struct kvm_pmu {
 enum {
        KVM_DEBUGREG_BP_ENABLED = 1,
        KVM_DEBUGREG_WONT_EXIT = 2,
+       KVM_DEBUGREG_RELOAD = 4,
 };
 
 struct kvm_vcpu_arch {
@@ -431,6 +427,9 @@ struct kvm_vcpu_arch {
 
        int cpuid_nent;
        struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+       int maxphyaddr;
+
        /* emulate context */
 
        struct x86_emulate_ctxt emulate_ctxt;
@@ -550,11 +549,20 @@ struct kvm_arch_memory_slot {
        struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
+/*
+ * We use as the mode the number of bits allocated in the LDR for the
+ * logical processor ID.  It happens that these are all powers of two.
+ * This makes it very easy to detect cases where the APICs are
+ * configured for multiple modes; in that case, we cannot use the map and
+ * hence cannot use kvm_irq_delivery_to_apic_fast either.
+ */
+#define KVM_APIC_MODE_XAPIC_CLUSTER          4
+#define KVM_APIC_MODE_XAPIC_FLAT             8
+#define KVM_APIC_MODE_X2APIC                16
+
 struct kvm_apic_map {
        struct rcu_head rcu;
-       u8 ldr_bits;
-       /* fields bellow are used to decode ldr values in different modes */
-       u32 cid_shift, cid_mask, lid_mask, broadcast;
+       u8 mode;
        struct kvm_lapic *phys_map[256];
        /* first index is cluster id second is cpu id in a cluster */
        struct kvm_lapic *logical_map[16][16];
@@ -859,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
                                      struct kvm_memory_slot *memslot);
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                                       struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@ -933,6 +943,7 @@ struct x86_emulate_ctxt;
 int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
 void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -1128,7 +1139,6 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
 int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
index e62cf89..c1adf33 100644 (file)
@@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void)
 
 static inline bool kvm_para_available(void)
 {
-       return 0;
+       return false;
 }
 
 static inline unsigned int kvm_arch_para_features(void)
index a1410db..653dfa7 100644 (file)
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
                     :: "a" (eax), "c" (ecx));
 }
 
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+       trace_hardirqs_on();
+       /* "mwait %eax, %ecx;" */
+       asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+                    :: "a" (eax), "c" (ecx));
+}
+
 /*
  * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
  * which can obviate IPI to trigger checking of need_resched.
index 95e11f7..f97fbe3 100644 (file)
@@ -51,8 +51,6 @@ extern int devmem_is_allowed(unsigned long pagenr);
 extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
-extern bool kaslr_enabled;
-
 static inline phys_addr_t get_max_mapped(void)
 {
        return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
index 965c47d..5f6051d 100644 (file)
@@ -976,11 +976,6 @@ extern void default_banner(void);
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),       \
                  CLBR_NONE,                                            \
                  jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
-
-#define ENABLE_INTERRUPTS_SYSEXIT32                                    \
-       PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit),    \
-                 CLBR_NONE,                                            \
-                 jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
 #endif /* CONFIG_X86_32 */
 
 #endif /* __ASSEMBLY__ */
index fa1195d..164e3f8 100644 (file)
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
        int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
                                                int reg, int len, u32 *val);
index ec1c935..d2203b5 100644 (file)
@@ -210,8 +210,23 @@ struct x86_hw_tss {
        unsigned long           sp0;
        unsigned short          ss0, __ss0h;
        unsigned long           sp1;
-       /* ss1 caches MSR_IA32_SYSENTER_CS: */
-       unsigned short          ss1, __ss1h;
+
+       /*
+        * We don't use ring 1, so ss1 is a convenient scratch space in
+        * the same cacheline as sp0.  We use ss1 to cache the value in
+        * MSR_IA32_SYSENTER_CS.  When we context switch
+        * MSR_IA32_SYSENTER_CS, we first check if the new value being
+        * written matches ss1, and, if it doesn't, then we wrmsr the new
+        * value and update ss1.
+        *
+        * The only reason we context switch MSR_IA32_SYSENTER_CS is
+        * that we set it to zero in vm86 tasks to avoid corrupting the
+        * stack if we were to go through the sysenter path from vm86
+        * mode.
+        */
+       unsigned short          ss1;    /* MSR_IA32_SYSENTER_CS */
+
+       unsigned short          __ss1h;
        unsigned long           sp2;
        unsigned short          ss2, __ss2h;
        unsigned long           __cr3;
@@ -276,13 +291,17 @@ struct tss_struct {
        unsigned long           io_bitmap[IO_BITMAP_LONGS + 1];
 
        /*
-        * .. and then another 0x100 bytes for the emergency kernel stack:
+        * Space for the temporary SYSENTER stack:
         */
-       unsigned long           stack[64];
+       unsigned long           SYSENTER_stack[64];
 
 } ____cacheline_aligned;
 
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
 
 /*
  * Save the original ist values for checking stack pointers during debugging
@@ -474,7 +493,6 @@ struct thread_struct {
 #ifdef CONFIG_X86_32
        unsigned long           sysenter_cs;
 #else
-       unsigned long           usersp; /* Copy from PDA */
        unsigned short          es;
        unsigned short          ds;
        unsigned short          fsindex;
@@ -564,6 +582,16 @@ static inline void native_swapgs(void)
 #endif
 }
 
+static inline unsigned long current_top_of_stack(void)
+{
+#ifdef CONFIG_X86_64
+       return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+       /* sp0 on x86_32 is special in and around vm86 mode. */
+       return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
@@ -761,10 +789,10 @@ extern char                       ignore_fpu_irq;
 #define ARCH_HAS_SPINLOCK_PREFETCH
 
 #ifdef CONFIG_X86_32
-# define BASE_PREFETCH         ASM_NOP4
+# define BASE_PREFETCH         ""
 # define ARCH_HAS_PREFETCH
 #else
-# define BASE_PREFETCH         "prefetcht0 (%1)"
+# define BASE_PREFETCH         "prefetcht0 %P1"
 #endif
 
 /*
@@ -775,10 +803,9 @@ extern char                        ignore_fpu_irq;
  */
 static inline void prefetch(const void *x)
 {
-       alternative_input(BASE_PREFETCH,
-                         "prefetchnta (%1)",
+       alternative_input(BASE_PREFETCH, "prefetchnta %P1",
                          X86_FEATURE_XMM,
-                         "r" (x));
+                         "m" (*(const char *)x));
 }
 
 /*
@@ -788,10 +815,9 @@ static inline void prefetch(const void *x)
  */
 static inline void prefetchw(const void *x)
 {
-       alternative_input(BASE_PREFETCH,
-                         "prefetchw (%1)",
-                         X86_FEATURE_3DNOW,
-                         "r" (x));
+       alternative_input(BASE_PREFETCH, "prefetchw %P1",
+                         X86_FEATURE_3DNOWPREFETCH,
+                         "m" (*(const char *)x));
 }
 
 static inline void spin_lock_prefetch(const void *x)
@@ -799,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x)
        prefetchw(x);
 }
 
+#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
+                          TOP_OF_KERNEL_STACK_PADDING)
+
 #ifdef CONFIG_X86_32
 /*
  * User space process size: 3GB (default).
@@ -809,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x)
 #define STACK_TOP_MAX          STACK_TOP
 
 #define INIT_THREAD  {                                                   \
-       .sp0                    = sizeof(init_stack) + (long)&init_stack, \
+       .sp0                    = TOP_OF_INIT_STACK,                      \
        .vm86_info              = NULL,                                   \
        .sysenter_cs            = __KERNEL_CS,                            \
        .io_bitmap_ptr          = NULL,                                   \
 }
 
-/*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
-#define INIT_TSS  {                                                      \
-       .x86_tss = {                                                      \
-               .sp0            = sizeof(init_stack) + (long)&init_stack, \
-               .ss0            = __KERNEL_DS,                            \
-               .ss1            = __KERNEL_CS,                            \
-               .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,               \
-        },                                                               \
-       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },       \
-}
-
 extern unsigned long thread_saved_pc(struct task_struct *tsk);
 
-#define THREAD_SIZE_LONGS      (THREAD_SIZE/sizeof(unsigned long))
-#define KSTK_TOP(info)                                                 \
-({                                                                     \
-       unsigned long *__ptr = (unsigned long *)(info);                 \
-       (unsigned long)(&__ptr[THREAD_SIZE_LONGS]);                     \
-})
-
 /*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
  * This is necessary to guarantee that the entire "struct pt_regs"
  * is accessible even if the CPU haven't stored the SS/ESP registers
  * on the stack (interrupt gate does not save these registers
@@ -850,11 +856,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
  * "struct pt_regs" is possible, but they may contain the
  * completely wrong values.
  */
-#define task_pt_regs(task)                                             \
-({                                                                     \
-       struct pt_regs *__regs__;                                       \
-       __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
-       __regs__ - 1;                                                   \
+#define task_pt_regs(task) \
+({                                                                     \
+       unsigned long __ptr = (unsigned long)task_stack_page(task);     \
+       __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;             \
+       ((struct pt_regs *)__ptr) - 1;                                  \
 })
 
 #define KSTK_ESP(task)         (task_pt_regs(task)->sp)
@@ -886,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define STACK_TOP_MAX          TASK_SIZE_MAX
 
 #define INIT_THREAD  { \
-       .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define INIT_TSS  { \
-       .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+       .sp0 = TOP_OF_INIT_STACK \
 }
 
 /*
@@ -902,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 #define task_pt_regs(tsk)      ((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
-/*
- * User space RSP while inside the SYSCALL fast path
- */
-DECLARE_PER_CPU(unsigned long, old_rsp);
-
 #endif /* CONFIG_X86_64 */
 
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
index 86fc2bb..19507ff 100644 (file)
@@ -31,13 +31,17 @@ struct pt_regs {
 #else /* __i386__ */
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
        unsigned long r15;
        unsigned long r14;
        unsigned long r13;
        unsigned long r12;
        unsigned long bp;
        unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
        unsigned long r11;
        unsigned long r10;
        unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
        unsigned long dx;
        unsigned long si;
        unsigned long di;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
        unsigned long orig_ax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
        unsigned long ip;
        unsigned long cs;
        unsigned long flags;
@@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
 }
 
 /*
- * user_mode_vm(regs) determines whether a register set came from user mode.
- * This is true if V8086 mode was enabled OR if the register set was from
- * protected mode with RPL-3 CS value.  This tricky test checks that with
- * one comparison.  Many places in the kernel can bypass this full check
- * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ * user_mode(regs) determines whether a register set came from user
+ * mode.  On x86_32, this is true if V8086 mode was enabled OR if the
+ * register set was from protected mode with RPL-3 CS value.  This
+ * tricky test checks that with one comparison.
+ *
+ * On x86_64, vm86 mode is mercifully nonexistent, and we don't need
+ * the extra check.
  */
 static inline int user_mode(struct pt_regs *regs)
 {
@@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
 #endif
 }
 
-static inline int user_mode_vm(struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_32
-       return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
-               USER_RPL;
-#else
-       return user_mode(regs);
-#endif
-}
-
 static inline int v8086_mode(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
@@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 #endif
 }
 
-#define current_user_stack_pointer()   this_cpu_read(old_rsp)
-/* ia32 vs. x32 difference */
-#define compat_user_stack_pointer()    \
-       (test_thread_flag(TIF_IA32)     \
-        ? current_pt_regs()->sp        \
-        : this_cpu_read(old_rsp))
+#define current_user_stack_pointer()   current_pt_regs()->sp
+#define compat_user_stack_pointer()    current_pt_regs()->sp
 #endif
 
 #ifdef CONFIG_X86_32
@@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
  */
 #define arch_ptrace_stop_needed(code, info)                            \
 ({                                                                     \
-       set_thread_flag(TIF_NOTIFY_RESUME);                             \
+       force_iret();                                                   \
        false;                                                          \
 })
 
index d6b078e..25b1cc0 100644 (file)
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
 
 struct pvclock_vsyscall_time_info {
        struct pvclock_vcpu_time_info pvti;
+       u32 migrate_count;
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
 #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
index db257a5..5a9856e 100644 (file)
@@ -3,8 +3,10 @@
 
 #include <linux/const.h>
 
-/* Constructor for a conventional segment GDT (or LDT) entry */
-/* This is a macro so it can be used in initializers */
+/*
+ * Constructor for a conventional segment GDT (or LDT) entry.
+ * This is a macro so it can be used in initializers.
+ */
 #define GDT_ENTRY(flags, base, limit)                  \
        ((((base)  & _AC(0xff000000,ULL)) << (56-24)) | \
         (((flags) & _AC(0x0000f0ff,ULL)) << 40) |      \
         (((base)  & _AC(0x00ffffff,ULL)) << 16) |      \
         (((limit) & _AC(0x0000ffff,ULL))))
 
-/* Simple and small GDT entries for booting only */
+/* Simple and small GDT entries for booting only: */
 
 #define GDT_ENTRY_BOOT_CS      2
-#define __BOOT_CS              (GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS      3
+#define GDT_ENTRY_BOOT_TSS     4
+#define __BOOT_CS              (GDT_ENTRY_BOOT_CS*8)
+#define __BOOT_DS              (GDT_ENTRY_BOOT_DS*8)
+#define __BOOT_TSS             (GDT_ENTRY_BOOT_TSS*8)
+
+/*
+ * Bottom two bits of selector give the ring
+ * privilege level
+ */
+#define SEGMENT_RPL_MASK       0x3
 
-#define GDT_ENTRY_BOOT_DS      (GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS              (GDT_ENTRY_BOOT_DS * 8)
+/* User mode is privilege level 3: */
+#define USER_RPL               0x3
 
-#define GDT_ENTRY_BOOT_TSS     (GDT_ENTRY_BOOT_CS + 2)
-#define __BOOT_TSS             (GDT_ENTRY_BOOT_TSS * 8)
+/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
+#define SEGMENT_TI_MASK                0x4
+/* LDT segment has TI set ... */
+#define SEGMENT_LDT            0x4
+/* ... GDT has it cleared */
+#define SEGMENT_GDT            0x0
 
-#define SEGMENT_RPL_MASK       0x3 /*
-                                    * Bottom two bits of selector give the ring
-                                    * privilege level
-                                    */
-#define SEGMENT_TI_MASK                0x4 /* Bit 2 is table indicator (LDT/GDT) */
-#define USER_RPL               0x3 /* User mode is privilege level 3 */
-#define SEGMENT_LDT            0x4 /* LDT segment has TI set... */
-#define SEGMENT_GDT            0x0 /* ... GDT has it cleared */
+#define GDT_ENTRY_INVALID_SEG  0
 
 #ifdef CONFIG_X86_32
 /*
  * The layout of the per-CPU GDT under Linux:
  *
- *   0 - null
+ *   0 - null                                                          <=== cacheline #1
  *   1 - reserved
  *   2 - reserved
  *   3 - reserved
  *
- *   4 - unused                        <==== new cacheline
+ *   4 - unused                                                                <=== cacheline #2
  *   5 - unused
  *
  *  ------- start of TLS (Thread-Local Storage) segments:
  *
  *   6 - TLS segment #1                        [ glibc's TLS segment ]
  *   7 - TLS segment #2                        [ Wine's %fs Win32 segment ]
- *   8 - TLS segment #3
+ *   8 - TLS segment #3                                                        <=== cacheline #3
  *   9 - reserved
  *  10 - reserved
  *  11 - reserved
  *
  *  ------- start of kernel segments:
  *
- *  12 - kernel code segment           <==== new cacheline
+ *  12 - kernel code segment                                           <=== cacheline #4
  *  13 - kernel data segment
  *  14 - default user CS
  *  15 - default user DS
- *  16 - TSS
+ *  16 - TSS                                                           <=== cacheline #5
  *  17 - LDT
  *  18 - PNPBIOS support (16->32 gate)
  *  19 - PNPBIOS support
- *  20 - PNPBIOS support
+ *  20 - PNPBIOS support                                               <=== cacheline #6
  *  21 - PNPBIOS support
  *  22 - PNPBIOS support
  *  23 - APM BIOS support
- *  24 - APM BIOS support
+ *  24 - APM BIOS support                                              <=== cacheline #7
  *  25 - APM BIOS support
  *
  *  26 - ESPFIX small SS
  *  27 - per-cpu                       [ offset to per-cpu data area ]
- *  28 - stack_canary-20               [ for stack protector ]
+ *  28 - stack_canary-20               [ for stack protector ]         <=== cacheline #8
  *  29 - unused
  *  30 - unused
  *  31 - TSS for double fault handler
  */
-#define GDT_ENTRY_TLS_MIN      6
-#define GDT_ENTRY_TLS_MAX      (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+#define GDT_ENTRY_TLS_MIN              6
+#define GDT_ENTRY_TLS_MAX              (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
 
+#define GDT_ENTRY_KERNEL_CS            12
+#define GDT_ENTRY_KERNEL_DS            13
 #define GDT_ENTRY_DEFAULT_USER_CS      14
-
 #define GDT_ENTRY_DEFAULT_USER_DS      15
+#define GDT_ENTRY_TSS                  16
+#define GDT_ENTRY_LDT                  17
+#define GDT_ENTRY_PNPBIOS_CS32         18
+#define GDT_ENTRY_PNPBIOS_CS16         19
+#define GDT_ENTRY_PNPBIOS_DS           20
+#define GDT_ENTRY_PNPBIOS_TS1          21
+#define GDT_ENTRY_PNPBIOS_TS2          22
+#define GDT_ENTRY_APMBIOS_BASE         23
+
+#define GDT_ENTRY_ESPFIX_SS            26
+#define GDT_ENTRY_PERCPU               27
+#define GDT_ENTRY_STACK_CANARY         28
+
+#define GDT_ENTRY_DOUBLEFAULT_TSS      31
 
-#define GDT_ENTRY_KERNEL_BASE          (12)
+/*
+ * Number of entries in the GDT table:
+ */
+#define GDT_ENTRIES                    32
 
-#define GDT_ENTRY_KERNEL_CS            (GDT_ENTRY_KERNEL_BASE+0)
+/*
+ * Segment selector values corresponding to the above entries:
+ */
 
-#define GDT_ENTRY_KERNEL_DS            (GDT_ENTRY_KERNEL_BASE+1)
+#define __KERNEL_CS                    (GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS                    (GDT_ENTRY_KERNEL_DS*8)
+#define __USER_DS                      (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER_CS                      (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __ESPFIX_SS                    (GDT_ENTRY_ESPFIX_SS*8)
 
-#define GDT_ENTRY_TSS                  (GDT_ENTRY_KERNEL_BASE+4)
-#define GDT_ENTRY_LDT                  (GDT_ENTRY_KERNEL_BASE+5)
+/* segment for calling fn: */
+#define PNP_CS32                       (GDT_ENTRY_PNPBIOS_CS32*8)
+/* code segment for BIOS: */
+#define PNP_CS16                       (GDT_ENTRY_PNPBIOS_CS16*8)
 
-#define GDT_ENTRY_PNPBIOS_BASE         (GDT_ENTRY_KERNEL_BASE+6)
-#define GDT_ENTRY_APMBIOS_BASE         (GDT_ENTRY_KERNEL_BASE+11)
+/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
+#define SEGMENT_IS_PNP_CODE(x)         (((x) & 0xf4) == PNP_CS32)
 
-#define GDT_ENTRY_ESPFIX_SS            (GDT_ENTRY_KERNEL_BASE+14)
-#define __ESPFIX_SS                    (GDT_ENTRY_ESPFIX_SS*8)
+/* data segment for BIOS: */
+#define PNP_DS                         (GDT_ENTRY_PNPBIOS_DS*8)
+/* transfer data segment: */
+#define PNP_TS1                                (GDT_ENTRY_PNPBIOS_TS1*8)
+/* another data segment: */
+#define PNP_TS2                                (GDT_ENTRY_PNPBIOS_TS2*8)
 
-#define GDT_ENTRY_PERCPU               (GDT_ENTRY_KERNEL_BASE+15)
 #ifdef CONFIG_SMP
-#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
+# define __KERNEL_PERCPU               (GDT_ENTRY_PERCPU*8)
 #else
-#define __KERNEL_PERCPU 0
+# define __KERNEL_PERCPU               0
 #endif
 
-#define GDT_ENTRY_STACK_CANARY         (GDT_ENTRY_KERNEL_BASE+16)
 #ifdef CONFIG_CC_STACKPROTECTOR
-#define __KERNEL_STACK_CANARY          (GDT_ENTRY_STACK_CANARY*8)
+# define __KERNEL_STACK_CANARY         (GDT_ENTRY_STACK_CANARY*8)
 #else
-#define __KERNEL_STACK_CANARY          0
+# define __KERNEL_STACK_CANARY         0
 #endif
 
-#define GDT_ENTRY_DOUBLEFAULT_TSS      31
-
-/*
- * The GDT has 32 entries
- */
-#define GDT_ENTRIES 32
+#else /* 64-bit: */
 
-/* The PnP BIOS entries in the GDT */
-#define GDT_ENTRY_PNPBIOS_CS32         (GDT_ENTRY_PNPBIOS_BASE + 0)
-#define GDT_ENTRY_PNPBIOS_CS16         (GDT_ENTRY_PNPBIOS_BASE + 1)
-#define GDT_ENTRY_PNPBIOS_DS           (GDT_ENTRY_PNPBIOS_BASE + 2)
-#define GDT_ENTRY_PNPBIOS_TS1          (GDT_ENTRY_PNPBIOS_BASE + 3)
-#define GDT_ENTRY_PNPBIOS_TS2          (GDT_ENTRY_PNPBIOS_BASE + 4)
-
-/* The PnP BIOS selectors */
-#define PNP_CS32   (GDT_ENTRY_PNPBIOS_CS32 * 8)        /* segment for calling fn */
-#define PNP_CS16   (GDT_ENTRY_PNPBIOS_CS16 * 8)        /* code segment for BIOS */
-#define PNP_DS     (GDT_ENTRY_PNPBIOS_DS * 8)  /* data segment for BIOS */
-#define PNP_TS1    (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
-#define PNP_TS2    (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
+#include <asm/cache.h>
 
+#define GDT_ENTRY_KERNEL32_CS          1
+#define GDT_ENTRY_KERNEL_CS            2
+#define GDT_ENTRY_KERNEL_DS            3
 
 /*
- * Matching rules for certain types of segments.
+ * We cannot use the same code segment descriptor for user and kernel mode,
+ * not even in long flat mode, because of different DPL.
+ *
+ * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
+ * selectors:
+ *
+ *   if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
+ *   if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
+ *
+ * ss = STAR.SYSRET_CS+8 (in either case)
+ *
+ * thus USER_DS should be between 32-bit and 64-bit code selectors:
  */
+#define GDT_ENTRY_DEFAULT_USER32_CS    4
+#define GDT_ENTRY_DEFAULT_USER_DS      5
+#define GDT_ENTRY_DEFAULT_USER_CS      6
 
-/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
-#define SEGMENT_IS_PNP_CODE(x)   (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
-
+/* Needs two entries */
+#define GDT_ENTRY_TSS                  8
+/* Needs two entries */
+#define GDT_ENTRY_LDT                  10
 
-#else
-#include <asm/cache.h>
-
-#define GDT_ENTRY_KERNEL32_CS 1
-#define GDT_ENTRY_KERNEL_CS 2
-#define GDT_ENTRY_KERNEL_DS 3
+#define GDT_ENTRY_TLS_MIN              12
+#define GDT_ENTRY_TLS_MAX              14
 
-#define __KERNEL32_CS   (GDT_ENTRY_KERNEL32_CS * 8)
+/* Abused to load per CPU data from limit */
+#define GDT_ENTRY_PER_CPU              15
 
 /*
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ * Number of entries in the GDT table:
  */
-#define GDT_ENTRY_DEFAULT_USER32_CS 4
-#define GDT_ENTRY_DEFAULT_USER_DS 5
-#define GDT_ENTRY_DEFAULT_USER_CS 6
-#define __USER32_CS   (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
-#define __USER32_DS    __USER_DS
-
-#define GDT_ENTRY_TSS 8        /* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-
-#define GDT_ENTRY_PER_CPU 15   /* Abused to load per CPU data from limit */
-#define __PER_CPU_SEG  (GDT_ENTRY_PER_CPU * 8 + 3)
+#define GDT_ENTRIES                    16
 
-/* TLS indexes for 64bit - hardcoded in arch_prctl */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
-#define GDT_ENTRIES 16
+/*
+ * Segment selector values corresponding to the above entries:
+ *
+ * Note, selectors also need to have a correct RPL,
+ * expressed with the +3 value for user-space selectors:
+ */
+#define __KERNEL32_CS                  (GDT_ENTRY_KERNEL32_CS*8)
+#define __KERNEL_CS                    (GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS                    (GDT_ENTRY_KERNEL_DS*8)
+#define __USER32_CS                    (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
+#define __USER_DS                      (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER32_DS                    __USER_DS
+#define __USER_CS                      (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __PER_CPU_SEG                  (GDT_ENTRY_PER_CPU*8 + 3)
+
+/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
+#define FS_TLS                         0
+#define GS_TLS                         1
+
+#define GS_TLS_SEL                     ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
+#define FS_TLS_SEL                     ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
 
 #endif
 
-#define __KERNEL_CS    (GDT_ENTRY_KERNEL_CS*8)
-#define __KERNEL_DS    (GDT_ENTRY_KERNEL_DS*8)
-#define __USER_DS      (GDT_ENTRY_DEFAULT_USER_DS*8+3)
-#define __USER_CS      (GDT_ENTRY_DEFAULT_USER_CS*8+3)
 #ifndef CONFIG_PARAVIRT
-#define get_kernel_rpl()  0
+# define get_kernel_rpl()              0
 #endif
 
-#define IDT_ENTRIES 256
-#define NUM_EXCEPTION_VECTORS 32
-/* Bitmask of exception vectors which push an error code on the stack */
-#define EXCEPTION_ERRCODE_MASK  0x00027d00
-#define GDT_SIZE (GDT_ENTRIES * 8)
-#define GDT_ENTRY_TLS_ENTRIES 3
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+#define IDT_ENTRIES                    256
+#define NUM_EXCEPTION_VECTORS          32
+
+/* Bitmask of exception vectors which push an error code on the stack: */
+#define EXCEPTION_ERRCODE_MASK         0x00027d00
+
+#define GDT_SIZE                       (GDT_ENTRIES*8)
+#define GDT_ENTRY_TLS_ENTRIES          3
+#define TLS_SIZE                       (GDT_ENTRY_TLS_ENTRIES* 8)
 
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
+
 extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
 #ifdef CONFIG_TRACING
-#define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handlers early_idt_handlers
 #endif
 
 /*
@@ -228,37 +260,30 @@ do {                                                                      \
 } while (0)
 
 /*
- * Save a segment register away
+ * Save a segment register away:
  */
 #define savesegment(seg, value)                                \
        asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
 
 /*
- * x86_32 user gs accessors.
+ * x86-32 user GS accessors:
  */
 #ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_32_LAZY_GS
-#define get_user_gs(regs)      (u16)({unsigned long v; savesegment(gs, v); v;})
-#define set_user_gs(regs, v)   loadsegment(gs, (unsigned long)(v))
-#define task_user_gs(tsk)      ((tsk)->thread.gs)
-#define lazy_save_gs(v)                savesegment(gs, (v))
-#define lazy_load_gs(v)                loadsegment(gs, (v))
-#else  /* X86_32_LAZY_GS */
-#define get_user_gs(regs)      (u16)((regs)->gs)
-#define set_user_gs(regs, v)   do { (regs)->gs = (v); } while (0)
-#define task_user_gs(tsk)      (task_pt_regs(tsk)->gs)
-#define lazy_save_gs(v)                do { } while (0)
-#define lazy_load_gs(v)                do { } while (0)
-#endif /* X86_32_LAZY_GS */
+# ifdef CONFIG_X86_32_LAZY_GS
+#  define get_user_gs(regs)            (u16)({ unsigned long v; savesegment(gs, v); v; })
+#  define set_user_gs(regs, v)         loadsegment(gs, (unsigned long)(v))
+#  define task_user_gs(tsk)            ((tsk)->thread.gs)
+#  define lazy_save_gs(v)              savesegment(gs, (v))
+#  define lazy_load_gs(v)              loadsegment(gs, (v))
+# else /* X86_32_LAZY_GS */
+#  define get_user_gs(regs)            (u16)((regs)->gs)
+#  define set_user_gs(regs, v)         do { (regs)->gs = (v); } while (0)
+#  define task_user_gs(tsk)            (task_pt_regs(tsk)->gs)
+#  define lazy_save_gs(v)              do { } while (0)
+#  define lazy_load_gs(v)              do { } while (0)
+# endif        /* X86_32_LAZY_GS */
 #endif /* X86_32 */
 
-static inline unsigned long get_limit(unsigned long segment)
-{
-       unsigned long __limit;
-       asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
-       return __limit + 1;
-}
-
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 
index ff4e7b2..f69e06b 100644 (file)
@@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
  */
 extern struct boot_params boot_params;
 
+static inline bool kaslr_enabled(void)
+{
+       return !!(boot_params.hdr.loadflags & KASLR_FLAG);
+}
+
 /*
  * Do NOT EVER look at the BIOS memory size location.
  * It does not work on many machines.
index 9dfce4e..6fe6b18 100644 (file)
@@ -57,9 +57,9 @@ struct sigcontext {
        unsigned long ip;
        unsigned long flags;
        unsigned short cs;
-       unsigned short gs;
-       unsigned short fs;
-       unsigned short __pad0;
+       unsigned short __pad2;  /* Was called gs, but was always zero. */
+       unsigned short __pad1;  /* Was called fs, but was always zero. */
+       unsigned short ss;
        unsigned long err;
        unsigned long trapno;
        unsigned long oldmask;
index 7a95816..89db467 100644 (file)
@@ -13,9 +13,7 @@
                         X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-                      unsigned long *pax);
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
 int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
                     struct pt_regs *regs, unsigned long mask);
 
index 8d3120f..ba665eb 100644 (file)
 
 #ifdef CONFIG_X86_SMAP
 
-#define ASM_CLAC                                                       \
-       661: ASM_NOP3 ;                                                 \
-       .pushsection .altinstr_replacement, "ax" ;                      \
-       662: __ASM_CLAC ;                                               \
-       .popsection ;                                                   \
-       .pushsection .altinstructions, "a" ;                            \
-       altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;       \
-       .popsection
-
-#define ASM_STAC                                                       \
-       661: ASM_NOP3 ;                                                 \
-       .pushsection .altinstr_replacement, "ax" ;                      \
-       662: __ASM_STAC ;                                               \
-       .popsection ;                                                   \
-       .pushsection .altinstructions, "a" ;                            \
-       altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ;       \
-       .popsection
+#define ASM_CLAC \
+       ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
+
+#define ASM_STAC \
+       ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
 
 #else /* CONFIG_X86_SMAP */
 
 static __always_inline void clac(void)
 {
        /* Note: a barrier is implicit in alternative() */
-       alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+       alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
 }
 
 static __always_inline void stac(void)
 {
        /* Note: a barrier is implicit in alternative() */
-       alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+       alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
 }
 
 /* These macros can be used in asm() statements */
 #define ASM_CLAC \
-       ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+       ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
 #define ASM_STAC \
-       ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+       ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
 
 #else /* CONFIG_X86_SMAP */
 
index 8cd1cc3..81d02fc 100644 (file)
@@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu);
 void native_smp_prepare_boot_cpu(void);
 void native_smp_prepare_cpus(unsigned int max_cpus);
 void native_smp_cpus_done(unsigned int max_cpus);
+void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
 int native_cpu_disable(void);
 void native_cpu_die(unsigned int cpu);
index 6a4b00f..aeb4666 100644 (file)
@@ -4,6 +4,8 @@
 
 #ifdef __KERNEL__
 
+#include <asm/nops.h>
+
 static inline void native_clts(void)
 {
        asm volatile("clts");
@@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
                       "+m" (*(volatile char __force *)__p));
 }
 
+static inline void clwb(volatile void *__p)
+{
+       volatile struct { char x[64]; } *p = __p;
+
+       asm volatile(ALTERNATIVE_2(
+               ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
+               ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
+               X86_FEATURE_CLFLUSHOPT,
+               ".byte 0x66, 0x0f, 0xae, 0x30",  /* clwb (%%rax) */
+               X86_FEATURE_CLWB)
+               : [p] "+m" (*p)
+               : [pax] "a" (p));
+}
+
+static inline void pcommit_sfence(void)
+{
+       alternative(ASM_NOP7,
+                   ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
+                   "sfence",
+                   X86_FEATURE_PCOMMIT);
+}
+
 #define nop() asm volatile ("nop")
 
 
index 1d4e4f2..ea2dbe8 100644 (file)
 #include <asm/percpu.h>
 #include <asm/types.h>
 
+/*
+ * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
+ * reserve at the top of the kernel stack.  We do it because of a nasty
+ * 32-bit corner case.  On x86_32, the hardware stack frame is
+ * variable-length.  Except for vm86 mode, struct pt_regs assumes a
+ * maximum-length frame.  If we enter from CPL 0, the top 8 bytes of
+ * pt_regs don't actually exist.  Ordinarily this doesn't matter, but it
+ * does in at least one case:
+ *
+ * If we take an NMI early enough in SYSENTER, then we can end up with
+ * pt_regs that extends above sp0.  On the way out, in the espfix code,
+ * we can read the saved SS value, but that value will be above sp0.
+ * Without this offset, that can result in a page fault.  (We are
+ * careful that, in this case, the value we read doesn't matter.)
+ *
+ * In vm86 mode, the hardware frame is much longer still, but we neither
+ * access the extra members from NMI context, nor do we write such a
+ * frame at sp0 at all.
+ *
+ * x86_64 has a fixed-length stack frame.
+ */
+#ifdef CONFIG_X86_32
+# define TOP_OF_KERNEL_STACK_PADDING 8
+#else
+# define TOP_OF_KERNEL_STACK_PADDING 0
+#endif
+
 /*
  * low level task data that entry.S needs immediate access to
  * - this struct should fit entirely inside of one cache line
@@ -145,7 +172,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
 #define STACK_WARN             (THREAD_SIZE/8)
-#define KERNEL_STACK_OFFSET    (5*(BITS_PER_LONG/8))
 
 /*
  * macros/functions for gaining access to the thread information structure
@@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
 
 static inline struct thread_info *current_thread_info(void)
 {
-       struct thread_info *ti;
-       ti = (void *)(this_cpu_read_stable(kernel_stack) +
-                     KERNEL_STACK_OFFSET - THREAD_SIZE);
-       return ti;
+       return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
 }
 
 static inline unsigned long current_stack_pointer(void)
@@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void)
 
 #else /* !__ASSEMBLY__ */
 
-/* how to get the thread information struct from ASM */
+/* Load thread_info address into "reg" */
 #define GET_THREAD_INFO(reg) \
        _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
-       _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
+       _ASM_SUB $(THREAD_SIZE),reg ;
 
 /*
- * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
- * a certain register (to be used in assembler memory operands).
+ * ASM operand which evaluates to a 'thread_info' address of
+ * the current task, if it is known that "reg" is exactly "off"
+ * bytes below the top of the stack currently.
+ *
+ * ( The kernel stack's size is known at build time, it is usually
+ *   2 or 4 pages, and the bottom of the kernel stack contains
+ *   the thread_info structure. So to access the thread_info very
+ *   quickly from assembly code we can calculate down from the
+ *   top of the kernel stack to the bottom, using constant,
+ *   build-time calculations only. )
+ *
+ * For example, to fetch the current thread_info->flags value into %eax
+ * on x86-64 defconfig kernels, in syscall entry code where RSP is
+ * currently at exactly SIZEOF_PTREGS bytes away from the top of the
+ * stack:
+ *
+ *      mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
+ *
+ * will translate to:
+ *
+ *      8b 84 24 b8 c0 ff ff      mov    -0x3f48(%rsp), %eax
+ *
+ * which is below the current RSP by almost 16K.
  */
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
 
 #endif
 
@@ -236,6 +280,16 @@ static inline bool is_ia32_task(void)
 #endif
        return false;
 }
+
+/*
+ * Force syscall return via IRET by making it look as if there was
+ * some work pending. IRET is our most capable (but slowest) syscall
+ * return path, which is able to restore modified SS, CS and certain
+ * EFLAGS values that other (fast) syscall return instructions
+ * are not able to restore properly.
+ */
+#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
+
 #endif /* !__ASSEMBLY__ */
 
 #ifndef __ASSEMBLY__
index 12a26b9..f2f9b39 100644 (file)
@@ -231,6 +231,6 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
 }
 
 unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
+copy_user_handle_tail(char *to, char *from, unsigned len);
 
 #endif /* _ASM_X86_UACCESS_64_H */
index 44e6dd7..ab456dc 100644 (file)
@@ -7,7 +7,6 @@
 #define SETUP_DTB                      2
 #define SETUP_PCI                      3
 #define SETUP_EFI                      4
-#define SETUP_KASLR                    5
 
 /* ram_size flags */
 #define RAMDISK_IMAGE_START_MASK       0x07FF
@@ -16,6 +15,7 @@
 
 /* loadflags */
 #define LOADED_HIGH    (1<<0)
+#define KASLR_FLAG     (1<<1)
 #define QUIET_FLAG     (1<<5)
 #define KEEP_SEGMENTS  (1<<6)
 #define CAN_USE_HEAP   (1<<7)
index 7b0a55a..580aee3 100644 (file)
 #else /* __i386__ */
 
 #if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
 #define R15 0
 #define R14 8
 #define R13 16
 #define R12 24
 #define RBP 32
 #define RBX 40
-/* arguments: interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
 #define R11 48
 #define R10 56
 #define R9 64
 #define RDX 96
 #define RSI 104
 #define RDI 112
-#define ORIG_RAX 120       /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 120
+/* Return frame for iretq */
 #define RIP 128
 #define CS 136
 #define EFLAGS 144
 #define RSP 152
 #define SS 160
-#define ARGOFFSET R11
 #endif /* __ASSEMBLY__ */
 
 /* top of stack page */
index ac4b9aa..bc16115 100644 (file)
@@ -41,13 +41,17 @@ struct pt_regs {
 #ifndef __KERNEL__
 
 struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
        unsigned long r15;
        unsigned long r14;
        unsigned long r13;
        unsigned long r12;
        unsigned long rbp;
        unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
        unsigned long r11;
        unsigned long r10;
        unsigned long r9;
@@ -57,9 +61,12 @@ struct pt_regs {
        unsigned long rdx;
        unsigned long rsi;
        unsigned long rdi;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
        unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
        unsigned long rip;
        unsigned long cs;
        unsigned long eflags;
index d8b9f90..16dc4e8 100644 (file)
@@ -177,9 +177,24 @@ struct sigcontext {
        __u64 rip;
        __u64 eflags;           /* RFLAGS */
        __u16 cs;
-       __u16 gs;
-       __u16 fs;
-       __u16 __pad0;
+
+       /*
+        * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+        * Linux saved and restored fs and gs in these slots.  This
+        * was counterproductive, as fsbase and gsbase were never
+        * saved, so arch_prctl was presumably unreliable.
+        *
+        * If these slots are ever needed for any other purpose, there
+        * is some risk that very old 64-bit binaries could get
+        * confused.  I doubt that many such binaries still work,
+        * though, since the same patch in 2.5.64 also removed the
+        * 64-bit set_thread_area syscall, so it appears that there is
+        * no TLS API that works in both pre- and post-2.5.64 kernels.
+        */
+       __u16 __pad2;           /* Was gs. */
+       __u16 __pad1;           /* Was fs. */
+
+       __u16 ss;
        __u64 err;
        __u64 trapno;
        __u64 oldmask;
index c5f1a1d..1fe9218 100644 (file)
@@ -67,6 +67,7 @@
 #define EXIT_REASON_EPT_VIOLATION       48
 #define EXIT_REASON_EPT_MISCONFIG       49
 #define EXIT_REASON_INVEPT              50
+#define EXIT_REASON_RDTSCP              51
 #define EXIT_REASON_PREEMPTION_TIMER    52
 #define EXIT_REASON_INVVPID             53
 #define EXIT_REASON_WBINVD              54
index cdb1b70..c887cd9 100644 (file)
@@ -32,6 +32,7 @@ obj-$(CONFIG_X86_32)  += i386_ksyms_32.o
 obj-$(CONFIG_X86_64)   += sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)   += mcount_64.o
 obj-y                  += syscall_$(BITS).o vsyscall_gtod.o
+obj-$(CONFIG_IA32_EMULATION)   += syscall_32.o
 obj-$(CONFIG_X86_VSYSCALL_EMULATION)   += vsyscall_64.o vsyscall_emu_64.o
 obj-$(CONFIG_X86_ESPFIX64)     += espfix_64.o
 obj-$(CONFIG_SYSFS)    += ksysfs.o
index 3d525c6..803b684 100644 (file)
@@ -1337,6 +1337,26 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
        return 0;
 }
 
+/*
+ * ACPI offers an alternative platform interface model that removes
+ * ACPI hardware requirements for platforms that do not implement
+ * the PC Architecture.
+ *
+ * We initialize the Hardware-reduced ACPI model here:
+ */
+static void __init acpi_reduced_hw_init(void)
+{
+       if (acpi_gbl_reduced_hardware) {
+               /*
+                * Override x86_init functions and bypass legacy pic
+                * in Hardware-reduced ACPI mode
+                */
+               x86_init.timers.timer_init      = x86_init_noop;
+               x86_init.irqs.pre_vector_init   = x86_init_noop;
+               legacy_pic                      = &null_legacy_pic;
+       }
+}
+
 /*
  * If your system is blacklisted here, but you find that acpi=force
  * works for you, please contact linux-acpi@vger.kernel.org
@@ -1536,6 +1556,11 @@ int __init early_acpi_boot_init(void)
         */
        early_acpi_process_madt();
 
+       /*
+        * Hardware-reduced ACPI mode initialization:
+        */
+       acpi_reduced_hw_init();
+
        return 0;
 }
 
index 703130f..aef6531 100644 (file)
@@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str)
 __setup("noreplace-paravirt", setup_noreplace_paravirt);
 #endif
 
-#define DPRINTK(fmt, ...)                              \
-do {                                                   \
-       if (debug_alternative)                          \
-               printk(KERN_DEBUG fmt, ##__VA_ARGS__);  \
+#define DPRINTK(fmt, args...)                                          \
+do {                                                                   \
+       if (debug_alternative)                                          \
+               printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args);   \
+} while (0)
+
+#define DUMP_BYTES(buf, len, fmt, args...)                             \
+do {                                                                   \
+       if (unlikely(debug_alternative)) {                              \
+               int j;                                                  \
+                                                                       \
+               if (!(len))                                             \
+                       break;                                          \
+                                                                       \
+               printk(KERN_DEBUG fmt, ##args);                         \
+               for (j = 0; j < (len) - 1; j++)                         \
+                       printk(KERN_CONT "%02hhx ", buf[j]);            \
+               printk(KERN_CONT "%02hhx\n", buf[j]);                   \
+       }                                                               \
 } while (0)
 
 /*
@@ -243,12 +258,89 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
 void *text_poke_early(void *addr, const void *opcode, size_t len);
 
-/* Replace instructions with better alternatives for this CPU type.
-   This runs before SMP is initialized to avoid SMP problems with
-   self modifying code. This implies that asymmetric systems where
-   APs have less capabilities than the boot processor are not handled.
-   Tough. Make sure you disable such features by hand. */
+/*
+ * Are we looking at a near JMP with a 1 or 4-byte displacement?
+ */
+static inline bool is_jmp(const u8 opcode)
+{
+       return opcode == 0xeb || opcode == 0xe9;
+}
+
+static void __init_or_module
+recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+{
+       u8 *next_rip, *tgt_rip;
+       s32 n_dspl, o_dspl;
+       int repl_len;
+
+       if (a->replacementlen != 5)
+               return;
+
+       o_dspl = *(s32 *)(insnbuf + 1);
+
+       /* next_rip of the replacement JMP */
+       next_rip = repl_insn + a->replacementlen;
+       /* target rip of the replacement JMP */
+       tgt_rip  = next_rip + o_dspl;
+       n_dspl = tgt_rip - orig_insn;
+
+       DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
+
+       if (tgt_rip - orig_insn >= 0) {
+               if (n_dspl - 2 <= 127)
+                       goto two_byte_jmp;
+               else
+                       goto five_byte_jmp;
+       /* negative offset */
+       } else {
+               if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
+                       goto two_byte_jmp;
+               else
+                       goto five_byte_jmp;
+       }
+
+two_byte_jmp:
+       n_dspl -= 2;
+
+       insnbuf[0] = 0xeb;
+       insnbuf[1] = (s8)n_dspl;
+       add_nops(insnbuf + 2, 3);
+
+       repl_len = 2;
+       goto done;
+
+five_byte_jmp:
+       n_dspl -= 5;
+
+       insnbuf[0] = 0xe9;
+       *(s32 *)&insnbuf[1] = n_dspl;
 
+       repl_len = 5;
+
+done:
+
+       DPRINTK("final displ: 0x%08x, JMP 0x%lx",
+               n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
+}
+
+static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+{
+       if (instr[0] != 0x90)
+               return;
+
+       add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+
+       DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+                  instr, a->instrlen - a->padlen, a->padlen);
+}
+
+/*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self modifying code.
+ * This implies that asymmetric systems where APs have fewer capabilities than
+ * the boot processor are not handled. Tough. Make sure you disable such
+ * features by hand.
+ */
 void __init_or_module apply_alternatives(struct alt_instr *start,
                                         struct alt_instr *end)
 {
@@ -256,10 +348,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
        u8 *instr, *replacement;
        u8 insnbuf[MAX_PATCH_LEN];
 
-       DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+       DPRINTK("alt table %p -> %p", start, end);
        /*
         * The scan order should be from start to end. A later scanned
-        * alternative code can overwrite a previous scanned alternative code.
+        * alternative code can overwrite previously scanned alternative code.
         * Some kernel functions (e.g. memcpy, memset, etc) use this order to
         * patch code.
         *
@@ -267,29 +359,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
         * order.
         */
        for (a = start; a < end; a++) {
+               int insnbuf_sz = 0;
+
                instr = (u8 *)&a->instr_offset + a->instr_offset;
                replacement = (u8 *)&a->repl_offset + a->repl_offset;
-               BUG_ON(a->replacementlen > a->instrlen);
                BUG_ON(a->instrlen > sizeof(insnbuf));
                BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
-               if (!boot_cpu_has(a->cpuid))
+               if (!boot_cpu_has(a->cpuid)) {
+                       if (a->padlen > 1)
+                               optimize_nops(a, instr);
+
                        continue;
+               }
+
+               DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
+                       a->cpuid >> 5,
+                       a->cpuid & 0x1f,
+                       instr, a->instrlen,
+                       replacement, a->replacementlen, a->padlen);
+
+               DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
+               DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
 
                memcpy(insnbuf, replacement, a->replacementlen);
+               insnbuf_sz = a->replacementlen;
 
                /* 0xe8 is a relative jump; fix the offset. */
-               if (*insnbuf == 0xe8 && a->replacementlen == 5)
-                   *(s32 *)(insnbuf + 1) += replacement - instr;
+               if (*insnbuf == 0xe8 && a->replacementlen == 5) {
+                       *(s32 *)(insnbuf + 1) += replacement - instr;
+                       DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
+                               *(s32 *)(insnbuf + 1),
+                               (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+               }
+
+               if (a->replacementlen && is_jmp(replacement[0]))
+                       recompute_jump(a, instr, replacement, insnbuf);
 
-               add_nops(insnbuf + a->replacementlen,
-                        a->instrlen - a->replacementlen);
+               if (a->instrlen > a->replacementlen) {
+                       add_nops(insnbuf + a->replacementlen,
+                                a->instrlen - a->replacementlen);
+                       insnbuf_sz += a->instrlen - a->replacementlen;
+               }
+               DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
 
-               text_poke_early(instr, insnbuf, a->instrlen);
+               text_poke_early(instr, insnbuf, insnbuf_sz);
        }
 }
 
 #ifdef CONFIG_SMP
-
 static void alternatives_smp_lock(const s32 *start, const s32 *end,
                                  u8 *text, u8 *text_end)
 {
@@ -371,8 +488,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
        smp->locks_end  = locks_end;
        smp->text       = text;
        smp->text_end   = text_end;
-       DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
-               __func__, smp->locks, smp->locks_end,
+       DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
+               smp->locks, smp->locks_end,
                smp->text, smp->text_end, smp->name);
 
        list_add_tail(&smp->next, &smp_alt_modules);
@@ -440,7 +557,7 @@ int alternatives_text_reserved(void *start, void *end)
 
        return 0;
 }
-#endif
+#endif /* CONFIG_SMP */
 
 #ifdef CONFIG_PARAVIRT
 void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
@@ -601,7 +718,7 @@ int poke_int3_handler(struct pt_regs *regs)
        if (likely(!bp_patching_in_progress))
                return 0;
 
-       if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
+       if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
                return 0;
 
        /* set up the specified breakpoint handler */
index ad3639a..dcb5285 100644 (file)
@@ -1084,67 +1084,6 @@ void lapic_shutdown(void)
        local_irq_restore(flags);
 }
 
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
-       unsigned int reg0, reg1;
-
-       /*
-        * The version register is read-only in a real APIC.
-        */
-       reg0 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-       apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
-       reg1 = apic_read(APIC_LVR);
-       apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
-       /*
-        * The two version reads above should print the same
-        * numbers.  If the second one is different, then we
-        * poke at a non-APIC.
-        */
-       if (reg1 != reg0)
-               return 0;
-
-       /*
-        * Check if the version looks reasonably.
-        */
-       reg1 = GET_APIC_VERSION(reg0);
-       if (reg1 == 0x00 || reg1 == 0xff)
-               return 0;
-       reg1 = lapic_get_maxlvt();
-       if (reg1 < 0x02 || reg1 == 0xff)
-               return 0;
-
-       /*
-        * The ID register is read/write in a real APIC.
-        */
-       reg0 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-       apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
-       reg1 = apic_read(APIC_ID);
-       apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-       apic_write(APIC_ID, reg0);
-       if (reg1 != (reg0 ^ apic->apic_id_mask))
-               return 0;
-
-       /*
-        * The next two are just to see if we have sane values.
-        * They're only really relevant if we're in Virtual Wire
-        * compatibility mode, but most boxes are anymore.
-        */
-       reg0 = apic_read(APIC_LVT0);
-       apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
-       reg1 = apic_read(APIC_LVT1);
-       apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
-       return 1;
-}
-
 /**
  * sync_Arb_IDs - synchronize APIC bus arbitration IDs
  */
@@ -2283,7 +2222,6 @@ int __init APIC_init_uniprocessor(void)
                disable_ioapic_support();
 
        default_setup_apic_routing();
-       verify_local_APIC();
        apic_bsp_setup(true);
        return 0;
 }
index c2fd21f..017149c 100644 (file)
@@ -37,10 +37,12 @@ static const struct apic apic_numachip;
 static unsigned int get_apic_id(unsigned long x)
 {
        unsigned long value;
-       unsigned int id;
+       unsigned int id = (x >> 24) & 0xff;
 
-       rdmsrl(MSR_FAM10H_NODE_ID, value);
-       id = ((x >> 24) & 0xffU) | ((value << 2) & 0xff00U);
+       if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+               rdmsrl(MSR_FAM10H_NODE_ID, value);
+               id |= (value << 2) & 0xff00;
+       }
 
        return id;
 }
@@ -155,10 +157,18 @@ static int __init numachip_probe(void)
 
 static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
 {
-       if (c->phys_proc_id != node) {
-               c->phys_proc_id = node;
-               per_cpu(cpu_llc_id, smp_processor_id()) = node;
+       u64 val;
+       u32 nodes = 1;
+
+       this_cpu_write(cpu_llc_id, node);
+
+       /* Account for nodes per socket in multi-core-module processors */
+       if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+               rdmsrl(MSR_FAM10H_NODE_ID, val);
+               nodes = ((val >> 3) & 7) + 1;
        }
+
+       c->phys_proc_id = node / nodes;
 }
 
 static int __init numachip_system_init(void)
index e658f21..d9d0bd2 100644 (file)
@@ -135,12 +135,12 @@ static void init_x2apic_ldr(void)
 
        per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
 
-       __cpu_set(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
+       cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
        for_each_online_cpu(cpu) {
                if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
                        continue;
-               __cpu_set(this_cpu, per_cpu(cpus_in_cluster, cpu));
-               __cpu_set(cpu, per_cpu(cpus_in_cluster, this_cpu));
+               cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+               cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
        }
 }
 
@@ -195,7 +195,7 @@ static int x2apic_init_cpu_notifier(void)
 
        BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
 
-       __cpu_set(cpu, per_cpu(cpus_in_cluster, cpu));
+       cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
        register_hotcpu_notifier(&x2apic_cpu_notifier);
        return 1;
 }
index 8e9dcfd..c8d9295 100644 (file)
@@ -144,33 +144,60 @@ static void __init uv_set_apicid_hibit(void)
 
 static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 {
-       int pnodeid, is_uv1, is_uv2, is_uv3;
-
-       is_uv1 = !strcmp(oem_id, "SGI");
-       is_uv2 = !strcmp(oem_id, "SGI2");
-       is_uv3 = !strncmp(oem_id, "SGI3", 4);   /* there are varieties of UV3 */
-       if (is_uv1 || is_uv2 || is_uv3) {
-               uv_hub_info->hub_revision =
-                       (is_uv1 ? UV1_HUB_REVISION_BASE :
-                       (is_uv2 ? UV2_HUB_REVISION_BASE :
-                                 UV3_HUB_REVISION_BASE));
-               pnodeid = early_get_pnodeid();
-               early_get_apic_pnode_shift();
-               x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
-               x86_platform.nmi_init = uv_nmi_init;
-               if (!strcmp(oem_table_id, "UVL"))
-                       uv_system_type = UV_LEGACY_APIC;
-               else if (!strcmp(oem_table_id, "UVX"))
-                       uv_system_type = UV_X2APIC;
-               else if (!strcmp(oem_table_id, "UVH")) {
-                       __this_cpu_write(x2apic_extra_bits,
-                               pnodeid << uvh_apicid.s.pnode_shift);
-                       uv_system_type = UV_NON_UNIQUE_APIC;
-                       uv_set_apicid_hibit();
-                       return 1;
-               }
+       int pnodeid;
+       int uv_apic;
+
+       if (strncmp(oem_id, "SGI", 3) != 0)
+               return 0;
+
+       /*
+        * Determine UV arch type.
+        *   SGI: UV100/1000
+        *   SGI2: UV2000/3000
+        *   SGI3: UV300 (truncated to 4 chars because of different varieties)
+        */
+       uv_hub_info->hub_revision =
+               !strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+               !strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
+               !strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
+
+       if (uv_hub_info->hub_revision == 0)
+               goto badbios;
+
+       pnodeid = early_get_pnodeid();
+       early_get_apic_pnode_shift();
+       x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
+       x86_platform.nmi_init = uv_nmi_init;
+
+       if (!strcmp(oem_table_id, "UVX")) {             /* most common */
+               uv_system_type = UV_X2APIC;
+               uv_apic = 0;
+
+       } else if (!strcmp(oem_table_id, "UVH")) {      /* only UV1 systems */
+               uv_system_type = UV_NON_UNIQUE_APIC;
+               __this_cpu_write(x2apic_extra_bits,
+                       pnodeid << uvh_apicid.s.pnode_shift);
+               uv_set_apicid_hibit();
+               uv_apic = 1;
+
+       } else  if (!strcmp(oem_table_id, "UVL")) {     /* only used for */
+               uv_system_type = UV_LEGACY_APIC;        /* very small systems */
+               uv_apic = 0;
+
+       } else {
+               goto badbios;
        }
-       return 0;
+
+       pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n",
+               oem_id, oem_table_id, uv_system_type,
+               uv_min_hub_revision_id, uv_apic);
+
+       return uv_apic;
+
+badbios:
+       pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id);
+       pr_err("Current BIOS not supported, update kernel and/or BIOS\n");
+       BUG();
 }
 
 enum uv_system_type get_uv_system_type(void)
@@ -854,10 +881,14 @@ void __init uv_system_init(void)
        unsigned long mmr_base, present, paddr;
        unsigned short pnode_mask;
        unsigned char n_lshift;
-       char *hub = (is_uv1_hub() ? "UV1" :
-                   (is_uv2_hub() ? "UV2" :
-                                   "UV3"));
+       char *hub = (is_uv1_hub() ? "UV100/1000" :
+                   (is_uv2_hub() ? "UV2000/3000" :
+                   (is_uv3_hub() ? "UV300" : NULL)));
 
+       if (!hub) {
+               pr_err("UV: Unknown/unsupported UV hub\n");
+               return;
+       }
        pr_info("UV: Found %s hub\n", hub);
        map_low_mmrs();
 
index 3b3b9d3..47703ae 100644 (file)
@@ -68,7 +68,7 @@ void foo(void)
 
        /* Offset from the sysenter stack to tss.sp0 */
        DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
-                sizeof(struct tss_struct));
+              offsetofend(struct tss_struct, SYSENTER_stack));
 
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
        BLANK();
index fdcbb4d..5ce6f2d 100644 (file)
@@ -81,6 +81,7 @@ int main(void)
 #undef ENTRY
 
        OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+       OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
        BLANK();
 
        DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
index a220239..dd9e505 100644 (file)
@@ -711,6 +711,11 @@ static void init_amd(struct cpuinfo_x86 *c)
                set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
 
        rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
+       /* 3DNow or LM implies PREFETCHW */
+       if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
+               if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
+                       set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
 }
 
 #ifdef CONFIG_X86_32
index 2346c95..3f70538 100644 (file)
@@ -959,38 +959,37 @@ static void identify_cpu(struct cpuinfo_x86 *c)
 #endif
 }
 
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
-/* May not be __init: called during resume */
-static void syscall32_cpu_init(void)
-{
-       /* Load these always in case some future AMD CPU supports
-          SYSENTER from compat mode too. */
-       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
-       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
-       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
-       wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
-#endif         /* CONFIG_IA32_EMULATION */
-#endif         /* CONFIG_X86_64 */
-
+/*
+ * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
+ * on 32-bit kernels:
+ */
 #ifdef CONFIG_X86_32
 void enable_sep_cpu(void)
 {
-       int cpu = get_cpu();
-       struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       struct tss_struct *tss;
+       int cpu;
 
-       if (!boot_cpu_has(X86_FEATURE_SEP)) {
-               put_cpu();
-               return;
-       }
+       cpu = get_cpu();
+       tss = &per_cpu(cpu_tss, cpu);
+
+       if (!boot_cpu_has(X86_FEATURE_SEP))
+               goto out;
+
+       /*
+        * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
+        * see the big comment in struct x86_hw_tss's definition.
+        */
 
        tss->x86_tss.ss1 = __KERNEL_CS;
-       tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
-       wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
-       wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
-       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
+       wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+
+       wrmsr(MSR_IA32_SYSENTER_ESP,
+             (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
+             0);
+
+       wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
+
+out:
        put_cpu();
 }
 #endif
@@ -1118,7 +1117,7 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
-       (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+       (unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 #ifdef CONFIG_X86_64
@@ -1130,8 +1129,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
                     irq_stack_union) __aligned(PAGE_SIZE) __visible;
 
 /*
- * The following four percpu variables are hot.  Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot.  Align current_task to
+ * cacheline size such that they fall in the same cacheline.
  */
 DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
        &init_task;
@@ -1171,10 +1170,23 @@ void syscall_init(void)
         */
        wrmsrl(MSR_STAR,  ((u64)__USER32_CS)<<48  | ((u64)__KERNEL_CS)<<32);
        wrmsrl(MSR_LSTAR, system_call);
-       wrmsrl(MSR_CSTAR, ignore_sysret);
 
 #ifdef CONFIG_IA32_EMULATION
-       syscall32_cpu_init();
+       wrmsrl(MSR_CSTAR, ia32_cstar_target);
+       /*
+        * This only works on Intel CPUs.
+        * On AMD CPUs these MSRs are 32-bit, so the CPU truncates MSR_IA32_SYSENTER_EIP.
+        * This does not cause SYSENTER to jump to the wrong location, because
+        * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+        */
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+#else
+       wrmsrl(MSR_CSTAR, ignore_sysret);
+       wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+       wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+       wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
 #endif
 
        /* Flags to clear on syscall */
@@ -1226,6 +1238,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
 
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack.  Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+       (unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
+
 #ifdef CONFIG_CC_STACKPROTECTOR
 DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
@@ -1307,7 +1328,7 @@ void cpu_init(void)
         */
        load_ucode_ap();
 
-       t = &per_cpu(init_tss, cpu);
+       t = &per_cpu(cpu_tss, cpu);
        oist = &per_cpu(orig_ist, cpu);
 
 #ifdef CONFIG_NUMA
@@ -1391,7 +1412,7 @@ void cpu_init(void)
 {
        int cpu = smp_processor_id();
        struct task_struct *curr = current;
-       struct tss_struct *t = &per_cpu(init_tss, cpu);
+       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
        struct thread_struct *thread = &curr->thread;
 
        wait_for_master_cpu(cpu);
index 36d99a3..3f20710 100644 (file)
@@ -6,7 +6,7 @@
 IN=$1
 OUT=$2
 
-function dump_array()
+dump_array()
 {
        ARRAY=$1
        SIZE=$2
index b71a7f8..e2888a3 100644 (file)
@@ -2146,6 +2146,12 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
  */
 static unsigned long code_segment_base(struct pt_regs *regs)
 {
+       /*
+        * For IA32 we look at the GDT/LDT segment base to convert the
+        * effective IP to a linear address.
+        */
+
+#ifdef CONFIG_X86_32
        /*
         * If we are in VM86 mode, add the segment offset to convert to a
         * linear address.
@@ -2153,18 +2159,12 @@ static unsigned long code_segment_base(struct pt_regs *regs)
        if (regs->flags & X86_VM_MASK)
                return 0x10 * regs->cs;
 
-       /*
-        * For IA32 we look at the GDT/LDT segment base to convert the
-        * effective IP to a linear address.
-        */
-#ifdef CONFIG_X86_32
        if (user_mode(regs) && regs->cs != __USER_CS)
                return get_segment_base(regs->cs);
 #else
-       if (test_thread_flag(TIF_IA32)) {
-               if (user_mode(regs) && regs->cs != __USER32_CS)
-                       return get_segment_base(regs->cs);
-       }
+       if (user_mode(regs) && !user_64bit_mode(regs) &&
+           regs->cs != __USER32_CS)
+               return get_segment_base(regs->cs);
 #endif
        return 0;
 }
index 498b6d9..2589906 100644 (file)
@@ -212,11 +212,11 @@ static struct event_constraint intel_hsw_event_constraints[] = {
        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
        INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
        /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
-       INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+       INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
        /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
-       INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+       INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
        /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
-       INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+       INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
        EVENT_CONSTRAINT_END
 };
 
@@ -1649,11 +1649,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
        if (c)
                return c;
 
-       c = intel_pebs_constraints(event);
+       c = intel_shared_regs_constraints(cpuc, event);
        if (c)
                return c;
 
-       c = intel_shared_regs_constraints(cpuc, event);
+       c = intel_pebs_constraints(event);
        if (c)
                return c;
 
index aceb2f9..c76d3e3 100644 (file)
@@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
 #ifdef CONFIG_X86_32
        struct pt_regs fixed_regs;
 
-       if (!user_mode_vm(regs)) {
+       if (!user_mode(regs)) {
                crash_fixup_ss_esp(&fixed_regs, regs);
                regs = &fixed_regs;
        }
index cf3df1d..ab3b656 100644 (file)
@@ -278,7 +278,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
        print_modules();
        show_regs(regs);
 #ifdef CONFIG_X86_32
-       if (user_mode_vm(regs)) {
+       if (user_mode(regs)) {
                sp = regs->sp;
                ss = regs->ss & 0xffff;
        } else {
@@ -307,7 +307,7 @@ void die(const char *str, struct pt_regs *regs, long err)
        unsigned long flags = oops_begin();
        int sig = SIGSEGV;
 
-       if (!user_mode_vm(regs))
+       if (!user_mode(regs))
                report_bug(regs->ip, regs);
 
        if (__die(str, regs, err))
index 5abd4cd..39891ff 100644 (file)
@@ -123,13 +123,13 @@ void show_regs(struct pt_regs *regs)
        int i;
 
        show_regs_print_info(KERN_EMERG);
-       __show_regs(regs, !user_mode_vm(regs));
+       __show_regs(regs, !user_mode(regs));
 
        /*
         * When in-kernel, we also print out the stack and code at the
         * time of the fault..
         */
-       if (!user_mode_vm(regs)) {
+       if (!user_mode(regs)) {
                unsigned int code_prologue = code_bytes * 43 / 64;
                unsigned int code_len = code_bytes;
                unsigned char c;
index a62536a..49ff55e 100644 (file)
@@ -95,20 +95,6 @@ static unsigned long early_serial_base = 0x3f8;  /* ttyS0 */
 #define DLL             0       /*  Divisor Latch Low         */
 #define DLH             1       /*  Divisor latch High        */
 
-static void mem32_serial_out(unsigned long addr, int offset, int value)
-{
-       uint32_t *vaddr = (uint32_t *)addr;
-       /* shift implied by pointer type */
-       writel(value, vaddr + offset);
-}
-
-static unsigned int mem32_serial_in(unsigned long addr, int offset)
-{
-       uint32_t *vaddr = (uint32_t *)addr;
-       /* shift implied by pointer type */
-       return readl(vaddr + offset);
-}
-
 static unsigned int io_serial_in(unsigned long addr, int offset)
 {
        return inb(addr + offset);
@@ -205,6 +191,20 @@ static __init void early_serial_init(char *s)
 }
 
 #ifdef CONFIG_PCI
+static void mem32_serial_out(unsigned long addr, int offset, int value)
+{
+       u32 *vaddr = (u32 *)addr;
+       /* shift implied by pointer type: offset counts u32-sized units */
+       writel(value, vaddr + offset);
+}
+
+static unsigned int mem32_serial_in(unsigned long addr, int offset)
+{
+       u32 *vaddr = (u32 *)addr;
+       /* shift implied by pointer type: offset counts u32-sized units */
+       return readl(vaddr + offset);
+}
+
 /*
  * early_pci_serial_init()
  *
@@ -217,8 +217,8 @@ static __init void early_pci_serial_init(char *s)
        unsigned divisor;
        unsigned long baud = DEFAULT_BAUD;
        u8 bus, slot, func;
-       uint32_t classcode, bar0;
-       uint16_t cmdreg;
+       u32 classcode, bar0;
+       u16 cmdreg;
        char *e;
 
 
index 31e2d5b..1c30976 100644 (file)
@@ -395,10 +395,13 @@ sysenter_past_esp:
        /*CFI_REL_OFFSET cs, 0*/
        /*
         * Push current_thread_info()->sysenter_return to the stack.
-        * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
-        * pushed above; +8 corresponds to copy_thread's esp0 setting.
+        * A tiny bit of offset fixup is necessary: TI_sysenter_return
+        * is relative to thread_info, which is at the bottom of the
+        * kernel stack page.  4*4 means the 4 words pushed above;
+        * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
+        * and THREAD_SIZE takes us to the bottom.
         */
-       pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
+       pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
        CFI_REL_OFFSET eip, 0
 
        pushl_cfi %eax
@@ -432,7 +435,7 @@ sysenter_after_call:
        TRACE_IRQS_OFF
        movl TI_flags(%ebp), %ecx
        testl $_TIF_ALLWORK_MASK, %ecx
-       jne sysexit_audit
+       jnz sysexit_audit
 sysenter_exit:
 /* if something modifies registers it must also disable sysexit */
        movl PT_EIP(%esp), %edx
@@ -460,7 +463,7 @@ sysenter_audit:
 
 sysexit_audit:
        testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-       jne syscall_exit_work
+       jnz syscall_exit_work
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_ANY)
        movl %eax,%edx          /* second arg, syscall return value */
@@ -472,7 +475,7 @@ sysexit_audit:
        TRACE_IRQS_OFF
        movl TI_flags(%ebp), %ecx
        testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
-       jne syscall_exit_work
+       jnz syscall_exit_work
        movl PT_EAX(%esp),%eax  /* reload syscall return value */
        jmp sysenter_exit
 #endif
@@ -510,7 +513,7 @@ syscall_exit:
        TRACE_IRQS_OFF
        movl TI_flags(%ebp), %ecx
        testl $_TIF_ALLWORK_MASK, %ecx  # current->work
-       jne syscall_exit_work
+       jnz syscall_exit_work
 
 restore_all:
        TRACE_IRQS_IRET
@@ -612,7 +615,7 @@ work_notifysig:                             # deal with pending signals and
 #ifdef CONFIG_VM86
        testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
        movl %esp, %eax
-       jne work_notifysig_v86          # returning to kernel-space or
+       jnz work_notifysig_v86          # returning to kernel-space or
                                        # vm86-space
 1:
 #else
@@ -720,43 +723,22 @@ END(sysenter_badsys)
 .endm
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-.section .init.rodata,"a"
-ENTRY(interrupt)
-.section .entry.text, "ax"
-       .p2align 5
-       .p2align CONFIG_X86_L1_CACHE_SHIFT
+       .align 8
 ENTRY(irq_entries_start)
        RING0_INT_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-       .balign 32
-  .rept        7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+       pushl_cfi $(~vector+0x80)       /* Note: always in signed byte range */
+    vector=vector+1
+       jmp     common_interrupt
        CFI_ADJUST_CFA_OFFSET -4
-      .endif
-1:     pushl_cfi $(~vector+0x80)       /* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-       jmp 2f
-      .endif
-      .previous
-       .long 1b
-      .section .entry.text, "ax"
-vector=vector+1
-    .endif
-  .endr
-2:     jmp common_interrupt
-.endr
+       .align  8
+    .endr
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * the CPU automatically disables interrupts when executing an IRQ vector,
  * so IRQ-flags tracing has to follow that:
@@ -816,15 +798,9 @@ ENTRY(simd_coprocessor_error)
        pushl_cfi $0
 #ifdef CONFIG_X86_INVD_BUG
        /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-661:   pushl_cfi $do_general_protection
-662:
-.section .altinstructions,"a"
-       altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
-.previous
-.section .altinstr_replacement,"ax"
-663:   pushl $do_simd_coprocessor_error
-664:
-.previous
+       ALTERNATIVE "pushl_cfi $do_general_protection", \
+                   "pushl $do_simd_coprocessor_error", \
+                   X86_FEATURE_XMM
 #else
        pushl_cfi $do_simd_coprocessor_error
 #endif
@@ -1240,20 +1216,13 @@ error_code:
        /*CFI_REL_OFFSET es, 0*/
        pushl_cfi %ds
        /*CFI_REL_OFFSET ds, 0*/
-       pushl_cfi %eax
-       CFI_REL_OFFSET eax, 0
-       pushl_cfi %ebp
-       CFI_REL_OFFSET ebp, 0
-       pushl_cfi %edi
-       CFI_REL_OFFSET edi, 0
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %edx
-       CFI_REL_OFFSET edx, 0
-       pushl_cfi %ecx
-       CFI_REL_OFFSET ecx, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg eax
+       pushl_cfi_reg ebp
+       pushl_cfi_reg edi
+       pushl_cfi_reg esi
+       pushl_cfi_reg edx
+       pushl_cfi_reg ecx
+       pushl_cfi_reg ebx
        cld
        movl $(__KERNEL_PERCPU), %ecx
        movl %ecx, %fs
index 1d74d16..c7b2384 100644 (file)
  * NOTE: This code handles signal-recognition, which happens every time
  * after an interrupt and after each system call.
  *
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
  * A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * - iret frame: Architecture defined interrupt frame from SS to RIP
  * at the top of the kernel process stack.
- * - partial stack frame: partially saved registers up to R11.
- * - full stack frame: Like partial stack frame, but all register saved.
  *
  * Some macro usage:
  * - CFI macros are used to generate dwarf2 unwind information for better
  * backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
  * - ENTRY/END Define functions in the symbol table.
- * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
- * frame that is otherwise undefined after a SYSCALL
  * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
  * - idtentry - Define exception entry points.
  */
        .section .entry.text, "ax"
 
 
-#ifndef CONFIG_PREEMPT
-#define retint_kernel retint_restore_args
-#endif
-
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_usergs_sysret64)
        swapgs
@@ -82,9 +65,9 @@ ENDPROC(native_usergs_sysret64)
 #endif /* CONFIG_PARAVIRT */
 
 
-.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+.macro TRACE_IRQS_IRETQ
 #ifdef CONFIG_TRACE_IRQFLAGS
-       bt   $9,EFLAGS-\offset(%rsp)    /* interrupts off? */
+       bt   $9,EFLAGS(%rsp)    /* interrupts off? */
        jnc  1f
        TRACE_IRQS_ON
 1:
@@ -116,8 +99,8 @@ ENDPROC(native_usergs_sysret64)
        call debug_stack_reset
 .endm
 
-.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
-       bt   $9,EFLAGS-\offset(%rsp)    /* interrupts off? */
+.macro TRACE_IRQS_IRETQ_DEBUG
+       bt   $9,EFLAGS(%rsp)    /* interrupts off? */
        jnc  1f
        TRACE_IRQS_ON_DEBUG
 1:
@@ -130,34 +113,7 @@ ENDPROC(native_usergs_sysret64)
 #endif
 
 /*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
- * fast path FIXUP_TOP_OF_STACK is needed.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
-
-       /* %rsp:at FRAMEEND */
-       .macro FIXUP_TOP_OF_STACK tmp offset=0
-       movq PER_CPU_VAR(old_rsp),\tmp
-       movq \tmp,RSP+\offset(%rsp)
-       movq $__USER_DS,SS+\offset(%rsp)
-       movq $__USER_CS,CS+\offset(%rsp)
-       movq RIP+\offset(%rsp),\tmp  /* get rip */
-       movq \tmp,RCX+\offset(%rsp)  /* copy it to rcx as sysret would do */
-       movq R11+\offset(%rsp),\tmp  /* get eflags */
-       movq \tmp,EFLAGS+\offset(%rsp)
-       .endm
-
-       .macro RESTORE_TOP_OF_STACK tmp offset=0
-       movq RSP+\offset(%rsp),\tmp
-       movq \tmp,PER_CPU_VAR(old_rsp)
-       movq EFLAGS+\offset(%rsp),\tmp
-       movq \tmp,R11+\offset(%rsp)
-       .endm
-
-/*
- * initial frame state for interrupts (and exceptions without error code)
+ * empty frame
  */
        .macro EMPTY_FRAME start=1 offset=0
        .if \start
@@ -173,12 +129,12 @@ ENDPROC(native_usergs_sysret64)
  * initial frame state for interrupts (and exceptions without error code)
  */
        .macro INTR_FRAME start=1 offset=0
-       EMPTY_FRAME \start, SS+8+\offset-RIP
-       /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
-       CFI_REL_OFFSET rsp, RSP+\offset-RIP
-       /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
-       /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
-       CFI_REL_OFFSET rip, RIP+\offset-RIP
+       EMPTY_FRAME \start, 5*8+\offset
+       /*CFI_REL_OFFSET ss, 4*8+\offset*/
+       CFI_REL_OFFSET rsp, 3*8+\offset
+       /*CFI_REL_OFFSET rflags, 2*8+\offset*/
+       /*CFI_REL_OFFSET cs, 1*8+\offset*/
+       CFI_REL_OFFSET rip, 0*8+\offset
        .endm
 
 /*
@@ -186,30 +142,23 @@ ENDPROC(native_usergs_sysret64)
  * with vector already pushed)
  */
        .macro XCPT_FRAME start=1 offset=0
-       INTR_FRAME \start, RIP+\offset-ORIG_RAX
-       .endm
-
-/*
- * frame that enables calling into C.
- */
-       .macro PARTIAL_FRAME start=1 offset=0
-       XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
-       CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
-       CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
-       CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
-       CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
-       CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
-       CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
-       CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
-       CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
-       CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+       INTR_FRAME \start, 1*8+\offset
        .endm
 
 /*
  * frame that enables passing a complete pt_regs to a C function.
  */
        .macro DEFAULT_FRAME start=1 offset=0
-       PARTIAL_FRAME \start, R11+\offset-R15
+       XCPT_FRAME \start, ORIG_RAX+\offset
+       CFI_REL_OFFSET rdi, RDI+\offset
+       CFI_REL_OFFSET rsi, RSI+\offset
+       CFI_REL_OFFSET rdx, RDX+\offset
+       CFI_REL_OFFSET rcx, RCX+\offset
+       CFI_REL_OFFSET rax, RAX+\offset
+       CFI_REL_OFFSET r8, R8+\offset
+       CFI_REL_OFFSET r9, R9+\offset
+       CFI_REL_OFFSET r10, R10+\offset
+       CFI_REL_OFFSET r11, R11+\offset
        CFI_REL_OFFSET rbx, RBX+\offset
        CFI_REL_OFFSET rbp, RBP+\offset
        CFI_REL_OFFSET r12, R12+\offset
@@ -218,105 +167,30 @@ ENDPROC(native_usergs_sysret64)
        CFI_REL_OFFSET r15, R15+\offset
        .endm
 
-ENTRY(save_paranoid)
-       XCPT_FRAME 1 RDI+8
-       cld
-       movq %rdi, RDI+8(%rsp)
-       movq %rsi, RSI+8(%rsp)
-       movq_cfi rdx, RDX+8
-       movq_cfi rcx, RCX+8
-       movq_cfi rax, RAX+8
-       movq %r8, R8+8(%rsp)
-       movq %r9, R9+8(%rsp)
-       movq %r10, R10+8(%rsp)
-       movq %r11, R11+8(%rsp)
-       movq_cfi rbx, RBX+8
-       movq %rbp, RBP+8(%rsp)
-       movq %r12, R12+8(%rsp)
-       movq %r13, R13+8(%rsp)
-       movq %r14, R14+8(%rsp)
-       movq %r15, R15+8(%rsp)
-       movl $1,%ebx
-       movl $MSR_GS_BASE,%ecx
-       rdmsr
-       testl %edx,%edx
-       js 1f   /* negative -> in kernel */
-       SWAPGS
-       xorl %ebx,%ebx
-1:     ret
-       CFI_ENDPROC
-END(save_paranoid)
-
 /*
- * A newly forked process directly context switches into this address.
+ * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
  *
- * rdi: prev task we switched from
- */
-ENTRY(ret_from_fork)
-       DEFAULT_FRAME
-
-       LOCK ; btr $TIF_FORK,TI_flags(%r8)
-
-       pushq_cfi $0x0002
-       popfq_cfi                               # reset kernel eflags
-
-       call schedule_tail                      # rdi: 'prev' task parameter
-
-       GET_THREAD_INFO(%rcx)
-
-       RESTORE_REST
-
-       testl $3, CS-ARGOFFSET(%rsp)            # from kernel_thread?
-       jz   1f
-
-       /*
-        * By the time we get here, we have no idea whether our pt_regs,
-        * ti flags, and ti status came from the 64-bit SYSCALL fast path,
-        * the slow path, or one of the ia32entry paths.
-        * Use int_ret_from_sys_call to return, since it can safely handle
-        * all of the above.
-        */
-       jmp  int_ret_from_sys_call
-
-1:
-       subq $REST_SKIP, %rsp   # leave space for volatiles
-       CFI_ADJUST_CFA_OFFSET   REST_SKIP
-       movq %rbp, %rdi
-       call *%rbx
-       movl $0, RAX(%rsp)
-       RESTORE_REST
-       jmp int_ret_from_sys_call
-       CFI_ENDPROC
-END(ret_from_fork)
-
-/*
- * System call entry. Up to 6 arguments in registers are supported.
+ * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
  *
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer.  However, it does mask the flags register for us, so
- * CLD and CLAC are not needed.
- */
-
-/*
- * Register setup:
+ * Registers on entry:
  * rax  system call number
+ * rcx  return address
+ * r11  saved rflags (note: r11 is callee-clobbered register in C ABI)
  * rdi  arg0
- * rcx  return address for syscall/sysret, C arg3
  * rsi  arg1
  * rdx  arg2
- * r10  arg3   (--> moved to rcx for C)
+ * r10  arg3 (needs to be moved to rcx to conform to C ABI)
  * r8   arg4
  * r9   arg5
- * r11  eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
+ * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
  *
- * Interrupts are off on entry.
  * Only called from user space.
  *
- * XXX if we had a free scratch register we could save the RSP into the stack frame
- *      and report it properly in ps. Unfortunately we haven't.
- *
- * When user can change the frames always force IRET. That is because
+ * When the user can change pt_regs->foo, always force IRET. That is because
  * it deals with uncanonical addresses better. SYSRET has trouble
  * with them due to bugs in both AMD and Intel CPUs.
  */
@@ -324,9 +198,15 @@ END(ret_from_fork)
 ENTRY(system_call)
        CFI_STARTPROC   simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
+       CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
+
+       /*
+        * Interrupts are off on entry.
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
+        */
        SWAPGS_UNSAFE_STACK
        /*
         * A hypervisor implementation might want to use a label
@@ -335,18 +215,38 @@ ENTRY(system_call)
         */
 GLOBAL(system_call_after_swapgs)
 
-       movq    %rsp,PER_CPU_VAR(old_rsp)
+       movq    %rsp,PER_CPU_VAR(rsp_scratch)
        movq    PER_CPU_VAR(kernel_stack),%rsp
+
+       /* Construct struct pt_regs on stack */
+       pushq_cfi $__USER_DS                    /* pt_regs->ss */
+       pushq_cfi PER_CPU_VAR(rsp_scratch)      /* pt_regs->sp */
        /*
-        * No need to follow this irqs off/on section - it's straight
-        * and short:
+        * Re-enable interrupts.
+        * We use 'rsp_scratch' as a scratch space, hence irq-off block above
+        * must execute atomically in the face of possible interrupt-driven
+        * task preemption. We must enable interrupts only after we're done
+        * with using rsp_scratch:
         */
        ENABLE_INTERRUPTS(CLBR_NONE)
-       SAVE_ARGS 8, 0, rax_enosys=1
-       movq_cfi rax,(ORIG_RAX-ARGOFFSET)
-       movq  %rcx,RIP-ARGOFFSET(%rsp)
-       CFI_REL_OFFSET rip,RIP-ARGOFFSET
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+       pushq_cfi       %r11                    /* pt_regs->flags */
+       pushq_cfi       $__USER_CS              /* pt_regs->cs */
+       pushq_cfi       %rcx                    /* pt_regs->ip */
+       CFI_REL_OFFSET rip,0
+       pushq_cfi_reg   rax                     /* pt_regs->orig_ax */
+       pushq_cfi_reg   rdi                     /* pt_regs->di */
+       pushq_cfi_reg   rsi                     /* pt_regs->si */
+       pushq_cfi_reg   rdx                     /* pt_regs->dx */
+       pushq_cfi_reg   rcx                     /* pt_regs->cx */
+       pushq_cfi       $-ENOSYS                /* pt_regs->ax */
+       pushq_cfi_reg   r8                      /* pt_regs->r8 */
+       pushq_cfi_reg   r9                      /* pt_regs->r9 */
+       pushq_cfi_reg   r10                     /* pt_regs->r10 */
+       pushq_cfi_reg   r11                     /* pt_regs->r11 */
+       sub     $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */
+       CFI_ADJUST_CFA_OFFSET 6*8
+
+       testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
        jnz tracesys
 system_call_fastpath:
 #if __SYSCALL_MASK == ~0
@@ -355,82 +255,96 @@ system_call_fastpath:
        andl $__SYSCALL_MASK,%eax
        cmpl $__NR_syscall_max,%eax
 #endif
-       ja ret_from_sys_call  /* and return regs->ax */
+       ja      1f      /* return -ENOSYS (already in pt_regs->ax) */
        movq %r10,%rcx
-       call *sys_call_table(,%rax,8)  # XXX:    rip relative
-       movq %rax,RAX-ARGOFFSET(%rsp)
+       call *sys_call_table(,%rax,8)
+       movq %rax,RAX(%rsp)
+1:
 /*
- * Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
+ * Syscall return path ending with SYSRET (fast path).
+ * Has incompletely filled pt_regs.
  */
-ret_from_sys_call:
-       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
-       jnz int_ret_from_sys_call_fixup /* Go the the slow path */
-
        LOCKDEP_SYS_EXIT
+       /*
+        * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+        * it is too small to ever cause noticeable irq latency.
+        */
        DISABLE_INTERRUPTS(CLBR_NONE)
-       TRACE_IRQS_OFF
-       CFI_REMEMBER_STATE
+
        /*
-        * sysretq will re-enable interrupts:
+        * We must check ti flags with interrupts (or at least preemption)
+        * off because we must *never* return to userspace without
+        * processing exit work that is enqueued if we're preempted here.
+        * In particular, returning to userspace with any of the one-shot
+        * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
+        * very bad.
         */
-       TRACE_IRQS_ON
-       movq RIP-ARGOFFSET(%rsp),%rcx
+       testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+       jnz int_ret_from_sys_call_irqs_off      /* Go to the slow path */
+
+       CFI_REMEMBER_STATE
+
+       RESTORE_C_REGS_EXCEPT_RCX_R11
+       movq    RIP(%rsp),%rcx
        CFI_REGISTER    rip,rcx
-       RESTORE_ARGS 1,-ARG_SKIP,0
+       movq    EFLAGS(%rsp),%r11
        /*CFI_REGISTER  rflags,r11*/
-       movq    PER_CPU_VAR(old_rsp), %rsp
+       movq    RSP(%rsp),%rsp
+       /*
+        * 64bit SYSRET restores rip from rcx,
+        * rflags from r11 (but RF and VM bits are forced to 0),
+        * cs and ss are loaded from MSRs.
+        * Restoration of rflags re-enables interrupts.
+        */
        USERGS_SYSRET64
 
        CFI_RESTORE_STATE
 
-int_ret_from_sys_call_fixup:
-       FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
-       jmp int_ret_from_sys_call
-
-       /* Do syscall tracing */
+       /* Do syscall entry tracing */
 tracesys:
-       leaq -REST_SKIP(%rsp), %rdi
-       movq $AUDIT_ARCH_X86_64, %rsi
+       movq %rsp, %rdi
+       movl $AUDIT_ARCH_X86_64, %esi
        call syscall_trace_enter_phase1
        test %rax, %rax
        jnz tracesys_phase2             /* if needed, run the slow path */
-       LOAD_ARGS 0                     /* else restore clobbered regs */
+       RESTORE_C_REGS_EXCEPT_RAX       /* else restore clobbered regs */
+       movq ORIG_RAX(%rsp), %rax
        jmp system_call_fastpath        /*      and return to the fast path */
 
 tracesys_phase2:
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %rdi
+       SAVE_EXTRA_REGS
        movq %rsp, %rdi
-       movq $AUDIT_ARCH_X86_64, %rsi
+       movl $AUDIT_ARCH_X86_64, %esi
        movq %rax,%rdx
        call syscall_trace_enter_phase2
 
        /*
-        * Reload arg registers from stack in case ptrace changed them.
+        * Reload registers from stack in case ptrace changed them.
         * We don't reload %rax because syscall_trace_entry_phase2() returned
         * the value it wants us to use in the table lookup.
         */
-       LOAD_ARGS ARGOFFSET, 1
-       RESTORE_REST
+       RESTORE_C_REGS_EXCEPT_RAX
+       RESTORE_EXTRA_REGS
 #if __SYSCALL_MASK == ~0
        cmpq $__NR_syscall_max,%rax
 #else
        andl $__SYSCALL_MASK,%eax
        cmpl $__NR_syscall_max,%eax
 #endif
-       ja   int_ret_from_sys_call      /* RAX(%rsp) is already set */
+       ja      1f      /* return -ENOSYS (already in pt_regs->ax) */
        movq %r10,%rcx  /* fixup for C */
        call *sys_call_table(,%rax,8)
-       movq %rax,RAX-ARGOFFSET(%rsp)
-       /* Use IRET because user could have changed frame */
+       movq %rax,RAX(%rsp)
+1:
+       /* Use IRET because user could have changed pt_regs->foo */
 
 /*
  * Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
+ * Has correct iret frame.
  */
 GLOBAL(int_ret_from_sys_call)
        DISABLE_INTERRUPTS(CLBR_NONE)
+int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
        TRACE_IRQS_OFF
        movl $_TIF_ALLWORK_MASK,%edi
        /* edi: mask to check */
@@ -440,8 +354,8 @@ GLOBAL(int_with_check)
        movl TI_flags(%rcx),%edx
        andl %edi,%edx
        jnz   int_careful
-       andl    $~TS_COMPAT,TI_status(%rcx)
-       jmp   retint_swapgs
+       andl    $~TS_COMPAT,TI_status(%rcx)
+       jmp     syscall_return
 
        /* Either reschedule or signal or syscall exit tracking needed. */
        /* First do a reschedule test. */
@@ -458,12 +372,11 @@ int_careful:
        TRACE_IRQS_OFF
        jmp int_with_check
 
-       /* handle signals and tracing -- both require a full stack frame */
+       /* handle signals and tracing -- both require a full pt_regs */
 int_very_careful:
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
-int_check_syscall_exit_work:
-       SAVE_REST
+       SAVE_EXTRA_REGS
        /* Check for syscall exit trace */
        testl $_TIF_WORK_SYSCALL_EXIT,%edx
        jz int_signal
@@ -482,86 +395,192 @@ int_signal:
        call do_notify_resume
 1:     movl $_TIF_WORK_MASK,%edi
 int_restore_rest:
-       RESTORE_REST
+       RESTORE_EXTRA_REGS
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
        jmp int_with_check
+
+syscall_return:
+       /* The IRETQ could re-enable interrupts: */
+       DISABLE_INTERRUPTS(CLBR_ANY)
+       TRACE_IRQS_IRETQ
+
+       /*
+        * Try to use SYSRET instead of IRET if we're returning to
+        * a completely clean 64-bit userspace context.
+        */
+       movq RCX(%rsp),%rcx
+       cmpq %rcx,RIP(%rsp)             /* RCX == RIP */
+       jne opportunistic_sysret_failed
+
+       /*
+        * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+        * in kernel space.  This essentially lets the user take over
+        * the kernel, since userspace controls RSP.  It's not worth
+        * testing for canonicalness exactly -- this check detects any
+        * of the 17 high bits set, which is true for non-canonical
+        * or kernel addresses.  (This will pessimize vsyscall=native.
+        * Big deal.)
+        *
+        * If virtual addresses ever become wider, this will need
+        * to be updated to remain correct on both old and new CPUs.
+        */
+       .ifne __VIRTUAL_MASK_SHIFT - 47
+       .error "virtual address width changed -- SYSRET checks need update"
+       .endif
+       shr $__VIRTUAL_MASK_SHIFT, %rcx
+       jnz opportunistic_sysret_failed
+
+       cmpq $__USER_CS,CS(%rsp)        /* CS must match SYSRET */
+       jne opportunistic_sysret_failed
+
+       movq R11(%rsp),%r11
+       cmpq %r11,EFLAGS(%rsp)          /* R11 == RFLAGS */
+       jne opportunistic_sysret_failed
+
+       /*
+        * SYSRET can't restore RF.  SYSRET can restore TF, but unlike IRET,
+        * restoring TF results in a trap from userspace immediately after
+        * SYSRET.  This would cause an infinite loop whenever #DB happens
+        * with register state that satisfies the opportunistic SYSRET
+        * conditions.  For example, single-stepping this user code:
+        *
+        *           movq $stuck_here,%rcx
+        *           pushfq
+        *           popq %r11
+        *   stuck_here:
+        *
+        * would never get past 'stuck_here'.
+        */
+       testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
+       jnz opportunistic_sysret_failed
+
+       /* nothing to check for RSP */
+
+       cmpq $__USER_DS,SS(%rsp)        /* SS must match SYSRET */
+       jne opportunistic_sysret_failed
+
+       /*
+        * We win!  This label is here just for ease of understanding
+        * perf profiles.  Nothing jumps here.
+        */
+syscall_return_via_sysret:
+       CFI_REMEMBER_STATE
+       /* r11 is already restored (see code above) */
+       RESTORE_C_REGS_EXCEPT_R11
+       movq RSP(%rsp),%rsp
+       USERGS_SYSRET64
+       CFI_RESTORE_STATE
+
+opportunistic_sysret_failed:
+       SWAPGS
+       jmp     restore_c_regs_and_iret
        CFI_ENDPROC
 END(system_call)
 
+
        .macro FORK_LIKE func
 ENTRY(stub_\func)
        CFI_STARTPROC
-       popq    %r11                    /* save return address */
-       PARTIAL_FRAME 0
-       SAVE_REST
-       pushq   %r11                    /* put it back on stack */
-       FIXUP_TOP_OF_STACK %r11, 8
-       DEFAULT_FRAME 0 8               /* offset 8: return address */
-       call sys_\func
-       RESTORE_TOP_OF_STACK %r11, 8
-       ret $REST_SKIP          /* pop extended registers */
+       DEFAULT_FRAME 0, 8              /* offset 8: return address */
+       SAVE_EXTRA_REGS 8
+       jmp sys_\func
        CFI_ENDPROC
 END(stub_\func)
        .endm
 
-       .macro FIXED_FRAME label,func
-ENTRY(\label)
-       CFI_STARTPROC
-       PARTIAL_FRAME 0 8               /* offset 8: return address */
-       FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
-       call \func
-       RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
-       ret
-       CFI_ENDPROC
-END(\label)
-       .endm
-
        FORK_LIKE  clone
        FORK_LIKE  fork
        FORK_LIKE  vfork
-       FIXED_FRAME stub_iopl, sys_iopl
 
 ENTRY(stub_execve)
        CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
-       call sys_execve
-       movq %rax,RAX(%rsp)
-       RESTORE_REST
-       jmp int_ret_from_sys_call
+       DEFAULT_FRAME 0, 8
+       call    sys_execve
+return_from_execve:
+       testl   %eax, %eax
+       jz      1f
+       /* exec failed, can use fast SYSRET code path in this case */
+       ret
+1:
+       /* must use IRET code path (pt_regs->cs may have changed) */
+       addq    $8, %rsp
+       CFI_ADJUST_CFA_OFFSET -8
+       ZERO_EXTRA_REGS
+       movq    %rax,RAX(%rsp)
+       jmp     int_ret_from_sys_call
        CFI_ENDPROC
 END(stub_execve)
-
-ENTRY(stub_execveat)
+/*
+ * Remaining execve stubs are only 7 bytes long.
+ * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
+ */
+       .align  8
+GLOBAL(stub_execveat)
        CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
-       call sys_execveat
-       RESTORE_TOP_OF_STACK %r11
-       movq %rax,RAX(%rsp)
-       RESTORE_REST
-       jmp int_ret_from_sys_call
+       DEFAULT_FRAME 0, 8
+       call    sys_execveat
+       jmp     return_from_execve
        CFI_ENDPROC
 END(stub_execveat)
 
+#ifdef CONFIG_X86_X32_ABI
+       .align  8
+GLOBAL(stub_x32_execve)
+       CFI_STARTPROC
+       DEFAULT_FRAME 0, 8
+       call    compat_sys_execve
+       jmp     return_from_execve
+       CFI_ENDPROC
+END(stub_x32_execve)
+       .align  8
+GLOBAL(stub_x32_execveat)
+       CFI_STARTPROC
+       DEFAULT_FRAME 0, 8
+       call    compat_sys_execveat
+       jmp     return_from_execve
+       CFI_ENDPROC
+END(stub_x32_execveat)
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
+       .align  8
+GLOBAL(stub32_execve)
+       CFI_STARTPROC
+       call    compat_sys_execve
+       jmp     return_from_execve
+       CFI_ENDPROC
+END(stub32_execve)
+       .align  8
+GLOBAL(stub32_execveat)
+       CFI_STARTPROC
+       call    compat_sys_execveat
+       jmp     return_from_execve
+       CFI_ENDPROC
+END(stub32_execveat)
+#endif
+
 /*
  * sigreturn is special because it needs to restore all registers on return.
  * This cannot be done with SYSRET, so use the IRET return path instead.
  */
 ENTRY(stub_rt_sigreturn)
        CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
+       DEFAULT_FRAME 0, 8
+       /*
+        * SAVE_EXTRA_REGS result is not normally needed:
+        * sigreturn overwrites all pt_regs->GPREGS.
+        * But sigreturn can fail (!), and there is no easy way to detect that.
+        * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
+        * we SAVE_EXTRA_REGS here.
+        */
+       SAVE_EXTRA_REGS 8
        call sys_rt_sigreturn
-       movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-       RESTORE_REST
+return_from_stub:
+       addq    $8, %rsp
+       CFI_ADJUST_CFA_OFFSET -8
+       RESTORE_EXTRA_REGS
+       movq %rax,RAX(%rsp)
        jmp int_ret_from_sys_call
        CFI_ENDPROC
 END(stub_rt_sigreturn)
@@ -569,86 +588,70 @@ END(stub_rt_sigreturn)
 #ifdef CONFIG_X86_X32_ABI
 ENTRY(stub_x32_rt_sigreturn)
        CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
+       DEFAULT_FRAME 0, 8
+       SAVE_EXTRA_REGS 8
        call sys32_x32_rt_sigreturn
-       movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
-       RESTORE_REST
-       jmp int_ret_from_sys_call
+       jmp  return_from_stub
        CFI_ENDPROC
 END(stub_x32_rt_sigreturn)
+#endif
 
-ENTRY(stub_x32_execve)
-       CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
-       call compat_sys_execve
-       RESTORE_TOP_OF_STACK %r11
-       movq %rax,RAX(%rsp)
-       RESTORE_REST
-       jmp int_ret_from_sys_call
-       CFI_ENDPROC
-END(stub_x32_execve)
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
+ */
+ENTRY(ret_from_fork)
+       DEFAULT_FRAME
 
-ENTRY(stub_x32_execveat)
-       CFI_STARTPROC
-       addq $8, %rsp
-       PARTIAL_FRAME 0
-       SAVE_REST
-       FIXUP_TOP_OF_STACK %r11
-       call compat_sys_execveat
-       RESTORE_TOP_OF_STACK %r11
-       movq %rax,RAX(%rsp)
-       RESTORE_REST
+       LOCK ; btr $TIF_FORK,TI_flags(%r8)
+
+       pushq_cfi $0x0002
+       popfq_cfi                               # reset kernel eflags
+
+       call schedule_tail                      # rdi: 'prev' task parameter
+
+       RESTORE_EXTRA_REGS
+
+       testl $3,CS(%rsp)                       # from kernel_thread?
+
+       /*
+        * By the time we get here, we have no idea whether our pt_regs,
+        * ti flags, and ti status came from the 64-bit SYSCALL fast path,
+        * the slow path, or one of the ia32entry paths.
+        * Use IRET code path to return, since it can safely handle
+        * all of the above.
+        */
+       jnz     int_ret_from_sys_call
+
+       /* We came from kernel_thread */
+       /* nb: we depend on RESTORE_EXTRA_REGS above */
+       movq %rbp, %rdi
+       call *%rbx
+       movl $0, RAX(%rsp)
+       RESTORE_EXTRA_REGS
        jmp int_ret_from_sys_call
        CFI_ENDPROC
-END(stub_x32_execveat)
-
-#endif
+END(ret_from_fork)
 
 /*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
  */
-       .section .init.rodata,"a"
-ENTRY(interrupt)
-       .section .entry.text
-       .p2align 5
-       .p2align CONFIG_X86_L1_CACHE_SHIFT
+       .align 8
 ENTRY(irq_entries_start)
        INTR_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
-       .balign 32
-  .rept        7
-    .if vector < FIRST_SYSTEM_VECTOR
-      .if vector <> FIRST_EXTERNAL_VECTOR
+    vector=FIRST_EXTERNAL_VECTOR
+    .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+       pushq_cfi $(~vector+0x80)       /* Note: always in signed byte range */
+    vector=vector+1
+       jmp     common_interrupt
        CFI_ADJUST_CFA_OFFSET -8
-      .endif
-1:     pushq_cfi $(~vector+0x80)       /* Note: always in signed byte range */
-      .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
-       jmp 2f
-      .endif
-      .previous
-       .quad 1b
-      .section .entry.text
-vector=vector+1
-    .endif
-  .endr
-2:     jmp common_interrupt
-.endr
+       .align  8
+    .endr
        CFI_ENDPROC
 END(irq_entries_start)
 
-.previous
-END(interrupt)
-.previous
-
 /*
  * Interrupt entry/exit.
  *
@@ -659,47 +662,45 @@ END(interrupt)
 
 /* 0(%rsp): ~(interrupt number) */
        .macro interrupt func
-       /* reserve pt_regs for scratch regs and rbp */
-       subq $ORIG_RAX-RBP, %rsp
-       CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
        cld
-       /* start from rbp in pt_regs and jump over */
-       movq_cfi rdi, (RDI-RBP)
-       movq_cfi rsi, (RSI-RBP)
-       movq_cfi rdx, (RDX-RBP)
-       movq_cfi rcx, (RCX-RBP)
-       movq_cfi rax, (RAX-RBP)
-       movq_cfi  r8,  (R8-RBP)
-       movq_cfi  r9,  (R9-RBP)
-       movq_cfi r10, (R10-RBP)
-       movq_cfi r11, (R11-RBP)
-
-       /* Save rbp so that we can unwind from get_irq_regs() */
-       movq_cfi rbp, 0
-
-       /* Save previous stack value */
-       movq %rsp, %rsi
+       /*
+        * Since nothing in interrupt handling code touches r12...r15 members
+        * of "struct pt_regs", and since interrupts can nest, we can save
+        * four stack slots and simultaneously provide
+        * an unwind-friendly stack layout by saving "truncated" pt_regs
+        * exactly up to rbp slot, without these members.
+        */
+       ALLOC_PT_GPREGS_ON_STACK -RBP
+       SAVE_C_REGS -RBP
+       /* this goes to 0(%rsp) for unwinder, not for saving the value: */
+       SAVE_EXTRA_REGS_RBP -RBP
 
-       leaq -RBP(%rsp),%rdi    /* arg1 for handler */
-       testl $3, CS-RBP(%rsi)
+       leaq -RBP(%rsp),%rdi    /* arg1 for \func (pointer to pt_regs) */
+
+       testl $3, CS-RBP(%rsp)
        je 1f
        SWAPGS
+1:
        /*
+        * Save previous stack pointer, optionally switch to interrupt stack.
         * irq_count is used to check if a CPU is already on an interrupt stack
         * or not. While this is essentially redundant with preempt_count it is
         * a little cheaper to use a separate counter in the PDA (short of
         * moving irq_enter into assembly, which would be too much work)
         */
-1:     incl PER_CPU_VAR(irq_count)
+       movq %rsp, %rsi
+       incl PER_CPU_VAR(irq_count)
        cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
        CFI_DEF_CFA_REGISTER    rsi
-
-       /* Store previous stack value */
        pushq %rsi
+       /*
+        * For debugger:
+        * "CFA (Current Frame Address) is the value on stack + offset"
+        */
        CFI_ESCAPE      0x0f /* DW_CFA_def_cfa_expression */, 6, \
-                       0x77 /* DW_OP_breg7 */, 0, \
+                       0x77 /* DW_OP_breg7 (rsp) */, 0, \
                        0x06 /* DW_OP_deref */, \
-                       0x08 /* DW_OP_const1u */, SS+8-RBP, \
+                       0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
                        0x22 /* DW_OP_plus */
        /* We entered an interrupt context - irqs are off: */
        TRACE_IRQS_OFF
@@ -717,7 +718,7 @@ common_interrupt:
        ASM_CLAC
        addq $-0x80,(%rsp)              /* Adjust vector to [-256,-1] range */
        interrupt do_IRQ
-       /* 0(%rsp): old_rsp-ARGOFFSET */
+       /* 0(%rsp): old RSP */
 ret_from_intr:
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
@@ -725,19 +726,18 @@ ret_from_intr:
 
        /* Restore saved previous stack */
        popq %rsi
-       CFI_DEF_CFA rsi,SS+8-RBP        /* reg/off reset after def_cfa_expr */
-       leaq ARGOFFSET-RBP(%rsi), %rsp
+       CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
+       /* return code expects complete pt_regs - adjust rsp accordingly: */
+       leaq -RBP(%rsi),%rsp
        CFI_DEF_CFA_REGISTER    rsp
-       CFI_ADJUST_CFA_OFFSET   RBP-ARGOFFSET
+       CFI_ADJUST_CFA_OFFSET   RBP
 
-exit_intr:
-       GET_THREAD_INFO(%rcx)
-       testl $3,CS-ARGOFFSET(%rsp)
+       testl $3,CS(%rsp)
        je retint_kernel
-
        /* Interrupt came from user space */
+
+       GET_THREAD_INFO(%rcx)
        /*
-        * Has a correct top of stack, but a partial stack frame
         * %rcx: thread info. Interrupts off.
         */
 retint_with_reschedule:
@@ -756,70 +756,34 @@ retint_swapgs:            /* return to user-space */
        DISABLE_INTERRUPTS(CLBR_ANY)
        TRACE_IRQS_IRETQ
 
-       /*
-        * Try to use SYSRET instead of IRET if we're returning to
-        * a completely clean 64-bit userspace context.
-        */
-       movq (RCX-R11)(%rsp), %rcx
-       cmpq %rcx,(RIP-R11)(%rsp)               /* RCX == RIP */
-       jne opportunistic_sysret_failed
-
-       /*
-        * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
-        * in kernel space.  This essentially lets the user take over
-        * the kernel, since userspace controls RSP.  It's not worth
-        * testing for canonicalness exactly -- this check detects any
-        * of the 17 high bits set, which is true for non-canonical
-        * or kernel addresses.  (This will pessimize vsyscall=native.
-        * Big deal.)
-        *
-        * If virtual addresses ever become wider, this will need
-        * to be updated to remain correct on both old and new CPUs.
-        */
-       .ifne __VIRTUAL_MASK_SHIFT - 47
-       .error "virtual address width changed -- sysret checks need update"
-       .endif
-       shr $__VIRTUAL_MASK_SHIFT, %rcx
-       jnz opportunistic_sysret_failed
-
-       cmpq $__USER_CS,(CS-R11)(%rsp)          /* CS must match SYSRET */
-       jne opportunistic_sysret_failed
-
-       movq (R11-ARGOFFSET)(%rsp), %r11
-       cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp)      /* R11 == RFLAGS */
-       jne opportunistic_sysret_failed
-
-       testq $X86_EFLAGS_RF,%r11               /* sysret can't restore RF */
-       jnz opportunistic_sysret_failed
-
-       /* nothing to check for RSP */
-
-       cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp)    /* SS must match SYSRET */
-       jne opportunistic_sysret_failed
-
-       /*
-        * We win!  This label is here just for ease of understanding
-        * perf profiles.  Nothing jumps here.
-        */
-irq_return_via_sysret:
-       CFI_REMEMBER_STATE
-       RESTORE_ARGS 1,8,1
-       movq (RSP-RIP)(%rsp),%rsp
-       USERGS_SYSRET64
-       CFI_RESTORE_STATE
-
-opportunistic_sysret_failed:
        SWAPGS
-       jmp restore_args
+       jmp     restore_c_regs_and_iret
 
-retint_restore_args:   /* return to kernel space */
-       DISABLE_INTERRUPTS(CLBR_ANY)
+/* Returning to kernel space */
+retint_kernel:
+#ifdef CONFIG_PREEMPT
+       /* Interrupts are off */
+       /* Check if we need preemption */
+       bt      $9,EFLAGS(%rsp) /* interrupts were off? */
+       jnc     1f
+0:     cmpl    $0,PER_CPU_VAR(__preempt_count)
+       jnz     1f
+       call    preempt_schedule_irq
+       jmp     0b
+1:
+#endif
        /*
         * The iretq could re-enable interrupts:
         */
        TRACE_IRQS_IRETQ
-restore_args:
-       RESTORE_ARGS 1,8,1
+
+/*
+ * At this label, code paths which return to kernel and to user,
+ * which come from interrupts/exception and from syscalls, merge.
+ */
+restore_c_regs_and_iret:
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
 
 irq_return:
        INTERRUPT_RETURN
@@ -890,28 +854,17 @@ retint_signal:
        jz    retint_swapgs
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
-       SAVE_REST
+       SAVE_EXTRA_REGS
        movq $-1,ORIG_RAX(%rsp)
        xorl %esi,%esi          # oldset
        movq %rsp,%rdi          # &pt_regs
        call do_notify_resume
-       RESTORE_REST
+       RESTORE_EXTRA_REGS
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
        GET_THREAD_INFO(%rcx)
        jmp retint_with_reschedule
 
-#ifdef CONFIG_PREEMPT
-       /* Returning to kernel space. Check if we need preemption */
-       /* rcx:  threadinfo. interrupts off. */
-ENTRY(retint_kernel)
-       cmpl $0,PER_CPU_VAR(__preempt_count)
-       jnz  retint_restore_args
-       bt   $9,EFLAGS-ARGOFFSET(%rsp)  /* interrupts off? */
-       jnc  retint_restore_args
-       call preempt_schedule_irq
-       jmp exit_intr
-#endif
        CFI_ENDPROC
 END(common_interrupt)
 
@@ -1000,7 +953,7 @@ apicinterrupt IRQ_WORK_VECTOR \
 /*
  * Exception entry points.
  */
-#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
 
 .macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
 ENTRY(\sym)
@@ -1022,8 +975,7 @@ ENTRY(\sym)
        pushq_cfi $-1                   /* ORIG_RAX: no syscall to restart */
        .endif
 
-       subq $ORIG_RAX-R15, %rsp
-       CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+       ALLOC_PT_GPREGS_ON_STACK
 
        .if \paranoid
        .if \paranoid == 1
@@ -1031,10 +983,11 @@ ENTRY(\sym)
        testl $3, CS(%rsp)              /* If coming from userspace, switch */
        jnz 1f                          /* stacks. */
        .endif
-       call save_paranoid
+       call paranoid_entry
        .else
        call error_entry
        .endif
+       /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
 
        DEFAULT_FRAME 0
 
@@ -1056,19 +1009,20 @@ ENTRY(\sym)
        .endif
 
        .if \shift_ist != -1
-       subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+       subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
        .endif
 
        call \do_sym
 
        .if \shift_ist != -1
-       addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+       addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
        .endif
 
+       /* these procedures expect "no swapgs" flag in ebx */
        .if \paranoid
-       jmp paranoid_exit               /* %ebx: no swapgs flag */
+       jmp paranoid_exit
        .else
-       jmp error_exit                  /* %ebx: no swapgs flag */
+       jmp error_exit
        .endif
 
        .if \paranoid == 1
@@ -1272,7 +1226,9 @@ ENTRY(xen_failsafe_callback)
        addq $0x30,%rsp
        CFI_ADJUST_CFA_OFFSET -0x30
        pushq_cfi $-1 /* orig_ax = -1 => not a system call */
-       SAVE_ALL
+       ALLOC_PT_GPREGS_ON_STACK
+       SAVE_C_REGS
+       SAVE_EXTRA_REGS
        jmp error_exit
        CFI_ENDPROC
 END(xen_failsafe_callback)
@@ -1304,59 +1260,66 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
 idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
 #endif
 
-       /*
-        * "Paranoid" exit path from exception stack.  This is invoked
-        * only on return from non-NMI IST interrupts that came
-        * from kernel space.
-        *
-        * We may be returning to very strange contexts (e.g. very early
-        * in syscall entry), so checking for preemption here would
-        * be complicated.  Fortunately, we there's no good reason
-        * to try to handle preemption here.
-        */
+/*
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Use slow, but surefire "are we in kernel?" check.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ */
+ENTRY(paranoid_entry)
+       XCPT_FRAME 1 15*8
+       cld
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
+       movl $1,%ebx
+       movl $MSR_GS_BASE,%ecx
+       rdmsr
+       testl %edx,%edx
+       js 1f   /* negative -> in kernel */
+       SWAPGS
+       xorl %ebx,%ebx
+1:     ret
+       CFI_ENDPROC
+END(paranoid_entry)
 
-       /* ebx: no swapgs flag */
+/*
+ * "Paranoid" exit path from exception stack.  This is invoked
+ * only on return from non-NMI IST interrupts that came
+ * from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated.  Fortunately, there's no good reason
+ * to try to handle preemption here.
+ */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(paranoid_exit)
        DEFAULT_FRAME
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF_DEBUG
        testl %ebx,%ebx                         /* swapgs needed? */
-       jnz paranoid_restore
-       TRACE_IRQS_IRETQ 0
+       jnz paranoid_exit_no_swapgs
+       TRACE_IRQS_IRETQ
        SWAPGS_UNSAFE_STACK
-       RESTORE_ALL 8
-       INTERRUPT_RETURN
-paranoid_restore:
-       TRACE_IRQS_IRETQ_DEBUG 0
-       RESTORE_ALL 8
+       jmp paranoid_exit_restore
+paranoid_exit_no_swapgs:
+       TRACE_IRQS_IRETQ_DEBUG
+paranoid_exit_restore:
+       RESTORE_EXTRA_REGS
+       RESTORE_C_REGS
+       REMOVE_PT_GPREGS_FROM_STACK 8
        INTERRUPT_RETURN
        CFI_ENDPROC
 END(paranoid_exit)
 
 /*
- * Exception entry point. This expects an error code/orig_rax on the stack.
- * returns in "no swapgs flag" in %ebx.
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
  */
 ENTRY(error_entry)
-       XCPT_FRAME
-       CFI_ADJUST_CFA_OFFSET 15*8
-       /* oldrax contains error code */
+       XCPT_FRAME 1 15*8
        cld
-       movq %rdi, RDI+8(%rsp)
-       movq %rsi, RSI+8(%rsp)
-       movq %rdx, RDX+8(%rsp)
-       movq %rcx, RCX+8(%rsp)
-       movq %rax, RAX+8(%rsp)
-       movq  %r8,  R8+8(%rsp)
-       movq  %r9,  R9+8(%rsp)
-       movq %r10, R10+8(%rsp)
-       movq %r11, R11+8(%rsp)
-       movq_cfi rbx, RBX+8
-       movq %rbp, RBP+8(%rsp)
-       movq %r12, R12+8(%rsp)
-       movq %r13, R13+8(%rsp)
-       movq %r14, R14+8(%rsp)
-       movq %r15, R15+8(%rsp)
+       SAVE_C_REGS 8
+       SAVE_EXTRA_REGS 8
        xorl %ebx,%ebx
        testl $3,CS+8(%rsp)
        je error_kernelspace
@@ -1366,12 +1329,12 @@ error_sti:
        TRACE_IRQS_OFF
        ret
 
-/*
- * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here.  B stepping K8s sometimes report a
- * truncated RIP for IRET exceptions returning to compat mode. Check
- * for these here too.
- */
+       /*
+        * There are two places in the kernel that can potentially fault with
+        * usergs. Handle them here.  B stepping K8s sometimes report a
+        * truncated RIP for IRET exceptions returning to compat mode. Check
+        * for these here too.
+        */
 error_kernelspace:
        CFI_REL_OFFSET rcx, RCX+8
        incl %ebx
@@ -1401,11 +1364,11 @@ error_bad_iret:
 END(error_entry)
 
 
-/* ebx:        no swapgs flag (1: don't need swapgs, 0: need it) */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
 ENTRY(error_exit)
        DEFAULT_FRAME
        movl %ebx,%eax
-       RESTORE_REST
+       RESTORE_EXTRA_REGS
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
        GET_THREAD_INFO(%rcx)
@@ -1420,19 +1383,7 @@ ENTRY(error_exit)
        CFI_ENDPROC
 END(error_exit)
 
-/*
- * Test if a given stack is an NMI stack or not.
- */
-       .macro test_in_nmi reg stack nmi_ret normal_ret
-       cmpq %\reg, \stack
-       ja \normal_ret
-       subq $EXCEPTION_STKSZ, %\reg
-       cmpq %\reg, \stack
-       jb \normal_ret
-       jmp \nmi_ret
-       .endm
-
-       /* runs on exception stack */
+/* Runs on exception stack */
 ENTRY(nmi)
        INTR_FRAME
        PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1468,7 +1419,7 @@ ENTRY(nmi)
         * NMI.
         */
 
-       /* Use %rdx as out temp variable throughout */
+       /* Use %rdx as our temp variable throughout */
        pushq_cfi %rdx
        CFI_REL_OFFSET rdx, 0
 
@@ -1493,8 +1444,17 @@ ENTRY(nmi)
         * We check the variable because the first NMI could be in a
         * breakpoint routine using a breakpoint stack.
         */
-       lea 6*8(%rsp), %rdx
-       test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+       lea     6*8(%rsp), %rdx
+       /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
+       cmpq    %rdx, 4*8(%rsp)
+       /* If the stack pointer is above the NMI stack, this is a normal NMI */
+       ja      first_nmi
+       subq    $EXCEPTION_STKSZ, %rdx
+       cmpq    %rdx, 4*8(%rsp)
+       /* If it is below the NMI stack, it is a normal NMI */
+       jb      first_nmi
+       /* Ah, it is within the NMI stack, treat it as nested */
+
        CFI_REMEMBER_STATE
 
 nested_nmi:
@@ -1587,7 +1547,7 @@ first_nmi:
        .rept 5
        pushq_cfi 11*8(%rsp)
        .endr
-       CFI_DEF_CFA_OFFSET SS+8-RIP
+       CFI_DEF_CFA_OFFSET 5*8
 
        /* Everything up to here is safe from nested NMIs */
 
@@ -1615,7 +1575,7 @@ repeat_nmi:
        pushq_cfi -6*8(%rsp)
        .endr
        subq $(5*8), %rsp
-       CFI_DEF_CFA_OFFSET SS+8-RIP
+       CFI_DEF_CFA_OFFSET 5*8
 end_repeat_nmi:
 
        /*
@@ -1624,16 +1584,16 @@ end_repeat_nmi:
         * so that we repeat another NMI.
         */
        pushq_cfi $-1           /* ORIG_RAX: no syscall to restart */
-       subq $ORIG_RAX-R15, %rsp
-       CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+       ALLOC_PT_GPREGS_ON_STACK
+
        /*
-        * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+        * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
         * as we should not be calling schedule in NMI context.
         * Even with normal interrupts enabled. An NMI should not be
         * setting NEED_RESCHED or anything that normal interrupts and
         * exceptions might do.
         */
-       call save_paranoid
+       call paranoid_entry
        DEFAULT_FRAME 0
 
        /*
@@ -1664,8 +1624,10 @@ end_repeat_nmi:
 nmi_swapgs:
        SWAPGS_UNSAFE_STACK
 nmi_restore:
+       RESTORE_EXTRA_REGS
+       RESTORE_C_REGS
        /* Pop the extra iret frame at once */
-       RESTORE_ALL 6*8
+       REMOVE_PT_GPREGS_FROM_STACK 6*8
 
        /* Clear the NMI executing stack variable */
        movq $0, 5*8(%rsp)
index c4f8d46..2b55ee6 100644 (file)
@@ -177,9 +177,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
         */
        load_ucode_bsp();
 
-       if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
-               early_printk("Kernel alive\n");
-
        clear_page(init_level4_pgt);
        /* set init_level4_pgt kernel high mapping*/
        init_level4_pgt[511] = early_level4_pgt[511];
index f36bd42..d031bad 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/cpufeature.h>
 #include <asm/percpu.h>
 #include <asm/nops.h>
+#include <asm/bootparam.h>
 
 /* Physical address */
 #define pa(X) ((X) - __PAGE_OFFSET)
@@ -90,7 +91,7 @@ ENTRY(startup_32)
        
        /* test KEEP_SEGMENTS flag to see if the bootloader is asking
                us to not reload segments */
-       testb $(1<<6), BP_loadflags(%esi)
+       testb $KEEP_SEGMENTS, BP_loadflags(%esi)
        jnz 2f
 
 /*
index 6fd514d..ae6588b 100644 (file)
@@ -1,5 +1,5 @@
 /*
- *  linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
+ *  linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
  *
  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
  *  Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
@@ -56,7 +56,7 @@ startup_64:
         * %rsi holds a physical pointer to real_mode_data.
         *
         * We come here either directly from a 64bit bootloader, or from
-        * arch/x86_64/boot/compressed/head.S.
+        * arch/x86/boot/compressed/head_64.S.
         *
         * We only come here initially at boot nothing else comes here.
         *
@@ -146,7 +146,7 @@ startup_64:
        leaq    level2_kernel_pgt(%rip), %rdi
        leaq    4096(%rdi), %r8
        /* See if it is a valid page table entry */
-1:     testq   $1, 0(%rdi)
+1:     testb   $1, 0(%rdi)
        jz      2f
        addq    %rbp, 0(%rdi)
        /* Go to the next page */
index d5651fc..29c740d 100644 (file)
@@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
 static inline bool interrupted_user_mode(void)
 {
        struct pt_regs *regs = get_irq_regs();
-       return regs && user_mode_vm(regs);
+       return regs && user_mode(regs);
 }
 
 /*
index 4ddaf66..37dae79 100644 (file)
@@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
         * because the ->io_bitmap_max value must match the bitmap
         * contents:
         */
-       tss = &per_cpu(init_tss, get_cpu());
+       tss = &per_cpu(cpu_tss, get_cpu());
 
        if (turn_on)
                bitmap_clear(t->io_bitmap_ptr, from, num);
index 67b1cbe..e5952c2 100644 (file)
@@ -295,7 +295,7 @@ int check_irq_vectors_for_cpu_disable(void)
 
        this_cpu = smp_processor_id();
        cpumask_copy(&online_new, cpu_online_mask);
-       cpu_clear(this_cpu, online_new);
+       cpumask_clear_cpu(this_cpu, &online_new);
 
        this_count = 0;
        for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
@@ -307,7 +307,7 @@ int check_irq_vectors_for_cpu_disable(void)
 
                        data = irq_desc_get_irq_data(desc);
                        cpumask_copy(&affinity_new, data->affinity);
-                       cpu_clear(this_cpu, affinity_new);
+                       cpumask_clear_cpu(this_cpu, &affinity_new);
 
                        /* Do not count inactive or per-cpu irqs. */
                        if (!irq_has_action(irq) || irqd_is_per_cpu(data))
index 28d28f5..f9fd86a 100644 (file)
@@ -165,7 +165,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
        if (unlikely(!desc))
                return false;
 
-       if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
+       if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
                if (unlikely(overflow))
                        print_stack_overflow();
                desc->handle_irq(irq, desc);
index e4b503d..394e643 100644 (file)
@@ -44,7 +44,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
        u64 estack_top, estack_bottom;
        u64 curbase = (u64)task_stack_page(current);
 
-       if (user_mode_vm(regs))
+       if (user_mode(regs))
                return;
 
        if (regs->sp >= curbase + sizeof(struct thread_info) +
index 70e181e..cd10a64 100644 (file)
@@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
 #endif
        for_each_clear_bit_from(i, used_vectors, first_system_vector) {
                /* IA32_SYSCALL_VECTOR could be used in trap_init already. */
-               set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+               set_intr_gate(i, irq_entries_start +
+                               8 * (i - FIRST_EXTERNAL_VECTOR));
        }
 #ifdef CONFIG_X86_LOCAL_APIC
        for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
index 7ec1d5f..d6178d9 100644 (file)
@@ -72,7 +72,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
        { "bx", 8, offsetof(struct pt_regs, bx) },
        { "cx", 8, offsetof(struct pt_regs, cx) },
        { "dx", 8, offsetof(struct pt_regs, dx) },
-       { "si", 8, offsetof(struct pt_regs, dx) },
+       { "si", 8, offsetof(struct pt_regs, si) },
        { "di", 8, offsetof(struct pt_regs, di) },
        { "bp", 8, offsetof(struct pt_regs, bp) },
        { "sp", 8, offsetof(struct pt_regs, sp) },
@@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
 #ifdef CONFIG_X86_32
        switch (regno) {
        case GDB_SS:
-               if (!user_mode_vm(regs))
+               if (!user_mode(regs))
                        *(unsigned long *)mem = __KERNEL_DS;
                break;
        case GDB_SP:
-               if (!user_mode_vm(regs))
+               if (!user_mode(regs))
                        *(unsigned long *)mem = kernel_stack_pointer(regs);
                break;
        case GDB_GS:
index 4e3d5a9..24d0796 100644 (file)
@@ -602,7 +602,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
        struct kprobe *p;
        struct kprobe_ctlblk *kcb;
 
-       if (user_mode_vm(regs))
+       if (user_mode(regs))
                return 0;
 
        addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
@@ -1007,7 +1007,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
        struct die_args *args = data;
        int ret = NOTIFY_DONE;
 
-       if (args->regs && user_mode_vm(args->regs))
+       if (args->regs && user_mode(args->regs))
                return ret;
 
        if (val == DIE_GPF) {
index 9bbb9b3..005c03e 100644 (file)
@@ -33,6 +33,7 @@
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/setup.h>
 
 #if 0
 #define DEBUGP(fmt, ...)                               \
@@ -53,7 +54,7 @@ static DEFINE_MUTEX(module_kaslr_mutex);
 
 static unsigned long int get_module_load_offset(void)
 {
-       if (kaslr_enabled) {
+       if (kaslr_enabled()) {
                mutex_lock(&module_kaslr_mutex);
                /*
                 * Calculate the module_load_offset the first time this
index 781861c..da8cb98 100644 (file)
@@ -131,10 +131,11 @@ void perf_get_regs_user(struct perf_regs *regs_user,
        }
 
        /*
-        * RIP, flags, and the argument registers are usually saved.
-        * orig_ax is probably okay, too.
+        * These registers are always saved on 64-bit syscall entry.
+        * On 32-bit entry points, they are saved too except r8..r11.
         */
        regs_user_copy->ip = user_regs->ip;
+       regs_user_copy->ax = user_regs->ax;
        regs_user_copy->cx = user_regs->cx;
        regs_user_copy->dx = user_regs->dx;
        regs_user_copy->si = user_regs->si;
@@ -145,9 +146,12 @@ void perf_get_regs_user(struct perf_regs *regs_user,
        regs_user_copy->r11 = user_regs->r11;
        regs_user_copy->orig_ax = user_regs->orig_ax;
        regs_user_copy->flags = user_regs->flags;
+       regs_user_copy->sp = user_regs->sp;
+       regs_user_copy->cs = user_regs->cs;
+       regs_user_copy->ss = user_regs->ss;
 
        /*
-        * Don't even try to report the "rest" regs.
+        * Most system calls don't save these registers, so don't report them.
         */
        regs_user_copy->bx = -1;
        regs_user_copy->bp = -1;
@@ -158,37 +162,13 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 
        /*
         * For this to be at all useful, we need a reasonable guess for
-        * sp and the ABI.  Be careful: we're in NMI context, and we're
+        * the ABI.  Be careful: we're in NMI context, and we're
         * considering current to be the current task, so we should
         * be careful not to look at any other percpu variables that might
         * change during context switches.
         */
-       if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
-           task_thread_info(current)->status & TS_COMPAT) {
-               /* Easy case: we're in a compat syscall. */
-               regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
-               regs_user_copy->sp = user_regs->sp;
-               regs_user_copy->cs = user_regs->cs;
-               regs_user_copy->ss = user_regs->ss;
-       } else if (user_regs->orig_ax != -1) {
-               /*
-                * We're probably in a 64-bit syscall.
-                * Warning: this code is severely racy.  At least it's better
-                * than just blindly copying user_regs.
-                */
-               regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
-               regs_user_copy->sp = this_cpu_read(old_rsp);
-               regs_user_copy->cs = __USER_CS;
-               regs_user_copy->ss = __USER_DS;
-               regs_user_copy->cx = -1;  /* usually contains garbage */
-       } else {
-               /* We're probably in an interrupt or exception. */
-               regs_user->abi = user_64bit_mode(user_regs) ?
-                       PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
-               regs_user_copy->sp = user_regs->sp;
-               regs_user_copy->cs = user_regs->cs;
-               regs_user_copy->ss = user_regs->ss;
-       }
+       regs_user->abi = user_64bit_mode(user_regs) ?
+               PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
 
        regs_user->regs = regs_user_copy;
 }
index 046e2d6..0c8992d 100644 (file)
@@ -9,7 +9,7 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/pm.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/random.h>
 #include <linux/user-return-notifier.h>
 #include <linux/dmi.h>
@@ -24,6 +24,7 @@
 #include <asm/syscalls.h>
 #include <asm/idle.h>
 #include <asm/uaccess.h>
+#include <asm/mwait.h>
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/debugreg.h>
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+       .x86_tss = {
+               .sp0 = TOP_OF_INIT_STACK,
+#ifdef CONFIG_X86_32
+               .ss0 = __KERNEL_DS,
+               .ss1 = __KERNEL_CS,
+               .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+#endif
+        },
+#ifdef CONFIG_X86_32
+        /*
+         * Note that the .io_bitmap member must be extra-big. This is because
+         * the CPU will access an additional byte beyond the end of the IO
+         * permission bitmap. The extra byte must be all 1 bits, and must
+         * be within the limit.
+         */
+       .io_bitmap              = { [0 ... IO_BITMAP_LONGS] = ~0 },
+#endif
+};
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
 
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -109,7 +129,7 @@ void exit_thread(void)
        unsigned long *bp = t->io_bitmap_ptr;
 
        if (bp) {
-               struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+               struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
 
                t->io_bitmap_ptr = NULL;
                clear_thread_flag(TIF_IO_BITMAP);
@@ -377,14 +397,11 @@ static void amd_e400_idle(void)
 
                if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
                        cpumask_set_cpu(cpu, amd_e400_c1e_mask);
-                       /*
-                        * Force broadcast so ACPI can not interfere.
-                        */
-                       clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-                                          &cpu);
+                       /* Force broadcast so ACPI can not interfere. */
+                       tick_broadcast_force();
                        pr_info("Switch to broadcast mode on CPU%d\n", cpu);
                }
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+               tick_broadcast_enter();
 
                default_idle();
 
@@ -393,12 +410,59 @@ static void amd_e400_idle(void)
                 * called with interrupts disabled.
                 */
                local_irq_disable();
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+               tick_broadcast_exit();
                local_irq_enable();
        } else
                default_idle();
 }
 
+/*
+ * Intel Core2 and older machines prefer MWAIT over HALT for C1.
+ * We can't rely on cpuidle installing MWAIT, because it will not load
+ * on systems that support only C1 -- so the boot default must be MWAIT.
+ *
+ * Some AMD machines are the opposite, they depend on using HALT.
+ *
+ * So for default C1, which is used during boot until cpuidle loads,
+ * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ */
+static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+{
+       if (c->x86_vendor != X86_VENDOR_INTEL)
+               return 0;
+
+       if (!cpu_has(c, X86_FEATURE_MWAIT))
+               return 0;
+
+       return 1;
+}
+
+/*
+ * MONITOR/MWAIT with no hints, used for the default C1 state.
+ * This invokes MWAIT with interrupts enabled and no flags,
+ * which is backwards compatible with the original MWAIT implementation.
+ */
+
+static void mwait_idle(void)
+{
+       if (!current_set_polling_and_test()) {
+               if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
+                       smp_mb(); /* quirk */
+                       clflush((void *)&current_thread_info()->flags);
+                       smp_mb(); /* quirk */
+               }
+
+               __monitor((void *)&current_thread_info()->flags, 0, 0);
+               if (!need_resched())
+                       __sti_mwait(0, 0);
+               else
+                       local_irq_enable();
+       } else {
+               local_irq_enable();
+       }
+       __current_clr_polling();
+}
+
 void select_idle_routine(const struct cpuinfo_x86 *c)
 {
 #ifdef CONFIG_SMP
@@ -412,6 +476,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
                /* E400: APIC timer interrupt does not wake up CPU from C1e */
                pr_info("using AMD E400 aware idle routine\n");
                x86_idle = amd_e400_idle;
+       } else if (prefer_mwait_c1_over_halt(c)) {
+               pr_info("using mwait in idle threads\n");
+               x86_idle = mwait_idle;
        } else
                x86_idle = default_idle;
 }
index 603c4f9..8ed2106 100644 (file)
@@ -73,7 +73,7 @@ void __show_regs(struct pt_regs *regs, int all)
        unsigned long sp;
        unsigned short ss, gs;
 
-       if (user_mode_vm(regs)) {
+       if (user_mode(regs)) {
                sp = regs->sp;
                ss = regs->ss & 0xffff;
                gs = get_user_gs(regs);
@@ -206,11 +206,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
        regs->ip                = new_ip;
        regs->sp                = new_sp;
        regs->flags             = X86_EFLAGS_IF;
-       /*
-        * force it to the iret return path by making it look as if there was
-        * some work pending.
-        */
-       set_thread_flag(TIF_NOTIFY_RESUME);
+       force_iret();
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
@@ -248,18 +244,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct thread_struct *prev = &prev_p->thread,
                                 *next = &next_p->thread;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
        fpu_switch_t fpu;
 
        /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
        fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
-       /*
-        * Reload esp0.
-        */
-       load_sp0(tss, next);
-
        /*
         * Save away %gs. No need to save %fs, as it was saved on the
         * stack on entry.  No need to save %es and %ds, as those are
@@ -310,9 +301,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
         */
        arch_end_context_switch(next_p);
 
+       /*
+        * Reload esp0, kernel_stack, and current_top_of_stack.  This changes
+        * current_thread_info().
+        */
+       load_sp0(tss, next);
        this_cpu_write(kernel_stack,
-                 (unsigned long)task_stack_page(next_p) +
-                 THREAD_SIZE - KERNEL_STACK_OFFSET);
+                      (unsigned long)task_stack_page(next_p) +
+                      THREAD_SIZE);
+       this_cpu_write(cpu_current_top_of_stack,
+                      (unsigned long)task_stack_page(next_p) +
+                      THREAD_SIZE);
 
        /*
         * Restore %gs if needed (which is common)
index 67fcc43..4baaa97 100644 (file)
@@ -52,7 +52,7 @@
 
 asmlinkage extern void ret_from_fork(void);
 
-__visible DEFINE_PER_CPU(unsigned long, old_rsp);
+__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
 /* Prints also some state that isn't saved in the pt_regs */
 void __show_regs(struct pt_regs *regs, int all)
@@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
        p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
        childregs = task_pt_regs(p);
        p->thread.sp = (unsigned long) childregs;
-       p->thread.usersp = me->thread.usersp;
        set_tsk_thread_flag(p, TIF_FORK);
        p->thread.io_bitmap_ptr = NULL;
 
@@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
         */
        if (clone_flags & CLONE_SETTLS) {
 #ifdef CONFIG_IA32_EMULATION
-               if (test_thread_flag(TIF_IA32))
+               if (is_ia32_task())
                        err = do_set_thread_area(p, -1,
                                (struct user_desc __user *)childregs->si, 0);
                else
@@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
        loadsegment(es, _ds);
        loadsegment(ds, _ds);
        load_gs_index(0);
-       current->thread.usersp  = new_sp;
        regs->ip                = new_ip;
        regs->sp                = new_sp;
-       this_cpu_write(old_rsp, new_sp);
        regs->cs                = _cs;
        regs->ss                = _ss;
        regs->flags             = X86_EFLAGS_IF;
+       force_iret();
 }
 
 void
@@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        struct thread_struct *prev = &prev_p->thread;
        struct thread_struct *next = &next_p->thread;
        int cpu = smp_processor_id();
-       struct tss_struct *tss = &per_cpu(init_tss, cpu);
+       struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
        unsigned fsindex, gsindex;
        fpu_switch_t fpu;
 
        fpu = switch_fpu_prepare(prev_p, next_p, cpu);
 
-       /* Reload esp0 and ss1. */
-       load_sp0(tss, next);
-
        /* We must save %fs and %gs before load_TLS() because
         * %fs and %gs may be cleared by load_TLS().
         *
@@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        /*
         * Switch the PDA and FPU contexts.
         */
-       prev->usersp = this_cpu_read(old_rsp);
-       this_cpu_write(old_rsp, next->usersp);
        this_cpu_write(current_task, next_p);
 
        /*
@@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
        this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
 
+       /* Reload esp0 and ss1.  This changes current_thread_info(). */
+       load_sp0(tss, next);
+
        this_cpu_write(kernel_stack,
-                 (unsigned long)task_stack_page(next_p) +
-                 THREAD_SIZE - KERNEL_STACK_OFFSET);
+               (unsigned long)task_stack_page(next_p) + THREAD_SIZE);
 
        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
@@ -602,6 +597,5 @@ long sys_arch_prctl(int code, unsigned long addr)
 
 unsigned long KSTK_ESP(struct task_struct *task)
 {
-       return (test_tsk_thread_flag(task, TIF_IA32)) ?
-                       (task_pt_regs(task)->sp) : ((task)->thread.usersp);
+       return task_pt_regs(task)->sp;
 }
index e510618..a7bc794 100644 (file)
@@ -364,18 +364,12 @@ static int set_segment_reg(struct task_struct *task,
        case offsetof(struct user_regs_struct,cs):
                if (unlikely(value == 0))
                        return -EIO;
-#ifdef CONFIG_IA32_EMULATION
-               if (test_tsk_thread_flag(task, TIF_IA32))
-                       task_pt_regs(task)->cs = value;
-#endif
+               task_pt_regs(task)->cs = value;
                break;
        case offsetof(struct user_regs_struct,ss):
                if (unlikely(value == 0))
                        return -EIO;
-#ifdef CONFIG_IA32_EMULATION
-               if (test_tsk_thread_flag(task, TIF_IA32))
-                       task_pt_regs(task)->ss = value;
-#endif
+               task_pt_regs(task)->ss = value;
                break;
        }
 
@@ -1421,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk,
        memset(info, 0, sizeof(*info));
        info->si_signo = SIGTRAP;
        info->si_code = si_code;
-       info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
+       info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
 }
 
 void user_single_step_siginfo(struct task_struct *tsk,
index 2f355d2..e5ecd20 100644 (file)
@@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
        set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
 }
 
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+       if (!pvclock_vdso_info) {
+               BUG();
+               return NULL;
+       }
+
+       return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+       return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
 #ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+                               void *v)
+{
+       struct task_migration_notifier *mn = v;
+       struct pvclock_vsyscall_time_info *pvti;
+
+       pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+       /* this is NULL when pvclock vsyscall is not initialized */
+       if (unlikely(pvti == NULL))
+               return NOTIFY_DONE;
+
+       pvti->migrate_count++;
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+       .notifier_call = pvclock_task_migrate,
+};
+
 /*
  * Initialize the generic pvclock vsyscall state.  This will allocate
  * a/some page(s) for the per-vcpu pvclock information, set up a
@@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
 
        WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
 
+       pvclock_vdso_info = i;
+
        for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
                __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
                             __pa(i) + (idx*PAGE_SIZE),
                             PAGE_KERNEL_VVAR);
        }
 
+
+       register_task_migration_notifier(&pvclock_migrate);
+
        return 0;
 }
 #endif
index bae6c60..86db4bc 100644 (file)
@@ -183,6 +183,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
                },
        },
 
+       /* ASRock */
+       {       /* Handle problems with rebooting on ASRock Q1900DC-ITX */
+               .callback = set_pci_reboot,
+               .ident = "ASRock Q1900DC-ITX",
+               .matches = {
+                       DMI_MATCH(DMI_BOARD_VENDOR, "ASRock"),
+                       DMI_MATCH(DMI_BOARD_NAME, "Q1900DC-ITX"),
+               },
+       },
+
        /* ASUS */
        {       /* Handle problems with rebooting on ASUS P4S800 */
                .callback = set_bios_reboot,
index e13f8e7..77630d5 100644 (file)
@@ -226,23 +226,23 @@ swap_pages:
        movl    (%ebx), %ecx
        addl    $4, %ebx
 1:
-       testl   $0x1,   %ecx  /* is it a destination page */
+       testb   $0x1, %cl     /* is it a destination page */
        jz      2f
        movl    %ecx,   %edi
        andl    $0xfffff000, %edi
        jmp     0b
 2:
-       testl   $0x2,   %ecx  /* is it an indirection page */
+       testb   $0x2, %cl    /* is it an indirection page */
        jz      2f
        movl    %ecx,   %ebx
        andl    $0xfffff000, %ebx
        jmp     0b
 2:
-       testl   $0x4,   %ecx /* is it the done indicator */
+       testb   $0x4, %cl    /* is it the done indicator */
        jz      2f
        jmp     3f
 2:
-       testl   $0x8,   %ecx /* is it the source indicator */
+       testb   $0x8, %cl    /* is it the source indicator */
        jz      0b           /* Ignore it otherwise */
        movl    %ecx,   %esi /* For every source page do a copy */
        andl    $0xfffff000, %esi
index 3fd2c69..98111b3 100644 (file)
@@ -123,7 +123,7 @@ identity_mapped:
         * Set cr4 to a known state:
         *  - physical address extension enabled
         */
-       movq    $X86_CR4_PAE, %rax
+       movl    $X86_CR4_PAE, %eax
        movq    %rax, %cr4
 
        jmp 1f
@@ -221,23 +221,23 @@ swap_pages:
        movq    (%rbx), %rcx
        addq    $8,     %rbx
 1:
-       testq   $0x1,   %rcx  /* is it a destination page? */
+       testb   $0x1,   %cl   /* is it a destination page? */
        jz      2f
        movq    %rcx,   %rdi
        andq    $0xfffffffffffff000, %rdi
        jmp     0b
 2:
-       testq   $0x2,   %rcx  /* is it an indirection page? */
+       testb   $0x2,   %cl   /* is it an indirection page? */
        jz      2f
        movq    %rcx,   %rbx
        andq    $0xfffffffffffff000, %rbx
        jmp     0b
 2:
-       testq   $0x4,   %rcx  /* is it the done indicator? */
+       testb   $0x4,   %cl   /* is it the done indicator? */
        jz      2f
        jmp     3f
 2:
-       testq   $0x8,   %rcx  /* is it the source indicator? */
+       testb   $0x8,   %cl   /* is it the source indicator? */
        jz      0b            /* Ignore it otherwise */
        movq    %rcx,   %rsi  /* For ever source page do a copy */
        andq    $0xfffffffffffff000, %rsi
@@ -246,17 +246,17 @@ swap_pages:
        movq    %rsi, %rax
 
        movq    %r10, %rdi
-       movq    $512,   %rcx
+       movl    $512, %ecx
        rep ; movsq
 
        movq    %rax, %rdi
        movq    %rdx, %rsi
-       movq    $512,   %rcx
+       movl    $512, %ecx
        rep ; movsq
 
        movq    %rdx, %rdi
        movq    %r10, %rsi
-       movq    $512,   %rcx
+       movl    $512, %ecx
        rep ; movsq
 
        lea     PAGE_SIZE(%rax), %rsi
index 98dc931..014466b 100644 (file)
 unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
-bool __read_mostly kaslr_enabled = false;
-
 #ifdef CONFIG_DMI
 RESERVE_BRK(dmi_alloc, 65536);
 #endif
@@ -427,11 +425,6 @@ static void __init reserve_initrd(void)
 }
 #endif /* CONFIG_BLK_DEV_INITRD */
 
-static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
-{
-       kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
-}
-
 static void __init parse_setup_data(void)
 {
        struct setup_data *data;
@@ -457,9 +450,6 @@ static void __init parse_setup_data(void)
                case SETUP_EFI:
                        parse_efi_setup(pa_data, data_len);
                        break;
-               case SETUP_KASLR:
-                       parse_kaslr_setup(pa_data, data_len);
-                       break;
                default:
                        break;
                }
@@ -842,14 +832,15 @@ static void __init trim_low_memory_range(void)
 static int
 dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
 {
-       if (kaslr_enabled)
+       if (kaslr_enabled()) {
                pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
                         (unsigned long)&_text - __START_KERNEL,
                         __START_KERNEL,
                         __START_KERNEL_map,
                         MODULES_VADDR-1);
-       else
+       } else {
                pr_emerg("Kernel Offset: disabled\n");
+       }
 
        return 0;
 }
index e504246..53cc408 100644 (file)
@@ -61,8 +61,7 @@
        regs->seg = GET_SEG(seg) | 3;                   \
 } while (0)
 
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
-                      unsigned long *pax)
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
 {
        void __user *buf;
        unsigned int tmpflags;
@@ -81,7 +80,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
 #endif /* CONFIG_X86_32 */
 
                COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
-               COPY(dx); COPY(cx); COPY(ip);
+               COPY(dx); COPY(cx); COPY(ip); COPY(ax);
 
 #ifdef CONFIG_X86_64
                COPY(r8);
@@ -94,27 +93,20 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
                COPY(r15);
 #endif /* CONFIG_X86_64 */
 
-#ifdef CONFIG_X86_32
                COPY_SEG_CPL3(cs);
                COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-               /* Kernel saves and restores only the CS segment register on signals,
-                * which is the bare minimum needed to allow mixed 32/64-bit code.
-                * App's signal handler can save/restore other segments if needed. */
-               COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
 
                get_user_ex(tmpflags, &sc->flags);
                regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
                regs->orig_ax = -1;             /* disable syscall checks */
 
                get_user_ex(buf, &sc->fpstate);
-
-               get_user_ex(*pax, &sc->ax);
        } get_user_catch(err);
 
        err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
 
+       force_iret();
+
        return err;
 }
 
@@ -162,8 +154,9 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
 #else /* !CONFIG_X86_32 */
                put_user_ex(regs->flags, &sc->flags);
                put_user_ex(regs->cs, &sc->cs);
-               put_user_ex(0, &sc->gs);
-               put_user_ex(0, &sc->fs);
+               put_user_ex(0, &sc->__pad2);
+               put_user_ex(0, &sc->__pad1);
+               put_user_ex(regs->ss, &sc->ss);
 #endif /* CONFIG_X86_32 */
 
                put_user_ex(fpstate, &sc->fpstate);
@@ -457,9 +450,19 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
        regs->sp = (unsigned long)frame;
 
-       /* Set up the CS register to run signal handlers in 64-bit mode,
-          even if the handler happens to be interrupting 32-bit code. */
+       /*
+        * Set up the CS and SS registers to run signal handlers in
+        * 64-bit mode, even if the handler happens to be interrupting
+        * 32-bit or 16-bit code.
+        *
+        * SS is subtle.  In 64-bit mode, we don't need any particular
+        * SS descriptor, but we do need SS to be valid.  It's possible
+        * that the old SS is entirely bogus -- this can happen if the
+        * signal we're trying to deliver is #GP or #SS caused by a bad
+        * SS value.
+        */
        regs->cs = __USER_CS;
+       regs->ss = __USER_DS;
 
        return 0;
 }
@@ -539,7 +542,6 @@ asmlinkage unsigned long sys_sigreturn(void)
 {
        struct pt_regs *regs = current_pt_regs();
        struct sigframe __user *frame;
-       unsigned long ax;
        sigset_t set;
 
        frame = (struct sigframe __user *)(regs->sp - 8);
@@ -553,9 +555,9 @@ asmlinkage unsigned long sys_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->sc, &ax))
+       if (restore_sigcontext(regs, &frame->sc))
                goto badframe;
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "sigreturn");
@@ -568,7 +570,6 @@ asmlinkage long sys_rt_sigreturn(void)
 {
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe __user *frame;
-       unsigned long ax;
        sigset_t set;
 
        frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
@@ -579,13 +580,13 @@ asmlinkage long sys_rt_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
                goto badframe;
 
        if (restore_altstack(&frame->uc.uc_stack))
                goto badframe;
 
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "rt_sigreturn");
@@ -780,7 +781,6 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe_x32 __user *frame;
        sigset_t set;
-       unsigned long ax;
 
        frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -791,13 +791,13 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
                goto badframe;
 
        if (compat_restore_altstack(&frame->uc.uc_stack))
                goto badframe;
 
-       return ax;
+       return regs->ax;
 
 badframe:
        signal_fault(regs, frame, "x32 rt_sigreturn");
index febc6aa..7035f6b 100644 (file)
@@ -779,6 +779,26 @@ out:
        return boot_error;
 }
 
+void common_cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+       /* Just in case we booted with a single CPU. */
+       alternatives_enable_smp();
+
+       per_cpu(current_task, cpu) = idle;
+
+#ifdef CONFIG_X86_32
+       /* Stack for startup_32 can be just as for start_secondary onwards */
+       irq_ctx_init(cpu);
+       per_cpu(cpu_current_top_of_stack, cpu) =
+               (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+#else
+       clear_tsk_thread_flag(idle, TIF_FORK);
+       initial_gs = per_cpu_offset(cpu);
+#endif
+       per_cpu(kernel_stack, cpu) =
+               (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+}
+
 /*
  * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
  * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -796,23 +816,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
        int cpu0_nmi_registered = 0;
        unsigned long timeout;
 
-       /* Just in case we booted with a single CPU. */
-       alternatives_enable_smp();
-
        idle->thread.sp = (unsigned long) (((struct pt_regs *)
                          (THREAD_SIZE +  task_stack_page(idle))) - 1);
-       per_cpu(current_task, cpu) = idle;
 
-#ifdef CONFIG_X86_32
-       /* Stack for startup_32 can be just as for start_secondary onwards */
-       irq_ctx_init(cpu);
-#else
-       clear_tsk_thread_flag(idle, TIF_FORK);
-       initial_gs = per_cpu_offset(cpu);
-#endif
-       per_cpu(kernel_stack, cpu) =
-               (unsigned long)task_stack_page(idle) -
-               KERNEL_STACK_OFFSET + THREAD_SIZE;
        early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
        initial_code = (unsigned long)start_secondary;
        stack_start  = idle->thread.sp;
@@ -953,6 +959,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
        /* the FPU context is blank, nobody can own it */
        __cpu_disable_lazy_restore(cpu);
 
+       common_cpu_up(cpu, tidle);
+
        err = do_boot_cpu(apicid, cpu, tidle);
        if (err) {
                pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
@@ -1086,8 +1094,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
                return SMP_NO_APIC;
        }
 
-       verify_local_APIC();
-
        /*
         * If SMP should be disabled, then really disable it!
         */
index e9bcd57..3777189 100644 (file)
@@ -5,21 +5,29 @@
 #include <linux/cache.h>
 #include <asm/asm-offsets.h>
 
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
+#ifdef CONFIG_IA32_EMULATION
+#define SYM(sym, compat) compat
+#else
+#define SYM(sym, compat) sym
+#define ia32_sys_call_table sys_call_table
+#define __NR_ia32_syscall_max __NR_syscall_max
+#endif
+
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
 #include <asm/syscalls_32.h>
 #undef __SYSCALL_I386
 
-#define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
 
 typedef asmlinkage void (*sys_call_ptr_t)(void);
 
 extern asmlinkage void sys_ni_syscall(void);
 
-__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
        /*
         * Smells like a compiler bug -- it doesn't work
         * when the & below is removed.
         */
-       [0 ... __NR_syscall_max] = &sys_ni_syscall,
+       [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
 #include <asm/syscalls_32.h>
 };
index 25adc0e..d39c091 100644 (file)
@@ -30,7 +30,7 @@ unsigned long profile_pc(struct pt_regs *regs)
 {
        unsigned long pc = instruction_pointer(regs);
 
-       if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+       if (!user_mode(regs) && in_lock_functions(pc)) {
 #ifdef CONFIG_FRAME_POINTER
                return *(unsigned long *)(regs->bp + sizeof(long));
 #else
index 9d2073e..6751c5c 100644 (file)
@@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
 {
        enum ctx_state prev_state;
 
-       if (user_mode_vm(regs)) {
+       if (user_mode(regs)) {
                /* Other than that, we're just an exception. */
                prev_state = exception_enter();
        } else {
@@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
        /* Must be before exception_exit. */
        preempt_count_sub(HARDIRQ_OFFSET);
 
-       if (user_mode_vm(regs))
+       if (user_mode(regs))
                return exception_exit(prev_state);
        else
                rcu_nmi_exit();
@@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
  *
  * IST exception handlers normally cannot schedule.  As a special
  * exception, if the exception interrupted userspace code (i.e.
- * user_mode_vm(regs) would return true) and the exception was not
+ * user_mode(regs) would return true) and the exception was not
  * a double fault, it can be safe to schedule.  ist_begin_non_atomic()
  * begins a non-atomic section within an ist_enter()/ist_exit() region.
  * Callers are responsible for enabling interrupts themselves inside
@@ -167,15 +167,15 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
  */
 void ist_begin_non_atomic(struct pt_regs *regs)
 {
-       BUG_ON(!user_mode_vm(regs));
+       BUG_ON(!user_mode(regs));
 
        /*
         * Sanity check: we need to be on the normal thread stack.  This
         * will catch asm bugs and any attempt to use ist_preempt_enable
         * from double_fault.
         */
-       BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
-               & ~(THREAD_SIZE - 1)) != 0);
+       BUG_ON((unsigned long)(current_top_of_stack() -
+                              current_stack_pointer()) >= THREAD_SIZE);
 
        preempt_count_sub(HARDIRQ_OFFSET);
 }
@@ -194,8 +194,7 @@ static nokprobe_inline int
 do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
                  struct pt_regs *regs, long error_code)
 {
-#ifdef CONFIG_X86_32
-       if (regs->flags & X86_VM_MASK) {
+       if (v8086_mode(regs)) {
                /*
                 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
                 * On nmi (interrupt 2), do_trap should not be called.
@@ -207,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
                }
                return -1;
        }
-#endif
+
        if (!user_mode(regs)) {
                if (!fixup_exception(regs)) {
                        tsk->thread.error_code = error_code;
@@ -462,13 +461,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
        prev_state = exception_enter();
        conditional_sti(regs);
 
-#ifdef CONFIG_X86_32
-       if (regs->flags & X86_VM_MASK) {
+       if (v8086_mode(regs)) {
                local_irq_enable();
                handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
                goto exit;
        }
-#endif
 
        tsk = current;
        if (!user_mode(regs)) {
@@ -587,7 +584,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
        /* Copy the remainder of the stack from the current stack. */
        memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
 
-       BUG_ON(!user_mode_vm(&new_stack->regs));
+       BUG_ON(!user_mode(&new_stack->regs));
        return new_stack;
 }
 NOKPROBE_SYMBOL(fixup_bad_iret);
@@ -673,7 +670,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
        /* It's safe to allow irq's after DR6 has been saved */
        preempt_conditional_sti(regs);
 
-       if (regs->flags & X86_VM_MASK) {
+       if (v8086_mode(regs)) {
                handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
                                        X86_TRAP_DB);
                preempt_conditional_cli(regs);
@@ -721,7 +718,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
                return;
        conditional_sti(regs);
 
-       if (!user_mode_vm(regs))
+       if (!user_mode(regs))
        {
                if (!fixup_exception(regs)) {
                        task->thread.error_code = error_code;
@@ -925,9 +922,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 /* Set of traps needed for early debugging. */
 void __init early_trap_init(void)
 {
-       set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+       /*
+        * Don't use IST to set DEBUG_STACK as it doesn't work until TSS
+        * is ready in cpu_init() <-- trap_init(). Before trap_init(),
+        * CPU runs at ring 0 so it is impossible to hit an invalid
+        * stack.  Using the original stack works well enough at this
+        * early stage. DEBUG_STACK will be equipped after cpu_init() in
+        * trap_init().
+        *
+        * We don't need to set trace_idt_table like set_intr_gate(),
+        * since we don't have trace_debug and it will be reset to
+        * 'debug' in trap_init() by set_intr_gate_ist().
+        */
+       set_intr_gate_notrace(X86_TRAP_DB, debug);
        /* int3 can be called from all */
-       set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+       set_system_intr_gate(X86_TRAP_BP, &int3);
 #ifdef CONFIG_X86_32
        set_intr_gate(X86_TRAP_PF, page_fault);
 #endif
@@ -1005,6 +1014,15 @@ void __init trap_init(void)
         */
        cpu_init();
 
+       /*
+        * X86_TRAP_DB and X86_TRAP_BP have been set
+        * in early_trap_init(). However, IST works only after
+        * cpu_init() loads TSS. See comments in early_trap_init().
+        */
+       set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+       /* int3 can be called from all */
+       set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+
        x86_init.irqs.trap_init();
 
 #ifdef CONFIG_X86_64
index 81f8adb..0b81ad6 100644 (file)
@@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
        int ret = NOTIFY_DONE;
 
        /* We are only interested in userspace traps */
-       if (regs && !user_mode_vm(regs))
+       if (regs && !user_mode(regs))
                return NOTIFY_DONE;
 
        switch (val) {
index e8edcf5..fc9db6e 100644 (file)
@@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
                do_exit(SIGSEGV);
        }
 
-       tss = &per_cpu(init_tss, get_cpu());
+       tss = &per_cpu(cpu_tss, get_cpu());
        current->thread.sp0 = current->thread.saved_sp0;
        current->thread.sysenter_cs = __KERNEL_CS;
        load_sp0(tss, &current->thread);
@@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
        tsk->thread.saved_fs = info->regs32->fs;
        tsk->thread.saved_gs = get_user_gs(info->regs32);
 
-       tss = &per_cpu(init_tss, get_cpu());
+       tss = &per_cpu(cpu_tss, get_cpu());
        tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
        if (cpu_has_sep)
                tsk->thread.sysenter_cs = 0;
index c7d791f..51e3304 100644 (file)
@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
        gtod_write_begin(vdata);
 
        /* copy vsyscall data */
-       vdata->vclock_mode      = tk->tkr.clock->archdata.vclock_mode;
-       vdata->cycle_last       = tk->tkr.cycle_last;
-       vdata->mask             = tk->tkr.mask;
-       vdata->mult             = tk->tkr.mult;
-       vdata->shift            = tk->tkr.shift;
+       vdata->vclock_mode      = tk->tkr_mono.clock->archdata.vclock_mode;
+       vdata->cycle_last       = tk->tkr_mono.cycle_last;
+       vdata->mask             = tk->tkr_mono.mask;
+       vdata->mult             = tk->tkr_mono.mult;
+       vdata->shift            = tk->tkr_mono.shift;
 
        vdata->wall_time_sec            = tk->xtime_sec;
-       vdata->wall_time_snsec          = tk->tkr.xtime_nsec;
+       vdata->wall_time_snsec          = tk->tkr_mono.xtime_nsec;
 
        vdata->monotonic_time_sec       = tk->xtime_sec
                                        + tk->wall_to_monotonic.tv_sec;
-       vdata->monotonic_time_snsec     = tk->tkr.xtime_nsec
+       vdata->monotonic_time_snsec     = tk->tkr_mono.xtime_nsec
                                        + ((u64)tk->wall_to_monotonic.tv_nsec
-                                               << tk->tkr.shift);
+                                               << tk->tkr_mono.shift);
        while (vdata->monotonic_time_snsec >=
-                                       (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+                                       (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
                vdata->monotonic_time_snsec -=
-                                       ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+                                       ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
                vdata->monotonic_time_sec++;
        }
 
        vdata->wall_time_coarse_sec     = tk->xtime_sec;
-       vdata->wall_time_coarse_nsec    = (long)(tk->tkr.xtime_nsec >>
-                                                tk->tkr.shift);
+       vdata->wall_time_coarse_nsec    = (long)(tk->tkr_mono.xtime_nsec >>
+                                                tk->tkr_mono.shift);
 
        vdata->monotonic_time_coarse_sec =
                vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
index 34f66e5..cdc6cf9 100644 (file)
@@ -379,7 +379,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
                 * thread's fpu state, reconstruct fxstate from the fsave
                 * header. Sanitize the copied state etc.
                 */
-               struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+               struct fpu *fpu = &tsk->thread.fpu;
                struct user_i387_ia32_struct env;
                int err = 0;
 
@@ -393,14 +393,15 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
                 */
                drop_fpu(tsk);
 
-               if (__copy_from_user(xsave, buf_fx, state_size) ||
+               if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) ||
                    __copy_from_user(&env, buf, sizeof(env))) {
+                       fpu_finit(fpu);
                        err = -1;
                } else {
                        sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
-                       set_used_math();
                }
 
+               set_used_math();
                if (use_eager_fpu()) {
                        preempt_disable();
                        math_state_restore();
index 08f790d..16e8f96 100644 (file)
@@ -1,5 +1,5 @@
 
-ccflags-y += -Ivirt/kvm -Iarch/x86/kvm
+ccflags-y += -Iarch/x86/kvm
 
 CFLAGS_x86.o := -I.
 CFLAGS_svm.o := -I.
index 8a80737..59b69f6 100644 (file)
@@ -104,6 +104,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
                ((best->eax & 0xff00) >> 8) != 0)
                return -EINVAL;
 
+       /* Update physical-address width */
+       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
        kvm_pmu_cpuid_update(vcpu);
        return 0;
 }
@@ -135,6 +138,21 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
        }
 }
 
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+       struct kvm_cpuid_entry2 *best;
+
+       best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
+       if (!best || best->eax < 0x80000008)
+               goto not_found;
+       best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+       if (best)
+               return best->eax & 0xff;
+not_found:
+       return 36;
+}
+EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
+
 /* when an old userspace process fills a new kernel module */
 int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
                             struct kvm_cpuid *cpuid,
@@ -757,21 +775,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
 }
 EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
 
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
-{
-       struct kvm_cpuid_entry2 *best;
-
-       best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
-       if (!best || best->eax < 0x80000008)
-               goto not_found;
-       best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
-       if (best)
-               return best->eax & 0xff;
-not_found:
-       return 36;
-}
-EXPORT_SYMBOL_GPL(cpuid_maxphyaddr);
-
 /*
  * If no match is found, check whether we exceed the vCPU's limit
  * and return the content of the highest valid _standard_ leaf instead.
index 4452eed..c3b1ad9 100644 (file)
@@ -20,13 +20,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
                              struct kvm_cpuid_entry2 __user *entries);
 void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
 
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
+
+static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.maxphyaddr;
+}
 
 static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
 {
        struct kvm_cpuid_entry2 *best;
 
        if (!static_cpu_has(X86_FEATURE_XSAVE))
-               return 0;
+               return false;
 
        best = kvm_find_cpuid_entry(vcpu, 1, 0);
        return best && (best->ecx & bit(X86_FEATURE_XSAVE));
index 106c015..630bcb0 100644 (file)
@@ -248,27 +248,7 @@ struct mode_dual {
        struct opcode mode64;
 };
 
-/* EFLAGS bit definitions. */
-#define EFLG_ID (1<<21)
-#define EFLG_VIP (1<<20)
-#define EFLG_VIF (1<<19)
-#define EFLG_AC (1<<18)
-#define EFLG_VM (1<<17)
-#define EFLG_RF (1<<16)
-#define EFLG_IOPL (3<<12)
-#define EFLG_NT (1<<14)
-#define EFLG_OF (1<<11)
-#define EFLG_DF (1<<10)
-#define EFLG_IF (1<<9)
-#define EFLG_TF (1<<8)
-#define EFLG_SF (1<<7)
-#define EFLG_ZF (1<<6)
-#define EFLG_AF (1<<4)
-#define EFLG_PF (1<<2)
-#define EFLG_CF (1<<0)
-
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
-#define EFLG_RESERVED_ONE_MASK 2
 
 enum x86_transfer_type {
        X86_TRANSFER_NONE,
@@ -317,7 +297,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
  * These EFLAGS bits are restored from saved value during emulation, and
  * any changes are written back to the saved value after emulation.
  */
-#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
+#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
+                    X86_EFLAGS_PF|X86_EFLAGS_CF)
 
 #ifdef CONFIG_X86_64
 #define ON64(x) x
@@ -478,6 +459,25 @@ static void assign_masked(ulong *dest, ulong src, ulong mask)
        *dest = (*dest & ~mask) | (src & mask);
 }
 
+static void assign_register(unsigned long *reg, u64 val, int bytes)
+{
+       /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+       switch (bytes) {
+       case 1:
+               *(u8 *)reg = (u8)val;
+               break;
+       case 2:
+               *(u16 *)reg = (u16)val;
+               break;
+       case 4:
+               *reg = (u32)val;
+               break;  /* 64b: zero-extend */
+       case 8:
+               *reg = val;
+               break;
+       }
+}
+
 static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
 {
        return (1UL << (ctxt->ad_bytes << 3)) - 1;
@@ -943,6 +943,22 @@ FASTOP2(xadd);
 
 FASTOP2R(cmp, cmp_r);
 
+static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
+{
+       /* If src is zero, do not writeback, but update flags */
+       if (ctxt->src.val == 0)
+               ctxt->dst.type = OP_NONE;
+       return fastop(ctxt, em_bsf);
+}
+
+static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
+{
+       /* If src is zero, do not writeback, but update flags */
+       if (ctxt->src.val == 0)
+               ctxt->dst.type = OP_NONE;
+       return fastop(ctxt, em_bsr);
+}
+
 static u8 test_cc(unsigned int condition, unsigned long flags)
 {
        u8 rc;
@@ -1399,7 +1415,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
                unsigned int in_page, n;
                unsigned int count = ctxt->rep_prefix ?
                        address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
-               in_page = (ctxt->eflags & EFLG_DF) ?
+               in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
                        offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
                        PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
                n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
@@ -1412,7 +1428,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
        }
 
        if (ctxt->rep_prefix && (ctxt->d & String) &&
-           !(ctxt->eflags & EFLG_DF)) {
+           !(ctxt->eflags & X86_EFLAGS_DF)) {
                ctxt->dst.data = rc->data + rc->pos;
                ctxt->dst.type = OP_MEM_STR;
                ctxt->dst.count = (rc->end - rc->pos) / size;
@@ -1691,21 +1707,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 static void write_register_operand(struct operand *op)
 {
-       /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
-       switch (op->bytes) {
-       case 1:
-               *(u8 *)op->addr.reg = (u8)op->val;
-               break;
-       case 2:
-               *(u16 *)op->addr.reg = (u16)op->val;
-               break;
-       case 4:
-               *op->addr.reg = (u32)op->val;
-               break;  /* 64b: zero-extend */
-       case 8:
-               *op->addr.reg = op->val;
-               break;
-       }
+       return assign_register(op->addr.reg, op->val, op->bytes);
 }
 
 static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
@@ -1792,32 +1794,34 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
 {
        int rc;
        unsigned long val, change_mask;
-       int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+       int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
        int cpl = ctxt->ops->cpl(ctxt);
 
        rc = emulate_pop(ctxt, &val, len);
        if (rc != X86EMUL_CONTINUE)
                return rc;
 
-       change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
-               | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
+       change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+                     X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
+                     X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
+                     X86_EFLAGS_AC | X86_EFLAGS_ID;
 
        switch(ctxt->mode) {
        case X86EMUL_MODE_PROT64:
        case X86EMUL_MODE_PROT32:
        case X86EMUL_MODE_PROT16:
                if (cpl == 0)
-                       change_mask |= EFLG_IOPL;
+                       change_mask |= X86_EFLAGS_IOPL;
                if (cpl <= iopl)
-                       change_mask |= EFLG_IF;
+                       change_mask |= X86_EFLAGS_IF;
                break;
        case X86EMUL_MODE_VM86:
                if (iopl < 3)
                        return emulate_gp(ctxt, 0);
-               change_mask |= EFLG_IF;
+               change_mask |= X86_EFLAGS_IF;
                break;
        default: /* real mode */
-               change_mask |= (EFLG_IOPL | EFLG_IF);
+               change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
                break;
        }
 
@@ -1918,7 +1922,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
 
 static int em_pushf(struct x86_emulate_ctxt *ctxt)
 {
-       ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
+       ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
        return em_push(ctxt);
 }
 
@@ -1926,6 +1930,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 {
        int rc = X86EMUL_CONTINUE;
        int reg = VCPU_REGS_RDI;
+       u32 val;
 
        while (reg >= VCPU_REGS_RAX) {
                if (reg == VCPU_REGS_RSP) {
@@ -1933,9 +1938,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
                        --reg;
                }
 
-               rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
+               rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
                if (rc != X86EMUL_CONTINUE)
                        break;
+               assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
                --reg;
        }
        return rc;
@@ -1956,7 +1962,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
        if (rc != X86EMUL_CONTINUE)
                return rc;
 
-       ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
+       ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
 
        ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
        rc = em_push(ctxt);
@@ -2022,10 +2028,14 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
        unsigned long temp_eip = 0;
        unsigned long temp_eflags = 0;
        unsigned long cs = 0;
-       unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
-                            EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
-                            EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
-       unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
+       unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+                            X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
+                            X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
+                            X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
+                            X86_EFLAGS_AC | X86_EFLAGS_ID |
+                            X86_EFLAGS_FIXED;
+       unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
+                                 X86_EFLAGS_VIP;
 
        /* TODO: Add stack limit check */
 
@@ -2054,7 +2064,6 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
 
        ctxt->_eip = temp_eip;
 
-
        if (ctxt->op_bytes == 4)
                ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
        else if (ctxt->op_bytes == 2) {
@@ -2063,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
        }
 
        ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
-       ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+       ctxt->eflags |= X86_EFLAGS_FIXED;
        ctxt->ops->set_nmi_mask(ctxt, false);
 
        return rc;
@@ -2145,12 +2154,12 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
            ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
                *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
                *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
-               ctxt->eflags &= ~EFLG_ZF;
+               ctxt->eflags &= ~X86_EFLAGS_ZF;
        } else {
                ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
                        (u32) reg_read(ctxt, VCPU_REGS_RBX);
 
-               ctxt->eflags |= EFLG_ZF;
+               ctxt->eflags |= X86_EFLAGS_ZF;
        }
        return X86EMUL_CONTINUE;
 }
@@ -2222,7 +2231,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
        ctxt->src.val = ctxt->dst.orig_val;
        fastop(ctxt, em_cmp);
 
-       if (ctxt->eflags & EFLG_ZF) {
+       if (ctxt->eflags & X86_EFLAGS_ZF) {
                /* Success: write back to memory; no update of EAX */
                ctxt->src.type = OP_NONE;
                ctxt->dst.val = ctxt->src.orig_val;
@@ -2381,14 +2390,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 
                ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
                ctxt->eflags &= ~msr_data;
-               ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+               ctxt->eflags |= X86_EFLAGS_FIXED;
 #endif
        } else {
                /* legacy mode */
                ops->get_msr(ctxt, MSR_STAR, &msr_data);
                ctxt->_eip = (u32)msr_data;
 
-               ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
+               ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
        }
 
        return X86EMUL_CONTINUE;
@@ -2425,8 +2434,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
        if ((msr_data & 0xfffc) == 0x0)
                return emulate_gp(ctxt, 0);
 
-       ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
-       cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
+       ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
+       cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
        ss_sel = cs_sel + 8;
        if (efer & EFER_LMA) {
                cs.d = 0;
@@ -2493,8 +2502,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
                        return emulate_gp(ctxt, 0);
                break;
        }
-       cs_sel |= SELECTOR_RPL_MASK;
-       ss_sel |= SELECTOR_RPL_MASK;
+       cs_sel |= SEGMENT_RPL_MASK;
+       ss_sel |= SEGMENT_RPL_MASK;
 
        ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
        ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
@@ -2512,7 +2521,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
                return false;
        if (ctxt->mode == X86EMUL_MODE_VM86)
                return true;
-       iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+       iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
        return ctxt->ops->cpl(ctxt) > iopl;
 }
 
@@ -2782,10 +2791,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
                return ret;
        ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
                                        X86_TRANSFER_TASK_SWITCH, NULL);
-       if (ret != X86EMUL_CONTINUE)
-               return ret;
 
-       return X86EMUL_CONTINUE;
+       return ret;
 }
 
 static int task_switch_32(struct x86_emulate_ctxt *ctxt,
@@ -2954,7 +2961,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
                struct operand *op)
 {
-       int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
+       int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
 
        register_address_increment(ctxt, reg, df * op->bytes);
        op->addr.mem.ea = register_address(ctxt, reg);
@@ -3323,7 +3330,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
        return X86EMUL_CONTINUE;
 }
 
-static int em_vmcall(struct x86_emulate_ctxt *ctxt)
+static int em_hypercall(struct x86_emulate_ctxt *ctxt)
 {
        int rc = ctxt->ops->fix_hypercall(ctxt);
 
@@ -3395,17 +3402,6 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
        return em_lgdt_lidt(ctxt, true);
 }
 
-static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
-{
-       int rc;
-
-       rc = ctxt->ops->fix_hypercall(ctxt);
-
-       /* Disable writeback. */
-       ctxt->dst.type = OP_NONE;
-       return rc;
-}
-
 static int em_lidt(struct x86_emulate_ctxt *ctxt)
 {
        return em_lgdt_lidt(ctxt, false);
@@ -3504,7 +3500,8 @@ static int em_sahf(struct x86_emulate_ctxt *ctxt)
 {
        u32 flags;
 
-       flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
+       flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
+               X86_EFLAGS_SF;
        flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
 
        ctxt->eflags &= ~0xffUL;
@@ -3769,7 +3766,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 
 static const struct opcode group7_rm0[] = {
        N,
-       I(SrcNone | Priv | EmulateOnUD, em_vmcall),
+       I(SrcNone | Priv | EmulateOnUD, em_hypercall),
        N, N, N, N, N, N,
 };
 
@@ -3781,7 +3778,7 @@ static const struct opcode group7_rm1[] = {
 
 static const struct opcode group7_rm3[] = {
        DIP(SrcNone | Prot | Priv,              vmrun,          check_svme_pa),
-       II(SrcNone  | Prot | EmulateOnUD,       em_vmmcall,     vmmcall),
+       II(SrcNone  | Prot | EmulateOnUD,       em_hypercall,   vmmcall),
        DIP(SrcNone | Prot | Priv,              vmload,         check_svme_pa),
        DIP(SrcNone | Prot | Priv,              vmsave,         check_svme_pa),
        DIP(SrcNone | Prot | Priv,              stgi,           check_svme),
@@ -4192,7 +4189,8 @@ static const struct opcode twobyte_table[256] = {
        N, N,
        G(BitOp, group8),
        F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
-       F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
+       I(DstReg | SrcMem | ModRM, em_bsf_c),
+       I(DstReg | SrcMem | ModRM, em_bsr_c),
        D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
        /* 0xC0 - 0xC7 */
        F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
@@ -4759,9 +4757,9 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
        if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
             (ctxt->b == 0xae) || (ctxt->b == 0xaf))
            && (((ctxt->rep_prefix == REPE_PREFIX) &&
-                ((ctxt->eflags & EFLG_ZF) == 0))
+                ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
                || ((ctxt->rep_prefix == REPNE_PREFIX) &&
-                   ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
+                   ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
                return true;
 
        return false;
@@ -4913,7 +4911,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
                        /* All REP prefixes have the same first termination condition */
                        if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
                                ctxt->eip = ctxt->_eip;
-                               ctxt->eflags &= ~EFLG_RF;
+                               ctxt->eflags &= ~X86_EFLAGS_RF;
                                goto done;
                        }
                }
@@ -4963,9 +4961,9 @@ special_insn:
        }
 
        if (ctxt->rep_prefix && (ctxt->d & String))
-               ctxt->eflags |= EFLG_RF;
+               ctxt->eflags |= X86_EFLAGS_RF;
        else
-               ctxt->eflags &= ~EFLG_RF;
+               ctxt->eflags &= ~X86_EFLAGS_RF;
 
        if (ctxt->execute) {
                if (ctxt->d & Fastop) {
@@ -5014,7 +5012,7 @@ special_insn:
                rc = emulate_int(ctxt, ctxt->src.val);
                break;
        case 0xce:              /* into */
-               if (ctxt->eflags & EFLG_OF)
+               if (ctxt->eflags & X86_EFLAGS_OF)
                        rc = emulate_int(ctxt, 4);
                break;
        case 0xe9: /* jmp rel */
@@ -5027,19 +5025,19 @@ special_insn:
                break;
        case 0xf5:      /* cmc */
                /* complement carry flag from eflags reg */
-               ctxt->eflags ^= EFLG_CF;
+               ctxt->eflags ^= X86_EFLAGS_CF;
                break;
        case 0xf8: /* clc */
-               ctxt->eflags &= ~EFLG_CF;
+               ctxt->eflags &= ~X86_EFLAGS_CF;
                break;
        case 0xf9: /* stc */
-               ctxt->eflags |= EFLG_CF;
+               ctxt->eflags |= X86_EFLAGS_CF;
                break;
        case 0xfc: /* cld */
-               ctxt->eflags &= ~EFLG_DF;
+               ctxt->eflags &= ~X86_EFLAGS_DF;
                break;
        case 0xfd: /* std */
-               ctxt->eflags |= EFLG_DF;
+               ctxt->eflags |= X86_EFLAGS_DF;
                break;
        default:
                goto cannot_emulate;
@@ -5100,7 +5098,7 @@ writeback:
                        }
                        goto done; /* skip rip writeback */
                }
-               ctxt->eflags &= ~EFLG_RF;
+               ctxt->eflags &= ~X86_EFLAGS_RF;
        }
 
        ctxt->eip = ctxt->_eip;
@@ -5137,8 +5135,7 @@ twobyte_insn:
        case 0x40 ... 0x4f:     /* cmov */
                if (test_cc(ctxt->b, ctxt->eflags))
                        ctxt->dst.val = ctxt->src.val;
-               else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
-                        ctxt->op_bytes != 4)
+               else if (ctxt->op_bytes != 4)
                        ctxt->dst.type = OP_NONE; /* no writeback */
                break;
        case 0x80 ... 0x8f: /* jnz rel, etc*/
index 298781d..4dce6f8 100644 (file)
@@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr)
                (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
 }
 
-static int pit_ioport_write(struct kvm_io_device *this,
+static int pit_ioport_write(struct kvm_vcpu *vcpu,
+                               struct kvm_io_device *this,
                            gpa_t addr, int len, const void *data)
 {
        struct kvm_pit *pit = dev_to_pit(this);
@@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this,
        return 0;
 }
 
-static int pit_ioport_read(struct kvm_io_device *this,
+static int pit_ioport_read(struct kvm_vcpu *vcpu,
+                          struct kvm_io_device *this,
                           gpa_t addr, int len, void *data)
 {
        struct kvm_pit *pit = dev_to_pit(this);
@@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this,
        return 0;
 }
 
-static int speaker_ioport_write(struct kvm_io_device *this,
+static int speaker_ioport_write(struct kvm_vcpu *vcpu,
+                               struct kvm_io_device *this,
                                gpa_t addr, int len, const void *data)
 {
        struct kvm_pit *pit = speaker_to_pit(this);
@@ -606,8 +609,9 @@ static int speaker_ioport_write(struct kvm_io_device *this,
        return 0;
 }
 
-static int speaker_ioport_read(struct kvm_io_device *this,
-                              gpa_t addr, int len, void *data)
+static int speaker_ioport_read(struct kvm_vcpu *vcpu,
+                                  struct kvm_io_device *this,
+                                  gpa_t addr, int len, void *data)
 {
        struct kvm_pit *pit = speaker_to_pit(this);
        struct kvm_kpit_state *pit_state = &pit->pit_state;
index dd1b16b..c84990b 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/kthread.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 struct kvm_kpit_channel_state {
        u32 count; /* can be 65536 */
index cc31f7c..fef922f 100644 (file)
@@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s,
                return -EOPNOTSUPP;
 
        if (len != 1) {
+               memset(val, 0, len);
                pr_pic_unimpl("non byte read\n");
                return 0;
        }
@@ -528,42 +529,42 @@ static int picdev_read(struct kvm_pic *s,
        return 0;
 }
 
-static int picdev_master_write(struct kvm_io_device *dev,
+static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                               gpa_t addr, int len, const void *val)
 {
        return picdev_write(container_of(dev, struct kvm_pic, dev_master),
                            addr, len, val);
 }
 
-static int picdev_master_read(struct kvm_io_device *dev,
+static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                              gpa_t addr, int len, void *val)
 {
        return picdev_read(container_of(dev, struct kvm_pic, dev_master),
                            addr, len, val);
 }
 
-static int picdev_slave_write(struct kvm_io_device *dev,
+static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                              gpa_t addr, int len, const void *val)
 {
        return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
                            addr, len, val);
 }
 
-static int picdev_slave_read(struct kvm_io_device *dev,
+static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                             gpa_t addr, int len, void *val)
 {
        return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
                            addr, len, val);
 }
 
-static int picdev_eclr_write(struct kvm_io_device *dev,
+static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                             gpa_t addr, int len, const void *val)
 {
        return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
                            addr, len, val);
 }
 
-static int picdev_eclr_read(struct kvm_io_device *dev,
+static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
                            gpa_t addr, int len, void *val)
 {
        return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
index b1947e0..28146f0 100644 (file)
@@ -206,6 +206,8 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
 
        old_irr = ioapic->irr;
        ioapic->irr |= mask;
+       if (edge)
+               ioapic->irr_delivered &= ~mask;
        if ((edge && old_irr == ioapic->irr) ||
            (!edge && entry.fields.remote_irr)) {
                ret = 0;
@@ -349,7 +351,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
        irqe.shorthand = 0;
 
        if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
-               ioapic->irr &= ~(1 << irq);
+               ioapic->irr_delivered |= 1 << irq;
 
        if (irq == RTC_GSI && line_status) {
                /*
@@ -422,6 +424,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
                        struct kvm_ioapic *ioapic, int vector, int trigger_mode)
 {
        int i;
+       struct kvm_lapic *apic = vcpu->arch.apic;
 
        for (i = 0; i < IOAPIC_NUM_PINS; i++) {
                union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
@@ -443,7 +446,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
                kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
                spin_lock(&ioapic->lock);
 
-               if (trigger_mode != IOAPIC_LEVEL_TRIG)
+               if (trigger_mode != IOAPIC_LEVEL_TRIG ||
+                   kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
                        continue;
 
                ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
@@ -471,13 +475,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
        }
 }
 
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
-{
-       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-       smp_rmb();
-       return test_bit(vector, ioapic->handled_vectors);
-}
-
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
 {
        struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
@@ -498,8 +495,8 @@ static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
                 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
 }
 
-static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
-                           void *val)
+static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+                               gpa_t addr, int len, void *val)
 {
        struct kvm_ioapic *ioapic = to_ioapic(this);
        u32 result;
@@ -541,8 +538,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
        return 0;
 }
 
-static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
-                            const void *val)
+static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+                                gpa_t addr, int len, const void *val)
 {
        struct kvm_ioapic *ioapic = to_ioapic(this);
        u32 data;
@@ -597,6 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
        ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
        ioapic->ioregsel = 0;
        ioapic->irr = 0;
+       ioapic->irr_delivered = 0;
        ioapic->id = 0;
        memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
        rtc_irq_eoi_tracking_reset(ioapic);
@@ -654,6 +652,7 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
 
        spin_lock(&ioapic->lock);
        memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+       state->irr &= ~ioapic->irr_delivered;
        spin_unlock(&ioapic->lock);
        return 0;
 }
@@ -667,6 +666,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
        spin_lock(&ioapic->lock);
        memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
        ioapic->irr = 0;
+       ioapic->irr_delivered = 0;
        update_handled_vectors(ioapic);
        kvm_vcpu_request_scan_ioapic(kvm);
        kvm_ioapic_inject_all(ioapic, state->irr);
index c2e36d9..ca0b0b4 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/kvm_host.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 struct kvm;
 struct kvm_vcpu;
@@ -77,6 +77,7 @@ struct kvm_ioapic {
        struct rtc_status rtc_status;
        struct delayed_work eoi_inject;
        u32 irq_eoi[IOAPIC_NUM_PINS];
+       u32 irr_delivered;
 };
 
 #ifdef DEBUG
@@ -97,13 +98,19 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
        return kvm->arch.vioapic;
 }
 
+static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+{
+       struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+       smp_rmb();
+       return test_bit(vector, ioapic->handled_vectors);
+}
+
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
                int short_hand, unsigned int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
                        int trigger_mode);
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
 int kvm_ioapic_init(struct kvm *kvm);
 void kvm_ioapic_destroy(struct kvm *kvm);
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
index 2d03568..ad68c73 100644 (file)
@@ -27,7 +27,7 @@
 #include <linux/kvm_host.h>
 #include <linux/spinlock.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 #include "ioapic.h"
 #include "lapic.h"
 
index bd4e34d..d67206a 100644 (file)
@@ -133,6 +133,28 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
        return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
+/* The logical map is definitely wrong if we have multiple
+ * modes at the same time.  (Physical map is always right.)
+ */
+static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
+{
+       return !(map->mode & (map->mode - 1));
+}
+
+static inline void
+apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+{
+       unsigned lid_bits;
+
+       BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER !=  4);
+       BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT    !=  8);
+       BUILD_BUG_ON(KVM_APIC_MODE_X2APIC        != 16);
+       lid_bits = map->mode;
+
+       *cid = dest_id >> lid_bits;
+       *lid = dest_id & ((1 << lid_bits) - 1);
+}
+
 static void recalculate_apic_map(struct kvm *kvm)
 {
        struct kvm_apic_map *new, *old = NULL;
@@ -146,48 +168,6 @@ static void recalculate_apic_map(struct kvm *kvm)
        if (!new)
                goto out;
 
-       new->ldr_bits = 8;
-       /* flat mode is default */
-       new->cid_shift = 8;
-       new->cid_mask = 0;
-       new->lid_mask = 0xff;
-       new->broadcast = APIC_BROADCAST;
-
-       kvm_for_each_vcpu(i, vcpu, kvm) {
-               struct kvm_lapic *apic = vcpu->arch.apic;
-
-               if (!kvm_apic_present(vcpu))
-                       continue;
-
-               if (apic_x2apic_mode(apic)) {
-                       new->ldr_bits = 32;
-                       new->cid_shift = 16;
-                       new->cid_mask = new->lid_mask = 0xffff;
-                       new->broadcast = X2APIC_BROADCAST;
-               } else if (kvm_apic_get_reg(apic, APIC_LDR)) {
-                       if (kvm_apic_get_reg(apic, APIC_DFR) ==
-                                                       APIC_DFR_CLUSTER) {
-                               new->cid_shift = 4;
-                               new->cid_mask = 0xf;
-                               new->lid_mask = 0xf;
-                       } else {
-                               new->cid_shift = 8;
-                               new->cid_mask = 0;
-                               new->lid_mask = 0xff;
-                       }
-               }
-
-               /*
-                * All APICs have to be configured in the same mode by an OS.
-                * We take advatage of this while building logical id loockup
-                * table. After reset APICs are in software disabled mode, so if
-                * we find apic with different setting we assume this is the mode
-                * OS wants all apics to be in; build lookup table accordingly.
-                */
-               if (kvm_apic_sw_enabled(apic))
-                       break;
-       }
-
        kvm_for_each_vcpu(i, vcpu, kvm) {
                struct kvm_lapic *apic = vcpu->arch.apic;
                u16 cid, lid;
@@ -198,11 +178,25 @@ static void recalculate_apic_map(struct kvm *kvm)
 
                aid = kvm_apic_id(apic);
                ldr = kvm_apic_get_reg(apic, APIC_LDR);
-               cid = apic_cluster_id(new, ldr);
-               lid = apic_logical_id(new, ldr);
 
                if (aid < ARRAY_SIZE(new->phys_map))
                        new->phys_map[aid] = apic;
+
+               if (apic_x2apic_mode(apic)) {
+                       new->mode |= KVM_APIC_MODE_X2APIC;
+               } else if (ldr) {
+                       ldr = GET_APIC_LOGICAL_ID(ldr);
+                       if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
+                               new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
+                       else
+                               new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
+               }
+
+               if (!kvm_apic_logical_map_valid(new))
+                       continue;
+
+               apic_logical_id(new, ldr, &cid, &lid);
+
                if (lid && cid < ARRAY_SIZE(new->logical_map))
                        new->logical_map[cid][ffs(lid) - 1] = apic;
        }
@@ -588,15 +582,23 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
        apic_update_ppr(apic);
 }
 
-static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
 {
-       return dest == (apic_x2apic_mode(apic) ?
-                       X2APIC_BROADCAST : APIC_BROADCAST);
+       if (apic_x2apic_mode(apic))
+               return mda == X2APIC_BROADCAST;
+
+       return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
 }
 
-static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
 {
-       return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
+       if (kvm_apic_broadcast(apic, mda))
+               return true;
+
+       if (apic_x2apic_mode(apic))
+               return mda == kvm_apic_id(apic);
+
+       return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
 }
 
 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
@@ -613,6 +615,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
                       && (logical_id & mda & 0xffff) != 0;
 
        logical_id = GET_APIC_LOGICAL_ID(logical_id);
+       mda = GET_APIC_DEST_FIELD(mda);
 
        switch (kvm_apic_get_reg(apic, APIC_DFR)) {
        case APIC_DFR_FLAT:
@@ -627,10 +630,27 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
        }
 }
 
+/* KVM APIC implementation has two quirks
+ *  - dest always begins at 0 while xAPIC MDA has offset 24,
+ *  - IOxAPIC messages have to be delivered (directly) to x2APIC.
+ */
+static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
+                                              struct kvm_lapic *target)
+{
+       bool ipi = source != NULL;
+       bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
+
+       if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+               return X2APIC_BROADCAST;
+
+       return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
+}
+
 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
                           int short_hand, unsigned int dest, int dest_mode)
 {
        struct kvm_lapic *target = vcpu->arch.apic;
+       u32 mda = kvm_apic_mda(dest, source, target);
 
        apic_debug("target %p, source %p, dest 0x%x, "
                   "dest_mode 0x%x, short_hand 0x%x\n",
@@ -640,9 +660,9 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
        switch (short_hand) {
        case APIC_DEST_NOSHORT:
                if (dest_mode == APIC_DEST_PHYSICAL)
-                       return kvm_apic_match_physical_addr(target, dest);
+                       return kvm_apic_match_physical_addr(target, mda);
                else
-                       return kvm_apic_match_logical_addr(target, dest);
+                       return kvm_apic_match_logical_addr(target, mda);
        case APIC_DEST_SELF:
                return target == source;
        case APIC_DEST_ALLINC:
@@ -664,6 +684,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
        struct kvm_lapic **dst;
        int i;
        bool ret = false;
+       bool x2apic_ipi = src && apic_x2apic_mode(src);
 
        *r = -1;
 
@@ -675,15 +696,15 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
        if (irq->shorthand)
                return false;
 
+       if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
+               return false;
+
        rcu_read_lock();
        map = rcu_dereference(kvm->arch.apic_map);
 
        if (!map)
                goto out;
 
-       if (irq->dest_id == map->broadcast)
-               goto out;
-
        ret = true;
 
        if (irq->dest_mode == APIC_DEST_PHYSICAL) {
@@ -692,16 +713,20 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
 
                dst = &map->phys_map[irq->dest_id];
        } else {
-               u32 mda = irq->dest_id << (32 - map->ldr_bits);
-               u16 cid = apic_cluster_id(map, mda);
+               u16 cid;
+
+               if (!kvm_apic_logical_map_valid(map)) {
+                       ret = false;
+                       goto out;
+               }
+
+               apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
 
                if (cid >= ARRAY_SIZE(map->logical_map))
                        goto out;
 
                dst = map->logical_map[cid];
 
-               bitmap = apic_logical_id(map, mda);
-
                if (irq->delivery_mode == APIC_DM_LOWEST) {
                        int l = -1;
                        for_each_set_bit(i, &bitmap, 16) {
@@ -833,8 +858,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
 
 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
 {
-       if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
-           kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
+       if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
                int trigger_mode;
                if (apic_test_vector(vector, apic->regs + APIC_TMR))
                        trigger_mode = IOAPIC_LEVEL_TRIG;
@@ -1038,7 +1062,7 @@ static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
            addr < apic->base_address + LAPIC_MMIO_LENGTH;
 }
 
-static int apic_mmio_read(struct kvm_io_device *this,
+static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
                           gpa_t address, int len, void *data)
 {
        struct kvm_lapic *apic = to_lapic(this);
@@ -1358,7 +1382,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
        return ret;
 }
 
-static int apic_mmio_write(struct kvm_io_device *this,
+static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
                            gpa_t address, int len, const void *data)
 {
        struct kvm_lapic *apic = to_lapic(this);
@@ -1498,8 +1522,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
                return;
        }
 
-       if (!kvm_vcpu_is_bsp(apic->vcpu))
-               value &= ~MSR_IA32_APICBASE_BSP;
        vcpu->arch.apic_base = value;
 
        /* update jump label if enable bit changes */
index 0bc6c65..9d28383 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef __KVM_X86_LAPIC_H
 #define __KVM_X86_LAPIC_H
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 
@@ -148,21 +148,6 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
        return kvm_x86_ops->vm_has_apicv(kvm);
 }
 
-static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
-{
-       u16 cid;
-       ldr >>= 32 - map->ldr_bits;
-       cid = (ldr >> map->cid_shift) & map->cid_mask;
-
-       return cid;
-}
-
-static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
-{
-       ldr >>= (32 - map->ldr_bits);
-       return ldr & map->lid_mask;
-}
-
 static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.apic->pending_events;
index cee7592..146f295 100644 (file)
@@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
                kvm_flush_remote_tlbs(kvm);
 }
 
+static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+               unsigned long *rmapp)
+{
+       u64 *sptep;
+       struct rmap_iterator iter;
+       int need_tlb_flush = 0;
+       pfn_t pfn;
+       struct kvm_mmu_page *sp;
+
+       for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+               BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+               sp = page_header(__pa(sptep));
+               pfn = spte_to_pfn(*sptep);
+
+               /*
+                * Only EPT supported for now; otherwise, one would need to
+                * find out efficiently whether the guest page tables are
+                * also using huge pages.
+                */
+               if (sp->role.direct &&
+                       !kvm_is_reserved_pfn(pfn) &&
+                       PageTransCompound(pfn_to_page(pfn))) {
+                       drop_spte(kvm, sptep);
+                       sptep = rmap_get_first(*rmapp, &iter);
+                       need_tlb_flush = 1;
+               } else
+                       sptep = rmap_get_next(&iter);
+       }
+
+       return need_tlb_flush;
+}
+
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+                       struct kvm_memory_slot *memslot)
+{
+       bool flush = false;
+       unsigned long *rmapp;
+       unsigned long last_index, index;
+       gfn_t gfn_start, gfn_end;
+
+       spin_lock(&kvm->mmu_lock);
+
+       gfn_start = memslot->base_gfn;
+       gfn_end = memslot->base_gfn + memslot->npages - 1;
+
+       if (gfn_start >= gfn_end)
+               goto out;
+
+       rmapp = memslot->arch.rmap[0];
+       last_index = gfn_to_index(gfn_end, memslot->base_gfn,
+                                       PT_PAGE_TABLE_LEVEL);
+
+       for (index = 0; index <= last_index; ++index, ++rmapp) {
+               if (*rmapp)
+                       flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp);
+
+               if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+                       if (flush) {
+                               kvm_flush_remote_tlbs(kvm);
+                               flush = false;
+                       }
+                       cond_resched_lock(&kvm->mmu_lock);
+               }
+       }
+
+       if (flush)
+               kvm_flush_remote_tlbs(kvm);
+
+out:
+       spin_unlock(&kvm->mmu_lock);
+}
+
 void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot)
 {
index 8e6b7d8..29fbf9d 100644 (file)
@@ -38,7 +38,7 @@ static struct kvm_arch_event_perf_mapping {
 };
 
 /* mapping between fixed pmc index and arch_events array */
-int fixed_pmc_events[] = {1, 0, 7};
+static int fixed_pmc_events[] = {1, 0, 7};
 
 static bool pmc_is_gp(struct kvm_pmc *pmc)
 {
index cc618c8..ce741b8 100644 (file)
@@ -1261,7 +1261,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 
        svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
                                   MSR_IA32_APICBASE_ENABLE;
-       if (kvm_vcpu_is_bsp(&svm->vcpu))
+       if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
                svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 
        svm_init_osvw(&svm->vcpu);
@@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm)
 static int halt_interception(struct vcpu_svm *svm)
 {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
-       skip_emulated_instruction(&svm->vcpu);
        return kvm_emulate_halt(&svm->vcpu);
 }
 
 static int vmmcall_interception(struct vcpu_svm *svm)
 {
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-       skip_emulated_instruction(&svm->vcpu);
        kvm_emulate_hypercall(&svm->vcpu);
        return 1;
 }
@@ -2757,11 +2755,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
 
-       trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
-                         vcpu->arch.regs[VCPU_REGS_RAX]);
+       trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
+                         kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
        /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
-       kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
+       kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
        svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
        skip_emulated_instruction(&svm->vcpu);
@@ -2770,12 +2768,18 @@ static int invlpga_interception(struct vcpu_svm *svm)
 
 static int skinit_interception(struct vcpu_svm *svm)
 {
-       trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+       trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;
 }
 
+static int wbinvd_interception(struct vcpu_svm *svm)
+{
+       kvm_emulate_wbinvd(&svm->vcpu);
+       return 1;
+}
+
 static int xsetbv_interception(struct vcpu_svm *svm)
 {
        u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
@@ -2902,7 +2906,8 @@ static int rdpmc_interception(struct vcpu_svm *svm)
        return 1;
 }
 
-bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
+static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
+                                           unsigned long val)
 {
        unsigned long cr0 = svm->vcpu.arch.cr0;
        bool ret = false;
@@ -2940,7 +2945,10 @@ static int cr_interception(struct vcpu_svm *svm)
                return emulate_on_interception(svm);
 
        reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
-       cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+       if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
+               cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
+       else
+               cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
 
        err = 0;
        if (cr >= 16) { /* mov to cr */
@@ -3133,7 +3141,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
 
 static int rdmsr_interception(struct vcpu_svm *svm)
 {
-       u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+       u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
        u64 data;
 
        if (svm_get_msr(&svm->vcpu, ecx, &data)) {
@@ -3142,8 +3150,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
        } else {
                trace_kvm_msr_read(ecx, data);
 
-               svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
-               svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
+               kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
+               kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
                svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
                skip_emulated_instruction(&svm->vcpu);
        }
@@ -3246,9 +3254,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 static int wrmsr_interception(struct vcpu_svm *svm)
 {
        struct msr_data msr;
-       u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
-       u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
-               | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+       u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+       u64 data = kvm_read_edx_eax(&svm->vcpu);
 
        msr.data = data;
        msr.index = ecx;
@@ -3325,7 +3332,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_READ_CR3]                     = cr_interception,
        [SVM_EXIT_READ_CR4]                     = cr_interception,
        [SVM_EXIT_READ_CR8]                     = cr_interception,
-       [SVM_EXIT_CR0_SEL_WRITE]                = emulate_on_interception,
+       [SVM_EXIT_CR0_SEL_WRITE]                = cr_interception,
        [SVM_EXIT_WRITE_CR0]                    = cr_interception,
        [SVM_EXIT_WRITE_CR3]                    = cr_interception,
        [SVM_EXIT_WRITE_CR4]                    = cr_interception,
@@ -3376,7 +3383,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
        [SVM_EXIT_STGI]                         = stgi_interception,
        [SVM_EXIT_CLGI]                         = clgi_interception,
        [SVM_EXIT_SKINIT]                       = skinit_interception,
-       [SVM_EXIT_WBINVD]                       = emulate_on_interception,
+       [SVM_EXIT_WBINVD]                       = wbinvd_interception,
        [SVM_EXIT_MONITOR]                      = monitor_interception,
        [SVM_EXIT_MWAIT]                        = mwait_interception,
        [SVM_EXIT_XSETBV]                       = xsetbv_interception,
@@ -3555,7 +3562,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
 
        if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
            || !svm_exit_handlers[exit_code]) {
-               WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+               WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
                kvm_queue_exception(vcpu, UD_VECTOR);
                return 1;
        }
index f7b20b4..f5e8dce 100644 (file)
@@ -2168,7 +2168,10 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
 {
        unsigned long *msr_bitmap;
 
-       if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+       if (is_guest_mode(vcpu))
+               msr_bitmap = vmx_msr_bitmap_nested;
+       else if (irqchip_in_kernel(vcpu->kvm) &&
+               apic_x2apic_mode(vcpu->arch.apic)) {
                if (is_long_mode(vcpu))
                        msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
                else
@@ -2467,6 +2470,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
        vmx->nested.nested_vmx_secondary_ctls_low = 0;
        vmx->nested.nested_vmx_secondary_ctls_high &=
                SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+               SECONDARY_EXEC_RDTSCP |
                SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                SECONDARY_EXEC_APIC_REGISTER_VIRT |
                SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -2476,8 +2480,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
        if (enable_ept) {
                /* nested EPT: emulate EPT also to L1 */
                vmx->nested.nested_vmx_secondary_ctls_high |=
-                       SECONDARY_EXEC_ENABLE_EPT |
-                       SECONDARY_EXEC_UNRESTRICTED_GUEST;
+                       SECONDARY_EXEC_ENABLE_EPT;
                vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
                         VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
                         VMX_EPT_INVEPT_BIT;
@@ -2491,6 +2494,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
        } else
                vmx->nested.nested_vmx_ept_caps = 0;
 
+       if (enable_unrestricted_guest)
+               vmx->nested.nested_vmx_secondary_ctls_high |=
+                       SECONDARY_EXEC_UNRESTRICTED_GUEST;
+
        /* miscellaneous data */
        rdmsr(MSR_IA32_VMX_MISC,
                vmx->nested.nested_vmx_misc_low,
@@ -3262,8 +3269,8 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
                 * default value.
                 */
                if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
-                       save->selector &= ~SELECTOR_RPL_MASK;
-               save->dpl = save->selector & SELECTOR_RPL_MASK;
+                       save->selector &= ~SEGMENT_RPL_MASK;
+               save->dpl = save->selector & SEGMENT_RPL_MASK;
                save->s = 1;
        }
        vmx_set_segment(vcpu, save, seg);
@@ -3836,7 +3843,7 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
        unsigned int cs_rpl;
 
        vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
-       cs_rpl = cs.selector & SELECTOR_RPL_MASK;
+       cs_rpl = cs.selector & SEGMENT_RPL_MASK;
 
        if (cs.unusable)
                return false;
@@ -3864,7 +3871,7 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
        unsigned int ss_rpl;
 
        vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
-       ss_rpl = ss.selector & SELECTOR_RPL_MASK;
+       ss_rpl = ss.selector & SEGMENT_RPL_MASK;
 
        if (ss.unusable)
                return true;
@@ -3886,7 +3893,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
        unsigned int rpl;
 
        vmx_get_segment(vcpu, &var, seg);
-       rpl = var.selector & SELECTOR_RPL_MASK;
+       rpl = var.selector & SEGMENT_RPL_MASK;
 
        if (var.unusable)
                return true;
@@ -3913,7 +3920,7 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
 
        if (tr.unusable)
                return false;
-       if (tr.selector & SELECTOR_TI_MASK)     /* TI = 1 */
+       if (tr.selector & SEGMENT_TI_MASK)      /* TI = 1 */
                return false;
        if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
                return false;
@@ -3931,7 +3938,7 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
 
        if (ldtr.unusable)
                return true;
-       if (ldtr.selector & SELECTOR_TI_MASK)   /* TI = 1 */
+       if (ldtr.selector & SEGMENT_TI_MASK)    /* TI = 1 */
                return false;
        if (ldtr.type != 2)
                return false;
@@ -3948,8 +3955,8 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
        vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
        vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
 
-       return ((cs.selector & SELECTOR_RPL_MASK) ==
-                (ss.selector & SELECTOR_RPL_MASK));
+       return ((cs.selector & SEGMENT_RPL_MASK) ==
+                (ss.selector & SEGMENT_RPL_MASK));
 }
 
 /*
@@ -4705,7 +4712,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
        vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
        kvm_set_cr8(&vmx->vcpu, 0);
        apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
-       if (kvm_vcpu_is_bsp(&vmx->vcpu))
+       if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
                apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
        apic_base_msr.host_initiated = true;
        kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
@@ -5000,7 +5007,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
                if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
                        if (vcpu->arch.halt_request) {
                                vcpu->arch.halt_request = 0;
-                               return kvm_emulate_halt(vcpu);
+                               return kvm_vcpu_halt(vcpu);
                        }
                        return 1;
                }
@@ -5065,6 +5072,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
        }
 
        if (is_invalid_opcode(intr_info)) {
+               if (is_guest_mode(vcpu)) {
+                       kvm_queue_exception(vcpu, UD_VECTOR);
+                       return 1;
+               }
                er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
                if (er != EMULATE_DONE)
                        kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5084,9 +5095,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
            !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
                vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
                vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
-               vcpu->run->internal.ndata = 2;
+               vcpu->run->internal.ndata = 3;
                vcpu->run->internal.data[0] = vect_info;
                vcpu->run->internal.data[1] = intr_info;
+               vcpu->run->internal.data[2] = error_code;
                return 0;
        }
 
@@ -5527,13 +5539,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
 
 static int handle_halt(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
        return kvm_emulate_halt(vcpu);
 }
 
 static int handle_vmcall(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
        kvm_emulate_hypercall(vcpu);
        return 1;
 }
@@ -5564,7 +5574,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu)
 
 static int handle_wbinvd(struct kvm_vcpu *vcpu)
 {
-       skip_emulated_instruction(vcpu);
        kvm_emulate_wbinvd(vcpu);
        return 1;
 }
@@ -5822,7 +5831,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
        gpa_t gpa;
 
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
-       if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+       if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
                skip_emulated_instruction(vcpu);
                return 1;
        }
@@ -5903,7 +5912,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 
                if (vcpu->arch.halt_request) {
                        vcpu->arch.halt_request = 0;
-                       ret = kvm_emulate_halt(vcpu);
+                       ret = kvm_vcpu_halt(vcpu);
                        goto out;
                }
 
@@ -7312,21 +7321,21 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
                else if (port < 0x10000)
                        bitmap = vmcs12->io_bitmap_b;
                else
-                       return 1;
+                       return true;
                bitmap += (port & 0x7fff) / 8;
 
                if (last_bitmap != bitmap)
                        if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
-                               return 1;
+                               return true;
                if (b & (1 << (port & 7)))
-                       return 1;
+                       return true;
 
                port++;
                size--;
                last_bitmap = bitmap;
        }
 
-       return 0;
+       return false;
 }
 
 /*
@@ -7342,7 +7351,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
        gpa_t bitmap;
 
        if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
-               return 1;
+               return true;
 
        /*
         * The MSR_BITMAP page is divided into four 1024-byte bitmaps,
@@ -7361,10 +7370,10 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
        if (msr_index < 1024*8) {
                unsigned char b;
                if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
-                       return 1;
+                       return true;
                return 1 & (b >> (msr_index & 7));
        } else
-               return 1; /* let L1 handle the wrong parameter */
+               return true; /* let L1 handle the wrong parameter */
 }
 
 /*
@@ -7386,7 +7395,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
                case 0:
                        if (vmcs12->cr0_guest_host_mask &
                            (val ^ vmcs12->cr0_read_shadow))
-                               return 1;
+                               return true;
                        break;
                case 3:
                        if ((vmcs12->cr3_target_count >= 1 &&
@@ -7397,37 +7406,37 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
                                        vmcs12->cr3_target_value2 == val) ||
                                (vmcs12->cr3_target_count >= 4 &&
                                        vmcs12->cr3_target_value3 == val))
-                               return 0;
+                               return false;
                        if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
-                               return 1;
+                               return true;
                        break;
                case 4:
                        if (vmcs12->cr4_guest_host_mask &
                            (vmcs12->cr4_read_shadow ^ val))
-                               return 1;
+                               return true;
                        break;
                case 8:
                        if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
-                               return 1;
+                               return true;
                        break;
                }
                break;
        case 2: /* clts */
                if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
                    (vmcs12->cr0_read_shadow & X86_CR0_TS))
-                       return 1;
+                       return true;
                break;
        case 1: /* mov from cr */
                switch (cr) {
                case 3:
                        if (vmcs12->cpu_based_vm_exec_control &
                            CPU_BASED_CR3_STORE_EXITING)
-                               return 1;
+                               return true;
                        break;
                case 8:
                        if (vmcs12->cpu_based_vm_exec_control &
                            CPU_BASED_CR8_STORE_EXITING)
-                               return 1;
+                               return true;
                        break;
                }
                break;
@@ -7438,14 +7447,14 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
                 */
                if (vmcs12->cr0_guest_host_mask & 0xe &
                    (val ^ vmcs12->cr0_read_shadow))
-                       return 1;
+                       return true;
                if ((vmcs12->cr0_guest_host_mask & 0x1) &&
                    !(vmcs12->cr0_read_shadow & 0x1) &&
                    (val & 0x1))
-                       return 1;
+                       return true;
                break;
        }
-       return 0;
+       return false;
 }
 
 /*
@@ -7468,48 +7477,48 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                                KVM_ISA_VMX);
 
        if (vmx->nested.nested_run_pending)
-               return 0;
+               return false;
 
        if (unlikely(vmx->fail)) {
                pr_info_ratelimited("%s failed vm entry %x\n", __func__,
                                    vmcs_read32(VM_INSTRUCTION_ERROR));
-               return 1;
+               return true;
        }
 
        switch (exit_reason) {
        case EXIT_REASON_EXCEPTION_NMI:
                if (!is_exception(intr_info))
-                       return 0;
+                       return false;
                else if (is_page_fault(intr_info))
                        return enable_ept;
                else if (is_no_device(intr_info) &&
                         !(vmcs12->guest_cr0 & X86_CR0_TS))
-                       return 0;
+                       return false;
                return vmcs12->exception_bitmap &
                                (1u << (intr_info & INTR_INFO_VECTOR_MASK));
        case EXIT_REASON_EXTERNAL_INTERRUPT:
-               return 0;
+               return false;
        case EXIT_REASON_TRIPLE_FAULT:
-               return 1;
+               return true;
        case EXIT_REASON_PENDING_INTERRUPT:
                return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
        case EXIT_REASON_NMI_WINDOW:
                return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
        case EXIT_REASON_TASK_SWITCH:
-               return 1;
+               return true;
        case EXIT_REASON_CPUID:
                if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
-                       return 0;
-               return 1;
+                       return false;
+               return true;
        case EXIT_REASON_HLT:
                return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
        case EXIT_REASON_INVD:
-               return 1;
+               return true;
        case EXIT_REASON_INVLPG:
                return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
        case EXIT_REASON_RDPMC:
                return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
-       case EXIT_REASON_RDTSC:
+       case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
                return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
        case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
        case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
@@ -7521,7 +7530,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 * VMX instructions trap unconditionally. This allows L1 to
                 * emulate them for its L2 guest, i.e., allows 3-level nesting!
                 */
-               return 1;
+               return true;
        case EXIT_REASON_CR_ACCESS:
                return nested_vmx_exit_handled_cr(vcpu, vmcs12);
        case EXIT_REASON_DR_ACCESS:
@@ -7532,7 +7541,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_MSR_WRITE:
                return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
        case EXIT_REASON_INVALID_STATE:
-               return 1;
+               return true;
        case EXIT_REASON_MWAIT_INSTRUCTION:
                return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
        case EXIT_REASON_MONITOR_INSTRUCTION:
@@ -7542,7 +7551,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                        nested_cpu_has2(vmcs12,
                                SECONDARY_EXEC_PAUSE_LOOP_EXITING);
        case EXIT_REASON_MCE_DURING_VMENTRY:
-               return 0;
+               return false;
        case EXIT_REASON_TPR_BELOW_THRESHOLD:
                return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
        case EXIT_REASON_APIC_ACCESS:
@@ -7551,7 +7560,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
        case EXIT_REASON_APIC_WRITE:
        case EXIT_REASON_EOI_INDUCED:
                /* apic_write and eoi_induced should exit unconditionally. */
-               return 1;
+               return true;
        case EXIT_REASON_EPT_VIOLATION:
                /*
                 * L0 always deals with the EPT violation. If nested EPT is
@@ -7559,7 +7568,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 * missing in the guest EPT table (EPT12), the EPT violation
                 * will be injected with nested_ept_inject_page_fault()
                 */
-               return 0;
+               return false;
        case EXIT_REASON_EPT_MISCONFIG:
                /*
                 * L2 never uses directly L1's EPT, but rather L0's own EPT
@@ -7567,11 +7576,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 * (EPT on EPT). So any problems with the structure of the
                 * table is L0's fault.
                 */
-               return 0;
+               return false;
        case EXIT_REASON_WBINVD:
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
        case EXIT_REASON_XSETBV:
-               return 1;
+               return true;
        case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
                /*
                 * This should never happen, since it is not possible to
@@ -7581,7 +7590,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
                 */
                return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
        default:
-               return 1;
+               return true;
        }
 }
 
@@ -8516,6 +8525,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
                                                exec_control);
                        }
                }
+               if (nested && !vmx->rdtscp_enabled)
+                       vmx->nested.nested_vmx_secondary_ctls_high &=
+                               ~SECONDARY_EXEC_RDTSCP;
        }
 
        /* Exposing INVPCID only when PCID is exposed */
@@ -8616,10 +8628,11 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
                                        struct vmcs12 *vmcs12)
 {
        struct vcpu_vmx *vmx = to_vmx(vcpu);
+       int maxphyaddr = cpuid_maxphyaddr(vcpu);
 
        if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
-               /* TODO: Also verify bits beyond physical address width are 0 */
-               if (!PAGE_ALIGNED(vmcs12->apic_access_addr))
+               if (!PAGE_ALIGNED(vmcs12->apic_access_addr) ||
+                   vmcs12->apic_access_addr >> maxphyaddr)
                        return false;
 
                /*
@@ -8635,8 +8648,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
        }
 
        if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-               /* TODO: Also verify bits beyond physical address width are 0 */
-               if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr))
+               if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) ||
+                   vmcs12->virtual_apic_page_addr >> maxphyaddr)
                        return false;
 
                if (vmx->nested.virtual_apic_page) /* shouldn't happen */
@@ -8659,7 +8672,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
        }
 
        if (nested_cpu_has_posted_intr(vmcs12)) {
-               if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64))
+               if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) ||
+                   vmcs12->posted_intr_desc_addr >> maxphyaddr)
                        return false;
 
                if (vmx->nested.pi_desc_page) { /* shouldn't happen */
@@ -8858,9 +8872,9 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
 
 static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
                                       unsigned long count_field,
-                                      unsigned long addr_field,
-                                      int maxphyaddr)
+                                      unsigned long addr_field)
 {
+       int maxphyaddr;
        u64 count, addr;
 
        if (vmcs12_read_any(vcpu, count_field, &count) ||
@@ -8870,6 +8884,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
        }
        if (count == 0)
                return 0;
+       maxphyaddr = cpuid_maxphyaddr(vcpu);
        if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
            (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
                pr_warn_ratelimited(
@@ -8883,19 +8898,16 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
                                                struct vmcs12 *vmcs12)
 {
-       int maxphyaddr;
-
        if (vmcs12->vm_exit_msr_load_count == 0 &&
            vmcs12->vm_exit_msr_store_count == 0 &&
            vmcs12->vm_entry_msr_load_count == 0)
                return 0; /* Fast path */
-       maxphyaddr = cpuid_maxphyaddr(vcpu);
        if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
-                                       VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) ||
+                                       VM_EXIT_MSR_LOAD_ADDR) ||
            nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
-                                       VM_EXIT_MSR_STORE_ADDR, maxphyaddr) ||
+                                       VM_EXIT_MSR_STORE_ADDR) ||
            nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
-                                       VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr))
+                                       VM_ENTRY_MSR_LOAD_ADDR))
                return -EINVAL;
        return 0;
 }
@@ -9145,8 +9157,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                        exec_control &= ~SECONDARY_EXEC_RDTSCP;
                /* Take the following fields only from vmcs12 */
                exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+                                 SECONDARY_EXEC_RDTSCP |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-                                  SECONDARY_EXEC_APIC_REGISTER_VIRT);
+                                 SECONDARY_EXEC_APIC_REGISTER_VIRT);
                if (nested_cpu_has(vmcs12,
                                CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
                        exec_control |= vmcs12->secondary_vm_exec_control;
@@ -9218,9 +9231,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
        }
 
        if (cpu_has_vmx_msr_bitmap() &&
-           exec_control & CPU_BASED_USE_MSR_BITMAPS &&
-           nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
-               vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
+           exec_control & CPU_BASED_USE_MSR_BITMAPS) {
+               nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
+               /* MSR_BITMAP will be set by following vmx_set_efer. */
        } else
                exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
 
@@ -9379,7 +9392,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        }
 
        if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
-               /*TODO: Also verify bits beyond physical address width are 0*/
                nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
                return 1;
        }
@@ -9518,7 +9530,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
        vmcs12->launch_state = 1;
 
        if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
-               return kvm_emulate_halt(vcpu);
+               return kvm_vcpu_halt(vcpu);
 
        vmx->nested.nested_run_pending = 1;
 
index bd7a70b..e1a8126 100644 (file)
@@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
+{
+       int i;
+
+       if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+               for (i = 0; i < KVM_NR_DB_REGS; i++)
+                       vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+               vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
+       }
+}
+
 static void kvm_update_dr6(struct kvm_vcpu *vcpu)
 {
        if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
@@ -1070,19 +1081,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
        struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
        u64 boot_ns;
 
-       boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
+       boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
 
        write_seqcount_begin(&vdata->seq);
 
        /* copy pvclock gtod data */
-       vdata->clock.vclock_mode        = tk->tkr.clock->archdata.vclock_mode;
-       vdata->clock.cycle_last         = tk->tkr.cycle_last;
-       vdata->clock.mask               = tk->tkr.mask;
-       vdata->clock.mult               = tk->tkr.mult;
-       vdata->clock.shift              = tk->tkr.shift;
+       vdata->clock.vclock_mode        = tk->tkr_mono.clock->archdata.vclock_mode;
+       vdata->clock.cycle_last         = tk->tkr_mono.cycle_last;
+       vdata->clock.mask               = tk->tkr_mono.mask;
+       vdata->clock.mult               = tk->tkr_mono.mult;
+       vdata->clock.shift              = tk->tkr_mono.shift;
 
        vdata->boot_ns                  = boot_ns;
-       vdata->nsec_base                = tk->tkr.xtime_nsec;
+       vdata->nsec_base                = tk->tkr_mono.xtime_nsec;
 
        write_seqcount_end(&vdata->seq);
 }
@@ -2744,7 +2755,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_USER_NMI:
        case KVM_CAP_REINJECT_CONTROL:
        case KVM_CAP_IRQ_INJECT_STATUS:
-       case KVM_CAP_IRQFD:
        case KVM_CAP_IOEVENTFD:
        case KVM_CAP_IOEVENTFD_NO_LENGTH:
        case KVM_CAP_PIT2:
@@ -3150,6 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
                return -EINVAL;
 
        memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+       kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = dbgregs->dr6;
        kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = dbgregs->dr7;
@@ -4115,8 +4126,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
        do {
                n = min(len, 8);
                if (!(vcpu->arch.apic &&
-                     !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
-                   && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                     !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
+                   && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
                handled += n;
                addr += n;
@@ -4135,8 +4146,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
        do {
                n = min(len, 8);
                if (!(vcpu->arch.apic &&
-                     !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
-                   && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+                     !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
+                                        addr, n, v))
+                   && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
                        break;
                trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
                handled += n;
@@ -4476,7 +4488,8 @@ mmio:
        return X86EMUL_CONTINUE;
 }
 
-int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
+                       unsigned long addr,
                        void *val, unsigned int bytes,
                        struct x86_exception *exception,
                        const struct read_write_emulator_ops *ops)
@@ -4539,7 +4552,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
                                   exception, &read_emultor);
 }
 
-int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
                            unsigned long addr,
                            const void *val,
                            unsigned int bytes,
@@ -4630,10 +4643,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
        int r;
 
        if (vcpu->arch.pio.in)
-               r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
+               r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
                                    vcpu->arch.pio.size, pd);
        else
-               r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+               r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
                                     vcpu->arch.pio.port, vcpu->arch.pio.size,
                                     pd);
        return r;
@@ -4706,7 +4719,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
        kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
 }
 
-int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
 {
        if (!need_emulate_wbinvd(vcpu))
                return X86EMUL_CONTINUE;
@@ -4723,19 +4736,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
                wbinvd();
        return X86EMUL_CONTINUE;
 }
+
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       return kvm_emulate_wbinvd_noskip(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
 
+
+
 static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
 {
-       kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+       kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
 }
 
-int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
+static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
+                          unsigned long *dest)
 {
        return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
-int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
+static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
+                          unsigned long value)
 {
 
        return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
@@ -5817,7 +5840,7 @@ void kvm_arch_exit(void)
        free_percpu(shared_msrs);
 }
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 {
        ++vcpu->stat.halt_exits;
        if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5828,6 +5851,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
                return 0;
        }
 }
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+       return kvm_vcpu_halt(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
 int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
@@ -5904,7 +5934,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
        lapic_irq.dest_id = apicid;
 
        lapic_irq.delivery_mode = APIC_DM_REMRD;
-       kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
+       kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -5912,6 +5942,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        unsigned long nr, a0, a1, a2, a3, ret;
        int op_64_bit, r = 1;
 
+       kvm_x86_ops->skip_emulated_instruction(vcpu);
+
        if (kvm_hv_hypercall_enabled(vcpu->kvm))
                return kvm_hv_hypercall(vcpu);
 
@@ -6165,7 +6197,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
 }
 
 /*
- * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * Returns 1 to let vcpu_run() continue the guest execution loop without
  * exiting to the userspace.  Otherwise, the value will be returned to the
  * userspace.
  */
@@ -6302,6 +6334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                set_debugreg(vcpu->arch.eff_db[2], 2);
                set_debugreg(vcpu->arch.eff_db[3], 3);
                set_debugreg(vcpu->arch.dr6, 6);
+               vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
        }
 
        trace_kvm_entry(vcpu->vcpu_id);
@@ -6383,42 +6416,47 @@ out:
        return r;
 }
 
+static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+       if (!kvm_arch_vcpu_runnable(vcpu)) {
+               srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+               kvm_vcpu_block(vcpu);
+               vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+               if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+                       return 1;
+       }
+
+       kvm_apic_accept_events(vcpu);
+       switch(vcpu->arch.mp_state) {
+       case KVM_MP_STATE_HALTED:
+               vcpu->arch.pv.pv_unhalted = false;
+               vcpu->arch.mp_state =
+                       KVM_MP_STATE_RUNNABLE;
+       case KVM_MP_STATE_RUNNABLE:
+               vcpu->arch.apf.halted = false;
+               break;
+       case KVM_MP_STATE_INIT_RECEIVED:
+               break;
+       default:
+               return -EINTR;
+               break;
+       }
+       return 1;
+}
 
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+static int vcpu_run(struct kvm_vcpu *vcpu)
 {
        int r;
        struct kvm *kvm = vcpu->kvm;
 
        vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 
-       r = 1;
-       while (r > 0) {
+       for (;;) {
                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
                    !vcpu->arch.apf.halted)
                        r = vcpu_enter_guest(vcpu);
-               else {
-                       srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-                       kvm_vcpu_block(vcpu);
-                       vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-                       if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
-                               kvm_apic_accept_events(vcpu);
-                               switch(vcpu->arch.mp_state) {
-                               case KVM_MP_STATE_HALTED:
-                                       vcpu->arch.pv.pv_unhalted = false;
-                                       vcpu->arch.mp_state =
-                                               KVM_MP_STATE_RUNNABLE;
-                               case KVM_MP_STATE_RUNNABLE:
-                                       vcpu->arch.apf.halted = false;
-                                       break;
-                               case KVM_MP_STATE_INIT_RECEIVED:
-                                       break;
-                               default:
-                                       r = -EINTR;
-                                       break;
-                               }
-                       }
-               }
-
+               else
+                       r = vcpu_block(kvm, vcpu);
                if (r <= 0)
                        break;
 
@@ -6430,6 +6468,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.request_irq_exits;
+                       break;
                }
 
                kvm_check_async_pf_completion(vcpu);
@@ -6438,6 +6477,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
                        r = -EINTR;
                        vcpu->run->exit_reason = KVM_EXIT_INTR;
                        ++vcpu->stat.signal_exits;
+                       break;
                }
                if (need_resched()) {
                        srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6569,7 +6609,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
        } else
                WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
-       r = __vcpu_run(vcpu);
+       r = vcpu_run(vcpu);
 
 out:
        post_kvm_run_save(vcpu);
@@ -7076,11 +7116,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
        kvm_clear_exception_queue(vcpu);
 
        memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+       kvm_update_dr0123(vcpu);
        vcpu->arch.dr6 = DR6_INIT;
        kvm_update_dr6(vcpu);
        vcpu->arch.dr7 = DR7_FIXED_1;
        kvm_update_dr7(vcpu);
 
+       vcpu->arch.cr2 = 0;
+
        kvm_make_request(KVM_REQ_EVENT, vcpu);
        vcpu->arch.apf.msr_val = 0;
        vcpu->arch.st.msr_val = 0;
@@ -7241,7 +7284,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 
        vcpu->arch.pv.pv_unhalted = false;
        vcpu->arch.emulate_ctxt.ops = &emulate_ops;
-       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
+       if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
                vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
        else
                vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7289,6 +7332,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
        vcpu->arch.guest_supported_xcr0 = 0;
        vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
 
+       vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
        kvm_async_pf_hash_reset(vcpu);
        kvm_pmu_init(vcpu);
 
@@ -7429,7 +7474,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
        for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
                if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
-                       kvm_kvfree(free->arch.rmap[i]);
+                       kvfree(free->arch.rmap[i]);
                        free->arch.rmap[i] = NULL;
                }
                if (i == 0)
@@ -7437,7 +7482,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 
                if (!dont || free->arch.lpage_info[i - 1] !=
                             dont->arch.lpage_info[i - 1]) {
-                       kvm_kvfree(free->arch.lpage_info[i - 1]);
+                       kvfree(free->arch.lpage_info[i - 1]);
                        free->arch.lpage_info[i - 1] = NULL;
                }
        }
@@ -7491,12 +7536,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 
 out_free:
        for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
-               kvm_kvfree(slot->arch.rmap[i]);
+               kvfree(slot->arch.rmap[i]);
                slot->arch.rmap[i] = NULL;
                if (i == 0)
                        continue;
 
-               kvm_kvfree(slot->arch.lpage_info[i - 1]);
+               kvfree(slot->arch.lpage_info[i - 1]);
                slot->arch.lpage_info[i - 1] = NULL;
        }
        return -ENOMEM;
@@ -7618,6 +7663,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
        /* It's OK to get 'new' slot here as it has already been installed */
        new = id_to_memslot(kvm->memslots, mem->slot);
 
+       /*
+        * Dirty logging tracks sptes in 4k granularity, meaning that large
+        * sptes have to be split.  If live migration is successful, the guest
+        * in the source machine will be destroyed and large sptes will be
+        * created in the destination. However, if the guest continues to run
+        * in the source machine (for example if live migration fails), small
+        * sptes will remain around and cause bad performance.
+        *
+        * Scan sptes if dirty logging has been stopped, dropping those
+        * which can be collapsed into a single large-page spte.  Later
+        * page faults will create the large-page sptes.
+        */
+       if ((change != KVM_MR_DELETE) &&
+               (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+               !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+               kvm_mmu_zap_collapsible_sptes(kvm, new);
+
        /*
         * Set up write protection and/or dirty logging for the new slot.
         *
index ac4453d..717908b 100644 (file)
@@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
                /* Some systems map "vectors" to interrupts weirdly.  Not us! */
                __this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
                if (i != SYSCALL_VECTOR)
-                       set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+                       set_intr_gate(i, irq_entries_start +
+                                       8 * (i - FIRST_EXTERNAL_VECTOR));
        }
 
        /*
@@ -1076,6 +1077,7 @@ static void lguest_load_sp0(struct tss_struct *tss,
 {
        lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
                   THREAD_SIZE / PAGE_SIZE);
+       tss->x86_tss.sp0 = thread->sp0;
 }
 
 /* Let's just say, I wouldn't do debugging under a Guest. */
index f5cc9eb..082a851 100644 (file)
 #include <asm/alternative-asm.h>
 #include <asm/dwarf2.h>
 
-.macro SAVE reg
-       pushl_cfi %\reg
-       CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
-       popl_cfi %\reg
-       CFI_RESTORE \reg
-.endm
-
 .macro read64 reg
        movl %ebx, %eax
        movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
 .macro addsub_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
        CFI_STARTPROC
-       SAVE ebp
-       SAVE ebx
-       SAVE esi
-       SAVE edi
+       pushl_cfi_reg ebp
+       pushl_cfi_reg ebx
+       pushl_cfi_reg esi
+       pushl_cfi_reg edi
 
        movl %eax, %esi
        movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
        movl %ebx, %eax
        movl %ecx, %edx
-       RESTORE edi
-       RESTORE esi
-       RESTORE ebx
-       RESTORE ebp
+       popl_cfi_reg edi
+       popl_cfi_reg esi
+       popl_cfi_reg ebx
+       popl_cfi_reg ebp
        ret
        CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
 .macro incdec_return func ins insc
 ENTRY(atomic64_\func\()_return_cx8)
        CFI_STARTPROC
-       SAVE ebx
+       pushl_cfi_reg ebx
 
        read64 %esi
 1:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
 10:
        movl %ebx, %eax
        movl %ecx, %edx
-       RESTORE ebx
+       popl_cfi_reg ebx
        ret
        CFI_ENDPROC
 ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb
 
 ENTRY(atomic64_dec_if_positive_cx8)
        CFI_STARTPROC
-       SAVE ebx
+       pushl_cfi_reg ebx
 
        read64 %esi
 1:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
 2:
        movl %ebx, %eax
        movl %ecx, %edx
-       RESTORE ebx
+       popl_cfi_reg ebx
        ret
        CFI_ENDPROC
 ENDPROC(atomic64_dec_if_positive_cx8)
 
 ENTRY(atomic64_add_unless_cx8)
        CFI_STARTPROC
-       SAVE ebp
-       SAVE ebx
+       pushl_cfi_reg ebp
+       pushl_cfi_reg ebx
 /* these just push these two parameters on the stack */
-       SAVE edi
-       SAVE ecx
+       pushl_cfi_reg edi
+       pushl_cfi_reg ecx
 
        movl %eax, %ebp
        movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
 3:
        addl $8, %esp
        CFI_ADJUST_CFA_OFFSET -8
-       RESTORE ebx
-       RESTORE ebp
+       popl_cfi_reg ebx
+       popl_cfi_reg ebp
        ret
 4:
        cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
 
 ENTRY(atomic64_inc_not_zero_cx8)
        CFI_STARTPROC
-       SAVE ebx
+       pushl_cfi_reg ebx
 
        read64 %esi
 1:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
 
        movl $1, %eax
 3:
-       RESTORE ebx
+       popl_cfi_reg ebx
        ret
        CFI_ENDPROC
 ENDPROC(atomic64_inc_not_zero_cx8)
index e78b8ee..9bc944a 100644 (file)
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
           */           
 ENTRY(csum_partial)
        CFI_STARTPROC
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg esi
+       pushl_cfi_reg ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: unsigned char *buff
@@ -127,14 +125,12 @@ ENTRY(csum_partial)
 6:     addl %ecx,%eax
        adcl $0, %eax 
 7:     
-       testl $1, 12(%esp)
+       testb $1, 12(%esp)
        jz 8f
        roll $8, %eax
 8:
-       popl_cfi %ebx
-       CFI_RESTORE ebx
-       popl_cfi %esi
-       CFI_RESTORE esi
+       popl_cfi_reg ebx
+       popl_cfi_reg esi
        ret
        CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)
 
 ENTRY(csum_partial)
        CFI_STARTPROC
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg esi
+       pushl_cfi_reg ebx
        movl 20(%esp),%eax      # Function arg: unsigned int sum
        movl 16(%esp),%ecx      # Function arg: int len
        movl 12(%esp),%esi      # Function arg: const unsigned char *buf
@@ -251,14 +245,12 @@ ENTRY(csum_partial)
        addl %ebx,%eax
        adcl $0,%eax
 80: 
-       testl $1, 12(%esp)
+       testb $1, 12(%esp)
        jz 90f
        roll $8, %eax
 90: 
-       popl_cfi %ebx
-       CFI_RESTORE ebx
-       popl_cfi %esi
-       CFI_RESTORE esi
+       popl_cfi_reg ebx
+       popl_cfi_reg esi
        ret
        CFI_ENDPROC
 ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
        subl  $4,%esp   
        CFI_ADJUST_CFA_OFFSET 4
-       pushl_cfi %edi
-       CFI_REL_OFFSET edi, 0
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
+       pushl_cfi_reg edi
+       pushl_cfi_reg esi
+       pushl_cfi_reg ebx
        movl ARGBASE+16(%esp),%eax      # sum
        movl ARGBASE+12(%esp),%ecx      # len
        movl ARGBASE+4(%esp),%esi       # src
@@ -412,12 +401,9 @@ DST(       movb %cl, (%edi)        )
 
 .previous
 
-       popl_cfi %ebx
-       CFI_RESTORE ebx
-       popl_cfi %esi
-       CFI_RESTORE esi
-       popl_cfi %edi
-       CFI_RESTORE edi
+       popl_cfi_reg ebx
+       popl_cfi_reg esi
+       popl_cfi_reg edi
        popl_cfi %ecx                   # equivalent to addl $4,%esp
        ret     
        CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
                
 ENTRY(csum_partial_copy_generic)
        CFI_STARTPROC
-       pushl_cfi %ebx
-       CFI_REL_OFFSET ebx, 0
-       pushl_cfi %edi
-       CFI_REL_OFFSET edi, 0
-       pushl_cfi %esi
-       CFI_REL_OFFSET esi, 0
+       pushl_cfi_reg ebx
+       pushl_cfi_reg edi
+       pushl_cfi_reg esi
        movl ARGBASE+4(%esp),%esi       #src
        movl ARGBASE+8(%esp),%edi       #dst    
        movl ARGBASE+12(%esp),%ecx      #len
@@ -506,12 +489,9 @@ DST(       movb %dl, (%edi)         )
        jmp  7b                 
 .previous                              
 
-       popl_cfi %esi
-       CFI_RESTORE esi
-       popl_cfi %edi
-       CFI_RESTORE edi
-       popl_cfi %ebx
-       CFI_RESTORE ebx
+       popl_cfi_reg esi
+       popl_cfi_reg edi
+       popl_cfi_reg ebx
        ret
        CFI_ENDPROC
 ENDPROC(csum_partial_copy_generic)
index f2145cf..e67e579 100644 (file)
@@ -1,31 +1,35 @@
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
 /*
- * Zero a page.        
- * rdi page
- */                    
-ENTRY(clear_page_c)
+ * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+ * recommended to use this when possible and we do use them by default.
+ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+ * Otherwise, use original.
+ */
+
+/*
+ * Zero a page.
+ * %rdi        - page
+ */
+ENTRY(clear_page)
        CFI_STARTPROC
+
+       ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
+                     "jmp clear_page_c_e", X86_FEATURE_ERMS
+
        movl $4096/8,%ecx
        xorl %eax,%eax
        rep stosq
        ret
        CFI_ENDPROC
-ENDPROC(clear_page_c)
+ENDPROC(clear_page)
 
-ENTRY(clear_page_c_e)
+ENTRY(clear_page_orig)
        CFI_STARTPROC
-       movl $4096,%ecx
-       xorl %eax,%eax
-       rep stosb
-       ret
-       CFI_ENDPROC
-ENDPROC(clear_page_c_e)
 
-ENTRY(clear_page)
-       CFI_STARTPROC
        xorl   %eax,%eax
        movl   $4096/64,%ecx
        .p2align 4
@@ -45,29 +49,13 @@ ENTRY(clear_page)
        nop
        ret
        CFI_ENDPROC
-.Lclear_page_end:
-ENDPROC(clear_page)
-
-       /*
-        * Some CPUs support enhanced REP MOVSB/STOSB instructions.
-        * It is recommended to use this when possible.
-        * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
-        * Otherwise, use original function.
-        *
-        */
+ENDPROC(clear_page_orig)
 
-#include <asm/cpufeature.h>
-
-       .section .altinstr_replacement,"ax"
-1:     .byte 0xeb                                      /* jmp <disp8> */
-       .byte (clear_page_c - clear_page) - (2f - 1b)   /* offset */
-2:     .byte 0xeb                                      /* jmp <disp8> */
-       .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
-3:
-       .previous
-       .section .altinstructions,"a"
-       altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
-                            .Lclear_page_end-clear_page, 2b-1b
-       altinstruction_entry clear_page,2b,X86_FEATURE_ERMS,   \
-                            .Lclear_page_end-clear_page,3b-2b
-       .previous
+ENTRY(clear_page_c_e)
+       CFI_STARTPROC
+       movl $4096,%ecx
+       xorl %eax,%eax
+       rep stosb
+       ret
+       CFI_ENDPROC
+ENDPROC(clear_page_c_e)
index 176cca6..8239dbc 100644 (file)
@@ -2,23 +2,26 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * Some CPUs run faster using the string copy instructions (sane microcode).
+ * It is also a lot simpler. Use this when possible. But, don't use streaming
+ * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
+ * prefetch distance based on SMP/UP.
+ */
        ALIGN
-copy_page_rep:
+ENTRY(copy_page)
        CFI_STARTPROC
+       ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
        movl    $4096/8, %ecx
        rep     movsq
        ret
        CFI_ENDPROC
-ENDPROC(copy_page_rep)
-
-/*
- *  Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
- *  Could vary the prefetch distance based on SMP/UP.
-*/
+ENDPROC(copy_page)
 
-ENTRY(copy_page)
+ENTRY(copy_page_regs)
        CFI_STARTPROC
        subq    $2*8,   %rsp
        CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
        addq    $2*8, %rsp
        CFI_ADJUST_CFA_OFFSET -2*8
        ret
-.Lcopy_page_end:
        CFI_ENDPROC
-ENDPROC(copy_page)
-
-       /* Some CPUs run faster using the string copy instructions.
-          It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
-       .section .altinstr_replacement,"ax"
-1:     .byte 0xeb                                      /* jmp <disp8> */
-       .byte (copy_page_rep - copy_page) - (2f - 1b)   /* offset */
-2:
-       .previous
-       .section .altinstructions,"a"
-       altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD,       \
-               .Lcopy_page_end-copy_page, 2b-1b
-       .previous
+ENDPROC(copy_page_regs)
index dee945d..fa997df 100644 (file)
@@ -8,9 +8,6 @@
 
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
 #include <asm/current.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
 
-/*
- * By placing feature2 after feature1 in altinstructions section, we logically
- * implement:
- * If CPU has feature2, jmp to alt2 is used
- * else if CPU has feature1, jmp to alt1 is used
- * else jmp to orig is used.
- */
-       .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
-0:
-       .byte 0xe9      /* 32bit jump */
-       .long \orig-1f  /* by default jump to orig */
-1:
-       .section .altinstr_replacement,"ax"
-2:     .byte 0xe9                      /* near jump with 32bit immediate */
-       .long \alt1-1b /* offset */   /* or alternatively to alt1 */
-3:     .byte 0xe9                      /* near jump with 32bit immediate */
-       .long \alt2-1b /* offset */   /* or alternatively to alt2 */
-       .previous
-
-       .section .altinstructions,"a"
-       altinstruction_entry 0b,2b,\feature1,5,5
-       altinstruction_entry 0b,3b,\feature2,5,5
-       .previous
-       .endm
-
        .macro ALIGN_DESTINATION
-#ifdef FIX_ALIGNMENT
        /* check for bad alignment of destination */
        movl %edi,%ecx
        andl $7,%ecx
@@ -67,7 +38,6 @@
 
        _ASM_EXTABLE(100b,103b)
        _ASM_EXTABLE(101b,103b)
-#endif
        .endm
 
 /* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
        jc bad_to_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_to_user
-       ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
-               copy_user_generic_unrolled,copy_user_generic_string,    \
-               copy_user_enhanced_fast_string
+       ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
+                     "jmp copy_user_generic_string",           \
+                     X86_FEATURE_REP_GOOD,                     \
+                     "jmp copy_user_enhanced_fast_string",     \
+                     X86_FEATURE_ERMS
        CFI_ENDPROC
 ENDPROC(_copy_to_user)
 
@@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
        jc bad_from_user
        cmpq TI_addr_limit(%rax),%rcx
        ja bad_from_user
-       ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
-               copy_user_generic_unrolled,copy_user_generic_string,    \
-               copy_user_enhanced_fast_string
+       ALTERNATIVE_2 "jmp copy_user_generic_unrolled",         \
+                     "jmp copy_user_generic_string",           \
+                     X86_FEATURE_REP_GOOD,                     \
+                     "jmp copy_user_enhanced_fast_string",     \
+                     X86_FEATURE_ERMS
        CFI_ENDPROC
 ENDPROC(_copy_from_user)
 
index 2419d5f..9734182 100644 (file)
@@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
 
        /* handle last odd byte */
 .Lhandle_1:
-       testl $1, %r10d
+       testb $1, %r10b
        jz    .Lende
        xorl  %ebx, %ebx
        source
index 1313ae6..8f72b33 100644 (file)
  */
 void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
 {
+       /*
+        * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+        * even if the input buffer is long enough to hold them.
+        */
+       if (buf_len > MAX_INSN_SIZE)
+               buf_len = MAX_INSN_SIZE;
+
        memset(insn, 0, sizeof(*insn));
        insn->kaddr = kaddr;
        insn->end_kaddr = kaddr + buf_len;
@@ -164,6 +171,12 @@ found:
                                /* VEX.W overrides opnd_size */
                                insn->opnd_bytes = 8;
                } else {
+                       /*
+                        * For VEX2, fake VEX3-like byte#2.
+                        * Makes it easier to decode vex.W, vex.vvvv,
+                        * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+                        */
+                       insn->vex_prefix.bytes[2] = b2 & 0x7f;
                        insn->vex_prefix.nbytes = 2;
                        insn->next_byte += 2;
                }
index 89b53c9..b046664 100644 (file)
@@ -1,11 +1,19 @@
 /* Copyright 2002 Andi Kleen */
 
 #include <linux/linkage.h>
-
 #include <asm/cpufeature.h>
 #include <asm/dwarf2.h>
 #include <asm/alternative-asm.h>
 
+/*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
 /*
  * memcpy - Copy a memory block.
  *
  * Output:
  * rax original destination
  */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+       ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+                     "jmp memcpy_erms", X86_FEATURE_ERMS
 
-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
-       .section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
        movq %rdi, %rax
        movq %rdx, %rcx
        shrq $3, %rcx
        movl %edx, %ecx
        rep movsb
        ret
-.Lmemcpy_e:
-       .previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
 
 /*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
  */
-       .section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
        movq %rdi, %rax
        movq %rdx, %rcx
        rep movsb
        ret
-.Lmemcpy_e_e:
-       .previous
-
-.weak memcpy
+ENDPROC(memcpy_erms)
 
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENTRY(memcpy_orig)
        CFI_STARTPROC
        movq %rdi, %rax
 
@@ -183,26 +179,4 @@ ENTRY(memcpy)
 .Lend:
        retq
        CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
-       /*
-        * Some CPUs are adding enhanced REP MOVSB/STOSB feature
-        * If the feature is supported, memcpy_c_e() is the first choice.
-        * If enhanced rep movsb copy is not available, use fast string copy
-        * memcpy_c() when possible. This is faster and code is simpler than
-        * original memcpy().
-        * Otherwise, original memcpy() is used.
-        * In .altinstructions section, ERMS feature is placed after REG_GOOD
-         * feature to implement the right patch order.
-        *
-        * Replace only beginning, memcpy is used to apply alternatives,
-        * so it is silly to overwrite itself with nops - reboot is the
-        * only outcome...
-        */
-       .section .altinstructions, "a"
-       altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
-                            .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
-       altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
-                            .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
-       .previous
+ENDPROC(memcpy_orig)
index 9c4b530..0f8a0d0 100644 (file)
@@ -5,7 +5,6 @@
  * This assembly file is re-written from memmove_64.c file.
  *     - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
  */
-#define _STRING_C
 #include <linux/linkage.h>
 #include <asm/dwarf2.h>
 #include <asm/cpufeature.h>
@@ -44,6 +43,8 @@ ENTRY(__memmove)
        jg 2f
 
 .Lmemmove_begin_forward:
+       ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
+
        /*
         * movsq instruction have many startup latency
         * so we handle small size by general register.
@@ -207,21 +208,5 @@ ENTRY(__memmove)
 13:
        retq
        CFI_ENDPROC
-
-       .section .altinstr_replacement,"ax"
-.Lmemmove_begin_forward_efs:
-       /* Forward moving data. */
-       movq %rdx, %rcx
-       rep movsb
-       retq
-.Lmemmove_end_forward_efs:
-       .previous
-
-       .section .altinstructions,"a"
-       altinstruction_entry .Lmemmove_begin_forward,           \
-               .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS,   \
-               .Lmemmove_end_forward-.Lmemmove_begin_forward,  \
-               .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
-       .previous
 ENDPROC(__memmove)
 ENDPROC(memmove)
index 6f44935..93118fb 100644 (file)
@@ -5,19 +5,30 @@
 #include <asm/cpufeature.h>
 #include <asm/alternative-asm.h>
 
+.weak memset
+
 /*
  * ISO C memset - set a memory block to a byte value. This function uses fast
  * string to get better performance than the original function. The code is
  * simpler and shorter than the orignal function as well.
- *     
+ *
  * rdi   destination
- * rsi   value (char) 
- * rdx   count (bytes) 
- * 
+ * rsi   value (char)
+ * rdx   count (bytes)
+ *
  * rax   original destination
- */    
-       .section .altinstr_replacement, "ax", @progbits
-.Lmemset_c:
+ */
+ENTRY(memset)
+ENTRY(__memset)
+       /*
+        * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
+        * to use it when possible. If not available, use fast string instructions.
+        *
+        * Otherwise, use original memset function.
+        */
+       ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
+                     "jmp memset_erms", X86_FEATURE_ERMS
+
        movq %rdi,%r9
        movq %rdx,%rcx
        andl $7,%edx
@@ -31,8 +42,8 @@
        rep stosb
        movq %r9,%rax
        ret
-.Lmemset_e:
-       .previous
+ENDPROC(memset)
+ENDPROC(__memset)
 
 /*
  * ISO C memset - set a memory block to a byte value. This function uses
  *
  * rax   original destination
  */
-       .section .altinstr_replacement, "ax", @progbits
-.Lmemset_c_e:
+ENTRY(memset_erms)
        movq %rdi,%r9
        movb %sil,%al
        movq %rdx,%rcx
        rep stosb
        movq %r9,%rax
        ret
-.Lmemset_e_e:
-       .previous
-
-.weak memset
+ENDPROC(memset_erms)
 
-ENTRY(memset)
-ENTRY(__memset)
+ENTRY(memset_orig)
        CFI_STARTPROC
        movq %rdi,%r10
 
@@ -134,23 +140,4 @@ ENTRY(__memset)
        jmp .Lafter_bad_alignment
 .Lfinal:
        CFI_ENDPROC
-ENDPROC(memset)
-ENDPROC(__memset)
-
-       /* Some CPUs support enhanced REP MOVSB/STOSB feature.
-        * It is recommended to use this when possible.
-        *
-        * If enhanced REP MOVSB/STOSB feature is not available, use fast string
-        * instructions.
-        *
-        * Otherwise, use original memset function.
-        *
-        * In .altinstructions section, ERMS feature is placed after REG_GOOD
-         * feature to implement the right patch order.
-        */
-       .section .altinstructions,"a"
-       altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
-                            .Lfinal-__memset,.Lmemset_e-.Lmemset_c
-       altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
-                            .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
-       .previous
+ENDPROC(memset_orig)
index f6d13ee..3ca5218 100644 (file)
@@ -14,8 +14,8 @@
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
        CFI_STARTPROC
-       pushq_cfi %rbx
-       pushq_cfi %rbp
+       pushq_cfi_reg rbx
+       pushq_cfi_reg rbp
        movq    %rdi, %r10      /* Save pointer */
        xorl    %r11d, %r11d    /* Return value */
        movl    (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
        movl    %ebp, 20(%r10)
        movl    %esi, 24(%r10)
        movl    %edi, 28(%r10)
-       popq_cfi %rbp
-       popq_cfi %rbx
+       popq_cfi_reg rbp
+       popq_cfi_reg rbx
        ret
 3:
        CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
 .macro op_safe_regs op
 ENTRY(\op\()_safe_regs)
        CFI_STARTPROC
-       pushl_cfi %ebx
-       pushl_cfi %ebp
-       pushl_cfi %esi
-       pushl_cfi %edi
+       pushl_cfi_reg ebx
+       pushl_cfi_reg ebp
+       pushl_cfi_reg esi
+       pushl_cfi_reg edi
        pushl_cfi $0              /* Return value */
        pushl_cfi %eax
        movl    4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
        movl    %esi, 24(%eax)
        movl    %edi, 28(%eax)
        popl_cfi %eax
-       popl_cfi %edi
-       popl_cfi %esi
-       popl_cfi %ebp
-       popl_cfi %ebx
+       popl_cfi_reg edi
+       popl_cfi_reg esi
+       popl_cfi_reg ebp
+       popl_cfi_reg ebx
        ret
 3:
        CFI_RESTORE_STATE
index 5dff5f0..2322abe 100644 (file)
  */
 
 #define save_common_regs \
-       pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
+       pushl_cfi_reg ecx
 
 #define restore_common_regs \
-       popl_cfi %ecx; CFI_RESTORE ecx
+       popl_cfi_reg ecx
 
        /* Avoid uglifying the argument copying x86-64 needs to do. */
        .macro movq src, dst
  */
 
 #define save_common_regs \
-       pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
-       pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
-       pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
-       pushq_cfi %r8;  CFI_REL_OFFSET r8,  0; \
-       pushq_cfi %r9;  CFI_REL_OFFSET r9,  0; \
-       pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
-       pushq_cfi %r11; CFI_REL_OFFSET r11, 0
+       pushq_cfi_reg rdi; \
+       pushq_cfi_reg rsi; \
+       pushq_cfi_reg rcx; \
+       pushq_cfi_reg r8;  \
+       pushq_cfi_reg r9;  \
+       pushq_cfi_reg r10; \
+       pushq_cfi_reg r11
 
 #define restore_common_regs \
-       popq_cfi %r11; CFI_RESTORE r11; \
-       popq_cfi %r10; CFI_RESTORE r10; \
-       popq_cfi %r9;  CFI_RESTORE r9; \
-       popq_cfi %r8;  CFI_RESTORE r8; \
-       popq_cfi %rcx; CFI_RESTORE rcx; \
-       popq_cfi %rsi; CFI_RESTORE rsi; \
-       popq_cfi %rdi; CFI_RESTORE rdi
+       popq_cfi_reg r11; \
+       popq_cfi_reg r10; \
+       popq_cfi_reg r9; \
+       popq_cfi_reg r8; \
+       popq_cfi_reg rcx; \
+       popq_cfi_reg rsi; \
+       popq_cfi_reg rdi
 
 #endif
 
 ENTRY(call_rwsem_down_read_failed)
        CFI_STARTPROC
        save_common_regs
-       __ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-       CFI_REL_OFFSET __ASM_REG(dx), 0
+       __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
        movq %rax,%rdi
        call rwsem_down_read_failed
-       __ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-       CFI_RESTORE __ASM_REG(dx)
+       __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
        restore_common_regs
        ret
        CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
 ENTRY(call_rwsem_downgrade_wake)
        CFI_STARTPROC
        save_common_regs
-       __ASM_SIZE(push,_cfi) %__ASM_REG(dx)
-       CFI_REL_OFFSET __ASM_REG(dx), 0
+       __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
        movq %rax,%rdi
        call rwsem_downgrade_wake
-       __ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
-       CFI_RESTORE __ASM_REG(dx)
+       __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
        restore_common_regs
        ret
        CFI_ENDPROC
index e28cdaf..5eb7150 100644 (file)
        .globl \name
 \name:
        CFI_STARTPROC
-       pushl_cfi %eax
-       CFI_REL_OFFSET eax, 0
-       pushl_cfi %ecx
-       CFI_REL_OFFSET ecx, 0
-       pushl_cfi %edx
-       CFI_REL_OFFSET edx, 0
+       pushl_cfi_reg eax
+       pushl_cfi_reg ecx
+       pushl_cfi_reg edx
 
        .if \put_ret_addr_in_eax
        /* Place EIP in the arg1 */
        .endif
 
        call \func
-       popl_cfi %edx
-       CFI_RESTORE edx
-       popl_cfi %ecx
-       CFI_RESTORE ecx
-       popl_cfi %eax
-       CFI_RESTORE eax
+       popl_cfi_reg edx
+       popl_cfi_reg ecx
+       popl_cfi_reg eax
        ret
        CFI_ENDPROC
        _ASM_NOKPROBE(\name)
index b30b5eb..f89ba4e 100644 (file)
        CFI_STARTPROC
 
        /* this one pushes 9 elems, the next one would be %rIP */
-       SAVE_ARGS
+       pushq_cfi_reg rdi
+       pushq_cfi_reg rsi
+       pushq_cfi_reg rdx
+       pushq_cfi_reg rcx
+       pushq_cfi_reg rax
+       pushq_cfi_reg r8
+       pushq_cfi_reg r9
+       pushq_cfi_reg r10
+       pushq_cfi_reg r11
 
        .if \put_ret_addr_in_rdi
+       /* 9*8(%rsp) is return addr on stack */
        movq_cfi_restore 9*8, rdi
        .endif
 
 #endif
 #endif
 
-       /* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
        CFI_STARTPROC
-       SAVE_ARGS
+       CFI_ADJUST_CFA_OFFSET 9*8
 restore:
-       RESTORE_ARGS
+       popq_cfi_reg r11
+       popq_cfi_reg r10
+       popq_cfi_reg r9
+       popq_cfi_reg r8
+       popq_cfi_reg rax
+       popq_cfi_reg rcx
+       popq_cfi_reg rdx
+       popq_cfi_reg rsi
+       popq_cfi_reg rdi
        ret
        CFI_ENDPROC
        _ASM_NOKPROBE(restore)
+#endif
index c905e89..1f33b3d 100644 (file)
@@ -69,21 +69,20 @@ EXPORT_SYMBOL(copy_in_user);
  * it is not necessary to optimize tail handling.
  */
 __visible unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
+copy_user_handle_tail(char *to, char *from, unsigned len)
 {
-       char c;
-       unsigned zero_len;
-
        for (; len; --len, to++) {
+               char c;
+
                if (__get_user_nocheck(c, from++, sizeof(char)))
                        break;
                if (__put_user_nocheck(c, to, sizeof(char)))
                        break;
        }
-
-       for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
-               if (__put_user_nocheck(c, to++, sizeof(char)))
-                       break;
        clac();
+
+       /* If the destination is a kernel buffer, we always clear the end */
+       if ((unsigned long)to >= TASK_SIZE_MAX)
+               memset(to, 0, len);
        return len;
 }
index 1a2be7c..816488c 100644 (file)
@@ -273,6 +273,9 @@ dd: ESC
 de: ESC
 df: ESC
 # 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
 e0: LOOPNE/LOOPNZ Jb (f64)
 e1: LOOPE/LOOPZ Jb (f64)
 e2: LOOP Jb (f64)
@@ -281,6 +284,10 @@ e4: IN AL,Ib
 e5: IN eAX,Ib
 e6: OUT Ib,AL
 e7: OUT Ib,eAX
+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
+# in "near" jumps and calls is 16-bit. For CALL,
+# push of return address is 16-bit wide, RSP is decremented by 2
+# but is not truncated to 16 bits, unlike RIP.
 e8: CALL Jz (f64)
 e9: JMP-near Jz (f64)
 ea: JMP-far Ap (i64)
@@ -456,6 +463,7 @@ AVXcode: 1
 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
 # 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 80: JO Jz (f64)
 81: JNO Jz (f64)
 82: JB/JC/JNAE Jz (f64)
@@ -842,6 +850,7 @@ EndTable
 GrpTable: Grp5
 0: INC Ev
 1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
 2: CALLN Ev (f64)
 3: CALLF Ep
 4: JMPN Ev (f64)
index ede025f..181c53b 100644 (file)
@@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
        int ret = 0;
 
        /* kprobe_running() needs smp_processor_id() */
-       if (kprobes_built_in() && !user_mode_vm(regs)) {
+       if (kprobes_built_in() && !user_mode(regs)) {
                preempt_disable();
                if (kprobe_running() && kprobe_fault_handler(regs, 14))
                        ret = 1;
@@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
        instr = (void *)convert_ip_to_linear(current, regs);
        max_instr = instr + 15;
 
-       if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+       if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
                return 0;
 
        while (instr < max_instr) {
@@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
        if (error_code & PF_USER)
                return false;
 
-       if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
+       if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
                return false;
 
        return true;
@@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
         * User-mode registers count as a user access even for any
         * potential system fault or CPU buglet:
         */
-       if (user_mode_vm(regs)) {
+       if (user_mode(regs)) {
                local_irq_enable();
                error_code |= PF_USER;
                flags |= FAULT_FLAG_USER;
index a110efc..52417e7 100644 (file)
@@ -179,7 +179,8 @@ static void __init probe_page_size_mask(void)
        if (cpu_has_pge) {
                cr4_set_bits_and_update_boot(X86_CR4_PGE);
                __supported_pte_mask |= _PAGE_GLOBAL;
-       }
+       } else
+               __supported_pte_mask &= ~_PAGE_GLOBAL;
 }
 
 #ifdef CONFIG_X86_32
index 5d04be5..4e664bd 100644 (file)
@@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
 {
        struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
 
-       if (!user_mode_vm(regs)) {
+       if (!user_mode(regs)) {
                unsigned long stack = kernel_stack_pointer(regs);
                if (depth)
                        dump_trace(NULL, regs, (unsigned long *)stack, 0,
index 3d2612b..2fb3847 100644 (file)
@@ -513,31 +513,6 @@ void __init pcibios_set_cache_line_size(void)
        }
 }
 
-/*
- * Some device drivers assume dev->irq won't change after calling
- * pci_disable_device(). So delay releasing of IRQ resource to driver
- * unbinding time. Otherwise it will break PM subsystem and drivers
- * like xen-pciback etc.
- */
-static int pci_irq_notifier(struct notifier_block *nb, unsigned long action,
-                           void *data)
-{
-       struct pci_dev *dev = to_pci_dev(data);
-
-       if (action != BUS_NOTIFY_UNBOUND_DRIVER)
-               return NOTIFY_DONE;
-
-       if (pcibios_disable_irq)
-               pcibios_disable_irq(dev);
-
-       return NOTIFY_OK;
-}
-
-static struct notifier_block pci_irq_nb = {
-       .notifier_call = pci_irq_notifier,
-       .priority = INT_MIN,
-};
-
 int __init pcibios_init(void)
 {
        if (!raw_pci_ops) {
@@ -550,9 +525,6 @@ int __init pcibios_init(void)
 
        if (pci_bf_sort >= pci_force_bf)
                pci_sort_breadthfirst();
-
-       bus_register_notifier(&pci_bus_type, &pci_irq_nb);
-
        return 0;
 }
 
@@ -711,6 +683,12 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
        return 0;
 }
 
+void pcibios_disable_device (struct pci_dev *dev)
+{
+       if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+               pcibios_disable_irq(dev);
+}
+
 int pci_ext_cfg_avail(void)
 {
        if (raw_pci_ext_ops)
index efb8493..852aa4c 100644 (file)
@@ -234,10 +234,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-       if (dev->irq_managed && dev->irq > 0) {
+       if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+           dev->irq > 0) {
                mp_unmap_irq(dev->irq);
                dev->irq_managed = 0;
-               dev->irq = 0;
        }
 }
 
index e71b3db..5dc6ca5 100644 (file)
@@ -1256,9 +1256,22 @@ static int pirq_enable_irq(struct pci_dev *dev)
        return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+       if (dev->power.is_prepared)
+               return true;
+#ifdef CONFIG_PM
+       if (dev->power.runtime_status == RPM_SUSPENDING)
+               return true;
+#endif
+
+       return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
-       if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) {
+       if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
+           dev->irq_managed && dev->irq) {
                mp_unmap_irq(dev->irq);
                dev->irq = 0;
                dev->irq_managed = 0;
index dbc8627..02744df 100644 (file)
@@ -85,12 +85,20 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
        efi_memory_desc_t *virtual_map)
 {
        efi_status_t status;
+       unsigned long flags;
+       pgd_t *save_pgd;
 
-       efi_call_phys_prolog();
+       save_pgd = efi_call_phys_prolog();
+
+       /* Disable interrupts around EFI calls: */
+       local_irq_save(flags);
        status = efi_call_phys(efi_phys.set_virtual_address_map,
                               memory_map_size, descriptor_size,
                               descriptor_version, virtual_map);
-       efi_call_phys_epilog();
+       local_irq_restore(flags);
+
+       efi_call_phys_epilog(save_pgd);
+
        return status;
 }
 
@@ -491,7 +499,8 @@ void __init efi_init(void)
        if (efi_memmap_init())
                return;
 
-       print_efi_memmap();
+       if (efi_enabled(EFI_DBG))
+               print_efi_memmap();
 }
 
 void __init efi_late_init(void)
@@ -939,6 +948,8 @@ static int __init arch_parse_efi_cmdline(char *str)
 {
        if (parse_option_str(str, "old_map"))
                set_bit(EFI_OLD_MEMMAP, &efi.flags);
+       if (parse_option_str(str, "debug"))
+               set_bit(EFI_DBG, &efi.flags);
 
        return 0;
 }
index 40e7cda..ed5b673 100644 (file)
 
 /*
  * To make EFI call EFI runtime service in physical addressing mode we need
- * prolog/epilog before/after the invocation to disable interrupt, to
- * claim EFI runtime service handler exclusively and to duplicate a memory in
- * low memory space say 0 - 3G.
+ * prolog/epilog before/after the invocation to claim the EFI runtime service
+ * handler exclusively and to duplicate a memory mapping in low memory space,
+ * say 0 - 3G.
  */
-static unsigned long efi_rt_eflags;
 
 void efi_sync_low_kernel_mappings(void) {}
 void __init efi_dump_pagetable(void) {}
@@ -57,21 +56,24 @@ void __init efi_map_region(efi_memory_desc_t *md)
 void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
 void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
 
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
 {
        struct desc_ptr gdt_descr;
+       pgd_t *save_pgd;
 
-       local_irq_save(efi_rt_eflags);
-
+       /* Current pgd is swapper_pg_dir, we'll restore it later: */
+       save_pgd = swapper_pg_dir;
        load_cr3(initial_page_table);
        __flush_tlb_all();
 
        gdt_descr.address = __pa(get_cpu_gdt_table(0));
        gdt_descr.size = GDT_SIZE - 1;
        load_gdt(&gdt_descr);
+
+       return save_pgd;
 }
 
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
        struct desc_ptr gdt_descr;
 
@@ -79,10 +81,8 @@ void __init efi_call_phys_epilog(void)
        gdt_descr.size = GDT_SIZE - 1;
        load_gdt(&gdt_descr);
 
-       load_cr3(swapper_pg_dir);
+       load_cr3(save_pgd);
        __flush_tlb_all();
-
-       local_irq_restore(efi_rt_eflags);
 }
 
 void __init efi_runtime_mkexec(void)
index 17e80d8..a0ac0f9 100644 (file)
@@ -41,9 +41,6 @@
 #include <asm/realmode.h>
 #include <asm/time.h>
 
-static pgd_t *save_pgd __initdata;
-static unsigned long efi_flags __initdata;
-
 /*
  * We allocate runtime services regions bottom-up, starting from -4G, i.e.
  * 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
@@ -78,17 +75,18 @@ static void __init early_code_mapping_set_exec(int executable)
        }
 }
 
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
 {
        unsigned long vaddress;
+       pgd_t *save_pgd;
+
        int pgd;
        int n_pgds;
 
        if (!efi_enabled(EFI_OLD_MEMMAP))
-               return;
+               return NULL;
 
        early_code_mapping_set_exec(1);
-       local_irq_save(efi_flags);
 
        n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
        save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
@@ -99,24 +97,29 @@ void __init efi_call_phys_prolog(void)
                set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
        }
        __flush_tlb_all();
+
+       return save_pgd;
 }
 
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
 {
        /*
         * After the lock is released, the original page table is restored.
         */
-       int pgd;
-       int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+       int pgd_idx;
+       int nr_pgds;
 
-       if (!efi_enabled(EFI_OLD_MEMMAP))
+       if (!save_pgd)
                return;
 
-       for (pgd = 0; pgd < n_pgds; pgd++)
-               set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
+       nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+       for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+               set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+
        kfree(save_pgd);
+
        __flush_tlb_all();
-       local_irq_restore(efi_flags);
        early_code_mapping_set_exec(0);
 }
 
index 9947985..3b6ec42 100644 (file)
@@ -415,7 +415,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
        struct reset_args reset_args;
 
        reset_args.sender = sender;
-       cpus_clear(*mask);
+       cpumask_clear(mask);
        /* find a single cpu for each uvhub in this distribution mask */
        maskbits = sizeof(struct pnmask) * BITSPERBYTE;
        /* each bit is a pnode relative to the partition base pnode */
@@ -425,7 +425,7 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp)
                        continue;
                apnode = pnode + bcp->partition_base_pnode;
                cpu = pnode_to_first_cpu(apnode, smaster);
-               cpu_set(cpu, *mask);
+               cpumask_set_cpu(cpu, mask);
        }
 
        /* IPI all cpus; preemption is already disabled */
@@ -1126,7 +1126,7 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
        /* don't actually do a shootdown of the local cpu */
        cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
 
-       if (cpu_isset(cpu, *cpumask))
+       if (cpumask_test_cpu(cpu, cpumask))
                stat->s_ntargself++;
 
        bau_desc = bcp->descriptor_base;
index 3e32ed5..757678f 100644 (file)
@@ -134,7 +134,7 @@ static void do_fpu_end(void)
 static void fix_processor_context(void)
 {
        int cpu = smp_processor_id();
-       struct tss_struct *t = &per_cpu(init_tss, cpu);
+       struct tss_struct *t = &per_cpu(cpu_tss, cpu);
 #ifdef CONFIG_X86_64
        struct desc_struct *desc = get_cpu_gdt_table(cpu);
        tss_desc tss;
index b3560ec..ef8187f 100644 (file)
 110    i386    iopl                    sys_iopl
 111    i386    vhangup                 sys_vhangup
 112    i386    idle
-113    i386    vm86old                 sys_vm86old                     sys32_vm86_warning
+113    i386    vm86old                 sys_vm86old                     sys_ni_syscall
 114    i386    wait4                   sys_wait4                       compat_sys_wait4
 115    i386    swapoff                 sys_swapoff
 116    i386    sysinfo                 sys_sysinfo                     compat_sys_sysinfo
 163    i386    mremap                  sys_mremap
 164    i386    setresuid               sys_setresuid16
 165    i386    getresuid               sys_getresuid16
-166    i386    vm86                    sys_vm86                        sys32_vm86_warning
+166    i386    vm86                    sys_vm86                        sys_ni_syscall
 167    i386    query_module
 168    i386    poll                    sys_poll
 169    i386    nfsservctl
index 8d656fb..9ef32d5 100644 (file)
 169    common  reboot                  sys_reboot
 170    common  sethostname             sys_sethostname
 171    common  setdomainname           sys_setdomainname
-172    common  iopl                    stub_iopl
+172    common  iopl                    sys_iopl
 173    common  ioperm                  sys_ioperm
 174    64      create_module
 175    common  init_module             sys_init_module
index 2d7d9a1..8ffd214 100644 (file)
@@ -64,8 +64,8 @@
  */
 static inline void rdtsc_barrier(void)
 {
-       alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
-       alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+       alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+                         "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
 #endif
index 5cdfa9d..a75d870 100644 (file)
@@ -16,7 +16,7 @@
  */
 
 /* Not going to be implemented by UML, since we have no hardware. */
-#define stub_iopl sys_ni_syscall
+#define sys_iopl sys_ni_syscall
 #define sys_ioperm sys_ni_syscall
 
 /*
index 9793322..40d2473 100644 (file)
@@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode)
        cycle_t ret;
        u64 last;
        u32 version;
+       u32 migrate_count;
        u8 flags;
        unsigned cpu, cpu1;
 
 
        /*
-        * Note: hypervisor must guarantee that:
-        * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
-        * 2. that per-CPU pvclock time info is updated if the
-        *    underlying CPU changes.
-        * 3. that version is increased whenever underlying CPU
-        *    changes.
-        *
+        * When looping to get a consistent (time-info, tsc) pair, we
+        * also need to deal with the possibility we can switch vcpus,
+        * so make sure we always re-fetch time-info for the current vcpu.
         */
        do {
                cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -102,20 +99,27 @@ static notrace cycle_t vread_pvclock(int *mode)
                 * __getcpu() calls (Gleb).
                 */
 
-               pvti = get_pvti(cpu);
+               /* Make sure migrate_count will change if we leave the VCPU. */
+               do {
+                       pvti = get_pvti(cpu);
+                       migrate_count = pvti->migrate_count;
+
+                       cpu1 = cpu;
+                       cpu = __getcpu() & VGETCPU_CPU_MASK;
+               } while (unlikely(cpu != cpu1));
 
                version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
 
                /*
                 * Test we're still on the cpu as well as the version.
-                * We could have been migrated just after the first
-                * vgetcpu but before fetching the version, so we
-                * wouldn't notice a version change.
+                * - We must read TSC of pvti's VCPU.
+                * - KVM doesn't follow the versioning protocol, so data could
+                *   change before version if we left the VCPU.
                 */
-               cpu1 = __getcpu() & VGETCPU_CPU_MASK;
-       } while (unlikely(cpu != cpu1 ||
-                         (pvti->pvti.version & 1) ||
-                         pvti->pvti.version != version));
+               smp_rmb();
+       } while (unlikely((pvti->pvti.version & 1) ||
+                         pvti->pvti.version != version ||
+                         pvti->migrate_count != migrate_count));
 
        if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
                *mode = VCLOCK_NONE;
index 31776d0..d7ec4e2 100644 (file)
@@ -17,6 +17,7 @@
        .text
        .globl __kernel_sigreturn
        .type __kernel_sigreturn,@function
+       nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */
        ALIGN
 __kernel_sigreturn:
 .LSTART_sigreturn:
index 5240f56..81665c9 100644 (file)
@@ -912,6 +912,7 @@ static void xen_load_sp0(struct tss_struct *tss,
        mcs = xen_mc_entry(0);
        MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
        xen_mc_issue(PARAVIRT_LAZY_CPU);
+       tss->x86_tss.sp0 = thread->sp0;
 }
 
 static void xen_set_iopl_mask(unsigned mask)
index 740ae30..b47124d 100644 (file)
@@ -91,6 +91,12 @@ EXPORT_SYMBOL_GPL(xen_p2m_size);
 unsigned long xen_max_p2m_pfn __read_mostly;
 EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
 
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+#else
+#define P2M_LIMIT 0
+#endif
+
 static DEFINE_SPINLOCK(p2m_update_lock);
 
 static unsigned long *p2m_mid_missing_mfn;
@@ -385,9 +391,11 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m)
 void __init xen_vmalloc_p2m_tree(void)
 {
        static struct vm_struct vm;
+       unsigned long p2m_limit;
 
+       p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
        vm.flags = VM_ALLOC;
-       vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
+       vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
                        PMD_SIZE * PMDS_PER_MID_PAGE);
        vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
        pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
@@ -563,7 +571,7 @@ static bool alloc_p2m(unsigned long pfn)
                if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
                        p2m_init(p2m);
                else
-                       p2m_init_identity(p2m, pfn);
+                       p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1));
 
                spin_lock_irqsave(&p2m_update_lock, flags);
 
index 08e8489..7413ee3 100644 (file)
@@ -445,15 +445,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 {
        int rc;
 
-       per_cpu(current_task, cpu) = idle;
-#ifdef CONFIG_X86_32
-       irq_ctx_init(cpu);
-#else
-       clear_tsk_thread_flag(idle, TIF_FORK);
-#endif
-       per_cpu(kernel_stack, cpu) =
-               (unsigned long)task_stack_page(idle) -
-               KERNEL_STACK_OFFSET + THREAD_SIZE;
+       common_cpu_up(cpu, idle);
 
        xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
@@ -468,10 +460,6 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
        if (rc)
                return rc;
 
-       if (num_online_cpus() == 1)
-               /* Just in case we booted with a single CPU. */
-               alternatives_enable_smp();
-
        rc = xen_smp_intr_init(cpu);
        if (rc)
                return rc;
index c4df9db..d949769 100644 (file)
@@ -1,5 +1,5 @@
 #include <linux/types.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include <xen/interface/xen.h>
 #include <xen/grant_table.h>
@@ -81,17 +81,14 @@ void xen_arch_post_suspend(int cancelled)
 
 static void xen_vcpu_notify_restore(void *data)
 {
-       unsigned long reason = (unsigned long)data;
-
        /* Boot processor notified via generic timekeeping_resume() */
-       if ( smp_processor_id() == 0)
+       if (smp_processor_id() == 0)
                return;
 
-       clockevents_notify(reason, NULL);
+       tick_resume_local();
 }
 
 void xen_arch_resume(void)
 {
-       on_each_cpu(xen_vcpu_notify_restore,
-                   (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
+       on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
 }
index 53adefd..985fc3e 100644 (file)
@@ -68,11 +68,11 @@ ENTRY(xen_sysret64)
         * We're already on the usermode stack at this point, but
         * still with the kernel gs, so we can easily switch back
         */
-       movq %rsp, PER_CPU_VAR(old_rsp)
+       movq %rsp, PER_CPU_VAR(rsp_scratch)
        movq PER_CPU_VAR(kernel_stack), %rsp
 
        pushq $__USER_DS
-       pushq PER_CPU_VAR(old_rsp)
+       pushq PER_CPU_VAR(rsp_scratch)
        pushq %r11
        pushq $__USER_CS
        pushq %rcx
@@ -87,11 +87,11 @@ ENTRY(xen_sysret32)
         * We're already on the usermode stack at this point, but
         * still with the kernel gs, so we can easily switch back
         */
-       movq %rsp, PER_CPU_VAR(old_rsp)
+       movq %rsp, PER_CPU_VAR(rsp_scratch)
        movq PER_CPU_VAR(kernel_stack), %rsp
 
        pushq $__USER32_DS
-       pushq PER_CPU_VAR(old_rsp)
+       pushq PER_CPU_VAR(rsp_scratch)
        pushq %r11
        pushq $__USER32_CS
        pushq %rcx
index fc1ff3b..fd3fee8 100644 (file)
@@ -592,7 +592,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
        if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) {
                struct bio_vec *bprev;
 
-               bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1];
+               bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1];
                if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset))
                        return false;
        }
index d53a764..be3290c 100644 (file)
@@ -278,9 +278,11 @@ static int bt_get(struct blk_mq_alloc_data *data,
                /*
                 * We're out of tags on this hardware queue, kick any
                 * pending IO submits before going to sleep waiting for
-                * some to complete.
+                * some to complete. Note that hctx can be NULL here for
+                * reserved tag allocation.
                 */
-               blk_mq_run_hw_queue(hctx, false);
+               if (hctx)
+                       blk_mq_run_hw_queue(hctx, false);
 
                /*
                 * Retry tag allocation after running the hardware queue,
index 4f4bea2..33c4285 100644 (file)
@@ -1457,7 +1457,7 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 
                do {
                        page = alloc_pages_node(set->numa_node,
-                               GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
+                               GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO,
                                this_order);
                        if (page)
                                break;
@@ -1479,8 +1479,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
                left -= to_do * rq_size;
                for (j = 0; j < to_do; j++) {
                        tags->rqs[i] = p;
-                       tags->rqs[i]->atomic_flags = 0;
-                       tags->rqs[i]->cmd_flags = 0;
                        if (set->ops->init_request) {
                                if (set->ops->init_request(set->driver_data,
                                                tags->rqs[i], hctx_idx, i,
@@ -1938,7 +1936,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
         */
        if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release,
                            PERCPU_REF_INIT_ATOMIC, GFP_KERNEL))
-               goto err_map;
+               goto err_mq_usage;
 
        setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
        blk_queue_rq_timeout(q, 30000);
@@ -1981,7 +1979,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
        blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 
        if (blk_mq_init_hw_queues(q, set))
-               goto err_hw;
+               goto err_mq_usage;
 
        mutex_lock(&all_q_mutex);
        list_add_tail(&q->all_q_node, &all_q_list);
@@ -1993,7 +1991,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
        return q;
 
-err_hw:
+err_mq_usage:
        blk_cleanup_queue(q);
 err_hctxs:
        kfree(map);
index 6ed2cbe..12600bf 100644 (file)
@@ -585,7 +585,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                                     b->physical_block_size);
 
        t->io_min = max(t->io_min, b->io_min);
-       t->io_opt = lcm(t->io_opt, b->io_opt);
+       t->io_opt = lcm_not_zero(t->io_opt, b->io_opt);
 
        t->cluster &= b->cluster;
        t->discard_zeroes_data &= b->discard_zeroes_data;
@@ -616,7 +616,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                    b->raid_partial_stripes_expensive);
 
        /* Find lowest common alignment_offset */
-       t->alignment_offset = lcm(t->alignment_offset, alignment)
+       t->alignment_offset = lcm_not_zero(t->alignment_offset, alignment)
                % max(t->physical_block_size, t->io_min);
 
        /* Verify that new alignment_offset is on a logical block boundary */
@@ -643,7 +643,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                                                      b->max_discard_sectors);
                t->discard_granularity = max(t->discard_granularity,
                                             b->discard_granularity);
-               t->discard_alignment = lcm(t->discard_alignment, alignment) %
+               t->discard_alignment = lcm_not_zero(t->discard_alignment, alignment) %
                        t->discard_granularity;
        }
 
index 657964e..37fb190 100644 (file)
@@ -65,6 +65,7 @@ struct lpss_private_data;
 
 struct lpss_device_desc {
        unsigned int flags;
+       const char *clk_con_id;
        unsigned int prv_offset;
        size_t prv_size_override;
        void (*setup)(struct lpss_private_data *pdata);
@@ -140,6 +141,7 @@ static struct lpss_device_desc lpt_i2c_dev_desc = {
 
 static struct lpss_device_desc lpt_uart_dev_desc = {
        .flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_LTR,
+       .clk_con_id = "baudclk",
        .prv_offset = 0x800,
        .setup = lpss_uart_setup,
 };
@@ -156,6 +158,7 @@ static struct lpss_device_desc byt_pwm_dev_desc = {
 
 static struct lpss_device_desc byt_uart_dev_desc = {
        .flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_SAVE_CTX,
+       .clk_con_id = "baudclk",
        .prv_offset = 0x800,
        .setup = lpss_uart_setup,
 };
@@ -313,7 +316,7 @@ out:
                return PTR_ERR(clk);
 
        pdata->clk = clk;
-       clk_register_clkdev(clk, NULL, devname);
+       clk_register_clkdev(clk, dev_desc->clk_con_id, devname);
        return 0;
 }
 
index c7b105c..6bc9cbc 100644 (file)
@@ -26,7 +26,7 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 #include <linux/cpu.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <asm/mwait.h>
@@ -41,8 +41,6 @@ static unsigned long power_saving_mwait_eax;
 
 static unsigned char tsc_detected_unstable;
 static unsigned char tsc_marked_unstable;
-static unsigned char lapic_detected_unstable;
-static unsigned char lapic_marked_unstable;
 
 static void power_saving_mwait_init(void)
 {
@@ -82,13 +80,10 @@ static void power_saving_mwait_init(void)
                 */
                if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
                        tsc_detected_unstable = 1;
-               if (!boot_cpu_has(X86_FEATURE_ARAT))
-                       lapic_detected_unstable = 1;
                break;
        default:
-               /* TSC & LAPIC could halt in idle */
+               /* TSC could halt in idle */
                tsc_detected_unstable = 1;
-               lapic_detected_unstable = 1;
        }
 #endif
 }
@@ -155,7 +150,6 @@ static int power_saving_thread(void *data)
        sched_setscheduler(current, SCHED_RR, &param);
 
        while (!kthread_should_stop()) {
-               int cpu;
                unsigned long expire_time;
 
                try_to_freeze();
@@ -177,28 +171,15 @@ static int power_saving_thread(void *data)
                                mark_tsc_unstable("TSC halts in idle");
                                tsc_marked_unstable = 1;
                        }
-                       if (lapic_detected_unstable && !lapic_marked_unstable) {
-                               int i;
-                               /* LAPIC could halt in idle, so notify users */
-                               for_each_online_cpu(i)
-                                       clockevents_notify(
-                                               CLOCK_EVT_NOTIFY_BROADCAST_ON,
-                                               &i);
-                               lapic_marked_unstable = 1;
-                       }
                        local_irq_disable();
-                       cpu = smp_processor_id();
-                       if (lapic_marked_unstable)
-                               clockevents_notify(
-                                       CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+                       tick_broadcast_enable();
+                       tick_broadcast_enter();
                        stop_critical_timings();
 
                        mwait_idle_with_hints(power_saving_mwait_eax, 1);
 
                        start_critical_timings();
-                       if (lapic_marked_unstable)
-                               clockevents_notify(
-                                       CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+                       tick_broadcast_exit();
                        local_irq_enable();
 
                        if (time_before(expire_time, jiffies)) {
index e7f718d..b1def41 100644 (file)
@@ -485,6 +485,14 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
        if (!pin || !dev->irq_managed || dev->irq <= 0)
                return;
 
+       /* Keep IOAPIC pin configuration when suspending */
+       if (dev->dev.power.is_prepared)
+               return;
+#ifdef CONFIG_PM
+       if (dev->dev.power.runtime_status == RPM_SUSPENDING)
+               return;
+#endif
+
        entry = acpi_pci_irq_lookup(dev, pin);
        if (!entry)
                return;
@@ -505,6 +513,5 @@ void acpi_pci_irq_disable(struct pci_dev *dev)
        if (gsi >= 0) {
                acpi_unregister_gsi(gsi);
                dev->irq_managed = 0;
-               dev->irq = 0;
        }
 }
index c6bb9f1..39e0c8e 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/sched.h>       /* need_resched() */
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <linux/cpuidle.h>
 #include <linux/syscore_ops.h>
 #include <acpi/processor.h>
@@ -157,12 +157,11 @@ static void lapic_timer_check_state(int state, struct acpi_processor *pr,
 static void __lapic_timer_propagate_broadcast(void *arg)
 {
        struct acpi_processor *pr = (struct acpi_processor *) arg;
-       unsigned long reason;
 
-       reason = pr->power.timer_broadcast_on_state < INT_MAX ?
-               CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
-
-       clockevents_notify(reason, &pr->id);
+       if (pr->power.timer_broadcast_on_state < INT_MAX)
+               tick_broadcast_enable();
+       else
+               tick_broadcast_disable();
 }
 
 static void lapic_timer_propagate_broadcast(struct acpi_processor *pr)
@@ -179,11 +178,10 @@ static void lapic_timer_state_broadcast(struct acpi_processor *pr,
        int state = cx - pr->power.states;
 
        if (state >= pr->power.timer_broadcast_on_state) {
-               unsigned long reason;
-
-               reason = broadcast ?  CLOCK_EVT_NOTIFY_BROADCAST_ENTER :
-                       CLOCK_EVT_NOTIFY_BROADCAST_EXIT;
-               clockevents_notify(reason, &pr->id);
+               if (broadcast)
+                       tick_broadcast_enter();
+               else
+                       tick_broadcast_exit();
        }
 }
 
@@ -922,7 +920,7 @@ static int acpi_processor_setup_cpuidle_states(struct acpi_processor *pr)
                return -EINVAL;
 
        drv->safe_state_index = -1;
-       for (i = 0; i < CPUIDLE_STATE_MAX; i++) {
+       for (i = CPUIDLE_DRIVER_STATE_START; i < CPUIDLE_STATE_MAX; i++) {
                drv->states[i].name[0] = '\0';
                drv->states[i].desc[0] = '\0';
        }
index 33b09b6..6607f3c 100644 (file)
@@ -551,7 +551,6 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 {
        void *page_addr;
        unsigned long user_page_addr;
-       struct vm_struct tmp_area;
        struct page **page;
        struct mm_struct *mm;
 
@@ -600,10 +599,11 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
                                proc->pid, page_addr);
                        goto err_alloc_page_failed;
                }
-               tmp_area.addr = page_addr;
-               tmp_area.size = PAGE_SIZE + PAGE_SIZE /* guard page? */;
-               ret = map_vm_area(&tmp_area, PAGE_KERNEL, page);
-               if (ret) {
+               ret = map_kernel_range_noflush((unsigned long)page_addr,
+                                       PAGE_SIZE, PAGE_KERNEL, page);
+               flush_cache_vmap((unsigned long)page_addr,
+                               (unsigned long)page_addr + PAGE_SIZE);
+               if (ret != 1) {
                        pr_err("%d: binder_alloc_buf failed to map page at %p in kernel\n",
                               proc->pid, page_addr);
                        goto err_map_kernel_failed;
index 4c35f08..23dac3b 100644 (file)
@@ -4204,9 +4204,18 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
        { "PIONEER DVD-RW  DVR-216D",   NULL,   ATA_HORKAGE_NOSETXFER },
 
        /* devices that don't properly handle queued TRIM commands */
-       { "Micron_M[56]*",              NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+       { "Micron_M500*",               NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Crucial_CT*M500*",           NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Micron_M5[15]0*",            "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Crucial_CT*M550*",           "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Crucial_CT*MX100*",          "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
+                                               ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Samsung SSD 850 PRO*",       NULL,   ATA_HORKAGE_NO_NCQ_TRIM |
                                                ATA_HORKAGE_ZERO_AFTER_TRIM, },
-       { "Crucial_CT*SSD*",            NULL,   ATA_HORKAGE_NO_NCQ_TRIM, },
 
        /*
         * As defined, the DRAT (Deterministic Read After Trim) and RZAT
@@ -4226,6 +4235,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
         */
        { "INTEL*SSDSC2MH*",            NULL,   0, },
 
+       { "Micron*",                    NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
+       { "Crucial*",                   NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
        { "INTEL*SSD*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
        { "SSD*INTEL*",                 NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
        { "Samsung*SSD*",               NULL,   ATA_HORKAGE_ZERO_AFTER_TRIM, },
@@ -4737,7 +4748,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_device *dev, int tag)
                return NULL;
 
        /* libsas case */
-       if (!ap->scsi_host) {
+       if (ap->flags & ATA_FLAG_SAS_HOST) {
                tag = ata_sas_allocate_tag(ap);
                if (tag < 0)
                        return NULL;
@@ -4776,7 +4787,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
        tag = qc->tag;
        if (likely(ata_tag_valid(tag))) {
                qc->tag = ATA_TAG_POISON;
-               if (!ap->scsi_host)
+               if (ap->flags & ATA_FLAG_SAS_HOST)
                        ata_sas_free_tag(tag, ap);
        }
 }
index f9054cd..5389579 100644 (file)
@@ -869,6 +869,8 @@ try_offline_again:
         */
        ata_msleep(ap, 1);
 
+       sata_set_spd(link);
+
        /*
         * Now, bring the host controller online again, this can take time
         * as PHY reset and communication establishment, 1st D2H FIS and
index beb8b27..a13587b 100644 (file)
@@ -243,4 +243,12 @@ extern struct regcache_ops regcache_rbtree_ops;
 extern struct regcache_ops regcache_lzo_ops;
 extern struct regcache_ops regcache_flat_ops;
 
+static inline const char *regmap_name(const struct regmap *map)
+{
+       if (map->dev)
+               return dev_name(map->dev);
+
+       return map->name;
+}
+
 #endif
index d453a2c..81751a4 100644 (file)
@@ -307,7 +307,7 @@ static int regcache_rbtree_insert_to_block(struct regmap *map,
        if (pos == 0) {
                memmove(blk + offset * map->cache_word_size,
                        blk, rbnode->blklen * map->cache_word_size);
-               bitmap_shift_right(present, present, offset, blklen);
+               bitmap_shift_left(present, present, offset, blklen);
        }
 
        /* update the rbnode block, its size and the base register */
index f373c35..87db989 100644 (file)
@@ -218,7 +218,7 @@ int regcache_read(struct regmap *map,
                ret = map->cache_ops->read(map, reg, value);
 
                if (ret == 0)
-                       trace_regmap_reg_read_cache(map->dev, reg, *value);
+                       trace_regmap_reg_read_cache(map, reg, *value);
 
                return ret;
        }
@@ -311,7 +311,7 @@ int regcache_sync(struct regmap *map)
        dev_dbg(map->dev, "Syncing %s cache\n",
                map->cache_ops->name);
        name = map->cache_ops->name;
-       trace_regcache_sync(map->dev, name, "start");
+       trace_regcache_sync(map, name, "start");
 
        if (!map->cache_dirty)
                goto out;
@@ -346,7 +346,7 @@ out:
 
        regmap_async_complete(map);
 
-       trace_regcache_sync(map->dev, name, "stop");
+       trace_regcache_sync(map, name, "stop");
 
        return ret;
 }
@@ -381,7 +381,7 @@ int regcache_sync_region(struct regmap *map, unsigned int min,
        name = map->cache_ops->name;
        dev_dbg(map->dev, "Syncing %s cache from %d-%d\n", name, min, max);
 
-       trace_regcache_sync(map->dev, name, "start region");
+       trace_regcache_sync(map, name, "start region");
 
        if (!map->cache_dirty)
                goto out;
@@ -401,7 +401,7 @@ out:
 
        regmap_async_complete(map);
 
-       trace_regcache_sync(map->dev, name, "stop region");
+       trace_regcache_sync(map, name, "stop region");
 
        return ret;
 }
@@ -428,7 +428,7 @@ int regcache_drop_region(struct regmap *map, unsigned int min,
 
        map->lock(map->lock_arg);
 
-       trace_regcache_drop_region(map->dev, min, max);
+       trace_regcache_drop_region(map, min, max);
 
        ret = map->cache_ops->drop(map, min, max);
 
@@ -455,7 +455,7 @@ void regcache_cache_only(struct regmap *map, bool enable)
        map->lock(map->lock_arg);
        WARN_ON(map->cache_bypass && enable);
        map->cache_only = enable;
-       trace_regmap_cache_only(map->dev, enable);
+       trace_regmap_cache_only(map, enable);
        map->unlock(map->lock_arg);
 }
 EXPORT_SYMBOL_GPL(regcache_cache_only);
@@ -493,7 +493,7 @@ void regcache_cache_bypass(struct regmap *map, bool enable)
        map->lock(map->lock_arg);
        WARN_ON(map->cache_only && enable);
        map->cache_bypass = enable;
-       trace_regmap_cache_bypass(map->dev, enable);
+       trace_regmap_cache_bypass(map, enable);
        map->unlock(map->lock_arg);
 }
 EXPORT_SYMBOL_GPL(regcache_cache_bypass);
@@ -608,7 +608,8 @@ static int regcache_sync_block_single(struct regmap *map, void *block,
        for (i = start; i < end; i++) {
                regtmp = block_base + (i * map->reg_stride);
 
-               if (!regcache_reg_present(cache_present, i))
+               if (!regcache_reg_present(cache_present, i) ||
+                   !regmap_writeable(map, regtmp))
                        continue;
 
                val = regcache_get_val(map, block, i);
@@ -677,7 +678,8 @@ static int regcache_sync_block_raw(struct regmap *map, void *block,
        for (i = start; i < end; i++) {
                regtmp = block_base + (i * map->reg_stride);
 
-               if (!regcache_reg_present(cache_present, i)) {
+               if (!regcache_reg_present(cache_present, i) ||
+                   !regmap_writeable(map, regtmp)) {
                        ret = regcache_sync_block_raw_flush(map, &data,
                                                            base, regtmp);
                        if (ret != 0)
index 6299a50..a6c3f75 100644 (file)
@@ -499,7 +499,8 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags,
                goto err_alloc;
        }
 
-       ret = request_threaded_irq(irq, NULL, regmap_irq_thread, irq_flags,
+       ret = request_threaded_irq(irq, NULL, regmap_irq_thread,
+                                  irq_flags | IRQF_ONESHOT,
                                   chip->name, d);
        if (ret != 0) {
                dev_err(map->dev, "Failed to request IRQ %d for %s: %d\n",
index f99b098..dbfe6a6 100644 (file)
@@ -1281,7 +1281,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
        if (map->async && map->bus->async_write) {
                struct regmap_async *async;
 
-               trace_regmap_async_write_start(map->dev, reg, val_len);
+               trace_regmap_async_write_start(map, reg, val_len);
 
                spin_lock_irqsave(&map->async_lock, flags);
                async = list_first_entry_or_null(&map->async_free,
@@ -1339,8 +1339,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
                return ret;
        }
 
-       trace_regmap_hw_write_start(map->dev, reg,
-                                   val_len / map->format.val_bytes);
+       trace_regmap_hw_write_start(map, reg, val_len / map->format.val_bytes);
 
        /* If we're doing a single register write we can probably just
         * send the work_buf directly, otherwise try to do a gather
@@ -1372,8 +1371,7 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
                kfree(buf);
        }
 
-       trace_regmap_hw_write_done(map->dev, reg,
-                                  val_len / map->format.val_bytes);
+       trace_regmap_hw_write_done(map, reg, val_len / map->format.val_bytes);
 
        return ret;
 }
@@ -1407,12 +1405,12 @@ static int _regmap_bus_formatted_write(void *context, unsigned int reg,
 
        map->format.format_write(map, reg, val);
 
-       trace_regmap_hw_write_start(map->dev, reg, 1);
+       trace_regmap_hw_write_start(map, reg, 1);
 
        ret = map->bus->write(map->bus_context, map->work_buf,
                              map->format.buf_size);
 
-       trace_regmap_hw_write_done(map->dev, reg, 1);
+       trace_regmap_hw_write_done(map, reg, 1);
 
        return ret;
 }
@@ -1470,7 +1468,7 @@ int _regmap_write(struct regmap *map, unsigned int reg,
                dev_info(map->dev, "%x <= %x\n", reg, val);
 #endif
 
-       trace_regmap_reg_write(map->dev, reg, val);
+       trace_regmap_reg_write(map, reg, val);
 
        return map->reg_write(context, reg, val);
 }
@@ -1773,7 +1771,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
        for (i = 0; i < num_regs; i++) {
                int reg = regs[i].reg;
                int val = regs[i].def;
-               trace_regmap_hw_write_start(map->dev, reg, 1);
+               trace_regmap_hw_write_start(map, reg, 1);
                map->format.format_reg(u8, reg, map->reg_shift);
                u8 += reg_bytes + pad_bytes;
                map->format.format_val(u8, val, 0);
@@ -1788,7 +1786,7 @@ static int _regmap_raw_multi_reg_write(struct regmap *map,
 
        for (i = 0; i < num_regs; i++) {
                int reg = regs[i].reg;
-               trace_regmap_hw_write_done(map->dev, reg, 1);
+               trace_regmap_hw_write_done(map, reg, 1);
        }
        return ret;
 }
@@ -2059,15 +2057,13 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val,
         */
        u8[0] |= map->read_flag_mask;
 
-       trace_regmap_hw_read_start(map->dev, reg,
-                                  val_len / map->format.val_bytes);
+       trace_regmap_hw_read_start(map, reg, val_len / map->format.val_bytes);
 
        ret = map->bus->read(map->bus_context, map->work_buf,
                             map->format.reg_bytes + map->format.pad_bytes,
                             val, val_len);
 
-       trace_regmap_hw_read_done(map->dev, reg,
-                                 val_len / map->format.val_bytes);
+       trace_regmap_hw_read_done(map, reg, val_len / map->format.val_bytes);
 
        return ret;
 }
@@ -2123,7 +2119,7 @@ static int _regmap_read(struct regmap *map, unsigned int reg,
                        dev_info(map->dev, "%x => %x\n", reg, *val);
 #endif
 
-               trace_regmap_reg_read(map->dev, reg, *val);
+               trace_regmap_reg_read(map, reg, *val);
 
                if (!map->cache_bypass)
                        regcache_write(map, reg, *val);
@@ -2480,7 +2476,7 @@ void regmap_async_complete_cb(struct regmap_async *async, int ret)
        struct regmap *map = async->map;
        bool wake;
 
-       trace_regmap_async_io_complete(map->dev);
+       trace_regmap_async_io_complete(map);
 
        spin_lock(&map->async_lock);
        list_move(&async->list, &map->async_free);
@@ -2525,7 +2521,7 @@ int regmap_async_complete(struct regmap *map)
        if (!map->bus || !map->bus->async_write)
                return 0;
 
-       trace_regmap_async_complete_start(map->dev);
+       trace_regmap_async_complete_start(map);
 
        wait_event(map->async_waitq, regmap_async_is_done(map));
 
@@ -2534,7 +2530,7 @@ int regmap_async_complete(struct regmap *map)
        map->async_ret = 0;
        spin_unlock_irqrestore(&map->async_lock, flags);
 
-       trace_regmap_async_complete_done(map->dev);
+       trace_regmap_async_complete_done(map);
 
        return ret;
 }
index 4bc2a5c..a98c41f 100644 (file)
@@ -803,10 +803,6 @@ static int __init nbd_init(void)
                return -EINVAL;
        }
 
-       nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
-       if (!nbd_dev)
-               return -ENOMEM;
-
        part_shift = 0;
        if (max_part > 0) {
                part_shift = fls(max_part);
@@ -828,6 +824,10 @@ static int __init nbd_init(void)
        if (nbds_max > 1UL << (MINORBITS - part_shift))
                return -EINVAL;
 
+       nbd_dev = kcalloc(nbds_max, sizeof(*nbd_dev), GFP_KERNEL);
+       if (!nbd_dev)
+               return -ENOMEM;
+
        for (i = 0; i < nbds_max; i++) {
                struct gendisk *disk = alloc_disk(1 << part_shift);
                if (!disk)
index ceb32dd..e23be20 100644 (file)
@@ -3003,6 +3003,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        }
        get_device(dev->device);
 
+       INIT_LIST_HEAD(&dev->node);
        INIT_WORK(&dev->probe_work, nvme_async_probe);
        schedule_work(&dev->probe_work);
        return 0;
index 79524ed..8753b0f 100644 (file)
@@ -125,6 +125,7 @@ static int ipmi_powernv_recv(struct ipmi_smi_powernv *smi)
        spin_lock_irqsave(&smi->msg_lock, flags);
 
        if (!smi->cur_msg) {
+               spin_unlock_irqrestore(&smi->msg_lock, flags);
                pr_warn("no current message?\n");
                return 0;
        }
index f6646ed..518585c 100644 (file)
@@ -262,6 +262,11 @@ struct smi_info {
         */
        bool supports_event_msg_buff;
 
+       /*
+        * Can we clear the global enables receive irq bit?
+        */
+       bool cannot_clear_recv_irq_bit;
+
        /*
         * Did we get an attention that we did not handle?
         */
@@ -461,6 +466,9 @@ static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
  * allocate messages, we just leave them in the BMC and run the system
  * polled until we can allocate some memory.  Once we have some
  * memory, we will re-enable the interrupt.
+ *
+ * Note that we cannot just use disable_irq(), since the interrupt may
+ * be shared.
  */
 static inline bool disable_si_irq(struct smi_info *smi_info)
 {
@@ -549,20 +557,15 @@ static u8 current_global_enables(struct smi_info *smi_info, u8 base,
 
        if (smi_info->supports_event_msg_buff)
                enables |= IPMI_BMC_EVT_MSG_BUFF;
-       else
-               enables &= ~IPMI_BMC_EVT_MSG_BUFF;
 
-       if (smi_info->irq && !smi_info->interrupt_disabled)
+       if ((smi_info->irq && !smi_info->interrupt_disabled) ||
+           smi_info->cannot_clear_recv_irq_bit)
                enables |= IPMI_BMC_RCV_MSG_INTR;
-       else
-               enables &= ~IPMI_BMC_RCV_MSG_INTR;
 
        if (smi_info->supports_event_msg_buff &&
            smi_info->irq && !smi_info->interrupt_disabled)
 
                enables |= IPMI_BMC_EVT_MSG_INTR;
-       else
-               enables &= ~IPMI_BMC_EVT_MSG_INTR;
 
        *irq_on = enables & (IPMI_BMC_EVT_MSG_INTR | IPMI_BMC_RCV_MSG_INTR);
 
@@ -2900,6 +2903,96 @@ static int try_get_dev_id(struct smi_info *smi_info)
        return rv;
 }
 
+/*
+ * Some BMCs do not support clearing the receive irq bit in the global
+ * enables (even if they don't support interrupts on the BMC).  Check
+ * for this and handle it properly.
+ */
+static void check_clr_rcv_irq(struct smi_info *smi_info)
+{
+       unsigned char         msg[3];
+       unsigned char         *resp;
+       unsigned long         resp_len;
+       int                   rv;
+
+       resp = kmalloc(IPMI_MAX_MSG_LENGTH, GFP_KERNEL);
+       if (!resp) {
+               printk(KERN_WARNING PFX "Out of memory allocating response for"
+                      " global enables command, cannot check recv irq bit"
+                      " handling.\n");
+               return;
+       }
+
+       msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+       msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
+       smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
+
+       rv = wait_for_msg_done(smi_info);
+       if (rv) {
+               printk(KERN_WARNING PFX "Error getting response from get"
+                      " global enables command, cannot check recv irq bit"
+                      " handling.\n");
+               goto out;
+       }
+
+       resp_len = smi_info->handlers->get_result(smi_info->si_sm,
+                                                 resp, IPMI_MAX_MSG_LENGTH);
+
+       if (resp_len < 4 ||
+                       resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 ||
+                       resp[1] != IPMI_GET_BMC_GLOBAL_ENABLES_CMD   ||
+                       resp[2] != 0) {
+               printk(KERN_WARNING PFX "Invalid return from get global"
+                      " enables command, cannot check recv irq bit"
+                      " handling.\n");
+               rv = -EINVAL;
+               goto out;
+       }
+
+       if ((resp[3] & IPMI_BMC_RCV_MSG_INTR) == 0)
+               /* Already clear, should work ok. */
+               goto out;
+
+       msg[0] = IPMI_NETFN_APP_REQUEST << 2;
+       msg[1] = IPMI_SET_BMC_GLOBAL_ENABLES_CMD;
+       msg[2] = resp[3] & ~IPMI_BMC_RCV_MSG_INTR;
+       smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
+
+       rv = wait_for_msg_done(smi_info);
+       if (rv) {
+               printk(KERN_WARNING PFX "Error getting response from set"
+                      " global enables command, cannot check recv irq bit"
+                      " handling.\n");
+               goto out;
+       }
+
+       resp_len = smi_info->handlers->get_result(smi_info->si_sm,
+                                                 resp, IPMI_MAX_MSG_LENGTH);
+
+       if (resp_len < 3 ||
+                       resp[0] != (IPMI_NETFN_APP_REQUEST | 1) << 2 ||
+                       resp[1] != IPMI_SET_BMC_GLOBAL_ENABLES_CMD) {
+               printk(KERN_WARNING PFX "Invalid return from get global"
+                      " enables command, cannot check recv irq bit"
+                      " handling.\n");
+               rv = -EINVAL;
+               goto out;
+       }
+
+       if (resp[2] != 0) {
+               /*
+                * An error when clearing the receive irq bit means
+                * the BMC does not support clearing it.
+                */
+               printk(KERN_WARNING PFX "The BMC does not support clearing"
+                      " the recv irq bit, compensating, but the BMC needs to"
+                      " be fixed.\n");
+               smi_info->cannot_clear_recv_irq_bit = true;
+       }
+ out:
+       kfree(resp);
+}
+
 static int try_enable_event_buffer(struct smi_info *smi_info)
 {
        unsigned char         msg[3];
@@ -3395,6 +3488,8 @@ static int try_smi_init(struct smi_info *new_smi)
                goto out_err;
        }
 
+       check_clr_rcv_irq(new_smi);
+
        setup_oem_data_handler(new_smi);
        setup_xaction_handlers(new_smi);
 
index f6e378d..f40e3bd 100644 (file)
@@ -468,11 +468,13 @@ static int ipmi_ssif_thread(void *data)
                int result;
 
                /* Wait for something to do */
-               wait_for_completion(&ssif_info->wake_thread);
-               init_completion(&ssif_info->wake_thread);
-
+               result = wait_for_completion_interruptible(
+                                               &ssif_info->wake_thread);
                if (ssif_info->stopping)
                        break;
+               if (result == -ERESTARTSYS)
+                       continue;
+               init_completion(&ssif_info->wake_thread);
 
                if (ssif_info->i2c_read_write == I2C_SMBUS_WRITE) {
                        result = i2c_smbus_write_block_data(
index 1d278cc..e096e9c 100644 (file)
@@ -140,24 +140,24 @@ static int tpm_dev_add_device(struct tpm_chip *chip)
 {
        int rc;
 
-       rc = device_add(&chip->dev);
+       rc = cdev_add(&chip->cdev, chip->dev.devt, 1);
        if (rc) {
                dev_err(&chip->dev,
-                       "unable to device_register() %s, major %d, minor %d, err=%d\n",
+                       "unable to cdev_add() %s, major %d, minor %d, err=%d\n",
                        chip->devname, MAJOR(chip->dev.devt),
                        MINOR(chip->dev.devt), rc);
 
+               device_unregister(&chip->dev);
                return rc;
        }
 
-       rc = cdev_add(&chip->cdev, chip->dev.devt, 1);
+       rc = device_add(&chip->dev);
        if (rc) {
                dev_err(&chip->dev,
-                       "unable to cdev_add() %s, major %d, minor %d, err=%d\n",
+                       "unable to device_register() %s, major %d, minor %d, err=%d\n",
                        chip->devname, MAJOR(chip->dev.devt),
                        MINOR(chip->dev.devt), rc);
 
-               device_unregister(&chip->dev);
                return rc;
        }
 
@@ -174,27 +174,17 @@ static void tpm_dev_del_device(struct tpm_chip *chip)
  * tpm_chip_register() - create a character device for the TPM chip
  * @chip: TPM chip to use.
  *
- * Creates a character device for the TPM chip and adds sysfs interfaces for
- * the device, PPI and TCPA. As the last step this function adds the
- * chip to the list of TPM chips available for use.
+ * Creates a character device for the TPM chip and adds sysfs attributes for
+ * the device. As the last step this function adds the chip to the list of TPM
+ * chips available for in-kernel use.
  *
- * NOTE: This function should be only called after the chip initialization
- * is complete.
- *
- * Called from tpm_<specific>.c probe function only for devices
- * the driver has determined it should claim.  Prior to calling
- * this function the specific probe function has called pci_enable_device
- * upon errant exit from this function specific probe function should call
- * pci_disable_device
+ * This function should only be called after the chip initialization is
+ * complete.
  */
 int tpm_chip_register(struct tpm_chip *chip)
 {
        int rc;
 
-       rc = tpm_dev_add_device(chip);
-       if (rc)
-               return rc;
-
        /* Populate sysfs for TPM1 devices. */
        if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
                rc = tpm_sysfs_add_device(chip);
@@ -208,6 +198,10 @@ int tpm_chip_register(struct tpm_chip *chip)
                chip->bios_dir = tpm_bios_log_setup(chip->devname);
        }
 
+       rc = tpm_dev_add_device(chip);
+       if (rc)
+               return rc;
+
        /* Make the chip available. */
        spin_lock(&driver_lock);
        list_add_rcu(&chip->list, &tpm_chip_list);
index b1e53e3..42ffa5e 100644 (file)
@@ -124,7 +124,7 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
 {
        struct ibmvtpm_dev *ibmvtpm;
        struct ibmvtpm_crq crq;
-       u64 *word = (u64 *) &crq;
+       __be64 *word = (__be64 *)&crq;
        int rc;
 
        ibmvtpm = (struct ibmvtpm_dev *)TPM_VPRIV(chip);
@@ -145,11 +145,11 @@ static int tpm_ibmvtpm_send(struct tpm_chip *chip, u8 *buf, size_t count)
        memcpy((void *)ibmvtpm->rtce_buf, (void *)buf, count);
        crq.valid = (u8)IBMVTPM_VALID_CMD;
        crq.msg = (u8)VTPM_TPM_COMMAND;
-       crq.len = (u16)count;
-       crq.data = ibmvtpm->rtce_dma_handle;
+       crq.len = cpu_to_be16(count);
+       crq.data = cpu_to_be32(ibmvtpm->rtce_dma_handle);
 
-       rc = ibmvtpm_send_crq(ibmvtpm->vdev, cpu_to_be64(word[0]),
-                             cpu_to_be64(word[1]));
+       rc = ibmvtpm_send_crq(ibmvtpm->vdev, be64_to_cpu(word[0]),
+                             be64_to_cpu(word[1]));
        if (rc != H_SUCCESS) {
                dev_err(ibmvtpm->dev, "tpm_ibmvtpm_send failed rc=%d\n", rc);
                rc = 0;
index f595f14..6af9289 100644 (file)
@@ -22,9 +22,9 @@
 struct ibmvtpm_crq {
        u8 valid;
        u8 msg;
-       u16 len;
-       u32 data;
-       u64 reserved;
+       __be16 len;
+       __be32 data;
+       __be64 reserved;
 } __attribute__((packed, aligned(8)));
 
 struct ibmvtpm_crq_queue {
index fae2dbb..72d7028 100644 (file)
@@ -142,6 +142,7 @@ struct ports_device {
         * notification
         */
        struct work_struct control_work;
+       struct work_struct config_work;
 
        struct list_head ports;
 
@@ -1837,10 +1838,21 @@ static void config_intr(struct virtio_device *vdev)
 
        portdev = vdev->priv;
 
+       if (!use_multiport(portdev))
+               schedule_work(&portdev->config_work);
+}
+
+static void config_work_handler(struct work_struct *work)
+{
+       struct ports_device *portdev;
+
+       portdev = container_of(work, struct ports_device, control_work);
        if (!use_multiport(portdev)) {
+               struct virtio_device *vdev;
                struct port *port;
                u16 rows, cols;
 
+               vdev = portdev->vdev;
                virtio_cread(vdev, struct virtio_console_config, cols, &cols);
                virtio_cread(vdev, struct virtio_console_config, rows, &rows);
 
@@ -2040,12 +2052,14 @@ static int virtcons_probe(struct virtio_device *vdev)
 
        virtio_device_ready(portdev->vdev);
 
+       INIT_WORK(&portdev->config_work, &config_work_handler);
+       INIT_WORK(&portdev->control_work, &control_work_handler);
+
        if (multiport) {
                unsigned int nr_added_bufs;
 
                spin_lock_init(&portdev->c_ivq_lock);
                spin_lock_init(&portdev->c_ovq_lock);
-               INIT_WORK(&portdev->control_work, &control_work_handler);
 
                nr_added_bufs = fill_queue(portdev->c_ivq,
                                           &portdev->c_ivq_lock);
@@ -2113,6 +2127,8 @@ static void virtcons_remove(struct virtio_device *vdev)
        /* Finish up work that's lined up */
        if (use_multiport(portdev))
                cancel_work_sync(&portdev->control_work);
+       else
+               cancel_work_sync(&portdev->config_work);
 
        list_for_each_entry_safe(port, port2, &portdev->ports, list)
                unplug_port(port);
@@ -2164,6 +2180,7 @@ static int virtcons_freeze(struct virtio_device *vdev)
 
        virtqueue_disable_cb(portdev->c_ivq);
        cancel_work_sync(&portdev->control_work);
+       cancel_work_sync(&portdev->config_work);
        /*
         * Once more: if control_work_handler() was running, it would
         * enable the cb as the last step.
index db7f8bc..25006a8 100644 (file)
@@ -144,12 +144,6 @@ static unsigned long clk_divider_recalc_rate(struct clk_hw *hw,
                                   divider->flags);
 }
 
-/*
- * The reverse of DIV_ROUND_UP: The maximum number which
- * divided by m is r
- */
-#define MULT_ROUND_UP(r, m) ((r) * (m) + (m) - 1)
-
 static bool _is_valid_table_div(const struct clk_div_table *table,
                                                         unsigned int div)
 {
@@ -225,19 +219,24 @@ static int _div_round_closest(const struct clk_div_table *table,
                              unsigned long parent_rate, unsigned long rate,
                              unsigned long flags)
 {
-       int up, down, div;
+       int up, down;
+       unsigned long up_rate, down_rate;
 
-       up = down = div = DIV_ROUND_CLOSEST(parent_rate, rate);
+       up = DIV_ROUND_UP(parent_rate, rate);
+       down = parent_rate / rate;
 
        if (flags & CLK_DIVIDER_POWER_OF_TWO) {
-               up = __roundup_pow_of_two(div);
-               down = __rounddown_pow_of_two(div);
+               up = __roundup_pow_of_two(up);
+               down = __rounddown_pow_of_two(down);
        } else if (table) {
-               up = _round_up_table(table, div);
-               down = _round_down_table(table, div);
+               up = _round_up_table(table, up);
+               down = _round_down_table(table, down);
        }
 
-       return (up - div) <= (div - down) ? up : down;
+       up_rate = DIV_ROUND_UP(parent_rate, up);
+       down_rate = DIV_ROUND_UP(parent_rate, down);
+
+       return (rate - up_rate) <= (down_rate - rate) ? up : down;
 }
 
 static int _div_round(const struct clk_div_table *table,
@@ -313,7 +312,7 @@ static int clk_divider_bestdiv(struct clk_hw *hw, unsigned long rate,
                        return i;
                }
                parent_rate = __clk_round_rate(__clk_get_parent(hw->clk),
-                               MULT_ROUND_UP(rate, i));
+                                              rate * i);
                now = DIV_ROUND_UP(parent_rate, i);
                if (_is_best_div(rate, now, best, flags)) {
                        bestdiv = i;
@@ -353,7 +352,7 @@ static long clk_divider_round_rate(struct clk_hw *hw, unsigned long rate,
                bestdiv = readl(divider->reg) >> divider->shift;
                bestdiv &= div_mask(divider->width);
                bestdiv = _get_div(divider->table, bestdiv, divider->flags);
-               return bestdiv;
+               return DIV_ROUND_UP(*prate, bestdiv);
        }
 
        return divider_round_rate(hw, rate, prate, divider->table,
index eb01529..237f23f 100644 (file)
@@ -1350,7 +1350,6 @@ static unsigned long clk_core_get_rate(struct clk_core *clk)
 
        return rate;
 }
-EXPORT_SYMBOL_GPL(clk_core_get_rate);
 
 /**
  * clk_get_rate - return the rate of clk
@@ -2170,6 +2169,32 @@ int clk_get_phase(struct clk *clk)
        return clk_core_get_phase(clk->core);
 }
 
+/**
+ * clk_is_match - check if two clk's point to the same hardware clock
+ * @p: clk compared against q
+ * @q: clk compared against p
+ *
+ * Returns true if the two struct clk pointers both point to the same hardware
+ * clock node. Put differently, returns true if struct clk *p and struct clk *q
+ * share the same struct clk_core object.
+ *
+ * Returns false otherwise. Note that two NULL clks are treated as matching.
+ */
+bool clk_is_match(const struct clk *p, const struct clk *q)
+{
+       /* trivial case: identical struct clk's or both NULL */
+       if (p == q)
+               return true;
+
+       /* true if clk->core pointers match. Avoid derefing garbage */
+       if (!IS_ERR_OR_NULL(p) && !IS_ERR_OR_NULL(q))
+               if (p->core == q->core)
+                       return true;
+
+       return false;
+}
+EXPORT_SYMBOL_GPL(clk_is_match);
+
 /**
  * __clk_init - initialize the data structures in a struct clk
  * @dev:       device initializing this clk, placeholder for now
index b0b562b..e60feff 100644 (file)
@@ -48,6 +48,17 @@ static struct clk_pll pll3 = {
        },
 };
 
+static struct clk_regmap pll4_vote = {
+       .enable_reg = 0x34c0,
+       .enable_mask = BIT(4),
+       .hw.init = &(struct clk_init_data){
+               .name = "pll4_vote",
+               .parent_names = (const char *[]){ "pll4" },
+               .num_parents = 1,
+               .ops = &clk_pll_vote_ops,
+       },
+};
+
 static struct clk_pll pll8 = {
        .l_reg = 0x3144,
        .m_reg = 0x3148,
@@ -3023,6 +3034,7 @@ static struct clk_branch rpm_msg_ram_h_clk = {
 
 static struct clk_regmap *gcc_msm8960_clks[] = {
        [PLL3] = &pll3.clkr,
+       [PLL4_VOTE] = &pll4_vote,
        [PLL8] = &pll8.clkr,
        [PLL8_VOTE] = &pll8_vote,
        [PLL14] = &pll14.clkr,
@@ -3247,6 +3259,7 @@ static const struct qcom_reset_map gcc_msm8960_resets[] = {
 
 static struct clk_regmap *gcc_apq8064_clks[] = {
        [PLL3] = &pll3.clkr,
+       [PLL4_VOTE] = &pll4_vote,
        [PLL8] = &pll8.clkr,
        [PLL8_VOTE] = &pll8_vote,
        [PLL14] = &pll14.clkr,
index 121ffde..c9ff27b 100644 (file)
@@ -462,7 +462,6 @@ static struct platform_driver lcc_ipq806x_driver = {
        .remove         = lcc_ipq806x_remove,
        .driver         = {
                .name   = "lcc-ipq806x",
-               .owner  = THIS_MODULE,
                .of_match_table = lcc_ipq806x_match_table,
        },
 };
index a75a408..e2c8632 100644 (file)
@@ -417,8 +417,8 @@ static struct clk_rcg slimbus_src = {
                .mnctr_en_bit = 8,
                .mnctr_reset_bit = 7,
                .mnctr_mode_shift = 5,
-               .n_val_shift = 16,
-               .m_val_shift = 16,
+               .n_val_shift = 24,
+               .m_val_shift = 8,
                .width = 8,
        },
        .p = {
@@ -547,7 +547,7 @@ static int lcc_msm8960_probe(struct platform_device *pdev)
                return PTR_ERR(regmap);
 
        /* Use the correct frequency plan depending on speed of PLL4 */
-       val = regmap_read(regmap, 0x4, &val);
+       regmap_read(regmap, 0x4, &val);
        if (val == 0x12) {
                slimbus_src.freq_tbl = clk_tbl_aif_osr_492;
                mi2s_osr_src.freq_tbl = clk_tbl_aif_osr_492;
@@ -574,7 +574,6 @@ static struct platform_driver lcc_msm8960_driver = {
        .remove         = lcc_msm8960_remove,
        .driver         = {
                .name   = "lcc-msm8960",
-               .owner  = THIS_MODULE,
                .of_match_table = lcc_msm8960_match_table,
        },
 };
index 6ef8963..d216406 100644 (file)
@@ -84,7 +84,7 @@ static int ti_fapll_enable(struct clk_hw *hw)
        struct fapll_data *fd = to_fapll(hw);
        u32 v = readl_relaxed(fd->base);
 
-       v |= (1 << FAPLL_MAIN_PLLEN);
+       v |= FAPLL_MAIN_PLLEN;
        writel_relaxed(v, fd->base);
 
        return 0;
@@ -95,7 +95,7 @@ static void ti_fapll_disable(struct clk_hw *hw)
        struct fapll_data *fd = to_fapll(hw);
        u32 v = readl_relaxed(fd->base);
 
-       v &= ~(1 << FAPLL_MAIN_PLLEN);
+       v &= ~FAPLL_MAIN_PLLEN;
        writel_relaxed(v, fd->base);
 }
 
@@ -104,7 +104,7 @@ static int ti_fapll_is_enabled(struct clk_hw *hw)
        struct fapll_data *fd = to_fapll(hw);
        u32 v = readl_relaxed(fd->base);
 
-       return v & (1 << FAPLL_MAIN_PLLEN);
+       return v & FAPLL_MAIN_PLLEN;
 }
 
 static unsigned long ti_fapll_recalc_rate(struct clk_hw *hw,
index 68161f7..a0b036c 100644 (file)
@@ -192,6 +192,7 @@ config SYS_SUPPORTS_EM_STI
 config SH_TIMER_CMT
        bool "Renesas CMT timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        default SYS_SUPPORTS_SH_CMT
        help
          This enables build of a clocksource and clockevent driver for
@@ -201,6 +202,7 @@ config SH_TIMER_CMT
 config SH_TIMER_MTU2
        bool "Renesas MTU2 timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        default SYS_SUPPORTS_SH_MTU2
        help
          This enables build of a clockevent driver for the Multi-Function
@@ -210,6 +212,7 @@ config SH_TIMER_MTU2
 config SH_TIMER_TMU
        bool "Renesas TMU timer driver" if COMPILE_TEST
        depends on GENERIC_CLOCKEVENTS
+       depends on HAS_IOMEM
        default SYS_SUPPORTS_SH_TMU
        help
          This enables build of a clocksource and clockevent driver for
index a3025e7..2664696 100644 (file)
@@ -661,17 +661,17 @@ static const struct of_device_id arch_timer_mem_of_match[] __initconst = {
 };
 
 static bool __init
-arch_timer_probed(int type, const struct of_device_id *matches)
+arch_timer_needs_probing(int type, const struct of_device_id *matches)
 {
        struct device_node *dn;
-       bool probed = true;
+       bool needs_probing = false;
 
        dn = of_find_matching_node(NULL, matches);
        if (dn && of_device_is_available(dn) && !(arch_timers_present & type))
-               probed = false;
+               needs_probing = true;
        of_node_put(dn);
 
-       return probed;
+       return needs_probing;
 }
 
 static void __init arch_timer_common_init(void)
@@ -680,9 +680,9 @@ static void __init arch_timer_common_init(void)
 
        /* Wait until both nodes are probed if we have two timers */
        if ((arch_timers_present & mask) != mask) {
-               if (!arch_timer_probed(ARCH_MEM_TIMER, arch_timer_mem_of_match))
+               if (arch_timer_needs_probing(ARCH_MEM_TIMER, arch_timer_mem_of_match))
                        return;
-               if (!arch_timer_probed(ARCH_CP15_TIMER, arch_timer_of_match))
+               if (arch_timer_needs_probing(ARCH_CP15_TIMER, arch_timer_of_match))
                        return;
        }
 
index d305fb0..a19a3f6 100644 (file)
@@ -108,7 +108,7 @@ static void __init add_clocksource(struct device_node *source_timer)
 
 static u64 notrace read_sched_clock(void)
 {
-       return ~__raw_readl(sched_io_base);
+       return ~readl_relaxed(sched_io_base);
 }
 
 static const struct of_device_id sptimer_ids[] __initconst = {
index d0a7bd6..dc3c6ee 100644 (file)
@@ -210,7 +210,7 @@ static int em_sti_clocksource_enable(struct clocksource *cs)
 
        ret = em_sti_start(p, USER_CLOCKSOURCE);
        if (!ret)
-               __clocksource_updatefreq_hz(cs, p->rate);
+               __clocksource_update_freq_hz(cs, p->rate);
        return ret;
 }
 
index 2bd13b5..b8ff3c6 100644 (file)
@@ -641,7 +641,7 @@ static int sh_cmt_clocksource_enable(struct clocksource *cs)
 
        ret = sh_cmt_start(ch, FLAG_CLOCKSOURCE);
        if (!ret) {
-               __clocksource_updatefreq_hz(cs, ch->rate);
+               __clocksource_update_freq_hz(cs, ch->rate);
                ch->cs_enabled = true;
        }
        return ret;
index f150ca8..b6b8fa3 100644 (file)
@@ -272,7 +272,7 @@ static int sh_tmu_clocksource_enable(struct clocksource *cs)
 
        ret = sh_tmu_enable(ch);
        if (!ret) {
-               __clocksource_updatefreq_hz(cs, ch->rate);
+               __clocksource_update_freq_hz(cs, ch->rate);
                ch->cs_enabled = true;
        }
 
index f4a9c00..1928a89 100644 (file)
@@ -170,7 +170,15 @@ static void __init sun4i_timer_init(struct device_node *node)
               TIMER_CTL_CLK_SRC(TIMER_CTL_CLK_SRC_OSC24M),
               timer_base + TIMER_CTL_REG(1));
 
-       sched_clock_register(sun4i_timer_sched_read, 32, rate);
+       /*
+        * sched_clock_register does not have priorities, and on sun6i and
+        * later there is a better sched_clock registered by arm_arch_timer.c
+        */
+       if (of_machine_is_compatible("allwinner,sun4i-a10") ||
+           of_machine_is_compatible("allwinner,sun5i-a13") ||
+           of_machine_is_compatible("allwinner,sun5i-a10s"))
+               sched_clock_register(sun4i_timer_sched_read, 32, rate);
+
        clocksource_mmio_init(timer_base + TIMER_CNTVAL_REG(1), node->name,
                              rate, 350, 32, clocksource_mmio_readl_down);
 
index d2616ef..5a112d7 100644 (file)
 static void __iomem *timer_reg_base;
 static void __iomem *rtc_base;
 
-static struct timespec persistent_ts;
+static struct timespec64 persistent_ts;
 static u64 persistent_ms, last_persistent_ms;
 
 static struct delay_timer tegra_delay_timer;
 
 #define timer_writel(value, reg) \
-       __raw_writel(value, timer_reg_base + (reg))
+       writel_relaxed(value, timer_reg_base + (reg))
 #define timer_readl(reg) \
-       __raw_readl(timer_reg_base + (reg))
+       readl_relaxed(timer_reg_base + (reg))
 
 static int tegra_timer_set_next_event(unsigned long cycles,
                                         struct clock_event_device *evt)
@@ -120,26 +120,25 @@ static u64 tegra_rtc_read_ms(void)
 }
 
 /*
- * tegra_read_persistent_clock -  Return time from a persistent clock.
+ * tegra_read_persistent_clock64 -  Return time from a persistent clock.
  *
  * Reads the time from a source which isn't disabled during PM, the
  * 32k sync timer.  Convert the cycles elapsed since last read into
- * nsecs and adds to a monotonically increasing timespec.
+ * nsecs and adds to a monotonically increasing timespec64.
  * Care must be taken that this function is not called while the
  * tegra_rtc driver could be executing to avoid race conditions
  * on the RTC shadow register
  */
-static void tegra_read_persistent_clock(struct timespec *ts)
+static void tegra_read_persistent_clock64(struct timespec64 *ts)
 {
        u64 delta;
-       struct timespec *tsp = &persistent_ts;
 
        last_persistent_ms = persistent_ms;
        persistent_ms = tegra_rtc_read_ms();
        delta = persistent_ms - last_persistent_ms;
 
-       timespec_add_ns(tsp, delta * NSEC_PER_MSEC);
-       *ts = *tsp;
+       timespec64_add_ns(&persistent_ts, delta * NSEC_PER_MSEC);
+       *ts = persistent_ts;
 }
 
 static unsigned long tegra_delay_timer_read_counter_long(void)
@@ -252,7 +251,7 @@ static void __init tegra20_init_rtc(struct device_node *np)
        else
                clk_prepare_enable(clk);
 
-       register_persistent_clock(NULL, tegra_read_persistent_clock);
+       register_persistent_clock(NULL, tegra_read_persistent_clock64);
 }
 CLOCKSOURCE_OF_DECLARE(tegra20_rtc, "nvidia,tegra20-rtc", tegra20_init_rtc);
 
index bba62f9..5b6e3d5 100644 (file)
@@ -111,7 +111,7 @@ static irqreturn_t efm32_clock_event_handler(int irq, void *dev_id)
 static struct efm32_clock_event_ddata clock_event_ddata = {
        .evtdev = {
                .name = "efm32 clockevent",
-               .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_MODE_PERIODIC,
+               .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
                .set_mode = efm32_clock_event_set_mode,
                .set_next_event = efm32_clock_event_set_next_event,
                .rating = 200,
@@ -225,12 +225,12 @@ static int __init efm32_clockevent_init(struct device_node *np)
        clock_event_ddata.base = base;
        clock_event_ddata.periodic_top = DIV_ROUND_CLOSEST(rate, 1024 * HZ);
 
-       setup_irq(irq, &efm32_clock_event_irq);
-
        clockevents_config_and_register(&clock_event_ddata.evtdev,
                                        DIV_ROUND_CLOSEST(rate, 1024),
                                        0xf, 0xffff);
 
+       setup_irq(irq, &efm32_clock_event_irq);
+
        return 0;
 
 err_get_irq:
index b5b4d45..c0304ff 100644 (file)
@@ -61,12 +61,12 @@ static inline struct pit_data *clkevt_to_pit_data(struct clock_event_device *clk
 
 static inline unsigned int pit_read(void __iomem *base, unsigned int reg_offset)
 {
-       return __raw_readl(base + reg_offset);
+       return readl_relaxed(base + reg_offset);
 }
 
 static inline void pit_write(void __iomem *base, unsigned int reg_offset, unsigned long value)
 {
-       __raw_writel(value, base + reg_offset);
+       writel_relaxed(value, base + reg_offset);
 }
 
 /*
index 0226844..28aa4b7 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/irq.h>
 #include <linux/irqreturn.h>
 #include <linux/reset.h>
-#include <linux/sched_clock.h>
+#include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
 
 #define TIMER_SYNC_TICKS       3
 
-static void __iomem *timer_base;
-static u32 ticks_per_jiffy;
+struct sun5i_timer {
+       void __iomem            *base;
+       struct clk              *clk;
+       struct notifier_block   clk_rate_cb;
+       u32                     ticks_per_jiffy;
+};
+
+#define to_sun5i_timer(x) \
+       container_of(x, struct sun5i_timer, clk_rate_cb)
+
+struct sun5i_timer_clksrc {
+       struct sun5i_timer      timer;
+       struct clocksource      clksrc;
+};
+
+#define to_sun5i_timer_clksrc(x) \
+       container_of(x, struct sun5i_timer_clksrc, clksrc)
+
+struct sun5i_timer_clkevt {
+       struct sun5i_timer              timer;
+       struct clock_event_device       clkevt;
+};
+
+#define to_sun5i_timer_clkevt(x) \
+       container_of(x, struct sun5i_timer_clkevt, clkevt)
 
 /*
  * When we disable a timer, we need to wait at least for 2 cycles of
@@ -46,30 +69,30 @@ static u32 ticks_per_jiffy;
  * that is already set up and runs at the same frequency as the other
  * timers, and we never will be disabled.
  */
-static void sun5i_clkevt_sync(void)
+static void sun5i_clkevt_sync(struct sun5i_timer_clkevt *ce)
 {
-       u32 old = readl(timer_base + TIMER_CNTVAL_LO_REG(1));
+       u32 old = readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1));
 
-       while ((old - readl(timer_base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS)
+       while ((old - readl(ce->timer.base + TIMER_CNTVAL_LO_REG(1))) < TIMER_SYNC_TICKS)
                cpu_relax();
 }
 
-static void sun5i_clkevt_time_stop(u8 timer)
+static void sun5i_clkevt_time_stop(struct sun5i_timer_clkevt *ce, u8 timer)
 {
-       u32 val = readl(timer_base + TIMER_CTL_REG(timer));
-       writel(val & ~TIMER_CTL_ENABLE, timer_base + TIMER_CTL_REG(timer));
+       u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer));
+       writel(val & ~TIMER_CTL_ENABLE, ce->timer.base + TIMER_CTL_REG(timer));
 
-       sun5i_clkevt_sync();
+       sun5i_clkevt_sync(ce);
 }
 
-static void sun5i_clkevt_time_setup(u8 timer, u32 delay)
+static void sun5i_clkevt_time_setup(struct sun5i_timer_clkevt *ce, u8 timer, u32 delay)
 {
-       writel(delay, timer_base + TIMER_INTVAL_LO_REG(timer));
+       writel(delay, ce->timer.base + TIMER_INTVAL_LO_REG(timer));
 }
 
-static void sun5i_clkevt_time_start(u8 timer, bool periodic)
+static void sun5i_clkevt_time_start(struct sun5i_timer_clkevt *ce, u8 timer, bool periodic)
 {
-       u32 val = readl(timer_base + TIMER_CTL_REG(timer));
+       u32 val = readl(ce->timer.base + TIMER_CTL_REG(timer));
 
        if (periodic)
                val &= ~TIMER_CTL_ONESHOT;
@@ -77,80 +100,230 @@ static void sun5i_clkevt_time_start(u8 timer, bool periodic)
                val |= TIMER_CTL_ONESHOT;
 
        writel(val | TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
-              timer_base + TIMER_CTL_REG(timer));
+              ce->timer.base + TIMER_CTL_REG(timer));
 }
 
 static void sun5i_clkevt_mode(enum clock_event_mode mode,
-                             struct clock_event_device *clk)
+                             struct clock_event_device *clkevt)
 {
+       struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt);
+
        switch (mode) {
        case CLOCK_EVT_MODE_PERIODIC:
-               sun5i_clkevt_time_stop(0);
-               sun5i_clkevt_time_setup(0, ticks_per_jiffy);
-               sun5i_clkevt_time_start(0, true);
+               sun5i_clkevt_time_stop(ce, 0);
+               sun5i_clkevt_time_setup(ce, 0, ce->timer.ticks_per_jiffy);
+               sun5i_clkevt_time_start(ce, 0, true);
                break;
        case CLOCK_EVT_MODE_ONESHOT:
-               sun5i_clkevt_time_stop(0);
-               sun5i_clkevt_time_start(0, false);
+               sun5i_clkevt_time_stop(ce, 0);
+               sun5i_clkevt_time_start(ce, 0, false);
                break;
        case CLOCK_EVT_MODE_UNUSED:
        case CLOCK_EVT_MODE_SHUTDOWN:
        default:
-               sun5i_clkevt_time_stop(0);
+               sun5i_clkevt_time_stop(ce, 0);
                break;
        }
 }
 
 static int sun5i_clkevt_next_event(unsigned long evt,
-                                  struct clock_event_device *unused)
+                                  struct clock_event_device *clkevt)
 {
-       sun5i_clkevt_time_stop(0);
-       sun5i_clkevt_time_setup(0, evt - TIMER_SYNC_TICKS);
-       sun5i_clkevt_time_start(0, false);
+       struct sun5i_timer_clkevt *ce = to_sun5i_timer_clkevt(clkevt);
+
+       sun5i_clkevt_time_stop(ce, 0);
+       sun5i_clkevt_time_setup(ce, 0, evt - TIMER_SYNC_TICKS);
+       sun5i_clkevt_time_start(ce, 0, false);
 
        return 0;
 }
 
-static struct clock_event_device sun5i_clockevent = {
-       .name = "sun5i_tick",
-       .rating = 340,
-       .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
-       .set_mode = sun5i_clkevt_mode,
-       .set_next_event = sun5i_clkevt_next_event,
-};
-
-
 static irqreturn_t sun5i_timer_interrupt(int irq, void *dev_id)
 {
-       struct clock_event_device *evt = (struct clock_event_device *)dev_id;
+       struct sun5i_timer_clkevt *ce = (struct sun5i_timer_clkevt *)dev_id;
 
-       writel(0x1, timer_base + TIMER_IRQ_ST_REG);
-       evt->event_handler(evt);
+       writel(0x1, ce->timer.base + TIMER_IRQ_ST_REG);
+       ce->clkevt.event_handler(&ce->clkevt);
 
        return IRQ_HANDLED;
 }
 
-static struct irqaction sun5i_timer_irq = {
-       .name = "sun5i_timer0",
-       .flags = IRQF_TIMER | IRQF_IRQPOLL,
-       .handler = sun5i_timer_interrupt,
-       .dev_id = &sun5i_clockevent,
-};
+/*
+ * Clocksource read callback: sample timer 1's counter register and return
+ * its bitwise inverse (presumably because the hardware counts down and the
+ * clocksource core expects an increasing value -- confirm against the
+ * datasheet).
+ */
+static cycle_t sun5i_clksrc_read(struct clocksource *clksrc)
+{
+       struct sun5i_timer_clksrc *src = to_sun5i_timer_clksrc(clksrc);
+       u32 count = readl(src->timer.base + TIMER_CNTVAL_LO_REG(1));
+
+       return ~count;
+}
 
-static u64 sun5i_timer_sched_read(void)
+static int sun5i_rate_cb_clksrc(struct notifier_block *nb,
+                               unsigned long event, void *data)
 {
-       return ~readl(timer_base + TIMER_CNTVAL_LO_REG(1));
+       struct clk_notifier_data *ndata = data;
+       struct sun5i_timer *timer = to_sun5i_timer(nb);
+       struct sun5i_timer_clksrc *cs = container_of(timer, struct sun5i_timer_clksrc, timer);
+
+       switch (event) {
+       case PRE_RATE_CHANGE:
+               clocksource_unregister(&cs->clksrc);
+               break;
+
+       case POST_RATE_CHANGE:
+               clocksource_register_hz(&cs->clksrc, ndata->new_rate);
+               break;
+
+       default:
+               break;
+       }
+
+       return NOTIFY_DONE;
+}
+
+/*
+ * sun5i_setup_clocksource - register timer 1 as a clocksource
+ * @node: device node; its name is used as the clocksource name
+ * @base: mapped timer registers
+ * @clk:  timer clock; enabled here and watched for rate changes
+ * @irq:  not used by this function
+ *
+ * Allocates a sun5i_timer_clksrc, enables @clk, installs the rate-change
+ * notifier, starts timer 1 with an all-ones interval in reload mode and
+ * registers the clocksource at the current clock rate.
+ *
+ * Returns 0 on success, otherwise -ENOMEM or the error returned by the
+ * failing call. On failure the notifier, the clock enable and the
+ * allocation are undone in reverse order.
+ */
+static int __init sun5i_setup_clocksource(struct device_node *node,
+                                         void __iomem *base,
+                                         struct clk *clk, int irq)
+{
+       struct sun5i_timer_clksrc *cs;
+       unsigned long rate;
+       int ret;
+
+       cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+       if (!cs)
+               return -ENOMEM;
+
+       ret = clk_prepare_enable(clk);
+       if (ret) {
+               pr_err("Couldn't enable parent clock\n");
+               goto err_free;
+       }
+
+       rate = clk_get_rate(clk);
+
+       cs->timer.base = base;
+       cs->timer.clk = clk;
+       cs->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clksrc;
+       cs->timer.clk_rate_cb.next = NULL;
+
+       ret = clk_notifier_register(clk, &cs->timer.clk_rate_cb);
+       if (ret) {
+               pr_err("Unable to register clock notifier.\n");
+               goto err_disable_clk;
+       }
+
+       /* Load the largest interval and start timer 1 with reload enabled */
+       writel(~0, base + TIMER_INTVAL_LO_REG(1));
+       writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
+              base + TIMER_CTL_REG(1));
+
+       cs->clksrc.name = node->name;
+       cs->clksrc.rating = 340;
+       cs->clksrc.read = sun5i_clksrc_read;
+       cs->clksrc.mask = CLOCKSOURCE_MASK(32);
+       cs->clksrc.flags = CLOCK_SOURCE_IS_CONTINUOUS;
+
+       ret = clocksource_register_hz(&cs->clksrc, rate);
+       if (ret) {
+               pr_err("Couldn't register clock source.\n");
+               goto err_remove_notifier;
+       }
+
+       return 0;
+
+       /* NOTE(review): timer 1 is not stopped again on this error path */
+err_remove_notifier:
+       clk_notifier_unregister(clk, &cs->timer.clk_rate_cb);
+err_disable_clk:
+       clk_disable_unprepare(clk);
+err_free:
+       kfree(cs);
+       return ret;
+}
+
+/*
+ * Clock rate notifier for the clock event timer.
+ *
+ * Only POST_RATE_CHANGE is acted upon: the clock event device is told the
+ * new frequency and the cached ticks-per-jiffy value is recomputed from it.
+ * Every event, handled or not, yields NOTIFY_DONE.
+ */
+static int sun5i_rate_cb_clkevt(struct notifier_block *nb,
+                               unsigned long event, void *data)
+{
+       struct clk_notifier_data *rate_info = data;
+       struct sun5i_timer_clkevt *evt_timer;
+       struct sun5i_timer *tmr;
+
+       if (event != POST_RATE_CHANGE)
+               return NOTIFY_DONE;
+
+       tmr = to_sun5i_timer(nb);
+       evt_timer = container_of(tmr, struct sun5i_timer_clkevt, timer);
+
+       clockevents_update_freq(&evt_timer->clkevt, rate_info->new_rate);
+       evt_timer->timer.ticks_per_jiffy = DIV_ROUND_UP(rate_info->new_rate, HZ);
+
+       return NOTIFY_DONE;
+}
+
+/*
+ * sun5i_setup_clockevent - register timer 0 as a clock event device
+ * @node: device node; its name is used as the clock event device name
+ * @base: mapped timer registers
+ * @clk:  timer clock; enabled here and watched for rate changes
+ * @irq:  interrupt line of timer 0
+ *
+ * Allocates a sun5i_timer_clkevt, enables @clk, installs the rate-change
+ * notifier, requests @irq, enables the timer 0 interrupt in the hardware
+ * and finally registers the clock event device.
+ *
+ * Returns 0 on success, otherwise -ENOMEM or the error returned by the
+ * failing call. On failure the notifier, the clock enable and the
+ * allocation are undone in reverse order.
+ */
+static int __init sun5i_setup_clockevent(struct device_node *node, void __iomem *base,
+                                        struct clk *clk, int irq)
+{
+       struct sun5i_timer_clkevt *ce;
+       unsigned long rate;
+       int ret;
+       u32 val;
+
+       ce = kzalloc(sizeof(*ce), GFP_KERNEL);
+       if (!ce)
+               return -ENOMEM;
+
+       ret = clk_prepare_enable(clk);
+       if (ret) {
+               pr_err("Couldn't enable parent clock\n");
+               goto err_free;
+       }
+
+       rate = clk_get_rate(clk);
+
+       ce->timer.base = base;
+       ce->timer.ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
+       ce->timer.clk = clk;
+       ce->timer.clk_rate_cb.notifier_call = sun5i_rate_cb_clkevt;
+       ce->timer.clk_rate_cb.next = NULL;
+
+       ret = clk_notifier_register(clk, &ce->timer.clk_rate_cb);
+       if (ret) {
+               pr_err("Unable to register clock notifier.\n");
+               goto err_disable_clk;
+       }
+
+       ce->clkevt.name = node->name;
+       ce->clkevt.features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT;
+       ce->clkevt.set_next_event = sun5i_clkevt_next_event;
+       ce->clkevt.set_mode = sun5i_clkevt_mode;
+       ce->clkevt.rating = 340;
+       ce->clkevt.irq = irq;
+       ce->clkevt.cpumask = cpu_possible_mask;
+
+       /*
+        * Claim the interrupt line before doing anything that this function
+        * cannot undo. Once the clock event device is registered the core
+        * keeps a pointer to ce->clkevt, so a later failure followed by
+        * kfree(ce) would leave that pointer dangling.
+        */
+       ret = request_irq(irq, sun5i_timer_interrupt, IRQF_TIMER | IRQF_IRQPOLL,
+                         "sun5i_timer0", ce);
+       if (ret) {
+               pr_err("Unable to register interrupt\n");
+               goto err_remove_notifier;
+       }
+
+       /* Enable timer0 interrupt */
+       val = readl(base + TIMER_IRQ_EN_REG);
+       writel(val | TIMER_IRQ_EN(0), base + TIMER_IRQ_EN_REG);
+
+       clockevents_config_and_register(&ce->clkevt, rate,
+                                       TIMER_SYNC_TICKS, 0xffffffff);
+
+       return 0;
+
+err_remove_notifier:
+       clk_notifier_unregister(clk, &ce->timer.clk_rate_cb);
+err_disable_clk:
+       clk_disable_unprepare(clk);
+err_free:
+       kfree(ce);
+       return ret;
 }
 
 static void __init sun5i_timer_init(struct device_node *node)
 {
        struct reset_control *rstc;
-       unsigned long rate;
+       void __iomem *timer_base;
        struct clk *clk;
-       int ret, irq;
-       u32 val;
+       int irq;
 
-       timer_base = of_iomap(node, 0);
+       timer_base = of_io_request_and_map(node, 0, of_node_full_name(node));
        if (!timer_base)
                panic("Can't map registers");
 
@@ -161,36 +334,13 @@ static void __init sun5i_timer_init(struct device_node *node)
        clk = of_clk_get(node, 0);
        if (IS_ERR(clk))
                panic("Can't get timer clock");
-       clk_prepare_enable(clk);
-       rate = clk_get_rate(clk);
 
        rstc = of_reset_control_get(node, NULL);
        if (!IS_ERR(rstc))
                reset_control_deassert(rstc);
 
-       writel(~0, timer_base + TIMER_INTVAL_LO_REG(1));
-       writel(TIMER_CTL_ENABLE | TIMER_CTL_RELOAD,
-              timer_base + TIMER_CTL_REG(1));
-
-       sched_clock_register(sun5i_timer_sched_read, 32, rate);
-       clocksource_mmio_init(timer_base + TIMER_CNTVAL_LO_REG(1), node->name,
-                             rate, 340, 32, clocksource_mmio_readl_down);
-
-       ticks_per_jiffy = DIV_ROUND_UP(rate, HZ);
-
-       ret = setup_irq(irq, &sun5i_timer_irq);
-       if (ret)
-               pr_warn("failed to setup irq %d\n", irq);
-
-       /* Enable timer0 interrupt */
-       val = readl(timer_base + TIMER_IRQ_EN_REG);
-       writel(val | TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_EN_REG);
-
-       sun5i_clockevent.cpumask = cpu_possible_mask;
-       sun5i_clockevent.irq = irq;
-
-       clockevents_config_and_register(&sun5i_clockevent, rate,
-                                       TIMER_SYNC_TICKS, 0xffffffff);
+       sun5i_setup_clocksource(node, timer_base, clk, irq);
+       sun5i_setup_clockevent(node, timer_base, clk, irq);
 }
 CLOCKSOURCE_OF_DECLARE(sun5i_a13, "allwinner,sun5i-a13-hstimer",
                       sun5i_timer_init);
index 28e59a4..8ae655c 100644 (file)
@@ -1698,15 +1698,18 @@ void cpufreq_resume(void)
                    || __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS))
                        pr_err("%s: Failed to start governor for policy: %p\n",
                                __func__, policy);
-
-               /*
-                * schedule call cpufreq_update_policy() for boot CPU, i.e. last
-                * policy in list. It will verify that the current freq is in
-                * sync with what we believe it to be.
-                */
-               if (list_is_last(&policy->policy_list, &cpufreq_policy_list))
-                       schedule_work(&policy->update);
        }
+
+       /*
+        * schedule call cpufreq_update_policy() for first-online CPU, as that
+        * wouldn't be hotplugged-out on suspend. It will verify that the
+        * current freq is in sync with what we believe it to be.
+        */
+       policy = cpufreq_cpu_get_raw(cpumask_first(cpu_online_mask));
+       if (WARN_ON(!policy))
+               return;
+
+       schedule_work(&policy->update);
 }
 
 /**
index 38e6861..980151f 100644 (file)
@@ -37,11 +37,11 @@ static int mvebu_v7_enter_idle(struct cpuidle_device *dev,
                deepidle = true;
 
        ret = mvebu_v7_cpu_suspend(deepidle);
+       cpu_pm_exit();
+
        if (ret)
                return ret;
 
-       cpu_pm_exit();
-
        return index;
 }
 
@@ -50,17 +50,17 @@ static struct cpuidle_driver armadaxp_idle_driver = {
        .states[0]              = ARM_CPUIDLE_WFI_STATE,
        .states[1]              = {
                .enter                  = mvebu_v7_enter_idle,
-               .exit_latency           = 10,
+               .exit_latency           = 100,
                .power_usage            = 50,
-               .target_residency       = 100,
+               .target_residency       = 1000,
                .name                   = "MV CPU IDLE",
                .desc                   = "CPU power down",
        },
        .states[2]              = {
                .enter                  = mvebu_v7_enter_idle,
-               .exit_latency           = 100,
+               .exit_latency           = 1000,
                .power_usage            = 5,
-               .target_residency       = 1000,
+               .target_residency       = 10000,
                .flags                  = MVEBU_V7_FLAG_DEEP_IDLE,
                .name                   = "MV CPU DEEP IDLE",
                .desc                   = "CPU and L2 Fabric power down",
index 080bd2d..7a73a27 100644 (file)
@@ -330,9 +330,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev)
        if (!dev->registered)
                return -EINVAL;
 
-       if (!dev->state_count)
-               dev->state_count = drv->state_count;
-
        ret = cpuidle_add_device_sysfs(dev);
        if (ret)
                return ret;
index 2697e87..5db1478 100644 (file)
@@ -13,7 +13,7 @@
 #include <linux/sched.h>
 #include <linux/cpuidle.h>
 #include <linux/cpumask.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 
 #include "cpuidle.h"
 
@@ -130,21 +130,20 @@ static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
 #endif
 
 /**
- * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer
+ * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer on a cpu
  * @arg: a void pointer used to match the SMP cross call API
  *
- * @arg is used as a value of type 'long' with one of the two values:
- * - CLOCK_EVT_NOTIFY_BROADCAST_ON
- * - CLOCK_EVT_NOTIFY_BROADCAST_OFF
+ * If @arg is NULL broadcast is disabled otherwise enabled
  *
- * Set the broadcast timer notification for the current CPU.  This function
- * is executed per CPU by an SMP cross call.  It not supposed to be called
- * directly.
+ * This function is executed per CPU by an SMP cross call.  It's not
+ * supposed to be called directly.
  */
 static void cpuidle_setup_broadcast_timer(void *arg)
 {
-       int cpu = smp_processor_id();
-       clockevents_notify((long)(arg), &cpu);
+       if (arg)
+               tick_broadcast_enable();
+       else
+               tick_broadcast_disable();
 }
 
 /**
@@ -239,7 +238,7 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv)
 
        if (drv->bctimer)
                on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
-                                (void *)CLOCK_EVT_NOTIFY_BROADCAST_ON, 1);
+                                (void *)1, 1);
 
        poll_idle_init(drv);
 
@@ -263,7 +262,7 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
        if (drv->bctimer) {
                drv->bctimer = 0;
                on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
-                                (void *)CLOCK_EVT_NOTIFY_BROADCAST_OFF, 1);
+                                NULL, 1);
        }
 
        __cpuidle_unset_driver(drv);
index 97c5903..832a2c3 100644 (file)
@@ -401,7 +401,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
        struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
 
        /* state statistics */
-       for (i = 0; i < device->state_count; i++) {
+       for (i = 0; i < drv->state_count; i++) {
                kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
                if (!kobj)
                        goto error_state;
@@ -433,9 +433,10 @@ error_state:
  */
 static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
 {
+       struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
        int i;
 
-       for (i = 0; i < device->state_count; i++)
+       for (i = 0; i < drv->state_count; i++)
                cpuidle_free_state_kobj(device, i);
 }
 
index 4a5fd24..83aa55d 100644 (file)
 
 #define DRIVER_NAME    "pl08xdmac"
 
+/*
+ * Bus widths reported in src_addr_widths/dst_addr_widths. The expansion is
+ * parenthesized so the mask stays a single operand wherever it is used.
+ */
+#define PL80X_DMA_BUSWIDTHS \
+       (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
+        BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+        BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+        BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
 static struct amba_driver pl08x_amba_driver;
 struct pl08x_driver_data;
 
@@ -2070,6 +2076,10 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
        pl08x->memcpy.device_pause = pl08x_pause;
        pl08x->memcpy.device_resume = pl08x_resume;
        pl08x->memcpy.device_terminate_all = pl08x_terminate_all;
+       pl08x->memcpy.src_addr_widths = PL80X_DMA_BUSWIDTHS;
+       pl08x->memcpy.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
+       pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM);
+       pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
 
        /* Initialize slave engine */
        dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
@@ -2086,6 +2096,10 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
        pl08x->slave.device_pause = pl08x_pause;
        pl08x->slave.device_resume = pl08x_resume;
        pl08x->slave.device_terminate_all = pl08x_terminate_all;
+       pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS;
+       pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
+       pl08x->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+       pl08x->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
 
        /* Get the platform data */
        pl08x->pd = dev_get_platdata(&adev->dev);
index 1e1a4c5..0b4fc6f 100644 (file)
@@ -238,93 +238,126 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first)
 }
 
 /*
- * atc_get_current_descriptors -
- * locate the descriptor which equal to physical address in DSCR
- * @atchan: the channel we want to start
- * @dscr_addr: physical descriptor address in DSCR
+ * atc_get_desc_by_cookie - get the descriptor of a cookie
+ * @atchan: the DMA channel
+ * @cookie: the cookie to get the descriptor for
  */
-static struct at_desc *atc_get_current_descriptors(struct at_dma_chan *atchan,
-                                                       u32 dscr_addr)
+static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan,
+                                               dma_cookie_t cookie)
 {
-       struct at_desc  *desc, *_desc, *child, *desc_cur = NULL;
+       struct at_desc *desc, *_desc;
 
-       list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) {
-               if (desc->lli.dscr == dscr_addr) {
-                       desc_cur = desc;
-                       break;
-               }
+       list_for_each_entry_safe(desc, _desc, &atchan->queue, desc_node) {
+               if (desc->txd.cookie == cookie)
+                       return desc;
+       }
 
-               list_for_each_entry(child, &desc->tx_list, desc_node) {
-                       if (child->lli.dscr == dscr_addr) {
-                               desc_cur = child;
-                               break;
-                       }
-               }
+       list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) {
+               if (desc->txd.cookie == cookie)
+                       return desc;
        }
 
-       return desc_cur;
+       return NULL;
 }
 
-/*
- * atc_get_bytes_left -
- * Get the number of bytes residue in dma buffer,
- * @chan: the channel we want to start
+/**
+ * atc_calc_bytes_left - calculates the number of bytes left according to the
+ * value read from CTRLA.
+ *
+ * @current_len: the number of bytes left before reading CTRLA
+ * @ctrla: the value of CTRLA
+ * @desc: the descriptor containing the transfer width
+ *
+ * The CTRLA bits selected by ATC_BTSIZE_MAX are shifted left by
+ * @desc->tx_width (so tx_width is used as a shift count, not a byte count)
+ * to obtain a number of bytes, which is subtracted from @current_len.
+ *
+ * Return: @current_len minus that number of bytes.
+ */
+static inline int atc_calc_bytes_left(int current_len, u32 ctrla,
+                                       struct at_desc *desc)
+{
+       return current_len - ((ctrla & ATC_BTSIZE_MAX) << desc->tx_width);
+}
+
+/**
+ * atc_calc_bytes_left_from_reg - compute the bytes left using the value
+ * currently held in the channel's CTRLA register.
+ *
+ * @current_len: the number of bytes left before reading CTRLA
+ * @atchan: the channel whose CTRLA register is sampled
+ * @desc: the descriptor containing the transfer width
+ *
+ * Samples CTRLA once and hands the value to atc_calc_bytes_left().
+ */
+static inline int atc_calc_bytes_left_from_reg(int current_len,
+                       struct at_dma_chan *atchan, struct at_desc *desc)
+{
+       return atc_calc_bytes_left(current_len,
+                                  channel_readl(atchan, CTRLA), desc);
+}
+
+/**
+ * atc_get_bytes_left - get the number of bytes residue for a cookie
+ * @chan: DMA channel
+ * @cookie: transaction identifier to check status of
  */
-static int atc_get_bytes_left(struct dma_chan *chan)
+static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
 {
        struct at_dma_chan      *atchan = to_at_dma_chan(chan);
-       struct at_dma           *atdma = to_at_dma(chan->device);
-       int     chan_id = atchan->chan_common.chan_id;
        struct at_desc *desc_first = atc_first_active(atchan);
-       struct at_desc *desc_cur;
-       int ret = 0, count = 0;
+       struct at_desc *desc;
+       int ret;
+       u32 ctrla, dscr;
 
        /*
-        * Initialize necessary values in the first time.
-        * remain_desc record remain desc length.
+        * If the cookie doesn't match to the currently running transfer then
+        * we can return the total length of the associated DMA transfer,
+        * because it is still queued.
         */
-       if (atchan->remain_desc == 0)
-               /* First descriptor embedds the transaction length */
-               atchan->remain_desc = desc_first->len;
+       desc = atc_get_desc_by_cookie(atchan, cookie);
+       if (desc == NULL)
+               return -EINVAL;
+       else if (desc != desc_first)
+               return desc->total_len;
 
-       /*
-        * This happens when current descriptor transfer complete.
-        * The residual buffer size should reduce current descriptor length.
-        */
-       if (unlikely(test_bit(ATC_IS_BTC, &atchan->status))) {
-               clear_bit(ATC_IS_BTC, &atchan->status);
-               desc_cur = atc_get_current_descriptors(atchan,
-                                               channel_readl(atchan, DSCR));
-               if (!desc_cur) {
-                       ret = -EINVAL;
-                       goto out;
-               }
+       /* cookie matches to the currently running transfer */
+       ret = desc_first->total_len;
 
-               count = (desc_cur->lli.ctrla & ATC_BTSIZE_MAX)
-                       << desc_first->tx_width;
-               if (atchan->remain_desc < count) {
-                       ret = -EINVAL;
-                       goto out;
+       if (desc_first->lli.dscr) {
+               /* hardware linked list transfer */
+
+               /*
+                * Calculate the residue by removing the length of the child
+                * descriptors already transferred from the total length.
+                * To get the current child descriptor we can use the value of
+                * the channel's DSCR register and compare it against the value
+                * of the hardware linked list structure of each child
+                * descriptor.
+                */
+
+               ctrla = channel_readl(atchan, CTRLA);
+               rmb(); /* ensure CTRLA is read before DSCR */
+               dscr = channel_readl(atchan, DSCR);
+
+               /* for the first descriptor we can be more accurate */
+               if (desc_first->lli.dscr == dscr)
+                       return atc_calc_bytes_left(ret, ctrla, desc_first);
+
+               ret -= desc_first->len;
+               list_for_each_entry(desc, &desc_first->tx_list, desc_node) {
+                       if (desc->lli.dscr == dscr)
+                               break;
+
+                       ret -= desc->len;
                }
 
-               atchan->remain_desc -= count;
-               ret = atchan->remain_desc;
-       } else {
                /*
-                * Get residual bytes when current
-                * descriptor transfer in progress.
+                * For the last descriptor in the chain we can calculate
+                * the remaining bytes using the channel's register.
+                * Note that the transfer width of the first and last
+                * descriptor may differ.
                 */
-               count = (channel_readl(atchan, CTRLA) & ATC_BTSIZE_MAX)
-                               << (desc_first->tx_width);
-               ret = atchan->remain_desc - count;
+               if (!desc->lli.dscr)
+                       ret = atc_calc_bytes_left_from_reg(ret, atchan, desc);
+       } else {
+               /* single transfer */
+               ret = atc_calc_bytes_left_from_reg(ret, atchan, desc_first);
        }
-       /*
-        * Check fifo empty.
-        */
-       if (!(dma_readl(atdma, CHSR) & AT_DMA_EMPT(chan_id)))
-               atc_issue_pending(chan);
 
-out:
        return ret;
 }
 
@@ -539,8 +572,6 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
                                        /* Give information to tasklet */
                                        set_bit(ATC_IS_ERROR, &atchan->status);
                                }
-                               if (pending & AT_DMA_BTC(i))
-                                       set_bit(ATC_IS_BTC, &atchan->status);
                                tasklet_schedule(&atchan->tasklet);
                                ret = IRQ_HANDLED;
                        }
@@ -653,14 +684,18 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
                desc->lli.ctrlb = ctrlb;
 
                desc->txd.cookie = 0;
+               desc->len = xfer_count << src_width;
 
                atc_desc_chain(&first, &prev, desc);
        }
 
        /* First descriptor of the chain embedds additional information */
        first->txd.cookie = -EBUSY;
-       first->len = len;
+       first->total_len = len;
+
+       /* set transfer width for the calculation of the residue */
        first->tx_width = src_width;
+       prev->tx_width = src_width;
 
        /* set end-of-link to the last link descriptor of list*/
        set_desc_eol(desc);
@@ -752,6 +787,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                        | ATC_SRC_WIDTH(mem_width)
                                        | len >> mem_width;
                        desc->lli.ctrlb = ctrlb;
+                       desc->len = len;
 
                        atc_desc_chain(&first, &prev, desc);
                        total_len += len;
@@ -792,6 +828,7 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
                                        | ATC_DST_WIDTH(mem_width)
                                        | len >> reg_width;
                        desc->lli.ctrlb = ctrlb;
+                       desc->len = len;
 
                        atc_desc_chain(&first, &prev, desc);
                        total_len += len;
@@ -806,8 +843,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
        /* First descriptor of the chain embedds additional information */
        first->txd.cookie = -EBUSY;
-       first->len = total_len;
+       first->total_len = total_len;
+
+       /* set transfer width for the calculation of the residue */
        first->tx_width = reg_width;
+       prev->tx_width = reg_width;
 
        /* first link descriptor of list is responsible of flags */
        first->txd.flags = flags; /* client is in control of this ack */
@@ -872,6 +912,7 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
                                | ATC_FC_MEM2PER
                                | ATC_SIF(atchan->mem_if)
                                | ATC_DIF(atchan->per_if);
+               desc->len = period_len;
                break;
 
        case DMA_DEV_TO_MEM:
@@ -883,6 +924,7 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
                                | ATC_FC_PER2MEM
                                | ATC_SIF(atchan->per_if)
                                | ATC_DIF(atchan->mem_if);
+               desc->len = period_len;
                break;
 
        default:
@@ -964,7 +1006,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 
        /* First descriptor of the chain embedds additional information */
        first->txd.cookie = -EBUSY;
-       first->len = buf_len;
+       first->total_len = buf_len;
        first->tx_width = reg_width;
 
        return &first->txd;
@@ -1118,7 +1160,7 @@ atc_tx_status(struct dma_chan *chan,
        spin_lock_irqsave(&atchan->lock, flags);
 
        /*  Get number of bytes left in the active transactions */
-       bytes = atc_get_bytes_left(chan);
+       bytes = atc_get_bytes_left(chan, cookie);
 
        spin_unlock_irqrestore(&atchan->lock, flags);
 
@@ -1214,7 +1256,6 @@ static int atc_alloc_chan_resources(struct dma_chan *chan)
 
        spin_lock_irqsave(&atchan->lock, flags);
        atchan->descs_allocated = i;
-       atchan->remain_desc = 0;
        list_splice(&tmp_list, &atchan->free_list);
        dma_cookie_init(chan);
        spin_unlock_irqrestore(&atchan->lock, flags);
@@ -1257,7 +1298,6 @@ static void atc_free_chan_resources(struct dma_chan *chan)
        list_splice_init(&atchan->free_list, &list);
        atchan->descs_allocated = 0;
        atchan->status = 0;
-       atchan->remain_desc = 0;
 
        dev_vdbg(chan2dev(chan), "free_chan_resources: done\n");
 }
index d6bba6c..2727ca5 100644 (file)
@@ -181,8 +181,9 @@ struct at_lli {
  * @at_lli: hardware lli structure
  * @txd: support for the async_tx api
  * @desc_node: node on the channed descriptors list
- * @len: total transaction bytecount
+ * @len: descriptor byte count
  * @tx_width: transfer width
+ * @total_len: total transaction byte count
  */
 struct at_desc {
        /* FIRST values the hardware uses */
@@ -194,6 +195,7 @@ struct at_desc {
        struct list_head                desc_node;
        size_t                          len;
        u32                             tx_width;
+       size_t                          total_len;
 };
 
 static inline struct at_desc *
@@ -213,7 +215,6 @@ txd_to_at_desc(struct dma_async_tx_descriptor *txd)
 enum atc_status {
        ATC_IS_ERROR = 0,
        ATC_IS_PAUSED = 1,
-       ATC_IS_BTC = 2,
        ATC_IS_CYCLIC = 24,
 };
 
@@ -231,7 +232,6 @@ enum atc_status {
  * @save_cfg: configuration register that is saved on suspend/resume cycle
  * @save_dscr: for cyclic operations, preserve next descriptor address in
  *             the cyclic list on suspend/resume cycle
- * @remain_desc: to save remain desc length
  * @dma_sconfig: configuration for slave transfers, passed via
  * .device_config
  * @lock: serializes enqueue/dequeue operations to descriptors lists
@@ -251,7 +251,6 @@ struct at_dma_chan {
        struct tasklet_struct   tasklet;
        u32                     save_cfg;
        u32                     save_dscr;
-       u32                     remain_desc;
        struct dma_slave_config dma_sconfig;
 
        spinlock_t              lock;
index 0723096..c92d6a7 100644 (file)
@@ -475,6 +475,7 @@ static int bcm2835_dma_terminate_all(struct dma_chan *chan)
         * c->desc is NULL and exit.)
         */
        if (c->desc) {
+               bcm2835_dma_desc_free(&c->desc->vd);
                c->desc = NULL;
                bcm2835_dma_abort(c->chan_base);
 
index 512cb8e..ceedafb 100644 (file)
@@ -903,6 +903,11 @@ static const struct cppi_glue_infos *get_glue_info(struct device *dev)
        return of_id->data;
 }
 
+#define CPPI41_DMA_BUSWIDTHS   (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+                               BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
+                               BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
+                               BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
 static int cppi41_dma_probe(struct platform_device *pdev)
 {
        struct cppi41_dd *cdd;
@@ -926,6 +931,10 @@ static int cppi41_dma_probe(struct platform_device *pdev)
        cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
        cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
        cdd->ddev.device_terminate_all = cppi41_stop_chan;
+       cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+       cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
+       cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
+       cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
        cdd->ddev.dev = dev;
        INIT_LIST_HEAD(&cdd->ddev.channels);
        cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;
index 4527a3e..8488441 100644 (file)
@@ -511,6 +511,9 @@ static void jz4740_dma_desc_free(struct virt_dma_desc *vdesc)
        kfree(container_of(vdesc, struct jz4740_dma_desc, vdesc));
 }
 
+#define JZ4740_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
+       BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))
+
 static int jz4740_dma_probe(struct platform_device *pdev)
 {
        struct jz4740_dmaengine_chan *chan;
@@ -548,6 +551,10 @@ static int jz4740_dma_probe(struct platform_device *pdev)
        dd->device_prep_dma_cyclic = jz4740_dma_prep_dma_cyclic;
        dd->device_config = jz4740_dma_slave_config;
        dd->device_terminate_all = jz4740_dma_terminate_all;
+       dd->src_addr_widths = JZ4740_DMA_BUSWIDTHS;
+       dd->dst_addr_widths = JZ4740_DMA_BUSWIDTHS;
+       dd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+       dd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
        dd->dev = &pdev->dev;
        INIT_LIST_HEAD(&dd->channels);
 
index f15712f..ac336a9 100644 (file)
@@ -859,9 +859,6 @@ int dma_async_device_register(struct dma_device *device)
        BUG_ON(!device->device_issue_pending);
        BUG_ON(!device->dev);
 
-       WARN(dma_has_cap(DMA_SLAVE, device->cap_mask) && !device->directions,
-            "this driver doesn't support generic slave capabilities reporting\n");
-
        /* note: this only matters in the
         * CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH=n case
         */
index 6565a36..b2c3ae0 100644 (file)
@@ -26,6 +26,8 @@
 
 #include "internal.h"
 
+#define DRV_NAME       "dw_dmac"
+
 static struct dma_chan *dw_dma_of_xlate(struct of_phandle_args *dma_spec,
                                        struct of_dma *ofdma)
 {
@@ -284,7 +286,7 @@ static struct platform_driver dw_driver = {
        .remove         = dw_remove,
        .shutdown       = dw_shutdown,
        .driver = {
-               .name   = "dw_dmac",
+               .name   = DRV_NAME,
                .pm     = &dw_dev_pm_ops,
                .of_match_table = of_match_ptr(dw_dma_of_id_table),
                .acpi_match_table = ACPI_PTR(dw_dma_acpi_id_table),
@@ -305,3 +307,4 @@ module_exit(dw_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller platform driver");
+MODULE_ALIAS("platform:" DRV_NAME);
index 276157f..53dbd3b 100644 (file)
@@ -260,6 +260,13 @@ static int edma_terminate_all(struct dma_chan *chan)
         */
        if (echan->edesc) {
                int cyclic = echan->edesc->cyclic;
+
+               /*
+                * free the running request descriptor
+                * since it is not in any of the vdesc lists
+                */
+               edma_desc_free(&echan->edesc->vdesc);
+
                echan->edesc = NULL;
                edma_stop(echan->ch_num);
                /* Move the cyclic channel back to default queue */
index 18c0a13..66a0efb 100644 (file)
@@ -531,6 +531,10 @@ static int sdma_run_channel0(struct sdma_engine *sdma)
                dev_err(sdma->dev, "Timeout waiting for CH0 ready\n");
        }
 
+       /* Set bits of CONFIG register with dynamic context switching */
+       if (readl(sdma->regs + SDMA_H_CONFIG) == 0)
+               writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
+
        return ret ? 0 : -ETIMEDOUT;
 }
 
@@ -1394,9 +1398,6 @@ static int sdma_init(struct sdma_engine *sdma)
 
        writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR);
 
-       /* Set bits of CONFIG register with given context switching mode */
-       writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
-
        /* Initializes channel's priorities */
        sdma_set_channel_priority(&sdma->channel[0], 7);
 
index 15cab7d..b463410 100644 (file)
@@ -193,8 +193,10 @@ static int moxart_terminate_all(struct dma_chan *chan)
 
        spin_lock_irqsave(&ch->vc.lock, flags);
 
-       if (ch->desc)
+       if (ch->desc) {
+               moxart_dma_desc_free(&ch->desc->vd);
                ch->desc = NULL;
+       }
 
        ctrl = readl(ch->base + REG_OFF_CTRL);
        ctrl &= ~(APB_DMA_ENABLE | APB_DMA_FIN_INT_EN | APB_DMA_ERR_INT_EN);
index 7dd6dd1..167dbaf 100644 (file)
@@ -981,6 +981,7 @@ static int omap_dma_terminate_all(struct dma_chan *chan)
         * c->desc is NULL and exit.)
         */
        if (c->desc) {
+               omap_dma_desc_free(&c->desc->vd);
                c->desc = NULL;
                /* Avoid stopping the dma twice */
                if (!c->paused)
index 69fac06..6e45a43 100644 (file)
@@ -17,7 +17,9 @@
  */
 static const char dmi_empty_string[] = "        ";
 
-static u16 __initdata dmi_ver;
+static u32 dmi_ver __initdata;
+static u32 dmi_len;
+static u16 dmi_num;
 /*
  * Catch too early calls to dmi_check_system():
  */
@@ -78,7 +80,7 @@ static const char * __init dmi_string(const struct dmi_header *dm, u8 s)
  *     We have to be cautious here. We have seen BIOSes with DMI pointers
  *     pointing to completely the wrong place for example
  */
-static void dmi_table(u8 *buf, u32 len, int num,
+static void dmi_table(u8 *buf,
                      void (*decode)(const struct dmi_header *, void *),
                      void *private_data)
 {
@@ -86,10 +88,13 @@ static void dmi_table(u8 *buf, u32 len, int num,
        int i = 0;
 
        /*
-        *      Stop when we see all the items the table claimed to have
-        *      OR we run off the end of the table (also happens)
+        * Stop when we have seen all the items the table claimed to have
+        * (SMBIOS < 3.0 only) OR we reach an end-of-table marker OR we run
+        * off the end of the table (should never happen but sometimes does
+        * on bogus implementations.)
         */
-       while ((i < num) && (data - buf + sizeof(struct dmi_header)) <= len) {
+       while ((!dmi_num || i < dmi_num) &&
+              (data - buf + sizeof(struct dmi_header)) <= dmi_len) {
                const struct dmi_header *dm = (const struct dmi_header *)data;
 
                /*
@@ -98,9 +103,9 @@ static void dmi_table(u8 *buf, u32 len, int num,
                 *  table in dmi_decode or dmi_string
                 */
                data += dm->length;
-               while ((data - buf < len - 1) && (data[0] || data[1]))
+               while ((data - buf < dmi_len - 1) && (data[0] || data[1]))
                        data++;
-               if (data - buf < len - 1)
+               if (data - buf < dmi_len - 1)
                        decode(dm, private_data);
 
                /*
@@ -115,8 +120,6 @@ static void dmi_table(u8 *buf, u32 len, int num,
 }
 
 static phys_addr_t dmi_base;
-static u32 dmi_len;
-static u16 dmi_num;
 
 static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
                void *))
@@ -127,7 +130,7 @@ static int __init dmi_walk_early(void (*decode)(const struct dmi_header *,
        if (buf == NULL)
                return -1;
 
-       dmi_table(buf, dmi_len, dmi_num, decode, NULL);
+       dmi_table(buf, decode, NULL);
 
        add_device_randomness(buf, dmi_len);
 
@@ -198,7 +201,7 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
         * the UUID are supposed to be little-endian encoded.  The specification
         * says that this is the defacto standard.
         */
-       if (dmi_ver >= 0x0206)
+       if (dmi_ver >= 0x020600)
                sprintf(s, "%pUL", d);
        else
                sprintf(s, "%pUB", d);
@@ -470,7 +473,7 @@ static void __init dmi_format_ids(char *buf, size_t len)
  */
 static int __init dmi_present(const u8 *buf)
 {
-       int smbios_ver;
+       u32 smbios_ver;
 
        if (memcmp(buf, "_SM_", 4) == 0 &&
            buf[5] < 32 && dmi_checksum(buf, buf[5])) {
@@ -503,14 +506,16 @@ static int __init dmi_present(const u8 *buf)
                if (dmi_walk_early(dmi_decode) == 0) {
                        if (smbios_ver) {
                                dmi_ver = smbios_ver;
-                               pr_info("SMBIOS %d.%d present.\n",
-                                      dmi_ver >> 8, dmi_ver & 0xFF);
+                               pr_info("SMBIOS %d.%d%s present.\n",
+                                       dmi_ver >> 8, dmi_ver & 0xFF,
+                                       (dmi_ver < 0x0300) ? "" : ".x");
                        } else {
                                dmi_ver = (buf[14] & 0xF0) << 4 |
                                           (buf[14] & 0x0F);
                                pr_info("Legacy DMI %d.%d present.\n",
                                       dmi_ver >> 8, dmi_ver & 0xFF);
                        }
+                       dmi_ver <<= 8;
                        dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
                        printk(KERN_DEBUG "DMI: %s\n", dmi_ids_string);
                        return 0;
@@ -528,25 +533,16 @@ static int __init dmi_smbios3_present(const u8 *buf)
 {
        if (memcmp(buf, "_SM3_", 5) == 0 &&
            buf[6] < 32 && dmi_checksum(buf, buf[6])) {
-               dmi_ver = get_unaligned_be16(buf + 7);
+               dmi_ver = get_unaligned_be32(buf + 6);
+               dmi_ver &= 0xFFFFFF;
+               dmi_num = 0;                    /* No longer specified */
                dmi_len = get_unaligned_le32(buf + 12);
                dmi_base = get_unaligned_le64(buf + 16);
 
-               /*
-                * The 64-bit SMBIOS 3.0 entry point no longer has a field
-                * containing the number of structures present in the table.
-                * Instead, it defines the table size as a maximum size, and
-                * relies on the end-of-table structure type (#127) to be used
-                * to signal the end of the table.
-                * So let's define dmi_num as an upper bound as well: each
-                * structure has a 4 byte header, so dmi_len / 4 is an upper
-                * bound for the number of structures in the table.
-                */
-               dmi_num = dmi_len / 4;
-
                if (dmi_walk_early(dmi_decode) == 0) {
-                       pr_info("SMBIOS %d.%d present.\n",
-                               dmi_ver >> 8, dmi_ver & 0xFF);
+                       pr_info("SMBIOS %d.%d.%d present.\n",
+                               dmi_ver >> 16, (dmi_ver >> 8) & 0xFF,
+                               dmi_ver & 0xFF);
                        dmi_format_ids(dmi_ids_string, sizeof(dmi_ids_string));
                        pr_debug("DMI: %s\n", dmi_ids_string);
                        return 0;
@@ -901,7 +897,7 @@ int dmi_walk(void (*decode)(const struct dmi_header *, void *),
        if (buf == NULL)
                return -1;
 
-       dmi_table(buf, dmi_len, dmi_num, decode, private_data);
+       dmi_table(buf, decode, private_data);
 
        dmi_unmap(buf);
        return 0;
index dcae482..e29560e 100644 (file)
@@ -175,7 +175,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
        unsigned long initrd_addr;
        u64 initrd_size = 0;
        unsigned long fdt_addr = 0;  /* Original DTB */
-       u64 fdt_size = 0;  /* We don't get size from configuration table */
+       unsigned long fdt_size = 0;
        char *cmdline_ptr = NULL;
        int cmdline_size = 0;
        unsigned long new_fdt_addr;
@@ -239,8 +239,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
        } else {
                status = handle_cmdline_files(sys_table, image, cmdline_ptr,
                                              "dtb=",
-                                             ~0UL, (unsigned long *)&fdt_addr,
-                                             (unsigned long *)&fdt_size);
+                                             ~0UL, &fdt_addr, &fdt_size);
 
                if (status != EFI_SUCCESS) {
                        pr_efi_err(sys_table, "Failed to load device tree!\n");
@@ -252,7 +251,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table,
                pr_efi(sys_table, "Using DTB from command line\n");
        } else {
                /* Look for a device tree configuration table entry. */
-               fdt_addr = (uintptr_t)get_fdt(sys_table);
+               fdt_addr = (uintptr_t)get_fdt(sys_table, &fdt_size);
                if (fdt_addr)
                        pr_efi(sys_table, "Using DTB from configuration table\n");
        }
index 47437b1..e334a01 100644 (file)
@@ -41,7 +41,7 @@ efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table,
                                            unsigned long fdt_addr,
                                            unsigned long fdt_size);
 
-void *get_fdt(efi_system_table_t *sys_table);
+void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size);
 
 void efi_get_virtmap(efi_memory_desc_t *memory_map, unsigned long map_size,
                     unsigned long desc_size, efi_memory_desc_t *runtime_map,
index 91da56c..ef5d764 100644 (file)
@@ -323,7 +323,7 @@ fail:
        return EFI_LOAD_ERROR;
 }
 
-void *get_fdt(efi_system_table_t *sys_table)
+void *get_fdt(efi_system_table_t *sys_table, unsigned long *fdt_size)
 {
        efi_guid_t fdt_guid = DEVICE_TREE_GUID;
        efi_config_table_t *tables;
@@ -336,6 +336,11 @@ void *get_fdt(efi_system_table_t *sys_table)
        for (i = 0; i < sys_table->nr_tables; i++)
                if (efi_guidcmp(tables[i].guid, fdt_guid) == 0) {
                        fdt = (void *) tables[i].table;
+                       if (fdt_check_header(fdt) != 0) {
+                               pr_efi_err(sys_table, "Invalid header detected on UEFI supplied FDT, ignoring ...\n");
+                               return NULL;
+                       }
+                       *fdt_size = fdt_totalsize(fdt);
                        break;
         }
 
index a6952ba..a65b751 100644 (file)
@@ -334,7 +334,7 @@ static struct irq_domain_ops mpc8xxx_gpio_irq_ops = {
        .xlate  = irq_domain_xlate_twocell,
 };
 
-static struct of_device_id mpc8xxx_gpio_ids[] __initdata = {
+static struct of_device_id mpc8xxx_gpio_ids[] = {
        { .compatible = "fsl,mpc8349-gpio", },
        { .compatible = "fsl,mpc8572-gpio", },
        { .compatible = "fsl,mpc8610-gpio", },
index 257e298..045a952 100644 (file)
@@ -219,7 +219,7 @@ static int syscon_gpio_probe(struct platform_device *pdev)
                ret = of_property_read_u32_index(np, "gpio,syscon-dev", 2,
                                                 &priv->dir_reg_offset);
                if (ret)
-                       dev_err(dev, "can't read the dir register offset!\n");
+                       dev_dbg(dev, "can't read the dir register offset!\n");
 
                priv->dir_reg_offset <<= 3;
        }
index c0929d9..df990f2 100644 (file)
@@ -201,6 +201,10 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
        if (!handler)
                return AE_BAD_PARAMETER;
 
+       pin = acpi_gpiochip_pin_to_gpio_offset(chip, pin);
+       if (pin < 0)
+               return AE_BAD_PARAMETER;
+
        desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event");
        if (IS_ERR(desc)) {
                dev_err(chip->dev, "Failed to request GPIO\n");
@@ -551,6 +555,12 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
                struct gpio_desc *desc;
                bool found;
 
+               pin = acpi_gpiochip_pin_to_gpio_offset(chip, pin);
+               if (pin < 0) {
+                       status = AE_BAD_PARAMETER;
+                       goto out;
+               }
+
                mutex_lock(&achip->conn_lock);
 
                found = false;
index 910ff8a..d8135ad 100644 (file)
@@ -645,6 +645,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
        pr_debug("     sdma queue id: %d\n", q->properties.sdma_queue_id);
        pr_debug("     sdma engine id: %d\n", q->properties.sdma_engine_id);
 
+       init_sdma_vm(dqm, q, qpd);
        retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
                                &q->gart_mqd_addr, &q->properties);
        if (retval != 0) {
@@ -652,7 +653,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
                return retval;
        }
 
-       init_sdma_vm(dqm, q, qpd);
+       retval = mqd->load_mqd(mqd, q->mqd, 0,
+                               0, NULL);
+       if (retval != 0) {
+               deallocate_sdma_queue(dqm, q->sdma_id);
+               mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+               return retval;
+       }
+
        return 0;
 }
 
index e415a2a..c7d298e 100644 (file)
@@ -44,7 +44,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
        BUG_ON(!kq || !dev);
        BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ);
 
-       pr_debug("kfd: In func %s initializing queue type %d size %d\n",
+       pr_debug("amdkfd: In func %s initializing queue type %d size %d\n",
                        __func__, KFD_QUEUE_TYPE_HIQ, queue_size);
 
        nop.opcode = IT_NOP;
@@ -69,12 +69,16 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 
        prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off);
 
-       if (prop.doorbell_ptr == NULL)
+       if (prop.doorbell_ptr == NULL) {
+               pr_err("amdkfd: error init doorbell");
                goto err_get_kernel_doorbell;
+       }
 
        retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
-       if (retval != 0)
+       if (retval != 0) {
+               pr_err("amdkfd: error init pq queues size (%d)\n", queue_size);
                goto err_pq_allocate_vidmem;
+       }
 
        kq->pq_kernel_addr = kq->pq->cpu_ptr;
        kq->pq_gpu_addr = kq->pq->gpu_addr;
@@ -165,10 +169,8 @@ err_rptr_allocate_vidmem:
 err_eop_allocate_vidmem:
        kfd_gtt_sa_free(dev, kq->pq);
 err_pq_allocate_vidmem:
-       pr_err("kfd: error init pq\n");
        kfd_release_kernel_doorbell(dev, prop.doorbell_ptr);
 err_get_kernel_doorbell:
-       pr_err("kfd: error init doorbell");
        return false;
 
 }
@@ -187,6 +189,8 @@ static void uninitialize(struct kernel_queue *kq)
        else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
                kfd_gtt_sa_free(kq->dev, kq->fence_mem_obj);
 
+       kq->mqd->uninit_mqd(kq->mqd, kq->queue->mqd, kq->queue->mqd_mem_obj);
+
        kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
        kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
        kq->ops_asic_specific.uninitialize(kq);
@@ -211,7 +215,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
        queue_address = (unsigned int *)kq->pq_kernel_addr;
        queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t);
 
-       pr_debug("kfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n",
+       pr_debug("amdkfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n",
                        __func__, rptr, wptr, queue_address);
 
        available_size = (rptr - 1 - wptr + queue_size_dwords) %
@@ -296,7 +300,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
        }
 
        if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) {
-               pr_err("kfd: failed to init kernel queue\n");
+               pr_err("amdkfd: failed to init kernel queue\n");
                kfree(kq);
                return NULL;
        }
@@ -319,7 +323,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
 
        BUG_ON(!dev);
 
-       pr_err("kfd: starting kernel queue test\n");
+       pr_err("amdkfd: starting kernel queue test\n");
 
        kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
        BUG_ON(!kq);
@@ -330,7 +334,7 @@ static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
                buffer[i] = kq->nop_packet;
        kq->ops.submit_packet(kq);
 
-       pr_err("kfd: ending kernel queue test\n");
+       pr_err("amdkfd: ending kernel queue test\n");
 }
 
 
index 6b6b07f..b6f076b 100644 (file)
 #include "drm_crtc_internal.h"
 #include "drm_internal.h"
 
-static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev,
-                                                       struct drm_mode_fb_cmd2 *r,
-                                                       struct drm_file *file_priv);
+static struct drm_framebuffer *
+internal_framebuffer_create(struct drm_device *dev,
+                           struct drm_mode_fb_cmd2 *r,
+                           struct drm_file *file_priv);
 
 /* Avoid boilerplate.  I'm tired of typing. */
 #define DRM_ENUM_NAME_FN(fnname, list)                         \
@@ -524,17 +525,6 @@ void drm_framebuffer_reference(struct drm_framebuffer *fb)
 }
 EXPORT_SYMBOL(drm_framebuffer_reference);
 
-static void drm_framebuffer_free_bug(struct kref *kref)
-{
-       BUG();
-}
-
-static void __drm_framebuffer_unreference(struct drm_framebuffer *fb)
-{
-       DRM_DEBUG("%p: FB ID: %d (%d)\n", fb, fb->base.id, atomic_read(&fb->refcount.refcount));
-       kref_put(&fb->refcount, drm_framebuffer_free_bug);
-}
-
 /**
  * drm_framebuffer_unregister_private - unregister a private fb from the lookup idr
  * @fb: fb to unregister
@@ -1319,7 +1309,7 @@ void drm_plane_force_disable(struct drm_plane *plane)
                return;
        }
        /* disconnect the plane from the fb and crtc: */
-       __drm_framebuffer_unreference(plane->old_fb);
+       drm_framebuffer_unreference(plane->old_fb);
        plane->old_fb = NULL;
        plane->fb = NULL;
        plane->crtc = NULL;
@@ -2131,7 +2121,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
        connector = drm_connector_find(dev, out_resp->connector_id);
        if (!connector) {
                ret = -ENOENT;
-               goto out;
+               goto out_unlock;
        }
 
        for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++)
@@ -2211,6 +2201,8 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
 
 out:
        drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+out_unlock:
        mutex_unlock(&dev->mode_config.mutex);
 
        return ret;
@@ -2908,13 +2900,11 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
         */
        if (req->flags & DRM_MODE_CURSOR_BO) {
                if (req->handle) {
-                       fb = add_framebuffer_internal(dev, &fbreq, file_priv);
+                       fb = internal_framebuffer_create(dev, &fbreq, file_priv);
                        if (IS_ERR(fb)) {
                                DRM_DEBUG_KMS("failed to wrap cursor buffer in drm framebuffer\n");
                                return PTR_ERR(fb);
                        }
-
-                       drm_framebuffer_reference(fb);
                } else {
                        fb = NULL;
                }
@@ -3267,9 +3257,10 @@ static int framebuffer_check(const struct drm_mode_fb_cmd2 *r)
        return 0;
 }
 
-static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev,
-                                                       struct drm_mode_fb_cmd2 *r,
-                                                       struct drm_file *file_priv)
+static struct drm_framebuffer *
+internal_framebuffer_create(struct drm_device *dev,
+                           struct drm_mode_fb_cmd2 *r,
+                           struct drm_file *file_priv)
 {
        struct drm_mode_config *config = &dev->mode_config;
        struct drm_framebuffer *fb;
@@ -3301,12 +3292,6 @@ static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev,
                return fb;
        }
 
-       mutex_lock(&file_priv->fbs_lock);
-       r->fb_id = fb->base.id;
-       list_add(&fb->filp_head, &file_priv->fbs);
-       DRM_DEBUG_KMS("[FB:%d]\n", fb->base.id);
-       mutex_unlock(&file_priv->fbs_lock);
-
        return fb;
 }
 
@@ -3328,15 +3313,24 @@ static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev,
 int drm_mode_addfb2(struct drm_device *dev,
                    void *data, struct drm_file *file_priv)
 {
+       struct drm_mode_fb_cmd2 *r = data;
        struct drm_framebuffer *fb;
 
        if (!drm_core_check_feature(dev, DRIVER_MODESET))
                return -EINVAL;
 
-       fb = add_framebuffer_internal(dev, data, file_priv);
+       fb = internal_framebuffer_create(dev, r, file_priv);
        if (IS_ERR(fb))
                return PTR_ERR(fb);
 
+       /* Transfer ownership to the filp for reaping on close */
+
+       DRM_DEBUG_KMS("[FB:%d]\n", fb->base.id);
+       mutex_lock(&file_priv->fbs_lock);
+       r->fb_id = fb->base.id;
+       list_add(&fb->filp_head, &file_priv->fbs);
+       mutex_unlock(&file_priv->fbs_lock);
+
        return 0;
 }
 
index 9a5b687..379ab45 100644 (file)
@@ -733,10 +733,14 @@ static bool check_txmsg_state(struct drm_dp_mst_topology_mgr *mgr,
                              struct drm_dp_sideband_msg_tx *txmsg)
 {
        bool ret;
-       mutex_lock(&mgr->qlock);
+
+       /*
+        * All updates to txmsg->state are protected by mgr->qlock, and the two
+        * cases we check here are terminal states. For those the barriers
+        * provided by the wake_up/wait_event pair are enough.
+        */
        ret = (txmsg->state == DRM_DP_SIDEBAND_TX_RX ||
               txmsg->state == DRM_DP_SIDEBAND_TX_TIMEOUT);
-       mutex_unlock(&mgr->qlock);
        return ret;
 }
 
@@ -1363,12 +1367,13 @@ static int process_single_tx_qlock(struct drm_dp_mst_topology_mgr *mgr,
        return 0;
 }
 
-/* must be called holding qlock */
 static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr)
 {
        struct drm_dp_sideband_msg_tx *txmsg;
        int ret;
 
+       WARN_ON(!mutex_is_locked(&mgr->qlock));
+
        /* construct a chunk from the first msg in the tx_msg queue */
        if (list_empty(&mgr->tx_msg_downq)) {
                mgr->tx_down_in_progress = false;
index 732cb6f..4c0aa97 100644 (file)
@@ -287,6 +287,7 @@ int drm_load_edid_firmware(struct drm_connector *connector)
 
        drm_mode_connector_update_edid_property(connector, edid);
        ret = drm_add_edid_modes(connector, edid);
+       drm_edid_to_eld(connector, edid);
        kfree(edid);
 
        return ret;
index 7fc6f8b..1134526 100644 (file)
@@ -403,7 +403,7 @@ static int check_free_hole(u64 start, u64 end, u64 size, unsigned alignment)
                unsigned rem;
 
                rem = do_div(tmp, alignment);
-               if (tmp)
+               if (rem)
                        start += alignment - rem;
        }
 
index 6591d48..3fee587 100644 (file)
@@ -174,6 +174,7 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect
                        struct edid *edid = (struct edid *) connector->edid_blob_ptr->data;
 
                        count = drm_add_edid_modes(connector, edid);
+                       drm_edid_to_eld(connector, edid);
                } else
                        count = (*connector_funcs->get_modes)(connector);
        }
index a5e7461..0a67803 100644 (file)
@@ -50,7 +50,7 @@ config DRM_EXYNOS_DSI
 
 config DRM_EXYNOS_DP
        bool "EXYNOS DRM DP driver support"
-       depends on (DRM_EXYNOS_FIMD || DRM_EXYNOS7DECON) && ARCH_EXYNOS && (DRM_PTN3460=n || DRM_PTN3460=y || DRM_PTN3460=DRM_EXYNOS)
+       depends on (DRM_EXYNOS_FIMD || DRM_EXYNOS7_DECON) && ARCH_EXYNOS && (DRM_PTN3460=n || DRM_PTN3460=y || DRM_PTN3460=DRM_EXYNOS)
        default DRM_EXYNOS
        select DRM_PANEL
        help
index 63f02e2..9700461 100644 (file)
@@ -888,8 +888,8 @@ static int decon_probe(struct platform_device *pdev)
        of_node_put(i80_if_timings);
 
        ctx->regs = of_iomap(dev->of_node, 0);
-       if (IS_ERR(ctx->regs)) {
-               ret = PTR_ERR(ctx->regs);
+       if (!ctx->regs) {
+               ret = -ENOMEM;
                goto err_del_component;
        }
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.c b/drivers/gpu/drm/exynos/exynos_drm_connector.c
deleted file mode 100644 (file)
index ba9b3d5..0000000
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Copyright (c) 2011 Samsung Electronics Co., Ltd.
- * Authors:
- *     Inki Dae <inki.dae@samsung.com>
- *     Joonyoung Shim <jy0922.shim@samsung.com>
- *     Seung-Woo Kim <sw0312.kim@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <drm/drmP.h>
-#include <drm/drm_crtc_helper.h>
-
-#include <drm/exynos_drm.h>
-#include "exynos_drm_drv.h"
-#include "exynos_drm_encoder.h"
-#include "exynos_drm_connector.h"
-
-#define to_exynos_connector(x) container_of(x, struct exynos_drm_connector,\
-                               drm_connector)
-
-struct exynos_drm_connector {
-       struct drm_connector            drm_connector;
-       uint32_t                        encoder_id;
-       struct exynos_drm_display       *display;
-};
-
-static int exynos_drm_connector_get_modes(struct drm_connector *connector)
-{
-       struct exynos_drm_connector *exynos_connector =
-                                       to_exynos_connector(connector);
-       struct exynos_drm_display *display = exynos_connector->display;
-       struct edid *edid = NULL;
-       unsigned int count = 0;
-       int ret;
-
-       /*
-        * if get_edid() exists then get_edid() callback of hdmi side
-        * is called to get edid data through i2c interface else
-        * get timing from the FIMD driver(display controller).
-        *
-        * P.S. in case of lcd panel, count is always 1 if success
-        * because lcd panel has only one mode.
-        */
-       if (display->ops->get_edid) {
-               edid = display->ops->get_edid(display, connector);
-               if (IS_ERR_OR_NULL(edid)) {
-                       ret = PTR_ERR(edid);
-                       edid = NULL;
-                       DRM_ERROR("Panel operation get_edid failed %d\n", ret);
-                       goto out;
-               }
-
-               count = drm_add_edid_modes(connector, edid);
-               if (!count) {
-                       DRM_ERROR("Add edid modes failed %d\n", count);
-                       goto out;
-               }
-
-               drm_mode_connector_update_edid_property(connector, edid);
-       } else {
-               struct exynos_drm_panel_info *panel;
-               struct drm_display_mode *mode = drm_mode_create(connector->dev);
-               if (!mode) {
-                       DRM_ERROR("failed to create a new display mode.\n");
-                       return 0;
-               }
-
-               if (display->ops->get_panel)
-                       panel = display->ops->get_panel(display);
-               else {
-                       drm_mode_destroy(connector->dev, mode);
-                       return 0;
-               }
-
-               drm_display_mode_from_videomode(&panel->vm, mode);
-               mode->width_mm = panel->width_mm;
-               mode->height_mm = panel->height_mm;
-               connector->display_info.width_mm = mode->width_mm;
-               connector->display_info.height_mm = mode->height_mm;
-
-               mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
-               drm_mode_set_name(mode);
-               drm_mode_probed_add(connector, mode);
-
-               count = 1;
-       }
-
-out:
-       kfree(edid);
-       return count;
-}
-
-static int exynos_drm_connector_mode_valid(struct drm_connector *connector,
-                                           struct drm_display_mode *mode)
-{
-       struct exynos_drm_connector *exynos_connector =
-                                       to_exynos_connector(connector);
-       struct exynos_drm_display *display = exynos_connector->display;
-       int ret = MODE_BAD;
-
-       DRM_DEBUG_KMS("%s\n", __FILE__);
-
-       if (display->ops->check_mode)
-               if (!display->ops->check_mode(display, mode))
-                       ret = MODE_OK;
-
-       return ret;
-}
-
-static struct drm_encoder *exynos_drm_best_encoder(
-               struct drm_connector *connector)
-{
-       struct drm_device *dev = connector->dev;
-       struct exynos_drm_connector *exynos_connector =
-                                       to_exynos_connector(connector);
-       return drm_encoder_find(dev, exynos_connector->encoder_id);
-}
-
-static struct drm_connector_helper_funcs exynos_connector_helper_funcs = {
-       .get_modes      = exynos_drm_connector_get_modes,
-       .mode_valid     = exynos_drm_connector_mode_valid,
-       .best_encoder   = exynos_drm_best_encoder,
-};
-
-static int exynos_drm_connector_fill_modes(struct drm_connector *connector,
-                               unsigned int max_width, unsigned int max_height)
-{
-       struct exynos_drm_connector *exynos_connector =
-                                       to_exynos_connector(connector);
-       struct exynos_drm_display *display = exynos_connector->display;
-       unsigned int width, height;
-
-       width = max_width;
-       height = max_height;
-
-       /*
-        * if specific driver want to find desired_mode using maxmum
-        * resolution then get max width and height from that driver.
-        */
-       if (display->ops->get_max_resol)
-               display->ops->get_max_resol(display, &width, &height);
-
-       return drm_helper_probe_single_connector_modes(connector, width,
-                                                       height);
-}
-
-/* get detection status of display device. */
-static enum drm_connector_status
-exynos_drm_connector_detect(struct drm_connector *connector, bool force)
-{
-       struct exynos_drm_connector *exynos_connector =
-                                       to_exynos_connector(connector);
-       struct exynos_drm_display *display = exynos_connector->display;
-       enum drm_connector_status status = connector_status_disconnected;
-
-       if (display->ops->is_connected) {
-               if (display->ops->is_connected(display))
-                       status = connector_status_connected;
-               else
-                       status = connector_status_disconnected;
-       }
-
-       return status;
-}
-
-static void exynos_drm_connector_destroy(struct drm_connector *connector)
-{
-       struct exynos_drm_connector *exynos_connector =
-               to_exynos_connector(connector);
-
-       drm_connector_unregister(connector);
-       drm_connector_cleanup(connector);
-       kfree(exynos_connector);
-}
-
-static struct drm_connector_funcs exynos_connector_funcs = {
-       .dpms           = drm_helper_connector_dpms,
-       .fill_modes     = exynos_drm_connector_fill_modes,
-       .detect         = exynos_drm_connector_detect,
-       .destroy        = exynos_drm_connector_destroy,
-};
-
-struct drm_connector *exynos_drm_connector_create(struct drm_device *dev,
-                                                  struct drm_encoder *encoder)
-{
-       struct exynos_drm_connector *exynos_connector;
-       struct exynos_drm_display *display = exynos_drm_get_display(encoder);
-       struct drm_connector *connector;
-       int type;
-       int err;
-
-       exynos_connector = kzalloc(sizeof(*exynos_connector), GFP_KERNEL);
-       if (!exynos_connector)
-               return NULL;
-
-       connector = &exynos_connector->drm_connector;
-
-       switch (display->type) {
-       case EXYNOS_DISPLAY_TYPE_HDMI:
-               type = DRM_MODE_CONNECTOR_HDMIA;
-               connector->interlace_allowed = true;
-               connector->polled = DRM_CONNECTOR_POLL_HPD;
-               break;
-       case EXYNOS_DISPLAY_TYPE_VIDI:
-               type = DRM_MODE_CONNECTOR_VIRTUAL;
-               connector->polled = DRM_CONNECTOR_POLL_HPD;
-               break;
-       default:
-               type = DRM_MODE_CONNECTOR_Unknown;
-               break;
-       }
-
-       drm_connector_init(dev, connector, &exynos_connector_funcs, type);
-       drm_connector_helper_add(connector, &exynos_connector_helper_funcs);
-
-       err = drm_connector_register(connector);
-       if (err)
-               goto err_connector;
-
-       exynos_connector->encoder_id = encoder->base.id;
-       exynos_connector->display = display;
-       connector->dpms = DRM_MODE_DPMS_OFF;
-       connector->encoder = encoder;
-
-       err = drm_mode_connector_attach_encoder(connector, encoder);
-       if (err) {
-               DRM_ERROR("failed to attach a connector to a encoder\n");
-               goto err_sysfs;
-       }
-
-       DRM_DEBUG_KMS("connector has been created\n");
-
-       return connector;
-
-err_sysfs:
-       drm_connector_unregister(connector);
-err_connector:
-       drm_connector_cleanup(connector);
-       kfree(exynos_connector);
-       return NULL;
-}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.h b/drivers/gpu/drm/exynos/exynos_drm_connector.h
deleted file mode 100644 (file)
index 4eb20d7..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-/*
- * Copyright (c) 2011 Samsung Electronics Co., Ltd.
- * Authors:
- *     Inki Dae <inki.dae@samsung.com>
- *     Joonyoung Shim <jy0922.shim@samsung.com>
- *     Seung-Woo Kim <sw0312.kim@samsung.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_CONNECTOR_H_
-#define _EXYNOS_DRM_CONNECTOR_H_
-
-struct drm_connector *exynos_drm_connector_create(struct drm_device *dev,
-                                                  struct drm_encoder *encoder);
-
-#endif
index 925fc69..33a10ce 100644 (file)
@@ -147,6 +147,7 @@ struct fimd_win_data {
        unsigned int            ovl_height;
        unsigned int            fb_width;
        unsigned int            fb_height;
+       unsigned int            fb_pitch;
        unsigned int            bpp;
        unsigned int            pixel_format;
        dma_addr_t              dma_addr;
@@ -284,14 +285,9 @@ static void fimd_clear_channel(struct fimd_context *ctx)
        }
 }
 
-static int fimd_ctx_initialize(struct fimd_context *ctx,
+static int fimd_iommu_attach_devices(struct fimd_context *ctx,
                        struct drm_device *drm_dev)
 {
-       struct exynos_drm_private *priv;
-       priv = drm_dev->dev_private;
-
-       ctx->drm_dev = drm_dev;
-       ctx->pipe = priv->pipe++;
 
        /* attach this sub driver to iommu mapping if supported. */
        if (is_drm_iommu_supported(ctx->drm_dev)) {
@@ -313,7 +309,7 @@ static int fimd_ctx_initialize(struct fimd_context *ctx,
        return 0;
 }
 
-static void fimd_ctx_remove(struct fimd_context *ctx)
+static void fimd_iommu_detach_devices(struct fimd_context *ctx)
 {
        /* detach this sub driver from iommu mapping if supported. */
        if (is_drm_iommu_supported(ctx->drm_dev))
@@ -537,13 +533,14 @@ static void fimd_win_mode_set(struct exynos_drm_crtc *crtc,
        win_data->offset_y = plane->crtc_y;
        win_data->ovl_width = plane->crtc_width;
        win_data->ovl_height = plane->crtc_height;
+       win_data->fb_pitch = plane->pitch;
        win_data->fb_width = plane->fb_width;
        win_data->fb_height = plane->fb_height;
        win_data->dma_addr = plane->dma_addr[0] + offset;
        win_data->bpp = plane->bpp;
        win_data->pixel_format = plane->pixel_format;
-       win_data->buf_offsize = (plane->fb_width - plane->crtc_width) *
-                               (plane->bpp >> 3);
+       win_data->buf_offsize =
+               plane->pitch - (plane->crtc_width * (plane->bpp >> 3));
        win_data->line_size = plane->crtc_width * (plane->bpp >> 3);
 
        DRM_DEBUG_KMS("offset_x = %d, offset_y = %d\n",
@@ -709,7 +706,7 @@ static void fimd_win_commit(struct exynos_drm_crtc *crtc, int zpos)
        writel(val, ctx->regs + VIDWx_BUF_START(win, 0));
 
        /* buffer end address */
-       size = win_data->fb_width * win_data->ovl_height * (win_data->bpp >> 3);
+       size = win_data->fb_pitch * win_data->ovl_height * (win_data->bpp >> 3);
        val = (unsigned long)(win_data->dma_addr + size);
        writel(val, ctx->regs + VIDWx_BUF_END(win, 0));
 
@@ -1056,25 +1053,23 @@ static int fimd_bind(struct device *dev, struct device *master, void *data)
 {
        struct fimd_context *ctx = dev_get_drvdata(dev);
        struct drm_device *drm_dev = data;
+       struct exynos_drm_private *priv = drm_dev->dev_private;
        int ret;
 
-       ret = fimd_ctx_initialize(ctx, drm_dev);
-       if (ret) {
-               DRM_ERROR("fimd_ctx_initialize failed.\n");
-               return ret;
-       }
+       ctx->drm_dev = drm_dev;
+       ctx->pipe = priv->pipe++;
 
        ctx->crtc = exynos_drm_crtc_create(drm_dev, ctx->pipe,
                                           EXYNOS_DISPLAY_TYPE_LCD,
                                           &fimd_crtc_ops, ctx);
-       if (IS_ERR(ctx->crtc)) {
-               fimd_ctx_remove(ctx);
-               return PTR_ERR(ctx->crtc);
-       }
 
        if (ctx->display)
                exynos_drm_create_enc_conn(drm_dev, ctx->display);
 
+       ret = fimd_iommu_attach_devices(ctx, drm_dev);
+       if (ret)
+               return ret;
+
        return 0;
 
 }
@@ -1086,10 +1081,10 @@ static void fimd_unbind(struct device *dev, struct device *master,
 
        fimd_dpms(ctx->crtc, DRM_MODE_DPMS_OFF);
 
+       fimd_iommu_detach_devices(ctx);
+
        if (ctx->display)
                exynos_dpi_remove(ctx->display);
-
-       fimd_ctx_remove(ctx);
 }
 
 static const struct component_ops fimd_component_ops = {
index a561687..8ad5b72 100644 (file)
@@ -175,7 +175,7 @@ static int exynos_disable_plane(struct drm_plane *plane)
        struct exynos_drm_plane *exynos_plane = to_exynos_plane(plane);
        struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(plane->crtc);
 
-       if (exynos_crtc->ops->win_disable)
+       if (exynos_crtc && exynos_crtc->ops->win_disable)
                exynos_crtc->ops->win_disable(exynos_crtc,
                                              exynos_plane->zpos);
 
index 3518bc4..2e3bc57 100644 (file)
@@ -55,6 +55,7 @@ struct hdmi_win_data {
        unsigned int            fb_x;
        unsigned int            fb_y;
        unsigned int            fb_width;
+       unsigned int            fb_pitch;
        unsigned int            fb_height;
        unsigned int            src_width;
        unsigned int            src_height;
@@ -438,7 +439,7 @@ static void vp_video_buffer(struct mixer_context *ctx, int win)
        } else {
                luma_addr[0] = win_data->dma_addr;
                chroma_addr[0] = win_data->dma_addr
-                       + (win_data->fb_width * win_data->fb_height);
+                       + (win_data->fb_pitch * win_data->fb_height);
        }
 
        if (win_data->scan_flags & DRM_MODE_FLAG_INTERLACE) {
@@ -447,8 +448,8 @@ static void vp_video_buffer(struct mixer_context *ctx, int win)
                        luma_addr[1] = luma_addr[0] + 0x40;
                        chroma_addr[1] = chroma_addr[0] + 0x40;
                } else {
-                       luma_addr[1] = luma_addr[0] + win_data->fb_width;
-                       chroma_addr[1] = chroma_addr[0] + win_data->fb_width;
+                       luma_addr[1] = luma_addr[0] + win_data->fb_pitch;
+                       chroma_addr[1] = chroma_addr[0] + win_data->fb_pitch;
                }
        } else {
                ctx->interlace = false;
@@ -469,10 +470,10 @@ static void vp_video_buffer(struct mixer_context *ctx, int win)
        vp_reg_writemask(res, VP_MODE, val, VP_MODE_FMT_MASK);
 
        /* setting size of input image */
-       vp_reg_write(res, VP_IMG_SIZE_Y, VP_IMG_HSIZE(win_data->fb_width) |
+       vp_reg_write(res, VP_IMG_SIZE_Y, VP_IMG_HSIZE(win_data->fb_pitch) |
                VP_IMG_VSIZE(win_data->fb_height));
        /* chroma height has to reduced by 2 to avoid chroma distorions */
-       vp_reg_write(res, VP_IMG_SIZE_C, VP_IMG_HSIZE(win_data->fb_width) |
+       vp_reg_write(res, VP_IMG_SIZE_C, VP_IMG_HSIZE(win_data->fb_pitch) |
                VP_IMG_VSIZE(win_data->fb_height / 2));
 
        vp_reg_write(res, VP_SRC_WIDTH, win_data->src_width);
@@ -559,7 +560,7 @@ static void mixer_graph_buffer(struct mixer_context *ctx, int win)
        /* converting dma address base and source offset */
        dma_addr = win_data->dma_addr
                + (win_data->fb_x * win_data->bpp >> 3)
-               + (win_data->fb_y * win_data->fb_width * win_data->bpp >> 3);
+               + (win_data->fb_y * win_data->fb_pitch);
        src_x_offset = 0;
        src_y_offset = 0;
 
@@ -576,7 +577,8 @@ static void mixer_graph_buffer(struct mixer_context *ctx, int win)
                MXR_GRP_CFG_FORMAT_VAL(fmt), MXR_GRP_CFG_FORMAT_MASK);
 
        /* setup geometry */
-       mixer_reg_write(res, MXR_GRAPHIC_SPAN(win), win_data->fb_width);
+       mixer_reg_write(res, MXR_GRAPHIC_SPAN(win),
+                       win_data->fb_pitch / (win_data->bpp >> 3));
 
        /* setup display size */
        if (ctx->mxr_ver == MXR_VER_128_0_0_184 &&
@@ -961,6 +963,7 @@ static void mixer_win_mode_set(struct exynos_drm_crtc *crtc,
        win_data->fb_y = plane->fb_y;
        win_data->fb_width = plane->fb_width;
        win_data->fb_height = plane->fb_height;
+       win_data->fb_pitch = plane->pitch;
        win_data->src_width = plane->src_width;
        win_data->src_height = plane->src_height;
 
index cc6ea53..5c66b56 100644 (file)
@@ -1095,6 +1095,7 @@ static void vlv_save_gunit_s0ix_state(struct drm_i915_private *dev_priv)
        /* Gunit-Display CZ domain, 0x182028-0x1821CF */
        s->gu_ctl0              = I915_READ(VLV_GU_CTL0);
        s->gu_ctl1              = I915_READ(VLV_GU_CTL1);
+       s->pcbr                 = I915_READ(VLV_PCBR);
        s->clock_gate_dis2      = I915_READ(VLV_GUNIT_CLOCK_GATE2);
 
        /*
@@ -1189,6 +1190,7 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv)
        /* Gunit-Display CZ domain, 0x182028-0x1821CF */
        I915_WRITE(VLV_GU_CTL0,                 s->gu_ctl0);
        I915_WRITE(VLV_GU_CTL1,                 s->gu_ctl1);
+       I915_WRITE(VLV_PCBR,                    s->pcbr);
        I915_WRITE(VLV_GUNIT_CLOCK_GATE2,       s->clock_gate_dis2);
 }
 
@@ -1197,19 +1199,7 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on)
        u32 val;
        int err;
 
-       val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
-       WARN_ON(!!(val & VLV_GFX_CLK_FORCE_ON_BIT) == force_on);
-
 #define COND (I915_READ(VLV_GTLC_SURVIVABILITY_REG) & VLV_GFX_CLK_STATUS_BIT)
-       /* Wait for a previous force-off to settle */
-       if (force_on) {
-               err = wait_for(!COND, 20);
-               if (err) {
-                       DRM_ERROR("timeout waiting for GFX clock force-off (%08x)\n",
-                                 I915_READ(VLV_GTLC_SURVIVABILITY_REG));
-                       return err;
-               }
-       }
 
        val = I915_READ(VLV_GTLC_SURVIVABILITY_REG);
        val &= ~VLV_GFX_CLK_FORCE_ON_BIT;
index 8727086..b4faa2d 100644 (file)
@@ -1094,6 +1094,7 @@ struct vlv_s0ix_state {
        /* Display 2 CZ domain */
        u32 gu_ctl0;
        u32 gu_ctl1;
+       u32 pcbr;
        u32 clock_gate_dis2;
 };
 
index e5daad5..27ea6bd 100644 (file)
@@ -2737,24 +2737,11 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 
        WARN_ON(i915_verify_lists(ring->dev));
 
-       /* Move any buffers on the active list that are no longer referenced
-        * by the ringbuffer to the flushing/inactive lists as appropriate,
-        * before we free the context associated with the requests.
+       /* Retire requests first as we use it above for the early return.
+        * If we retire requests last, we may use a later seqno and so clear
+        * the requests lists without clearing the active list, leading to
+        * confusion.
         */
-       while (!list_empty(&ring->active_list)) {
-               struct drm_i915_gem_object *obj;
-
-               obj = list_first_entry(&ring->active_list,
-                                     struct drm_i915_gem_object,
-                                     ring_list);
-
-               if (!i915_gem_request_completed(obj->last_read_req, true))
-                       break;
-
-               i915_gem_object_move_to_inactive(obj);
-       }
-
-
        while (!list_empty(&ring->request_list)) {
                struct drm_i915_gem_request *request;
                struct intel_ringbuffer *ringbuf;
@@ -2789,6 +2776,23 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
                i915_gem_free_request(request);
        }
 
+       /* Move any buffers on the active list that are no longer referenced
+        * by the ringbuffer to the flushing/inactive lists as appropriate,
+        * before we free the context associated with the requests.
+        */
+       while (!list_empty(&ring->active_list)) {
+               struct drm_i915_gem_object *obj;
+
+               obj = list_first_entry(&ring->active_list,
+                                     struct drm_i915_gem_object,
+                                     ring_list);
+
+               if (!i915_gem_request_completed(obj->last_read_req, true))
+                       break;
+
+               i915_gem_object_move_to_inactive(obj);
+       }
+
        if (unlikely(ring->trace_irq_req &&
                     i915_gem_request_completed(ring->trace_irq_req, true))) {
                ring->irq_put(ring);
@@ -2936,9 +2940,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
        req = obj->last_read_req;
 
        /* Do this after OLR check to make sure we make forward progress polling
-        * on this IOCTL with a timeout <=0 (like busy ioctl)
+        * on this IOCTL with a timeout == 0 (like busy ioctl)
         */
-       if (args->timeout_ns <= 0) {
+       if (args->timeout_ns == 0) {
                ret = -ETIME;
                goto out;
        }
@@ -2948,7 +2952,8 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
        i915_gem_request_reference(req);
        mutex_unlock(&dev->struct_mutex);
 
-       ret = __i915_wait_request(req, reset_counter, true, &args->timeout_ns,
+       ret = __i915_wait_request(req, reset_counter, true,
+                                 args->timeout_ns > 0 ? &args->timeout_ns : NULL,
                                  file->driver_priv);
        mutex_lock(&dev->struct_mutex);
        i915_gem_request_unreference(req);
@@ -4792,6 +4797,9 @@ i915_gem_init_hw(struct drm_device *dev)
        if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
                return -EIO;
 
+       /* Double layer security blanket, see i915_gem_init() */
+       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
        if (dev_priv->ellc_size)
                I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
 
@@ -4824,7 +4832,7 @@ i915_gem_init_hw(struct drm_device *dev)
        for_each_ring(ring, dev_priv, i) {
                ret = ring->init_hw(ring);
                if (ret)
-                       return ret;
+                       goto out;
        }
 
        for (i = 0; i < NUM_L3_SLICES(dev); i++)
@@ -4841,9 +4849,11 @@ i915_gem_init_hw(struct drm_device *dev)
                DRM_ERROR("Context enable failed %d\n", ret);
                i915_gem_cleanup_ringbuffer(dev);
 
-               return ret;
+               goto out;
        }
 
+out:
+       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
        return ret;
 }
 
@@ -4877,6 +4887,14 @@ int i915_gem_init(struct drm_device *dev)
                dev_priv->gt.stop_ring = intel_logical_ring_stop;
        }
 
+       /* This is just a security blanket to placate dragons.
+        * On some systems, we very sporadically observe that the first TLBs
+        * used by the CS may be stale, despite us poking the TLB reset. If
+        * we hold the forcewake during initialisation these problems
+        * just magically go away.
+        */
+       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+
        ret = i915_gem_init_userptr(dev);
        if (ret)
                goto out_unlock;
@@ -4903,6 +4921,7 @@ int i915_gem_init(struct drm_device *dev)
        }
 
 out_unlock:
+       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
        mutex_unlock(&dev->struct_mutex);
 
        return ret;
index b773368..38a7425 100644 (file)
@@ -1487,7 +1487,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
                goto err;
        }
 
-       if (i915_needs_cmd_parser(ring)) {
+       if (i915_needs_cmd_parser(ring) && args->batch_len) {
                batch_obj = i915_gem_execbuffer_parse(ring,
                                                      &shadow_exec_entry,
                                                      eb,
index e730789..f75173c 100644 (file)
@@ -37,6 +37,7 @@
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_crtc_helper.h>
@@ -2416,6 +2417,14 @@ out_unref_obj:
        return false;
 }
 
+/* Update plane->state->fb to match plane->fb after driver-internal updates */
+static void
+update_state_fb(struct drm_plane *plane)
+{
+       if (plane->fb != plane->state->fb)
+               drm_atomic_set_fb_for_plane(plane->state, plane->fb);
+}
+
 static void
 intel_find_plane_obj(struct intel_crtc *intel_crtc,
                     struct intel_initial_plane_config *plane_config)
@@ -2429,8 +2438,15 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc,
        if (!intel_crtc->base.primary->fb)
                return;
 
-       if (intel_alloc_plane_obj(intel_crtc, plane_config))
+       if (intel_alloc_plane_obj(intel_crtc, plane_config)) {
+               struct drm_plane *primary = intel_crtc->base.primary;
+
+               primary->state->crtc = &intel_crtc->base;
+               primary->crtc = &intel_crtc->base;
+               update_state_fb(primary);
+
                return;
+       }
 
        kfree(intel_crtc->base.primary->fb);
        intel_crtc->base.primary->fb = NULL;
@@ -2453,15 +2469,21 @@ intel_find_plane_obj(struct intel_crtc *intel_crtc,
                        continue;
 
                if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) {
+                       struct drm_plane *primary = intel_crtc->base.primary;
+
                        if (obj->tiling_mode != I915_TILING_NONE)
                                dev_priv->preserve_bios_swizzle = true;
 
                        drm_framebuffer_reference(c->primary->fb);
-                       intel_crtc->base.primary->fb = c->primary->fb;
+                       primary->fb = c->primary->fb;
+                       primary->state->crtc = &intel_crtc->base;
+                       primary->crtc = &intel_crtc->base;
                        obj->frontbuffer_bits |= INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe);
                        break;
                }
        }
+
+       update_state_fb(intel_crtc->base.primary);
 }
 
 static void i9xx_update_primary_plane(struct drm_crtc *crtc,
@@ -6602,6 +6624,10 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
        struct drm_framebuffer *fb;
        struct intel_framebuffer *intel_fb;
 
+       val = I915_READ(DSPCNTR(plane));
+       if (!(val & DISPLAY_PLANE_ENABLE))
+               return;
+
        intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
        if (!intel_fb) {
                DRM_DEBUG_KMS("failed to alloc fb\n");
@@ -6610,8 +6636,6 @@ i9xx_get_initial_plane_config(struct intel_crtc *crtc,
 
        fb = &intel_fb->base;
 
-       val = I915_READ(DSPCNTR(plane));
-
        if (INTEL_INFO(dev)->gen >= 4)
                if (val & DISPPLANE_TILED)
                        plane_config->tiling = I915_TILING_X;
@@ -7643,6 +7667,9 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
        fb = &intel_fb->base;
 
        val = I915_READ(PLANE_CTL(pipe, 0));
+       if (!(val & PLANE_CTL_ENABLE))
+               goto error;
+
        if (val & PLANE_CTL_TILED_MASK)
                plane_config->tiling = I915_TILING_X;
 
@@ -7730,6 +7757,10 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc,
        struct drm_framebuffer *fb;
        struct intel_framebuffer *intel_fb;
 
+       val = I915_READ(DSPCNTR(pipe));
+       if (!(val & DISPLAY_PLANE_ENABLE))
+               return;
+
        intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
        if (!intel_fb) {
                DRM_DEBUG_KMS("failed to alloc fb\n");
@@ -7738,8 +7769,6 @@ ironlake_get_initial_plane_config(struct intel_crtc *crtc,
 
        fb = &intel_fb->base;
 
-       val = I915_READ(DSPCNTR(pipe));
-
        if (INTEL_INFO(dev)->gen >= 4)
                if (val & DISPPLANE_TILED)
                        plane_config->tiling = I915_TILING_X;
@@ -9716,7 +9745,7 @@ void intel_check_page_flip(struct drm_device *dev, int pipe)
        struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe];
        struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
-       WARN_ON(!in_irq());
+       WARN_ON(!in_interrupt());
 
        if (crtc == NULL)
                return;
@@ -9816,6 +9845,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
        drm_gem_object_reference(&obj->base);
 
        crtc->primary->fb = fb;
+       update_state_fb(crtc->primary);
 
        work->pending_flip_obj = obj;
 
@@ -9884,6 +9914,7 @@ cleanup_unpin:
 cleanup_pending:
        atomic_dec(&intel_crtc->unpin_work_count);
        crtc->primary->fb = old_fb;
+       update_state_fb(crtc->primary);
        drm_gem_object_unreference(&work->old_fb_obj->base);
        drm_gem_object_unreference(&obj->base);
        mutex_unlock(&dev->struct_mutex);
@@ -13718,6 +13749,7 @@ void intel_modeset_gem_init(struct drm_device *dev)
                                  to_intel_crtc(c)->pipe);
                        drm_framebuffer_unreference(c->primary->fb);
                        c->primary->fb = NULL;
+                       update_state_fb(c->primary);
                }
        }
        mutex_unlock(&dev->struct_mutex);
index 0a52c44..9c5451c 100644 (file)
@@ -1322,7 +1322,7 @@ int intel_sprite_set_colorkey(struct drm_device *dev, void *data,
        drm_modeset_lock_all(dev);
 
        plane = drm_plane_find(dev, set->plane_id);
-       if (!plane) {
+       if (!plane || plane->type != DRM_PLANE_TYPE_OVERLAY) {
                ret = -ENOENT;
                goto out_unlock;
        }
@@ -1349,7 +1349,7 @@ int intel_sprite_get_colorkey(struct drm_device *dev, void *data,
        drm_modeset_lock_all(dev);
 
        plane = drm_plane_find(dev, get->plane_id);
-       if (!plane) {
+       if (!plane || plane->type != DRM_PLANE_TYPE_OVERLAY) {
                ret = -ENOENT;
                goto out_unlock;
        }
index c47a3ba..4e8fb89 100644 (file)
@@ -1048,8 +1048,14 @@ static void intel_uncore_fw_domains_init(struct drm_device *dev)
 
                /* We need to init first for ECOBUS access and then
                 * determine later if we want to reinit, in case of MT access is
-                * not working
+                * not working. In this stage we don't know which flavour this
+                * ivb is, so it is better to reset also the gen6 fw registers
+                * before the ecobus check.
                 */
+
+               __raw_i915_write32(dev_priv, FORCEWAKE, 0);
+               __raw_posting_read(dev_priv, ECOBUS);
+
                fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER,
                               FORCEWAKE_MT, FORCEWAKE_MT_ACK);
 
index 29bd539..6efa8f3 100644 (file)
@@ -340,11 +340,13 @@ nvkm_devobj_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
 
                /* switch mmio to cpu's native endianness */
 #ifndef __BIG_ENDIAN
-               if (ioread32_native(map + 0x000004) != 0x00000000)
+               if (ioread32_native(map + 0x000004) != 0x00000000) {
 #else
-               if (ioread32_native(map + 0x000004) == 0x00000000)
+               if (ioread32_native(map + 0x000004) == 0x00000000) {
 #endif
                        iowrite32_native(0x01000001, map + 0x000004);
+                       ioread32_native(map);
+               }
 
                /* read boot0 and strapping information */
                boot0 = ioread32_native(map + 0x000000);
index 539561e..108d048 100644 (file)
@@ -140,6 +140,49 @@ gm100_identify(struct nvkm_device *device)
                device->oclass[NVDEV_ENGINE_MSVLD  ] = &gk104_msvld_oclass;
                device->oclass[NVDEV_ENGINE_MSPDEC ] = &gk104_mspdec_oclass;
                device->oclass[NVDEV_ENGINE_MSPPP  ] = &gf100_msppp_oclass;
+#endif
+               break;
+       case 0x126:
+               device->cname = "GM206";
+               device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nvkm_bios_oclass;
+               device->oclass[NVDEV_SUBDEV_GPIO   ] =  gk104_gpio_oclass;
+               device->oclass[NVDEV_SUBDEV_I2C    ] =  gm204_i2c_oclass;
+               device->oclass[NVDEV_SUBDEV_FUSE   ] = &gm107_fuse_oclass;
+#if 0
+               /* looks to be some non-trivial changes */
+               device->oclass[NVDEV_SUBDEV_CLK    ] = &gk104_clk_oclass;
+               /* priv ring says no to 0x10eb14 writes */
+               device->oclass[NVDEV_SUBDEV_THERM  ] = &gm107_therm_oclass;
+#endif
+               device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
+               device->oclass[NVDEV_SUBDEV_DEVINIT] =  gm204_devinit_oclass;
+               device->oclass[NVDEV_SUBDEV_MC     ] =  gk20a_mc_oclass;
+               device->oclass[NVDEV_SUBDEV_BUS    ] =  gf100_bus_oclass;
+               device->oclass[NVDEV_SUBDEV_TIMER  ] = &gk20a_timer_oclass;
+               device->oclass[NVDEV_SUBDEV_FB     ] =  gm107_fb_oclass;
+               device->oclass[NVDEV_SUBDEV_LTC    ] =  gm107_ltc_oclass;
+               device->oclass[NVDEV_SUBDEV_IBUS   ] = &gk104_ibus_oclass;
+               device->oclass[NVDEV_SUBDEV_INSTMEM] =  nv50_instmem_oclass;
+               device->oclass[NVDEV_SUBDEV_MMU    ] = &gf100_mmu_oclass;
+               device->oclass[NVDEV_SUBDEV_BAR    ] = &gf100_bar_oclass;
+               device->oclass[NVDEV_SUBDEV_PMU    ] =  gk208_pmu_oclass;
+#if 0
+               device->oclass[NVDEV_SUBDEV_VOLT   ] = &nv40_volt_oclass;
+#endif
+               device->oclass[NVDEV_ENGINE_DMAOBJ ] =  gf110_dmaeng_oclass;
+#if 0
+               device->oclass[NVDEV_ENGINE_FIFO   ] =  gk208_fifo_oclass;
+               device->oclass[NVDEV_ENGINE_SW     ] =  gf100_sw_oclass;
+               device->oclass[NVDEV_ENGINE_GR     ] =  gm107_gr_oclass;
+#endif
+               device->oclass[NVDEV_ENGINE_DISP   ] =  gm204_disp_oclass;
+#if 0
+               device->oclass[NVDEV_ENGINE_CE0    ] = &gm204_ce0_oclass;
+               device->oclass[NVDEV_ENGINE_CE1    ] = &gm204_ce1_oclass;
+               device->oclass[NVDEV_ENGINE_CE2    ] = &gm204_ce2_oclass;
+               device->oclass[NVDEV_ENGINE_MSVLD  ] = &gk104_msvld_oclass;
+               device->oclass[NVDEV_ENGINE_MSPDEC ] = &gk104_mspdec_oclass;
+               device->oclass[NVDEV_ENGINE_MSPPP  ] = &gf100_msppp_oclass;
 #endif
                break;
        default:
index b038b6e..043e429 100644 (file)
@@ -502,72 +502,57 @@ nv04_fifo_intr(struct nvkm_subdev *subdev)
 {
        struct nvkm_device *device = nv_device(subdev);
        struct nv04_fifo_priv *priv = (void *)subdev;
-       uint32_t status, reassign;
-       int cnt = 0;
+       u32 mask = nv_rd32(priv, NV03_PFIFO_INTR_EN_0);
+       u32 stat = nv_rd32(priv, NV03_PFIFO_INTR_0) & mask;
+       u32 reassign, chid, get, sem;
 
        reassign = nv_rd32(priv, NV03_PFIFO_CACHES) & 1;
-       while ((status = nv_rd32(priv, NV03_PFIFO_INTR_0)) && (cnt++ < 100)) {
-               uint32_t chid, get;
-
-               nv_wr32(priv, NV03_PFIFO_CACHES, 0);
-
-               chid = nv_rd32(priv, NV03_PFIFO_CACHE1_PUSH1) & priv->base.max;
-               get  = nv_rd32(priv, NV03_PFIFO_CACHE1_GET);
+       nv_wr32(priv, NV03_PFIFO_CACHES, 0);
 
-               if (status & NV_PFIFO_INTR_CACHE_ERROR) {
-                       nv04_fifo_cache_error(device, priv, chid, get);
-                       status &= ~NV_PFIFO_INTR_CACHE_ERROR;
-               }
+       chid = nv_rd32(priv, NV03_PFIFO_CACHE1_PUSH1) & priv->base.max;
+       get  = nv_rd32(priv, NV03_PFIFO_CACHE1_GET);
 
-               if (status & NV_PFIFO_INTR_DMA_PUSHER) {
-                       nv04_fifo_dma_pusher(device, priv, chid);
-                       status &= ~NV_PFIFO_INTR_DMA_PUSHER;
-               }
+       if (stat & NV_PFIFO_INTR_CACHE_ERROR) {
+               nv04_fifo_cache_error(device, priv, chid, get);
+               stat &= ~NV_PFIFO_INTR_CACHE_ERROR;
+       }
 
-               if (status & NV_PFIFO_INTR_SEMAPHORE) {
-                       uint32_t sem;
+       if (stat & NV_PFIFO_INTR_DMA_PUSHER) {
+               nv04_fifo_dma_pusher(device, priv, chid);
+               stat &= ~NV_PFIFO_INTR_DMA_PUSHER;
+       }
 
-                       status &= ~NV_PFIFO_INTR_SEMAPHORE;
-                       nv_wr32(priv, NV03_PFIFO_INTR_0,
-                               NV_PFIFO_INTR_SEMAPHORE);
+       if (stat & NV_PFIFO_INTR_SEMAPHORE) {
+               stat &= ~NV_PFIFO_INTR_SEMAPHORE;
+               nv_wr32(priv, NV03_PFIFO_INTR_0, NV_PFIFO_INTR_SEMAPHORE);
 
-                       sem = nv_rd32(priv, NV10_PFIFO_CACHE1_SEMAPHORE);
-                       nv_wr32(priv, NV10_PFIFO_CACHE1_SEMAPHORE, sem | 0x1);
+               sem = nv_rd32(priv, NV10_PFIFO_CACHE1_SEMAPHORE);
+               nv_wr32(priv, NV10_PFIFO_CACHE1_SEMAPHORE, sem | 0x1);
 
-                       nv_wr32(priv, NV03_PFIFO_CACHE1_GET, get + 4);
-                       nv_wr32(priv, NV04_PFIFO_CACHE1_PULL0, 1);
-               }
+               nv_wr32(priv, NV03_PFIFO_CACHE1_GET, get + 4);
+               nv_wr32(priv, NV04_PFIFO_CACHE1_PULL0, 1);
+       }
 
-               if (device->card_type == NV_50) {
-                       if (status & 0x00000010) {
-                               status &= ~0x00000010;
-                               nv_wr32(priv, 0x002100, 0x00000010);
-                       }
-
-                       if (status & 0x40000000) {
-                               nv_wr32(priv, 0x002100, 0x40000000);
-                               nvkm_fifo_uevent(&priv->base);
-                               status &= ~0x40000000;
-                       }
+       if (device->card_type == NV_50) {
+               if (stat & 0x00000010) {
+                       stat &= ~0x00000010;
+                       nv_wr32(priv, 0x002100, 0x00000010);
                }
 
-               if (status) {
-                       nv_warn(priv, "unknown intr 0x%08x, ch %d\n",
-                               status, chid);
-                       nv_wr32(priv, NV03_PFIFO_INTR_0, status);
-                       status = 0;
+               if (stat & 0x40000000) {
+                       nv_wr32(priv, 0x002100, 0x40000000);
+                       nvkm_fifo_uevent(&priv->base);
+                       stat &= ~0x40000000;
                }
-
-               nv_wr32(priv, NV03_PFIFO_CACHES, reassign);
        }
 
-       if (status) {
-               nv_error(priv, "still angry after %d spins, halt\n", cnt);
-               nv_wr32(priv, 0x002140, 0);
-               nv_wr32(priv, 0x000140, 0);
+       if (stat) {
+               nv_warn(priv, "unknown intr 0x%08x\n", stat);
+               nv_mask(priv, NV03_PFIFO_INTR_EN_0, stat, 0x00000000);
+               nv_wr32(priv, NV03_PFIFO_INTR_0, stat);
        }
 
-       nv_wr32(priv, 0x000100, 0x00000100);
+       nv_wr32(priv, NV03_PFIFO_CACHES, reassign);
 }
 
 static int
index 2e7ec38..57e2c5b 100644 (file)
@@ -1032,9 +1032,9 @@ gf100_grctx_generate_bundle(struct gf100_grctx *info)
        const int s = 8;
        const int b = mmio_vram(info, impl->bundle_size, (1 << s), access);
        mmio_refn(info, 0x408004, 0x00000000, s, b);
-       mmio_refn(info, 0x408008, 0x80000000 | (impl->bundle_size >> s), 0, b);
+       mmio_wr32(info, 0x408008, 0x80000000 | (impl->bundle_size >> s));
        mmio_refn(info, 0x418808, 0x00000000, s, b);
-       mmio_refn(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s), 0, b);
+       mmio_wr32(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s));
 }
 
 void
index b52300d..5e9454b 100644 (file)
@@ -851,9 +851,9 @@ gk104_grctx_generate_bundle(struct gf100_grctx *info)
        const int s = 8;
        const int b = mmio_vram(info, impl->bundle_size, (1 << s), access);
        mmio_refn(info, 0x408004, 0x00000000, s, b);
-       mmio_refn(info, 0x408008, 0x80000000 | (impl->bundle_size >> s), 0, b);
+       mmio_wr32(info, 0x408008, 0x80000000 | (impl->bundle_size >> s));
        mmio_refn(info, 0x418808, 0x00000000, s, b);
-       mmio_refn(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s), 0, b);
+       mmio_wr32(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s));
        mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit);
 }
 
index 956f4dc..b2fae6e 100644 (file)
@@ -871,9 +871,9 @@ gm107_grctx_generate_bundle(struct gf100_grctx *info)
        const int s = 8;
        const int b = mmio_vram(info, impl->bundle_size, (1 << s), access);
        mmio_refn(info, 0x408004, 0x00000000, s, b);
-       mmio_refn(info, 0x408008, 0x80000000 | (impl->bundle_size >> s), 0, b);
+       mmio_wr32(info, 0x408008, 0x80000000 | (impl->bundle_size >> s));
        mmio_refn(info, 0x418e24, 0x00000000, s, b);
-       mmio_refn(info, 0x418e28, 0x80000000 | (impl->bundle_size >> s), 0, b);
+       mmio_wr32(info, 0x418e28, 0x80000000 | (impl->bundle_size >> s));
        mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit);
 }
 
index d1a89b2..c4e1f08 100644 (file)
@@ -74,7 +74,11 @@ dcb_i2c_parse(struct nvkm_bios *bios, u8 idx, struct dcb_i2c_entry *info)
        u16 ent = dcb_i2c_entry(bios, idx, &ver, &len);
        if (ent) {
                if (ver >= 0x41) {
-                       if (!(nv_ro32(bios, ent) & 0x80000000))
+                       u32 ent_value = nv_ro32(bios, ent);
+                       u8 i2c_port = (ent_value >> 27) & 0x1f;
+                       u8 dpaux_port = (ent_value >> 22) & 0x1f;
+                       /* value 0x1f means unused according to DCB 4.x spec */
+                       if (i2c_port == 0x1f && dpaux_port == 0x1f)
                                info->type = DCB_I2C_UNUSED;
                        else
                                info->type = DCB_I2C_PMGR;
index c648e19..243a36c 100644 (file)
 #define VCE_UENC_REG_CLOCK_GATING      0x207c0
 #define VCE_SYS_INT_EN                 0x21300
 #      define VCE_SYS_INT_TRAP_INTERRUPT_EN    (1 << 3)
+#define VCE_LMI_VCPU_CACHE_40BIT_BAR   0x2145c
 #define VCE_LMI_CTRL2                  0x21474
 #define VCE_LMI_CTRL                   0x21498
 #define VCE_LMI_VM_CTRL                        0x214a0
index 5587603..33d5a4f 100644 (file)
@@ -1565,6 +1565,7 @@ struct radeon_dpm {
        int                     new_active_crtc_count;
        u32                     current_active_crtcs;
        int                     current_active_crtc_count;
+       bool single_display;
        struct radeon_dpm_dynamic_state dyn_state;
        struct radeon_dpm_fan fan;
        u32 tdp_limit;
index 63ccb8f..d27e4cc 100644 (file)
@@ -76,7 +76,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev)
 
 static bool radeon_read_bios(struct radeon_device *rdev)
 {
-       uint8_t __iomem *bios;
+       uint8_t __iomem *bios, val1, val2;
        size_t size;
 
        rdev->bios = NULL;
@@ -86,15 +86,19 @@ static bool radeon_read_bios(struct radeon_device *rdev)
                return false;
        }
 
-       if (size == 0 || bios[0] != 0x55 || bios[1] != 0xaa) {
+       val1 = readb(&bios[0]);
+       val2 = readb(&bios[1]);
+
+       if (size == 0 || val1 != 0x55 || val2 != 0xaa) {
                pci_unmap_rom(rdev->pdev, bios);
                return false;
        }
-       rdev->bios = kmemdup(bios, size, GFP_KERNEL);
+       rdev->bios = kzalloc(size, GFP_KERNEL);
        if (rdev->bios == NULL) {
                pci_unmap_rom(rdev->pdev, bios);
                return false;
        }
+       memcpy_fromio(rdev->bios, bios, size);
        pci_unmap_rom(rdev->pdev, bios);
        return true;
 }
index d13d1b5..df09ca7 100644 (file)
@@ -1030,37 +1030,59 @@ static inline bool radeon_test_signaled(struct radeon_fence *fence)
        return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
 }
 
+struct radeon_wait_cb {
+       struct fence_cb base;
+       struct task_struct *task;
+};
+
+static void
+radeon_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
+{
+       struct radeon_wait_cb *wait =
+               container_of(cb, struct radeon_wait_cb, base);
+
+       wake_up_process(wait->task);
+}
+
 static signed long radeon_fence_default_wait(struct fence *f, bool intr,
                                             signed long t)
 {
        struct radeon_fence *fence = to_radeon_fence(f);
        struct radeon_device *rdev = fence->rdev;
-       bool signaled;
+       struct radeon_wait_cb cb;
 
-       fence_enable_sw_signaling(&fence->base);
+       cb.task = current;
 
-       /*
-        * This function has to return -EDEADLK, but cannot hold
-        * exclusive_lock during the wait because some callers
-        * may already hold it. This means checking needs_reset without
-        * lock, and not fiddling with any gpu internals.
-        *
-        * The callback installed with fence_enable_sw_signaling will
-        * run before our wait_event_*timeout call, so we will see
-        * both the signaled fence and the changes to needs_reset.
-        */
+       if (fence_add_callback(f, &cb.base, radeon_fence_wait_cb))
+               return t;
+
+       while (t > 0) {
+               if (intr)
+                       set_current_state(TASK_INTERRUPTIBLE);
+               else
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+
+               /*
+                * radeon_test_signaled must be called after
+                * set_current_state to prevent a race with wake_up_process
+                */
+               if (radeon_test_signaled(fence))
+                       break;
+
+               if (rdev->needs_reset) {
+                       t = -EDEADLK;
+                       break;
+               }
+
+               t = schedule_timeout(t);
+
+               if (t > 0 && intr && signal_pending(current))
+                       t = -ERESTARTSYS;
+       }
+
+       __set_current_state(TASK_RUNNING);
+       fence_remove_callback(f, &cb.base);
 
-       if (intr)
-               t = wait_event_interruptible_timeout(rdev->fence_queue,
-                       ((signaled = radeon_test_signaled(fence)) ||
-                        rdev->needs_reset), t);
-       else
-               t = wait_event_timeout(rdev->fence_queue,
-                       ((signaled = radeon_test_signaled(fence)) ||
-                        rdev->needs_reset), t);
-
-       if (t > 0 && !signaled)
-               return -EDEADLK;
        return t;
 }
 
index 061eaa9..122eb56 100644 (file)
@@ -153,7 +153,7 @@ void radeon_kfd_device_init(struct radeon_device *rdev)
                        .compute_vmid_bitmap = 0xFF00,
 
                        .first_compute_pipe = 1,
-                       .compute_pipe_count = 8 - 1,
+                       .compute_pipe_count = 4 - 1,
                };
 
                radeon_doorbell_get_kfd_info(rdev,
index a69bd44..572b4db 100644 (file)
@@ -122,7 +122,6 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
        it = interval_tree_iter_first(&rmn->objects, start, end);
        while (it) {
                struct radeon_bo *bo;
-               struct fence *fence;
                int r;
 
                bo = container_of(it, struct radeon_bo, mn_it);
@@ -134,12 +133,10 @@ static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
                        continue;
                }
 
-               fence = reservation_object_get_excl(bo->tbo.resv);
-               if (fence) {
-                       r = radeon_fence_wait((struct radeon_fence *)fence, false);
-                       if (r)
-                               DRM_ERROR("(%d) failed to wait for user bo\n", r);
-               }
+               r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true,
+                       false, MAX_SCHEDULE_TIMEOUT);
+               if (r)
+                       DRM_ERROR("(%d) failed to wait for user bo\n", r);
 
                radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
                r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
index 43e0994..318165d 100644 (file)
@@ -173,17 +173,6 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
                else
                        rbo->placements[i].lpfn = 0;
        }
-
-       /*
-        * Use two-ended allocation depending on the buffer size to
-        * improve fragmentation quality.
-        * 512kb was measured as the most optimal number.
-        */
-       if (rbo->tbo.mem.size > 512 * 1024) {
-               for (i = 0; i < c; i++) {
-                       rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
-               }
-       }
 }
 
 int radeon_bo_create(struct radeon_device *rdev,
index 33cf410..c1ba83a 100644 (file)
@@ -837,12 +837,8 @@ static void radeon_dpm_thermal_work_handler(struct work_struct *work)
        radeon_pm_compute_clocks(rdev);
 }
 
-static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev,
-                                                    enum radeon_pm_state_type dpm_state)
+static bool radeon_dpm_single_display(struct radeon_device *rdev)
 {
-       int i;
-       struct radeon_ps *ps;
-       u32 ui_class;
        bool single_display = (rdev->pm.dpm.new_active_crtc_count < 2) ?
                true : false;
 
@@ -858,6 +854,17 @@ static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev,
        if (single_display && (r600_dpm_get_vrefresh(rdev) >= 120))
                single_display = false;
 
+       return single_display;
+}
+
+static struct radeon_ps *radeon_dpm_pick_power_state(struct radeon_device *rdev,
+                                                    enum radeon_pm_state_type dpm_state)
+{
+       int i;
+       struct radeon_ps *ps;
+       u32 ui_class;
+       bool single_display = radeon_dpm_single_display(rdev);
+
        /* certain older asics have a separare 3D performance state,
         * so try that first if the user selected performance
         */
@@ -983,6 +990,7 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev)
        struct radeon_ps *ps;
        enum radeon_pm_state_type dpm_state;
        int ret;
+       bool single_display = radeon_dpm_single_display(rdev);
 
        /* if dpm init failed */
        if (!rdev->pm.dpm_enabled)
@@ -1007,6 +1015,9 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev)
                /* vce just modifies an existing state so force a change */
                if (ps->vce_active != rdev->pm.dpm.vce_active)
                        goto force;
+               /* user has made a display change (such as timing) */
+               if (rdev->pm.dpm.single_display != single_display)
+                       goto force;
                if ((rdev->family < CHIP_BARTS) || (rdev->flags & RADEON_IS_IGP)) {
                        /* for pre-BTC and APUs if the num crtcs changed but state is the same,
                         * all we need to do is update the display configuration.
@@ -1069,6 +1080,7 @@ force:
 
        rdev->pm.dpm.current_active_crtcs = rdev->pm.dpm.new_active_crtcs;
        rdev->pm.dpm.current_active_crtc_count = rdev->pm.dpm.new_active_crtc_count;
+       rdev->pm.dpm.single_display = single_display;
 
        /* wait for the rings to drain */
        for (i = 0; i < RADEON_NUM_RINGS; i++) {
index 2456f69..8c78723 100644 (file)
@@ -495,7 +495,7 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
        seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
        seq_printf(m, "%u dwords in ring\n", count);
 
-       if (!ring->ready)
+       if (!ring->ring)
                return 0;
 
        /* print 8 dw before current rptr as often it's the last executed
index d02aa1d..b292aca 100644 (file)
@@ -598,6 +598,10 @@ static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
        enum dma_data_direction direction = write ?
                DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
 
+       /* double check that we don't free the table twice */
+       if (!ttm->sg->sgl)
+               return;
+
        /* free the sg table and pages again */
        dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
 
index e088e55..a7fb273 100644 (file)
@@ -7130,8 +7130,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
 
        if (!vclk || !dclk) {
-               /* keep the Bypass mode, put PLL to sleep */
-               WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
+               /* keep the Bypass mode */
                return 0;
        }
 
@@ -7147,8 +7146,7 @@ int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
        /* set VCO_MODE to 1 */
        WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
 
-       /* toggle UPLL_SLEEP to 1 then back to 0 */
-       WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
+       /* disable sleep mode */
        WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
 
        /* deassert UPLL_RESET */
index 1ac7bb8..fbbe78f 100644 (file)
@@ -156,6 +156,9 @@ int vce_v2_0_resume(struct radeon_device *rdev)
        WREG32(VCE_LMI_SWAP_CNTL1, 0);
        WREG32(VCE_LMI_VM_CTRL, 0);
 
+       WREG32(VCE_LMI_VCPU_CACHE_40BIT_BAR, addr >> 8);
+
+       addr &= 0xff;
        size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size);
        WREG32(VCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
        WREG32(VCE_VCPU_CACHE_SIZE0, size);
index 6c6b655..e13b9cb 100644 (file)
@@ -725,32 +725,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
                goto out_err1;
        }
 
-       ret = ttm_bo_init_mm(&dev_priv->bdev, TTM_PL_VRAM,
-                            (dev_priv->vram_size >> PAGE_SHIFT));
-       if (unlikely(ret != 0)) {
-               DRM_ERROR("Failed initializing memory manager for VRAM.\n");
-               goto out_err2;
-       }
-
-       dev_priv->has_gmr = true;
-       if (((dev_priv->capabilities & (SVGA_CAP_GMR | SVGA_CAP_GMR2)) == 0) ||
-           refuse_dma || ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_GMR,
-                                        VMW_PL_GMR) != 0) {
-               DRM_INFO("No GMR memory available. "
-                        "Graphics memory resources are very limited.\n");
-               dev_priv->has_gmr = false;
-       }
-
-       if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) {
-               dev_priv->has_mob = true;
-               if (ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_MOB,
-                                  VMW_PL_MOB) != 0) {
-                       DRM_INFO("No MOB memory available. "
-                                "3D will be disabled.\n");
-                       dev_priv->has_mob = false;
-               }
-       }
-
        dev_priv->mmio_mtrr = arch_phys_wc_add(dev_priv->mmio_start,
                                               dev_priv->mmio_size);
 
@@ -813,6 +787,33 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
                goto out_no_fman;
        }
 
+
+       ret = ttm_bo_init_mm(&dev_priv->bdev, TTM_PL_VRAM,
+                            (dev_priv->vram_size >> PAGE_SHIFT));
+       if (unlikely(ret != 0)) {
+               DRM_ERROR("Failed initializing memory manager for VRAM.\n");
+               goto out_no_vram;
+       }
+
+       dev_priv->has_gmr = true;
+       if (((dev_priv->capabilities & (SVGA_CAP_GMR | SVGA_CAP_GMR2)) == 0) ||
+           refuse_dma || ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_GMR,
+                                        VMW_PL_GMR) != 0) {
+               DRM_INFO("No GMR memory available. "
+                        "Graphics memory resources are very limited.\n");
+               dev_priv->has_gmr = false;
+       }
+
+       if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS) {
+               dev_priv->has_mob = true;
+               if (ttm_bo_init_mm(&dev_priv->bdev, VMW_PL_MOB,
+                                  VMW_PL_MOB) != 0) {
+                       DRM_INFO("No MOB memory available. "
+                                "3D will be disabled.\n");
+                       dev_priv->has_mob = false;
+               }
+       }
+
        vmw_kms_save_vga(dev_priv);
 
        /* Start kms and overlay systems, needs fifo. */
@@ -838,6 +839,12 @@ out_no_fifo:
        vmw_kms_close(dev_priv);
 out_no_kms:
        vmw_kms_restore_vga(dev_priv);
+       if (dev_priv->has_mob)
+               (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB);
+       if (dev_priv->has_gmr)
+               (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR);
+       (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM);
+out_no_vram:
        vmw_fence_manager_takedown(dev_priv->fman);
 out_no_fman:
        if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
@@ -853,12 +860,6 @@ out_err4:
        iounmap(dev_priv->mmio_virt);
 out_err3:
        arch_phys_wc_del(dev_priv->mmio_mtrr);
-       if (dev_priv->has_mob)
-               (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB);
-       if (dev_priv->has_gmr)
-               (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR);
-       (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM);
-out_err2:
        (void)ttm_bo_device_release(&dev_priv->bdev);
 out_err1:
        vmw_ttm_global_release(dev_priv);
@@ -887,6 +888,13 @@ static int vmw_driver_unload(struct drm_device *dev)
        }
        vmw_kms_close(dev_priv);
        vmw_overlay_close(dev_priv);
+
+       if (dev_priv->has_mob)
+               (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB);
+       if (dev_priv->has_gmr)
+               (void)ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR);
+       (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM);
+
        vmw_fence_manager_takedown(dev_priv->fman);
        if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
                drm_irq_uninstall(dev_priv->dev);
@@ -898,11 +906,6 @@ static int vmw_driver_unload(struct drm_device *dev)
        ttm_object_device_release(&dev_priv->tdev);
        iounmap(dev_priv->mmio_virt);
        arch_phys_wc_del(dev_priv->mmio_mtrr);
-       if (dev_priv->has_mob)
-               (void) ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_MOB);
-       if (dev_priv->has_gmr)
-               (void)ttm_bo_clean_mm(&dev_priv->bdev, VMW_PL_GMR);
-       (void)ttm_bo_clean_mm(&dev_priv->bdev, TTM_PL_VRAM);
        (void)ttm_bo_device_release(&dev_priv->bdev);
        vmw_ttm_global_release(dev_priv);
 
@@ -1235,6 +1238,7 @@ static void vmw_remove(struct pci_dev *pdev)
 {
        struct drm_device *dev = pci_get_drvdata(pdev);
 
+       pci_disable_device(pdev);
        drm_put_dev(dev);
 }
 
index 33176d0..654c8da 100644 (file)
@@ -890,7 +890,8 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
        ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo);
        if (unlikely(ret != 0)) {
                DRM_ERROR("Could not find or use MOB buffer.\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out_no_reloc;
        }
        bo = &vmw_bo->base;
 
@@ -914,7 +915,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
 
 out_no_reloc:
        vmw_dmabuf_unreference(&vmw_bo);
-       vmw_bo_p = NULL;
+       *vmw_bo_p = NULL;
        return ret;
 }
 
@@ -951,7 +952,8 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
        ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo);
        if (unlikely(ret != 0)) {
                DRM_ERROR("Could not find or use GMR region.\n");
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out_no_reloc;
        }
        bo = &vmw_bo->base;
 
@@ -974,7 +976,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 
 out_no_reloc:
        vmw_dmabuf_unreference(&vmw_bo);
-       vmw_bo_p = NULL;
+       *vmw_bo_p = NULL;
        return ret;
 }
 
@@ -2780,13 +2782,11 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
                                  NULL, arg->command_size, arg->throttle_us,
                                  (void __user *)(unsigned long)arg->fence_rep,
                                  NULL);
-
+       ttm_read_unlock(&dev_priv->reservation_sem);
        if (unlikely(ret != 0))
-               goto out_unlock;
+               return ret;
 
        vmw_kms_cursor_post_execbuf(dev_priv);
 
-out_unlock:
-       ttm_read_unlock(&dev_priv->reservation_sem);
-       return ret;
+       return 0;
 }
index 8725b79..07cda8c 100644 (file)
@@ -2033,23 +2033,17 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
        int i;
        struct drm_mode_config *mode_config = &dev->mode_config;
 
-       ret = ttm_read_lock(&dev_priv->reservation_sem, true);
-       if (unlikely(ret != 0))
-               return ret;
-
        if (!arg->num_outputs) {
                struct drm_vmw_rect def_rect = {0, 0, 800, 600};
                vmw_du_update_layout(dev_priv, 1, &def_rect);
-               goto out_unlock;
+               return 0;
        }
 
        rects_size = arg->num_outputs * sizeof(struct drm_vmw_rect);
        rects = kcalloc(arg->num_outputs, sizeof(struct drm_vmw_rect),
                        GFP_KERNEL);
-       if (unlikely(!rects)) {
-               ret = -ENOMEM;
-               goto out_unlock;
-       }
+       if (unlikely(!rects))
+               return -ENOMEM;
 
        user_rects = (void __user *)(unsigned long)arg->rects;
        ret = copy_from_user(rects, user_rects, rects_size);
@@ -2074,7 +2068,5 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
 
 out_free:
        kfree(rects);
-out_unlock:
-       ttm_read_unlock(&dev_priv->reservation_sem);
        return ret;
 }
index 7c669c3..56ce8c2 100644 (file)
@@ -1959,6 +1959,7 @@ static const struct hid_device_id hid_have_special_driver[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_THRUSTMASTER, 0xb65a) },
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_BT) },
        { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_PRO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED, USB_DEVICE_ID_TOPSEED_CYBERLINK) },
        { HID_USB_DEVICE(USB_VENDOR_ID_TOPSEED2, USB_DEVICE_ID_TOPSEED2_RF_COMBO) },
        { HID_USB_DEVICE(USB_VENDOR_ID_TWINHAN, USB_DEVICE_ID_TWINHAN_IR_REMOTE) },
index 204312b..9c47867 100644 (file)
 #define USB_VENDOR_ID_LOGITECH         0x046d
 #define USB_DEVICE_ID_LOGITECH_AUDIOHUB 0x0a0e
 #define USB_DEVICE_ID_LOGITECH_T651    0xb00c
+#define USB_DEVICE_ID_LOGITECH_C077    0xc007
 #define USB_DEVICE_ID_LOGITECH_RECEIVER        0xc101
 #define USB_DEVICE_ID_LOGITECH_HARMONY_FIRST  0xc110
 #define USB_DEVICE_ID_LOGITECH_HARMONY_LAST 0xc14f
 #define USB_VENDOR_ID_TIVO             0x150a
 #define USB_DEVICE_ID_TIVO_SLIDE_BT    0x1200
 #define USB_DEVICE_ID_TIVO_SLIDE       0x1201
+#define USB_DEVICE_ID_TIVO_SLIDE_PRO   0x1203
 
 #define USB_VENDOR_ID_TOPSEED          0x0766
 #define USB_DEVICE_ID_TOPSEED_CYBERLINK        0x0204
index d790d8d..d986969 100644 (file)
@@ -64,6 +64,7 @@ static const struct hid_device_id tivo_devices[] = {
        /* TiVo Slide Bluetooth remote, pairs with a Broadcom dongle */
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_BT) },
        { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_TIVO, USB_DEVICE_ID_TIVO_SLIDE_PRO) },
        { }
 };
 MODULE_DEVICE_TABLE(hid, tivo_devices);
index 9be99a6..a821277 100644 (file)
@@ -78,6 +78,7 @@ static const struct hid_blacklist {
        { USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_TS2700, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_FORMOSA, USB_DEVICE_ID_FORMOSA_IR_RECEIVER, HID_QUIRK_NO_INIT_REPORTS },
        { USB_VENDOR_ID_FREESCALE, USB_DEVICE_ID_FREESCALE_MX28, HID_QUIRK_NOGET },
+       { USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_C077, HID_QUIRK_ALWAYS_POLL },
        { USB_VENDOR_ID_MGE, USB_DEVICE_ID_MGE_UPS, HID_QUIRK_NOGET },
        { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_3, HID_QUIRK_NO_INIT_REPORTS },
        { USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_3_JP, HID_QUIRK_NO_INIT_REPORTS },
index 046351c..bbe32d6 100644 (file)
@@ -551,9 +551,13 @@ static int wacom_intuos_inout(struct wacom_wac *wacom)
           (features->type == CINTIQ && !(data[1] & 0x40)))
                return 1;
 
-       if (features->quirks & WACOM_QUIRK_MULTI_INPUT)
+       if (wacom->shared) {
                wacom->shared->stylus_in_proximity = true;
 
+               if (wacom->shared->touch_down)
+                       return 1;
+       }
+
        /* in Range while exiting */
        if (((data[1] & 0xfe) == 0x20) && wacom->reporting_data) {
                input_report_key(input, BTN_TOUCH, 0);
@@ -1043,27 +1047,28 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom)
        struct input_dev *input = wacom->input;
        unsigned char *data = wacom->data;
        int i;
-       int current_num_contacts = 0;
+       int current_num_contacts = data[61];
        int contacts_to_send = 0;
        int num_contacts_left = 4; /* maximum contacts per packet */
        int byte_per_packet = WACOM_BYTES_PER_24HDT_PACKET;
        int y_offset = 2;
+       static int contact_with_no_pen_down_count = 0;
 
        if (wacom->features.type == WACOM_27QHDT) {
                current_num_contacts = data[63];
                num_contacts_left = 10;
                byte_per_packet = WACOM_BYTES_PER_QHDTHID_PACKET;
                y_offset = 0;
-       } else {
-               current_num_contacts = data[61];
        }
 
        /*
         * First packet resets the counter since only the first
         * packet in series will have non-zero current_num_contacts.
         */
-       if (current_num_contacts)
+       if (current_num_contacts) {
                wacom->num_contacts_left = current_num_contacts;
+               contact_with_no_pen_down_count = 0;
+       }
 
        contacts_to_send = min(num_contacts_left, wacom->num_contacts_left);
 
@@ -1096,15 +1101,16 @@ static int wacom_24hdt_irq(struct wacom_wac *wacom)
                                input_report_abs(input, ABS_MT_WIDTH_MINOR, min(w, h));
                                input_report_abs(input, ABS_MT_ORIENTATION, w > h);
                        }
+                       contact_with_no_pen_down_count++;
                }
        }
        input_mt_report_pointer_emulation(input, true);
 
        wacom->num_contacts_left -= contacts_to_send;
-       if (wacom->num_contacts_left <= 0)
+       if (wacom->num_contacts_left <= 0) {
                wacom->num_contacts_left = 0;
-
-       wacom->shared->touch_down = (wacom->num_contacts_left > 0);
+               wacom->shared->touch_down = (contact_with_no_pen_down_count > 0);
+       }
        return 1;
 }
 
@@ -1116,6 +1122,7 @@ static int wacom_mt_touch(struct wacom_wac *wacom)
        int current_num_contacts = data[2];
        int contacts_to_send = 0;
        int x_offset = 0;
+       static int contact_with_no_pen_down_count = 0;
 
        /* MTTPC does not support Height and Width */
        if (wacom->features.type == MTTPC || wacom->features.type == MTTPC_B)
@@ -1125,8 +1132,10 @@ static int wacom_mt_touch(struct wacom_wac *wacom)
         * First packet resets the counter since only the first
         * packet in series will have non-zero current_num_contacts.
         */
-       if (current_num_contacts)
+       if (current_num_contacts) {
                wacom->num_contacts_left = current_num_contacts;
+               contact_with_no_pen_down_count = 0;
+       }
 
        /* There are at most 5 contacts per packet */
        contacts_to_send = min(5, wacom->num_contacts_left);
@@ -1147,15 +1156,16 @@ static int wacom_mt_touch(struct wacom_wac *wacom)
                        int y = get_unaligned_le16(&data[offset + x_offset + 9]);
                        input_report_abs(input, ABS_MT_POSITION_X, x);
                        input_report_abs(input, ABS_MT_POSITION_Y, y);
+                       contact_with_no_pen_down_count++;
                }
        }
        input_mt_report_pointer_emulation(input, true);
 
        wacom->num_contacts_left -= contacts_to_send;
-       if (wacom->num_contacts_left < 0)
+       if (wacom->num_contacts_left <= 0) {
                wacom->num_contacts_left = 0;
-
-       wacom->shared->touch_down = (wacom->num_contacts_left > 0);
+               wacom->shared->touch_down = (contact_with_no_pen_down_count > 0);
+       }
        return 1;
 }
 
@@ -1193,29 +1203,25 @@ static int wacom_tpc_single_touch(struct wacom_wac *wacom, size_t len)
 {
        unsigned char *data = wacom->data;
        struct input_dev *input = wacom->input;
-       bool prox;
+       bool prox = !wacom->shared->stylus_in_proximity;
        int x = 0, y = 0;
 
        if (wacom->features.touch_max > 1 || len > WACOM_PKGLEN_TPC2FG)
                return 0;
 
-       if (!wacom->shared->stylus_in_proximity) {
-               if (len == WACOM_PKGLEN_TPC1FG) {
-                       prox = data[0] & 0x01;
-                       x = get_unaligned_le16(&data[1]);
-                       y = get_unaligned_le16(&data[3]);
-               } else if (len == WACOM_PKGLEN_TPC1FG_B) {
-                       prox = data[2] & 0x01;
-                       x = get_unaligned_le16(&data[3]);
-                       y = get_unaligned_le16(&data[5]);
-               } else {
-                       prox = data[1] & 0x01;
-                       x = le16_to_cpup((__le16 *)&data[2]);
-                       y = le16_to_cpup((__le16 *)&data[4]);
-               }
-       } else
-               /* force touch out when pen is in prox */
-               prox = 0;
+       if (len == WACOM_PKGLEN_TPC1FG) {
+               prox = prox && (data[0] & 0x01);
+               x = get_unaligned_le16(&data[1]);
+               y = get_unaligned_le16(&data[3]);
+       } else if (len == WACOM_PKGLEN_TPC1FG_B) {
+               prox = prox && (data[2] & 0x01);
+               x = get_unaligned_le16(&data[3]);
+               y = get_unaligned_le16(&data[5]);
+       } else {
+               prox = prox && (data[1] & 0x01);
+               x = le16_to_cpup((__le16 *)&data[2]);
+               y = le16_to_cpup((__le16 *)&data[4]);
+       }
 
        if (prox) {
                input_report_abs(input, ABS_X, x);
@@ -1613,6 +1619,7 @@ static int wacom_bpt_touch(struct wacom_wac *wacom)
        struct input_dev *pad_input = wacom->pad_input;
        unsigned char *data = wacom->data;
        int i;
+       int contact_with_no_pen_down_count = 0;
 
        if (data[0] != 0x02)
            return 0;
@@ -1640,6 +1647,7 @@ static int wacom_bpt_touch(struct wacom_wac *wacom)
                        }
                        input_report_abs(input, ABS_MT_POSITION_X, x);
                        input_report_abs(input, ABS_MT_POSITION_Y, y);
+                       contact_with_no_pen_down_count++;
                }
        }
 
@@ -1649,11 +1657,12 @@ static int wacom_bpt_touch(struct wacom_wac *wacom)
        input_report_key(pad_input, BTN_FORWARD, (data[1] & 0x04) != 0);
        input_report_key(pad_input, BTN_BACK, (data[1] & 0x02) != 0);
        input_report_key(pad_input, BTN_RIGHT, (data[1] & 0x01) != 0);
+       wacom->shared->touch_down = (contact_with_no_pen_down_count > 0);
 
        return 1;
 }
 
-static void wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data)
+static int wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data, int last_touch_count)
 {
        struct wacom_features *features = &wacom->features;
        struct input_dev *input = wacom->input;
@@ -1661,7 +1670,7 @@ static void wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data)
        int slot = input_mt_get_slot_by_key(input, data[0]);
 
        if (slot < 0)
-               return;
+               return 0;
 
        touch = touch && !wacom->shared->stylus_in_proximity;
 
@@ -1693,7 +1702,9 @@ static void wacom_bpt3_touch_msg(struct wacom_wac *wacom, unsigned char *data)
                input_report_abs(input, ABS_MT_POSITION_Y, y);
                input_report_abs(input, ABS_MT_TOUCH_MAJOR, width);
                input_report_abs(input, ABS_MT_TOUCH_MINOR, height);
+               last_touch_count++;
        }
+       return last_touch_count;
 }
 
 static void wacom_bpt3_button_msg(struct wacom_wac *wacom, unsigned char *data)
@@ -1718,6 +1729,7 @@ static int wacom_bpt3_touch(struct wacom_wac *wacom)
        unsigned char *data = wacom->data;
        int count = data[1] & 0x07;
        int i;
+       int contact_with_no_pen_down_count = 0;
 
        if (data[0] != 0x02)
            return 0;
@@ -1728,12 +1740,15 @@ static int wacom_bpt3_touch(struct wacom_wac *wacom)
                int msg_id = data[offset];
 
                if (msg_id >= 2 && msg_id <= 17)
-                       wacom_bpt3_touch_msg(wacom, data + offset);
+                       contact_with_no_pen_down_count = 
+                           wacom_bpt3_touch_msg(wacom, data + offset,
+                                                contact_with_no_pen_down_count);
                else if (msg_id == 128)
                        wacom_bpt3_button_msg(wacom, data + offset);
 
        }
        input_mt_report_pointer_emulation(input, true);
+       wacom->shared->touch_down = (contact_with_no_pen_down_count > 0);
 
        return 1;
 }
@@ -1759,6 +1774,9 @@ static int wacom_bpt_pen(struct wacom_wac *wacom)
                return 0;
        }
 
+       if (wacom->shared->touch_down)
+               return 0;
+
        prox = (data[1] & 0x20) == 0x20;
 
        /*
index 210cf48..edf274c 100644 (file)
@@ -679,9 +679,6 @@ static int i2c_device_remove(struct device *dev)
                status = driver->remove(client);
        }
 
-       if (dev->of_node)
-               irq_dispose_mapping(client->irq);
-
        dev_pm_domain_detach(&client->dev, true);
        return status;
 }
index 1793aea..6eb738c 100644 (file)
@@ -1793,11 +1793,11 @@ static void idetape_setup(ide_drive_t *drive, idetape_tape_t *tape, int minor)
        tape->best_dsc_rw_freq = clamp_t(unsigned long, t, IDETAPE_DSC_RW_MIN,
                                         IDETAPE_DSC_RW_MAX);
        printk(KERN_INFO "ide-tape: %s <-> %s: %dKBps, %d*%dkB buffer, "
-               "%lums tDSC%s\n",
+               "%ums tDSC%s\n",
                drive->name, tape->name, *(u16 *)&tape->caps[14],
                (*(u16 *)&tape->caps[16] * 512) / tape->buffer_size,
                tape->buffer_size / 1024,
-               tape->best_dsc_rw_freq * 1000 / HZ,
+               jiffies_to_msecs(tape->best_dsc_rw_freq),
                (drive->dev_flags & IDE_DFLAG_USING_DMA) ? ", DMA" : "");
 
        ide_proc_register_driver(drive, tape->driver);
index b0e5852..5c979d0 100644 (file)
@@ -55,7 +55,7 @@
 
 #include <linux/kernel.h>
 #include <linux/cpuidle.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
 #include <trace/events/power.h>
 #include <linux/sched.h>
 #include <linux/notifier.h>
@@ -638,12 +638,12 @@ static int intel_idle(struct cpuidle_device *dev,
                leave_mm(cpu);
 
        if (!(lapic_timer_reliable_states & (1 << (cstate))))
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+               tick_broadcast_enter();
 
        mwait_idle_with_hints(eax, ecx);
 
        if (!(lapic_timer_reliable_states & (1 << (cstate))))
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+               tick_broadcast_exit();
 
        return index;
 }
@@ -665,13 +665,12 @@ static void intel_idle_freeze(struct cpuidle_device *dev,
 
 static void __setup_broadcast_timer(void *arg)
 {
-       unsigned long reason = (unsigned long)arg;
-       int cpu = smp_processor_id();
-
-       reason = reason ?
-               CLOCK_EVT_NOTIFY_BROADCAST_ON : CLOCK_EVT_NOTIFY_BROADCAST_OFF;
+       unsigned long on = (unsigned long)arg;
 
-       clockevents_notify(reason, &cpu);
+       if (on)
+               tick_broadcast_enable();
+       else
+               tick_broadcast_disable();
 }
 
 static int cpu_hotplug_notify(struct notifier_block *n,
index 1096da3..75c6d21 100644 (file)
@@ -659,7 +659,7 @@ static irqreturn_t bma180_trigger_handler(int irq, void *p)
 
        mutex_lock(&data->mutex);
 
-       for_each_set_bit(bit, indio_dev->buffer->scan_mask,
+       for_each_set_bit(bit, indio_dev->active_scan_mask,
                         indio_dev->masklength) {
                ret = bma180_get_data_reg(data, bit);
                if (ret < 0) {
index 066d0c0..75567fd 100644 (file)
@@ -168,14 +168,14 @@ static const struct {
        int val;
        int val2;
        u8 bw_bits;
-} bmc150_accel_samp_freq_table[] = { {7, 810000, 0x08},
-                                    {15, 630000, 0x09},
-                                    {31, 250000, 0x0A},
-                                    {62, 500000, 0x0B},
-                                    {125, 0, 0x0C},
-                                    {250, 0, 0x0D},
-                                    {500, 0, 0x0E},
-                                    {1000, 0, 0x0F} };
+} bmc150_accel_samp_freq_table[] = { {15, 620000, 0x08},
+                                    {31, 260000, 0x09},
+                                    {62, 500000, 0x0A},
+                                    {125, 0, 0x0B},
+                                    {250, 0, 0x0C},
+                                    {500, 0, 0x0D},
+                                    {1000, 0, 0x0E},
+                                    {2000, 0, 0x0F} };
 
 static const struct {
        int bw_bits;
@@ -840,7 +840,7 @@ static int bmc150_accel_validate_trigger(struct iio_dev *indio_dev,
 }
 
 static IIO_CONST_ATTR_SAMP_FREQ_AVAIL(
-               "7.810000 15.630000 31.250000 62.500000 125 250 500 1000");
+               "15.620000 31.260000 62.50000 125 250 500 1000 2000");
 
 static struct attribute *bmc150_accel_attributes[] = {
        &iio_const_attr_sampling_frequency_available.dev_attr.attr,
@@ -986,7 +986,7 @@ static irqreturn_t bmc150_accel_trigger_handler(int irq, void *p)
        int bit, ret, i = 0;
 
        mutex_lock(&data->mutex);
-       for_each_set_bit(bit, indio_dev->buffer->scan_mask,
+       for_each_set_bit(bit, indio_dev->active_scan_mask,
                         indio_dev->masklength) {
                ret = i2c_smbus_read_word_data(data->client,
                                               BMC150_ACCEL_AXIS_TO_REG(bit));
index 567de26..1a63795 100644 (file)
@@ -956,7 +956,7 @@ static irqreturn_t kxcjk1013_trigger_handler(int irq, void *p)
 
        mutex_lock(&data->mutex);
 
-       for_each_set_bit(bit, indio_dev->buffer->scan_mask,
+       for_each_set_bit(bit, indio_dev->active_scan_mask,
                         indio_dev->masklength) {
                ret = kxcjk1013_get_acc_reg(data, bit);
                if (ret < 0) {
index 202daf8..46379b1 100644 (file)
@@ -137,7 +137,8 @@ config AXP288_ADC
 
 config CC10001_ADC
        tristate "Cosmic Circuits 10001 ADC driver"
-       depends on HAS_IOMEM || HAVE_CLK || REGULATOR
+       depends on HAVE_CLK || REGULATOR
+       depends on HAS_IOMEM
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
        help
index ff61ae5..8a0eb4a 100644 (file)
@@ -544,7 +544,6 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state)
 {
        struct iio_dev *idev = iio_trigger_get_drvdata(trig);
        struct at91_adc_state *st = iio_priv(idev);
-       struct iio_buffer *buffer = idev->buffer;
        struct at91_adc_reg_desc *reg = st->registers;
        u32 status = at91_adc_readl(st, reg->trigger_register);
        int value;
@@ -564,7 +563,7 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state)
                at91_adc_writel(st, reg->trigger_register,
                                status | value);
 
-               for_each_set_bit(bit, buffer->scan_mask,
+               for_each_set_bit(bit, idev->active_scan_mask,
                                 st->num_channels) {
                        struct iio_chan_spec const *chan = idev->channels + bit;
                        at91_adc_writel(st, AT91_ADC_CHER,
@@ -579,7 +578,7 @@ static int at91_adc_configure_trigger(struct iio_trigger *trig, bool state)
                at91_adc_writel(st, reg->trigger_register,
                                status & ~value);
 
-               for_each_set_bit(bit, buffer->scan_mask,
+               for_each_set_bit(bit, idev->active_scan_mask,
                                 st->num_channels) {
                        struct iio_chan_spec const *chan = idev->channels + bit;
                        at91_adc_writel(st, AT91_ADC_CHDR,
index 5167225..b96c636 100644 (file)
                .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ), \
        }
 
-/* LSB is in nV to eliminate floating point */
-static const u32 rates_to_lsb[] = {1000000, 250000, 62500, 15625};
-
-/*
- *  scales calculated as:
- *  rates_to_lsb[sample_rate] / (1 << pga);
- *  pga is 1 for 0, 2
- */
-
 static const int mcp3422_scales[4][4] = {
-       { 1000000, 250000, 62500, 15625 },
-       { 500000 , 125000, 31250, 7812 },
-       { 250000 , 62500 , 15625, 3906 },
-       { 125000 , 31250 , 7812 , 1953 } };
+       { 1000000, 500000, 250000, 125000 },
+       { 250000 , 125000, 62500 , 31250  },
+       { 62500  , 31250 , 15625 , 7812   },
+       { 15625  , 7812  , 3906  , 1953   } };
 
 /* Constant msleep times for data acquisitions */
 static const int mcp3422_read_times[4] = {
index b9666f2..fabd24e 100644 (file)
@@ -296,7 +296,8 @@ static int iadc_do_conversion(struct iadc_chip *iadc, int chan, u16 *data)
        if (iadc->poll_eoc) {
                ret = iadc_poll_wait_eoc(iadc, wait);
        } else {
-               ret = wait_for_completion_timeout(&iadc->complete, wait);
+               ret = wait_for_completion_timeout(&iadc->complete,
+                       usecs_to_jiffies(wait));
                if (!ret)
                        ret = -ETIMEDOUT;
                else
index 2e5cc44..a0e7161 100644 (file)
@@ -188,12 +188,11 @@ static int tiadc_buffer_preenable(struct iio_dev *indio_dev)
 static int tiadc_buffer_postenable(struct iio_dev *indio_dev)
 {
        struct tiadc_device *adc_dev = iio_priv(indio_dev);
-       struct iio_buffer *buffer = indio_dev->buffer;
        unsigned int enb = 0;
        u8 bit;
 
        tiadc_step_config(indio_dev);
-       for_each_set_bit(bit, buffer->scan_mask, adc_dev->channels)
+       for_each_set_bit(bit, indio_dev->active_scan_mask, adc_dev->channels)
                enb |= (get_adc_step_bit(adc_dev, bit) << 1);
        adc_dev->buffer_en_ch_steps = enb;
 
index 8ec353c..e63b8e7 100644 (file)
@@ -141,9 +141,13 @@ struct vf610_adc {
        struct regulator *vref;
        struct vf610_adc_feature adc_feature;
 
+       u32 sample_freq_avail[5];
+
        struct completion completion;
 };
 
+static const u32 vf610_hw_avgs[] = { 1, 4, 8, 16, 32 };
+
 #define VF610_ADC_CHAN(_idx, _chan_type) {                     \
        .type = (_chan_type),                                   \
        .indexed = 1,                                           \
@@ -180,35 +184,47 @@ static const struct iio_chan_spec vf610_adc_iio_channels[] = {
        /* sentinel */
 };
 
-/*
- * ADC sample frequency, unit is ADCK cycles.
- * ADC clk source is ipg clock, which is the same as bus clock.
- *
- * ADC conversion time = SFCAdder + AverageNum x (BCT + LSTAdder)
- * SFCAdder: fixed to 6 ADCK cycles
- * AverageNum: 1, 4, 8, 16, 32 samples for hardware average.
- * BCT (Base Conversion Time): fixed to 25 ADCK cycles for 12 bit mode
- * LSTAdder(Long Sample Time): fixed to 3 ADCK cycles
- *
- * By default, enable 12 bit resolution mode, clock source
- * set to ipg clock, So get below frequency group:
- */
-static const u32 vf610_sample_freq_avail[5] =
-{1941176, 559332, 286957, 145374, 73171};
+static inline void vf610_adc_calculate_rates(struct vf610_adc *info)
+{
+       unsigned long adck_rate, ipg_rate = clk_get_rate(info->clk);
+       int i;
+
+       /*
+        * Calculate ADC sample frequencies.
+        * Sample time unit is ADCK cycles. ADCK clk source is ipg clock,
+        * which is the same as bus clock.
+        *
+        * ADC conversion time = SFCAdder + AverageNum x (BCT + LSTAdder)
+        * SFCAdder: fixed to 6 ADCK cycles
+        * AverageNum: 1, 4, 8, 16, 32 samples for hardware average.
+        * BCT (Base Conversion Time): fixed to 25 ADCK cycles for 12 bit mode
+        * LSTAdder(Long Sample Time): fixed to 3 ADCK cycles
+        */
+       adck_rate = ipg_rate / info->adc_feature.clk_div;
+       for (i = 0; i < ARRAY_SIZE(vf610_hw_avgs); i++)
+               info->sample_freq_avail[i] =
+                       adck_rate / (6 + vf610_hw_avgs[i] * (25 + 3));
+}
 
 static inline void vf610_adc_cfg_init(struct vf610_adc *info)
 {
+       struct vf610_adc_feature *adc_feature = &info->adc_feature;
+
        /* set default Configuration for ADC controller */
-       info->adc_feature.clk_sel = VF610_ADCIOC_BUSCLK_SET;
-       info->adc_feature.vol_ref = VF610_ADCIOC_VR_VREF_SET;
+       adc_feature->clk_sel = VF610_ADCIOC_BUSCLK_SET;
+       adc_feature->vol_ref = VF610_ADCIOC_VR_VREF_SET;
+
+       adc_feature->calibration = true;
+       adc_feature->ovwren = true;
+
+       adc_feature->res_mode = 12;
+       adc_feature->sample_rate = 1;
+       adc_feature->lpm = true;
 
-       info->adc_feature.calibration = true;
-       info->adc_feature.ovwren = true;
+       /* Use a safe ADCK which is below 20MHz on all devices */
+       adc_feature->clk_div = 8;
 
-       info->adc_feature.clk_div = 1;
-       info->adc_feature.res_mode = 12;
-       info->adc_feature.sample_rate = 1;
-       info->adc_feature.lpm = true;
+       vf610_adc_calculate_rates(info);
 }
 
 static void vf610_adc_cfg_post_set(struct vf610_adc *info)
@@ -290,12 +306,10 @@ static void vf610_adc_cfg_set(struct vf610_adc *info)
 
        cfg_data = readl(info->regs + VF610_REG_ADC_CFG);
 
-       /* low power configuration */
        cfg_data &= ~VF610_ADC_ADLPC_EN;
        if (adc_feature->lpm)
                cfg_data |= VF610_ADC_ADLPC_EN;
 
-       /* disable high speed */
        cfg_data &= ~VF610_ADC_ADHSC_EN;
 
        writel(cfg_data, info->regs + VF610_REG_ADC_CFG);
@@ -435,10 +449,27 @@ static irqreturn_t vf610_adc_isr(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
-static IIO_CONST_ATTR_SAMP_FREQ_AVAIL("1941176, 559332, 286957, 145374, 73171");
+static ssize_t vf610_show_samp_freq_avail(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct vf610_adc *info = iio_priv(dev_to_iio_dev(dev));
+       size_t len = 0;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(info->sample_freq_avail); i++)
+               len += scnprintf(buf + len, PAGE_SIZE - len,
+                       "%u ", info->sample_freq_avail[i]);
+
+       /* replace trailing space by newline */
+       buf[len - 1] = '\n';
+
+       return len;
+}
+
+static IIO_DEV_ATTR_SAMP_FREQ_AVAIL(vf610_show_samp_freq_avail);
 
 static struct attribute *vf610_attributes[] = {
-       &iio_const_attr_sampling_frequency_available.dev_attr.attr,
+       &iio_dev_attr_sampling_frequency_available.dev_attr.attr,
        NULL
 };
 
@@ -502,7 +533,7 @@ static int vf610_read_raw(struct iio_dev *indio_dev,
                return IIO_VAL_FRACTIONAL_LOG2;
 
        case IIO_CHAN_INFO_SAMP_FREQ:
-               *val = vf610_sample_freq_avail[info->adc_feature.sample_rate];
+               *val = info->sample_freq_avail[info->adc_feature.sample_rate];
                *val2 = 0;
                return IIO_VAL_INT;
 
@@ -525,9 +556,9 @@ static int vf610_write_raw(struct iio_dev *indio_dev,
        switch (mask) {
                case IIO_CHAN_INFO_SAMP_FREQ:
                        for (i = 0;
-                               i < ARRAY_SIZE(vf610_sample_freq_avail);
+                               i < ARRAY_SIZE(info->sample_freq_avail);
                                i++)
-                               if (val == vf610_sample_freq_avail[i]) {
+                               if (val == info->sample_freq_avail[i]) {
                                        info->adc_feature.sample_rate = i;
                                        vf610_adc_sample_set(info);
                                        return 0;
index 52d7043..55a9008 100644 (file)
@@ -640,6 +640,7 @@ static int ssp_remove(struct spi_device *spi)
        return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
 static int ssp_suspend(struct device *dev)
 {
        int ret;
@@ -688,6 +689,7 @@ static int ssp_resume(struct device *dev)
 
        return 0;
 }
+#endif /* CONFIG_PM_SLEEP */
 
 static const struct dev_pm_ops ssp_pm_ops = {
        SET_SYSTEM_SLEEP_PM_OPS(ssp_suspend, ssp_resume)
index f57562a..15c73e2 100644 (file)
@@ -322,7 +322,7 @@ static int ad5686_probe(struct spi_device *spi)
        st = iio_priv(indio_dev);
        spi_set_drvdata(spi, indio_dev);
 
-       st->reg = devm_regulator_get(&spi->dev, "vcc");
+       st->reg = devm_regulator_get_optional(&spi->dev, "vcc");
        if (!IS_ERR(st->reg)) {
                ret = regulator_enable(st->reg);
                if (ret)
index 60451b3..ccf3ea7 100644 (file)
@@ -822,7 +822,7 @@ static irqreturn_t bmg160_trigger_handler(int irq, void *p)
        int bit, ret, i = 0;
 
        mutex_lock(&data->mutex);
-       for_each_set_bit(bit, indio_dev->buffer->scan_mask,
+       for_each_set_bit(bit, indio_dev->active_scan_mask,
                         indio_dev->masklength) {
                ret = i2c_smbus_read_word_data(data->client,
                                               BMG160_AXIS_TO_REG(bit));
index 623c145..7d79a1a 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/wait.h>
 #include <linux/bitops.h>
 #include <linux/completion.h>
+#include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/gpio.h>
 #include <linux/of_gpio.h>
 
 #define DHT11_DATA_VALID_TIME  2000000000  /* 2s in ns */
 
-#define DHT11_EDGES_PREAMBLE 4
+#define DHT11_EDGES_PREAMBLE 2
 #define DHT11_BITS_PER_READ 40
+/*
+ * Note that when reading the sensor actually 84 edges are detected, but
+ * since the last edge is not significant, we only store 83:
+ */
 #define DHT11_EDGES_PER_READ (2*DHT11_BITS_PER_READ + DHT11_EDGES_PREAMBLE + 1)
 
 /* Data transmission timing (nano seconds) */
@@ -57,6 +62,7 @@ struct dht11 {
        int                             irq;
 
        struct completion               completion;
+       struct mutex                    lock;
 
        s64                             timestamp;
        int                             temperature;
@@ -88,7 +94,7 @@ static int dht11_decode(struct dht11 *dht11, int offset)
        unsigned char temp_int, temp_dec, hum_int, hum_dec, checksum;
 
        /* Calculate timestamp resolution */
-       for (i = 0; i < dht11->num_edges; ++i) {
+       for (i = 1; i < dht11->num_edges; ++i) {
                t = dht11->edges[i].ts - dht11->edges[i-1].ts;
                if (t > 0 && t < timeres)
                        timeres = t;
@@ -138,6 +144,27 @@ static int dht11_decode(struct dht11 *dht11, int offset)
        return 0;
 }
 
+/*
+ * IRQ handler called on GPIO edges
+ */
+static irqreturn_t dht11_handle_irq(int irq, void *data)
+{
+       struct iio_dev *iio = data;
+       struct dht11 *dht11 = iio_priv(iio);
+
+       /* TODO: Consider making the handler safe for IRQ sharing */
+       if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
+               dht11->edges[dht11->num_edges].ts = iio_get_time_ns();
+               dht11->edges[dht11->num_edges++].value =
+                                               gpio_get_value(dht11->gpio);
+
+               if (dht11->num_edges >= DHT11_EDGES_PER_READ)
+                       complete(&dht11->completion);
+       }
+
+       return IRQ_HANDLED;
+}
+
 static int dht11_read_raw(struct iio_dev *iio_dev,
                        const struct iio_chan_spec *chan,
                        int *val, int *val2, long m)
@@ -145,6 +172,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
        struct dht11 *dht11 = iio_priv(iio_dev);
        int ret;
 
+       mutex_lock(&dht11->lock);
        if (dht11->timestamp + DHT11_DATA_VALID_TIME < iio_get_time_ns()) {
                reinit_completion(&dht11->completion);
 
@@ -157,8 +185,17 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
                if (ret)
                        goto err;
 
+               ret = request_irq(dht11->irq, dht11_handle_irq,
+                                 IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+                                 iio_dev->name, iio_dev);
+               if (ret)
+                       goto err;
+
                ret = wait_for_completion_killable_timeout(&dht11->completion,
                                                                 HZ);
+
+               free_irq(dht11->irq, iio_dev);
+
                if (ret == 0 && dht11->num_edges < DHT11_EDGES_PER_READ - 1) {
                        dev_err(&iio_dev->dev,
                                        "Only %d signal edges detected\n",
@@ -185,6 +222,7 @@ static int dht11_read_raw(struct iio_dev *iio_dev,
                ret = -EINVAL;
 err:
        dht11->num_edges = -1;
+       mutex_unlock(&dht11->lock);
        return ret;
 }
 
@@ -193,27 +231,6 @@ static const struct iio_info dht11_iio_info = {
        .read_raw               = dht11_read_raw,
 };
 
-/*
- * IRQ handler called on GPIO edges
-*/
-static irqreturn_t dht11_handle_irq(int irq, void *data)
-{
-       struct iio_dev *iio = data;
-       struct dht11 *dht11 = iio_priv(iio);
-
-       /* TODO: Consider making the handler safe for IRQ sharing */
-       if (dht11->num_edges < DHT11_EDGES_PER_READ && dht11->num_edges >= 0) {
-               dht11->edges[dht11->num_edges].ts = iio_get_time_ns();
-               dht11->edges[dht11->num_edges++].value =
-                                               gpio_get_value(dht11->gpio);
-
-               if (dht11->num_edges >= DHT11_EDGES_PER_READ)
-                       complete(&dht11->completion);
-       }
-
-       return IRQ_HANDLED;
-}
-
 static const struct iio_chan_spec dht11_chan_spec[] = {
        { .type = IIO_TEMP,
                .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), },
@@ -256,11 +273,6 @@ static int dht11_probe(struct platform_device *pdev)
                dev_err(dev, "GPIO %d has no interrupt\n", dht11->gpio);
                return -EINVAL;
        }
-       ret = devm_request_irq(dev, dht11->irq, dht11_handle_irq,
-                               IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
-                               pdev->name, iio);
-       if (ret)
-               return ret;
 
        dht11->timestamp = iio_get_time_ns() - DHT11_DATA_VALID_TIME - 1;
        dht11->num_edges = -1;
@@ -268,6 +280,7 @@ static int dht11_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, iio);
 
        init_completion(&dht11->completion);
+       mutex_init(&dht11->lock);
        iio->name = pdev->name;
        iio->dev.parent = &pdev->dev;
        iio->info = &dht11_iio_info;
index b541646..fa3b809 100644 (file)
@@ -45,12 +45,12 @@ static int si7020_read_raw(struct iio_dev *indio_dev,
                           struct iio_chan_spec const *chan, int *val,
                           int *val2, long mask)
 {
-       struct i2c_client *client = iio_priv(indio_dev);
+       struct i2c_client **client = iio_priv(indio_dev);
        int ret;
 
        switch (mask) {
        case IIO_CHAN_INFO_RAW:
-               ret = i2c_smbus_read_word_data(client,
+               ret = i2c_smbus_read_word_data(*client,
                                               chan->type == IIO_TEMP ?
                                               SI7020CMD_TEMP_HOLD :
                                               SI7020CMD_RH_HOLD);
@@ -126,7 +126,7 @@ static int si7020_probe(struct i2c_client *client,
        /* Wait the maximum power-up time after software reset. */
        msleep(15);
 
-       indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*client));
+       indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*data));
        if (!indio_dev)
                return -ENOMEM;
 
index b70873d..fa795dc 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/debugfs.h>
+#include <linux/bitops.h>
 
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
@@ -414,7 +415,7 @@ static int adis16400_read_raw(struct iio_dev *indio_dev,
                mutex_unlock(&indio_dev->mlock);
                if (ret)
                        return ret;
-               val16 = ((val16 & 0xFFF) << 4) >> 4;
+               val16 = sign_extend32(val16, 11);
                *val = val16;
                return IIO_VAL_INT;
        case IIO_CHAN_INFO_OFFSET:
index e0017c2..f53e9a8 100644 (file)
@@ -60,7 +60,7 @@ int adis_probe_trigger(struct adis *adis, struct iio_dev *indio_dev)
        iio_trigger_set_drvdata(adis->trig, adis);
        ret = iio_trigger_register(adis->trig);
 
-       indio_dev->trig = adis->trig;
+       indio_dev->trig = iio_trigger_get(adis->trig);
        if (ret)
                goto error_free_irq;
 
index f73e60b..ef76afe 100644 (file)
@@ -410,42 +410,46 @@ error_read_raw:
        }
 }
 
-static int inv_mpu6050_write_fsr(struct inv_mpu6050_state *st, int fsr)
+static int inv_mpu6050_write_gyro_scale(struct inv_mpu6050_state *st, int val)
 {
-       int result;
+       int result, i;
        u8 d;
 
-       if (fsr < 0 || fsr > INV_MPU6050_MAX_GYRO_FS_PARAM)
-               return -EINVAL;
-       if (fsr == st->chip_config.fsr)
-               return 0;
+       for (i = 0; i < ARRAY_SIZE(gyro_scale_6050); ++i) {
+               if (gyro_scale_6050[i] == val) {
+                       d = (i << INV_MPU6050_GYRO_CONFIG_FSR_SHIFT);
+                       result = inv_mpu6050_write_reg(st,
+                                       st->reg->gyro_config, d);
+                       if (result)
+                               return result;
 
-       d = (fsr << INV_MPU6050_GYRO_CONFIG_FSR_SHIFT);
-       result = inv_mpu6050_write_reg(st, st->reg->gyro_config, d);
-       if (result)
-               return result;
-       st->chip_config.fsr = fsr;
+                       st->chip_config.fsr = i;
+                       return 0;
+               }
+       }
 
-       return 0;
+       return -EINVAL;
 }
 
-static int inv_mpu6050_write_accel_fs(struct inv_mpu6050_state *st, int fs)
+static int inv_mpu6050_write_accel_scale(struct inv_mpu6050_state *st, int val)
 {
-       int result;
+       int result, i;
        u8 d;
 
-       if (fs < 0 || fs > INV_MPU6050_MAX_ACCL_FS_PARAM)
-               return -EINVAL;
-       if (fs == st->chip_config.accl_fs)
-               return 0;
+       for (i = 0; i < ARRAY_SIZE(accel_scale); ++i) {
+               if (accel_scale[i] == val) {
+                       d = (i << INV_MPU6050_ACCL_CONFIG_FSR_SHIFT);
+                       result = inv_mpu6050_write_reg(st,
+                                       st->reg->accl_config, d);
+                       if (result)
+                               return result;
 
-       d = (fs << INV_MPU6050_ACCL_CONFIG_FSR_SHIFT);
-       result = inv_mpu6050_write_reg(st, st->reg->accl_config, d);
-       if (result)
-               return result;
-       st->chip_config.accl_fs = fs;
+                       st->chip_config.accl_fs = i;
+                       return 0;
+               }
+       }
 
-       return 0;
+       return -EINVAL;
 }
 
 static int inv_mpu6050_write_raw(struct iio_dev *indio_dev,
@@ -471,10 +475,10 @@ static int inv_mpu6050_write_raw(struct iio_dev *indio_dev,
        case IIO_CHAN_INFO_SCALE:
                switch (chan->type) {
                case IIO_ANGL_VEL:
-                       result = inv_mpu6050_write_fsr(st, val);
+                       result = inv_mpu6050_write_gyro_scale(st, val2);
                        break;
                case IIO_ACCEL:
-                       result = inv_mpu6050_write_accel_fs(st, val);
+                       result = inv_mpu6050_write_accel_scale(st, val2);
                        break;
                default:
                        result = -EINVAL;
@@ -780,7 +784,11 @@ static int inv_mpu_probe(struct i2c_client *client,
 
        i2c_set_clientdata(client, indio_dev);
        indio_dev->dev.parent = &client->dev;
-       indio_dev->name = id->name;
+       /* id will be NULL when enumerated via ACPI */
+       if (id)
+               indio_dev->name = (char *)id->name;
+       else
+               indio_dev->name = (char *)dev_name(&client->dev);
        indio_dev->channels = inv_mpu_channels;
        indio_dev->num_channels = ARRAY_SIZE(inv_mpu_channels);
 
index 0cd306a..ba27e27 100644 (file)
 #include <linux/poll.h>
 #include "inv_mpu_iio.h"
 
+static void inv_clear_kfifo(struct inv_mpu6050_state *st)
+{
+       unsigned long flags;
+
+       /* take the spin lock sem to avoid interrupt kick in */
+       spin_lock_irqsave(&st->time_stamp_lock, flags);
+       kfifo_reset(&st->timestamps);
+       spin_unlock_irqrestore(&st->time_stamp_lock, flags);
+}
+
 int inv_reset_fifo(struct iio_dev *indio_dev)
 {
        int result;
@@ -50,6 +60,10 @@ int inv_reset_fifo(struct iio_dev *indio_dev)
                                        INV_MPU6050_BIT_FIFO_RST);
        if (result)
                goto reset_fifo_fail;
+
+       /* clear timestamps fifo */
+       inv_clear_kfifo(st);
+
        /* enable interrupt */
        if (st->chip_config.accl_fifo_enable ||
            st->chip_config.gyro_fifo_enable) {
@@ -83,16 +97,6 @@ reset_fifo_fail:
        return result;
 }
 
-static void inv_clear_kfifo(struct inv_mpu6050_state *st)
-{
-       unsigned long flags;
-
-       /* take the spin lock sem to avoid interrupt kick in */
-       spin_lock_irqsave(&st->time_stamp_lock, flags);
-       kfifo_reset(&st->timestamps);
-       spin_unlock_irqrestore(&st->time_stamp_lock, flags);
-}
-
 /**
  * inv_mpu6050_irq_handler() - Cache a timestamp at each data ready interrupt.
  */
@@ -184,7 +188,6 @@ end_session:
 flush_fifo:
        /* Flush HW and SW FIFOs. */
        inv_reset_fifo(indio_dev);
-       inv_clear_kfifo(st);
        mutex_unlock(&indio_dev->mlock);
        iio_trigger_notify_done(indio_dev->trig);
 
index 5cc3692..b3a3637 100644 (file)
@@ -1227,7 +1227,7 @@ static irqreturn_t kmx61_trigger_handler(int irq, void *p)
                base = KMX61_MAG_XOUT_L;
 
        mutex_lock(&data->lock);
-       for_each_set_bit(bit, indio_dev->buffer->scan_mask,
+       for_each_set_bit(bit, indio_dev->active_scan_mask,
                         indio_dev->masklength) {
                ret = kmx61_read_measurement(data, base, bit);
                if (ret < 0) {
index aaba9d3..4df97f6 100644 (file)
@@ -847,8 +847,7 @@ static int iio_device_add_channel_sysfs(struct iio_dev *indio_dev,
  * @attr_list: List of IIO device attributes
  *
  * This function frees the memory allocated for each of the IIO device
- * attributes in the list. Note: if you want to reuse the list after calling
- * this function you have to reinitialize it using INIT_LIST_HEAD().
+ * attributes in the list.
  */
 void iio_free_chan_devattr_list(struct list_head *attr_list)
 {
@@ -856,6 +855,7 @@ void iio_free_chan_devattr_list(struct list_head *attr_list)
 
        list_for_each_entry_safe(p, n, attr_list, l) {
                kfree(p->dev_attr.attr.name);
+               list_del(&p->l);
                kfree(p);
        }
 }
@@ -936,6 +936,7 @@ static void iio_device_unregister_sysfs(struct iio_dev *indio_dev)
 
        iio_free_chan_devattr_list(&indio_dev->channel_attr_list);
        kfree(indio_dev->chan_attr_group.attrs);
+       indio_dev->chan_attr_group.attrs = NULL;
 }
 
 static void iio_dev_release(struct device *device)
index a4b3970..a99692b 100644 (file)
@@ -500,6 +500,7 @@ int iio_device_register_eventset(struct iio_dev *indio_dev)
 error_free_setup_event_lines:
        iio_free_chan_devattr_list(&indio_dev->event_interface->dev_attr_list);
        kfree(indio_dev->event_interface);
+       indio_dev->event_interface = NULL;
        return ret;
 }
 
index ae68c64..a224afd 100644 (file)
@@ -73,6 +73,7 @@ config CM36651
 config GP2AP020A00F
        tristate "Sharp GP2AP020A00F Proximity/ALS sensor"
        depends on I2C
+       select REGMAP_I2C
        select IIO_BUFFER
        select IIO_TRIGGERED_BUFFER
        select IRQ_WORK
@@ -126,6 +127,7 @@ config HID_SENSOR_PROX
 config JSA1212
        tristate "JSA1212 ALS and proximity sensor driver"
        depends on I2C
+       select REGMAP_I2C
        help
         Say Y here if you want to build a IIO driver for JSA1212
         proximity & ALS sensor device.
index 4c7a4c5..a5d6de7 100644 (file)
@@ -18,6 +18,8 @@ config AK8975
 
 config AK09911
        tristate "Asahi Kasei AK09911 3-axis Compass"
+       depends on I2C
+       depends on GPIOLIB
        select AK8975
        help
          Deprecated: AK09911 is now supported by AK8975 driver.
index 74dff4e..89fca3a 100644 (file)
@@ -494,7 +494,7 @@ static irqreturn_t sx9500_trigger_handler(int irq, void *private)
 
        mutex_lock(&data->mutex);
 
-       for_each_set_bit(bit, indio_dev->buffer->scan_mask,
+       for_each_set_bit(bit, indio_dev->active_scan_mask,
                         indio_dev->masklength) {
                ret = sx9500_read_proximity(data, &indio_dev->channels[bit],
                                            &val);
index aec7a6a..8c014b5 100644 (file)
@@ -99,6 +99,14 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        if (dmasync)
                dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
 
+       /*
+        * If the combination of the addr and size requested for this memory
+        * region causes an integer overflow, return error.
+        */
+       if ((PAGE_ALIGN(addr + size) <= size) ||
+           (PAGE_ALIGN(addr + size) <= addr))
+               return ERR_PTR(-EINVAL);
+
        if (!can_do_mlock())
                return ERR_PTR(-EPERM);
 
index c761971..5904026 100644 (file)
@@ -64,6 +64,14 @@ enum {
 #define GUID_TBL_BLK_NUM_ENTRIES 8
 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
 
+/* Counters should saturate once they reach their maximum value */
+#define ASSIGN_32BIT_COUNTER(counter, value) do {\
+       if ((value) > U32_MAX)                   \
+               counter = cpu_to_be32(U32_MAX); \
+       else                                     \
+               counter = cpu_to_be32(value);    \
+} while (0)
+
 struct mlx4_mad_rcv_buf {
        struct ib_grh grh;
        u8 payload[256];
@@ -806,10 +814,14 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 static void edit_counter(struct mlx4_counter *cnt,
                                        struct ib_pma_portcounters *pma_cnt)
 {
-       pma_cnt->port_xmit_data = cpu_to_be32((be64_to_cpu(cnt->tx_bytes)>>2));
-       pma_cnt->port_rcv_data  = cpu_to_be32((be64_to_cpu(cnt->rx_bytes)>>2));
-       pma_cnt->port_xmit_packets = cpu_to_be32(be64_to_cpu(cnt->tx_frames));
-       pma_cnt->port_rcv_packets  = cpu_to_be32(be64_to_cpu(cnt->rx_frames));
+       ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
+                            (be64_to_cpu(cnt->tx_bytes) >> 2));
+       ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
+                            (be64_to_cpu(cnt->rx_bytes) >> 2));
+       ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
+                            be64_to_cpu(cnt->tx_frames));
+       ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
+                            be64_to_cpu(cnt->rx_frames));
 }
 
 static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
index ac6e2b7..b972c0b 100644 (file)
@@ -2697,8 +2697,12 @@ static void handle_bonded_port_state_event(struct work_struct *work)
        spin_lock_bh(&ibdev->iboe.lock);
        for (i = 0; i < MLX4_MAX_PORTS; ++i) {
                struct net_device *curr_netdev = ibdev->iboe.netdevs[i];
+               enum ib_port_state curr_port_state;
 
-               enum ib_port_state curr_port_state =
+               if (!curr_netdev)
+                       continue;
+
+               curr_port_state =
                        (netif_running(curr_netdev) &&
                         netif_carrier_ok(curr_netdev)) ?
                        IB_PORT_ACTIVE : IB_PORT_DOWN;
index 8ff612d..5639325 100644 (file)
@@ -411,9 +411,9 @@ static int tc3589x_keypad_probe(struct platform_device *pdev)
 
        input_set_drvdata(input, keypad);
 
-       error = request_threaded_irq(irq, NULL,
-                       tc3589x_keypad_irq, plat->irqtype,
-                       "tc3589x-keypad", keypad);
+       error = request_threaded_irq(irq, NULL, tc3589x_keypad_irq,
+                                    plat->irqtype | IRQF_ONESHOT,
+                                    "tc3589x-keypad", keypad);
        if (error < 0) {
                dev_err(&pdev->dev,
                                "Could not allocate irq %d,error %d\n",
index 59d4dcd..9822877 100644 (file)
@@ -187,6 +187,7 @@ static int mma8450_probe(struct i2c_client *c,
        idev->private           = m;
        idev->input->name       = MMA8450_DRV_NAME;
        idev->input->id.bustype = BUS_I2C;
+       idev->input->dev.parent = &c->dev;
        idev->poll              = mma8450_poll;
        idev->poll_interval     = POLL_INTERVAL;
        idev->poll_interval_max = POLL_INTERVAL_MAX;
index d28726a..27bcdbc 100644 (file)
@@ -1154,10 +1154,28 @@ out:
        mutex_unlock(&alps_mutex);
 }
 
-static void alps_report_bare_ps2_packet(struct input_dev *dev,
+static void alps_report_bare_ps2_packet(struct psmouse *psmouse,
                                        unsigned char packet[],
                                        bool report_buttons)
 {
+       struct alps_data *priv = psmouse->private;
+       struct input_dev *dev;
+
+       /* Figure out which device to use to report the bare packet */
+       if (priv->proto_version == ALPS_PROTO_V2 &&
+           (priv->flags & ALPS_DUALPOINT)) {
+               /* On V2 devices the DualPoint Stick reports bare packets */
+               dev = priv->dev2;
+       } else if (unlikely(IS_ERR_OR_NULL(priv->dev3))) {
+               /* Register dev3 mouse if we received PS/2 packet first time */
+               if (!IS_ERR(priv->dev3))
+                       psmouse_queue_work(psmouse, &priv->dev3_register_work,
+                                          0);
+               return;
+       } else {
+               dev = priv->dev3;
+       }
+
        if (report_buttons)
                alps_report_buttons(dev, NULL,
                                packet[0] & 1, packet[0] & 2, packet[0] & 4);
@@ -1232,8 +1250,8 @@ static psmouse_ret_t alps_handle_interleaved_ps2(struct psmouse *psmouse)
                 * de-synchronization.
                 */
 
-               alps_report_bare_ps2_packet(priv->dev2,
-                                           &psmouse->packet[3], false);
+               alps_report_bare_ps2_packet(psmouse, &psmouse->packet[3],
+                                           false);
 
                /*
                 * Continue with the standard ALPS protocol handling,
@@ -1289,18 +1307,9 @@ static psmouse_ret_t alps_process_byte(struct psmouse *psmouse)
         * properly we only do this if the device is fully synchronized.
         */
        if (!psmouse->out_of_sync_cnt && (psmouse->packet[0] & 0xc8) == 0x08) {
-
-               /* Register dev3 mouse if we received PS/2 packet first time */
-               if (unlikely(!priv->dev3))
-                       psmouse_queue_work(psmouse,
-                                          &priv->dev3_register_work, 0);
-
                if (psmouse->pktcnt == 3) {
-                       /* Once dev3 mouse device is registered report data */
-                       if (likely(!IS_ERR_OR_NULL(priv->dev3)))
-                               alps_report_bare_ps2_packet(priv->dev3,
-                                                           psmouse->packet,
-                                                           true);
+                       alps_report_bare_ps2_packet(psmouse, psmouse->packet,
+                                                   true);
                        return PSMOUSE_FULL_PACKET;
                }
                return PSMOUSE_GOOD_DATA;
@@ -2281,10 +2290,12 @@ static int alps_set_protocol(struct psmouse *psmouse,
                priv->set_abs_params = alps_set_abs_params_mt;
                priv->nibble_commands = alps_v3_nibble_commands;
                priv->addr_command = PSMOUSE_CMD_RESET_WRAP;
-               priv->x_max = 1360;
-               priv->y_max = 660;
                priv->x_bits = 23;
                priv->y_bits = 12;
+
+               if (alps_dolphin_get_device_area(psmouse, priv))
+                       return -EIO;
+
                break;
 
        case ALPS_PROTO_V6:
@@ -2303,9 +2314,8 @@ static int alps_set_protocol(struct psmouse *psmouse,
                priv->set_abs_params = alps_set_abs_params_mt;
                priv->nibble_commands = alps_v3_nibble_commands;
                priv->addr_command = PSMOUSE_CMD_RESET_WRAP;
-
-               if (alps_dolphin_get_device_area(psmouse, priv))
-                       return -EIO;
+               priv->x_max = 0xfff;
+               priv->y_max = 0x7ff;
 
                if (priv->fw_ver[1] != 0xba)
                        priv->flags |= ALPS_BUTTONPAD;
@@ -2605,8 +2615,10 @@ int alps_detect(struct psmouse *psmouse, bool set_properties)
                return -ENOMEM;
 
        error = alps_identify(psmouse, priv);
-       if (error)
+       if (error) {
+               kfree(priv);
                return error;
+       }
 
        if (set_properties) {
                psmouse->vendor = "ALPS";
index 77e9d70..1e2291c 100644 (file)
@@ -20,7 +20,7 @@
 #include <linux/input/mt.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/unaligned/access_ok.h>
+#include <asm/unaligned.h>
 #include "cyapa.h"
 
 
index ddf5393..5b611dd 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/mutex.h>
 #include <linux/completion.h>
 #include <linux/slab.h>
-#include <linux/unaligned/access_ok.h>
+#include <asm/unaligned.h>
 #include <linux/crc-itu-t.h>
 #include "cyapa.h"
 
@@ -1926,7 +1926,7 @@ static int cyapa_gen5_read_idac_data(struct cyapa *cyapa,
                                electrodes_tx = cyapa->electrodes_x;
                        max_element_cnt = ((cyapa->aligned_electrodes_rx + 7) &
                                                ~7u) * electrodes_tx;
-               } else if (idac_data_type == GEN5_RETRIEVE_SELF_CAP_PWC_DATA) {
+               } else {
                        offset = 2;
                        max_element_cnt = cyapa->electrodes_x +
                                                cyapa->electrodes_y;
index 757f78a..23d2594 100644 (file)
@@ -67,9 +67,6 @@ static void focaltech_reset(struct psmouse *psmouse)
 
 #define FOC_MAX_FINGERS 5
 
-#define FOC_MAX_X 2431
-#define FOC_MAX_Y 1663
-
 /*
  * Current state of a single finger on the touchpad.
  */
@@ -129,9 +126,17 @@ static void focaltech_report_state(struct psmouse *psmouse)
                input_mt_slot(dev, i);
                input_mt_report_slot_state(dev, MT_TOOL_FINGER, active);
                if (active) {
-                       input_report_abs(dev, ABS_MT_POSITION_X, finger->x);
+                       unsigned int clamped_x, clamped_y;
+                       /*
+                        * The touchpad might report invalid data, so we clamp
+                        * the resulting values so that we do not confuse
+                        * userspace.
+                        */
+                       clamped_x = clamp(finger->x, 0U, priv->x_max);
+                       clamped_y = clamp(finger->y, 0U, priv->y_max);
+                       input_report_abs(dev, ABS_MT_POSITION_X, clamped_x);
                        input_report_abs(dev, ABS_MT_POSITION_Y,
-                                        FOC_MAX_Y - finger->y);
+                                        priv->y_max - clamped_y);
                }
        }
        input_mt_report_pointer_emulation(dev, true);
@@ -180,16 +185,6 @@ static void focaltech_process_abs_packet(struct psmouse *psmouse,
 
        state->pressed = (packet[0] >> 4) & 1;
 
-       /*
-        * packet[5] contains some kind of tool size in the most
-        * significant nibble. 0xff is a special value (latching) that
-        * signals a large contact area.
-        */
-       if (packet[5] == 0xff) {
-               state->fingers[finger].valid = false;
-               return;
-       }
-
        state->fingers[finger].x = ((packet[1] & 0xf) << 8) | packet[2];
        state->fingers[finger].y = (packet[3] << 8) | packet[4];
        state->fingers[finger].valid = true;
@@ -381,6 +376,23 @@ static int focaltech_read_size(struct psmouse *psmouse)
 
        return 0;
 }
+
+void focaltech_set_resolution(struct psmouse *psmouse, unsigned int resolution)
+{
+       /* not supported yet */
+}
+
+static void focaltech_set_rate(struct psmouse *psmouse, unsigned int rate)
+{
+       /* not supported yet */
+}
+
+static void focaltech_set_scale(struct psmouse *psmouse,
+                               enum psmouse_scale scale)
+{
+       /* not supported yet */
+}
+
 int focaltech_init(struct psmouse *psmouse)
 {
        struct focaltech_data *priv;
@@ -415,6 +427,14 @@ int focaltech_init(struct psmouse *psmouse)
        psmouse->cleanup = focaltech_reset;
        /* resync is not supported yet */
        psmouse->resync_time = 0;
+       /*
+        * rate/resolution/scale changes are not supported yet, and
+        * the generic implementations of these functions seem to
+        * confuse some touchpads
+        */
+       psmouse->set_resolution = focaltech_set_resolution;
+       psmouse->set_rate = focaltech_set_rate;
+       psmouse->set_scale = focaltech_set_scale;
 
        return 0;
 
index 4ccd01d..8bc6123 100644 (file)
@@ -453,6 +453,17 @@ static void psmouse_set_rate(struct psmouse *psmouse, unsigned int rate)
        psmouse->rate = r;
 }
 
+/*
+ * Here we set the mouse scaling.
+ */
+
+static void psmouse_set_scale(struct psmouse *psmouse, enum psmouse_scale scale)
+{
+       ps2_command(&psmouse->ps2dev, NULL,
+                   scale == PSMOUSE_SCALE21 ? PSMOUSE_CMD_SETSCALE21 :
+                                              PSMOUSE_CMD_SETSCALE11);
+}
+
 /*
  * psmouse_poll() - default poll handler. Everyone except for ALPS uses it.
  */
@@ -689,6 +700,7 @@ static void psmouse_apply_defaults(struct psmouse *psmouse)
 
        psmouse->set_rate = psmouse_set_rate;
        psmouse->set_resolution = psmouse_set_resolution;
+       psmouse->set_scale = psmouse_set_scale;
        psmouse->poll = psmouse_poll;
        psmouse->protocol_handler = psmouse_process_byte;
        psmouse->pktsize = 3;
@@ -1160,7 +1172,7 @@ static void psmouse_initialize(struct psmouse *psmouse)
        if (psmouse_max_proto != PSMOUSE_PS2) {
                psmouse->set_rate(psmouse, psmouse->rate);
                psmouse->set_resolution(psmouse, psmouse->resolution);
-               ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_SETSCALE11);
+               psmouse->set_scale(psmouse, PSMOUSE_SCALE11);
        }
 }
 
index c2ff137..d02e1bd 100644 (file)
@@ -36,6 +36,11 @@ typedef enum {
        PSMOUSE_FULL_PACKET
 } psmouse_ret_t;
 
+enum psmouse_scale {
+       PSMOUSE_SCALE11,
+       PSMOUSE_SCALE21
+};
+
 struct psmouse {
        void *private;
        struct input_dev *dev;
@@ -67,6 +72,7 @@ struct psmouse {
        psmouse_ret_t (*protocol_handler)(struct psmouse *psmouse);
        void (*set_rate)(struct psmouse *psmouse, unsigned int rate);
        void (*set_resolution)(struct psmouse *psmouse, unsigned int resolution);
+       void (*set_scale)(struct psmouse *psmouse, enum psmouse_scale scale);
 
        int (*reconnect)(struct psmouse *psmouse);
        void (*disconnect)(struct psmouse *psmouse);
index f2cceb6..3b06c8a 100644 (file)
@@ -67,9 +67,6 @@
 #define X_MAX_POSITIVE 8176
 #define Y_MAX_POSITIVE 8176
 
-/* maximum ABS_MT_POSITION displacement (in mm) */
-#define DMAX 10
-
 /*****************************************************************************
  *     Stuff we need even when we do not want native Synaptics support
  ****************************************************************************/
@@ -123,32 +120,46 @@ void synaptics_reset(struct psmouse *psmouse)
 
 static bool cr48_profile_sensor;
 
+#define ANY_BOARD_ID 0
 struct min_max_quirk {
        const char * const *pnp_ids;
+       struct {
+               unsigned long int min, max;
+       } board_id;
        int x_min, x_max, y_min, y_max;
 };
 
 static const struct min_max_quirk min_max_pnpid_table[] = {
        {
                (const char * const []){"LEN0033", NULL},
+               {ANY_BOARD_ID, ANY_BOARD_ID},
                1024, 5052, 2258, 4832
        },
        {
-               (const char * const []){"LEN0035", "LEN0042", NULL},
+               (const char * const []){"LEN0042", NULL},
+               {ANY_BOARD_ID, ANY_BOARD_ID},
                1232, 5710, 1156, 4696
        },
        {
                (const char * const []){"LEN0034", "LEN0036", "LEN0037",
                                        "LEN0039", "LEN2002", "LEN2004",
                                        NULL},
+               {ANY_BOARD_ID, 2961},
                1024, 5112, 2024, 4832
        },
        {
                (const char * const []){"LEN2001", NULL},
+               {ANY_BOARD_ID, ANY_BOARD_ID},
                1024, 5022, 2508, 4832
        },
        {
                (const char * const []){"LEN2006", NULL},
+               {2691, 2691},
+               1024, 5045, 2457, 4832
+       },
+       {
+               (const char * const []){"LEN2006", NULL},
+               {ANY_BOARD_ID, ANY_BOARD_ID},
                1264, 5675, 1171, 4688
        },
        { }
@@ -175,9 +186,7 @@ static const char * const topbuttonpad_pnp_ids[] = {
        "LEN0041",
        "LEN0042", /* Yoga */
        "LEN0045",
-       "LEN0046",
        "LEN0047",
-       "LEN0048",
        "LEN0049",
        "LEN2000",
        "LEN2001", /* Edge E431 */
@@ -185,7 +194,7 @@ static const char * const topbuttonpad_pnp_ids[] = {
        "LEN2003",
        "LEN2004", /* L440 */
        "LEN2005",
-       "LEN2006",
+       "LEN2006", /* Edge E440/E540 */
        "LEN2007",
        "LEN2008",
        "LEN2009",
@@ -235,18 +244,39 @@ static int synaptics_model_id(struct psmouse *psmouse)
        return 0;
 }
 
+static int synaptics_more_extended_queries(struct psmouse *psmouse)
+{
+       struct synaptics_data *priv = psmouse->private;
+       unsigned char buf[3];
+
+       if (synaptics_send_cmd(psmouse, SYN_QUE_MEXT_CAPAB_10, buf))
+               return -1;
+
+       priv->ext_cap_10 = (buf[0]<<16) | (buf[1]<<8) | buf[2];
+
+       return 0;
+}
+
 /*
- * Read the board id from the touchpad
+ * Read the board id and the "More Extended Queries" from the touchpad
  * The board id is encoded in the "QUERY MODES" response
  */
-static int synaptics_board_id(struct psmouse *psmouse)
+static int synaptics_query_modes(struct psmouse *psmouse)
 {
        struct synaptics_data *priv = psmouse->private;
        unsigned char bid[3];
 
+       /* firmware prior to 7.5 has no board_id encoded */
+       if (SYN_ID_FULL(priv->identity) < 0x705)
+               return 0;
+
        if (synaptics_send_cmd(psmouse, SYN_QUE_MODES, bid))
                return -1;
        priv->board_id = ((bid[0] & 0xfc) << 6) | bid[1];
+
+       if (SYN_MEXT_CAP_BIT(bid[0]))
+               return synaptics_more_extended_queries(psmouse);
+
        return 0;
 }
 
@@ -346,7 +376,6 @@ static int synaptics_resolution(struct psmouse *psmouse)
 {
        struct synaptics_data *priv = psmouse->private;
        unsigned char resp[3];
-       int i;
 
        if (SYN_ID_MAJOR(priv->identity) < 4)
                return 0;
@@ -358,17 +387,6 @@ static int synaptics_resolution(struct psmouse *psmouse)
                }
        }
 
-       for (i = 0; min_max_pnpid_table[i].pnp_ids; i++) {
-               if (psmouse_matches_pnp_id(psmouse,
-                                          min_max_pnpid_table[i].pnp_ids)) {
-                       priv->x_min = min_max_pnpid_table[i].x_min;
-                       priv->x_max = min_max_pnpid_table[i].x_max;
-                       priv->y_min = min_max_pnpid_table[i].y_min;
-                       priv->y_max = min_max_pnpid_table[i].y_max;
-                       return 0;
-               }
-       }
-
        if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 5 &&
            SYN_CAP_MAX_DIMENSIONS(priv->ext_cap_0c)) {
                if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MAX_COORDS, resp)) {
@@ -377,23 +395,69 @@ static int synaptics_resolution(struct psmouse *psmouse)
                } else {
                        priv->x_max = (resp[0] << 5) | ((resp[1] & 0x0f) << 1);
                        priv->y_max = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3);
+                       psmouse_info(psmouse,
+                                    "queried max coordinates: x [..%d], y [..%d]\n",
+                                    priv->x_max, priv->y_max);
                }
        }
 
-       if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 7 &&
-           SYN_CAP_MIN_DIMENSIONS(priv->ext_cap_0c)) {
+       if (SYN_CAP_MIN_DIMENSIONS(priv->ext_cap_0c) &&
+           (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 7 ||
+            /*
+             * Firmware v8.1 does not report proper number of extended
+             * capabilities, but has been proven to report correct min
+             * coordinates.
+             */
+            SYN_ID_FULL(priv->identity) == 0x801)) {
                if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MIN_COORDS, resp)) {
                        psmouse_warn(psmouse,
                                     "device claims to have min coordinates query, but I'm not able to read it.\n");
                } else {
                        priv->x_min = (resp[0] << 5) | ((resp[1] & 0x0f) << 1);
                        priv->y_min = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3);
+                       psmouse_info(psmouse,
+                                    "queried min coordinates: x [%d..], y [%d..]\n",
+                                    priv->x_min, priv->y_min);
                }
        }
 
        return 0;
 }
 
+/*
+ * Apply quirk(s) if the hardware matches
+ */
+
+static void synaptics_apply_quirks(struct psmouse *psmouse)
+{
+       struct synaptics_data *priv = psmouse->private;
+       int i;
+
+       for (i = 0; min_max_pnpid_table[i].pnp_ids; i++) {
+               if (!psmouse_matches_pnp_id(psmouse,
+                                           min_max_pnpid_table[i].pnp_ids))
+                       continue;
+
+               if (min_max_pnpid_table[i].board_id.min != ANY_BOARD_ID &&
+                   priv->board_id < min_max_pnpid_table[i].board_id.min)
+                       continue;
+
+               if (min_max_pnpid_table[i].board_id.max != ANY_BOARD_ID &&
+                   priv->board_id > min_max_pnpid_table[i].board_id.max)
+                       continue;
+
+               priv->x_min = min_max_pnpid_table[i].x_min;
+               priv->x_max = min_max_pnpid_table[i].x_max;
+               priv->y_min = min_max_pnpid_table[i].y_min;
+               priv->y_max = min_max_pnpid_table[i].y_max;
+               psmouse_info(psmouse,
+                            "quirked min/max coordinates: x [%d..%d], y [%d..%d]\n",
+                            priv->x_min, priv->x_max,
+                            priv->y_min, priv->y_max);
+               break;
+       }
+}
+
 static int synaptics_query_hardware(struct psmouse *psmouse)
 {
        if (synaptics_identify(psmouse))
@@ -402,13 +466,15 @@ static int synaptics_query_hardware(struct psmouse *psmouse)
                return -1;
        if (synaptics_firmware_id(psmouse))
                return -1;
-       if (synaptics_board_id(psmouse))
+       if (synaptics_query_modes(psmouse))
                return -1;
        if (synaptics_capability(psmouse))
                return -1;
        if (synaptics_resolution(psmouse))
                return -1;
 
+       synaptics_apply_quirks(psmouse);
+
        return 0;
 }
 
@@ -516,18 +582,22 @@ static int synaptics_is_pt_packet(unsigned char *buf)
        return (buf[0] & 0xFC) == 0x84 && (buf[3] & 0xCC) == 0xC4;
 }
 
-static void synaptics_pass_pt_packet(struct serio *ptport, unsigned char *packet)
+static void synaptics_pass_pt_packet(struct psmouse *psmouse,
+                                    struct serio *ptport,
+                                    unsigned char *packet)
 {
+       struct synaptics_data *priv = psmouse->private;
        struct psmouse *child = serio_get_drvdata(ptport);
 
        if (child && child->state == PSMOUSE_ACTIVATED) {
-               serio_interrupt(ptport, packet[1], 0);
+               serio_interrupt(ptport, packet[1] | priv->pt_buttons, 0);
                serio_interrupt(ptport, packet[4], 0);
                serio_interrupt(ptport, packet[5], 0);
                if (child->pktsize == 4)
                        serio_interrupt(ptport, packet[2], 0);
-       } else
+       } else {
                serio_interrupt(ptport, packet[1], 0);
+       }
 }
 
 static void synaptics_pt_activate(struct psmouse *psmouse)
@@ -605,6 +675,18 @@ static void synaptics_parse_agm(const unsigned char buf[],
        }
 }
 
+static void synaptics_parse_ext_buttons(const unsigned char buf[],
+                                       struct synaptics_data *priv,
+                                       struct synaptics_hw_state *hw)
+{
+       unsigned int ext_bits =
+               (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) + 1) >> 1;
+       unsigned int ext_mask = GENMASK(ext_bits - 1, 0);
+
+       hw->ext_buttons = buf[4] & ext_mask;
+       hw->ext_buttons |= (buf[5] & ext_mask) << ext_bits;
+}
+
 static bool is_forcepad;
 
 static int synaptics_parse_hw_state(const unsigned char buf[],
@@ -691,28 +773,9 @@ static int synaptics_parse_hw_state(const unsigned char buf[],
                        hw->down = ((buf[0] ^ buf[3]) & 0x02) ? 1 : 0;
                }
 
-               if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) &&
+               if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) > 0 &&
                    ((buf[0] ^ buf[3]) & 0x02)) {
-                       switch (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) & ~0x01) {
-                       default:
-                               /*
-                                * if nExtBtn is greater than 8 it should be
-                                * considered invalid and treated as 0
-                                */
-                               break;
-                       case 8:
-                               hw->ext_buttons |= ((buf[5] & 0x08)) ? 0x80 : 0;
-                               hw->ext_buttons |= ((buf[4] & 0x08)) ? 0x40 : 0;
-                       case 6:
-                               hw->ext_buttons |= ((buf[5] & 0x04)) ? 0x20 : 0;
-                               hw->ext_buttons |= ((buf[4] & 0x04)) ? 0x10 : 0;
-                       case 4:
-                               hw->ext_buttons |= ((buf[5] & 0x02)) ? 0x08 : 0;
-                               hw->ext_buttons |= ((buf[4] & 0x02)) ? 0x04 : 0;
-                       case 2:
-                               hw->ext_buttons |= ((buf[5] & 0x01)) ? 0x02 : 0;
-                               hw->ext_buttons |= ((buf[4] & 0x01)) ? 0x01 : 0;
-                       }
+                       synaptics_parse_ext_buttons(buf, priv, hw);
                }
        } else {
                hw->x = (((buf[1] & 0x1f) << 8) | buf[2]);
@@ -774,12 +837,54 @@ static void synaptics_report_semi_mt_data(struct input_dev *dev,
        }
 }
 
+static void synaptics_report_ext_buttons(struct psmouse *psmouse,
+                                        const struct synaptics_hw_state *hw)
+{
+       struct input_dev *dev = psmouse->dev;
+       struct synaptics_data *priv = psmouse->private;
+       int ext_bits = (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) + 1) >> 1;
+       char buf[6] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+       int i;
+
+       if (!SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap))
+               return;
+
+       /* Bug in FW 8.1, buttons are reported only when ExtBit is 1 */
+       if (SYN_ID_FULL(priv->identity) == 0x801 &&
+           !((psmouse->packet[0] ^ psmouse->packet[3]) & 0x02))
+               return;
+
+       if (!SYN_CAP_EXT_BUTTONS_STICK(priv->ext_cap_10)) {
+               for (i = 0; i < ext_bits; i++) {
+                       input_report_key(dev, BTN_0 + 2 * i,
+                               hw->ext_buttons & (1 << i));
+                       input_report_key(dev, BTN_1 + 2 * i,
+                               hw->ext_buttons & (1 << (i + ext_bits)));
+               }
+               return;
+       }
+
+       /*
+        * This generation of touchpads has the trackstick buttons
+        * physically wired to the touchpad. Re-route them through
+        * the pass-through interface.
+        */
+       if (!priv->pt_port)
+               return;
+
+       /* The trackstick expects at most 3 buttons */
+       priv->pt_buttons = SYN_CAP_EXT_BUTTON_STICK_L(hw->ext_buttons)      |
+                          SYN_CAP_EXT_BUTTON_STICK_R(hw->ext_buttons) << 1 |
+                          SYN_CAP_EXT_BUTTON_STICK_M(hw->ext_buttons) << 2;
+
+       synaptics_pass_pt_packet(psmouse, priv->pt_port, buf);
+}
+
 static void synaptics_report_buttons(struct psmouse *psmouse,
                                     const struct synaptics_hw_state *hw)
 {
        struct input_dev *dev = psmouse->dev;
        struct synaptics_data *priv = psmouse->private;
-       int i;
 
        input_report_key(dev, BTN_LEFT, hw->left);
        input_report_key(dev, BTN_RIGHT, hw->right);
@@ -792,8 +897,7 @@ static void synaptics_report_buttons(struct psmouse *psmouse,
                input_report_key(dev, BTN_BACK, hw->down);
        }
 
-       for (i = 0; i < SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap); i++)
-               input_report_key(dev, BTN_0 + i, hw->ext_buttons & (1 << i));
+       synaptics_report_ext_buttons(psmouse, hw);
 }
 
 static void synaptics_report_mt_data(struct psmouse *psmouse,
@@ -813,7 +917,7 @@ static void synaptics_report_mt_data(struct psmouse *psmouse,
                pos[i].y = synaptics_invert_y(hw[i]->y);
        }
 
-       input_mt_assign_slots(dev, slot, pos, nsemi, DMAX * priv->x_res);
+       input_mt_assign_slots(dev, slot, pos, nsemi, 0);
 
        for (i = 0; i < nsemi; i++) {
                input_mt_slot(dev, slot[i]);
@@ -1014,7 +1118,8 @@ static psmouse_ret_t synaptics_process_byte(struct psmouse *psmouse)
                if (SYN_CAP_PASS_THROUGH(priv->capabilities) &&
                    synaptics_is_pt_packet(psmouse->packet)) {
                        if (priv->pt_port)
-                               synaptics_pass_pt_packet(priv->pt_port, psmouse->packet);
+                               synaptics_pass_pt_packet(psmouse, priv->pt_port,
+                                                        psmouse->packet);
                } else
                        synaptics_process_packet(psmouse);
 
@@ -1116,8 +1221,9 @@ static void set_input_params(struct psmouse *psmouse,
                __set_bit(BTN_BACK, dev->keybit);
        }
 
-       for (i = 0; i < SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap); i++)
-               __set_bit(BTN_0 + i, dev->keybit);
+       if (!SYN_CAP_EXT_BUTTONS_STICK(priv->ext_cap_10))
+               for (i = 0; i < SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap); i++)
+                       __set_bit(BTN_0 + i, dev->keybit);
 
        __clear_bit(EV_REL, dev->evbit);
        __clear_bit(REL_X, dev->relbit);
@@ -1125,7 +1231,8 @@ static void set_input_params(struct psmouse *psmouse,
 
        if (SYN_CAP_CLICKPAD(priv->ext_cap_0c)) {
                __set_bit(INPUT_PROP_BUTTONPAD, dev->propbit);
-               if (psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids))
+               if (psmouse_matches_pnp_id(psmouse, topbuttonpad_pnp_ids) &&
+                   !SYN_CAP_EXT_BUTTONS_STICK(priv->ext_cap_10))
                        __set_bit(INPUT_PROP_TOPBUTTONPAD, dev->propbit);
                /* Clickpads report only left button */
                __clear_bit(BTN_RIGHT, dev->keybit);
index aedc329..ee4bd0d 100644 (file)
@@ -22,6 +22,7 @@
 #define SYN_QUE_EXT_CAPAB_0C           0x0c
 #define SYN_QUE_EXT_MAX_COORDS         0x0d
 #define SYN_QUE_EXT_MIN_COORDS         0x0f
+#define SYN_QUE_MEXT_CAPAB_10          0x10
 
 /* synatics modes */
 #define SYN_BIT_ABSOLUTE_MODE          (1 << 7)
@@ -53,6 +54,7 @@
 #define SYN_EXT_CAP_REQUESTS(c)                (((c) & 0x700000) >> 20)
 #define SYN_CAP_MULTI_BUTTON_NO(ec)    (((ec) & 0x00f000) >> 12)
 #define SYN_CAP_PRODUCT_ID(ec)         (((ec) & 0xff0000) >> 16)
+#define SYN_MEXT_CAP_BIT(m)            ((m) & (1 << 1))
 
 /*
  * The following describes response for the 0x0c query.
 #define SYN_CAP_REDUCED_FILTERING(ex0c)        ((ex0c) & 0x000400)
 #define SYN_CAP_IMAGE_SENSOR(ex0c)     ((ex0c) & 0x000800)
 
+/*
+ * The following describes response for the 0x10 query.
+ *
+ * byte        mask    name                    meaning
+ * ----        ----    -------                 ------------
+ * 1   0x01    ext buttons are stick   buttons exported in the extended
+ *                                     capability are actually meant to be used
+ *                                     by the trackstick (pass-through).
+ * 1   0x02    SecurePad               the touchpad is a SecurePad, so it
+ *                                     contains a built-in fingerprint reader.
+ * 1   0xe0    more ext count          how many more extended queries are
+ *                                     available after this one.
+ * 2   0xff    SecurePad width         the width of the SecurePad fingerprint
+ *                                     reader.
+ * 3   0xff    SecurePad height        the height of the SecurePad fingerprint
+ *                                     reader.
+ */
+#define SYN_CAP_EXT_BUTTONS_STICK(ex10)        ((ex10) & 0x010000)
+#define SYN_CAP_SECUREPAD(ex10)                ((ex10) & 0x020000)
+
+#define SYN_CAP_EXT_BUTTON_STICK_L(eb) (!!((eb) & 0x01))
+#define SYN_CAP_EXT_BUTTON_STICK_M(eb) (!!((eb) & 0x02))
+#define SYN_CAP_EXT_BUTTON_STICK_R(eb) (!!((eb) & 0x04))
+
 /* synaptics modes query bits */
 #define SYN_MODE_ABSOLUTE(m)           ((m) & (1 << 7))
 #define SYN_MODE_RATE(m)               ((m) & (1 << 6))
@@ -143,6 +169,7 @@ struct synaptics_data {
        unsigned long int capabilities;         /* Capabilities */
        unsigned long int ext_cap;              /* Extended Capabilities */
        unsigned long int ext_cap_0c;           /* Ext Caps from 0x0c query */
+       unsigned long int ext_cap_10;           /* Ext Caps from 0x10 query */
        unsigned long int identity;             /* Identification */
        unsigned int x_res, y_res;              /* X/Y resolution in units/mm */
        unsigned int x_max, y_max;              /* Max coordinates (from FW) */
@@ -156,6 +183,7 @@ struct synaptics_data {
        bool disable_gesture;                   /* disable gestures */
 
        struct serio *pt_port;                  /* Pass-through serio port */
+       unsigned char pt_buttons;               /* Pass-through buttons */
 
        /*
         * Last received Advanced Gesture Mode (AGM) packet. An AGM packet
index 5891752..6261fd6 100644 (file)
@@ -943,6 +943,7 @@ config TOUCHSCREEN_SUN4I
        tristate "Allwinner sun4i resistive touchscreen controller support"
        depends on ARCH_SUNXI || COMPILE_TEST
        depends on HWMON
+       depends on THERMAL || !THERMAL_OF
        help
          This selects support for the resistive touchscreen controller
          found on Allwinner sunxi SoCs.
index baa0d97..1ae4e54 100644 (file)
@@ -23,6 +23,7 @@ config IOMMU_IO_PGTABLE
 config IOMMU_IO_PGTABLE_LPAE
        bool "ARMv7/v8 Long Descriptor Format"
        select IOMMU_IO_PGTABLE
+       depends on ARM || ARM64 || COMPILE_TEST
        help
          Enable support for the ARM long descriptor pagetable format.
          This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page
@@ -63,6 +64,7 @@ config MSM_IOMMU
        bool "MSM IOMMU Support"
        depends on ARM
        depends on ARCH_MSM8X60 || ARCH_MSM8960 || COMPILE_TEST
+       depends on BROKEN
        select IOMMU_API
        help
          Support for the IOMMUs found on certain Qualcomm SOCs.
index fc13dd5..a3adde6 100644 (file)
@@ -1288,10 +1288,13 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
                return 0;
 
        spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
-       if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS)
+       if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
+                       smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
                ret = arm_smmu_iova_to_phys_hard(domain, iova);
-       else
+       } else {
                ret = ops->iova_to_phys(ops, iova);
+       }
+
        spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
 
        return ret;
@@ -1556,7 +1559,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
                return -ENODEV;
        }
 
-       if (smmu->version == 1 || (!(id & ID0_ATOSNS) && (id & ID0_S1TS))) {
+       if ((id & ID0_S1TS) && ((smmu->version == 1) || (id & ID0_ATOSNS))) {
                smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
                dev_notice(smmu->dev, "\taddress translation ops\n");
        }
index 7ce5273..dc14fec 100644 (file)
@@ -1186,8 +1186,15 @@ static const struct iommu_ops exynos_iommu_ops = {
 
 static int __init exynos_iommu_init(void)
 {
+       struct device_node *np;
        int ret;
 
+       np = of_find_matching_node(NULL, sysmmu_of_match);
+       if (!np)
+               return 0;
+
+       of_node_put(np);
+
        lv2table_kmem_cache = kmem_cache_create("exynos-iommu-lv2table",
                                LV2TABLE_SIZE, LV2TABLE_SIZE, 0, NULL);
        if (!lv2table_kmem_cache) {
index ae4c1a8..2d1e05b 100644 (file)
@@ -1742,9 +1742,8 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 
 static void domain_exit(struct dmar_domain *domain)
 {
-       struct dmar_drhd_unit *drhd;
-       struct intel_iommu *iommu;
        struct page *freelist = NULL;
+       int i;
 
        /* Domain 0 is reserved, so dont process it */
        if (!domain)
@@ -1764,8 +1763,8 @@ static void domain_exit(struct dmar_domain *domain)
 
        /* clear attached or cached domains */
        rcu_read_lock();
-       for_each_active_iommu(iommu, drhd)
-               iommu_detach_domain(domain, iommu);
+       for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus)
+               iommu_detach_domain(domain, g_iommus[i]);
        rcu_read_unlock();
 
        dma_free_pagelist(freelist);
index 5a500ed..b610a8d 100644 (file)
@@ -56,7 +56,8 @@
        ((((d)->levels - ((l) - ARM_LPAE_START_LVL(d) + 1))             \
          * (d)->bits_per_level) + (d)->pg_shift)
 
-#define ARM_LPAE_PAGES_PER_PGD(d)      ((d)->pgd_size >> (d)->pg_shift)
+#define ARM_LPAE_PAGES_PER_PGD(d)                                      \
+       DIV_ROUND_UP((d)->pgd_size, 1UL << (d)->pg_shift)
 
 /*
  * Calculate the index at level l used to map virtual address a using the
@@ -66,7 +67,7 @@
        ((l) == ARM_LPAE_START_LVL(d) ? ilog2(ARM_LPAE_PAGES_PER_PGD(d)) : 0)
 
 #define ARM_LPAE_LVL_IDX(a,l,d)                                                \
-       (((a) >> ARM_LPAE_LVL_SHIFT(l,d)) &                             \
+       (((u64)(a) >> ARM_LPAE_LVL_SHIFT(l,d)) &                        \
         ((1 << ((d)->bits_per_level + ARM_LPAE_PGD_IDX(l,d))) - 1))
 
 /* Calculate the block/page mapping size at level l for pagetable in d. */
index 10186ca..bc39bdf 100644 (file)
@@ -851,6 +851,7 @@ static int ipmmu_remove(struct platform_device *pdev)
 
 static const struct of_device_id ipmmu_of_ids[] = {
        { .compatible = "renesas,ipmmu-vmsa", },
+       { }
 };
 
 static struct platform_driver ipmmu_driver = {
index f59f857..a4ba851 100644 (file)
@@ -1376,6 +1376,13 @@ static int __init omap_iommu_init(void)
        struct kmem_cache *p;
        const unsigned long flags = SLAB_HWCACHE_ALIGN;
        size_t align = 1 << 10; /* L2 pagetable alignement */
+       struct device_node *np;
+
+       np = of_find_matching_node(NULL, omap_iommu_of_match);
+       if (!np)
+               return 0;
+
+       of_node_put(np);
 
        p = kmem_cache_create("iopte_cache", IOPTE_TABLE_SIZE, align, flags,
                              iopte_cachep_ctor);
index 6a8b1ec..9f74fdd 100644 (file)
@@ -1015,8 +1015,15 @@ static struct platform_driver rk_iommu_driver = {
 
 static int __init rk_iommu_init(void)
 {
+       struct device_node *np;
        int ret;
 
+       np = of_find_matching_node(NULL, rk_iommu_dt_ids);
+       if (!np)
+               return 0;
+
+       of_node_put(np);
+
        ret = bus_set_iommu(&platform_bus_type, &rk_iommu_ops);
        if (ret)
                return ret;
index 463c235..4387dae 100644 (file)
@@ -69,6 +69,7 @@ static void __iomem *per_cpu_int_base;
 static void __iomem *main_int_base;
 static struct irq_domain *armada_370_xp_mpic_domain;
 static u32 doorbell_mask_reg;
+static int parent_irq;
 #ifdef CONFIG_PCI_MSI
 static struct irq_domain *armada_370_xp_msi_domain;
 static DECLARE_BITMAP(msi_used, PCI_MSI_DOORBELL_NR);
@@ -356,6 +357,7 @@ static int armada_xp_mpic_secondary_init(struct notifier_block *nfb,
 {
        if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
                armada_xp_mpic_smp_cpu_init();
+
        return NOTIFY_OK;
 }
 
@@ -364,6 +366,20 @@ static struct notifier_block armada_370_xp_mpic_cpu_notifier = {
        .priority = 100,
 };
 
+static int mpic_cascaded_secondary_init(struct notifier_block *nfb,
+                                       unsigned long action, void *hcpu)
+{
+       if (action == CPU_STARTING || action == CPU_STARTING_FROZEN)
+               enable_percpu_irq(parent_irq, IRQ_TYPE_NONE);
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block mpic_cascaded_cpu_notifier = {
+       .notifier_call = mpic_cascaded_secondary_init,
+       .priority = 100,
+};
+
 #endif /* CONFIG_SMP */
 
 static struct irq_domain_ops armada_370_xp_mpic_irq_ops = {
@@ -539,7 +555,7 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
                                             struct device_node *parent)
 {
        struct resource main_int_res, per_cpu_int_res;
-       int parent_irq, nr_irqs, i;
+       int nr_irqs, i;
        u32 control;
 
        BUG_ON(of_address_to_resource(node, 0, &main_int_res));
@@ -587,6 +603,9 @@ static int __init armada_370_xp_mpic_of_init(struct device_node *node,
                register_cpu_notifier(&armada_370_xp_mpic_cpu_notifier);
 #endif
        } else {
+#ifdef CONFIG_SMP
+               register_cpu_notifier(&mpic_cascaded_cpu_notifier);
+#endif
                irq_set_chained_handler(parent_irq,
                                        armada_370_xp_mpic_handle_cascade_irq);
        }
index d8996bd..9687f8a 100644 (file)
@@ -169,7 +169,7 @@ static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
 
 static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
 {
-       cmd->raw_cmd[0] &= ~(0xffffUL << 32);
+       cmd->raw_cmd[0] &= BIT_ULL(32) - 1;
        cmd->raw_cmd[0] |= ((u64)devid) << 32;
 }
 
@@ -416,13 +416,14 @@ static void its_send_single_command(struct its_node *its,
 {
        struct its_cmd_block *cmd, *sync_cmd, *next_cmd;
        struct its_collection *sync_col;
+       unsigned long flags;
 
-       raw_spin_lock(&its->lock);
+       raw_spin_lock_irqsave(&its->lock, flags);
 
        cmd = its_allocate_entry(its);
        if (!cmd) {             /* We're soooooo screewed... */
                pr_err_ratelimited("ITS can't allocate, dropping command\n");
-               raw_spin_unlock(&its->lock);
+               raw_spin_unlock_irqrestore(&its->lock, flags);
                return;
        }
        sync_col = builder(cmd, desc);
@@ -442,7 +443,7 @@ static void its_send_single_command(struct its_node *its,
 
 post:
        next_cmd = its_post_commands(its);
-       raw_spin_unlock(&its->lock);
+       raw_spin_unlock_irqrestore(&its->lock, flags);
 
        its_wait_for_range_completion(its, cmd, next_cmd);
 }
@@ -799,21 +800,44 @@ static int its_alloc_tables(struct its_node *its)
 {
        int err;
        int i;
-       int psz = PAGE_SIZE;
+       int psz = SZ_64K;
        u64 shr = GITS_BASER_InnerShareable;
+       u64 cache = GITS_BASER_WaWb;
 
        for (i = 0; i < GITS_BASER_NR_REGS; i++) {
                u64 val = readq_relaxed(its->base + GITS_BASER + i * 8);
                u64 type = GITS_BASER_TYPE(val);
                u64 entry_size = GITS_BASER_ENTRY_SIZE(val);
+               int order = get_order(psz);
+               int alloc_size;
                u64 tmp;
                void *base;
 
                if (type == GITS_BASER_TYPE_NONE)
                        continue;
 
-               /* We're lazy and only allocate a single page for now */
-               base = (void *)get_zeroed_page(GFP_KERNEL);
+               /*
+                * Allocate as many entries as required to fit the
+                * range of device IDs that the ITS can grok... The ID
+                * space being incredibly sparse, this results in a
+                * massive waste of memory.
+                *
+                * For other tables, only allocate a single page.
+                */
+               if (type == GITS_BASER_TYPE_DEVICE) {
+                       u64 typer = readq_relaxed(its->base + GITS_TYPER);
+                       u32 ids = GITS_TYPER_DEVBITS(typer);
+
+                       order = get_order((1UL << ids) * entry_size);
+                       if (order >= MAX_ORDER) {
+                               order = MAX_ORDER - 1;
+                               pr_warn("%s: Device Table too large, reduce its page order to %u\n",
+                                       its->msi_chip.of_node->full_name, order);
+                       }
+               }
+
+               alloc_size = (1 << order) * PAGE_SIZE;
+               base = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
                if (!base) {
                        err = -ENOMEM;
                        goto out_free;
@@ -825,7 +849,7 @@ retry_baser:
                val = (virt_to_phys(base)                                |
                       (type << GITS_BASER_TYPE_SHIFT)                   |
                       ((entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) |
-                      GITS_BASER_WaWb                                   |
+                      cache                                             |
                       shr                                               |
                       GITS_BASER_VALID);
 
@@ -841,7 +865,7 @@ retry_baser:
                        break;
                }
 
-               val |= (PAGE_SIZE / psz) - 1;
+               val |= (alloc_size / psz) - 1;
 
                writeq_relaxed(val, its->base + GITS_BASER + i * 8);
                tmp = readq_relaxed(its->base + GITS_BASER + i * 8);
@@ -851,9 +875,12 @@ retry_baser:
                         * Shareability didn't stick. Just use
                         * whatever the read reported, which is likely
                         * to be the only thing this redistributor
-                        * supports.
+                        * supports. If that's zero, make it
+                        * non-cacheable as well.
                         */
                        shr = tmp & GITS_BASER_SHAREABILITY_MASK;
+                       if (!shr)
+                               cache = GITS_BASER_nC;
                        goto retry_baser;
                }
 
@@ -882,7 +909,7 @@ retry_baser:
                }
 
                pr_info("ITS: allocated %d %s @%lx (psz %dK, shr %d)\n",
-                       (int)(PAGE_SIZE / entry_size),
+                       (int)(alloc_size / entry_size),
                        its_base_type_string[type],
                        (unsigned long)virt_to_phys(base),
                        psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
@@ -957,16 +984,39 @@ static void its_cpu_init_lpis(void)
        tmp = readq_relaxed(rbase + GICR_PROPBASER);
 
        if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) {
+               if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) {
+                       /*
+                        * The HW reports non-shareable, we must
+                        * remove the cacheability attributes as
+                        * well.
+                        */
+                       val &= ~(GICR_PROPBASER_SHAREABILITY_MASK |
+                                GICR_PROPBASER_CACHEABILITY_MASK);
+                       val |= GICR_PROPBASER_nC;
+                       writeq_relaxed(val, rbase + GICR_PROPBASER);
+               }
                pr_info_once("GIC: using cache flushing for LPI property table\n");
                gic_rdists->flags |= RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING;
        }
 
        /* set PENDBASE */
        val = (page_to_phys(pend_page) |
-              GICR_PROPBASER_InnerShareable |
-              GICR_PROPBASER_WaWb);
+              GICR_PENDBASER_InnerShareable |
+              GICR_PENDBASER_WaWb);
 
        writeq_relaxed(val, rbase + GICR_PENDBASER);
+       tmp = readq_relaxed(rbase + GICR_PENDBASER);
+
+       if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) {
+               /*
+                * The HW reports non-shareable, we must remove the
+                * cacheability attributes as well.
+                */
+               val &= ~(GICR_PENDBASER_SHAREABILITY_MASK |
+                        GICR_PENDBASER_CACHEABILITY_MASK);
+               val |= GICR_PENDBASER_nC;
+               writeq_relaxed(val, rbase + GICR_PENDBASER);
+       }
 
        /* Enable LPIs */
        val = readl_relaxed(rbase + GICR_CTLR);
@@ -1003,7 +1053,7 @@ static void its_cpu_init_collection(void)
                         * This ITS wants a linear CPU number.
                         */
                        target = readq_relaxed(gic_data_rdist_rd_base() + GICR_TYPER);
-                       target = GICR_TYPER_CPU_NUMBER(target);
+                       target = GICR_TYPER_CPU_NUMBER(target) << 16;
                }
 
                /* Perform collection mapping */
@@ -1020,8 +1070,9 @@ static void its_cpu_init_collection(void)
 static struct its_device *its_find_device(struct its_node *its, u32 dev_id)
 {
        struct its_device *its_dev = NULL, *tmp;
+       unsigned long flags;
 
-       raw_spin_lock(&its->lock);
+       raw_spin_lock_irqsave(&its->lock, flags);
 
        list_for_each_entry(tmp, &its->its_device_list, entry) {
                if (tmp->device_id == dev_id) {
@@ -1030,7 +1081,7 @@ static struct its_device *its_find_device(struct its_node *its, u32 dev_id)
                }
        }
 
-       raw_spin_unlock(&its->lock);
+       raw_spin_unlock_irqrestore(&its->lock, flags);
 
        return its_dev;
 }
@@ -1040,6 +1091,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 {
        struct its_device *dev;
        unsigned long *lpi_map;
+       unsigned long flags;
        void *itt;
        int lpi_base;
        int nr_lpis;
@@ -1056,7 +1108,7 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
        nr_ites = max(2UL, roundup_pow_of_two(nvecs));
        sz = nr_ites * its->ite_size;
        sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1;
-       itt = kmalloc(sz, GFP_KERNEL);
+       itt = kzalloc(sz, GFP_KERNEL);
        lpi_map = its_lpi_alloc_chunks(nvecs, &lpi_base, &nr_lpis);
 
        if (!dev || !itt || !lpi_map) {
@@ -1075,9 +1127,9 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
        dev->device_id = dev_id;
        INIT_LIST_HEAD(&dev->entry);
 
-       raw_spin_lock(&its->lock);
+       raw_spin_lock_irqsave(&its->lock, flags);
        list_add(&dev->entry, &its->its_device_list);
-       raw_spin_unlock(&its->lock);
+       raw_spin_unlock_irqrestore(&its->lock, flags);
 
        /* Bind the device to the first possible CPU */
        cpu = cpumask_first(cpu_online_mask);
@@ -1091,9 +1143,11 @@ static struct its_device *its_create_device(struct its_node *its, u32 dev_id,
 
 static void its_free_device(struct its_device *its_dev)
 {
-       raw_spin_lock(&its_dev->its->lock);
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&its_dev->its->lock, flags);
        list_del(&its_dev->entry);
-       raw_spin_unlock(&its_dev->its->lock);
+       raw_spin_unlock_irqrestore(&its_dev->its->lock, flags);
        kfree(its_dev->itt);
        kfree(its_dev);
 }
@@ -1112,31 +1166,69 @@ static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq)
        return 0;
 }
 
+struct its_pci_alias {
+       struct pci_dev  *pdev;
+       u32             dev_id;
+       u32             count;
+};
+
+static int its_pci_msi_vec_count(struct pci_dev *pdev)
+{
+       int msi, msix;
+
+       msi = max(pci_msi_vec_count(pdev), 0);
+       msix = max(pci_msix_vec_count(pdev), 0);
+
+       return max(msi, msix);
+}
+
+static int its_get_pci_alias(struct pci_dev *pdev, u16 alias, void *data)
+{
+       struct its_pci_alias *dev_alias = data;
+
+       dev_alias->dev_id = alias;
+       if (pdev != dev_alias->pdev)
+               dev_alias->count += its_pci_msi_vec_count(dev_alias->pdev);
+
+       return 0;
+}
+
 static int its_msi_prepare(struct irq_domain *domain, struct device *dev,
                           int nvec, msi_alloc_info_t *info)
 {
        struct pci_dev *pdev;
        struct its_node *its;
-       u32 dev_id;
        struct its_device *its_dev;
+       struct its_pci_alias dev_alias;
 
        if (!dev_is_pci(dev))
                return -EINVAL;
 
        pdev = to_pci_dev(dev);
-       dev_id = PCI_DEVID(pdev->bus->number, pdev->devfn);
+       dev_alias.pdev = pdev;
+       dev_alias.count = nvec;
+
+       pci_for_each_dma_alias(pdev, its_get_pci_alias, &dev_alias);
        its = domain->parent->host_data;
 
-       its_dev = its_find_device(its, dev_id);
-       if (WARN_ON(its_dev))
-               return -EINVAL;
+       its_dev = its_find_device(its, dev_alias.dev_id);
+       if (its_dev) {
+               /*
+                * We already have seen this ID, probably through
+                * another alias (PCI bridge of some sort). No need to
+                * create the device.
+                */
+               dev_dbg(dev, "Reusing ITT for devID %x\n", dev_alias.dev_id);
+               goto out;
+       }
 
-       its_dev = its_create_device(its, dev_id, nvec);
+       its_dev = its_create_device(its, dev_alias.dev_id, dev_alias.count);
        if (!its_dev)
                return -ENOMEM;
 
-       dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n", nvec, ilog2(nvec));
-
+       dev_dbg(&pdev->dev, "ITT %d entries, %d bits\n",
+               dev_alias.count, ilog2(dev_alias.count));
+out:
        info->scratchpad[0].ptr = its_dev;
        info->scratchpad[1].ptr = dev;
        return 0;
@@ -1255,6 +1347,34 @@ static const struct irq_domain_ops its_domain_ops = {
        .deactivate             = its_irq_domain_deactivate,
 };
 
+static int its_force_quiescent(void __iomem *base)
+{
+       u32 count = 1000000;    /* 1s */
+       u32 val;
+
+       val = readl_relaxed(base + GITS_CTLR);
+       if (val & GITS_CTLR_QUIESCENT)
+               return 0;
+
+       /* Disable the generation of all interrupts to this ITS */
+       val &= ~GITS_CTLR_ENABLE;
+       writel_relaxed(val, base + GITS_CTLR);
+
+       /* Poll GITS_CTLR and wait until ITS becomes quiescent */
+       while (1) {
+               val = readl_relaxed(base + GITS_CTLR);
+               if (val & GITS_CTLR_QUIESCENT)
+                       return 0;
+
+               count--;
+               if (!count)
+                       return -EBUSY;
+
+               cpu_relax();
+               udelay(1);
+       }
+}
+
 static int its_probe(struct device_node *node, struct irq_domain *parent)
 {
        struct resource res;
@@ -1283,6 +1403,13 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
                goto out_unmap;
        }
 
+       err = its_force_quiescent(its_base);
+       if (err) {
+               pr_warn("%s: failed to quiesce, giving up\n",
+                       node->full_name);
+               goto out_unmap;
+       }
+
        pr_info("ITS: %s\n", node->full_name);
 
        its = kzalloc(sizeof(*its), GFP_KERNEL);
@@ -1322,14 +1449,26 @@ static int its_probe(struct device_node *node, struct irq_domain *parent)
 
        writeq_relaxed(baser, its->base + GITS_CBASER);
        tmp = readq_relaxed(its->base + GITS_CBASER);
-       writeq_relaxed(0, its->base + GITS_CWRITER);
-       writel_relaxed(1, its->base + GITS_CTLR);
 
-       if ((tmp ^ baser) & GITS_BASER_SHAREABILITY_MASK) {
+       if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) {
+               if (!(tmp & GITS_CBASER_SHAREABILITY_MASK)) {
+                       /*
+                        * The HW reports non-shareable, we must
+                        * remove the cacheability attributes as
+                        * well.
+                        */
+                       baser &= ~(GITS_CBASER_SHAREABILITY_MASK |
+                                  GITS_CBASER_CACHEABILITY_MASK);
+                       baser |= GITS_CBASER_nC;
+                       writeq_relaxed(baser, its->base + GITS_CBASER);
+               }
                pr_info("ITS: using cache flushing for cmd queue\n");
                its->flags |= ITS_FLAGS_CMDQ_NEEDS_FLUSHING;
        }
 
+       writeq_relaxed(0, its->base + GITS_CWRITER);
+       writel_relaxed(GITS_CTLR_ENABLE, its->base + GITS_CTLR);
+
        if (of_property_read_bool(its->msi_chip.of_node, "msi-controller")) {
                its->domain = irq_domain_add_tree(NULL, &its_domain_ops, its);
                if (!its->domain) {
@@ -1382,12 +1521,11 @@ static bool gic_rdists_supports_plpis(void)
 
 int its_cpu_init(void)
 {
-       if (!gic_rdists_supports_plpis()) {
-               pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
-               return -ENXIO;
-       }
-
        if (!list_empty(&its_nodes)) {
+               if (!gic_rdists_supports_plpis()) {
+                       pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
+                       return -ENXIO;
+               }
                its_cpu_init_lpis();
                its_cpu_init_collection();
        }
index 1c6dea2..fd8850d 100644 (file)
@@ -466,7 +466,7 @@ static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask,
                tlist |= 1 << (mpidr & 0xf);
 
                cpu = cpumask_next(cpu, mask);
-               if (cpu == nr_cpu_ids)
+               if (cpu >= nr_cpu_ids)
                        goto out;
 
                mpidr = cpu_logical_map(cpu);
index 4634cf7..471e1cd 100644 (file)
@@ -154,23 +154,25 @@ static inline unsigned int gic_irq(struct irq_data *d)
 static void gic_mask_irq(struct irq_data *d)
 {
        u32 mask = 1 << (gic_irq(d) % 32);
+       unsigned long flags;
 
-       raw_spin_lock(&irq_controller_lock);
+       raw_spin_lock_irqsave(&irq_controller_lock, flags);
        writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_CLEAR + (gic_irq(d) / 32) * 4);
        if (gic_arch_extn.irq_mask)
                gic_arch_extn.irq_mask(d);
-       raw_spin_unlock(&irq_controller_lock);
+       raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 
 static void gic_unmask_irq(struct irq_data *d)
 {
        u32 mask = 1 << (gic_irq(d) % 32);
+       unsigned long flags;
 
-       raw_spin_lock(&irq_controller_lock);
+       raw_spin_lock_irqsave(&irq_controller_lock, flags);
        if (gic_arch_extn.irq_unmask)
                gic_arch_extn.irq_unmask(d);
        writel_relaxed(mask, gic_dist_base(d) + GIC_DIST_ENABLE_SET + (gic_irq(d) / 32) * 4);
-       raw_spin_unlock(&irq_controller_lock);
+       raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 }
 
 static void gic_eoi_irq(struct irq_data *d)
@@ -188,6 +190,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
 {
        void __iomem *base = gic_dist_base(d);
        unsigned int gicirq = gic_irq(d);
+       unsigned long flags;
        int ret;
 
        /* Interrupt configuration for SGIs can't be changed */
@@ -199,14 +202,14 @@ static int gic_set_type(struct irq_data *d, unsigned int type)
                            type != IRQ_TYPE_EDGE_RISING)
                return -EINVAL;
 
-       raw_spin_lock(&irq_controller_lock);
+       raw_spin_lock_irqsave(&irq_controller_lock, flags);
 
        if (gic_arch_extn.irq_set_type)
                gic_arch_extn.irq_set_type(d, type);
 
        ret = gic_configure_irq(gicirq, type, base, NULL);
 
-       raw_spin_unlock(&irq_controller_lock);
+       raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 
        return ret;
 }
@@ -227,6 +230,7 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
        void __iomem *reg = gic_dist_base(d) + GIC_DIST_TARGET + (gic_irq(d) & ~3);
        unsigned int cpu, shift = (gic_irq(d) % 4) * 8;
        u32 val, mask, bit;
+       unsigned long flags;
 
        if (!force)
                cpu = cpumask_any_and(mask_val, cpu_online_mask);
@@ -236,12 +240,12 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
        if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids)
                return -EINVAL;
 
-       raw_spin_lock(&irq_controller_lock);
+       raw_spin_lock_irqsave(&irq_controller_lock, flags);
        mask = 0xff << shift;
        bit = gic_cpu_map[cpu] << shift;
        val = readl_relaxed(reg) & ~mask;
        writel_relaxed(val | bit, reg);
-       raw_spin_unlock(&irq_controller_lock);
+       raw_spin_unlock_irqrestore(&irq_controller_lock, flags);
 
        return IRQ_SET_MASK_OK;
 }
index 6a7447c..358a574 100644 (file)
@@ -1609,7 +1609,7 @@ icn_setup(char *line)
        if (ints[0] > 1)
                membase = (unsigned long)ints[2];
        if (str && *str) {
-               strcpy(sid, str);
+               strlcpy(sid, str, sizeof(sid));
                icn_id = sid;
                if ((p = strchr(sid, ','))) {
                        *p++ = 0;
index ee035ec..169172d 100644 (file)
@@ -1,6 +1,6 @@
 config LGUEST
        tristate "Linux hypervisor example code"
-       depends on X86_32 && EVENTFD && TTY
+       depends on X86_32 && EVENTFD && TTY && PCI_DIRECT
        select HVC_DRIVER
        ---help---
          This is a very simple module which allows you to run
index 37de017..74adcd2 100644 (file)
@@ -289,9 +289,16 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
        struct request_queue *q = bdev_get_queue(where->bdev);
        unsigned short logical_block_size = queue_logical_block_size(q);
        sector_t num_sectors;
+       unsigned int uninitialized_var(special_cmd_max_sectors);
 
-       /* Reject unsupported discard requests */
-       if ((rw & REQ_DISCARD) && !blk_queue_discard(q)) {
+       /*
+        * Reject unsupported discard and write same requests.
+        */
+       if (rw & REQ_DISCARD)
+               special_cmd_max_sectors = q->limits.max_discard_sectors;
+       else if (rw & REQ_WRITE_SAME)
+               special_cmd_max_sectors = q->limits.max_write_same_sectors;
+       if ((rw & (REQ_DISCARD | REQ_WRITE_SAME)) && special_cmd_max_sectors == 0) {
                dec_count(io, region, -EOPNOTSUPP);
                return;
        }
@@ -317,7 +324,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
                store_io_and_region_in_bio(bio, io, region);
 
                if (rw & REQ_DISCARD) {
-                       num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining);
+                       num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
                        bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
                        remaining -= num_sectors;
                } else if (rw & REQ_WRITE_SAME) {
@@ -326,7 +333,7 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
                         */
                        dp->get_page(dp, &page, &len, &offset);
                        bio_add_page(bio, page, logical_block_size, offset);
-                       num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining);
+                       num_sectors = min_t(sector_t, special_cmd_max_sectors, remaining);
                        bio->bi_iter.bi_size = num_sectors << SECTOR_SHIFT;
 
                        offset = 0;
index 8b204ae..f83a0f3 100644 (file)
@@ -20,6 +20,8 @@
 #include <linux/log2.h>
 #include <linux/dm-kcopyd.h>
 
+#include "dm.h"
+
 #include "dm-exception-store.h"
 
 #define DM_MSG_PREFIX "snapshots"
@@ -290,6 +292,16 @@ struct origin {
        struct list_head snapshots;
 };
 
+/*
+ * This structure is allocated for each origin target
+ */
+struct dm_origin {
+       struct dm_dev *dev;
+       struct dm_target *ti;
+       unsigned split_boundary;
+       struct list_head hash_list;
+};
+
 /*
  * Size of the hash table for origin volumes. If we make this
  * the size of the minors list then it should be nearly perfect
@@ -297,6 +309,7 @@ struct origin {
 #define ORIGIN_HASH_SIZE 256
 #define ORIGIN_MASK      0xFF
 static struct list_head *_origins;
+static struct list_head *_dm_origins;
 static struct rw_semaphore _origins_lock;
 
 static DECLARE_WAIT_QUEUE_HEAD(_pending_exceptions_done);
@@ -310,12 +323,22 @@ static int init_origin_hash(void)
        _origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
                           GFP_KERNEL);
        if (!_origins) {
-               DMERR("unable to allocate memory");
+               DMERR("unable to allocate memory for _origins");
                return -ENOMEM;
        }
-
        for (i = 0; i < ORIGIN_HASH_SIZE; i++)
                INIT_LIST_HEAD(_origins + i);
+
+       _dm_origins = kmalloc(ORIGIN_HASH_SIZE * sizeof(struct list_head),
+                             GFP_KERNEL);
+       if (!_dm_origins) {
+               DMERR("unable to allocate memory for _dm_origins");
+               kfree(_origins);
+               return -ENOMEM;
+       }
+       for (i = 0; i < ORIGIN_HASH_SIZE; i++)
+               INIT_LIST_HEAD(_dm_origins + i);
+
        init_rwsem(&_origins_lock);
 
        return 0;
@@ -324,6 +347,7 @@ static int init_origin_hash(void)
 static void exit_origin_hash(void)
 {
        kfree(_origins);
+       kfree(_dm_origins);
 }
 
 static unsigned origin_hash(struct block_device *bdev)
@@ -350,6 +374,30 @@ static void __insert_origin(struct origin *o)
        list_add_tail(&o->hash_list, sl);
 }
 
+static struct dm_origin *__lookup_dm_origin(struct block_device *origin)
+{
+       struct list_head *ol;
+       struct dm_origin *o;
+
+       ol = &_dm_origins[origin_hash(origin)];
+       list_for_each_entry (o, ol, hash_list)
+               if (bdev_equal(o->dev->bdev, origin))
+                       return o;
+
+       return NULL;
+}
+
+static void __insert_dm_origin(struct dm_origin *o)
+{
+       struct list_head *sl = &_dm_origins[origin_hash(o->dev->bdev)];
+       list_add_tail(&o->hash_list, sl);
+}
+
+static void __remove_dm_origin(struct dm_origin *o)
+{
+       list_del(&o->hash_list);
+}
+
 /*
  * _origins_lock must be held when calling this function.
  * Returns number of snapshots registered using the supplied cow device, plus:
@@ -1840,9 +1888,40 @@ static int snapshot_preresume(struct dm_target *ti)
 static void snapshot_resume(struct dm_target *ti)
 {
        struct dm_snapshot *s = ti->private;
-       struct dm_snapshot *snap_src = NULL, *snap_dest = NULL;
+       struct dm_snapshot *snap_src = NULL, *snap_dest = NULL, *snap_merging = NULL;
+       struct dm_origin *o;
+       struct mapped_device *origin_md = NULL;
+       bool must_restart_merging = false;
 
        down_read(&_origins_lock);
+
+       o = __lookup_dm_origin(s->origin->bdev);
+       if (o)
+               origin_md = dm_table_get_md(o->ti->table);
+       if (!origin_md) {
+               (void) __find_snapshots_sharing_cow(s, NULL, NULL, &snap_merging);
+               if (snap_merging)
+                       origin_md = dm_table_get_md(snap_merging->ti->table);
+       }
+       if (origin_md == dm_table_get_md(ti->table))
+               origin_md = NULL;
+       if (origin_md) {
+               if (dm_hold(origin_md))
+                       origin_md = NULL;
+       }
+
+       up_read(&_origins_lock);
+
+       if (origin_md) {
+               dm_internal_suspend_fast(origin_md);
+               if (snap_merging && test_bit(RUNNING_MERGE, &snap_merging->state_bits)) {
+                       must_restart_merging = true;
+                       stop_merge(snap_merging);
+               }
+       }
+
+       down_read(&_origins_lock);
+
        (void) __find_snapshots_sharing_cow(s, &snap_src, &snap_dest, NULL);
        if (snap_src && snap_dest) {
                down_write(&snap_src->lock);
@@ -1851,8 +1930,16 @@ static void snapshot_resume(struct dm_target *ti)
                up_write(&snap_dest->lock);
                up_write(&snap_src->lock);
        }
+
        up_read(&_origins_lock);
 
+       if (origin_md) {
+               if (must_restart_merging)
+                       start_merge(snap_merging);
+               dm_internal_resume_fast(origin_md);
+               dm_put(origin_md);
+       }
+
        /* Now we have correct chunk size, reregister */
        reregister_snapshot(s);
 
@@ -2133,11 +2220,6 @@ static int origin_write_extent(struct dm_snapshot *merging_snap,
  * Origin: maps a linear range of a device, with hooks for snapshotting.
  */
 
-struct dm_origin {
-       struct dm_dev *dev;
-       unsigned split_boundary;
-};
-
 /*
  * Construct an origin mapping: <dev_path>
  * The context for an origin is merely a 'struct dm_dev *'
@@ -2166,6 +2248,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                goto bad_open;
        }
 
+       o->ti = ti;
        ti->private = o;
        ti->num_flush_bios = 1;
 
@@ -2180,6 +2263,7 @@ bad_alloc:
 static void origin_dtr(struct dm_target *ti)
 {
        struct dm_origin *o = ti->private;
+
        dm_put_device(ti, o->dev);
        kfree(o);
 }
@@ -2216,6 +2300,19 @@ static void origin_resume(struct dm_target *ti)
        struct dm_origin *o = ti->private;
 
        o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev);
+
+       down_write(&_origins_lock);
+       __insert_dm_origin(o);
+       up_write(&_origins_lock);
+}
+
+static void origin_postsuspend(struct dm_target *ti)
+{
+       struct dm_origin *o = ti->private;
+
+       down_write(&_origins_lock);
+       __remove_dm_origin(o);
+       up_write(&_origins_lock);
 }
 
 static void origin_status(struct dm_target *ti, status_type_t type,
@@ -2258,12 +2355,13 @@ static int origin_iterate_devices(struct dm_target *ti,
 
 static struct target_type origin_target = {
        .name    = "snapshot-origin",
-       .version = {1, 8, 1},
+       .version = {1, 9, 0},
        .module  = THIS_MODULE,
        .ctr     = origin_ctr,
        .dtr     = origin_dtr,
        .map     = origin_map,
        .resume  = origin_resume,
+       .postsuspend = origin_postsuspend,
        .status  = origin_status,
        .merge   = origin_merge,
        .iterate_devices = origin_iterate_devices,
@@ -2271,7 +2369,7 @@ static struct target_type origin_target = {
 
 static struct target_type snapshot_target = {
        .name    = "snapshot",
-       .version = {1, 12, 0},
+       .version = {1, 13, 0},
        .module  = THIS_MODULE,
        .ctr     = snapshot_ctr,
        .dtr     = snapshot_dtr,
@@ -2285,7 +2383,7 @@ static struct target_type snapshot_target = {
 
 static struct target_type merge_target = {
        .name    = dm_snapshot_merge_target_name,
-       .version = {1, 2, 0},
+       .version = {1, 3, 0},
        .module  = THIS_MODULE,
        .ctr     = snapshot_ctr,
        .dtr     = snapshot_dtr,
index 654773c..921aafd 100644 (file)
@@ -2358,17 +2358,6 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
                return DM_MAPIO_REMAPPED;
 
        case -ENODATA:
-               if (get_pool_mode(tc->pool) == PM_READ_ONLY) {
-                       /*
-                        * This block isn't provisioned, and we have no way
-                        * of doing so.
-                        */
-                       handle_unserviceable_bio(tc->pool, bio);
-                       cell_defer_no_holder(tc, virt_cell);
-                       return DM_MAPIO_SUBMITTED;
-               }
-               /* fall through */
-
        case -EWOULDBLOCK:
                thin_defer_cell(tc, virt_cell);
                return DM_MAPIO_SUBMITTED;
index 73f2880..8001fe9 100644 (file)
@@ -433,7 +433,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
 
        dm_get(md);
        atomic_inc(&md->open_count);
-
 out:
        spin_unlock(&_minor_lock);
 
@@ -442,16 +441,20 @@ out:
 
 static void dm_blk_close(struct gendisk *disk, fmode_t mode)
 {
-       struct mapped_device *md = disk->private_data;
+       struct mapped_device *md;
 
        spin_lock(&_minor_lock);
 
+       md = disk->private_data;
+       if (WARN_ON(!md))
+               goto out;
+
        if (atomic_dec_and_test(&md->open_count) &&
            (test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
                queue_work(deferred_remove_workqueue, &deferred_remove_work);
 
        dm_put(md);
-
+out:
        spin_unlock(&_minor_lock);
 }
 
@@ -2241,7 +2244,6 @@ static void free_dev(struct mapped_device *md)
        int minor = MINOR(disk_devt(md->disk));
 
        unlock_fs(md);
-       bdput(md->bdev);
        destroy_workqueue(md->wq);
 
        if (md->kworker_task)
@@ -2252,19 +2254,22 @@ static void free_dev(struct mapped_device *md)
                mempool_destroy(md->rq_pool);
        if (md->bs)
                bioset_free(md->bs);
-       blk_integrity_unregister(md->disk);
-       del_gendisk(md->disk);
+
        cleanup_srcu_struct(&md->io_barrier);
        free_table_devices(&md->table_devices);
-       free_minor(minor);
+       dm_stats_cleanup(&md->stats);
 
        spin_lock(&_minor_lock);
        md->disk->private_data = NULL;
        spin_unlock(&_minor_lock);
-
+       if (blk_get_integrity(md->disk))
+               blk_integrity_unregister(md->disk);
+       del_gendisk(md->disk);
        put_disk(md->disk);
        blk_cleanup_queue(md->queue);
-       dm_stats_cleanup(&md->stats);
+       bdput(md->bdev);
+       free_minor(minor);
+
        module_put(THIS_MODULE);
        kfree(md);
 }
@@ -2616,6 +2621,19 @@ void dm_get(struct mapped_device *md)
        BUG_ON(test_bit(DMF_FREEING, &md->flags));
 }
 
+int dm_hold(struct mapped_device *md)
+{
+       spin_lock(&_minor_lock);
+       if (test_bit(DMF_FREEING, &md->flags)) {
+               spin_unlock(&_minor_lock);
+               return -EBUSY;
+       }
+       dm_get(md);
+       spin_unlock(&_minor_lock);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(dm_hold);
+
 const char *dm_device_name(struct mapped_device *md)
 {
        return md->name;
@@ -2629,8 +2647,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 
        might_sleep();
 
-       spin_lock(&_minor_lock);
        map = dm_get_live_table(md, &srcu_idx);
+
+       spin_lock(&_minor_lock);
        idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
        set_bit(DMF_FREEING, &md->flags);
        spin_unlock(&_minor_lock);
@@ -2638,10 +2657,16 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
        if (dm_request_based(md))
                flush_kthread_worker(&md->kworker);
 
+       /*
+        * Take suspend_lock so that presuspend and postsuspend methods
+        * do not race with internal suspend.
+        */
+       mutex_lock(&md->suspend_lock);
        if (!dm_suspended_md(md)) {
                dm_table_presuspend_targets(map);
                dm_table_postsuspend_targets(map);
        }
+       mutex_unlock(&md->suspend_lock);
 
        /* dm_put_live_table must be before msleep, otherwise deadlock is possible */
        dm_put_live_table(md, srcu_idx);
@@ -3115,6 +3140,7 @@ void dm_internal_suspend_fast(struct mapped_device *md)
        flush_workqueue(md->wq);
        dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
 }
+EXPORT_SYMBOL_GPL(dm_internal_suspend_fast);
 
 void dm_internal_resume_fast(struct mapped_device *md)
 {
@@ -3126,6 +3152,7 @@ void dm_internal_resume_fast(struct mapped_device *md)
 done:
        mutex_unlock(&md->suspend_lock);
 }
+EXPORT_SYMBOL_GPL(dm_internal_resume_fast);
 
 /*-----------------------------------------------------------------
  * Event notification.
index cadf9cc..e617878 100644 (file)
@@ -249,6 +249,7 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
        const int rw = bio_data_dir(bio);
        struct mddev *mddev = q->queuedata;
        unsigned int sectors;
+       int cpu;
 
        if (mddev == NULL || mddev->pers == NULL
            || !mddev->ready) {
@@ -284,7 +285,10 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
        sectors = bio_sectors(bio);
        mddev->pers->make_request(mddev, bio);
 
-       generic_start_io_acct(rw, sectors, &mddev->gendisk->part0);
+       cpu = part_stat_lock();
+       part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
+       part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors);
+       part_stat_unlock();
 
        if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
                wake_up(&mddev->sb_wait);
@@ -5080,7 +5084,8 @@ int md_run(struct mddev *mddev)
        }
        if (err) {
                mddev_detach(mddev);
-               pers->free(mddev, mddev->private);
+               if (mddev->private)
+                       pers->free(mddev, mddev->private);
                module_put(pers->owner);
                bitmap_destroy(mddev);
                return err;
index a13f738..3b5d7f7 100644 (file)
@@ -313,7 +313,7 @@ static struct strip_zone *find_zone(struct r0conf *conf,
 
 /*
  * remaps the bio to the target device. we separate two flows.
- * power 2 flow and a general flow for the sake of perfromance
+ * power 2 flow and a general flow for the sake of performance
 */
 static struct md_rdev *map_sector(struct mddev *mddev, struct strip_zone *zone,
                                sector_t sector, sector_t *sector_offset)
@@ -467,8 +467,6 @@ static int raid0_run(struct mddev *mddev)
        dump_zones(mddev);
 
        ret = md_integrity_register(mddev);
-       if (ret)
-               raid0_free(mddev, conf);
 
        return ret;
 }
@@ -526,6 +524,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
                        split = bio;
                }
 
+               sector = bio->bi_iter.bi_sector;
                zone = find_zone(mddev->private, &sector);
                tmp_dev = map_sector(mddev, zone, sector, &sector);
                split->bi_bdev = tmp_dev->bdev;
index 5d2d8f4..67faa8d 100644 (file)
@@ -1240,7 +1240,7 @@ static int rtl2832_probe(struct i2c_client *client,
        dev->regmap_config.max_register = 5 * 0x100,
        dev->regmap_config.ranges = regmap_range_cfg,
        dev->regmap_config.num_ranges = ARRAY_SIZE(regmap_range_cfg),
-       dev->regmap_config.cache_type = REGCACHE_RBTREE,
+       dev->regmap_config.cache_type = REGCACHE_NONE,
        dev->regmap = regmap_init(&client->dev, &regmap_bus, client,
                                  &dev->regmap_config);
        if (IS_ERR(dev->regmap)) {
index e4901a5..63c0ee5 100644 (file)
@@ -1339,14 +1339,13 @@ static int vidioc_querycap(struct file *file, void  *priv,
        strlcpy(cap->driver, dev->name, sizeof(cap->driver));
        strlcpy(cap->card, cx23885_boards[tsport->dev->board].name,
                sizeof(cap->card));
-       sprintf(cap->bus_info, "PCI:%s", pci_name(dev->pci));
-       cap->capabilities =
-               V4L2_CAP_VIDEO_CAPTURE |
-               V4L2_CAP_READWRITE     |
-               V4L2_CAP_STREAMING     |
-               0;
+       sprintf(cap->bus_info, "PCIe:%s", pci_name(dev->pci));
+       cap->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE |
+                          V4L2_CAP_STREAMING;
        if (dev->tuner_type != TUNER_ABSENT)
-               cap->capabilities |= V4L2_CAP_TUNER;
+               cap->device_caps |= V4L2_CAP_TUNER;
+       cap->capabilities = cap->device_caps | V4L2_CAP_VBI_CAPTURE |
+               V4L2_CAP_AUDIO | V4L2_CAP_DEVICE_CAPS;
 
        return 0;
 }
index 12f7452..a92ff42 100644 (file)
@@ -1845,6 +1845,9 @@ static void exynos4_jpeg_set_img_addr(struct s5p_jpeg_ctx *ctx)
        struct s5p_jpeg_addr jpeg_addr;
        u32 pix_size, padding_bytes = 0;
 
+       jpeg_addr.cb = 0;
+       jpeg_addr.cr = 0;
+
        pix_size = ctx->cap_q.w * ctx->cap_q.h;
 
        if (ctx->mode == S5P_JPEG_ENCODE) {
index e8c2cad..0974b9a 100644 (file)
@@ -20,7 +20,7 @@
 
 void exynos3250_jpeg_reset(void __iomem *regs)
 {
-       u32 reg = 0;
+       u32 reg = 1;
        int count = 1000;
 
        writel(1, regs + EXYNOS3250_SW_RESET);
index 8e44a59..98374e8 100644 (file)
@@ -833,6 +833,7 @@ static int s5p_mfc_open(struct file *file)
        q->type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
        q->io_modes = VB2_MMAP;
        q->drv_priv = &ctx->fh;
+       q->lock = &dev->mfc_mutex;
        if (vdev == dev->vfd_dec) {
                q->io_modes = VB2_MMAP;
                q->ops = get_dec_queue_ops();
index 15f7663..24262bb 100644 (file)
@@ -29,7 +29,7 @@
 
 /* Offset base used to differentiate between CAPTURE and OUTPUT
 *  while mmaping */
-#define DST_QUEUE_OFF_BASE      (TASK_SIZE / 2)
+#define DST_QUEUE_OFF_BASE     (1 << 30)
 
 #define MFC_BANK1_ALLOC_CTX    0
 #define MFC_BANK2_ALLOC_CTX    1
index de2b8c6..22dfb3e 100644 (file)
@@ -302,7 +302,7 @@ struct s5p_mfc_hw_ops {
        void (*write_info)(struct s5p_mfc_ctx *ctx, unsigned int data,
                        unsigned int ofs);
        unsigned int (*read_info)(struct s5p_mfc_ctx *ctx,
-                       unsigned int ofs);
+                       unsigned long ofs);
        int (*get_dspl_y_adr)(struct s5p_mfc_dev *dev);
        int (*get_dec_y_adr)(struct s5p_mfc_dev *dev);
        int (*get_dspl_status)(struct s5p_mfc_dev *dev);
index 0c4fcf2..b09bcd1 100644 (file)
@@ -263,15 +263,15 @@ static void s5p_mfc_release_dev_context_buffer_v5(struct s5p_mfc_dev *dev)
 static void s5p_mfc_write_info_v5(struct s5p_mfc_ctx *ctx, unsigned int data,
                        unsigned int ofs)
 {
-       writel(data, (volatile void __iomem *)(ctx->shm.virt + ofs));
+       writel(data, (void *)(ctx->shm.virt + ofs));
        wmb();
 }
 
 static unsigned int s5p_mfc_read_info_v5(struct s5p_mfc_ctx *ctx,
-                               unsigned int ofs)
+                               unsigned long ofs)
 {
        rmb();
-       return readl((volatile void __iomem *)(ctx->shm.virt + ofs));
+       return readl((void *)(ctx->shm.virt + ofs));
 }
 
 static void s5p_mfc_dec_calc_dpb_size_v5(struct s5p_mfc_ctx *ctx)
index d826c58..cefad18 100644 (file)
@@ -1852,17 +1852,17 @@ static void s5p_mfc_write_info_v6(struct s5p_mfc_ctx *ctx, unsigned int data,
                unsigned int ofs)
 {
        s5p_mfc_clock_on();
-       writel(data, (volatile void __iomem *)((unsigned long)ofs));
+       writel(data, (void *)((unsigned long)ofs));
        s5p_mfc_clock_off();
 }
 
 static unsigned int
-s5p_mfc_read_info_v6(struct s5p_mfc_ctx *ctx, unsigned int ofs)
+s5p_mfc_read_info_v6(struct s5p_mfc_ctx *ctx, unsigned long ofs)
 {
        int ret;
 
        s5p_mfc_clock_on();
-       ret = readl((volatile void __iomem *)((unsigned long)ofs));
+       ret = readl((void *)ofs);
        s5p_mfc_clock_off();
 
        return ret;
index 5a1835d..697aaed 100644 (file)
@@ -20,6 +20,7 @@ if VIDEO_SAMSUNG_S5P_TV
 config VIDEO_SAMSUNG_S5P_HDMI
        tristate "Samsung HDMI Driver"
        depends on VIDEO_V4L2
+       depends on I2C
        depends on VIDEO_SAMSUNG_S5P_TV
        select VIDEO_SAMSUNG_S5P_HDMIPHY
        help
index a901b62..2554f37 100644 (file)
@@ -1158,6 +1158,7 @@ static int sh_veu_probe(struct platform_device *pdev)
        }
 
        *vdev = sh_veu_videodev;
+       vdev->v4l2_dev = &veu->v4l2_dev;
        spin_lock_init(&veu->lock);
        mutex_init(&veu->fop_lock);
        vdev->lock = &veu->fop_lock;
index 8526bf5..c835beb 100644 (file)
@@ -843,6 +843,8 @@ static int isi_camera_set_bus_param(struct soc_camera_device *icd)
        if (isi->pdata.full_mode)
                cfg1 |= ISI_CFG1_FULL_MODE;
 
+       cfg1 |= ISI_CFG1_THMASK_BEATS_16;
+
        isi_writel(isi, ISI_CTRL, ISI_CTRL_DIS);
        isi_writel(isi, ISI_CFG1, cfg1);
 
index cee7b56..66634b4 100644 (file)
@@ -1665,7 +1665,7 @@ eclkreg:
 eaddpdev:
        platform_device_put(sasc->pdev);
 eallocpdev:
-       devm_kfree(ici->v4l2_dev.dev, sasc);
+       devm_kfree(ici->v4l2_dev.dev, info);
        dev_err(ici->v4l2_dev.dev, "group probe failed: %d\n", ret);
 
        return ret;
index 77dcfdf..87fc0fe 100644 (file)
@@ -780,8 +780,6 @@ static int rtl2832u_frontend_callback(void *adapter_priv, int component,
                case TUNER_RTL2832_TUA9001:
                        return rtl2832u_tua9001_tuner_callback(d, cmd, arg);
                }
-       default:
-               return -EINVAL;
        }
 
        return 0;
index 60af3b1..3fd94fe 100644 (file)
@@ -1,6 +1,7 @@
 menuconfig USB_GSPCA
        tristate "GSPCA based webcams"
        depends on VIDEO_V4L2
+       depends on INPUT || INPUT=n
        default m
        ---help---
          Say Y here if you want to enable selecting webcams based
index bc08a82..cc16e76 100644 (file)
@@ -3230,18 +3230,13 @@ int vb2_thread_stop(struct vb2_queue *q)
 
        if (threadio == NULL)
                return 0;
-       call_void_qop(q, wait_finish, q);
        threadio->stop = true;
-       vb2_internal_streamoff(q, q->type);
-       call_void_qop(q, wait_prepare, q);
+       /* Wake up all pending sleeps in the thread */
+       vb2_queue_error(q);
        err = kthread_stop(threadio->thread);
-       q->fileio = NULL;
-       fileio->req.count = 0;
-       vb2_reqbufs(q, &fileio->req);
-       kfree(fileio);
+       __vb2_cleanup_fileio(q);
        threadio->thread = NULL;
        kfree(threadio);
-       q->fileio = NULL;
        q->threadio = NULL;
        return err;
 }
index b481d20..69e0483 100644 (file)
@@ -632,8 +632,7 @@ static void *vb2_dc_get_userptr(void *alloc_ctx, unsigned long vaddr,
        }
 
        /* extract page list from userspace mapping */
-       ret = vb2_dc_get_user_pages(start, pages, n_pages, vma,
-                                   dma_dir == DMA_FROM_DEVICE);
+       ret = vb2_dc_get_user_pages(start, pages, n_pages, vma, dma_dir);
        if (ret) {
                unsigned long pfn;
                if (vb2_dc_get_user_pfn(start, n_pages, vma, &pfn) == 0) {
index f38ec42..5615522 100644 (file)
@@ -739,7 +739,7 @@ static int __init kempld_init(void)
                for (id = kempld_dmi_table;
                     id->matches[0].slot != DMI_NONE; id++)
                        if (strstr(id->ident, force_device_id))
-                               if (id->callback && id->callback(id))
+                               if (id->callback && !id->callback(id))
                                        break;
                if (id->matches[0].slot == DMI_NONE)
                        return -ENODEV;
index ede5024..dbd907d 100644 (file)
@@ -196,18 +196,27 @@ EXPORT_SYMBOL_GPL(rtsx_usb_ep0_write_register);
 int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data)
 {
        u16 value;
+       u8 *buf;
+       int ret;
 
        if (!data)
                return -EINVAL;
-       *data = 0;
+
+       buf = kzalloc(sizeof(u8), GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
 
        addr |= EP0_READ_REG_CMD << EP0_OP_SHIFT;
        value = swab16(addr);
 
-       return usb_control_msg(ucr->pusb_dev,
+       ret = usb_control_msg(ucr->pusb_dev,
                        usb_rcvctrlpipe(ucr->pusb_dev, 0), RTSX_USB_REQ_REG_OP,
                        USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
-                       value, 0, data, 1, 100);
+                       value, 0, buf, 1, 100);
+       *data = *buf;
+
+       kfree(buf);
+       return ret;
 }
 EXPORT_SYMBOL_GPL(rtsx_usb_ep0_read_register);
 
@@ -288,18 +297,27 @@ static int rtsx_usb_get_status_with_bulk(struct rtsx_ucr *ucr, u16 *status)
 int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status)
 {
        int ret;
+       u16 *buf;
 
        if (!status)
                return -EINVAL;
 
-       if (polling_pipe == 0)
+       if (polling_pipe == 0) {
+               buf = kzalloc(sizeof(u16), GFP_KERNEL);
+               if (!buf)
+                       return -ENOMEM;
+
                ret = usb_control_msg(ucr->pusb_dev,
                                usb_rcvctrlpipe(ucr->pusb_dev, 0),
                                RTSX_USB_REQ_POLL,
                                USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
-                               0, 0, status, 2, 100);
-       else
+                               0, 0, buf, 2, 100);
+               *status = *buf;
+
+               kfree(buf);
+       } else {
                ret = rtsx_usb_get_status_with_bulk(ucr, status);
+       }
 
        /* usb_control_msg may return positive when success */
        if (ret < 0)
index 38552a3..65fed71 100644 (file)
@@ -202,16 +202,17 @@ static void enclosure_remove_links(struct enclosure_component *cdev)
 {
        char name[ENCLOSURE_NAME_SIZE];
 
+       enclosure_link_name(cdev, name);
+
        /*
         * In odd circumstances, like multipath devices, something else may
         * already have removed the links, so check for this condition first.
         */
-       if (!cdev->dev->kobj.sd)
-               return;
+       if (cdev->dev->kobj.sd)
+               sysfs_remove_link(&cdev->dev->kobj, name);
 
-       enclosure_link_name(cdev, name);
-       sysfs_remove_link(&cdev->dev->kobj, name);
-       sysfs_remove_link(&cdev->cdev.kobj, "device");
+       if (cdev->cdev.kobj.sd)
+               sysfs_remove_link(&cdev->cdev.kobj, "device");
 }
 
 static int enclosure_add_links(struct enclosure_component *cdev)
index 9306219..6ad049a 100644 (file)
@@ -341,6 +341,8 @@ void mei_stop(struct mei_device *dev)
 
        dev->dev_state = MEI_DEV_POWER_DOWN;
        mei_reset(dev);
+       /* move device to disabled state unconditionally */
+       dev->dev_state = MEI_DEV_DISABLED;
 
        mutex_unlock(&dev->device_lock);
 
index 82dc574..7f32712 100644 (file)
@@ -1210,7 +1210,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args)
 
                if (((die_args->trapnr == X86_TRAP_MF) ||
                     (die_args->trapnr == X86_TRAP_XF)) &&
-                   !user_mode_vm(die_args->regs))
+                   !user_mode(die_args->regs))
                        xpc_die_deactivate();
 
                break;
index e9f1d8d..c53f14a 100644 (file)
@@ -124,7 +124,7 @@ int mmc_pwrseq_simple_alloc(struct mmc_host *host, struct device *dev)
                    PTR_ERR(pwrseq->reset_gpios[i]) != -ENOSYS) {
                        ret = PTR_ERR(pwrseq->reset_gpios[i]);
 
-                       while (--i)
+                       while (i--)
                                gpiod_put(pwrseq->reset_gpios[i]);
 
                        goto clk_put;
index 5b76a17..5897d8d 100644 (file)
@@ -526,6 +526,7 @@ config MTD_NAND_SUNXI
 
 config MTD_NAND_HISI504
        tristate "Support for NAND controller on Hisilicon SoC Hip04"
+       depends on HAS_DMA
        help
          Enables support for NAND controller on Hisilicon SoC Hip04.
 
index 96b0b1d..10b1f7a 100644 (file)
@@ -480,6 +480,42 @@ static void disable_int(struct pxa3xx_nand_info *info, uint32_t int_mask)
        nand_writel(info, NDCR, ndcr | int_mask);
 }
 
+static void drain_fifo(struct pxa3xx_nand_info *info, void *data, int len)
+{
+       if (info->ecc_bch) {
+               int timeout;
+
+               /*
+                * According to the datasheet, when reading from NDDB
+                * with BCH enabled, after every 32 bytes read, we
+                * have to make sure that the NDSR.RDDREQ bit is set.
+                *
+                * Drain the FIFO eight 32-bit reads at a time, and skip
+                * the polling on the last read.
+                */
+               while (len > 8) {
+                       __raw_readsl(info->mmio_base + NDDB, data, 8);
+
+                       for (timeout = 0;
+                            !(nand_readl(info, NDSR) & NDSR_RDDREQ);
+                            timeout++) {
+                               if (timeout >= 5) {
+                                       dev_err(&info->pdev->dev,
+                                               "Timeout on RDDREQ while draining the FIFO\n");
+                                       return;
+                               }
+
+                               mdelay(1);
+                       }
+
+                       data += 32;
+                       len -= 8;
+               }
+       }
+
+       __raw_readsl(info->mmio_base + NDDB, data, len);
+}
+
 static void handle_data_pio(struct pxa3xx_nand_info *info)
 {
        unsigned int do_bytes = min(info->data_size, info->chunk_size);
@@ -496,14 +532,14 @@ static void handle_data_pio(struct pxa3xx_nand_info *info)
                                      DIV_ROUND_UP(info->oob_size, 4));
                break;
        case STATE_PIO_READING:
-               __raw_readsl(info->mmio_base + NDDB,
-                            info->data_buff + info->data_buff_pos,
-                            DIV_ROUND_UP(do_bytes, 4));
+               drain_fifo(info,
+                          info->data_buff + info->data_buff_pos,
+                          DIV_ROUND_UP(do_bytes, 4));
 
                if (info->oob_size > 0)
-                       __raw_readsl(info->mmio_base + NDDB,
-                                    info->oob_buff + info->oob_buff_pos,
-                                    DIV_ROUND_UP(info->oob_size, 4));
+                       drain_fifo(info,
+                                  info->oob_buff + info->oob_buff_pos,
+                                  DIV_ROUND_UP(info->oob_size, 4));
                break;
        default:
                dev_err(&info->pdev->dev, "%s: invalid state %d\n", __func__,
@@ -1572,6 +1608,8 @@ static int alloc_nand_resource(struct platform_device *pdev)
        int ret, irq, cs;
 
        pdata = dev_get_platdata(&pdev->dev);
+       if (pdata->num_cs <= 0)
+               return -ENODEV;
        info = devm_kzalloc(&pdev->dev, sizeof(*info) + (sizeof(*mtd) +
                            sizeof(*host)) * pdata->num_cs, GFP_KERNEL);
        if (!info)
index da4c792..16e34b3 100644 (file)
@@ -425,9 +425,10 @@ retry:
                                        ubi_warn(ubi, "corrupted VID header at PEB %d, LEB %d:%d",
                                                 pnum, vol_id, lnum);
                                        err = -EBADMSG;
-                               } else
+                               } else {
                                        err = -EINVAL;
                                        ubi_ro_mode(ubi);
+                               }
                        }
                        goto out_free;
                } else if (err == UBI_IO_BITFLIPS)
index b979c26..089a402 100644 (file)
@@ -3850,7 +3850,8 @@ static inline int bond_slave_override(struct bonding *bond,
        /* Find out if any slaves have the same mapping as this skb. */
        bond_for_each_slave_rcu(bond, slave, iter) {
                if (slave->queue_id == skb->queue_mapping) {
-                       if (bond_slave_can_tx(slave)) {
+                       if (bond_slave_is_up(slave) &&
+                           slave->link == BOND_LINK_UP) {
                                bond_dev_queue_xmit(bond, skb, slave->dev);
                                return 0;
                        }
index 98d73aa..58808f6 100644 (file)
@@ -131,7 +131,7 @@ config CAN_RCAR
 
 config CAN_XILINXCAN
        tristate "Xilinx CAN"
-       depends on ARCH_ZYNQ || MICROBLAZE || COMPILE_TEST
+       depends on ARCH_ZYNQ || ARM64 || MICROBLAZE || COMPILE_TEST
        depends on COMMON_CLK && HAS_IOMEM
        ---help---
          Xilinx CAN driver. This driver supports both soft AXI CAN IP and
index 3c82e02..b0f6924 100644 (file)
@@ -579,6 +579,10 @@ struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
        skb->pkt_type = PACKET_BROADCAST;
        skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+       skb_reset_mac_header(skb);
+       skb_reset_network_header(skb);
+       skb_reset_transport_header(skb);
+
        can_skb_reserve(skb);
        can_skb_prv(skb)->ifindex = dev->ifindex;
 
@@ -603,6 +607,10 @@ struct sk_buff *alloc_canfd_skb(struct net_device *dev,
        skb->pkt_type = PACKET_BROADCAST;
        skb->ip_summed = CHECKSUM_UNNECESSARY;
 
+       skb_reset_mac_header(skb);
+       skb_reset_network_header(skb);
+       skb_reset_transport_header(skb);
+
        can_skb_reserve(skb);
        can_skb_prv(skb)->ifindex = dev->ifindex;
 
index 80c46ad..ad0a7e8 100644 (file)
@@ -592,13 +592,12 @@ static int flexcan_poll_state(struct net_device *dev, u32 reg_esr)
                rx_state = unlikely(reg_esr & FLEXCAN_ESR_RX_WRN) ?
                           CAN_STATE_ERROR_WARNING : CAN_STATE_ERROR_ACTIVE;
                new_state = max(tx_state, rx_state);
-       } else if (unlikely(flt == FLEXCAN_ESR_FLT_CONF_PASSIVE)) {
+       } else {
                __flexcan_get_berr_counter(dev, &bec);
-               new_state = CAN_STATE_ERROR_PASSIVE;
+               new_state = flt == FLEXCAN_ESR_FLT_CONF_PASSIVE ?
+                           CAN_STATE_ERROR_PASSIVE : CAN_STATE_BUS_OFF;
                rx_state = bec.rxerr >= bec.txerr ? new_state : 0;
                tx_state = bec.rxerr <= bec.txerr ? new_state : 0;
-       } else {
-               new_state = CAN_STATE_BUS_OFF;
        }
 
        /* state hasn't changed */
@@ -1158,12 +1157,19 @@ static int flexcan_probe(struct platform_device *pdev)
        const struct flexcan_devtype_data *devtype_data;
        struct net_device *dev;
        struct flexcan_priv *priv;
+       struct regulator *reg_xceiver;
        struct resource *mem;
        struct clk *clk_ipg = NULL, *clk_per = NULL;
        void __iomem *base;
        int err, irq;
        u32 clock_freq = 0;
 
+       reg_xceiver = devm_regulator_get(&pdev->dev, "xceiver");
+       if (PTR_ERR(reg_xceiver) == -EPROBE_DEFER)
+               return -EPROBE_DEFER;
+       else if (IS_ERR(reg_xceiver))
+               reg_xceiver = NULL;
+
        if (pdev->dev.of_node)
                of_property_read_u32(pdev->dev.of_node,
                                                "clock-frequency", &clock_freq);
@@ -1224,9 +1230,7 @@ static int flexcan_probe(struct platform_device *pdev)
        priv->pdata = dev_get_platdata(&pdev->dev);
        priv->devtype_data = devtype_data;
 
-       priv->reg_xceiver = devm_regulator_get(&pdev->dev, "xceiver");
-       if (IS_ERR(priv->reg_xceiver))
-               priv->reg_xceiver = NULL;
+       priv->reg_xceiver = reg_xceiver;
 
        netif_napi_add(dev, &priv->napi, flexcan_poll, FLEXCAN_NAPI_WEIGHT);
 
index 009acc8..8b4d3e6 100644 (file)
@@ -901,6 +901,8 @@ static int gs_usb_probe(struct usb_interface *intf, const struct usb_device_id *
        }
 
        dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+       if (!dev)
+               return -ENOMEM;
        init_usb_anchor(&dev->rx_submitted);
 
        atomic_set(&dev->active_channels, 0);
index 2928f70..57611fd 100644 (file)
@@ -14,6 +14,8 @@
  * Copyright (C) 2015 Valeo S.A.
  */
 
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
 #include <linux/completion.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
@@ -23,7 +25,6 @@
 #include <linux/can/dev.h>
 #include <linux/can/error.h>
 
-#define MAX_TX_URBS                    16
 #define MAX_RX_URBS                    4
 #define START_TIMEOUT                  1000 /* msecs */
 #define STOP_TIMEOUT                   1000 /* msecs */
@@ -441,6 +442,7 @@ struct kvaser_usb_error_summary {
        };
 };
 
+/* Context for an outstanding, not yet ACKed, transmission */
 struct kvaser_usb_tx_urb_context {
        struct kvaser_usb_net_priv *priv;
        u32 echo_index;
@@ -454,8 +456,13 @@ struct kvaser_usb {
        struct usb_endpoint_descriptor *bulk_in, *bulk_out;
        struct usb_anchor rx_submitted;
 
+       /* @max_tx_urbs: Firmware-reported maximum number of outstanding,
+        * not yet ACKed, transmissions on this device. This value is
+        * also used as a sentinel for marking free tx contexts.
+        */
        u32 fw_version;
        unsigned int nchannels;
+       unsigned int max_tx_urbs;
        enum kvaser_usb_family family;
 
        bool rxinitdone;
@@ -465,18 +472,18 @@ struct kvaser_usb {
 
 struct kvaser_usb_net_priv {
        struct can_priv can;
-
-       atomic_t active_tx_urbs;
-       struct usb_anchor tx_submitted;
-       struct kvaser_usb_tx_urb_context tx_contexts[MAX_TX_URBS];
-
-       struct completion start_comp, stop_comp;
+       struct can_berr_counter bec;
 
        struct kvaser_usb *dev;
        struct net_device *netdev;
        int channel;
 
-       struct can_berr_counter bec;
+       struct completion start_comp, stop_comp;
+       struct usb_anchor tx_submitted;
+
+       spinlock_t tx_contexts_lock;
+       int active_tx_contexts;
+       struct kvaser_usb_tx_urb_context tx_contexts[];
 };
 
 static const struct usb_device_id kvaser_usb_table[] = {
@@ -584,8 +591,15 @@ static int kvaser_usb_wait_msg(const struct kvaser_usb *dev, u8 id,
                while (pos <= actual_len - MSG_HEADER_LEN) {
                        tmp = buf + pos;
 
-                       if (!tmp->len)
-                               break;
+                       /* Handle messages crossing the USB endpoint max packet
+                        * size boundary. Check kvaser_usb_read_bulk_callback()
+                        * for further details.
+                        */
+                       if (tmp->len == 0) {
+                               pos = round_up(pos, le16_to_cpu(dev->bulk_in->
+                                                               wMaxPacketSize));
+                               continue;
+                       }
 
                        if (pos + tmp->len > actual_len) {
                                dev_err(dev->udev->dev.parent,
@@ -647,9 +661,13 @@ static int kvaser_usb_get_software_info(struct kvaser_usb *dev)
        switch (dev->family) {
        case KVASER_LEAF:
                dev->fw_version = le32_to_cpu(msg.u.leaf.softinfo.fw_version);
+               dev->max_tx_urbs =
+                       le16_to_cpu(msg.u.leaf.softinfo.max_outstanding_tx);
                break;
        case KVASER_USBCAN:
                dev->fw_version = le32_to_cpu(msg.u.usbcan.softinfo.fw_version);
+               dev->max_tx_urbs =
+                       le16_to_cpu(msg.u.usbcan.softinfo.max_outstanding_tx);
                break;
        }
 
@@ -686,6 +704,7 @@ static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev,
        struct kvaser_usb_net_priv *priv;
        struct sk_buff *skb;
        struct can_frame *cf;
+       unsigned long flags;
        u8 channel, tid;
 
        channel = msg->u.tx_acknowledge_header.channel;
@@ -704,7 +723,7 @@ static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev,
 
        stats = &priv->netdev->stats;
 
-       context = &priv->tx_contexts[tid % MAX_TX_URBS];
+       context = &priv->tx_contexts[tid % dev->max_tx_urbs];
 
        /* Sometimes the state change doesn't come after a bus-off event */
        if (priv->can.restart_ms &&
@@ -729,12 +748,15 @@ static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev,
 
        stats->tx_packets++;
        stats->tx_bytes += context->dlc;
-       can_get_echo_skb(priv->netdev, context->echo_index);
 
-       context->echo_index = MAX_TX_URBS;
-       atomic_dec(&priv->active_tx_urbs);
+       spin_lock_irqsave(&priv->tx_contexts_lock, flags);
 
+       can_get_echo_skb(priv->netdev, context->echo_index);
+       context->echo_index = dev->max_tx_urbs;
+       --priv->active_tx_contexts;
        netif_wake_queue(priv->netdev);
+
+       spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
 }
 
 static void kvaser_usb_simple_msg_callback(struct urb *urb)
@@ -787,7 +809,6 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv,
                netdev_err(netdev, "Error transmitting URB\n");
                usb_unanchor_urb(urb);
                usb_free_urb(urb);
-               kfree(buf);
                return err;
        }
 
@@ -796,17 +817,6 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv,
        return 0;
 }
 
-static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv)
-{
-       int i;
-
-       usb_kill_anchored_urbs(&priv->tx_submitted);
-       atomic_set(&priv->active_tx_urbs, 0);
-
-       for (i = 0; i < MAX_TX_URBS; i++)
-               priv->tx_contexts[i].echo_index = MAX_TX_URBS;
-}
-
 static void kvaser_usb_rx_error_update_can_state(struct kvaser_usb_net_priv *priv,
                                                 const struct kvaser_usb_error_summary *es,
                                                 struct can_frame *cf)
@@ -1317,8 +1327,20 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
        while (pos <= urb->actual_length - MSG_HEADER_LEN) {
                msg = urb->transfer_buffer + pos;
 
-               if (!msg->len)
-                       break;
+               /* The Kvaser firmware can only read and write messages that
+                * do not cross the USB endpoint's wMaxPacketSize boundary.
+                * If a follow-up command crosses such boundary, firmware puts
+                * a placeholder zero-length command in its place then aligns
+                * the real command to the next max packet size.
+                *
+                * Handle such cases or we're going to miss a significant
+                * number of events in case of a heavy rx load on the bus.
+                */
+               if (msg->len == 0) {
+                       pos = round_up(pos, le16_to_cpu(dev->bulk_in->
+                                                       wMaxPacketSize));
+                       continue;
+               }
 
                if (pos + msg->len > urb->actual_length) {
                        dev_err(dev->udev->dev.parent, "Format error\n");
@@ -1326,7 +1348,6 @@ static void kvaser_usb_read_bulk_callback(struct urb *urb)
                }
 
                kvaser_usb_handle_message(dev, msg);
-
                pos += msg->len;
        }
 
@@ -1498,6 +1519,26 @@ error:
        return err;
 }
 
+static void kvaser_usb_reset_tx_urb_contexts(struct kvaser_usb_net_priv *priv)
+{
+       int i, max_tx_urbs;
+
+       max_tx_urbs = priv->dev->max_tx_urbs;
+
+       priv->active_tx_contexts = 0;
+       for (i = 0; i < max_tx_urbs; i++)
+               priv->tx_contexts[i].echo_index = max_tx_urbs;
+}
+
+/* This method might sleep. Do not call it in the atomic context
+ * of URB completions.
+ */
+static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv)
+{
+       usb_kill_anchored_urbs(&priv->tx_submitted);
+       kvaser_usb_reset_tx_urb_contexts(priv);
+}
+
 static void kvaser_usb_unlink_all_urbs(struct kvaser_usb *dev)
 {
        int i;
@@ -1615,9 +1656,9 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
        struct urb *urb;
        void *buf;
        struct kvaser_msg *msg;
-       int i, err;
-       int ret = NETDEV_TX_OK;
+       int i, err, ret = NETDEV_TX_OK;
        u8 *msg_tx_can_flags = NULL;            /* GCC */
+       unsigned long flags;
 
        if (can_dropped_invalid_skb(netdev, skb))
                return NETDEV_TX_OK;
@@ -1634,7 +1675,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
        if (!buf) {
                stats->tx_dropped++;
                dev_kfree_skb(skb);
-               goto nobufmem;
+               goto freeurb;
        }
 
        msg = buf;
@@ -1671,22 +1712,32 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
        if (cf->can_id & CAN_RTR_FLAG)
                *msg_tx_can_flags |= MSG_FLAG_REMOTE_FRAME;
 
-       for (i = 0; i < ARRAY_SIZE(priv->tx_contexts); i++) {
-               if (priv->tx_contexts[i].echo_index == MAX_TX_URBS) {
+       spin_lock_irqsave(&priv->tx_contexts_lock, flags);
+       for (i = 0; i < dev->max_tx_urbs; i++) {
+               if (priv->tx_contexts[i].echo_index == dev->max_tx_urbs) {
                        context = &priv->tx_contexts[i];
+
+                       context->echo_index = i;
+                       can_put_echo_skb(skb, netdev, context->echo_index);
+                       ++priv->active_tx_contexts;
+                       if (priv->active_tx_contexts >= dev->max_tx_urbs)
+                               netif_stop_queue(netdev);
+
                        break;
                }
        }
+       spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
 
        /* This should never happen; it implies a flow control bug */
        if (!context) {
                netdev_warn(netdev, "cannot find free context\n");
+
+               kfree(buf);
                ret =  NETDEV_TX_BUSY;
-               goto releasebuf;
+               goto freeurb;
        }
 
        context->priv = priv;
-       context->echo_index = i;
        context->dlc = cf->can_dlc;
 
        msg->u.tx_can.tid = context->echo_index;
@@ -1698,18 +1749,17 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
                          kvaser_usb_write_bulk_callback, context);
        usb_anchor_urb(urb, &priv->tx_submitted);
 
-       can_put_echo_skb(skb, netdev, context->echo_index);
-
-       atomic_inc(&priv->active_tx_urbs);
-
-       if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS)
-               netif_stop_queue(netdev);
-
        err = usb_submit_urb(urb, GFP_ATOMIC);
        if (unlikely(err)) {
+               spin_lock_irqsave(&priv->tx_contexts_lock, flags);
+
                can_free_echo_skb(netdev, context->echo_index);
+               context->echo_index = dev->max_tx_urbs;
+               --priv->active_tx_contexts;
+               netif_wake_queue(netdev);
+
+               spin_unlock_irqrestore(&priv->tx_contexts_lock, flags);
 
-               atomic_dec(&priv->active_tx_urbs);
                usb_unanchor_urb(urb);
 
                stats->tx_dropped++;
@@ -1719,16 +1769,12 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
                else
                        netdev_warn(netdev, "Failed tx_urb %d\n", err);
 
-               goto releasebuf;
+               goto freeurb;
        }
 
-       usb_free_urb(urb);
-
-       return NETDEV_TX_OK;
+       ret = NETDEV_TX_OK;
 
-releasebuf:
-       kfree(buf);
-nobufmem:
+freeurb:
        usb_free_urb(urb);
        return ret;
 }
@@ -1840,13 +1886,15 @@ static int kvaser_usb_init_one(struct usb_interface *intf,
        struct kvaser_usb *dev = usb_get_intfdata(intf);
        struct net_device *netdev;
        struct kvaser_usb_net_priv *priv;
-       int i, err;
+       int err;
 
        err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, channel);
        if (err)
                return err;
 
-       netdev = alloc_candev(sizeof(*priv), MAX_TX_URBS);
+       netdev = alloc_candev(sizeof(*priv) +
+                             dev->max_tx_urbs * sizeof(*priv->tx_contexts),
+                             dev->max_tx_urbs);
        if (!netdev) {
                dev_err(&intf->dev, "Cannot alloc candev\n");
                return -ENOMEM;
@@ -1854,19 +1902,17 @@ static int kvaser_usb_init_one(struct usb_interface *intf,
 
        priv = netdev_priv(netdev);
 
+       init_usb_anchor(&priv->tx_submitted);
        init_completion(&priv->start_comp);
        init_completion(&priv->stop_comp);
 
-       init_usb_anchor(&priv->tx_submitted);
-       atomic_set(&priv->active_tx_urbs, 0);
-
-       for (i = 0; i < ARRAY_SIZE(priv->tx_contexts); i++)
-               priv->tx_contexts[i].echo_index = MAX_TX_URBS;
-
        priv->dev = dev;
        priv->netdev = netdev;
        priv->channel = channel;
 
+       spin_lock_init(&priv->tx_contexts_lock);
+       kvaser_usb_reset_tx_urb_contexts(priv);
+
        priv->can.state = CAN_STATE_STOPPED;
        priv->can.clock.freq = CAN_USB_CLOCK;
        priv->can.bittiming_const = &kvaser_usb_bittiming_const;
@@ -1976,6 +2022,13 @@ static int kvaser_usb_probe(struct usb_interface *intf,
                return err;
        }
 
+       dev_dbg(&intf->dev, "Firmware version: %d.%d.%d\n",
+               ((dev->fw_version >> 24) & 0xff),
+               ((dev->fw_version >> 16) & 0xff),
+               (dev->fw_version & 0xffff));
+
+       dev_dbg(&intf->dev, "Max oustanding tx = %d URBs\n", dev->max_tx_urbs);
+
        err = kvaser_usb_get_card_info(dev);
        if (err) {
                dev_err(&intf->dev,
@@ -1983,11 +2036,6 @@ static int kvaser_usb_probe(struct usb_interface *intf,
                return err;
        }
 
-       dev_dbg(&intf->dev, "Firmware version: %d.%d.%d\n",
-               ((dev->fw_version >> 24) & 0xff),
-               ((dev->fw_version >> 16) & 0xff),
-               (dev->fw_version & 0xffff));
-
        for (i = 0; i < dev->nchannels; i++) {
                err = kvaser_usb_init_one(intf, id, i);
                if (err) {
index 1ba7c25..e8fc495 100644 (file)
@@ -26,8 +26,8 @@
 #define PUCAN_CMD_FILTER_STD           0x008
 #define PUCAN_CMD_TX_ABORT             0x009
 #define PUCAN_CMD_WR_ERR_CNT           0x00a
-#define PUCAN_CMD_RX_FRAME_ENABLE      0x00b
-#define PUCAN_CMD_RX_FRAME_DISABLE     0x00c
+#define PUCAN_CMD_SET_EN_OPTION                0x00b
+#define PUCAN_CMD_CLR_DIS_OPTION       0x00c
 #define PUCAN_CMD_END_OF_COLLECTION    0x3ff
 
 /* uCAN received messages list */
@@ -101,14 +101,15 @@ struct __packed pucan_wr_err_cnt {
        u16     unused;
 };
 
-/* uCAN RX_FRAME_ENABLE command fields */
-#define PUCAN_FLTEXT_ERROR             0x0001
-#define PUCAN_FLTEXT_BUSLOAD           0x0002
+/* uCAN SET_EN/CLR_DIS _OPTION command fields */
+#define PUCAN_OPTION_ERROR             0x0001
+#define PUCAN_OPTION_BUSLOAD           0x0002
+#define PUCAN_OPTION_CANDFDISO         0x0004
 
-struct __packed pucan_filter_ext {
+struct __packed pucan_options {
        __le16  opcode_channel;
 
-       __le16  ext_mask;
+       __le16  options;
        u32     unused;
 };
 
index 962c3f0..a9221ad 100644 (file)
@@ -110,13 +110,13 @@ struct __packed pcan_ufd_led {
        u8      unused[5];
 };
 
-/* Extended usage of uCAN commands CMD_RX_FRAME_xxxABLE for PCAN-USB Pro FD */
+/* Extended usage of uCAN commands CMD_xxx_xx_OPTION for PCAN-USB Pro FD */
 #define PCAN_UFD_FLTEXT_CALIBRATION    0x8000
 
-struct __packed pcan_ufd_filter_ext {
+struct __packed pcan_ufd_options {
        __le16  opcode_channel;
 
-       __le16  ext_mask;
+       __le16  ucan_mask;
        u16     unused;
        __le16  usb_mask;
 };
@@ -251,6 +251,27 @@ static int pcan_usb_fd_build_restart_cmd(struct peak_usb_device *dev, u8 *buf)
        /* moves the pointer forward */
        pc += sizeof(struct pucan_wr_err_cnt);
 
+       /* add command to switch from ISO to non-ISO mode, if fw allows it */
+       if (dev->can.ctrlmode_supported & CAN_CTRLMODE_FD_NON_ISO) {
+               struct pucan_options *puo = (struct pucan_options *)pc;
+
+               puo->opcode_channel =
+                       (dev->can.ctrlmode & CAN_CTRLMODE_FD_NON_ISO) ?
+                       pucan_cmd_opcode_channel(dev,
+                                                PUCAN_CMD_CLR_DIS_OPTION) :
+                       pucan_cmd_opcode_channel(dev, PUCAN_CMD_SET_EN_OPTION);
+
+               puo->options = cpu_to_le16(PUCAN_OPTION_CANDFDISO);
+
+               /* to be sure that no other extended bits will be taken into
+                * account
+                */
+               puo->unused = 0;
+
+               /* moves the pointer forward */
+               pc += sizeof(struct pucan_options);
+       }
+
        /* next, go back to operational mode */
        cmd = (struct pucan_command *)pc;
        cmd->opcode_channel = pucan_cmd_opcode_channel(dev,
@@ -321,21 +342,21 @@ static int pcan_usb_fd_set_filter_std(struct peak_usb_device *dev, int idx,
        return pcan_usb_fd_send_cmd(dev, cmd);
 }
 
-/* set/unset notifications filter:
+/* set/unset options
  *
- *     onoff   sets(1)/unset(0) notifications
- *     mask    each bit defines a kind of notification to set/unset
+ *     onoff   set(1)/unset(0) options
+ *     mask    each bit defines a kind of options to set/unset
  */
-static int pcan_usb_fd_set_filter_ext(struct peak_usb_device *dev,
-                                     bool onoff, u16 ext_mask, u16 usb_mask)
+static int pcan_usb_fd_set_options(struct peak_usb_device *dev,
+                                  bool onoff, u16 ucan_mask, u16 usb_mask)
 {
-       struct pcan_ufd_filter_ext *cmd = pcan_usb_fd_cmd_buffer(dev);
+       struct pcan_ufd_options *cmd = pcan_usb_fd_cmd_buffer(dev);
 
        cmd->opcode_channel = pucan_cmd_opcode_channel(dev,
-                                       (onoff) ? PUCAN_CMD_RX_FRAME_ENABLE :
-                                                 PUCAN_CMD_RX_FRAME_DISABLE);
+                                       (onoff) ? PUCAN_CMD_SET_EN_OPTION :
+                                                 PUCAN_CMD_CLR_DIS_OPTION);
 
-       cmd->ext_mask = cpu_to_le16(ext_mask);
+       cmd->ucan_mask = cpu_to_le16(ucan_mask);
        cmd->usb_mask = cpu_to_le16(usb_mask);
 
        /* send the command */
@@ -770,9 +791,9 @@ static int pcan_usb_fd_start(struct peak_usb_device *dev)
                                       &pcan_usb_pro_fd);
 
                /* enable USB calibration messages */
-               err = pcan_usb_fd_set_filter_ext(dev, 1,
-                                                PUCAN_FLTEXT_ERROR,
-                                                PCAN_UFD_FLTEXT_CALIBRATION);
+               err = pcan_usb_fd_set_options(dev, 1,
+                                             PUCAN_OPTION_ERROR,
+                                             PCAN_UFD_FLTEXT_CALIBRATION);
        }
 
        pdev->usb_if->dev_opened_count++;
@@ -806,9 +827,9 @@ static int pcan_usb_fd_stop(struct peak_usb_device *dev)
 
        /* turn off special msgs for that interface if no other dev opened */
        if (pdev->usb_if->dev_opened_count == 1)
-               pcan_usb_fd_set_filter_ext(dev, 0,
-                                          PUCAN_FLTEXT_ERROR,
-                                          PCAN_UFD_FLTEXT_CALIBRATION);
+               pcan_usb_fd_set_options(dev, 0,
+                                       PUCAN_OPTION_ERROR,
+                                       PCAN_UFD_FLTEXT_CALIBRATION);
        pdev->usb_if->dev_opened_count--;
 
        return 0;
@@ -860,8 +881,14 @@ static int pcan_usb_fd_init(struct peak_usb_device *dev)
                         pdev->usb_if->fw_info.fw_version[2],
                         dev->adapter->ctrl_count);
 
-               /* the currently supported hw is non-ISO */
-               dev->can.ctrlmode = CAN_CTRLMODE_FD_NON_ISO;
+               /* check for ability to switch between ISO/non-ISO modes */
+               if (pdev->usb_if->fw_info.fw_version[0] >= 2) {
+                       /* firmware >= 2.x supports ISO/non-ISO switching */
+                       dev->can.ctrlmode_supported |= CAN_CTRLMODE_FD_NON_ISO;
+               } else {
+                       /* firmware < 2.x only supports fixed(!) non-ISO */
+                       dev->can.ctrlmode |= CAN_CTRLMODE_FD_NON_ISO;
+               }
 
                /* tell the hardware the can driver is running */
                err = pcan_usb_fd_drv_loaded(dev, 1);
@@ -879,6 +906,10 @@ static int pcan_usb_fd_init(struct peak_usb_device *dev)
 
                pdev->usb_if = ppdev->usb_if;
                pdev->cmd_buffer_addr = ppdev->cmd_buffer_addr;
+
+               /* do a copy of the ctrlmode[_supported] too */
+               dev->can.ctrlmode = ppdev->dev.can.ctrlmode;
+               dev->can.ctrlmode_supported = ppdev->dev.can.ctrlmode_supported;
        }
 
        pdev->usb_if->dev[dev->ctrl_idx] = dev;
@@ -933,9 +964,9 @@ static void pcan_usb_fd_exit(struct peak_usb_device *dev)
        if (dev->ctrl_idx == 0) {
                /* turn off calibration message if any device were opened */
                if (pdev->usb_if->dev_opened_count > 0)
-                       pcan_usb_fd_set_filter_ext(dev, 0,
-                                                  PUCAN_FLTEXT_ERROR,
-                                                  PCAN_UFD_FLTEXT_CALIBRATION);
+                       pcan_usb_fd_set_options(dev, 0,
+                                               PUCAN_OPTION_ERROR,
+                                               PCAN_UFD_FLTEXT_CALIBRATION);
 
                /* tell USB adapter that the driver is being unloaded */
                pcan_usb_fd_drv_loaded(dev, 0);
index 11d6e65..15a8190 100644 (file)
@@ -1543,7 +1543,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
 {
        struct pcnet32_private *lp;
        int i, media;
-       int fdx, mii, fset, dxsuflo;
+       int fdx, mii, fset, dxsuflo, sram;
        int chip_version;
        char *chipname;
        struct net_device *dev;
@@ -1580,7 +1580,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
        }
 
        /* initialize variables */
-       fdx = mii = fset = dxsuflo = 0;
+       fdx = mii = fset = dxsuflo = sram = 0;
        chip_version = (chip_version >> 12) & 0xffff;
 
        switch (chip_version) {
@@ -1613,6 +1613,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
                chipname = "PCnet/FAST III 79C973";     /* PCI */
                fdx = 1;
                mii = 1;
+               sram = 1;
                break;
        case 0x2626:
                chipname = "PCnet/Home 79C978"; /* PCI */
@@ -1636,6 +1637,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
                chipname = "PCnet/FAST III 79C975";     /* PCI */
                fdx = 1;
                mii = 1;
+               sram = 1;
                break;
        case 0x2628:
                chipname = "PCnet/PRO 79C976";
@@ -1664,6 +1666,31 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
                dxsuflo = 1;
        }
 
+       /*
+        * The Am79C973/Am79C975 controllers come with 12K of SRAM
+        * which we can use for the Tx/Rx buffers but most importantly,
+        * the use of SRAM allow us to use the BCR18:NOUFLO bit to avoid
+        * Tx fifo underflows.
+        */
+       if (sram) {
+               /*
+                * The SRAM is being configured in two steps. First we
+                * set the SRAM size in the BCR25:SRAM_SIZE bits. According
+                * to the datasheet, each bit corresponds to a 512-byte
+                * page so we can have at most 24 pages. The SRAM_SIZE
+                * holds the value of the upper 8 bits of the 16-bit SRAM size.
+                * The low 8-bits start at 0x00 and end at 0xff. So the
+                * address range is from 0x0000 up to 0x17ff. Therefore,
+                * the SRAM_SIZE is set to 0x17. The next step is to set
+                * the BCR26:SRAM_BND midway through so the Tx and Rx
+                * buffers can share the SRAM equally.
+                */
+               a->write_bcr(ioaddr, 25, 0x17);
+               a->write_bcr(ioaddr, 26, 0xc);
+               /* And finally enable the NOUFLO bit */
+               a->write_bcr(ioaddr, 18, a->read_bcr(ioaddr, 18) | (1 << 11));
+       }
+
        dev = alloc_etherdev(sizeof(*lp));
        if (!dev) {
                ret = -ENOMEM;
index 869d97f..b927021 100644 (file)
@@ -593,7 +593,7 @@ static int xgene_enet_reset(struct xgene_enet_pdata *pdata)
        if (!xgene_ring_mgr_init(pdata))
                return -ENODEV;
 
-       if (!efi_enabled(EFI_BOOT)) {
+       if (pdata->clk) {
                clk_prepare_enable(pdata->clk);
                clk_disable_unprepare(pdata->clk);
                clk_prepare_enable(pdata->clk);
index 4de62b2..635a83b 100644 (file)
@@ -1025,6 +1025,8 @@ static int xgene_enet_remove(struct platform_device *pdev)
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id xgene_enet_acpi_match[] = {
        { "APMC0D05", },
+       { "APMC0D30", },
+       { "APMC0D31", },
        { }
 };
 MODULE_DEVICE_TABLE(acpi, xgene_enet_acpi_match);
@@ -1033,6 +1035,8 @@ MODULE_DEVICE_TABLE(acpi, xgene_enet_acpi_match);
 #ifdef CONFIG_OF
 static struct of_device_id xgene_enet_of_match[] = {
        {.compatible = "apm,xgene-enet",},
+       {.compatible = "apm,xgene1-sgenet",},
+       {.compatible = "apm,xgene1-xgenet",},
        {},
 };
 
index 21206d3..a7f2cc3 100644 (file)
@@ -486,7 +486,7 @@ static int bcm_enet_poll(struct napi_struct *napi, int budget)
 {
        struct bcm_enet_priv *priv;
        struct net_device *dev;
-       int tx_work_done, rx_work_done;
+       int rx_work_done;
 
        priv = container_of(napi, struct bcm_enet_priv, napi);
        dev = priv->net_dev;
@@ -498,14 +498,14 @@ static int bcm_enet_poll(struct napi_struct *napi, int budget)
                         ENETDMAC_IR, priv->tx_chan);
 
        /* reclaim sent skb */
-       tx_work_done = bcm_enet_tx_reclaim(dev, 0);
+       bcm_enet_tx_reclaim(dev, 0);
 
        spin_lock(&priv->rx_lock);
        rx_work_done = bcm_enet_receive_queue(dev, budget);
        spin_unlock(&priv->rx_lock);
 
-       if (rx_work_done >= budget || tx_work_done > 0) {
-               /* rx/tx queue is not yet empty/clean */
+       if (rx_work_done >= budget) {
+               /* rx queue is not yet empty/clean */
                return rx_work_done;
        }
 
index 676ffe0..0469f72 100644 (file)
@@ -302,9 +302,6 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
        slot->skb = skb;
        slot->dma_addr = dma_addr;
 
-       if (slot->dma_addr & 0xC0000000)
-               bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
-
        return 0;
 }
 
@@ -505,8 +502,6 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
                                  ring->mmio_base);
                        goto err_dma_free;
                }
-               if (ring->dma_base & 0xC0000000)
-                       bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
 
                ring->unaligned = bgmac_dma_unaligned(bgmac, ring,
                                                      BGMAC_DMA_RING_TX);
@@ -536,8 +531,6 @@ static int bgmac_dma_alloc(struct bgmac *bgmac)
                        err = -ENOMEM;
                        goto err_dma_free;
                }
-               if (ring->dma_base & 0xC0000000)
-                       bgmac_warn(bgmac, "DMA address using 0xC0000000 bit(s), it may need translation trick\n");
 
                ring->unaligned = bgmac_dma_unaligned(bgmac, ring,
                                                      BGMAC_DMA_RING_RX);
index 756053c..4085c4b 100644 (file)
@@ -1811,7 +1811,7 @@ struct bnx2x {
        int                     stats_state;
 
        /* used for synchronization of concurrent threads statistics handling */
-       spinlock_t              stats_lock;
+       struct mutex            stats_lock;
 
        /* used by dmae command loader */
        struct dmae_command     stats_dmae;
@@ -1935,8 +1935,6 @@ struct bnx2x {
 
        int fp_array_size;
        u32 dump_preset_idx;
-       bool                                    stats_started;
-       struct semaphore                        stats_sema;
 
        u8                                      phys_port_id[ETH_ALEN];
 
index 7155e1d..1ec635f 100644 (file)
@@ -129,8 +129,8 @@ struct bnx2x_mac_vals {
        u32 xmac_val;
        u32 emac_addr;
        u32 emac_val;
-       u32 umac_addr;
-       u32 umac_val;
+       u32 umac_addr[2];
+       u32 umac_val[2];
        u32 bmac_addr;
        u32 bmac_val[2];
 };
@@ -7866,6 +7866,20 @@ int bnx2x_init_hw_func_cnic(struct bnx2x *bp)
        return 0;
 }
 
+/* previous driver DMAE transaction may have occurred when pre-boot stage ended
+ * and boot began, or when kdump kernel was loaded. Either case would invalidate
+ * the addresses of the transaction, resulting in was-error bit set in the pci
+ * causing all hw-to-host pcie transactions to timeout. If this happened we want
+ * to clear the interrupt which detected this from the pglueb and the was done
+ * bit
+ */
+static void bnx2x_clean_pglue_errors(struct bnx2x *bp)
+{
+       if (!CHIP_IS_E1x(bp))
+               REG_WR(bp, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR,
+                      1 << BP_ABS_FUNC(bp));
+}
+
 static int bnx2x_init_hw_func(struct bnx2x *bp)
 {
        int port = BP_PORT(bp);
@@ -7958,8 +7972,7 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
 
        bnx2x_init_block(bp, BLOCK_PGLUE_B, init_phase);
 
-       if (!CHIP_IS_E1x(bp))
-               REG_WR(bp, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR, func);
+       bnx2x_clean_pglue_errors(bp);
 
        bnx2x_init_block(bp, BLOCK_ATC, init_phase);
        bnx2x_init_block(bp, BLOCK_DMAE, init_phase);
@@ -10141,6 +10154,25 @@ static u32 bnx2x_get_pretend_reg(struct bnx2x *bp)
        return base + (BP_ABS_FUNC(bp)) * stride;
 }
 
+static bool bnx2x_prev_unload_close_umac(struct bnx2x *bp,
+                                        u8 port, u32 reset_reg,
+                                        struct bnx2x_mac_vals *vals)
+{
+       u32 mask = MISC_REGISTERS_RESET_REG_2_UMAC0 << port;
+       u32 base_addr;
+
+       if (!(mask & reset_reg))
+               return false;
+
+       BNX2X_DEV_INFO("Disable umac Rx %02x\n", port);
+       base_addr = port ? GRCBASE_UMAC1 : GRCBASE_UMAC0;
+       vals->umac_addr[port] = base_addr + UMAC_REG_COMMAND_CONFIG;
+       vals->umac_val[port] = REG_RD(bp, vals->umac_addr[port]);
+       REG_WR(bp, vals->umac_addr[port], 0);
+
+       return true;
+}
+
 static void bnx2x_prev_unload_close_mac(struct bnx2x *bp,
                                        struct bnx2x_mac_vals *vals)
 {
@@ -10149,10 +10181,7 @@ static void bnx2x_prev_unload_close_mac(struct bnx2x *bp,
        u8 port = BP_PORT(bp);
 
        /* reset addresses as they also mark which values were changed */
-       vals->bmac_addr = 0;
-       vals->umac_addr = 0;
-       vals->xmac_addr = 0;
-       vals->emac_addr = 0;
+       memset(vals, 0, sizeof(*vals));
 
        reset_reg = REG_RD(bp, MISC_REG_RESET_REG_2);
 
@@ -10201,15 +10230,11 @@ static void bnx2x_prev_unload_close_mac(struct bnx2x *bp,
                        REG_WR(bp, vals->xmac_addr, 0);
                        mac_stopped = true;
                }
-               mask = MISC_REGISTERS_RESET_REG_2_UMAC0 << port;
-               if (mask & reset_reg) {
-                       BNX2X_DEV_INFO("Disable umac Rx\n");
-                       base_addr = BP_PORT(bp) ? GRCBASE_UMAC1 : GRCBASE_UMAC0;
-                       vals->umac_addr = base_addr + UMAC_REG_COMMAND_CONFIG;
-                       vals->umac_val = REG_RD(bp, vals->umac_addr);
-                       REG_WR(bp, vals->umac_addr, 0);
-                       mac_stopped = true;
-               }
+
+               mac_stopped |= bnx2x_prev_unload_close_umac(bp, 0,
+                                                           reset_reg, vals);
+               mac_stopped |= bnx2x_prev_unload_close_umac(bp, 1,
+                                                           reset_reg, vals);
        }
 
        if (mac_stopped)
@@ -10505,8 +10530,11 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp)
                /* Close the MAC Rx to prevent BRB from filling up */
                bnx2x_prev_unload_close_mac(bp, &mac_vals);
 
-               /* close LLH filters towards the BRB */
+               /* close LLH filters for both ports towards the BRB */
+               bnx2x_set_rx_filter(&bp->link_params, 0);
+               bp->link_params.port ^= 1;
                bnx2x_set_rx_filter(&bp->link_params, 0);
+               bp->link_params.port ^= 1;
 
                /* Check if the UNDI driver was previously loaded */
                if (bnx2x_prev_is_after_undi(bp)) {
@@ -10553,8 +10581,10 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp)
 
        if (mac_vals.xmac_addr)
                REG_WR(bp, mac_vals.xmac_addr, mac_vals.xmac_val);
-       if (mac_vals.umac_addr)
-               REG_WR(bp, mac_vals.umac_addr, mac_vals.umac_val);
+       if (mac_vals.umac_addr[0])
+               REG_WR(bp, mac_vals.umac_addr[0], mac_vals.umac_val[0]);
+       if (mac_vals.umac_addr[1])
+               REG_WR(bp, mac_vals.umac_addr[1], mac_vals.umac_val[1]);
        if (mac_vals.emac_addr)
                REG_WR(bp, mac_vals.emac_addr, mac_vals.emac_val);
        if (mac_vals.bmac_addr) {
@@ -10571,26 +10601,6 @@ static int bnx2x_prev_unload_common(struct bnx2x *bp)
        return bnx2x_prev_mcp_done(bp);
 }
 
-/* previous driver DMAE transaction may have occurred when pre-boot stage ended
- * and boot began, or when kdump kernel was loaded. Either case would invalidate
- * the addresses of the transaction, resulting in was-error bit set in the pci
- * causing all hw-to-host pcie transactions to timeout. If this happened we want
- * to clear the interrupt which detected this from the pglueb and the was done
- * bit
- */
-static void bnx2x_prev_interrupted_dmae(struct bnx2x *bp)
-{
-       if (!CHIP_IS_E1x(bp)) {
-               u32 val = REG_RD(bp, PGLUE_B_REG_PGLUE_B_INT_STS);
-               if (val & PGLUE_B_PGLUE_B_INT_STS_REG_WAS_ERROR_ATTN) {
-                       DP(BNX2X_MSG_SP,
-                          "'was error' bit was found to be set in pglueb upon startup. Clearing\n");
-                       REG_WR(bp, PGLUE_B_REG_WAS_ERROR_PF_7_0_CLR,
-                              1 << BP_FUNC(bp));
-               }
-       }
-}
-
 static int bnx2x_prev_unload(struct bnx2x *bp)
 {
        int time_counter = 10;
@@ -10600,7 +10610,7 @@ static int bnx2x_prev_unload(struct bnx2x *bp)
        /* clear hw from errors which may have resulted from an interrupted
         * dmae transaction.
         */
-       bnx2x_prev_interrupted_dmae(bp);
+       bnx2x_clean_pglue_errors(bp);
 
        /* Release previously held locks */
        hw_lock_reg = (BP_FUNC(bp) <= 5) ?
@@ -12037,9 +12047,8 @@ static int bnx2x_init_bp(struct bnx2x *bp)
        mutex_init(&bp->port.phy_mutex);
        mutex_init(&bp->fw_mb_mutex);
        mutex_init(&bp->drv_info_mutex);
+       mutex_init(&bp->stats_lock);
        bp->drv_info_mng_owner = false;
-       spin_lock_init(&bp->stats_lock);
-       sema_init(&bp->stats_sema, 1);
 
        INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task);
        INIT_DELAYED_WORK(&bp->sp_rtnl_task, bnx2x_sp_rtnl_task);
@@ -12722,6 +12731,9 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
        pci_write_config_dword(bp->pdev, PCICFG_GRC_ADDRESS,
                               PCICFG_VENDOR_ID_OFFSET);
 
+       /* Set PCIe reset type to fundamental for EEH recovery */
+       pdev->needs_freset = 1;
+
        /* AER (Advanced Error reporting) configuration */
        rc = pci_enable_pcie_error_reporting(pdev);
        if (!rc)
@@ -12766,7 +12778,7 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
                NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 |
                NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO |
                NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX;
-       if (!CHIP_IS_E1x(bp)) {
+       if (!chip_is_e1x) {
                dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL |
                                    NETIF_F_GSO_IPIP | NETIF_F_GSO_SIT;
                dev->hw_enc_features =
@@ -13665,9 +13677,9 @@ static int bnx2x_eeh_nic_unload(struct bnx2x *bp)
        cancel_delayed_work_sync(&bp->sp_task);
        cancel_delayed_work_sync(&bp->period_task);
 
-       spin_lock_bh(&bp->stats_lock);
+       mutex_lock(&bp->stats_lock);
        bp->stats_state = STATS_STATE_DISABLED;
-       spin_unlock_bh(&bp->stats_lock);
+       mutex_unlock(&bp->stats_lock);
 
        bnx2x_save_statistics(bp);
 
index e5aca2d..cfe3c76 100644 (file)
@@ -2238,7 +2238,9 @@ int bnx2x_vf_close(struct bnx2x *bp, struct bnx2x_virtf *vf)
 
                cookie.vf = vf;
                cookie.state = VF_ACQUIRED;
-               bnx2x_stats_safe_exec(bp, bnx2x_set_vf_state, &cookie);
+               rc = bnx2x_stats_safe_exec(bp, bnx2x_set_vf_state, &cookie);
+               if (rc)
+                       goto op_err;
        }
 
        DP(BNX2X_MSG_IOV, "set state to acquired\n");
index d160829..800ab44 100644 (file)
@@ -123,36 +123,28 @@ static void bnx2x_dp_stats(struct bnx2x *bp)
  */
 static void bnx2x_storm_stats_post(struct bnx2x *bp)
 {
-       if (!bp->stats_pending) {
-               int rc;
+       int rc;
 
-               spin_lock_bh(&bp->stats_lock);
-
-               if (bp->stats_pending) {
-                       spin_unlock_bh(&bp->stats_lock);
-                       return;
-               }
-
-               bp->fw_stats_req->hdr.drv_stats_counter =
-                       cpu_to_le16(bp->stats_counter++);
+       if (bp->stats_pending)
+               return;
 
-               DP(BNX2X_MSG_STATS, "Sending statistics ramrod %d\n",
-                  le16_to_cpu(bp->fw_stats_req->hdr.drv_stats_counter));
+       bp->fw_stats_req->hdr.drv_stats_counter =
+               cpu_to_le16(bp->stats_counter++);
 
-               /* adjust the ramrod to include VF queues statistics */
-               bnx2x_iov_adjust_stats_req(bp);
-               bnx2x_dp_stats(bp);
+       DP(BNX2X_MSG_STATS, "Sending statistics ramrod %d\n",
+          le16_to_cpu(bp->fw_stats_req->hdr.drv_stats_counter));
 
-               /* send FW stats ramrod */
-               rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_STAT_QUERY, 0,
-                                  U64_HI(bp->fw_stats_req_mapping),
-                                  U64_LO(bp->fw_stats_req_mapping),
-                                  NONE_CONNECTION_TYPE);
-               if (rc == 0)
-                       bp->stats_pending = 1;
+       /* adjust the ramrod to include VF queues statistics */
+       bnx2x_iov_adjust_stats_req(bp);
+       bnx2x_dp_stats(bp);
 
-               spin_unlock_bh(&bp->stats_lock);
-       }
+       /* send FW stats ramrod */
+       rc = bnx2x_sp_post(bp, RAMROD_CMD_ID_COMMON_STAT_QUERY, 0,
+                          U64_HI(bp->fw_stats_req_mapping),
+                          U64_LO(bp->fw_stats_req_mapping),
+                          NONE_CONNECTION_TYPE);
+       if (rc == 0)
+               bp->stats_pending = 1;
 }
 
 static void bnx2x_hw_stats_post(struct bnx2x *bp)
@@ -221,7 +213,7 @@ static void bnx2x_stats_comp(struct bnx2x *bp)
  */
 
 /* should be called under stats_sema */
-static void __bnx2x_stats_pmf_update(struct bnx2x *bp)
+static void bnx2x_stats_pmf_update(struct bnx2x *bp)
 {
        struct dmae_command *dmae;
        u32 opcode;
@@ -519,7 +511,7 @@ static void bnx2x_func_stats_init(struct bnx2x *bp)
 }
 
 /* should be called under stats_sema */
-static void __bnx2x_stats_start(struct bnx2x *bp)
+static void bnx2x_stats_start(struct bnx2x *bp)
 {
        if (IS_PF(bp)) {
                if (bp->port.pmf)
@@ -531,34 +523,13 @@ static void __bnx2x_stats_start(struct bnx2x *bp)
                bnx2x_hw_stats_post(bp);
                bnx2x_storm_stats_post(bp);
        }
-
-       bp->stats_started = true;
-}
-
-static void bnx2x_stats_start(struct bnx2x *bp)
-{
-       if (down_timeout(&bp->stats_sema, HZ/10))
-               BNX2X_ERR("Unable to acquire stats lock\n");
-       __bnx2x_stats_start(bp);
-       up(&bp->stats_sema);
 }
 
 static void bnx2x_stats_pmf_start(struct bnx2x *bp)
 {
-       if (down_timeout(&bp->stats_sema, HZ/10))
-               BNX2X_ERR("Unable to acquire stats lock\n");
        bnx2x_stats_comp(bp);
-       __bnx2x_stats_pmf_update(bp);
-       __bnx2x_stats_start(bp);
-       up(&bp->stats_sema);
-}
-
-static void bnx2x_stats_pmf_update(struct bnx2x *bp)
-{
-       if (down_timeout(&bp->stats_sema, HZ/10))
-               BNX2X_ERR("Unable to acquire stats lock\n");
-       __bnx2x_stats_pmf_update(bp);
-       up(&bp->stats_sema);
+       bnx2x_stats_pmf_update(bp);
+       bnx2x_stats_start(bp);
 }
 
 static void bnx2x_stats_restart(struct bnx2x *bp)
@@ -568,11 +539,9 @@ static void bnx2x_stats_restart(struct bnx2x *bp)
         */
        if (IS_VF(bp))
                return;
-       if (down_timeout(&bp->stats_sema, HZ/10))
-               BNX2X_ERR("Unable to acquire stats lock\n");
+
        bnx2x_stats_comp(bp);
-       __bnx2x_stats_start(bp);
-       up(&bp->stats_sema);
+       bnx2x_stats_start(bp);
 }
 
 static void bnx2x_bmac_stats_update(struct bnx2x *bp)
@@ -1246,18 +1215,12 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 {
        u32 *stats_comp = bnx2x_sp(bp, stats_comp);
 
-       /* we run update from timer context, so give up
-        * if somebody is in the middle of transition
-        */
-       if (down_trylock(&bp->stats_sema))
+       if (bnx2x_edebug_stats_stopped(bp))
                return;
 
-       if (bnx2x_edebug_stats_stopped(bp) || !bp->stats_started)
-               goto out;
-
        if (IS_PF(bp)) {
                if (*stats_comp != DMAE_COMP_VAL)
-                       goto out;
+                       return;
 
                if (bp->port.pmf)
                        bnx2x_hw_stats_update(bp);
@@ -1267,7 +1230,7 @@ static void bnx2x_stats_update(struct bnx2x *bp)
                                BNX2X_ERR("storm stats were not updated for 3 times\n");
                                bnx2x_panic();
                        }
-                       goto out;
+                       return;
                }
        } else {
                /* vf doesn't collect HW statistics, and doesn't get completions
@@ -1281,7 +1244,7 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 
        /* vf is done */
        if (IS_VF(bp))
-               goto out;
+               return;
 
        if (netif_msg_timer(bp)) {
                struct bnx2x_eth_stats *estats = &bp->eth_stats;
@@ -1292,9 +1255,6 @@ static void bnx2x_stats_update(struct bnx2x *bp)
 
        bnx2x_hw_stats_post(bp);
        bnx2x_storm_stats_post(bp);
-
-out:
-       up(&bp->stats_sema);
 }
 
 static void bnx2x_port_stats_stop(struct bnx2x *bp)
@@ -1358,12 +1318,7 @@ static void bnx2x_port_stats_stop(struct bnx2x *bp)
 
 static void bnx2x_stats_stop(struct bnx2x *bp)
 {
-       int update = 0;
-
-       if (down_timeout(&bp->stats_sema, HZ/10))
-               BNX2X_ERR("Unable to acquire stats lock\n");
-
-       bp->stats_started = false;
+       bool update = false;
 
        bnx2x_stats_comp(bp);
 
@@ -1381,8 +1336,6 @@ static void bnx2x_stats_stop(struct bnx2x *bp)
                bnx2x_hw_stats_post(bp);
                bnx2x_stats_comp(bp);
        }
-
-       up(&bp->stats_sema);
 }
 
 static void bnx2x_stats_do_nothing(struct bnx2x *bp)
@@ -1410,18 +1363,28 @@ static const struct {
 
 void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event)
 {
-       enum bnx2x_stats_state state;
-       void (*action)(struct bnx2x *bp);
+       enum bnx2x_stats_state state = bp->stats_state;
+
        if (unlikely(bp->panic))
                return;
 
-       spin_lock_bh(&bp->stats_lock);
-       state = bp->stats_state;
+       /* Statistics update run from timer context, and we don't want to stop
+        * that context in case someone is in the middle of a transition.
+        * For other events, wait a bit until lock is taken.
+        */
+       if (!mutex_trylock(&bp->stats_lock)) {
+               if (event == STATS_EVENT_UPDATE)
+                       return;
+
+               DP(BNX2X_MSG_STATS,
+                  "Unlikely stats' lock contention [event %d]\n", event);
+               mutex_lock(&bp->stats_lock);
+       }
+
+       bnx2x_stats_stm[state][event].action(bp);
        bp->stats_state = bnx2x_stats_stm[state][event].next_state;
-       action = bnx2x_stats_stm[state][event].action;
-       spin_unlock_bh(&bp->stats_lock);
 
-       action(bp);
+       mutex_unlock(&bp->stats_lock);
 
        if ((event != STATS_EVENT_UPDATE) || netif_msg_timer(bp))
                DP(BNX2X_MSG_STATS, "state %d -> event %d -> state %d\n",
@@ -1998,13 +1961,34 @@ void bnx2x_afex_collect_stats(struct bnx2x *bp, void *void_afex_stats,
        }
 }
 
-void bnx2x_stats_safe_exec(struct bnx2x *bp,
-                          void (func_to_exec)(void *cookie),
-                          void *cookie){
-       if (down_timeout(&bp->stats_sema, HZ/10))
-               BNX2X_ERR("Unable to acquire stats lock\n");
+int bnx2x_stats_safe_exec(struct bnx2x *bp,
+                         void (func_to_exec)(void *cookie),
+                         void *cookie)
+{
+       int cnt = 10, rc = 0;
+
+       /* Wait for statistics to end [while blocking further requests],
+        * then run supplied function 'safely'.
+        */
+       mutex_lock(&bp->stats_lock);
+
        bnx2x_stats_comp(bp);
+       while (bp->stats_pending && cnt--)
+               if (bnx2x_storm_stats_update(bp))
+                       usleep_range(1000, 2000);
+       if (bp->stats_pending) {
+               BNX2X_ERR("Failed to wait for stats pending to clear [possibly FW is stuck]\n");
+               rc = -EBUSY;
+               goto out;
+       }
+
        func_to_exec(cookie);
-       __bnx2x_stats_start(bp);
-       up(&bp->stats_sema);
+
+out:
+       /* No need to restart statistics - if they're enabled, the timer
+        * will restart the statistics.
+        */
+       mutex_unlock(&bp->stats_lock);
+
+       return rc;
 }
index 2beceae..965539a 100644 (file)
@@ -539,9 +539,9 @@ struct bnx2x;
 void bnx2x_memset_stats(struct bnx2x *bp);
 void bnx2x_stats_init(struct bnx2x *bp);
 void bnx2x_stats_handle(struct bnx2x *bp, enum bnx2x_stats_event event);
-void bnx2x_stats_safe_exec(struct bnx2x *bp,
-                          void (func_to_exec)(void *cookie),
-                          void *cookie);
+int bnx2x_stats_safe_exec(struct bnx2x *bp,
+                         void (func_to_exec)(void *cookie),
+                         void *cookie);
 
 /**
  * bnx2x_save_statistics - save statistics when unloading.
index 149a0d7..b971229 100644 (file)
@@ -73,15 +73,17 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
        if (wol->wolopts & ~(WAKE_MAGIC | WAKE_MAGICSECURE))
                return -EINVAL;
 
+       reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
        if (wol->wolopts & WAKE_MAGICSECURE) {
                bcmgenet_umac_writel(priv, get_unaligned_be16(&wol->sopass[0]),
                                     UMAC_MPD_PW_MS);
                bcmgenet_umac_writel(priv, get_unaligned_be32(&wol->sopass[2]),
                                     UMAC_MPD_PW_LS);
-               reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
                reg |= MPD_PW_EN;
-               bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
+       } else {
+               reg &= ~MPD_PW_EN;
        }
+       bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
 
        /* Flag the device and relevant IRQ as wakeup capable */
        if (wol->wolopts) {
index ad76b8e..81d4153 100644 (file)
@@ -2113,17 +2113,17 @@ static const struct net_device_ops macb_netdev_ops = {
 };
 
 #if defined(CONFIG_OF)
-static struct macb_config pc302gem_config = {
+static const struct macb_config pc302gem_config = {
        .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE,
        .dma_burst_length = 16,
 };
 
-static struct macb_config sama5d3_config = {
+static const struct macb_config sama5d3_config = {
        .caps = MACB_CAPS_SG_DISABLED | MACB_CAPS_GIGABIT_MODE_AVAILABLE,
        .dma_burst_length = 16,
 };
 
-static struct macb_config sama5d4_config = {
+static const struct macb_config sama5d4_config = {
        .caps = 0,
        .dma_burst_length = 4,
 };
@@ -2154,7 +2154,7 @@ static void macb_configure_caps(struct macb *bp)
        if (bp->pdev->dev.of_node) {
                match = of_match_node(macb_dt_ids, bp->pdev->dev.of_node);
                if (match && match->data) {
-                       config = (const struct macb_config *)match->data;
+                       config = match->data;
 
                        bp->caps = config->caps;
                        /*
index 31dc080..ff85619 100644 (file)
 
 /* Bitfields in MID */
 #define MACB_IDNUM_OFFSET                      16
-#define MACB_IDNUM_SIZE                                16
+#define MACB_IDNUM_SIZE                                12
 #define MACB_REV_OFFSET                                0
 #define MACB_REV_SIZE                          16
 
index 97842d0..c6ff489 100644 (file)
@@ -376,8 +376,6 @@ enum {
 enum {
        INGQ_EXTRAS = 2,        /* firmware event queue and */
                                /*   forwarded interrupts */
-       MAX_EGRQ = MAX_ETH_QSETS*2 + MAX_OFLD_QSETS*2
-                  + MAX_CTRL_QUEUES + MAX_RDMA_QUEUES + MAX_ISCSI_QUEUES,
        MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES
                   + MAX_RDMA_CIQS + MAX_ISCSI_QUEUES + INGQ_EXTRAS,
 };
@@ -616,11 +614,13 @@ struct sge {
        unsigned int idma_qid[2];   /* SGE IDMA Hung Ingress Queue ID */
 
        unsigned int egr_start;
+       unsigned int egr_sz;
        unsigned int ingr_start;
-       void *egr_map[MAX_EGRQ];    /* qid->queue egress queue map */
-       struct sge_rspq *ingr_map[MAX_INGQ]; /* qid->queue ingress queue map */
-       DECLARE_BITMAP(starving_fl, MAX_EGRQ);
-       DECLARE_BITMAP(txq_maperr, MAX_EGRQ);
+       unsigned int ingr_sz;
+       void **egr_map;    /* qid->queue egress queue map */
+       struct sge_rspq **ingr_map; /* qid->queue ingress queue map */
+       unsigned long *starving_fl;
+       unsigned long *txq_maperr;
        struct timer_list rx_timer; /* refills starving FLs */
        struct timer_list tx_timer; /* checks Tx queues */
 };
@@ -1136,6 +1136,8 @@ int cxgb4_t4_bar2_sge_qregs(struct adapter *adapter,
 
 unsigned int qtimer_val(const struct adapter *adap,
                        const struct sge_rspq *q);
+
+int t4_init_devlog_params(struct adapter *adapter);
 int t4_init_sge_params(struct adapter *adapter);
 int t4_init_tp_params(struct adapter *adap);
 int t4_filter_field_shift(const struct adapter *adap, int filter_sel);
index 78854ce..dcb0479 100644 (file)
@@ -670,9 +670,13 @@ static int cctrl_tbl_show(struct seq_file *seq, void *v)
                "0.9375" };
 
        int i;
-       u16 incr[NMTUS][NCCTRL_WIN];
+       u16 (*incr)[NCCTRL_WIN];
        struct adapter *adap = seq->private;
 
+       incr = kmalloc(sizeof(*incr) * NMTUS, GFP_KERNEL);
+       if (!incr)
+               return -ENOMEM;
+
        t4_read_cong_tbl(adap, incr);
 
        for (i = 0; i < NCCTRL_WIN; ++i) {
@@ -685,6 +689,8 @@ static int cctrl_tbl_show(struct seq_file *seq, void *v)
                           adap->params.a_wnd[i],
                           dec_fac[adap->params.b_wnd[i]]);
        }
+
+       kfree(incr);
        return 0;
 }
 
index a22cf93..d929951 100644 (file)
@@ -920,7 +920,7 @@ static void quiesce_rx(struct adapter *adap)
 {
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
+       for (i = 0; i < adap->sge.ingr_sz; i++) {
                struct sge_rspq *q = adap->sge.ingr_map[i];
 
                if (q && q->handler) {
@@ -934,6 +934,21 @@ static void quiesce_rx(struct adapter *adap)
        }
 }
 
+/* Disable interrupt and napi handler */
+static void disable_interrupts(struct adapter *adap)
+{
+       if (adap->flags & FULL_INIT_DONE) {
+               t4_intr_disable(adap);
+               if (adap->flags & USING_MSIX) {
+                       free_msix_queue_irqs(adap);
+                       free_irq(adap->msix_info[0].vec, adap);
+               } else {
+                       free_irq(adap->pdev->irq, adap);
+               }
+               quiesce_rx(adap);
+       }
+}
+
 /*
  * Enable NAPI scheduling and interrupt generation for all Rx queues.
  */
@@ -941,7 +956,7 @@ static void enable_rx(struct adapter *adap)
 {
        int i;
 
-       for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
+       for (i = 0; i < adap->sge.ingr_sz; i++) {
                struct sge_rspq *q = adap->sge.ingr_map[i];
 
                if (!q)
@@ -970,8 +985,8 @@ static int setup_sge_queues(struct adapter *adap)
        int err, msi_idx, i, j;
        struct sge *s = &adap->sge;
 
-       bitmap_zero(s->starving_fl, MAX_EGRQ);
-       bitmap_zero(s->txq_maperr, MAX_EGRQ);
+       bitmap_zero(s->starving_fl, s->egr_sz);
+       bitmap_zero(s->txq_maperr, s->egr_sz);
 
        if (adap->flags & USING_MSIX)
                msi_idx = 1;         /* vector 0 is for non-queue interrupts */
@@ -983,6 +998,19 @@ static int setup_sge_queues(struct adapter *adap)
                msi_idx = -((int)s->intrq.abs_id + 1);
        }
 
+       /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here,
+        * don't forget to update the following which need to be
+        * synchronized to any changes here.
+        *
+        * 1. The calculations of MAX_INGQ in cxgb4.h.
+        *
+        * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs
+        *    to accommodate any new/deleted Ingress Queues
+        *    which need MSI-X Vectors.
+        *
+        * 3. Update sge_qinfo_show() to include information on the
+        *    new/deleted queues.
+        */
        err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
                               msi_idx, NULL, fwevtq_handler);
        if (err) {
@@ -4244,19 +4272,12 @@ static int cxgb_up(struct adapter *adap)
 
 static void cxgb_down(struct adapter *adapter)
 {
-       t4_intr_disable(adapter);
        cancel_work_sync(&adapter->tid_release_task);
        cancel_work_sync(&adapter->db_full_task);
        cancel_work_sync(&adapter->db_drop_task);
        adapter->tid_release_task_busy = false;
        adapter->tid_release_head = NULL;
 
-       if (adapter->flags & USING_MSIX) {
-               free_msix_queue_irqs(adapter);
-               free_irq(adapter->msix_info[0].vec, adapter);
-       } else
-               free_irq(adapter->pdev->irq, adapter);
-       quiesce_rx(adapter);
        t4_sge_stop(adapter);
        t4_free_sge_resources(adapter);
        adapter->flags &= ~FULL_INIT_DONE;
@@ -4733,8 +4754,9 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
        if (ret < 0)
                return ret;
 
-       ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, MAX_EGRQ, 64, MAX_INGQ,
-                         0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF, FW_CMD_CAP_PF);
+       ret = t4_cfg_pfvf(adap, adap->fn, adap->fn, 0, adap->sge.egr_sz, 64,
+                         MAX_INGQ, 0, 0, 4, 0xf, 0xf, 16, FW_CMD_CAP_PF,
+                         FW_CMD_CAP_PF);
        if (ret < 0)
                return ret;
 
@@ -5088,10 +5110,15 @@ static int adap_init0(struct adapter *adap)
        enum dev_state state;
        u32 params[7], val[7];
        struct fw_caps_config_cmd caps_cmd;
-       struct fw_devlog_cmd devlog_cmd;
-       u32 devlog_meminfo;
        int reset = 1;
 
+       /* Grab Firmware Device Log parameters as early as possible so we have
+        * access to it for debugging, etc.
+        */
+       ret = t4_init_devlog_params(adap);
+       if (ret < 0)
+               return ret;
+
        /* Contact FW, advertising Master capability */
        ret = t4_fw_hello(adap, adap->mbox, adap->mbox, MASTER_MAY, &state);
        if (ret < 0) {
@@ -5169,30 +5196,6 @@ static int adap_init0(struct adapter *adap)
        if (ret < 0)
                goto bye;
 
-       /* Read firmware device log parameters.  We really need to find a way
-        * to get these parameters initialized with some default values (which
-        * are likely to be correct) for the case where we either don't
-        * attache to the firmware or it's crashed when we probe the adapter.
-        * That way we'll still be able to perform early firmware startup
-        * debugging ...  If the request to get the Firmware's Device Log
-        * parameters fails, we'll live so we don't make that a fatal error.
-        */
-       memset(&devlog_cmd, 0, sizeof(devlog_cmd));
-       devlog_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_DEVLOG_CMD) |
-                                      FW_CMD_REQUEST_F | FW_CMD_READ_F);
-       devlog_cmd.retval_len16 = htonl(FW_LEN16(devlog_cmd));
-       ret = t4_wr_mbox(adap, adap->mbox, &devlog_cmd, sizeof(devlog_cmd),
-                        &devlog_cmd);
-       if (ret == 0) {
-               devlog_meminfo =
-                       ntohl(devlog_cmd.memtype_devlog_memaddr16_devlog);
-               adap->params.devlog.memtype =
-                       FW_DEVLOG_CMD_MEMTYPE_DEVLOG_G(devlog_meminfo);
-               adap->params.devlog.start =
-                       FW_DEVLOG_CMD_MEMADDR16_DEVLOG_G(devlog_meminfo) << 4;
-               adap->params.devlog.size = ntohl(devlog_cmd.memsize_devlog);
-       }
-
        /*
         * Find out what ports are available to us.  Note that we need to do
         * this before calling adap_init0_no_config() since it needs nports
@@ -5293,6 +5296,51 @@ static int adap_init0(struct adapter *adap)
        adap->tids.nftids = val[4] - val[3] + 1;
        adap->sge.ingr_start = val[5];
 
+       /* qids (ingress/egress) returned from firmware can be anywhere
+        * in the range from EQ(IQFLINT)_START to EQ(IQFLINT)_END.
+        * Hence driver needs to allocate memory for this range to
+        * store the queue info. Get the highest IQFLINT/EQ index returned
+        * in FW_EQ_*_CMD.alloc command.
+        */
+       params[0] = FW_PARAM_PFVF(EQ_END);
+       params[1] = FW_PARAM_PFVF(IQFLINT_END);
+       ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val);
+       if (ret < 0)
+               goto bye;
+       adap->sge.egr_sz = val[0] - adap->sge.egr_start + 1;
+       adap->sge.ingr_sz = val[1] - adap->sge.ingr_start + 1;
+
+       adap->sge.egr_map = kcalloc(adap->sge.egr_sz,
+                                   sizeof(*adap->sge.egr_map), GFP_KERNEL);
+       if (!adap->sge.egr_map) {
+               ret = -ENOMEM;
+               goto bye;
+       }
+
+       adap->sge.ingr_map = kcalloc(adap->sge.ingr_sz,
+                                    sizeof(*adap->sge.ingr_map), GFP_KERNEL);
+       if (!adap->sge.ingr_map) {
+               ret = -ENOMEM;
+               goto bye;
+       }
+
+       /* Allocate the memory for the various egress queue bitmaps
+        * i.e. starving_fl and txq_maperr.
+        */
+       adap->sge.starving_fl = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz),
+                                       sizeof(long), GFP_KERNEL);
+       if (!adap->sge.starving_fl) {
+               ret = -ENOMEM;
+               goto bye;
+       }
+
+       adap->sge.txq_maperr = kcalloc(BITS_TO_LONGS(adap->sge.egr_sz),
+                                      sizeof(long), GFP_KERNEL);
+       if (!adap->sge.txq_maperr) {
+               ret = -ENOMEM;
+               goto bye;
+       }
+
        params[0] = FW_PARAM_PFVF(CLIP_START);
        params[1] = FW_PARAM_PFVF(CLIP_END);
        ret = t4_query_params(adap, adap->mbox, adap->fn, 0, 2, params, val);
@@ -5501,6 +5549,10 @@ static int adap_init0(struct adapter *adap)
         * happened to HW/FW, stop issuing commands.
         */
 bye:
+       kfree(adap->sge.egr_map);
+       kfree(adap->sge.ingr_map);
+       kfree(adap->sge.starving_fl);
+       kfree(adap->sge.txq_maperr);
        if (ret != -ETIMEDOUT && ret != -EIO)
                t4_fw_bye(adap, adap->mbox);
        return ret;
@@ -5528,6 +5580,7 @@ static pci_ers_result_t eeh_err_detected(struct pci_dev *pdev,
                netif_carrier_off(dev);
        }
        spin_unlock(&adap->stats_lock);
+       disable_interrupts(adap);
        if (adap->flags & FULL_INIT_DONE)
                cxgb_down(adap);
        rtnl_unlock();
@@ -5912,6 +5965,10 @@ static void free_some_resources(struct adapter *adapter)
 
        t4_free_mem(adapter->l2t);
        t4_free_mem(adapter->tids.tid_tab);
+       kfree(adapter->sge.egr_map);
+       kfree(adapter->sge.ingr_map);
+       kfree(adapter->sge.starving_fl);
+       kfree(adapter->sge.txq_maperr);
        disable_msi(adapter);
 
        for_each_port(adapter, i)
@@ -6237,6 +6294,8 @@ static void remove_one(struct pci_dev *pdev)
                if (is_offload(adapter))
                        detach_ulds(adapter);
 
+               disable_interrupts(adapter);
+
                for_each_port(adapter, i)
                        if (adapter->port[i]->reg_state == NETREG_REGISTERED)
                                unregister_netdev(adapter->port[i]);
index b4b9f60..b688b32 100644 (file)
@@ -2171,7 +2171,7 @@ static void sge_rx_timer_cb(unsigned long data)
        struct adapter *adap = (struct adapter *)data;
        struct sge *s = &adap->sge;
 
-       for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++)
+       for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
                for (m = s->starving_fl[i]; m; m &= m - 1) {
                        struct sge_eth_rxq *rxq;
                        unsigned int id = __ffs(m) + i * BITS_PER_LONG;
@@ -2259,7 +2259,7 @@ static void sge_tx_timer_cb(unsigned long data)
        struct adapter *adap = (struct adapter *)data;
        struct sge *s = &adap->sge;
 
-       for (i = 0; i < ARRAY_SIZE(s->txq_maperr); i++)
+       for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
                for (m = s->txq_maperr[i]; m; m &= m - 1) {
                        unsigned long id = __ffs(m) + i * BITS_PER_LONG;
                        struct sge_ofld_txq *txq = s->egr_map[id];
@@ -2741,7 +2741,8 @@ void t4_free_sge_resources(struct adapter *adap)
                free_rspq_fl(adap, &adap->sge.intrq, NULL);
 
        /* clear the reverse egress queue map */
-       memset(adap->sge.egr_map, 0, sizeof(adap->sge.egr_map));
+       memset(adap->sge.egr_map, 0,
+              adap->sge.egr_sz * sizeof(*adap->sge.egr_map));
 }
 
 void t4_sge_start(struct adapter *adap)
index 853c389..ee394dc 100644 (file)
@@ -1120,7 +1120,7 @@ int t4_prep_fw(struct adapter *adap, struct fw_info *fw_info,
                }
 
                /* Installed successfully, update the cached header too. */
-               memcpy(card_fw, fs_fw, sizeof(*card_fw));
+               *card_fw = *fs_fw;
                card_fw_usable = 1;
                *reset = 0;     /* already reset as part of load_fw */
        }
@@ -4458,6 +4458,59 @@ int cxgb4_t4_bar2_sge_qregs(struct adapter *adapter,
        return 0;
 }
 
+/**
+ *     t4_init_devlog_params - initialize adapter->params.devlog
+ *     @adap: the adapter
+ *
+ *     Initialize various fields of the adapter's Firmware Device Log
+ *     Parameters structure.
+ */
+int t4_init_devlog_params(struct adapter *adap)
+{
+       struct devlog_params *dparams = &adap->params.devlog;
+       u32 pf_dparams;
+       unsigned int devlog_meminfo;
+       struct fw_devlog_cmd devlog_cmd;
+       int ret;
+
+       /* If we're dealing with newer firmware, the Device Log Parameters
+        * are stored in a designated register which allows us to access the
+        * Device Log even if we can't talk to the firmware.
+        */
+       pf_dparams =
+               t4_read_reg(adap, PCIE_FW_REG(PCIE_FW_PF_A, PCIE_FW_PF_DEVLOG));
+       if (pf_dparams) {
+               unsigned int nentries, nentries128;
+
+               dparams->memtype = PCIE_FW_PF_DEVLOG_MEMTYPE_G(pf_dparams);
+               dparams->start = PCIE_FW_PF_DEVLOG_ADDR16_G(pf_dparams) << 4;
+
+               nentries128 = PCIE_FW_PF_DEVLOG_NENTRIES128_G(pf_dparams);
+               nentries = (nentries128 + 1) * 128;
+               dparams->size = nentries * sizeof(struct fw_devlog_e);
+
+               return 0;
+       }
+
+       /* Otherwise, ask the firmware for its Device Log Parameters.
+        */
+       memset(&devlog_cmd, 0, sizeof(devlog_cmd));
+       devlog_cmd.op_to_write = htonl(FW_CMD_OP_V(FW_DEVLOG_CMD) |
+                                      FW_CMD_REQUEST_F | FW_CMD_READ_F);
+       devlog_cmd.retval_len16 = htonl(FW_LEN16(devlog_cmd));
+       ret = t4_wr_mbox(adap, adap->mbox, &devlog_cmd, sizeof(devlog_cmd),
+                        &devlog_cmd);
+       if (ret)
+               return ret;
+
+       devlog_meminfo = ntohl(devlog_cmd.memtype_devlog_memaddr16_devlog);
+       dparams->memtype = FW_DEVLOG_CMD_MEMTYPE_DEVLOG_G(devlog_meminfo);
+       dparams->start = FW_DEVLOG_CMD_MEMADDR16_DEVLOG_G(devlog_meminfo) << 4;
+       dparams->size = ntohl(devlog_cmd.memsize_devlog);
+
+       return 0;
+}
+
 /**
  *     t4_init_sge_params - initialize adap->params.sge
  *     @adapter: the adapter
index 231a725..326674b 100644 (file)
@@ -63,6 +63,8 @@
 #define MC_BIST_STATUS_REG(reg_addr, idx) ((reg_addr) + (idx) * 4)
 #define EDC_BIST_STATUS_REG(reg_addr, idx) ((reg_addr) + (idx) * 4)
 
+#define PCIE_FW_REG(reg_addr, idx) ((reg_addr) + (idx) * 4)
+
 #define SGE_PF_KDOORBELL_A 0x0
 
 #define QID_S    15
 #define PFNUM_V(x) ((x) << PFNUM_S)
 
 #define PCIE_FW_A 0x30b8
+#define PCIE_FW_PF_A 0x30bc
 
 #define PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS_A 0x5908
 
index 9b353a8..a4a19e0 100644 (file)
@@ -101,7 +101,7 @@ enum fw_wr_opcodes {
        FW_RI_BIND_MW_WR               = 0x18,
        FW_RI_FR_NSMR_WR               = 0x19,
        FW_RI_INV_LSTAG_WR             = 0x1a,
-       FW_LASTC2E_WR                  = 0x40
+       FW_LASTC2E_WR                  = 0x70
 };
 
 struct fw_wr_hdr {
@@ -993,6 +993,7 @@ enum fw_memtype_cf {
        FW_MEMTYPE_CF_EXTMEM            = 0x2,
        FW_MEMTYPE_CF_FLASH             = 0x4,
        FW_MEMTYPE_CF_INTERNAL          = 0x5,
+       FW_MEMTYPE_CF_EXTMEM1           = 0x6,
 };
 
 struct fw_caps_config_cmd {
@@ -1035,6 +1036,7 @@ enum fw_params_mnem {
        FW_PARAMS_MNEM_PFVF             = 2,    /* function params */
        FW_PARAMS_MNEM_REG              = 3,    /* limited register access */
        FW_PARAMS_MNEM_DMAQ             = 4,    /* dma queue params */
+       FW_PARAMS_MNEM_CHNET            = 5,    /* chnet params */
        FW_PARAMS_MNEM_LAST
 };
 
@@ -3102,7 +3104,8 @@ enum fw_devlog_facility {
        FW_DEVLOG_FACILITY_FCOE         = 0x2E,
        FW_DEVLOG_FACILITY_FOISCSI      = 0x30,
        FW_DEVLOG_FACILITY_FOFCOE       = 0x32,
-       FW_DEVLOG_FACILITY_MAX          = 0x32,
+       FW_DEVLOG_FACILITY_CHNET        = 0x34,
+       FW_DEVLOG_FACILITY_MAX          = 0x34,
 };
 
 /* log message format */
@@ -3139,4 +3142,36 @@ struct fw_devlog_cmd {
        (((x) >> FW_DEVLOG_CMD_MEMADDR16_DEVLOG_S) & \
         FW_DEVLOG_CMD_MEMADDR16_DEVLOG_M)
 
+/* P C I E   F W   P F 7   R E G I S T E R */
+
+/* PF7 stores the Firmware Device Log parameters which allows Host Drivers to
+ * access the "devlog" without needing to contact firmware.  The encoding is
+ * mostly the same as that returned by the DEVLOG command except for the size
+ * which is encoded as the number of entries in multiples-1 of 128 here rather
+ * than the memory size as is done in the DEVLOG command.  Thus, 0 means 128
+ * and 15 means 2048.  This of course in turn constrains the allowed values
+ * for the devlog size ...
+ */
+#define PCIE_FW_PF_DEVLOG              7
+
+#define PCIE_FW_PF_DEVLOG_NENTRIES128_S        28
+#define PCIE_FW_PF_DEVLOG_NENTRIES128_M        0xf
+#define PCIE_FW_PF_DEVLOG_NENTRIES128_V(x) \
+       ((x) << PCIE_FW_PF_DEVLOG_NENTRIES128_S)
+#define PCIE_FW_PF_DEVLOG_NENTRIES128_G(x) \
+       (((x) >> PCIE_FW_PF_DEVLOG_NENTRIES128_S) & \
+        PCIE_FW_PF_DEVLOG_NENTRIES128_M)
+
+#define PCIE_FW_PF_DEVLOG_ADDR16_S     4
+#define PCIE_FW_PF_DEVLOG_ADDR16_M     0xffffff
+#define PCIE_FW_PF_DEVLOG_ADDR16_V(x)  ((x) << PCIE_FW_PF_DEVLOG_ADDR16_S)
+#define PCIE_FW_PF_DEVLOG_ADDR16_G(x) \
+       (((x) >> PCIE_FW_PF_DEVLOG_ADDR16_S) & PCIE_FW_PF_DEVLOG_ADDR16_M)
+
+#define PCIE_FW_PF_DEVLOG_MEMTYPE_S    0
+#define PCIE_FW_PF_DEVLOG_MEMTYPE_M    0xf
+#define PCIE_FW_PF_DEVLOG_MEMTYPE_V(x) ((x) << PCIE_FW_PF_DEVLOG_MEMTYPE_S)
+#define PCIE_FW_PF_DEVLOG_MEMTYPE_G(x) \
+       (((x) >> PCIE_FW_PF_DEVLOG_MEMTYPE_S) & PCIE_FW_PF_DEVLOG_MEMTYPE_M)
+
 #endif /* _T4FW_INTERFACE_H_ */
index e2bd3f7..b9d1cba 100644 (file)
 #define __T4FW_VERSION_H__
 
 #define T4FW_VERSION_MAJOR 0x01
-#define T4FW_VERSION_MINOR 0x0C
-#define T4FW_VERSION_MICRO 0x19
+#define T4FW_VERSION_MINOR 0x0D
+#define T4FW_VERSION_MICRO 0x20
 #define T4FW_VERSION_BUILD 0x00
 
 #define T5FW_VERSION_MAJOR 0x01
-#define T5FW_VERSION_MINOR 0x0C
-#define T5FW_VERSION_MICRO 0x19
+#define T5FW_VERSION_MINOR 0x0D
+#define T5FW_VERSION_MICRO 0x20
 #define T5FW_VERSION_BUILD 0x00
 
 #endif
index 0545f0d..e0d7110 100644 (file)
@@ -1004,7 +1004,7 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
                                              ? (tq->pidx - 1)
                                              : (tq->size - 1));
                        __be64 *src = (__be64 *)&tq->desc[index];
-                       __be64 __iomem *dst = (__be64 *)(tq->bar2_addr +
+                       __be64 __iomem *dst = (__be64 __iomem *)(tq->bar2_addr +
                                                         SGE_UDB_WCDOORBELL);
                        unsigned int count = EQ_UNIT / sizeof(__be64);
 
@@ -1018,7 +1018,11 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
                         * DMA.
                         */
                        while (count) {
-                               writeq(*src, dst);
+                               /* the (__force u64) is because the compiler
+                                * doesn't understand the endian swizzling
+                                * going on
+                                */
+                               writeq((__force u64)*src, dst);
                                src++;
                                dst++;
                                count--;
@@ -1252,8 +1256,8 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev)
        BUG_ON(DIV_ROUND_UP(ETHTXQ_MAX_HDR, TXD_PER_EQ_UNIT) > 1);
        wr = (void *)&txq->q.desc[txq->q.pidx];
        wr->equiq_to_len16 = cpu_to_be32(wr_mid);
-       wr->r3[0] = cpu_to_be64(0);
-       wr->r3[1] = cpu_to_be64(0);
+       wr->r3[0] = cpu_to_be32(0);
+       wr->r3[1] = cpu_to_be32(0);
        skb_copy_from_linear_data(skb, (void *)wr->ethmacdst, fw_hdr_copy_len);
        end = (u64 *)wr + flits;
 
index 1b5506d..280b4a2 100644 (file)
@@ -210,10 +210,10 @@ int t4vf_wr_mbox_core(struct adapter *adapter, const void *cmd, int size,
 
                        if (rpl) {
                                /* request bit in high-order BE word */
-                               WARN_ON((be32_to_cpu(*(const u32 *)cmd)
+                               WARN_ON((be32_to_cpu(*(const __be32 *)cmd)
                                         & FW_CMD_REQUEST_F) == 0);
                                get_mbox_rpl(adapter, rpl, size, mbox_data);
-                               WARN_ON((be32_to_cpu(*(u32 *)rpl)
+                               WARN_ON((be32_to_cpu(*(__be32 *)rpl)
                                         & FW_CMD_REQUEST_F) != 0);
                        }
                        t4_write_reg(adapter, mbox_ctl,
@@ -484,7 +484,7 @@ int t4_bar2_sge_qregs(struct adapter *adapter,
         *  o The BAR2 Queue ID.
         *  o The BAR2 Queue ID Offset into the BAR2 page.
         */
-       bar2_page_offset = ((qid >> qpp_shift) << page_shift);
+       bar2_page_offset = ((u64)(qid >> qpp_shift) << page_shift);
        bar2_qid = qid & qpp_mask;
        bar2_qid_offset = bar2_qid * SGE_UDB_SIZE;
 
index 3b42556..ed41559 100644 (file)
@@ -589,7 +589,7 @@ static void tulip_tx_timeout(struct net_device *dev)
                               (unsigned int)tp->rx_ring[i].buffer1,
                               (unsigned int)tp->rx_ring[i].buffer2,
                               buf[0], buf[1], buf[2]);
-                       for (j = 0; buf[j] != 0xee && j < 1600; j++)
+                       for (j = 0; ((j < 1600) && buf[j] != 0xee); j++)
                                if (j < 100)
                                        pr_cont(" %02x", buf[j]);
                        pr_cont(" j=%d\n", j);
index 27de37a..27b9fe9 100644 (file)
@@ -354,6 +354,7 @@ struct be_vf_cfg {
        u16 vlan_tag;
        u32 tx_rate;
        u32 plink_tracking;
+       u32 privileges;
 };
 
 enum vf_state {
@@ -423,6 +424,7 @@ struct be_adapter {
 
        u8 __iomem *csr;        /* CSR BAR used only for BE2/3 */
        u8 __iomem *db;         /* Door Bell */
+       u8 __iomem *pcicfg;     /* On SH,BEx only. Shadow of PCI config space */
 
        struct mutex mbox_lock; /* For serializing mbox cmds to BE card */
        struct be_dma_mem mbox_mem;
index 36916cf..7f05f30 100644 (file)
@@ -1902,15 +1902,11 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd,
 {
        int num_eqs, i = 0;
 
-       if (lancer_chip(adapter) && num > 8) {
-               while (num) {
-                       num_eqs = min(num, 8);
-                       __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs);
-                       i += num_eqs;
-                       num -= num_eqs;
-               }
-       } else {
-               __be_cmd_modify_eqd(adapter, set_eqd, num);
+       while (num) {
+               num_eqs = min(num, 8);
+               __be_cmd_modify_eqd(adapter, &set_eqd[i], num_eqs);
+               i += num_eqs;
+               num -= num_eqs;
        }
 
        return 0;
@@ -1918,7 +1914,7 @@ int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *set_eqd,
 
 /* Uses sycnhronous mcc */
 int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
-                      u32 num)
+                      u32 num, u32 domain)
 {
        struct be_mcc_wrb *wrb;
        struct be_cmd_req_vlan_config *req;
@@ -1936,6 +1932,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
        be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON,
                               OPCODE_COMMON_NTWK_VLAN_CONFIG, sizeof(*req),
                               wrb, NULL);
+       req->hdr.domain = domain;
 
        req->interface_id = if_id;
        req->untagged = BE_IF_FLAGS_UNTAGGED & be_if_cap_flags(adapter) ? 1 : 0;
index db761e8..a7634a3 100644 (file)
@@ -2256,7 +2256,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter,
 int be_cmd_get_fw_ver(struct be_adapter *adapter);
 int be_cmd_modify_eqd(struct be_adapter *adapter, struct be_set_eqd *, int num);
 int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array,
-                      u32 num);
+                      u32 num, u32 domain);
 int be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 status);
 int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc);
 int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc);
index 0a81685..e6b790f 100644 (file)
@@ -1171,7 +1171,7 @@ static int be_vid_config(struct be_adapter *adapter)
        for_each_set_bit(i, adapter->vids, VLAN_N_VID)
                vids[num++] = cpu_to_le16(i);
 
-       status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num);
+       status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
        if (status) {
                dev_err(dev, "Setting HW VLAN filtering failed\n");
                /* Set to VLAN promisc mode as setting VLAN filter failed */
@@ -1380,11 +1380,67 @@ static int be_get_vf_config(struct net_device *netdev, int vf,
        return 0;
 }
 
+static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
+{
+       struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
+       u16 vids[BE_NUM_VLANS_SUPPORTED];
+       int vf_if_id = vf_cfg->if_handle;
+       int status;
+
+       /* Enable Transparent VLAN Tagging */
+       status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0);
+       if (status)
+               return status;
+
+       /* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
+       vids[0] = 0;
+       status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
+       if (!status)
+               dev_info(&adapter->pdev->dev,
+                        "Cleared guest VLANs on VF%d", vf);
+
+       /* After TVT is enabled, disallow VFs to program VLAN filters */
+       if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
+               status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
+                                                 ~BE_PRIV_FILTMGMT, vf + 1);
+               if (!status)
+                       vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
+       }
+       return 0;
+}
+
+static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
+{
+       struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
+       struct device *dev = &adapter->pdev->dev;
+       int status;
+
+       /* Reset Transparent VLAN Tagging. */
+       status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
+                                      vf_cfg->if_handle, 0);
+       if (status)
+               return status;
+
+       /* Allow VFs to program VLAN filtering */
+       if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
+               status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
+                                                 BE_PRIV_FILTMGMT, vf + 1);
+               if (!status) {
+                       vf_cfg->privileges |= BE_PRIV_FILTMGMT;
+                       dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
+               }
+       }
+
+       dev_info(dev,
+                "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
+       return 0;
+}
+
 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
 {
        struct be_adapter *adapter = netdev_priv(netdev);
        struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
-       int status = 0;
+       int status;
 
        if (!sriov_enabled(adapter))
                return -EPERM;
@@ -1394,24 +1450,19 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos)
 
        if (vlan || qos) {
                vlan |= qos << VLAN_PRIO_SHIFT;
-               if (vf_cfg->vlan_tag != vlan)
-                       status = be_cmd_set_hsw_config(adapter, vlan, vf + 1,
-                                                      vf_cfg->if_handle, 0);
+               status = be_set_vf_tvt(adapter, vf, vlan);
        } else {
-               /* Reset Transparent Vlan Tagging. */
-               status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID,
-                                              vf + 1, vf_cfg->if_handle, 0);
+               status = be_clear_vf_tvt(adapter, vf);
        }
 
        if (status) {
                dev_err(&adapter->pdev->dev,
-                       "VLAN %d config on VF %d failed : %#x\n", vlan,
-                       vf, status);
+                       "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
+                       status);
                return be_cmd_status(status);
        }
 
        vf_cfg->vlan_tag = vlan;
-
        return 0;
 }
 
@@ -2772,14 +2823,12 @@ void be_detect_error(struct be_adapter *adapter)
                        }
                }
        } else {
-               pci_read_config_dword(adapter->pdev,
-                                     PCICFG_UE_STATUS_LOW, &ue_lo);
-               pci_read_config_dword(adapter->pdev,
-                                     PCICFG_UE_STATUS_HIGH, &ue_hi);
-               pci_read_config_dword(adapter->pdev,
-                                     PCICFG_UE_STATUS_LOW_MASK, &ue_lo_mask);
-               pci_read_config_dword(adapter->pdev,
-                                     PCICFG_UE_STATUS_HI_MASK, &ue_hi_mask);
+               ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
+               ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
+               ue_lo_mask = ioread32(adapter->pcicfg +
+                                     PCICFG_UE_STATUS_LOW_MASK);
+               ue_hi_mask = ioread32(adapter->pcicfg +
+                                     PCICFG_UE_STATUS_HI_MASK);
 
                ue_lo = (ue_lo & ~ue_lo_mask);
                ue_hi = (ue_hi & ~ue_hi_mask);
@@ -3339,7 +3388,6 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle,
                        u32 cap_flags, u32 vf)
 {
        u32 en_flags;
-       int status;
 
        en_flags = BE_IF_FLAGS_UNTAGGED | BE_IF_FLAGS_BROADCAST |
                   BE_IF_FLAGS_MULTICAST | BE_IF_FLAGS_PASS_L3L4_ERRORS |
@@ -3347,10 +3395,7 @@ static int be_if_create(struct be_adapter *adapter, u32 *if_handle,
 
        en_flags &= cap_flags;
 
-       status = be_cmd_if_create(adapter, cap_flags, en_flags,
-                                 if_handle, vf);
-
-       return status;
+       return be_cmd_if_create(adapter, cap_flags, en_flags, if_handle, vf);
 }
 
 static int be_vfs_if_create(struct be_adapter *adapter)
@@ -3368,8 +3413,13 @@ static int be_vfs_if_create(struct be_adapter *adapter)
                if (!BE3_chip(adapter)) {
                        status = be_cmd_get_profile_config(adapter, &res,
                                                           vf + 1);
-                       if (!status)
+                       if (!status) {
                                cap_flags = res.if_cap_flags;
+                               /* Prevent VFs from enabling VLAN promiscuous
+                                * mode
+                                */
+                               cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
+                       }
                }
 
                status = be_if_create(adapter, &vf_cfg->if_handle,
@@ -3403,7 +3453,6 @@ static int be_vf_setup(struct be_adapter *adapter)
        struct device *dev = &adapter->pdev->dev;
        struct be_vf_cfg *vf_cfg;
        int status, old_vfs, vf;
-       u32 privileges;
 
        old_vfs = pci_num_vf(adapter->pdev);
 
@@ -3433,15 +3482,18 @@ static int be_vf_setup(struct be_adapter *adapter)
 
        for_all_vfs(adapter, vf_cfg, vf) {
                /* Allow VFs to programs MAC/VLAN filters */
-               status = be_cmd_get_fn_privileges(adapter, &privileges, vf + 1);
-               if (!status && !(privileges & BE_PRIV_FILTMGMT)) {
+               status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
+                                                 vf + 1);
+               if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
                        status = be_cmd_set_fn_privileges(adapter,
-                                                         privileges |
+                                                         vf_cfg->privileges |
                                                          BE_PRIV_FILTMGMT,
                                                          vf + 1);
-                       if (!status)
+                       if (!status) {
+                               vf_cfg->privileges |= BE_PRIV_FILTMGMT;
                                dev_info(dev, "VF%d has FILTMGMT privilege\n",
                                         vf);
+                       }
                }
 
                /* Allow full available bandwidth */
@@ -4820,24 +4872,37 @@ static int be_roce_map_pci_bars(struct be_adapter *adapter)
 
 static int be_map_pci_bars(struct be_adapter *adapter)
 {
+       struct pci_dev *pdev = adapter->pdev;
        u8 __iomem *addr;
 
        if (BEx_chip(adapter) && be_physfn(adapter)) {
-               adapter->csr = pci_iomap(adapter->pdev, 2, 0);
+               adapter->csr = pci_iomap(pdev, 2, 0);
                if (!adapter->csr)
                        return -ENOMEM;
        }
 
-       addr = pci_iomap(adapter->pdev, db_bar(adapter), 0);
+       addr = pci_iomap(pdev, db_bar(adapter), 0);
        if (!addr)
                goto pci_map_err;
        adapter->db = addr;
 
+       if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
+               if (be_physfn(adapter)) {
+                       /* PCICFG is the 2nd BAR in BE2 */
+                       addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
+                       if (!addr)
+                               goto pci_map_err;
+                       adapter->pcicfg = addr;
+               } else {
+                       adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
+               }
+       }
+
        be_roce_map_pci_bars(adapter);
        return 0;
 
 pci_map_err:
-       dev_err(&adapter->pdev->dev, "Error in mapping PCI BARs\n");
+       dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
        be_unmap_pci_bars(adapter);
        return -ENOMEM;
 }
index 9bb6220..f6a3a7a 100644 (file)
@@ -1189,13 +1189,12 @@ static void
 fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
 {
        struct  fec_enet_private *fep;
-       struct bufdesc *bdp, *bdp_t;
+       struct bufdesc *bdp;
        unsigned short status;
        struct  sk_buff *skb;
        struct fec_enet_priv_tx_q *txq;
        struct netdev_queue *nq;
        int     index = 0;
-       int     i, bdnum;
        int     entries_free;
 
        fep = netdev_priv(ndev);
@@ -1216,29 +1215,18 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
                if (bdp == txq->cur_tx)
                        break;
 
-               bdp_t = bdp;
-               bdnum = 1;
-               index = fec_enet_get_bd_index(txq->tx_bd_base, bdp_t, fep);
-               skb = txq->tx_skbuff[index];
-               while (!skb) {
-                       bdp_t = fec_enet_get_nextdesc(bdp_t, fep, queue_id);
-                       index = fec_enet_get_bd_index(txq->tx_bd_base, bdp_t, fep);
-                       skb = txq->tx_skbuff[index];
-                       bdnum++;
-               }
-               if (skb_shinfo(skb)->nr_frags &&
-                   (status = bdp_t->cbd_sc) & BD_ENET_TX_READY)
-                       break;
+               index = fec_enet_get_bd_index(txq->tx_bd_base, bdp, fep);
 
-               for (i = 0; i < bdnum; i++) {
-                       if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr))
-                               dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
-                                                bdp->cbd_datlen, DMA_TO_DEVICE);
-                       bdp->cbd_bufaddr = 0;
-                       if (i < bdnum - 1)
-                               bdp = fec_enet_get_nextdesc(bdp, fep, queue_id);
-               }
+               skb = txq->tx_skbuff[index];
                txq->tx_skbuff[index] = NULL;
+               if (!IS_TSO_HEADER(txq, bdp->cbd_bufaddr))
+                       dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
+                                       bdp->cbd_datlen, DMA_TO_DEVICE);
+               bdp->cbd_bufaddr = 0;
+               if (!skb) {
+                       bdp = fec_enet_get_nextdesc(bdp, fep, queue_id);
+                       continue;
+               }
 
                /* Check for errors. */
                if (status & (BD_ENET_TX_HB | BD_ENET_TX_LC |
@@ -1479,8 +1467,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 
                        vlan_packet_rcvd = true;
 
-                       skb_copy_to_linear_data_offset(skb, VLAN_HLEN,
-                                                      data, (2 * ETH_ALEN));
+                       memmove(skb->data + VLAN_HLEN, data, ETH_ALEN * 2);
                        skb_pull(skb, VLAN_HLEN);
                }
 
@@ -1597,7 +1584,7 @@ fec_enet_interrupt(int irq, void *dev_id)
        writel(int_events, fep->hwp + FEC_IEVENT);
        fec_enet_collect_events(fep, int_events);
 
-       if (fep->work_tx || fep->work_rx) {
+       if ((fep->work_tx || fep->work_rx) && fep->link) {
                ret = IRQ_HANDLED;
 
                if (napi_schedule_prep(&fep->napi)) {
@@ -1967,6 +1954,7 @@ static int fec_enet_mii_init(struct platform_device *pdev)
        struct fec_enet_private *fep = netdev_priv(ndev);
        struct device_node *node;
        int err = -ENXIO, i;
+       u32 mii_speed, holdtime;
 
        /*
         * The i.MX28 dual fec interfaces are not equal.
@@ -2004,10 +1992,33 @@ static int fec_enet_mii_init(struct platform_device *pdev)
         * Reference Manual has an error on this, and gets fixed on i.MX6Q
         * document.
         */
-       fep->phy_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 5000000);
+       mii_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 5000000);
        if (fep->quirks & FEC_QUIRK_ENET_MAC)
-               fep->phy_speed--;
-       fep->phy_speed <<= 1;
+               mii_speed--;
+       if (mii_speed > 63) {
+               dev_err(&pdev->dev,
+                       "fec clock (%lu) to fast to get right mii speed\n",
+                       clk_get_rate(fep->clk_ipg));
+               err = -EINVAL;
+               goto err_out;
+       }
+
+       /*
+        * The i.MX28 and i.MX6 types have another field in the MSCR (aka
+        * MII_SPEED) register that defines the MDIO output hold time. Earlier
+        * versions are RAZ there, so just ignore the difference and write the
+        * register always.
+        * The minimal hold time according to IEEE 802.3 (clause 22) is 10 ns.
+        * HOLDTIME + 1 is the number of clk cycles the fec is holding the
+        * output.
+        * The HOLDTIME bitfield takes values between 0 and 7 (inclusive).
+        * Given that ceil(clkrate / 5000000) <= 64, the calculation for
+        * holdtime cannot result in a value greater than 3.
+        */
+       holdtime = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 100000000) - 1;
+
+       fep->phy_speed = mii_speed << 1 | holdtime << 8;
+
        writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
        fep->mii_bus = mdiobus_alloc();
@@ -3383,7 +3394,6 @@ fec_drv_remove(struct platform_device *pdev)
                regulator_disable(fep->reg_phy);
        if (fep->ptp_clock)
                ptp_clock_unregister(fep->ptp_clock);
-       fec_enet_clk_enable(ndev, false);
        of_node_put(fep->phy_node);
        free_netdev(ndev);
 
index 178e540..7bf3682 100644 (file)
@@ -747,6 +747,18 @@ static int gfar_parse_group(struct device_node *np,
        return 0;
 }
 
+static int gfar_of_group_count(struct device_node *np)
+{
+       struct device_node *child;
+       int num = 0;
+
+       for_each_available_child_of_node(np, child)
+               if (!of_node_cmp(child->name, "queue-group"))
+                       num++;
+
+       return num;
+}
+
 static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 {
        const char *model;
@@ -784,7 +796,7 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
                num_rx_qs = 1;
        } else { /* MQ_MG_MODE */
                /* get the actual number of supported groups */
-               unsigned int num_grps = of_get_available_child_count(np);
+               unsigned int num_grps = gfar_of_group_count(np);
 
                if (num_grps == 0 || num_grps > MAXGROUPS) {
                        dev_err(&ofdev->dev, "Invalid # of int groups(%d)\n",
@@ -851,7 +863,10 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 
        /* Parse and initialize group specific information */
        if (priv->mode == MQ_MG_MODE) {
-               for_each_child_of_node(np, child) {
+               for_each_available_child_of_node(np, child) {
+                       if (of_node_cmp(child->name, "queue-group"))
+                               continue;
+
                        err = gfar_parse_group(child, priv, model);
                        if (err)
                                goto err_grp_init;
index 357e8b5..56b774d 100644 (file)
@@ -3893,6 +3893,9 @@ static int ucc_geth_probe(struct platform_device* ofdev)
        ugeth->phy_interface = phy_interface;
        ugeth->max_speed = max_speed;
 
+       /* Carrier starts down, phylib will bring it up */
+       netif_carrier_off(dev);
+
        err = register_netdev(dev);
        if (err) {
                if (netif_msg_probe(ugeth))
index 072426a..cd7675a 100644 (file)
@@ -1136,6 +1136,8 @@ restart_poll:
        ibmveth_replenish_task(adapter);
 
        if (frames_processed < budget) {
+               napi_complete(napi);
+
                /* We think we are done - reenable interrupts,
                 * then check once more to make sure we are done.
                 */
@@ -1144,8 +1146,6 @@ restart_poll:
 
                BUG_ON(lpar_rc != H_SUCCESS);
 
-               napi_complete(napi);
-
                if (ibmveth_rxq_pending_buffer(adapter) &&
                    napi_reschedule(napi)) {
                        lpar_rc = h_vio_signal(adapter->vdev->unit_address,
index 96208f1..2db6532 100644 (file)
@@ -2658,16 +2658,11 @@ static int mvneta_stop(struct net_device *dev)
 static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
        struct mvneta_port *pp = netdev_priv(dev);
-       int ret;
 
        if (!pp->phy_dev)
                return -ENOTSUPP;
 
-       ret = phy_mii_ioctl(pp->phy_dev, ifr, cmd);
-       if (!ret)
-               mvneta_adjust_link(dev);
-
-       return ret;
+       return phy_mii_ioctl(pp->phy_dev, ifr, cmd);
 }
 
 /* Ethtool methods */
index a681d7c..546ca42 100644 (file)
@@ -724,7 +724,8 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
                 * on the host, we deprecate the error message for this
                 * specific command/input_mod/opcode_mod/fw-status to be debug.
                 */
-               if (op == MLX4_CMD_SET_PORT && in_modifier == 1 &&
+               if (op == MLX4_CMD_SET_PORT &&
+                   (in_modifier == 1 || in_modifier == 2) &&
                    op_modifier == 0 && context->fw_status == CMD_STAT_BAD_SIZE)
                        mlx4_dbg(dev, "command 0x%x failed: fw status = 0x%x\n",
                                 op, context->fw_status);
@@ -1993,7 +1994,6 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
                        goto reset_slave;
                slave_state[slave].vhcr_dma = ((u64) param) << 48;
                priv->mfunc.master.slave_state[slave].cookie = 0;
-               mutex_init(&priv->mfunc.master.gen_eqe_mutex[slave]);
                break;
        case MLX4_COMM_CMD_VHCR1:
                if (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR0)
@@ -2225,6 +2225,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
                for (i = 0; i < dev->num_slaves; ++i) {
                        s_state = &priv->mfunc.master.slave_state[i];
                        s_state->last_cmd = MLX4_COMM_CMD_RESET;
+                       mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]);
                        for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j)
                                s_state->event_eq[j].eqn = -1;
                        __raw_writel((__force u32) 0,
index 2a210c4..3485acf 100644 (file)
@@ -1698,8 +1698,6 @@ int mlx4_en_start_port(struct net_device *dev)
        /* Schedule multicast task to populate multicast list */
        queue_work(mdev->workqueue, &priv->rx_mode_task);
 
-       mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap);
-
 #ifdef CONFIG_MLX4_EN_VXLAN
        if (priv->mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN)
                vxlan_get_rx_port(dev);
@@ -2807,13 +2805,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        netif_carrier_off(dev);
        mlx4_en_set_default_moderation(priv);
 
-       err = register_netdev(dev);
-       if (err) {
-               en_err(priv, "Netdev registration failed for port %d\n", port);
-               goto out;
-       }
-       priv->registered = 1;
-
        en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num);
        en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
 
@@ -2853,6 +2844,16 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
                queue_delayed_work(mdev->workqueue, &priv->service_task,
                                   SERVICE_TASK_DELAY);
 
+       mlx4_set_stats_bitmap(mdev->dev, &priv->stats_bitmap);
+
+       err = register_netdev(dev);
+       if (err) {
+               en_err(priv, "Netdev registration failed for port %d\n", port);
+               goto out;
+       }
+
+       priv->registered = 1;
+
        return 0;
 
 out:
index 264bc15..6e70ffe 100644 (file)
@@ -153,12 +153,10 @@ void mlx4_gen_slave_eqe(struct work_struct *work)
 
                /* All active slaves need to receive the event */
                if (slave == ALL_SLAVES) {
-                       for (i = 0; i < dev->num_slaves; i++) {
-                               if (i != dev->caps.function &&
-                                   master->slave_state[i].active)
-                                       if (mlx4_GEN_EQE(dev, i, eqe))
-                                               mlx4_warn(dev, "Failed to generate event for slave %d\n",
-                                                         i);
+                       for (i = 0; i <= dev->persist->num_vfs; i++) {
+                               if (mlx4_GEN_EQE(dev, i, eqe))
+                                       mlx4_warn(dev, "Failed to generate event for slave %d\n",
+                                                 i);
                        }
                } else {
                        if (mlx4_GEN_EQE(dev, slave, eqe))
@@ -203,13 +201,11 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave,
                             struct mlx4_eqe *eqe)
 {
        struct mlx4_priv *priv = mlx4_priv(dev);
-       struct mlx4_slave_state *s_slave =
-               &priv->mfunc.master.slave_state[slave];
 
-       if (!s_slave->active) {
-               /*mlx4_warn(dev, "Trying to pass event to inactive slave\n");*/
+       if (slave < 0 || slave > dev->persist->num_vfs ||
+           slave == dev->caps.function ||
+           !priv->mfunc.master.slave_state[slave].active)
                return;
-       }
 
        slave_event(dev, slave, eqe);
 }
index 2a8268e..ebbe244 100644 (file)
@@ -453,7 +453,7 @@ struct mlx4_en_port_stats {
        unsigned long rx_chksum_none;
        unsigned long rx_chksum_complete;
        unsigned long tx_chksum_offload;
-#define NUM_PORT_STATS         9
+#define NUM_PORT_STATS         10
 };
 
 struct mlx4_en_perf_stats {
index d97ca88..6e413ac 100644 (file)
@@ -3095,6 +3095,12 @@ int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe)
        if (!priv->mfunc.master.slave_state)
                return -EINVAL;
 
+       /* check for slave valid, slave not PF, and slave active */
+       if (slave < 0 || slave > dev->persist->num_vfs ||
+           slave == dev->caps.function ||
+           !priv->mfunc.master.slave_state[slave].active)
+               return 0;
+
        event_eq = &priv->mfunc.master.slave_state[slave].event_eq[eqe->type];
 
        /* Create the event only if the slave is registered */
index 9fb6948..5cecec2 100644 (file)
@@ -4468,10 +4468,16 @@ static int rocker_port_master_changed(struct net_device *dev)
        struct net_device *master = netdev_master_upper_dev_get(dev);
        int err = 0;
 
+       /* There are currently three cases handled here:
+        * 1. Joining a bridge
+        * 2. Leaving a previously joined bridge
+        * 3. Other, e.g. being added to or removed from a bond or openvswitch,
+        *    in which case nothing is done
+        */
        if (master && master->rtnl_link_ops &&
            !strcmp(master->rtnl_link_ops->kind, "bridge"))
                err = rocker_port_bridge_join(rocker_port, master);
-       else
+       else if (rocker_port_is_bridged(rocker_port))
                err = rocker_port_bridge_leave(rocker_port);
 
        return err;
index 209ee1b..8678e39 100644 (file)
@@ -92,6 +92,7 @@ static const char version[] =
 #include "smc91x.h"
 
 #if defined(CONFIG_ASSABET_NEPONSET)
+#include <mach/assabet.h>
 #include <mach/neponset.h>
 #endif
 
@@ -2247,10 +2248,9 @@ static int smc_drv_probe(struct platform_device *pdev)
        const struct of_device_id *match = NULL;
        struct smc_local *lp;
        struct net_device *ndev;
-       struct resource *res;
+       struct resource *res, *ires;
        unsigned int __iomem *addr;
        unsigned long irq_flags = SMC_IRQ_FLAGS;
-       unsigned long irq_resflags;
        int ret;
 
        ndev = alloc_etherdev(sizeof(struct smc_local));
@@ -2342,19 +2342,16 @@ static int smc_drv_probe(struct platform_device *pdev)
                goto out_free_netdev;
        }
 
-       ndev->irq = platform_get_irq(pdev, 0);
-       if (ndev->irq <= 0) {
+       ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+       if (!ires) {
                ret = -ENODEV;
                goto out_release_io;
        }
-       /*
-        * If this platform does not specify any special irqflags, or if
-        * the resource supplies a trigger, override the irqflags with
-        * the trigger flags from the resource.
-        */
-       irq_resflags = irqd_get_trigger_type(irq_get_irq_data(ndev->irq));
-       if (irq_flags == -1 || irq_resflags & IRQF_TRIGGER_MASK)
-               irq_flags = irq_resflags & IRQF_TRIGGER_MASK;
+
+       ndev->irq = ires->start;
+
+       if (irq_flags == -1 || ires->flags & IRQF_TRIGGER_MASK)
+               irq_flags = ires->flags & IRQF_TRIGGER_MASK;
 
        ret = smc_request_attrib(pdev, ndev);
        if (ret)
index fb846eb..f9b42f1 100644 (file)
@@ -272,6 +272,37 @@ static int stmmac_pltfr_probe(struct platform_device *pdev)
        struct stmmac_priv *priv = NULL;
        struct plat_stmmacenet_data *plat_dat = NULL;
        const char *mac = NULL;
+       int irq, wol_irq, lpi_irq;
+
+       /* Get IRQ information early to have an ability to ask for deferred
+        * probe if needed before we went too far with resource allocation.
+        */
+       irq = platform_get_irq_byname(pdev, "macirq");
+       if (irq < 0) {
+               if (irq != -EPROBE_DEFER) {
+                       dev_err(dev,
+                               "MAC IRQ configuration information not found\n");
+               }
+               return irq;
+       }
+
+       /* On some platforms e.g. SPEAr the wake up irq differs from the mac irq
+        * The external wake up irq can be passed through the platform code
+        * named as "eth_wake_irq"
+        *
+        * In case the wake up interrupt is not passed from the platform,
+        * the driver will continue to use the mac irq (ndev->irq)
+        */
+       wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq");
+       if (wol_irq < 0) {
+               if (wol_irq == -EPROBE_DEFER)
+                       return -EPROBE_DEFER;
+               wol_irq = irq;
+       }
+
+       lpi_irq = platform_get_irq_byname(pdev, "eth_lpi");
+       if (lpi_irq == -EPROBE_DEFER)
+               return -EPROBE_DEFER;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        addr = devm_ioremap_resource(dev, res);
@@ -323,39 +354,15 @@ static int stmmac_pltfr_probe(struct platform_device *pdev)
                return PTR_ERR(priv);
        }
 
+       /* Copy IRQ values to priv structure which is now available */
+       priv->dev->irq = irq;
+       priv->wol_irq = wol_irq;
+       priv->lpi_irq = lpi_irq;
+
        /* Get MAC address if available (DT) */
        if (mac)
                memcpy(priv->dev->dev_addr, mac, ETH_ALEN);
 
-       /* Get the MAC information */
-       priv->dev->irq = platform_get_irq_byname(pdev, "macirq");
-       if (priv->dev->irq < 0) {
-               if (priv->dev->irq != -EPROBE_DEFER) {
-                       netdev_err(priv->dev,
-                                  "MAC IRQ configuration information not found\n");
-               }
-               return priv->dev->irq;
-       }
-
-       /*
-        * On some platforms e.g. SPEAr the wake up irq differs from the mac irq
-        * The external wake up irq can be passed through the platform code
-        * named as "eth_wake_irq"
-        *
-        * In case the wake up interrupt is not passed from the platform
-        * so the driver will continue to use the mac irq (ndev->irq)
-        */
-       priv->wol_irq = platform_get_irq_byname(pdev, "eth_wake_irq");
-       if (priv->wol_irq < 0) {
-               if (priv->wol_irq == -EPROBE_DEFER)
-                       return -EPROBE_DEFER;
-               priv->wol_irq = priv->dev->irq;
-       }
-
-       priv->lpi_irq = platform_get_irq_byname(pdev, "eth_lpi");
-       if (priv->lpi_irq == -EPROBE_DEFER)
-               return -EPROBE_DEFER;
-
        platform_set_drvdata(pdev, priv->dev);
 
        pr_debug("STMMAC platform driver registration completed");
index a495931..0e0fbb5 100644 (file)
@@ -498,9 +498,9 @@ static int w5100_napi_poll(struct napi_struct *napi, int budget)
        }
 
        if (rx_count < budget) {
+               napi_complete(napi);
                w5100_write(priv, W5100_IMR, IR_S0);
                mmiowb();
-               napi_complete(napi);
        }
 
        return rx_count;
index 09322d9..4b31000 100644 (file)
@@ -418,9 +418,9 @@ static int w5300_napi_poll(struct napi_struct *napi, int budget)
        }
 
        if (rx_count < budget) {
+               napi_complete(napi);
                w5300_write(priv, W5300_IMR, IR_S0);
                mmiowb();
-               napi_complete(napi);
        }
 
        return rx_count;
index 924ea98..54549a6 100644 (file)
@@ -114,7 +114,9 @@ unsigned int ipvlan_mac_hash(const unsigned char *addr);
 rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb);
 int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev);
 void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr);
-bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6);
+struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
+                                  const void *iaddr, bool is_v6);
+bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6);
 struct ipvl_addr *ipvlan_ht_addr_lookup(const struct ipvl_port *port,
                                        const void *iaddr, bool is_v6);
 void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync);
index 2a17500..b7877a1 100644 (file)
@@ -81,19 +81,20 @@ void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr)
        hash = (addr->atype == IPVL_IPV6) ?
               ipvlan_get_v6_hash(&addr->ip6addr) :
               ipvlan_get_v4_hash(&addr->ip4addr);
-       hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
+       if (hlist_unhashed(&addr->hlnode))
+               hlist_add_head_rcu(&addr->hlnode, &port->hlhead[hash]);
 }
 
 void ipvlan_ht_addr_del(struct ipvl_addr *addr, bool sync)
 {
-       hlist_del_rcu(&addr->hlnode);
+       hlist_del_init_rcu(&addr->hlnode);
        if (sync)
                synchronize_rcu();
 }
 
-bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
+struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan,
+                                  const void *iaddr, bool is_v6)
 {
-       struct ipvl_port *port = ipvlan->port;
        struct ipvl_addr *addr;
 
        list_for_each_entry(addr, &ipvlan->addrs, anode) {
@@ -101,12 +102,21 @@ bool ipvlan_addr_busy(struct ipvl_dev *ipvlan, void *iaddr, bool is_v6)
                    ipv6_addr_equal(&addr->ip6addr, iaddr)) ||
                    (!is_v6 && addr->atype == IPVL_IPV4 &&
                    addr->ip4addr.s_addr == ((struct in_addr *)iaddr)->s_addr))
-                       return true;
+                       return addr;
        }
+       return NULL;
+}
+
+bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6)
+{
+       struct ipvl_dev *ipvlan;
 
-       if (ipvlan_ht_addr_lookup(port, iaddr, is_v6))
-               return true;
+       ASSERT_RTNL();
 
+       list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
+               if (ipvlan_find_addr(ipvlan, iaddr, is_v6))
+                       return true;
+       }
        return false;
 }
 
@@ -192,7 +202,8 @@ static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb,
        if (skb->protocol == htons(ETH_P_PAUSE))
                return;
 
-       list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
+       rcu_read_lock();
+       list_for_each_entry_rcu(ipvlan, &port->ipvlans, pnode) {
                if (local && (ipvlan == in_dev))
                        continue;
 
@@ -219,6 +230,7 @@ static void ipvlan_multicast_frame(struct ipvl_port *port, struct sk_buff *skb,
 mcast_acct:
                ipvlan_count_rx(ipvlan, len, ret == NET_RX_SUCCESS, true);
        }
+       rcu_read_unlock();
 
        /* Locally generated? ...Forward a copy to the main-device as
         * well. On the RX side we'll ignore it (wont give it to any
index 4f4099d..4fa1420 100644 (file)
@@ -505,7 +505,7 @@ static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
        if (ipvlan->ipv6cnt > 0 || ipvlan->ipv4cnt > 0) {
                list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
                        ipvlan_ht_addr_del(addr, !dev->dismantle);
-                       list_del_rcu(&addr->anode);
+                       list_del(&addr->anode);
                }
        }
        list_del_rcu(&ipvlan->pnode);
@@ -607,7 +607,7 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
        struct ipvl_addr *addr;
 
-       if (ipvlan_addr_busy(ipvlan, ip6_addr, true)) {
+       if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
                netif_err(ipvlan, ifup, ipvlan->dev,
                          "Failed to add IPv6=%pI6c addr for %s intf\n",
                          ip6_addr, ipvlan->dev->name);
@@ -620,9 +620,13 @@ static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
        addr->master = ipvlan;
        memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr));
        addr->atype = IPVL_IPV6;
-       list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
+       list_add_tail(&addr->anode, &ipvlan->addrs);
        ipvlan->ipv6cnt++;
-       ipvlan_ht_addr_add(ipvlan, addr);
+       /* If the interface is not up, the address will be added to the hash
+        * list by ipvlan_open.
+        */
+       if (netif_running(ipvlan->dev))
+               ipvlan_ht_addr_add(ipvlan, addr);
 
        return 0;
 }
@@ -631,12 +635,12 @@ static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
 {
        struct ipvl_addr *addr;
 
-       addr = ipvlan_ht_addr_lookup(ipvlan->port, ip6_addr, true);
+       addr = ipvlan_find_addr(ipvlan, ip6_addr, true);
        if (!addr)
                return;
 
        ipvlan_ht_addr_del(addr, true);
-       list_del_rcu(&addr->anode);
+       list_del(&addr->anode);
        ipvlan->ipv6cnt--;
        WARN_ON(ipvlan->ipv6cnt < 0);
        kfree_rcu(addr, rcu);
@@ -675,7 +679,7 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
        struct ipvl_addr *addr;
 
-       if (ipvlan_addr_busy(ipvlan, ip4_addr, false)) {
+       if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
                netif_err(ipvlan, ifup, ipvlan->dev,
                          "Failed to add IPv4=%pI4 on %s intf.\n",
                          ip4_addr, ipvlan->dev->name);
@@ -688,9 +692,13 @@ static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
        addr->master = ipvlan;
        memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr));
        addr->atype = IPVL_IPV4;
-       list_add_tail_rcu(&addr->anode, &ipvlan->addrs);
+       list_add_tail(&addr->anode, &ipvlan->addrs);
        ipvlan->ipv4cnt++;
-       ipvlan_ht_addr_add(ipvlan, addr);
+       /* If the interface is not up, the address will be added to the hash
+        * list by ipvlan_open.
+        */
+       if (netif_running(ipvlan->dev))
+               ipvlan_ht_addr_add(ipvlan, addr);
        ipvlan_set_broadcast_mac_filter(ipvlan, true);
 
        return 0;
@@ -700,12 +708,12 @@ static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
 {
        struct ipvl_addr *addr;
 
-       addr = ipvlan_ht_addr_lookup(ipvlan->port, ip4_addr, false);
+       addr = ipvlan_find_addr(ipvlan, ip4_addr, false);
        if (!addr)
                return;
 
        ipvlan_ht_addr_del(addr, true);
-       list_del_rcu(&addr->anode);
+       list_del(&addr->anode);
        ipvlan->ipv4cnt--;
        WARN_ON(ipvlan->ipv4cnt < 0);
        if (!ipvlan->ipv4cnt)
index f1ee71e..7d39484 100644 (file)
@@ -1730,11 +1730,11 @@ static int team_set_mac_address(struct net_device *dev, void *p)
        if (dev->type == ARPHRD_ETHER && !is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;
        memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
-       rcu_read_lock();
-       list_for_each_entry_rcu(port, &team->port_list, list)
+       mutex_lock(&team->lock);
+       list_for_each_entry(port, &team->port_list, list)
                if (team->ops.port_change_dev_addr)
                        team->ops.port_change_dev_addr(team, port);
-       rcu_read_unlock();
+       mutex_unlock(&team->lock);
        return 0;
 }
 
index 5c55f11..75d6f26 100644 (file)
@@ -188,6 +188,8 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
                memcpy(skb_tail_pointer(skb), &padbytes, sizeof(padbytes));
                skb_put(skb, sizeof(padbytes));
        }
+
+       usbnet_set_skb_tx_stats(skb, 1, 0);
        return skb;
 }
 
index 9311a08..4545e78 100644 (file)
@@ -522,6 +522,7 @@ static const struct driver_info wwan_info = {
 #define DELL_VENDOR_ID         0x413C
 #define REALTEK_VENDOR_ID      0x0bda
 #define SAMSUNG_VENDOR_ID      0x04e8
+#define LENOVO_VENDOR_ID       0x17ef
 
 static const struct usb_device_id      products[] = {
 /* BLACKLIST !!
@@ -702,6 +703,13 @@ static const struct usb_device_id  products[] = {
        .driver_info = 0,
 },
 
+/* Lenovo Thinkpad USB 3.0 Ethernet Adapters (based on Realtek RTL8153) */
+{
+       USB_DEVICE_AND_INTERFACE_INFO(LENOVO_VENDOR_ID, 0x7205, USB_CLASS_COMM,
+                       USB_CDC_SUBCLASS_ETHERNET, USB_CDC_PROTO_NONE),
+       .driver_info = 0,
+},
+
 /* WHITELIST!!!
  *
  * CDC Ether uses two interfaces, not necessarily consecutive.
index 80a844e..c3e4da9 100644 (file)
@@ -1172,17 +1172,17 @@ cdc_ncm_fill_tx_frame(struct usbnet *dev, struct sk_buff *skb, __le32 sign)
 
        /* return skb */
        ctx->tx_curr_skb = NULL;
-       dev->net->stats.tx_packets += ctx->tx_curr_frame_num;
 
        /* keep private stats: framing overhead and number of NTBs */
        ctx->tx_overhead += skb_out->len - ctx->tx_curr_frame_payload;
        ctx->tx_ntbs++;
 
-       /* usbnet has already counted all the framing overhead.
+       /* usbnet will count all the framing overhead by default.
         * Adjust the stats so that the tx_bytes counter show real
         * payload data instead.
         */
-       dev->net->stats.tx_bytes -= skb_out->len - ctx->tx_curr_frame_payload;
+       usbnet_set_skb_tx_stats(skb_out, n,
+                               ctx->tx_curr_frame_payload - skb_out->len);
 
        return skb_out;
 
index 3eed708..1762ad3 100644 (file)
@@ -46,8 +46,7 @@ enum cx82310_status {
 };
 
 #define CMD_PACKET_SIZE        64
-/* first command after power on can take around 8 seconds */
-#define CMD_TIMEOUT    15000
+#define CMD_TIMEOUT    100
 #define CMD_REPLY_RETRY 5
 
 #define CX82310_MTU    1514
@@ -78,8 +77,9 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply,
        ret = usb_bulk_msg(udev, usb_sndbulkpipe(udev, CMD_EP), buf,
                           CMD_PACKET_SIZE, &actual_len, CMD_TIMEOUT);
        if (ret < 0) {
-               dev_err(&dev->udev->dev, "send command %#x: error %d\n",
-                       cmd, ret);
+               if (cmd != CMD_GET_LINK_STATUS)
+                       dev_err(&dev->udev->dev, "send command %#x: error %d\n",
+                               cmd, ret);
                goto end;
        }
 
@@ -90,8 +90,10 @@ static int cx82310_cmd(struct usbnet *dev, enum cx82310_cmd cmd, bool reply,
                                           buf, CMD_PACKET_SIZE, &actual_len,
                                           CMD_TIMEOUT);
                        if (ret < 0) {
-                               dev_err(&dev->udev->dev,
-                                       "reply receive error %d\n", ret);
+                               if (cmd != CMD_GET_LINK_STATUS)
+                                       dev_err(&dev->udev->dev,
+                                               "reply receive error %d\n",
+                                               ret);
                                goto end;
                        }
                        if (actual_len > 0)
@@ -134,6 +136,8 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf)
        int ret;
        char buf[15];
        struct usb_device *udev = dev->udev;
+       u8 link[3];
+       int timeout = 50;
 
        /* avoid ADSL modems - continue only if iProduct is "USB NET CARD" */
        if (usb_string(udev, udev->descriptor.iProduct, buf, sizeof(buf)) > 0
@@ -160,6 +164,20 @@ static int cx82310_bind(struct usbnet *dev, struct usb_interface *intf)
        if (!dev->partial_data)
                return -ENOMEM;
 
+       /* wait for firmware to become ready (indicated by the link being up) */
+       while (--timeout) {
+               ret = cx82310_cmd(dev, CMD_GET_LINK_STATUS, true, NULL, 0,
+                                 link, sizeof(link));
+               /* the command can time out during boot - it's not an error */
+               if (!ret && link[0] == 1 && link[2] == 1)
+                       break;
+               msleep(500);
+       };
+       if (!timeout) {
+               dev_err(&udev->dev, "firmware not ready in time\n");
+               return -ETIMEDOUT;
+       }
+
        /* enable ethernet mode (?) */
        ret = cx82310_cmd(dev, CMD_ETHERNET_MODE, true, "\x01", 1, NULL, 0);
        if (ret) {
@@ -300,9 +318,18 @@ static const struct driver_info    cx82310_info = {
        .tx_fixup       = cx82310_tx_fixup,
 };
 
+#define USB_DEVICE_CLASS(vend, prod, cl, sc, pr) \
+       .match_flags = USB_DEVICE_ID_MATCH_DEVICE | \
+                      USB_DEVICE_ID_MATCH_DEV_INFO, \
+       .idVendor = (vend), \
+       .idProduct = (prod), \
+       .bDeviceClass = (cl), \
+       .bDeviceSubClass = (sc), \
+       .bDeviceProtocol = (pr)
+
 static const struct usb_device_id products[] = {
        {
-               USB_DEVICE_AND_INTERFACE_INFO(0x0572, 0xcb01, 0xff, 0, 0),
+               USB_DEVICE_CLASS(0x0572, 0xcb01, 0xff, 0, 0),
                .driver_info = (unsigned long) &cx82310_info
        },
        { },
index 438fc6b..9f7c0ab 100644 (file)
@@ -492,6 +492,7 @@ enum rtl8152_flags {
 /* Define these values to match your device */
 #define VENDOR_ID_REALTEK              0x0bda
 #define VENDOR_ID_SAMSUNG              0x04e8
+#define VENDOR_ID_LENOVO               0x17ef
 
 #define MCU_TYPE_PLA                   0x0100
 #define MCU_TYPE_USB                   0x0000
@@ -4037,6 +4038,7 @@ static struct usb_device_id rtl8152_table[] = {
        {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8152)},
        {REALTEK_USB_DEVICE(VENDOR_ID_REALTEK, 0x8153)},
        {REALTEK_USB_DEVICE(VENDOR_ID_SAMSUNG, 0xa101)},
+       {REALTEK_USB_DEVICE(VENDOR_ID_LENOVO,  0x7205)},
        {}
 };
 
index b94a0fb..953de13 100644 (file)
@@ -144,6 +144,7 @@ static struct sk_buff *sr_tx_fixup(struct usbnet *dev, struct sk_buff *skb,
                skb_put(skb, sizeof(padbytes));
        }
 
+       usbnet_set_skb_tx_stats(skb, 1, 0);
        return skb;
 }
 
index 449835f..777757a 100644 (file)
@@ -1188,8 +1188,7 @@ static void tx_complete (struct urb *urb)
        struct usbnet           *dev = entry->dev;
 
        if (urb->status == 0) {
-               if (!(dev->driver_info->flags & FLAG_MULTI_PACKET))
-                       dev->net->stats.tx_packets++;
+               dev->net->stats.tx_packets += entry->packets;
                dev->net->stats.tx_bytes += entry->length;
        } else {
                dev->net->stats.tx_errors++;
@@ -1347,7 +1346,19 @@ netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
                } else
                        urb->transfer_flags |= URB_ZERO_PACKET;
        }
-       entry->length = urb->transfer_buffer_length = length;
+       urb->transfer_buffer_length = length;
+
+       if (info->flags & FLAG_MULTI_PACKET) {
+               /* Driver has set number of packets and a length delta.
+                * Calculate the complete length and ensure that it's
+                * positive.
+                */
+               entry->length += length;
+               if (WARN_ON_ONCE(entry->length <= 0))
+                       entry->length = length;
+       } else {
+               usbnet_set_skb_tx_stats(skb, 1, length);
+       }
 
        spin_lock_irqsave(&dev->txq.lock, flags);
        retval = usb_autopm_get_interface_async(dev->intf);
index f1ff366..59b0e97 100644 (file)
@@ -1448,8 +1448,10 @@ static void virtnet_free_queues(struct virtnet_info *vi)
 {
        int i;
 
-       for (i = 0; i < vi->max_queue_pairs; i++)
+       for (i = 0; i < vi->max_queue_pairs; i++) {
+               napi_hash_del(&vi->rq[i].napi);
                netif_napi_del(&vi->rq[i].napi);
+       }
 
        kfree(vi->rq);
        kfree(vi->sq);
@@ -1948,11 +1950,8 @@ static int virtnet_freeze(struct virtio_device *vdev)
        cancel_delayed_work_sync(&vi->refill);
 
        if (netif_running(vi->dev)) {
-               for (i = 0; i < vi->max_queue_pairs; i++) {
+               for (i = 0; i < vi->max_queue_pairs; i++)
                        napi_disable(&vi->rq[i].napi);
-                       napi_hash_del(&vi->rq[i].napi);
-                       netif_napi_del(&vi->rq[i].napi);
-               }
        }
 
        remove_vq_common(vi);
index 1e0a775..f8528a4 100644 (file)
@@ -1218,7 +1218,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
                        goto drop;
 
                flags &= ~VXLAN_HF_RCO;
-               vni &= VXLAN_VID_MASK;
+               vni &= VXLAN_VNI_MASK;
        }
 
        /* For backwards compatibility, only allow reserved fields to be
@@ -1239,7 +1239,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
                flags &= ~VXLAN_GBP_USED_BITS;
        }
 
-       if (flags || (vni & ~VXLAN_VID_MASK)) {
+       if (flags || vni & ~VXLAN_VNI_MASK) {
                /* If there are any unprocessed flags remaining treat
                 * this as a malformed packet. This behavior diverges from
                 * VXLAN RFC (RFC7348) which stipulates that bits in reserved
index cb366ad..f50a6bc 100644 (file)
@@ -219,12 +219,15 @@ void ath9k_beacon_remove_slot(struct ath_softc *sc, struct ieee80211_vif *vif)
        struct ath_common *common = ath9k_hw_common(sc->sc_ah);
        struct ath_vif *avp = (void *)vif->drv_priv;
        struct ath_buf *bf = avp->av_bcbuf;
+       struct ath_beacon_config *cur_conf = &sc->cur_chan->beacon;
 
        ath_dbg(common, CONFIG, "Removing interface at beacon slot: %d\n",
                avp->av_bslot);
 
        tasklet_disable(&sc->bcon_tasklet);
 
+       cur_conf->enable_beacon &= ~BIT(avp->av_bslot);
+
        if (bf && bf->bf_mpdu) {
                struct sk_buff *skb = bf->bf_mpdu;
                dma_unmap_single(sc->dev, bf->bf_buf_addr,
@@ -521,8 +524,7 @@ static bool ath9k_allow_beacon_config(struct ath_softc *sc,
        }
 
        if (sc->sc_ah->opmode == NL80211_IFTYPE_AP) {
-               if ((vif->type != NL80211_IFTYPE_AP) ||
-                   (sc->nbcnvifs > 1)) {
+               if (vif->type != NL80211_IFTYPE_AP) {
                        ath_dbg(common, CONFIG,
                                "An AP interface is already present !\n");
                        return false;
@@ -616,12 +618,14 @@ void ath9k_beacon_config(struct ath_softc *sc, struct ieee80211_vif *vif,
         * enabling/disabling SWBA.
         */
        if (changed & BSS_CHANGED_BEACON_ENABLED) {
-               if (!bss_conf->enable_beacon &&
-                   (sc->nbcnvifs <= 1)) {
-                       cur_conf->enable_beacon = false;
-               } else if (bss_conf->enable_beacon) {
-                       cur_conf->enable_beacon = true;
-                       ath9k_cache_beacon_config(sc, ctx, bss_conf);
+               bool enabled = cur_conf->enable_beacon;
+
+               if (!bss_conf->enable_beacon) {
+                       cur_conf->enable_beacon &= ~BIT(avp->av_bslot);
+               } else {
+                       cur_conf->enable_beacon |= BIT(avp->av_bslot);
+                       if (!enabled)
+                               ath9k_cache_beacon_config(sc, ctx, bss_conf);
                }
        }
 
index 2b79a56..d237373 100644 (file)
@@ -54,7 +54,7 @@ struct ath_beacon_config {
        u16 dtim_period;
        u16 bmiss_timeout;
        u8 dtim_count;
-       bool enable_beacon;
+       u8 enable_beacon;
        bool ibss_creator;
        u32 nexttbtt;
        u32 intval;
index 60aa8d7..8529014 100644 (file)
@@ -424,7 +424,7 @@ static void ath9k_hw_init_defaults(struct ath_hw *ah)
        ah->power_mode = ATH9K_PM_UNDEFINED;
        ah->htc_reset_init = true;
 
-       ah->tpc_enabled = true;
+       ah->tpc_enabled = false;
 
        ah->ani_function = ATH9K_ANI_ALL;
        if (!AR_SREV_9300_20_OR_LATER(ah))
index ccbdb05..75345c1 100644 (file)
@@ -5370,6 +5370,7 @@ static void b43_supported_bands(struct b43_wldev *dev, bool *have_2ghz_phy,
        case 0x432a: /* BCM4321 */
        case 0x432d: /* BCM4322 */
        case 0x4352: /* BCM43222 */
+       case 0x435a: /* BCM43228 */
        case 0x4333: /* BCM4331 */
        case 0x43a2: /* BCM4360 */
        case 0x43b3: /* BCM4352 */
index defb7a4..7748a1c 100644 (file)
@@ -126,7 +126,8 @@ void brcmf_feat_attach(struct brcmf_pub *drvr)
        brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_MCHAN, "mchan");
        if (drvr->bus_if->wowl_supported)
                brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_WOWL, "wowl");
-       brcmf_feat_iovar_int_set(ifp, BRCMF_FEAT_MBSS, "mbss", 0);
+       if (drvr->bus_if->chip != BRCM_CC_43362_CHIP_ID)
+               brcmf_feat_iovar_int_set(ifp, BRCMF_FEAT_MBSS, "mbss", 0);
 
        /* set chip related quirks */
        switch (drvr->bus_if->chip) {
index 50cdf70..8eff275 100644 (file)
@@ -39,13 +39,22 @@ static int brcmf_cfg80211_vndr_cmds_dcmd_handler(struct wiphy *wiphy,
        void *dcmd_buf = NULL, *wr_pointer;
        u16 msglen, maxmsglen = PAGE_SIZE - 0x100;
 
-       brcmf_dbg(TRACE, "cmd %x set %d len %d\n", cmdhdr->cmd, cmdhdr->set,
-                 cmdhdr->len);
+       if (len < sizeof(*cmdhdr)) {
+               brcmf_err("vendor command too short: %d\n", len);
+               return -EINVAL;
+       }
 
        vif = container_of(wdev, struct brcmf_cfg80211_vif, wdev);
        ifp = vif->ifp;
 
-       len -= sizeof(struct brcmf_vndr_dcmd_hdr);
+       brcmf_dbg(TRACE, "ifidx=%d, cmd=%d\n", ifp->ifidx, cmdhdr->cmd);
+
+       if (cmdhdr->offset > len) {
+               brcmf_err("bad buffer offset %d > %d\n", cmdhdr->offset, len);
+               return -EINVAL;
+       }
+
+       len -= cmdhdr->offset;
        ret_len = cmdhdr->len;
        if (ret_len > 0 || len > 0) {
                if (len > BRCMF_DCMD_MAXLEN) {
index a6f22c3..3811878 100644 (file)
@@ -708,7 +708,6 @@ struct iwl_priv {
        unsigned long reload_jiffies;
        int reload_count;
        bool ucode_loaded;
-       bool init_ucode_run;            /* Don't run init uCode again */
 
        u8 plcp_delta_threshold;
 
index 47e64e8..cceb026 100644 (file)
@@ -1114,16 +1114,17 @@ static void iwlagn_mac_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
        scd_queues &= ~(BIT(IWL_IPAN_CMD_QUEUE_NUM) |
                        BIT(IWL_DEFAULT_CMD_QUEUE_NUM));
 
-       if (vif)
-               scd_queues &= ~BIT(vif->hw_queue[IEEE80211_AC_VO]);
-
-       IWL_DEBUG_TX_QUEUES(priv, "Flushing SCD queues: 0x%x\n", scd_queues);
-       if (iwlagn_txfifo_flush(priv, scd_queues)) {
-               IWL_ERR(priv, "flush request fail\n");
-               goto done;
+       if (drop) {
+               IWL_DEBUG_TX_QUEUES(priv, "Flushing SCD queues: 0x%x\n",
+                                   scd_queues);
+               if (iwlagn_txfifo_flush(priv, scd_queues)) {
+                       IWL_ERR(priv, "flush request fail\n");
+                       goto done;
+               }
        }
+
        IWL_DEBUG_TX_QUEUES(priv, "wait transmit/flush all frames\n");
-       iwl_trans_wait_tx_queue_empty(priv->trans, 0xffffffff);
+       iwl_trans_wait_tx_queue_empty(priv->trans, scd_queues);
 done:
        mutex_unlock(&priv->mutex);
        IWL_DEBUG_MAC80211(priv, "leave\n");
index 4dbef7e..5244e43 100644 (file)
@@ -418,9 +418,6 @@ int iwl_run_init_ucode(struct iwl_priv *priv)
        if (!priv->fw->img[IWL_UCODE_INIT].sec[0].len)
                return 0;
 
-       if (priv->init_ucode_run)
-               return 0;
-
        iwl_init_notification_wait(&priv->notif_wait, &calib_wait,
                                   calib_complete, ARRAY_SIZE(calib_complete),
                                   iwlagn_wait_calib, priv);
@@ -440,8 +437,6 @@ int iwl_run_init_ucode(struct iwl_priv *priv)
         */
        ret = iwl_wait_notification(&priv->notif_wait, &calib_wait,
                                        UCODE_CALIB_TIMEOUT);
-       if (!ret)
-               priv->init_ucode_run = true;
 
        goto out;
 
index c3817fa..06f6cc0 100644 (file)
@@ -95,7 +95,8 @@ static const struct iwl_eeprom_params iwl1000_eeprom_params = {
        .nvm_calib_ver = EEPROM_1000_TX_POWER_VERSION,  \
        .base_params = &iwl1000_base_params,                    \
        .eeprom_params = &iwl1000_eeprom_params,                \
-       .led_mode = IWL_LED_BLINK
+       .led_mode = IWL_LED_BLINK,                              \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl1000_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 1000 BGN",
@@ -121,7 +122,8 @@ const struct iwl_cfg iwl1000_bg_cfg = {
        .base_params = &iwl1000_base_params,                    \
        .eeprom_params = &iwl1000_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
-       .rx_with_siso_diversity = true
+       .rx_with_siso_diversity = true,                         \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl100_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 100 BGN",
index 21e5d08..890b95f 100644 (file)
@@ -123,7 +123,9 @@ static const struct iwl_eeprom_params iwl20x0_eeprom_params = {
        .nvm_calib_ver = EEPROM_2000_TX_POWER_VERSION,          \
        .base_params = &iwl2000_base_params,                    \
        .eeprom_params = &iwl20x0_eeprom_params,                \
-       .led_mode = IWL_LED_RF_STATE
+       .led_mode = IWL_LED_RF_STATE,                           \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
+
 
 const struct iwl_cfg iwl2000_2bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 2200 BGN",
@@ -149,7 +151,8 @@ const struct iwl_cfg iwl2000_2bgn_d_cfg = {
        .nvm_calib_ver = EEPROM_2000_TX_POWER_VERSION,  \
        .base_params = &iwl2030_base_params,                    \
        .eeprom_params = &iwl20x0_eeprom_params,                \
-       .led_mode = IWL_LED_RF_STATE
+       .led_mode = IWL_LED_RF_STATE,                           \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl2030_2bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 2230 BGN",
@@ -170,7 +173,8 @@ const struct iwl_cfg iwl2030_2bgn_cfg = {
        .base_params = &iwl2000_base_params,                    \
        .eeprom_params = &iwl20x0_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
-       .rx_with_siso_diversity = true
+       .rx_with_siso_diversity = true,                         \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl105_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 105 BGN",
@@ -197,7 +201,8 @@ const struct iwl_cfg iwl105_bgn_d_cfg = {
        .base_params = &iwl2030_base_params,                    \
        .eeprom_params = &iwl20x0_eeprom_params,                \
        .led_mode = IWL_LED_RF_STATE,                           \
-       .rx_with_siso_diversity = true
+       .rx_with_siso_diversity = true,                         \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl135_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N 135 BGN",
index 332bbed..724194e 100644 (file)
@@ -93,7 +93,8 @@ static const struct iwl_eeprom_params iwl5000_eeprom_params = {
        .nvm_calib_ver = EEPROM_5000_TX_POWER_VERSION,  \
        .base_params = &iwl5000_base_params,                    \
        .eeprom_params = &iwl5000_eeprom_params,                \
-       .led_mode = IWL_LED_BLINK
+       .led_mode = IWL_LED_BLINK,                              \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl5300_agn_cfg = {
        .name = "Intel(R) Ultimate N WiFi Link 5300 AGN",
@@ -158,7 +159,8 @@ const struct iwl_cfg iwl5350_agn_cfg = {
        .base_params = &iwl5000_base_params,                    \
        .eeprom_params = &iwl5000_eeprom_params,                \
        .led_mode = IWL_LED_BLINK,                              \
-       .internal_wimax_coex = true
+       .internal_wimax_coex = true,                            \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl5150_agn_cfg = {
        .name = "Intel(R) WiMAX/WiFi Link 5150 AGN",
index 8f2c3c8..21b2630 100644 (file)
@@ -145,7 +145,8 @@ static const struct iwl_eeprom_params iwl6000_eeprom_params = {
        .nvm_calib_ver = EEPROM_6005_TX_POWER_VERSION,  \
        .base_params = &iwl6000_g2_base_params,                 \
        .eeprom_params = &iwl6000_eeprom_params,                \
-       .led_mode = IWL_LED_RF_STATE
+       .led_mode = IWL_LED_RF_STATE,                           \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl6005_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N 6205 AGN",
@@ -199,7 +200,8 @@ const struct iwl_cfg iwl6005_2agn_mow2_cfg = {
        .nvm_calib_ver = EEPROM_6030_TX_POWER_VERSION,  \
        .base_params = &iwl6000_g2_base_params,                 \
        .eeprom_params = &iwl6000_eeprom_params,                \
-       .led_mode = IWL_LED_RF_STATE
+       .led_mode = IWL_LED_RF_STATE,                           \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl6030_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N 6230 AGN",
@@ -235,7 +237,8 @@ const struct iwl_cfg iwl6030_2bg_cfg = {
        .nvm_calib_ver = EEPROM_6030_TX_POWER_VERSION,  \
        .base_params = &iwl6000_g2_base_params,                 \
        .eeprom_params = &iwl6000_eeprom_params,                \
-       .led_mode = IWL_LED_RF_STATE
+       .led_mode = IWL_LED_RF_STATE,                           \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl6035_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N 6235 AGN",
@@ -290,7 +293,8 @@ const struct iwl_cfg iwl130_bg_cfg = {
        .nvm_calib_ver = EEPROM_6000_TX_POWER_VERSION,  \
        .base_params = &iwl6000_base_params,                    \
        .eeprom_params = &iwl6000_eeprom_params,                \
-       .led_mode = IWL_LED_BLINK
+       .led_mode = IWL_LED_BLINK,                              \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl6000i_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N 6200 AGN",
@@ -322,7 +326,8 @@ const struct iwl_cfg iwl6000i_2bg_cfg = {
        .base_params = &iwl6050_base_params,                    \
        .eeprom_params = &iwl6000_eeprom_params,                \
        .led_mode = IWL_LED_BLINK,                              \
-       .internal_wimax_coex = true
+       .internal_wimax_coex = true,                            \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl6050_2agn_cfg = {
        .name = "Intel(R) Centrino(R) Advanced-N + WiMAX 6250 AGN",
@@ -347,7 +352,8 @@ const struct iwl_cfg iwl6050_2abg_cfg = {
        .base_params = &iwl6050_base_params,                    \
        .eeprom_params = &iwl6000_eeprom_params,                \
        .led_mode = IWL_LED_BLINK,                              \
-       .internal_wimax_coex = true
+       .internal_wimax_coex = true,                            \
+       .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K
 
 const struct iwl_cfg iwl6150_bgn_cfg = {
        .name = "Intel(R) Centrino(R) Wireless-N + WiMAX 6150 BGN",
index 996e7f1..c7154ac 100644 (file)
@@ -1257,6 +1257,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context)
                                op->name, err);
 #endif
        }
+       kfree(pieces);
        return;
 
  try_again:
index 1ec4d55..7810c41 100644 (file)
@@ -793,7 +793,8 @@ static void iwl_mvm_bt_notif_iterator(void *_data, u8 *mac,
        if (!vif->bss_conf.assoc)
                smps_mode = IEEE80211_SMPS_AUTOMATIC;
 
-       if (IWL_COEX_IS_RRC_ON(mvm->last_bt_notif.ttc_rrc_status,
+       if (mvmvif->phy_ctxt &&
+           IWL_COEX_IS_RRC_ON(mvm->last_bt_notif.ttc_rrc_status,
                               mvmvif->phy_ctxt->id))
                smps_mode = IEEE80211_SMPS_AUTOMATIC;
 
index d530ef3..542ee74 100644 (file)
@@ -832,7 +832,8 @@ static void iwl_mvm_bt_notif_iterator(void *_data, u8 *mac,
        if (!vif->bss_conf.assoc)
                smps_mode = IEEE80211_SMPS_AUTOMATIC;
 
-       if (data->notif->rrc_enabled & BIT(mvmvif->phy_ctxt->id))
+       if (mvmvif->phy_ctxt &&
+           data->notif->rrc_enabled & BIT(mvmvif->phy_ctxt->id))
                smps_mode = IEEE80211_SMPS_AUTOMATIC;
 
        IWL_DEBUG_COEX(data->mvm,
index 1ff7ec0..09654e7 100644 (file)
@@ -405,7 +405,10 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
                hw->wiphy->bands[IEEE80211_BAND_5GHZ] =
                        &mvm->nvm_data->bands[IEEE80211_BAND_5GHZ];
 
-               if (mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_BEAMFORMER)
+               if ((mvm->fw->ucode_capa.capa[0] &
+                    IWL_UCODE_TLV_CAPA_BEAMFORMER) &&
+                   (mvm->fw->ucode_capa.api[0] &
+                    IWL_UCODE_TLV_API_LQ_SS_PARAMS))
                        hw->wiphy->bands[IEEE80211_BAND_5GHZ]->vht_cap.cap |=
                                IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE;
        }
@@ -2215,7 +2218,19 @@ static void iwl_mvm_mac_cancel_hw_scan(struct ieee80211_hw *hw,
 
        mutex_lock(&mvm->mutex);
 
-       iwl_mvm_cancel_scan(mvm);
+       /* Due to a race condition, it's possible that mac80211 asks
+        * us to stop a hw_scan when it's already stopped.  This can
+        * happen, for instance, if we stopped the scan ourselves,
+        * called ieee80211_scan_completed() and the userspace called
+        * cancel scan before ieee80211_scan_work() could run.
+        * To handle that, simply return if the scan is not running.
+       */
+       /* FIXME: for now, we ignore this race for UMAC scans, since
+        * they don't set the scan_status.
+        */
+       if ((mvm->scan_status == IWL_MVM_SCAN_OS) ||
+           (mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_UMAC_SCAN))
+               iwl_mvm_cancel_scan(mvm);
 
        mutex_unlock(&mvm->mutex);
 }
@@ -2559,12 +2574,29 @@ static int iwl_mvm_mac_sched_scan_stop(struct ieee80211_hw *hw,
        int ret;
 
        mutex_lock(&mvm->mutex);
+
+       /* Due to a race condition, it's possible that mac80211 asks
+        * us to stop a sched_scan when it's already stopped.  This
+        * can happen, for instance, if we stopped the scan ourselves,
+        * called ieee80211_sched_scan_stopped() and the userspace called
+        * stop sched scan before ieee80211_sched_scan_stopped_work()
+        * could run.  To handle this, simply return if the scan is
+        * not running.
+       */
+       /* FIXME: for now, we ignore this race for UMAC scans, since
+        * they don't set the scan_status.
+        */
+       if (mvm->scan_status != IWL_MVM_SCAN_SCHED &&
+           !(mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_UMAC_SCAN)) {
+               mutex_unlock(&mvm->mutex);
+               return 0;
+       }
+
        ret = iwl_mvm_scan_offload_stop(mvm, false);
        mutex_unlock(&mvm->mutex);
        iwl_mvm_wait_for_async_handlers(mvm);
 
        return ret;
-
 }
 
 static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw,
index 194bd1f..078f24c 100644 (file)
@@ -134,9 +134,12 @@ enum rs_column_mode {
 #define MAX_NEXT_COLUMNS 7
 #define MAX_COLUMN_CHECKS 3
 
+struct rs_tx_column;
+
 typedef bool (*allow_column_func_t) (struct iwl_mvm *mvm,
                                     struct ieee80211_sta *sta,
-                                    struct iwl_scale_tbl_info *tbl);
+                                    struct iwl_scale_tbl_info *tbl,
+                                    const struct rs_tx_column *next_col);
 
 struct rs_tx_column {
        enum rs_column_mode mode;
@@ -147,13 +150,15 @@ struct rs_tx_column {
 };
 
 static bool rs_ant_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
-                        struct iwl_scale_tbl_info *tbl)
+                        struct iwl_scale_tbl_info *tbl,
+                        const struct rs_tx_column *next_col)
 {
-       return iwl_mvm_bt_coex_is_ant_avail(mvm, tbl->rate.ant);
+       return iwl_mvm_bt_coex_is_ant_avail(mvm, next_col->ant);
 }
 
 static bool rs_mimo_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
-                         struct iwl_scale_tbl_info *tbl)
+                         struct iwl_scale_tbl_info *tbl,
+                         const struct rs_tx_column *next_col)
 {
        if (!sta->ht_cap.ht_supported)
                return false;
@@ -171,7 +176,8 @@ static bool rs_mimo_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 }
 
 static bool rs_siso_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
-                         struct iwl_scale_tbl_info *tbl)
+                         struct iwl_scale_tbl_info *tbl,
+                         const struct rs_tx_column *next_col)
 {
        if (!sta->ht_cap.ht_supported)
                return false;
@@ -180,7 +186,8 @@ static bool rs_siso_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 }
 
 static bool rs_sgi_allow(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
-                        struct iwl_scale_tbl_info *tbl)
+                        struct iwl_scale_tbl_info *tbl,
+                        const struct rs_tx_column *next_col)
 {
        struct rs_rate *rate = &tbl->rate;
        struct ieee80211_sta_ht_cap *ht_cap = &sta->ht_cap;
@@ -1271,6 +1278,9 @@ static void rs_mac80211_tx_status(void *mvm_r,
        struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 
+       if (!iwl_mvm_sta_from_mac80211(sta)->vif)
+               return;
+
        if (!ieee80211_is_data(hdr->frame_control) ||
            info->flags & IEEE80211_TX_CTL_NO_ACK)
                return;
@@ -1590,7 +1600,7 @@ static enum rs_column rs_get_next_column(struct iwl_mvm *mvm,
 
                for (j = 0; j < MAX_COLUMN_CHECKS; j++) {
                        allow_func = next_col->checks[j];
-                       if (allow_func && !allow_func(mvm, sta, tbl))
+                       if (allow_func && !allow_func(mvm, sta, tbl, next_col))
                                break;
                }
 
@@ -2504,6 +2514,14 @@ static void rs_get_rate(void *mvm_r, struct ieee80211_sta *sta, void *mvm_sta,
        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
        struct iwl_lq_sta *lq_sta = mvm_sta;
 
+       if (sta && !iwl_mvm_sta_from_mac80211(sta)->vif) {
+               /* if vif isn't initialized mvm doesn't know about
+                * this station, so don't do anything with it
+                */
+               sta = NULL;
+               mvm_sta = NULL;
+       }
+
        /* TODO: handle rate_idx_mask and rate_idx_mcs_mask */
 
        /* Treat uninitialized rate scaling data same as non-existing. */
@@ -2820,6 +2838,9 @@ static void rs_rate_update(void *mvm_r,
                        (struct iwl_op_mode *)mvm_r;
        struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode);
 
+       if (!iwl_mvm_sta_from_mac80211(sta)->vif)
+               return;
+
        /* Stop any ongoing aggregations as rs starts off assuming no agg */
        for (tid = 0; tid < IWL_MAX_TID_COUNT; tid++)
                ieee80211_stop_tx_ba_session(sta, tid);
@@ -3580,9 +3601,15 @@ static ssize_t iwl_dbgfs_ss_force_write(struct iwl_lq_sta *lq_sta, char *buf,
 
 MVM_DEBUGFS_READ_WRITE_FILE_OPS(ss_force, 32);
 
-static void rs_add_debugfs(void *mvm, void *mvm_sta, struct dentry *dir)
+static void rs_add_debugfs(void *mvm, void *priv_sta, struct dentry *dir)
 {
-       struct iwl_lq_sta *lq_sta = mvm_sta;
+       struct iwl_lq_sta *lq_sta = priv_sta;
+       struct iwl_mvm_sta *mvmsta;
+
+       mvmsta = container_of(lq_sta, struct iwl_mvm_sta, lq_sta);
+
+       if (!mvmsta->vif)
+               return;
 
        debugfs_create_file("rate_scale_table", S_IRUSR | S_IWUSR, dir,
                            lq_sta, &rs_sta_dbgfs_scale_table_ops);
index 7e9aa3c..c47c805 100644 (file)
@@ -1128,8 +1128,10 @@ int iwl_mvm_scan_offload_stop(struct iwl_mvm *mvm, bool notify)
        if (mvm->scan_status == IWL_MVM_SCAN_NONE)
                return 0;
 
-       if (iwl_mvm_is_radio_killed(mvm))
+       if (iwl_mvm_is_radio_killed(mvm)) {
+               ret = 0;
                goto out;
+       }
 
        if (mvm->scan_status != IWL_MVM_SCAN_SCHED &&
            (!(mvm->fw->ucode_capa.api[0] & IWL_UCODE_TLV_API_LMAC_SCAN) ||
@@ -1148,16 +1150,14 @@ int iwl_mvm_scan_offload_stop(struct iwl_mvm *mvm, bool notify)
                IWL_DEBUG_SCAN(mvm, "Send stop %sscan failed %d\n",
                               sched ? "offloaded " : "", ret);
                iwl_remove_notification(&mvm->notif_wait, &wait_scan_done);
-               return ret;
+               goto out;
        }
 
        IWL_DEBUG_SCAN(mvm, "Successfully sent stop %sscan\n",
                       sched ? "offloaded " : "");
 
        ret = iwl_wait_notification(&mvm->notif_wait, &wait_scan_done, 1 * HZ);
-       if (ret)
-               return ret;
-
+out:
        /*
         * Clear the scan status so the next scan requests will succeed. This
         * also ensures the Rx handler doesn't do anything, as the scan was
@@ -1167,7 +1167,6 @@ int iwl_mvm_scan_offload_stop(struct iwl_mvm *mvm, bool notify)
        if (mvm->scan_status == IWL_MVM_SCAN_OS)
                iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN);
 
-out:
        mvm->scan_status = IWL_MVM_SCAN_NONE;
 
        if (notify) {
@@ -1177,7 +1176,7 @@ out:
                        ieee80211_scan_completed(mvm->hw, true);
        }
 
-       return 0;
+       return ret;
 }
 
 static void iwl_mvm_unified_scan_fill_tx_cmd(struct iwl_mvm *mvm,
index 54fafbf..4b81c0b 100644 (file)
@@ -197,6 +197,8 @@ iwl_mvm_te_handle_notify_csa(struct iwl_mvm *mvm,
                             struct iwl_time_event_notif *notif)
 {
        if (!le32_to_cpu(notif->status)) {
+               if (te_data->vif->type == NL80211_IFTYPE_STATION)
+                       ieee80211_connection_loss(te_data->vif);
                IWL_DEBUG_TE(mvm, "CSA time event failed to start\n");
                iwl_mvm_te_clear_data(mvm, te_data);
                return;
@@ -750,8 +752,7 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm)
         * request
         */
        list_for_each_entry(te_data, &mvm->time_event_list, list) {
-               if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE &&
-                   te_data->running) {
+               if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) {
                        mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif);
                        is_p2p = true;
                        goto remove_te;
@@ -766,10 +767,8 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm)
         * request
         */
        list_for_each_entry(te_data, &mvm->aux_roc_te_list, list) {
-               if (te_data->running) {
-                       mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif);
-                       goto remove_te;
-               }
+               mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif);
+               goto remove_te;
        }
 
 remove_te:
index 07304e1..96a0540 100644 (file)
@@ -949,8 +949,10 @@ int iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb,
        mvmsta = iwl_mvm_sta_from_mac80211(sta);
        tid_data = &mvmsta->tid_data[tid];
 
-       if (WARN_ONCE(tid_data->txq_id != scd_flow, "Q %d, tid %d, flow %d",
-                     tid_data->txq_id, tid, scd_flow)) {
+       if (tid_data->txq_id != scd_flow) {
+               IWL_ERR(mvm,
+                       "invalid BA notification: Q %d, tid %d, flow %d\n",
+                       tid_data->txq_id, tid, scd_flow);
                rcu_read_unlock();
                return 0;
        }
index dbd6bcf..686dd30 100644 (file)
@@ -368,10 +368,12 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 /* 3165 Series */
        {IWL_PCI_DEVICE(0x3165, 0x4010, iwl3165_2ac_cfg)},
        {IWL_PCI_DEVICE(0x3165, 0x4012, iwl3165_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x3165, 0x4110, iwl3165_2ac_cfg)},
-       {IWL_PCI_DEVICE(0x3165, 0x4210, iwl3165_2ac_cfg)},
        {IWL_PCI_DEVICE(0x3165, 0x4410, iwl3165_2ac_cfg)},
        {IWL_PCI_DEVICE(0x3165, 0x4510, iwl3165_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x3165, 0x4110, iwl3165_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x3166, 0x4310, iwl3165_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x3166, 0x4210, iwl3165_2ac_cfg)},
+       {IWL_PCI_DEVICE(0x3165, 0x8010, iwl3165_2ac_cfg)},
 
 /* 7265 Series */
        {IWL_PCI_DEVICE(0x095A, 0x5010, iwl7265_2ac_cfg)},
index 1d46774..074f716 100644 (file)
@@ -1386,8 +1386,11 @@ u8 rtl_is_special_data(struct ieee80211_hw *hw, struct sk_buff *skb, u8 is_tx)
                }
 
                return true;
-       } else if (0x86DD == ether_type) {
-               return true;
+       } else if (ETH_P_IPV6 == ether_type) {
+               /* TODO: Handle any IPv6 cases that need special handling.
+                * For now, always return false
+                */
+               goto end;
        }
 
 end:
index a62170e..8c45cf4 100644 (file)
@@ -1124,12 +1124,22 @@ static void _rtl_pci_prepare_bcn_tasklet(struct ieee80211_hw *hw)
        /*This is for new trx flow*/
        struct rtl_tx_buffer_desc *pbuffer_desc = NULL;
        u8 temp_one = 1;
+       u8 *entry;
 
        memset(&tcb_desc, 0, sizeof(struct rtl_tcb_desc));
        ring = &rtlpci->tx_ring[BEACON_QUEUE];
        pskb = __skb_dequeue(&ring->queue);
-       if (pskb)
+       if (rtlpriv->use_new_trx_flow)
+               entry = (u8 *)(&ring->buffer_desc[ring->idx]);
+       else
+               entry = (u8 *)(&ring->desc[ring->idx]);
+       if (pskb) {
+               pci_unmap_single(rtlpci->pdev,
+                                rtlpriv->cfg->ops->get_desc(
+                                (u8 *)entry, true, HW_DESC_TXBUFF_ADDR),
+                                pskb->len, PCI_DMA_TODEVICE);
                kfree_skb(pskb);
+       }
 
        /*NB: the beacon data buffer must be 32-bit aligned. */
        pskb = ieee80211_beacon_get(hw, mac->vif);
index f38227a..3aa8648 100644 (file)
@@ -340,12 +340,11 @@ static void xenvif_get_ethtool_stats(struct net_device *dev,
        unsigned int num_queues = vif->num_queues;
        int i;
        unsigned int queue_index;
-       struct xenvif_stats *vif_stats;
 
        for (i = 0; i < ARRAY_SIZE(xenvif_stats); i++) {
                unsigned long accum = 0;
                for (queue_index = 0; queue_index < num_queues; ++queue_index) {
-                       vif_stats = &vif->queues[queue_index].stats;
+                       void *vif_stats = &vif->queues[queue_index].stats;
                        accum += *(unsigned long *)(vif_stats + xenvif_stats[i].offset);
                }
                data[i] = accum;
index c4d68d7..997cf09 100644 (file)
@@ -96,6 +96,7 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
 static void make_tx_response(struct xenvif_queue *queue,
                             struct xen_netif_tx_request *txp,
                             s8       st);
+static void push_tx_responses(struct xenvif_queue *queue);
 
 static inline int tx_work_todo(struct xenvif_queue *queue);
 
@@ -655,15 +656,10 @@ static void xenvif_tx_err(struct xenvif_queue *queue,
        unsigned long flags;
 
        do {
-               int notify;
-
                spin_lock_irqsave(&queue->response_lock, flags);
                make_tx_response(queue, txp, XEN_NETIF_RSP_ERROR);
-               RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
+               push_tx_responses(queue);
                spin_unlock_irqrestore(&queue->response_lock, flags);
-               if (notify)
-                       notify_remote_via_irq(queue->tx_irq);
-
                if (cons == end)
                        break;
                txp = RING_GET_REQUEST(&queue->tx, cons++);
@@ -1349,7 +1345,7 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
 {
        unsigned int offset = skb_headlen(skb);
        skb_frag_t frags[MAX_SKB_FRAGS];
-       int i;
+       int i, f;
        struct ubuf_info *uarg;
        struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
 
@@ -1389,23 +1385,25 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
                frags[i].page_offset = 0;
                skb_frag_size_set(&frags[i], len);
        }
-       /* swap out with old one */
-       memcpy(skb_shinfo(skb)->frags,
-              frags,
-              i * sizeof(skb_frag_t));
-       skb_shinfo(skb)->nr_frags = i;
-       skb->truesize += i * PAGE_SIZE;
 
-       /* remove traces of mapped pages and frag_list */
+       /* Copied all the bits from the frag list -- free it. */
        skb_frag_list_init(skb);
+       xenvif_skb_zerocopy_prepare(queue, nskb);
+       kfree_skb(nskb);
+
+       /* Release all the original (foreign) frags. */
+       for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+               skb_frag_unref(skb, f);
        uarg = skb_shinfo(skb)->destructor_arg;
        /* increase inflight counter to offset decrement in callback */
        atomic_inc(&queue->inflight_packets);
        uarg->callback(uarg, true);
        skb_shinfo(skb)->destructor_arg = NULL;
 
-       xenvif_skb_zerocopy_prepare(queue, nskb);
-       kfree_skb(nskb);
+       /* Fill the skb with the new (local) frags. */
+       memcpy(skb_shinfo(skb)->frags, frags, i * sizeof(skb_frag_t));
+       skb_shinfo(skb)->nr_frags = i;
+       skb->truesize += i * PAGE_SIZE;
 
        return 0;
 }
@@ -1655,7 +1653,6 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
 {
        struct pending_tx_info *pending_tx_info;
        pending_ring_idx_t index;
-       int notify;
        unsigned long flags;
 
        pending_tx_info = &queue->pending_tx_info[pending_idx];
@@ -1671,12 +1668,9 @@ static void xenvif_idx_release(struct xenvif_queue *queue, u16 pending_idx,
        index = pending_index(queue->pending_prod++);
        queue->pending_ring[index] = pending_idx;
 
-       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
+       push_tx_responses(queue);
 
        spin_unlock_irqrestore(&queue->response_lock, flags);
-
-       if (notify)
-               notify_remote_via_irq(queue->tx_irq);
 }
 
 
@@ -1697,6 +1691,15 @@ static void make_tx_response(struct xenvif_queue *queue,
        queue->tx.rsp_prod_pvt = ++i;
 }
 
+static void push_tx_responses(struct xenvif_queue *queue)
+{
+       int notify;
+
+       RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->tx, notify);
+       if (notify)
+               notify_remote_via_irq(queue->tx_irq);
+}
+
 static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
                                             u16      id,
                                             s8       st,
index e9b960f..720aaf6 100644 (file)
@@ -1008,8 +1008,7 @@ err:
 
 static int xennet_change_mtu(struct net_device *dev, int mtu)
 {
-       int max = xennet_can_sg(dev) ?
-               XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER : ETH_DATA_LEN;
+       int max = xennet_can_sg(dev) ? XEN_NETIF_MAX_TX_SIZE : ETH_DATA_LEN;
 
        if (mtu > max)
                return -EINVAL;
@@ -1279,8 +1278,6 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
        netdev->ethtool_ops = &xennet_ethtool_ops;
        SET_NETDEV_DEV(netdev, &dev->dev);
 
-       netif_set_gso_max_size(netdev, XEN_NETIF_MAX_TX_SIZE - MAX_TCP_HEADER);
-
        np->netdev = netdev;
 
        netif_carrier_off(netdev);
index 38d1c51..7bcaeec 100644 (file)
@@ -84,8 +84,7 @@ config OF_RESOLVE
        bool
 
 config OF_OVERLAY
-       bool
-       depends on OF
+       bool "Device Tree overlays"
        select OF_DYNAMIC
        select OF_RESOLVE
 
index ad29069..78a7dcb 100644 (file)
@@ -450,12 +450,17 @@ static struct of_bus *of_match_bus(struct device_node *np)
        return NULL;
 }
 
-static int of_empty_ranges_quirk(void)
+static int of_empty_ranges_quirk(struct device_node *np)
 {
        if (IS_ENABLED(CONFIG_PPC)) {
-               /* To save cycles, we cache the result */
+               /* To save cycles, we cache the result for global "Mac" setting */
                static int quirk_state = -1;
 
+               /* PA-SEMI sdc DT bug */
+               if (of_device_is_compatible(np, "1682m-sdc"))
+                       return true;
+
+               /* Make quirk cached */
                if (quirk_state < 0)
                        quirk_state =
                                of_machine_is_compatible("Power Macintosh") ||
@@ -490,7 +495,7 @@ static int of_translate_one(struct device_node *parent, struct of_bus *bus,
         * This code is only enabled on powerpc. --gcl
         */
        ranges = of_get_property(parent, rprop, &rlen);
-       if (ranges == NULL && !of_empty_ranges_quirk()) {
+       if (ranges == NULL && !of_empty_ranges_quirk(parent)) {
                pr_debug("OF: no ranges; cannot translate\n");
                return 1;
        }
index 0a8aeb8..8f165b1 100644 (file)
@@ -714,16 +714,12 @@ static struct device_node *__of_find_node_by_path(struct device_node *parent,
                                                const char *path)
 {
        struct device_node *child;
-       int len = strchrnul(path, '/') - path;
-       int term;
+       int len;
 
+       len = strcspn(path, "/:");
        if (!len)
                return NULL;
 
-       term = strchrnul(path, ':') - path;
-       if (term < len)
-               len = term;
-
        __for_each_child_of_node(parent, child) {
                const char *name = strrchr(child->full_name, '/');
                if (WARN(!name, "malformed device_node %s\n", child->full_name))
@@ -768,8 +764,12 @@ struct device_node *of_find_node_opts_by_path(const char *path, const char **opt
 
        /* The path could begin with an alias */
        if (*path != '/') {
-               char *p = strchrnul(path, '/');
-               int len = separator ? separator - path : p - path;
+               int len;
+               const char *p = separator;
+
+               if (!p)
+                       p = strchrnul(path, '/');
+               len = p - path;
 
                /* of_aliases must not be NULL */
                if (!of_aliases)
@@ -794,6 +794,8 @@ struct device_node *of_find_node_opts_by_path(const char *path, const char **opt
                path++; /* Increment past '/' delimiter */
                np = __of_find_node_by_path(np, path);
                path = strchrnul(path, '/');
+               if (separator && separator < path)
+                       break;
        }
        raw_spin_unlock_irqrestore(&devtree_lock, flags);
        return np;
index 0d77658..1a79806 100644 (file)
@@ -290,7 +290,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar
        struct device_node *p;
        const __be32 *intspec, *tmp, *addr;
        u32 intsize, intlen;
-       int i, res = -EINVAL;
+       int i, res;
 
        pr_debug("of_irq_parse_one: dev=%s, index=%d\n", of_node_full_name(device), index);
 
@@ -323,15 +323,19 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar
 
        /* Get size of interrupt specifier */
        tmp = of_get_property(p, "#interrupt-cells", NULL);
-       if (tmp == NULL)
+       if (tmp == NULL) {
+               res = -EINVAL;
                goto out;
+       }
        intsize = be32_to_cpu(*tmp);
 
        pr_debug(" intsize=%d intlen=%d\n", intsize, intlen);
 
        /* Check index */
-       if ((index + 1) * intsize > intlen)
+       if ((index + 1) * intsize > intlen) {
+               res = -EINVAL;
                goto out;
+       }
 
        /* Copy intspec into irq structure */
        intspec += index * intsize;
index 352b4f2..dee9270 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/err.h>
+#include <linux/idr.h>
 
 #include "of_private.h"
 
@@ -85,7 +86,7 @@ static int of_overlay_apply_single_device_node(struct of_overlay *ov,
                struct device_node *target, struct device_node *child)
 {
        const char *cname;
-       struct device_node *tchild, *grandchild;
+       struct device_node *tchild;
        int ret = 0;
 
        cname = kbasename(child->full_name);
index 0cf9a23..52c45c7 100644 (file)
@@ -92,6 +92,16 @@ static void __init of_selftest_find_node_by_name(void)
                 "option path test failed\n");
        of_node_put(np);
 
+       np = of_find_node_opts_by_path("/testcase-data:test/option", &options);
+       selftest(np && !strcmp("test/option", options),
+                "option path test, subcase #1 failed\n");
+       of_node_put(np);
+
+       np = of_find_node_opts_by_path("/testcase-data/testcase-device1:test/option", &options);
+       selftest(np && !strcmp("test/option", options),
+                "option path test, subcase #2 failed\n");
+       of_node_put(np);
+
        np = of_find_node_opts_by_path("/testcase-data:testoption", NULL);
        selftest(np, "NULL option path test failed\n");
        of_node_put(np);
@@ -102,6 +112,12 @@ static void __init of_selftest_find_node_by_name(void)
                 "option alias path test failed\n");
        of_node_put(np);
 
+       np = of_find_node_opts_by_path("testcase-alias:test/alias/option",
+                                      &options);
+       selftest(np && !strcmp("test/alias/option", options),
+                "option alias path test, subcase #1 failed\n");
+       of_node_put(np);
+
        np = of_find_node_opts_by_path("testcase-alias:testaliasoption", NULL);
        selftest(np, "NULL option alias path test failed\n");
        of_node_put(np);
@@ -378,9 +394,9 @@ static void __init of_selftest_property_string(void)
        rc = of_property_match_string(np, "phandle-list-names", "first");
        selftest(rc == 0, "first expected:0 got:%i\n", rc);
        rc = of_property_match_string(np, "phandle-list-names", "second");
-       selftest(rc == 1, "second expected:0 got:%i\n", rc);
+       selftest(rc == 1, "second expected:1 got:%i\n", rc);
        rc = of_property_match_string(np, "phandle-list-names", "third");
-       selftest(rc == 2, "third expected:0 got:%i\n", rc);
+       selftest(rc == 2, "third expected:2 got:%i\n", rc);
        rc = of_property_match_string(np, "phandle-list-names", "fourth");
        selftest(rc == -ENODATA, "unmatched string; rc=%i\n", rc);
        rc = of_property_match_string(np, "missing-property", "blah");
@@ -478,7 +494,6 @@ static void __init of_selftest_changeset(void)
        struct device_node *n1, *n2, *n21, *nremove, *parent, *np;
        struct of_changeset chgset;
 
-       of_changeset_init(&chgset);
        n1 = __of_node_dup(NULL, "/testcase-data/changeset/n1");
        selftest(n1, "testcase setup failure\n");
        n2 = __of_node_dup(NULL, "/testcase-data/changeset/n2");
@@ -979,7 +994,7 @@ static int of_path_platform_device_exists(const char *path)
        return pdev != NULL;
 }
 
-#if IS_ENABLED(CONFIG_I2C)
+#if IS_BUILTIN(CONFIG_I2C)
 
 /* get the i2c client device instantiated at the path */
 static struct i2c_client *of_path_to_i2c_client(const char *path)
@@ -1445,7 +1460,7 @@ static void of_selftest_overlay_11(void)
                return;
 }
 
-#if IS_ENABLED(CONFIG_I2C) && IS_ENABLED(CONFIG_OF_OVERLAY)
+#if IS_BUILTIN(CONFIG_I2C) && IS_ENABLED(CONFIG_OF_OVERLAY)
 
 struct selftest_i2c_bus_data {
        struct platform_device  *pdev;
@@ -1584,7 +1599,7 @@ static struct i2c_driver selftest_i2c_dev_driver = {
        .id_table = selftest_i2c_dev_id,
 };
 
-#if IS_ENABLED(CONFIG_I2C_MUX)
+#if IS_BUILTIN(CONFIG_I2C_MUX)
 
 struct selftest_i2c_mux_data {
        int nchans;
@@ -1695,7 +1710,7 @@ static int of_selftest_overlay_i2c_init(void)
                        "could not register selftest i2c bus driver\n"))
                return ret;
 
-#if IS_ENABLED(CONFIG_I2C_MUX)
+#if IS_BUILTIN(CONFIG_I2C_MUX)
        ret = i2c_add_driver(&selftest_i2c_mux_driver);
        if (selftest(ret == 0,
                        "could not register selftest i2c mux driver\n"))
@@ -1707,7 +1722,7 @@ static int of_selftest_overlay_i2c_init(void)
 
 static void of_selftest_overlay_i2c_cleanup(void)
 {
-#if IS_ENABLED(CONFIG_I2C_MUX)
+#if IS_BUILTIN(CONFIG_I2C_MUX)
        i2c_del_driver(&selftest_i2c_mux_driver);
 #endif
        platform_driver_unregister(&selftest_i2c_bus_driver);
@@ -1814,7 +1829,7 @@ static void __init of_selftest_overlay(void)
        of_selftest_overlay_10();
        of_selftest_overlay_11();
 
-#if IS_ENABLED(CONFIG_I2C)
+#if IS_BUILTIN(CONFIG_I2C)
        if (selftest(of_selftest_overlay_i2c_init() == 0, "i2c init failed\n"))
                goto out;
 
index aab5547..ee082c0 100644 (file)
@@ -127,7 +127,7 @@ static bool xgene_pcie_hide_rc_bars(struct pci_bus *bus, int offset)
        return false;
 }
 
-static int xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
+static void __iomem *xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
                              int offset)
 {
        struct xgene_pcie_port *port = bus->sysdata;
@@ -137,7 +137,7 @@ static int xgene_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
                return NULL;
 
        xgene_pcie_set_rtdid_reg(bus, devfn);
-       return xgene_pcie_get_cfg_base(bus);
+       return xgene_pcie_get_cfg_base(bus) + offset;
 }
 
 static struct pci_ops xgene_pcie_ops = {
index 1f4ea6f..2e9f84f 100644 (file)
@@ -342,7 +342,7 @@ static const struct irq_domain_ops msi_domain_ops = {
        .map = dw_pcie_msi_map,
 };
 
-int __init dw_pcie_host_init(struct pcie_port *pp)
+int dw_pcie_host_init(struct pcie_port *pp)
 {
        struct device_node *np = pp->dev->of_node;
        struct platform_device *pdev = to_platform_device(pp->dev);
index 866465f..020d788 100644 (file)
@@ -269,7 +269,7 @@ static struct pcie_host_ops spear13xx_pcie_host_ops = {
        .host_init = spear13xx_pcie_host_init,
 };
 
-static int __init spear13xx_add_pcie_port(struct pcie_port *pp,
+static int spear13xx_add_pcie_port(struct pcie_port *pp,
                                         struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -299,7 +299,7 @@ static int __init spear13xx_add_pcie_port(struct pcie_port *pp,
        return 0;
 }
 
-static int __init spear13xx_pcie_probe(struct platform_device *pdev)
+static int spear13xx_pcie_probe(struct platform_device *pdev)
 {
        struct spear13xx_pcie *spear13xx_pcie;
        struct pcie_port *pp;
@@ -370,7 +370,7 @@ static const struct of_device_id spear13xx_pcie_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, spear13xx_pcie_of_match);
 
-static struct platform_driver spear13xx_pcie_driver __initdata = {
+static struct platform_driver spear13xx_pcie_driver = {
        .probe          = spear13xx_pcie_probe,
        .driver = {
                .name   = "spear-pcie",
index 7d48eca..788db48 100644 (file)
@@ -286,11 +286,12 @@ int cpci_configure_slot(struct slot *slot)
        }
        parent = slot->dev->bus;
 
-       list_for_each_entry(dev, &parent->devices, bus_list)
+       list_for_each_entry(dev, &parent->devices, bus_list) {
                if (PCI_SLOT(dev->devfn) != PCI_SLOT(slot->devfn))
                        continue;
                if (pci_is_bridge(dev))
                        pci_hp_add_bridge(dev);
+       }
 
 
        pci_assign_unassigned_bridge_resources(parent->self);
index 4890639..c93fbe7 100644 (file)
@@ -248,6 +248,9 @@ int pci_get_hp_params(struct pci_dev *dev, struct hotplug_params *hpp)
        acpi_handle handle, phandle;
        struct pci_bus *pbus;
 
+       if (acpi_pci_disabled)
+               return -ENODEV;
+
        handle = NULL;
        for (pbus = dev->bus; pbus; pbus = pbus->parent) {
                handle = acpi_pci_get_bridge_handle(pbus);
index aa012fb..312f23a 100644 (file)
@@ -521,7 +521,8 @@ static ssize_t driver_override_store(struct device *dev,
        struct pci_dev *pdev = to_pci_dev(dev);
        char *driver_override, *old = pdev->driver_override, *cp;
 
-       if (count > PATH_MAX)
+       /* We need to keep extra room for a newline */
+       if (count >= (PAGE_SIZE - 1))
                return -EINVAL;
 
        driver_override = kstrndup(buf, count, GFP_KERNEL);
@@ -549,7 +550,7 @@ static ssize_t driver_override_show(struct device *dev,
 {
        struct pci_dev *pdev = to_pci_dev(dev);
 
-       return sprintf(buf, "%s\n", pdev->driver_override);
+       return snprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
 }
 static DEVICE_ATTR_RW(driver_override);
 
index c6849d9..167fe41 100644 (file)
@@ -132,16 +132,8 @@ static const char *aer_agent_string[] = {
 static void __print_tlp_header(struct pci_dev *dev,
                               struct aer_header_log_regs *t)
 {
-       unsigned char *tlp = (unsigned char *)&t;
-
-       dev_err(&dev->dev, "  TLP Header:"
-               " %02x%02x%02x%02x %02x%02x%02x%02x"
-               " %02x%02x%02x%02x %02x%02x%02x%02x\n",
-               *(tlp + 3), *(tlp + 2), *(tlp + 1), *tlp,
-               *(tlp + 7), *(tlp + 6), *(tlp + 5), *(tlp + 4),
-               *(tlp + 11), *(tlp + 10), *(tlp + 9),
-               *(tlp + 8), *(tlp + 15), *(tlp + 14),
-               *(tlp + 13), *(tlp + 12));
+       dev_err(&dev->dev, "  TLP Header: %08x %08x %08x %08x\n",
+               t->dw0, t->dw1, t->dw2, t->dw3);
 }
 
 static void __aer_print_error(struct pci_dev *dev,
index 3bb4925..45f67c6 100644 (file)
@@ -69,8 +69,7 @@ config YENTA
        tristate "CardBus yenta-compatible bridge support"
        depends on PCI
        select CARDBUS if !EXPERT
-       select PCCARD_NONSTATIC if PCMCIA != n && ISA
-       select PCCARD_PCI if PCMCIA !=n && !ISA
+       select PCCARD_NONSTATIC if PCMCIA != n
        ---help---
          This option enables support for CardBus host bridges.  Virtually
          all modern PCMCIA bridges are CardBus compatible.  A "bridge" is
@@ -110,8 +109,7 @@ config YENTA_TOSHIBA
 config PD6729
        tristate "Cirrus PD6729 compatible bridge support"
        depends on PCMCIA && PCI
-       select PCCARD_NONSTATIC if PCMCIA != n && ISA
-       select PCCARD_PCI if PCMCIA !=n && !ISA
+       select PCCARD_NONSTATIC
        help
          This provides support for the Cirrus PD6729 PCI-to-PCMCIA bridge
          device, found in some older laptops and PCMCIA card readers.
@@ -119,8 +117,7 @@ config PD6729
 config I82092
        tristate "i82092 compatible bridge support"
        depends on PCMCIA && PCI
-       select PCCARD_NONSTATIC if PCMCIA != n && ISA
-       select PCCARD_PCI if PCMCIA !=n && !ISA
+       select PCCARD_NONSTATIC
        help
          This provides support for the Intel I82092AA PCI-to-PCMCIA bridge device,
          found in some older laptops and more commonly in evaluation boards for the
@@ -291,9 +288,6 @@ config ELECTRA_CF
          Say Y here to support the CompactFlash controller on the
          PA Semi Electra eval board.
 
-config PCCARD_PCI
-       bool
-
 config PCCARD_NONSTATIC
        bool
 
index f1a7ca0..27e94b3 100644 (file)
@@ -12,7 +12,6 @@ obj-$(CONFIG_PCMCIA)                          += pcmcia.o
 pcmcia_rsrc-y                                  += rsrc_mgr.o
 pcmcia_rsrc-$(CONFIG_PCCARD_NONSTATIC)         += rsrc_nonstatic.o
 pcmcia_rsrc-$(CONFIG_PCCARD_IODYN)             += rsrc_iodyn.o
-pcmcia_rsrc-$(CONFIG_PCCARD_PCI)               += rsrc_pci.o
 obj-$(CONFIG_PCCARD)                           += pcmcia_rsrc.o
 
 
diff --git a/drivers/pcmcia/rsrc_pci.c b/drivers/pcmcia/rsrc_pci.c
deleted file mode 100644 (file)
index 1f67b3b..0000000
+++ /dev/null
@@ -1,173 +0,0 @@
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-
-#include <pcmcia/ss.h>
-#include <pcmcia/cistpl.h>
-#include "cs_internal.h"
-
-
-struct pcmcia_align_data {
-       unsigned long   mask;
-       unsigned long   offset;
-};
-
-static resource_size_t pcmcia_align(void *align_data,
-                               const struct resource *res,
-                               resource_size_t size, resource_size_t align)
-{
-       struct pcmcia_align_data *data = align_data;
-       resource_size_t start;
-
-       start = (res->start & ~data->mask) + data->offset;
-       if (start < res->start)
-               start += data->mask + 1;
-       return start;
-}
-
-static struct resource *find_io_region(struct pcmcia_socket *s,
-                                       unsigned long base, int num,
-                                       unsigned long align)
-{
-       struct resource *res = pcmcia_make_resource(0, num, IORESOURCE_IO,
-                                               dev_name(&s->dev));
-       struct pcmcia_align_data data;
-       int ret;
-
-       data.mask = align - 1;
-       data.offset = base & data.mask;
-
-       ret = pci_bus_alloc_resource(s->cb_dev->bus, res, num, 1,
-                                            base, 0, pcmcia_align, &data);
-       if (ret != 0) {
-               kfree(res);
-               res = NULL;
-       }
-       return res;
-}
-
-static int res_pci_find_io(struct pcmcia_socket *s, unsigned int attr,
-                       unsigned int *base, unsigned int num,
-                       unsigned int align, struct resource **parent)
-{
-       int i, ret = 0;
-
-       /* Check for an already-allocated window that must conflict with
-        * what was asked for.  It is a hack because it does not catch all
-        * potential conflicts, just the most obvious ones.
-        */
-       for (i = 0; i < MAX_IO_WIN; i++) {
-               if (!s->io[i].res)
-                       continue;
-
-               if (!*base)
-                       continue;
-
-               if ((s->io[i].res->start & (align-1)) == *base)
-                       return -EBUSY;
-       }
-
-       for (i = 0; i < MAX_IO_WIN; i++) {
-               struct resource *res = s->io[i].res;
-               unsigned int try;
-
-               if (res && (res->flags & IORESOURCE_BITS) !=
-                       (attr & IORESOURCE_BITS))
-                       continue;
-
-               if (!res) {
-                       if (align == 0)
-                               align = 0x10000;
-
-                       res = s->io[i].res = find_io_region(s, *base, num,
-                                                               align);
-                       if (!res)
-                               return -EINVAL;
-
-                       *base = res->start;
-                       s->io[i].res->flags =
-                               ((res->flags & ~IORESOURCE_BITS) |
-                                       (attr & IORESOURCE_BITS));
-                       s->io[i].InUse = num;
-                       *parent = res;
-                       return 0;
-               }
-
-               /* Try to extend top of window */
-               try = res->end + 1;
-               if ((*base == 0) || (*base == try)) {
-                       ret = adjust_resource(s->io[i].res, res->start,
-                                             resource_size(res) + num);
-                       if (ret)
-                               continue;
-                       *base = try;
-                       s->io[i].InUse += num;
-                       *parent = res;
-                       return 0;
-               }
-
-               /* Try to extend bottom of window */
-               try = res->start - num;
-               if ((*base == 0) || (*base == try)) {
-                       ret = adjust_resource(s->io[i].res,
-                                             res->start - num,
-                                             resource_size(res) + num);
-                       if (ret)
-                               continue;
-                       *base = try;
-                       s->io[i].InUse += num;
-                       *parent = res;
-                       return 0;
-               }
-       }
-       return -EINVAL;
-}
-
-static struct resource *res_pci_find_mem(u_long base, u_long num,
-               u_long align, int low, struct pcmcia_socket *s)
-{
-       struct resource *res = pcmcia_make_resource(0, num, IORESOURCE_MEM,
-                                               dev_name(&s->dev));
-       struct pcmcia_align_data data;
-       unsigned long min;
-       int ret;
-
-       if (align < 0x20000)
-               align = 0x20000;
-       data.mask = align - 1;
-       data.offset = base & data.mask;
-
-       min = 0;
-       if (!low)
-               min = 0x100000UL;
-
-       ret = pci_bus_alloc_resource(s->cb_dev->bus,
-                       res, num, 1, min, 0,
-                       pcmcia_align, &data);
-
-       if (ret != 0) {
-               kfree(res);
-               res = NULL;
-       }
-       return res;
-}
-
-
-static int res_pci_init(struct pcmcia_socket *s)
-{
-       if (!s->cb_dev || !(s->features & SS_CAP_PAGE_REGS)) {
-               dev_err(&s->dev, "not supported by res_pci\n");
-               return -EOPNOTSUPP;
-       }
-       return 0;
-}
-
-struct pccard_resource_ops pccard_nonstatic_ops = {
-       .validate_mem = NULL,
-       .find_io = res_pci_find_io,
-       .find_mem = res_pci_find_mem,
-       .init = res_pci_init,
-       .exit = NULL,
-};
-EXPORT_SYMBOL(pccard_nonstatic_ops);
index 7c99ca2..8ccc395 100644 (file)
@@ -37,7 +37,7 @@ static int armada375_usb_phy_init(struct phy *phy)
        struct armada375_cluster_phy *cluster_phy;
        u32 reg;
 
-       cluster_phy = dev_get_drvdata(phy->dev.parent);
+       cluster_phy = phy_get_drvdata(phy);
        if (!cluster_phy)
                return -ENODEV;
 
@@ -131,6 +131,7 @@ static int armada375_usb_phy_probe(struct platform_device *pdev)
        cluster_phy->reg = usb_cluster_base;
 
        dev_set_drvdata(dev, cluster_phy);
+       phy_set_drvdata(phy, cluster_phy);
 
        phy_provider = devm_of_phy_provider_register(&pdev->dev,
                                                     armada375_usb_phy_xlate);
index a12d353..3791838 100644 (file)
@@ -52,7 +52,9 @@ static void devm_phy_consume(struct device *dev, void *res)
 
 static int devm_phy_match(struct device *dev, void *res, void *match_data)
 {
-       return res == match_data;
+       struct phy **phy = res;
+
+       return *phy == match_data;
 }
 
 /**
@@ -223,6 +225,7 @@ int phy_init(struct phy *phy)
        ret = phy_pm_runtime_get_sync(phy);
        if (ret < 0 && ret != -ENOTSUPP)
                return ret;
+       ret = 0; /* Override possible ret == -ENOTSUPP */
 
        mutex_lock(&phy->mutex);
        if (phy->init_count == 0 && phy->ops->init) {
@@ -231,8 +234,6 @@ int phy_init(struct phy *phy)
                        dev_err(&phy->dev, "phy init failed --> %d\n", ret);
                        goto out;
                }
-       } else {
-               ret = 0; /* Override possible ret == -ENOTSUPP */
        }
        ++phy->init_count;
 
@@ -253,6 +254,7 @@ int phy_exit(struct phy *phy)
        ret = phy_pm_runtime_get_sync(phy);
        if (ret < 0 && ret != -ENOTSUPP)
                return ret;
+       ret = 0; /* Override possible ret == -ENOTSUPP */
 
        mutex_lock(&phy->mutex);
        if (phy->init_count == 1 && phy->ops->exit) {
@@ -287,6 +289,7 @@ int phy_power_on(struct phy *phy)
        ret = phy_pm_runtime_get_sync(phy);
        if (ret < 0 && ret != -ENOTSUPP)
                return ret;
+       ret = 0; /* Override possible ret == -ENOTSUPP */
 
        mutex_lock(&phy->mutex);
        if (phy->power_count == 0 && phy->ops->power_on) {
@@ -295,8 +298,6 @@ int phy_power_on(struct phy *phy)
                        dev_err(&phy->dev, "phy poweron failed --> %d\n", ret);
                        goto out;
                }
-       } else {
-               ret = 0; /* Override possible ret == -ENOTSUPP */
        }
        ++phy->power_count;
        mutex_unlock(&phy->mutex);
index f86cbe6..179cbf9 100644 (file)
@@ -30,28 +30,13 @@ struct exynos_dp_video_phy {
        const struct exynos_dp_video_phy_drvdata *drvdata;
 };
 
-static void exynos_dp_video_phy_pwr_isol(struct exynos_dp_video_phy *state,
-                                                       unsigned int on)
-{
-       unsigned int val;
-
-       if (IS_ERR(state->regs))
-               return;
-
-       val = on ? 0 : EXYNOS5_PHY_ENABLE;
-
-       regmap_update_bits(state->regs, state->drvdata->phy_ctrl_offset,
-                          EXYNOS5_PHY_ENABLE, val);
-}
-
 static int exynos_dp_video_phy_power_on(struct phy *phy)
 {
        struct exynos_dp_video_phy *state = phy_get_drvdata(phy);
 
        /* Disable power isolation on DP-PHY */
-       exynos_dp_video_phy_pwr_isol(state, 0);
-
-       return 0;
+       return regmap_update_bits(state->regs, state->drvdata->phy_ctrl_offset,
+                                 EXYNOS5_PHY_ENABLE, EXYNOS5_PHY_ENABLE);
 }
 
 static int exynos_dp_video_phy_power_off(struct phy *phy)
@@ -59,9 +44,8 @@ static int exynos_dp_video_phy_power_off(struct phy *phy)
        struct exynos_dp_video_phy *state = phy_get_drvdata(phy);
 
        /* Enable power isolation on DP-PHY */
-       exynos_dp_video_phy_pwr_isol(state, 1);
-
-       return 0;
+       return regmap_update_bits(state->regs, state->drvdata->phy_ctrl_offset,
+                                 EXYNOS5_PHY_ENABLE, 0);
 }
 
 static struct phy_ops exynos_dp_video_phy_ops = {
index f017b2f..df7519a 100644 (file)
@@ -43,7 +43,6 @@ struct exynos_mipi_video_phy {
        } phys[EXYNOS_MIPI_PHYS_NUM];
        spinlock_t slock;
        void __iomem *regs;
-       struct mutex mutex;
        struct regmap *regmap;
 };
 
@@ -59,8 +58,9 @@ static int __set_phy_state(struct exynos_mipi_video_phy *state,
        else
                reset = EXYNOS4_MIPI_PHY_SRESETN;
 
-       if (state->regmap) {
-               mutex_lock(&state->mutex);
+       spin_lock(&state->slock);
+
+       if (!IS_ERR(state->regmap)) {
                regmap_read(state->regmap, offset, &val);
                if (on)
                        val |= reset;
@@ -72,11 +72,9 @@ static int __set_phy_state(struct exynos_mipi_video_phy *state,
                else if (!(val & EXYNOS4_MIPI_PHY_RESET_MASK))
                        val &= ~EXYNOS4_MIPI_PHY_ENABLE;
                regmap_write(state->regmap, offset, val);
-               mutex_unlock(&state->mutex);
        } else {
                addr = state->regs + EXYNOS_MIPI_PHY_CONTROL(id / 2);
 
-               spin_lock(&state->slock);
                val = readl(addr);
                if (on)
                        val |= reset;
@@ -90,9 +88,9 @@ static int __set_phy_state(struct exynos_mipi_video_phy *state,
                        val &= ~EXYNOS4_MIPI_PHY_ENABLE;
 
                writel(val, addr);
-               spin_unlock(&state->slock);
        }
 
+       spin_unlock(&state->slock);
        return 0;
 }
 
@@ -158,7 +156,6 @@ static int exynos_mipi_video_phy_probe(struct platform_device *pdev)
 
        dev_set_drvdata(dev, state);
        spin_lock_init(&state->slock);
-       mutex_init(&state->mutex);
 
        for (i = 0; i < EXYNOS_MIPI_PHYS_NUM; i++) {
                struct phy *phy = devm_phy_create(dev, NULL,
index 236a52a..f30bbb0 100644 (file)
@@ -250,7 +250,6 @@ static const struct samsung_usb2_common_phy exynos4210_phys[] = {
                .power_on       = exynos4210_power_on,
                .power_off      = exynos4210_power_off,
        },
-       {},
 };
 
 const struct samsung_usb2_phy_config exynos4210_usb2_phy_config = {
index 0b9de88..765da90 100644 (file)
@@ -361,7 +361,6 @@ static const struct samsung_usb2_common_phy exynos4x12_phys[] = {
                .power_on       = exynos4x12_power_on,
                .power_off      = exynos4x12_power_off,
        },
-       {},
 };
 
 const struct samsung_usb2_phy_config exynos3250_usb2_phy_config = {
index 0437401..e2a0be7 100644 (file)
@@ -531,7 +531,7 @@ static struct phy *exynos5_usbdrd_phy_xlate(struct device *dev,
 {
        struct exynos5_usbdrd_phy *phy_drd = dev_get_drvdata(dev);
 
-       if (WARN_ON(args->args[0] > EXYNOS5_DRDPHYS_NUM))
+       if (WARN_ON(args->args[0] >= EXYNOS5_DRDPHYS_NUM))
                return ERR_PTR(-ENODEV);
 
        return phy_drd->phys[args->args[0]].phy;
index 1c139aa..2ed1735 100644 (file)
@@ -391,7 +391,6 @@ static const struct samsung_usb2_common_phy exynos5250_phys[] = {
                .power_on       = exynos5250_power_on,
                .power_off      = exynos5250_power_off,
        },
-       {},
 };
 
 const struct samsung_usb2_phy_config exynos5250_usb2_phy_config = {
index 34915b4..d6b2265 100644 (file)
@@ -147,6 +147,9 @@ static int hix5hd2_sata_phy_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -EINVAL;
+
        priv->base = devm_ioremap(dev, res->start, resource_size(res));
        if (!priv->base)
                return -ENOMEM;
index 9b2848e..9334352 100644 (file)
@@ -228,6 +228,7 @@ struct miphy28lp_dev {
        struct regmap *regmap;
        struct mutex miphy_mutex;
        struct miphy28lp_phy **phys;
+       int nphys;
 };
 
 struct miphy_initval {
@@ -1116,7 +1117,7 @@ static struct phy *miphy28lp_xlate(struct device *dev,
                return ERR_PTR(-EINVAL);
        }
 
-       for (index = 0; index < of_get_child_count(dev->of_node); index++)
+       for (index = 0; index < miphy_dev->nphys; index++)
                if (phynode == miphy_dev->phys[index]->phy->dev.of_node) {
                        miphy_phy = miphy_dev->phys[index];
                        break;
@@ -1138,6 +1139,7 @@ static struct phy *miphy28lp_xlate(struct device *dev,
 
 static struct phy_ops miphy28lp_ops = {
        .init = miphy28lp_init,
+       .owner = THIS_MODULE,
 };
 
 static int miphy28lp_probe_resets(struct device_node *node,
@@ -1200,16 +1202,15 @@ static int miphy28lp_probe(struct platform_device *pdev)
        struct miphy28lp_dev *miphy_dev;
        struct phy_provider *provider;
        struct phy *phy;
-       int chancount, port = 0;
-       int ret;
+       int ret, port = 0;
 
        miphy_dev = devm_kzalloc(&pdev->dev, sizeof(*miphy_dev), GFP_KERNEL);
        if (!miphy_dev)
                return -ENOMEM;
 
-       chancount = of_get_child_count(np);
-       miphy_dev->phys = devm_kzalloc(&pdev->dev, sizeof(phy) * chancount,
-                                      GFP_KERNEL);
+       miphy_dev->nphys = of_get_child_count(np);
+       miphy_dev->phys = devm_kcalloc(&pdev->dev, miphy_dev->nphys,
+                                      sizeof(*miphy_dev->phys), GFP_KERNEL);
        if (!miphy_dev->phys)
                return -ENOMEM;
 
index 6c80154..51b459d 100644 (file)
@@ -150,6 +150,7 @@ struct miphy365x_dev {
        struct regmap *regmap;
        struct mutex miphy_mutex;
        struct miphy365x_phy **phys;
+       int nphys;
 };
 
 /*
@@ -485,7 +486,7 @@ static struct phy *miphy365x_xlate(struct device *dev,
                return ERR_PTR(-EINVAL);
        }
 
-       for (index = 0; index < of_get_child_count(dev->of_node); index++)
+       for (index = 0; index < miphy_dev->nphys; index++)
                if (phynode == miphy_dev->phys[index]->phy->dev.of_node) {
                        miphy_phy = miphy_dev->phys[index];
                        break;
@@ -541,16 +542,15 @@ static int miphy365x_probe(struct platform_device *pdev)
        struct miphy365x_dev *miphy_dev;
        struct phy_provider *provider;
        struct phy *phy;
-       int chancount, port = 0;
-       int ret;
+       int ret, port = 0;
 
        miphy_dev = devm_kzalloc(&pdev->dev, sizeof(*miphy_dev), GFP_KERNEL);
        if (!miphy_dev)
                return -ENOMEM;
 
-       chancount = of_get_child_count(np);
-       miphy_dev->phys = devm_kzalloc(&pdev->dev, sizeof(phy) * chancount,
-                                      GFP_KERNEL);
+       miphy_dev->nphys = of_get_child_count(np);
+       miphy_dev->phys = devm_kcalloc(&pdev->dev, miphy_dev->nphys,
+                                      sizeof(*miphy_dev->phys), GFP_KERNEL);
        if (!miphy_dev->phys)
                return -ENOMEM;
 
index efe724f..93252e0 100644 (file)
@@ -360,7 +360,7 @@ static void __exit omap_control_phy_exit(void)
 }
 module_exit(omap_control_phy_exit);
 
-MODULE_ALIAS("platform: omap_control_phy");
+MODULE_ALIAS("platform:omap_control_phy");
 MODULE_AUTHOR("Texas Instruments Inc.");
 MODULE_DESCRIPTION("OMAP Control Module PHY Driver");
 MODULE_LICENSE("GPL v2");
index 6f4aef3..4757e76 100644 (file)
@@ -296,10 +296,11 @@ static int omap_usb2_probe(struct platform_device *pdev)
                        dev_warn(&pdev->dev,
                                 "found usb_otg_ss_refclk960m, please fix DTS\n");
                }
-       } else {
-               clk_prepare(phy->optclk);
        }
 
+       if (!IS_ERR(phy->optclk))
+               clk_prepare(phy->optclk);
+
        usb_add_phy_dev(&phy->phy);
 
        return 0;
@@ -383,7 +384,7 @@ static struct platform_driver omap_usb2_driver = {
 
 module_platform_driver(omap_usb2_driver);
 
-MODULE_ALIAS("platform: omap_usb2");
+MODULE_ALIAS("platform:omap_usb2");
 MODULE_AUTHOR("Texas Instruments Inc.");
 MODULE_DESCRIPTION("OMAP USB2 phy driver");
 MODULE_LICENSE("GPL v2");
index 22011c3..7d4c336 100644 (file)
@@ -61,8 +61,6 @@ static int rockchip_usb_phy_power_off(struct phy *_phy)
                return ret;
 
        clk_disable_unprepare(phy->clk);
-       if (ret)
-               return ret;
 
        return 0;
 }
@@ -78,8 +76,10 @@ static int rockchip_usb_phy_power_on(struct phy *_phy)
 
        /* Power up usb phy analog blocks by set siddq 0 */
        ret = rockchip_usb_phy_power(phy, 0);
-       if (ret)
+       if (ret) {
+               clk_disable_unprepare(phy->clk);
                return ret;
+       }
 
        return 0;
 }
index 95c88f9..2ba610b 100644 (file)
@@ -165,15 +165,11 @@ static int ti_pipe3_dpll_wait_lock(struct ti_pipe3 *phy)
                cpu_relax();
                val = ti_pipe3_readl(phy->pll_ctrl_base, PLL_STATUS);
                if (val & PLL_LOCK)
-                       break;
+                       return 0;
        } while (!time_after(jiffies, timeout));
 
-       if (!(val & PLL_LOCK)) {
-               dev_err(phy->dev, "DPLL failed to lock\n");
-               return -EBUSY;
-       }
-
-       return 0;
+       dev_err(phy->dev, "DPLL failed to lock\n");
+       return -EBUSY;
 }
 
 static int ti_pipe3_dpll_program(struct ti_pipe3 *phy)
@@ -608,7 +604,7 @@ static struct platform_driver ti_pipe3_driver = {
 
 module_platform_driver(ti_pipe3_driver);
 
-MODULE_ALIAS("platform: ti_pipe3");
+MODULE_ALIAS("platform:ti_pipe3");
 MODULE_AUTHOR("Texas Instruments Inc.");
 MODULE_DESCRIPTION("TI PIPE3 phy driver");
 MODULE_LICENSE("GPL v2");
index 8e87f54..bc42d6a 100644 (file)
@@ -666,7 +666,6 @@ static int twl4030_usb_probe(struct platform_device *pdev)
        twl->dev                = &pdev->dev;
        twl->irq                = platform_get_irq(pdev, 0);
        twl->vbus_supplied      = false;
-       twl->linkstat           = -EINVAL;
        twl->linkstat           = OMAP_MUSB_UNKNOWN;
 
        twl->phy.dev            = twl->dev;
index 29214a3..2263cd0 100644 (file)
@@ -1704,7 +1704,6 @@ static int xgene_phy_probe(struct platform_device *pdev)
        for (i = 0; i < MAX_LANE; i++)
                ctx->sata_param.speed[i] = 2; /* Default to Gen3 */
 
-       ctx->dev = &pdev->dev;
        platform_set_drvdata(pdev, ctx);
 
        ctx->phy = devm_phy_create(ctx->dev, NULL, &xgene_phy_ops);
index 5afe03e..2062c22 100644 (file)
 #define BYT_DIR_MASK           (BIT(1) | BIT(2))
 #define BYT_TRIG_MASK          (BIT(26) | BIT(25) | BIT(24))
 
+#define BYT_CONF0_RESTORE_MASK (BYT_DIRECT_IRQ_EN | BYT_TRIG_MASK | \
+                                BYT_PIN_MUX)
+#define BYT_VAL_RESTORE_MASK   (BYT_DIR_MASK | BYT_LEVEL)
+
 #define BYT_NGPIO_SCORE                102
 #define BYT_NGPIO_NCORE                28
 #define BYT_NGPIO_SUS          44
@@ -134,12 +138,18 @@ static struct pinctrl_gpio_range byt_ranges[] = {
        },
 };
 
+struct byt_gpio_pin_context {
+       u32 conf0;
+       u32 val;
+};
+
 struct byt_gpio {
        struct gpio_chip                chip;
        struct platform_device          *pdev;
        spinlock_t                      lock;
        void __iomem                    *reg_base;
        struct pinctrl_gpio_range       *range;
+       struct byt_gpio_pin_context     *saved_context;
 };
 
 #define to_byt_gpio(c) container_of(c, struct byt_gpio, chip)
@@ -158,40 +168,62 @@ static void __iomem *byt_gpio_reg(struct gpio_chip *chip, unsigned offset,
        return vg->reg_base + reg_offset + reg;
 }
 
-static bool is_special_pin(struct byt_gpio *vg, unsigned offset)
+static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned offset)
+{
+       void __iomem *reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG);
+       unsigned long flags;
+       u32 value;
+
+       spin_lock_irqsave(&vg->lock, flags);
+       value = readl(reg);
+       value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL);
+       writel(value, reg);
+       spin_unlock_irqrestore(&vg->lock, flags);
+}
+
+static u32 byt_get_gpio_mux(struct byt_gpio *vg, unsigned offset)
 {
        /* SCORE pin 92-93 */
        if (!strcmp(vg->range->name, BYT_SCORE_ACPI_UID) &&
                offset >= 92 && offset <= 93)
-               return true;
+               return 1;
 
        /* SUS pin 11-21 */
        if (!strcmp(vg->range->name, BYT_SUS_ACPI_UID) &&
                offset >= 11 && offset <= 21)
-               return true;
+               return 1;
 
-       return false;
+       return 0;
 }
 
 static int byt_gpio_request(struct gpio_chip *chip, unsigned offset)
 {
        struct byt_gpio *vg = to_byt_gpio(chip);
        void __iomem *reg = byt_gpio_reg(chip, offset, BYT_CONF0_REG);
-       u32 value;
-       bool special;
+       u32 value, gpio_mux;
 
        /*
         * In most cases, func pin mux 000 means GPIO function.
         * But, some pins may have func pin mux 001 represents
-        * GPIO function. Only allow user to export pin with
-        * func pin mux preset as GPIO function by BIOS/FW.
+        * GPIO function.
+        *
+        * Because there are devices out there where some pins were not
+        * configured correctly we allow changing the mux value from
+        * request (but print out warning about that).
         */
        value = readl(reg) & BYT_PIN_MUX;
-       special = is_special_pin(vg, offset);
-       if ((special && value != 1) || (!special && value)) {
-               dev_err(&vg->pdev->dev,
-                       "pin %u cannot be used as GPIO.\n", offset);
-               return -EINVAL;
+       gpio_mux = byt_get_gpio_mux(vg, offset);
+       if (WARN_ON(gpio_mux != value)) {
+               unsigned long flags;
+
+               spin_lock_irqsave(&vg->lock, flags);
+               value = readl(reg) & ~BYT_PIN_MUX;
+               value |= gpio_mux;
+               writel(value, reg);
+               spin_unlock_irqrestore(&vg->lock, flags);
+
+               dev_warn(&vg->pdev->dev,
+                        "pin %u forcibly re-configured as GPIO\n", offset);
        }
 
        pm_runtime_get(&vg->pdev->dev);
@@ -202,14 +234,8 @@ static int byt_gpio_request(struct gpio_chip *chip, unsigned offset)
 static void byt_gpio_free(struct gpio_chip *chip, unsigned offset)
 {
        struct byt_gpio *vg = to_byt_gpio(chip);
-       void __iomem *reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG);
-       u32 value;
-
-       /* clear interrupt triggering */
-       value = readl(reg);
-       value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL);
-       writel(value, reg);
 
+       byt_gpio_clear_triggering(vg, offset);
        pm_runtime_put(&vg->pdev->dev);
 }
 
@@ -236,23 +262,13 @@ static int byt_irq_type(struct irq_data *d, unsigned type)
        value &= ~(BYT_DIRECT_IRQ_EN | BYT_TRIG_POS | BYT_TRIG_NEG |
                   BYT_TRIG_LVL);
 
-       switch (type) {
-       case IRQ_TYPE_LEVEL_HIGH:
-               value |= BYT_TRIG_LVL;
-       case IRQ_TYPE_EDGE_RISING:
-               value |= BYT_TRIG_POS;
-               break;
-       case IRQ_TYPE_LEVEL_LOW:
-               value |= BYT_TRIG_LVL;
-       case IRQ_TYPE_EDGE_FALLING:
-               value |= BYT_TRIG_NEG;
-               break;
-       case IRQ_TYPE_EDGE_BOTH:
-               value |= (BYT_TRIG_NEG | BYT_TRIG_POS);
-               break;
-       }
        writel(value, reg);
 
+       if (type & IRQ_TYPE_EDGE_BOTH)
+               __irq_set_handler_locked(d->irq, handle_edge_irq);
+       else if (type & IRQ_TYPE_LEVEL_MASK)
+               __irq_set_handler_locked(d->irq, handle_level_irq);
+
        spin_unlock_irqrestore(&vg->lock, flags);
 
        return 0;
@@ -410,58 +426,80 @@ static void byt_gpio_irq_handler(unsigned irq, struct irq_desc *desc)
        struct irq_data *data = irq_desc_get_irq_data(desc);
        struct byt_gpio *vg = to_byt_gpio(irq_desc_get_handler_data(desc));
        struct irq_chip *chip = irq_data_get_irq_chip(data);
-       u32 base, pin, mask;
+       u32 base, pin;
        void __iomem *reg;
-       u32 pending;
+       unsigned long pending;
        unsigned virq;
-       int looplimit = 0;
 
        /* check from GPIO controller which pin triggered the interrupt */
        for (base = 0; base < vg->chip.ngpio; base += 32) {
-
                reg = byt_gpio_reg(&vg->chip, base, BYT_INT_STAT_REG);
-
-               while ((pending = readl(reg))) {
-                       pin = __ffs(pending);
-                       mask = BIT(pin);
-                       /* Clear before handling so we can't lose an edge */
-                       writel(mask, reg);
-
+               pending = readl(reg);
+               for_each_set_bit(pin, &pending, 32) {
                        virq = irq_find_mapping(vg->chip.irqdomain, base + pin);
                        generic_handle_irq(virq);
-
-                       /* In case bios or user sets triggering incorretly a pin
-                        * might remain in "interrupt triggered" state.
-                        */
-                       if (looplimit++ > 32) {
-                               dev_err(&vg->pdev->dev,
-                                       "Gpio %d interrupt flood, disabling\n",
-                                       base + pin);
-
-                               reg = byt_gpio_reg(&vg->chip, base + pin,
-                                                  BYT_CONF0_REG);
-                               mask = readl(reg);
-                               mask &= ~(BYT_TRIG_NEG | BYT_TRIG_POS |
-                                         BYT_TRIG_LVL);
-                               writel(mask, reg);
-                               mask = readl(reg); /* flush */
-                               break;
-                       }
                }
        }
        chip->irq_eoi(data);
 }
 
+static void byt_irq_ack(struct irq_data *d)
+{
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+       struct byt_gpio *vg = to_byt_gpio(gc);
+       unsigned offset = irqd_to_hwirq(d);
+       void __iomem *reg;
+
+       reg = byt_gpio_reg(&vg->chip, offset, BYT_INT_STAT_REG);
+       writel(BIT(offset % 32), reg);
+}
+
 static void byt_irq_unmask(struct irq_data *d)
 {
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+       struct byt_gpio *vg = to_byt_gpio(gc);
+       unsigned offset = irqd_to_hwirq(d);
+       unsigned long flags;
+       void __iomem *reg;
+       u32 value;
+
+       spin_lock_irqsave(&vg->lock, flags);
+
+       reg = byt_gpio_reg(&vg->chip, offset, BYT_CONF0_REG);
+       value = readl(reg);
+
+       switch (irqd_get_trigger_type(d)) {
+       case IRQ_TYPE_LEVEL_HIGH:
+               value |= BYT_TRIG_LVL;
+       case IRQ_TYPE_EDGE_RISING:
+               value |= BYT_TRIG_POS;
+               break;
+       case IRQ_TYPE_LEVEL_LOW:
+               value |= BYT_TRIG_LVL;
+       case IRQ_TYPE_EDGE_FALLING:
+               value |= BYT_TRIG_NEG;
+               break;
+       case IRQ_TYPE_EDGE_BOTH:
+               value |= (BYT_TRIG_NEG | BYT_TRIG_POS);
+               break;
+       }
+
+       writel(value, reg);
+
+       spin_unlock_irqrestore(&vg->lock, flags);
 }
 
 static void byt_irq_mask(struct irq_data *d)
 {
+       struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+       struct byt_gpio *vg = to_byt_gpio(gc);
+
+       byt_gpio_clear_triggering(vg, irqd_to_hwirq(d));
 }
 
 static struct irq_chip byt_irqchip = {
        .name = "BYT-GPIO",
+       .irq_ack = byt_irq_ack,
        .irq_mask = byt_irq_mask,
        .irq_unmask = byt_irq_unmask,
        .irq_set_type = byt_irq_type,
@@ -472,6 +510,21 @@ static void byt_gpio_irq_init_hw(struct byt_gpio *vg)
 {
        void __iomem *reg;
        u32 base, value;
+       int i;
+
+       /*
+        * Clear interrupt triggers for all pins that are GPIOs and
+        * do not use direct IRQ mode. This will prevent spurious
+        * interrupts from misconfigured pins.
+        */
+       for (i = 0; i < vg->chip.ngpio; i++) {
+               value = readl(byt_gpio_reg(&vg->chip, i, BYT_CONF0_REG));
+               if ((value & BYT_PIN_MUX) == byt_get_gpio_mux(vg, i) &&
+                   !(value & BYT_DIRECT_IRQ_EN)) {
+                       byt_gpio_clear_triggering(vg, i);
+                       dev_dbg(&vg->pdev->dev, "disabling GPIO %d\n", i);
+               }
+       }
 
        /* clear interrupt status trigger registers */
        for (base = 0; base < vg->chip.ngpio; base += 32) {
@@ -541,6 +594,11 @@ static int byt_gpio_probe(struct platform_device *pdev)
        gc->can_sleep = false;
        gc->dev = dev;
 
+#ifdef CONFIG_PM_SLEEP
+       vg->saved_context = devm_kcalloc(&pdev->dev, gc->ngpio,
+                                      sizeof(*vg->saved_context), GFP_KERNEL);
+#endif
+
        ret = gpiochip_add(gc);
        if (ret) {
                dev_err(&pdev->dev, "failed adding byt-gpio chip\n");
@@ -569,6 +627,69 @@ static int byt_gpio_probe(struct platform_device *pdev)
        return 0;
 }
 
+#ifdef CONFIG_PM_SLEEP
+static int byt_gpio_suspend(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct byt_gpio *vg = platform_get_drvdata(pdev);
+       int i;
+
+       for (i = 0; i < vg->chip.ngpio; i++) {
+               void __iomem *reg;
+               u32 value;
+
+               reg = byt_gpio_reg(&vg->chip, i, BYT_CONF0_REG);
+               value = readl(reg) & BYT_CONF0_RESTORE_MASK;
+               vg->saved_context[i].conf0 = value;
+
+               reg = byt_gpio_reg(&vg->chip, i, BYT_VAL_REG);
+               value = readl(reg) & BYT_VAL_RESTORE_MASK;
+               vg->saved_context[i].val = value;
+       }
+
+       return 0;
+}
+
+static int byt_gpio_resume(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct byt_gpio *vg = platform_get_drvdata(pdev);
+       int i;
+
+       for (i = 0; i < vg->chip.ngpio; i++) {
+               void __iomem *reg;
+               u32 value;
+
+               reg = byt_gpio_reg(&vg->chip, i, BYT_CONF0_REG);
+               value = readl(reg);
+               if ((value & BYT_CONF0_RESTORE_MASK) !=
+                    vg->saved_context[i].conf0) {
+                       value &= ~BYT_CONF0_RESTORE_MASK;
+                       value |= vg->saved_context[i].conf0;
+                       writel(value, reg);
+                       dev_info(dev, "restored pin %d conf0 %#08x", i, value);
+               }
+
+               reg = byt_gpio_reg(&vg->chip, i, BYT_VAL_REG);
+               value = readl(reg);
+               if ((value & BYT_VAL_RESTORE_MASK) !=
+                    vg->saved_context[i].val) {
+                       u32 v;
+
+                       v = value & ~BYT_VAL_RESTORE_MASK;
+                       v |= vg->saved_context[i].val;
+                       if (v != value) {
+                               writel(v, reg);
+                               dev_dbg(dev, "restored pin %d val %#08x\n",
+                                       i, v);
+                       }
+               }
+       }
+
+       return 0;
+}
+#endif
+
 static int byt_gpio_runtime_suspend(struct device *dev)
 {
        return 0;
@@ -580,8 +701,9 @@ static int byt_gpio_runtime_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops byt_gpio_pm_ops = {
-       .runtime_suspend = byt_gpio_runtime_suspend,
-       .runtime_resume = byt_gpio_runtime_resume,
+       SET_LATE_SYSTEM_SLEEP_PM_OPS(byt_gpio_suspend, byt_gpio_resume)
+       SET_RUNTIME_PM_OPS(byt_gpio_runtime_suspend, byt_gpio_runtime_resume,
+                          NULL)
 };
 
 static const struct acpi_device_id byt_gpio_acpi_match[] = {
index 3034fd0..82f691e 100644 (file)
@@ -1226,6 +1226,7 @@ static int chv_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 static int chv_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
                                     int value)
 {
+       chv_gpio_set(chip, offset, value);
        return pinctrl_gpio_direction_output(chip->base + offset);
 }
 
index f4cd0b9..a481406 100644 (file)
@@ -1477,28 +1477,25 @@ static void gpio_irq_ack(struct irq_data *d)
        /* the interrupt is already cleared before by reading ISR */
 }
 
-static unsigned int gpio_irq_startup(struct irq_data *d)
+static int gpio_irq_request_res(struct irq_data *d)
 {
        struct at91_gpio_chip *at91_gpio = irq_data_get_irq_chip_data(d);
        unsigned        pin = d->hwirq;
        int ret;
 
        ret = gpiochip_lock_as_irq(&at91_gpio->chip, pin);
-       if (ret) {
+       if (ret)
                dev_err(at91_gpio->chip.dev, "unable to lock pind %lu IRQ\n",
                        d->hwirq);
-               return ret;
-       }
-       gpio_irq_unmask(d);
-       return 0;
+
+       return ret;
 }
 
-static void gpio_irq_shutdown(struct irq_data *d)
+static void gpio_irq_release_res(struct irq_data *d)
 {
        struct at91_gpio_chip *at91_gpio = irq_data_get_irq_chip_data(d);
        unsigned        pin = d->hwirq;
 
-       gpio_irq_mask(d);
        gpiochip_unlock_as_irq(&at91_gpio->chip, pin);
 }
 
@@ -1577,8 +1574,8 @@ void at91_pinctrl_gpio_resume(void)
 static struct irq_chip gpio_irqchip = {
        .name           = "GPIO",
        .irq_ack        = gpio_irq_ack,
-       .irq_startup    = gpio_irq_startup,
-       .irq_shutdown   = gpio_irq_shutdown,
+       .irq_request_resources = gpio_irq_request_res,
+       .irq_release_resources = gpio_irq_release_res,
        .irq_disable    = gpio_irq_mask,
        .irq_mask       = gpio_irq_mask,
        .irq_unmask     = gpio_irq_unmask,
index 24c5d88..3c68a8e 100644 (file)
@@ -1011,6 +1011,7 @@ static const struct sunxi_pinctrl_desc sun4i_a10_pinctrl_data = {
        .pins = sun4i_a10_pins,
        .npins = ARRAY_SIZE(sun4i_a10_pins),
        .irq_banks = 1,
+       .irq_read_needs_mux = true,
 };
 
 static int sun4i_a10_pinctrl_probe(struct platform_device *pdev)
index 3d07443..f8e171b 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 
 #include "../core.h"
+#include "../../gpio/gpiolib.h"
 #include "pinctrl-sunxi.h"
 
 static struct irq_chip sunxi_pinctrl_edge_irq_chip;
@@ -464,10 +465,19 @@ static int sunxi_pinctrl_gpio_direction_input(struct gpio_chip *chip,
 static int sunxi_pinctrl_gpio_get(struct gpio_chip *chip, unsigned offset)
 {
        struct sunxi_pinctrl *pctl = dev_get_drvdata(chip->dev);
-
        u32 reg = sunxi_data_reg(offset);
        u8 index = sunxi_data_offset(offset);
-       u32 val = (readl(pctl->membase + reg) >> index) & DATA_PINS_MASK;
+       u32 set_mux = pctl->desc->irq_read_needs_mux &&
+                       test_bit(FLAG_USED_AS_IRQ, &chip->desc[offset].flags);
+       u32 val;
+
+       if (set_mux)
+               sunxi_pmx_set(pctl->pctl_dev, offset, SUN4I_FUNC_INPUT);
+
+       val = (readl(pctl->membase + reg) >> index) & DATA_PINS_MASK;
+
+       if (set_mux)
+               sunxi_pmx_set(pctl->pctl_dev, offset, SUN4I_FUNC_IRQ);
 
        return val;
 }
index 5a51523..e248e81 100644 (file)
@@ -77,6 +77,9 @@
 #define IRQ_LEVEL_LOW          0x03
 #define IRQ_EDGE_BOTH          0x04
 
+#define SUN4I_FUNC_INPUT       0
+#define SUN4I_FUNC_IRQ         6
+
 struct sunxi_desc_function {
        const char      *name;
        u8              muxval;
@@ -94,6 +97,7 @@ struct sunxi_pinctrl_desc {
        int                             npins;
        unsigned                        pin_base;
        unsigned                        irq_banks;
+       bool                            irq_read_needs_mux;
 };
 
 struct sunxi_pinctrl_function {
index 97b5e4e..63d4033 100644 (file)
@@ -73,7 +73,7 @@
 
 #define TIME_WINDOW_MAX_MSEC 40000
 #define TIME_WINDOW_MIN_MSEC 250
-
+#define ENERGY_UNIT_SCALE    1000 /* scale from driver unit to powercap unit */
 enum unit_type {
        ARBITRARY_UNIT, /* no translation */
        POWER_UNIT,
@@ -158,6 +158,7 @@ struct rapl_domain {
        struct rapl_power_limit rpl[NR_POWER_LIMITS];
        u64 attr_map; /* track capabilities */
        unsigned int state;
+       unsigned int domain_energy_unit;
        int package_id;
 };
 #define power_zone_to_rapl_domain(_zone) \
@@ -190,6 +191,7 @@ struct rapl_defaults {
        void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
        u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
                                bool to_raw);
+       unsigned int dram_domain_energy_unit;
 };
 static struct rapl_defaults *rapl_defaults;
 
@@ -227,7 +229,8 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
 static int rapl_write_data_raw(struct rapl_domain *rd,
                        enum rapl_primitives prim,
                        unsigned long long value);
-static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
+static u64 rapl_unit_xlate(struct rapl_domain *rd, int package,
+                       enum unit_type type, u64 value,
                        int to_raw);
 static void package_power_limit_irq_save(int package_id);
 
@@ -305,7 +308,9 @@ static int get_energy_counter(struct powercap_zone *power_zone, u64 *energy_raw)
 
 static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy)
 {
-       *energy = rapl_unit_xlate(0, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);
+       struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev);
+
+       *energy = rapl_unit_xlate(rd, 0, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);
        return 0;
 }
 
@@ -639,6 +644,11 @@ static void rapl_init_domains(struct rapl_package *rp)
                        rd->msrs[4] = MSR_DRAM_POWER_INFO;
                        rd->rpl[0].prim_id = PL1_ENABLE;
                        rd->rpl[0].name = pl1_name;
+                       rd->domain_energy_unit =
+                               rapl_defaults->dram_domain_energy_unit;
+                       if (rd->domain_energy_unit)
+                               pr_info("DRAM domain energy unit %dpj\n",
+                                       rd->domain_energy_unit);
                        break;
                }
                if (mask) {
@@ -648,11 +658,13 @@ static void rapl_init_domains(struct rapl_package *rp)
        }
 }
 
-static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
+static u64 rapl_unit_xlate(struct rapl_domain *rd, int package,
+                       enum unit_type type, u64 value,
                        int to_raw)
 {
        u64 units = 1;
        struct rapl_package *rp;
+       u64 scale = 1;
 
        rp = find_package_by_id(package);
        if (!rp)
@@ -663,7 +675,12 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
                units = rp->power_unit;
                break;
        case ENERGY_UNIT:
-               units = rp->energy_unit;
+               scale = ENERGY_UNIT_SCALE;
+               /* per domain unit takes precedence */
+               if (rd && rd->domain_energy_unit)
+                       units = rd->domain_energy_unit;
+               else
+                       units = rp->energy_unit;
                break;
        case TIME_UNIT:
                return rapl_defaults->compute_time_window(rp, value, to_raw);
@@ -673,11 +690,11 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
        };
 
        if (to_raw)
-               return div64_u64(value, units);
+               return div64_u64(value, units) * scale;
 
        value *= units;
 
-       return value;
+       return div64_u64(value, scale);
 }
 
 /* in the order of enum rapl_primitives */
@@ -773,7 +790,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
        final = value & rp->mask;
        final = final >> rp->shift;
        if (xlate)
-               *data = rapl_unit_xlate(rd->package_id, rp->unit, final, 0);
+               *data = rapl_unit_xlate(rd, rd->package_id, rp->unit, final, 0);
        else
                *data = final;
 
@@ -799,7 +816,7 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
                        "failed to read msr 0x%x on cpu %d\n", msr, cpu);
                return -EIO;
        }
-       value = rapl_unit_xlate(rd->package_id, rp->unit, value, 1);
+       value = rapl_unit_xlate(rd, rd->package_id, rp->unit, value, 1);
        msr_val &= ~rp->mask;
        msr_val |= value << rp->shift;
        if (wrmsrl_safe_on_cpu(cpu, msr, msr_val)) {
@@ -818,7 +835,7 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
  * calculate units differ on different CPUs.
  * We convert the units to below format based on CPUs.
  * i.e.
- * energy unit: microJoules : Represented in microJoules by default
+ * energy unit: picoJoules  : Represented in picoJoules by default
  * power unit : microWatts  : Represented in milliWatts by default
  * time unit  : microseconds: Represented in seconds by default
  */
@@ -834,7 +851,7 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
        }
 
        value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
-       rp->energy_unit = 1000000 / (1 << value);
+       rp->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
 
        value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
        rp->power_unit = 1000000 / (1 << value);
@@ -842,7 +859,7 @@ static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
        value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
        rp->time_unit = 1000000 / (1 << value);
 
-       pr_debug("Core CPU package %d energy=%duJ, time=%dus, power=%duW\n",
+       pr_debug("Core CPU package %d energy=%dpJ, time=%dus, power=%duW\n",
                rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
 
        return 0;
@@ -859,7 +876,7 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
                return -ENODEV;
        }
        value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
-       rp->energy_unit = 1 << value;
+       rp->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
 
        value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
        rp->power_unit = (1 << value) * 1000;
@@ -867,7 +884,7 @@ static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
        value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
        rp->time_unit = 1000000 / (1 << value);
 
-       pr_debug("Atom package %d energy=%duJ, time=%dus, power=%duW\n",
+       pr_debug("Atom package %d energy=%dpJ, time=%dus, power=%duW\n",
                rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
 
        return 0;
@@ -1017,6 +1034,13 @@ static const struct rapl_defaults rapl_defaults_core = {
        .compute_time_window = rapl_compute_time_window_core,
 };
 
+static const struct rapl_defaults rapl_defaults_hsw_server = {
+       .check_unit = rapl_check_unit_core,
+       .set_floor_freq = set_floor_freq_default,
+       .compute_time_window = rapl_compute_time_window_core,
+       .dram_domain_energy_unit = 15300,
+};
+
 static const struct rapl_defaults rapl_defaults_atom = {
        .check_unit = rapl_check_unit_atom,
        .set_floor_freq = set_floor_freq_atom,
@@ -1037,7 +1061,7 @@ static const struct x86_cpu_id rapl_ids[] = {
        RAPL_CPU(0x3a, rapl_defaults_core),/* Ivy Bridge */
        RAPL_CPU(0x3c, rapl_defaults_core),/* Haswell */
        RAPL_CPU(0x3d, rapl_defaults_core),/* Broadwell */
-       RAPL_CPU(0x3f, rapl_defaults_core),/* Haswell */
+       RAPL_CPU(0x3f, rapl_defaults_hsw_server),/* Haswell servers */
        RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */
        RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */
        RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */
index b899947..a4a8a6d 100644 (file)
@@ -1839,10 +1839,12 @@ static int _regulator_do_enable(struct regulator_dev *rdev)
        }
 
        if (rdev->ena_pin) {
-               ret = regulator_ena_gpio_ctrl(rdev, true);
-               if (ret < 0)
-                       return ret;
-               rdev->ena_gpio_state = 1;
+               if (!rdev->ena_gpio_state) {
+                       ret = regulator_ena_gpio_ctrl(rdev, true);
+                       if (ret < 0)
+                               return ret;
+                       rdev->ena_gpio_state = 1;
+               }
        } else if (rdev->desc->ops->enable) {
                ret = rdev->desc->ops->enable(rdev);
                if (ret < 0)
@@ -1939,10 +1941,12 @@ static int _regulator_do_disable(struct regulator_dev *rdev)
        trace_regulator_disable(rdev_get_name(rdev));
 
        if (rdev->ena_pin) {
-               ret = regulator_ena_gpio_ctrl(rdev, false);
-               if (ret < 0)
-                       return ret;
-               rdev->ena_gpio_state = 0;
+               if (rdev->ena_gpio_state) {
+                       ret = regulator_ena_gpio_ctrl(rdev, false);
+                       if (ret < 0)
+                               return ret;
+                       rdev->ena_gpio_state = 0;
+               }
 
        } else if (rdev->desc->ops->disable) {
                ret = rdev->desc->ops->disable(rdev);
@@ -3444,13 +3448,6 @@ static umode_t regulator_attr_is_visible(struct kobject *kobj,
        if (attr == &dev_attr_requested_microamps.attr)
                return rdev->desc->type == REGULATOR_CURRENT ? mode : 0;
 
-       /* all the other attributes exist to support constraints;
-        * don't show them if there are no constraints, or if the
-        * relevant supporting methods are missing.
-        */
-       if (!rdev->constraints)
-               return 0;
-
        /* constraints need specific supporting methods */
        if (attr == &dev_attr_min_microvolts.attr ||
            attr == &dev_attr_max_microvolts.attr)
@@ -3633,12 +3630,6 @@ regulator_register(const struct regulator_desc *regulator_desc,
                                 config->ena_gpio, ret);
                        goto wash;
                }
-
-               if (config->ena_gpio_flags & GPIOF_OUT_INIT_HIGH)
-                       rdev->ena_gpio_state = 1;
-
-               if (config->ena_gpio_invert)
-                       rdev->ena_gpio_state = !rdev->ena_gpio_state;
        }
 
        /* set regulator constraints */
@@ -3807,9 +3798,11 @@ int regulator_suspend_finish(void)
        list_for_each_entry(rdev, &regulator_list, list) {
                mutex_lock(&rdev->mutex);
                if (rdev->use_count > 0  || rdev->constraints->always_on) {
-                       error = _regulator_do_enable(rdev);
-                       if (error)
-                               ret = error;
+                       if (!_regulator_is_enabled(rdev)) {
+                               error = _regulator_do_enable(rdev);
+                               if (error)
+                                       ret = error;
+                       }
                } else {
                        if (!have_full_constraints())
                                goto unlock;
index bc61001..f0489cb 100644 (file)
@@ -152,6 +152,15 @@ static int da9210_i2c_probe(struct i2c_client *i2c,
        config.regmap = chip->regmap;
        config.of_node = dev->of_node;
 
+       /* Mask all interrupt sources to deassert interrupt line */
+       error = regmap_write(chip->regmap, DA9210_REG_MASK_A, ~0);
+       if (!error)
+               error = regmap_write(chip->regmap, DA9210_REG_MASK_B, ~0);
+       if (error) {
+               dev_err(&i2c->dev, "Failed to write to mask reg: %d\n", error);
+               return error;
+       }
+
        rdev = devm_regulator_register(&i2c->dev, &da9210_reg, &config);
        if (IS_ERR(rdev)) {
                dev_err(&i2c->dev, "Failed to register DA9210 regulator\n");
index 9205f43..1819831 100644 (file)
@@ -1572,6 +1572,10 @@ static int palmas_regulators_probe(struct platform_device *pdev)
        if (!pmic)
                return -ENOMEM;
 
+       if (of_device_is_compatible(node, "ti,tps659038-pmic"))
+               palmas_generic_regs_info[PALMAS_REG_REGEN2].ctrl_addr =
+                                                       TPS659038_REGEN2_CTRL;
+
        pmic->dev = &pdev->dev;
        pmic->palmas = palmas;
        palmas->pmic = pmic;
index 1f93b75..3fd4435 100644 (file)
@@ -235,6 +235,7 @@ static const struct regulator_desc rk808_reg[] = {
                .vsel_mask = RK808_LDO_VSEL_MASK,
                .enable_reg = RK808_LDO_EN_REG,
                .enable_mask = BIT(0),
+               .enable_time = 400,
                .owner = THIS_MODULE,
        }, {
                .name = "LDO_REG2",
@@ -249,6 +250,7 @@ static const struct regulator_desc rk808_reg[] = {
                .vsel_mask = RK808_LDO_VSEL_MASK,
                .enable_reg = RK808_LDO_EN_REG,
                .enable_mask = BIT(1),
+               .enable_time = 400,
                .owner = THIS_MODULE,
        }, {
                .name = "LDO_REG3",
@@ -263,6 +265,7 @@ static const struct regulator_desc rk808_reg[] = {
                .vsel_mask = RK808_BUCK4_VSEL_MASK,
                .enable_reg = RK808_LDO_EN_REG,
                .enable_mask = BIT(2),
+               .enable_time = 400,
                .owner = THIS_MODULE,
        }, {
                .name = "LDO_REG4",
@@ -277,6 +280,7 @@ static const struct regulator_desc rk808_reg[] = {
                .vsel_mask = RK808_LDO_VSEL_MASK,
                .enable_reg = RK808_LDO_EN_REG,
                .enable_mask = BIT(3),
+               .enable_time = 400,
                .owner = THIS_MODULE,
        }, {
                .name = "LDO_REG5",
@@ -291,6 +295,7 @@ static const struct regulator_desc rk808_reg[] = {
                .vsel_mask = RK808_LDO_VSEL_MASK,
                .enable_reg = RK808_LDO_EN_REG,
                .enable_mask = BIT(4),
+               .enable_time = 400,
                .owner = THIS_MODULE,
        }, {
                .name = "LDO_REG6",
@@ -305,6 +310,7 @@ static const struct regulator_desc rk808_reg[] = {
                .vsel_mask = RK808_LDO_VSEL_MASK,
                .enable_reg = RK808_LDO_EN_REG,
                .enable_mask = BIT(5),
+               .enable_time = 400,
                .owner = THIS_MODULE,
        }, {
                .name = "LDO_REG7",
@@ -319,6 +325,7 @@ static const struct regulator_desc rk808_reg[] = {
                .vsel_mask = RK808_LDO_VSEL_MASK,
                .enable_reg = RK808_LDO_EN_REG,
                .enable_mask = BIT(6),
+               .enable_time = 400,
                .owner = THIS_MODULE,
        }, {
                .name = "LDO_REG8",
@@ -333,6 +340,7 @@ static const struct regulator_desc rk808_reg[] = {
                .vsel_mask = RK808_LDO_VSEL_MASK,
                .enable_reg = RK808_LDO_EN_REG,
                .enable_mask = BIT(7),
+               .enable_time = 400,
                .owner = THIS_MODULE,
        }, {
                .name = "SWITCH_REG1",
index e2cffe0..fb991ec 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/err.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/driver.h>
 #include <linux/regulator/machine.h>
index 92f6af6..73354ee 100644 (file)
@@ -951,6 +951,7 @@ static int rpmsg_probe(struct virtio_device *vdev)
        void *bufs_va;
        int err = 0, i;
        size_t total_buf_space;
+       bool notify;
 
        vrp = kzalloc(sizeof(*vrp), GFP_KERNEL);
        if (!vrp)
@@ -1030,8 +1031,22 @@ static int rpmsg_probe(struct virtio_device *vdev)
                }
        }
 
+       /*
+        * Prepare to kick but don't notify yet - we can't do this before
+        * device is ready.
+        */
+       notify = virtqueue_kick_prepare(vrp->rvq);
+
+       /* From this point on, we can notify and get callbacks. */
+       virtio_device_ready(vdev);
+
        /* tell the remote processor it can start sending messages */
-       virtqueue_kick(vrp->rvq);
+       /*
+        * this might be concurrent with callbacks, but we are only
+        * doing notify, not a full kick here, so that's ok.
+        */
+       if (notify)
+               virtqueue_notify(vrp->rvq);
 
        dev_info(&vdev->dev, "rpmsg host is online\n");
 
index 472a5ad..c29ba7e 100644 (file)
@@ -55,7 +55,7 @@ static int rtc_suspend(struct device *dev)
        struct timespec64       delta, delta_delta;
        int err;
 
-       if (has_persistent_clock())
+       if (timekeeping_rtc_skipsuspend())
                return 0;
 
        if (strcmp(dev_name(&rtc->dev), CONFIG_RTC_HCTOSYS_DEVICE) != 0)
@@ -102,7 +102,7 @@ static int rtc_resume(struct device *dev)
        struct timespec64       sleep_time;
        int err;
 
-       if (has_persistent_clock())
+       if (timekeeping_rtc_skipresume())
                return 0;
 
        rtc_hctosys_ret = -ENODEV;
@@ -117,10 +117,6 @@ static int rtc_resume(struct device *dev)
                return 0;
        }
 
-       if (rtc_valid_tm(&tm) != 0) {
-               pr_debug("%s:  bogus resume time\n", dev_name(&rtc->dev));
-               return 0;
-       }
        new_rtc.tv_sec = rtc_tm_to_time64(&tm);
        new_rtc.tv_nsec = 0;
 
index 37215cf..d43ee40 100644 (file)
@@ -72,7 +72,11 @@ int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm)
                err = -ENODEV;
        else if (rtc->ops->set_time)
                err = rtc->ops->set_time(rtc->dev.parent, tm);
-       else if (rtc->ops->set_mmss) {
+       else if (rtc->ops->set_mmss64) {
+               time64_t secs64 = rtc_tm_to_time64(tm);
+
+               err = rtc->ops->set_mmss64(rtc->dev.parent, secs64);
+       } else if (rtc->ops->set_mmss) {
                time64_t secs64 = rtc_tm_to_time64(tm);
                err = rtc->ops->set_mmss(rtc->dev.parent, secs64);
        } else
@@ -96,6 +100,8 @@ int rtc_set_mmss(struct rtc_device *rtc, unsigned long secs)
 
        if (!rtc->ops)
                err = -ENODEV;
+       else if (rtc->ops->set_mmss64)
+               err = rtc->ops->set_mmss64(rtc->dev.parent, secs);
        else if (rtc->ops->set_mmss)
                err = rtc->ops->set_mmss(rtc->dev.parent, secs);
        else if (rtc->ops->read_time && rtc->ops->set_time) {
index 1d0340f..9b725c5 100644 (file)
 /*
  * RTC clock functions and device struct declaration
  */
-static int ab3100_rtc_set_mmss(struct device *dev, unsigned long secs)
+static int ab3100_rtc_set_mmss(struct device *dev, time64_t secs)
 {
        u8 regs[] = {AB3100_TI0, AB3100_TI1, AB3100_TI2,
                     AB3100_TI3, AB3100_TI4, AB3100_TI5};
        unsigned char buf[6];
-       u64 fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2;
+       u64 hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2;
        int err = 0;
        int i;
 
-       buf[0] = (fat_time) & 0xFF;
-       buf[1] = (fat_time >> 8) & 0xFF;
-       buf[2] = (fat_time >> 16) & 0xFF;
-       buf[3] = (fat_time >> 24) & 0xFF;
-       buf[4] = (fat_time >> 32) & 0xFF;
-       buf[5] = (fat_time >> 40) & 0xFF;
+       buf[0] = (hw_counter) & 0xFF;
+       buf[1] = (hw_counter >> 8) & 0xFF;
+       buf[2] = (hw_counter >> 16) & 0xFF;
+       buf[3] = (hw_counter >> 24) & 0xFF;
+       buf[4] = (hw_counter >> 32) & 0xFF;
+       buf[5] = (hw_counter >> 40) & 0xFF;
 
        for (i = 0; i < 6; i++) {
                err = abx500_set_register_interruptible(dev, 0,
@@ -75,7 +75,7 @@ static int ab3100_rtc_set_mmss(struct device *dev, unsigned long secs)
 
 static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-       unsigned long time;
+       time64_t time;
        u8 rtcval;
        int err;
 
@@ -88,7 +88,7 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
                dev_info(dev, "clock not set (lost power)");
                return -EINVAL;
        } else {
-               u64 fat_time;
+               u64 hw_counter;
                u8 buf[6];
 
                /* Read out time registers */
@@ -98,22 +98,21 @@ static int ab3100_rtc_read_time(struct device *dev, struct rtc_time *tm)
                if (err != 0)
                        return err;
 
-               fat_time = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) |
+               hw_counter = ((u64) buf[5] << 40) | ((u64) buf[4] << 32) |
                        ((u64) buf[3] << 24) | ((u64) buf[2] << 16) |
                        ((u64) buf[1] << 8) | (u64) buf[0];
-               time = (unsigned long) (fat_time /
-                                       (u64) (AB3100_RTC_CLOCK_RATE * 2));
+               time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2);
        }
 
-       rtc_time_to_tm(time, tm);
+       rtc_time64_to_tm(time, tm);
 
        return rtc_valid_tm(tm);
 }
 
 static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
-       unsigned long time;
-       u64 fat_time;
+       time64_t time;
+       u64 hw_counter;
        u8 buf[6];
        u8 rtcval;
        int err;
@@ -134,11 +133,11 @@ static int ab3100_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
                                                     AB3100_AL0, buf, 4);
        if (err)
                return err;
-       fat_time = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) |
+       hw_counter = ((u64) buf[3] << 40) | ((u64) buf[2] << 32) |
                ((u64) buf[1] << 24) | ((u64) buf[0] << 16);
-       time = (unsigned long) (fat_time / (u64) (AB3100_RTC_CLOCK_RATE * 2));
+       time = hw_counter / (u64) (AB3100_RTC_CLOCK_RATE * 2);
 
-       rtc_time_to_tm(time, &alarm->time);
+       rtc_time64_to_tm(time, &alarm->time);
 
        return rtc_valid_tm(&alarm->time);
 }
@@ -147,17 +146,17 @@ static int ab3100_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
        u8 regs[] = {AB3100_AL0, AB3100_AL1, AB3100_AL2, AB3100_AL3};
        unsigned char buf[4];
-       unsigned long secs;
-       u64 fat_time;
+       time64_t secs;
+       u64 hw_counter;
        int err;
        int i;
 
-       rtc_tm_to_time(&alarm->time, &secs);
-       fat_time = (u64) secs * AB3100_RTC_CLOCK_RATE * 2;
-       buf[0] = (fat_time >> 16) & 0xFF;
-       buf[1] = (fat_time >> 24) & 0xFF;
-       buf[2] = (fat_time >> 32) & 0xFF;
-       buf[3] = (fat_time >> 40) & 0xFF;
+       secs = rtc_tm_to_time64(&alarm->time);
+       hw_counter = secs * AB3100_RTC_CLOCK_RATE * 2;
+       buf[0] = (hw_counter >> 16) & 0xFF;
+       buf[1] = (hw_counter >> 24) & 0xFF;
+       buf[2] = (hw_counter >> 32) & 0xFF;
+       buf[3] = (hw_counter >> 40) & 0xFF;
 
        /* Set the alarm */
        for (i = 0; i < 4; i++) {
@@ -193,7 +192,7 @@ static int ab3100_rtc_irq_enable(struct device *dev, unsigned int enabled)
 
 static const struct rtc_class_ops ab3100_rtc_ops = {
        .read_time      = ab3100_rtc_read_time,
-       .set_mmss       = ab3100_rtc_set_mmss,
+       .set_mmss64     = ab3100_rtc_set_mmss,
        .read_alarm     = ab3100_rtc_read_alarm,
        .set_alarm      = ab3100_rtc_set_alarm,
        .alarm_irq_enable = ab3100_rtc_irq_enable,
index b4f7744..b283a1a 100644 (file)
@@ -324,7 +324,7 @@ static irqreturn_t at91_rtc_interrupt(int irq, void *dev_id)
 
                ret = IRQ_HANDLED;
        }
-       spin_lock(&suspended_lock);
+       spin_unlock(&suspended_lock);
 
        return ret;
 }
index 5bce904..32df1d8 100644 (file)
@@ -83,20 +83,19 @@ static int mc13xxx_rtc_read_time(struct device *dev, struct rtc_time *tm)
                        return ret;
        } while (days1 != days2);
 
-       rtc_time_to_tm(days1 * SEC_PER_DAY + seconds, tm);
+       rtc_time64_to_tm((time64_t)days1 * SEC_PER_DAY + seconds, tm);
 
        return rtc_valid_tm(tm);
 }
 
-static int mc13xxx_rtc_set_mmss(struct device *dev, unsigned long secs)
+static int mc13xxx_rtc_set_mmss(struct device *dev, time64_t secs)
 {
        struct mc13xxx_rtc *priv = dev_get_drvdata(dev);
        unsigned int seconds, days;
        unsigned int alarmseconds;
        int ret;
 
-       seconds = secs % SEC_PER_DAY;
-       days = secs / SEC_PER_DAY;
+       days = div_s64_rem(secs, SEC_PER_DAY, &seconds);
 
        mc13xxx_lock(priv->mc13xxx);
 
@@ -159,7 +158,7 @@ static int mc13xxx_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
        struct mc13xxx_rtc *priv = dev_get_drvdata(dev);
        unsigned seconds, days;
-       unsigned long s1970;
+       time64_t s1970;
        int enabled, pending;
        int ret;
 
@@ -189,10 +188,10 @@ out:
        alarm->enabled = enabled;
        alarm->pending = pending;
 
-       s1970 = days * SEC_PER_DAY + seconds;
+       s1970 = (time64_t)days * SEC_PER_DAY + seconds;
 
-       rtc_time_to_tm(s1970, &alarm->time);
-       dev_dbg(dev, "%s: %lu\n", __func__, s1970);
+       rtc_time64_to_tm(s1970, &alarm->time);
+       dev_dbg(dev, "%s: %lld\n", __func__, (long long)s1970);
 
        return 0;
 }
@@ -200,8 +199,8 @@ out:
 static int mc13xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
 {
        struct mc13xxx_rtc *priv = dev_get_drvdata(dev);
-       unsigned long s1970;
-       unsigned seconds, days;
+       time64_t s1970;
+       u32 seconds, days;
        int ret;
 
        mc13xxx_lock(priv->mc13xxx);
@@ -215,20 +214,17 @@ static int mc13xxx_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alarm)
        if (unlikely(ret))
                goto out;
 
-       ret = rtc_tm_to_time(&alarm->time, &s1970);
-       if (unlikely(ret))
-               goto out;
+       s1970 = rtc_tm_to_time64(&alarm->time);
 
-       dev_dbg(dev, "%s: o%2.s %lu\n", __func__, alarm->enabled ? "n" : "ff",
-                       s1970);
+       dev_dbg(dev, "%s: o%2.s %lld\n", __func__, alarm->enabled ? "n" : "ff",
+                       (long long)s1970);
 
        ret = mc13xxx_rtc_irq_enable_unlocked(dev, alarm->enabled,
                        MC13XXX_IRQ_TODA);
        if (unlikely(ret))
                goto out;
 
-       seconds = s1970 % SEC_PER_DAY;
-       days = s1970 / SEC_PER_DAY;
+       days = div_s64_rem(s1970, SEC_PER_DAY, &seconds);
 
        ret = mc13xxx_reg_write(priv->mc13xxx, MC13XXX_RTCDAYA, days);
        if (unlikely(ret))
@@ -268,7 +264,7 @@ static irqreturn_t mc13xxx_rtc_update_handler(int irq, void *dev)
 
 static const struct rtc_class_ops mc13xxx_rtc_ops = {
        .read_time = mc13xxx_rtc_read_time,
-       .set_mmss = mc13xxx_rtc_set_mmss,
+       .set_mmss64 = mc13xxx_rtc_set_mmss,
        .read_alarm = mc13xxx_rtc_read_alarm,
        .set_alarm = mc13xxx_rtc_set_alarm,
        .alarm_irq_enable = mc13xxx_rtc_alarm_irq_enable,
index e2436d1..3a6fd3a 100644 (file)
@@ -413,8 +413,8 @@ static void rtc_mrst_do_remove(struct device *dev)
        mrst->dev = NULL;
 }
 
-#ifdef CONFIG_PM
-static int mrst_suspend(struct device *dev, pm_message_t mesg)
+#ifdef CONFIG_PM_SLEEP
+static int mrst_suspend(struct device *dev)
 {
        struct mrst_rtc *mrst = dev_get_drvdata(dev);
        unsigned char   tmp;
@@ -453,7 +453,7 @@ static int mrst_suspend(struct device *dev, pm_message_t mesg)
  */
 static inline int mrst_poweroff(struct device *dev)
 {
-       return mrst_suspend(dev, PMSG_HIBERNATE);
+       return mrst_suspend(dev);
 }
 
 static int mrst_resume(struct device *dev)
@@ -490,9 +490,11 @@ static int mrst_resume(struct device *dev)
        return 0;
 }
 
+static SIMPLE_DEV_PM_OPS(mrst_pm_ops, mrst_suspend, mrst_resume);
+#define MRST_PM_OPS (&mrst_pm_ops)
+
 #else
-#define        mrst_suspend    NULL
-#define        mrst_resume     NULL
+#define MRST_PM_OPS NULL
 
 static inline int mrst_poweroff(struct device *dev)
 {
@@ -529,9 +531,8 @@ static struct platform_driver vrtc_mrst_platform_driver = {
        .remove         = vrtc_mrst_platform_remove,
        .shutdown       = vrtc_mrst_platform_shutdown,
        .driver = {
-               .name           = (char *) driver_name,
-               .suspend        = mrst_suspend,
-               .resume         = mrst_resume,
+               .name   = driver_name,
+               .pm     = MRST_PM_OPS,
        }
 };
 
index 3c3f8d1..09d422b 100644 (file)
@@ -106,7 +106,7 @@ static inline int is_imx1_rtc(struct rtc_plat_data *data)
  * This function is used to obtain the RTC time or the alarm value in
  * second.
  */
-static u32 get_alarm_or_time(struct device *dev, int time_alarm)
+static time64_t get_alarm_or_time(struct device *dev, int time_alarm)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
@@ -129,29 +129,28 @@ static u32 get_alarm_or_time(struct device *dev, int time_alarm)
        hr = hr_min >> 8;
        min = hr_min & 0xff;
 
-       return (((day * 24 + hr) * 60) + min) * 60 + sec;
+       return ((((time64_t)day * 24 + hr) * 60) + min) * 60 + sec;
 }
 
 /*
  * This function sets the RTC alarm value or the time value.
  */
-static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time)
+static void set_alarm_or_time(struct device *dev, int time_alarm, time64_t time)
 {
-       u32 day, hr, min, sec, temp;
+       u32 tod, day, hr, min, sec, temp;
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
        void __iomem *ioaddr = pdata->ioaddr;
 
-       day = time / 86400;
-       time -= day * 86400;
+       day = div_s64_rem(time, 86400, &tod);
 
        /* time is within a day now */
-       hr = time / 3600;
-       time -= hr * 3600;
+       hr = tod / 3600;
+       tod -= hr * 3600;
 
        /* time is within an hour now */
-       min = time / 60;
-       sec = time - min * 60;
+       min = tod / 60;
+       sec = tod - min * 60;
 
        temp = (hr << 8) + min;
 
@@ -173,29 +172,18 @@ static void set_alarm_or_time(struct device *dev, int time_alarm, u32 time)
  * This function updates the RTC alarm registers and then clears all the
  * interrupt status bits.
  */
-static int rtc_update_alarm(struct device *dev, struct rtc_time *alrm)
+static void rtc_update_alarm(struct device *dev, struct rtc_time *alrm)
 {
-       struct rtc_time alarm_tm, now_tm;
-       unsigned long now, time;
+       time64_t time;
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
        void __iomem *ioaddr = pdata->ioaddr;
 
-       now = get_alarm_or_time(dev, MXC_RTC_TIME);
-       rtc_time_to_tm(now, &now_tm);
-       alarm_tm.tm_year = now_tm.tm_year;
-       alarm_tm.tm_mon = now_tm.tm_mon;
-       alarm_tm.tm_mday = now_tm.tm_mday;
-       alarm_tm.tm_hour = alrm->tm_hour;
-       alarm_tm.tm_min = alrm->tm_min;
-       alarm_tm.tm_sec = alrm->tm_sec;
-       rtc_tm_to_time(&alarm_tm, &time);
+       time = rtc_tm_to_time64(alrm);
 
        /* clear all the interrupt status bits */
        writew(readw(ioaddr + RTC_RTCISR), ioaddr + RTC_RTCISR);
        set_alarm_or_time(dev, MXC_RTC_ALARM, time);
-
-       return 0;
 }
 
 static void mxc_rtc_irq_enable(struct device *dev, unsigned int bit,
@@ -283,14 +271,14 @@ static int mxc_rtc_alarm_irq_enable(struct device *dev, unsigned int enabled)
  */
 static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
-       u32 val;
+       time64_t val;
 
        /* Avoid roll-over from reading the different registers */
        do {
                val = get_alarm_or_time(dev, MXC_RTC_TIME);
        } while (val != get_alarm_or_time(dev, MXC_RTC_TIME));
 
-       rtc_time_to_tm(val, tm);
+       rtc_time64_to_tm(val, tm);
 
        return 0;
 }
@@ -298,7 +286,7 @@ static int mxc_rtc_read_time(struct device *dev, struct rtc_time *tm)
 /*
  * This function sets the internal RTC time based on tm in Gregorian date.
  */
-static int mxc_rtc_set_mmss(struct device *dev, unsigned long time)
+static int mxc_rtc_set_mmss(struct device *dev, time64_t time)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
@@ -309,9 +297,9 @@ static int mxc_rtc_set_mmss(struct device *dev, unsigned long time)
        if (is_imx1_rtc(pdata)) {
                struct rtc_time tm;
 
-               rtc_time_to_tm(time, &tm);
+               rtc_time64_to_tm(time, &tm);
                tm.tm_year = 70;
-               rtc_tm_to_time(&tm, &time);
+               time = rtc_tm_to_time64(&tm);
        }
 
        /* Avoid roll-over from reading the different registers */
@@ -333,7 +321,7 @@ static int mxc_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
        void __iomem *ioaddr = pdata->ioaddr;
 
-       rtc_time_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time);
+       rtc_time64_to_tm(get_alarm_or_time(dev, MXC_RTC_ALARM), &alrm->time);
        alrm->pending = ((readw(ioaddr + RTC_RTCISR) & RTC_ALM_BIT)) ? 1 : 0;
 
        return 0;
@@ -346,11 +334,8 @@ static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
        struct platform_device *pdev = to_platform_device(dev);
        struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
-       int ret;
 
-       ret = rtc_update_alarm(dev, &alrm->time);
-       if (ret)
-               return ret;
+       rtc_update_alarm(dev, &alrm->time);
 
        memcpy(&pdata->g_rtc_alarm, &alrm->time, sizeof(struct rtc_time));
        mxc_rtc_irq_enable(dev, RTC_ALM_BIT, alrm->enabled);
@@ -362,7 +347,7 @@ static int mxc_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 static struct rtc_class_ops mxc_rtc_ops = {
        .release                = mxc_rtc_release,
        .read_time              = mxc_rtc_read_time,
-       .set_mmss               = mxc_rtc_set_mmss,
+       .set_mmss64             = mxc_rtc_set_mmss,
        .read_alarm             = mxc_rtc_read_alarm,
        .set_alarm              = mxc_rtc_set_alarm,
        .alarm_irq_enable       = mxc_rtc_alarm_irq_enable,
index 4241eea..f4cf685 100644 (file)
@@ -849,6 +849,7 @@ static struct s3c_rtc_data const s3c2443_rtc_data = {
 
 static struct s3c_rtc_data const s3c6410_rtc_data = {
        .max_user_freq          = 32768,
+       .needs_src_clk          = true,
        .irq_handler            = s3c6410_rtc_irq,
        .set_freq               = s3c6410_rtc_setfreq,
        .enable_tick            = s3c6410_rtc_enable_tick,
index 8f86fa9..3a2da4c 100644 (file)
 #include <linux/rtc.h>
 #include <linux/platform_device.h>
 
+static int test_mmss64;
+module_param(test_mmss64, int, 0644);
+MODULE_PARM_DESC(test_mmss64, "Test struct rtc_class_ops.set_mmss64().");
+
 static struct platform_device *test0 = NULL, *test1 = NULL;
 
 static int test_rtc_read_alarm(struct device *dev,
@@ -30,7 +34,13 @@ static int test_rtc_set_alarm(struct device *dev,
 static int test_rtc_read_time(struct device *dev,
        struct rtc_time *tm)
 {
-       rtc_time_to_tm(get_seconds(), tm);
+       rtc_time64_to_tm(ktime_get_real_seconds(), tm);
+       return 0;
+}
+
+static int test_rtc_set_mmss64(struct device *dev, time64_t secs)
+{
+       dev_info(dev, "%s, secs = %lld\n", __func__, (long long)secs);
        return 0;
 }
 
@@ -55,7 +65,7 @@ static int test_rtc_alarm_irq_enable(struct device *dev, unsigned int enable)
        return 0;
 }
 
-static const struct rtc_class_ops test_rtc_ops = {
+static struct rtc_class_ops test_rtc_ops = {
        .proc = test_rtc_proc,
        .read_time = test_rtc_read_time,
        .read_alarm = test_rtc_read_alarm,
@@ -101,6 +111,11 @@ static int test_probe(struct platform_device *plat_dev)
        int err;
        struct rtc_device *rtc;
 
+       if (test_mmss64) {
+               test_rtc_ops.set_mmss64 = test_rtc_set_mmss64;
+               test_rtc_ops.set_mmss = NULL;
+       }
+
        rtc = devm_rtc_device_register(&plat_dev->dev, "test",
                                &test_rtc_ops, THIS_MODULE);
        if (IS_ERR(rtc)) {
index eb71872..7728d5e 100644 (file)
@@ -11,7 +11,7 @@
  * rtc_set_ntp_time - Save NTP synchronized time to the RTC
  * @now: Current time of day
  *
- * Replacement for the NTP platform function update_persistent_clock
+ * Replacement for the NTP platform function update_persistent_clock64
  * that stores time for later retrieval by rtc_hctosys.
  *
  * Returns 0 on successful RTC update, -ENODEV if a RTC update is not
@@ -35,7 +35,10 @@ int rtc_set_ntp_time(struct timespec64 now)
        if (rtc) {
                /* rtc_hctosys exclusively uses UTC, so we call set_time here,
                 * not set_mmss. */
-               if (rtc->ops && (rtc->ops->set_time || rtc->ops->set_mmss))
+               if (rtc->ops &&
+                   (rtc->ops->set_time ||
+                    rtc->ops->set_mmss64 ||
+                    rtc->ops->set_mmss))
                        err = rtc_set_time(rtc, &tm);
                rtc_class_close(rtc);
        }
index 96128cb..da21281 100644 (file)
@@ -547,7 +547,7 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char
         * parse input
         */
        num_of_segments = 0;
-       for (i = 0; ((buf[i] != '\0') && (buf[i] != '\n') && i < count); i++) {
+       for (i = 0; (i < count && (buf[i] != '\0') && (buf[i] != '\n')); i++) {
                for (j = i; (buf[j] != ':') &&
                        (buf[j] != '\0') &&
                        (buf[j] != '\n') &&
index 09db452..7497ddd 100644 (file)
@@ -92,7 +92,7 @@ bool scm_reserve_cluster(struct scm_request *scmrq)
                        add = 0;
                        continue;
                }
-               for (pos = 0; pos <= iter->aob->request.msb_count; pos++) {
+               for (pos = 0; pos < iter->aob->request.msb_count; pos++) {
                        if (clusters_intersect(req, iter->request[pos]) &&
                            (rq_data_dir(req) == WRITE ||
                             rq_data_dir(iter->request[pos]) == WRITE)) {
index a7cc618..923a2b5 100644 (file)
@@ -5734,9 +5734,9 @@ free_port:
 hba_free:
        if (phba->msix_enabled)
                pci_disable_msix(phba->pcidev);
-       iscsi_host_remove(phba->shost);
        pci_dev_put(phba->pcidev);
        iscsi_host_free(phba->shost);
+       pci_set_drvdata(pcidev, NULL);
 disable_pci:
        pci_disable_device(pcidev);
        return ret;
index 9219953..d9afc51 100644 (file)
@@ -6815,7 +6815,8 @@ static struct ata_port_operations ipr_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-       .flags          = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
+       .flags          = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA |
+                         ATA_FLAG_SAS_HOST,
        .pio_mask       = ATA_PIO4_ONLY,
        .mwdma_mask     = ATA_MWDMA2,
        .udma_mask      = ATA_UDMA6,
index 932d9cc..9c706d8 100644 (file)
@@ -547,7 +547,8 @@ static struct ata_port_operations sas_sata_ops = {
 };
 
 static struct ata_port_info sata_port_info = {
-       .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ,
+       .flags = ATA_FLAG_SATA | ATA_FLAG_PIO_DMA | ATA_FLAG_NCQ |
+                ATA_FLAG_SAS_HOST,
        .pio_mask = ATA_PIO4,
        .mwdma_mask = ATA_MWDMA2,
        .udma_mask = ATA_UDMA6,
index 62b58d3..60de662 100644 (file)
@@ -500,6 +500,7 @@ static void sas_revalidate_domain(struct work_struct *work)
        struct sas_discovery_event *ev = to_sas_discovery_event(work);
        struct asd_sas_port *port = ev->port;
        struct sas_ha_struct *ha = port->ha;
+       struct domain_device *ddev = port->port_dev;
 
        /* prevent revalidation from finding sata links in recovery */
        mutex_lock(&ha->disco_mutex);
@@ -514,8 +515,9 @@ static void sas_revalidate_domain(struct work_struct *work)
        SAS_DPRINTK("REVALIDATING DOMAIN on port %d, pid:%d\n", port->id,
                    task_pid_nr(current));
 
-       if (port->port_dev)
-               res = sas_ex_revalidate_domain(port->port_dev);
+       if (ddev && (ddev->dev_type == SAS_FANOUT_EXPANDER_DEVICE ||
+                    ddev->dev_type == SAS_EDGE_EXPANDER_DEVICE))
+               res = sas_ex_revalidate_domain(ddev);
 
        SAS_DPRINTK("done REVALIDATING DOMAIN on port %d, pid:%d, res 0x%x\n",
                    port->id, task_pid_nr(current), res);
index 99f43b7..ab4879e 100644 (file)
@@ -1596,7 +1596,7 @@ static int tcm_qla2xxx_check_initiator_node_acl(
        /*
         * Finally register the new FC Nexus with TCM
         */
-       __transport_register_session(se_nacl->se_tpg, se_nacl, se_sess, sess);
+       transport_register_session(se_nacl->se_tpg, se_nacl, se_sess, sess);
 
        return 0;
 }
index 54d7a6c..b1a2631 100644 (file)
@@ -1311,9 +1311,11 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
                                    "rejecting I/O to dead device\n");
                        ret = BLKPREP_KILL;
                        break;
-               case SDEV_QUIESCE:
                case SDEV_BLOCK:
                case SDEV_CREATED_BLOCK:
+                       ret = BLKPREP_DEFER;
+                       break;
+               case SDEV_QUIESCE:
                        /*
                         * If the devices is blocked we defer normal commands.
                         */
index 9af7841..06de340 100644 (file)
@@ -764,17 +764,17 @@ static void atmel_spi_pdc_next_xfer(struct spi_master *master,
                        (unsigned long long)xfer->rx_dma);
        }
 
-       /* REVISIT: We're waiting for ENDRX before we start the next
+       /* REVISIT: We're waiting for RXBUFF before we start the next
         * transfer because we need to handle some difficult timing
-        * issues otherwise. If we wait for ENDTX in one transfer and
-        * then starts waiting for ENDRX in the next, it's difficult
-        * to tell the difference between the ENDRX interrupt we're
-        * actually waiting for and the ENDRX interrupt of the
+        * issues otherwise. If we wait for TXBUFE in one transfer and
+        * then starts waiting for RXBUFF in the next, it's difficult
+        * to tell the difference between the RXBUFF interrupt we're
+        * actually waiting for and the RXBUFF interrupt of the
         * previous transfer.
         *
         * It should be doable, though. Just not now...
         */
-       spi_writel(as, IER, SPI_BIT(ENDRX) | SPI_BIT(OVRES));
+       spi_writel(as, IER, SPI_BIT(RXBUFF) | SPI_BIT(OVRES));
        spi_writel(as, PTCR, SPI_BIT(TXTEN) | SPI_BIT(RXTEN));
 }
 
index a0197fd..4f8c798 100644 (file)
@@ -108,7 +108,8 @@ static void dw_spi_dma_tx_done(void *arg)
 {
        struct dw_spi *dws = arg;
 
-       if (test_and_clear_bit(TX_BUSY, &dws->dma_chan_busy) & BIT(RX_BUSY))
+       clear_bit(TX_BUSY, &dws->dma_chan_busy);
+       if (test_bit(RX_BUSY, &dws->dma_chan_busy))
                return;
        dw_spi_xfer_done(dws);
 }
@@ -139,6 +140,9 @@ static struct dma_async_tx_descriptor *dw_spi_dma_prepare_tx(struct dw_spi *dws)
                                1,
                                DMA_MEM_TO_DEV,
                                DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       if (!txdesc)
+               return NULL;
+
        txdesc->callback = dw_spi_dma_tx_done;
        txdesc->callback_param = dws;
 
@@ -153,7 +157,8 @@ static void dw_spi_dma_rx_done(void *arg)
 {
        struct dw_spi *dws = arg;
 
-       if (test_and_clear_bit(RX_BUSY, &dws->dma_chan_busy) & BIT(TX_BUSY))
+       clear_bit(RX_BUSY, &dws->dma_chan_busy);
+       if (test_bit(TX_BUSY, &dws->dma_chan_busy))
                return;
        dw_spi_xfer_done(dws);
 }
@@ -184,6 +189,9 @@ static struct dma_async_tx_descriptor *dw_spi_dma_prepare_rx(struct dw_spi *dws)
                                1,
                                DMA_DEV_TO_MEM,
                                DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+       if (!rxdesc)
+               return NULL;
+
        rxdesc->callback = dw_spi_dma_rx_done;
        rxdesc->callback_param = dws;
 
index 5ba3310..6d331e0 100644 (file)
@@ -36,13 +36,13 @@ struct spi_pci_desc {
 
 static struct spi_pci_desc spi_pci_mid_desc_1 = {
        .setup = dw_spi_mid_init,
-       .num_cs = 32,
+       .num_cs = 5,
        .bus_num = 0,
 };
 
 static struct spi_pci_desc spi_pci_mid_desc_2 = {
        .setup = dw_spi_mid_init,
-       .num_cs = 4,
+       .num_cs = 2,
        .bus_num = 1,
 };
 
index 5a97a62..4847afb 100644 (file)
@@ -621,14 +621,14 @@ static void spi_hw_init(struct device *dev, struct dw_spi *dws)
        if (!dws->fifo_len) {
                u32 fifo;
 
-               for (fifo = 2; fifo <= 256; fifo++) {
+               for (fifo = 1; fifo < 256; fifo++) {
                        dw_writew(dws, DW_SPI_TXFLTR, fifo);
                        if (fifo != dw_readw(dws, DW_SPI_TXFLTR))
                                break;
                }
                dw_writew(dws, DW_SPI_TXFLTR, 0);
 
-               dws->fifo_len = (fifo == 2) ? 0 : fifo - 1;
+               dws->fifo_len = (fifo == 1) ? 0 : fifo;
                dev_dbg(dev, "Detected FIFO size: %u bytes\n", dws->fifo_len);
        }
 }
index c01567d..e649bc7 100644 (file)
@@ -459,6 +459,13 @@ static int img_spfi_transfer_one(struct spi_master *master,
        unsigned long flags;
        int ret;
 
+       if (xfer->len > SPFI_TRANSACTION_TSIZE_MASK) {
+               dev_err(spfi->dev,
+                       "Transfer length (%d) is greater than the max supported (%d)",
+                       xfer->len, SPFI_TRANSACTION_TSIZE_MASK);
+               return -EINVAL;
+       }
+
        /*
         * Stop all DMA and reset the controller if the previous transaction
         * timed-out and never completed it's DMA.
index 89ca162..ee513a8 100644 (file)
@@ -534,12 +534,12 @@ static void giveback(struct pl022 *pl022)
        pl022->cur_msg = NULL;
        pl022->cur_transfer = NULL;
        pl022->cur_chip = NULL;
-       spi_finalize_current_message(pl022->master);
 
        /* disable the SPI/SSP operation */
        writew((readw(SSP_CR1(pl022->virtbase)) &
                (~SSP_CR1_MASK_SSE)), SSP_CR1(pl022->virtbase));
 
+       spi_finalize_current_message(pl022->master);
 }
 
 /**
index ff9cdbd..2b2c359 100644 (file)
@@ -498,7 +498,7 @@ static int spi_qup_probe(struct platform_device *pdev)
        struct resource *res;
        struct device *dev;
        void __iomem *base;
-       u32 max_freq, iomode;
+       u32 max_freq, iomode, num_cs;
        int ret, irq, size;
 
        dev = &pdev->dev;
@@ -550,10 +550,11 @@ static int spi_qup_probe(struct platform_device *pdev)
        }
 
        /* use num-cs unless not present or out of range */
-       if (of_property_read_u16(dev->of_node, "num-cs",
-                       &master->num_chipselect) ||
-                       (master->num_chipselect > SPI_NUM_CHIPSELECTS))
+       if (of_property_read_u32(dev->of_node, "num-cs", &num_cs) ||
+           num_cs > SPI_NUM_CHIPSELECTS)
                master->num_chipselect = SPI_NUM_CHIPSELECTS;
+       else
+               master->num_chipselect = num_cs;
 
        master->bus_num = pdev->id;
        master->mode_bits = SPI_CPOL | SPI_CPHA | SPI_CS_HIGH | SPI_LOOP;
index 884a716..5c06168 100644 (file)
@@ -101,6 +101,7 @@ struct ti_qspi {
 #define QSPI_FLEN(n)                   ((n - 1) << 0)
 
 /* STATUS REGISTER */
+#define BUSY                           0x01
 #define WC                             0x02
 
 /* INTERRUPT REGISTER */
@@ -199,6 +200,21 @@ static void ti_qspi_restore_ctx(struct ti_qspi *qspi)
        ti_qspi_write(qspi, ctx_reg->clkctrl, QSPI_SPI_CLOCK_CNTRL_REG);
 }
 
+static inline u32 qspi_is_busy(struct ti_qspi *qspi)
+{
+       u32 stat;
+       unsigned long timeout = jiffies + QSPI_COMPLETION_TIMEOUT;
+
+       stat = ti_qspi_read(qspi, QSPI_SPI_STATUS_REG);
+       while ((stat & BUSY) && time_after(timeout, jiffies)) {
+               cpu_relax();
+               stat = ti_qspi_read(qspi, QSPI_SPI_STATUS_REG);
+       }
+
+       WARN(stat & BUSY, "qspi busy\n");
+       return stat & BUSY;
+}
+
 static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 {
        int wlen, count;
@@ -211,6 +227,9 @@ static int qspi_write_msg(struct ti_qspi *qspi, struct spi_transfer *t)
        wlen = t->bits_per_word >> 3;   /* in bytes */
 
        while (count) {
+               if (qspi_is_busy(qspi))
+                       return -EBUSY;
+
                switch (wlen) {
                case 1:
                        dev_dbg(qspi->dev, "tx cmd %08x dc %08x data %02x\n",
@@ -266,6 +285,9 @@ static int qspi_read_msg(struct ti_qspi *qspi, struct spi_transfer *t)
 
        while (count) {
                dev_dbg(qspi->dev, "rx cmd %08x dc %08x\n", cmd, qspi->dc);
+               if (qspi_is_busy(qspi))
+                       return -EBUSY;
+
                ti_qspi_write(qspi, cmd, QSPI_SPI_CMD_REG);
                if (!wait_for_completion_timeout(&qspi->transfer_complete,
                                                 QSPI_COMPLETION_TIMEOUT)) {
index c64a3e5..57a1950 100644 (file)
@@ -1105,13 +1105,14 @@ void spi_finalize_current_message(struct spi_master *master)
                                "failed to unprepare message: %d\n", ret);
                }
        }
+
+       trace_spi_message_done(mesg);
+
        master->cur_msg_prepared = false;
 
        mesg->state = NULL;
        if (mesg->complete)
                mesg->complete(mesg->context);
-
-       trace_spi_message_done(mesg);
 }
 EXPORT_SYMBOL_GPL(spi_finalize_current_message);
 
index 9800c01..3f72451 100644 (file)
@@ -426,7 +426,6 @@ static int pci171x_ai_insn_read(struct comedi_device *dev,
                                unsigned int *data)
 {
        struct pci1710_private *devpriv = dev->private;
-       unsigned int chan = CR_CHAN(insn->chanspec);
        int ret = 0;
        int i;
 
@@ -447,7 +446,7 @@ static int pci171x_ai_insn_read(struct comedi_device *dev,
                if (ret)
                        break;
 
-               ret = pci171x_ai_read_sample(dev, s, chan, &val);
+               ret = pci171x_ai_read_sample(dev, s, 0, &val);
                if (ret)
                        break;
 
index dbdea71..e856f01 100644 (file)
@@ -91,9 +91,10 @@ unsigned int comedi_isadma_disable_on_sample(unsigned int dma_chan,
                        stalled++;
                        if (stalled > 10)
                                break;
+               } else {
+                       residue = new_residue;
+                       stalled = 0;
                }
-               residue = new_residue;
-               stalled = 0;
        }
        return residue;
 }
index e371183..a090668 100644 (file)
@@ -103,11 +103,6 @@ enum vmk80xx_model {
        VMK8061_MODEL
 };
 
-struct firmware_version {
-       unsigned char ic3_vers[32];     /* USB-Controller */
-       unsigned char ic6_vers[32];     /* CPU */
-};
-
 static const struct comedi_lrange vmk8061_range = {
        2, {
                UNI_RANGE(5),
@@ -156,68 +151,12 @@ static const struct vmk80xx_board vmk80xx_boardinfo[] = {
 struct vmk80xx_private {
        struct usb_endpoint_descriptor *ep_rx;
        struct usb_endpoint_descriptor *ep_tx;
-       struct firmware_version fw;
        struct semaphore limit_sem;
        unsigned char *usb_rx_buf;
        unsigned char *usb_tx_buf;
        enum vmk80xx_model model;
 };
 
-static int vmk80xx_check_data_link(struct comedi_device *dev)
-{
-       struct vmk80xx_private *devpriv = dev->private;
-       struct usb_device *usb = comedi_to_usb_dev(dev);
-       unsigned int tx_pipe;
-       unsigned int rx_pipe;
-       unsigned char tx[1];
-       unsigned char rx[2];
-
-       tx_pipe = usb_sndbulkpipe(usb, 0x01);
-       rx_pipe = usb_rcvbulkpipe(usb, 0x81);
-
-       tx[0] = VMK8061_CMD_RD_PWR_STAT;
-
-       /*
-        * Check that IC6 (PIC16F871) is powered and
-        * running and the data link between IC3 and
-        * IC6 is working properly
-        */
-       usb_bulk_msg(usb, tx_pipe, tx, 1, NULL, devpriv->ep_tx->bInterval);
-       usb_bulk_msg(usb, rx_pipe, rx, 2, NULL, HZ * 10);
-
-       return (int)rx[1];
-}
-
-static void vmk80xx_read_eeprom(struct comedi_device *dev, int flag)
-{
-       struct vmk80xx_private *devpriv = dev->private;
-       struct usb_device *usb = comedi_to_usb_dev(dev);
-       unsigned int tx_pipe;
-       unsigned int rx_pipe;
-       unsigned char tx[1];
-       unsigned char rx[64];
-       int cnt;
-
-       tx_pipe = usb_sndbulkpipe(usb, 0x01);
-       rx_pipe = usb_rcvbulkpipe(usb, 0x81);
-
-       tx[0] = VMK8061_CMD_RD_VERSION;
-
-       /*
-        * Read the firmware version info of IC3 and
-        * IC6 from the internal EEPROM of the IC
-        */
-       usb_bulk_msg(usb, tx_pipe, tx, 1, NULL, devpriv->ep_tx->bInterval);
-       usb_bulk_msg(usb, rx_pipe, rx, 64, &cnt, HZ * 10);
-
-       rx[cnt] = '\0';
-
-       if (flag & IC3_VERSION)
-               strncpy(devpriv->fw.ic3_vers, rx + 1, 24);
-       else                    /* IC6_VERSION */
-               strncpy(devpriv->fw.ic6_vers, rx + 25, 24);
-}
-
 static void vmk80xx_do_bulk_msg(struct comedi_device *dev)
 {
        struct vmk80xx_private *devpriv = dev->private;
@@ -878,16 +817,6 @@ static int vmk80xx_auto_attach(struct comedi_device *dev,
 
        usb_set_intfdata(intf, devpriv);
 
-       if (devpriv->model == VMK8061_MODEL) {
-               vmk80xx_read_eeprom(dev, IC3_VERSION);
-               dev_info(&intf->dev, "%s\n", devpriv->fw.ic3_vers);
-
-               if (vmk80xx_check_data_link(dev)) {
-                       vmk80xx_read_eeprom(dev, IC6_VERSION);
-                       dev_info(&intf->dev, "%s\n", devpriv->fw.ic6_vers);
-               }
-       }
-
        if (devpriv->model == VMK8055_MODEL)
                vmk80xx_reset_device(dev);
 
index 2418302..6d5b38d 100644 (file)
@@ -38,6 +38,7 @@ config IIO_SIMPLE_DUMMY_EVENTS
 config IIO_SIMPLE_DUMMY_BUFFER
        bool "Buffered capture support"
        select IIO_BUFFER
+       select IIO_TRIGGER
        select IIO_KFIFO_BUF
        help
          Add buffered data capture to the simple dummy driver.
index d9d6fad..8161743 100644 (file)
@@ -214,11 +214,17 @@ struct mxs_lradc {
        unsigned long           is_divided;
 
        /*
-        * Touchscreen LRADC channels receives a private slot in the CTRL4
-        * register, the slot #7. Therefore only 7 slots instead of 8 in the
-        * CTRL4 register can be mapped to LRADC channels when using the
-        * touchscreen.
-        *
+        * When the touchscreen is enabled, we give it two private virtual
+        * channels: #6 and #7. This means that only 6 virtual channels (instead
+        * of 8) will be available for buffered capture.
+        */
+#define TOUCHSCREEN_VCHANNEL1          7
+#define TOUCHSCREEN_VCHANNEL2          6
+#define BUFFER_VCHANS_LIMITED          0x3f
+#define BUFFER_VCHANS_ALL              0xff
+       u8                      buffer_vchans;
+
+       /*
         * Furthermore, certain LRADC channels are shared between touchscreen
         * and/or touch-buttons and generic LRADC block. Therefore when using
         * either of these, these channels are not available for the regular
@@ -342,6 +348,9 @@ struct mxs_lradc {
 #define        LRADC_CTRL4                             0x140
 #define        LRADC_CTRL4_LRADCSELECT_MASK(n)         (0xf << ((n) * 4))
 #define        LRADC_CTRL4_LRADCSELECT_OFFSET(n)       ((n) * 4)
+#define        LRADC_CTRL4_LRADCSELECT(n, x) \
+                               (((x) << LRADC_CTRL4_LRADCSELECT_OFFSET(n)) & \
+                               LRADC_CTRL4_LRADCSELECT_MASK(n))
 
 #define LRADC_RESOLUTION                       12
 #define LRADC_SINGLE_SAMPLE_MASK               ((1 << LRADC_RESOLUTION) - 1)
@@ -416,6 +425,14 @@ static bool mxs_lradc_check_touch_event(struct mxs_lradc *lradc)
                                        LRADC_STATUS_TOUCH_DETECT_RAW);
 }
 
+static void mxs_lradc_map_channel(struct mxs_lradc *lradc, unsigned vch,
+                                 unsigned ch)
+{
+       mxs_lradc_reg_clear(lradc, LRADC_CTRL4_LRADCSELECT_MASK(vch),
+                               LRADC_CTRL4);
+       mxs_lradc_reg_set(lradc, LRADC_CTRL4_LRADCSELECT(vch, ch), LRADC_CTRL4);
+}
+
 static void mxs_lradc_setup_ts_channel(struct mxs_lradc *lradc, unsigned ch)
 {
        /*
@@ -450,12 +467,8 @@ static void mxs_lradc_setup_ts_channel(struct mxs_lradc *lradc, unsigned ch)
                LRADC_DELAY_DELAY(lradc->over_sample_delay - 1),
                        LRADC_DELAY(3));
 
-       mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(2) |
-                       LRADC_CTRL1_LRADC_IRQ(3) | LRADC_CTRL1_LRADC_IRQ(4) |
-                       LRADC_CTRL1_LRADC_IRQ(5), LRADC_CTRL1);
+       mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(ch), LRADC_CTRL1);
 
-       /* wake us again, when the complete conversion is done */
-       mxs_lradc_reg_set(lradc, LRADC_CTRL1_LRADC_IRQ_EN(ch), LRADC_CTRL1);
        /*
         * after changing the touchscreen plates setting
         * the signals need some initial time to settle. Start the
@@ -509,12 +522,8 @@ static void mxs_lradc_setup_ts_pressure(struct mxs_lradc *lradc, unsigned ch1,
                LRADC_DELAY_DELAY(lradc->over_sample_delay - 1),
                                        LRADC_DELAY(3));
 
-       mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(2) |
-                       LRADC_CTRL1_LRADC_IRQ(3) | LRADC_CTRL1_LRADC_IRQ(4) |
-                       LRADC_CTRL1_LRADC_IRQ(5), LRADC_CTRL1);
+       mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(ch2), LRADC_CTRL1);
 
-       /* wake us again, when the conversions are done */
-       mxs_lradc_reg_set(lradc, LRADC_CTRL1_LRADC_IRQ_EN(ch2), LRADC_CTRL1);
        /*
         * after changing the touchscreen plates setting
         * the signals need some initial time to settle. Start the
@@ -580,36 +589,6 @@ static unsigned mxs_lradc_read_ts_pressure(struct mxs_lradc *lradc,
 #define TS_CH_XM 4
 #define TS_CH_YM 5
 
-static int mxs_lradc_read_ts_channel(struct mxs_lradc *lradc)
-{
-       u32 reg;
-       int val;
-
-       reg = readl(lradc->base + LRADC_CTRL1);
-
-       /* only channels 3 to 5 are of interest here */
-       if (reg & LRADC_CTRL1_LRADC_IRQ(TS_CH_YP)) {
-               mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ_EN(TS_CH_YP) |
-                       LRADC_CTRL1_LRADC_IRQ(TS_CH_YP), LRADC_CTRL1);
-               val = mxs_lradc_read_raw_channel(lradc, TS_CH_YP);
-       } else if (reg & LRADC_CTRL1_LRADC_IRQ(TS_CH_XM)) {
-               mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ_EN(TS_CH_XM) |
-                       LRADC_CTRL1_LRADC_IRQ(TS_CH_XM), LRADC_CTRL1);
-               val = mxs_lradc_read_raw_channel(lradc, TS_CH_XM);
-       } else if (reg & LRADC_CTRL1_LRADC_IRQ(TS_CH_YM)) {
-               mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ_EN(TS_CH_YM) |
-                       LRADC_CTRL1_LRADC_IRQ(TS_CH_YM), LRADC_CTRL1);
-               val = mxs_lradc_read_raw_channel(lradc, TS_CH_YM);
-       } else {
-               return -EIO;
-       }
-
-       mxs_lradc_reg_wrt(lradc, 0, LRADC_DELAY(2));
-       mxs_lradc_reg_wrt(lradc, 0, LRADC_DELAY(3));
-
-       return val;
-}
-
 /*
  * YP(open)--+-------------+
  *           |             |--+
@@ -653,7 +632,8 @@ static void mxs_lradc_prepare_x_pos(struct mxs_lradc *lradc)
        mxs_lradc_reg_set(lradc, mxs_lradc_drive_x_plate(lradc), LRADC_CTRL0);
 
        lradc->cur_plate = LRADC_SAMPLE_X;
-       mxs_lradc_setup_ts_channel(lradc, TS_CH_YP);
+       mxs_lradc_map_channel(lradc, TOUCHSCREEN_VCHANNEL1, TS_CH_YP);
+       mxs_lradc_setup_ts_channel(lradc, TOUCHSCREEN_VCHANNEL1);
 }
 
 /*
@@ -674,7 +654,8 @@ static void mxs_lradc_prepare_y_pos(struct mxs_lradc *lradc)
        mxs_lradc_reg_set(lradc, mxs_lradc_drive_y_plate(lradc), LRADC_CTRL0);
 
        lradc->cur_plate = LRADC_SAMPLE_Y;
-       mxs_lradc_setup_ts_channel(lradc, TS_CH_XM);
+       mxs_lradc_map_channel(lradc, TOUCHSCREEN_VCHANNEL1, TS_CH_XM);
+       mxs_lradc_setup_ts_channel(lradc, TOUCHSCREEN_VCHANNEL1);
 }
 
 /*
@@ -695,7 +676,10 @@ static void mxs_lradc_prepare_pressure(struct mxs_lradc *lradc)
        mxs_lradc_reg_set(lradc, mxs_lradc_drive_pressure(lradc), LRADC_CTRL0);
 
        lradc->cur_plate = LRADC_SAMPLE_PRESSURE;
-       mxs_lradc_setup_ts_pressure(lradc, TS_CH_XP, TS_CH_YM);
+       mxs_lradc_map_channel(lradc, TOUCHSCREEN_VCHANNEL1, TS_CH_YM);
+       mxs_lradc_map_channel(lradc, TOUCHSCREEN_VCHANNEL2, TS_CH_XP);
+       mxs_lradc_setup_ts_pressure(lradc, TOUCHSCREEN_VCHANNEL2,
+                                               TOUCHSCREEN_VCHANNEL1);
 }
 
 static void mxs_lradc_enable_touch_detection(struct mxs_lradc *lradc)
@@ -708,6 +692,19 @@ static void mxs_lradc_enable_touch_detection(struct mxs_lradc *lradc)
        mxs_lradc_reg_set(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ_EN, LRADC_CTRL1);
 }
 
+static void mxs_lradc_start_touch_event(struct mxs_lradc *lradc)
+{
+       mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ_EN,
+                               LRADC_CTRL1);
+       mxs_lradc_reg_set(lradc,
+               LRADC_CTRL1_LRADC_IRQ_EN(TOUCHSCREEN_VCHANNEL1), LRADC_CTRL1);
+       /*
+        * start with the Y-pos, because it uses nearly the same plate
+        * settings like the touch detection
+        */
+       mxs_lradc_prepare_y_pos(lradc);
+}
+
 static void mxs_lradc_report_ts_event(struct mxs_lradc *lradc)
 {
        input_report_abs(lradc->ts_input, ABS_X, lradc->ts_x_pos);
@@ -725,10 +722,12 @@ static void mxs_lradc_complete_touch_event(struct mxs_lradc *lradc)
         * start a dummy conversion to burn time to settle the signals
         * note: we are not interested in the conversion's value
         */
-       mxs_lradc_reg_wrt(lradc, 0, LRADC_CH(5));
-       mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ(5), LRADC_CTRL1);
-       mxs_lradc_reg_set(lradc, LRADC_CTRL1_LRADC_IRQ_EN(5), LRADC_CTRL1);
-       mxs_lradc_reg_wrt(lradc, LRADC_DELAY_TRIGGER(1 << 5) |
+       mxs_lradc_reg_wrt(lradc, 0, LRADC_CH(TOUCHSCREEN_VCHANNEL1));
+       mxs_lradc_reg_clear(lradc,
+               LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL1) |
+               LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL2), LRADC_CTRL1);
+       mxs_lradc_reg_wrt(lradc,
+               LRADC_DELAY_TRIGGER(1 << TOUCHSCREEN_VCHANNEL1) |
                LRADC_DELAY_KICK | LRADC_DELAY_DELAY(10), /* waste 5 ms */
                        LRADC_DELAY(2));
 }
@@ -760,59 +759,45 @@ static void mxs_lradc_finish_touch_event(struct mxs_lradc *lradc, bool valid)
 
        /* if it is released, wait for the next touch via IRQ */
        lradc->cur_plate = LRADC_TOUCH;
-       mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ, LRADC_CTRL1);
+       mxs_lradc_reg_wrt(lradc, 0, LRADC_DELAY(2));
+       mxs_lradc_reg_wrt(lradc, 0, LRADC_DELAY(3));
+       mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ |
+               LRADC_CTRL1_LRADC_IRQ_EN(TOUCHSCREEN_VCHANNEL1) |
+               LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL1), LRADC_CTRL1);
        mxs_lradc_reg_set(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ_EN, LRADC_CTRL1);
 }
 
 /* touchscreen's state machine */
 static void mxs_lradc_handle_touch(struct mxs_lradc *lradc)
 {
-       int val;
-
        switch (lradc->cur_plate) {
        case LRADC_TOUCH:
-               /*
-                * start with the Y-pos, because it uses nearly the same plate
-                * settings like the touch detection
-                */
-               if (mxs_lradc_check_touch_event(lradc)) {
-                       mxs_lradc_reg_clear(lradc,
-                                       LRADC_CTRL1_TOUCH_DETECT_IRQ_EN,
-                                       LRADC_CTRL1);
-                       mxs_lradc_prepare_y_pos(lradc);
-               }
+               if (mxs_lradc_check_touch_event(lradc))
+                       mxs_lradc_start_touch_event(lradc);
                mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ,
                                        LRADC_CTRL1);
                return;
 
        case LRADC_SAMPLE_Y:
-               val = mxs_lradc_read_ts_channel(lradc);
-               if (val < 0) {
-                       mxs_lradc_enable_touch_detection(lradc); /* re-start */
-                       return;
-               }
-               lradc->ts_y_pos = val;
+               lradc->ts_y_pos = mxs_lradc_read_raw_channel(lradc,
+                                                       TOUCHSCREEN_VCHANNEL1);
                mxs_lradc_prepare_x_pos(lradc);
                return;
 
        case LRADC_SAMPLE_X:
-               val = mxs_lradc_read_ts_channel(lradc);
-               if (val < 0) {
-                       mxs_lradc_enable_touch_detection(lradc); /* re-start */
-                       return;
-               }
-               lradc->ts_x_pos = val;
+               lradc->ts_x_pos = mxs_lradc_read_raw_channel(lradc,
+                                                       TOUCHSCREEN_VCHANNEL1);
                mxs_lradc_prepare_pressure(lradc);
                return;
 
        case LRADC_SAMPLE_PRESSURE:
-               lradc->ts_pressure =
-                       mxs_lradc_read_ts_pressure(lradc, TS_CH_XP, TS_CH_YM);
+               lradc->ts_pressure = mxs_lradc_read_ts_pressure(lradc,
+                                                       TOUCHSCREEN_VCHANNEL2,
+                                                       TOUCHSCREEN_VCHANNEL1);
                mxs_lradc_complete_touch_event(lradc);
                return;
 
        case LRADC_SAMPLE_VALID:
-               val = mxs_lradc_read_ts_channel(lradc); /* ignore the value */
                mxs_lradc_finish_touch_event(lradc, 1);
                break;
        }
@@ -844,9 +829,9 @@ static int mxs_lradc_read_single(struct iio_dev *iio_dev, int chan, int *val)
         * used if doing raw sampling.
         */
        if (lradc->soc == IMX28_LRADC)
-               mxs_lradc_reg_clear(lradc, LRADC_CTRL1_MX28_LRADC_IRQ_EN_MASK,
+               mxs_lradc_reg_clear(lradc, LRADC_CTRL1_LRADC_IRQ_EN(0),
                        LRADC_CTRL1);
-       mxs_lradc_reg_clear(lradc, 0xff, LRADC_CTRL0);
+       mxs_lradc_reg_clear(lradc, 0x1, LRADC_CTRL0);
 
        /* Enable / disable the divider per requirement */
        if (test_bit(chan, &lradc->is_divided))
@@ -1090,9 +1075,8 @@ static void mxs_lradc_disable_ts(struct mxs_lradc *lradc)
 {
        /* stop all interrupts from firing */
        mxs_lradc_reg_clear(lradc, LRADC_CTRL1_TOUCH_DETECT_IRQ_EN |
-               LRADC_CTRL1_LRADC_IRQ_EN(2) | LRADC_CTRL1_LRADC_IRQ_EN(3) |
-               LRADC_CTRL1_LRADC_IRQ_EN(4) | LRADC_CTRL1_LRADC_IRQ_EN(5),
-               LRADC_CTRL1);
+               LRADC_CTRL1_LRADC_IRQ_EN(TOUCHSCREEN_VCHANNEL1) |
+               LRADC_CTRL1_LRADC_IRQ_EN(TOUCHSCREEN_VCHANNEL2), LRADC_CTRL1);
 
        /* Power-down touchscreen touch-detect circuitry. */
        mxs_lradc_reg_clear(lradc, mxs_lradc_plate_mask(lradc), LRADC_CTRL0);
@@ -1158,26 +1142,31 @@ static irqreturn_t mxs_lradc_handle_irq(int irq, void *data)
        struct iio_dev *iio = data;
        struct mxs_lradc *lradc = iio_priv(iio);
        unsigned long reg = readl(lradc->base + LRADC_CTRL1);
+       uint32_t clr_irq = mxs_lradc_irq_mask(lradc);
        const uint32_t ts_irq_mask =
                LRADC_CTRL1_TOUCH_DETECT_IRQ |
-               LRADC_CTRL1_LRADC_IRQ(2) |
-               LRADC_CTRL1_LRADC_IRQ(3) |
-               LRADC_CTRL1_LRADC_IRQ(4) |
-               LRADC_CTRL1_LRADC_IRQ(5);
+               LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL1) |
+               LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL2);
 
        if (!(reg & mxs_lradc_irq_mask(lradc)))
                return IRQ_NONE;
 
-       if (lradc->use_touchscreen && (reg & ts_irq_mask))
+       if (lradc->use_touchscreen && (reg & ts_irq_mask)) {
                mxs_lradc_handle_touch(lradc);
 
-       if (iio_buffer_enabled(iio))
-               iio_trigger_poll(iio->trig);
-       else if (reg & LRADC_CTRL1_LRADC_IRQ(0))
+               /* Make sure we don't clear the next conversion's interrupt. */
+               clr_irq &= ~(LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL1) |
+                               LRADC_CTRL1_LRADC_IRQ(TOUCHSCREEN_VCHANNEL2));
+       }
+
+       if (iio_buffer_enabled(iio)) {
+               if (reg & lradc->buffer_vchans)
+                       iio_trigger_poll(iio->trig);
+       } else if (reg & LRADC_CTRL1_LRADC_IRQ(0)) {
                complete(&lradc->completion);
+       }
 
-       mxs_lradc_reg_clear(lradc, reg & mxs_lradc_irq_mask(lradc),
-                       LRADC_CTRL1);
+       mxs_lradc_reg_clear(lradc, reg & clr_irq, LRADC_CTRL1);
 
        return IRQ_HANDLED;
 }
@@ -1289,9 +1278,10 @@ static int mxs_lradc_buffer_preenable(struct iio_dev *iio)
        }
 
        if (lradc->soc == IMX28_LRADC)
-               mxs_lradc_reg_clear(lradc, LRADC_CTRL1_MX28_LRADC_IRQ_EN_MASK,
-                                                       LRADC_CTRL1);
-       mxs_lradc_reg_clear(lradc, 0xff, LRADC_CTRL0);
+               mxs_lradc_reg_clear(lradc,
+                       lradc->buffer_vchans << LRADC_CTRL1_LRADC_IRQ_EN_OFFSET,
+                       LRADC_CTRL1);
+       mxs_lradc_reg_clear(lradc, lradc->buffer_vchans, LRADC_CTRL0);
 
        for_each_set_bit(chan, iio->active_scan_mask, LRADC_MAX_TOTAL_CHANS) {
                ctrl4_set |= chan << LRADC_CTRL4_LRADCSELECT_OFFSET(ofs);
@@ -1324,10 +1314,11 @@ static int mxs_lradc_buffer_postdisable(struct iio_dev *iio)
        mxs_lradc_reg_clear(lradc, LRADC_DELAY_TRIGGER_LRADCS_MASK |
                                        LRADC_DELAY_KICK, LRADC_DELAY(0));
 
-       mxs_lradc_reg_clear(lradc, 0xff, LRADC_CTRL0);
+       mxs_lradc_reg_clear(lradc, lradc->buffer_vchans, LRADC_CTRL0);
        if (lradc->soc == IMX28_LRADC)
-               mxs_lradc_reg_clear(lradc, LRADC_CTRL1_MX28_LRADC_IRQ_EN_MASK,
-                                       LRADC_CTRL1);
+               mxs_lradc_reg_clear(lradc,
+                       lradc->buffer_vchans << LRADC_CTRL1_LRADC_IRQ_EN_OFFSET,
+                       LRADC_CTRL1);
 
        kfree(lradc->buffer);
        mutex_unlock(&lradc->lock);
@@ -1353,7 +1344,7 @@ static bool mxs_lradc_validate_scan_mask(struct iio_dev *iio,
        if (lradc->use_touchbutton)
                rsvd_chans++;
        if (lradc->use_touchscreen)
-               rsvd_chans++;
+               rsvd_chans += 2;
 
        /* Test for attempts to map channels with special mode of operation. */
        if (bitmap_intersects(mask, &rsvd_mask, LRADC_MAX_TOTAL_CHANS))
@@ -1413,6 +1404,13 @@ static const struct iio_chan_spec mxs_lradc_chan_spec[] = {
                .channel = 8,
                .scan_type = {.sign = 'u', .realbits = 18, .storagebits = 32,},
        },
+       /* Hidden channel to keep indexes */
+       {
+               .type = IIO_TEMP,
+               .indexed = 1,
+               .scan_index = -1,
+               .channel = 9,
+       },
        MXS_ADC_CHAN(10, IIO_VOLTAGE),  /* VDDIO */
        MXS_ADC_CHAN(11, IIO_VOLTAGE),  /* VTH */
        MXS_ADC_CHAN(12, IIO_VOLTAGE),  /* VDDA */
@@ -1583,6 +1581,11 @@ static int mxs_lradc_probe(struct platform_device *pdev)
 
        touch_ret = mxs_lradc_probe_touchscreen(lradc, node);
 
+       if (touch_ret == 0)
+               lradc->buffer_vchans = BUFFER_VCHANS_LIMITED;
+       else
+               lradc->buffer_vchans = BUFFER_VCHANS_ALL;
+
        /* Grab all IRQ sources */
        for (i = 0; i < of_cfg->irq_count; i++) {
                lradc->irq[i] = platform_get_irq(pdev, i);
index fd171d8..90cc18b 100644 (file)
@@ -592,6 +592,7 @@ int hmc5843_common_probe(struct device *dev, struct regmap *regmap,
        mutex_init(&data->lock);
 
        indio_dev->dev.parent = dev;
+       indio_dev->name = dev->driver->name;
        indio_dev->info = &hmc5843_info;
        indio_dev->modes = INDIO_DIRECT_MODE;
        indio_dev->channels = data->variant->channels;
index 017d2f8..c17893b 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/gpio.h>
 #include <linux/module.h>
+#include <linux/bitops.h>
 
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
@@ -68,7 +69,7 @@ static int ad2s1200_read_raw(struct iio_dev *indio_dev,
                break;
        case IIO_ANGL_VEL:
                vel = (((s16)(st->rx[0])) << 4) | ((st->rx[1] & 0xF0) >> 4);
-               vel = (vel << 4) >> 4;
+               vel = sign_extend32(vel, 11);
                *val = vel;
                break;
        default:
index 4324282..03b2a90 100644 (file)
@@ -330,16 +330,6 @@ static void device_init_registers(struct vnt_private *pDevice)
        /* zonetype initial */
        pDevice->byOriginalZonetype = pDevice->abyEEPROM[EEP_OFS_ZONETYPE];
 
-       /* Get RFType */
-       pDevice->byRFType = SROMbyReadEmbedded(pDevice->PortOffset, EEP_OFS_RFTYPE);
-
-       /* force change RevID for VT3253 emu */
-       if ((pDevice->byRFType & RF_EMU) != 0)
-                       pDevice->byRevId = 0x80;
-
-       pDevice->byRFType &= RF_MASK;
-       pr_debug("pDevice->byRFType = %x\n", pDevice->byRFType);
-
        if (!pDevice->bZoneRegExist)
                pDevice->byZoneType = pDevice->abyEEPROM[EEP_OFS_ZONETYPE];
 
@@ -1187,12 +1177,14 @@ static int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb)
 {
        struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
        PSTxDesc head_td;
-       u32 dma_idx = TYPE_AC0DMA;
+       u32 dma_idx;
        unsigned long flags;
 
        spin_lock_irqsave(&priv->lock, flags);
 
-       if (!ieee80211_is_data(hdr->frame_control))
+       if (ieee80211_is_data(hdr->frame_control))
+               dma_idx = TYPE_AC0DMA;
+       else
                dma_idx = TYPE_TXDMA0;
 
        if (AVAIL_TD(priv, dma_idx) < 1) {
@@ -1206,6 +1198,9 @@ static int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb)
 
        head_td->pTDInfo->skb = skb;
 
+       if (dma_idx == TYPE_AC0DMA)
+               head_td->pTDInfo->byFlags = TD_FLAGS_NETIF_SKB;
+
        priv->iTDUsed[dma_idx]++;
 
        /* Take ownership */
@@ -1234,13 +1229,10 @@ static int vnt_tx_packet(struct vnt_private *priv, struct sk_buff *skb)
 
        head_td->buff_addr = cpu_to_le32(head_td->pTDInfo->skb_dma);
 
-       if (dma_idx == TYPE_AC0DMA) {
-               head_td->pTDInfo->byFlags = TD_FLAGS_NETIF_SKB;
-
+       if (head_td->pTDInfo->byFlags & TD_FLAGS_NETIF_SKB)
                MACvTransmitAC0(priv->PortOffset);
-       } else {
+       else
                MACvTransmit0(priv->PortOffset);
-       }
 
        spin_unlock_irqrestore(&priv->lock, flags);
 
@@ -1778,6 +1770,12 @@ vt6655_probe(struct pci_dev *pcid, const struct pci_device_id *ent)
        MACvInitialize(priv->PortOffset);
        MACvReadEtherAddress(priv->PortOffset, priv->abyCurrentNetAddr);
 
+       /* Get RFType */
+       priv->byRFType = SROMbyReadEmbedded(priv->PortOffset, EEP_OFS_RFTYPE);
+       priv->byRFType &= RF_MASK;
+
+       dev_dbg(&pcid->dev, "RF Type = %x\n", priv->byRFType);
+
        device_get_options(priv);
        device_set_options(priv);
        /* Mask out the options cannot be set to the chip */
index 941b2ad..7626f63 100644 (file)
@@ -794,6 +794,7 @@ bool RFbSetPower(
                break;
        case RATE_6M:
        case RATE_9M:
+       case RATE_12M:
        case RATE_18M:
                byPwr = priv->abyOFDMPwrTbl[uCH];
                if (priv->byRFType == RF_UW2452)
index c42cde5..c4286cc 100644 (file)
@@ -640,6 +640,7 @@ int vnt_rf_setpower(struct vnt_private *priv, u32 rate, u32 channel)
                break;
        case RATE_6M:
        case RATE_9M:
+       case RATE_12M:
        case RATE_18M:
        case RATE_24M:
        case RATE_36M:
index 50bad55..77d6425 100644 (file)
@@ -1181,7 +1181,7 @@ iscsit_handle_scsi_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
         * traditional iSCSI block I/O.
         */
        if (iscsit_allocate_iovecs(cmd) < 0) {
-               return iscsit_add_reject_cmd(cmd,
+               return iscsit_reject_cmd(cmd,
                                ISCSI_REASON_BOOKMARK_NO_RESOURCES, buf);
        }
        immed_data = cmd->immediate_data;
@@ -3468,6 +3468,7 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
                                                tpg_np_list) {
                                struct iscsi_np *np = tpg_np->tpg_np;
                                bool inaddr_any = iscsit_check_inaddr_any(np);
+                               char *fmt_str;
 
                                if (np->np_network_transport != network_transport)
                                        continue;
@@ -3495,8 +3496,12 @@ iscsit_build_sendtargets_response(struct iscsi_cmd *cmd,
                                        }
                                }
 
-                               len = sprintf(buf, "TargetAddress="
-                                       "%s:%hu,%hu",
+                               if (np->np_sockaddr.ss_family == AF_INET6)
+                                       fmt_str = "TargetAddress=[%s]:%hu,%hu";
+                               else
+                                       fmt_str = "TargetAddress=%s:%hu,%hu";
+
+                               len = sprintf(buf, fmt_str,
                                        inaddr_any ? conn->local_ip : np->np_ip,
                                        np->np_port,
                                        tpg->tpgt);
@@ -4256,11 +4261,17 @@ int iscsit_close_connection(
        pr_debug("Closing iSCSI connection CID %hu on SID:"
                " %u\n", conn->cid, sess->sid);
        /*
-        * Always up conn_logout_comp just in case the RX Thread is sleeping
-        * and the logout response never got sent because the connection
-        * failed.
+        * Always up conn_logout_comp for the traditional TCP case just in case
+        * the RX Thread in iscsi_target_rx_opcode() is sleeping and the logout
+        * response never got sent because the connection failed.
+        *
+        * However for iser-target, isert_wait4logout() is using conn_logout_comp
+        * to signal logout response TX interrupt completion.  Go ahead and skip
+        * this for iser since isert_rx_opcode() does not wait on logout failure,
+        * and to avoid iscsi_conn pointer dereference in iser-target code.
         */
-       complete(&conn->conn_logout_comp);
+       if (conn->conn_transport->transport_type == ISCSI_TCP)
+               complete(&conn->conn_logout_comp);
 
        iscsi_release_thread_set(conn);
 
index 1c197ba..bdd8731 100644 (file)
@@ -22,7 +22,6 @@
 #include <target/target_core_fabric.h>
 
 #include <target/iscsi/iscsi_target_core.h>
-#include <target/iscsi/iscsi_transport.h>
 #include "iscsi_target_seq_pdu_list.h"
 #include "iscsi_target_tq.h"
 #include "iscsi_target_erl0.h"
@@ -940,8 +939,7 @@ void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
 
        if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) {
                spin_unlock_bh(&conn->state_lock);
-               if (conn->conn_transport->transport_type == ISCSI_TCP)
-                       iscsit_close_connection(conn);
+               iscsit_close_connection(conn);
                return;
        }
 
index 6b3c329..c36bd7c 100644 (file)
@@ -953,11 +953,8 @@ static int tcm_loop_make_nexus(
                transport_free_session(tl_nexus->se_sess);
                goto out;
        }
-       /*
-        * Now, register the SAS I_T Nexus as active with the call to
-        * transport_register_session()
-        */
-       __transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl,
+       /* Now, register the SAS I_T Nexus as active. */
+       transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl,
                        tl_nexus->se_sess, tl_nexus);
        tl_tpg->tl_nexus = tl_nexus;
        pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated"
index 58f49ff..7faa6ae 100644 (file)
@@ -650,6 +650,18 @@ static u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size)
        return aligned_max_sectors;
 }
 
+bool se_dev_check_wce(struct se_device *dev)
+{
+       bool wce = false;
+
+       if (dev->transport->get_write_cache)
+               wce = dev->transport->get_write_cache(dev);
+       else if (dev->dev_attrib.emulate_write_cache > 0)
+               wce = true;
+
+       return wce;
+}
+
 int se_dev_set_max_unmap_lba_count(
        struct se_device *dev,
        u32 max_unmap_lba_count)
@@ -767,6 +779,16 @@ int se_dev_set_emulate_fua_write(struct se_device *dev, int flag)
                pr_err("Illegal value %d\n", flag);
                return -EINVAL;
        }
+       if (flag &&
+           dev->transport->get_write_cache) {
+               pr_warn("emulate_fua_write not supported for this device, ignoring\n");
+               return 0;
+       }
+       if (dev->export_count) {
+               pr_err("emulate_fua_write cannot be changed with active"
+                      " exports: %d\n", dev->export_count);
+               return -EINVAL;
+       }
        dev->dev_attrib.emulate_fua_write = flag;
        pr_debug("dev[%p]: SE Device Forced Unit Access WRITEs: %d\n",
                        dev, dev->dev_attrib.emulate_fua_write);
@@ -801,7 +823,11 @@ int se_dev_set_emulate_write_cache(struct se_device *dev, int flag)
                pr_err("emulate_write_cache not supported for this device\n");
                return -EINVAL;
        }
-
+       if (dev->export_count) {
+               pr_err("emulate_write_cache cannot be changed with active"
+                      " exports: %d\n", dev->export_count);
+               return -EINVAL;
+       }
        dev->dev_attrib.emulate_write_cache = flag;
        pr_debug("dev[%p]: SE Device WRITE_CACHE_EMULATION flag: %d\n",
                        dev, dev->dev_attrib.emulate_write_cache);
@@ -1534,8 +1560,6 @@ int target_configure_device(struct se_device *dev)
        ret = dev->transport->configure_device(dev);
        if (ret)
                goto out;
-       dev->dev_flags |= DF_CONFIGURED;
-
        /*
         * XXX: there is not much point to have two different values here..
         */
@@ -1597,6 +1621,8 @@ int target_configure_device(struct se_device *dev)
        list_add_tail(&dev->g_dev_node, &g_device_list);
        mutex_unlock(&g_device_mutex);
 
+       dev->dev_flags |= DF_CONFIGURED;
+
        return 0;
 
 out_free_alua:
index 1045dcd..f6c954c 100644 (file)
@@ -1121,7 +1121,7 @@ static u32 pscsi_get_device_type(struct se_device *dev)
        struct pscsi_dev_virt *pdv = PSCSI_DEV(dev);
        struct scsi_device *sd = pdv->pdv_sd;
 
-       return sd->type;
+       return (sd) ? sd->type : TYPE_NO_LUN;
 }
 
 static sector_t pscsi_get_blocks(struct se_device *dev)
index 9a2f9d3..3e72974 100644 (file)
@@ -708,8 +708,7 @@ sbc_check_dpofua(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb)
                }
        }
        if (cdb[1] & 0x8) {
-               if (!dev->dev_attrib.emulate_fua_write ||
-                   !dev->dev_attrib.emulate_write_cache) {
+               if (!dev->dev_attrib.emulate_fua_write || !se_dev_check_wce(dev)) {
                        pr_err("Got CDB: 0x%02x with FUA bit set, but device"
                               " does not advertise support for FUA write\n",
                               cdb[0]);
index 460e931..6c8bd6b 100644 (file)
@@ -454,19 +454,6 @@ check_scsi_name:
 }
 EXPORT_SYMBOL(spc_emulate_evpd_83);
 
-static bool
-spc_check_dev_wce(struct se_device *dev)
-{
-       bool wce = false;
-
-       if (dev->transport->get_write_cache)
-               wce = dev->transport->get_write_cache(dev);
-       else if (dev->dev_attrib.emulate_write_cache > 0)
-               wce = true;
-
-       return wce;
-}
-
 /* Extended INQUIRY Data VPD Page */
 static sense_reason_t
 spc_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf)
@@ -490,7 +477,7 @@ spc_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf)
        buf[5] = 0x07;
 
        /* If WriteCache emulation is enabled, set V_SUP */
-       if (spc_check_dev_wce(dev))
+       if (se_dev_check_wce(dev))
                buf[6] = 0x01;
        /* If an LBA map is present set R_SUP */
        spin_lock(&cmd->se_dev->t10_alua.lba_map_lock);
@@ -897,7 +884,7 @@ static int spc_modesense_caching(struct se_cmd *cmd, u8 pc, u8 *p)
        if (pc == 1)
                goto out;
 
-       if (spc_check_dev_wce(dev))
+       if (se_dev_check_wce(dev))
                p[2] = 0x04; /* Write Cache Enable */
        p[12] = 0x20; /* Disabled Read Ahead */
 
@@ -1009,7 +996,7 @@ static sense_reason_t spc_emulate_modesense(struct se_cmd *cmd)
             (cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY)))
                spc_modesense_write_protect(&buf[length], type);
 
-       if ((spc_check_dev_wce(dev)) &&
+       if ((se_dev_check_wce(dev)) &&
            (dev->dev_attrib.emulate_fua_write > 0))
                spc_modesense_dpofua(&buf[length], type);
 
index 0adc0f6..ac3cbab 100644 (file)
@@ -2389,6 +2389,10 @@ int target_get_sess_cmd(struct se_session *se_sess, struct se_cmd *se_cmd,
        list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list);
 out:
        spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+
+       if (ret && ack_kref)
+               target_put_sess_cmd(se_sess, se_cmd);
+
        return ret;
 }
 EXPORT_SYMBOL(target_get_sess_cmd);
index 97b486c..583e755 100644 (file)
@@ -359,7 +359,7 @@ void ft_invl_hw_context(struct ft_cmd *cmd)
                ep = fc_seq_exch(seq);
                if (ep) {
                        lport = ep->lp;
-                       if (lport && (ep->xid <= lport->lro_xid))
+                       if (lport && (ep->xid <= lport->lro_xid)) {
                                /*
                                 * "ddp_done" trigger invalidation of HW
                                 * specific DDP context
@@ -374,6 +374,7 @@ void ft_invl_hw_context(struct ft_cmd *cmd)
                                 * identified using ep->xid)
                                 */
                                cmd->was_ddp_setup = 0;
+                       }
                }
        }
 }
index d1ec580..76c515d 100644 (file)
@@ -25,7 +25,7 @@
  * Function to allocate regfields which are common
  * between syscfg and memory mapped based sensors
  */
-int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor)
+static int st_thermal_alloc_regfields(struct st_thermal_sensor *sensor)
 {
        struct device *dev = sensor->dev;
        struct regmap *regmap = sensor->regmap;
index 067bfcd..fc0c9e1 100644 (file)
@@ -157,7 +157,7 @@ static const struct st_thermal_sensor_ops st_mmap_sensor_ops = {
 };
 
 /* Compatible device data stih416 mpe thermal sensor */
-const struct st_thermal_compat_data st_416mpe_cdata = {
+static const struct st_thermal_compat_data st_416mpe_cdata = {
        .reg_fields             = st_mmap_thermal_regfields,
        .ops                    = &st_mmap_sensor_ops,
        .calibration_val        = 14,
@@ -166,7 +166,7 @@ const struct st_thermal_compat_data st_416mpe_cdata = {
 };
 
 /* Compatible device data stih407 thermal sensor */
-const struct st_thermal_compat_data st_407_cdata = {
+static const struct st_thermal_compat_data st_407_cdata = {
        .reg_fields             = st_mmap_thermal_regfields,
        .ops                    = &st_mmap_sensor_ops,
        .calibration_val        = 16,
@@ -174,19 +174,19 @@ const struct st_thermal_compat_data st_407_cdata = {
        .crit_temp              = 120,
 };
 
-static struct of_device_id st_mmap_thermal_of_match[] = {
+static const struct of_device_id st_mmap_thermal_of_match[] = {
        { .compatible = "st,stih416-mpe-thermal", .data = &st_416mpe_cdata },
        { .compatible = "st,stih407-thermal",     .data = &st_407_cdata },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, st_mmap_thermal_of_match);
 
-int st_mmap_probe(struct platform_device *pdev)
+static int st_mmap_probe(struct platform_device *pdev)
 {
        return st_thermal_register(pdev,  st_mmap_thermal_of_match);
 }
 
-int st_mmap_remove(struct platform_device *pdev)
+static int st_mmap_remove(struct platform_device *pdev)
 {
        return st_thermal_unregister(pdev);
 }
index 26d36a2..3df5b78 100644 (file)
@@ -104,7 +104,7 @@ static const struct st_thermal_sensor_ops st_syscfg_sensor_ops = {
 };
 
 /* Compatible device data for stih415 sas thermal sensor */
-const struct st_thermal_compat_data st_415sas_cdata = {
+static const struct st_thermal_compat_data st_415sas_cdata = {
        .sys_compat             = "st,stih415-front-syscfg",
        .reg_fields             = st_415sas_regfields,
        .ops                    = &st_syscfg_sensor_ops,
@@ -114,7 +114,7 @@ const struct st_thermal_compat_data st_415sas_cdata = {
 };
 
 /* Compatible device data for stih415 mpe thermal sensor */
-const struct st_thermal_compat_data st_415mpe_cdata = {
+static const struct st_thermal_compat_data st_415mpe_cdata = {
        .sys_compat             = "st,stih415-system-syscfg",
        .reg_fields             = st_415mpe_regfields,
        .ops                    = &st_syscfg_sensor_ops,
@@ -124,7 +124,7 @@ const struct st_thermal_compat_data st_415mpe_cdata = {
 };
 
 /* Compatible device data for stih416 sas thermal sensor */
-const struct st_thermal_compat_data st_416sas_cdata = {
+static const struct st_thermal_compat_data st_416sas_cdata = {
        .sys_compat             = "st,stih416-front-syscfg",
        .reg_fields             = st_416sas_regfields,
        .ops                    = &st_syscfg_sensor_ops,
@@ -134,7 +134,7 @@ const struct st_thermal_compat_data st_416sas_cdata = {
 };
 
 /* Compatible device data for stid127 thermal sensor */
-const struct st_thermal_compat_data st_127_cdata = {
+static const struct st_thermal_compat_data st_127_cdata = {
        .sys_compat             = "st,stid127-cpu-syscfg",
        .reg_fields             = st_127_regfields,
        .ops                    = &st_syscfg_sensor_ops,
@@ -143,7 +143,7 @@ const struct st_thermal_compat_data st_127_cdata = {
        .crit_temp              = 120,
 };
 
-static struct of_device_id st_syscfg_thermal_of_match[] = {
+static const struct of_device_id st_syscfg_thermal_of_match[] = {
        { .compatible = "st,stih415-sas-thermal", .data = &st_415sas_cdata },
        { .compatible = "st,stih415-mpe-thermal", .data = &st_415mpe_cdata },
        { .compatible = "st,stih416-sas-thermal", .data = &st_416sas_cdata },
@@ -152,12 +152,12 @@ static struct of_device_id st_syscfg_thermal_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, st_syscfg_thermal_of_match);
 
-int st_syscfg_probe(struct platform_device *pdev)
+static int st_syscfg_probe(struct platform_device *pdev)
 {
        return st_thermal_register(pdev, st_syscfg_thermal_of_match);
 }
 
-int st_syscfg_remove(struct platform_device *pdev)
+static int st_syscfg_remove(struct platform_device *pdev)
 {
        return st_thermal_unregister(pdev);
 }
index 174d3bc..4108db7 100644 (file)
@@ -458,8 +458,10 @@ static void update_temperature(struct thermal_zone_device *tz)
 
        ret = thermal_zone_get_temp(tz, &temp);
        if (ret) {
-               dev_warn(&tz->device, "failed to read out thermal zone %d\n",
-                        tz->id);
+               if (ret != -EAGAIN)
+                       dev_warn(&tz->device,
+                                "failed to read out thermal zone (%d)\n",
+                                ret);
                return;
        }
 
index d7b198c..ce24182 100644 (file)
@@ -210,18 +210,6 @@ bfin_jc_chars_in_buffer(struct tty_struct *tty)
        return circ_cnt(&bfin_jc_write_buf);
 }
 
-static void
-bfin_jc_wait_until_sent(struct tty_struct *tty, int timeout)
-{
-       unsigned long expire = jiffies + timeout;
-       while (!circ_empty(&bfin_jc_write_buf)) {
-               if (signal_pending(current))
-                       break;
-               if (time_after(jiffies, expire))
-                       break;
-       }
-}
-
 static const struct tty_operations bfin_jc_ops = {
        .open            = bfin_jc_open,
        .close           = bfin_jc_close,
@@ -230,7 +218,6 @@ static const struct tty_operations bfin_jc_ops = {
        .flush_chars     = bfin_jc_flush_chars,
        .write_room      = bfin_jc_write_room,
        .chars_in_buffer = bfin_jc_chars_in_buffer,
-       .wait_until_sent = bfin_jc_wait_until_sent,
 };
 
 static int __init bfin_jc_init(void)
index e3b9570..deae122 100644 (file)
@@ -2138,8 +2138,8 @@ int serial8250_do_startup(struct uart_port *port)
        /*
         * Clear the interrupt registers.
         */
-       if (serial_port_in(port, UART_LSR) & UART_LSR_DR)
-               serial_port_in(port, UART_RX);
+       serial_port_in(port, UART_LSR);
+       serial_port_in(port, UART_RX);
        serial_port_in(port, UART_IIR);
        serial_port_in(port, UART_MSR);
 
@@ -2300,8 +2300,8 @@ dont_test_tx_en:
         * saved flags to avoid getting false values from polling
         * routines or the previous session.
         */
-       if (serial_port_in(port, UART_LSR) & UART_LSR_DR)
-               serial_port_in(port, UART_RX);
+       serial_port_in(port, UART_LSR);
+       serial_port_in(port, UART_RX);
        serial_port_in(port, UART_IIR);
        serial_port_in(port, UART_MSR);
        up->lsr_saved_flags = 0;
@@ -2394,8 +2394,7 @@ void serial8250_do_shutdown(struct uart_port *port)
         * Read data port to reset things, and then unlink from
         * the IRQ chain.
         */
-       if (serial_port_in(port, UART_LSR) & UART_LSR_DR)
-               serial_port_in(port, UART_RX);
+       serial_port_in(port, UART_RX);
        serial8250_rpm_put(up);
 
        del_timer_sync(&up->timer);
index e601162..6ae5b85 100644 (file)
@@ -59,6 +59,8 @@ struct dw8250_data {
        u8                      usr_reg;
        int                     last_mcr;
        int                     line;
+       int                     msr_mask_on;
+       int                     msr_mask_off;
        struct clk              *clk;
        struct clk              *pclk;
        struct reset_control    *rst;
@@ -81,6 +83,12 @@ static inline int dw8250_modify_msr(struct uart_port *p, int offset, int value)
                value &= ~UART_MSR_DCTS;
        }
 
+       /* Override any modem control signals if needed */
+       if (offset == UART_MSR) {
+               value |= d->msr_mask_on;
+               value &= ~d->msr_mask_off;
+       }
+
        return value;
 }
 
@@ -111,7 +119,10 @@ static void dw8250_serial_out(struct uart_port *p, int offset, int value)
                        dw8250_force_idle(p);
                        writeb(value, p->membase + (UART_LCR << p->regshift));
                }
-               dev_err(p->dev, "Couldn't set LCR to %d\n", value);
+               /*
+                * FIXME: this deadlocks if port->lock is already held
+                * dev_err(p->dev, "Couldn't set LCR to %d\n", value);
+                */
        }
 }
 
@@ -155,7 +166,10 @@ static void dw8250_serial_outq(struct uart_port *p, int offset, int value)
                        __raw_writeq(value & 0xff,
                                     p->membase + (UART_LCR << p->regshift));
                }
-               dev_err(p->dev, "Couldn't set LCR to %d\n", value);
+               /*
+                * FIXME: this deadlocks if port->lock is already held
+                * dev_err(p->dev, "Couldn't set LCR to %d\n", value);
+                */
        }
 }
 #endif /* CONFIG_64BIT */
@@ -179,7 +193,10 @@ static void dw8250_serial_out32(struct uart_port *p, int offset, int value)
                        dw8250_force_idle(p);
                        writel(value, p->membase + (UART_LCR << p->regshift));
                }
-               dev_err(p->dev, "Couldn't set LCR to %d\n", value);
+               /*
+                * FIXME: this deadlocks if port->lock is already held
+                * dev_err(p->dev, "Couldn't set LCR to %d\n", value);
+                */
        }
 }
 
@@ -334,6 +351,30 @@ static int dw8250_probe_of(struct uart_port *p,
        if (id >= 0)
                p->line = id;
 
+       if (of_property_read_bool(np, "dcd-override")) {
+               /* Always report DCD as active */
+               data->msr_mask_on |= UART_MSR_DCD;
+               data->msr_mask_off |= UART_MSR_DDCD;
+       }
+
+       if (of_property_read_bool(np, "dsr-override")) {
+               /* Always report DSR as active */
+               data->msr_mask_on |= UART_MSR_DSR;
+               data->msr_mask_off |= UART_MSR_DDSR;
+       }
+
+       if (of_property_read_bool(np, "cts-override")) {
+               /* Always report CTS as active */
+               data->msr_mask_on |= UART_MSR_CTS;
+               data->msr_mask_off |= UART_MSR_DCTS;
+       }
+
+       if (of_property_read_bool(np, "ri-override")) {
+               /* Always report Ring indicator as inactive */
+               data->msr_mask_off |= UART_MSR_RI;
+               data->msr_mask_off |= UART_MSR_TERI;
+       }
+
        /* clock got configured through clk api, all done */
        if (p->uartclk)
                return 0;
index daf2c82..892eb32 100644 (file)
@@ -69,7 +69,7 @@ static void moan_device(const char *str, struct pci_dev *dev)
               "Please send the output of lspci -vv, this\n"
               "message (0x%04x,0x%04x,0x%04x,0x%04x), the\n"
               "manufacturer and name of serial board or\n"
-              "modem board to rmk+serial@arm.linux.org.uk.\n",
+              "modem board to <linux-serial@vger.kernel.org>.\n",
               pci_name(dev), str, dev->vendor, dev->device,
               dev->subsystem_vendor, dev->subsystem_device);
 }
@@ -1987,13 +1987,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
                .subdevice      = PCI_ANY_ID,
                .setup          = byt_serial_setup,
        },
-       {
-               .vendor         = PCI_VENDOR_ID_INTEL,
-               .device         = PCI_DEVICE_ID_INTEL_QRK_UART,
-               .subvendor      = PCI_ANY_ID,
-               .subdevice      = PCI_ANY_ID,
-               .setup          = pci_default_setup,
-       },
        {
                .vendor         = PCI_VENDOR_ID_INTEL,
                .device         = PCI_DEVICE_ID_INTEL_BSW_UART1,
@@ -2199,13 +2192,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = {
        /*
         * PLX
         */
-       {
-               .vendor         = PCI_VENDOR_ID_PLX,
-               .device         = PCI_DEVICE_ID_PLX_9030,
-               .subvendor      = PCI_SUBVENDOR_ID_PERLE,
-               .subdevice      = PCI_ANY_ID,
-               .setup          = pci_default_setup,
-       },
        {
                .vendor         = PCI_VENDOR_ID_PLX,
                .device         = PCI_DEVICE_ID_PLX_9050,
@@ -5415,10 +5401,6 @@ static struct pci_device_id serial_pci_tbl[] = {
                PCI_ANY_ID, PCI_ANY_ID,
                0, 0, pbn_b0_bt_2_115200 },
 
-       {       PCI_VENDOR_ID_WCH, PCI_DEVICE_ID_WCH_CH352_2S,
-               PCI_ANY_ID, PCI_ANY_ID,
-               0, 0, pbn_b0_bt_2_115200 },
-
        {       PCIE_VENDOR_ID_WCH, PCIE_DEVICE_ID_WCH_CH384_4S,
                PCI_ANY_ID, PCI_ANY_ID,
                0, 0, pbn_wch384_4 },
index b1893f3..3ad1458 100644 (file)
@@ -921,6 +921,9 @@ static void lpuart_setup_watermark(struct lpuart_port *sport)
        writeb(val | UARTPFIFO_TXFE | UARTPFIFO_RXFE,
                        sport->port.membase + UARTPFIFO);
 
+       /* explicitly clear RDRF */
+       readb(sport->port.membase + UARTSR1);
+
        /* flush Tx and Rx FIFO */
        writeb(UARTCFIFO_TXFLUSH | UARTCFIFO_RXFLUSH,
                        sport->port.membase + UARTCFIFO);
@@ -1076,6 +1079,8 @@ static int lpuart_startup(struct uart_port *port)
        sport->txfifo_size = 0x1 << (((temp >> UARTPFIFO_TXSIZE_OFF) &
                UARTPFIFO_FIFOSIZE_MASK) + 1);
 
+       sport->port.fifosize = sport->txfifo_size;
+
        sport->rxfifo_size = 0x1 << (((temp >> UARTPFIFO_RXSIZE_OFF) &
                UARTPFIFO_FIFOSIZE_MASK) + 1);
 
index 7ff61e2..33fb94f 100644 (file)
@@ -133,10 +133,6 @@ static int of_platform_serial_setup(struct platform_device *ofdev,
        if (of_find_property(np, "no-loopback-test", NULL))
                port->flags |= UPF_SKIP_TEST;
 
-       ret = of_alias_get_id(np, "serial");
-       if (ret >= 0)
-               port->line = ret;
-
        port->dev = &ofdev->dev;
 
        switch (type) {
index af821a9..cf08876 100644 (file)
@@ -963,6 +963,7 @@ static void s3c24xx_serial_shutdown(struct uart_port *port)
                        free_irq(ourport->tx_irq, ourport);
                tx_enabled(port) = 0;
                ourport->tx_claimed = 0;
+               ourport->tx_mode = 0;
        }
 
        if (ourport->rx_claimed) {
index 594b633..bca975f 100644 (file)
@@ -293,8 +293,10 @@ static irqreturn_t sprd_handle_irq(int irq, void *dev_id)
 
        ims = serial_in(port, SPRD_IMSR);
 
-       if (!ims)
+       if (!ims) {
+               spin_unlock(&port->lock);
                return IRQ_NONE;
+       }
 
        serial_out(port, SPRD_ICLR, ~0);
 
index 51f066a..2bb4dfc 100644 (file)
@@ -1028,8 +1028,8 @@ EXPORT_SYMBOL(start_tty);
 /* We limit tty time update visibility to every 8 seconds or so. */
 static void tty_update_time(struct timespec *time)
 {
-       unsigned long sec = get_seconds() & ~7;
-       if ((long)(sec - time->tv_sec) > 0)
+       unsigned long sec = get_seconds();
+       if (abs(sec - time->tv_sec) & ~7)
                time->tv_sec = sec;
 }
 
index a5cf253..632fc81 100644 (file)
@@ -217,11 +217,17 @@ void tty_wait_until_sent(struct tty_struct *tty, long timeout)
 #endif
        if (!timeout)
                timeout = MAX_SCHEDULE_TIMEOUT;
-       if (wait_event_interruptible_timeout(tty->write_wait,
-                       !tty_chars_in_buffer(tty), timeout) >= 0) {
-               if (tty->ops->wait_until_sent)
-                       tty->ops->wait_until_sent(tty, timeout);
-       }
+
+       timeout = wait_event_interruptible_timeout(tty->write_wait,
+                       !tty_chars_in_buffer(tty), timeout);
+       if (timeout <= 0)
+               return;
+
+       if (timeout == MAX_SCHEDULE_TIMEOUT)
+               timeout = 0;
+
+       if (tty->ops->wait_until_sent)
+               tty->ops->wait_until_sent(tty, timeout);
 }
 EXPORT_SYMBOL(tty_wait_until_sent);
 
index ff45104..4bfb7ac 100644 (file)
@@ -929,6 +929,13 @@ __acquires(hwep->lock)
        return retval;
 }
 
+static int otg_a_alt_hnp_support(struct ci_hdrc *ci)
+{
+       dev_warn(&ci->gadget.dev,
+               "connect the device to an alternate port if you want HNP\n");
+       return isr_setup_status_phase(ci);
+}
+
 /**
  * isr_setup_packet_handler: setup packet handler
  * @ci: UDC descriptor
@@ -1061,6 +1068,10 @@ __acquires(ci->lock)
                                                        ci);
                                }
                                break;
+                       case USB_DEVICE_A_ALT_HNP_SUPPORT:
+                               if (ci_otg_is_fsm_mode(ci))
+                                       err = otg_a_alt_hnp_support(ci);
+                               break;
                        default:
                                goto delegate;
                        }
index e78720b..6836177 100644 (file)
@@ -1650,6 +1650,8 @@ static int acm_reset_resume(struct usb_interface *intf)
 
 static const struct usb_device_id acm_ids[] = {
        /* quirky and broken devices */
+       { USB_DEVICE(0x076d, 0x0006), /* Denso Cradle CU-321 */
+       .driver_info = NO_UNION_NORMAL, },/* has no union descriptor */
        { USB_DEVICE(0x17ef, 0x7000), /* Lenovo USB modem */
        .driver_info = NO_UNION_NORMAL, },/* has no union descriptor */
        { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */
index c6b35b7..61d538a 100644 (file)
@@ -150,9 +150,9 @@ static int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state)
                break;
        case OTG_STATE_B_PERIPHERAL:
                otg_chrg_vbus(fsm, 0);
-               otg_loc_conn(fsm, 1);
                otg_loc_sof(fsm, 0);
                otg_set_protocol(fsm, PROTO_GADGET);
+               otg_loc_conn(fsm, 1);
                break;
        case OTG_STATE_B_WAIT_ACON:
                otg_chrg_vbus(fsm, 0);
@@ -213,10 +213,10 @@ static int otg_set_state(struct otg_fsm *fsm, enum usb_otg_state new_state)
 
                break;
        case OTG_STATE_A_PERIPHERAL:
-               otg_loc_conn(fsm, 1);
                otg_loc_sof(fsm, 0);
                otg_set_protocol(fsm, PROTO_GADGET);
                otg_drv_vbus(fsm, 1);
+               otg_loc_conn(fsm, 1);
                otg_add_timer(fsm, A_BIDL_ADIS);
                break;
        case OTG_STATE_A_WAIT_VFALL:
index 66abdbc..1163553 100644 (file)
@@ -501,6 +501,7 @@ static void async_completed(struct urb *urb)
        as->status = urb->status;
        signr = as->signr;
        if (signr) {
+               memset(&sinfo, 0, sizeof(sinfo));
                sinfo.si_signo = as->signr;
                sinfo.si_errno = as->status;
                sinfo.si_code = SI_ASYNCIO;
@@ -2382,6 +2383,7 @@ static void usbdev_remove(struct usb_device *udev)
                wake_up_all(&ps->wait);
                list_del_init(&ps->list);
                if (ps->discsignr) {
+                       memset(&sinfo, 0, sizeof(sinfo));
                        sinfo.si_signo = ps->discsignr;
                        sinfo.si_errno = EPIPE;
                        sinfo.si_code = SI_ASYNCIO;
index 02e3e2d..6cf0478 100644 (file)
@@ -377,6 +377,9 @@ static void dwc2_handle_disconnect_intr(struct dwc2_hsotg *hsotg)
                dwc2_is_host_mode(hsotg) ? "Host" : "Device",
                dwc2_op_state_str(hsotg));
 
+       if (hsotg->op_state == OTG_STATE_A_HOST)
+               dwc2_hcd_disconnect(hsotg);
+
        /* Change to L3 (OFF) state */
        hsotg->lx_state = DWC2_L3;
 
index 172d64e..52e0c4e 100644 (file)
@@ -205,6 +205,18 @@ static void dwc3_omap_write_irq0_set(struct dwc3_omap *omap, u32 value)
                                                omap->irq0_offset, value);
 }
 
+static void dwc3_omap_write_irqmisc_clr(struct dwc3_omap *omap, u32 value)
+{
+       dwc3_omap_writel(omap->base, USBOTGSS_IRQENABLE_CLR_MISC +
+                                               omap->irqmisc_offset, value);
+}
+
+static void dwc3_omap_write_irq0_clr(struct dwc3_omap *omap, u32 value)
+{
+       dwc3_omap_writel(omap->base, USBOTGSS_IRQENABLE_CLR_0 -
+                                               omap->irq0_offset, value);
+}
+
 static void dwc3_omap_set_mailbox(struct dwc3_omap *omap,
        enum omap_dwc3_vbus_id_status status)
 {
@@ -345,9 +357,23 @@ static void dwc3_omap_enable_irqs(struct dwc3_omap *omap)
 
 static void dwc3_omap_disable_irqs(struct dwc3_omap *omap)
 {
+       u32                     reg;
+
        /* disable all IRQs */
-       dwc3_omap_write_irqmisc_set(omap, 0x00);
-       dwc3_omap_write_irq0_set(omap, 0x00);
+       reg = USBOTGSS_IRQO_COREIRQ_ST;
+       dwc3_omap_write_irq0_clr(omap, reg);
+
+       reg = (USBOTGSS_IRQMISC_OEVT |
+                       USBOTGSS_IRQMISC_DRVVBUS_RISE |
+                       USBOTGSS_IRQMISC_CHRGVBUS_RISE |
+                       USBOTGSS_IRQMISC_DISCHRGVBUS_RISE |
+                       USBOTGSS_IRQMISC_IDPULLUP_RISE |
+                       USBOTGSS_IRQMISC_DRVVBUS_FALL |
+                       USBOTGSS_IRQMISC_CHRGVBUS_FALL |
+                       USBOTGSS_IRQMISC_DISCHRGVBUS_FALL |
+                       USBOTGSS_IRQMISC_IDPULLUP_FALL);
+
+       dwc3_omap_write_irqmisc_clr(omap, reg);
 }
 
 static u64 dwc3_omap_dma_mask = DMA_BIT_MASK(32);
index 7564814..c42765b 100644 (file)
@@ -1161,7 +1161,6 @@ static ssize_t interf_grp_compatible_id_store(struct usb_os_desc *desc,
        if (desc->opts_mutex)
                mutex_lock(desc->opts_mutex);
        memcpy(desc->ext_compat_id, page, l);
-       desc->ext_compat_id[l] = '\0';
 
        if (desc->opts_mutex)
                mutex_unlock(desc->opts_mutex);
@@ -1192,7 +1191,6 @@ static ssize_t interf_grp_sub_compatible_id_store(struct usb_os_desc *desc,
        if (desc->opts_mutex)
                mutex_lock(desc->opts_mutex);
        memcpy(desc->ext_compat_id + 8, page, l);
-       desc->ext_compat_id[l + 8] = '\0';
 
        if (desc->opts_mutex)
                mutex_unlock(desc->opts_mutex);
index af98b09..175c995 100644 (file)
@@ -144,10 +144,9 @@ struct ffs_io_data {
        bool read;
 
        struct kiocb *kiocb;
-       const struct iovec *iovec;
-       unsigned long nr_segs;
-       char __user *buf;
-       size_t len;
+       struct iov_iter data;
+       const void *to_free;
+       char *buf;
 
        struct mm_struct *mm;
        struct work_struct work;
@@ -649,29 +648,10 @@ static void ffs_user_copy_worker(struct work_struct *work)
                                         io_data->req->actual;
 
        if (io_data->read && ret > 0) {
-               int i;
-               size_t pos = 0;
-
-               /*
-                * Since req->length may be bigger than io_data->len (after
-                * being rounded up to maxpacketsize), we may end up with more
-                * data then user space has space for.
-                */
-               ret = min_t(int, ret, io_data->len);
-
                use_mm(io_data->mm);
-               for (i = 0; i < io_data->nr_segs; i++) {
-                       size_t len = min_t(size_t, ret - pos,
-                                       io_data->iovec[i].iov_len);
-                       if (!len)
-                               break;
-                       if (unlikely(copy_to_user(io_data->iovec[i].iov_base,
-                                                &io_data->buf[pos], len))) {
-                               ret = -EFAULT;
-                               break;
-                       }
-                       pos += len;
-               }
+               ret = copy_to_iter(io_data->buf, ret, &io_data->data);
+               if (iov_iter_count(&io_data->data))
+                       ret = -EFAULT;
                unuse_mm(io_data->mm);
        }
 
@@ -684,7 +664,7 @@ static void ffs_user_copy_worker(struct work_struct *work)
 
        io_data->kiocb->private = NULL;
        if (io_data->read)
-               kfree(io_data->iovec);
+               kfree(io_data->to_free);
        kfree(io_data->buf);
        kfree(io_data);
 }
@@ -743,6 +723,7 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
                 * before the waiting completes, so do not assign to 'gadget' earlier
                 */
                struct usb_gadget *gadget = epfile->ffs->gadget;
+               size_t copied;
 
                spin_lock_irq(&epfile->ffs->eps_lock);
                /* In the meantime, endpoint got disabled or changed. */
@@ -750,34 +731,21 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
                        spin_unlock_irq(&epfile->ffs->eps_lock);
                        return -ESHUTDOWN;
                }
+               data_len = iov_iter_count(&io_data->data);
                /*
                 * Controller may require buffer size to be aligned to
                 * maxpacketsize of an out endpoint.
                 */
-               data_len = io_data->read ?
-                          usb_ep_align_maybe(gadget, ep->ep, io_data->len) :
-                          io_data->len;
+               if (io_data->read)
+                       data_len = usb_ep_align_maybe(gadget, ep->ep, data_len);
                spin_unlock_irq(&epfile->ffs->eps_lock);
 
                data = kmalloc(data_len, GFP_KERNEL);
                if (unlikely(!data))
                        return -ENOMEM;
-               if (io_data->aio && !io_data->read) {
-                       int i;
-                       size_t pos = 0;
-                       for (i = 0; i < io_data->nr_segs; i++) {
-                               if (unlikely(copy_from_user(&data[pos],
-                                            io_data->iovec[i].iov_base,
-                                            io_data->iovec[i].iov_len))) {
-                                       ret = -EFAULT;
-                                       goto error;
-                               }
-                               pos += io_data->iovec[i].iov_len;
-                       }
-               } else {
-                       if (!io_data->read &&
-                           unlikely(__copy_from_user(data, io_data->buf,
-                                                     io_data->len))) {
+               if (!io_data->read) {
+                       copied = copy_from_iter(data, data_len, &io_data->data);
+                       if (copied != data_len) {
                                ret = -EFAULT;
                                goto error;
                        }
@@ -876,10 +844,8 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data)
                                 */
                                ret = ep->status;
                                if (io_data->read && ret > 0) {
-                                       ret = min_t(size_t, ret, io_data->len);
-
-                                       if (unlikely(copy_to_user(io_data->buf,
-                                               data, ret)))
+                                       ret = copy_to_iter(data, ret, &io_data->data);
+                                       if (unlikely(iov_iter_count(&io_data->data)))
                                                ret = -EFAULT;
                                }
                        }
@@ -898,37 +864,6 @@ error:
        return ret;
 }
 
-static ssize_t
-ffs_epfile_write(struct file *file, const char __user *buf, size_t len,
-                loff_t *ptr)
-{
-       struct ffs_io_data io_data;
-
-       ENTER();
-
-       io_data.aio = false;
-       io_data.read = false;
-       io_data.buf = (char * __user)buf;
-       io_data.len = len;
-
-       return ffs_epfile_io(file, &io_data);
-}
-
-static ssize_t
-ffs_epfile_read(struct file *file, char __user *buf, size_t len, loff_t *ptr)
-{
-       struct ffs_io_data io_data;
-
-       ENTER();
-
-       io_data.aio = false;
-       io_data.read = true;
-       io_data.buf = buf;
-       io_data.len = len;
-
-       return ffs_epfile_io(file, &io_data);
-}
-
 static int
 ffs_epfile_open(struct inode *inode, struct file *file)
 {
@@ -965,67 +900,86 @@ static int ffs_aio_cancel(struct kiocb *kiocb)
        return value;
 }
 
-static ssize_t ffs_epfile_aio_write(struct kiocb *kiocb,
-                                   const struct iovec *iovec,
-                                   unsigned long nr_segs, loff_t loff)
+static ssize_t ffs_epfile_write_iter(struct kiocb *kiocb, struct iov_iter *from)
 {
-       struct ffs_io_data *io_data;
+       struct ffs_io_data io_data, *p = &io_data;
+       ssize_t res;
 
        ENTER();
 
-       io_data = kmalloc(sizeof(*io_data), GFP_KERNEL);
-       if (unlikely(!io_data))
-               return -ENOMEM;
+       if (!is_sync_kiocb(kiocb)) {
+               p = kmalloc(sizeof(io_data), GFP_KERNEL);
+               if (unlikely(!p))
+                       return -ENOMEM;
+               p->aio = true;
+       } else {
+               p->aio = false;
+       }
 
-       io_data->aio = true;
-       io_data->read = false;
-       io_data->kiocb = kiocb;
-       io_data->iovec = iovec;
-       io_data->nr_segs = nr_segs;
-       io_data->len = kiocb->ki_nbytes;
-       io_data->mm = current->mm;
+       p->read = false;
+       p->kiocb = kiocb;
+       p->data = *from;
+       p->mm = current->mm;
 
-       kiocb->private = io_data;
+       kiocb->private = p;
 
        kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
 
-       return ffs_epfile_io(kiocb->ki_filp, io_data);
+       res = ffs_epfile_io(kiocb->ki_filp, p);
+       if (res == -EIOCBQUEUED)
+               return res;
+       if (p->aio)
+               kfree(p);
+       else
+               *from = p->data;
+       return res;
 }
 
-static ssize_t ffs_epfile_aio_read(struct kiocb *kiocb,
-                                  const struct iovec *iovec,
-                                  unsigned long nr_segs, loff_t loff)
+static ssize_t ffs_epfile_read_iter(struct kiocb *kiocb, struct iov_iter *to)
 {
-       struct ffs_io_data *io_data;
-       struct iovec *iovec_copy;
+       struct ffs_io_data io_data, *p = &io_data;
+       ssize_t res;
 
        ENTER();
 
-       iovec_copy = kmalloc_array(nr_segs, sizeof(*iovec_copy), GFP_KERNEL);
-       if (unlikely(!iovec_copy))
-               return -ENOMEM;
-
-       memcpy(iovec_copy, iovec, sizeof(struct iovec)*nr_segs);
-
-       io_data = kmalloc(sizeof(*io_data), GFP_KERNEL);
-       if (unlikely(!io_data)) {
-               kfree(iovec_copy);
-               return -ENOMEM;
+       if (!is_sync_kiocb(kiocb)) {
+               p = kmalloc(sizeof(io_data), GFP_KERNEL);
+               if (unlikely(!p))
+                       return -ENOMEM;
+               p->aio = true;
+       } else {
+               p->aio = false;
        }
 
-       io_data->aio = true;
-       io_data->read = true;
-       io_data->kiocb = kiocb;
-       io_data->iovec = iovec_copy;
-       io_data->nr_segs = nr_segs;
-       io_data->len = kiocb->ki_nbytes;
-       io_data->mm = current->mm;
+       p->read = true;
+       p->kiocb = kiocb;
+       if (p->aio) {
+               p->to_free = dup_iter(&p->data, to, GFP_KERNEL);
+               if (!p->to_free) {
+                       kfree(p);
+                       return -ENOMEM;
+               }
+       } else {
+               p->data = *to;
+               p->to_free = NULL;
+       }
+       p->mm = current->mm;
 
-       kiocb->private = io_data;
+       kiocb->private = p;
 
        kiocb_set_cancel_fn(kiocb, ffs_aio_cancel);
 
-       return ffs_epfile_io(kiocb->ki_filp, io_data);
+       res = ffs_epfile_io(kiocb->ki_filp, p);
+       if (res == -EIOCBQUEUED)
+               return res;
+
+       if (p->aio) {
+               kfree(p->to_free);
+               kfree(p);
+       } else {
+               *to = p->data;
+       }
+       return res;
 }
 
 static int
@@ -1105,10 +1059,10 @@ static const struct file_operations ffs_epfile_operations = {
        .llseek =       no_llseek,
 
        .open =         ffs_epfile_open,
-       .write =        ffs_epfile_write,
-       .read =         ffs_epfile_read,
-       .aio_write =    ffs_epfile_aio_write,
-       .aio_read =     ffs_epfile_aio_read,
+       .write =        new_sync_write,
+       .read =         new_sync_read,
+       .write_iter =   ffs_epfile_write_iter,
+       .read_iter =    ffs_epfile_read_iter,
        .release =      ffs_epfile_release,
        .unlocked_ioctl =       ffs_epfile_ioctl,
 };
index 426d69a..a2612fb 100644 (file)
@@ -569,7 +569,7 @@ fail:
        return status;
 }
 
-const struct file_operations f_hidg_fops = {
+static const struct file_operations f_hidg_fops = {
        .owner          = THIS_MODULE,
        .open           = f_hidg_open,
        .release        = f_hidg_release,
index 298b461..39f49f1 100644 (file)
@@ -289,8 +289,7 @@ static void disable_loopback(struct f_loopback *loop)
        struct usb_composite_dev        *cdev;
 
        cdev = loop->function.config->cdev;
-       disable_endpoints(cdev, loop->in_ep, loop->out_ep, NULL, NULL, NULL,
-                       NULL);
+       disable_endpoints(cdev, loop->in_ep, loop->out_ep, NULL, NULL);
        VDBG(cdev, "%s disabled\n", loop->function.name);
 }
 
index c89e96c..c0c3ef2 100644 (file)
@@ -417,7 +417,10 @@ static int pn_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
                        return -EINVAL;
 
                spin_lock(&port->lock);
-               __pn_reset(f);
+
+               if (fp->in_ep->driver_data)
+                       __pn_reset(f);
+
                if (alt == 1) {
                        int i;
 
index e07c50c..3a5ae99 100644 (file)
 #include "gadget_chips.h"
 #include "u_f.h"
 
-#define USB_MS_TO_SS_INTERVAL(x) USB_MS_TO_HS_INTERVAL(x)
-
-enum eptype {
-       EP_CONTROL = 0,
-       EP_BULK,
-       EP_ISOC,
-       EP_INTERRUPT,
-};
-
 /*
  * SOURCE/SINK FUNCTION ... a primary testing vehicle for USB peripheral
  * controller drivers.
@@ -64,8 +55,6 @@ struct f_sourcesink {
        struct usb_ep           *out_ep;
        struct usb_ep           *iso_in_ep;
        struct usb_ep           *iso_out_ep;
-       struct usb_ep           *int_in_ep;
-       struct usb_ep           *int_out_ep;
        int                     cur_alt;
 };
 
@@ -79,10 +68,6 @@ static unsigned isoc_interval;
 static unsigned isoc_maxpacket;
 static unsigned isoc_mult;
 static unsigned isoc_maxburst;
-static unsigned int_interval; /* In ms */
-static unsigned int_maxpacket;
-static unsigned int_mult;
-static unsigned int_maxburst;
 static unsigned buflen;
 
 /*-------------------------------------------------------------------------*/
@@ -107,16 +92,6 @@ static struct usb_interface_descriptor source_sink_intf_alt1 = {
        /* .iInterface          = DYNAMIC */
 };
 
-static struct usb_interface_descriptor source_sink_intf_alt2 = {
-       .bLength =              USB_DT_INTERFACE_SIZE,
-       .bDescriptorType =      USB_DT_INTERFACE,
-
-       .bAlternateSetting =    2,
-       .bNumEndpoints =        2,
-       .bInterfaceClass =      USB_CLASS_VENDOR_SPEC,
-       /* .iInterface          = DYNAMIC */
-};
-
 /* full speed support: */
 
 static struct usb_endpoint_descriptor fs_source_desc = {
@@ -155,26 +130,6 @@ static struct usb_endpoint_descriptor fs_iso_sink_desc = {
        .bInterval =            4,
 };
 
-static struct usb_endpoint_descriptor fs_int_source_desc = {
-       .bLength =              USB_DT_ENDPOINT_SIZE,
-       .bDescriptorType =      USB_DT_ENDPOINT,
-
-       .bEndpointAddress =     USB_DIR_IN,
-       .bmAttributes =         USB_ENDPOINT_XFER_INT,
-       .wMaxPacketSize =       cpu_to_le16(64),
-       .bInterval =            GZERO_INT_INTERVAL,
-};
-
-static struct usb_endpoint_descriptor fs_int_sink_desc = {
-       .bLength =              USB_DT_ENDPOINT_SIZE,
-       .bDescriptorType =      USB_DT_ENDPOINT,
-
-       .bEndpointAddress =     USB_DIR_OUT,
-       .bmAttributes =         USB_ENDPOINT_XFER_INT,
-       .wMaxPacketSize =       cpu_to_le16(64),
-       .bInterval =            GZERO_INT_INTERVAL,
-};
-
 static struct usb_descriptor_header *fs_source_sink_descs[] = {
        (struct usb_descriptor_header *) &source_sink_intf_alt0,
        (struct usb_descriptor_header *) &fs_sink_desc,
@@ -185,10 +140,6 @@ static struct usb_descriptor_header *fs_source_sink_descs[] = {
        (struct usb_descriptor_header *) &fs_source_desc,
        (struct usb_descriptor_header *) &fs_iso_sink_desc,
        (struct usb_descriptor_header *) &fs_iso_source_desc,
-       (struct usb_descriptor_header *) &source_sink_intf_alt2,
-#define FS_ALT_IFC_2_OFFSET    8
-       (struct usb_descriptor_header *) &fs_int_sink_desc,
-       (struct usb_descriptor_header *) &fs_int_source_desc,
        NULL,
 };
 
@@ -228,24 +179,6 @@ static struct usb_endpoint_descriptor hs_iso_sink_desc = {
        .bInterval =            4,
 };
 
-static struct usb_endpoint_descriptor hs_int_source_desc = {
-       .bLength =              USB_DT_ENDPOINT_SIZE,
-       .bDescriptorType =      USB_DT_ENDPOINT,
-
-       .bmAttributes =         USB_ENDPOINT_XFER_INT,
-       .wMaxPacketSize =       cpu_to_le16(1024),
-       .bInterval =            USB_MS_TO_HS_INTERVAL(GZERO_INT_INTERVAL),
-};
-
-static struct usb_endpoint_descriptor hs_int_sink_desc = {
-       .bLength =              USB_DT_ENDPOINT_SIZE,
-       .bDescriptorType =      USB_DT_ENDPOINT,
-
-       .bmAttributes =         USB_ENDPOINT_XFER_INT,
-       .wMaxPacketSize =       cpu_to_le16(1024),
-       .bInterval =            USB_MS_TO_HS_INTERVAL(GZERO_INT_INTERVAL),
-};
-
 static struct usb_descriptor_header *hs_source_sink_descs[] = {
        (struct usb_descriptor_header *) &source_sink_intf_alt0,
        (struct usb_descriptor_header *) &hs_source_desc,
@@ -256,10 +189,6 @@ static struct usb_descriptor_header *hs_source_sink_descs[] = {
        (struct usb_descriptor_header *) &hs_sink_desc,
        (struct usb_descriptor_header *) &hs_iso_source_desc,
        (struct usb_descriptor_header *) &hs_iso_sink_desc,
-       (struct usb_descriptor_header *) &source_sink_intf_alt2,
-#define HS_ALT_IFC_2_OFFSET    8
-       (struct usb_descriptor_header *) &hs_int_source_desc,
-       (struct usb_descriptor_header *) &hs_int_sink_desc,
        NULL,
 };
 
@@ -335,42 +264,6 @@ static struct usb_ss_ep_comp_descriptor ss_iso_sink_comp_desc = {
        .wBytesPerInterval =    cpu_to_le16(1024),
 };
 
-static struct usb_endpoint_descriptor ss_int_source_desc = {
-       .bLength =              USB_DT_ENDPOINT_SIZE,
-       .bDescriptorType =      USB_DT_ENDPOINT,
-
-       .bmAttributes =         USB_ENDPOINT_XFER_INT,
-       .wMaxPacketSize =       cpu_to_le16(1024),
-       .bInterval =            USB_MS_TO_SS_INTERVAL(GZERO_INT_INTERVAL),
-};
-
-struct usb_ss_ep_comp_descriptor ss_int_source_comp_desc = {
-       .bLength =              USB_DT_SS_EP_COMP_SIZE,
-       .bDescriptorType =      USB_DT_SS_ENDPOINT_COMP,
-
-       .bMaxBurst =            0,
-       .bmAttributes =         0,
-       .wBytesPerInterval =    cpu_to_le16(1024),
-};
-
-static struct usb_endpoint_descriptor ss_int_sink_desc = {
-       .bLength =              USB_DT_ENDPOINT_SIZE,
-       .bDescriptorType =      USB_DT_ENDPOINT,
-
-       .bmAttributes =         USB_ENDPOINT_XFER_INT,
-       .wMaxPacketSize =       cpu_to_le16(1024),
-       .bInterval =            USB_MS_TO_SS_INTERVAL(GZERO_INT_INTERVAL),
-};
-
-struct usb_ss_ep_comp_descriptor ss_int_sink_comp_desc = {
-       .bLength =              USB_DT_SS_EP_COMP_SIZE,
-       .bDescriptorType =      USB_DT_SS_ENDPOINT_COMP,
-
-       .bMaxBurst =            0,
-       .bmAttributes =         0,
-       .wBytesPerInterval =    cpu_to_le16(1024),
-};
-
 static struct usb_descriptor_header *ss_source_sink_descs[] = {
        (struct usb_descriptor_header *) &source_sink_intf_alt0,
        (struct usb_descriptor_header *) &ss_source_desc,
@@ -387,12 +280,6 @@ static struct usb_descriptor_header *ss_source_sink_descs[] = {
        (struct usb_descriptor_header *) &ss_iso_source_comp_desc,
        (struct usb_descriptor_header *) &ss_iso_sink_desc,
        (struct usb_descriptor_header *) &ss_iso_sink_comp_desc,
-       (struct usb_descriptor_header *) &source_sink_intf_alt2,
-#define SS_ALT_IFC_2_OFFSET    14
-       (struct usb_descriptor_header *) &ss_int_source_desc,
-       (struct usb_descriptor_header *) &ss_int_source_comp_desc,
-       (struct usb_descriptor_header *) &ss_int_sink_desc,
-       (struct usb_descriptor_header *) &ss_int_sink_comp_desc,
        NULL,
 };
 
@@ -414,21 +301,6 @@ static struct usb_gadget_strings *sourcesink_strings[] = {
 };
 
 /*-------------------------------------------------------------------------*/
-static const char *get_ep_string(enum eptype ep_type)
-{
-       switch (ep_type) {
-       case EP_ISOC:
-               return "ISOC-";
-       case EP_INTERRUPT:
-               return "INTERRUPT-";
-       case EP_CONTROL:
-               return "CTRL-";
-       case EP_BULK:
-               return "BULK-";
-       default:
-               return "UNKNOWN-";
-       }
-}
 
 static inline struct usb_request *ss_alloc_ep_req(struct usb_ep *ep, int len)
 {
@@ -456,8 +328,7 @@ static void disable_ep(struct usb_composite_dev *cdev, struct usb_ep *ep)
 
 void disable_endpoints(struct usb_composite_dev *cdev,
                struct usb_ep *in, struct usb_ep *out,
-               struct usb_ep *iso_in, struct usb_ep *iso_out,
-               struct usb_ep *int_in, struct usb_ep *int_out)
+               struct usb_ep *iso_in, struct usb_ep *iso_out)
 {
        disable_ep(cdev, in);
        disable_ep(cdev, out);
@@ -465,10 +336,6 @@ void disable_endpoints(struct usb_composite_dev *cdev,
                disable_ep(cdev, iso_in);
        if (iso_out)
                disable_ep(cdev, iso_out);
-       if (int_in)
-               disable_ep(cdev, int_in);
-       if (int_out)
-               disable_ep(cdev, int_out);
 }
 
 static int
@@ -485,7 +352,6 @@ sourcesink_bind(struct usb_configuration *c, struct usb_function *f)
                return id;
        source_sink_intf_alt0.bInterfaceNumber = id;
        source_sink_intf_alt1.bInterfaceNumber = id;
-       source_sink_intf_alt2.bInterfaceNumber = id;
 
        /* allocate bulk endpoints */
        ss->in_ep = usb_ep_autoconfig(cdev->gadget, &fs_source_desc);
@@ -546,55 +412,14 @@ no_iso:
        if (isoc_maxpacket > 1024)
                isoc_maxpacket = 1024;
 
-       /* sanity check the interrupt module parameters */
-       if (int_interval < 1)
-               int_interval = 1;
-       if (int_interval > 4096)
-               int_interval = 4096;
-       if (int_mult > 2)
-               int_mult = 2;
-       if (int_maxburst > 15)
-               int_maxburst = 15;
-
-       /* fill in the FS interrupt descriptors from the module parameters */
-       fs_int_source_desc.wMaxPacketSize = int_maxpacket > 64 ?
-                                               64 : int_maxpacket;
-       fs_int_source_desc.bInterval = int_interval > 255 ?
-                                               255 : int_interval;
-       fs_int_sink_desc.wMaxPacketSize = int_maxpacket > 64 ?
-                                               64 : int_maxpacket;
-       fs_int_sink_desc.bInterval = int_interval > 255 ?
-                                               255 : int_interval;
-
-       /* allocate int endpoints */
-       ss->int_in_ep = usb_ep_autoconfig(cdev->gadget, &fs_int_source_desc);
-       if (!ss->int_in_ep)
-               goto no_int;
-       ss->int_in_ep->driver_data = cdev;      /* claim */
-
-       ss->int_out_ep = usb_ep_autoconfig(cdev->gadget, &fs_int_sink_desc);
-       if (ss->int_out_ep) {
-               ss->int_out_ep->driver_data = cdev;     /* claim */
-       } else {
-               ss->int_in_ep->driver_data = NULL;
-               ss->int_in_ep = NULL;
-no_int:
-               fs_source_sink_descs[FS_ALT_IFC_2_OFFSET] = NULL;
-               hs_source_sink_descs[HS_ALT_IFC_2_OFFSET] = NULL;
-               ss_source_sink_descs[SS_ALT_IFC_2_OFFSET] = NULL;
-       }
-
-       if (int_maxpacket > 1024)
-               int_maxpacket = 1024;
-
        /* support high speed hardware */
        hs_source_desc.bEndpointAddress = fs_source_desc.bEndpointAddress;
        hs_sink_desc.bEndpointAddress = fs_sink_desc.bEndpointAddress;
 
        /*
-        * Fill in the HS isoc and interrupt descriptors from the module
-        * parameters. We assume that the user knows what they are doing and
-        * won't give parameters that their UDC doesn't support.
+        * Fill in the HS isoc descriptors from the module parameters.
+        * We assume that the user knows what they are doing and won't
+        * give parameters that their UDC doesn't support.
         */
        hs_iso_source_desc.wMaxPacketSize = isoc_maxpacket;
        hs_iso_source_desc.wMaxPacketSize |= isoc_mult << 11;
@@ -607,17 +432,6 @@ no_int:
        hs_iso_sink_desc.bInterval = isoc_interval;
        hs_iso_sink_desc.bEndpointAddress = fs_iso_sink_desc.bEndpointAddress;
 
-       hs_int_source_desc.wMaxPacketSize = int_maxpacket;
-       hs_int_source_desc.wMaxPacketSize |= int_mult << 11;
-       hs_int_source_desc.bInterval = USB_MS_TO_HS_INTERVAL(int_interval);
-       hs_int_source_desc.bEndpointAddress =
-               fs_int_source_desc.bEndpointAddress;
-
-       hs_int_sink_desc.wMaxPacketSize = int_maxpacket;
-       hs_int_sink_desc.wMaxPacketSize |= int_mult << 11;
-       hs_int_sink_desc.bInterval = USB_MS_TO_HS_INTERVAL(int_interval);
-       hs_int_sink_desc.bEndpointAddress = fs_int_sink_desc.bEndpointAddress;
-
        /* support super speed hardware */
        ss_source_desc.bEndpointAddress =
                fs_source_desc.bEndpointAddress;
@@ -625,9 +439,9 @@ no_int:
                fs_sink_desc.bEndpointAddress;
 
        /*
-        * Fill in the SS isoc and interrupt descriptors from the module
-        * parameters. We assume that the user knows what they are doing and
-        * won't give parameters that their UDC doesn't support.
+        * Fill in the SS isoc descriptors from the module parameters.
+        * We assume that the user knows what they are doing and won't
+        * give parameters that their UDC doesn't support.
         */
        ss_iso_source_desc.wMaxPacketSize = isoc_maxpacket;
        ss_iso_source_desc.bInterval = isoc_interval;
@@ -646,37 +460,17 @@ no_int:
                isoc_maxpacket * (isoc_mult + 1) * (isoc_maxburst + 1);
        ss_iso_sink_desc.bEndpointAddress = fs_iso_sink_desc.bEndpointAddress;
 
-       ss_int_source_desc.wMaxPacketSize = int_maxpacket;
-       ss_int_source_desc.bInterval = USB_MS_TO_SS_INTERVAL(int_interval);
-       ss_int_source_comp_desc.bmAttributes = int_mult;
-       ss_int_source_comp_desc.bMaxBurst = int_maxburst;
-       ss_int_source_comp_desc.wBytesPerInterval =
-               int_maxpacket * (int_mult + 1) * (int_maxburst + 1);
-       ss_int_source_desc.bEndpointAddress =
-               fs_int_source_desc.bEndpointAddress;
-
-       ss_int_sink_desc.wMaxPacketSize = int_maxpacket;
-       ss_int_sink_desc.bInterval = USB_MS_TO_SS_INTERVAL(int_interval);
-       ss_int_sink_comp_desc.bmAttributes = int_mult;
-       ss_int_sink_comp_desc.bMaxBurst = int_maxburst;
-       ss_int_sink_comp_desc.wBytesPerInterval =
-               int_maxpacket * (int_mult + 1) * (int_maxburst + 1);
-       ss_int_sink_desc.bEndpointAddress = fs_int_sink_desc.bEndpointAddress;
-
        ret = usb_assign_descriptors(f, fs_source_sink_descs,
                        hs_source_sink_descs, ss_source_sink_descs);
        if (ret)
                return ret;
 
-       DBG(cdev, "%s speed %s: IN/%s, OUT/%s, ISO-IN/%s, ISO-OUT/%s, "
-                       "INT-IN/%s, INT-OUT/%s\n",
+       DBG(cdev, "%s speed %s: IN/%s, OUT/%s, ISO-IN/%s, ISO-OUT/%s\n",
            (gadget_is_superspeed(c->cdev->gadget) ? "super" :
             (gadget_is_dualspeed(c->cdev->gadget) ? "dual" : "full")),
                        f->name, ss->in_ep->name, ss->out_ep->name,
                        ss->iso_in_ep ? ss->iso_in_ep->name : "<none>",
-                       ss->iso_out_ep ? ss->iso_out_ep->name : "<none>",
-                       ss->int_in_ep ? ss->int_in_ep->name : "<none>",
-                       ss->int_out_ep ? ss->int_out_ep->name : "<none>");
+                       ss->iso_out_ep ? ss->iso_out_ep->name : "<none>");
        return 0;
 }
 
@@ -807,15 +601,14 @@ static void source_sink_complete(struct usb_ep *ep, struct usb_request *req)
 }
 
 static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in,
-               enum eptype ep_type, int speed)
+               bool is_iso, int speed)
 {
        struct usb_ep           *ep;
        struct usb_request      *req;
        int                     i, size, status;
 
        for (i = 0; i < 8; i++) {
-               switch (ep_type) {
-               case EP_ISOC:
+               if (is_iso) {
                        switch (speed) {
                        case USB_SPEED_SUPER:
                                size = isoc_maxpacket * (isoc_mult + 1) *
@@ -831,28 +624,9 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in,
                        }
                        ep = is_in ? ss->iso_in_ep : ss->iso_out_ep;
                        req = ss_alloc_ep_req(ep, size);
-                       break;
-               case EP_INTERRUPT:
-                       switch (speed) {
-                       case USB_SPEED_SUPER:
-                               size = int_maxpacket * (int_mult + 1) *
-                                               (int_maxburst + 1);
-                               break;
-                       case USB_SPEED_HIGH:
-                               size = int_maxpacket * (int_mult + 1);
-                               break;
-                       default:
-                               size = int_maxpacket > 1023 ?
-                                               1023 : int_maxpacket;
-                               break;
-                       }
-                       ep = is_in ? ss->int_in_ep : ss->int_out_ep;
-                       req = ss_alloc_ep_req(ep, size);
-                       break;
-               default:
+               } else {
                        ep = is_in ? ss->in_ep : ss->out_ep;
                        req = ss_alloc_ep_req(ep, 0);
-                       break;
                }
 
                if (!req)
@@ -870,12 +644,12 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in,
 
                        cdev = ss->function.config->cdev;
                        ERROR(cdev, "start %s%s %s --> %d\n",
-                               get_ep_string(ep_type), is_in ? "IN" : "OUT",
-                               ep->name, status);
+                             is_iso ? "ISO-" : "", is_in ? "IN" : "OUT",
+                             ep->name, status);
                        free_ep_req(ep, req);
                }
 
-               if (!(ep_type == EP_ISOC))
+               if (!is_iso)
                        break;
        }
 
@@ -888,7 +662,7 @@ static void disable_source_sink(struct f_sourcesink *ss)
 
        cdev = ss->function.config->cdev;
        disable_endpoints(cdev, ss->in_ep, ss->out_ep, ss->iso_in_ep,
-                       ss->iso_out_ep, ss->int_in_ep, ss->int_out_ep);
+                       ss->iso_out_ep);
        VDBG(cdev, "%s disabled\n", ss->function.name);
 }
 
@@ -900,62 +674,6 @@ enable_source_sink(struct usb_composite_dev *cdev, struct f_sourcesink *ss,
        int                                     speed = cdev->gadget->speed;
        struct usb_ep                           *ep;
 
-       if (alt == 2) {
-               /* Configure for periodic interrupt endpoint */
-               ep = ss->int_in_ep;
-               if (ep) {
-                       result = config_ep_by_speed(cdev->gadget,
-                                       &(ss->function), ep);
-                       if (result)
-                               return result;
-
-                       result = usb_ep_enable(ep);
-                       if (result < 0)
-                               return result;
-
-                       ep->driver_data = ss;
-                       result = source_sink_start_ep(ss, true, EP_INTERRUPT,
-                                       speed);
-                       if (result < 0) {
-fail1:
-                               ep = ss->int_in_ep;
-                               if (ep) {
-                                       usb_ep_disable(ep);
-                                       ep->driver_data = NULL;
-                               }
-                               return result;
-                       }
-               }
-
-               /*
-                * one interrupt endpoint reads (sinks) anything OUT (from the
-                * host)
-                */
-               ep = ss->int_out_ep;
-               if (ep) {
-                       result = config_ep_by_speed(cdev->gadget,
-                                       &(ss->function), ep);
-                       if (result)
-                               goto fail1;
-
-                       result = usb_ep_enable(ep);
-                       if (result < 0)
-                               goto fail1;
-
-                       ep->driver_data = ss;
-                       result = source_sink_start_ep(ss, false, EP_INTERRUPT,
-                                       speed);
-                       if (result < 0) {
-                               ep = ss->int_out_ep;
-                               usb_ep_disable(ep);
-                               ep->driver_data = NULL;
-                               goto fail1;
-                       }
-               }
-
-               goto out;
-       }
-
        /* one bulk endpoint writes (sources) zeroes IN (to the host) */
        ep = ss->in_ep;
        result = config_ep_by_speed(cdev->gadget, &(ss->function), ep);
@@ -966,7 +684,7 @@ fail1:
                return result;
        ep->driver_data = ss;
 
-       result = source_sink_start_ep(ss, true, EP_BULK, speed);
+       result = source_sink_start_ep(ss, true, false, speed);
        if (result < 0) {
 fail:
                ep = ss->in_ep;
@@ -985,7 +703,7 @@ fail:
                goto fail;
        ep->driver_data = ss;
 
-       result = source_sink_start_ep(ss, false, EP_BULK, speed);
+       result = source_sink_start_ep(ss, false, false, speed);
        if (result < 0) {
 fail2:
                ep = ss->out_ep;
@@ -1008,7 +726,7 @@ fail2:
                        goto fail2;
                ep->driver_data = ss;
 
-               result = source_sink_start_ep(ss, true, EP_ISOC, speed);
+               result = source_sink_start_ep(ss, true, true, speed);
                if (result < 0) {
 fail3:
                        ep = ss->iso_in_ep;
@@ -1031,14 +749,13 @@ fail3:
                        goto fail3;
                ep->driver_data = ss;
 
-               result = source_sink_start_ep(ss, false, EP_ISOC, speed);
+               result = source_sink_start_ep(ss, false, true, speed);
                if (result < 0) {
                        usb_ep_disable(ep);
                        ep->driver_data = NULL;
                        goto fail3;
                }
        }
-
 out:
        ss->cur_alt = alt;
 
@@ -1054,8 +771,6 @@ static int sourcesink_set_alt(struct usb_function *f,
 
        if (ss->in_ep->driver_data)
                disable_source_sink(ss);
-       else if (alt == 2 && ss->int_in_ep->driver_data)
-               disable_source_sink(ss);
        return enable_source_sink(cdev, ss, alt);
 }
 
@@ -1168,10 +883,6 @@ static struct usb_function *source_sink_alloc_func(
        isoc_maxpacket = ss_opts->isoc_maxpacket;
        isoc_mult = ss_opts->isoc_mult;
        isoc_maxburst = ss_opts->isoc_maxburst;
-       int_interval = ss_opts->int_interval;
-       int_maxpacket = ss_opts->int_maxpacket;
-       int_mult = ss_opts->int_mult;
-       int_maxburst = ss_opts->int_maxburst;
        buflen = ss_opts->bulk_buflen;
 
        ss->function.name = "source/sink";
@@ -1468,182 +1179,6 @@ static struct f_ss_opts_attribute f_ss_opts_bulk_buflen =
                        f_ss_opts_bulk_buflen_show,
                        f_ss_opts_bulk_buflen_store);
 
-static ssize_t f_ss_opts_int_interval_show(struct f_ss_opts *opts, char *page)
-{
-       int result;
-
-       mutex_lock(&opts->lock);
-       result = sprintf(page, "%u", opts->int_interval);
-       mutex_unlock(&opts->lock);
-
-       return result;
-}
-
-static ssize_t f_ss_opts_int_interval_store(struct f_ss_opts *opts,
-                                      const char *page, size_t len)
-{
-       int ret;
-       u32 num;
-
-       mutex_lock(&opts->lock);
-       if (opts->refcnt) {
-               ret = -EBUSY;
-               goto end;
-       }
-
-       ret = kstrtou32(page, 0, &num);
-       if (ret)
-               goto end;
-
-       if (num > 4096) {
-               ret = -EINVAL;
-               goto end;
-       }
-
-       opts->int_interval = num;
-       ret = len;
-end:
-       mutex_unlock(&opts->lock);
-       return ret;
-}
-
-static struct f_ss_opts_attribute f_ss_opts_int_interval =
-       __CONFIGFS_ATTR(int_interval, S_IRUGO | S_IWUSR,
-                       f_ss_opts_int_interval_show,
-                       f_ss_opts_int_interval_store);
-
-static ssize_t f_ss_opts_int_maxpacket_show(struct f_ss_opts *opts, char *page)
-{
-       int result;
-
-       mutex_lock(&opts->lock);
-       result = sprintf(page, "%u", opts->int_maxpacket);
-       mutex_unlock(&opts->lock);
-
-       return result;
-}
-
-static ssize_t f_ss_opts_int_maxpacket_store(struct f_ss_opts *opts,
-                                      const char *page, size_t len)
-{
-       int ret;
-       u16 num;
-
-       mutex_lock(&opts->lock);
-       if (opts->refcnt) {
-               ret = -EBUSY;
-               goto end;
-       }
-
-       ret = kstrtou16(page, 0, &num);
-       if (ret)
-               goto end;
-
-       if (num > 1024) {
-               ret = -EINVAL;
-               goto end;
-       }
-
-       opts->int_maxpacket = num;
-       ret = len;
-end:
-       mutex_unlock(&opts->lock);
-       return ret;
-}
-
-static struct f_ss_opts_attribute f_ss_opts_int_maxpacket =
-       __CONFIGFS_ATTR(int_maxpacket, S_IRUGO | S_IWUSR,
-                       f_ss_opts_int_maxpacket_show,
-                       f_ss_opts_int_maxpacket_store);
-
-static ssize_t f_ss_opts_int_mult_show(struct f_ss_opts *opts, char *page)
-{
-       int result;
-
-       mutex_lock(&opts->lock);
-       result = sprintf(page, "%u", opts->int_mult);
-       mutex_unlock(&opts->lock);
-
-       return result;
-}
-
-static ssize_t f_ss_opts_int_mult_store(struct f_ss_opts *opts,
-                                      const char *page, size_t len)
-{
-       int ret;
-       u8 num;
-
-       mutex_lock(&opts->lock);
-       if (opts->refcnt) {
-               ret = -EBUSY;
-               goto end;
-       }
-
-       ret = kstrtou8(page, 0, &num);
-       if (ret)
-               goto end;
-
-       if (num > 2) {
-               ret = -EINVAL;
-               goto end;
-       }
-
-       opts->int_mult = num;
-       ret = len;
-end:
-       mutex_unlock(&opts->lock);
-       return ret;
-}
-
-static struct f_ss_opts_attribute f_ss_opts_int_mult =
-       __CONFIGFS_ATTR(int_mult, S_IRUGO | S_IWUSR,
-                       f_ss_opts_int_mult_show,
-                       f_ss_opts_int_mult_store);
-
-static ssize_t f_ss_opts_int_maxburst_show(struct f_ss_opts *opts, char *page)
-{
-       int result;
-
-       mutex_lock(&opts->lock);
-       result = sprintf(page, "%u", opts->int_maxburst);
-       mutex_unlock(&opts->lock);
-
-       return result;
-}
-
-static ssize_t f_ss_opts_int_maxburst_store(struct f_ss_opts *opts,
-                                      const char *page, size_t len)
-{
-       int ret;
-       u8 num;
-
-       mutex_lock(&opts->lock);
-       if (opts->refcnt) {
-               ret = -EBUSY;
-               goto end;
-       }
-
-       ret = kstrtou8(page, 0, &num);
-       if (ret)
-               goto end;
-
-       if (num > 15) {
-               ret = -EINVAL;
-               goto end;
-       }
-
-       opts->int_maxburst = num;
-       ret = len;
-end:
-       mutex_unlock(&opts->lock);
-       return ret;
-}
-
-static struct f_ss_opts_attribute f_ss_opts_int_maxburst =
-       __CONFIGFS_ATTR(int_maxburst, S_IRUGO | S_IWUSR,
-                       f_ss_opts_int_maxburst_show,
-                       f_ss_opts_int_maxburst_store);
-
 static struct configfs_attribute *ss_attrs[] = {
        &f_ss_opts_pattern.attr,
        &f_ss_opts_isoc_interval.attr,
@@ -1651,10 +1186,6 @@ static struct configfs_attribute *ss_attrs[] = {
        &f_ss_opts_isoc_mult.attr,
        &f_ss_opts_isoc_maxburst.attr,
        &f_ss_opts_bulk_buflen.attr,
-       &f_ss_opts_int_interval.attr,
-       &f_ss_opts_int_maxpacket.attr,
-       &f_ss_opts_int_mult.attr,
-       &f_ss_opts_int_maxburst.attr,
        NULL,
 };
 
@@ -1684,8 +1215,6 @@ static struct usb_function_instance *source_sink_alloc_inst(void)
        ss_opts->isoc_interval = GZERO_ISOC_INTERVAL;
        ss_opts->isoc_maxpacket = GZERO_ISOC_MAXPACKET;
        ss_opts->bulk_buflen = GZERO_BULK_BUFLEN;
-       ss_opts->int_interval = GZERO_INT_INTERVAL;
-       ss_opts->int_maxpacket = GZERO_INT_MAXPACKET;
 
        config_group_init_type_name(&ss_opts->func_inst.group, "",
                                    &ss_func_type);
index 33e1665..6d3eb8b 100644 (file)
@@ -54,7 +54,7 @@
 #define UNFLW_CTRL     8
 #define OVFLW_CTRL     10
 
-const char *uac2_name = "snd_uac2";
+static const char *uac2_name = "snd_uac2";
 
 struct uac2_req {
        struct uac2_rtd_params *pp; /* parent param */
@@ -634,7 +634,7 @@ static struct usb_interface_descriptor std_ac_if_desc = {
 };
 
 /* Clock source for IN traffic */
-struct uac_clock_source_descriptor in_clk_src_desc = {
+static struct uac_clock_source_descriptor in_clk_src_desc = {
        .bLength = sizeof in_clk_src_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
 
@@ -646,7 +646,7 @@ struct uac_clock_source_descriptor in_clk_src_desc = {
 };
 
 /* Clock source for OUT traffic */
-struct uac_clock_source_descriptor out_clk_src_desc = {
+static struct uac_clock_source_descriptor out_clk_src_desc = {
        .bLength = sizeof out_clk_src_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
 
@@ -658,7 +658,7 @@ struct uac_clock_source_descriptor out_clk_src_desc = {
 };
 
 /* Input Terminal for USB_OUT */
-struct uac2_input_terminal_descriptor usb_out_it_desc = {
+static struct uac2_input_terminal_descriptor usb_out_it_desc = {
        .bLength = sizeof usb_out_it_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
 
@@ -672,7 +672,7 @@ struct uac2_input_terminal_descriptor usb_out_it_desc = {
 };
 
 /* Input Terminal for I/O-In */
-struct uac2_input_terminal_descriptor io_in_it_desc = {
+static struct uac2_input_terminal_descriptor io_in_it_desc = {
        .bLength = sizeof io_in_it_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
 
@@ -686,7 +686,7 @@ struct uac2_input_terminal_descriptor io_in_it_desc = {
 };
 
 /* Ouput Terminal for USB_IN */
-struct uac2_output_terminal_descriptor usb_in_ot_desc = {
+static struct uac2_output_terminal_descriptor usb_in_ot_desc = {
        .bLength = sizeof usb_in_ot_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
 
@@ -700,7 +700,7 @@ struct uac2_output_terminal_descriptor usb_in_ot_desc = {
 };
 
 /* Ouput Terminal for I/O-Out */
-struct uac2_output_terminal_descriptor io_out_ot_desc = {
+static struct uac2_output_terminal_descriptor io_out_ot_desc = {
        .bLength = sizeof io_out_ot_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
 
@@ -713,7 +713,7 @@ struct uac2_output_terminal_descriptor io_out_ot_desc = {
        .bmControls = (CONTROL_RDWR << COPY_CTRL),
 };
 
-struct uac2_ac_header_descriptor ac_hdr_desc = {
+static struct uac2_ac_header_descriptor ac_hdr_desc = {
        .bLength = sizeof ac_hdr_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
 
@@ -751,7 +751,7 @@ static struct usb_interface_descriptor std_as_out_if1_desc = {
 };
 
 /* Audio Stream OUT Intface Desc */
-struct uac2_as_header_descriptor as_out_hdr_desc = {
+static struct uac2_as_header_descriptor as_out_hdr_desc = {
        .bLength = sizeof as_out_hdr_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
 
@@ -764,7 +764,7 @@ struct uac2_as_header_descriptor as_out_hdr_desc = {
 };
 
 /* Audio USB_OUT Format */
-struct uac2_format_type_i_descriptor as_out_fmt1_desc = {
+static struct uac2_format_type_i_descriptor as_out_fmt1_desc = {
        .bLength = sizeof as_out_fmt1_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
        .bDescriptorSubtype = UAC_FORMAT_TYPE,
@@ -772,7 +772,7 @@ struct uac2_format_type_i_descriptor as_out_fmt1_desc = {
 };
 
 /* STD AS ISO OUT Endpoint */
-struct usb_endpoint_descriptor fs_epout_desc = {
+static struct usb_endpoint_descriptor fs_epout_desc = {
        .bLength = USB_DT_ENDPOINT_SIZE,
        .bDescriptorType = USB_DT_ENDPOINT,
 
@@ -782,7 +782,7 @@ struct usb_endpoint_descriptor fs_epout_desc = {
        .bInterval = 1,
 };
 
-struct usb_endpoint_descriptor hs_epout_desc = {
+static struct usb_endpoint_descriptor hs_epout_desc = {
        .bLength = USB_DT_ENDPOINT_SIZE,
        .bDescriptorType = USB_DT_ENDPOINT,
 
@@ -828,7 +828,7 @@ static struct usb_interface_descriptor std_as_in_if1_desc = {
 };
 
 /* Audio Stream IN Intface Desc */
-struct uac2_as_header_descriptor as_in_hdr_desc = {
+static struct uac2_as_header_descriptor as_in_hdr_desc = {
        .bLength = sizeof as_in_hdr_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
 
@@ -841,7 +841,7 @@ struct uac2_as_header_descriptor as_in_hdr_desc = {
 };
 
 /* Audio USB_IN Format */
-struct uac2_format_type_i_descriptor as_in_fmt1_desc = {
+static struct uac2_format_type_i_descriptor as_in_fmt1_desc = {
        .bLength = sizeof as_in_fmt1_desc,
        .bDescriptorType = USB_DT_CS_INTERFACE,
        .bDescriptorSubtype = UAC_FORMAT_TYPE,
@@ -849,7 +849,7 @@ struct uac2_format_type_i_descriptor as_in_fmt1_desc = {
 };
 
 /* STD AS ISO IN Endpoint */
-struct usb_endpoint_descriptor fs_epin_desc = {
+static struct usb_endpoint_descriptor fs_epin_desc = {
        .bLength = USB_DT_ENDPOINT_SIZE,
        .bDescriptorType = USB_DT_ENDPOINT,
 
@@ -859,7 +859,7 @@ struct usb_endpoint_descriptor fs_epin_desc = {
        .bInterval = 1,
 };
 
-struct usb_endpoint_descriptor hs_epin_desc = {
+static struct usb_endpoint_descriptor hs_epin_desc = {
        .bLength = USB_DT_ENDPOINT_SIZE,
        .bDescriptorType = USB_DT_ENDPOINT,
 
@@ -1563,7 +1563,7 @@ static void afunc_unbind(struct usb_configuration *c, struct usb_function *f)
                agdev->out_ep->driver_data = NULL;
 }
 
-struct usb_function *afunc_alloc(struct usb_function_instance *fi)
+static struct usb_function *afunc_alloc(struct usb_function_instance *fi)
 {
        struct audio_dev *agdev;
        struct f_uac2_opts *opts;
index 2ce28b9..15f1809 100644 (file)
@@ -10,8 +10,6 @@
 #define GZERO_QLEN             32
 #define GZERO_ISOC_INTERVAL    4
 #define GZERO_ISOC_MAXPACKET   1024
-#define GZERO_INT_INTERVAL     1 /* Default interrupt interval = 1 ms */
-#define GZERO_INT_MAXPACKET    1024
 
 struct usb_zero_options {
        unsigned pattern;
@@ -19,10 +17,6 @@ struct usb_zero_options {
        unsigned isoc_maxpacket;
        unsigned isoc_mult;
        unsigned isoc_maxburst;
-       unsigned int_interval; /* In ms */
-       unsigned int_maxpacket;
-       unsigned int_mult;
-       unsigned int_maxburst;
        unsigned bulk_buflen;
        unsigned qlen;
 };
@@ -34,10 +28,6 @@ struct f_ss_opts {
        unsigned isoc_maxpacket;
        unsigned isoc_mult;
        unsigned isoc_maxburst;
-       unsigned int_interval; /* In ms */
-       unsigned int_maxpacket;
-       unsigned int_mult;
-       unsigned int_maxburst;
        unsigned bulk_buflen;
 
        /*
@@ -72,7 +62,6 @@ int lb_modinit(void);
 void free_ep_req(struct usb_ep *ep, struct usb_request *req);
 void disable_endpoints(struct usb_composite_dev *cdev,
                struct usb_ep *in, struct usb_ep *out,
-               struct usb_ep *iso_in, struct usb_ep *iso_out,
-               struct usb_ep *int_in, struct usb_ep *int_out);
+               struct usb_ep *iso_in, struct usb_ep *iso_out);
 
 #endif /* __G_ZERO_H */
index 5aad7fe..8b818fd 100644 (file)
@@ -27,6 +27,7 @@
 #include "uvc.h"
 #include "uvc_queue.h"
 #include "uvc_video.h"
+#include "uvc_v4l2.h"
 
 /* --------------------------------------------------------------------------
  * Requests handling
index 9cb86bc..50a5e63 100644 (file)
@@ -21,6 +21,7 @@
 
 #include "uvc.h"
 #include "uvc_queue.h"
+#include "uvc_video.h"
 
 /* --------------------------------------------------------------------------
  * Video codecs
index 06acfa5..b01b88e 100644 (file)
@@ -133,7 +133,9 @@ struct gfs_configuration {
        struct usb_configuration c;
        int (*eth)(struct usb_configuration *c);
        int num;
-} gfs_configurations[] = {
+};
+
+static struct gfs_configuration gfs_configurations[] = {
 #ifdef CONFIG_USB_FUNCTIONFS_RNDIS
        {
                .eth            = bind_rndis_config,
@@ -278,7 +280,7 @@ static void *functionfs_acquire_dev(struct ffs_dev *dev)
        if (!try_module_get(THIS_MODULE))
                return ERR_PTR(-ENOENT);
        
-       return 0;
+       return NULL;
 }
 
 static void functionfs_release_dev(struct ffs_dev *dev)
index db49ec4..200f9a5 100644 (file)
@@ -74,6 +74,8 @@ MODULE_DESCRIPTION (DRIVER_DESC);
 MODULE_AUTHOR ("David Brownell");
 MODULE_LICENSE ("GPL");
 
+static int ep_open(struct inode *, struct file *);
+
 
 /*----------------------------------------------------------------------*/
 
@@ -283,14 +285,15 @@ static void epio_complete (struct usb_ep *ep, struct usb_request *req)
  * still need dev->lock to use epdata->ep.
  */
 static int
-get_ready_ep (unsigned f_flags, struct ep_data *epdata)
+get_ready_ep (unsigned f_flags, struct ep_data *epdata, bool is_write)
 {
        int     val;
 
        if (f_flags & O_NONBLOCK) {
                if (!mutex_trylock(&epdata->lock))
                        goto nonblock;
-               if (epdata->state != STATE_EP_ENABLED) {
+               if (epdata->state != STATE_EP_ENABLED &&
+                   (!is_write || epdata->state != STATE_EP_READY)) {
                        mutex_unlock(&epdata->lock);
 nonblock:
                        val = -EAGAIN;
@@ -305,18 +308,20 @@ nonblock:
 
        switch (epdata->state) {
        case STATE_EP_ENABLED:
+               return 0;
+       case STATE_EP_READY:                    /* not configured yet */
+               if (is_write)
+                       return 0;
+               // FALLTHRU
+       case STATE_EP_UNBOUND:                  /* clean disconnect */
                break;
        // case STATE_EP_DISABLED:              /* "can't happen" */
-       // case STATE_EP_READY:                 /* "can't happen" */
        default:                                /* error! */
                pr_debug ("%s: ep %p not available, state %d\n",
                                shortname, epdata, epdata->state);
-               // FALLTHROUGH
-       case STATE_EP_UNBOUND:                  /* clean disconnect */
-               val = -ENODEV;
-               mutex_unlock(&epdata->lock);
        }
-       return val;
+       mutex_unlock(&epdata->lock);
+       return -ENODEV;
 }
 
 static ssize_t
@@ -363,97 +368,6 @@ ep_io (struct ep_data *epdata, void *buf, unsigned len)
        return value;
 }
 
-
-/* handle a synchronous OUT bulk/intr/iso transfer */
-static ssize_t
-ep_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
-{
-       struct ep_data          *data = fd->private_data;
-       void                    *kbuf;
-       ssize_t                 value;
-
-       if ((value = get_ready_ep (fd->f_flags, data)) < 0)
-               return value;
-
-       /* halt any endpoint by doing a "wrong direction" i/o call */
-       if (usb_endpoint_dir_in(&data->desc)) {
-               if (usb_endpoint_xfer_isoc(&data->desc)) {
-                       mutex_unlock(&data->lock);
-                       return -EINVAL;
-               }
-               DBG (data->dev, "%s halt\n", data->name);
-               spin_lock_irq (&data->dev->lock);
-               if (likely (data->ep != NULL))
-                       usb_ep_set_halt (data->ep);
-               spin_unlock_irq (&data->dev->lock);
-               mutex_unlock(&data->lock);
-               return -EBADMSG;
-       }
-
-       /* FIXME readahead for O_NONBLOCK and poll(); careful with ZLPs */
-
-       value = -ENOMEM;
-       kbuf = kmalloc (len, GFP_KERNEL);
-       if (unlikely (!kbuf))
-               goto free1;
-
-       value = ep_io (data, kbuf, len);
-       VDEBUG (data->dev, "%s read %zu OUT, status %d\n",
-               data->name, len, (int) value);
-       if (value >= 0 && copy_to_user (buf, kbuf, value))
-               value = -EFAULT;
-
-free1:
-       mutex_unlock(&data->lock);
-       kfree (kbuf);
-       return value;
-}
-
-/* handle a synchronous IN bulk/intr/iso transfer */
-static ssize_t
-ep_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
-{
-       struct ep_data          *data = fd->private_data;
-       void                    *kbuf;
-       ssize_t                 value;
-
-       if ((value = get_ready_ep (fd->f_flags, data)) < 0)
-               return value;
-
-       /* halt any endpoint by doing a "wrong direction" i/o call */
-       if (!usb_endpoint_dir_in(&data->desc)) {
-               if (usb_endpoint_xfer_isoc(&data->desc)) {
-                       mutex_unlock(&data->lock);
-                       return -EINVAL;
-               }
-               DBG (data->dev, "%s halt\n", data->name);
-               spin_lock_irq (&data->dev->lock);
-               if (likely (data->ep != NULL))
-                       usb_ep_set_halt (data->ep);
-               spin_unlock_irq (&data->dev->lock);
-               mutex_unlock(&data->lock);
-               return -EBADMSG;
-       }
-
-       /* FIXME writebehind for O_NONBLOCK and poll(), qlen = 1 */
-
-       value = -ENOMEM;
-       kbuf = memdup_user(buf, len);
-       if (IS_ERR(kbuf)) {
-               value = PTR_ERR(kbuf);
-               kbuf = NULL;
-               goto free1;
-       }
-
-       value = ep_io (data, kbuf, len);
-       VDEBUG (data->dev, "%s write %zu IN, status %d\n",
-               data->name, len, (int) value);
-free1:
-       mutex_unlock(&data->lock);
-       kfree (kbuf);
-       return value;
-}
-
 static int
 ep_release (struct inode *inode, struct file *fd)
 {
@@ -481,7 +395,7 @@ static long ep_ioctl(struct file *fd, unsigned code, unsigned long value)
        struct ep_data          *data = fd->private_data;
        int                     status;
 
-       if ((status = get_ready_ep (fd->f_flags, data)) < 0)
+       if ((status = get_ready_ep (fd->f_flags, data, false)) < 0)
                return status;
 
        spin_lock_irq (&data->dev->lock);
@@ -517,8 +431,8 @@ struct kiocb_priv {
        struct mm_struct        *mm;
        struct work_struct      work;
        void                    *buf;
-       const struct iovec      *iv;
-       unsigned long           nr_segs;
+       struct iov_iter         to;
+       const void              *to_free;
        unsigned                actual;
 };
 
@@ -541,35 +455,6 @@ static int ep_aio_cancel(struct kiocb *iocb)
        return value;
 }
 
-static ssize_t ep_copy_to_user(struct kiocb_priv *priv)
-{
-       ssize_t                 len, total;
-       void                    *to_copy;
-       int                     i;
-
-       /* copy stuff into user buffers */
-       total = priv->actual;
-       len = 0;
-       to_copy = priv->buf;
-       for (i=0; i < priv->nr_segs; i++) {
-               ssize_t this = min((ssize_t)(priv->iv[i].iov_len), total);
-
-               if (copy_to_user(priv->iv[i].iov_base, to_copy, this)) {
-                       if (len == 0)
-                               len = -EFAULT;
-                       break;
-               }
-
-               total -= this;
-               len += this;
-               to_copy += this;
-               if (total == 0)
-                       break;
-       }
-
-       return len;
-}
-
 static void ep_user_copy_worker(struct work_struct *work)
 {
        struct kiocb_priv *priv = container_of(work, struct kiocb_priv, work);
@@ -578,13 +463,16 @@ static void ep_user_copy_worker(struct work_struct *work)
        size_t ret;
 
        use_mm(mm);
-       ret = ep_copy_to_user(priv);
+       ret = copy_to_iter(priv->buf, priv->actual, &priv->to);
        unuse_mm(mm);
+       if (!ret)
+               ret = -EFAULT;
 
        /* completing the iocb can drop the ctx and mm, don't touch mm after */
        aio_complete(iocb, ret, ret);
 
        kfree(priv->buf);
+       kfree(priv->to_free);
        kfree(priv);
 }
 
@@ -603,8 +491,9 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
         * don't need to copy anything to userspace, so we can
         * complete the aio request immediately.
         */
-       if (priv->iv == NULL || unlikely(req->actual == 0)) {
+       if (priv->to_free == NULL || unlikely(req->actual == 0)) {
                kfree(req->buf);
+               kfree(priv->to_free);
                kfree(priv);
                iocb->private = NULL;
                /* aio_complete() reports bytes-transferred _and_ faults */
@@ -618,6 +507,7 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
 
                priv->buf = req->buf;
                priv->actual = req->actual;
+               INIT_WORK(&priv->work, ep_user_copy_worker);
                schedule_work(&priv->work);
        }
        spin_unlock(&epdata->dev->lock);
@@ -626,38 +516,17 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
        put_ep(epdata);
 }
 
-static ssize_t
-ep_aio_rwtail(
-       struct kiocb    *iocb,
-       char            *buf,
-       size_t          len,
-       struct ep_data  *epdata,
-       const struct iovec *iv,
-       unsigned long   nr_segs
-)
+static ssize_t ep_aio(struct kiocb *iocb,
+                     struct kiocb_priv *priv,
+                     struct ep_data *epdata,
+                     char *buf,
+                     size_t len)
 {
-       struct kiocb_priv       *priv;
-       struct usb_request      *req;
-       ssize_t                 value;
+       struct usb_request *req;
+       ssize_t value;
 
-       priv = kmalloc(sizeof *priv, GFP_KERNEL);
-       if (!priv) {
-               value = -ENOMEM;
-fail:
-               kfree(buf);
-               return value;
-       }
        iocb->private = priv;
        priv->iocb = iocb;
-       priv->iv = iv;
-       priv->nr_segs = nr_segs;
-       INIT_WORK(&priv->work, ep_user_copy_worker);
-
-       value = get_ready_ep(iocb->ki_filp->f_flags, epdata);
-       if (unlikely(value < 0)) {
-               kfree(priv);
-               goto fail;
-       }
 
        kiocb_set_cancel_fn(iocb, ep_aio_cancel);
        get_ep(epdata);
@@ -669,75 +538,154 @@ fail:
         * allocate or submit those if the host disconnected.
         */
        spin_lock_irq(&epdata->dev->lock);
-       if (likely(epdata->ep)) {
-               req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC);
-               if (likely(req)) {
-                       priv->req = req;
-                       req->buf = buf;
-                       req->length = len;
-                       req->complete = ep_aio_complete;
-                       req->context = iocb;
-                       value = usb_ep_queue(epdata->ep, req, GFP_ATOMIC);
-                       if (unlikely(0 != value))
-                               usb_ep_free_request(epdata->ep, req);
-               } else
-                       value = -EAGAIN;
-       } else
-               value = -ENODEV;
-       spin_unlock_irq(&epdata->dev->lock);
+       value = -ENODEV;
+       if (unlikely(!epdata->ep))      /* no endpoint: fail with -ENODEV */
+               goto fail;
 
-       mutex_unlock(&epdata->lock);
+       req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC);
+       value = -ENOMEM;
+       if (unlikely(!req))
+               goto fail;
 
-       if (unlikely(value)) {
-               kfree(priv);
-               put_ep(epdata);
-       } else
-               value = -EIOCBQUEUED;
+       priv->req = req;
+       req->buf = buf;
+       req->length = len;
+       req->complete = ep_aio_complete;
+       req->context = iocb;
+       value = usb_ep_queue(epdata->ep, req, GFP_ATOMIC);
+       if (unlikely(0 != value)) {
+               usb_ep_free_request(epdata->ep, req);
+               goto fail;
+       }
+       spin_unlock_irq(&epdata->dev->lock);
+       return -EIOCBQUEUED;
+
+fail:
+       spin_unlock_irq(&epdata->dev->lock);
+       kfree(priv->to_free);
+       kfree(priv);
+       put_ep(epdata);
        return value;
 }
 
 static ssize_t
-ep_aio_read(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t o)
+ep_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
-       struct ep_data          *epdata = iocb->ki_filp->private_data;
-       char                    *buf;
+       struct file *file = iocb->ki_filp;
+       struct ep_data *epdata = file->private_data;
+       size_t len = iov_iter_count(to);
+       ssize_t value;
+       char *buf;
 
-       if (unlikely(usb_endpoint_dir_in(&epdata->desc)))
-               return -EINVAL;
+       if ((value = get_ready_ep(file->f_flags, epdata, false)) < 0)
+               return value;
 
-       buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL);
-       if (unlikely(!buf))
-               return -ENOMEM;
+       /* halt any endpoint by doing a "wrong direction" i/o call */
+       if (usb_endpoint_dir_in(&epdata->desc)) {
+               if (usb_endpoint_xfer_isoc(&epdata->desc) ||
+                   !is_sync_kiocb(iocb)) {
+                       mutex_unlock(&epdata->lock);
+                       return -EINVAL;
+               }
+               DBG (epdata->dev, "%s halt\n", epdata->name);
+               spin_lock_irq(&epdata->dev->lock);
+               if (likely(epdata->ep != NULL))
+                       usb_ep_set_halt(epdata->ep);
+               spin_unlock_irq(&epdata->dev->lock);
+               mutex_unlock(&epdata->lock);
+               return -EBADMSG;
+       }
 
-       return ep_aio_rwtail(iocb, buf, iocb->ki_nbytes, epdata, iov, nr_segs);
+       buf = kmalloc(len, GFP_KERNEL);
+       if (unlikely(!buf)) {
+               mutex_unlock(&epdata->lock);
+               return -ENOMEM;
+       }
+       if (is_sync_kiocb(iocb)) {
+               value = ep_io(epdata, buf, len);
+               if (value >= 0 && copy_to_iter(buf, value, to))
+                       value = -EFAULT;
+       } else {
+               struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL);
+               value = -ENOMEM;
+               if (!priv)
+                       goto fail;
+               priv->to_free = dup_iter(&priv->to, to, GFP_KERNEL);
+               if (!priv->to_free) {
+                       kfree(priv);
+                       goto fail;
+               }
+               value = ep_aio(iocb, priv, epdata, buf, len);
+               if (value == -EIOCBQUEUED)
+                       buf = NULL;
+       }
+fail:
+       kfree(buf);
+       mutex_unlock(&epdata->lock);
+       return value;
 }
 
+static ssize_t ep_config(struct ep_data *, const char *, size_t);
+
 static ssize_t
-ep_aio_write(struct kiocb *iocb, const struct iovec *iov,
-               unsigned long nr_segs, loff_t o)
+ep_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-       struct ep_data          *epdata = iocb->ki_filp->private_data;
-       char                    *buf;
-       size_t                  len = 0;
-       int                     i = 0;
+       struct file *file = iocb->ki_filp;
+       struct ep_data *epdata = file->private_data;
+       size_t len = iov_iter_count(from);
+       bool configured;
+       ssize_t value;
+       char *buf;
+
+       if ((value = get_ready_ep(file->f_flags, epdata, true)) < 0)
+               return value;
 
-       if (unlikely(!usb_endpoint_dir_in(&epdata->desc)))
-               return -EINVAL;
+       configured = epdata->state == STATE_EP_ENABLED;
 
-       buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL);
-       if (unlikely(!buf))
+       /* halt any endpoint by doing a "wrong direction" i/o call */
+       if (configured && !usb_endpoint_dir_in(&epdata->desc)) {
+               if (usb_endpoint_xfer_isoc(&epdata->desc) ||
+                   !is_sync_kiocb(iocb)) {
+                       mutex_unlock(&epdata->lock);
+                       return -EINVAL;
+               }
+               DBG (epdata->dev, "%s halt\n", epdata->name);
+               spin_lock_irq(&epdata->dev->lock);
+               if (likely(epdata->ep != NULL))
+                       usb_ep_set_halt(epdata->ep);
+               spin_unlock_irq(&epdata->dev->lock);
+               mutex_unlock(&epdata->lock);
+               return -EBADMSG;
+       }
+
+       buf = kmalloc(len, GFP_KERNEL);
+       if (unlikely(!buf)) {
+               mutex_unlock(&epdata->lock);
                return -ENOMEM;
+       }
 
-       for (i=0; i < nr_segs; i++) {
-               if (unlikely(copy_from_user(&buf[len], iov[i].iov_base,
-                               iov[i].iov_len) != 0)) {
-                       kfree(buf);
-                       return -EFAULT;
+       if (unlikely(copy_from_iter(buf, len, from) != len)) {
+               value = -EFAULT;
+               goto out;
+       }
+
+       if (unlikely(!configured)) {
+               value = ep_config(epdata, buf, len);
+       } else if (is_sync_kiocb(iocb)) {
+               value = ep_io(epdata, buf, len);
+       } else {
+               struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL);
+               value = -ENOMEM;
+               if (priv) {
+                       value = ep_aio(iocb, priv, epdata, buf, len);
+                       if (value == -EIOCBQUEUED)
+                               buf = NULL;
                }
-               len += iov[i].iov_len;
        }
-       return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0);
+out:
+       kfree(buf);
+       mutex_unlock(&epdata->lock);
+       return value;
 }
 
 /*----------------------------------------------------------------------*/
@@ -745,15 +693,15 @@ ep_aio_write(struct kiocb *iocb, const struct iovec *iov,
 /* used after endpoint configuration */
 static const struct file_operations ep_io_operations = {
        .owner =        THIS_MODULE,
-       .llseek =       no_llseek,
 
-       .read =         ep_read,
-       .write =        ep_write,
-       .unlocked_ioctl = ep_ioctl,
+       .open =         ep_open,
        .release =      ep_release,
-
-       .aio_read =     ep_aio_read,
-       .aio_write =    ep_aio_write,
+       .llseek =       no_llseek,
+       .read =         new_sync_read,
+       .write =        new_sync_write,
+       .unlocked_ioctl = ep_ioctl,
+       .read_iter =    ep_read_iter,
+       .write_iter =   ep_write_iter,
 };
 
 /* ENDPOINT INITIALIZATION
@@ -770,17 +718,12 @@ static const struct file_operations ep_io_operations = {
  * speed descriptor, then optional high speed descriptor.
  */
 static ssize_t
-ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
+ep_config (struct ep_data *data, const char *buf, size_t len)
 {
-       struct ep_data          *data = fd->private_data;
        struct usb_ep           *ep;
        u32                     tag;
        int                     value, length = len;
 
-       value = mutex_lock_interruptible(&data->lock);
-       if (value < 0)
-               return value;
-
        if (data->state != STATE_EP_READY) {
                value = -EL2HLT;
                goto fail;
@@ -791,9 +734,7 @@ ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
                goto fail0;
 
        /* we might need to change message format someday */
-       if (copy_from_user (&tag, buf, 4)) {
-               goto fail1;
-       }
+       memcpy(&tag, buf, 4);
        if (tag != 1) {
                DBG(data->dev, "config %s, bad tag %d\n", data->name, tag);
                goto fail0;
@@ -806,19 +747,15 @@ ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
         */
 
        /* full/low speed descriptor, then high speed */
-       if (copy_from_user (&data->desc, buf, USB_DT_ENDPOINT_SIZE)) {
-               goto fail1;
-       }
+       memcpy(&data->desc, buf, USB_DT_ENDPOINT_SIZE);
        if (data->desc.bLength != USB_DT_ENDPOINT_SIZE
                        || data->desc.bDescriptorType != USB_DT_ENDPOINT)
                goto fail0;
        if (len != USB_DT_ENDPOINT_SIZE) {
                if (len != 2 * USB_DT_ENDPOINT_SIZE)
                        goto fail0;
-               if (copy_from_user (&data->hs_desc, buf + USB_DT_ENDPOINT_SIZE,
-                                       USB_DT_ENDPOINT_SIZE)) {
-                       goto fail1;
-               }
+               memcpy(&data->hs_desc, buf + USB_DT_ENDPOINT_SIZE,
+                       USB_DT_ENDPOINT_SIZE);
                if (data->hs_desc.bLength != USB_DT_ENDPOINT_SIZE
                                || data->hs_desc.bDescriptorType
                                        != USB_DT_ENDPOINT) {
@@ -840,24 +777,20 @@ ep_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
        case USB_SPEED_LOW:
        case USB_SPEED_FULL:
                ep->desc = &data->desc;
-               value = usb_ep_enable(ep);
-               if (value == 0)
-                       data->state = STATE_EP_ENABLED;
                break;
        case USB_SPEED_HIGH:
                /* fails if caller didn't provide that descriptor... */
                ep->desc = &data->hs_desc;
-               value = usb_ep_enable(ep);
-               if (value == 0)
-                       data->state = STATE_EP_ENABLED;
                break;
        default:
                DBG(data->dev, "unconnected, %s init abandoned\n",
                                data->name);
                value = -EINVAL;
+               goto gone;
        }
+       value = usb_ep_enable(ep);
        if (value == 0) {
-               fd->f_op = &ep_io_operations;
+               data->state = STATE_EP_ENABLED;
                value = length;
        }
 gone:
@@ -867,14 +800,10 @@ fail:
                data->desc.bDescriptorType = 0;
                data->hs_desc.bDescriptorType = 0;
        }
-       mutex_unlock(&data->lock);
        return value;
 fail0:
        value = -EINVAL;
        goto fail;
-fail1:
-       value = -EFAULT;
-       goto fail;
 }
 
 static int
@@ -902,15 +831,6 @@ ep_open (struct inode *inode, struct file *fd)
        return value;
 }
 
-/* used before endpoint configuration */
-static const struct file_operations ep_config_operations = {
-       .llseek =       no_llseek,
-
-       .open =         ep_open,
-       .write =        ep_config,
-       .release =      ep_release,
-};
-
 /*----------------------------------------------------------------------*/
 
 /* EP0 IMPLEMENTATION can be partly in userspace.
@@ -989,6 +909,10 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
        enum ep0_state                  state;
 
        spin_lock_irq (&dev->lock);
+       if (dev->state <= STATE_DEV_OPENED) {
+               retval = -EINVAL;
+               goto done;
+       }
 
        /* report fd mode change before acting on it */
        if (dev->setup_abort) {
@@ -1187,8 +1111,6 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
        struct dev_data         *dev = fd->private_data;
        ssize_t                 retval = -ESRCH;
 
-       spin_lock_irq (&dev->lock);
-
        /* report fd mode change before acting on it */
        if (dev->setup_abort) {
                dev->setup_abort = 0;
@@ -1234,7 +1156,6 @@ ep0_write (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
        } else
                DBG (dev, "fail %s, state %d\n", __func__, dev->state);
 
-       spin_unlock_irq (&dev->lock);
        return retval;
 }
 
@@ -1281,6 +1202,9 @@ ep0_poll (struct file *fd, poll_table *wait)
        struct dev_data         *dev = fd->private_data;
        int                     mask = 0;
 
+       if (dev->state <= STATE_DEV_OPENED)
+               return DEFAULT_POLLMASK;
+
        poll_wait(fd, &dev->wait, wait);
 
        spin_lock_irq (&dev->lock);
@@ -1316,19 +1240,6 @@ static long dev_ioctl (struct file *fd, unsigned code, unsigned long value)
        return ret;
 }
 
-/* used after device configuration */
-static const struct file_operations ep0_io_operations = {
-       .owner =        THIS_MODULE,
-       .llseek =       no_llseek,
-
-       .read =         ep0_read,
-       .write =        ep0_write,
-       .fasync =       ep0_fasync,
-       .poll =         ep0_poll,
-       .unlocked_ioctl =       dev_ioctl,
-       .release =      dev_release,
-};
-
 /*----------------------------------------------------------------------*/
 
 /* The in-kernel gadget driver handles most ep0 issues, in particular
@@ -1650,7 +1561,7 @@ static int activate_ep_files (struct dev_data *dev)
                        goto enomem1;
 
                data->dentry = gadgetfs_create_file (dev->sb, data->name,
-                               data, &ep_config_operations);
+                               data, &ep_io_operations);
                if (!data->dentry)
                        goto enomem2;
                list_add_tail (&data->epfiles, &dev->epfiles);
@@ -1852,6 +1763,14 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
        u32                     tag;
        char                    *kbuf;
 
+       spin_lock_irq(&dev->lock);
+       if (dev->state > STATE_DEV_OPENED) {
+               value = ep0_write(fd, buf, len, ptr);
+               spin_unlock_irq(&dev->lock);
+               return value;
+       }
+       spin_unlock_irq(&dev->lock);
+
        if (len < (USB_DT_CONFIG_SIZE + USB_DT_DEVICE_SIZE + 4))
                return -EINVAL;
 
@@ -1925,7 +1844,6 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr)
                 * on, they can work ... except in cleanup paths that
                 * kick in after the ep0 descriptor is closed.
                 */
-               fd->f_op = &ep0_io_operations;
                value = len;
        }
        return value;
@@ -1956,12 +1874,14 @@ dev_open (struct inode *inode, struct file *fd)
        return value;
 }
 
-static const struct file_operations dev_init_operations = {
+static const struct file_operations ep0_operations = {
        .llseek =       no_llseek,
 
        .open =         dev_open,
+       .read =         ep0_read,
        .write =        dev_config,
        .fasync =       ep0_fasync,
+       .poll =         ep0_poll,
        .unlocked_ioctl = dev_ioctl,
        .release =      dev_release,
 };
@@ -2077,7 +1997,7 @@ gadgetfs_fill_super (struct super_block *sb, void *opts, int silent)
                goto Enomem;
 
        dev->sb = sb;
-       dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &dev_init_operations);
+       dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &ep0_operations);
        if (!dev->dentry) {
                put_dev(dev);
                goto Enomem;
index 3a49416..6e0a019 100644 (file)
@@ -1740,10 +1740,9 @@ static int tcm_usbg_make_nexus(struct usbg_tpg *tpg, char *name)
                goto err_session;
        }
        /*
-        * Now register the TCM vHost virtual I_T Nexus as active with the
-        * call to __transport_register_session()
+        * Now register the TCM vHost virtual I_T Nexus as active.
         */
-       __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
+       transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
                        tv_nexus->tvn_se_sess, tv_nexus);
        tpg->tpg_nexus = tv_nexus;
        mutex_unlock(&tpg->tpg_mutex);
index ff97ac9..5ee9515 100644 (file)
@@ -68,8 +68,6 @@ static struct usb_zero_options gzero_options = {
        .isoc_maxpacket = GZERO_ISOC_MAXPACKET,
        .bulk_buflen = GZERO_BULK_BUFLEN,
        .qlen = GZERO_QLEN,
-       .int_interval = GZERO_INT_INTERVAL,
-       .int_maxpacket = GZERO_INT_MAXPACKET,
 };
 
 /*-------------------------------------------------------------------------*/
@@ -268,21 +266,6 @@ module_param_named(isoc_maxburst, gzero_options.isoc_maxburst, uint,
                S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(isoc_maxburst, "0 - 15 (ss only)");
 
-module_param_named(int_interval, gzero_options.int_interval, uint,
-               S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(int_interval, "1 - 16");
-
-module_param_named(int_maxpacket, gzero_options.int_maxpacket, uint,
-               S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(int_maxpacket, "0 - 1023 (fs), 0 - 1024 (hs/ss)");
-
-module_param_named(int_mult, gzero_options.int_mult, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(int_mult, "0 - 2 (hs/ss only)");
-
-module_param_named(int_maxburst, gzero_options.int_maxburst, uint,
-               S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(int_maxburst, "0 - 15 (ss only)");
-
 static struct usb_function *func_lb;
 static struct usb_function_instance *func_inst_lb;
 
@@ -318,10 +301,6 @@ static int __init zero_bind(struct usb_composite_dev *cdev)
        ss_opts->isoc_maxpacket = gzero_options.isoc_maxpacket;
        ss_opts->isoc_mult = gzero_options.isoc_mult;
        ss_opts->isoc_maxburst = gzero_options.isoc_maxburst;
-       ss_opts->int_interval = gzero_options.int_interval;
-       ss_opts->int_maxpacket = gzero_options.int_maxpacket;
-       ss_opts->int_mult = gzero_options.int_mult;
-       ss_opts->int_maxburst = gzero_options.int_maxburst;
        ss_opts->bulk_buflen = gzero_options.bulk_buflen;
 
        func_ss = usb_get_function(func_inst_ss);
index 663f790..be0964a 100644 (file)
@@ -34,7 +34,6 @@ static const char hcd_name[] = "ehci-atmel";
 
 struct atmel_ehci_priv {
        struct clk *iclk;
-       struct clk *fclk;
        struct clk *uclk;
        bool clocked;
 };
@@ -51,12 +50,9 @@ static void atmel_start_clock(struct atmel_ehci_priv *atmel_ehci)
 {
        if (atmel_ehci->clocked)
                return;
-       if (IS_ENABLED(CONFIG_COMMON_CLK)) {
-               clk_set_rate(atmel_ehci->uclk, 48000000);
-               clk_prepare_enable(atmel_ehci->uclk);
-       }
+
+       clk_prepare_enable(atmel_ehci->uclk);
        clk_prepare_enable(atmel_ehci->iclk);
-       clk_prepare_enable(atmel_ehci->fclk);
        atmel_ehci->clocked = true;
 }
 
@@ -64,10 +60,9 @@ static void atmel_stop_clock(struct atmel_ehci_priv *atmel_ehci)
 {
        if (!atmel_ehci->clocked)
                return;
-       clk_disable_unprepare(atmel_ehci->fclk);
+
        clk_disable_unprepare(atmel_ehci->iclk);
-       if (IS_ENABLED(CONFIG_COMMON_CLK))
-               clk_disable_unprepare(atmel_ehci->uclk);
+       clk_disable_unprepare(atmel_ehci->uclk);
        atmel_ehci->clocked = false;
 }
 
@@ -146,20 +141,13 @@ static int ehci_atmel_drv_probe(struct platform_device *pdev)
                retval = -ENOENT;
                goto fail_request_resource;
        }
-       atmel_ehci->fclk = devm_clk_get(&pdev->dev, "uhpck");
-       if (IS_ERR(atmel_ehci->fclk)) {
-               dev_err(&pdev->dev, "Error getting function clock\n");
-               retval = -ENOENT;
+
+       atmel_ehci->uclk = devm_clk_get(&pdev->dev, "usb_clk");
+       if (IS_ERR(atmel_ehci->uclk)) {
+               dev_err(&pdev->dev, "failed to get uclk\n");
+               retval = PTR_ERR(atmel_ehci->uclk);
                goto fail_request_resource;
        }
-       if (IS_ENABLED(CONFIG_COMMON_CLK)) {
-               atmel_ehci->uclk = devm_clk_get(&pdev->dev, "usb_clk");
-               if (IS_ERR(atmel_ehci->uclk)) {
-                       dev_err(&pdev->dev, "failed to get uclk\n");
-                       retval = PTR_ERR(atmel_ehci->uclk);
-                       goto fail_request_resource;
-               }
-       }
 
        ehci = hcd_to_ehci(hcd);
        /* registers start at offset 0x0 */
index a7865c4..0827d7c 100644 (file)
@@ -387,6 +387,10 @@ static void xhci_clear_port_change_bit(struct xhci_hcd *xhci, u16 wValue,
                status = PORT_PLC;
                port_change_bit = "link state";
                break;
+       case USB_PORT_FEAT_C_PORT_CONFIG_ERROR:
+               status = PORT_CEC;
+               port_change_bit = "config error";
+               break;
        default:
                /* Should never happen */
                return;
@@ -588,6 +592,8 @@ static u32 xhci_get_port_status(struct usb_hcd *hcd,
                        status |= USB_PORT_STAT_C_LINK_STATE << 16;
                if ((raw_port_status & PORT_WRC))
                        status |= USB_PORT_STAT_C_BH_RESET << 16;
+               if ((raw_port_status & PORT_CEC))
+                       status |= USB_PORT_STAT_C_CONFIG_ERROR << 16;
        }
 
        if (hcd->speed != HCD_USB3) {
@@ -1005,6 +1011,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                case USB_PORT_FEAT_C_OVER_CURRENT:
                case USB_PORT_FEAT_C_ENABLE:
                case USB_PORT_FEAT_C_PORT_LINK_STATE:
+               case USB_PORT_FEAT_C_PORT_CONFIG_ERROR:
                        xhci_clear_port_change_bit(xhci, wValue, wIndex,
                                        port_array[wIndex], temp);
                        break;
@@ -1069,7 +1076,7 @@ int xhci_hub_status_data(struct usb_hcd *hcd, char *buf)
         */
        status = bus_state->resuming_ports;
 
-       mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC;
+       mask = PORT_CSC | PORT_PEC | PORT_OCC | PORT_PLC | PORT_WRC | PORT_CEC;
 
        spin_lock_irqsave(&xhci->lock, flags);
        /* For each port, did anything change?  If so, set that bit in buf. */
index 7f76c8a..2af32e2 100644 (file)
@@ -37,6 +37,9 @@
 
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI     0x8c31
 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI  0x9c31
+#define PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI            0x22b5
+#define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI                0xa12f
+#define PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI       0x9d2f
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -112,6 +115,7 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
        if (pdev->vendor == PCI_VENDOR_ID_INTEL) {
                xhci->quirks |= XHCI_LPM_SUPPORT;
                xhci->quirks |= XHCI_INTEL_HOST;
+               xhci->quirks |= XHCI_AVOID_BEI;
        }
        if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
                        pdev->device == PCI_DEVICE_ID_INTEL_PANTHERPOINT_XHCI) {
@@ -127,12 +131,17 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
                 * PPT chipsets.
                 */
                xhci->quirks |= XHCI_SPURIOUS_REBOOT;
-               xhci->quirks |= XHCI_AVOID_BEI;
        }
        if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
                pdev->device == PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI) {
                xhci->quirks |= XHCI_SPURIOUS_REBOOT;
        }
+       if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
+               (pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_LP_XHCI ||
+                pdev->device == PCI_DEVICE_ID_INTEL_SUNRISEPOINT_H_XHCI ||
+                pdev->device == PCI_DEVICE_ID_INTEL_CHERRYVIEW_XHCI)) {
+               xhci->quirks |= XHCI_PME_STUCK_QUIRK;
+       }
        if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
                        pdev->device == PCI_DEVICE_ID_EJ168) {
                xhci->quirks |= XHCI_RESET_ON_RESUME;
@@ -159,6 +168,21 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
                                "QUIRK: Resetting on resume");
 }
 
+/*
+ * Make sure PME works on some Intel xHCI controllers by writing 1 to clear
+ * the Internal PME flag bit in vendor specific PMCTRL register at offset 0x80a4
+ */
+static void xhci_pme_quirk(struct xhci_hcd *xhci)
+{
+       u32 val;
+       void __iomem *reg;
+
+       reg = (void __iomem *) xhci->cap_regs + 0x80a4;
+       val = readl(reg);
+       writel(val | BIT(28), reg);
+       readl(reg);
+}
+
 /* called during probe() after chip reset completes */
 static int xhci_pci_setup(struct usb_hcd *hcd)
 {
@@ -283,6 +307,9 @@ static int xhci_pci_suspend(struct usb_hcd *hcd, bool do_wakeup)
        if (xhci->quirks & XHCI_COMP_MODE_QUIRK)
                pdev->no_d3cold = true;
 
+       if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
+               xhci_pme_quirk(xhci);
+
        return xhci_suspend(xhci, do_wakeup);
 }
 
@@ -313,6 +340,9 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
        if (pdev->vendor == PCI_VENDOR_ID_INTEL)
                usb_enable_intel_xhci_ports(pdev);
 
+       if (xhci->quirks & XHCI_PME_STUCK_QUIRK)
+               xhci_pme_quirk(xhci);
+
        retval = xhci_resume(xhci, hibernated);
        return retval;
 }
index 08d402b..0e11d61 100644 (file)
@@ -83,16 +83,6 @@ static int xhci_plat_probe(struct platform_device *pdev)
        if (irq < 0)
                return -ENODEV;
 
-
-       if (of_device_is_compatible(pdev->dev.of_node,
-                                   "marvell,armada-375-xhci") ||
-           of_device_is_compatible(pdev->dev.of_node,
-                                   "marvell,armada-380-xhci")) {
-               ret = xhci_mvebu_mbus_init_quirk(pdev);
-               if (ret)
-                       return ret;
-       }
-
        /* Initialize dma_mask and coherent_dma_mask to 32-bits */
        ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
        if (ret)
@@ -127,6 +117,15 @@ static int xhci_plat_probe(struct platform_device *pdev)
                        goto put_hcd;
        }
 
+       if (of_device_is_compatible(pdev->dev.of_node,
+                                   "marvell,armada-375-xhci") ||
+           of_device_is_compatible(pdev->dev.of_node,
+                                   "marvell,armada-380-xhci")) {
+               ret = xhci_mvebu_mbus_init_quirk(pdev);
+               if (ret)
+                       goto disable_clk;
+       }
+
        ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
        if (ret)
                goto disable_clk;
index 88da8d6..73485fa 100644 (file)
@@ -1946,7 +1946,7 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
        if (event_trb != ep_ring->dequeue) {
                /* The event was for the status stage */
                if (event_trb == td->last_trb) {
-                       if (td->urb->actual_length != 0) {
+                       if (td->urb_length_set) {
                                /* Don't overwrite a previously set error code
                                 */
                                if ((*status == -EINPROGRESS || *status == 0) &&
@@ -1960,7 +1960,13 @@ static int process_ctrl_td(struct xhci_hcd *xhci, struct xhci_td *td,
                                        td->urb->transfer_buffer_length;
                        }
                } else {
-               /* Maybe the event was for the data stage? */
+                       /*
+                        * Maybe the event was for the data stage? If so, update
+                        * already the actual_length of the URB and flag it as
+                        * set, so that it is not overwritten in the event for
+                        * the last TRB.
+                        */
+                       td->urb_length_set = true;
                        td->urb->actual_length =
                                td->urb->transfer_buffer_length -
                                EVENT_TRB_LEN(le32_to_cpu(event->transfer_len));
index 9745147..8e421b8 100644 (file)
@@ -1,3 +1,4 @@
+
 /*
  * xHCI host controller driver
  *
@@ -88,9 +89,10 @@ struct xhci_cap_regs {
 #define HCS_IST(p)             (((p) >> 0) & 0xf)
 /* bits 4:7, max number of Event Ring segments */
 #define HCS_ERST_MAX(p)                (((p) >> 4) & 0xf)
+/* bits 21:25 Hi 5 bits of Scratchpad buffers SW must allocate for the HW */
 /* bit 26 Scratchpad restore - for save/restore HW state - not used yet */
-/* bits 27:31 number of Scratchpad buffers SW must allocate for the HW */
-#define HCS_MAX_SCRATCHPAD(p)   (((p) >> 27) & 0x1f)
+/* bits 27:31 Lo 5 bits of Scratchpad buffers SW must allocate for the HW */
+#define HCS_MAX_SCRATCHPAD(p)   ((((p) >> 16) & 0x3e0) | (((p) >> 27) & 0x1f))
 
 /* HCSPARAMS3 - hcs_params3 - bitmasks */
 /* bits 0:7, Max U1 to U0 latency for the roothub ports */
@@ -1288,6 +1290,8 @@ struct xhci_td {
        struct xhci_segment     *start_seg;
        union xhci_trb          *first_trb;
        union xhci_trb          *last_trb;
+       /* actual_length of the URB has already been set */
+       bool                    urb_length_set;
 };
 
 /* xHCI command default timeout value */
@@ -1560,6 +1564,7 @@ struct xhci_hcd {
 #define XHCI_SPURIOUS_WAKEUP   (1 << 18)
 /* For controllers with a broken beyond repair streams implementation */
 #define XHCI_BROKEN_STREAMS    (1 << 19)
+#define XHCI_PME_STUCK_QUIRK   (1 << 20)
        unsigned int            num_active_eps;
        unsigned int            limit_active_eps;
        /* There are two roothubs to keep track of bus suspend info for */
index b982755..bfa402c 100644 (file)
@@ -151,8 +151,7 @@ int isp1760_register(struct resource *mem, int irq, unsigned long irqflags,
        }
 
        if (IS_ENABLED(CONFIG_USB_ISP1761_UDC) && !udc_disabled) {
-               ret = isp1760_udc_register(isp, irq, irqflags | IRQF_SHARED |
-                                          IRQF_DISABLED);
+               ret = isp1760_udc_register(isp, irq, irqflags);
                if (ret < 0) {
                        isp1760_hcd_unregister(&isp->hcd);
                        return ret;
index eba9b82..3cb98b1 100644 (file)
@@ -1274,7 +1274,7 @@ static void errata2_function(unsigned long data)
        for (slot = 0; slot < 32; slot++)
                if (priv->atl_slots[slot].qh && time_after(jiffies,
                                        priv->atl_slots[slot].timestamp +
-                                       SLOT_TIMEOUT * HZ / 1000)) {
+                                       msecs_to_jiffies(SLOT_TIMEOUT))) {
                        ptd_read(hcd->regs, ATL_PTD_OFFSET, slot, &ptd);
                        if (!FROM_DW0_VALID(ptd.dw0) &&
                                        !FROM_DW3_ACTIVE(ptd.dw3))
@@ -1286,7 +1286,7 @@ static void errata2_function(unsigned long data)
 
        spin_unlock_irqrestore(&priv->lock, spinflags);
 
-       errata2_timer.expires = jiffies + SLOT_CHECK_PERIOD * HZ / 1000;
+       errata2_timer.expires = jiffies + msecs_to_jiffies(SLOT_CHECK_PERIOD);
        add_timer(&errata2_timer);
 }
 
@@ -1336,7 +1336,7 @@ static int isp1760_run(struct usb_hcd *hcd)
                return retval;
 
        setup_timer(&errata2_timer, errata2_function, (unsigned long)hcd);
-       errata2_timer.expires = jiffies + SLOT_CHECK_PERIOD * HZ / 1000;
+       errata2_timer.expires = jiffies + msecs_to_jiffies(SLOT_CHECK_PERIOD);
        add_timer(&errata2_timer);
 
        chipid = reg_read32(hcd->regs, HC_CHIP_ID_REG);
index 9612d79..3fc4fe7 100644 (file)
@@ -1191,6 +1191,7 @@ static int isp1760_udc_start(struct usb_gadget *gadget,
                             struct usb_gadget_driver *driver)
 {
        struct isp1760_udc *udc = gadget_to_udc(gadget);
+       unsigned long flags;
 
        /* The hardware doesn't support low speed. */
        if (driver->max_speed < USB_SPEED_FULL) {
@@ -1198,17 +1199,17 @@ static int isp1760_udc_start(struct usb_gadget *gadget,
                return -EINVAL;
        }
 
-       spin_lock(&udc->lock);
+       spin_lock_irqsave(&udc->lock, flags);
 
        if (udc->driver) {
                dev_err(udc->isp->dev, "UDC already has a gadget driver\n");
-               spin_unlock(&udc->lock);
+               spin_unlock_irqrestore(&udc->lock, flags);
                return -EBUSY;
        }
 
        udc->driver = driver;
 
-       spin_unlock(&udc->lock);
+       spin_unlock_irqrestore(&udc->lock, flags);
 
        dev_dbg(udc->isp->dev, "starting UDC with driver %s\n",
                driver->function);
@@ -1232,6 +1233,7 @@ static int isp1760_udc_start(struct usb_gadget *gadget,
 static int isp1760_udc_stop(struct usb_gadget *gadget)
 {
        struct isp1760_udc *udc = gadget_to_udc(gadget);
+       unsigned long flags;
 
        dev_dbg(udc->isp->dev, "%s\n", __func__);
 
@@ -1239,9 +1241,9 @@ static int isp1760_udc_stop(struct usb_gadget *gadget)
 
        isp1760_udc_write(udc, DC_MODE, 0);
 
-       spin_lock(&udc->lock);
+       spin_lock_irqsave(&udc->lock, flags);
        udc->driver = NULL;
-       spin_unlock(&udc->lock);
+       spin_unlock_irqrestore(&udc->lock, flags);
 
        return 0;
 }
@@ -1411,7 +1413,7 @@ static int isp1760_udc_init(struct isp1760_udc *udc)
                return -ENODEV;
        }
 
-       if (chipid != 0x00011582) {
+       if (chipid != 0x00011582 && chipid != 0x00158210) {
                dev_err(udc->isp->dev, "udc: invalid chip ID 0x%08x\n", chipid);
                return -ENODEV;
        }
@@ -1451,8 +1453,8 @@ int isp1760_udc_register(struct isp1760_device *isp, int irq,
 
        sprintf(udc->irqname, "%s (udc)", devname);
 
-       ret = request_irq(irq, isp1760_udc_irq, IRQF_SHARED | IRQF_DISABLED |
-                         irqflags, udc->irqname, udc);
+       ret = request_irq(irq, isp1760_udc_irq, IRQF_SHARED | irqflags,
+                         udc->irqname, udc);
        if (ret < 0)
                goto error;
 
index 14e1628..39db8b6 100644 (file)
@@ -79,7 +79,8 @@ config USB_MUSB_TUSB6010
 
 config USB_MUSB_OMAP2PLUS
        tristate "OMAP2430 and onwards"
-       depends on ARCH_OMAP2PLUS && USB && OMAP_CONTROL_PHY
+       depends on ARCH_OMAP2PLUS && USB
+       depends on OMAP_CONTROL_PHY || !OMAP_CONTROL_PHY
        select GENERIC_PHY
 
 config USB_MUSB_AM35X
index e6f4cbf..067920f 100644 (file)
@@ -1969,10 +1969,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
                goto fail0;
        }
 
-       pm_runtime_use_autosuspend(musb->controller);
-       pm_runtime_set_autosuspend_delay(musb->controller, 200);
-       pm_runtime_enable(musb->controller);
-
        spin_lock_init(&musb->lock);
        musb->board_set_power = plat->set_power;
        musb->min_power = plat->min_power;
@@ -1991,6 +1987,12 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
        musb_readl = musb_default_readl;
        musb_writel = musb_default_writel;
 
+       /* We need musb_read/write functions initialized for PM */
+       pm_runtime_use_autosuspend(musb->controller);
+       pm_runtime_set_autosuspend_delay(musb->controller, 200);
+       pm_runtime_irq_safe(musb->controller);
+       pm_runtime_enable(musb->controller);
+
        /* The musb_platform_init() call:
         *   - adjusts musb->mregs
         *   - sets the musb->isr
index 53bd0e7..a900c98 100644 (file)
@@ -457,12 +457,27 @@ static int dsps_musb_init(struct musb *musb)
        if (IS_ERR(musb->xceiv))
                return PTR_ERR(musb->xceiv);
 
+       musb->phy = devm_phy_get(dev->parent, "usb2-phy");
+
        /* Returns zero if e.g. not clocked */
        rev = dsps_readl(reg_base, wrp->revision);
        if (!rev)
                return -ENODEV;
 
        usb_phy_init(musb->xceiv);
+       if (IS_ERR(musb->phy))  {
+               musb->phy = NULL;
+       } else {
+               ret = phy_init(musb->phy);
+               if (ret < 0)
+                       return ret;
+               ret = phy_power_on(musb->phy);
+               if (ret) {
+                       phy_exit(musb->phy);
+                       return ret;
+               }
+       }
+
        setup_timer(&glue->timer, otg_timer, (unsigned long) musb);
 
        /* Reset the musb */
@@ -502,6 +517,8 @@ static int dsps_musb_exit(struct musb *musb)
 
        del_timer_sync(&glue->timer);
        usb_phy_shutdown(musb->xceiv);
+       phy_power_off(musb->phy);
+       phy_exit(musb->phy);
        debugfs_remove_recursive(glue->dbgfs_root);
 
        return 0;
@@ -610,7 +627,7 @@ static int dsps_musb_reset(struct musb *musb)
        struct device *dev = musb->controller;
        struct dsps_glue *glue = dev_get_drvdata(dev->parent);
        const struct dsps_musb_wrapper *wrp = glue->wrp;
-       int session_restart = 0;
+       int session_restart = 0, error;
 
        if (glue->sw_babble_enabled)
                session_restart = sw_babble_control(musb);
@@ -624,8 +641,14 @@ static int dsps_musb_reset(struct musb *musb)
                dsps_writel(musb->ctrl_base, wrp->control, (1 << wrp->reset));
                usleep_range(100, 200);
                usb_phy_shutdown(musb->xceiv);
+               error = phy_power_off(musb->phy);
+               if (error)
+                       dev_err(dev, "phy shutdown failed: %i\n", error);
                usleep_range(100, 200);
                usb_phy_init(musb->xceiv);
+               error = phy_power_on(musb->phy);
+               if (error)
+                       dev_err(dev, "phy powerup failed: %i\n", error);
                session_restart = 1;
        }
 
@@ -687,7 +710,7 @@ static int dsps_create_musb_pdev(struct dsps_glue *glue,
        struct musb_hdrc_config *config;
        struct platform_device *musb;
        struct device_node *dn = parent->dev.of_node;
-       int ret;
+       int ret, val;
 
        memset(resources, 0, sizeof(resources));
        res = platform_get_resource_byname(parent, IORESOURCE_MEM, "mc");
@@ -739,7 +762,10 @@ static int dsps_create_musb_pdev(struct dsps_glue *glue,
        pdata.mode = get_musb_port_mode(dev);
        /* DT keeps this entry in mA, musb expects it as per USB spec */
        pdata.power = get_int_prop(dn, "mentor,power") / 2;
-       config->multipoint = of_property_read_bool(dn, "mentor,multipoint");
+
+       ret = of_property_read_u32(dn, "mentor,multipoint", &val);
+       if (!ret && val)
+               config->multipoint = true;
 
        ret = platform_device_add_data(musb, &pdata, sizeof(pdata));
        if (ret) {
index 883a9ad..c3d5fc9 100644 (file)
@@ -2613,7 +2613,7 @@ static const struct hc_driver musb_hc_driver = {
        .description            = "musb-hcd",
        .product_desc           = "MUSB HDRC host driver",
        .hcd_priv_size          = sizeof(struct musb *),
-       .flags                  = HCD_USB2 | HCD_MEMORY,
+       .flags                  = HCD_USB2 | HCD_MEMORY | HCD_BH,
 
        /* not using irq handler or reset hooks from usbcore, since
         * those must be shared with peripheral code for OTG configs
index 763649e..cc752d8 100644 (file)
@@ -516,7 +516,7 @@ static int omap2430_probe(struct platform_device *pdev)
        struct omap2430_glue            *glue;
        struct device_node              *np = pdev->dev.of_node;
        struct musb_hdrc_config         *config;
-       int                             ret = -ENOMEM;
+       int                             ret = -ENOMEM, val;
 
        glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL);
        if (!glue)
@@ -559,7 +559,10 @@ static int omap2430_probe(struct platform_device *pdev)
                of_property_read_u32(np, "num-eps", (u32 *)&config->num_eps);
                of_property_read_u32(np, "ram-bits", (u32 *)&config->ram_bits);
                of_property_read_u32(np, "power", (u32 *)&pdata->power);
-               config->multipoint = of_property_read_bool(np, "multipoint");
+
+               ret = of_property_read_u32(np, "multipoint", &val);
+               if (!ret && val)
+                       config->multipoint = true;
 
                pdata->board_data       = data;
                pdata->config           = config;
index 403fab7..7b3035f 100644 (file)
@@ -126,6 +126,9 @@ struct phy_control *am335x_get_phy_control(struct device *dev)
                return NULL;
 
        dev = bus_find_device(&platform_bus_type, NULL, node, match);
+       if (!dev)
+               return NULL;
+
        ctrl_usb = dev_get_drvdata(dev);
        if (!ctrl_usb)
                return NULL;
index de83b9d..ebc99ee 100644 (file)
@@ -6,6 +6,7 @@ config USB_RENESAS_USBHS
        tristate 'Renesas USBHS controller'
        depends on USB_GADGET
        depends on ARCH_SHMOBILE || SUPERH || COMPILE_TEST
+       depends on EXTCON || !EXTCON # if EXTCON=m, USBHS cannot be built-in
        default n
        help
          Renesas USBHS is a discrete USB host and peripheral controller chip
index 9374bd2..8936a83 100644 (file)
@@ -38,56 +38,51 @@ static int usb_serial_device_match(struct device *dev,
        return 0;
 }
 
-static ssize_t port_number_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct usb_serial_port *port = to_usb_serial_port(dev);
-
-       return sprintf(buf, "%d\n", port->port_number);
-}
-static DEVICE_ATTR_RO(port_number);
-
 static int usb_serial_device_probe(struct device *dev)
 {
        struct usb_serial_driver *driver;
        struct usb_serial_port *port;
+       struct device *tty_dev;
        int retval = 0;
        int minor;
 
        port = to_usb_serial_port(dev);
-       if (!port) {
-               retval = -ENODEV;
-               goto exit;
-       }
+       if (!port)
+               return -ENODEV;
 
        /* make sure suspend/resume doesn't race against port_probe */
        retval = usb_autopm_get_interface(port->serial->interface);
        if (retval)
-               goto exit;
+               return retval;
 
        driver = port->serial->type;
        if (driver->port_probe) {
                retval = driver->port_probe(port);
                if (retval)
-                       goto exit_with_autopm;
+                       goto err_autopm_put;
        }
 
-       retval = device_create_file(dev, &dev_attr_port_number);
-       if (retval) {
-               if (driver->port_remove)
-                       retval = driver->port_remove(port);
-               goto exit_with_autopm;
+       minor = port->minor;
+       tty_dev = tty_register_device(usb_serial_tty_driver, minor, dev);
+       if (IS_ERR(tty_dev)) {
+               retval = PTR_ERR(tty_dev);
+               goto err_port_remove;
        }
 
-       minor = port->minor;
-       tty_register_device(usb_serial_tty_driver, minor, dev);
+       usb_autopm_put_interface(port->serial->interface);
+
        dev_info(&port->serial->dev->dev,
                 "%s converter now attached to ttyUSB%d\n",
                 driver->description, minor);
 
-exit_with_autopm:
+       return 0;
+
+err_port_remove:
+       if (driver->port_remove)
+               driver->port_remove(port);
+err_autopm_put:
        usb_autopm_put_interface(port->serial->interface);
-exit:
+
        return retval;
 }
 
@@ -114,8 +109,6 @@ static int usb_serial_device_remove(struct device *dev)
        minor = port->minor;
        tty_unregister_device(usb_serial_tty_driver, minor);
 
-       device_remove_file(&port->dev, &dev_attr_port_number);
-
        driver = port->serial->type;
        if (driver->port_remove)
                retval = driver->port_remove(port);
index 2d72aa3..ede4f5f 100644 (file)
@@ -84,6 +84,10 @@ struct ch341_private {
        u8 line_status; /* active status of modem control inputs */
 };
 
+static void ch341_set_termios(struct tty_struct *tty,
+                             struct usb_serial_port *port,
+                             struct ktermios *old_termios);
+
 static int ch341_control_out(struct usb_device *dev, u8 request,
                             u16 value, u16 index)
 {
@@ -309,19 +313,12 @@ static int ch341_open(struct tty_struct *tty, struct usb_serial_port *port)
        struct ch341_private *priv = usb_get_serial_port_data(port);
        int r;
 
-       priv->baud_rate = DEFAULT_BAUD_RATE;
-
        r = ch341_configure(serial->dev, priv);
        if (r)
                goto out;
 
-       r = ch341_set_handshake(serial->dev, priv->line_control);
-       if (r)
-               goto out;
-
-       r = ch341_set_baudrate(serial->dev, priv);
-       if (r)
-               goto out;
+       if (tty)
+               ch341_set_termios(tty, port, NULL);
 
        dev_dbg(&port->dev, "%s - submitting interrupt urb\n", __func__);
        r = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL);
index 29fa1c3..3806e70 100644 (file)
@@ -14,6 +14,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/tty.h>
 #include <linux/console.h>
@@ -144,6 +145,7 @@ static int usb_console_setup(struct console *co, char *options)
                        init_ldsem(&tty->ldisc_sem);
                        INIT_LIST_HEAD(&tty->tty_files);
                        kref_get(&tty->driver->kref);
+                       __module_get(tty->driver->owner);
                        tty->ops = &usb_console_fake_tty_ops;
                        if (tty_init_termios(tty)) {
                                retval = -ENOMEM;
index f40c856..84ce2d7 100644 (file)
@@ -147,6 +147,8 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x166A, 0x0305) }, /* Clipsal C-5000CT2 C-Bus Spectrum Colour Touchscreen */
        { USB_DEVICE(0x166A, 0x0401) }, /* Clipsal L51xx C-Bus Architectural Dimmer */
        { USB_DEVICE(0x166A, 0x0101) }, /* Clipsal 5560884 C-Bus Multi-room Audio Matrix Switcher */
+       { USB_DEVICE(0x16C0, 0x09B0) }, /* Lunatico Seletek */
+       { USB_DEVICE(0x16C0, 0x09B1) }, /* Lunatico Seletek */
        { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */
        { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */
        { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */
index 1ebb351..8eb68a3 100644 (file)
@@ -604,6 +604,7 @@ static const struct usb_device_id id_table_combined[] = {
                .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID),
                .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
+       { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) },
        /*
         * ELV devices:
         */
@@ -799,6 +800,8 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE(FTDI_VID, FTDI_ELSTER_UNICOM_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_PROPOX_JTAGCABLEII_PID) },
        { USB_DEVICE(FTDI_VID, FTDI_PROPOX_ISPCABLEIII_PID) },
+       { USB_DEVICE(FTDI_VID, CYBER_CORTEX_AV_PID),
+               .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_PID),
                .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
        { USB_DEVICE(OLIMEX_VID, OLIMEX_ARM_USB_OCD_H_PID),
@@ -978,6 +981,23 @@ static const struct usb_device_id id_table_combined[] = {
        { USB_DEVICE_INTERFACE_NUMBER(INFINEON_VID, INFINEON_TRIBOARD_PID, 1) },
        /* GE Healthcare devices */
        { USB_DEVICE(GE_HEALTHCARE_VID, GE_HEALTHCARE_NEMO_TRACKER_PID) },
+       /* Active Research (Actisense) devices */
+       { USB_DEVICE(FTDI_VID, ACTISENSE_NDC_PID) },
+       { USB_DEVICE(FTDI_VID, ACTISENSE_USG_PID) },
+       { USB_DEVICE(FTDI_VID, ACTISENSE_NGT_PID) },
+       { USB_DEVICE(FTDI_VID, ACTISENSE_NGW_PID) },
+       { USB_DEVICE(FTDI_VID, ACTISENSE_D9AC_PID) },
+       { USB_DEVICE(FTDI_VID, ACTISENSE_D9AD_PID) },
+       { USB_DEVICE(FTDI_VID, ACTISENSE_D9AE_PID) },
+       { USB_DEVICE(FTDI_VID, ACTISENSE_D9AF_PID) },
+       { USB_DEVICE(FTDI_VID, CHETCO_SEAGAUGE_PID) },
+       { USB_DEVICE(FTDI_VID, CHETCO_SEASWITCH_PID) },
+       { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_NMEA2000_PID) },
+       { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ETHERNET_PID) },
+       { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_WIFI_PID) },
+       { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) },
+       { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) },
+       { USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) },
        { }                                     /* Terminating entry */
 };
 
@@ -1864,8 +1884,12 @@ static int ftdi_8u2232c_probe(struct usb_serial *serial)
 {
        struct usb_device *udev = serial->dev;
 
-       if ((udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems")) ||
-           (udev->product && !strcmp(udev->product, "BeagleBone/XDS100V2")))
+       if (udev->manufacturer && !strcmp(udev->manufacturer, "CALAO Systems"))
+               return ftdi_jtag_probe(serial);
+
+       if (udev->product &&
+               (!strcmp(udev->product, "BeagleBone/XDS100V2") ||
+                !strcmp(udev->product, "SNAP Connect E10")))
                return ftdi_jtag_probe(serial);
 
        return 0;
index e52409c..4e4f46f 100644 (file)
@@ -38,6 +38,9 @@
 
 #define FTDI_LUMEL_PD12_PID    0x6002
 
+/* Cyber Cortex AV by Fabulous Silicon (http://fabuloussilicon.com) */
+#define CYBER_CORTEX_AV_PID    0x8698
+
 /*
  * Marvell OpenRD Base, Client
  * http://www.open-rd.org
  */
 #define FTDI_NT_ORIONLXM_PID   0x7c90  /* OrionLXm Substation Automation Platform */
 
+/*
+ * Synapse Wireless product ids (FTDI_VID)
+ * http://www.synapse-wireless.com
+ */
+#define FTDI_SYNAPSE_SS200_PID 0x9090 /* SS200 - SNAP Stick 200 */
+
 
 /********************************/
 /** third-party VID/PID combos **/
  */
 #define GE_HEALTHCARE_VID              0x1901
 #define GE_HEALTHCARE_NEMO_TRACKER_PID 0x0015
+
+/*
+ * Active Research (Actisense) devices
+ */
+#define ACTISENSE_NDC_PID              0xD9A8 /* NDC USB Serial Adapter */
+#define ACTISENSE_USG_PID              0xD9A9 /* USG USB Serial Adapter */
+#define ACTISENSE_NGT_PID              0xD9AA /* NGT NMEA2000 Interface */
+#define ACTISENSE_NGW_PID              0xD9AB /* NGW NMEA2000 Gateway */
+#define ACTISENSE_D9AC_PID             0xD9AC /* Actisense Reserved */
+#define ACTISENSE_D9AD_PID             0xD9AD /* Actisense Reserved */
+#define ACTISENSE_D9AE_PID             0xD9AE /* Actisense Reserved */
+#define ACTISENSE_D9AF_PID             0xD9AF /* Actisense Reserved */
+#define CHETCO_SEAGAUGE_PID            0xA548 /* SeaGauge USB Adapter */
+#define CHETCO_SEASWITCH_PID           0xA549 /* SeaSwitch USB Adapter */
+#define CHETCO_SEASMART_NMEA2000_PID   0xA54A /* SeaSmart NMEA2000 Gateway */
+#define CHETCO_SEASMART_ETHERNET_PID   0xA54B /* SeaSmart Ethernet Gateway */
+#define CHETCO_SEASMART_WIFI_PID       0xA5AC /* SeaSmart Wifi Gateway */
+#define CHETCO_SEASMART_DISPLAY_PID    0xA5AD /* SeaSmart NMEA2000 Display */
+#define CHETCO_SEASMART_LITE_PID       0xA5AE /* SeaSmart Lite USB Adapter */
+#define CHETCO_SEASMART_ANALOG_PID     0xA5AF /* SeaSmart Analog Adapter */
index ccf1df7..54e170d 100644 (file)
@@ -258,7 +258,8 @@ void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout)
         * character or at least one jiffy.
         */
        period = max_t(unsigned long, (10 * HZ / bps), 1);
-       period = min_t(unsigned long, period, timeout);
+       if (timeout)
+               period = min_t(unsigned long, period, timeout);
 
        dev_dbg(&port->dev, "%s - timeout = %u ms, period = %u ms\n",
                                        __func__, jiffies_to_msecs(timeout),
@@ -268,7 +269,7 @@ void usb_serial_generic_wait_until_sent(struct tty_struct *tty, long timeout)
                schedule_timeout_interruptible(period);
                if (signal_pending(current))
                        break;
-               if (time_after(jiffies, expire))
+               if (timeout && time_after(jiffies, expire))
                        break;
        }
 }
index dd97d8b..4f7e072 100644 (file)
@@ -61,6 +61,7 @@ struct keyspan_pda_private {
 /* For Xircom PGSDB9 and older Entrega version of the same device */
 #define XIRCOM_VENDOR_ID               0x085a
 #define XIRCOM_FAKE_ID                 0x8027
+#define XIRCOM_FAKE_ID_2               0x8025 /* "PGMFHUB" serial */
 #define ENTREGA_VENDOR_ID              0x1645
 #define ENTREGA_FAKE_ID                        0x8093
 
@@ -70,6 +71,7 @@ static const struct usb_device_id id_table_combined[] = {
 #endif
 #ifdef XIRCOM
        { USB_DEVICE(XIRCOM_VENDOR_ID, XIRCOM_FAKE_ID) },
+       { USB_DEVICE(XIRCOM_VENDOR_ID, XIRCOM_FAKE_ID_2) },
        { USB_DEVICE(ENTREGA_VENDOR_ID, ENTREGA_FAKE_ID) },
 #endif
        { USB_DEVICE(KEYSPAN_VENDOR_ID, KEYSPAN_PDA_ID) },
@@ -93,6 +95,7 @@ static const struct usb_device_id id_table_fake[] = {
 #ifdef XIRCOM
 static const struct usb_device_id id_table_fake_xircom[] = {
        { USB_DEVICE(XIRCOM_VENDOR_ID, XIRCOM_FAKE_ID) },
+       { USB_DEVICE(XIRCOM_VENDOR_ID, XIRCOM_FAKE_ID_2) },
        { USB_DEVICE(ENTREGA_VENDOR_ID, ENTREGA_FAKE_ID) },
        { }
 };
index ab1d690..460a406 100644 (file)
@@ -1284,7 +1284,8 @@ static int mxuport_open(struct tty_struct *tty, struct usb_serial_port *port)
        }
 
        /* Initial port termios */
-       mxuport_set_termios(tty, port, NULL);
+       if (tty)
+               mxuport_set_termios(tty, port, NULL);
 
        /*
         * TODO: use RQ_VENDOR_GET_MSR, once we know what it
index 0f872e6..829604d 100644 (file)
@@ -132,6 +132,7 @@ MODULE_DEVICE_TABLE(usb, id_table);
 #define UART_OVERRUN_ERROR             0x40
 #define UART_CTS                       0x80
 
+static void pl2303_set_break(struct usb_serial_port *port, bool enable);
 
 enum pl2303_type {
        TYPE_01,        /* Type 0 and 1 (difference unknown) */
@@ -615,6 +616,7 @@ static void pl2303_close(struct usb_serial_port *port)
 {
        usb_serial_generic_close(port);
        usb_kill_urb(port->interrupt_in_urb);
+       pl2303_set_break(port, false);
 }
 
 static int pl2303_open(struct tty_struct *tty, struct usb_serial_port *port)
@@ -741,17 +743,16 @@ static int pl2303_ioctl(struct tty_struct *tty,
        return -ENOIOCTLCMD;
 }
 
-static void pl2303_break_ctl(struct tty_struct *tty, int break_state)
+static void pl2303_set_break(struct usb_serial_port *port, bool enable)
 {
-       struct usb_serial_port *port = tty->driver_data;
        struct usb_serial *serial = port->serial;
        u16 state;
        int result;
 
-       if (break_state == 0)
-               state = BREAK_OFF;
-       else
+       if (enable)
                state = BREAK_ON;
+       else
+               state = BREAK_OFF;
 
        dev_dbg(&port->dev, "%s - turning break %s\n", __func__,
                        state == BREAK_OFF ? "off" : "on");
@@ -763,6 +764,13 @@ static void pl2303_break_ctl(struct tty_struct *tty, int break_state)
                dev_err(&port->dev, "error sending break = %d\n", result);
 }
 
+static void pl2303_break_ctl(struct tty_struct *tty, int state)
+{
+       struct usb_serial_port *port = tty->driver_data;
+
+       pl2303_set_break(port, state);
+}
+
 static void pl2303_update_line_status(struct usb_serial_port *port,
                                      unsigned char *data,
                                      unsigned int actual_length)
index 475723c..529066b 100644 (file)
@@ -687,6 +687,21 @@ static void serial_port_dtr_rts(struct tty_port *port, int on)
                drv->dtr_rts(p, on);
 }
 
+static ssize_t port_number_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct usb_serial_port *port = to_usb_serial_port(dev);
+
+       return sprintf(buf, "%u\n", port->port_number);
+}
+static DEVICE_ATTR_RO(port_number);
+
+static struct attribute *usb_serial_port_attrs[] = {
+       &dev_attr_port_number.attr,
+       NULL
+};
+ATTRIBUTE_GROUPS(usb_serial_port);
+
 static const struct tty_port_operations serial_port_ops = {
        .carrier_raised         = serial_port_carrier_raised,
        .dtr_rts                = serial_port_dtr_rts,
@@ -902,6 +917,7 @@ static int usb_serial_probe(struct usb_interface *interface,
                port->dev.driver = NULL;
                port->dev.bus = &usb_serial_bus_type;
                port->dev.release = &usb_serial_port_release;
+               port->dev.groups = usb_serial_port_groups;
                device_initialize(&port->dev);
        }
 
@@ -940,8 +956,9 @@ static int usb_serial_probe(struct usb_interface *interface,
                port = serial->port[i];
                if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL))
                        goto probe_error;
-               buffer_size = max_t(int, serial->type->bulk_out_size,
-                                               usb_endpoint_maxp(endpoint));
+               buffer_size = serial->type->bulk_out_size;
+               if (!buffer_size)
+                       buffer_size = usb_endpoint_maxp(endpoint);
                port->bulk_out_size = buffer_size;
                port->bulk_out_endpointAddress = endpoint->bEndpointAddress;
 
index dbc00e5..c85ea53 100644 (file)
@@ -113,6 +113,20 @@ UNUSUAL_DEV(0x0bc2, 0xab2a, 0x0000, 0x9999,
                USB_SC_DEVICE, USB_PR_DEVICE, NULL,
                US_FL_NO_ATA_1X),
 
+/* Reported-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> */
+UNUSUAL_DEV(0x13fd, 0x3940, 0x0000, 0x9999,
+               "Initio Corporation",
+               "",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_NO_ATA_1X),
+
+/* Reported-by: Tom Arild Naess <tanaess@gmail.com> */
+UNUSUAL_DEV(0x152d, 0x0539, 0x0000, 0x9999,
+               "JMicron",
+               "JMS539",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_NO_REPORT_OPCODES),
+
 /* Reported-by: Claudio Bizzarri <claudio.bizzarri@gmail.com> */
 UNUSUAL_DEV(0x152d, 0x0567, 0x0000, 0x9999,
                "JMicron",
index d468d02..5600c33 100644 (file)
@@ -889,6 +889,12 @@ static void usb_stor_scan_dwork(struct work_struct *work)
            !(us->fflags & US_FL_SCM_MULT_TARG)) {
                mutex_lock(&us->dev_mutex);
                us->max_lun = usb_stor_Bulk_max_lun(us);
+               /*
+                * Allow proper scanning of devices that present more than 8 LUNs,
+                * while not affecting other devices that may need the previous behavior.
+                */
+               if (us->max_lun >= 8)
+                       us_to_host(us)->max_lun = us->max_lun+1;
                mutex_unlock(&us->dev_mutex);
        }
        scsi_scan_host(us_to_host(us));
index f88bfdf..2027a27 100644 (file)
@@ -868,12 +868,14 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags,
                                func = vfio_pci_set_err_trigger;
                        break;
                }
+               break;
        case VFIO_PCI_REQ_IRQ_INDEX:
                switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
                case VFIO_IRQ_SET_ACTION_TRIGGER:
                        func = vfio_pci_set_req_trigger;
                        break;
                }
+               break;
        }
 
        if (!func)
index 8d4f3f1..71df240 100644 (file)
@@ -1956,10 +1956,9 @@ static int vhost_scsi_make_nexus(struct vhost_scsi_tpg *tpg,
                goto out;
        }
        /*
-        * Now register the TCM vhost virtual I_T Nexus as active with the
-        * call to __transport_register_session()
+        * Now register the TCM vhost virtual I_T Nexus as active.
         */
-       __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
+       transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
                        tv_nexus->tvn_se_sess, tv_nexus);
        tpg->tpg_nexus = tv_nexus;
 
index 32c0b6b..9362424 100644 (file)
@@ -599,6 +599,9 @@ static int clcdfb_of_get_mode(struct device *dev, struct device_node *endpoint,
 
        len = clcdfb_snprintf_mode(NULL, 0, mode);
        name = devm_kzalloc(dev, len + 1, GFP_KERNEL);
+       if (!name)
+               return -ENOMEM;
+
        clcdfb_snprintf_mode(name, len + 1, mode);
        mode->name = name;
 
index 9533859..868facd 100644 (file)
@@ -624,9 +624,6 @@ static struct fb_videomode *fb_create_modedb(unsigned char *edid, int *dbsize,
        int num = 0, i, first = 1;
        int ver, rev;
 
-       ver = edid[EDID_STRUCT_VERSION];
-       rev = edid[EDID_STRUCT_REVISION];
-
        mode = kzalloc(50 * sizeof(struct fb_videomode), GFP_KERNEL);
        if (mode == NULL)
                return NULL;
@@ -637,6 +634,9 @@ static struct fb_videomode *fb_create_modedb(unsigned char *edid, int *dbsize,
                return NULL;
        }
 
+       ver = edid[EDID_STRUCT_VERSION];
+       rev = edid[EDID_STRUCT_REVISION];
+
        *dbsize = 0;
 
        DPRINTK("   Detailed Timings\n");
index 5a2095a..1218655 100644 (file)
 #include <video/omapdss.h>
 #include "dss.h"
 
-static struct omap_dss_device *to_dss_device_sysfs(struct device *dev)
+static ssize_t display_name_show(struct omap_dss_device *dssdev, char *buf)
 {
-       struct omap_dss_device *dssdev = NULL;
-
-       for_each_dss_dev(dssdev) {
-               if (dssdev->dev == dev) {
-                       omap_dss_put_device(dssdev);
-                       return dssdev;
-               }
-       }
-
-       return NULL;
-}
-
-static ssize_t display_name_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
-{
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
-
        return snprintf(buf, PAGE_SIZE, "%s\n",
                        dssdev->name ?
                        dssdev->name : "");
 }
 
-static ssize_t display_enabled_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static ssize_t display_enabled_show(struct omap_dss_device *dssdev, char *buf)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
-
        return snprintf(buf, PAGE_SIZE, "%d\n",
                        omapdss_device_is_enabled(dssdev));
 }
 
-static ssize_t display_enabled_store(struct device *dev,
-               struct device_attribute *attr,
+static ssize_t display_enabled_store(struct omap_dss_device *dssdev,
                const char *buf, size_t size)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        int r;
        bool enable;
 
@@ -90,19 +68,16 @@ static ssize_t display_enabled_store(struct device *dev,
        return size;
 }
 
-static ssize_t display_tear_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static ssize_t display_tear_show(struct omap_dss_device *dssdev, char *buf)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        return snprintf(buf, PAGE_SIZE, "%d\n",
                        dssdev->driver->get_te ?
                        dssdev->driver->get_te(dssdev) : 0);
 }
 
-static ssize_t display_tear_store(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t size)
+static ssize_t display_tear_store(struct omap_dss_device *dssdev,
+       const char *buf, size_t size)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        int r;
        bool te;
 
@@ -120,10 +95,8 @@ static ssize_t display_tear_store(struct device *dev,
        return size;
 }
 
-static ssize_t display_timings_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static ssize_t display_timings_show(struct omap_dss_device *dssdev, char *buf)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        struct omap_video_timings t;
 
        if (!dssdev->driver->get_timings)
@@ -137,10 +110,9 @@ static ssize_t display_timings_show(struct device *dev,
                        t.y_res, t.vfp, t.vbp, t.vsw);
 }
 
-static ssize_t display_timings_store(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t size)
+static ssize_t display_timings_store(struct omap_dss_device *dssdev,
+       const char *buf, size_t size)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        struct omap_video_timings t = dssdev->panel.timings;
        int r, found;
 
@@ -176,10 +148,8 @@ static ssize_t display_timings_store(struct device *dev,
        return size;
 }
 
-static ssize_t display_rotate_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static ssize_t display_rotate_show(struct omap_dss_device *dssdev, char *buf)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        int rotate;
        if (!dssdev->driver->get_rotate)
                return -ENOENT;
@@ -187,10 +157,9 @@ static ssize_t display_rotate_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%u\n", rotate);
 }
 
-static ssize_t display_rotate_store(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t size)
+static ssize_t display_rotate_store(struct omap_dss_device *dssdev,
+       const char *buf, size_t size)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        int rot, r;
 
        if (!dssdev->driver->set_rotate || !dssdev->driver->get_rotate)
@@ -207,10 +176,8 @@ static ssize_t display_rotate_store(struct device *dev,
        return size;
 }
 
-static ssize_t display_mirror_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static ssize_t display_mirror_show(struct omap_dss_device *dssdev, char *buf)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        int mirror;
        if (!dssdev->driver->get_mirror)
                return -ENOENT;
@@ -218,10 +185,9 @@ static ssize_t display_mirror_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "%u\n", mirror);
 }
 
-static ssize_t display_mirror_store(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t size)
+static ssize_t display_mirror_store(struct omap_dss_device *dssdev,
+       const char *buf, size_t size)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        int r;
        bool mirror;
 
@@ -239,10 +205,8 @@ static ssize_t display_mirror_store(struct device *dev,
        return size;
 }
 
-static ssize_t display_wss_show(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static ssize_t display_wss_show(struct omap_dss_device *dssdev, char *buf)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        unsigned int wss;
 
        if (!dssdev->driver->get_wss)
@@ -253,10 +217,9 @@ static ssize_t display_wss_show(struct device *dev,
        return snprintf(buf, PAGE_SIZE, "0x%05x\n", wss);
 }
 
-static ssize_t display_wss_store(struct device *dev,
-               struct device_attribute *attr, const char *buf, size_t size)
+static ssize_t display_wss_store(struct omap_dss_device *dssdev,
+       const char *buf, size_t size)
 {
-       struct omap_dss_device *dssdev = to_dss_device_sysfs(dev);
        u32 wss;
        int r;
 
@@ -277,50 +240,94 @@ static ssize_t display_wss_store(struct device *dev,
        return size;
 }
 
-static DEVICE_ATTR(display_name, S_IRUGO, display_name_show, NULL);
-static DEVICE_ATTR(enabled, S_IRUGO|S_IWUSR,
+struct display_attribute {
+       struct attribute attr;
+       ssize_t (*show)(struct omap_dss_device *, char *);
+       ssize_t (*store)(struct omap_dss_device *, const char *, size_t);
+};
+
+#define DISPLAY_ATTR(_name, _mode, _show, _store) \
+       struct display_attribute display_attr_##_name = \
+       __ATTR(_name, _mode, _show, _store)
+
+static DISPLAY_ATTR(name, S_IRUGO, display_name_show, NULL);
+static DISPLAY_ATTR(display_name, S_IRUGO, display_name_show, NULL);
+static DISPLAY_ATTR(enabled, S_IRUGO|S_IWUSR,
                display_enabled_show, display_enabled_store);
-static DEVICE_ATTR(tear_elim, S_IRUGO|S_IWUSR,
+static DISPLAY_ATTR(tear_elim, S_IRUGO|S_IWUSR,
                display_tear_show, display_tear_store);
-static DEVICE_ATTR(timings, S_IRUGO|S_IWUSR,
+static DISPLAY_ATTR(timings, S_IRUGO|S_IWUSR,
                display_timings_show, display_timings_store);
-static DEVICE_ATTR(rotate, S_IRUGO|S_IWUSR,
+static DISPLAY_ATTR(rotate, S_IRUGO|S_IWUSR,
                display_rotate_show, display_rotate_store);
-static DEVICE_ATTR(mirror, S_IRUGO|S_IWUSR,
+static DISPLAY_ATTR(mirror, S_IRUGO|S_IWUSR,
                display_mirror_show, display_mirror_store);
-static DEVICE_ATTR(wss, S_IRUGO|S_IWUSR,
+static DISPLAY_ATTR(wss, S_IRUGO|S_IWUSR,
                display_wss_show, display_wss_store);
 
-static const struct attribute *display_sysfs_attrs[] = {
-       &dev_attr_display_name.attr,
-       &dev_attr_enabled.attr,
-       &dev_attr_tear_elim.attr,
-       &dev_attr_timings.attr,
-       &dev_attr_rotate.attr,
-       &dev_attr_mirror.attr,
-       &dev_attr_wss.attr,
+static struct attribute *display_sysfs_attrs[] = {
+       &display_attr_name.attr,
+       &display_attr_display_name.attr,
+       &display_attr_enabled.attr,
+       &display_attr_tear_elim.attr,
+       &display_attr_timings.attr,
+       &display_attr_rotate.attr,
+       &display_attr_mirror.attr,
+       &display_attr_wss.attr,
        NULL
 };
 
+static ssize_t display_attr_show(struct kobject *kobj, struct attribute *attr,
+               char *buf)
+{
+       struct omap_dss_device *dssdev;
+       struct display_attribute *display_attr;
+
+       dssdev = container_of(kobj, struct omap_dss_device, kobj);
+       display_attr = container_of(attr, struct display_attribute, attr);
+
+       if (!display_attr->show)
+               return -ENOENT;
+
+       return display_attr->show(dssdev, buf);
+}
+
+static ssize_t display_attr_store(struct kobject *kobj, struct attribute *attr,
+               const char *buf, size_t size)
+{
+       struct omap_dss_device *dssdev;
+       struct display_attribute *display_attr;
+
+       dssdev = container_of(kobj, struct omap_dss_device, kobj);
+       display_attr = container_of(attr, struct display_attribute, attr);
+
+       if (!display_attr->store)
+               return -ENOENT;
+
+       return display_attr->store(dssdev, buf, size);
+}
+
+static const struct sysfs_ops display_sysfs_ops = {
+       .show = display_attr_show,
+       .store = display_attr_store,
+};
+
+static struct kobj_type display_ktype = {
+       .sysfs_ops = &display_sysfs_ops,
+       .default_attrs = display_sysfs_attrs,
+};
+
 int display_init_sysfs(struct platform_device *pdev)
 {
        struct omap_dss_device *dssdev = NULL;
        int r;
 
        for_each_dss_dev(dssdev) {
-               struct kobject *kobj = &dssdev->dev->kobj;
-
-               r = sysfs_create_files(kobj, display_sysfs_attrs);
+               r = kobject_init_and_add(&dssdev->kobj, &display_ktype,
+                       &pdev->dev.kobj, dssdev->alias);
                if (r) {
                        DSSERR("failed to create sysfs files\n");
-                       goto err;
-               }
-
-               r = sysfs_create_link(&pdev->dev.kobj, kobj, dssdev->alias);
-               if (r) {
-                       sysfs_remove_files(kobj, display_sysfs_attrs);
-
-                       DSSERR("failed to create sysfs display link\n");
+                       omap_dss_put_device(dssdev);
                        goto err;
                }
        }
@@ -338,8 +345,12 @@ void display_uninit_sysfs(struct platform_device *pdev)
        struct omap_dss_device *dssdev = NULL;
 
        for_each_dss_dev(dssdev) {
-               sysfs_remove_link(&pdev->dev.kobj, dssdev->alias);
-               sysfs_remove_files(&dssdev->dev->kobj,
-                               display_sysfs_attrs);
+               if (kobject_name(&dssdev->kobj) == NULL)
+                       continue;
+
+               kobject_del(&dssdev->kobj);
+               kobject_put(&dssdev->kobj);
+
+               memset(&dssdev->kobj, 0, sizeof(dssdev->kobj));
        }
 }
index 0413157..6a356e3 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/balloon_compaction.h>
 #include <linux/oom.h>
+#include <linux/wait.h>
 
 /*
  * Balloon device works in 4K page units.  So each page is pointed to by
@@ -334,17 +335,25 @@ static int virtballoon_oom_notify(struct notifier_block *self,
 static int balloon(void *_vballoon)
 {
        struct virtio_balloon *vb = _vballoon;
+       DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
        set_freezable();
        while (!kthread_should_stop()) {
                s64 diff;
 
                try_to_freeze();
-               wait_event_interruptible(vb->config_change,
-                                        (diff = towards_target(vb)) != 0
-                                        || vb->need_stats_update
-                                        || kthread_should_stop()
-                                        || freezing(current));
+
+               add_wait_queue(&vb->config_change, &wait);
+               for (;;) {
+                       if ((diff = towards_target(vb)) != 0 ||
+                           vb->need_stats_update ||
+                           kthread_should_stop() ||
+                           freezing(current))
+                               break;
+                       wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
+               }
+               remove_wait_queue(&vb->config_change, &wait);
+
                if (vb->need_stats_update)
                        stats_handle_request(vb);
                if (diff > 0)
@@ -499,6 +508,8 @@ static int virtballoon_probe(struct virtio_device *vdev)
        if (err < 0)
                goto out_oom_notify;
 
+       virtio_device_ready(vdev);
+
        vb->thread = kthread_run(balloon, vb, "vballoon");
        if (IS_ERR(vb->thread)) {
                err = PTR_ERR(vb->thread);
index cad5698..6010d7e 100644 (file)
@@ -156,22 +156,95 @@ static void vm_get(struct virtio_device *vdev, unsigned offset,
                   void *buf, unsigned len)
 {
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
-       u8 *ptr = buf;
-       int i;
+       void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG;
+       u8 b;
+       __le16 w;
+       __le32 l;
 
-       for (i = 0; i < len; i++)
-               ptr[i] = readb(vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
+       if (vm_dev->version == 1) {
+               u8 *ptr = buf;
+               int i;
+
+               for (i = 0; i < len; i++)
+                       ptr[i] = readb(base + offset + i);
+               return;
+       }
+
+       switch (len) {
+       case 1:
+               b = readb(base + offset);
+               memcpy(buf, &b, sizeof b);
+               break;
+       case 2:
+               w = cpu_to_le16(readw(base + offset));
+               memcpy(buf, &w, sizeof w);
+               break;
+       case 4:
+               l = cpu_to_le32(readl(base + offset));
+               memcpy(buf, &l, sizeof l);
+               break;
+       case 8:
+               l = cpu_to_le32(readl(base + offset));
+               memcpy(buf, &l, sizeof l);
+               l = cpu_to_le32(ioread32(base + offset + sizeof l));
+               memcpy(buf + sizeof l, &l, sizeof l);
+               break;
+       default:
+               BUG();
+       }
 }
 
 static void vm_set(struct virtio_device *vdev, unsigned offset,
                   const void *buf, unsigned len)
 {
        struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
-       const u8 *ptr = buf;
-       int i;
+       void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG;
+       u8 b;
+       __le16 w;
+       __le32 l;
 
-       for (i = 0; i < len; i++)
-               writeb(ptr[i], vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
+       if (vm_dev->version == 1) {
+               const u8 *ptr = buf;
+               int i;
+
+               for (i = 0; i < len; i++)
+                       writeb(ptr[i], base + offset + i);
+
+               return;
+       }
+
+       switch (len) {
+       case 1:
+               memcpy(&b, buf, sizeof b);
+               writeb(b, base + offset);
+               break;
+       case 2:
+               memcpy(&w, buf, sizeof w);
+               writew(le16_to_cpu(w), base + offset);
+               break;
+       case 4:
+               memcpy(&l, buf, sizeof l);
+               writel(le32_to_cpu(l), base + offset);
+               break;
+       case 8:
+               memcpy(&l, buf, sizeof l);
+               writel(le32_to_cpu(l), base + offset);
+               memcpy(&l, buf + sizeof l, sizeof l);
+               writel(le32_to_cpu(l), base + offset + sizeof l);
+               break;
+       default:
+               BUG();
+       }
+}
+
+static u32 vm_generation(struct virtio_device *vdev)
+{
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
+
+       if (vm_dev->version == 1)
+               return 0;
+       else
+               return readl(vm_dev->base + VIRTIO_MMIO_CONFIG_GENERATION);
 }
 
 static u8 vm_get_status(struct virtio_device *vdev)
@@ -440,6 +513,7 @@ static const char *vm_bus_name(struct virtio_device *vdev)
 static const struct virtio_config_ops virtio_mmio_config_ops = {
        .get            = vm_get,
        .set            = vm_set,
+       .generation     = vm_generation,
        .get_status     = vm_get_status,
        .set_status     = vm_set_status,
        .reset          = vm_reset,
index c8def68..0deaa4f 100644 (file)
 #define PDC_WDT_MIN_TIMEOUT            1
 #define PDC_WDT_DEF_TIMEOUT            64
 
-static int heartbeat;
+static int heartbeat = PDC_WDT_DEF_TIMEOUT;
 module_param(heartbeat, int, 0);
-MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds. "
-       "(default = " __MODULE_STRING(PDC_WDT_DEF_TIMEOUT) ")");
+MODULE_PARM_DESC(heartbeat, "Watchdog heartbeats in seconds "
+       "(default=" __MODULE_STRING(PDC_WDT_DEF_TIMEOUT) ")");
 
 static bool nowayout = WATCHDOG_NOWAYOUT;
 module_param(nowayout, bool, 0);
@@ -191,6 +191,7 @@ static int pdc_wdt_probe(struct platform_device *pdev)
        pdc_wdt->wdt_dev.ops = &pdc_wdt_ops;
        pdc_wdt->wdt_dev.max_timeout = 1 << PDC_WDT_CONFIG_DELAY_MASK;
        pdc_wdt->wdt_dev.parent = &pdev->dev;
+       watchdog_set_drvdata(&pdc_wdt->wdt_dev, pdc_wdt);
 
        ret = watchdog_init_timeout(&pdc_wdt->wdt_dev, heartbeat, &pdev->dev);
        if (ret < 0) {
@@ -232,7 +233,6 @@ static int pdc_wdt_probe(struct platform_device *pdev)
        watchdog_set_nowayout(&pdc_wdt->wdt_dev, nowayout);
 
        platform_set_drvdata(pdev, pdc_wdt);
-       watchdog_set_drvdata(&pdc_wdt->wdt_dev, pdc_wdt);
 
        ret = watchdog_register_device(&pdc_wdt->wdt_dev);
        if (ret)
index a87f6df..938b987 100644 (file)
@@ -133,7 +133,7 @@ static int mtk_wdt_start(struct watchdog_device *wdt_dev)
        u32 reg;
        struct mtk_wdt_dev *mtk_wdt = watchdog_get_drvdata(wdt_dev);
        void __iomem *wdt_base = mtk_wdt->wdt_base;
-       u32 ret;
+       int ret;
 
        ret = mtk_wdt_set_timeout(wdt_dev, wdt_dev->timeout);
        if (ret < 0)
index b812462..94d9680 100644 (file)
@@ -55,6 +55,23 @@ config XEN_BALLOON_MEMORY_HOTPLUG
 
          In that case step 3 should be omitted.
 
+config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+       int "Hotplugged memory limit (in GiB) for a PV guest"
+       default 512 if X86_64
+       default 4 if X86_32
+       range 0 64 if X86_32
+       depends on XEN_HAVE_PVMMU
+       depends on XEN_BALLOON_MEMORY_HOTPLUG
+       help
+         Maximum amount of memory (in GiB) that a PV guest can be
+         expanded to when using memory hotplug.
+
+         A PV guest can have more memory than this limit if it is
+         started with a larger maximum.
+
+         This value is used to allocate enough space in internal
+         tables needed for physical memory administration.
+
 config XEN_SCRUB_PAGES
        bool "Scrub pages before returning them to system"
        depends on XEN_BALLOON
index 0b52d92..fd93369 100644 (file)
@@ -229,6 +229,29 @@ static enum bp_state reserve_additional_memory(long credit)
        balloon_hotplug = round_up(balloon_hotplug, PAGES_PER_SECTION);
        nid = memory_add_physaddr_to_nid(hotplug_start_paddr);
 
+#ifdef CONFIG_XEN_HAVE_PVMMU
+        /*
+         * add_memory() will build page tables for the new memory so
+         * the p2m must contain invalid entries so the correct
+         * non-present PTEs will be written.
+         *
+         * If a failure occurs, the original (identity) p2m entries
+         * are not restored since this region is now known not to
+         * conflict with any devices.
+         */ 
+       if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+               unsigned long pfn, i;
+
+               pfn = PFN_DOWN(hotplug_start_paddr);
+               for (i = 0; i < balloon_hotplug; i++) {
+                       if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) {
+                               pr_warn("set_phys_to_machine() failed, no memory added\n");
+                               return BP_ECANCELED;
+                       }
+                }
+       }
+#endif
+
        rc = add_memory(nid, hotplug_start_paddr, balloon_hotplug << PAGE_SHIFT);
 
        if (rc) {
index b4bca2d..70fba97 100644 (file)
@@ -526,20 +526,26 @@ static unsigned int __startup_pirq(unsigned int irq)
        pirq_query_unmask(irq);
 
        rc = set_evtchn_to_irq(evtchn, irq);
-       if (rc != 0) {
-               pr_err("irq%d: Failed to set port to irq mapping (%d)\n",
-                      irq, rc);
-               xen_evtchn_close(evtchn);
-               return 0;
-       }
+       if (rc)
+               goto err;
+
        bind_evtchn_to_cpu(evtchn, 0);
        info->evtchn = evtchn;
 
+       rc = xen_evtchn_port_setup(info);
+       if (rc)
+               goto err;
+
 out:
        unmask_evtchn(evtchn);
        eoi_pirq(irq_get_irq_data(irq));
 
        return 0;
+
+err:
+       pr_err("irq%d: Failed to set port to irq mapping (%d)\n", irq, rc);
+       xen_evtchn_close(evtchn);
+       return 0;
 }
 
 static unsigned int startup_pirq(struct irq_data *data)
index 46ae0f9..75fe3d4 100644 (file)
@@ -16,7 +16,7 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-static bool permissive;
+bool permissive;
 module_param(permissive, bool, 0644);
 
 /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word,
index e56c934..2e1d73d 100644 (file)
@@ -64,6 +64,8 @@ struct config_field_entry {
        void *data;
 };
 
+extern bool permissive;
+
 #define OFFSET(cfg_entry) ((cfg_entry)->base_offset+(cfg_entry)->field->offset)
 
 /* Add fields to a device - the add_fields macro expects to get a pointer to
index c5ee825..2d73693 100644 (file)
 #include "pciback.h"
 #include "conf_space.h"
 
+struct pci_cmd_info {
+       u16 val;
+};
+
 struct pci_bar_info {
        u32 val;
        u32 len_val;
@@ -20,22 +24,36 @@ struct pci_bar_info {
 #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
 #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
 
-static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
+/* Bits guests are allowed to control in permissive mode. */
+#define PCI_COMMAND_GUEST (PCI_COMMAND_MASTER|PCI_COMMAND_SPECIAL| \
+                          PCI_COMMAND_INVALIDATE|PCI_COMMAND_VGA_PALETTE| \
+                          PCI_COMMAND_WAIT|PCI_COMMAND_FAST_BACK)
+
+static void *command_init(struct pci_dev *dev, int offset)
 {
-       int i;
-       int ret;
-
-       ret = xen_pcibk_read_config_word(dev, offset, value, data);
-       if (!pci_is_enabled(dev))
-               return ret;
-
-       for (i = 0; i < PCI_ROM_RESOURCE; i++) {
-               if (dev->resource[i].flags & IORESOURCE_IO)
-                       *value |= PCI_COMMAND_IO;
-               if (dev->resource[i].flags & IORESOURCE_MEM)
-                       *value |= PCI_COMMAND_MEMORY;
+       struct pci_cmd_info *cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+       int err;
+
+       if (!cmd)
+               return ERR_PTR(-ENOMEM);
+
+       err = pci_read_config_word(dev, PCI_COMMAND, &cmd->val);
+       if (err) {
+               kfree(cmd);
+               return ERR_PTR(err);
        }
 
+       return cmd;
+}
+
+static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data)
+{
+       int ret = pci_read_config_word(dev, offset, value);
+       const struct pci_cmd_info *cmd = data;
+
+       *value &= PCI_COMMAND_GUEST;
+       *value |= cmd->val & ~PCI_COMMAND_GUEST;
+
        return ret;
 }
 
@@ -43,6 +61,8 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
 {
        struct xen_pcibk_dev_data *dev_data;
        int err;
+       u16 val;
+       struct pci_cmd_info *cmd = data;
 
        dev_data = pci_get_drvdata(dev);
        if (!pci_is_enabled(dev) && is_enable_cmd(value)) {
@@ -83,6 +103,19 @@ static int command_write(struct pci_dev *dev, int offset, u16 value, void *data)
                }
        }
 
+       cmd->val = value;
+
+       if (!permissive && (!dev_data || !dev_data->permissive))
+               return 0;
+
+       /* Only allow the guest to control certain bits. */
+       err = pci_read_config_word(dev, offset, &val);
+       if (err || val == value)
+               return err;
+
+       value &= PCI_COMMAND_GUEST;
+       value |= val & ~PCI_COMMAND_GUEST;
+
        return pci_write_config_word(dev, offset, value);
 }
 
@@ -282,6 +315,8 @@ static const struct config_field header_common[] = {
        {
         .offset    = PCI_COMMAND,
         .size      = 2,
+        .init      = command_init,
+        .release   = bar_release,
         .u.w.read  = command_read,
         .u.w.write = command_write,
        },
index 9faca6a..42bd55a 100644 (file)
@@ -1659,11 +1659,8 @@ static int scsiback_make_nexus(struct scsiback_tpg *tpg,
                         name);
                goto out;
        }
-       /*
-        * Now register the TCM pvscsi virtual I_T Nexus as active with the
-        * call to __transport_register_session()
-        */
-       __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
+       /* Now register the TCM pvscsi virtual I_T Nexus as active. */
+       transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
                        tv_nexus->tvn_se_sess, tv_nexus);
        tpg->tpg_nexus = tv_nexus;
 
index d2468bf..a91795e 100644 (file)
@@ -699,8 +699,10 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
        boff = tmp % bsize;
        if (boff) {
                bh = affs_bread_ino(inode, bidx, 0);
-               if (IS_ERR(bh))
-                       return PTR_ERR(bh);
+               if (IS_ERR(bh)) {
+                       written = PTR_ERR(bh);
+                       goto err_first_bh;
+               }
                tmp = min(bsize - boff, to - from);
                BUG_ON(boff + tmp > bsize || tmp > bsize);
                memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
@@ -712,14 +714,16 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
                bidx++;
        } else if (bidx) {
                bh = affs_bread_ino(inode, bidx - 1, 0);
-               if (IS_ERR(bh))
-                       return PTR_ERR(bh);
+               if (IS_ERR(bh)) {
+                       written = PTR_ERR(bh);
+                       goto err_first_bh;
+               }
        }
        while (from + bsize <= to) {
                prev_bh = bh;
                bh = affs_getemptyblk_ino(inode, bidx);
                if (IS_ERR(bh))
-                       goto out;
+                       goto err_bh;
                memcpy(AFFS_DATA(bh), data + from, bsize);
                if (buffer_new(bh)) {
                        AFFS_DATA_HEAD(bh)->ptype = cpu_to_be32(T_DATA);
@@ -751,7 +755,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
                prev_bh = bh;
                bh = affs_bread_ino(inode, bidx, 1);
                if (IS_ERR(bh))
-                       goto out;
+                       goto err_bh;
                tmp = min(bsize, to - from);
                BUG_ON(tmp > bsize);
                memcpy(AFFS_DATA(bh), data + from, tmp);
@@ -790,12 +794,13 @@ done:
        if (tmp > inode->i_size)
                inode->i_size = AFFS_I(inode)->mmu_private = tmp;
 
+err_first_bh:
        unlock_page(page);
        page_cache_release(page);
 
        return written;
 
-out:
+err_bh:
        bh = prev_bh;
        if (!written)
                written = PTR_ERR(bh);
index f8e52a1..a793f70 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -278,11 +278,11 @@ static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
        return 0;
 }
 
-static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
+static int aio_ring_remap(struct file *file, struct vm_area_struct *vma)
 {
        struct mm_struct *mm = vma->vm_mm;
        struct kioctx_table *table;
-       int i;
+       int i, res = -EINVAL;
 
        spin_lock(&mm->ioctx_lock);
        rcu_read_lock();
@@ -292,13 +292,17 @@ static void aio_ring_remap(struct file *file, struct vm_area_struct *vma)
 
                ctx = table->table[i];
                if (ctx && ctx->aio_ring_file == file) {
-                       ctx->user_id = ctx->mmap_base = vma->vm_start;
+                       if (!atomic_read(&ctx->dead)) {
+                               ctx->user_id = ctx->mmap_base = vma->vm_start;
+                               res = 0;
+                       }
                        break;
                }
        }
 
        rcu_read_unlock();
        spin_unlock(&mm->ioctx_lock);
+       return res;
 }
 
 static const struct file_operations aio_ring_fops = {
@@ -727,6 +731,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 err_cleanup:
        aio_nr_sub(ctx->max_reqs);
 err_ctx:
+       atomic_set(&ctx->dead, 1);
+       if (ctx->mmap_size)
+               vm_munmap(ctx->mmap_base, ctx->mmap_size);
        aio_free_ring(ctx);
 err:
        mutex_unlock(&ctx->ring_lock);
@@ -748,11 +755,12 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
 {
        struct kioctx_table *table;
 
-       if (atomic_xchg(&ctx->dead, 1))
+       spin_lock(&mm->ioctx_lock);
+       if (atomic_xchg(&ctx->dead, 1)) {
+               spin_unlock(&mm->ioctx_lock);
                return -EINVAL;
+       }
 
-
-       spin_lock(&mm->ioctx_lock);
        table = rcu_dereference_raw(mm->ioctx_table);
        WARN_ON(ctx != table->table[ctx->id]);
        table->table[ctx->id] = NULL;
index 84c3b00..f9c89ca 100644 (file)
@@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                    struct btrfs_root *root);
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root);
 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
 int btrfs_free_block_groups(struct btrfs_fs_info *info);
 int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
                                    loff_t actual_len, u64 *alloc_hint);
 int btrfs_inode_check_errors(struct inode *inode);
 extern const struct dentry_operations btrfs_dentry_operations;
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode);
+#endif
 
 /* ioctl.c */
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
index f79f385..639f266 100644 (file)
@@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
        }
        if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
                        + sizeof(struct btrfs_chunk)) {
-               printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n",
+               printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
                                btrfs_super_sys_array_size(sb),
                                sizeof(struct btrfs_disk_key)
                                + sizeof(struct btrfs_chunk));
index 6f08045..8b353ad 100644 (file)
@@ -3325,6 +3325,32 @@ out:
        return ret;
 }
 
+int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
+                           struct btrfs_root *root)
+{
+       struct btrfs_block_group_cache *cache, *tmp;
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       struct btrfs_path *path;
+
+       if (list_empty(&cur_trans->dirty_bgs) ||
+           !btrfs_test_opt(root, SPACE_CACHE))
+               return 0;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       /* Could add new block groups, use _safe just in case */
+       list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
+                                dirty_list) {
+               if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+                       cache_save_setup(cache, trans, path);
+       }
+
+       btrfs_free_path(path);
+       return 0;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root)
 {
@@ -5110,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
        num_bytes = ALIGN(num_bytes, root->sectorsize);
 
        spin_lock(&BTRFS_I(inode)->lock);
-       BTRFS_I(inode)->outstanding_extents++;
+       nr_extents = (unsigned)div64_u64(num_bytes +
+                                        BTRFS_MAX_EXTENT_SIZE - 1,
+                                        BTRFS_MAX_EXTENT_SIZE);
+       BTRFS_I(inode)->outstanding_extents += nr_extents;
+       nr_extents = 0;
 
        if (BTRFS_I(inode)->outstanding_extents >
            BTRFS_I(inode)->reserved_extents)
@@ -5255,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
        if (dropped > 0)
                to_free += btrfs_calc_trans_metadata_size(root, dropped);
 
+       if (btrfs_test_is_dummy_root(root))
+               return;
+
        trace_btrfs_space_reservation(root->fs_info, "delalloc",
                                      btrfs_ino(inode), to_free, 0);
        if (root->fs_info->quota_enabled) {
index c7233ff..d688cfe 100644 (file)
@@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
 
                /* Should be safe to release our pages at this point */
                btrfs_release_extent_buffer_page(eb);
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+               if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
+                       __free_extent_buffer(eb);
+                       return 1;
+               }
+#endif
                call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
                return 1;
        }
index da828cf..d2e732d 100644 (file)
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
 
 static int btrfs_dirty_inode(struct inode *inode);
 
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+void btrfs_test_inode_set_ops(struct inode *inode)
+{
+       BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+}
+#endif
+
 static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
                                     struct inode *inode,  struct inode *dir,
                                     const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
                u64 new_size;
 
                /*
-                * We need the largest size of the remaining extent to see if we
-                * need to add a new outstanding extent.  Think of the following
-                * case
-                *
-                * [MEAX_EXTENT_SIZEx2 - 4k][4k]
-                *
-                * The new_size would just be 4k and we'd think we had enough
-                * outstanding extents for this if we only took one side of the
-                * split, same goes for the other direction.  We need to see if
-                * the larger size still is the same amount of extents as the
-                * original size, because if it is we need to add a new
-                * outstanding extent.  But if we split up and the larger size
-                * is less than the original then we are good to go since we've
-                * already accounted for the extra extent in our original
-                * accounting.
+                * See the explanation in btrfs_merge_extent_hook, the same
+                * applies here, just in reverse.
                 */
                new_size = orig->end - split + 1;
-               if ((split - orig->start) > new_size)
-                       new_size = split - orig->start;
-
-               num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+               num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
                                        BTRFS_MAX_EXTENT_SIZE);
-               if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                             BTRFS_MAX_EXTENT_SIZE) < num_extents)
+               new_size = split - orig->start;
+               num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                       BTRFS_MAX_EXTENT_SIZE);
+               if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
+                             BTRFS_MAX_EXTENT_SIZE) >= num_extents)
                        return;
        }
 
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
        if (!(other->state & EXTENT_DELALLOC))
                return;
 
-       old_size = other->end - other->start + 1;
-       new_size = old_size + (new->end - new->start + 1);
+       if (new->start > other->start)
+               new_size = new->end - other->start + 1;
+       else
+               new_size = other->end - new->start + 1;
 
        /* we're not bigger than the max, unreserve the space and go */
        if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
        }
 
        /*
-        * If we grew by another max_extent, just return, we want to keep that
-        * reserved amount.
+        * We have to add up either side to figure out how many extents were
+        * accounted for before we merged into one big extent.  If the number of
+        * extents we accounted for is <= the amount we need for the new range
+        * then we can return, otherwise drop.  Think of it like this
+        *
+        * [ 4k][MAX_SIZE]
+        *
+        * So we've grown the extent by a MAX_SIZE extent, this would mean we
+        * need 2 outstanding extents, on one side we have 1 and the other side
+        * we have 1 so they are == and we can return.  But in this case
+        *
+        * [MAX_SIZE+4k][MAX_SIZE+4k]
+        *
+        * Each range on their own accounts for 2 extents, but merged together
+        * they are only 3 extents worth of accounting, so we need to drop in
+        * this case.
         */
+       old_size = other->end - other->start + 1;
        num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
                                BTRFS_MAX_EXTENT_SIZE);
+       old_size = new->end - new->start + 1;
+       num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
+                                BTRFS_MAX_EXTENT_SIZE);
+
        if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
-                     BTRFS_MAX_EXTENT_SIZE) > num_extents)
+                     BTRFS_MAX_EXTENT_SIZE) >= num_extents)
                return;
 
        spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
                        spin_unlock(&BTRFS_I(inode)->lock);
                }
 
+               /* For sanity tests */
+               if (btrfs_test_is_dummy_root(root))
+                       return;
+
                __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
                                     root->fs_info->delalloc_batch);
                spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
                    root != root->fs_info->tree_root)
                        btrfs_delalloc_release_metadata(inode, len);
 
+               /* For sanity tests. */
+               if (btrfs_test_is_dummy_root(root))
+                       return;
+
                if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
                    && do_list && !(state->state & EXTENT_NORESERVE))
                        btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        u64 start = iblock << inode->i_blkbits;
        u64 lockstart, lockend;
        u64 len = bh_result->b_size;
-       u64 orig_len = len;
+       u64 *outstanding_extents = NULL;
        int unlock_bits = EXTENT_LOCKED;
        int ret = 0;
 
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        lockstart = start;
        lockend = start + len - 1;
 
+       if (current->journal_info) {
+               /*
+                * Need to pull our outstanding extents and set journal_info to NULL so
+                * that anything that needs to check if there's a transaction doesn't get
+                * confused.
+                */
+               outstanding_extents = current->journal_info;
+               current->journal_info = NULL;
+       }
+
        /*
         * If this errors out it's because we couldn't invalidate pagecache for
         * this range and we need to fallback to buffered.
@@ -7348,11 +7381,20 @@ unlock:
                if (start + len > i_size_read(inode))
                        i_size_write(inode, start + len);
 
-               if (len < orig_len) {
+               /*
+                * If we have an outstanding_extents count still set then we're
+                * within our reservation, otherwise we need to adjust our inode
+                * counter appropriately.
+                */
+               if (*outstanding_extents) {
+                       (*outstanding_extents)--;
+               } else {
                        spin_lock(&BTRFS_I(inode)->lock);
                        BTRFS_I(inode)->outstanding_extents++;
                        spin_unlock(&BTRFS_I(inode)->lock);
                }
+
+               current->journal_info = outstanding_extents;
                btrfs_free_reserved_data_space(inode, len);
        }
 
@@ -7376,6 +7418,8 @@ unlock:
 unlock_err:
        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
                         unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+       if (outstanding_extents)
+               current->journal_info = outstanding_extents;
        return ret;
 }
 
@@ -8075,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
+       u64 outstanding_extents = 0;
        size_t count = 0;
        int flags = 0;
        bool wakeup = true;
@@ -8112,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                ret = btrfs_delalloc_reserve_space(inode, count);
                if (ret)
                        goto out;
+               outstanding_extents = div64_u64(count +
+                                               BTRFS_MAX_EXTENT_SIZE - 1,
+                                               BTRFS_MAX_EXTENT_SIZE);
+
+               /*
+                * We need to know how many extents we reserved so that we can
+                * do the accounting properly if we go over the number we
+                * originally calculated.  Abuse current->journal_info for this.
+                */
+               current->journal_info = &outstanding_extents;
        } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
                                     &BTRFS_I(inode)->runtime_flags)) {
                inode_dio_done(inode);
@@ -8124,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                        iter, offset, btrfs_get_blocks_direct, NULL,
                        btrfs_submit_direct, flags);
        if (rw & WRITE) {
+               current->journal_info = NULL;
                if (ret < 0 && ret != -EIOCBQUEUED)
                        btrfs_delalloc_release_space(inode, count);
                else if (ret >= 0 && (size_t)ret < count)
index 97159a8..058c79e 100644 (file)
@@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
        if (oper1->seq < oper2->seq)
                return -1;
        if (oper1->seq > oper2->seq)
-               return -1;
+               return 1;
        if (oper1->ref_root < oper2->ref_root)
                return -1;
        if (oper1->ref_root > oper2->ref_root)
index a116b55..054fc0d 100644 (file)
@@ -911,6 +911,197 @@ out:
        return ret;
 }
 
+static int test_extent_accounting(void)
+{
+       struct inode *inode = NULL;
+       struct btrfs_root *root = NULL;
+       int ret = -ENOMEM;
+
+       inode = btrfs_new_test_inode();
+       if (!inode) {
+               test_msg("Couldn't allocate inode\n");
+               return ret;
+       }
+
+       root = btrfs_alloc_dummy_root();
+       if (IS_ERR(root)) {
+               test_msg("Couldn't allocate root\n");
+               goto out;
+       }
+
+       root->fs_info = btrfs_alloc_dummy_fs_info();
+       if (!root->fs_info) {
+               test_msg("Couldn't allocate dummy fs info\n");
+               goto out;
+       }
+
+       BTRFS_I(inode)->root = root;
+       btrfs_test_inode_set_ops(inode);
+
+       /* [BTRFS_MAX_EXTENT_SIZE] */
+       BTRFS_I(inode)->outstanding_extents++;
+       ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
+                                       NULL);
+       if (ret) {
+               test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+               goto out;
+       }
+       if (BTRFS_I(inode)->outstanding_extents != 1) {
+               ret = -EINVAL;
+               test_msg("Miscount, wanted 1, got %u\n",
+                        BTRFS_I(inode)->outstanding_extents);
+               goto out;
+       }
+
+       /* [BTRFS_MAX_EXTENT_SIZE][4k] */
+       BTRFS_I(inode)->outstanding_extents++;
+       ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
+                                       BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
+       if (ret) {
+               test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+               goto out;
+       }
+       if (BTRFS_I(inode)->outstanding_extents != 2) {
+               ret = -EINVAL;
+               test_msg("Miscount, wanted 2, got %u\n",
+                        BTRFS_I(inode)->outstanding_extents);
+               goto out;
+       }
+
+       /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
+       ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
+                              BTRFS_MAX_EXTENT_SIZE >> 1,
+                              (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+                              EXTENT_DELALLOC | EXTENT_DIRTY |
+                              EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
+                              NULL, GFP_NOFS);
+       if (ret) {
+               test_msg("clear_extent_bit returned %d\n", ret);
+               goto out;
+       }
+       if (BTRFS_I(inode)->outstanding_extents != 2) {
+               ret = -EINVAL;
+               test_msg("Miscount, wanted 2, got %u\n",
+                        BTRFS_I(inode)->outstanding_extents);
+               goto out;
+       }
+
+       /* [BTRFS_MAX_EXTENT_SIZE][4K] */
+       BTRFS_I(inode)->outstanding_extents++;
+       ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
+                                       (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
+                                       NULL);
+       if (ret) {
+               test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+               goto out;
+       }
+       if (BTRFS_I(inode)->outstanding_extents != 2) {
+               ret = -EINVAL;
+               test_msg("Miscount, wanted 2, got %u\n",
+                        BTRFS_I(inode)->outstanding_extents);
+               goto out;
+       }
+
+       /*
+        * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
+        *
+        * I'm artificially adding 2 to outstanding_extents because in the
+        * buffered IO case we'd add things up as we go, but I don't feel like
+        * doing that here, this isn't the interesting case we want to test.
+        */
+       BTRFS_I(inode)->outstanding_extents += 2;
+       ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
+                                       (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
+                                       NULL);
+       if (ret) {
+               test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+               goto out;
+       }
+       if (BTRFS_I(inode)->outstanding_extents != 4) {
+               ret = -EINVAL;
+               test_msg("Miscount, wanted 4, got %u\n",
+                        BTRFS_I(inode)->outstanding_extents);
+               goto out;
+       }
+
+       /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
+       BTRFS_I(inode)->outstanding_extents++;
+       ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
+                                       BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+       if (ret) {
+               test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+               goto out;
+       }
+       if (BTRFS_I(inode)->outstanding_extents != 3) {
+               ret = -EINVAL;
+               test_msg("Miscount, wanted 3, got %u\n",
+                        BTRFS_I(inode)->outstanding_extents);
+               goto out;
+       }
+
+       /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
+       ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
+                              BTRFS_MAX_EXTENT_SIZE+4096,
+                              BTRFS_MAX_EXTENT_SIZE+8191,
+                              EXTENT_DIRTY | EXTENT_DELALLOC |
+                              EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+                              NULL, GFP_NOFS);
+       if (ret) {
+               test_msg("clear_extent_bit returned %d\n", ret);
+               goto out;
+       }
+       if (BTRFS_I(inode)->outstanding_extents != 4) {
+               ret = -EINVAL;
+               test_msg("Miscount, wanted 4, got %u\n",
+                        BTRFS_I(inode)->outstanding_extents);
+               goto out;
+       }
+
+       /*
+        * Refill the hole again just for good measure, because I thought it
+        * might fail and I'd rather satisfy my paranoia at this point.
+        */
+       BTRFS_I(inode)->outstanding_extents++;
+       ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
+                                       BTRFS_MAX_EXTENT_SIZE+8191, NULL);
+       if (ret) {
+               test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
+               goto out;
+       }
+       if (BTRFS_I(inode)->outstanding_extents != 3) {
+               ret = -EINVAL;
+               test_msg("Miscount, wanted 3, got %u\n",
+                        BTRFS_I(inode)->outstanding_extents);
+               goto out;
+       }
+
+       /* Empty */
+       ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+                              EXTENT_DIRTY | EXTENT_DELALLOC |
+                              EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+                              NULL, GFP_NOFS);
+       if (ret) {
+               test_msg("clear_extent_bit returned %d\n", ret);
+               goto out;
+       }
+       if (BTRFS_I(inode)->outstanding_extents) {
+               ret = -EINVAL;
+               test_msg("Miscount, wanted 0, got %u\n",
+                        BTRFS_I(inode)->outstanding_extents);
+               goto out;
+       }
+       ret = 0;
+out:
+       if (ret)
+               clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
+                                EXTENT_DIRTY | EXTENT_DELALLOC |
+                                EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
+                                NULL, GFP_NOFS);
+       iput(inode);
+       btrfs_free_dummy_root(root);
+       return ret;
+}
+
 int btrfs_test_inodes(void)
 {
        int ret;
@@ -924,5 +1115,9 @@ int btrfs_test_inodes(void)
        if (ret)
                return ret;
        test_msg("Running hole first btrfs_get_extent test\n");
-       return test_hole_first();
+       ret = test_hole_first();
+       if (ret)
+               return ret;
+       test_msg("Running outstanding_extents tests\n");
+       return test_extent_accounting();
 }
index 88e51ad..8be4278 100644 (file)
@@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
        u64 old_root_bytenr;
        u64 old_root_used;
        struct btrfs_root *tree_root = root->fs_info->tree_root;
-       bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
 
        old_root_used = btrfs_root_used(&root->root_item);
-       btrfs_write_dirty_block_groups(trans, root);
 
        while (1) {
                old_root_bytenr = btrfs_root_bytenr(&root->root_item);
                if (old_root_bytenr == root->node->start &&
-                   old_root_used == btrfs_root_used(&root->root_item) &&
-                   (!extent_root ||
-                    list_empty(&trans->transaction->dirty_bgs)))
+                   old_root_used == btrfs_root_used(&root->root_item))
                        break;
 
                btrfs_set_root_node(&root->root_item, root->node);
@@ -1044,14 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
                        return ret;
 
                old_root_used = btrfs_root_used(&root->root_item);
-               if (extent_root) {
-                       ret = btrfs_write_dirty_block_groups(trans, root);
-                       if (ret)
-                               return ret;
-               }
-               ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
-               if (ret)
-                       return ret;
        }
 
        return 0;
@@ -1068,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                                         struct btrfs_root *root)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
+       struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
        struct list_head *next;
        struct extent_buffer *eb;
        int ret;
@@ -1095,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
+       ret = btrfs_setup_space_cache(trans, root);
+       if (ret)
+               return ret;
+
        /* run_qgroups might have added some more refs */
        ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
        if (ret)
                return ret;
-
+again:
        while (!list_empty(&fs_info->dirty_cowonly_roots)) {
                next = fs_info->dirty_cowonly_roots.next;
                list_del_init(next);
@@ -1112,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                ret = update_cowonly_root(trans, root);
                if (ret)
                        return ret;
+               ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+               if (ret)
+                       return ret;
        }
 
+       while (!list_empty(dirty_bgs)) {
+               ret = btrfs_write_dirty_block_groups(trans, root);
+               if (ret)
+                       return ret;
+               ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
+               if (ret)
+                       return ret;
+       }
+
+       if (!list_empty(&fs_info->dirty_cowonly_roots))
+               goto again;
+
        list_add_tail(&fs_info->extent_root->dirty_list,
                      &trans->transaction->switch_commits);
        btrfs_after_dev_replace_commit(fs_info);
@@ -1811,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
                wait_for_commit(root, cur_trans);
 
+               if (unlikely(cur_trans->aborted))
+                       ret = cur_trans->aborted;
+
                btrfs_put_transaction(cur_trans);
 
                return ret;
index 4ac7445..aa0dc25 100644 (file)
@@ -1,6 +1,9 @@
 /*
  *   fs/cifs/cifsencrypt.c
  *
+ *   Encryption and hashing operations relating to NTLM, NTLMv2.  See MS-NLMP
+ *   for more detailed information
+ *
  *   Copyright (C) International Business Machines  Corp., 2005,2013
  *   Author(s): Steve French (sfrench@us.ibm.com)
  *
@@ -515,7 +518,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
                                 __func__);
                        return rc;
                }
-       } else if (ses->serverName) {
+       } else {
+               /* We use ses->serverName if no domain name available */
                len = strlen(ses->serverName);
 
                server = kmalloc(2 + (len * 2), GFP_KERNEL);
index d3aa999..480cf9c 100644 (file)
@@ -1599,6 +1599,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                                pr_warn("CIFS: username too long\n");
                                goto cifs_parse_mount_err;
                        }
+
+                       kfree(vol->username);
                        vol->username = kstrdup(string, GFP_KERNEL);
                        if (!vol->username)
                                goto cifs_parse_mount_err;
@@ -1700,6 +1702,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                                goto cifs_parse_mount_err;
                        }
 
+                       kfree(vol->domainname);
                        vol->domainname = kstrdup(string, GFP_KERNEL);
                        if (!vol->domainname) {
                                pr_warn("CIFS: no memory for domainname\n");
@@ -1731,6 +1734,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                        }
 
                         if (strncasecmp(string, "default", 7) != 0) {
+                               kfree(vol->iocharset);
                                vol->iocharset = kstrdup(string,
                                                         GFP_KERNEL);
                                if (!vol->iocharset) {
@@ -2913,8 +2917,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
                 * calling name ends in null (byte 16) from old smb
                 * convention.
                 */
-               if (server->workstation_RFC1001_name &&
-                   server->workstation_RFC1001_name[0] != 0)
+               if (server->workstation_RFC1001_name[0] != 0)
                        rfc1002mangle(ses_init_buf->trailer.
                                      session_req.calling_name,
                                      server->workstation_RFC1001_name,
@@ -3692,6 +3695,12 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
 #endif /* CIFS_WEAK_PW_HASH */
                rc = SMBNTencrypt(tcon->password, ses->server->cryptkey,
                                        bcc_ptr, nls_codepage);
+               if (rc) {
+                       cifs_dbg(FYI, "%s Can't generate NTLM rsp. Error: %d\n",
+                                __func__, rc);
+                       cifs_buf_release(smb_buffer);
+                       return rc;
+               }
 
                bcc_ptr += CIFS_AUTH_RESP_SIZE;
                if (ses->capabilities & CAP_UNICODE) {
index a94b3e6..ca30c39 100644 (file)
@@ -1823,6 +1823,7 @@ refind_writable:
                        cifsFileInfo_put(inv_file);
                        spin_lock(&cifs_file_list_lock);
                        ++refind;
+                       inv_file = NULL;
                        goto refind_writable;
                }
        }
index 2d4f372..3e126d7 100644 (file)
@@ -771,6 +771,8 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
                                cifs_buf_release(srchinf->ntwrk_buf_start);
                        }
                        kfree(srchinf);
+                       if (rc)
+                               goto cgii_exit;
        } else
                goto cgii_exit;
 
index 689f035..22dfdf1 100644 (file)
@@ -322,7 +322,7 @@ smb2_get_data_area_len(int *off, int *len, struct smb2_hdr *hdr)
 
        /* return pointer to beginning of data area, ie offset from SMB start */
        if ((*off != 0) && (*len != 0))
-               return hdr->ProtocolId + *off;
+               return (char *)(&hdr->ProtocolId[0]) + *off;
        else
                return NULL;
 }
index 96b5d40..eab05e1 100644 (file)
@@ -684,7 +684,8 @@ smb2_clone_range(const unsigned int xid,
 
                        /* No need to change MaxChunks since already set to 1 */
                        chunk_sizes_updated = true;
-               }
+               } else
+                       goto cchunk_out;
        }
 
 cchunk_out:
index 3417340..65cd7a8 100644 (file)
@@ -1218,7 +1218,7 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        struct smb2_ioctl_req *req;
        struct smb2_ioctl_rsp *rsp;
        struct TCP_Server_Info *server;
-       struct cifs_ses *ses = tcon->ses;
+       struct cifs_ses *ses;
        struct kvec iov[2];
        int resp_buftype;
        int num_iovecs;
@@ -1233,6 +1233,11 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        if (plen)
                *plen = 0;
 
+       if (tcon)
+               ses = tcon->ses;
+       else
+               return -EIO;
+
        if (ses && (ses->server))
                server = ses->server;
        else
@@ -1296,14 +1301,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
        rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base;
 
        if ((rc != 0) && (rc != -EINVAL)) {
-               if (tcon)
-                       cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
+               cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
                goto ioctl_exit;
        } else if (rc == -EINVAL) {
                if ((opcode != FSCTL_SRV_COPYCHUNK_WRITE) &&
                    (opcode != FSCTL_SRV_COPYCHUNK)) {
-                       if (tcon)
-                               cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
+                       cifs_stats_fail_inc(tcon, SMB2_IOCTL_HE);
                        goto ioctl_exit;
                }
        }
@@ -1629,7 +1632,7 @@ SMB2_flush(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
 
        rc = SendReceive2(xid, ses, iov, 1, &resp_buftype, 0);
 
-       if ((rc != 0) && tcon)
+       if (rc != 0)
                cifs_stats_fail_inc(tcon, SMB2_FLUSH_HE);
 
        free_rsp_buf(resp_buftype, iov[0].iov_base);
@@ -2114,7 +2117,7 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
        struct kvec iov[2];
        int rc = 0;
        int len;
-       int resp_buftype;
+       int resp_buftype = CIFS_NO_BUFFER;
        unsigned char *bufptr;
        struct TCP_Server_Info *server;
        struct cifs_ses *ses = tcon->ses;
index e907052..32a8bbd 100644 (file)
@@ -53,6 +53,18 @@ struct wb_writeback_work {
        struct completion *done;        /* set if the caller waits */
 };
 
+/*
+ * If an inode is constantly having its pages dirtied, but then the
+ * updates stop dirtytime_expire_interval seconds in the past, it's
+ * possible for the worst case time between when an inode has its
+ * timestamps updated and when they finally get written out to be two
+ * dirtytime_expire_intervals.  We set the default to 12 hours (in
+ * seconds), which means most of the time inodes will have their
+ * timestamps written to disk after 12 hours, but in the worst case a
+ * few inodes might not have their timestamps updated for 24 hours.
+ */
+unsigned int dirtytime_expire_interval = 12 * 60 * 60;
+
 /**
  * writeback_in_progress - determine whether there is writeback in progress
  * @bdi: the device's backing_dev_info structure.
@@ -275,8 +287,8 @@ static int move_expired_inodes(struct list_head *delaying_queue,
 
        if ((flags & EXPIRE_DIRTY_ATIME) == 0)
                older_than_this = work->older_than_this;
-       else if ((work->reason == WB_REASON_SYNC) == 0) {
-               expire_time = jiffies - (HZ * 86400);
+       else if (!work->for_sync) {
+               expire_time = jiffies - (dirtytime_expire_interval * HZ);
                older_than_this = &expire_time;
        }
        while (!list_empty(delaying_queue)) {
@@ -458,6 +470,7 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
                 */
                redirty_tail(inode, wb);
        } else if (inode->i_state & I_DIRTY_TIME) {
+               inode->dirtied_when = jiffies;
                list_move(&inode->i_wb_list, &wb->b_dirty_time);
        } else {
                /* The inode is clean. Remove from writeback lists. */
@@ -505,12 +518,17 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
        spin_lock(&inode->i_lock);
 
        dirty = inode->i_state & I_DIRTY;
-       if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
-            (inode->i_state & I_DIRTY_TIME)) ||
-           (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
-               dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
-               trace_writeback_lazytime(inode);
-       }
+       if (inode->i_state & I_DIRTY_TIME) {
+               if ((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
+                   unlikely(inode->i_state & I_DIRTY_TIME_EXPIRED) ||
+                   unlikely(time_after(jiffies,
+                                       (inode->dirtied_time_when +
+                                        dirtytime_expire_interval * HZ)))) {
+                       dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
+                       trace_writeback_lazytime(inode);
+               }
+       } else
+               inode->i_state &= ~I_DIRTY_TIME_EXPIRED;
        inode->i_state &= ~dirty;
 
        /*
@@ -1131,6 +1149,56 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
        rcu_read_unlock();
 }
 
+/*
+ * Wake up bdi's periodically to make sure dirtytime inodes get
+ * written back periodically.  We deliberately do *not* check the
+ * b_dirty_time list in wb_has_dirty_io(), since this would cause the
+ * kernel to be constantly waking up once there are any dirtytime
+ * inodes on the system.  So instead we define a separate delayed work
+ * function which gets called much more rarely.  (By default, only
+ * once every 12 hours.)
+ *
+ * If there is any other write activity going on in the file system,
+ * this function won't be necessary.  But if the only thing that has
+ * happened on the file system is a dirtytime inode caused by an atime
+ * update, we need this infrastructure below to make sure that inode
+ * eventually gets pushed out to disk.
+ */
+static void wakeup_dirtytime_writeback(struct work_struct *w);
+static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
+
+static void wakeup_dirtytime_writeback(struct work_struct *w)
+{
+       struct backing_dev_info *bdi;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
+               if (list_empty(&bdi->wb.b_dirty_time))
+                       continue;
+               bdi_wakeup_thread(bdi);
+       }
+       rcu_read_unlock();
+       schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+}
+
+static int __init start_dirtytime_writeback(void)
+{
+       schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
+       return 0;
+}
+__initcall(start_dirtytime_writeback);
+
+int dirtytime_interval_handler(struct ctl_table *table, int write,
+                              void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       int ret;
+
+       ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+       if (ret == 0 && write)
+               mod_delayed_work(system_wq, &dirtytime_work, 0);
+       return ret;
+}
+
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
        if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1269,8 +1337,13 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        }
 
                        inode->dirtied_when = jiffies;
-                       list_move(&inode->i_wb_list, dirtytime ?
-                                 &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
+                       if (dirtytime)
+                               inode->dirtied_time_when = jiffies;
+                       if (inode->i_state & (I_DIRTY_INODE | I_DIRTY_PAGES))
+                               list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
+                       else
+                               list_move(&inode->i_wb_list,
+                                         &bdi->wb.b_dirty_time);
                        spin_unlock(&bdi->wb.list_lock);
                        trace_writeback_dirty_inode_enqueue(inode);
 
index ed19a7d..39706c5 100644 (file)
@@ -890,8 +890,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 
        newpage = buf->page;
 
-       if (WARN_ON(!PageUptodate(newpage)))
-               return -EIO;
+       if (!PageUptodate(newpage))
+               SetPageUptodate(newpage);
 
        ClearPageMappedToDisk(newpage);
 
@@ -1353,6 +1353,17 @@ static ssize_t fuse_dev_do_read(struct fuse_conn *fc, struct file *file,
        return err;
 }
 
+static int fuse_dev_open(struct inode *inode, struct file *file)
+{
+       /*
+        * The fuse device's file's private_data is used to hold
+        * the fuse_conn(ection) when it is mounted, and is used to
+        * keep track of whether the file has been mounted already.
+        */
+       file->private_data = NULL;
+       return 0;
+}
+
 static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
                              unsigned long nr_segs, loff_t pos)
 {
@@ -1797,6 +1808,9 @@ copy_finish:
 static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
                       unsigned int size, struct fuse_copy_state *cs)
 {
+       /* Don't try to move pages (yet) */
+       cs->move_pages = 0;
+
        switch (code) {
        case FUSE_NOTIFY_POLL:
                return fuse_notify_poll(fc, size, cs);
@@ -2217,6 +2231,7 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
 
 const struct file_operations fuse_dev_operations = {
        .owner          = THIS_MODULE,
+       .open           = fuse_dev_open,
        .llseek         = no_llseek,
        .read           = do_sync_read,
        .aio_read       = fuse_dev_read,
index 6e560d5..754fdf8 100644 (file)
@@ -131,13 +131,16 @@ skip:
        hfs_bnode_write(node, entry, data_off + key_len, entry_len);
        hfs_bnode_dump(node);
 
-       if (new_node) {
-               /* update parent key if we inserted a key
-                * at the start of the first node
-                */
-               if (!rec && new_node != node)
-                       hfs_brec_update_parent(fd);
+       /*
+        * update parent key if we inserted a key
+        * at the start of the node and it is not the new node
+        */
+       if (!rec && new_node != node) {
+               hfs_bnode_read_key(node, fd->search_key, data_off + size);
+               hfs_brec_update_parent(fd);
+       }
 
+       if (new_node) {
                hfs_bnode_put(fd->bnode);
                if (!new_node->parent) {
                        hfs_btree_inc_height(tree);
@@ -168,9 +171,6 @@ skip:
                goto again;
        }
 
-       if (!rec)
-               hfs_brec_update_parent(fd);
-
        return 0;
 }
 
@@ -370,6 +370,8 @@ again:
        if (IS_ERR(parent))
                return PTR_ERR(parent);
        __hfs_brec_find(parent, fd, hfs_find_rec_by_key);
+       if (fd->record < 0)
+               return -ENOENT;
        hfs_bnode_dump(parent);
        rec = fd->record;
 
index b684e8a..2bacb99 100644 (file)
@@ -207,6 +207,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
                goto out_free;
        }
 
+       of->event = atomic_read(&of->kn->attr.open->event);
        ops = kernfs_ops(of->kn);
        if (ops->read)
                len = ops->read(of, buf, len, *ppos);
index f1bad68..40bc384 100644 (file)
@@ -1388,9 +1388,8 @@ any_leases_conflict(struct inode *inode, struct file_lock *breaker)
 int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 {
        int error = 0;
-       struct file_lock *new_fl;
        struct file_lock_context *ctx = inode->i_flctx;
-       struct file_lock *fl;
+       struct file_lock *new_fl, *fl, *tmp;
        unsigned long break_time;
        int want_write = (mode & O_ACCMODE) != O_RDONLY;
        LIST_HEAD(dispose);
@@ -1420,7 +1419,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
                        break_time++;   /* so that 0 means no break time */
        }
 
-       list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
+       list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
                if (!leases_conflict(fl, new_fl))
                        continue;
                if (want_write) {
@@ -1728,7 +1727,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
                        break;
                }
        }
-       trace_generic_delete_lease(inode, fl);
+       trace_generic_delete_lease(inode, victim);
        if (victim)
                error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
        spin_unlock(&ctx->flc_lock);
index cdbc78c..03d647b 100644 (file)
@@ -137,7 +137,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
        seg->offset = iomap.offset;
        seg->length = iomap.length;
 
-       dprintk("GET: %lld:%lld %d\n", bex->foff, bex->len, bex->es);
+       dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
        return 0;
 
 out_error:
index 9da89fd..9aa2796 100644 (file)
@@ -122,19 +122,19 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
 
                p = xdr_decode_hyper(p, &bex.foff);
                if (bex.foff & (block_size - 1)) {
-                       dprintk("%s: unaligned offset %lld\n",
+                       dprintk("%s: unaligned offset 0x%llx\n",
                                __func__, bex.foff);
                        goto fail;
                }
                p = xdr_decode_hyper(p, &bex.len);
                if (bex.len & (block_size - 1)) {
-                       dprintk("%s: unaligned length %lld\n",
+                       dprintk("%s: unaligned length 0x%llx\n",
                                __func__, bex.foff);
                        goto fail;
                }
                p = xdr_decode_hyper(p, &bex.soff);
                if (bex.soff & (block_size - 1)) {
-                       dprintk("%s: unaligned disk offset %lld\n",
+                       dprintk("%s: unaligned disk offset 0x%llx\n",
                                __func__, bex.soff);
                        goto fail;
                }
index 3c1bfa1..6904213 100644 (file)
@@ -118,7 +118,7 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
 {
        struct super_block *sb = exp->ex_path.mnt->mnt_sb;
 
-       if (exp->ex_flags & NFSEXP_NOPNFS)
+       if (!(exp->ex_flags & NFSEXP_PNFS))
                return;
 
        if (sb->s_export_op->get_uuid &&
@@ -440,15 +440,14 @@ nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
                        list_move_tail(&lp->lo_perstate, reaplist);
                        return;
                }
-               end = seg->offset;
+               lo->offset = layout_end(seg);
        } else {
                /* retain the whole layout segment on a split. */
                if (layout_end(seg) < end) {
                        dprintk("%s: split not supported\n", __func__);
                        return;
                }
-
-               lo->offset = layout_end(seg);
+               end = seg->offset;
        }
 
        layout_update_len(lo, end);
@@ -513,6 +512,9 @@ nfsd4_return_client_layouts(struct svc_rqst *rqstp,
 
        spin_lock(&clp->cl_lock);
        list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
+               if (ls->ls_layout_type != lrp->lr_layout_type)
+                       continue;
+
                if (lrp->lr_return_type == RETURN_FSID &&
                    !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
                                   &cstate->current_fh.fh_handle))
@@ -587,7 +589,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
 
        rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
 
-       nfsd4_cb_layout_fail(ls);
+       trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
 
        printk(KERN_WARNING
                "nfsd: client %s failed to respond to layout recall. "
index d30bea8..92b9d97 100644 (file)
@@ -1237,8 +1237,8 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
                nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
 
        gdp->gd_notify_types &= ops->notify_types;
-       exp_put(exp);
 out:
+       exp_put(exp);
        return nfserr;
 }
 
index d2f2c37..8ba1d88 100644 (file)
@@ -3221,7 +3221,7 @@ alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
        } else
                nfs4_free_openowner(&oo->oo_owner);
        spin_unlock(&clp->cl_lock);
-       return oo;
+       return ret;
 }
 
 static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
@@ -5062,7 +5062,7 @@ alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
        } else
                nfs4_free_lockowner(&lo->lo_owner);
        spin_unlock(&clp->cl_lock);
-       return lo;
+       return ret;
 }
 
 static void
index df5e66c..5fb7e78 100644 (file)
@@ -1562,7 +1562,11 @@ nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
        p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
        p = xdr_decode_hyper(p, &lgp->lg_seg.length);
        p = xdr_decode_hyper(p, &lgp->lg_minlength);
-       nfsd4_decode_stateid(argp, &lgp->lg_sid);
+
+       status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
+       if (status)
+               return status;
+
        READ_BUF(4);
        lgp->lg_maxcount = be32_to_cpup(p++);
 
@@ -1580,7 +1584,11 @@ nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
        p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
        p = xdr_decode_hyper(p, &lcp->lc_seg.length);
        lcp->lc_reclaim = be32_to_cpup(p++);
-       nfsd4_decode_stateid(argp, &lcp->lc_sid);
+
+       status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
+       if (status)
+               return status;
+
        READ_BUF(4);
        lcp->lc_newoffset = be32_to_cpup(p++);
        if (lcp->lc_newoffset) {
@@ -1628,7 +1636,11 @@ nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
                READ_BUF(16);
                p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
                p = xdr_decode_hyper(p, &lrp->lr_seg.length);
-               nfsd4_decode_stateid(argp, &lrp->lr_sid);
+
+               status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
+               if (status)
+                       return status;
+
                READ_BUF(4);
                lrp->lrf_body_len = be32_to_cpup(p++);
                if (lrp->lrf_body_len > 0) {
@@ -4123,7 +4135,7 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
                return nfserr_resource;
        *p++ = cpu_to_be32(lrp->lrs_present);
        if (lrp->lrs_present)
-               nfsd4_encode_stateid(xdr, &lrp->lr_sid);
+               return nfsd4_encode_stateid(xdr, &lrp->lr_sid);
        return nfs_ok;
 }
 #endif /* CONFIG_NFSD_PNFS */
index 83a9694..46ec934 100644 (file)
@@ -165,13 +165,17 @@ int nfsd_reply_cache_init(void)
 {
        unsigned int hashsize;
        unsigned int i;
+       int status = 0;
 
        max_drc_entries = nfsd_cache_size_limit();
        atomic_set(&num_drc_entries, 0);
        hashsize = nfsd_hashsize(max_drc_entries);
        maskbits = ilog2(hashsize);
 
-       register_shrinker(&nfsd_reply_cache_shrinker);
+       status = register_shrinker(&nfsd_reply_cache_shrinker);
+       if (status)
+               return status;
+
        drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
                                        0, 0, NULL);
        if (!drc_slab)
index 469086b..0c3f303 100644 (file)
@@ -1907,6 +1907,7 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
                                             struct the_nilfs *nilfs)
 {
        struct nilfs_inode_info *ii, *n;
+       int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
        int defer_iput = false;
 
        spin_lock(&nilfs->ns_inode_lock);
@@ -1919,10 +1920,10 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
                brelse(ii->i_bh);
                ii->i_bh = NULL;
                list_del_init(&ii->i_dirty);
-               if (!ii->vfs_inode.i_nlink) {
+               if (!ii->vfs_inode.i_nlink || during_mount) {
                        /*
-                        * Defer calling iput() to avoid a deadlock
-                        * over I_SYNC flag for inodes with i_nlink == 0
+                        * Defer calling iput() to avoid deadlocks if
+                        * i_nlink == 0 or mount is not yet finished.
                         */
                        list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
                        defer_iput = true;
index 9a66ff7..d2f97ec 100644 (file)
@@ -143,7 +143,8 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
            !(marks_mask & FS_ISDIR & ~marks_ignored_mask))
                return false;
 
-       if (event_mask & marks_mask & ~marks_ignored_mask)
+       if (event_mask & FAN_ALL_OUTGOING_EVENTS & marks_mask &
+                                ~marks_ignored_mask)
                return true;
 
        return false;
index 46e0d4e..ba1790e 100644 (file)
@@ -2394,7 +2394,6 @@ relock:
                /*
                 * for completing the rest of the request.
                 */
-               *ppos += written;
                count -= written;
                written_buffered = generic_perform_write(file, from, *ppos);
                /*
@@ -2409,7 +2408,6 @@ relock:
                        goto out_dio;
                }
 
-               iocb->ki_pos = *ppos + written_buffered;
                /* We need to ensure that the page cache pages are written to
                 * disk and invalidated to preserve the expected O_DIRECT
                 * semantics.
@@ -2418,6 +2416,7 @@ relock:
                ret = filemap_write_and_wait_range(file->f_mapping, *ppos,
                                endbyte);
                if (ret == 0) {
+                       iocb->ki_pos = *ppos + written_buffered;
                        written += written_buffered;
                        invalidate_mapping_pages(mapping,
                                        *ppos >> PAGE_CACHE_SHIFT,
@@ -2440,10 +2439,14 @@ out_dio:
        /* buffered aio wouldn't have proper lock coverage today */
        BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
 
+       if (unlikely(written <= 0))
+               goto no_sync;
+
        if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
            ((file->f_flags & O_DIRECT) && !direct_io)) {
-               ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
-                                              *ppos + count - 1);
+               ret = filemap_fdatawrite_range(file->f_mapping,
+                                              iocb->ki_pos - written,
+                                              iocb->ki_pos - 1);
                if (ret < 0)
                        written = ret;
 
@@ -2454,10 +2457,12 @@ out_dio:
                }
 
                if (!ret)
-                       ret = filemap_fdatawait_range(file->f_mapping, *ppos,
-                                                     *ppos + count - 1);
+                       ret = filemap_fdatawait_range(file->f_mapping,
+                                                     iocb->ki_pos - written,
+                                                     iocb->ki_pos - 1);
        }
 
+no_sync:
        /*
         * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
         * function pointer which is called when o_direct io completes so that
index 8490c64..460c6c3 100644 (file)
@@ -502,7 +502,7 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
 
 static inline int ocfs2_supports_append_dio(struct ocfs2_super *osb)
 {
-       if (osb->s_feature_ro_compat & OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+       if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
                return 1;
        return 0;
 }
index 20e37a3..db64ce2 100644 (file)
                                         | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
                                         | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
                                         | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG  \
-                                        | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)
+                                        | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO \
+                                        | OCFS2_FEATURE_INCOMPAT_APPEND_DIO)
 #define OCFS2_FEATURE_RO_COMPAT_SUPP   (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
                                         | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
-                                        | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA \
-                                        | OCFS2_FEATURE_RO_COMPAT_APPEND_DIO)
+                                        | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
 
 /*
  * Heartbeat-only devices are missing journals and other files.  The
  */
 #define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO     0x4000
 
+/*
+ * Append Direct IO support
+ */
+#define OCFS2_FEATURE_INCOMPAT_APPEND_DIO      0x8000
+
 /*
  * backup superblock flag is used to indicate that this volume
  * has backup superblocks.
 #define OCFS2_FEATURE_RO_COMPAT_USRQUOTA       0x0002
 #define OCFS2_FEATURE_RO_COMPAT_GRPQUOTA       0x0004
 
-/*
- * Append Direct IO support
- */
-#define OCFS2_FEATURE_RO_COMPAT_APPEND_DIO     0x0008
 
 /* The byte offset of the first backup block will be 1G.
  * The following will be 4G, 16G, 64G, 256G and 1T.
index b90952f..5f0d199 100644 (file)
@@ -529,8 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data)
 {
        struct ovl_fs *ufs = sb->s_fs_info;
 
-       if (!(*flags & MS_RDONLY) &&
-           (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY)))
+       if (!(*flags & MS_RDONLY) && !ufs->upper_mnt)
                return -EROFS;
 
        return 0;
@@ -615,9 +614,19 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
                        break;
 
                default:
+                       pr_err("overlayfs: unrecognized mount option \"%s\" or missing value\n", p);
                        return -EINVAL;
                }
        }
+
+       /* Workdir is useless in non-upper mount */
+       if (!config->upperdir && config->workdir) {
+               pr_info("overlayfs: option \"workdir=%s\" is useless in a non-upper mount, ignore\n",
+                       config->workdir);
+               kfree(config->workdir);
+               config->workdir = NULL;
+       }
+
        return 0;
 }
 
@@ -837,7 +846,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
        sb->s_stack_depth = 0;
        if (ufs->config.upperdir) {
-               /* FIXME: workdir is not needed for a R/O mount */
                if (!ufs->config.workdir) {
                        pr_err("overlayfs: missing 'workdir'\n");
                        goto out_free_config;
@@ -847,6 +855,13 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
                if (err)
                        goto out_free_config;
 
+               /* Upper fs should not be r/o */
+               if (upperpath.mnt->mnt_sb->s_flags & MS_RDONLY) {
+                       pr_err("overlayfs: upper fs is r/o, try multi-lower layers mount\n");
+                       err = -EINVAL;
+                       goto out_put_upperpath;
+               }
+
                err = ovl_mount_dir(ufs->config.workdir, &workpath);
                if (err)
                        goto out_put_upperpath;
@@ -869,8 +884,14 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 
        err = -EINVAL;
        stacklen = ovl_split_lowerdirs(lowertmp);
-       if (stacklen > OVL_MAX_STACK)
+       if (stacklen > OVL_MAX_STACK) {
+               pr_err("overlayfs: too many lower directries, limit is %d\n",
+                      OVL_MAX_STACK);
                goto out_free_lowertmp;
+       } else if (!ufs->config.upperdir && stacklen == 1) {
+               pr_err("overlayfs: at least 2 lowerdir are needed while upperdir nonexistent\n");
+               goto out_free_lowertmp;
+       }
 
        stack = kcalloc(stacklen, sizeof(struct path), GFP_KERNEL);
        if (!stack)
@@ -932,8 +953,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
                ufs->numlower++;
        }
 
-       /* If the upper fs is r/o or nonexistent, we mark overlayfs r/o too */
-       if (!ufs->upper_mnt || (ufs->upper_mnt->mnt_sb->s_flags & MS_RDONLY))
+       /* If the upper fs is nonexistent, we mark overlayfs r/o too */
+       if (!ufs->upper_mnt)
                sb->s_flags |= MS_RDONLY;
 
        sb->s_d_op = &ovl_dentry_operations;
index 956b75d..6dee68d 100644 (file)
@@ -1325,6 +1325,9 @@ out:
 
 static int pagemap_open(struct inode *inode, struct file *file)
 {
+       /* do not disclose physical addresses: attack vector */
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
        pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
                        "to stop being page-shift some time soon. See the "
                        "linux/Documentation/vm/pagemap.txt for details.\n");
index 2fbc804..226f772 100644 (file)
@@ -13,7 +13,8 @@
 
 #define PULL_DISABLE           (1 << 3)
 #define INPUT_EN               (1 << 5)
-#define SLEWCTRL_FAST          (1 << 6)
+#define SLEWCTRL_SLOW          (1 << 6)
+#define SLEWCTRL_FAST          0
 
 /* update macro depending on INPUT_EN and PULL_ENA */
 #undef PIN_OUTPUT
index 9c2e4f8..5f4d018 100644 (file)
@@ -18,7 +18,8 @@
 #define PULL_DISABLE           (1 << 16)
 #define PULL_UP                        (1 << 17)
 #define INPUT_EN               (1 << 18)
-#define SLEWCTRL_FAST          (1 << 19)
+#define SLEWCTRL_SLOW          (1 << 19)
+#define SLEWCTRL_FAST          0
 #define DS0_PULL_UP_DOWN_EN    (1 << 27)
 
 #define PIN_OUTPUT             (PULL_DISABLE)
index b3f45a5..e596675 100644 (file)
 #include <linux/workqueue.h>
 
 struct arch_timer_kvm {
-#ifdef CONFIG_KVM_ARM_TIMER
        /* Is the timer enabled */
        bool                    enabled;
 
        /* Virtual offset */
        cycle_t                 cntvoff;
-#endif
 };
 
 struct arch_timer_cpu {
-#ifdef CONFIG_KVM_ARM_TIMER
        /* Registers: control register, timer value */
        u32                             cntv_ctl;       /* Saved/restored */
        cycle_t                         cntv_cval;      /* Saved/restored */
@@ -55,10 +52,8 @@ struct arch_timer_cpu {
 
        /* Timer IRQ */
        const struct kvm_irq_level      *irq;
-#endif
 };
 
-#ifdef CONFIG_KVM_ARM_TIMER
 int kvm_timer_hyp_init(void);
 void kvm_timer_enable(struct kvm *kvm);
 void kvm_timer_init(struct kvm *kvm);
@@ -72,30 +67,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu);
 u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
-#else
-static inline int kvm_timer_hyp_init(void)
-{
-       return 0;
-};
-
-static inline void kvm_timer_enable(struct kvm *kvm) {}
-static inline void kvm_timer_init(struct kvm *kvm) {}
-static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
-                                       const struct kvm_irq_level *irq) {}
-static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
-{
-       return 0;
-}
-
-static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
-{
-       return 0;
-}
-#endif
+bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
 
 #endif
index 7c55dd5..133ea00 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/irqreturn.h>
 #include <linux/spinlock.h>
 #include <linux/types.h>
+#include <kvm/iodev.h>
 
 #define VGIC_NR_IRQS_LEGACY    256
 #define VGIC_NR_SGIS           16
@@ -114,6 +115,7 @@ struct vgic_ops {
        void    (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr);
        u64     (*get_elrsr)(const struct kvm_vcpu *vcpu);
        u64     (*get_eisr)(const struct kvm_vcpu *vcpu);
+       void    (*clear_eisr)(struct kvm_vcpu *vcpu);
        u32     (*get_interrupt_status)(const struct kvm_vcpu *vcpu);
        void    (*enable_underflow)(struct kvm_vcpu *vcpu);
        void    (*disable_underflow)(struct kvm_vcpu *vcpu);
@@ -139,16 +141,21 @@ struct vgic_params {
 };
 
 struct vgic_vm_ops {
-       bool    (*handle_mmio)(struct kvm_vcpu *, struct kvm_run *,
-                              struct kvm_exit_mmio *);
        bool    (*queue_sgi)(struct kvm_vcpu *, int irq);
        void    (*add_sgi_source)(struct kvm_vcpu *, int irq, int source);
        int     (*init_model)(struct kvm *);
        int     (*map_resources)(struct kvm *, const struct vgic_params *);
 };
 
+struct vgic_io_device {
+       gpa_t addr;
+       int len;
+       const struct vgic_io_range *reg_ranges;
+       struct kvm_vcpu *redist_vcpu;
+       struct kvm_io_device dev;
+};
+
 struct vgic_dist {
-#ifdef CONFIG_KVM_ARM_VGIC
        spinlock_t              lock;
        bool                    in_kernel;
        bool                    ready;
@@ -196,6 +203,9 @@ struct vgic_dist {
        /* Level-triggered interrupt queued on VCPU interface */
        struct vgic_bitmap      irq_queued;
 
+       /* Interrupt was active when unqueued from VCPU interface */
+       struct vgic_bitmap      irq_active;
+
        /* Interrupt priority. Not used yet. */
        struct vgic_bytemap     irq_priority;
 
@@ -236,8 +246,12 @@ struct vgic_dist {
        /* Bitmap indicating which CPU has something pending */
        unsigned long           *irq_pending_on_cpu;
 
+       /* Bitmap indicating which CPU has active IRQs */
+       unsigned long           *irq_active_on_cpu;
+
        struct vgic_vm_ops      vm_ops;
-#endif
+       struct vgic_io_device   dist_iodev;
+       struct vgic_io_device   *redist_iodevs;
 };
 
 struct vgic_v2_cpu_if {
@@ -265,13 +279,18 @@ struct vgic_v3_cpu_if {
 };
 
 struct vgic_cpu {
-#ifdef CONFIG_KVM_ARM_VGIC
        /* per IRQ to LR mapping */
        u8              *vgic_irq_lr_map;
 
-       /* Pending interrupts on this VCPU */
+       /* Pending/active/both interrupts on this VCPU */
        DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS);
+       DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS);
+       DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS);
+
+       /* Pending/active/both shared interrupts, dynamically sized */
        unsigned long   *pending_shared;
+       unsigned long   *active_shared;
+       unsigned long   *pend_act_shared;
 
        /* Bitmap of used/free list registers */
        DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS);
@@ -284,7 +303,6 @@ struct vgic_cpu {
                struct vgic_v2_cpu_if   vgic_v2;
                struct vgic_v3_cpu_if   vgic_v3;
        };
-#endif
 };
 
 #define LR_EMPTY       0xff
@@ -294,10 +312,7 @@ struct vgic_cpu {
 
 struct kvm;
 struct kvm_vcpu;
-struct kvm_run;
-struct kvm_exit_mmio;
 
-#ifdef CONFIG_KVM_ARM_VGIC
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
 int kvm_vgic_map_resources(struct kvm *kvm);
@@ -311,8 +326,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
                        bool level);
 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg);
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                     struct kvm_exit_mmio *mmio);
+int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 
 #define irqchip_in_kernel(k)   (!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)    (!!((k)->arch.vgic.nr_cpus))
@@ -334,84 +348,4 @@ static inline int vgic_v3_probe(struct device_node *vgic_node,
 }
 #endif
 
-#else
-static inline int kvm_vgic_hyp_init(void)
-{
-       return 0;
-}
-
-static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
-{
-       return 0;
-}
-
-static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
-{
-       return -ENXIO;
-}
-
-static inline int kvm_vgic_map_resources(struct kvm *kvm)
-{
-       return 0;
-}
-
-static inline int kvm_vgic_create(struct kvm *kvm, u32 type)
-{
-       return 0;
-}
-
-static inline void kvm_vgic_destroy(struct kvm *kvm)
-{
-}
-
-static inline void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-}
-
-static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
-static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {}
-static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {}
-
-static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid,
-                                     unsigned int irq_num, bool level)
-{
-       return 0;
-}
-
-static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
-{
-       return 0;
-}
-
-static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                                   struct kvm_exit_mmio *mmio)
-{
-       return false;
-}
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-       return 0;
-}
-
-static inline bool vgic_initialized(struct kvm *kvm)
-{
-       return true;
-}
-
-static inline bool vgic_ready(struct kvm *kvm)
-{
-       return true;
-}
-
-static inline int kvm_vgic_get_max_vcpus(void)
-{
-       return KVM_MAX_VCPUS;
-}
-#endif
-
 #endif
similarity index 66%
rename from virt/kvm/iodev.h
rename to include/kvm/iodev.h
index 12fd3ca..a6d208b 100644 (file)
@@ -9,17 +9,17 @@
  * GNU General Public License for more details.
  *
  * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __KVM_IODEV_H__
 #define __KVM_IODEV_H__
 
 #include <linux/kvm_types.h>
-#include <asm/errno.h>
+#include <linux/errno.h>
 
 struct kvm_io_device;
+struct kvm_vcpu;
 
 /**
  * kvm_io_device_ops are called under kvm slots_lock.
@@ -27,11 +27,13 @@ struct kvm_io_device;
  * or non-zero to have it passed to the next device.
  **/
 struct kvm_io_device_ops {
-       int (*read)(struct kvm_io_device *this,
+       int (*read)(struct kvm_vcpu *vcpu,
+                   struct kvm_io_device *this,
                    gpa_t addr,
                    int len,
                    void *val);
-       int (*write)(struct kvm_io_device *this,
+       int (*write)(struct kvm_vcpu *vcpu,
+                    struct kvm_io_device *this,
                     gpa_t addr,
                     int len,
                     const void *val);
@@ -49,16 +51,20 @@ static inline void kvm_iodevice_init(struct kvm_io_device *dev,
        dev->ops = ops;
 }
 
-static inline int kvm_iodevice_read(struct kvm_io_device *dev,
-                                   gpa_t addr, int l, void *v)
+static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu,
+                                   struct kvm_io_device *dev, gpa_t addr,
+                                   int l, void *v)
 {
-       return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP;
+       return dev->ops->read ? dev->ops->read(vcpu, dev, addr, l, v)
+                               : -EOPNOTSUPP;
 }
 
-static inline int kvm_iodevice_write(struct kvm_io_device *dev,
-                                    gpa_t addr, int l, const void *v)
+static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu,
+                                    struct kvm_io_device *dev, gpa_t addr,
+                                    int l, const void *v)
 {
-       return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP;
+       return dev->ops->write ? dev->ops->write(vcpu, dev, addr, l, v)
+                                : -EOPNOTSUPP;
 }
 
 static inline void kvm_iodevice_destructor(struct kvm_io_device *dev)
index c294e3e..a1b25e3 100644 (file)
@@ -181,7 +181,9 @@ enum rq_flag_bits {
        __REQ_ELVPRIV,          /* elevator private data attached */
        __REQ_FAILED,           /* set if the request failed */
        __REQ_QUIET,            /* don't worry about errors */
-       __REQ_PREEMPT,          /* set for "ide_preempt" requests */
+       __REQ_PREEMPT,          /* set for "ide_preempt" requests and also
+                                  for requests for which the SCSI "quiesce"
+                                  state must be ignored. */
        __REQ_ALLOCED,          /* request came from our alloc pool */
        __REQ_COPY_USER,        /* contains copies of user pages */
        __REQ_FLUSH_SEQ,        /* request for flush sequence */
index 8381bbf..68c16a6 100644 (file)
@@ -125,6 +125,19 @@ int clk_set_phase(struct clk *clk, int degrees);
  */
 int clk_get_phase(struct clk *clk);
 
+/**
+ * clk_is_match - check if two clk's point to the same hardware clock
+ * @p: clk compared against q
+ * @q: clk compared against p
+ *
+ * Returns true if the two struct clk pointers both point to the same hardware
+ * clock node. Put differently, returns true if struct clk *p and struct clk *q
+ * share the same struct clk_core object.
+ *
+ * Returns false otherwise. Note that two NULL clks are treated as matching.
+ */
+bool clk_is_match(const struct clk *p, const struct clk *q);
+
 #else
 
 static inline long clk_get_accuracy(struct clk *clk)
@@ -142,6 +155,11 @@ static inline long clk_get_phase(struct clk *clk)
        return -ENOTSUPP;
 }
 
+static inline bool clk_is_match(const struct clk *p, const struct clk *q)
+{
+       return p == q;
+}
+
 #endif
 
 /**
index 2e4cb67..96c280b 100644 (file)
@@ -8,64 +8,69 @@
 #ifndef _LINUX_CLOCKCHIPS_H
 #define _LINUX_CLOCKCHIPS_H
 
-/* Clock event notification values */
-enum clock_event_nofitiers {
-       CLOCK_EVT_NOTIFY_ADD,
-       CLOCK_EVT_NOTIFY_BROADCAST_ON,
-       CLOCK_EVT_NOTIFY_BROADCAST_OFF,
-       CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
-       CLOCK_EVT_NOTIFY_BROADCAST_ENTER,
-       CLOCK_EVT_NOTIFY_BROADCAST_EXIT,
-       CLOCK_EVT_NOTIFY_SUSPEND,
-       CLOCK_EVT_NOTIFY_RESUME,
-       CLOCK_EVT_NOTIFY_CPU_DYING,
-       CLOCK_EVT_NOTIFY_CPU_DEAD,
-};
-
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
 
-#include <linux/clocksource.h>
-#include <linux/cpumask.h>
-#include <linux/ktime.h>
-#include <linux/notifier.h>
+# include <linux/clocksource.h>
+# include <linux/cpumask.h>
+# include <linux/ktime.h>
+# include <linux/notifier.h>
 
 struct clock_event_device;
 struct module;
 
-/* Clock event mode commands */
+/* Clock event mode commands for legacy ->set_mode(): OBSOLETE */
 enum clock_event_mode {
-       CLOCK_EVT_MODE_UNUSED = 0,
+       CLOCK_EVT_MODE_UNUSED,
        CLOCK_EVT_MODE_SHUTDOWN,
        CLOCK_EVT_MODE_PERIODIC,
        CLOCK_EVT_MODE_ONESHOT,
        CLOCK_EVT_MODE_RESUME,
 };
 
+/*
+ * Possible states of a clock event device.
+ *
+ * DETACHED:   Device is not used by clockevents core. Initial state or can be
+ *             reached from SHUTDOWN.
+ * SHUTDOWN:   Device is powered-off. Can be reached from PERIODIC or ONESHOT.
+ * PERIODIC:   Device is programmed to generate events periodically. Can be
+ *             reached from DETACHED or SHUTDOWN.
+ * ONESHOT:    Device is programmed to generate event only once. Can be reached
+ *             from DETACHED or SHUTDOWN.
+ */
+enum clock_event_state {
+       CLOCK_EVT_STATE_DETACHED,
+       CLOCK_EVT_STATE_SHUTDOWN,
+       CLOCK_EVT_STATE_PERIODIC,
+       CLOCK_EVT_STATE_ONESHOT,
+};
+
 /*
  * Clock event features
  */
-#define CLOCK_EVT_FEAT_PERIODIC                0x000001
-#define CLOCK_EVT_FEAT_ONESHOT         0x000002
-#define CLOCK_EVT_FEAT_KTIME           0x000004
+# define CLOCK_EVT_FEAT_PERIODIC       0x000001
+# define CLOCK_EVT_FEAT_ONESHOT                0x000002
+# define CLOCK_EVT_FEAT_KTIME          0x000004
+
 /*
- * x86(64) specific misfeatures:
+ * x86(64) specific (mis)features:
  *
  * - Clockevent source stops in C3 State and needs broadcast support.
  * - Local APIC timer is used as a dummy device.
  */
-#define CLOCK_EVT_FEAT_C3STOP          0x000008
-#define CLOCK_EVT_FEAT_DUMMY           0x000010
+# define CLOCK_EVT_FEAT_C3STOP         0x000008
+# define CLOCK_EVT_FEAT_DUMMY          0x000010
 
 /*
  * Core shall set the interrupt affinity dynamically in broadcast mode
  */
-#define CLOCK_EVT_FEAT_DYNIRQ          0x000020
-#define CLOCK_EVT_FEAT_PERCPU          0x000040
+# define CLOCK_EVT_FEAT_DYNIRQ         0x000020
+# define CLOCK_EVT_FEAT_PERCPU         0x000040
 
 /*
  * Clockevent device is based on a hrtimer for broadcast
  */
-#define CLOCK_EVT_FEAT_HRTIMER         0x000080
+# define CLOCK_EVT_FEAT_HRTIMER                0x000080
 
 /**
  * struct clock_event_device - clock event device descriptor
@@ -78,10 +83,15 @@ enum clock_event_mode {
  * @min_delta_ns:      minimum delta value in ns
  * @mult:              nanosecond to cycles multiplier
  * @shift:             nanoseconds to cycles divisor (power of two)
- * @mode:              operating mode assigned by the management code
+ * @mode:              operating mode, relevant only to ->set_mode(), OBSOLETE
+ * @state:             current state of the device, assigned by the core code
  * @features:          features
  * @retries:           number of forced programming retries
- * @set_mode:          set mode function
+ * @set_mode:          legacy set mode function, only for modes <= CLOCK_EVT_MODE_RESUME.
+ * @set_state_periodic:        switch state to periodic, if !set_mode
+ * @set_state_oneshot: switch state to oneshot, if !set_mode
+ * @set_state_shutdown:        switch state to shutdown, if !set_mode
+ * @tick_resume:       resume clkevt device, if !set_mode
  * @broadcast:         function to broadcast events
  * @min_delta_ticks:   minimum delta value in ticks stored for reconfiguration
  * @max_delta_ticks:   maximum delta value in ticks stored for reconfiguration
@@ -95,22 +105,31 @@ enum clock_event_mode {
  */
 struct clock_event_device {
        void                    (*event_handler)(struct clock_event_device *);
-       int                     (*set_next_event)(unsigned long evt,
-                                                 struct clock_event_device *);
-       int                     (*set_next_ktime)(ktime_t expires,
-                                                 struct clock_event_device *);
+       int                     (*set_next_event)(unsigned long evt, struct clock_event_device *);
+       int                     (*set_next_ktime)(ktime_t expires, struct clock_event_device *);
        ktime_t                 next_event;
        u64                     max_delta_ns;
        u64                     min_delta_ns;
        u32                     mult;
        u32                     shift;
        enum clock_event_mode   mode;
+       enum clock_event_state  state;
        unsigned int            features;
        unsigned long           retries;
 
+       /*
+        * State transition callback(s): Only one of the two groups should be
+        * defined:
+        * - set_mode(), only for modes <= CLOCK_EVT_MODE_RESUME.
+        * - set_state_{shutdown|periodic|oneshot}(), tick_resume().
+        */
+       void                    (*set_mode)(enum clock_event_mode mode, struct clock_event_device *);
+       int                     (*set_state_periodic)(struct clock_event_device *);
+       int                     (*set_state_oneshot)(struct clock_event_device *);
+       int                     (*set_state_shutdown)(struct clock_event_device *);
+       int                     (*tick_resume)(struct clock_event_device *);
+
        void                    (*broadcast)(const struct cpumask *mask);
-       void                    (*set_mode)(enum clock_event_mode mode,
-                                           struct clock_event_device *);
        void                    (*suspend)(struct clock_event_device *);
        void                    (*resume)(struct clock_event_device *);
        unsigned long           min_delta_ticks;
@@ -136,18 +155,18 @@ struct clock_event_device {
  *
  * factor = (clock_ticks << shift) / nanoseconds
  */
-static inline unsigned long div_sc(unsigned long ticks, unsigned long nsec,
-                                  int shift)
+static inline unsigned long
+div_sc(unsigned long ticks, unsigned long nsec, int shift)
 {
-       uint64_t tmp = ((uint64_t)ticks) << shift;
+       u64 tmp = ((u64)ticks) << shift;
 
        do_div(tmp, nsec);
+
        return (unsigned long) tmp;
 }
 
 /* Clock event layer functions */
-extern u64 clockevent_delta2ns(unsigned long latch,
-                              struct clock_event_device *evt);
+extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt);
 extern void clockevents_register_device(struct clock_event_device *dev);
 extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu);
 
@@ -158,57 +177,42 @@ extern void clockevents_config_and_register(struct clock_event_device *dev,
 
 extern int clockevents_update_freq(struct clock_event_device *ce, u32 freq);
 
-extern void clockevents_exchange_device(struct clock_event_device *old,
-                                       struct clock_event_device *new);
-extern void clockevents_set_mode(struct clock_event_device *dev,
-                                enum clock_event_mode mode);
-extern int clockevents_program_event(struct clock_event_device *dev,
-                                    ktime_t expires, bool force);
-
-extern void clockevents_handle_noop(struct clock_event_device *dev);
-
 static inline void
 clockevents_calc_mult_shift(struct clock_event_device *ce, u32 freq, u32 minsec)
 {
-       return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC,
-                                     freq, minsec);
+       return clocks_calc_mult_shift(&ce->mult, &ce->shift, NSEC_PER_SEC, freq, minsec);
 }
 
 extern void clockevents_suspend(void);
 extern void clockevents_resume(void);
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-#ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+#  ifdef CONFIG_ARCH_HAS_TICK_BROADCAST
 extern void tick_broadcast(const struct cpumask *mask);
-#else
-#define tick_broadcast NULL
-#endif
+#  else
+#   define tick_broadcast      NULL
+#  endif
 extern int tick_receive_broadcast(void);
-#endif
+# endif
 
-#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+# if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
 extern void tick_setup_hrtimer_broadcast(void);
 extern int tick_check_broadcast_expired(void);
-#else
+# else
 static inline int tick_check_broadcast_expired(void) { return 0; }
-static inline void tick_setup_hrtimer_broadcast(void) {};
-#endif
+static inline void tick_setup_hrtimer_broadcast(void) { }
+# endif
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
 extern int clockevents_notify(unsigned long reason, void *arg);
-#else
-static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; }
-#endif
-
-#else /* CONFIG_GENERIC_CLOCKEVENTS_BUILD */
 
-static inline void clockevents_suspend(void) {}
-static inline void clockevents_resume(void) {}
+#else /* !CONFIG_GENERIC_CLOCKEVENTS: */
 
+static inline void clockevents_suspend(void) { }
+static inline void clockevents_resume(void) { }
 static inline int clockevents_notify(unsigned long reason, void *arg) { return 0; }
 static inline int tick_check_broadcast_expired(void) { return 0; }
-static inline void tick_setup_hrtimer_broadcast(void) {};
+static inline void tick_setup_hrtimer_broadcast(void) { }
 
-#endif
+#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
-#endif
+#endif /* _LINUX_CLOCKCHIPS_H */
index 9c78d15..1355098 100644 (file)
@@ -56,6 +56,7 @@ struct module;
  * @shift:             cycle to nanosecond divisor (power of two)
  * @max_idle_ns:       max idle time permitted by the clocksource (nsecs)
  * @maxadj:            maximum adjustment value to mult (~11%)
+ * @max_cycles:                maximum safe cycle value which won't overflow on multiplication
  * @flags:             flags describing special properties
  * @archdata:          arch-specific data
  * @suspend:           suspend function for the clocksource, if necessary
@@ -76,7 +77,7 @@ struct clocksource {
 #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA
        struct arch_clocksource_data archdata;
 #endif
-
+       u64 max_cycles;
        const char *name;
        struct list_head list;
        int rating;
@@ -178,7 +179,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
 }
 
 
-extern int clocksource_register(struct clocksource*);
 extern int clocksource_unregister(struct clocksource*);
 extern void clocksource_touch_watchdog(void);
 extern struct clocksource* clocksource_get_next(void);
@@ -189,7 +189,7 @@ extern struct clocksource * __init clocksource_default_clock(void);
 extern void clocksource_mark_unstable(struct clocksource *cs);
 
 extern u64
-clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask);
+clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cycles);
 extern void
 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
 
@@ -200,7 +200,16 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
 extern int
 __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
 extern void
-__clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq);
+__clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq);
+
+/*
+ * Don't call this unless you are a default clocksource
+ * (AKA: jiffies) and absolutely have to.
+ */
+static inline int __clocksource_register(struct clocksource *cs)
+{
+       return __clocksource_register_scale(cs, 1, 0);
+}
 
 static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
 {
@@ -212,14 +221,14 @@ static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
        return __clocksource_register_scale(cs, 1000, khz);
 }
 
-static inline void __clocksource_updatefreq_hz(struct clocksource *cs, u32 hz)
+static inline void __clocksource_update_freq_hz(struct clocksource *cs, u32 hz)
 {
-       __clocksource_updatefreq_scale(cs, 1, hz);
+       __clocksource_update_freq_scale(cs, 1, hz);
 }
 
-static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
+static inline void __clocksource_update_freq_khz(struct clocksource *cs, u32 khz)
 {
-       __clocksource_updatefreq_scale(cs, 1000, khz);
+       __clocksource_update_freq_scale(cs, 1000, khz);
 }
 
 
index 1b45e4a..0e41ca0 100644 (file)
@@ -192,29 +192,16 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect);
 
 #include <uapi/linux/types.h>
 
-static __always_inline void data_access_exceeds_word_size(void)
-#ifdef __compiletime_warning
-__compiletime_warning("data access exceeds word size and won't be atomic")
-#endif
-;
-
-static __always_inline void data_access_exceeds_word_size(void)
-{
-}
-
 static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
 {
        switch (size) {
        case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
        case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
        case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
-#ifdef CONFIG_64BIT
        case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
-#endif
        default:
                barrier();
                __builtin_memcpy((void *)res, (const void *)p, size);
-               data_access_exceeds_word_size();
                barrier();
        }
 }
@@ -225,13 +212,10 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
        case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
        case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
        case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
-#ifdef CONFIG_64BIT
        case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
-#endif
        default:
                barrier();
                __builtin_memcpy((void *)p, (const void *)res, size);
-               data_access_exceeds_word_size();
                barrier();
        }
 }
index 306178d..9c5e892 100644 (file)
@@ -77,7 +77,6 @@ struct cpuidle_device {
        unsigned int            cpu;
 
        int                     last_residency;
-       int                     state_count;
        struct cpuidle_state_usage      states_usage[CPUIDLE_STATE_MAX];
        struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX];
        struct cpuidle_driver_kobj *kobj_driver;
index 2646aed..fd23978 100644 (file)
@@ -375,6 +375,7 @@ int dm_create(int minor, struct mapped_device **md);
  */
 struct mapped_device *dm_get_md(dev_t dev);
 void dm_get(struct mapped_device *md);
+int dm_hold(struct mapped_device *md);
 void dm_put(struct mapped_device *md);
 
 /*
index 022e34f..52456aa 100644 (file)
@@ -14,6 +14,8 @@
 #include <asm/io.h>
 #include <asm/scatterlist.h>
 
+struct device;
+
 struct dma_pool *dma_pool_create(const char *name, struct device *dev, 
                        size_t size, size_t align, size_t allocation);
 
index cf7e431..af5be03 100644 (file)
@@ -942,6 +942,7 @@ extern int __init efi_setup_pcdp_console(char *);
 #define EFI_64BIT              5       /* Is the firmware 64-bit? */
 #define EFI_PARAVIRT           6       /* Access is via a paravirt interface */
 #define EFI_ARCH_1             7       /* First arch-specific bit */
+#define EFI_DBG                        8       /* Print additional debug info at runtime */
 
 #ifdef CONFIG_EFI
 /*
index b4d71b5..52cc449 100644 (file)
@@ -604,6 +604,7 @@ struct inode {
        struct mutex            i_mutex;
 
        unsigned long           dirtied_when;   /* jiffies of first dirtying */
+       unsigned long           dirtied_time_when;
 
        struct hlist_node       i_hash;
        struct list_head        i_wb_list;      /* backing dev IO list */
@@ -1548,7 +1549,7 @@ struct file_operations {
        long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
        long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
        int (*mmap) (struct file *, struct vm_area_struct *);
-       void (*mremap)(struct file *, struct vm_area_struct *);
+       int (*mremap)(struct file *, struct vm_area_struct *);
        int (*open) (struct inode *, struct file *);
        int (*flush) (struct file *, fl_owner_t id);
        int (*release) (struct inode *, struct file *);
index bf3fe71..47b9ebd 100644 (file)
@@ -38,16 +38,17 @@ bool irq_work_queue(struct irq_work *work);
 bool irq_work_queue_on(struct irq_work *work, int cpu);
 #endif
 
-void irq_work_run(void);
 void irq_work_tick(void);
 void irq_work_sync(struct irq_work *work);
 
 #ifdef CONFIG_IRQ_WORK
 #include <asm/irq_work.h>
 
+void irq_work_run(void);
 bool irq_work_needs_cpu(void);
 #else
 static inline bool irq_work_needs_cpu(void) { return false; }
+static inline void irq_work_run(void) { }
 #endif
 
 #endif /* _LINUX_IRQ_WORK_H */
index 800544b..ffbc034 100644 (file)
 #define GICR_PROPBASER_WaWb            (5U << 7)
 #define GICR_PROPBASER_RaWaWt          (6U << 7)
 #define GICR_PROPBASER_RaWaWb          (7U << 7)
+#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7)
 #define GICR_PROPBASER_IDBITS_MASK     (0x1f)
 
+#define GICR_PENDBASER_NonShareable    (0U << 10)
+#define GICR_PENDBASER_InnerShareable  (1U << 10)
+#define GICR_PENDBASER_OuterShareable  (2U << 10)
+#define GICR_PENDBASER_SHAREABILITY_MASK (3UL << 10)
+#define GICR_PENDBASER_nCnB            (0U << 7)
+#define GICR_PENDBASER_nC              (1U << 7)
+#define GICR_PENDBASER_RaWt            (2U << 7)
+#define GICR_PENDBASER_RaWb            (3U << 7)
+#define GICR_PENDBASER_WaWt            (4U << 7)
+#define GICR_PENDBASER_WaWb            (5U << 7)
+#define GICR_PENDBASER_RaWaWt          (6U << 7)
+#define GICR_PENDBASER_RaWaWb          (7U << 7)
+#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7)
+
 /*
  * Re-Distributor registers, offsets from SGI_base
  */
 
 #define GITS_TRANSLATER                        0x10040
 
+#define GITS_CTLR_ENABLE               (1U << 0)
+#define GITS_CTLR_QUIESCENT            (1U << 31)
+
+#define GITS_TYPER_DEVBITS_SHIFT       13
+#define GITS_TYPER_DEVBITS(r)          ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
 #define GITS_TYPER_PTA                 (1UL << 19)
 
 #define GITS_CBASER_VALID              (1UL << 63)
 #define GITS_CBASER_WaWb               (5UL << 59)
 #define GITS_CBASER_RaWaWt             (6UL << 59)
 #define GITS_CBASER_RaWaWb             (7UL << 59)
+#define GITS_CBASER_CACHEABILITY_MASK  (7UL << 59)
 #define GITS_CBASER_NonShareable       (0UL << 10)
 #define GITS_CBASER_InnerShareable     (1UL << 10)
 #define GITS_CBASER_OuterShareable     (2UL << 10)
 #define GITS_BASER_WaWb                        (5UL << 59)
 #define GITS_BASER_RaWaWt              (6UL << 59)
 #define GITS_BASER_RaWaWb              (7UL << 59)
+#define GITS_BASER_CACHEABILITY_MASK   (7UL << 59)
 #define GITS_BASER_TYPE_SHIFT          (56)
 #define GITS_BASER_TYPE(r)             (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
 #define GITS_BASER_ENTRY_SIZE_SHIFT    (48)
index 98f923b..f4de473 100644 (file)
  * same as using STATIC_KEY_INIT_FALSE.
  */
 
+#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+# define HAVE_JUMP_LABEL
+#endif
+
+#ifndef __ASSEMBLY__
+
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/bug.h>
@@ -55,7 +61,7 @@ extern bool static_key_initialized;
                                    "%s used before call to jump_label_init", \
                                    __func__)
 
-#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_JUMP_LABEL)
+#ifdef HAVE_JUMP_LABEL
 
 struct static_key {
        atomic_t enabled;
@@ -66,13 +72,18 @@ struct static_key {
 #endif
 };
 
-# include <asm/jump_label.h>
-# define HAVE_JUMP_LABEL
 #else
 struct static_key {
        atomic_t enabled;
 };
-#endif /* CC_HAVE_ASM_GOTO && CONFIG_JUMP_LABEL */
+#endif /* HAVE_JUMP_LABEL */
+#endif /* __ASSEMBLY__ */
+
+#ifdef HAVE_JUMP_LABEL
+#include <asm/jump_label.h>
+#endif
+
+#ifndef __ASSEMBLY__
 
 enum jump_label_type {
        JUMP_LABEL_DISABLE = 0,
@@ -203,3 +214,5 @@ static inline bool static_key_enabled(struct static_key *key)
 }
 
 #endif /* _LINUX_JUMP_LABEL_H */
+
+#endif /* __ASSEMBLY__ */
index 72ba725..5bb0744 100644 (file)
@@ -5,6 +5,7 @@
 
 struct kmem_cache;
 struct page;
+struct vm_struct;
 
 #ifdef CONFIG_KASAN
 
@@ -49,15 +50,11 @@ void kasan_krealloc(const void *object, size_t new_size);
 void kasan_slab_alloc(struct kmem_cache *s, void *object);
 void kasan_slab_free(struct kmem_cache *s, void *object);
 
-#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
-
 int kasan_module_alloc(void *addr, size_t size);
-void kasan_module_free(void *addr);
+void kasan_free_shadow(const struct vm_struct *vm);
 
 #else /* CONFIG_KASAN */
 
-#define MODULE_ALIGN 1
-
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
 static inline void kasan_enable_current(void) {}
@@ -82,7 +79,7 @@ static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {}
 static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
 
 static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
-static inline void kasan_module_free(void *addr) {}
+static inline void kasan_free_shadow(const struct vm_struct *vm) {}
 
 #endif /* CONFIG_KASAN */
 
index d12b210..82af5d0 100644 (file)
@@ -165,12 +165,12 @@ enum kvm_bus {
        KVM_NR_BUSES
 };
 
-int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
                     int len, const void *val);
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-                           int len, const void *val, long cookie);
-int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
-                   void *val);
+int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
+                           gpa_t addr, int len, const void *val, long cookie);
+int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
+                   int len, void *val);
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                            int len, struct kvm_io_device *dev);
 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
@@ -658,7 +658,6 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
 
 void *kvm_kvzalloc(unsigned long size);
-void kvm_kvfree(const void *addr);
 
 #ifndef __KVM_HAVE_ARCH_VM_ALLOC
 static inline struct kvm *kvm_arch_alloc_vm(void)
@@ -700,6 +699,20 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu)
 #endif
 }
 
+#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED
+/*
+ * returns true if the virtual interrupt controller is initialized and
+ * ready to accept virtual IRQs. On some architectures the virtual interrupt
+ * controller is dynamically instantiated and this is not always true.
+ */
+bool kvm_arch_intc_initialized(struct kvm *kvm);
+#else
+static inline bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+       return true;
+}
+#endif
+
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type);
 void kvm_arch_destroy_vm(struct kvm *kvm);
 void kvm_arch_sync_events(struct kvm *kvm);
@@ -969,11 +982,16 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 #endif /* CONFIG_HAVE_KVM_EVENTFD */
 
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
-static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
 {
        return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
 }
 
+static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+{
+       return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
+}
+
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);
 
 #else
index 7bf01d7..1ce79a7 100644 (file)
@@ -4,5 +4,6 @@
 #include <linux/compiler.h>
 
 unsigned long lcm(unsigned long a, unsigned long b) __attribute_const__;
+unsigned long lcm_not_zero(unsigned long a, unsigned long b) __attribute_const__;
 
 #endif /* _LCM_H */
index fc03efa..6b08cc1 100644 (file)
@@ -232,6 +232,7 @@ enum {
                                              * led */
        ATA_FLAG_NO_DIPM        = (1 << 23), /* host not happy with DIPM */
        ATA_FLAG_LOWTAG         = (1 << 24), /* host wants lowest available tag */
+       ATA_FLAG_SAS_HOST       = (1 << 25), /* SAS host */
 
        /* bits 24:31 of ap->flags are reserved for LLD specific flags */
 
index fb0390a..ee7b1ce 100644 (file)
@@ -2999,6 +2999,9 @@ enum usb_irq_events {
 #define PALMAS_GPADC_TRIM15                                    0x0E
 #define PALMAS_GPADC_TRIM16                                    0x0F
 
+/* TPS659038 regen2_ctrl offset is different from palmas */
+#define TPS659038_REGEN2_CTRL                                  0x12
+
 /* TPS65917 Interrupt registers */
 
 /* Registers for function INTERRUPT */
index f279d9c..2782df4 100644 (file)
@@ -474,16 +474,15 @@ struct zone {
        unsigned long           wait_table_bits;
 
        ZONE_PADDING(_pad1_)
-
-       /* Write-intensive fields used from the page allocator */
-       spinlock_t              lock;
-
        /* free areas of different sizes */
        struct free_area        free_area[MAX_ORDER];
 
        /* zone flags, see below */
        unsigned long           flags;
 
+       /* Write-intensive fields used from the page allocator */
+       spinlock_t              lock;
+
        ZONE_PADDING(_pad2_)
 
        /* Write-intensive fields used by page reclaim */
index 42999fe..b03485b 100644 (file)
@@ -344,6 +344,10 @@ struct module {
        unsigned long *ftrace_callsites;
 #endif
 
+#ifdef CONFIG_LIVEPATCH
+       bool klp_alive;
+#endif
+
 #ifdef CONFIG_MODULE_UNLOAD
        /* What modules depend on me? */
        struct list_head source_list;
index f755626..4d0cb9b 100644 (file)
@@ -84,4 +84,12 @@ void module_arch_cleanup(struct module *mod);
 
 /* Any cleanup before freeing mod->module_init */
 void module_arch_freeing_init(struct module *mod);
+
+#ifdef CONFIG_KASAN
+#include <linux/kasan.h>
+#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+#else
+#define MODULE_ALIGN PAGE_SIZE
+#endif
+
 #endif
index 429d179..2787388 100644 (file)
@@ -965,9 +965,12 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *     Used to add FDB entries to dump requests. Implementers should add
  *     entries to skb and update idx with the number of entries.
  *
- * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh)
+ * int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh,
+ *                          u16 flags)
  * int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq,
  *                          struct net_device *dev, u32 filter_mask)
+ * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
+ *                          u16 flags);
  *
  * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
  *     Called to change device carrier. Soft-devices (like dummy, team, etc)
@@ -2182,6 +2185,12 @@ void netdev_freemem(struct net_device *dev);
 void synchronize_net(void);
 int init_dummy_netdev(struct net_device *dev);
 
+DECLARE_PER_CPU(int, xmit_recursion);
+static inline int dev_recursion_level(void)
+{
+       return this_cpu_read(xmit_recursion);
+}
+
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
index 8a860f0..611a691 100644 (file)
@@ -84,7 +84,7 @@ static inline int of_platform_populate(struct device_node *root,
 static inline void of_platform_depopulate(struct device *parent) { }
 #endif
 
-#ifdef CONFIG_OF_DYNAMIC
+#if defined(CONFIG_OF_DYNAMIC) && defined(CONFIG_OF_ADDRESS)
 extern void of_platform_register_reconfig_notifier(void);
 #else
 static inline void of_platform_register_reconfig_notifier(void) { }
index 72c0415..18eccef 100644 (file)
@@ -82,7 +82,7 @@ static inline int pinctrl_gpio_direction_output(unsigned gpio)
 
 static inline struct pinctrl * __must_check pinctrl_get(struct device *dev)
 {
-       return ERR_PTR(-ENOSYS);
+       return NULL;
 }
 
 static inline void pinctrl_put(struct pinctrl *p)
@@ -93,7 +93,7 @@ static inline struct pinctrl_state * __must_check pinctrl_lookup_state(
                                                        struct pinctrl *p,
                                                        const char *name)
 {
-       return ERR_PTR(-ENOSYS);
+       return NULL;
 }
 
 static inline int pinctrl_select_state(struct pinctrl *p,
@@ -104,7 +104,7 @@ static inline int pinctrl_select_state(struct pinctrl *p,
 
 static inline struct pinctrl * __must_check devm_pinctrl_get(struct device *dev)
 {
-       return ERR_PTR(-ENOSYS);
+       return NULL;
 }
 
 static inline void devm_pinctrl_put(struct pinctrl *p)
index d4ad5b5..045f709 100644 (file)
@@ -316,7 +316,7 @@ struct regulator_desc {
  * @driver_data: private regulator data
  * @of_node: OpenFirmware node to parse for device tree bindings (may be
  *           NULL).
- * @regmap: regmap to use for core regmap helpers if dev_get_regulator() is
+ * @regmap: regmap to use for core regmap helpers if dev_get_regmap() is
  *          insufficient.
  * @ena_gpio_initialized: GPIO controlling regulator enable was properly
  *                        initialized, meaning that >= 0 is a valid gpio
index dcad7ee..8dcf682 100644 (file)
@@ -77,6 +77,7 @@ struct rtc_class_ops {
        int (*read_alarm)(struct device *, struct rtc_wkalrm *);
        int (*set_alarm)(struct device *, struct rtc_wkalrm *);
        int (*proc)(struct device *, struct seq_file *);
+       int (*set_mmss64)(struct device *, time64_t secs);
        int (*set_mmss)(struct device *, unsigned long secs);
        int (*read_callback)(struct device *, int data);
        int (*alarm_irq_enable)(struct device *, unsigned int enabled);
index 6d77432..3f33088 100644 (file)
@@ -176,6 +176,14 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
 extern void calc_global_load(unsigned long ticks);
 extern void update_cpu_load_nohz(void);
 
+/* Notifier for when a task gets migrated to a new CPU */
+struct task_migration_notifier {
+       struct task_struct *task;
+       int from_cpu;
+       int to_cpu;
+};
+extern void register_task_migration_notifier(struct notifier_block *n);
+
 extern unsigned long get_parent_ip(unsigned long addr);
 
 extern void dump_cpu_task(int cpu);
@@ -1115,15 +1123,28 @@ struct load_weight {
 };
 
 struct sched_avg {
+       u64 last_runnable_update;
+       s64 decay_count;
+       /*
+        * utilization_avg_contrib describes the amount of time that a
+        * sched_entity is running on a CPU. It is based on running_avg_sum
+        * and is scaled in the range [0..SCHED_LOAD_SCALE].
+        * load_avg_contrib describes the amount of time that a sched_entity
+        * is runnable on a rq. It is based on both runnable_avg_sum and the
+        * weight of the task.
+        */
+       unsigned long load_avg_contrib, utilization_avg_contrib;
        /*
         * These sums represent an infinite geometric series and so are bound
         * above by 1024/(1-y).  Thus we only need a u32 to store them for all
         * choices of y < 1-2^(-32)*1024.
+        * running_avg_sum reflects the time that the sched_entity is
+        * effectively running on the CPU.
+        * runnable_avg_sum represents the amount of time a sched_entity is on
+        * a runqueue which includes the running time that is monitored by
+        * running_avg_sum.
         */
-       u32 runnable_avg_sum, runnable_avg_period;
-       u64 last_runnable_update;
-       s64 decay_count;
-       unsigned long load_avg_contrib;
+       u32 runnable_avg_sum, avg_period, running_avg_sum;
 };
 
 #ifdef CONFIG_SCHEDSTATS
@@ -1625,11 +1646,11 @@ struct task_struct {
 
        /*
         * numa_faults_locality tracks if faults recorded during the last
-        * scan window were remote/local. The task scan period is adapted
-        * based on the locality of the faults with different weights
-        * depending on whether they were shared or private faults
+        * scan window were remote/local or failed to migrate. The task scan
+        * period is adapted based on the locality of the faults with different
+        * weights depending on whether they were shared or private faults
         */
-       unsigned long numa_faults_locality[2];
+       unsigned long numa_faults_locality[3];
 
        unsigned long numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
@@ -1719,6 +1740,7 @@ struct task_struct {
 #define TNF_NO_GROUP   0x02
 #define TNF_SHARED     0x04
 #define TNF_FAULT_LOCAL        0x08
+#define TNF_MIGRATE_FAIL 0x10
 
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
index f5df8f6..5f68d0a 100644 (file)
@@ -108,7 +108,7 @@ static inline unsigned __read_seqcount_begin(const seqcount_t *s)
        unsigned ret;
 
 repeat:
-       ret = ACCESS_ONCE(s->sequence);
+       ret = READ_ONCE(s->sequence);
        if (unlikely(ret & 1)) {
                cpu_relax();
                goto repeat;
@@ -127,7 +127,7 @@ repeat:
  */
 static inline unsigned raw_read_seqcount(const seqcount_t *s)
 {
-       unsigned ret = ACCESS_ONCE(s->sequence);
+       unsigned ret = READ_ONCE(s->sequence);
        smp_rmb();
        return ret;
 }
@@ -179,7 +179,7 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s)
  */
 static inline unsigned raw_seqcount_begin(const seqcount_t *s)
 {
-       unsigned ret = ACCESS_ONCE(s->sequence);
+       unsigned ret = READ_ONCE(s->sequence);
        smp_rmb();
        return ret & ~1;
 }
index baf3e1d..d10965f 100644 (file)
@@ -143,13 +143,13 @@ struct uart_port {
        unsigned char           iotype;                 /* io access style */
        unsigned char           unused1;
 
-#define UPIO_PORT              (0)                     /* 8b I/O port access */
-#define UPIO_HUB6              (1)                     /* Hub6 ISA card */
-#define UPIO_MEM               (2)                     /* 8b MMIO access */
-#define UPIO_MEM32             (3)                     /* 32b little endian */
-#define UPIO_MEM32BE           (4)                     /* 32b big endian */
-#define UPIO_AU                        (5)                     /* Au1x00 and RT288x type IO */
-#define UPIO_TSI               (6)                     /* Tsi108/109 type IO */
+#define UPIO_PORT              (SERIAL_IO_PORT)        /* 8b I/O port access */
+#define UPIO_HUB6              (SERIAL_IO_HUB6)        /* Hub6 ISA card */
+#define UPIO_MEM               (SERIAL_IO_MEM)         /* 8b MMIO access */
+#define UPIO_MEM32             (SERIAL_IO_MEM32)       /* 32b little endian */
+#define UPIO_AU                        (SERIAL_IO_AU)          /* Au1x00 and RT288x type IO */
+#define UPIO_TSI               (SERIAL_IO_TSI)         /* Tsi108/109 type IO */
+#define UPIO_MEM32BE           (SERIAL_IO_MEM32BE)     /* 32b big endian */
 
        unsigned int            read_status_mask;       /* driver specific */
        unsigned int            ignore_status_mask;     /* driver specific */
index 30007af..f54d665 100644 (file)
@@ -948,6 +948,13 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
        to->l4_hash = from->l4_hash;
 };
 
+static inline void skb_sender_cpu_clear(struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+       skb->sender_cpu = 0;
+#endif
+}
+
 #ifdef NET_SKBUFF_DATA_USES_OFFSET
 static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
 {
index ed9489d..856d34d 100644 (file)
@@ -649,7 +649,7 @@ struct spi_transfer {
  * sequence completes.  On some systems, many such sequences can execute as
  * as single programmed DMA transfer.  On all systems, these messages are
  * queued, and might complete after transactions to other devices.  Messages
- * sent to a given spi_device are alway executed in FIFO order.
+ * sent to a given spi_device are always executed in FIFO order.
  *
  * The code that submits an spi_message (and its spi_transfers)
  * to the lower layers is responsible for managing its memory.
index f4aec0e..076af43 100644 (file)
@@ -19,3 +19,12 @@ enum {
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif
 #endif
+
+/**
+ * offsetofend(TYPE, MEMBER)
+ *
+ * @TYPE: The type of the structure
+ * @MEMBER: The member within the structure to get the end offset of
+ */
+#define offsetofend(TYPE, MEMBER) \
+       (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
index c57d8ea..59a7889 100644 (file)
@@ -60,17 +60,17 @@ struct rpc_xprt;
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
 void           rpc_register_sysctl(void);
 void           rpc_unregister_sysctl(void);
-int            sunrpc_debugfs_init(void);
+void           sunrpc_debugfs_init(void);
 void           sunrpc_debugfs_exit(void);
-int            rpc_clnt_debugfs_register(struct rpc_clnt *);
+void           rpc_clnt_debugfs_register(struct rpc_clnt *);
 void           rpc_clnt_debugfs_unregister(struct rpc_clnt *);
-int            rpc_xprt_debugfs_register(struct rpc_xprt *);
+void           rpc_xprt_debugfs_register(struct rpc_xprt *);
 void           rpc_xprt_debugfs_unregister(struct rpc_xprt *);
 #else
-static inline int
+static inline void
 sunrpc_debugfs_init(void)
 {
-       return 0;
+       return;
 }
 
 static inline void
@@ -79,10 +79,10 @@ sunrpc_debugfs_exit(void)
        return;
 }
 
-static inline int
+static inline void
 rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
 {
-       return 0;
+       return;
 }
 
 static inline void
@@ -91,10 +91,10 @@ rpc_clnt_debugfs_unregister(struct rpc_clnt *clnt)
        return;
 }
 
-static inline int
+static inline void
 rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
 {
-       return 0;
+       return;
 }
 
 static inline void
index 9c085dc..f8492da 100644 (file)
@@ -1,7 +1,5 @@
-/*  linux/include/linux/tick.h
- *
- *  This file contains the structure definitions for tick related functions
- *
+/*
+ * Tick related global functions
  */
 #ifndef _LINUX_TICK_H
 #define _LINUX_TICK_H
 #include <linux/clockchips.h>
 #include <linux/irqflags.h>
 #include <linux/percpu.h>
-#include <linux/hrtimer.h>
 #include <linux/context_tracking_state.h>
 #include <linux/cpumask.h>
 #include <linux/sched.h>
 
 #ifdef CONFIG_GENERIC_CLOCKEVENTS
-
-enum tick_device_mode {
-       TICKDEV_MODE_PERIODIC,
-       TICKDEV_MODE_ONESHOT,
-};
-
-struct tick_device {
-       struct clock_event_device *evtdev;
-       enum tick_device_mode mode;
-};
-
-enum tick_nohz_mode {
-       NOHZ_MODE_INACTIVE,
-       NOHZ_MODE_LOWRES,
-       NOHZ_MODE_HIGHRES,
-};
-
-/**
- * struct tick_sched - sched tick emulation and no idle tick control/stats
- * @sched_timer:       hrtimer to schedule the periodic tick in high
- *                     resolution mode
- * @last_tick:         Store the last tick expiry time when the tick
- *                     timer is modified for nohz sleeps. This is necessary
- *                     to resume the tick timer operation in the timeline
- *                     when the CPU returns from nohz sleep.
- * @tick_stopped:      Indicator that the idle tick has been stopped
- * @idle_jiffies:      jiffies at the entry to idle for idle time accounting
- * @idle_calls:                Total number of idle calls
- * @idle_sleeps:       Number of idle calls, where the sched tick was stopped
- * @idle_entrytime:    Time when the idle call was entered
- * @idle_waketime:     Time when the idle was interrupted
- * @idle_exittime:     Time when the idle state was left
- * @idle_sleeptime:    Sum of the time slept in idle with sched tick stopped
- * @iowait_sleeptime:  Sum of the time slept in idle with sched tick stopped, with IO outstanding
- * @sleep_length:      Duration of the current idle sleep
- * @do_timer_lst:      CPU was the last one doing do_timer before going idle
- */
-struct tick_sched {
-       struct hrtimer                  sched_timer;
-       unsigned long                   check_clocks;
-       enum tick_nohz_mode             nohz_mode;
-       ktime_t                         last_tick;
-       int                             inidle;
-       int                             tick_stopped;
-       unsigned long                   idle_jiffies;
-       unsigned long                   idle_calls;
-       unsigned long                   idle_sleeps;
-       int                             idle_active;
-       ktime_t                         idle_entrytime;
-       ktime_t                         idle_waketime;
-       ktime_t                         idle_exittime;
-       ktime_t                         idle_sleeptime;
-       ktime_t                         iowait_sleeptime;
-       ktime_t                         sleep_length;
-       unsigned long                   last_jiffies;
-       unsigned long                   next_jiffies;
-       ktime_t                         idle_expires;
-       int                             do_timer_last;
-};
-
 extern void __init tick_init(void);
-extern int tick_is_oneshot_available(void);
-extern struct tick_device *tick_get_device(int cpu);
-
 extern void tick_freeze(void);
 extern void tick_unfreeze(void);
+/* Should be core only, but ARM BL switcher requires it */
+extern void tick_suspend_local(void);
+/* Should be core only, but XEN resume magic and ARM BL switcher require it */
+extern void tick_resume_local(void);
+extern void tick_handover_do_timer(void);
+extern void tick_cleanup_dead_cpu(int cpu);
+#else /* CONFIG_GENERIC_CLOCKEVENTS */
+static inline void tick_init(void) { }
+static inline void tick_freeze(void) { }
+static inline void tick_unfreeze(void) { }
+static inline void tick_suspend_local(void) { }
+static inline void tick_resume_local(void) { }
+static inline void tick_handover_do_timer(void) { }
+static inline void tick_cleanup_dead_cpu(int cpu) { }
+#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
 
-# ifdef CONFIG_HIGH_RES_TIMERS
-extern int tick_init_highres(void);
-extern int tick_program_event(ktime_t expires, int force);
-extern void tick_setup_sched_timer(void);
-# endif
-
-# if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
-extern void tick_cancel_sched_timer(int cpu);
-# else
-static inline void tick_cancel_sched_timer(int cpu) { }
-# endif
-
-# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern struct tick_device *tick_get_broadcast_device(void);
-extern struct cpumask *tick_get_broadcast_mask(void);
-
-#  ifdef CONFIG_TICK_ONESHOT
-extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
-#  endif
-
-# endif /* BROADCAST */
-
-# ifdef CONFIG_TICK_ONESHOT
-extern void tick_clock_notify(void);
-extern int tick_check_oneshot_change(int allow_nohz);
-extern struct tick_sched *tick_get_tick_sched(int cpu);
+#ifdef CONFIG_TICK_ONESHOT
 extern void tick_irq_enter(void);
-extern int tick_oneshot_mode_active(void);
 #  ifndef arch_needs_cpu
 #   define arch_needs_cpu() (0)
 #  endif
 # else
-static inline void tick_clock_notify(void) { }
-static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
 static inline void tick_irq_enter(void) { }
-static inline int tick_oneshot_mode_active(void) { return 0; }
-# endif
+#endif
 
-#else /* CONFIG_GENERIC_CLOCKEVENTS */
-static inline void tick_init(void) { }
-static inline void tick_freeze(void) { }
-static inline void tick_unfreeze(void) { }
-static inline void tick_cancel_sched_timer(int cpu) { }
-static inline void tick_clock_notify(void) { }
-static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
-static inline void tick_irq_enter(void) { }
-static inline int tick_oneshot_mode_active(void) { return 0; }
-#endif /* !CONFIG_GENERIC_CLOCKEVENTS */
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern void hotplug_cpu__broadcast_tick_pull(int dead_cpu);
+#else
+static inline void hotplug_cpu__broadcast_tick_pull(int dead_cpu) { }
+#endif
 
-# ifdef CONFIG_NO_HZ_COMMON
-DECLARE_PER_CPU(struct tick_sched, tick_cpu_sched);
+enum tick_broadcast_mode {
+       TICK_BROADCAST_OFF,
+       TICK_BROADCAST_ON,
+       TICK_BROADCAST_FORCE,
+};
+
+enum tick_broadcast_state {
+       TICK_BROADCAST_EXIT,
+       TICK_BROADCAST_ENTER,
+};
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern void tick_broadcast_control(enum tick_broadcast_mode mode);
+#else
+static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { }
+#endif /* BROADCAST */
+
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
+extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state);
+#else
+static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; }
+#endif
 
-static inline int tick_nohz_tick_stopped(void)
+static inline void tick_broadcast_enable(void)
+{
+       tick_broadcast_control(TICK_BROADCAST_ON);
+}
+static inline void tick_broadcast_disable(void)
+{
+       tick_broadcast_control(TICK_BROADCAST_OFF);
+}
+static inline void tick_broadcast_force(void)
+{
+       tick_broadcast_control(TICK_BROADCAST_FORCE);
+}
+static inline int tick_broadcast_enter(void)
 {
-       return __this_cpu_read(tick_cpu_sched.tick_stopped);
+       return tick_broadcast_oneshot_control(TICK_BROADCAST_ENTER);
+}
+static inline void tick_broadcast_exit(void)
+{
+       tick_broadcast_oneshot_control(TICK_BROADCAST_EXIT);
 }
 
+#ifdef CONFIG_NO_HZ_COMMON
+extern int tick_nohz_tick_stopped(void);
 extern void tick_nohz_idle_enter(void);
 extern void tick_nohz_idle_exit(void);
 extern void tick_nohz_irq_exit(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
 extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
-
-# else /* !CONFIG_NO_HZ_COMMON */
-static inline int tick_nohz_tick_stopped(void)
-{
-       return 0;
-}
-
+#else /* !CONFIG_NO_HZ_COMMON */
+static inline int tick_nohz_tick_stopped(void) { return 0; }
 static inline void tick_nohz_idle_enter(void) { }
 static inline void tick_nohz_idle_exit(void) { }
 
@@ -163,7 +111,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void)
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
 static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
-# endif /* !CONFIG_NO_HZ_COMMON */
+#endif /* !CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
 extern bool tick_nohz_full_running;
index 05af9a3..fb86963 100644 (file)
  * @read:      Read function of @clock
  * @mask:      Bitmask for two's complement subtraction of non 64bit clocks
  * @cycle_last: @clock cycle value at last update
- * @mult:      NTP adjusted multiplier for scaled math conversion
+ * @mult:      (NTP adjusted) multiplier for scaled math conversion
  * @shift:     Shift value for scaled math conversion
  * @xtime_nsec: Shifted (fractional) nano seconds offset for readout
- * @base_mono:  ktime_t (nanoseconds) base time for readout
+ * @base:      ktime_t (nanoseconds) base time for readout
  *
  * This struct has size 56 byte on 64 bit. Together with a seqcount it
  * occupies a single 64byte cache line.
  *
  * The struct is separate from struct timekeeper as it is also used
- * for a fast NMI safe accessor to clock monotonic.
+ * for fast NMI safe accessors.
  */
 struct tk_read_base {
        struct clocksource      *clock;
@@ -35,12 +35,13 @@ struct tk_read_base {
        u32                     mult;
        u32                     shift;
        u64                     xtime_nsec;
-       ktime_t                 base_mono;
+       ktime_t                 base;
 };
 
 /**
  * struct timekeeper - Structure holding internal timekeeping values.
- * @tkr:               The readout base structure
+ * @tkr_mono:          The readout base structure for CLOCK_MONOTONIC
+ * @tkr_raw:           The readout base structure for CLOCK_MONOTONIC_RAW
  * @xtime_sec:         Current CLOCK_REALTIME time in seconds
  * @ktime_sec:         Current CLOCK_MONOTONIC time in seconds
  * @wall_to_monotonic: CLOCK_REALTIME to CLOCK_MONOTONIC offset
@@ -48,7 +49,6 @@ struct tk_read_base {
  * @offs_boot:         Offset clock monotonic -> clock boottime
  * @offs_tai:          Offset clock monotonic -> clock tai
  * @tai_offset:                The current UTC to TAI offset in seconds
- * @base_raw:          Monotonic raw base time in ktime_t format
  * @raw_time:          Monotonic raw base time in timespec64 format
  * @cycle_interval:    Number of clock cycles in one NTP interval
  * @xtime_interval:    Number of clock shifted nano seconds in one NTP
@@ -76,7 +76,8 @@ struct tk_read_base {
  * used instead.
  */
 struct timekeeper {
-       struct tk_read_base     tkr;
+       struct tk_read_base     tkr_mono;
+       struct tk_read_base     tkr_raw;
        u64                     xtime_sec;
        unsigned long           ktime_sec;
        struct timespec64       wall_to_monotonic;
@@ -84,7 +85,6 @@ struct timekeeper {
        ktime_t                 offs_boot;
        ktime_t                 offs_tai;
        s32                     tai_offset;
-       ktime_t                 base_raw;
        struct timespec64       raw_time;
 
        /* The following members are for timekeeping internal use */
index 3eaae47..99176af 100644 (file)
@@ -214,12 +214,18 @@ static inline u64 ktime_get_boot_ns(void)
        return ktime_to_ns(ktime_get_boottime());
 }
 
+static inline u64 ktime_get_tai_ns(void)
+{
+       return ktime_to_ns(ktime_get_clocktai());
+}
+
 static inline u64 ktime_get_raw_ns(void)
 {
        return ktime_to_ns(ktime_get_raw());
 }
 
 extern u64 ktime_get_mono_fast_ns(void);
+extern u64 ktime_get_raw_fast_ns(void);
 
 /*
  * Timespec interfaces utilizing the ktime based ones
@@ -242,6 +248,9 @@ static inline void timekeeping_clocktai(struct timespec *ts)
 /*
  * RTC specific
  */
+extern bool timekeeping_rtc_skipsuspend(void);
+extern bool timekeeping_rtc_skipresume(void);
+
 extern void timekeeping_inject_sleeptime64(struct timespec64 *delta);
 
 /*
@@ -253,17 +262,14 @@ extern void getnstime_raw_and_real(struct timespec *ts_raw,
 /*
  * Persistent clock related interfaces
  */
-extern bool persistent_clock_exist;
 extern int persistent_clock_is_local;
 
-static inline bool has_persistent_clock(void)
-{
-       return persistent_clock_exist;
-}
-
 extern void read_persistent_clock(struct timespec *ts);
+extern void read_persistent_clock64(struct timespec64 *ts);
 extern void read_boot_clock(struct timespec *ts);
+extern void read_boot_clock64(struct timespec64 *ts);
 extern int update_persistent_clock(struct timespec now);
+extern int update_persistent_clock64(struct timespec64 now);
 
 
 #endif
index 07a0226..7188029 100644 (file)
@@ -98,6 +98,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
                        size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
+const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
+
 static inline size_t iov_iter_count(struct iov_iter *i)
 {
        return i->count;
index 9bb547c..704a1ab 100644 (file)
@@ -190,8 +190,7 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data)
  * @num_ports: the number of different ports this device will have.
  * @bulk_in_size: minimum number of bytes to allocate for bulk-in buffer
  *     (0 = end-point size)
- * @bulk_out_size: minimum number of bytes to allocate for bulk-out buffer
- *     (0 = end-point size)
+ * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size)
  * @calc_num_ports: pointer to a function to determine how many ports this
  *     device has dynamically.  It will be called after the probe()
  *     callback is called, but before attach()
index d9a4905..6e0ce8c 100644 (file)
@@ -227,9 +227,23 @@ struct skb_data {  /* skb->cb is one of these */
        struct urb              *urb;
        struct usbnet           *dev;
        enum skb_state          state;
-       size_t                  length;
+       long                    length;
+       unsigned long           packets;
 };
 
+/* Drivers that set FLAG_MULTI_PACKET must call this in their
+ * tx_fixup method before returning an skb.
+ */
+static inline void
+usbnet_set_skb_tx_stats(struct sk_buff *skb,
+                       unsigned long packets, long bytes_delta)
+{
+       struct skb_data *entry = (struct skb_data *) skb->cb;
+
+       entry->packets = packets;
+       entry->length = bytes_delta;
+}
+
 extern int usbnet_open(struct net_device *net);
 extern int usbnet_stop(struct net_device *net);
 extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb,
index 2d67b89..049b2f4 100644 (file)
@@ -78,19 +78,6 @@ extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
 extern void vfio_unregister_iommu_driver(
                                const struct vfio_iommu_driver_ops *ops);
 
-/**
- * offsetofend(TYPE, MEMBER)
- *
- * @TYPE: The type of the structure
- * @MEMBER: The member within the structure to get the end offset of
- *
- * Simple helper macro for dealing with variable sized structures passed
- * from user space.  This allows us to easily determine if the provided
- * structure is sized to include various fields.
- */
-#define offsetofend(TYPE, MEMBER) \
-       (offsetof(TYPE, MEMBER) + sizeof(((TYPE *)0)->MEMBER))
-
 /*
  * External user API
  */
index 7d7acb3..0ec5983 100644 (file)
@@ -17,6 +17,7 @@ struct vm_area_struct;                /* vma defining user mapping in mm_types.h */
 #define VM_VPAGES              0x00000010      /* buffer for pages was vmalloc'ed */
 #define VM_UNINITIALIZED       0x00000020      /* vm_struct is not fully initialized */
 #define VM_NO_GUARD            0x00000040      /* don't add guard page */
+#define VM_KASAN               0x00000080      /* has allocated kasan shadow memory */
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*
index 74db135..f597846 100644 (file)
@@ -70,7 +70,8 @@ enum {
        /* data contains off-queue information when !WORK_STRUCT_PWQ */
        WORK_OFFQ_FLAG_BASE     = WORK_STRUCT_COLOR_SHIFT,
 
-       WORK_OFFQ_CANCELING     = (1 << WORK_OFFQ_FLAG_BASE),
+       __WORK_OFFQ_CANCELING   = WORK_OFFQ_FLAG_BASE,
+       WORK_OFFQ_CANCELING     = (1 << __WORK_OFFQ_CANCELING),
 
        /*
         * When a work item is off queue, its high bits point to the last
index 0004833..b2dd371 100644 (file)
@@ -130,6 +130,7 @@ extern int vm_dirty_ratio;
 extern unsigned long vm_dirty_bytes;
 extern unsigned int dirty_writeback_interval;
 extern unsigned int dirty_expire_interval;
+extern unsigned int dirtytime_expire_interval;
 extern int vm_highmem_is_dirtyable;
 extern int block_dump;
 extern int laptop_mode;
@@ -146,6 +147,8 @@ extern int dirty_ratio_handler(struct ctl_table *table, int write,
 extern int dirty_bytes_handler(struct ctl_table *table, int write,
                void __user *buffer, size_t *lenp,
                loff_t *ppos);
+int dirtytime_interval_handler(struct ctl_table *table, int write,
+                              void __user *buffer, size_t *lenp, loff_t *ppos);
 
 struct ctl_table;
 int dirty_writeback_centisecs_handler(struct ctl_table *, int,
index c2e5703..6008b09 100644 (file)
 #define                ISI_CFG1_FRATE_DIV_MASK         (7 << 8)
 #define ISI_CFG1_DISCR                         (1 << 11)
 #define ISI_CFG1_FULL_MODE                     (1 << 12)
+/* Definition for THMASK(ISI_V2) */
+#define                ISI_CFG1_THMASK_BEATS_4         (0 << 13)
+#define                ISI_CFG1_THMASK_BEATS_8         (1 << 13)
+#define                ISI_CFG1_THMASK_BEATS_16        (2 << 13)
 
 /* Bitfields in CFG2 */
 #define ISI_CFG2_GRAYSCALE                     (1 << 13)
index a8ae4e7..0fb99a2 100644 (file)
@@ -481,6 +481,7 @@ void dst_init(void);
 enum {
        XFRM_LOOKUP_ICMP = 1 << 0,
        XFRM_LOOKUP_QUEUE = 1 << 1,
+       XFRM_LOOKUP_KEEP_DST_REF = 1 << 2,
 };
 
 struct flowi;
index 025c61c..6cc1eaf 100644 (file)
@@ -453,22 +453,6 @@ static __inline__ void inet_reset_saddr(struct sock *sk)
 
 #endif
 
-static inline int sk_mc_loop(struct sock *sk)
-{
-       if (!sk)
-               return 1;
-       switch (sk->sk_family) {
-       case AF_INET:
-               return inet_sk(sk)->mc_loop;
-#if IS_ENABLED(CONFIG_IPV6)
-       case AF_INET6:
-               return inet6_sk(sk)->mc_loop;
-#endif
-       }
-       WARN_ON(1);
-       return 1;
-}
-
 bool ip_call_ra_chain(struct sk_buff *skb);
 
 /*
index 1d09b46..eda131d 100644 (file)
@@ -174,7 +174,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 
 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 {
-       struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+       struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
+                               inet6_sk(skb->sk) : NULL;
 
        return (np && np->pmtudisc >= IPV6_PMTUDISC_PROBE) ?
               skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
index 534e1f2..57639fc 100644 (file)
@@ -79,6 +79,16 @@ void nf_log_packet(struct net *net,
                   const struct nf_loginfo *li,
                   const char *fmt, ...);
 
+__printf(8, 9)
+void nf_log_trace(struct net *net,
+                 u_int8_t pf,
+                 unsigned int hooknum,
+                 const struct sk_buff *skb,
+                 const struct net_device *in,
+                 const struct net_device *out,
+                 const struct nf_loginfo *li,
+                 const char *fmt, ...);
+
 struct nf_log_buf;
 
 struct nf_log_buf *nf_log_buf_open(void);
index 9eaaa78..decb9a0 100644 (file)
@@ -119,6 +119,22 @@ int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg,
                           const struct nft_data *data,
                           enum nft_data_types type);
 
+
+/**
+ *     struct nft_userdata - user defined data associated with an object
+ *
+ *     @len: length of the data
+ *     @data: content
+ *
+ *     The presence of user data is indicated in an object specific fashion,
+ *     so a length of zero can't occur and the value "len" indicates data
+ *     of length len + 1.
+ */
+struct nft_userdata {
+       u8                      len;
+       unsigned char           data[0];
+};
+
 /**
  *     struct nft_set_elem - generic representation of set elements
  *
@@ -380,7 +396,7 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
  *     @handle: rule handle
  *     @genmask: generation mask
  *     @dlen: length of expression data
- *     @ulen: length of user data (used for comments)
+ *     @udata: user data is appended to the rule
  *     @data: expression data
  */
 struct nft_rule {
@@ -388,7 +404,7 @@ struct nft_rule {
        u64                             handle:42,
                                        genmask:2,
                                        dlen:12,
-                                       ulen:8;
+                                       udata:1;
        unsigned char                   data[]
                __attribute__((aligned(__alignof__(struct nft_expr))));
 };
@@ -476,7 +492,7 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
        return (struct nft_expr *)&rule->data[rule->dlen];
 }
 
-static inline void *nft_userdata(const struct nft_rule *rule)
+static inline struct nft_userdata *nft_userdata(const struct nft_rule *rule)
 {
        return (void *)&rule->data[rule->dlen];
 }
index ab186b1..e4079c2 100644 (file)
@@ -1762,6 +1762,8 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
 
 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie);
 
+bool sk_mc_loop(struct sock *sk);
+
 static inline bool sk_can_gso(const struct sock *sk)
 {
        return net_gso_ok(sk->sk_route_caps, sk->sk_gso_type);
index eabd3a0..c73e7ab 100644 (file)
@@ -91,6 +91,7 @@ struct vxlanhdr {
 
 #define VXLAN_N_VID     (1u << 24)
 #define VXLAN_VID_MASK  (VXLAN_N_VID - 1)
+#define VXLAN_VNI_MASK  (VXLAN_VID_MASK << 8)
 #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
 
 struct vxlan_metadata {
index 0210797..dc10c52 100644 (file)
@@ -92,7 +92,7 @@
 #define                AT91_DDRSDRC_UPD_MR     (3 << 20)        /* Update load mode register and extended mode register */
 
 #define AT91_DDRSDRC_MDR       0x20    /* Memory Device Register */
-#define                AT91_DDRSDRC_MD         (3 << 0)                /* Memory Device Type */
+#define                AT91_DDRSDRC_MD         (7 << 0)        /* Memory Device Type */
 #define                        AT91_DDRSDRC_MD_SDR             0
 #define                        AT91_DDRSDRC_MD_LOW_POWER_SDR   1
 #define                        AT91_DDRSDRC_MD_LOW_POWER_DDR   3
index db81c65..d61be72 100644 (file)
@@ -111,6 +111,7 @@ void        array_free(void *array, int n);
 void   target_core_setup_sub_cits(struct se_subsystem_api *);
 
 /* attribute helpers from target_core_device.c for backend drivers */
+bool   se_dev_check_wce(struct se_device *);
 int    se_dev_set_max_unmap_lba_count(struct se_device *, u32);
 int    se_dev_set_max_unmap_block_desc_count(struct se_device *, u32);
 int    se_dev_set_unmap_granularity(struct se_device *, u32);
index 23d5615..22317d2 100644 (file)
@@ -7,27 +7,26 @@
 #include <linux/ktime.h>
 #include <linux/tracepoint.h>
 
-struct device;
-struct regmap;
+#include "../../../drivers/base/regmap/internal.h"
 
 /*
  * Log register events
  */
 DECLARE_EVENT_CLASS(regmap_reg,
 
-       TP_PROTO(struct device *dev, unsigned int reg,
+       TP_PROTO(struct regmap *map, unsigned int reg,
                 unsigned int val),
 
-       TP_ARGS(dev, reg, val),
+       TP_ARGS(map, reg, val),
 
        TP_STRUCT__entry(
-               __string(       name,           dev_name(dev)   )
-               __field(        unsigned int,   reg             )
-               __field(        unsigned int,   val             )
+               __string(       name,           regmap_name(map)        )
+               __field(        unsigned int,   reg                     )
+               __field(        unsigned int,   val                     )
        ),
 
        TP_fast_assign(
-               __assign_str(name, dev_name(dev));
+               __assign_str(name, regmap_name(map));
                __entry->reg = reg;
                __entry->val = val;
        ),
@@ -39,45 +38,45 @@ DECLARE_EVENT_CLASS(regmap_reg,
 
 DEFINE_EVENT(regmap_reg, regmap_reg_write,
 
-       TP_PROTO(struct device *dev, unsigned int reg,
+       TP_PROTO(struct regmap *map, unsigned int reg,
                 unsigned int val),
 
-       TP_ARGS(dev, reg, val)
+       TP_ARGS(map, reg, val)
 
 );
 
 DEFINE_EVENT(regmap_reg, regmap_reg_read,
 
-       TP_PROTO(struct device *dev, unsigned int reg,
+       TP_PROTO(struct regmap *map, unsigned int reg,
                 unsigned int val),
 
-       TP_ARGS(dev, reg, val)
+       TP_ARGS(map, reg, val)
 
 );
 
 DEFINE_EVENT(regmap_reg, regmap_reg_read_cache,
 
-       TP_PROTO(struct device *dev, unsigned int reg,
+       TP_PROTO(struct regmap *map, unsigned int reg,
                 unsigned int val),
 
-       TP_ARGS(dev, reg, val)
+       TP_ARGS(map, reg, val)
 
 );
 
 DECLARE_EVENT_CLASS(regmap_block,
 
-       TP_PROTO(struct device *dev, unsigned int reg, int count),
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-       TP_ARGS(dev, reg, count),
+       TP_ARGS(map, reg, count),
 
        TP_STRUCT__entry(
-               __string(       name,           dev_name(dev)   )
-               __field(        unsigned int,   reg             )
-               __field(        int,            count           )
+               __string(       name,           regmap_name(map)        )
+               __field(        unsigned int,   reg                     )
+               __field(        int,            count                   )
        ),
 
        TP_fast_assign(
-               __assign_str(name, dev_name(dev));
+               __assign_str(name, regmap_name(map));
                __entry->reg = reg;
                __entry->count = count;
        ),
@@ -89,48 +88,48 @@ DECLARE_EVENT_CLASS(regmap_block,
 
 DEFINE_EVENT(regmap_block, regmap_hw_read_start,
 
-       TP_PROTO(struct device *dev, unsigned int reg, int count),
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-       TP_ARGS(dev, reg, count)
+       TP_ARGS(map, reg, count)
 );
 
 DEFINE_EVENT(regmap_block, regmap_hw_read_done,
 
-       TP_PROTO(struct device *dev, unsigned int reg, int count),
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-       TP_ARGS(dev, reg, count)
+       TP_ARGS(map, reg, count)
 );
 
 DEFINE_EVENT(regmap_block, regmap_hw_write_start,
 
-       TP_PROTO(struct device *dev, unsigned int reg, int count),
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-       TP_ARGS(dev, reg, count)
+       TP_ARGS(map, reg, count)
 );
 
 DEFINE_EVENT(regmap_block, regmap_hw_write_done,
 
-       TP_PROTO(struct device *dev, unsigned int reg, int count),
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-       TP_ARGS(dev, reg, count)
+       TP_ARGS(map, reg, count)
 );
 
 TRACE_EVENT(regcache_sync,
 
-       TP_PROTO(struct device *dev, const char *type,
+       TP_PROTO(struct regmap *map, const char *type,
                 const char *status),
 
-       TP_ARGS(dev, type, status),
+       TP_ARGS(map, type, status),
 
        TP_STRUCT__entry(
-               __string(       name,           dev_name(dev)   )
-               __string(       status,         status          )
-               __string(       type,           type            )
-               __field(        int,            type            )
+               __string(       name,           regmap_name(map)        )
+               __string(       status,         status                  )
+               __string(       type,           type                    )
+               __field(        int,            type                    )
        ),
 
        TP_fast_assign(
-               __assign_str(name, dev_name(dev));
+               __assign_str(name, regmap_name(map));
                __assign_str(status, status);
                __assign_str(type, type);
        ),
@@ -141,17 +140,17 @@ TRACE_EVENT(regcache_sync,
 
 DECLARE_EVENT_CLASS(regmap_bool,
 
-       TP_PROTO(struct device *dev, bool flag),
+       TP_PROTO(struct regmap *map, bool flag),
 
-       TP_ARGS(dev, flag),
+       TP_ARGS(map, flag),
 
        TP_STRUCT__entry(
-               __string(       name,           dev_name(dev)   )
-               __field(        int,            flag            )
+               __string(       name,           regmap_name(map)        )
+               __field(        int,            flag                    )
        ),
 
        TP_fast_assign(
-               __assign_str(name, dev_name(dev));
+               __assign_str(name, regmap_name(map));
                __entry->flag = flag;
        ),
 
@@ -161,32 +160,32 @@ DECLARE_EVENT_CLASS(regmap_bool,
 
 DEFINE_EVENT(regmap_bool, regmap_cache_only,
 
-       TP_PROTO(struct device *dev, bool flag),
+       TP_PROTO(struct regmap *map, bool flag),
 
-       TP_ARGS(dev, flag)
+       TP_ARGS(map, flag)
 
 );
 
 DEFINE_EVENT(regmap_bool, regmap_cache_bypass,
 
-       TP_PROTO(struct device *dev, bool flag),
+       TP_PROTO(struct regmap *map, bool flag),
 
-       TP_ARGS(dev, flag)
+       TP_ARGS(map, flag)
 
 );
 
 DECLARE_EVENT_CLASS(regmap_async,
 
-       TP_PROTO(struct device *dev),
+       TP_PROTO(struct regmap *map),
 
-       TP_ARGS(dev),
+       TP_ARGS(map),
 
        TP_STRUCT__entry(
-               __string(       name,           dev_name(dev)   )
+               __string(       name,           regmap_name(map)        )
        ),
 
        TP_fast_assign(
-               __assign_str(name, dev_name(dev));
+               __assign_str(name, regmap_name(map));
        ),
 
        TP_printk("%s", __get_str(name))
@@ -194,50 +193,50 @@ DECLARE_EVENT_CLASS(regmap_async,
 
 DEFINE_EVENT(regmap_block, regmap_async_write_start,
 
-       TP_PROTO(struct device *dev, unsigned int reg, int count),
+       TP_PROTO(struct regmap *map, unsigned int reg, int count),
 
-       TP_ARGS(dev, reg, count)
+       TP_ARGS(map, reg, count)
 );
 
 DEFINE_EVENT(regmap_async, regmap_async_io_complete,
 
-       TP_PROTO(struct device *dev),
+       TP_PROTO(struct regmap *map),
 
-       TP_ARGS(dev)
+       TP_ARGS(map)
 
 );
 
 DEFINE_EVENT(regmap_async, regmap_async_complete_start,
 
-       TP_PROTO(struct device *dev),
+       TP_PROTO(struct regmap *map),
 
-       TP_ARGS(dev)
+       TP_ARGS(map)
 
 );
 
 DEFINE_EVENT(regmap_async, regmap_async_complete_done,
 
-       TP_PROTO(struct device *dev),
+       TP_PROTO(struct regmap *map),
 
-       TP_ARGS(dev)
+       TP_ARGS(map)
 
 );
 
 TRACE_EVENT(regcache_drop_region,
 
-       TP_PROTO(struct device *dev, unsigned int from,
+       TP_PROTO(struct regmap *map, unsigned int from,
                 unsigned int to),
 
-       TP_ARGS(dev, from, to),
+       TP_ARGS(map, from, to),
 
        TP_STRUCT__entry(
-               __string(       name,           dev_name(dev)   )
-               __field(        unsigned int,   from            )
-               __field(        unsigned int,   to              )
+               __string(       name,           regmap_name(map)        )
+               __field(        unsigned int,   from                    )
+               __field(        unsigned int,   to                      )
        ),
 
        TP_fast_assign(
-               __assign_str(name, dev_name(dev));
+               __assign_str(name, regmap_name(map));
                __entry->from = from;
                __entry->to = to;
        ),
index b0a8130..2f62ab2 100644 (file)
@@ -973,7 +973,8 @@ struct input_keymap_entry {
  */
 #define MT_TOOL_FINGER         0
 #define MT_TOOL_PEN            1
-#define MT_TOOL_MAX            1
+#define MT_TOOL_PALM           2
+#define MT_TOOL_MAX            2
 
 /*
  * Values describing the status of a force-feedback effect
index 8055706..f574d7b 100644 (file)
@@ -147,6 +147,16 @@ struct kvm_pit_config {
 
 #define KVM_PIT_SPEAKER_DUMMY     1
 
+struct kvm_s390_skeys {
+       __u64 start_gfn;
+       __u64 count;
+       __u64 skeydata_addr;
+       __u32 flags;
+       __u32 reserved[9];
+};
+#define KVM_S390_GET_SKEYS_NONE   1
+#define KVM_S390_SKEYS_MAX        1048576
+
 #define KVM_EXIT_UNKNOWN          0
 #define KVM_EXIT_EXCEPTION        1
 #define KVM_EXIT_IO               2
@@ -172,6 +182,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_S390_TSCH        22
 #define KVM_EXIT_EPR              23
 #define KVM_EXIT_SYSTEM_EVENT     24
+#define KVM_EXIT_S390_STSI        25
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -309,6 +320,15 @@ struct kvm_run {
                        __u32 type;
                        __u64 flags;
                } system_event;
+               /* KVM_EXIT_S390_STSI */
+               struct {
+                       __u64 addr;
+                       __u8 ar;
+                       __u8 reserved;
+                       __u8 fc;
+                       __u8 sel1;
+                       __u16 sel2;
+               } s390_stsi;
                /* Fix the size of the union. */
                char padding[256];
        };
@@ -324,7 +344,7 @@ struct kvm_run {
        __u64 kvm_dirty_regs;
        union {
                struct kvm_sync_regs regs;
-               char padding[1024];
+               char padding[2048];
        } s;
 };
 
@@ -365,6 +385,24 @@ struct kvm_translation {
        __u8  pad[5];
 };
 
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+       /* in */
+       __u64 gaddr;            /* the guest address */
+       __u64 flags;            /* flags */
+       __u32 size;             /* amount of bytes */
+       __u32 op;               /* type of operation */
+       __u64 buf;              /* buffer in userspace */
+       __u8 ar;                /* the access register number */
+       __u8 reserved[31];      /* should be set to 0 */
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ    0
+#define KVM_S390_MEMOP_LOGICAL_WRITE   1
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY            (1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION      (1ULL << 1)
+
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
        /* in */
@@ -520,6 +558,13 @@ struct kvm_s390_irq {
        } u;
 };
 
+struct kvm_s390_irq_state {
+       __u64 buf;
+       __u32 flags;
+       __u32 len;
+       __u32 reserved[4];
+};
+
 /* for KVM_SET_GUEST_DEBUG */
 
 #define KVM_GUESTDBG_ENABLE            0x00000001
@@ -760,6 +805,14 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_CHECK_EXTENSION_VM 105
 #define KVM_CAP_S390_USER_SIGP 106
+#define KVM_CAP_S390_VECTOR_REGISTERS 107
+#define KVM_CAP_S390_MEM_OP 108
+#define KVM_CAP_S390_USER_STSI 109
+#define KVM_CAP_S390_SKEYS 110
+#define KVM_CAP_MIPS_FPU 111
+#define KVM_CAP_MIPS_MSA 112
+#define KVM_CAP_S390_INJECT_IRQ 113
+#define KVM_CAP_S390_IRQ_STATE 114
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1135,6 +1188,16 @@ struct kvm_s390_ucas_mapping {
 #define KVM_ARM_VCPU_INIT        _IOW(KVMIO,  0xae, struct kvm_vcpu_init)
 #define KVM_ARM_PREFERRED_TARGET  _IOR(KVMIO,  0xaf, struct kvm_vcpu_init)
 #define KVM_GET_REG_LIST         _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
+/* Available with KVM_CAP_S390_MEM_OP */
+#define KVM_S390_MEM_OP                  _IOW(KVMIO,  0xb1, struct kvm_s390_mem_op)
+/* Available with KVM_CAP_S390_SKEYS */
+#define KVM_S390_GET_SKEYS      _IOW(KVMIO, 0xb2, struct kvm_s390_skeys)
+#define KVM_S390_SET_SKEYS      _IOW(KVMIO, 0xb3, struct kvm_s390_skeys)
+/* Available with KVM_CAP_S390_INJECT_IRQ */
+#define KVM_S390_IRQ              _IOW(KVMIO,  0xb4, struct kvm_s390_irq)
+/* Available with KVM_CAP_S390_IRQ_STATE */
+#define KVM_S390_SET_IRQ_STATE   _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
+#define KVM_S390_GET_IRQ_STATE   _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3         (1 << 1)
index 4742f2c..d3bd6ff 100644 (file)
@@ -47,7 +47,7 @@
  * exported filesystem.
  */
 #define        NFSEXP_V4ROOT           0x10000
-#define NFSEXP_NOPNFS          0x20000
+#define NFSEXP_PNFS            0x20000
 
 /* All flags that we claim to support.  (Note we don't support NOACL.) */
 #define NFSEXP_ALLFLAGS                0x3FE7F
index 5e0d0ed..25331f9 100644 (file)
@@ -65,6 +65,10 @@ struct serial_struct {
 #define SERIAL_IO_PORT 0
 #define SERIAL_IO_HUB6 1
 #define SERIAL_IO_MEM  2
+#define SERIAL_IO_MEM32          3
+#define SERIAL_IO_AU     4
+#define SERIAL_IO_TSI    5
+#define SERIAL_IO_MEM32BE 6
 
 #define UART_CLEAR_FIFO                0x01
 #define UART_USE_FIFO          0x02
index 3c53eec..19c66fc 100644 (file)
@@ -60,7 +60,7 @@ struct virtio_blk_config {
        __u32 size_max;
        /* The maximum number of segments (if VIRTIO_BLK_F_SEG_MAX) */
        __u32 seg_max;
-       /* geometry the device (if VIRTIO_BLK_F_GEOMETRY) */
+       /* geometry of the device (if VIRTIO_BLK_F_GEOMETRY) */
        struct virtio_blk_geometry {
                __u16 cylinders;
                __u8 heads;
@@ -119,7 +119,11 @@ struct virtio_blk_config {
 #define VIRTIO_BLK_T_BARRIER   0x80000000
 #endif /* !VIRTIO_BLK_NO_LEGACY */
 
-/* This is the first element of the read scatter-gather list. */
+/*
+ * This comes first in the read scatter-gather list.
+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated,
+ * this is the first element of the read scatter-gather list.
+ */
 struct virtio_blk_outhdr {
        /* VIRTIO_BLK_T* */
        __virtio32 type;
index 42b9370..cc18ef8 100644 (file)
 
 #include <linux/virtio_types.h>
 
-#define VIRTIO_SCSI_CDB_SIZE   32
-#define VIRTIO_SCSI_SENSE_SIZE 96
+/* Default values of the CDB and sense data size configuration fields */
+#define VIRTIO_SCSI_CDB_DEFAULT_SIZE   32
+#define VIRTIO_SCSI_SENSE_DEFAULT_SIZE 96
+
+#ifndef VIRTIO_SCSI_CDB_SIZE
+#define VIRTIO_SCSI_CDB_SIZE VIRTIO_SCSI_CDB_DEFAULT_SIZE
+#endif
+#ifndef VIRTIO_SCSI_SENSE_SIZE
+#define VIRTIO_SCSI_SENSE_SIZE VIRTIO_SCSI_SENSE_DEFAULT_SIZE
+#endif
 
 /* SCSI command request, followed by data-out */
 struct virtio_scsi_cmd_req {
index 60de61f..c8ed15d 100644 (file)
@@ -689,6 +689,7 @@ struct omapdss_dsi_ops {
 };
 
 struct omap_dss_device {
+       struct kobject kobj;
        struct device *dev;
 
        struct module *owner;
index b78f21c..b0f1c9e 100644 (file)
@@ -114,9 +114,9 @@ int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
                                           const char *mod_name);
 
 #define xenbus_register_frontend(drv) \
-       __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME);
+       __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME)
 #define xenbus_register_backend(drv) \
-       __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME);
+       __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME)
 
 void xenbus_unregister_driver(struct xenbus_driver *drv);
 
index 1972b16..82eea9c 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/gfp.h>
 #include <linux/suspend.h>
 #include <linux/lockdep.h>
+#include <linux/tick.h>
 #include <trace/events/power.h>
 
 #include "smpboot.h"
@@ -338,6 +339,8 @@ static int __ref take_cpu_down(void *_param)
                return err;
 
        cpu_notify(CPU_DYING | param->mod, param->hcpu);
+       /* Give up timekeeping duties */
+       tick_handover_do_timer();
        /* Park the stopper thread */
        kthread_park(current);
        return 0;
@@ -411,10 +414,12 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
        while (!idle_cpu(cpu))
                cpu_relax();
 
+       hotplug_cpu__broadcast_tick_pull(cpu);
        /* This actually kills the CPU. */
        __cpu_die(cpu);
 
        /* CPU is completely dead: tell everyone.  Too late to complain. */
+       tick_cleanup_dead_cpu(cpu);
        cpu_notify_nofail(CPU_DEAD | mod, hcpu);
 
        check_for_tasks(cpu);
index 1d1fe93..fc7f474 100644 (file)
@@ -548,9 +548,6 @@ static void update_domain_attr_tree(struct sched_domain_attr *dattr,
 
        rcu_read_lock();
        cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
-               if (cp == root_cs)
-                       continue;
-
                /* skip the whole subtree if @cp doesn't have any CPU */
                if (cpumask_empty(cp->cpus_allowed)) {
                        pos_css = css_rightmost_descendant(pos_css);
@@ -873,7 +870,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
                 * If it becomes empty, inherit the effective mask of the
                 * parent, which is guaranteed to have some CPUs.
                 */
-               if (cpumask_empty(new_cpus))
+               if (cgroup_on_dfl(cp->css.cgroup) && cpumask_empty(new_cpus))
                        cpumask_copy(new_cpus, parent->effective_cpus);
 
                /* Skip the whole subtree if the cpumask remains the same. */
@@ -1129,7 +1126,7 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
                 * If it becomes empty, inherit the effective mask of the
                 * parent, which is guaranteed to have some MEMs.
                 */
-               if (nodes_empty(*new_mems))
+               if (cgroup_on_dfl(cp->css.cgroup) && nodes_empty(*new_mems))
                        *new_mems = parent->effective_mems;
 
                /* Skip the whole subtree if the nodemask remains the same. */
@@ -1979,7 +1976,9 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 
        spin_lock_irq(&callback_lock);
        cs->mems_allowed = parent->mems_allowed;
+       cs->effective_mems = parent->mems_allowed;
        cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+       cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
        spin_unlock_irq(&callback_lock);
 out_unlock:
        mutex_unlock(&cpuset_mutex);
index f04daab..2fabc06 100644 (file)
@@ -3591,7 +3591,7 @@ static void put_event(struct perf_event *event)
        ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
        WARN_ON_ONCE(ctx->parent_ctx);
        perf_remove_from_context(event, true);
-       mutex_unlock(&ctx->mutex);
+       perf_event_ctx_unlock(event, ctx);
 
        _free_event(event);
 }
@@ -4574,6 +4574,13 @@ static void perf_pending_event(struct irq_work *entry)
 {
        struct perf_event *event = container_of(entry,
                        struct perf_event, pending);
+       int rctx;
+
+       rctx = perf_swevent_get_recursion_context();
+       /*
+        * If we 'fail' here, that's OK, it means recursion is already disabled
+        * and we won't recurse 'further'.
+        */
 
        if (event->pending_disable) {
                event->pending_disable = 0;
@@ -4584,6 +4591,9 @@ static void perf_pending_event(struct irq_work *entry)
                event->pending_wakeup = 0;
                perf_event_wakeup(event);
        }
+
+       if (rctx >= 0)
+               perf_swevent_put_recursion_context(rctx);
 }
 
 /*
index 2a5e383..2579e40 100644 (file)
@@ -900,7 +900,7 @@ static int attach_to_pi_owner(u32 uval, union futex_key *key,
        if (!p)
                return -ESRCH;
 
-       if (!p->mm) {
+       if (unlikely(p->flags & PF_KTHREAD)) {
                put_task_struct(p);
                return -EPERM;
        }
index 01ca088..3f9f1d6 100644 (file)
@@ -89,16 +89,28 @@ static bool klp_is_object_loaded(struct klp_object *obj)
 /* sets obj->mod if object is not vmlinux and module is found */
 static void klp_find_object_module(struct klp_object *obj)
 {
+       struct module *mod;
+
        if (!klp_is_module(obj))
                return;
 
        mutex_lock(&module_mutex);
        /*
-        * We don't need to take a reference on the module here because we have
-        * the klp_mutex, which is also taken by the module notifier.  This
-        * prevents any module from unloading until we release the klp_mutex.
+        * We do not want to block removal of patched modules and therefore
+        * we do not take a reference here. The patches are removed by
+        * a going module handler instead.
+        */
+       mod = find_module(obj->name);
+       /*
+        * Do not interfere with the module coming and going notifiers.
+        * Note that the patch might still be needed before the going handler
+        * is called. Module functions can be called even in the GOING state
+        * until mod->exit() finishes. This is especially important for
+        * patches that modify the semantics of the functions.
         */
-       obj->mod = find_module(obj->name);
+       if (mod && mod->klp_alive)
+               obj->mod = mod;
+
        mutex_unlock(&module_mutex);
 }
 
@@ -767,6 +779,7 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj)
                return -EINVAL;
 
        obj->state = KLP_DISABLED;
+       obj->mod = NULL;
 
        klp_find_object_module(obj);
 
@@ -961,6 +974,15 @@ static int klp_module_notify(struct notifier_block *nb, unsigned long action,
 
        mutex_lock(&klp_mutex);
 
+       /*
+        * Each module has to know that the notifier has been called.
+        * We never know what module will get patched by a new patch.
+        */
+       if (action == MODULE_STATE_COMING)
+               mod->klp_alive = true;
+       else /* MODULE_STATE_GOING */
+               mod->klp_alive = false;
+
        list_for_each_entry(patch, &klp_patches, list) {
                for (obj = patch->objs; obj->funcs; obj++) {
                        if (!klp_is_module(obj) || strcmp(obj->name, mod->name))
index 88d0d44..ba77ab5 100644 (file)
@@ -633,7 +633,7 @@ static int count_matching_names(struct lock_class *new_class)
        if (!new_class->name)
                return 0;
 
-       list_for_each_entry(class, &all_lock_classes, lock_entry) {
+       list_for_each_entry_rcu(class, &all_lock_classes, lock_entry) {
                if (new_class->key - new_class->subclass == class->key)
                        return class->name_version;
                if (class->name && !strcmp(class->name, new_class->name))
@@ -700,10 +700,12 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
        hash_head = classhashentry(key);
 
        /*
-        * We can walk the hash lockfree, because the hash only
-        * grows, and we are careful when adding entries to the end:
+        * We do an RCU walk of the hash, see lockdep_free_key_range().
         */
-       list_for_each_entry(class, hash_head, hash_entry) {
+       if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+               return NULL;
+
+       list_for_each_entry_rcu(class, hash_head, hash_entry) {
                if (class->key == key) {
                        /*
                         * Huh! same key, different name? Did someone trample
@@ -728,7 +730,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
        struct lockdep_subclass_key *key;
        struct list_head *hash_head;
        struct lock_class *class;
-       unsigned long flags;
+
+       DEBUG_LOCKS_WARN_ON(!irqs_disabled());
 
        class = look_up_lock_class(lock, subclass);
        if (likely(class))
@@ -750,28 +753,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
        key = lock->key->subkeys + subclass;
        hash_head = classhashentry(key);
 
-       raw_local_irq_save(flags);
        if (!graph_lock()) {
-               raw_local_irq_restore(flags);
                return NULL;
        }
        /*
         * We have to do the hash-walk again, to avoid races
         * with another CPU:
         */
-       list_for_each_entry(class, hash_head, hash_entry)
+       list_for_each_entry_rcu(class, hash_head, hash_entry) {
                if (class->key == key)
                        goto out_unlock_set;
+       }
+
        /*
         * Allocate a new key from the static array, and add it to
         * the hash:
         */
        if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
                if (!debug_locks_off_graph_unlock()) {
-                       raw_local_irq_restore(flags);
                        return NULL;
                }
-               raw_local_irq_restore(flags);
 
                print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
                dump_stack();
@@ -798,7 +799,6 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 
        if (verbose(class)) {
                graph_unlock();
-               raw_local_irq_restore(flags);
 
                printk("\nnew class %p: %s", class->key, class->name);
                if (class->name_version > 1)
@@ -806,15 +806,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
                printk("\n");
                dump_stack();
 
-               raw_local_irq_save(flags);
                if (!graph_lock()) {
-                       raw_local_irq_restore(flags);
                        return NULL;
                }
        }
 out_unlock_set:
        graph_unlock();
-       raw_local_irq_restore(flags);
 
 out_set_class_cache:
        if (!subclass || force)
@@ -870,11 +867,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
        entry->distance = distance;
        entry->trace = *trace;
        /*
-        * Since we never remove from the dependency list, the list can
-        * be walked lockless by other CPUs, it's only allocation
-        * that must be protected by the spinlock. But this also means
-        * we must make new entries visible only once writes to the
-        * entry become visible - hence the RCU op:
+        * Both allocation and removal are done under the graph lock; but
+        * iteration is under RCU-sched; see look_up_lock_class() and
+        * lockdep_free_key_range().
         */
        list_add_tail_rcu(&entry->entry, head);
 
@@ -1025,7 +1020,9 @@ static int __bfs(struct lock_list *source_entry,
                else
                        head = &lock->class->locks_before;
 
-               list_for_each_entry(entry, head, entry) {
+               DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
+               list_for_each_entry_rcu(entry, head, entry) {
                        if (!lock_accessed(entry)) {
                                unsigned int cq_depth;
                                mark_lock_accessed(entry, lock);
@@ -2022,7 +2019,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
         * We can walk it lock-free, because entries only get added
         * to the hash:
         */
-       list_for_each_entry(chain, hash_head, entry) {
+       list_for_each_entry_rcu(chain, hash_head, entry) {
                if (chain->chain_key == chain_key) {
 cache_hit:
                        debug_atomic_inc(chain_lookup_hits);
@@ -2996,8 +2993,18 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
        if (unlikely(!debug_locks))
                return;
 
-       if (subclass)
+       if (subclass) {
+               unsigned long flags;
+
+               if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion))
+                       return;
+
+               raw_local_irq_save(flags);
+               current->lockdep_recursion = 1;
                register_lock_class(lock, subclass, 1);
+               current->lockdep_recursion = 0;
+               raw_local_irq_restore(flags);
+       }
 }
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
@@ -3887,9 +3894,17 @@ static inline int within(const void *addr, void *start, unsigned long size)
        return addr >= start && addr < start + size;
 }
 
+/*
+ * Used in module.c to remove lock classes from memory that is going to be
+ * freed; and possibly re-used by other modules.
+ *
+ * We will have had one sync_sched() before getting here, so we're guaranteed
+ * nobody will look up these exact classes -- they're properly dead but still
+ * allocated.
+ */
 void lockdep_free_key_range(void *start, unsigned long size)
 {
-       struct lock_class *class, *next;
+       struct lock_class *class;
        struct list_head *head;
        unsigned long flags;
        int i;
@@ -3905,7 +3920,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
                head = classhash_table + i;
                if (list_empty(head))
                        continue;
-               list_for_each_entry_safe(class, next, head, hash_entry) {
+               list_for_each_entry_rcu(class, head, hash_entry) {
                        if (within(class->key, start, size))
                                zap_class(class);
                        else if (within(class->name, start, size))
@@ -3916,11 +3931,25 @@ void lockdep_free_key_range(void *start, unsigned long size)
        if (locked)
                graph_unlock();
        raw_local_irq_restore(flags);
+
+       /*
+        * Wait for any possible iterators from look_up_lock_class() to pass
+        * before continuing to free the memory they refer to.
+        *
+        * sync_sched() is sufficient because the read side runs IRQ-disabled.
+        */
+       synchronize_sched();
+
+       /*
+        * XXX at this point we could return the resources to the pool;
+        * instead we leak them. We would need to change to bitmap allocators
+        * instead of the linear allocators we have now.
+        */
 }
 
 void lockdep_reset_lock(struct lockdep_map *lock)
 {
-       struct lock_class *class, *next;
+       struct lock_class *class;
        struct list_head *head;
        unsigned long flags;
        int i, j;
@@ -3948,7 +3977,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
                head = classhash_table + i;
                if (list_empty(head))
                        continue;
-               list_for_each_entry_safe(class, next, head, hash_entry) {
+               list_for_each_entry_rcu(class, head, hash_entry) {
                        int match = 0;
 
                        for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
index d1fe2ba..75e114b 100644 (file)
@@ -78,7 +78,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
                 */
                return;
        }
-       ACCESS_ONCE(prev->next) = node;
+       WRITE_ONCE(prev->next, node);
 
        /* Wait until the lock holder passes the lock down. */
        arch_mcs_spin_lock_contended(&node->locked);
@@ -91,7 +91,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 static inline
 void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 {
-       struct mcs_spinlock *next = ACCESS_ONCE(node->next);
+       struct mcs_spinlock *next = READ_ONCE(node->next);
 
        if (likely(!next)) {
                /*
@@ -100,7 +100,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
                if (likely(cmpxchg(lock, node, NULL) == node))
                        return;
                /* Wait until the next pointer is set */
-               while (!(next = ACCESS_ONCE(node->next)))
+               while (!(next = READ_ONCE(node->next)))
                        cpu_relax_lowlatency();
        }
 
index 94674e5..4cccea6 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/debug_locks.h>
-#include "mcs_spinlock.h"
+#include <linux/osq_lock.h>
 
 /*
  * In the DEBUG case we are using the "NULL fastpath" for mutexes,
@@ -217,44 +217,35 @@ ww_mutex_set_context_slowpath(struct ww_mutex *lock,
 }
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
-{
-       if (lock->owner != owner)
-               return false;
-
-       /*
-        * Ensure we emit the owner->on_cpu, dereference _after_ checking
-        * lock->owner still matches owner, if that fails, owner might
-        * point to free()d memory, if it still matches, the rcu_read_lock()
-        * ensures the memory stays valid.
-        */
-       barrier();
-
-       return owner->on_cpu;
-}
-
 /*
  * Look out! "owner" is an entirely speculative pointer
  * access and not reliable.
  */
 static noinline
-int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+bool mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
 {
+       bool ret = true;
+
        rcu_read_lock();
-       while (owner_running(lock, owner)) {
-               if (need_resched())
+       while (lock->owner == owner) {
+               /*
+                * Ensure we emit the owner->on_cpu, dereference _after_
+                * checking lock->owner still matches owner. If that fails,
+                * owner might point to freed memory. If it still matches,
+                * the rcu_read_lock() ensures the memory stays valid.
+                */
+               barrier();
+
+               if (!owner->on_cpu || need_resched()) {
+                       ret = false;
                        break;
+               }
 
                cpu_relax_lowlatency();
        }
        rcu_read_unlock();
 
-       /*
-        * We break out the loop above on need_resched() and when the
-        * owner changed, which is a sign for heavy contention. Return
-        * success only when lock->owner is NULL.
-        */
-       return lock->owner == NULL;
+       return ret;
 }
 
 /*
@@ -269,7 +260,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
                return 0;
 
        rcu_read_lock();
-       owner = ACCESS_ONCE(lock->owner);
+       owner = READ_ONCE(lock->owner);
        if (owner)
                retval = owner->on_cpu;
        rcu_read_unlock();
@@ -343,7 +334,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
                         * As such, when deadlock detection needs to be
                         * performed the optimistic spinning cannot be done.
                         */
-                       if (ACCESS_ONCE(ww->ctx))
+                       if (READ_ONCE(ww->ctx))
                                break;
                }
 
@@ -351,7 +342,7 @@ static bool mutex_optimistic_spin(struct mutex *lock,
                 * If there's an owner, wait for it to either
                 * release the lock or go to sleep.
                 */
-               owner = ACCESS_ONCE(lock->owner);
+               owner = READ_ONCE(lock->owner);
                if (owner && !mutex_spin_on_owner(lock, owner))
                        break;
 
@@ -490,7 +481,7 @@ static inline int __sched
 __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx)
 {
        struct ww_mutex *ww = container_of(lock, struct ww_mutex, base);
-       struct ww_acquire_ctx *hold_ctx = ACCESS_ONCE(ww->ctx);
+       struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx);
 
        if (!hold_ctx)
                return 0;
index c112d00..dc85ee2 100644 (file)
@@ -98,7 +98,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 
        prev = decode_cpu(old);
        node->prev = prev;
-       ACCESS_ONCE(prev->next) = node;
+       WRITE_ONCE(prev->next, node);
 
        /*
         * Normally @prev is untouchable after the above store; because at that
@@ -109,7 +109,7 @@ bool osq_lock(struct optimistic_spin_queue *lock)
         * cmpxchg in an attempt to undo our queueing.
         */
 
-       while (!ACCESS_ONCE(node->locked)) {
+       while (!READ_ONCE(node->locked)) {
                /*
                 * If we need to reschedule bail... so we can block.
                 */
@@ -148,7 +148,7 @@ unqueue:
                 * Or we race against a concurrent unqueue()'s step-B, in which
                 * case its step-C will write us a new @node->prev pointer.
                 */
-               prev = ACCESS_ONCE(node->prev);
+               prev = READ_ONCE(node->prev);
        }
 
        /*
@@ -170,8 +170,8 @@ unqueue:
         * it will wait in Step-A.
         */
 
-       ACCESS_ONCE(next->prev) = prev;
-       ACCESS_ONCE(prev->next) = next;
+       WRITE_ONCE(next->prev, prev);
+       WRITE_ONCE(prev->next, next);
 
        return false;
 }
@@ -193,11 +193,11 @@ void osq_unlock(struct optimistic_spin_queue *lock)
        node = this_cpu_ptr(&osq_node);
        next = xchg(&node->next, NULL);
        if (next) {
-               ACCESS_ONCE(next->locked) = 1;
+               WRITE_ONCE(next->locked, 1);
                return;
        }
 
        next = osq_wait_next(lock, node, NULL);
        if (next)
-               ACCESS_ONCE(next->locked) = 1;
+               WRITE_ONCE(next->locked, 1);
 }
index 6357265..b732793 100644 (file)
@@ -349,7 +349,7 @@ static inline struct rt_mutex *task_blocked_on_lock(struct task_struct *p)
  *
  * @task:      the task owning the mutex (owner) for which a chain walk is
  *             probably needed
- * @deadlock_detect: do we have to carry out deadlock detection?
+ * @chwalk:    do we have to carry out deadlock detection?
  * @orig_lock: the mutex (can be NULL if we are walking the chain to recheck
  *             things for a task that has just got its priority adjusted, and
  *             is waiting on a mutex)
index 2555ae1..3a50485 100644 (file)
@@ -85,6 +85,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 
                list_del(&waiter->list);
                tsk = waiter->task;
+               /*
+                * Make sure we do not wakeup the next reader before
+                * setting the nil condition to grant the next reader;
+                * otherwise we could miss the wakeup on the other
+                * side and end up sleeping again. See the pairing
+                * in rwsem_down_read_failed().
+                */
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
index 2f7cc40..3417d01 100644 (file)
@@ -14,8 +14,9 @@
 #include <linux/init.h>
 #include <linux/export.h>
 #include <linux/sched/rt.h>
+#include <linux/osq_lock.h>
 
-#include "mcs_spinlock.h"
+#include "rwsem.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -186,6 +187,13 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
                waiter = list_entry(next, struct rwsem_waiter, list);
                next = waiter->list.next;
                tsk = waiter->task;
+               /*
+                * Make sure we do not wakeup the next reader before
+                * setting the nil condition to grant the next reader;
+                * otherwise we could miss the wakeup on the other
+                * side and end up sleeping again. See the pairing
+                * in rwsem_down_read_failed().
+                */
                smp_mb();
                waiter->task = NULL;
                wake_up_process(tsk);
@@ -258,6 +266,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
                    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
                if (!list_is_singular(&sem->wait_list))
                        rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+               rwsem_set_owner(sem);
                return true;
        }
 
@@ -270,15 +279,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
  */
 static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 {
-       long old, count = ACCESS_ONCE(sem->count);
+       long old, count = READ_ONCE(sem->count);
 
        while (true) {
                if (!(count == 0 || count == RWSEM_WAITING_BIAS))
                        return false;
 
                old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
-               if (old == count)
+               if (old == count) {
+                       rwsem_set_owner(sem);
                        return true;
+               }
 
                count = old;
        }
@@ -287,60 +298,67 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
 {
        struct task_struct *owner;
-       bool on_cpu = false;
+       bool ret = true;
 
        if (need_resched())
                return false;
 
        rcu_read_lock();
-       owner = ACCESS_ONCE(sem->owner);
-       if (owner)
-               on_cpu = owner->on_cpu;
-       rcu_read_unlock();
-
-       /*
-        * If sem->owner is not set, yet we have just recently entered the
-        * slowpath, then there is a possibility reader(s) may have the lock.
-        * To be safe, avoid spinning in these situations.
-        */
-       return on_cpu;
-}
-
-static inline bool owner_running(struct rw_semaphore *sem,
-                                struct task_struct *owner)
-{
-       if (sem->owner != owner)
-               return false;
-
-       /*
-        * Ensure we emit the owner->on_cpu, dereference _after_ checking
-        * sem->owner still matches owner, if that fails, owner might
-        * point to free()d memory, if it still matches, the rcu_read_lock()
-        * ensures the memory stays valid.
-        */
-       barrier();
+       owner = READ_ONCE(sem->owner);
+       if (!owner) {
+               long count = READ_ONCE(sem->count);
+               /*
+                * If sem->owner is not set, yet we have just recently entered the
+                * slowpath with the lock being active, then there is a possibility
+                * reader(s) may have the lock. To be safe, bail spinning in these
+                * situations.
+                */
+               if (count & RWSEM_ACTIVE_MASK)
+                       ret = false;
+               goto done;
+       }
 
-       return owner->on_cpu;
+       ret = owner->on_cpu;
+done:
+       rcu_read_unlock();
+       return ret;
 }
 
 static noinline
 bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
 {
+       long count;
+
        rcu_read_lock();
-       while (owner_running(sem, owner)) {
-               if (need_resched())
-                       break;
+       while (sem->owner == owner) {
+               /*
+                * Ensure we emit the owner->on_cpu, dereference _after_
+                * checking sem->owner still matches owner, if that fails,
+                * owner might point to free()d memory, if it still matches,
+                * the rcu_read_lock() ensures the memory stays valid.
+                */
+               barrier();
+
+               /* abort spinning when need_resched or owner is not running */
+               if (!owner->on_cpu || need_resched()) {
+                       rcu_read_unlock();
+                       return false;
+               }
 
                cpu_relax_lowlatency();
        }
        rcu_read_unlock();
 
+       if (READ_ONCE(sem->owner))
+               return true; /* new owner, continue spinning */
+
        /*
-        * We break out the loop above on need_resched() or when the
-        * owner changed, which is a sign for heavy contention. Return
-        * success only when sem->owner is NULL.
+        * When the owner is not set, the lock could be free or
+        * held by readers. Check the counter to verify the
+        * state.
         */
-       return sem->owner == NULL;
+       count = READ_ONCE(sem->count);
+       return (count == 0 || count == RWSEM_WAITING_BIAS);
 }
 
 static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
@@ -358,7 +376,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
                goto done;
 
        while (true) {
-               owner = ACCESS_ONCE(sem->owner);
+               owner = READ_ONCE(sem->owner);
                if (owner && !rwsem_spin_on_owner(sem, owner))
                        break;
 
@@ -432,7 +450,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 
        /* we're now waiting on the lock, but no longer actively locking */
        if (waiting) {
-               count = ACCESS_ONCE(sem->count);
+               count = READ_ONCE(sem->count);
 
                /*
                 * If there were already threads queued before us and there are
index e2d3bc7..205be0c 100644 (file)
@@ -9,29 +9,9 @@
 #include <linux/sched.h>
 #include <linux/export.h>
 #include <linux/rwsem.h>
-
 #include <linux/atomic.h>
 
-#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-       sem->owner = current;
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-       sem->owner = NULL;
-}
-
-#else
-static inline void rwsem_set_owner(struct rw_semaphore *sem)
-{
-}
-
-static inline void rwsem_clear_owner(struct rw_semaphore *sem)
-{
-}
-#endif
+#include "rwsem.h"
 
 /*
  * lock for reading
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
new file mode 100644 (file)
index 0000000..870ed9a
--- /dev/null
@@ -0,0 +1,20 @@
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+       sem->owner = current;
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+       sem->owner = NULL;
+}
+
+#else
+static inline void rwsem_set_owner(struct rw_semaphore *sem)
+{
+}
+
+static inline void rwsem_clear_owner(struct rw_semaphore *sem)
+{
+}
+#endif
index cc93cf6..ec53f59 100644 (file)
@@ -56,7 +56,6 @@
 #include <linux/async.h>
 #include <linux/percpu.h>
 #include <linux/kmemleak.h>
-#include <linux/kasan.h>
 #include <linux/jump_label.h>
 #include <linux/pfn.h>
 #include <linux/bsearch.h>
@@ -1814,7 +1813,6 @@ static void unset_module_init_ro_nx(struct module *mod) { }
 void __weak module_memfree(void *module_region)
 {
        vfree(module_region);
-       kasan_module_free(module_region);
 }
 
 void __weak module_arch_cleanup(struct module *mod)
@@ -1867,7 +1865,7 @@ static void free_module(struct module *mod)
        kfree(mod->args);
        percpu_modfree(mod);
 
-       /* Free lock-classes: */
+       /* Free lock-classes; relies on the preceding sync_rcu(). */
        lockdep_free_key_range(mod->module_core, mod->core_size);
 
        /* Finally, free the core (containing the module structure) */
@@ -2481,6 +2479,23 @@ static int elf_header_check(struct load_info *info)
        return 0;
 }
 
+#define COPY_CHUNK_SIZE (16*PAGE_SIZE)
+
+static int copy_chunked_from_user(void *dst, const void __user *usrc, unsigned long len)
+{
+       do {
+               unsigned long n = min(len, COPY_CHUNK_SIZE);
+
+               if (copy_from_user(dst, usrc, n) != 0)
+                       return -EFAULT;
+               cond_resched();
+               dst += n;
+               usrc += n;
+               len -= n;
+       } while (len);
+       return 0;
+}
+
 /* Sets info->hdr and info->len. */
 static int copy_module_from_user(const void __user *umod, unsigned long len,
                                  struct load_info *info)
@@ -2500,7 +2515,7 @@ static int copy_module_from_user(const void __user *umod, unsigned long len,
        if (!info->hdr)
                return -ENOMEM;
 
-       if (copy_from_user(info->hdr, umod, info->len) != 0) {
+       if (copy_chunked_from_user(info->hdr, umod, info->len) != 0) {
                vfree(info->hdr);
                return -EFAULT;
        }
@@ -3351,9 +3366,6 @@ static int load_module(struct load_info *info, const char __user *uargs,
        module_bug_cleanup(mod);
        mutex_unlock(&module_mutex);
 
-       /* Free lock-classes: */
-       lockdep_free_key_range(mod->module_core, mod->core_size);
-
        /* we can't deallocate the module until we clear memory protection */
        unset_module_init_ro_nx(mod);
        unset_module_core_ro_nx(mod);
@@ -3377,6 +3389,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
        synchronize_rcu();
        mutex_unlock(&module_mutex);
  free_module:
+       /* Free lock-classes; relies on the preceding synchronize_rcu() */
+       lockdep_free_key_range(mod->module_core, mod->core_size);
+
        module_deallocate(mod, info);
  free_copy:
        free_copy(info);
index c24d5a2..5235dd4 100644 (file)
@@ -955,25 +955,6 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
        }
 }
 
-static bool is_nosave_page(unsigned long pfn)
-{
-       struct nosave_region *region;
-
-       list_for_each_entry(region, &nosave_regions, list) {
-               if (pfn >= region->start_pfn && pfn < region->end_pfn) {
-                       pr_err("PM: %#010llx in e820 nosave region: "
-                              "[mem %#010llx-%#010llx]\n",
-                              (unsigned long long) pfn << PAGE_SHIFT,
-                              (unsigned long long) region->start_pfn << PAGE_SHIFT,
-                              ((unsigned long long) region->end_pfn << PAGE_SHIFT)
-                                       - 1);
-                       return true;
-               }
-       }
-
-       return false;
-}
-
 /**
  *     create_basic_memory_bitmaps - create bitmaps needed for marking page
  *     frames that should not be saved and free page frames.  The pointers
@@ -2042,7 +2023,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
        do {
                pfn = memory_bm_next_pfn(bm);
                if (likely(pfn != BM_END_OF_MAP)) {
-                       if (likely(pfn_valid(pfn)) && !is_nosave_page(pfn))
+                       if (likely(pfn_valid(pfn)))
                                swsusp_set_page_free(pfn_to_page(pfn));
                        else
                                return -EFAULT;
index cbd69d8..2ca4a8b 100644 (file)
@@ -3,7 +3,7 @@
 
 struct console_cmdline
 {
-       char    name[8];                        /* Name of the driver       */
+       char    name[16];                       /* Name of the driver       */
        int     index;                          /* Minor dev. to use        */
        char    *options;                       /* Options for the driver   */
 #ifdef CONFIG_A11Y_BRAILLE_CONSOLE
index 01cfd69..bb0635b 100644 (file)
@@ -2464,6 +2464,7 @@ void register_console(struct console *newcon)
        for (i = 0, c = console_cmdline;
             i < MAX_CMDLINECONSOLES && c->name[0];
             i++, c++) {
+               BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name));
                if (strcmp(c->name, newcon->name) != 0)
                        continue;
                if (newcon->index >= 0 &&
index f0f831e..261af7b 100644 (file)
@@ -689,6 +689,23 @@ static inline bool got_nohz_idle_kick(void)
 #ifdef CONFIG_NO_HZ_FULL
 bool sched_can_stop_tick(void)
 {
+       /*
+        * FIFO realtime policy runs the highest priority task. Other runnable
+        * tasks are of a lower priority. The scheduler tick does nothing.
+        */
+       if (current->policy == SCHED_FIFO)
+               return true;
+
+       /*
+        * Round-robin realtime tasks time slice with other tasks at the same
+        * realtime priority. Is this task the only one at this priority?
+        */
+       if (current->policy == SCHED_RR) {
+               struct sched_rt_entity *rt_se = &current->rt;
+
+               return rt_se->run_list.prev == rt_se->run_list.next;
+       }
+
        /*
         * More than one running task need preemption.
         * nr_running update is assumed to be visible
@@ -996,6 +1013,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
                rq_clock_skip_update(rq, true);
 }
 
+static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
+
+void register_task_migration_notifier(struct notifier_block *n)
+{
+       atomic_notifier_chain_register(&task_migration_notifier, n);
+}
+
 #ifdef CONFIG_SMP
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
@@ -1026,10 +1050,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
        trace_sched_migrate_task(p, new_cpu);
 
        if (task_cpu(p) != new_cpu) {
+               struct task_migration_notifier tmn;
+
                if (p->sched_class->migrate_task_rq)
                        p->sched_class->migrate_task_rq(p, new_cpu);
                p->se.nr_migrations++;
                perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
+
+               tmn.task = p;
+               tmn.from_cpu = task_cpu(p);
+               tmn.to_cpu = new_cpu;
+
+               atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
        }
 
        __set_task_cpu(p, new_cpu);
@@ -3034,6 +3066,8 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        } else {
                if (dl_prio(oldprio))
                        p->dl.dl_boosted = 0;
+               if (rt_prio(oldprio))
+                       p->rt.timeout = 0;
                p->sched_class = &fair_sched_class;
        }
 
@@ -5318,36 +5352,13 @@ static int sched_cpu_active(struct notifier_block *nfb,
 static int sched_cpu_inactive(struct notifier_block *nfb,
                                        unsigned long action, void *hcpu)
 {
-       unsigned long flags;
-       long cpu = (long)hcpu;
-       struct dl_bw *dl_b;
-
        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
-               set_cpu_active(cpu, false);
-
-               /* explicitly allow suspend */
-               if (!(action & CPU_TASKS_FROZEN)) {
-                       bool overflow;
-                       int cpus;
-
-                       rcu_read_lock_sched();
-                       dl_b = dl_bw_of(cpu);
-
-                       raw_spin_lock_irqsave(&dl_b->lock, flags);
-                       cpus = dl_bw_cpus(cpu);
-                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
-                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
-
-                       rcu_read_unlock_sched();
-
-                       if (overflow)
-                               return notifier_from_errno(-EBUSY);
-               }
+               set_cpu_active((long)hcpu, false);
                return NOTIFY_OK;
+       default:
+               return NOTIFY_DONE;
        }
-
-       return NOTIFY_DONE;
 }
 
 static int __init migration_init(void)
@@ -5428,17 +5439,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
                        break;
                }
 
-               /*
-                * Even though we initialize ->capacity to something semi-sane,
-                * we leave capacity_orig unset. This allows us to detect if
-                * domain iteration is still funny without causing /0 traps.
-                */
-               if (!group->sgc->capacity_orig) {
-                       printk(KERN_CONT "\n");
-                       printk(KERN_ERR "ERROR: domain->cpu_capacity not set\n");
-                       break;
-               }
-
                if (!cpumask_weight(sched_group_cpus(group))) {
                        printk(KERN_CONT "\n");
                        printk(KERN_ERR "ERROR: empty group\n");
@@ -5922,7 +5922,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
                 * die on a /0 trap.
                 */
                sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
-               sg->sgc->capacity_orig = sg->sgc->capacity;
 
                /*
                 * Make sure the first group of this domain contains the
@@ -6233,6 +6232,7 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
         */
 
        if (sd->flags & SD_SHARE_CPUCAPACITY) {
+               sd->flags |= SD_PREFER_SIBLING;
                sd->imbalance_pct = 110;
                sd->smt_gain = 1178; /* ~15% */
 
@@ -6998,7 +6998,6 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                 */
 
        case CPU_ONLINE:
-       case CPU_DOWN_FAILED:
                cpuset_update_active_cpus(true);
                break;
        default:
@@ -7010,8 +7009,30 @@ static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action,
                               void *hcpu)
 {
-       switch (action) {
+       unsigned long flags;
+       long cpu = (long)hcpu;
+       struct dl_bw *dl_b;
+
+       switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_DOWN_PREPARE:
+               /* explicitly allow suspend */
+               if (!(action & CPU_TASKS_FROZEN)) {
+                       bool overflow;
+                       int cpus;
+
+                       rcu_read_lock_sched();
+                       dl_b = dl_bw_of(cpu);
+
+                       raw_spin_lock_irqsave(&dl_b->lock, flags);
+                       cpus = dl_bw_cpus(cpu);
+                       overflow = __dl_overflow(dl_b, cpus, 0, 0);
+                       raw_spin_unlock_irqrestore(&dl_b->lock, flags);
+
+                       rcu_read_unlock_sched();
+
+                       if (overflow)
+                               return notifier_from_errno(-EBUSY);
+               }
                cpuset_update_active_cpus(false);
                break;
        case CPU_DOWN_PREPARE_FROZEN:
@@ -7156,8 +7177,8 @@ void __init sched_init(void)
                rq->calc_load_active = 0;
                rq->calc_load_update = jiffies + LOAD_FREQ;
                init_cfs_rq(&rq->cfs);
-               init_rt_rq(&rq->rt, rq);
-               init_dl_rq(&rq->dl, rq);
+               init_rt_rq(&rq->rt);
+               init_dl_rq(&rq->dl);
 #ifdef CONFIG_FAIR_GROUP_SCHED
                root_task_group.shares = ROOT_TASK_GROUP_LOAD;
                INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -7197,7 +7218,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                rq->sd = NULL;
                rq->rd = NULL;
-               rq->cpu_capacity = SCHED_CAPACITY_SCALE;
+               rq->cpu_capacity = rq->cpu_capacity_orig = SCHED_CAPACITY_SCALE;
                rq->post_schedule = 0;
                rq->active_balance = 0;
                rq->next_balance = jiffies;
@@ -7796,7 +7817,7 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
-static int sched_dl_global_constraints(void)
+static int sched_dl_global_validate(void)
 {
        u64 runtime = global_rt_runtime();
        u64 period = global_rt_period();
@@ -7897,11 +7918,11 @@ int sched_rt_handler(struct ctl_table *table, int write,
                if (ret)
                        goto undo;
 
-               ret = sched_rt_global_constraints();
+               ret = sched_dl_global_validate();
                if (ret)
                        goto undo;
 
-               ret = sched_dl_global_constraints();
+               ret = sched_rt_global_constraints();
                if (ret)
                        goto undo;
 
index 3fa8fa6..5e95145 100644 (file)
@@ -69,7 +69,7 @@ void init_dl_bw(struct dl_bw *dl_b)
        dl_b->total_bw = 0;
 }
 
-void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq)
+void init_dl_rq(struct dl_rq *dl_rq)
 {
        dl_rq->rb_root = RB_ROOT;
 
@@ -218,6 +218,52 @@ static inline void set_post_schedule(struct rq *rq)
        rq->post_schedule = has_pushable_dl_tasks(rq);
 }
 
+static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);
+
+static void dl_task_offline_migration(struct rq *rq, struct task_struct *p)
+{
+       struct rq *later_rq = NULL;
+       bool fallback = false;
+
+       later_rq = find_lock_later_rq(p, rq);
+
+       if (!later_rq) {
+               int cpu;
+
+               /*
+                * If we cannot preempt any rq, fall back to pick any
+                * online cpu.
+                */
+               fallback = true;
+               cpu = cpumask_any_and(cpu_active_mask, tsk_cpus_allowed(p));
+               if (cpu >= nr_cpu_ids) {
+                       /*
+                        * Failed to find any suitable cpu.
+                        * The task will never come back!
+                        */
+                       BUG_ON(dl_bandwidth_enabled());
+
+                       /*
+                        * If admission control is disabled we
+                        * try a little harder to let the task
+                        * run.
+                        */
+                       cpu = cpumask_any(cpu_active_mask);
+               }
+               later_rq = cpu_rq(cpu);
+               double_lock_balance(rq, later_rq);
+       }
+
+       deactivate_task(rq, p, 0);
+       set_task_cpu(p, later_rq->cpu);
+       activate_task(later_rq, p, ENQUEUE_REPLENISH);
+
+       if (!fallback)
+               resched_curr(later_rq);
+
+       double_unlock_balance(rq, later_rq);
+}
+
 #else
 
 static inline
@@ -514,7 +560,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
        unsigned long flags;
        struct rq *rq;
 
-       rq = task_rq_lock(current, &flags);
+       rq = task_rq_lock(p, &flags);
 
        /*
         * We need to take care of several possible races here:
@@ -536,6 +582,17 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
        sched_clock_tick();
        update_rq_clock(rq);
 
+#ifdef CONFIG_SMP
+       /*
+        * If we find that the rq the task was on is no longer
+        * available, we need to select a new rq.
+        */
+       if (unlikely(!rq->online)) {
+               dl_task_offline_migration(rq, p);
+               goto unlock;
+       }
+#endif
+
        /*
         * If the throttle happened during sched-out; like:
         *
@@ -569,7 +626,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
                push_dl_task(rq);
 #endif
 unlock:
-       task_rq_unlock(rq, current, &flags);
+       task_rq_unlock(rq, p, &flags);
 
        return HRTIMER_NORESTART;
 }
@@ -914,6 +971,12 @@ static void yield_task_dl(struct rq *rq)
        }
        update_rq_clock(rq);
        update_curr_dl(rq);
+       /*
+        * Tell update_rq_clock() that we've just updated,
+        * so we don't do microscopic update in schedule()
+        * and double the fastpath cost.
+        */
+       rq_clock_skip_update(rq, true);
 }
 
 #ifdef CONFIG_SMP
@@ -1659,14 +1722,6 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 {
        int check_resched = 1;
 
-       /*
-        * If p is throttled, don't consider the possibility
-        * of preempting rq->curr, the check will be done right
-        * after its runtime will get replenished.
-        */
-       if (unlikely(p->dl.dl_throttled))
-               return;
-
        if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
                if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
index 8baaf85..a245c1f 100644 (file)
@@ -71,7 +71,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
        if (!se) {
                struct sched_avg *avg = &cpu_rq(cpu)->avg;
                P(avg->runnable_avg_sum);
-               P(avg->runnable_avg_period);
+               P(avg->avg_period);
                return;
        }
 
@@ -94,8 +94,10 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
        P(se->load.weight);
 #ifdef CONFIG_SMP
        P(se->avg.runnable_avg_sum);
-       P(se->avg.runnable_avg_period);
+       P(se->avg.running_avg_sum);
+       P(se->avg.avg_period);
        P(se->avg.load_avg_contrib);
+       P(se->avg.utilization_avg_contrib);
        P(se->avg.decay_count);
 #endif
 #undef PN
@@ -214,6 +216,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
                        cfs_rq->runnable_load_avg);
        SEQ_printf(m, "  .%-30s: %ld\n", "blocked_load_avg",
                        cfs_rq->blocked_load_avg);
+       SEQ_printf(m, "  .%-30s: %ld\n", "utilization_load_avg",
+                       cfs_rq->utilization_load_avg);
 #ifdef CONFIG_FAIR_GROUP_SCHED
        SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_contrib",
                        cfs_rq->tg_load_contrib);
@@ -636,8 +640,10 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
        P(se.load.weight);
 #ifdef CONFIG_SMP
        P(se.avg.runnable_avg_sum);
-       P(se.avg.runnable_avg_period);
+       P(se.avg.running_avg_sum);
+       P(se.avg.avg_period);
        P(se.avg.load_avg_contrib);
+       P(se.avg.utilization_avg_contrib);
        P(se.avg.decay_count);
 #endif
        P(policy);
index 7ce18f3..ffeaa41 100644 (file)
@@ -670,6 +670,7 @@ static int select_idle_sibling(struct task_struct *p, int cpu);
 static unsigned long task_h_load(struct task_struct *p);
 
 static inline void __update_task_entity_contrib(struct sched_entity *se);
+static inline void __update_task_entity_utilization(struct sched_entity *se);
 
 /* Give new task start runnable values to heavy its load in infant time */
 void init_task_runnable_average(struct task_struct *p)
@@ -677,9 +678,10 @@ void init_task_runnable_average(struct task_struct *p)
        u32 slice;
 
        slice = sched_slice(task_cfs_rq(p), &p->se) >> 10;
-       p->se.avg.runnable_avg_sum = slice;
-       p->se.avg.runnable_avg_period = slice;
+       p->se.avg.runnable_avg_sum = p->se.avg.running_avg_sum = slice;
+       p->se.avg.avg_period = slice;
        __update_task_entity_contrib(&p->se);
+       __update_task_entity_utilization(&p->se);
 }
 #else
 void init_task_runnable_average(struct task_struct *p)
@@ -1196,9 +1198,11 @@ static void task_numa_assign(struct task_numa_env *env,
 static bool load_too_imbalanced(long src_load, long dst_load,
                                struct task_numa_env *env)
 {
-       long imb, old_imb;
-       long orig_src_load, orig_dst_load;
        long src_capacity, dst_capacity;
+       long orig_src_load;
+       long load_a, load_b;
+       long moved_load;
+       long imb;
 
        /*
         * The load is corrected for the CPU capacity available on each node.
@@ -1211,30 +1215,39 @@ static bool load_too_imbalanced(long src_load, long dst_load,
        dst_capacity = env->dst_stats.compute_capacity;
 
        /* We care about the slope of the imbalance, not the direction. */
-       if (dst_load < src_load)
-               swap(dst_load, src_load);
+       load_a = dst_load;
+       load_b = src_load;
+       if (load_a < load_b)
+               swap(load_a, load_b);
 
        /* Is the difference below the threshold? */
-       imb = dst_load * src_capacity * 100 -
-             src_load * dst_capacity * env->imbalance_pct;
+       imb = load_a * src_capacity * 100 -
+               load_b * dst_capacity * env->imbalance_pct;
        if (imb <= 0)
                return false;
 
        /*
         * The imbalance is above the allowed threshold.
-        * Compare it with the old imbalance.
+        * Allow a move that brings us closer to a balanced situation,
+        * without moving things past the point of balance.
         */
        orig_src_load = env->src_stats.load;
-       orig_dst_load = env->dst_stats.load;
 
-       if (orig_dst_load < orig_src_load)
-               swap(orig_dst_load, orig_src_load);
-
-       old_imb = orig_dst_load * src_capacity * 100 -
-                 orig_src_load * dst_capacity * env->imbalance_pct;
+       /*
+        * In a task swap, there will be one load moving from src to dst,
+        * and another moving back. This is the net sum of both moves.
+        * A simple task move will always have a positive value.
+        * Allow the move if it brings the system closer to a balanced
+        * situation, without crossing over the balance point.
+        */
+       moved_load = orig_src_load - src_load;
 
-       /* Would this change make things worse? */
-       return (imb > old_imb);
+       if (moved_load > 0)
+               /* Moving src -> dst. Did we overshoot balance? */
+               return src_load * dst_capacity < dst_load * src_capacity;
+       else
+               /* Moving dst -> src. Did we overshoot balance? */
+               return dst_load * src_capacity < src_load * dst_capacity;
 }
 
 /*
@@ -1609,9 +1622,11 @@ static void update_task_scan_period(struct task_struct *p,
        /*
         * If there were no record hinting faults then either the task is
         * completely idle or all activity is areas that are not of interest
-        * to automatic numa balancing. Scan slower
+        * to automatic numa balancing. Related to that, if there were failed
+        * migrations then it implies we are migrating too quickly or the local
+        * node is overloaded. In either case, scan slower
         */
-       if (local + shared == 0) {
+       if (local + shared == 0 || p->numa_faults_locality[2]) {
                p->numa_scan_period = min(p->numa_scan_period_max,
                        p->numa_scan_period << 1);
 
@@ -1673,7 +1688,7 @@ static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
                *period = now - p->last_task_numa_placement;
        } else {
                delta = p->se.avg.runnable_avg_sum;
-               *period = p->se.avg.runnable_avg_period;
+               *period = p->se.avg.avg_period;
        }
 
        p->last_sum_exec_runtime = runtime;
@@ -1763,6 +1778,8 @@ static int preferred_group_nid(struct task_struct *p, int nid)
                        }
                }
                /* Next round, evaluate the nodes within max_group. */
+               if (!max_faults)
+                       break;
                nodes = max_group;
        }
        return nid;
@@ -2080,6 +2097,8 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
 
        if (migrated)
                p->numa_pages_migrated += pages;
+       if (flags & TNF_MIGRATE_FAIL)
+               p->numa_faults_locality[2] += pages;
 
        p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages;
        p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages;
@@ -2161,8 +2180,10 @@ void task_numa_work(struct callback_head *work)
                vma = mm->mmap;
        }
        for (; vma; vma = vma->vm_next) {
-               if (!vma_migratable(vma) || !vma_policy_mof(vma))
+               if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
+                       is_vm_hugetlb_page(vma)) {
                        continue;
+               }
 
                /*
                 * Shared library pages mapped by multiple processes are not
@@ -2497,13 +2518,15 @@ static u32 __compute_runnable_contrib(u64 n)
  *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
  *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
  */
-static __always_inline int __update_entity_runnable_avg(u64 now,
+static __always_inline int __update_entity_runnable_avg(u64 now, int cpu,
                                                        struct sched_avg *sa,
-                                                       int runnable)
+                                                       int runnable,
+                                                       int running)
 {
        u64 delta, periods;
        u32 runnable_contrib;
        int delta_w, decayed = 0;
+       unsigned long scale_freq = arch_scale_freq_capacity(NULL, cpu);
 
        delta = now - sa->last_runnable_update;
        /*
@@ -2525,7 +2548,7 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
        sa->last_runnable_update = now;
 
        /* delta_w is the amount already accumulated against our next period */
-       delta_w = sa->runnable_avg_period % 1024;
+       delta_w = sa->avg_period % 1024;
        if (delta + delta_w >= 1024) {
                /* period roll-over */
                decayed = 1;
@@ -2538,7 +2561,10 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
                delta_w = 1024 - delta_w;
                if (runnable)
                        sa->runnable_avg_sum += delta_w;
-               sa->runnable_avg_period += delta_w;
+               if (running)
+                       sa->running_avg_sum += delta_w * scale_freq
+                               >> SCHED_CAPACITY_SHIFT;
+               sa->avg_period += delta_w;
 
                delta -= delta_w;
 
@@ -2548,20 +2574,28 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 
                sa->runnable_avg_sum = decay_load(sa->runnable_avg_sum,
                                                  periods + 1);
-               sa->runnable_avg_period = decay_load(sa->runnable_avg_period,
+               sa->running_avg_sum = decay_load(sa->running_avg_sum,
+                                                 periods + 1);
+               sa->avg_period = decay_load(sa->avg_period,
                                                     periods + 1);
 
                /* Efficiently calculate \sum (1..n_period) 1024*y^i */
                runnable_contrib = __compute_runnable_contrib(periods);
                if (runnable)
                        sa->runnable_avg_sum += runnable_contrib;
-               sa->runnable_avg_period += runnable_contrib;
+               if (running)
+                       sa->running_avg_sum += runnable_contrib * scale_freq
+                               >> SCHED_CAPACITY_SHIFT;
+               sa->avg_period += runnable_contrib;
        }
 
        /* Remainder of delta accrued against u_0` */
        if (runnable)
                sa->runnable_avg_sum += delta;
-       sa->runnable_avg_period += delta;
+       if (running)
+               sa->running_avg_sum += delta * scale_freq
+                       >> SCHED_CAPACITY_SHIFT;
+       sa->avg_period += delta;
 
        return decayed;
 }
@@ -2578,6 +2612,8 @@ static inline u64 __synchronize_entity_decay(struct sched_entity *se)
                return 0;
 
        se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
+       se->avg.utilization_avg_contrib =
+               decay_load(se->avg.utilization_avg_contrib, decays);
 
        return decays;
 }
@@ -2613,7 +2649,7 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa,
 
        /* The fraction of a cpu used by this cfs_rq */
        contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT,
-                         sa->runnable_avg_period + 1);
+                         sa->avg_period + 1);
        contrib -= cfs_rq->tg_runnable_contrib;
 
        if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
@@ -2666,7 +2702,8 @@ static inline void __update_group_entity_contrib(struct sched_entity *se)
 
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 {
-       __update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable);
+       __update_entity_runnable_avg(rq_clock_task(rq), cpu_of(rq), &rq->avg,
+                       runnable, runnable);
        __update_tg_runnable_avg(&rq->avg, &rq->cfs);
 }
 #else /* CONFIG_FAIR_GROUP_SCHED */
@@ -2684,7 +2721,7 @@ static inline void __update_task_entity_contrib(struct sched_entity *se)
 
        /* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */
        contrib = se->avg.runnable_avg_sum * scale_load_down(se->load.weight);
-       contrib /= (se->avg.runnable_avg_period + 1);
+       contrib /= (se->avg.avg_period + 1);
        se->avg.load_avg_contrib = scale_load(contrib);
 }
 
@@ -2703,6 +2740,30 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
        return se->avg.load_avg_contrib - old_contrib;
 }
 
+
+static inline void __update_task_entity_utilization(struct sched_entity *se)
+{
+       u32 contrib;
+
+       /* avoid overflowing a 32-bit type w/ SCHED_LOAD_SCALE */
+       contrib = se->avg.running_avg_sum * scale_load_down(SCHED_LOAD_SCALE);
+       contrib /= (se->avg.avg_period + 1);
+       se->avg.utilization_avg_contrib = scale_load(contrib);
+}
+
+static long __update_entity_utilization_avg_contrib(struct sched_entity *se)
+{
+       long old_contrib = se->avg.utilization_avg_contrib;
+
+       if (entity_is_task(se))
+               __update_task_entity_utilization(se);
+       else
+               se->avg.utilization_avg_contrib =
+                                       group_cfs_rq(se)->utilization_load_avg;
+
+       return se->avg.utilization_avg_contrib - old_contrib;
+}
+
 static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
                                                 long load_contrib)
 {
@@ -2719,7 +2780,8 @@ static inline void update_entity_load_avg(struct sched_entity *se,
                                          int update_cfs_rq)
 {
        struct cfs_rq *cfs_rq = cfs_rq_of(se);
-       long contrib_delta;
+       long contrib_delta, utilization_delta;
+       int cpu = cpu_of(rq_of(cfs_rq));
        u64 now;
 
        /*
@@ -2731,18 +2793,22 @@ static inline void update_entity_load_avg(struct sched_entity *se,
        else
                now = cfs_rq_clock_task(group_cfs_rq(se));
 
-       if (!__update_entity_runnable_avg(now, &se->avg, se->on_rq))
+       if (!__update_entity_runnable_avg(now, cpu, &se->avg, se->on_rq,
+                                       cfs_rq->curr == se))
                return;
 
        contrib_delta = __update_entity_load_avg_contrib(se);
+       utilization_delta = __update_entity_utilization_avg_contrib(se);
 
        if (!update_cfs_rq)
                return;
 
-       if (se->on_rq)
+       if (se->on_rq) {
                cfs_rq->runnable_load_avg += contrib_delta;
-       else
+               cfs_rq->utilization_load_avg += utilization_delta;
+       } else {
                subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
+       }
 }
 
 /*
@@ -2817,6 +2883,7 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
        }
 
        cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
+       cfs_rq->utilization_load_avg += se->avg.utilization_avg_contrib;
        /* we force update consideration on load-balancer moves */
        update_cfs_rq_blocked_load(cfs_rq, !wakeup);
 }
@@ -2835,6 +2902,7 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
        update_cfs_rq_blocked_load(cfs_rq, !sleep);
 
        cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
+       cfs_rq->utilization_load_avg -= se->avg.utilization_avg_contrib;
        if (sleep) {
                cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
                se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
@@ -3172,6 +3240,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
                 */
                update_stats_wait_end(cfs_rq, se);
                __dequeue_entity(cfs_rq, se);
+               update_entity_load_avg(se, 1);
        }
 
        update_stats_curr_start(cfs_rq, se);
@@ -4298,6 +4367,11 @@ static unsigned long capacity_of(int cpu)
        return cpu_rq(cpu)->cpu_capacity;
 }
 
+static unsigned long capacity_orig_of(int cpu)
+{
+       return cpu_rq(cpu)->cpu_capacity_orig;
+}
+
 static unsigned long cpu_avg_load_per_task(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
@@ -4711,6 +4785,33 @@ next:
 done:
        return target;
 }
+/*
+ * get_cpu_usage returns the amount of capacity of a CPU that is used by CFS
+ * tasks. The unit of the return value must be the one of capacity so we can
+ * compare the usage with the capacity of the CPU that is available for CFS
+ * task (ie cpu_capacity).
+ * cfs.utilization_load_avg is the sum of running time of runnable tasks on a
+ * CPU. It represents the amount of utilization of a CPU in the range
+ * [0..SCHED_LOAD_SCALE].  The usage of a CPU can't be higher than the full
+ * capacity of the CPU because it's about the running time on this CPU.
+ * Nevertheless, cfs.utilization_load_avg can be higher than SCHED_LOAD_SCALE
+ * because of unfortunate rounding in avg_period and running_load_avg or just
+ * after migrating tasks until the average stabilizes with the new running
+ * time. So we need to check that the usage stays within the range
+ * [0..cpu_capacity_orig] and cap if necessary.
+ * Without capping the usage, a group could be seen as overloaded (CPU0 usage
+ * at 121% + CPU1 usage at 80%) whereas CPU1 has 20% of available capacity
+ */
+static int get_cpu_usage(int cpu)
+{
+       unsigned long usage = cpu_rq(cpu)->cfs.utilization_load_avg;
+       unsigned long capacity = capacity_orig_of(cpu);
+
+       if (usage >= SCHED_LOAD_SCALE)
+               return capacity;
+
+       return (usage * capacity) >> SCHED_LOAD_SHIFT;
+}
 
 /*
  * select_task_rq_fair: Select target runqueue for the waking task in domains
@@ -5837,12 +5938,12 @@ struct sg_lb_stats {
        unsigned long sum_weighted_load; /* Weighted load of group's tasks */
        unsigned long load_per_task;
        unsigned long group_capacity;
+       unsigned long group_usage; /* Total usage of the group */
        unsigned int sum_nr_running; /* Nr tasks running in the group */
-       unsigned int group_capacity_factor;
        unsigned int idle_cpus;
        unsigned int group_weight;
        enum group_type group_type;
-       int group_has_free_capacity;
+       int group_no_capacity;
 #ifdef CONFIG_NUMA_BALANCING
        unsigned int nr_numa_running;
        unsigned int nr_preferred_running;
@@ -5913,16 +6014,6 @@ static inline int get_sd_load_idx(struct sched_domain *sd,
        return load_idx;
 }
 
-static unsigned long default_scale_capacity(struct sched_domain *sd, int cpu)
-{
-       return SCHED_CAPACITY_SCALE;
-}
-
-unsigned long __weak arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
-{
-       return default_scale_capacity(sd, cpu);
-}
-
 static unsigned long default_scale_cpu_capacity(struct sched_domain *sd, int cpu)
 {
        if ((sd->flags & SD_SHARE_CPUCAPACITY) && (sd->span_weight > 1))
@@ -5939,7 +6030,7 @@ unsigned long __weak arch_scale_cpu_capacity(struct sched_domain *sd, int cpu)
 static unsigned long scale_rt_capacity(int cpu)
 {
        struct rq *rq = cpu_rq(cpu);
-       u64 total, available, age_stamp, avg;
+       u64 total, used, age_stamp, avg;
        s64 delta;
 
        /*
@@ -5955,19 +6046,12 @@ static unsigned long scale_rt_capacity(int cpu)
 
        total = sched_avg_period() + delta;
 
-       if (unlikely(total < avg)) {
-               /* Ensures that capacity won't end up being negative */
-               available = 0;
-       } else {
-               available = total - avg;
-       }
-
-       if (unlikely((s64)total < SCHED_CAPACITY_SCALE))
-               total = SCHED_CAPACITY_SCALE;
+       used = div_u64(avg, total);
 
-       total >>= SCHED_CAPACITY_SHIFT;
+       if (likely(used < SCHED_CAPACITY_SCALE))
+               return SCHED_CAPACITY_SCALE - used;
 
-       return div_u64(available, total);
+       return 1;
 }
 
 static void update_cpu_capacity(struct sched_domain *sd, int cpu)
@@ -5982,14 +6066,7 @@ static void update_cpu_capacity(struct sched_domain *sd, int cpu)
 
        capacity >>= SCHED_CAPACITY_SHIFT;
 
-       sdg->sgc->capacity_orig = capacity;
-
-       if (sched_feat(ARCH_CAPACITY))
-               capacity *= arch_scale_freq_capacity(sd, cpu);
-       else
-               capacity *= default_scale_capacity(sd, cpu);
-
-       capacity >>= SCHED_CAPACITY_SHIFT;
+       cpu_rq(cpu)->cpu_capacity_orig = capacity;
 
        capacity *= scale_rt_capacity(cpu);
        capacity >>= SCHED_CAPACITY_SHIFT;
@@ -6005,7 +6082,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 {
        struct sched_domain *child = sd->child;
        struct sched_group *group, *sdg = sd->groups;
-       unsigned long capacity, capacity_orig;
+       unsigned long capacity;
        unsigned long interval;
 
        interval = msecs_to_jiffies(sd->balance_interval);
@@ -6017,7 +6094,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
                return;
        }
 
-       capacity_orig = capacity = 0;
+       capacity = 0;
 
        if (child->flags & SD_OVERLAP) {
                /*
@@ -6037,19 +6114,15 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
                         * Use capacity_of(), which is set irrespective of domains
                         * in update_cpu_capacity().
                         *
-                        * This avoids capacity/capacity_orig from being 0 and
+                        * This avoids capacity from being 0 and
                         * causing divide-by-zero issues on boot.
-                        *
-                        * Runtime updates will correct capacity_orig.
                         */
                        if (unlikely(!rq->sd)) {
-                               capacity_orig += capacity_of(cpu);
                                capacity += capacity_of(cpu);
                                continue;
                        }
 
                        sgc = rq->sd->groups->sgc;
-                       capacity_orig += sgc->capacity_orig;
                        capacity += sgc->capacity;
                }
        } else  {
@@ -6060,39 +6133,24 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 
                group = child->groups;
                do {
-                       capacity_orig += group->sgc->capacity_orig;
                        capacity += group->sgc->capacity;
                        group = group->next;
                } while (group != child->groups);
        }
 
-       sdg->sgc->capacity_orig = capacity_orig;
        sdg->sgc->capacity = capacity;
 }
 
 /*
- * Try and fix up capacity for tiny siblings, this is needed when
- * things like SD_ASYM_PACKING need f_b_g to select another sibling
- * which on its own isn't powerful enough.
- *
- * See update_sd_pick_busiest() and check_asym_packing().
+ * Check whether the capacity of the rq has been noticeably reduced by side
+ * activity. The imbalance_pct is used for the threshold.
+ * Return true if the capacity is reduced
  */
 static inline int
-fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
+check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
 {
-       /*
-        * Only siblings can have significantly less than SCHED_CAPACITY_SCALE
-        */
-       if (!(sd->flags & SD_SHARE_CPUCAPACITY))
-               return 0;
-
-       /*
-        * If ~90% of the cpu_capacity is still there, we're good.
-        */
-       if (group->sgc->capacity * 32 > group->sgc->capacity_orig * 29)
-               return 1;
-
-       return 0;
+       return ((rq->cpu_capacity * sd->imbalance_pct) <
+                               (rq->cpu_capacity_orig * 100));
 }
 
 /*
@@ -6130,37 +6188,56 @@ static inline int sg_imbalanced(struct sched_group *group)
 }
 
 /*
- * Compute the group capacity factor.
- *
- * Avoid the issue where N*frac(smt_capacity) >= 1 creates 'phantom' cores by
- * first dividing out the smt factor and computing the actual number of cores
- * and limit unit capacity with that.
+ * group_has_capacity returns true if the group has spare capacity that could
+ * be used by some tasks.
+ * We consider that a group has spare capacity if the number of tasks is
+ * smaller than the number of CPUs or if the usage is lower than the available
+ * capacity for CFS tasks.
+ * For the latter, we use a threshold to stabilize the state, to take into
+ * account the variance of the tasks' load and to return true if the available
+ * capacity is meaningful for the load balancer.
+ * As an example, an available capacity of 1% can appear but it doesn't bring
+ * any benefit to the load balancer.
  */
-static inline int sg_capacity_factor(struct lb_env *env, struct sched_group *group)
+static inline bool
+group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
 {
-       unsigned int capacity_factor, smt, cpus;
-       unsigned int capacity, capacity_orig;
+       if (sgs->sum_nr_running < sgs->group_weight)
+               return true;
 
-       capacity = group->sgc->capacity;
-       capacity_orig = group->sgc->capacity_orig;
-       cpus = group->group_weight;
+       if ((sgs->group_capacity * 100) >
+                       (sgs->group_usage * env->sd->imbalance_pct))
+               return true;
+
+       return false;
+}
 
-       /* smt := ceil(cpus / capacity), assumes: 1 < smt_capacity < 2 */
-       smt = DIV_ROUND_UP(SCHED_CAPACITY_SCALE * cpus, capacity_orig);
-       capacity_factor = cpus / smt; /* cores */
+/*
+ *  group_is_overloaded returns true if the group has more tasks than it can
+ *  handle.
+ *  group_is_overloaded is not equal to !group_has_capacity because a group
+ *  with the exact right number of tasks, has no more spare capacity but is not
+ *  overloaded so both group_has_capacity and group_is_overloaded return
+ *  false.
+ */
+static inline bool
+group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
+{
+       if (sgs->sum_nr_running <= sgs->group_weight)
+               return false;
 
-       capacity_factor = min_t(unsigned,
-               capacity_factor, DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE));
-       if (!capacity_factor)
-               capacity_factor = fix_small_capacity(env->sd, group);
+       if ((sgs->group_capacity * 100) <
+                       (sgs->group_usage * env->sd->imbalance_pct))
+               return true;
 
-       return capacity_factor;
+       return false;
 }
 
-static enum group_type
-group_classify(struct sched_group *group, struct sg_lb_stats *sgs)
+static enum group_type group_classify(struct lb_env *env,
+               struct sched_group *group,
+               struct sg_lb_stats *sgs)
 {
-       if (sgs->sum_nr_running > sgs->group_capacity_factor)
+       if (sgs->group_no_capacity)
                return group_overloaded;
 
        if (sg_imbalanced(group))
@@ -6198,6 +6275,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                        load = source_load(i, load_idx);
 
                sgs->group_load += load;
+               sgs->group_usage += get_cpu_usage(i);
                sgs->sum_nr_running += rq->cfs.h_nr_running;
 
                if (rq->nr_running > 1)
@@ -6220,11 +6298,9 @@ static inline void update_sg_lb_stats(struct lb_env *env,
                sgs->load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running;
 
        sgs->group_weight = group->group_weight;
-       sgs->group_capacity_factor = sg_capacity_factor(env, group);
-       sgs->group_type = group_classify(group, sgs);
 
-       if (sgs->group_capacity_factor > sgs->sum_nr_running)
-               sgs->group_has_free_capacity = 1;
+       sgs->group_no_capacity = group_is_overloaded(env, sgs);
+       sgs->group_type = group_classify(env, group, sgs);
 }
 
 /**
@@ -6346,18 +6422,19 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 
                /*
                 * In case the child domain prefers tasks go to siblings
-                * first, lower the sg capacity factor to one so that we'll try
+                * first, lower the sg capacity so that we'll try
                 * and move all the excess tasks away. We lower the capacity
                 * of a group only if the local group has the capacity to fit
-                * these excess tasks, i.e. nr_running < group_capacity_factor. The
-                * extra check prevents the case where you always pull from the
-                * heaviest group when it is already under-utilized (possible
-                * with a large weight task outweighs the tasks on the system).
+                * these excess tasks. The extra check prevents the case where
+                * you always pull from the heaviest group when it is already
+                * under-utilized (possible with a large weight task outweighs
+                * the tasks on the system).
                 */
                if (prefer_sibling && sds->local &&
-                   sds->local_stat.group_has_free_capacity) {
-                       sgs->group_capacity_factor = min(sgs->group_capacity_factor, 1U);
-                       sgs->group_type = group_classify(sg, sgs);
+                   group_has_capacity(env, &sds->local_stat) &&
+                   (sgs->sum_nr_running > 1)) {
+                       sgs->group_no_capacity = 1;
+                       sgs->group_type = group_overloaded;
                }
 
                if (update_sd_pick_busiest(env, sds, sg, sgs)) {
@@ -6537,11 +6614,12 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
         */
        if (busiest->group_type == group_overloaded &&
            local->group_type   == group_overloaded) {
-               load_above_capacity =
-                       (busiest->sum_nr_running - busiest->group_capacity_factor);
-
-               load_above_capacity *= (SCHED_LOAD_SCALE * SCHED_CAPACITY_SCALE);
-               load_above_capacity /= busiest->group_capacity;
+               load_above_capacity = busiest->sum_nr_running *
+                                       SCHED_LOAD_SCALE;
+               if (load_above_capacity > busiest->group_capacity)
+                       load_above_capacity -= busiest->group_capacity;
+               else
+                       load_above_capacity = ~0UL;
        }
 
        /*
@@ -6604,6 +6682,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
        local = &sds.local_stat;
        busiest = &sds.busiest_stat;
 
+       /* ASYM feature bypasses nice load balance check */
        if ((env->idle == CPU_IDLE || env->idle == CPU_NEWLY_IDLE) &&
            check_asym_packing(env, &sds))
                return sds.busiest;
@@ -6624,8 +6703,8 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
                goto force_balance;
 
        /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */
-       if (env->idle == CPU_NEWLY_IDLE && local->group_has_free_capacity &&
-           !busiest->group_has_free_capacity)
+       if (env->idle == CPU_NEWLY_IDLE && group_has_capacity(env, local) &&
+           busiest->group_no_capacity)
                goto force_balance;
 
        /*
@@ -6684,7 +6763,7 @@ static struct rq *find_busiest_queue(struct lb_env *env,
        int i;
 
        for_each_cpu_and(i, sched_group_cpus(group), env->cpus) {
-               unsigned long capacity, capacity_factor, wl;
+               unsigned long capacity, wl;
                enum fbq_type rt;
 
                rq = cpu_rq(i);
@@ -6713,9 +6792,6 @@ static struct rq *find_busiest_queue(struct lb_env *env,
                        continue;
 
                capacity = capacity_of(i);
-               capacity_factor = DIV_ROUND_CLOSEST(capacity, SCHED_CAPACITY_SCALE);
-               if (!capacity_factor)
-                       capacity_factor = fix_small_capacity(env->sd, group);
 
                wl = weighted_cpuload(i);
 
@@ -6723,7 +6799,9 @@ static struct rq *find_busiest_queue(struct lb_env *env,
                 * When comparing with imbalance, use weighted_cpuload()
                 * which is not scaled with the cpu capacity.
                 */
-               if (capacity_factor && rq->nr_running == 1 && wl > env->imbalance)
+
+               if (rq->nr_running == 1 && wl > env->imbalance &&
+                   !check_cpu_capacity(rq, env->sd))
                        continue;
 
                /*
@@ -6771,6 +6849,19 @@ static int need_active_balance(struct lb_env *env)
                        return 1;
        }
 
+       /*
+        * The dst_cpu is idle and the src_cpu CPU has only 1 CFS task.
+        * It's worth migrating the task if the src_cpu's capacity is reduced
+        * because of other sched_class or IRQs if more capacity stays
+        * available on dst_cpu.
+        */
+       if ((env->idle != CPU_NOT_IDLE) &&
+           (env->src_rq->cfs.h_nr_running == 1)) {
+               if ((check_cpu_capacity(env->src_rq, sd)) &&
+                   (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
+                       return 1;
+       }
+
        return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
@@ -6870,6 +6961,9 @@ redo:
 
        schedstat_add(sd, lb_imbalance[idle], env.imbalance);
 
+       env.src_cpu = busiest->cpu;
+       env.src_rq = busiest;
+
        ld_moved = 0;
        if (busiest->nr_running > 1) {
                /*
@@ -6879,8 +6973,6 @@ redo:
                 * correctly treated as an imbalance.
                 */
                env.flags |= LBF_ALL_PINNED;
-               env.src_cpu   = busiest->cpu;
-               env.src_rq    = busiest;
                env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
 more_balance:
@@ -7580,22 +7672,25 @@ end:
 
 /*
  * Current heuristic for kicking the idle load balancer in the presence
- * of an idle cpu is the system.
+ * of an idle cpu in the system.
  *   - This rq has more than one task.
- *   - At any scheduler domain level, this cpu's scheduler group has multiple
- *     busy cpu's exceeding the group's capacity.
+ *   - This rq has at least one CFS task and the capacity of the CPU is
+ *     significantly reduced because of RT tasks or IRQs.
+ *   - At parent of LLC scheduler domain level, this cpu's scheduler group has
+ *     multiple busy cpus.
  *   - For SD_ASYM_PACKING, if the lower numbered cpu's in the scheduler
  *     domain span are idle.
  */
-static inline int nohz_kick_needed(struct rq *rq)
+static inline bool nohz_kick_needed(struct rq *rq)
 {
        unsigned long now = jiffies;
        struct sched_domain *sd;
        struct sched_group_capacity *sgc;
        int nr_busy, cpu = rq->cpu;
+       bool kick = false;
 
        if (unlikely(rq->idle_balance))
-               return 0;
+               return false;
 
        /*
        * We may be recently in ticked or tickless idle mode. At the first
@@ -7609,38 +7704,46 @@ static inline int nohz_kick_needed(struct rq *rq)
         * balancing.
         */
        if (likely(!atomic_read(&nohz.nr_cpus)))
-               return 0;
+               return false;
 
        if (time_before(now, nohz.next_balance))
-               return 0;
+               return false;
 
        if (rq->nr_running >= 2)
-               goto need_kick;
+               return true;
 
        rcu_read_lock();
        sd = rcu_dereference(per_cpu(sd_busy, cpu));
-
        if (sd) {
                sgc = sd->groups->sgc;
                nr_busy = atomic_read(&sgc->nr_busy_cpus);
 
-               if (nr_busy > 1)
-                       goto need_kick_unlock;
+               if (nr_busy > 1) {
+                       kick = true;
+                       goto unlock;
+               }
+
        }
 
-       sd = rcu_dereference(per_cpu(sd_asym, cpu));
+       sd = rcu_dereference(rq->sd);
+       if (sd) {
+               if ((rq->cfs.h_nr_running >= 1) &&
+                               check_cpu_capacity(rq, sd)) {
+                       kick = true;
+                       goto unlock;
+               }
+       }
 
+       sd = rcu_dereference(per_cpu(sd_asym, cpu));
        if (sd && (cpumask_first_and(nohz.idle_cpus_mask,
-                                 sched_domain_span(sd)) < cpu))
-               goto need_kick_unlock;
-
-       rcu_read_unlock();
-       return 0;
+                                 sched_domain_span(sd)) < cpu)) {
+               kick = true;
+               goto unlock;
+       }
 
-need_kick_unlock:
+unlock:
        rcu_read_unlock();
-need_kick:
-       return 1;
+       return kick;
 }
 #else
 static void nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle) { }
@@ -7656,14 +7759,16 @@ static void run_rebalance_domains(struct softirq_action *h)
        enum cpu_idle_type idle = this_rq->idle_balance ?
                                                CPU_IDLE : CPU_NOT_IDLE;
 
-       rebalance_domains(this_rq, idle);
-
        /*
         * If this cpu has a pending nohz_balance_kick, then do the
         * balancing on behalf of the other idle cpus whose ticks are
-        * stopped.
+        * stopped. Do nohz_idle_balance *before* rebalance_domains to
+        * give the idle cpus a chance to load balance. Else we may
+        * load balance only within the local sched_domain hierarchy
+        * and abort nohz_idle_balance altogether if we pull some load.
         */
        nohz_idle_balance(this_rq, idle);
+       rebalance_domains(this_rq, idle);
 }
 
 /*
index 90284d1..91e33cd 100644 (file)
@@ -56,6 +56,19 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
  */
 SCHED_FEAT(TTWU_QUEUE, true)
 
+#ifdef HAVE_RT_PUSH_IPI
+/*
+ * In order to avoid a thundering herd attack of CPUs that are
+ * lowering their priorities at the same time, and there being
+ * a single CPU that has an RT task that can migrate and is waiting
+ * to run, where the other CPUs will try to take that CPU's
+ * rq lock and possibly create a large contention, sending an
+ * IPI to that CPU and let that CPU push the RT task to where
+ * it should go may be a better scenario.
+ */
+SCHED_FEAT(RT_PUSH_IPI, true)
+#endif
+
 SCHED_FEAT(FORCE_SD_OVERLAP, false)
 SCHED_FEAT(RT_RUNTIME_SHARE, true)
 SCHED_FEAT(LB_MIN, false)
index 80014a1..4d207d2 100644 (file)
@@ -158,8 +158,7 @@ static void cpuidle_idle_call(void)
         * is used from another cpu as a broadcast timer, this call may
         * fail if it is not available
         */
-       if (broadcast &&
-           clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu))
+       if (broadcast && tick_broadcast_enter())
                goto use_default;
 
        /* Take note of the planned idle state. */
@@ -176,7 +175,7 @@ static void cpuidle_idle_call(void)
        idle_set_state(this_rq(), NULL);
 
        if (broadcast)
-               clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+               tick_broadcast_exit();
 
        /*
         * Give the governor an opportunity to reflect on the outcome
index f4d4b07..575da76 100644 (file)
@@ -6,6 +6,7 @@
 #include "sched.h"
 
 #include <linux/slab.h>
+#include <linux/irq_work.h>
 
 int sched_rr_timeslice = RR_TIMESLICE;
 
@@ -59,7 +60,11 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
        raw_spin_unlock(&rt_b->rt_runtime_lock);
 }
 
-void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
+#ifdef CONFIG_SMP
+static void push_irq_work_func(struct irq_work *work);
+#endif
+
+void init_rt_rq(struct rt_rq *rt_rq)
 {
        struct rt_prio_array *array;
        int i;
@@ -78,7 +83,14 @@ void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
        rt_rq->rt_nr_migratory = 0;
        rt_rq->overloaded = 0;
        plist_head_init(&rt_rq->pushable_tasks);
+
+#ifdef HAVE_RT_PUSH_IPI
+       rt_rq->push_flags = 0;
+       rt_rq->push_cpu = nr_cpu_ids;
+       raw_spin_lock_init(&rt_rq->push_lock);
+       init_irq_work(&rt_rq->push_work, push_irq_work_func);
 #endif
+#endif /* CONFIG_SMP */
        /* We start is dequeued state, because no RT tasks are queued */
        rt_rq->rt_queued = 0;
 
@@ -193,7 +205,7 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
                if (!rt_se)
                        goto err_free_rq;
 
-               init_rt_rq(rt_rq, cpu_rq(i));
+               init_rt_rq(rt_rq);
                rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
                init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
        }
@@ -1778,6 +1790,164 @@ static void push_rt_tasks(struct rq *rq)
                ;
 }
 
+#ifdef HAVE_RT_PUSH_IPI
+/*
+ * The search for the next cpu always starts at rq->cpu and ends
+ * when we reach rq->cpu again. It will never return rq->cpu.
+ * This returns the next cpu to check, or nr_cpu_ids if the loop
+ * is complete.
+ *
+ * rq->rt.push_cpu holds the last cpu returned by this function,
+ * or if this is the first instance, it must hold rq->cpu.
+ */
+static int rto_next_cpu(struct rq *rq)
+{
+       int prev_cpu = rq->rt.push_cpu;
+       int cpu;
+
+       cpu = cpumask_next(prev_cpu, rq->rd->rto_mask);
+
+       /*
+        * If the previous cpu is less than the rq's CPU, then it already
+        * passed the end of the mask, and has started from the beginning.
+        * We end if the next CPU is greater or equal to rq's CPU.
+        */
+       if (prev_cpu < rq->cpu) {
+               if (cpu >= rq->cpu)
+                       return nr_cpu_ids;
+
+       } else if (cpu >= nr_cpu_ids) {
+               /*
+                * We passed the end of the mask, start at the beginning.
+                * If the result is greater or equal to the rq's CPU, then
+                * the loop is finished.
+                */
+               cpu = cpumask_first(rq->rd->rto_mask);
+               if (cpu >= rq->cpu)
+                       return nr_cpu_ids;
+       }
+       rq->rt.push_cpu = cpu;
+
+       /* Return cpu to let the caller know if the loop is finished or not */
+       return cpu;
+}
+
+static int find_next_push_cpu(struct rq *rq)
+{
+       struct rq *next_rq;
+       int cpu;
+
+       while (1) {
+               cpu = rto_next_cpu(rq);
+               if (cpu >= nr_cpu_ids)
+                       break;
+               next_rq = cpu_rq(cpu);
+
+               /* Make sure the next rq can push to this rq */
+               if (next_rq->rt.highest_prio.next < rq->rt.highest_prio.curr)
+                       break;
+       }
+
+       return cpu;
+}
+
+#define RT_PUSH_IPI_EXECUTING          1
+#define RT_PUSH_IPI_RESTART            2
+
+static void tell_cpu_to_push(struct rq *rq)
+{
+       int cpu;
+
+       if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
+               raw_spin_lock(&rq->rt.push_lock);
+               /* Make sure it's still executing */
+               if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) {
+                       /*
+                        * Tell the IPI to restart the loop as things have
+                        * changed since it started.
+                        */
+                       rq->rt.push_flags |= RT_PUSH_IPI_RESTART;
+                       raw_spin_unlock(&rq->rt.push_lock);
+                       return;
+               }
+               raw_spin_unlock(&rq->rt.push_lock);
+       }
+
+       /* When here, there's no IPI going around */
+
+       rq->rt.push_cpu = rq->cpu;
+       cpu = find_next_push_cpu(rq);
+       if (cpu >= nr_cpu_ids)
+               return;
+
+       rq->rt.push_flags = RT_PUSH_IPI_EXECUTING;
+
+       irq_work_queue_on(&rq->rt.push_work, cpu);
+}
+
+/* Called from hardirq context */
+static void try_to_push_tasks(void *arg)
+{
+       struct rt_rq *rt_rq = arg;
+       struct rq *rq, *src_rq;
+       int this_cpu;
+       int cpu;
+
+       this_cpu = rt_rq->push_cpu;
+
+       /* Paranoid check */
+       BUG_ON(this_cpu != smp_processor_id());
+
+       rq = cpu_rq(this_cpu);
+       src_rq = rq_of_rt_rq(rt_rq);
+
+again:
+       if (has_pushable_tasks(rq)) {
+               raw_spin_lock(&rq->lock);
+               push_rt_task(rq);
+               raw_spin_unlock(&rq->lock);
+       }
+
+       /* Pass the IPI to the next rt overloaded queue */
+       raw_spin_lock(&rt_rq->push_lock);
+       /*
+        * If the source queue changed since the IPI went out,
+        * we need to restart the search from that CPU again.
+        */
+       if (rt_rq->push_flags & RT_PUSH_IPI_RESTART) {
+               rt_rq->push_flags &= ~RT_PUSH_IPI_RESTART;
+               rt_rq->push_cpu = src_rq->cpu;
+       }
+
+       cpu = find_next_push_cpu(src_rq);
+
+       if (cpu >= nr_cpu_ids)
+               rt_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING;
+       raw_spin_unlock(&rt_rq->push_lock);
+
+       if (cpu >= nr_cpu_ids)
+               return;
+
+       /*
+        * It is possible that a restart caused this CPU to be
+        * chosen again. Don't bother with an IPI, just see if we
+        * have more to push.
+        */
+       if (unlikely(cpu == rq->cpu))
+               goto again;
+
+       /* Try the next RT overloaded CPU */
+       irq_work_queue_on(&rt_rq->push_work, cpu);
+}
+
+static void push_irq_work_func(struct irq_work *work)
+{
+       struct rt_rq *rt_rq = container_of(work, struct rt_rq, push_work);
+
+       try_to_push_tasks(rt_rq);
+}
+#endif /* HAVE_RT_PUSH_IPI */
+
 static int pull_rt_task(struct rq *this_rq)
 {
        int this_cpu = this_rq->cpu, ret = 0, cpu;
@@ -1793,6 +1963,13 @@ static int pull_rt_task(struct rq *this_rq)
         */
        smp_rmb();
 
+#ifdef HAVE_RT_PUSH_IPI
+       if (sched_feat(RT_PUSH_IPI)) {
+               tell_cpu_to_push(this_rq);
+               return 0;
+       }
+#endif
+
        for_each_cpu(cpu, this_rq->rd->rto_mask) {
                if (this_cpu == cpu)
                        continue;
index dc0f435..e0e1299 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/mutex.h>
 #include <linux/spinlock.h>
 #include <linux/stop_machine.h>
+#include <linux/irq_work.h>
 #include <linux/tick.h>
 #include <linux/slab.h>
 
@@ -362,8 +363,14 @@ struct cfs_rq {
         * Under CFS, load is tracked on a per-entity basis and aggregated up.
         * This allows for the description of both thread and group usage (in
         * the FAIR_GROUP_SCHED case).
+        * runnable_load_avg is the sum of the load_avg_contrib of the
+        * sched_entities on the rq.
+        * blocked_load_avg is similar to runnable_load_avg except that it
+        * covers the blocked sched_entities on the rq.
+        * utilization_load_avg is the sum of the average running time of the
+        * sched_entities on the rq.
         */
-       unsigned long runnable_load_avg, blocked_load_avg;
+       unsigned long runnable_load_avg, blocked_load_avg, utilization_load_avg;
        atomic64_t decay_counter;
        u64 last_decay;
        atomic_long_t removed_load;
@@ -418,6 +425,11 @@ static inline int rt_bandwidth_enabled(void)
        return sysctl_sched_rt_runtime >= 0;
 }
 
+/* RT IPI pull logic requires IRQ_WORK */
+#ifdef CONFIG_IRQ_WORK
+# define HAVE_RT_PUSH_IPI
+#endif
+
 /* Real-Time classes' related field in a runqueue: */
 struct rt_rq {
        struct rt_prio_array active;
@@ -435,7 +447,13 @@ struct rt_rq {
        unsigned long rt_nr_total;
        int overloaded;
        struct plist_head pushable_tasks;
+#ifdef HAVE_RT_PUSH_IPI
+       int push_flags;
+       int push_cpu;
+       struct irq_work push_work;
+       raw_spinlock_t push_lock;
 #endif
+#endif /* CONFIG_SMP */
        int rt_queued;
 
        int rt_throttled;
@@ -597,6 +615,7 @@ struct rq {
        struct sched_domain *sd;
 
        unsigned long cpu_capacity;
+       unsigned long cpu_capacity_orig;
 
        unsigned char idle_balance;
        /* For active balancing */
@@ -807,7 +826,7 @@ struct sched_group_capacity {
         * CPU capacity of this group, SCHED_LOAD_SCALE being max capacity
         * for a single CPU.
         */
-       unsigned int capacity, capacity_orig;
+       unsigned int capacity;
        unsigned long next_update;
        int imbalance; /* XXX unrelated to capacity but shared group state */
        /*
@@ -1368,9 +1387,18 @@ static inline int hrtick_enabled(struct rq *rq)
 
 #ifdef CONFIG_SMP
 extern void sched_avg_update(struct rq *rq);
+
+#ifndef arch_scale_freq_capacity
+static __always_inline
+unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu)
+{
+       return SCHED_CAPACITY_SCALE;
+}
+#endif
+
 static inline void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
 {
-       rq->rt_avg += rt_delta;
+       rq->rt_avg += rt_delta * arch_scale_freq_capacity(NULL, cpu_of(rq));
        sched_avg_update(rq);
 }
 #else
@@ -1643,8 +1671,8 @@ extern void print_rt_stats(struct seq_file *m, int cpu);
 extern void print_dl_stats(struct seq_file *m, int cpu);
 
 extern void init_cfs_rq(struct cfs_rq *cfs_rq);
-extern void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq);
-extern void init_dl_rq(struct dl_rq *dl_rq, struct rq *rq);
+extern void init_rt_rq(struct rt_rq *rt_rq);
+extern void init_dl_rq(struct dl_rq *dl_rq);
 
 extern void cfs_bandwidth_usage_inc(void);
 extern void cfs_bandwidth_usage_dec(void);
index 88ea2d6..ce410bb 100644 (file)
@@ -1227,6 +1227,14 @@ static struct ctl_table vm_table[] = {
                .proc_handler   = proc_dointvec_minmax,
                .extra1         = &zero,
        },
+       {
+               .procname       = "dirtytime_expire_seconds",
+               .data           = &dirtytime_expire_interval,
+               .maxlen         = sizeof(dirtytime_expire_interval),
+               .mode           = 0644,
+               .proc_handler   = dirtytime_interval_handler,
+               .extra1         = &zero,
+       },
        {
                .procname       = "nr_pdflush_threads",
                .mode           = 0444 /* read-only */,
index d626dc9..579ce1b 100644 (file)
@@ -33,12 +33,6 @@ config ARCH_USES_GETTIMEOFFSET
 config GENERIC_CLOCKEVENTS
        bool
 
-# Migration helper. Builds, but does not invoke
-config GENERIC_CLOCKEVENTS_BUILD
-       bool
-       default y
-       depends on GENERIC_CLOCKEVENTS
-
 # Architecture can handle broadcast in a driver-agnostic way
 config ARCH_HAS_TICK_BROADCAST
        bool
index c09c078..01f0312 100644 (file)
@@ -2,15 +2,13 @@ obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o
 obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o
 
-obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)                += clockevents.o
-obj-$(CONFIG_GENERIC_CLOCKEVENTS)              += tick-common.o
+obj-$(CONFIG_GENERIC_CLOCKEVENTS)              += clockevents.o tick-common.o
 ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
  obj-y                                         += tick-broadcast.o
  obj-$(CONFIG_TICK_ONESHOT)                    += tick-broadcast-hrtimer.o
 endif
 obj-$(CONFIG_GENERIC_SCHED_CLOCK)              += sched_clock.o
-obj-$(CONFIG_TICK_ONESHOT)                     += tick-oneshot.o
-obj-$(CONFIG_TICK_ONESHOT)                     += tick-sched.o
+obj-$(CONFIG_TICK_ONESHOT)                     += tick-oneshot.o tick-sched.o
 obj-$(CONFIG_TIMER_STATS)                      += timer_stats.o
 obj-$(CONFIG_DEBUG_FS)                         += timekeeping_debug.o
 obj-$(CONFIG_TEST_UDELAY)                      += test_udelay.o
index 5544990..25d942d 100644 (file)
@@ -94,25 +94,76 @@ u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt)
 }
 EXPORT_SYMBOL_GPL(clockevent_delta2ns);
 
+static int __clockevents_set_state(struct clock_event_device *dev,
+                                  enum clock_event_state state)
+{
+       /* Transition with legacy set_mode() callback */
+       if (dev->set_mode) {
+               /* Legacy callback doesn't support new modes */
+               if (state > CLOCK_EVT_STATE_ONESHOT)
+                       return -ENOSYS;
+               /*
+                * 'clock_event_state' and 'clock_event_mode' have 1-to-1
+                * mapping until *_ONESHOT, and so a simple cast will work.
+                */
+               dev->set_mode((enum clock_event_mode)state, dev);
+               dev->mode = (enum clock_event_mode)state;
+               return 0;
+       }
+
+       if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+               return 0;
+
+       /* Transition with new state-specific callbacks */
+       switch (state) {
+       case CLOCK_EVT_STATE_DETACHED:
+               /*
+                * This is an internal state, which is guaranteed to go from
+                * SHUTDOWN to DETACHED. No driver interaction required.
+                */
+               return 0;
+
+       case CLOCK_EVT_STATE_SHUTDOWN:
+               return dev->set_state_shutdown(dev);
+
+       case CLOCK_EVT_STATE_PERIODIC:
+               /* Core internal bug */
+               if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC))
+                       return -ENOSYS;
+               return dev->set_state_periodic(dev);
+
+       case CLOCK_EVT_STATE_ONESHOT:
+               /* Core internal bug */
+               if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
+                       return -ENOSYS;
+               return dev->set_state_oneshot(dev);
+
+       default:
+               return -ENOSYS;
+       }
+}
+
 /**
- * clockevents_set_mode - set the operating mode of a clock event device
+ * clockevents_set_state - set the operating state of a clock event device
  * @dev:       device to modify
- * @mode:      new mode
+ * @state:     new state
  *
  * Must be called with interrupts disabled !
  */
-void clockevents_set_mode(struct clock_event_device *dev,
-                                enum clock_event_mode mode)
+void clockevents_set_state(struct clock_event_device *dev,
+                          enum clock_event_state state)
 {
-       if (dev->mode != mode) {
-               dev->set_mode(mode, dev);
-               dev->mode = mode;
+       if (dev->state != state) {
+               if (__clockevents_set_state(dev, state))
+                       return;
+
+               dev->state = state;
 
                /*
                 * A nsec2cyc multiplicator of 0 is invalid and we'd crash
                 * on it, so fix it up and emit a warning:
                 */
-               if (mode == CLOCK_EVT_MODE_ONESHOT) {
+               if (state == CLOCK_EVT_STATE_ONESHOT) {
                        if (unlikely(!dev->mult)) {
                                dev->mult = 1;
                                WARN_ON(1);
@@ -127,10 +178,28 @@ void clockevents_set_mode(struct clock_event_device *dev,
  */
 void clockevents_shutdown(struct clock_event_device *dev)
 {
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
        dev->next_event.tv64 = KTIME_MAX;
 }
 
+/**
+ * clockevents_tick_resume -   Resume the tick device before using it again
+ * @dev:                       device to resume
+ */
+int clockevents_tick_resume(struct clock_event_device *dev)
+{
+       int ret = 0;
+
+       if (dev->set_mode) {
+               dev->set_mode(CLOCK_EVT_MODE_RESUME, dev);
+               dev->mode = CLOCK_EVT_MODE_RESUME;
+       } else if (dev->tick_resume) {
+               ret = dev->tick_resume(dev);
+       }
+
+       return ret;
+}
+
 #ifdef CONFIG_GENERIC_CLOCKEVENTS_MIN_ADJUST
 
 /* Limit min_delta to a jiffie */
@@ -183,7 +252,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
                delta = dev->min_delta_ns;
                dev->next_event = ktime_add_ns(ktime_get(), delta);
 
-               if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+               if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
                        return 0;
 
                dev->retries++;
@@ -220,7 +289,7 @@ static int clockevents_program_min_delta(struct clock_event_device *dev)
        delta = dev->min_delta_ns;
        dev->next_event = ktime_add_ns(ktime_get(), delta);
 
-       if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+       if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
                return 0;
 
        dev->retries++;
@@ -252,7 +321,7 @@ int clockevents_program_event(struct clock_event_device *dev, ktime_t expires,
 
        dev->next_event = expires;
 
-       if (dev->mode == CLOCK_EVT_MODE_SHUTDOWN)
+       if (dev->state == CLOCK_EVT_STATE_SHUTDOWN)
                return 0;
 
        /* Shortcut for clockevent devices that can deal with ktime. */
@@ -297,7 +366,7 @@ static int clockevents_replace(struct clock_event_device *ced)
        struct clock_event_device *dev, *newdev = NULL;
 
        list_for_each_entry(dev, &clockevent_devices, list) {
-               if (dev == ced || dev->mode != CLOCK_EVT_MODE_UNUSED)
+               if (dev == ced || dev->state != CLOCK_EVT_STATE_DETACHED)
                        continue;
 
                if (!tick_check_replacement(newdev, dev))
@@ -323,7 +392,7 @@ static int clockevents_replace(struct clock_event_device *ced)
 static int __clockevents_try_unbind(struct clock_event_device *ced, int cpu)
 {
        /* Fast track. Device is unused */
-       if (ced->mode == CLOCK_EVT_MODE_UNUSED) {
+       if (ced->state == CLOCK_EVT_STATE_DETACHED) {
                list_del_init(&ced->list);
                return 0;
        }
@@ -373,6 +442,37 @@ int clockevents_unbind_device(struct clock_event_device *ced, int cpu)
 }
 EXPORT_SYMBOL_GPL(clockevents_unbind);
 
+/* Sanity check of state transition callbacks */
+static int clockevents_sanity_check(struct clock_event_device *dev)
+{
+       /* Legacy set_mode() callback */
+       if (dev->set_mode) {
+               /* We shouldn't be supporting new modes now */
+               WARN_ON(dev->set_state_periodic || dev->set_state_oneshot ||
+                       dev->set_state_shutdown || dev->tick_resume);
+
+               BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+               return 0;
+       }
+
+       if (dev->features & CLOCK_EVT_FEAT_DUMMY)
+               return 0;
+
+       /* New state-specific callbacks */
+       if (!dev->set_state_shutdown)
+               return -EINVAL;
+
+       if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
+           !dev->set_state_periodic)
+               return -EINVAL;
+
+       if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) &&
+           !dev->set_state_oneshot)
+               return -EINVAL;
+
+       return 0;
+}
+
 /**
  * clockevents_register_device - register a clock event device
  * @dev:       device to register
@@ -381,7 +481,11 @@ void clockevents_register_device(struct clock_event_device *dev)
 {
        unsigned long flags;
 
-       BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+       BUG_ON(clockevents_sanity_check(dev));
+
+       /* Initialize state to DETACHED */
+       dev->state = CLOCK_EVT_STATE_DETACHED;
+
        if (!dev->cpumask) {
                WARN_ON(num_possible_cpus() > 1);
                dev->cpumask = cpumask_of(smp_processor_id());
@@ -445,11 +549,11 @@ int __clockevents_update_freq(struct clock_event_device *dev, u32 freq)
 {
        clockevents_config(dev, freq);
 
-       if (dev->mode == CLOCK_EVT_MODE_ONESHOT)
+       if (dev->state == CLOCK_EVT_STATE_ONESHOT)
                return clockevents_program_event(dev, dev->next_event, false);
 
-       if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
-               dev->set_mode(CLOCK_EVT_MODE_PERIODIC, dev);
+       if (dev->state == CLOCK_EVT_STATE_PERIODIC)
+               return __clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
 
        return 0;
 }
@@ -491,30 +595,27 @@ void clockevents_handle_noop(struct clock_event_device *dev)
  * @old:       device to release (can be NULL)
  * @new:       device to request (can be NULL)
  *
- * Called from the notifier chain. clockevents_lock is held already
+ * Called from various tick functions with clockevents_lock held and
+ * interrupts disabled.
  */
 void clockevents_exchange_device(struct clock_event_device *old,
                                 struct clock_event_device *new)
 {
-       unsigned long flags;
-
-       local_irq_save(flags);
        /*
         * Caller releases a clock event device. We queue it into the
         * released list and do a notify add later.
         */
        if (old) {
                module_put(old->owner);
-               clockevents_set_mode(old, CLOCK_EVT_MODE_UNUSED);
+               clockevents_set_state(old, CLOCK_EVT_STATE_DETACHED);
                list_del(&old->list);
                list_add(&old->list, &clockevents_released);
        }
 
        if (new) {
-               BUG_ON(new->mode != CLOCK_EVT_MODE_UNUSED);
+               BUG_ON(new->state != CLOCK_EVT_STATE_DETACHED);
                clockevents_shutdown(new);
        }
-       local_irq_restore(flags);
 }
 
 /**
@@ -541,74 +642,40 @@ void clockevents_resume(void)
                        dev->resume(dev);
 }
 
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
+#ifdef CONFIG_HOTPLUG_CPU
 /**
- * clockevents_notify - notification about relevant events
- * Returns 0 on success, any other value on error
+ * tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu
  */
-int clockevents_notify(unsigned long reason, void *arg)
+void tick_cleanup_dead_cpu(int cpu)
 {
        struct clock_event_device *dev, *tmp;
        unsigned long flags;
-       int cpu, ret = 0;
 
        raw_spin_lock_irqsave(&clockevents_lock, flags);
 
-       switch (reason) {
-       case CLOCK_EVT_NOTIFY_BROADCAST_ON:
-       case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-       case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
-               tick_broadcast_on_off(reason, arg);
-               break;
-
-       case CLOCK_EVT_NOTIFY_BROADCAST_ENTER:
-       case CLOCK_EVT_NOTIFY_BROADCAST_EXIT:
-               ret = tick_broadcast_oneshot_control(reason);
-               break;
-
-       case CLOCK_EVT_NOTIFY_CPU_DYING:
-               tick_handover_do_timer(arg);
-               break;
-
-       case CLOCK_EVT_NOTIFY_SUSPEND:
-               tick_suspend();
-               tick_suspend_broadcast();
-               break;
-
-       case CLOCK_EVT_NOTIFY_RESUME:
-               tick_resume();
-               break;
-
-       case CLOCK_EVT_NOTIFY_CPU_DEAD:
-               tick_shutdown_broadcast_oneshot(arg);
-               tick_shutdown_broadcast(arg);
-               tick_shutdown(arg);
-               /*
-                * Unregister the clock event devices which were
-                * released from the users in the notify chain.
-                */
-               list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+       tick_shutdown_broadcast_oneshot(cpu);
+       tick_shutdown_broadcast(cpu);
+       tick_shutdown(cpu);
+       /*
+        * Unregister the clock event devices which were
+        * released from the users in the notify chain.
+        */
+       list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+               list_del(&dev->list);
+       /*
+        * Now check whether the CPU has left unused per cpu devices
+        */
+       list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
+               if (cpumask_test_cpu(cpu, dev->cpumask) &&
+                   cpumask_weight(dev->cpumask) == 1 &&
+                   !tick_is_broadcast_device(dev)) {
+                       BUG_ON(dev->state != CLOCK_EVT_STATE_DETACHED);
                        list_del(&dev->list);
-               /*
-                * Now check whether the CPU has left unused per cpu devices
-                */
-               cpu = *((int *)arg);
-               list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
-                       if (cpumask_test_cpu(cpu, dev->cpumask) &&
-                           cpumask_weight(dev->cpumask) == 1 &&
-                           !tick_is_broadcast_device(dev)) {
-                               BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
-                               list_del(&dev->list);
-                       }
                }
-               break;
-       default:
-               break;
        }
        raw_spin_unlock_irqrestore(&clockevents_lock, flags);
-       return ret;
 }
-EXPORT_SYMBOL_GPL(clockevents_notify);
+#endif
 
 #ifdef CONFIG_SYSFS
 struct bus_type clockevents_subsys = {
@@ -727,5 +794,3 @@ static int __init clockevents_init_sysfs(void)
 }
 device_initcall(clockevents_init_sysfs);
 #endif /* SYSFS */
-
-#endif /* GENERIC_CLOCK_EVENTS */
index 4892352..15facb1 100644 (file)
@@ -142,13 +142,6 @@ static void __clocksource_unstable(struct clocksource *cs)
                schedule_work(&watchdog_work);
 }
 
-static void clocksource_unstable(struct clocksource *cs, int64_t delta)
-{
-       printk(KERN_WARNING "Clocksource %s unstable (delta = %Ld ns)\n",
-              cs->name, delta);
-       __clocksource_unstable(cs);
-}
-
 /**
  * clocksource_mark_unstable - mark clocksource unstable via watchdog
  * @cs:                clocksource to be marked unstable
@@ -174,7 +167,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
 static void clocksource_watchdog(unsigned long data)
 {
        struct clocksource *cs;
-       cycle_t csnow, wdnow, delta;
+       cycle_t csnow, wdnow, cslast, wdlast, delta;
        int64_t wd_nsec, cs_nsec;
        int next_cpu, reset_pending;
 
@@ -213,6 +206,8 @@ static void clocksource_watchdog(unsigned long data)
 
                delta = clocksource_delta(csnow, cs->cs_last, cs->mask);
                cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+               wdlast = cs->wd_last; /* save these in case we print them */
+               cslast = cs->cs_last;
                cs->cs_last = csnow;
                cs->wd_last = wdnow;
 
@@ -221,7 +216,12 @@ static void clocksource_watchdog(unsigned long data)
 
                /* Check the deviation from the watchdog clocksource. */
                if ((abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD)) {
-                       clocksource_unstable(cs, cs_nsec - wd_nsec);
+                       pr_warn("timekeeping watchdog: Marking clocksource '%s' as unstable, because the skew is too large:\n", cs->name);
+                       pr_warn("       '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
+                               watchdog->name, wdnow, wdlast, watchdog->mask);
+                       pr_warn("       '%s' cs_now: %llx cs_last: %llx mask: %llx\n",
+                               cs->name, csnow, cslast, cs->mask);
+                       __clocksource_unstable(cs);
                        continue;
                }
 
@@ -469,26 +469,25 @@ static u32 clocksource_max_adjustment(struct clocksource *cs)
  * @shift:     cycle to nanosecond divisor (power of two)
  * @maxadj:    maximum adjustment value to mult (~11%)
  * @mask:      bitmask for two's complement subtraction of non 64 bit counters
+ * @max_cyc:   maximum cycle value before potential overflow (does not include
+ *             any safety margin)
+ *
+ * NOTE: This function includes a safety margin of 50%, in other words, we
+ * return half the number of nanoseconds the hardware counter can technically
+ * cover. This is done so that we can potentially detect problems caused by
+ * delayed timers or bad hardware, which might result in time intervals that
+ * are larger than what the math used can handle without overflows.
  */
-u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
+u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
 {
        u64 max_nsecs, max_cycles;
 
        /*
         * Calculate the maximum number of cycles that we can pass to the
-        * cyc2ns function without overflowing a 64-bit signed result. The
-        * maximum number of cycles is equal to ULLONG_MAX/(mult+maxadj)
-        * which is equivalent to the below.
-        * max_cycles < (2^63)/(mult + maxadj)
-        * max_cycles < 2^(log2((2^63)/(mult + maxadj)))
-        * max_cycles < 2^(log2(2^63) - log2(mult + maxadj))
-        * max_cycles < 2^(63 - log2(mult + maxadj))
-        * max_cycles < 1 << (63 - log2(mult + maxadj))
-        * Please note that we add 1 to the result of the log2 to account for
-        * any rounding errors, ensure the above inequality is satisfied and
-        * no overflow will occur.
+        * cyc2ns() function without overflowing a 64-bit result.
         */
-       max_cycles = 1ULL << (63 - (ilog2(mult + maxadj) + 1));
+       max_cycles = ULLONG_MAX;
+       do_div(max_cycles, mult+maxadj);
 
        /*
         * The actual maximum number of cycles we can defer the clocksource is
@@ -499,27 +498,26 @@ u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask)
        max_cycles = min(max_cycles, mask);
        max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
 
+       /* return the max_cycles value as well if requested */
+       if (max_cyc)
+               *max_cyc = max_cycles;
+
+       /* Return 50% of the actual maximum, so we can detect bad values */
+       max_nsecs >>= 1;
+
        return max_nsecs;
 }
 
 /**
- * clocksource_max_deferment - Returns max time the clocksource can be deferred
- * @cs:         Pointer to clocksource
+ * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
+ * @cs:         Pointer to clocksource to be updated
  *
  */
-static u64 clocksource_max_deferment(struct clocksource *cs)
+static inline void clocksource_update_max_deferment(struct clocksource *cs)
 {
-       u64 max_nsecs;
-
-       max_nsecs = clocks_calc_max_nsecs(cs->mult, cs->shift, cs->maxadj,
-                                         cs->mask);
-       /*
-        * To ensure that the clocksource does not wrap whilst we are idle,
-        * limit the time the clocksource can be deferred by 12.5%. Please
-        * note a margin of 12.5% is used because this can be computed with
-        * a shift, versus say 10% which would require division.
-        */
-       return max_nsecs - (max_nsecs >> 3);
+       cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
+                                               cs->maxadj, cs->mask,
+                                               &cs->max_cycles);
 }
 
 #ifndef CONFIG_ARCH_USES_GETTIMEOFFSET
@@ -648,7 +646,7 @@ static void clocksource_enqueue(struct clocksource *cs)
 }
 
 /**
- * __clocksource_updatefreq_scale - Used update clocksource with new freq
+ * __clocksource_update_freq_scale - Used to update clocksource with new freq
  * @cs:                clocksource to be registered
  * @scale:     Scale factor multiplied against freq to get clocksource hz
  * @freq:      clocksource frequency (cycles per second) divided by scale
@@ -656,48 +654,64 @@ static void clocksource_enqueue(struct clocksource *cs)
  * This should only be called from the clocksource->enable() method.
  *
  * This *SHOULD NOT* be called directly! Please use the
- * clocksource_updatefreq_hz() or clocksource_updatefreq_khz helper functions.
+ * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
+ * functions.
  */
-void __clocksource_updatefreq_scale(struct clocksource *cs, u32 scale, u32 freq)
+void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
        u64 sec;
+
        /*
-        * Calc the maximum number of seconds which we can run before
-        * wrapping around. For clocksources which have a mask > 32bit
-        * we need to limit the max sleep time to have a good
-        * conversion precision. 10 minutes is still a reasonable
-        * amount. That results in a shift value of 24 for a
-        * clocksource with mask >= 40bit and f >= 4GHz. That maps to
-        * ~ 0.06ppm granularity for NTP. We apply the same 12.5%
-        * margin as we do in clocksource_max_deferment()
+        * Default clocksources are *special* and self-define their mult/shift.
+        * But, you're not special, so you should specify a freq value.
         */
-       sec = (cs->mask - (cs->mask >> 3));
-       do_div(sec, freq);
-       do_div(sec, scale);
-       if (!sec)
-               sec = 1;
-       else if (sec > 600 && cs->mask > UINT_MAX)
-               sec = 600;
-
-       clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
-                              NSEC_PER_SEC / scale, sec * scale);
-
+       if (freq) {
+               /*
+                * Calc the maximum number of seconds which we can run before
+                * wrapping around. For clocksources which have a mask > 32-bit
+                * we need to limit the max sleep time to have a good
+                * conversion precision. 10 minutes is still a reasonable
+                * amount. That results in a shift value of 24 for a
+                * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
+                * ~ 0.06ppm granularity for NTP.
+                */
+               sec = cs->mask;
+               do_div(sec, freq);
+               do_div(sec, scale);
+               if (!sec)
+                       sec = 1;
+               else if (sec > 600 && cs->mask > UINT_MAX)
+                       sec = 600;
+
+               clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+                                      NSEC_PER_SEC / scale, sec * scale);
+       }
        /*
-        * for clocksources that have large mults, to avoid overflow.
-        * Since mult may be adjusted by ntp, add an safety extra margin
-        *
+        * Ensure clocksources that have large 'mult' values don't overflow
+        * when adjusted.
         */
        cs->maxadj = clocksource_max_adjustment(cs);
-       while ((cs->mult + cs->maxadj < cs->mult)
-               || (cs->mult - cs->maxadj > cs->mult)) {
+       while (freq && ((cs->mult + cs->maxadj < cs->mult)
+               || (cs->mult - cs->maxadj > cs->mult))) {
                cs->mult >>= 1;
                cs->shift--;
                cs->maxadj = clocksource_max_adjustment(cs);
        }
 
-       cs->max_idle_ns = clocksource_max_deferment(cs);
+       /*
+        * Only warn for *special* clocksources that self-define
+        * their mult/shift values and don't specify a freq.
+        */
+       WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
+               "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
+               cs->name);
+
+       clocksource_update_max_deferment(cs);
+
+       pr_info("clocksource %s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
+                       cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
 }
-EXPORT_SYMBOL_GPL(__clocksource_updatefreq_scale);
+EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
 
 /**
  * __clocksource_register_scale - Used to install new clocksources
@@ -714,7 +728,7 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 {
 
        /* Initialize mult/shift and max_idle_ns */
-       __clocksource_updatefreq_scale(cs, scale, freq);
+       __clocksource_update_freq_scale(cs, scale, freq);
 
        /* Add clocksource to the clocksource list */
        mutex_lock(&clocksource_mutex);
@@ -726,33 +740,6 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 }
 EXPORT_SYMBOL_GPL(__clocksource_register_scale);
 
-
-/**
- * clocksource_register - Used to install new clocksources
- * @cs:                clocksource to be registered
- *
- * Returns -EBUSY if registration fails, zero otherwise.
- */
-int clocksource_register(struct clocksource *cs)
-{
-       /* calculate max adjustment for given mult/shift */
-       cs->maxadj = clocksource_max_adjustment(cs);
-       WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
-               "Clocksource %s might overflow on 11%% adjustment\n",
-               cs->name);
-
-       /* calculate max idle time permitted for this clocksource */
-       cs->max_idle_ns = clocksource_max_deferment(cs);
-
-       mutex_lock(&clocksource_mutex);
-       clocksource_enqueue(cs);
-       clocksource_enqueue_watchdog(cs);
-       clocksource_select();
-       mutex_unlock(&clocksource_mutex);
-       return 0;
-}
-EXPORT_SYMBOL(clocksource_register);
-
 static void __clocksource_change_rating(struct clocksource *cs, int rating)
 {
        list_del(&cs->list);
index bee0c1f..76d4bd9 100644 (file)
@@ -54,7 +54,7 @@
 
 #include <trace/events/timer.h>
 
-#include "timekeeping.h"
+#include "tick-internal.h"
 
 /*
  * The timer bases:
@@ -1707,17 +1707,10 @@ static int hrtimer_cpu_notify(struct notifier_block *self,
                break;
 
 #ifdef CONFIG_HOTPLUG_CPU
-       case CPU_DYING:
-       case CPU_DYING_FROZEN:
-               clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DYING, &scpu);
-               break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
-       {
-               clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &scpu);
                migrate_hrtimers(scpu);
                break;
-       }
 #endif
 
        default:
index a6a5bf5..347fecf 100644 (file)
@@ -25,7 +25,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
-#include "tick-internal.h"
+#include "timekeeping.h"
 
 /* The Jiffies based clocksource is the lowest common
  * denominator clock source which should function on
@@ -71,6 +71,7 @@ static struct clocksource clocksource_jiffies = {
        .mask           = 0xffffffff, /*32bits*/
        .mult           = NSEC_PER_JIFFY << JIFFIES_SHIFT, /* details above */
        .shift          = JIFFIES_SHIFT,
+       .max_cycles     = 10,
 };
 
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(jiffies_lock);
@@ -94,7 +95,7 @@ EXPORT_SYMBOL(jiffies);
 
 static int __init init_jiffies_clocksource(void)
 {
-       return clocksource_register(&clocksource_jiffies);
+       return __clocksource_register(&clocksource_jiffies);
 }
 
 core_initcall(init_jiffies_clocksource);
@@ -130,6 +131,6 @@ int register_refined_jiffies(long cycles_per_second)
 
        refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
 
-       clocksource_register(&refined_jiffies);
+       __clocksource_register(&refined_jiffies);
        return 0;
 }
index 0f60b08..7a68100 100644 (file)
@@ -17,7 +17,6 @@
 #include <linux/module.h>
 #include <linux/rtc.h>
 
-#include "tick-internal.h"
 #include "ntp_internal.h"
 
 /*
@@ -459,6 +458,16 @@ out:
        return leap;
 }
 
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
+int __weak update_persistent_clock64(struct timespec64 now64)
+{
+       struct timespec now;
+
+       now = timespec64_to_timespec(now64);
+       return update_persistent_clock(now);
+}
+#endif
+
 #if defined(CONFIG_GENERIC_CMOS_UPDATE) || defined(CONFIG_RTC_SYSTOHC)
 static void sync_cmos_clock(struct work_struct *work);
 
@@ -494,8 +503,9 @@ static void sync_cmos_clock(struct work_struct *work)
                if (persistent_clock_is_local)
                        adjust.tv_sec -= (sys_tz.tz_minuteswest * 60);
 #ifdef CONFIG_GENERIC_CMOS_UPDATE
-               fail = update_persistent_clock(timespec64_to_timespec(adjust));
+               fail = update_persistent_clock64(adjust);
 #endif
+
 #ifdef CONFIG_RTC_SYSTOHC
                if (fail == -ENODEV)
                        fail = rtc_set_ntp_time(adjust);
index 01d2d15..a26036d 100644 (file)
@@ -1,5 +1,6 @@
 /*
- * sched_clock.c: support for extending counters to full 64-bit ns counter
+ * sched_clock.c: Generic sched_clock() support, to extend low level
+ *                hardware time counters to full 64-bit ns values.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
 #include <linux/seqlock.h>
 #include <linux/bitops.h>
 
-struct clock_data {
-       ktime_t wrap_kt;
+/**
+ * struct clock_read_data - data required to read from sched_clock()
+ *
+ * @epoch_ns:          sched_clock() value at last update
+ * @epoch_cyc:         Clock cycle value at last update.
+ * @sched_clock_mask:   Bitmask for two's complement subtraction of non 64bit
+ *                     clocks.
+ * @read_sched_clock:  Current clock source (or dummy source when suspended).
+ * @mult:              Multiplier for scaled math conversion.
+ * @shift:             Shift value for scaled math conversion.
+ *
+ * Care must be taken when updating this structure; it is read by
+ * some very hot code paths. It occupies <=40 bytes and, when combined
+ * with the seqcount used to synchronize access, comfortably fits into
+ * a 64 byte cache line.
+ */
+struct clock_read_data {
        u64 epoch_ns;
        u64 epoch_cyc;
-       seqcount_t seq;
-       unsigned long rate;
+       u64 sched_clock_mask;
+       u64 (*read_sched_clock)(void);
        u32 mult;
        u32 shift;
-       bool suspended;
+};
+
+/**
+ * struct clock_data - all data needed for sched_clock() (including
+ *                     registration of a new clock source)
+ *
+ * @seq:               Sequence counter for protecting updates. The lowest
+ *                     bit is the index for @read_data.
+ * @read_data:         Data required to read from sched_clock.
+ * @wrap_kt:           Duration for which clock can run before wrapping.
+ * @rate:              Tick rate of the registered clock.
+ * @actual_read_sched_clock: Registered hardware level clock read function.
+ *
+ * The ordering of this structure has been chosen to optimize cache
+ * performance. In particular 'seq' and 'read_data[0]' (combined) should fit
+ * into a single 64-byte cache line.
+ */
+struct clock_data {
+       seqcount_t              seq;
+       struct clock_read_data  read_data[2];
+       ktime_t                 wrap_kt;
+       unsigned long           rate;
+
+       u64 (*actual_read_sched_clock)(void);
 };
 
 static struct hrtimer sched_clock_timer;
@@ -34,12 +73,6 @@ static int irqtime = -1;
 
 core_param(irqtime, irqtime, int, 0400);
 
-static struct clock_data cd = {
-       .mult   = NSEC_PER_SEC / HZ,
-};
-
-static u64 __read_mostly sched_clock_mask;
-
 static u64 notrace jiffy_sched_clock_read(void)
 {
        /*
@@ -49,7 +82,11 @@ static u64 notrace jiffy_sched_clock_read(void)
        return (u64)(jiffies - INITIAL_JIFFIES);
 }
 
-static u64 __read_mostly (*read_sched_clock)(void) = jiffy_sched_clock_read;
+static struct clock_data cd ____cacheline_aligned = {
+       .read_data[0] = { .mult = NSEC_PER_SEC / HZ,
+                         .read_sched_clock = jiffy_sched_clock_read, },
+       .actual_read_sched_clock = jiffy_sched_clock_read,
+};
 
 static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 {
@@ -58,111 +95,136 @@ static inline u64 notrace cyc_to_ns(u64 cyc, u32 mult, u32 shift)
 
 unsigned long long notrace sched_clock(void)
 {
-       u64 epoch_ns;
-       u64 epoch_cyc;
-       u64 cyc;
+       u64 cyc, res;
        unsigned long seq;
-
-       if (cd.suspended)
-               return cd.epoch_ns;
+       struct clock_read_data *rd;
 
        do {
-               seq = raw_read_seqcount_begin(&cd.seq);
-               epoch_cyc = cd.epoch_cyc;
-               epoch_ns = cd.epoch_ns;
+               seq = raw_read_seqcount(&cd.seq);
+               rd = cd.read_data + (seq & 1);
+
+               cyc = (rd->read_sched_clock() - rd->epoch_cyc) &
+                     rd->sched_clock_mask;
+               res = rd->epoch_ns + cyc_to_ns(cyc, rd->mult, rd->shift);
        } while (read_seqcount_retry(&cd.seq, seq));
 
-       cyc = read_sched_clock();
-       cyc = (cyc - epoch_cyc) & sched_clock_mask;
-       return epoch_ns + cyc_to_ns(cyc, cd.mult, cd.shift);
+       return res;
+}
+
+/*
+ * Updating the data required to read the clock.
+ *
+ * sched_clock() will never observe mis-matched data even if called from
+ * an NMI. We do this by maintaining an odd/even copy of the data and
+ * steering sched_clock() to one or the other using a sequence counter.
+ * In order to preserve the data cache profile of sched_clock() as much
+ * as possible the system reverts back to the even copy when the update
+ * completes; the odd copy is used *only* during an update.
+ */
+static void update_clock_read_data(struct clock_read_data *rd)
+{
+       /* update the backup (odd) copy with the new data */
+       cd.read_data[1] = *rd;
+
+       /* steer readers towards the odd copy */
+       raw_write_seqcount_latch(&cd.seq);
+
+       /* now it's safe for us to update the normal (even) copy */
+       cd.read_data[0] = *rd;
+
+       /* switch readers back to the even copy */
+       raw_write_seqcount_latch(&cd.seq);
 }
 
 /*
- * Atomically update the sched_clock epoch.
+ * Atomically update the sched_clock() epoch.
  */
-static void notrace update_sched_clock(void)
+static void update_sched_clock(void)
 {
-       unsigned long flags;
        u64 cyc;
        u64 ns;
+       struct clock_read_data rd;
+
+       rd = cd.read_data[0];
+
+       cyc = cd.actual_read_sched_clock();
+       ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+
+       rd.epoch_ns = ns;
+       rd.epoch_cyc = cyc;
 
-       cyc = read_sched_clock();
-       ns = cd.epoch_ns +
-               cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-                         cd.mult, cd.shift);
-
-       raw_local_irq_save(flags);
-       raw_write_seqcount_begin(&cd.seq);
-       cd.epoch_ns = ns;
-       cd.epoch_cyc = cyc;
-       raw_write_seqcount_end(&cd.seq);
-       raw_local_irq_restore(flags);
+       update_clock_read_data(&rd);
 }
 
 static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)
 {
        update_sched_clock();
        hrtimer_forward_now(hrt, cd.wrap_kt);
+
        return HRTIMER_RESTART;
 }
 
-void __init sched_clock_register(u64 (*read)(void), int bits,
-                                unsigned long rate)
+void __init
+sched_clock_register(u64 (*read)(void), int bits, unsigned long rate)
 {
        u64 res, wrap, new_mask, new_epoch, cyc, ns;
        u32 new_mult, new_shift;
-       ktime_t new_wrap_kt;
        unsigned long r;
        char r_unit;
+       struct clock_read_data rd;
 
        if (cd.rate > rate)
                return;
 
        WARN_ON(!irqs_disabled());
 
-       /* calculate the mult/shift to convert counter ticks to ns. */
+       /* Calculate the mult/shift to convert counter ticks to ns. */
        clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600);
 
        new_mask = CLOCKSOURCE_MASK(bits);
+       cd.rate = rate;
+
+       /* Calculate how many nanosecs until we risk wrapping */
+       wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask, NULL);
+       cd.wrap_kt = ns_to_ktime(wrap);
 
-       /* calculate how many ns until we wrap */
-       wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask);
-       new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3));
+       rd = cd.read_data[0];
 
-       /* update epoch for new counter and update epoch_ns from old counter*/
+       /* Update epoch for new counter and update 'epoch_ns' from old counter */
        new_epoch = read();
-       cyc = read_sched_clock();
-       ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask,
-                         cd.mult, cd.shift);
+       cyc = cd.actual_read_sched_clock();
+       ns = rd.epoch_ns + cyc_to_ns((cyc - rd.epoch_cyc) & rd.sched_clock_mask, rd.mult, rd.shift);
+       cd.actual_read_sched_clock = read;
 
-       raw_write_seqcount_begin(&cd.seq);
-       read_sched_clock = read;
-       sched_clock_mask = new_mask;
-       cd.rate = rate;
-       cd.wrap_kt = new_wrap_kt;
-       cd.mult = new_mult;
-       cd.shift = new_shift;
-       cd.epoch_cyc = new_epoch;
-       cd.epoch_ns = ns;
-       raw_write_seqcount_end(&cd.seq);
+       rd.read_sched_clock     = read;
+       rd.sched_clock_mask     = new_mask;
+       rd.mult                 = new_mult;
+       rd.shift                = new_shift;
+       rd.epoch_cyc            = new_epoch;
+       rd.epoch_ns             = ns;
+
+       update_clock_read_data(&rd);
 
        r = rate;
        if (r >= 4000000) {
                r /= 1000000;
                r_unit = 'M';
-       } else if (r >= 1000) {
-               r /= 1000;
-               r_unit = 'k';
-       } else
-               r_unit = ' ';
-
-       /* calculate the ns resolution of this counter */
+       } else {
+               if (r >= 1000) {
+                       r /= 1000;
+                       r_unit = 'k';
+               } else {
+                       r_unit = ' ';
+               }
+       }
+
+       /* Calculate the ns resolution of this counter */
        res = cyc_to_ns(1ULL, new_mult, new_shift);
 
        pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",
                bits, r, r_unit, res, wrap);
 
-       /* Enable IRQ time accounting if we have a fast enough sched_clock */
+       /* Enable IRQ time accounting if we have a fast enough sched_clock() */
        if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
                enable_sched_clock_irqtime();
 
@@ -172,10 +234,10 @@ void __init sched_clock_register(u64 (*read)(void), int bits,
 void __init sched_clock_postinit(void)
 {
        /*
-        * If no sched_clock function has been provided at that point,
+        * If no sched_clock() function has been provided at that point,
         * make it the final one one.
         */
-       if (read_sched_clock == jiffy_sched_clock_read)
+       if (cd.actual_read_sched_clock == jiffy_sched_clock_read)
                sched_clock_register(jiffy_sched_clock_read, BITS_PER_LONG, HZ);
 
        update_sched_clock();
@@ -189,29 +251,53 @@ void __init sched_clock_postinit(void)
        hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
 }
 
+/*
+ * Clock read function for use when the clock is suspended.
+ *
+ * This function makes it appear to sched_clock() as if the clock
+ * stopped counting at its last update.
+ *
+ * This function must only be called from the critical
+ * section in sched_clock(). It relies on the read_seqcount_retry()
+ * at the end of the critical section to be sure we observe the
+ * correct copy of 'epoch_cyc'.
+ */
+static u64 notrace suspended_sched_clock_read(void)
+{
+       unsigned long seq = raw_read_seqcount(&cd.seq);
+
+       return cd.read_data[seq & 1].epoch_cyc;
+}
+
 static int sched_clock_suspend(void)
 {
+       struct clock_read_data *rd = &cd.read_data[0];
+
        update_sched_clock();
        hrtimer_cancel(&sched_clock_timer);
-       cd.suspended = true;
+       rd->read_sched_clock = suspended_sched_clock_read;
+
        return 0;
 }
 
 static void sched_clock_resume(void)
 {
-       cd.epoch_cyc = read_sched_clock();
+       struct clock_read_data *rd = &cd.read_data[0];
+
+       rd->epoch_cyc = cd.actual_read_sched_clock();
        hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
-       cd.suspended = false;
+       rd->read_sched_clock = cd.actual_read_sched_clock;
 }
 
 static struct syscore_ops sched_clock_ops = {
-       .suspend = sched_clock_suspend,
-       .resume = sched_clock_resume,
+       .suspend        = sched_clock_suspend,
+       .resume         = sched_clock_resume,
 };
 
 static int __init sched_clock_syscore_init(void)
 {
        register_syscore_ops(&sched_clock_ops);
+
        return 0;
 }
 device_initcall(sched_clock_syscore_init);
index eb682d5..6aac4be 100644 (file)
@@ -49,6 +49,7 @@ static void bc_set_mode(enum clock_event_mode mode,
  */
 static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
 {
+       int bc_moved;
        /*
         * We try to cancel the timer first. If the callback is on
         * flight on some other cpu then we let it handle it. If we
@@ -60,9 +61,15 @@ static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
         * restart the timer because we are in the callback, but we
         * can set the expiry time and let the callback return
         * HRTIMER_RESTART.
+        *
+        * Since we are in the idle loop at this point and because
+        * hrtimer_{start/cancel} functions call into tracing,
+        * calls to these functions must be bound within RCU_NONIDLE.
         */
-       if (hrtimer_try_to_cancel(&bctimer) >= 0) {
-               hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED);
+       RCU_NONIDLE(bc_moved = (hrtimer_try_to_cancel(&bctimer) >= 0) ?
+               !hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED) :
+                       0);
+       if (bc_moved) {
                /* Bind the "device" to the cpu */
                bc->bound_on = smp_processor_id();
        } else if (bc->bound_on == smp_processor_id()) {
index 066f0ec..7e8ca4f 100644 (file)
@@ -33,12 +33,14 @@ static cpumask_var_t tick_broadcast_mask;
 static cpumask_var_t tick_broadcast_on;
 static cpumask_var_t tmpmask;
 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
-static int tick_broadcast_force;
+static int tick_broadcast_forced;
 
 #ifdef CONFIG_TICK_ONESHOT
 static void tick_broadcast_clear_oneshot(int cpu);
+static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 #else
 static inline void tick_broadcast_clear_oneshot(int cpu) { }
+static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
 #endif
 
 /*
@@ -303,7 +305,7 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
        /*
         * The device is in periodic mode. No reprogramming necessary:
         */
-       if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
+       if (dev->state == CLOCK_EVT_STATE_PERIODIC)
                goto unlock;
 
        /*
@@ -324,49 +326,54 @@ unlock:
        raw_spin_unlock(&tick_broadcast_lock);
 }
 
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop
+/**
+ * tick_broadcast_control - Enable/disable or force broadcast mode
+ * @mode:      The selected broadcast mode
+ *
+ * Called when the system enters a state where affected tick devices
+ * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
+ *
+ * Called with interrupts disabled, so clockevents_lock is not
+ * required here because the local clock event device cannot go away
+ * under us.
  */
-static void tick_do_broadcast_on_off(unsigned long *reason)
+void tick_broadcast_control(enum tick_broadcast_mode mode)
 {
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
-       unsigned long flags;
        int cpu, bc_stopped;
 
-       raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-
-       cpu = smp_processor_id();
-       td = &per_cpu(tick_cpu_device, cpu);
+       td = this_cpu_ptr(&tick_cpu_device);
        dev = td->evtdev;
-       bc = tick_broadcast_device.evtdev;
 
        /*
         * Is the device not affected by the powerstate ?
         */
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
-               goto out;
+               return;
 
        if (!tick_device_is_functional(dev))
-               goto out;
+               return;
 
+       raw_spin_lock(&tick_broadcast_lock);
+       cpu = smp_processor_id();
+       bc = tick_broadcast_device.evtdev;
        bc_stopped = cpumask_empty(tick_broadcast_mask);
 
-       switch (*reason) {
-       case CLOCK_EVT_NOTIFY_BROADCAST_ON:
-       case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
+       switch (mode) {
+       case TICK_BROADCAST_FORCE:
+               tick_broadcast_forced = 1;
+       case TICK_BROADCAST_ON:
                cpumask_set_cpu(cpu, tick_broadcast_on);
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
-               if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
-                       tick_broadcast_force = 1;
                break;
-       case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
-               if (tick_broadcast_force)
+
+       case TICK_BROADCAST_OFF:
+               if (tick_broadcast_forced)
                        break;
                cpumask_clear_cpu(cpu, tick_broadcast_on);
                if (!tick_device_is_functional(dev))
@@ -388,22 +395,9 @@ static void tick_do_broadcast_on_off(unsigned long *reason)
                else
                        tick_broadcast_setup_oneshot(bc);
        }
-out:
-       raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
-}
-
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop.
- */
-void tick_broadcast_on_off(unsigned long reason, int *oncpu)
-{
-       if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
-               printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
-                      "offline CPU #%d\n", *oncpu);
-       else
-               tick_do_broadcast_on_off(&reason);
+       raw_spin_unlock(&tick_broadcast_lock);
 }
+EXPORT_SYMBOL_GPL(tick_broadcast_control);
 
 /*
  * Set the periodic handler depending on broadcast on/off
@@ -416,14 +410,14 @@ void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
                dev->event_handler = tick_handle_periodic_broadcast;
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Remove a CPU from broadcasting
  */
-void tick_shutdown_broadcast(unsigned int *cpup)
+void tick_shutdown_broadcast(unsigned int cpu)
 {
        struct clock_event_device *bc;
        unsigned long flags;
-       unsigned int cpu = *cpup;
 
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -438,6 +432,7 @@ void tick_shutdown_broadcast(unsigned int *cpup)
 
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 void tick_suspend_broadcast(void)
 {
@@ -453,38 +448,48 @@ void tick_suspend_broadcast(void)
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
-int tick_resume_broadcast(void)
+/*
+ * This is called from tick_resume_local() on a resuming CPU. That's
+ * called from the core resume function, tick_unfreeze() and the magic XEN
+ * resume hackery.
+ *
+ * In none of these cases the broadcast device mode can change and the
+ * bit of the resuming CPU in the broadcast mask is safe as well.
+ */
+bool tick_resume_check_broadcast(void)
+{
+       if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
+               return false;
+       else
+               return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
+}
+
+void tick_resume_broadcast(void)
 {
        struct clock_event_device *bc;
        unsigned long flags;
-       int broadcast = 0;
 
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
        bc = tick_broadcast_device.evtdev;
 
        if (bc) {
-               clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
+               clockevents_tick_resume(bc);
 
                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_broadcast_start_periodic(bc);
-                       broadcast = cpumask_test_cpu(smp_processor_id(),
-                                                    tick_broadcast_mask);
                        break;
                case TICKDEV_MODE_ONESHOT:
                        if (!cpumask_empty(tick_broadcast_mask))
-                               broadcast = tick_resume_broadcast_oneshot(bc);
+                               tick_resume_broadcast_oneshot(bc);
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
-
-       return broadcast;
 }
 
-
 #ifdef CONFIG_TICK_ONESHOT
 
 static cpumask_var_t tick_broadcast_oneshot_mask;
@@ -532,8 +537,8 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
 {
        int ret;
 
-       if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
-               clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+       if (bc->state != CLOCK_EVT_STATE_ONESHOT)
+               clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
 
        ret = clockevents_program_event(bc, expires, force);
        if (!ret)
@@ -541,10 +546,9 @@ static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
        return ret;
 }
 
-int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
+static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
 {
-       clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
-       return 0;
+       clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
 }
 
 /*
@@ -562,8 +566,8 @@ void tick_check_oneshot_broadcast_this_cpu(void)
                 * switched over, leave the device alone.
                 */
                if (td->mode == TICKDEV_MODE_ONESHOT) {
-                       clockevents_set_mode(td->evtdev,
-                                            CLOCK_EVT_MODE_ONESHOT);
+                       clockevents_set_state(td->evtdev,
+                                             CLOCK_EVT_STATE_ONESHOT);
                }
        }
 }
@@ -666,31 +670,26 @@ static void broadcast_shutdown_local(struct clock_event_device *bc,
                if (dev->next_event.tv64 < bc->next_event.tv64)
                        return;
        }
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
 }
 
-static void broadcast_move_bc(int deadcpu)
-{
-       struct clock_event_device *bc = tick_broadcast_device.evtdev;
-
-       if (!bc || !broadcast_needs_cpu(bc, deadcpu))
-               return;
-       /* This moves the broadcast assignment to this cpu */
-       clockevents_program_event(bc, bc->next_event, 1);
-}
-
-/*
- * Powerstate information: The system enters/leaves a state, where
- * affected devices might stop
+/**
+ * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
+ * @state:     The target state (enter/exit)
+ *
+ * The system enters/leaves a state, where affected devices might stop
  * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
+ *
+ * Called with interrupts disabled, so clockevents_lock is not
+ * required here because the local clock event device cannot go away
+ * under us.
  */
-int tick_broadcast_oneshot_control(unsigned long reason)
+int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
 {
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
-       unsigned long flags;
-       ktime_t now;
        int cpu, ret = 0;
+       ktime_t now;
 
        /*
         * Periodic mode does not care about the enter/exit of power
@@ -703,17 +702,17 @@ int tick_broadcast_oneshot_control(unsigned long reason)
         * We are called with preemtion disabled from the depth of the
         * idle code, so we can't be moved away.
         */
-       cpu = smp_processor_id();
-       td = &per_cpu(tick_cpu_device, cpu);
+       td = this_cpu_ptr(&tick_cpu_device);
        dev = td->evtdev;
 
        if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                return 0;
 
+       raw_spin_lock(&tick_broadcast_lock);
        bc = tick_broadcast_device.evtdev;
+       cpu = smp_processor_id();
 
-       raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-       if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
+       if (state == TICK_BROADCAST_ENTER) {
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
                        broadcast_shutdown_local(bc, dev);
@@ -741,7 +740,7 @@ int tick_broadcast_oneshot_control(unsigned long reason)
                        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        } else {
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
-                       clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+                       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
                        /*
                         * The cpu which was handling the broadcast
                         * timer marked this cpu in the broadcast
@@ -805,9 +804,10 @@ int tick_broadcast_oneshot_control(unsigned long reason)
                }
        }
 out:
-       raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+       raw_spin_unlock(&tick_broadcast_lock);
        return ret;
 }
+EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);
 
 /*
  * Reset the one shot broadcast for a cpu
@@ -842,7 +842,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
 
        /* Set it up only once ! */
        if (bc->event_handler != tick_handle_oneshot_broadcast) {
-               int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
+               int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC;
 
                bc->event_handler = tick_handle_oneshot_broadcast;
 
@@ -858,7 +858,7 @@ void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
                           tick_broadcast_oneshot_mask, tmpmask);
 
                if (was_periodic && !cpumask_empty(tmpmask)) {
-                       clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
+                       clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
                        tick_broadcast_init_next_event(tmpmask,
                                                       tick_next_period);
                        tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
@@ -894,14 +894,28 @@ void tick_broadcast_switch_to_oneshot(void)
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void hotplug_cpu__broadcast_tick_pull(int deadcpu)
+{
+       struct clock_event_device *bc;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
+       bc = tick_broadcast_device.evtdev;
+
+       if (bc && broadcast_needs_cpu(bc, deadcpu)) {
+               /* This moves the broadcast assignment to this CPU: */
+               clockevents_program_event(bc, bc->next_event, 1);
+       }
+       raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
 
 /*
  * Remove a dead CPU from broadcasting
  */
-void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
+void tick_shutdown_broadcast_oneshot(unsigned int cpu)
 {
        unsigned long flags;
-       unsigned int cpu = *cpup;
 
        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
 
@@ -913,10 +927,9 @@ void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
 
-       broadcast_move_bc(cpu);
-
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
 }
+#endif
 
 /*
  * Check, whether the broadcast device is in one shot mode
index f7c5155..3ae6afa 100644 (file)
@@ -102,7 +102,7 @@ void tick_handle_periodic(struct clock_event_device *dev)
 
        tick_periodic(cpu);
 
-       if (dev->mode != CLOCK_EVT_MODE_ONESHOT)
+       if (dev->state != CLOCK_EVT_STATE_ONESHOT)
                return;
        for (;;) {
                /*
@@ -140,7 +140,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 
        if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
            !tick_broadcast_oneshot_active()) {
-               clockevents_set_mode(dev, CLOCK_EVT_MODE_PERIODIC);
+               clockevents_set_state(dev, CLOCK_EVT_STATE_PERIODIC);
        } else {
                unsigned long seq;
                ktime_t next;
@@ -150,7 +150,7 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
                        next = tick_next_period;
                } while (read_seqretry(&jiffies_lock, seq));
 
-               clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+               clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
 
                for (;;) {
                        if (!clockevents_program_event(dev, next, false))
@@ -332,14 +332,16 @@ out_bc:
        tick_install_broadcast_device(newdev);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
 /*
  * Transfer the do_timer job away from a dying cpu.
  *
- * Called with interrupts disabled.
+ * Called with interrupts disabled. No locking required. If
+ * tick_do_timer_cpu is owned by this cpu, nothing can change it.
  */
-void tick_handover_do_timer(int *cpup)
+void tick_handover_do_timer(void)
 {
-       if (*cpup == tick_do_timer_cpu) {
+       if (tick_do_timer_cpu == smp_processor_id()) {
                int cpu = cpumask_first(cpu_online_mask);
 
                tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
@@ -354,9 +356,9 @@ void tick_handover_do_timer(int *cpup)
  * access the hardware device itself.
  * We just set the mode and remove it from the lists.
  */
-void tick_shutdown(unsigned int *cpup)
+void tick_shutdown(unsigned int cpu)
 {
-       struct tick_device *td = &per_cpu(tick_cpu_device, *cpup);
+       struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
        struct clock_event_device *dev = td->evtdev;
 
        td->mode = TICKDEV_MODE_PERIODIC;
@@ -365,27 +367,42 @@ void tick_shutdown(unsigned int *cpup)
                 * Prevent that the clock events layer tries to call
                 * the set mode function!
                 */
+               dev->state = CLOCK_EVT_STATE_DETACHED;
                dev->mode = CLOCK_EVT_MODE_UNUSED;
                clockevents_exchange_device(dev, NULL);
                dev->event_handler = clockevents_handle_noop;
                td->evtdev = NULL;
        }
 }
+#endif
 
-void tick_suspend(void)
+/**
+ * tick_suspend_local - Suspend the local tick device
+ *
+ * Called from the local cpu for freeze with interrupts disabled.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_suspend_local(void)
 {
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
 
        clockevents_shutdown(td->evtdev);
 }
 
-void tick_resume(void)
+/**
+ * tick_resume_local - Resume the local tick device
+ *
+ * Called from the local CPU for unfreeze or XEN resume magic.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_resume_local(void)
 {
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
-       int broadcast = tick_resume_broadcast();
-
-       clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_RESUME);
+       bool broadcast = tick_resume_check_broadcast();
 
+       clockevents_tick_resume(td->evtdev);
        if (!broadcast) {
                if (td->mode == TICKDEV_MODE_PERIODIC)
                        tick_setup_periodic(td->evtdev, 0);
@@ -394,6 +411,35 @@ void tick_resume(void)
        }
 }
 
+/**
+ * tick_suspend - Suspend the tick and the broadcast device
+ *
+ * Called from syscore_suspend() via timekeeping_suspend with only one
+ * CPU online and interrupts disabled or from tick_freeze() under
+ * tick_freeze_lock.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_suspend(void)
+{
+       tick_suspend_local();
+       tick_suspend_broadcast();
+}
+
+/**
+ * tick_resume - Resume the tick and the broadcast device
+ *
+ * Called from syscore_resume() via timekeeping_resume with only one
+ * CPU online and interrupts disabled.
+ *
+ * No locks required. Nothing can change the per cpu device.
+ */
+void tick_resume(void)
+{
+       tick_resume_broadcast();
+       tick_resume_local();
+}
+
 static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
 static unsigned int tick_freeze_depth;
 
@@ -411,12 +457,10 @@ void tick_freeze(void)
        raw_spin_lock(&tick_freeze_lock);
 
        tick_freeze_depth++;
-       if (tick_freeze_depth == num_online_cpus()) {
+       if (tick_freeze_depth == num_online_cpus())
                timekeeping_suspend();
-       } else {
-               tick_suspend();
-               tick_suspend_broadcast();
-       }
+       else
+               tick_suspend_local();
 
        raw_spin_unlock(&tick_freeze_lock);
 }
@@ -437,7 +481,7 @@ void tick_unfreeze(void)
        if (tick_freeze_depth == num_online_cpus())
                timekeeping_resume();
        else
-               tick_resume();
+               tick_resume_local();
 
        tick_freeze_depth--;
 
index 366aeb4..b64fdd8 100644 (file)
@@ -5,15 +5,12 @@
 #include <linux/tick.h>
 
 #include "timekeeping.h"
+#include "tick-sched.h"
 
-extern seqlock_t jiffies_lock;
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
 
-#define CS_NAME_LEN    32
-
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BUILD
-
-#define TICK_DO_TIMER_NONE     -1
-#define TICK_DO_TIMER_BOOT     -2
+# define TICK_DO_TIMER_NONE    -1
+# define TICK_DO_TIMER_BOOT    -2
 
 DECLARE_PER_CPU(struct tick_device, tick_cpu_device);
 extern ktime_t tick_next_period;
@@ -23,21 +20,72 @@ extern int tick_do_timer_cpu __read_mostly;
 extern void tick_setup_periodic(struct clock_event_device *dev, int broadcast);
 extern void tick_handle_periodic(struct clock_event_device *dev);
 extern void tick_check_new_device(struct clock_event_device *dev);
-extern void tick_handover_do_timer(int *cpup);
-extern void tick_shutdown(unsigned int *cpup);
+extern void tick_shutdown(unsigned int cpu);
 extern void tick_suspend(void);
 extern void tick_resume(void);
 extern bool tick_check_replacement(struct clock_event_device *curdev,
                                   struct clock_event_device *newdev);
 extern void tick_install_replacement(struct clock_event_device *dev);
+extern int tick_is_oneshot_available(void);
+extern struct tick_device *tick_get_device(int cpu);
 
-extern void clockevents_shutdown(struct clock_event_device *dev);
+extern int clockevents_tick_resume(struct clock_event_device *dev);
+/* Check whether the device is functional or a dummy for broadcast */
+static inline int tick_device_is_functional(struct clock_event_device *dev)
+{
+       return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
+}
 
+extern void clockevents_shutdown(struct clock_event_device *dev);
+extern void clockevents_exchange_device(struct clock_event_device *old,
+                                       struct clock_event_device *new);
+extern void clockevents_set_state(struct clock_event_device *dev,
+                                enum clock_event_state state);
+extern int clockevents_program_event(struct clock_event_device *dev,
+                                    ktime_t expires, bool force);
+extern void clockevents_handle_noop(struct clock_event_device *dev);
+extern int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
 extern ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt);
 
-/*
- * NO_HZ / high resolution timer shared code
- */
+/* Broadcasting support */
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
+extern void tick_install_broadcast_device(struct clock_event_device *dev);
+extern int tick_is_broadcast_device(struct clock_event_device *dev);
+extern void tick_shutdown_broadcast(unsigned int cpu);
+extern void tick_suspend_broadcast(void);
+extern void tick_resume_broadcast(void);
+extern bool tick_resume_check_broadcast(void);
+extern void tick_broadcast_init(void);
+extern void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
+extern int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
+extern struct tick_device *tick_get_broadcast_device(void);
+extern struct cpumask *tick_get_broadcast_mask(void);
+# else /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST: */
+static inline void tick_install_broadcast_device(struct clock_event_device *dev) { }
+static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
+static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
+static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
+static inline void tick_shutdown_broadcast(unsigned int cpu) { }
+static inline void tick_suspend_broadcast(void) { }
+static inline void tick_resume_broadcast(void) { }
+static inline bool tick_resume_check_broadcast(void) { return false; }
+static inline void tick_broadcast_init(void) { }
+static inline int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq) { return -ENODEV; }
+
+/* Set the periodic handler in non broadcast mode */
+static inline void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
+{
+       dev->event_handler = tick_handle_periodic;
+}
+# endif /* !CONFIG_GENERIC_CLOCKEVENTS_BROADCAST */
+
+#else /* !GENERIC_CLOCKEVENTS: */
+static inline void tick_suspend(void) { }
+static inline void tick_resume(void) { }
+#endif /* !GENERIC_CLOCKEVENTS */
+
+/* Oneshot related functions */
 #ifdef CONFIG_TICK_ONESHOT
 extern void tick_setup_oneshot(struct clock_event_device *newdev,
                               void (*handler)(struct clock_event_device *),
@@ -46,58 +94,42 @@ extern int tick_program_event(ktime_t expires, int force);
 extern void tick_oneshot_notify(void);
 extern int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *));
 extern void tick_resume_oneshot(void);
-# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+static inline bool tick_oneshot_possible(void) { return true; }
+extern int tick_oneshot_mode_active(void);
+extern void tick_clock_notify(void);
+extern int tick_check_oneshot_change(int allow_nohz);
+extern int tick_init_highres(void);
+#else /* !CONFIG_TICK_ONESHOT: */
+static inline
+void tick_setup_oneshot(struct clock_event_device *newdev,
+                       void (*handler)(struct clock_event_device *),
+                       ktime_t nextevt) { BUG(); }
+static inline void tick_resume_oneshot(void) { BUG(); }
+static inline int tick_program_event(ktime_t expires, int force) { return 0; }
+static inline void tick_oneshot_notify(void) { }
+static inline bool tick_oneshot_possible(void) { return false; }
+static inline int tick_oneshot_mode_active(void) { return 0; }
+static inline void tick_clock_notify(void) { }
+static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+#endif /* !CONFIG_TICK_ONESHOT */
+
+/* Functions related to oneshot broadcasting */
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
 extern void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
-extern int tick_broadcast_oneshot_control(unsigned long reason);
 extern void tick_broadcast_switch_to_oneshot(void);
-extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
-extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
+extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
 extern int tick_broadcast_oneshot_active(void);
 extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
-# else /* BROADCAST */
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
-{
-       BUG();
-}
-static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
+extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
+#else /* !(BROADCAST && ONESHOT): */
+static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
+static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
 static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
-static inline bool tick_broadcast_oneshot_available(void) { return true; }
-# endif /* !BROADCAST */
-
-#else /* !ONESHOT */
-static inline
-void tick_setup_oneshot(struct clock_event_device *newdev,
-                       void (*handler)(struct clock_event_device *),
-                       ktime_t nextevt)
-{
-       BUG();
-}
-static inline void tick_resume_oneshot(void)
-{
-       BUG();
-}
-static inline int tick_program_event(ktime_t expires, int force)
-{
-       return 0;
-}
-static inline void tick_oneshot_notify(void) { }
-static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
-{
-       BUG();
-}
-static inline int tick_broadcast_oneshot_control(unsigned long reason) { return 0; }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
-static inline int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
-{
-       return 0;
-}
-static inline int tick_broadcast_oneshot_active(void) { return 0; }
-static inline bool tick_broadcast_oneshot_available(void) { return false; }
-#endif /* !TICK_ONESHOT */
+static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
+#endif /* !(BROADCAST && ONESHOT) */
 
 /* NO_HZ_FULL internal */
 #ifdef CONFIG_NO_HZ_FULL
@@ -105,68 +137,3 @@ extern void tick_nohz_init(void);
 # else
 static inline void tick_nohz_init(void) { }
 #endif
-
-/*
- * Broadcasting support
- */
-#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
-extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
-extern void tick_install_broadcast_device(struct clock_event_device *dev);
-extern int tick_is_broadcast_device(struct clock_event_device *dev);
-extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
-extern void tick_shutdown_broadcast(unsigned int *cpup);
-extern void tick_suspend_broadcast(void);
-extern int tick_resume_broadcast(void);
-extern void tick_broadcast_init(void);
-extern void
-tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
-int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq);
-
-#else /* !BROADCAST */
-
-static inline void tick_install_broadcast_device(struct clock_event_device *dev)
-{
-}
-
-static inline int tick_is_broadcast_device(struct clock_event_device *dev)
-{
-       return 0;
-}
-static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
-                                            int cpu)
-{
-       return 0;
-}
-static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
-static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
-static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
-static inline void tick_suspend_broadcast(void) { }
-static inline int tick_resume_broadcast(void) { return 0; }
-static inline void tick_broadcast_init(void) { }
-static inline int tick_broadcast_update_freq(struct clock_event_device *dev,
-                                            u32 freq) { return -ENODEV; }
-
-/*
- * Set the periodic handler in non broadcast mode
- */
-static inline void tick_set_periodic_handler(struct clock_event_device *dev,
-                                            int broadcast)
-{
-       dev->event_handler = tick_handle_periodic;
-}
-#endif /* !BROADCAST */
-
-/*
- * Check, if the device is functional or a dummy for broadcast
- */
-static inline int tick_device_is_functional(struct clock_event_device *dev)
-{
-       return !(dev->features & CLOCK_EVT_FEAT_DUMMY);
-}
-
-int __clockevents_update_freq(struct clock_event_device *dev, u32 freq);
-
-#endif
-
-extern void do_timer(unsigned long ticks);
-extern void update_wall_time(void);
index 7ce740e..67a64b1 100644 (file)
@@ -38,7 +38,7 @@ void tick_resume_oneshot(void)
 {
        struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
 
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
        clockevents_program_event(dev, ktime_get(), true);
 }
 
@@ -50,7 +50,7 @@ void tick_setup_oneshot(struct clock_event_device *newdev,
                        ktime_t next_event)
 {
        newdev->event_handler = handler;
-       clockevents_set_mode(newdev, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(newdev, CLOCK_EVT_STATE_ONESHOT);
        clockevents_program_event(newdev, next_event, true);
 }
 
@@ -81,7 +81,7 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))
 
        td->mode = TICKDEV_MODE_ONESHOT;
        dev->event_handler = handler;
-       clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
+       clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
        tick_broadcast_switch_to_oneshot();
        return 0;
 }
index a4c4eda..9142591 100644 (file)
@@ -34,7 +34,7 @@
 /*
  * Per cpu nohz control structure
  */
-DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
 
 /*
  * The time, when the last jiffy update happened. Protected by jiffies_lock.
@@ -416,6 +416,11 @@ static int __init setup_tick_nohz(char *str)
 
 __setup("nohz=", setup_tick_nohz);
 
+int tick_nohz_tick_stopped(void)
+{
+       return __this_cpu_read(tick_cpu_sched.tick_stopped);
+}
+
 /**
  * tick_nohz_update_jiffies - update jiffies when idle was interrupted
  *
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
new file mode 100644 (file)
index 0000000..28b5da3
--- /dev/null
@@ -0,0 +1,74 @@
+#ifndef _TICK_SCHED_H
+#define _TICK_SCHED_H
+
+#include <linux/hrtimer.h>
+
+enum tick_device_mode {
+       TICKDEV_MODE_PERIODIC,
+       TICKDEV_MODE_ONESHOT,
+};
+
+struct tick_device {
+       struct clock_event_device *evtdev;
+       enum tick_device_mode mode;
+};
+
+enum tick_nohz_mode {
+       NOHZ_MODE_INACTIVE,
+       NOHZ_MODE_LOWRES,
+       NOHZ_MODE_HIGHRES,
+};
+
+/**
+ * struct tick_sched - sched tick emulation and no idle tick control/stats
+ * @sched_timer:       hrtimer to schedule the periodic tick in high
+ *                     resolution mode
+ * @last_tick:         Store the last tick expiry time when the tick
+ *                     timer is modified for nohz sleeps. This is necessary
+ *                     to resume the tick timer operation in the timeline
+ *                     when the CPU returns from nohz sleep.
+ * @tick_stopped:      Indicator that the idle tick has been stopped
+ * @idle_jiffies:      jiffies at the entry to idle for idle time accounting
+ * @idle_calls:                Total number of idle calls
+ * @idle_sleeps:       Number of idle calls, where the sched tick was stopped
+ * @idle_entrytime:    Time when the idle call was entered
+ * @idle_waketime:     Time when the idle was interrupted
+ * @idle_exittime:     Time when the idle state was left
+ * @idle_sleeptime:    Sum of the time slept in idle with sched tick stopped
+ * @iowait_sleeptime:  Sum of the time slept in idle with sched tick stopped, with IO outstanding
+ * @sleep_length:      Duration of the current idle sleep
+ * @do_timer_last:     CPU was the last one doing do_timer before going idle
+ */
+struct tick_sched {
+       struct hrtimer                  sched_timer;
+       unsigned long                   check_clocks;
+       enum tick_nohz_mode             nohz_mode;
+       ktime_t                         last_tick;
+       int                             inidle;
+       int                             tick_stopped;
+       unsigned long                   idle_jiffies;
+       unsigned long                   idle_calls;
+       unsigned long                   idle_sleeps;
+       int                             idle_active;
+       ktime_t                         idle_entrytime;
+       ktime_t                         idle_waketime;
+       ktime_t                         idle_exittime;
+       ktime_t                         idle_sleeptime;
+       ktime_t                         iowait_sleeptime;
+       ktime_t                         sleep_length;
+       unsigned long                   last_jiffies;
+       unsigned long                   next_jiffies;
+       ktime_t                         idle_expires;
+       int                             do_timer_last;
+};
+
+extern struct tick_sched *tick_get_tick_sched(int cpu);
+
+extern void tick_setup_sched_timer(void);
+#if defined CONFIG_NO_HZ_COMMON || defined CONFIG_HIGH_RES_TIMERS
+extern void tick_cancel_sched_timer(int cpu);
+#else
+static inline void tick_cancel_sched_timer(int cpu) { }
+#endif
+
+#endif
index 91db941..946acb7 100644 (file)
@@ -59,17 +59,15 @@ struct tk_fast {
 };
 
 static struct tk_fast tk_fast_mono ____cacheline_aligned;
+static struct tk_fast tk_fast_raw  ____cacheline_aligned;
 
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-/* Flag for if there is a persistent clock on this platform */
-bool __read_mostly persistent_clock_exist = false;
-
 static inline void tk_normalize_xtime(struct timekeeper *tk)
 {
-       while (tk->tkr.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr.shift)) {
-               tk->tkr.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr.shift;
+       while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
+               tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
                tk->xtime_sec++;
        }
 }
@@ -79,20 +77,20 @@ static inline struct timespec64 tk_xtime(struct timekeeper *tk)
        struct timespec64 ts;
 
        ts.tv_sec = tk->xtime_sec;
-       ts.tv_nsec = (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+       ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
        return ts;
 }
 
 static void tk_set_xtime(struct timekeeper *tk, const struct timespec64 *ts)
 {
        tk->xtime_sec = ts->tv_sec;
-       tk->tkr.xtime_nsec = (u64)ts->tv_nsec << tk->tkr.shift;
+       tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
 }
 
 static void tk_xtime_add(struct timekeeper *tk, const struct timespec64 *ts)
 {
        tk->xtime_sec += ts->tv_sec;
-       tk->tkr.xtime_nsec += (u64)ts->tv_nsec << tk->tkr.shift;
+       tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
        tk_normalize_xtime(tk);
 }
 
@@ -118,6 +116,117 @@ static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
        tk->offs_boot = ktime_add(tk->offs_boot, delta);
 }
 
+#ifdef CONFIG_DEBUG_TIMEKEEPING
+#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */
+/*
+ * These simple flag variables are managed
+ * without locks, which is racy, but ok since
+ * we don't really care about being super
+ * precise about how many events were seen,
+ * just that a problem was observed.
+ */
+static int timekeeping_underflow_seen;
+static int timekeeping_overflow_seen;
+
+/* last_warning is only modified under the timekeeping lock */
+static long timekeeping_last_warning;
+
+static void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+
+       cycle_t max_cycles = tk->tkr_mono.clock->max_cycles;
+       const char *name = tk->tkr_mono.clock->name;
+
+       if (offset > max_cycles) {
+               printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
+                               offset, name, max_cycles);
+               printk_deferred("         timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
+       } else {
+               if (offset > (max_cycles >> 1)) {
+                       printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the the '%s' clock's 50%% safety margin (%lld)\n",
+                                       offset, name, max_cycles >> 1);
+                       printk_deferred("      timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
+               }
+       }
+
+       if (timekeeping_underflow_seen) {
+               if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+                       printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
+                       printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+                       printk_deferred("         Your kernel is probably still fine.\n");
+                       timekeeping_last_warning = jiffies;
+               }
+               timekeeping_underflow_seen = 0;
+       }
+
+       if (timekeeping_overflow_seen) {
+               if (jiffies - timekeeping_last_warning > WARNING_FREQ) {
+                       printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
+                       printk_deferred("         Please report this, consider using a different clocksource, if possible.\n");
+                       printk_deferred("         Your kernel is probably still fine.\n");
+                       timekeeping_last_warning = jiffies;
+               }
+               timekeeping_overflow_seen = 0;
+       }
+}
+
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+       cycle_t now, last, mask, max, delta;
+       unsigned int seq;
+
+       /*
+        * Since we're called holding a seqlock, the data may shift
+        * under us while we're doing the calculation. This can cause
+        * false positives, since we'd note a problem but throw the
+        * results away. So nest another seqlock here to atomically
+        * grab the points we are checking with.
+        */
+       do {
+               seq = read_seqcount_begin(&tk_core.seq);
+               now = tkr->read(tkr->clock);
+               last = tkr->cycle_last;
+               mask = tkr->mask;
+               max = tkr->clock->max_cycles;
+       } while (read_seqcount_retry(&tk_core.seq, seq));
+
+       delta = clocksource_delta(now, last, mask);
+
+       /*
+        * Try to catch underflows by checking if we are seeing small
+        * mask-relative negative values.
+        */
+       if (unlikely((~delta & mask) < (mask >> 3))) {
+               timekeeping_underflow_seen = 1;
+               delta = 0;
+       }
+
+       /* Cap delta value to the max_cycles value to avoid mult overflows */
+       if (unlikely(delta > max)) {
+               timekeeping_overflow_seen = 1;
+               delta = tkr->clock->max_cycles;
+       }
+
+       return delta;
+}
+#else
+static inline void timekeeping_check_update(struct timekeeper *tk, cycle_t offset)
+{
+}
+static inline cycle_t timekeeping_get_delta(struct tk_read_base *tkr)
+{
+       cycle_t cycle_now, delta;
+
+       /* read clocksource */
+       cycle_now = tkr->read(tkr->clock);
+
+       /* calculate the delta since the last update_wall_time */
+       delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+
+       return delta;
+}
+#endif
+
 /**
  * tk_setup_internals - Set up internals to use clocksource clock.
  *
@@ -135,11 +244,16 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
        u64 tmp, ntpinterval;
        struct clocksource *old_clock;
 
-       old_clock = tk->tkr.clock;
-       tk->tkr.clock = clock;
-       tk->tkr.read = clock->read;
-       tk->tkr.mask = clock->mask;
-       tk->tkr.cycle_last = tk->tkr.read(clock);
+       old_clock = tk->tkr_mono.clock;
+       tk->tkr_mono.clock = clock;
+       tk->tkr_mono.read = clock->read;
+       tk->tkr_mono.mask = clock->mask;
+       tk->tkr_mono.cycle_last = tk->tkr_mono.read(clock);
+
+       tk->tkr_raw.clock = clock;
+       tk->tkr_raw.read = clock->read;
+       tk->tkr_raw.mask = clock->mask;
+       tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
 
        /* Do the ns -> cycle conversion first, using original mult */
        tmp = NTP_INTERVAL_LENGTH;
@@ -163,11 +277,14 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
        if (old_clock) {
                int shift_change = clock->shift - old_clock->shift;
                if (shift_change < 0)
-                       tk->tkr.xtime_nsec >>= -shift_change;
+                       tk->tkr_mono.xtime_nsec >>= -shift_change;
                else
-                       tk->tkr.xtime_nsec <<= shift_change;
+                       tk->tkr_mono.xtime_nsec <<= shift_change;
        }
-       tk->tkr.shift = clock->shift;
+       tk->tkr_raw.xtime_nsec = 0;
+
+       tk->tkr_mono.shift = clock->shift;
+       tk->tkr_raw.shift = clock->shift;
 
        tk->ntp_error = 0;
        tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
@@ -178,7 +295,8 @@ static void tk_setup_internals(struct timekeeper *tk, struct clocksource *clock)
         * active clocksource. These value will be adjusted via NTP
         * to counteract clock drifting.
         */
-       tk->tkr.mult = clock->mult;
+       tk->tkr_mono.mult = clock->mult;
+       tk->tkr_raw.mult = clock->mult;
        tk->ntp_err_mult = 0;
 }
 
@@ -193,14 +311,10 @@ static inline u32 arch_gettimeoffset(void) { return 0; }
 
 static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
 {
-       cycle_t cycle_now, delta;
+       cycle_t delta;
        s64 nsec;
 
-       /* read clocksource: */
-       cycle_now = tkr->read(tkr->clock);
-
-       /* calculate the delta since the last update_wall_time: */
-       delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
+       delta = timekeeping_get_delta(tkr);
 
        nsec = delta * tkr->mult + tkr->xtime_nsec;
        nsec >>= tkr->shift;
@@ -209,25 +323,6 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr)
        return nsec + arch_gettimeoffset();
 }
 
-static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
-{
-       struct clocksource *clock = tk->tkr.clock;
-       cycle_t cycle_now, delta;
-       s64 nsec;
-
-       /* read clocksource: */
-       cycle_now = tk->tkr.read(clock);
-
-       /* calculate the delta since the last update_wall_time: */
-       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
-
-       /* convert delta to nanoseconds. */
-       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
-
-       /* If arch requires, add in get_arch_timeoffset() */
-       return nsec + arch_gettimeoffset();
-}
-
 /**
  * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
  * @tkr: Timekeeping readout base from which we take the update
@@ -267,18 +362,18 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
  * slightly wrong timestamp (a few nanoseconds). See
  * @ktime_get_mono_fast_ns.
  */
-static void update_fast_timekeeper(struct tk_read_base *tkr)
+static void update_fast_timekeeper(struct tk_read_base *tkr, struct tk_fast *tkf)
 {
-       struct tk_read_base *base = tk_fast_mono.base;
+       struct tk_read_base *base = tkf->base;
 
        /* Force readers off to base[1] */
-       raw_write_seqcount_latch(&tk_fast_mono.seq);
+       raw_write_seqcount_latch(&tkf->seq);
 
        /* Update base[0] */
        memcpy(base, tkr, sizeof(*base));
 
        /* Force readers back to base[0] */
-       raw_write_seqcount_latch(&tk_fast_mono.seq);
+       raw_write_seqcount_latch(&tkf->seq);
 
        /* Update base[1] */
        memcpy(base + 1, base, sizeof(*base));
@@ -316,22 +411,33 @@ static void update_fast_timekeeper(struct tk_read_base *tkr)
  * of the following timestamps. Callers need to be aware of that and
  * deal with it.
  */
-u64 notrace ktime_get_mono_fast_ns(void)
+static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
 {
        struct tk_read_base *tkr;
        unsigned int seq;
        u64 now;
 
        do {
-               seq = raw_read_seqcount(&tk_fast_mono.seq);
-               tkr = tk_fast_mono.base + (seq & 0x01);
-               now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr);
+               seq = raw_read_seqcount(&tkf->seq);
+               tkr = tkf->base + (seq & 0x01);
+               now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr);
+       } while (read_seqcount_retry(&tkf->seq, seq));
 
-       } while (read_seqcount_retry(&tk_fast_mono.seq, seq));
        return now;
 }
+
+u64 ktime_get_mono_fast_ns(void)
+{
+       return __ktime_get_fast_ns(&tk_fast_mono);
+}
 EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
 
+u64 ktime_get_raw_fast_ns(void)
+{
+       return __ktime_get_fast_ns(&tk_fast_raw);
+}
+EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
+
 /* Suspend-time cycles value for halted fast timekeeper. */
 static cycle_t cycles_at_suspend;
 
@@ -353,12 +459,17 @@ static cycle_t dummy_clock_read(struct clocksource *cs)
 static void halt_fast_timekeeper(struct timekeeper *tk)
 {
        static struct tk_read_base tkr_dummy;
-       struct tk_read_base *tkr = &tk->tkr;
+       struct tk_read_base *tkr = &tk->tkr_mono;
 
        memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
        cycles_at_suspend = tkr->read(tkr->clock);
        tkr_dummy.read = dummy_clock_read;
-       update_fast_timekeeper(&tkr_dummy);
+       update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
+
+       tkr = &tk->tkr_raw;
+       memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
+       tkr_dummy.read = dummy_clock_read;
+       update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
 }
 
 #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
@@ -369,8 +480,8 @@ static inline void update_vsyscall(struct timekeeper *tk)
 
        xt = timespec64_to_timespec(tk_xtime(tk));
        wm = timespec64_to_timespec(tk->wall_to_monotonic);
-       update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,
-                           tk->tkr.cycle_last);
+       update_vsyscall_old(&xt, &wm, tk->tkr_mono.clock, tk->tkr_mono.mult,
+                           tk->tkr_mono.cycle_last);
 }
 
 static inline void old_vsyscall_fixup(struct timekeeper *tk)
@@ -387,11 +498,11 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
        * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
        * users are removed, this can be killed.
        */
-       remainder = tk->tkr.xtime_nsec & ((1ULL << tk->tkr.shift) - 1);
-       tk->tkr.xtime_nsec -= remainder;
-       tk->tkr.xtime_nsec += 1ULL << tk->tkr.shift;
+       remainder = tk->tkr_mono.xtime_nsec & ((1ULL << tk->tkr_mono.shift) - 1);
+       tk->tkr_mono.xtime_nsec -= remainder;
+       tk->tkr_mono.xtime_nsec += 1ULL << tk->tkr_mono.shift;
        tk->ntp_error += remainder << tk->ntp_error_shift;
-       tk->ntp_error -= (1ULL << tk->tkr.shift) << tk->ntp_error_shift;
+       tk->ntp_error -= (1ULL << tk->tkr_mono.shift) << tk->ntp_error_shift;
 }
 #else
 #define old_vsyscall_fixup(tk)
@@ -456,17 +567,17 @@ static inline void tk_update_ktime_data(struct timekeeper *tk)
         */
        seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
        nsec = (u32) tk->wall_to_monotonic.tv_nsec;
-       tk->tkr.base_mono = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
+       tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
 
        /* Update the monotonic raw base */
-       tk->base_raw = timespec64_to_ktime(tk->raw_time);
+       tk->tkr_raw.base = timespec64_to_ktime(tk->raw_time);
 
        /*
         * The sum of the nanoseconds portions of xtime and
         * wall_to_monotonic can be greater/equal one second. Take
         * this into account before updating tk->ktime_sec.
         */
-       nsec += (u32)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+       nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
        if (nsec >= NSEC_PER_SEC)
                seconds++;
        tk->ktime_sec = seconds;
@@ -489,7 +600,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
                memcpy(&shadow_timekeeper, &tk_core.timekeeper,
                       sizeof(tk_core.timekeeper));
 
-       update_fast_timekeeper(&tk->tkr);
+       update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
+       update_fast_timekeeper(&tk->tkr_raw,  &tk_fast_raw);
 }
 
 /**
@@ -501,22 +613,23 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action)
  */
 static void timekeeping_forward_now(struct timekeeper *tk)
 {
-       struct clocksource *clock = tk->tkr.clock;
+       struct clocksource *clock = tk->tkr_mono.clock;
        cycle_t cycle_now, delta;
        s64 nsec;
 
-       cycle_now = tk->tkr.read(clock);
-       delta = clocksource_delta(cycle_now, tk->tkr.cycle_last, tk->tkr.mask);
-       tk->tkr.cycle_last = cycle_now;
+       cycle_now = tk->tkr_mono.read(clock);
+       delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
+       tk->tkr_mono.cycle_last = cycle_now;
+       tk->tkr_raw.cycle_last  = cycle_now;
 
-       tk->tkr.xtime_nsec += delta * tk->tkr.mult;
+       tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
 
        /* If arch requires, add in get_arch_timeoffset() */
-       tk->tkr.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr.shift;
+       tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
 
        tk_normalize_xtime(tk);
 
-       nsec = clocksource_cyc2ns(delta, clock->mult, clock->shift);
+       nsec = clocksource_cyc2ns(delta, tk->tkr_raw.mult, tk->tkr_raw.shift);
        timespec64_add_ns(&tk->raw_time, nsec);
 }
 
@@ -537,7 +650,7 @@ int __getnstimeofday64(struct timespec64 *ts)
                seq = read_seqcount_begin(&tk_core.seq);
 
                ts->tv_sec = tk->xtime_sec;
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -577,8 +690,8 @@ ktime_t ktime_get(void)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               base = tk->tkr.base_mono;
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               base = tk->tkr_mono.base;
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -603,8 +716,8 @@ ktime_t ktime_get_with_offset(enum tk_offsets offs)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               base = ktime_add(tk->tkr.base_mono, *offset);
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               base = ktime_add(tk->tkr_mono.base, *offset);
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -645,8 +758,8 @@ ktime_t ktime_get_raw(void)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               base = tk->base_raw;
-               nsecs = timekeeping_get_ns_raw(tk);
+               base = tk->tkr_raw.base;
+               nsecs = timekeeping_get_ns(&tk->tkr_raw);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -674,7 +787,7 @@ void ktime_get_ts64(struct timespec64 *ts)
        do {
                seq = read_seqcount_begin(&tk_core.seq);
                ts->tv_sec = tk->xtime_sec;
-               nsec = timekeeping_get_ns(&tk->tkr);
+               nsec = timekeeping_get_ns(&tk->tkr_mono);
                tomono = tk->wall_to_monotonic;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -759,8 +872,8 @@ void getnstime_raw_and_real(struct timespec *ts_raw, struct timespec *ts_real)
                ts_real->tv_sec = tk->xtime_sec;
                ts_real->tv_nsec = 0;
 
-               nsecs_raw = timekeeping_get_ns_raw(tk);
-               nsecs_real = timekeeping_get_ns(&tk->tkr);
+               nsecs_raw  = timekeeping_get_ns(&tk->tkr_raw);
+               nsecs_real = timekeeping_get_ns(&tk->tkr_mono);
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -943,7 +1056,7 @@ static int change_clocksource(void *data)
         */
        if (try_module_get(new->owner)) {
                if (!new->enable || new->enable(new) == 0) {
-                       old = tk->tkr.clock;
+                       old = tk->tkr_mono.clock;
                        tk_setup_internals(tk, new);
                        if (old->disable)
                                old->disable(old);
@@ -971,11 +1084,11 @@ int timekeeping_notify(struct clocksource *clock)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
 
-       if (tk->tkr.clock == clock)
+       if (tk->tkr_mono.clock == clock)
                return 0;
        stop_machine(change_clocksource, clock, NULL);
        tick_clock_notify();
-       return tk->tkr.clock == clock ? 0 : -1;
+       return tk->tkr_mono.clock == clock ? 0 : -1;
 }
 
 /**
@@ -993,7 +1106,7 @@ void getrawmonotonic64(struct timespec64 *ts)
 
        do {
                seq = read_seqcount_begin(&tk_core.seq);
-               nsecs = timekeeping_get_ns_raw(tk);
+               nsecs = timekeeping_get_ns(&tk->tkr_raw);
                ts64 = tk->raw_time;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
@@ -1016,7 +1129,7 @@ int timekeeping_valid_for_hres(void)
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               ret = tk->tkr.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
+               ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -1035,7 +1148,7 @@ u64 timekeeping_max_deferment(void)
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               ret = tk->tkr.clock->max_idle_ns;
+               ret = tk->tkr_mono.clock->max_idle_ns;
 
        } while (read_seqcount_retry(&tk_core.seq, seq));
 
@@ -1057,6 +1170,14 @@ void __weak read_persistent_clock(struct timespec *ts)
        ts->tv_nsec = 0;
 }
 
+void __weak read_persistent_clock64(struct timespec64 *ts64)
+{
+       struct timespec ts;
+
+       read_persistent_clock(&ts);
+       *ts64 = timespec_to_timespec64(ts);
+}
+
 /**
  * read_boot_clock -  Return time of the system start.
  *
@@ -1072,6 +1193,20 @@ void __weak read_boot_clock(struct timespec *ts)
        ts->tv_nsec = 0;
 }
 
+void __weak read_boot_clock64(struct timespec64 *ts64)
+{
+       struct timespec ts;
+
+       read_boot_clock(&ts);
+       *ts64 = timespec_to_timespec64(ts);
+}
+
+/* Flag for if timekeeping_resume() has injected sleeptime */
+static bool sleeptime_injected;
+
+/* Flag for if there is a persistent clock on this platform */
+static bool persistent_clock_exists;
+
 /*
  * timekeeping_init - Initializes the clocksource and common timekeeping values
  */
@@ -1081,20 +1216,17 @@ void __init timekeeping_init(void)
        struct clocksource *clock;
        unsigned long flags;
        struct timespec64 now, boot, tmp;
-       struct timespec ts;
 
-       read_persistent_clock(&ts);
-       now = timespec_to_timespec64(ts);
+       read_persistent_clock64(&now);
        if (!timespec64_valid_strict(&now)) {
                pr_warn("WARNING: Persistent clock returned invalid value!\n"
                        "         Check your CMOS/BIOS settings.\n");
                now.tv_sec = 0;
                now.tv_nsec = 0;
        } else if (now.tv_sec || now.tv_nsec)
-               persistent_clock_exist = true;
+               persistent_clock_exists = true;
 
-       read_boot_clock(&ts);
-       boot = timespec_to_timespec64(ts);
+       read_boot_clock64(&boot);
        if (!timespec64_valid_strict(&boot)) {
                pr_warn("WARNING: Boot clock returned invalid value!\n"
                        "         Check your CMOS/BIOS settings.\n");
@@ -1114,7 +1246,6 @@ void __init timekeeping_init(void)
        tk_set_xtime(tk, &now);
        tk->raw_time.tv_sec = 0;
        tk->raw_time.tv_nsec = 0;
-       tk->base_raw.tv64 = 0;
        if (boot.tv_sec == 0 && boot.tv_nsec == 0)
                boot = tk_xtime(tk);
 
@@ -1127,7 +1258,7 @@ void __init timekeeping_init(void)
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 }
 
-/* time in seconds when suspend began */
+/* time in seconds when suspend began for persistent clock */
 static struct timespec64 timekeeping_suspend_time;
 
 /**
@@ -1152,12 +1283,49 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
        tk_debug_account_sleep_time(delta);
 }
 
+#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
+/**
+ * We have three kinds of time sources to use for sleep time
+ * injection, the preference order is:
+ * 1) non-stop clocksource
+ * 2) persistent clock (ie: RTC accessible when irqs are off)
+ * 3) RTC
+ *
+ * 1) and 2) are used by timekeeping, 3) by RTC subsystem.
+ * If system has neither 1) nor 2), 3) will be used finally.
+ *
+ *
+ * If timekeeping has injected sleeptime via either 1) or 2),
+ * 3) becomes needless, so in this case we don't need to call
+ * rtc_resume(), and this is what timekeeping_rtc_skipresume()
+ * means.
+ */
+bool timekeeping_rtc_skipresume(void)
+{
+       return sleeptime_injected;
+}
+
+/**
+ * 1) can be determined whether to use or not only when doing
+ * timekeeping_resume() which is invoked after rtc_suspend(),
+ * so we can't skip rtc_suspend() surely if system has 1).
+ *
+ * But if system has 2), 2) will definitely be used, so in this
+ * case we don't need to call rtc_suspend(), and this is what
+ * timekeeping_rtc_skipsuspend() means.
+ */
+bool timekeeping_rtc_skipsuspend(void)
+{
+       return persistent_clock_exists;
+}
+
 /**
  * timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values
  * @delta: pointer to a timespec64 delta value
  *
- * This hook is for architectures that cannot support read_persistent_clock
+ * This hook is for architectures that cannot support read_persistent_clock64
  * because their RTC/persistent clock is only accessible when irqs are enabled.
+ * and also don't have an effective nonstop clocksource.
  *
  * This function should only be called by rtc_resume(), and allows
  * a suspend offset to be injected into the timekeeping values.
@@ -1167,13 +1335,6 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
        struct timekeeper *tk = &tk_core.timekeeper;
        unsigned long flags;
 
-       /*
-        * Make sure we don't set the clock twice, as timekeeping_resume()
-        * already did it
-        */
-       if (has_persistent_clock())
-               return;
-
        raw_spin_lock_irqsave(&timekeeper_lock, flags);
        write_seqcount_begin(&tk_core.seq);
 
@@ -1189,26 +1350,21 @@ void timekeeping_inject_sleeptime64(struct timespec64 *delta)
        /* signal hrtimers about time change */
        clock_was_set();
 }
+#endif
 
 /**
  * timekeeping_resume - Resumes the generic timekeeping subsystem.
- *
- * This is for the generic clocksource timekeeping.
- * xtime/wall_to_monotonic/jiffies/etc are
- * still managed by arch specific suspend/resume code.
  */
 void timekeeping_resume(void)
 {
        struct timekeeper *tk = &tk_core.timekeeper;
-       struct clocksource *clock = tk->tkr.clock;
+       struct clocksource *clock = tk->tkr_mono.clock;
        unsigned long flags;
        struct timespec64 ts_new, ts_delta;
-       struct timespec tmp;
        cycle_t cycle_now, cycle_delta;
-       bool suspendtime_found = false;
 
-       read_persistent_clock(&tmp);
-       ts_new = timespec_to_timespec64(tmp);
+       sleeptime_injected = false;
+       read_persistent_clock64(&ts_new);
 
        clockevents_resume();
        clocksource_resume();
@@ -1228,16 +1384,16 @@ void timekeeping_resume(void)
         * The less preferred source will only be tried if there is no better
         * usable source. The rtc part is handled separately in rtc core code.
         */
-       cycle_now = tk->tkr.read(clock);
+       cycle_now = tk->tkr_mono.read(clock);
        if ((clock->flags & CLOCK_SOURCE_SUSPEND_NONSTOP) &&
-               cycle_now > tk->tkr.cycle_last) {
+               cycle_now > tk->tkr_mono.cycle_last) {
                u64 num, max = ULLONG_MAX;
                u32 mult = clock->mult;
                u32 shift = clock->shift;
                s64 nsec = 0;
 
-               cycle_delta = clocksource_delta(cycle_now, tk->tkr.cycle_last,
-                                               tk->tkr.mask);
+               cycle_delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last,
+                                               tk->tkr_mono.mask);
 
                /*
                 * "cycle_delta * mutl" may cause 64 bits overflow, if the
@@ -1253,17 +1409,19 @@ void timekeeping_resume(void)
                nsec += ((u64) cycle_delta * mult) >> shift;
 
                ts_delta = ns_to_timespec64(nsec);
-               suspendtime_found = true;
+               sleeptime_injected = true;
        } else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
                ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
-               suspendtime_found = true;
+               sleeptime_injected = true;
        }
 
-       if (suspendtime_found)
+       if (sleeptime_injected)
                __timekeeping_inject_sleeptime(tk, &ts_delta);
 
        /* Re-base the last cycle value */
-       tk->tkr.cycle_last = cycle_now;
+       tk->tkr_mono.cycle_last = cycle_now;
+       tk->tkr_raw.cycle_last  = cycle_now;
+
        tk->ntp_error = 0;
        timekeeping_suspended = 0;
        timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
@@ -1272,9 +1430,7 @@ void timekeeping_resume(void)
 
        touch_softlockup_watchdog();
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
-
-       /* Resume hrtimers */
+       tick_resume();
        hrtimers_resume();
 }
 
@@ -1284,10 +1440,8 @@ int timekeeping_suspend(void)
        unsigned long flags;
        struct timespec64               delta, delta_delta;
        static struct timespec64        old_delta;
-       struct timespec tmp;
 
-       read_persistent_clock(&tmp);
-       timekeeping_suspend_time = timespec_to_timespec64(tmp);
+       read_persistent_clock64(&timekeeping_suspend_time);
 
        /*
         * On some systems the persistent_clock can not be detected at
@@ -1295,31 +1449,33 @@ int timekeeping_suspend(void)
         * value returned, update the persistent_clock_exists flag.
         */
        if (timekeeping_suspend_time.tv_sec || timekeeping_suspend_time.tv_nsec)
-               persistent_clock_exist = true;
+               persistent_clock_exists = true;
 
        raw_spin_lock_irqsave(&timekeeper_lock, flags);
        write_seqcount_begin(&tk_core.seq);
        timekeeping_forward_now(tk);
        timekeeping_suspended = 1;
 
-       /*
-        * To avoid drift caused by repeated suspend/resumes,
-        * which each can add ~1 second drift error,
-        * try to compensate so the difference in system time
-        * and persistent_clock time stays close to constant.
-        */
-       delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
-       delta_delta = timespec64_sub(delta, old_delta);
-       if (abs(delta_delta.tv_sec)  >= 2) {
+       if (persistent_clock_exists) {
                /*
-                * if delta_delta is too large, assume time correction
-                * has occured and set old_delta to the current delta.
+                * To avoid drift caused by repeated suspend/resumes,
+                * which each can add ~1 second drift error,
+                * try to compensate so the difference in system time
+                * and persistent_clock time stays close to constant.
                 */
-               old_delta = delta;
-       } else {
-               /* Otherwise try to adjust old_system to compensate */
-               timekeeping_suspend_time =
-                       timespec64_add(timekeeping_suspend_time, delta_delta);
+               delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
+               delta_delta = timespec64_sub(delta, old_delta);
+               if (abs(delta_delta.tv_sec) >= 2) {
+                       /*
+                        * if delta_delta is too large, assume time correction
+                        * has occurred and set old_delta to the current delta.
+                        */
+                       old_delta = delta;
+               } else {
+                       /* Otherwise try to adjust old_system to compensate */
+                       timekeeping_suspend_time =
+                               timespec64_add(timekeeping_suspend_time, delta_delta);
+               }
        }
 
        timekeeping_update(tk, TK_MIRROR);
@@ -1327,7 +1483,7 @@ int timekeeping_suspend(void)
        write_seqcount_end(&tk_core.seq);
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
-       clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+       tick_suspend();
        clocksource_suspend();
        clockevents_suspend();
 
@@ -1416,15 +1572,15 @@ static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
         *
         * XXX - TODO: Doc ntp_error calculation.
         */
-       if ((mult_adj > 0) && (tk->tkr.mult + mult_adj < mult_adj)) {
+       if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
                /* NTP adjustment caused clocksource mult overflow */
                WARN_ON_ONCE(1);
                return;
        }
 
-       tk->tkr.mult += mult_adj;
+       tk->tkr_mono.mult += mult_adj;
        tk->xtime_interval += interval;
-       tk->tkr.xtime_nsec -= offset;
+       tk->tkr_mono.xtime_nsec -= offset;
        tk->ntp_error -= (interval - offset) << tk->ntp_error_shift;
 }
 
@@ -1486,13 +1642,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
                tk->ntp_err_mult = 0;
        }
 
-       if (unlikely(tk->tkr.clock->maxadj &&
-               (abs(tk->tkr.mult - tk->tkr.clock->mult)
-                       > tk->tkr.clock->maxadj))) {
+       if (unlikely(tk->tkr_mono.clock->maxadj &&
+               (abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
+                       > tk->tkr_mono.clock->maxadj))) {
                printk_once(KERN_WARNING
                        "Adjusting %s more than 11%% (%ld vs %ld)\n",
-                       tk->tkr.clock->name, (long)tk->tkr.mult,
-                       (long)tk->tkr.clock->mult + tk->tkr.clock->maxadj);
+                       tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
+                       (long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
        }
 
        /*
@@ -1509,9 +1665,9 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
         * We'll correct this error next time through this function, when
         * xtime_nsec is not as small.
         */
-       if (unlikely((s64)tk->tkr.xtime_nsec < 0)) {
-               s64 neg = -(s64)tk->tkr.xtime_nsec;
-               tk->tkr.xtime_nsec = 0;
+       if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
+               s64 neg = -(s64)tk->tkr_mono.xtime_nsec;
+               tk->tkr_mono.xtime_nsec = 0;
                tk->ntp_error += neg << tk->ntp_error_shift;
        }
 }
@@ -1526,13 +1682,13 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
  */
 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 {
-       u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr.shift;
+       u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
        unsigned int clock_set = 0;
 
-       while (tk->tkr.xtime_nsec >= nsecps) {
+       while (tk->tkr_mono.xtime_nsec >= nsecps) {
                int leap;
 
-               tk->tkr.xtime_nsec -= nsecps;
+               tk->tkr_mono.xtime_nsec -= nsecps;
                tk->xtime_sec++;
 
                /* Figure out if its a leap sec and apply if needed */
@@ -1577,9 +1733,10 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 
        /* Accumulate one shifted interval */
        offset -= interval;
-       tk->tkr.cycle_last += interval;
+       tk->tkr_mono.cycle_last += interval;
+       tk->tkr_raw.cycle_last  += interval;
 
-       tk->tkr.xtime_nsec += tk->xtime_interval << shift;
+       tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
        *clock_set |= accumulate_nsecs_to_secs(tk);
 
        /* Accumulate raw time */
@@ -1622,14 +1779,17 @@ void update_wall_time(void)
 #ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
        offset = real_tk->cycle_interval;
 #else
-       offset = clocksource_delta(tk->tkr.read(tk->tkr.clock),
-                                  tk->tkr.cycle_last, tk->tkr.mask);
+       offset = clocksource_delta(tk->tkr_mono.read(tk->tkr_mono.clock),
+                                  tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
 #endif
 
        /* Check if there's really nothing to do */
        if (offset < real_tk->cycle_interval)
                goto out;
 
+       /* Do some additional sanity checking */
+       timekeeping_check_update(real_tk, offset);
+
        /*
         * With NO_HZ we may have to accumulate many cycle_intervals
         * (think "ticks") worth of time at once. To do this efficiently,
@@ -1784,8 +1944,8 @@ ktime_t ktime_get_update_offsets_tick(ktime_t *offs_real, ktime_t *offs_boot,
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               base = tk->tkr.base_mono;
-               nsecs = tk->tkr.xtime_nsec >> tk->tkr.shift;
+               base = tk->tkr_mono.base;
+               nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
 
                *offs_real = tk->offs_real;
                *offs_boot = tk->offs_boot;
@@ -1816,8 +1976,8 @@ ktime_t ktime_get_update_offsets_now(ktime_t *offs_real, ktime_t *offs_boot,
        do {
                seq = read_seqcount_begin(&tk_core.seq);
 
-               base = tk->tkr.base_mono;
-               nsecs = timekeeping_get_ns(&tk->tkr);
+               base = tk->tkr_mono.base;
+               nsecs = timekeeping_get_ns(&tk->tkr_mono);
 
                *offs_real = tk->offs_real;
                *offs_boot = tk->offs_boot;
index 1d91416..ead8794 100644 (file)
@@ -19,4 +19,11 @@ extern void timekeeping_clocktai(struct timespec *ts);
 extern int timekeeping_suspend(void);
 extern void timekeeping_resume(void);
 
+extern void do_timer(unsigned long ticks);
+extern void update_wall_time(void);
+
+extern seqlock_t jiffies_lock;
+
+#define CS_NAME_LEN    32
+
 #endif
index 2d3f5c5..2ece3aa 100644 (file)
@@ -90,8 +90,18 @@ struct tvec_base {
        struct tvec tv5;
 } ____cacheline_aligned;
 
+/*
+ * __TIMER_INITIALIZER() needs to set ->base to a valid pointer (because we've
+ * made NULL special, hint: lock_timer_base()) and we cannot get a compile time
+ * pointer to per-cpu entries because we don't know where we'll map the section,
+ * even for the boot cpu.
+ *
+ * And so we use boot_tvec_bases for boot CPU and per-cpu __tvec_bases for the
+ * rest of them.
+ */
 struct tvec_base boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
+
 static DEFINE_PER_CPU(struct tvec_base *, tvec_bases) = &boot_tvec_bases;
 
 /* Functions below help us manage 'deferrable' flag */
@@ -1027,6 +1037,8 @@ int try_to_del_timer_sync(struct timer_list *timer)
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct tvec_base, __tvec_bases);
+
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1532,64 +1544,6 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout)
 }
 EXPORT_SYMBOL(schedule_timeout_uninterruptible);
 
-static int init_timers_cpu(int cpu)
-{
-       int j;
-       struct tvec_base *base;
-       static char tvec_base_done[NR_CPUS];
-
-       if (!tvec_base_done[cpu]) {
-               static char boot_done;
-
-               if (boot_done) {
-                       /*
-                        * The APs use this path later in boot
-                        */
-                       base = kzalloc_node(sizeof(*base), GFP_KERNEL,
-                                           cpu_to_node(cpu));
-                       if (!base)
-                               return -ENOMEM;
-
-                       /* Make sure tvec_base has TIMER_FLAG_MASK bits free */
-                       if (WARN_ON(base != tbase_get_base(base))) {
-                               kfree(base);
-                               return -ENOMEM;
-                       }
-                       per_cpu(tvec_bases, cpu) = base;
-               } else {
-                       /*
-                        * This is for the boot CPU - we use compile-time
-                        * static initialisation because per-cpu memory isn't
-                        * ready yet and because the memory allocators are not
-                        * initialised either.
-                        */
-                       boot_done = 1;
-                       base = &boot_tvec_bases;
-               }
-               spin_lock_init(&base->lock);
-               tvec_base_done[cpu] = 1;
-               base->cpu = cpu;
-       } else {
-               base = per_cpu(tvec_bases, cpu);
-       }
-
-
-       for (j = 0; j < TVN_SIZE; j++) {
-               INIT_LIST_HEAD(base->tv5.vec + j);
-               INIT_LIST_HEAD(base->tv4.vec + j);
-               INIT_LIST_HEAD(base->tv3.vec + j);
-               INIT_LIST_HEAD(base->tv2.vec + j);
-       }
-       for (j = 0; j < TVR_SIZE; j++)
-               INIT_LIST_HEAD(base->tv1.vec + j);
-
-       base->timer_jiffies = jiffies;
-       base->next_timer = base->timer_jiffies;
-       base->active_timers = 0;
-       base->all_timers = 0;
-       return 0;
-}
-
 #ifdef CONFIG_HOTPLUG_CPU
 static void migrate_timer_list(struct tvec_base *new_base, struct list_head *head)
 {
@@ -1631,55 +1585,86 @@ static void migrate_timers(int cpu)
                migrate_timer_list(new_base, old_base->tv5.vec + i);
        }
 
+       old_base->active_timers = 0;
+       old_base->all_timers = 0;
+
        spin_unlock(&old_base->lock);
        spin_unlock_irq(&new_base->lock);
        put_cpu_var(tvec_bases);
 }
-#endif /* CONFIG_HOTPLUG_CPU */
 
 static int timer_cpu_notify(struct notifier_block *self,
                                unsigned long action, void *hcpu)
 {
-       long cpu = (long)hcpu;
-       int err;
-
-       switch(action) {
-       case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
-               err = init_timers_cpu(cpu);
-               if (err < 0)
-                       return notifier_from_errno(err);
-               break;
-#ifdef CONFIG_HOTPLUG_CPU
+       switch (action) {
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
-               migrate_timers(cpu);
+               migrate_timers((long)hcpu);
                break;
-#endif
        default:
                break;
        }
+
        return NOTIFY_OK;
 }
 
-static struct notifier_block timers_nb = {
-       .notifier_call  = timer_cpu_notify,
-};
+static inline void timer_register_cpu_notifier(void)
+{
+       cpu_notifier(timer_cpu_notify, 0);
+}
+#else
+static inline void timer_register_cpu_notifier(void) { }
+#endif /* CONFIG_HOTPLUG_CPU */
 
+static void __init init_timer_cpu(struct tvec_base *base, int cpu)
+{
+       int j;
 
-void __init init_timers(void)
+       BUG_ON(base != tbase_get_base(base));
+
+       base->cpu = cpu;
+       per_cpu(tvec_bases, cpu) = base;
+       spin_lock_init(&base->lock);
+
+       for (j = 0; j < TVN_SIZE; j++) {
+               INIT_LIST_HEAD(base->tv5.vec + j);
+               INIT_LIST_HEAD(base->tv4.vec + j);
+               INIT_LIST_HEAD(base->tv3.vec + j);
+               INIT_LIST_HEAD(base->tv2.vec + j);
+       }
+       for (j = 0; j < TVR_SIZE; j++)
+               INIT_LIST_HEAD(base->tv1.vec + j);
+
+       base->timer_jiffies = jiffies;
+       base->next_timer = base->timer_jiffies;
+}
+
+static void __init init_timer_cpus(void)
 {
-       int err;
+       struct tvec_base *base;
+       int local_cpu = smp_processor_id();
+       int cpu;
 
+       for_each_possible_cpu(cpu) {
+               if (cpu == local_cpu)
+                       base = &boot_tvec_bases;
+#ifdef CONFIG_SMP
+               else
+                       base = per_cpu_ptr(&__tvec_bases, cpu);
+#endif
+
+               init_timer_cpu(base, cpu);
+       }
+}
+
+void __init init_timers(void)
+{
        /* ensure there are enough low bits for flags in timer->base pointer */
        BUILD_BUG_ON(__alignof__(struct tvec_base) & TIMER_FLAG_MASK);
 
-       err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
-                              (void *)(long)smp_processor_id());
-       BUG_ON(err != NOTIFY_OK);
-
+       init_timer_cpus();
        init_timer_stats();
-       register_cpu_notifier(&timers_nb);
+       timer_register_cpu_notifier();
        open_softirq(TIMER_SOFTIRQ, run_timer_softirq);
 }
 
index 61ed862..e878c2e 100644 (file)
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/kallsyms.h>
-#include <linux/tick.h>
 
 #include <asm/uaccess.h>
 
+#include "tick-internal.h"
 
 struct timer_list_iter {
        int cpu;
@@ -228,9 +228,35 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
        print_name_offset(m, dev->set_next_event);
        SEQ_printf(m, "\n");
 
-       SEQ_printf(m, " set_mode:       ");
-       print_name_offset(m, dev->set_mode);
-       SEQ_printf(m, "\n");
+       if (dev->set_mode) {
+               SEQ_printf(m, " set_mode:       ");
+               print_name_offset(m, dev->set_mode);
+               SEQ_printf(m, "\n");
+       } else {
+               if (dev->set_state_shutdown) {
+                       SEQ_printf(m, " shutdown: ");
+                       print_name_offset(m, dev->set_state_shutdown);
+                       SEQ_printf(m, "\n");
+               }
+
+               if (dev->set_state_periodic) {
+                       SEQ_printf(m, " periodic: ");
+                       print_name_offset(m, dev->set_state_periodic);
+                       SEQ_printf(m, "\n");
+               }
+
+               if (dev->set_state_oneshot) {
+                       SEQ_printf(m, " oneshot:  ");
+                       print_name_offset(m, dev->set_state_oneshot);
+                       SEQ_printf(m, "\n");
+               }
+
+               if (dev->tick_resume) {
+                       SEQ_printf(m, " resume:   ");
+                       print_name_offset(m, dev->tick_resume);
+                       SEQ_printf(m, "\n");
+               }
+       }
 
        SEQ_printf(m, " event_handler:  ");
        print_name_offset(m, dev->event_handler);
index 45e5cb1..4f22802 100644 (file)
@@ -1059,6 +1059,12 @@ static __init void ftrace_profile_debugfs(struct dentry *d_tracer)
 
 static struct pid * const ftrace_swapper_pid = &init_struct_pid;
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int ftrace_graph_active;
+#else
+# define ftrace_graph_active 0
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 static struct ftrace_ops *removed_ops;
@@ -2041,8 +2047,12 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
                if (!ftrace_rec_count(rec))
                        rec->flags = 0;
                else
-                       /* Just disable the record (keep REGS state) */
-                       rec->flags &= ~FTRACE_FL_ENABLED;
+                       /*
+                        * Just disable the record, but keep the ops TRAMP
+                        * and REGS states. The _EN flags must be disabled though.
+                        */
+                       rec->flags &= ~(FTRACE_FL_ENABLED | FTRACE_FL_TRAMP_EN |
+                                       FTRACE_FL_REGS_EN);
        }
 
        return FTRACE_UPDATE_MAKE_NOP;
@@ -2688,24 +2698,36 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command)
 
 static void ftrace_startup_sysctl(void)
 {
+       int command;
+
        if (unlikely(ftrace_disabled))
                return;
 
        /* Force update next time */
        saved_ftrace_func = NULL;
        /* ftrace_start_up is true if we want ftrace running */
-       if (ftrace_start_up)
-               ftrace_run_update_code(FTRACE_UPDATE_CALLS);
+       if (ftrace_start_up) {
+               command = FTRACE_UPDATE_CALLS;
+               if (ftrace_graph_active)
+                       command |= FTRACE_START_FUNC_RET;
+               ftrace_startup_enable(command);
+       }
 }
 
 static void ftrace_shutdown_sysctl(void)
 {
+       int command;
+
        if (unlikely(ftrace_disabled))
                return;
 
        /* ftrace_start_up is true if ftrace is running */
-       if (ftrace_start_up)
-               ftrace_run_update_code(FTRACE_DISABLE_CALLS);
+       if (ftrace_start_up) {
+               command = FTRACE_DISABLE_CALLS;
+               if (ftrace_graph_active)
+                       command |= FTRACE_STOP_FUNC_RET;
+               ftrace_run_update_code(command);
+       }
 }
 
 static cycle_t         ftrace_update_time;
@@ -5558,12 +5580,12 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
        if (ftrace_enabled) {
 
-               ftrace_startup_sysctl();
-
                /* we are starting ftrace again */
                if (ftrace_ops_list != &ftrace_list_end)
                        update_ftrace_function();
 
+               ftrace_startup_sysctl();
+
        } else {
                /* stopping ftrace calls (just send to ftrace_stub) */
                ftrace_trace_function = ftrace_stub;
@@ -5590,8 +5612,6 @@ static struct ftrace_ops graph_ops = {
        ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
 };
 
-static int ftrace_graph_active;
-
 int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
 {
        return 0;
index f288493..41ff75b 100644 (file)
@@ -2728,19 +2728,57 @@ bool flush_work(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
+struct cwt_wait {
+       wait_queue_t            wait;
+       struct work_struct      *work;
+};
+
+static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+       struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
+
+       if (cwait->work != key)
+               return 0;
+       return autoremove_wake_function(wait, mode, sync, key);
+}
+
 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+       static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
        unsigned long flags;
        int ret;
 
        do {
                ret = try_to_grab_pending(work, is_dwork, &flags);
                /*
-                * If someone else is canceling, wait for the same event it
-                * would be waiting for before retrying.
+                * If someone else is already canceling, wait for it to
+                * finish.  flush_work() doesn't work for PREEMPT_NONE
+                * because we may get scheduled between @work's completion
+                * and the other canceling task resuming and clearing
+                * CANCELING - flush_work() will return false immediately
+                * as @work is no longer busy, try_to_grab_pending() will
+                * return -ENOENT as @work is still being canceled and the
+                * other canceling task won't be able to clear CANCELING as
+                * we're hogging the CPU.
+                *
+                * Let's wait for completion using a waitqueue.  As this
+                * may lead to the thundering herd problem, use a custom
+                * wake function which matches @work along with exclusive
+                * wait and wakeup.
                 */
-               if (unlikely(ret == -ENOENT))
-                       flush_work(work);
+               if (unlikely(ret == -ENOENT)) {
+                       struct cwt_wait cwait;
+
+                       init_wait(&cwait.wait);
+                       cwait.wait.func = cwt_wakefn;
+                       cwait.work = work;
+
+                       prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
+                                                 TASK_UNINTERRUPTIBLE);
+                       if (work_is_canceling(work))
+                               schedule();
+                       finish_wait(&cancel_waitq, &cwait.wait);
+               }
        } while (unlikely(ret < 0));
 
        /* tell other tasks trying to grab @work to back off */
@@ -2749,6 +2787,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 
        flush_work(work);
        clear_work_data(work);
+
+       /*
+        * Paired with prepare_to_wait() above so that either
+        * waitqueue_active() is visible here or !work_is_canceling() is
+        * visible there.
+        */
+       smp_mb();
+       if (waitqueue_active(&cancel_waitq))
+               __wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
+
        return ret;
 }
 
index c5cefb3..36b6fa8 100644 (file)
@@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK
          data corruption or a sporadic crash at a later stage once the region
          is examined. The runtime overhead introduced is minimal.
 
+config DEBUG_TIMEKEEPING
+       bool "Enable extra timekeeping sanity checking"
+       help
+         This option will enable additional timekeeping sanity checks
+         which may be helpful when diagnosing issues where timekeeping
+         problems are suspected.
+
+         This may include checks in the timekeeping hotpaths, so this
+         option may have a (very small) performance impact to some
+         workloads.
+
+         If unsure, say N.
+
 config TIMER_STATS
        bool "Collect kernel timers statistics"
        depends on DEBUG_KERNEL && PROC_FS
index 87eb3bf..58f74d2 100644 (file)
@@ -24,7 +24,7 @@ obj-y += lockref.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
         bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
-        gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \
+        gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
         bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
         percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
similarity index 97%
rename from mm/iov_iter.c
rename to lib/iov_iter.c
index 8277320..9d96e28 100644 (file)
@@ -751,3 +751,18 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages)
        return npages;
 }
 EXPORT_SYMBOL(iov_iter_npages);
+
+const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
+{
+       *new = *old;
+       if (new->type & ITER_BVEC)
+               return new->bvec = kmemdup(new->bvec,
+                                   new->nr_segs * sizeof(struct bio_vec),
+                                   flags);
+       else
+               /* iovec and kvec have identical layout */
+               return new->iov = kmemdup(new->iov,
+                                  new->nr_segs * sizeof(struct iovec),
+                                  flags);
+}
+EXPORT_SYMBOL(dup_iter);
index e97dbd5..03d7fcb 100644 (file)
--- a/lib/lcm.c
+++ b/lib/lcm.c
@@ -12,3 +12,14 @@ unsigned long lcm(unsigned long a, unsigned long b)
                return 0;
 }
 EXPORT_SYMBOL_GPL(lcm);
+
+unsigned long lcm_not_zero(unsigned long a, unsigned long b)
+{
+       unsigned long l = lcm(a, b);
+
+       if (l)
+               return l;
+
+       return (b ? : a);
+}
+EXPORT_SYMBOL_GPL(lcm_not_zero);
index ecb9a66..494994b 100644 (file)
@@ -18,7 +18,7 @@
 #define CMPXCHG_LOOP(CODE, SUCCESS) do {                                       \
        struct lockref old;                                                     \
        BUILD_BUG_ON(sizeof(old) != 8);                                         \
-       old.lock_count = ACCESS_ONCE(lockref->lock_count);                      \
+       old.lock_count = READ_ONCE(lockref->lock_count);                        \
        while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) {     \
                struct lockref new = old, prev = old;                           \
                CODE                                                            \
index 7a85967..f0f5c5c 100644 (file)
@@ -139,6 +139,9 @@ static int lz4_uncompress(const char *source, char *dest, int osize)
                        /* Error: request to write beyond destination buffer */
                        if (cpy > oend)
                                goto _output_error;
+                       if ((ref + COPYLENGTH) > oend ||
+                                       (op + COPYLENGTH) > oend)
+                               goto _output_error;
                        LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
                        while (op < cpy)
                                *op++ = *ref++;
index 76a1b59..f5907d2 100644 (file)
@@ -279,6 +279,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count)
        int minlen = min_t(int, count, nla_len(src));
 
        memcpy(dest, nla_data(src), minlen);
+       if (count > minlen)
+               memset(dest + minlen, 0, count - minlen);
 
        return minlen;
 }
index 88c0854..5c94e10 100644 (file)
@@ -61,7 +61,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args)
 
        if (s->len < s->size) {
                len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args);
-               if (seq_buf_can_fit(s, len)) {
+               if (s->len + len < s->size) {
                        s->len += len;
                        return 0;
                }
@@ -118,7 +118,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary)
 
        if (s->len < s->size) {
                ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
-               if (seq_buf_can_fit(s, ret)) {
+               if (s->len + ret < s->size) {
                        s->len += ret;
                        return 0;
                }
index 3c1caa2..15dbe99 100644 (file)
@@ -21,7 +21,7 @@ obj-y                 := filemap.o mempool.o oom_kill.o \
                           mm_init.o mmu_context.o percpu.o slab_common.o \
                           compaction.o vmacache.o \
                           interval_tree.o list_lru.o workingset.o \
-                          iov_iter.o debug.o $(mmu-y)
+                          debug.o $(mmu-y)
 
 obj-y += init-mm.o
 
index 75016fd..68ecb7a 100644 (file)
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -64,15 +64,17 @@ static unsigned long cma_bitmap_aligned_mask(struct cma *cma, int align_order)
        return (1UL << (align_order - cma->order_per_bit)) - 1;
 }
 
+/*
+ * Find a PFN aligned to the specified order and return an offset represented in
+ * order_per_bits.
+ */
 static unsigned long cma_bitmap_aligned_offset(struct cma *cma, int align_order)
 {
-       unsigned int alignment;
-
        if (align_order <= cma->order_per_bit)
                return 0;
-       alignment = 1UL << (align_order - cma->order_per_bit);
-       return ALIGN(cma->base_pfn, alignment) -
-               (cma->base_pfn >> cma->order_per_bit);
+
+       return (ALIGN(cma->base_pfn, (1UL << align_order))
+               - cma->base_pfn) >> cma->order_per_bit;
 }
 
 static unsigned long cma_bitmap_maxno(struct cma *cma)
index fc00c8c..6817b03 100644 (file)
@@ -1260,6 +1260,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        int target_nid, last_cpupid = -1;
        bool page_locked;
        bool migrated = false;
+       bool was_writable;
        int flags = 0;
 
        /* A PROT_NONE fault should not end up here */
@@ -1291,12 +1292,8 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                flags |= TNF_FAULT_LOCAL;
        }
 
-       /*
-        * Avoid grouping on DSO/COW pages in specific and RO pages
-        * in general, RO pages shouldn't hurt as much anyway since
-        * they can be in shared cache state.
-        */
-       if (!pmd_write(pmd))
+       /* See similar comment in do_numa_page for explanation */
+       if (!(vma->vm_flags & VM_WRITE))
                flags |= TNF_NO_GROUP;
 
        /*
@@ -1353,12 +1350,17 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        if (migrated) {
                flags |= TNF_MIGRATED;
                page_nid = target_nid;
-       }
+       } else
+               flags |= TNF_MIGRATE_FAIL;
 
        goto out;
 clear_pmdnuma:
        BUG_ON(!PageLocked(page));
+       was_writable = pmd_write(pmd);
        pmd = pmd_modify(pmd, vma->vm_page_prot);
+       pmd = pmd_mkyoung(pmd);
+       if (was_writable)
+               pmd = pmd_mkwrite(pmd);
        set_pmd_at(mm, haddr, pmdp, pmd);
        update_mmu_cache_pmd(vma, addr, pmdp);
        unlock_page(page);
@@ -1482,6 +1484,8 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 
        if (__pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                pmd_t entry;
+               bool preserve_write = prot_numa && pmd_write(*pmd);
+               ret = 1;
 
                /*
                 * Avoid trapping faults against the zero page. The read-only
@@ -1490,16 +1494,17 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
                 */
                if (prot_numa && is_huge_zero_pmd(*pmd)) {
                        spin_unlock(ptl);
-                       return 0;
+                       return ret;
                }
 
                if (!prot_numa || !pmd_protnone(*pmd)) {
-                       ret = 1;
                        entry = pmdp_get_and_clear_notify(mm, addr, pmd);
                        entry = pmd_modify(entry, newprot);
+                       if (preserve_write)
+                               entry = pmd_mkwrite(entry);
                        ret = HPAGE_PMD_NR;
                        set_pmd_at(mm, addr, pmd, entry);
-                       BUG_ON(pmd_write(entry));
+                       BUG_ON(!preserve_write && pmd_write(entry));
                }
                spin_unlock(ptl);
        }
index 0a9ac6c..c41b2a0 100644 (file)
@@ -917,7 +917,6 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
        __SetPageHead(page);
        __ClearPageReserved(page);
        for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
-               __SetPageTail(p);
                /*
                 * For gigantic hugepages allocated through bootmem at
                 * boot, it's safer to be consistent with the not-gigantic
@@ -933,6 +932,9 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
                __ClearPageReserved(p);
                set_page_count(p, 0);
                p->first_page = page;
+               /* Make sure p->first_page is always valid for PageTail() */
+               smp_wmb();
+               __SetPageTail(p);
        }
 }
 
index 78fee63..936d816 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/stacktrace.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/vmalloc.h>
 #include <linux/kasan.h>
 
 #include "kasan.h"
@@ -414,12 +415,19 @@ int kasan_module_alloc(void *addr, size_t size)
                        GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
                        PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
                        __builtin_return_address(0));
-       return ret ? 0 : -ENOMEM;
+
+       if (ret) {
+               find_vm_area(addr)->flags |= VM_KASAN;
+               return 0;
+       }
+
+       return -ENOMEM;
 }
 
-void kasan_module_free(void *addr)
+void kasan_free_shadow(const struct vm_struct *vm)
 {
-       vfree(kasan_mem_to_shadow(addr));
+       if (vm->flags & VM_KASAN)
+               vfree(kasan_mem_to_shadow(vm->addr));
 }
 
 static void register_global(struct kasan_global *global)
index 9fe0769..b34ef4a 100644 (file)
@@ -5232,7 +5232,9 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
         * on for the root memcg is enough.
         */
        if (cgroup_on_dfl(root_css->cgroup))
-               mem_cgroup_from_css(root_css)->use_hierarchy = true;
+               root_mem_cgroup->use_hierarchy = true;
+       else
+               root_mem_cgroup->use_hierarchy = false;
 }
 
 static u64 memory_current_read(struct cgroup_subsys_state *css,
index 8068893..97839f5 100644 (file)
@@ -3035,6 +3035,7 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        int last_cpupid;
        int target_nid;
        bool migrated = false;
+       bool was_writable = pte_write(pte);
        int flags = 0;
 
        /* A PROT_NONE fault should not end up here */
@@ -3059,6 +3060,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        /* Make it present again */
        pte = pte_modify(pte, vma->vm_page_prot);
        pte = pte_mkyoung(pte);
+       if (was_writable)
+               pte = pte_mkwrite(pte);
        set_pte_at(mm, addr, ptep, pte);
        update_mmu_cache(vma, addr, ptep);
 
@@ -3069,11 +3072,14 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        }
 
        /*
-        * Avoid grouping on DSO/COW pages in specific and RO pages
-        * in general, RO pages shouldn't hurt as much anyway since
-        * they can be in shared cache state.
+        * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
+        * much anyway since they can be in shared cache state. This misses
+        * the case where a mapping is writable but the process never writes
+        * to it but pte_write gets cleared during protection updates and
+        * pte_dirty has unpredictable behaviour between PTE scan updates,
+        * background writeback, dirty balancing and application behaviour.
         */
-       if (!pte_write(pte))
+       if (!(vma->vm_flags & VM_WRITE))
                flags |= TNF_NO_GROUP;
 
        /*
@@ -3097,7 +3103,8 @@ static int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
        if (migrated) {
                page_nid = target_nid;
                flags |= TNF_MIGRATED;
-       }
+       } else
+               flags |= TNF_MIGRATE_FAIL;
 
 out:
        if (page_nid != -1)
index 9fab107..65842d6 100644 (file)
@@ -1092,6 +1092,10 @@ static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
                        return NULL;
 
                arch_refresh_nodedata(nid, pgdat);
+       } else {
+               /* Reset the nr_zones and classzone_idx to 0 before reuse */
+               pgdat->nr_zones = 0;
+               pgdat->classzone_idx = 0;
        }
 
        /* we can use NODE_DATA(nid) from here */
@@ -1977,15 +1981,6 @@ void try_offline_node(int nid)
                if (is_vmalloc_addr(zone->wait_table))
                        vfree(zone->wait_table);
        }
-
-       /*
-        * Since there is no way to guarentee the address of pgdat/zone is not
-        * on stack of any kernel threads or used by other kernel objects
-        * without reference counting or other symchronizing method, do not
-        * reset node_data and free pgdat here. Just reset it to 0 and reuse
-        * the memory when the node is online again.
-        */
-       memset(pgdat, 0, sizeof(*pgdat));
 }
 EXPORT_SYMBOL(try_offline_node);
 
index 73cf098..8a54cd2 100644 (file)
 
 int can_do_mlock(void)
 {
-       if (capable(CAP_IPC_LOCK))
-               return 1;
        if (rlimit(RLIMIT_MEMLOCK) != 0)
                return 1;
+       if (capable(CAP_IPC_LOCK))
+               return 1;
        return 0;
 }
 EXPORT_SYMBOL(can_do_mlock);
index da9990a..9ec50a3 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -774,10 +774,8 @@ again:                     remove_next = 1 + (end > next->vm_end);
 
                        importer->anon_vma = exporter->anon_vma;
                        error = anon_vma_clone(importer, exporter);
-                       if (error) {
-                               importer->anon_vma = NULL;
+                       if (error)
                                return error;
-                       }
                }
        }
 
index 4472781..8858483 100644 (file)
@@ -75,6 +75,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                oldpte = *pte;
                if (pte_present(oldpte)) {
                        pte_t ptent;
+                       bool preserve_write = prot_numa && pte_write(oldpte);
 
                        /*
                         * Avoid trapping faults against the zero or KSM
@@ -94,6 +95,8 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
 
                        ptent = ptep_modify_prot_start(mm, addr, pte);
                        ptent = pte_modify(ptent, newprot);
+                       if (preserve_write)
+                               ptent = pte_mkwrite(ptent);
 
                        /* Avoid taking write faults for known dirty pages */
                        if (dirty_accountable && pte_dirty(ptent) &&
index 57dadc0..2dc44b1 100644 (file)
@@ -286,8 +286,14 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                old_len = new_len;
                old_addr = new_addr;
                new_addr = -ENOMEM;
-       } else if (vma->vm_file && vma->vm_file->f_op->mremap)
-               vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+       } else if (vma->vm_file && vma->vm_file->f_op->mremap) {
+               err = vma->vm_file->f_op->mremap(vma->vm_file, new_vma);
+               if (err < 0) {
+                       move_page_tables(new_vma, new_addr, vma, old_addr,
+                                        moved_len, true);
+                       return err;
+               }
+       }
 
        /* Conceal VM_ACCOUNT so old reservation is not undone */
        if (vm_flags & VM_ACCOUNT) {
index 3e67e75..3fba2dc 100644 (file)
@@ -62,6 +62,7 @@ void *high_memory;
 EXPORT_SYMBOL(high_memory);
 struct page *mem_map;
 unsigned long max_mapnr;
+EXPORT_SYMBOL(max_mapnr);
 unsigned long highest_memmap_pfn;
 struct percpu_counter vm_committed_as;
 int sysctl_overcommit_memory = OVERCOMMIT_GUESS; /* heuristic overcommit */
index 45e187b..644bcb6 100644 (file)
@@ -857,8 +857,11 @@ static void bdi_update_write_bandwidth(struct backing_dev_info *bdi,
         *                   bw * elapsed + write_bandwidth * (period - elapsed)
         * write_bandwidth = ---------------------------------------------------
         *                                          period
+        *
+        * @written may have decreased due to account_page_redirty().
+        * Avoid underflowing @bw calculation.
         */
-       bw = written - bdi->written_stamp;
+       bw = written - min(written, bdi->written_stamp);
        bw *= HZ;
        if (unlikely(elapsed > period)) {
                do_div(bw, elapsed);
@@ -922,7 +925,7 @@ static void global_update_bandwidth(unsigned long thresh,
                                    unsigned long now)
 {
        static DEFINE_SPINLOCK(dirty_lock);
-       static unsigned long update_time;
+       static unsigned long update_time = INITIAL_JIFFIES;
 
        /*
         * check locklessly first to optimize away locking for the most time
index 7abfa70..40e2942 100644 (file)
@@ -2373,7 +2373,8 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
                        goto out;
        }
        /* Exhausted what can be done so it's blamo time */
-       if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false))
+       if (out_of_memory(ac->zonelist, gfp_mask, order, ac->nodemask, false)
+                       || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL))
                *did_some_progress = 1;
 out:
        oom_zonelist_unlock(ac->zonelist, gfp_mask);
index 72f5ac3..755a42c 100644 (file)
@@ -103,6 +103,7 @@ void unset_migratetype_isolate(struct page *page, unsigned migratetype)
 
                        if (!is_migrate_isolate_page(buddy)) {
                                __isolate_free_page(page, order);
+                               kernel_map_pages(page, (1 << order), 1);
                                set_page_refcounted(page);
                                isolated_page = page;
                        }
index 75c1f28..29f2f8b 100644 (file)
@@ -265,8 +265,15 @@ int walk_page_range(unsigned long start, unsigned long end,
                        vma = vma->vm_next;
 
                        err = walk_page_test(start, next, walk);
-                       if (err > 0)
+                       if (err > 0) {
+                               /*
+                                * positive return values are purely for
+                                * controlling the pagewalk, so should never
+                                * be passed to the callers.
+                                */
+                               err = 0;
                                continue;
+                       }
                        if (err < 0)
                                break;
                }
index 5e3e090..c161a14 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -287,6 +287,13 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
        return 0;
 
  enomem_failure:
+       /*
+        * dst->anon_vma is dropped here otherwise its degree can be incorrectly
+        * decremented in unlink_anon_vmas().
+        * We can safely do this because callers of anon_vma_clone() don't care
+        * about dst->anon_vma if anon_vma_clone() failed.
+        */
+       dst->anon_vma = NULL;
        unlink_anon_vmas(dst);
        return -ENOMEM;
 }
index 6832c4e..82c4737 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2449,7 +2449,8 @@ redo:
        do {
                tid = this_cpu_read(s->cpu_slab->tid);
                c = raw_cpu_ptr(s->cpu_slab);
-       } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
+       } while (IS_ENABLED(CONFIG_PREEMPT) &&
+                unlikely(tid != READ_ONCE(c->tid)));
 
        /*
         * Irqless object alloc/free algorithm used here depends on sequence
@@ -2718,7 +2719,8 @@ redo:
        do {
                tid = this_cpu_read(s->cpu_slab->tid);
                c = raw_cpu_ptr(s->cpu_slab);
-       } while (IS_ENABLED(CONFIG_PREEMPT) && unlikely(tid != c->tid));
+       } while (IS_ENABLED(CONFIG_PREEMPT) &&
+                unlikely(tid != READ_ONCE(c->tid)));
 
        /* Same with comment on barrier() in slab_alloc_node() */
        barrier();
index 35b25e1..49abccf 100644 (file)
@@ -1418,6 +1418,7 @@ struct vm_struct *remove_vm_area(const void *addr)
                spin_unlock(&vmap_area_lock);
 
                vmap_debug_free_range(va->va_start, va->va_end);
+               kasan_free_shadow(vm);
                free_unmap_vmap_area(va);
                vm->size -= PAGE_SIZE;
 
index d8e376a..36a1a73 100644 (file)
@@ -658,14 +658,30 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)
 static void p9_virtio_remove(struct virtio_device *vdev)
 {
        struct virtio_chan *chan = vdev->priv;
-
-       if (chan->inuse)
-               p9_virtio_close(chan->client);
-       vdev->config->del_vqs(vdev);
+       unsigned long warning_time;
 
        mutex_lock(&virtio_9p_lock);
+
+       /* Remove self from list so we don't get new users. */
        list_del(&chan->chan_list);
+       warning_time = jiffies;
+
+       /* Wait for existing users to close. */
+       while (chan->inuse) {
+               mutex_unlock(&virtio_9p_lock);
+               msleep(250);
+               if (time_after(jiffies, warning_time + 10 * HZ)) {
+                       dev_emerg(&vdev->dev,
+                                 "p9_virtio_remove: waiting for device in use.\n");
+                       warning_time = jiffies;
+               }
+               mutex_lock(&virtio_9p_lock);
+       }
+
        mutex_unlock(&virtio_9p_lock);
+
+       vdev->config->del_vqs(vdev);
+
        sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
        kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
        kfree(chan->tag);
index b087d27..1849d96 100644 (file)
@@ -563,6 +563,8 @@ int br_del_if(struct net_bridge *br, struct net_device *dev)
         */
        del_nbp(p);
 
+       dev_set_mtu(br->dev, br_min_mtu(br));
+
        spin_lock_bh(&br->lock);
        changed_addr = br_stp_recalculate_bridge_id(br);
        spin_unlock_bh(&br->lock);
index 769b185..a6e2da0 100644 (file)
@@ -281,7 +281,7 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
        int copylen;
 
        ret = -EOPNOTSUPP;
-       if (m->msg_flags&MSG_OOB)
+       if (flags & MSG_OOB)
                goto read_error;
 
        skb = skb_recv_datagram(sk, flags, 0 , &ret);
index 66e0804..32d710e 100644 (file)
@@ -259,6 +259,9 @@ int can_send(struct sk_buff *skb, int loop)
                goto inval_skb;
        }
 
+       skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+       skb_reset_mac_header(skb);
        skb_reset_network_header(skb);
        skb_reset_transport_header(skb);
 
index 6b3f54e..a9f4ae4 100644 (file)
@@ -484,7 +484,7 @@ static int ceph_tcp_connect(struct ceph_connection *con)
                               IPPROTO_TCP, &sock);
        if (ret)
                return ret;
-       sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC;
+       sock->sk->sk_allocation = GFP_NOFS;
 
 #ifdef CONFIG_LOCKDEP
        lockdep_set_class(&sock->sk->sk_lock, &socket_class);
@@ -520,8 +520,6 @@ static int ceph_tcp_connect(struct ceph_connection *con)
                               ret);
        }
 
-       sk_set_memalloc(sock->sk);
-
        con->sock = sock;
        return 0;
 }
@@ -2808,11 +2806,8 @@ static void con_work(struct work_struct *work)
 {
        struct ceph_connection *con = container_of(work, struct ceph_connection,
                                                   work.work);
-       unsigned long pflags = current->flags;
        bool fault;
 
-       current->flags |= PF_MEMALLOC;
-
        mutex_lock(&con->mutex);
        while (true) {
                int ret;
@@ -2866,8 +2861,6 @@ static void con_work(struct work_struct *work)
                con_fault_finish(con);
 
        con->ops->put(con);
-
-       tsk_restore_flags(current, pflags, PF_MEMALLOC);
 }
 
 /*
index 94d3d5e..f7bd286 100644 (file)
@@ -49,6 +49,13 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg,
            __get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
            __get_user(kmsg->msg_flags, &umsg->msg_flags))
                return -EFAULT;
+
+       if (!uaddr)
+               kmsg->msg_namelen = 0;
+
+       if (kmsg->msg_namelen < 0)
+               return -EINVAL;
+
        if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
                kmsg->msg_namelen = sizeof(struct sockaddr_storage);
        kmsg->msg_control = compat_ptr(tmp3);
index 962ee9d..45109b7 100644 (file)
@@ -2848,7 +2848,9 @@ static void skb_update_prio(struct sk_buff *skb)
 #define skb_update_prio(skb)
 #endif
 
-static DEFINE_PER_CPU(int, xmit_recursion);
+DEFINE_PER_CPU(int, xmit_recursion);
+EXPORT_SYMBOL(xmit_recursion);
+
 #define RECURSION_LIMIT 10
 
 /**
index 44706e8..e4fdc9d 100644 (file)
@@ -175,9 +175,9 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
 
        spin_lock(&net->rules_mod_lock);
        list_del_rcu(&ops->list);
-       fib_rules_cleanup_ops(ops);
        spin_unlock(&net->rules_mod_lock);
 
+       fib_rules_cleanup_ops(ops);
        call_rcu(&ops->rcu, fib_rules_put_rcu);
 }
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
index cb5290b..70d3450 100644 (file)
@@ -198,8 +198,10 @@ static int __peernet2id(struct net *net, struct net *peer, bool alloc)
  */
 int peernet2id(struct net *net, struct net *peer)
 {
-       int id = __peernet2id(net, peer, true);
+       bool alloc = atomic_read(&peer->count) == 0 ? false : true;
+       int id;
 
+       id = __peernet2id(net, peer, alloc);
        return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
 }
 EXPORT_SYMBOL(peernet2id);
index 25b4b5d..7ebed55 100644 (file)
@@ -1932,10 +1932,10 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
                struct ifinfomsg *ifm,
                struct nlattr **tb)
 {
-       struct net_device *dev;
+       struct net_device *dev, *aux;
        int err;
 
-       for_each_netdev(net, dev) {
+       for_each_netdev_safe(net, dev, aux) {
                if (dev->group == group) {
                        err = do_setlink(skb, dev, ifm, tb, NULL, 0);
                        if (err < 0)
@@ -2166,28 +2166,28 @@ replay:
                        }
                }
                err = rtnl_configure_link(dev, ifm);
-               if (err < 0) {
-                       if (ops->newlink) {
-                               LIST_HEAD(list_kill);
-
-                               ops->dellink(dev, &list_kill);
-                               unregister_netdevice_many(&list_kill);
-                       } else {
-                               unregister_netdevice(dev);
-                       }
-                       goto out;
-               }
-
+               if (err < 0)
+                       goto out_unregister;
                if (link_net) {
                        err = dev_change_net_namespace(dev, dest_net, ifname);
                        if (err < 0)
-                               unregister_netdevice(dev);
+                               goto out_unregister;
                }
 out:
                if (link_net)
                        put_net(link_net);
                put_net(dest_net);
                return err;
+out_unregister:
+               if (ops->newlink) {
+                       LIST_HEAD(list_kill);
+
+                       ops->dellink(dev, &list_kill);
+                       unregister_netdevice_many(&list_kill);
+               } else {
+                       unregister_netdevice(dev);
+               }
+               goto out;
        }
 }
 
index f805078..8e4ac97 100644 (file)
@@ -3733,9 +3733,13 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
                     struct sock *sk, int tstype)
 {
        struct sk_buff *skb;
-       bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+       bool tsonly;
 
-       if (!sk || !skb_may_tx_timestamp(sk, tsonly))
+       if (!sk)
+               return;
+
+       tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+       if (!skb_may_tx_timestamp(sk, tsonly))
                return;
 
        if (tsonly)
@@ -4173,7 +4177,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
        skb->ignore_df = 0;
        skb_dst_drop(skb);
        skb->mark = 0;
-       skb->sender_cpu = 0;
+       skb_sender_cpu_clear(skb);
        skb_init_secmark(skb);
        secpath_reset(skb);
        nf_reset(skb);
index 93c8b20..71e3e5f 100644 (file)
@@ -653,6 +653,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool)
                sock_reset_flag(sk, bit);
 }
 
+bool sk_mc_loop(struct sock *sk)
+{
+       if (dev_recursion_level())
+               return false;
+       if (!sk)
+               return true;
+       switch (sk->sk_family) {
+       case AF_INET:
+               return inet_sk(sk)->mc_loop;
+#if IS_ENABLED(CONFIG_IPV6)
+       case AF_INET6:
+               return inet6_sk(sk)->mc_loop;
+#endif
+       }
+       WARN_ON(1);
+       return true;
+}
+EXPORT_SYMBOL(sk_mc_loop);
+
 /*
  *     This is meant for all protocols to use and covers goings on
  *     at the socket level. Everything here is generic.
@@ -1655,6 +1674,10 @@ void sock_rfree(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(sock_rfree);
 
+/*
+ * Buffer destructor for skbs that are not used directly in read or write
+ * path, e.g. for error handler skbs. Automatically called from kfree_skb.
+ */
 void sock_efree(struct sk_buff *skb)
 {
        sock_put(skb->sk);
index 4334248..8ce351f 100644 (file)
@@ -25,6 +25,8 @@
 static int zero = 0;
 static int one = 1;
 static int ushort_max = USHRT_MAX;
+static int min_sndbuf = SOCK_MIN_SNDBUF;
+static int min_rcvbuf = SOCK_MIN_RCVBUF;
 
 static int net_msg_warn;       /* Unused, but still a sysctl */
 
@@ -237,7 +239,7 @@ static struct ctl_table net_core_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &one,
+               .extra1         = &min_sndbuf,
        },
        {
                .procname       = "rmem_max",
@@ -245,7 +247,7 @@ static struct ctl_table net_core_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &one,
+               .extra1         = &min_rcvbuf,
        },
        {
                .procname       = "wmem_default",
@@ -253,7 +255,7 @@ static struct ctl_table net_core_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &one,
+               .extra1         = &min_sndbuf,
        },
        {
                .procname       = "rmem_default",
@@ -261,7 +263,7 @@ static struct ctl_table net_core_table[] = {
                .maxlen         = sizeof(int),
                .mode           = 0644,
                .proc_handler   = proc_dointvec_minmax,
-               .extra1         = &one,
+               .extra1         = &min_rcvbuf,
        },
        {
                .procname       = "dev_weight",
index faf7cc3..9d66a0f 100644 (file)
@@ -248,7 +248,9 @@ void __init dn_fib_rules_init(void)
 
 void __exit dn_fib_rules_cleanup(void)
 {
+       rtnl_lock();
        fib_rules_unregister(dn_fib_rules_ops);
+       rtnl_unlock();
        rcu_barrier();
 }
 
index 2173402..4dea2e0 100644 (file)
@@ -501,12 +501,10 @@ static struct net_device *dev_to_net_device(struct device *dev)
 #ifdef CONFIG_OF
 static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
                                        struct dsa_chip_data *cd,
-                                       int chip_index,
+                                       int chip_index, int port_index,
                                        struct device_node *link)
 {
-       int ret;
        const __be32 *reg;
-       int link_port_addr;
        int link_sw_addr;
        struct device_node *parent_sw;
        int len;
@@ -519,6 +517,10 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
        if (!reg || (len != sizeof(*reg) * 2))
                return -EINVAL;
 
+       /*
+        * Get the destination switch number from the second field of its 'reg'
+        * property, i.e. for "reg = <0x19 1>" sw_addr is '1'.
+        */
        link_sw_addr = be32_to_cpup(reg + 1);
 
        if (link_sw_addr >= pd->nr_chips)
@@ -535,20 +537,9 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
                memset(cd->rtable, -1, pd->nr_chips * sizeof(s8));
        }
 
-       reg = of_get_property(link, "reg", NULL);
-       if (!reg) {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       link_port_addr = be32_to_cpup(reg);
-
-       cd->rtable[link_sw_addr] = link_port_addr;
+       cd->rtable[link_sw_addr] = port_index;
 
        return 0;
-out:
-       kfree(cd->rtable);
-       return ret;
 }
 
 static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
@@ -658,7 +649,7 @@ static int dsa_of_probe(struct platform_device *pdev)
                        if (!strcmp(port_name, "dsa") && link &&
                                        pd->nr_chips > 1) {
                                ret = dsa_of_setup_routing_table(pd, cd,
-                                               chip_index, link);
+                                               chip_index, port_index, link);
                                if (ret)
                                        goto out_free_chip;
                        }
index 57be71d..23b9b3e 100644 (file)
@@ -1111,11 +1111,10 @@ static void ip_fib_net_exit(struct net *net)
 {
        unsigned int i;
 
+       rtnl_lock();
 #ifdef CONFIG_IP_MULTIPLE_TABLES
        fib4_rules_exit(net);
 #endif
-
-       rtnl_lock();
        for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
                struct fib_table *tb;
                struct hlist_head *head;
index 14d02ea..3e44b9b 100644 (file)
@@ -268,6 +268,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
                release_sock(sk);
                if (reqsk_queue_empty(&icsk->icsk_accept_queue))
                        timeo = schedule_timeout(timeo);
+               sched_annotate_sleep();
                lock_sock(sk);
                err = 0;
                if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
index 81751f1..592aff3 100644 (file)
@@ -71,6 +71,20 @@ static inline void inet_diag_unlock_handler(
        mutex_unlock(&inet_diag_table_mutex);
 }
 
+static size_t inet_sk_attr_size(void)
+{
+       return    nla_total_size(sizeof(struct tcp_info))
+               + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+               + nla_total_size(1) /* INET_DIAG_TOS */
+               + nla_total_size(1) /* INET_DIAG_TCLASS */
+               + nla_total_size(sizeof(struct inet_diag_meminfo))
+               + nla_total_size(sizeof(struct inet_diag_msg))
+               + nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+               + nla_total_size(TCP_CA_NAME_MAX)
+               + nla_total_size(sizeof(struct tcpvegas_info))
+               + 64;
+}
+
 int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
                              struct sk_buff *skb, struct inet_diag_req_v2 *req,
                              struct user_namespace *user_ns,                   
@@ -326,9 +340,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
        if (err)
                goto out;
 
-       rep = nlmsg_new(sizeof(struct inet_diag_msg) +
-                       sizeof(struct inet_diag_meminfo) +
-                       sizeof(struct tcp_info) + 64, GFP_KERNEL);
+       rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
        if (!rep) {
                err = -ENOMEM;
                goto out;
index 787b3c2..d9bc28a 100644 (file)
@@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb)
        if (unlikely(opt->optlen))
                ip_forward_options(skb);
 
+       skb_sender_cpu_clear(skb);
        return dst_output(skb);
 }
 
index 2c8d98e..145a50c 100644 (file)
@@ -659,27 +659,30 @@ EXPORT_SYMBOL(ip_defrag);
 struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
 {
        struct iphdr iph;
+       int netoff;
        u32 len;
 
        if (skb->protocol != htons(ETH_P_IP))
                return skb;
 
-       if (skb_copy_bits(skb, 0, &iph, sizeof(iph)) < 0)
+       netoff = skb_network_offset(skb);
+
+       if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0)
                return skb;
 
        if (iph.ihl < 5 || iph.version != 4)
                return skb;
 
        len = ntohs(iph.tot_len);
-       if (skb->len < len || len < (iph.ihl * 4))
+       if (skb->len < netoff + len || len < (iph.ihl * 4))
                return skb;
 
        if (ip_is_fragment(&iph)) {
                skb = skb_share_check(skb, GFP_ATOMIC);
                if (skb) {
-                       if (!pskb_may_pull(skb, iph.ihl*4))
+                       if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
                                return skb;
-                       if (pskb_trim_rcsum(skb, len))
+                       if (pskb_trim_rcsum(skb, netoff + len))
                                return skb;
                        memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
                        if (ip_defrag(skb, user))
index 31d8c71..5cd9927 100644 (file)
@@ -432,17 +432,32 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
                kfree_skb(skb);
 }
 
-static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
-                                         const struct sk_buff *skb,
-                                         int ee_origin)
+/* IPv4 supports cmsg on all imcp errors and some timestamps
+ *
+ * Timestamp code paths do not initialize the fields expected by cmsg:
+ * the PKTINFO fields in skb->cb[]. Fill those in here.
+ */
+static bool ipv4_datagram_support_cmsg(const struct sock *sk,
+                                      struct sk_buff *skb,
+                                      int ee_origin)
 {
-       struct in_pktinfo *info = PKTINFO_SKB_CB(skb);
+       struct in_pktinfo *info;
+
+       if (ee_origin == SO_EE_ORIGIN_ICMP)
+               return true;
 
-       if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) ||
-           (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
+       if (ee_origin == SO_EE_ORIGIN_LOCAL)
+               return false;
+
+       /* Support IP_PKTINFO on tstamp packets if requested, to correlate
+        * timestamp with egress dev. Not possible for packets without dev
+        * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
+        */
+       if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
            (!skb->dev))
                return false;
 
+       info = PKTINFO_SKB_CB(skb);
        info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
        info->ipi_ifindex = skb->dev->ifindex;
        return true;
@@ -483,7 +498,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
        serr = SKB_EXT_ERR(skb);
 
-       if (sin && skb->len) {
+       if (sin && serr->port) {
                sin->sin_family = AF_INET;
                sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
                                                   serr->addr_offset);
@@ -496,9 +511,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
        sin = &errhdr.offender;
        memset(sin, 0, sizeof(*sin));
 
-       if (skb->len &&
-           (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
-            ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
+       if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
                sin->sin_family = AF_INET;
                sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
                if (inet_sk(sk)->cmsg_flags)
index 9d78427..fe54eba 100644 (file)
@@ -268,7 +268,7 @@ static int __net_init ipmr_rules_init(struct net *net)
        return 0;
 
 err2:
-       kfree(mrt);
+       ipmr_free_table(mrt);
 err1:
        fib_rules_unregister(ops);
        return err;
@@ -278,11 +278,13 @@ static void __net_exit ipmr_rules_exit(struct net *net)
 {
        struct mr_table *mrt, *next;
 
+       rtnl_lock();
        list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
                list_del(&mrt->list);
                ipmr_free_table(mrt);
        }
        fib_rules_unregister(net->ipv4.mr_rules_ops);
+       rtnl_unlock();
 }
 #else
 #define ipmr_for_each_table(mrt, net) \
@@ -308,7 +310,10 @@ static int __net_init ipmr_rules_init(struct net *net)
 
 static void __net_exit ipmr_rules_exit(struct net *net)
 {
+       rtnl_lock();
        ipmr_free_table(net->ipv4.mrt);
+       net->ipv4.mrt = NULL;
+       rtnl_unlock();
 }
 #endif
 
index 99e810f..cf5e82f 100644 (file)
@@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb,
                    &chainname, &comment, &rulenum) != 0)
                        break;
 
-       nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
-                     "TRACE: %s:%s:%s:%u ",
-                     tablename, chainname, comment, rulenum);
+       nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
+                    "TRACE: %s:%s:%s:%u ",
+                    tablename, chainname, comment, rulenum);
 }
 #endif
 
index e9f66e1..208d543 100644 (file)
@@ -259,6 +259,9 @@ int ping_init_sock(struct sock *sk)
        kgid_t low, high;
        int ret = 0;
 
+       if (sk->sk_family == AF_INET6)
+               sk->sk_ipv6only = 1;
+
        inet_get_ping_group_range_net(net, &low, &high);
        if (gid_lte(low, group) && gid_lte(group, high))
                return 0;
@@ -305,6 +308,11 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                if (addr_len < sizeof(*addr))
                        return -EINVAL;
 
+               if (addr->sin_family != AF_INET &&
+                   !(addr->sin_family == AF_UNSPEC &&
+                     addr->sin_addr.s_addr == htonl(INADDR_ANY)))
+                       return -EAFNOSUPPORT;
+
                pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
                         sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
 
@@ -330,7 +338,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
                        return -EINVAL;
 
                if (addr->sin6_family != AF_INET6)
-                       return -EINVAL;
+                       return -EAFNOSUPPORT;
 
                pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
                         sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
@@ -716,7 +724,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
                if (msg->msg_namelen < sizeof(*usin))
                        return -EINVAL;
                if (usin->sin_family != AF_INET)
-                       return -EINVAL;
+                       return -EAFNOSUPPORT;
                daddr = usin->sin_addr.s_addr;
                /* no remote port */
        } else {
index 9d72a0f..995a225 100644 (file)
@@ -835,17 +835,13 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
                                       int large_allowed)
 {
        struct tcp_sock *tp = tcp_sk(sk);
-       u32 new_size_goal, size_goal, hlen;
+       u32 new_size_goal, size_goal;
 
        if (!large_allowed || !sk_can_gso(sk))
                return mss_now;
 
-       /* Maybe we should/could use sk->sk_prot->max_header here ? */
-       hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
-              inet_csk(sk)->icsk_ext_hdr_len +
-              tp->tcp_header_len;
-
-       new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+       /* Note : tcp_tso_autosize() will eventually split this later */
+       new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
        new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
 
        /* We try hard to avoid divides here */
index d694088..62856e1 100644 (file)
@@ -378,6 +378,12 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
  */
 void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
 {
+       /* If credits accumulated at a higher w, apply them gently now. */
+       if (tp->snd_cwnd_cnt >= w) {
+               tp->snd_cwnd_cnt = 0;
+               tp->snd_cwnd++;
+       }
+
        tp->snd_cwnd_cnt += acked;
        if (tp->snd_cwnd_cnt >= w) {
                u32 delta = tp->snd_cwnd_cnt / w;
index 4b276d1..06d3d66 100644 (file)
@@ -306,8 +306,10 @@ tcp_friendliness:
                }
        }
 
-       if (ca->cnt == 0)                       /* cannot be zero */
-               ca->cnt = 1;
+       /* The maximum rate of cwnd increase CUBIC allows is 1 packet per
+        * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT.
+        */
+       ca->cnt = max(ca->cnt, 2U);
 }
 
 static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
index fb4cf8b..f501ac0 100644 (file)
@@ -3105,10 +3105,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
                        if (!first_ackt.v64)
                                first_ackt = last_ackt;
 
-                       if (!(sacked & TCPCB_SACKED_ACKED))
+                       if (!(sacked & TCPCB_SACKED_ACKED)) {
                                reord = min(pkts_acked, reord);
-                       if (!after(scb->end_seq, tp->high_seq))
-                               flag |= FLAG_ORIG_SACK_ACKED;
+                               if (!after(scb->end_seq, tp->high_seq))
+                                       flag |= FLAG_ORIG_SACK_ACKED;
+                       }
                }
 
                if (sacked & TCPCB_SACKED_ACKED)
index 5a2dfed..f1756ee 100644 (file)
@@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
                skb->sk = sk;
                skb->destructor = sock_edemux;
                if (sk->sk_state != TCP_TIME_WAIT) {
-                       struct dst_entry *dst = sk->sk_rx_dst;
+                       struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
 
                        if (dst)
                                dst = dst_check(dst, 0);
index a2a796c..1db253e 100644 (file)
@@ -2773,15 +2773,11 @@ void tcp_send_fin(struct sock *sk)
        } else {
                /* Socket is locked, keep trying until memory is available. */
                for (;;) {
-                       skb = alloc_skb_fclone(MAX_TCP_HEADER,
-                                              sk->sk_allocation);
+                       skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
                        if (skb)
                                break;
                        yield();
                }
-
-               /* Reserve space for headers and prepare control bits. */
-               skb_reserve(skb, MAX_TCP_HEADER);
                /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
                tcp_init_nondata_skb(skb, tp->write_seq,
                                     TCPHDR_ACK | TCPHDR_FIN);
index d5f6bd9..dab7381 100644 (file)
@@ -63,6 +63,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
                return err;
 
        IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+       skb->protocol = htons(ETH_P_IP);
 
        return x->outer_mode->output2(x, skb);
 }
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
 int xfrm4_output_finish(struct sk_buff *skb)
 {
        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-       skb->protocol = htons(ETH_P_IP);
 
 #ifdef CONFIG_NETFILTER
        IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
index c215be7..ace8dac 100644 (file)
@@ -325,14 +325,34 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
        kfree_skb(skb);
 }
 
-static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb)
+/* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL.
+ *
+ * At one point, excluding local errors was a quick test to identify icmp/icmp6
+ * errors. This is no longer true, but the test remained, so the v6 stack,
+ * unlike v4, also honors cmsg requests on all wifi and timestamp errors.
+ *
+ * Timestamp code paths do not initialize the fields expected by cmsg:
+ * the PKTINFO fields in skb->cb[]. Fill those in here.
+ */
+static bool ip6_datagram_support_cmsg(struct sk_buff *skb,
+                                     struct sock_exterr_skb *serr)
 {
-       int ifindex = skb->dev ? skb->dev->ifindex : -1;
+       if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+           serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6)
+               return true;
+
+       if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL)
+               return false;
+
+       if (!skb->dev)
+               return false;
 
        if (skb->protocol == htons(ETH_P_IPV6))
-               IP6CB(skb)->iif = ifindex;
+               IP6CB(skb)->iif = skb->dev->ifindex;
        else
-               PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex;
+               PKTINFO_SKB_CB(skb)->ipi_ifindex = skb->dev->ifindex;
+
+       return true;
 }
 
 /*
@@ -369,7 +389,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
 
        serr = SKB_EXT_ERR(skb);
 
-       if (sin && skb->len) {
+       if (sin && serr->port) {
                const unsigned char *nh = skb_network_header(skb);
                sin->sin6_family = AF_INET6;
                sin->sin6_flowinfo = 0;
@@ -394,14 +414,11 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
        memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
        sin = &errhdr.offender;
        memset(sin, 0, sizeof(*sin));
-       if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) {
+
+       if (ip6_datagram_support_cmsg(skb, serr)) {
                sin->sin6_family = AF_INET6;
-               if (np->rxopt.all) {
-                       if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
-                           serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6)
-                               ip6_datagram_prepare_pktinfo_errqueue(skb);
+               if (np->rxopt.all)
                        ip6_datagram_recv_common_ctl(sk, msg, skb);
-               }
                if (skb->protocol == htons(ETH_P_IPV6)) {
                        sin->sin6_addr = ipv6_hdr(skb)->saddr;
                        if (np->rxopt.all)
index b4d5e1d..70bc6ab 100644 (file)
@@ -104,6 +104,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
                                goto again;
                        flp6->saddr = saddr;
                }
+               err = rt->dst.error;
                goto out;
        }
 again:
@@ -321,7 +322,9 @@ out_fib6_rules_ops:
 
 static void __net_exit fib6_rules_net_exit(struct net *net)
 {
+       rtnl_lock();
        fib_rules_unregister(net->ipv6.fib6_rules_ops);
+       rtnl_unlock();
 }
 
 static struct pernet_operations fib6_rules_net_ops = {
index 0a04a37..36cf0ab 100644 (file)
@@ -318,6 +318,7 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 
 static inline int ip6_forward_finish(struct sk_buff *skb)
 {
+       skb_sender_cpu_clear(skb);
        return dst_output(skb);
 }
 
@@ -541,7 +542,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 {
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
-       struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
+       struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
+                               inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
index 266a264..ddd94ec 100644 (file)
@@ -314,7 +314,7 @@ out:
  *   Create tunnel matching given parameters.
  *
  * Return:
- *   created tunnel or NULL
+ *   created tunnel or error pointer
  **/
 
 static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
@@ -322,7 +322,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
        struct net_device *dev;
        struct ip6_tnl *t;
        char name[IFNAMSIZ];
-       int err;
+       int err = -ENOMEM;
 
        if (p->name[0])
                strlcpy(name, p->name, IFNAMSIZ);
@@ -348,7 +348,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p)
 failed_free:
        ip6_dev_free(dev);
 failed:
-       return NULL;
+       return ERR_PTR(err);
 }
 
 /**
@@ -362,7 +362,7 @@ failed:
  *   tunnel device is created and registered for use.
  *
  * Return:
- *   matching tunnel or NULL
+ *   matching tunnel or error pointer
  **/
 
 static struct ip6_tnl *ip6_tnl_locate(struct net *net,
@@ -380,13 +380,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
                if (ipv6_addr_equal(local, &t->parms.laddr) &&
                    ipv6_addr_equal(remote, &t->parms.raddr)) {
                        if (create)
-                               return NULL;
+                               return ERR_PTR(-EEXIST);
 
                        return t;
                }
        }
        if (!create)
-               return NULL;
+               return ERR_PTR(-ENODEV);
        return ip6_tnl_create(net, p);
 }
 
@@ -1420,7 +1420,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        }
                        ip6_tnl_parm_from_user(&p1, &p);
                        t = ip6_tnl_locate(net, &p1, 0);
-                       if (t == NULL)
+                       if (IS_ERR(t))
                                t = netdev_priv(dev);
                } else {
                        memset(&p, 0, sizeof(p));
@@ -1445,7 +1445,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                ip6_tnl_parm_from_user(&p1, &p);
                t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL);
                if (cmd == SIOCCHGTUNNEL) {
-                       if (t != NULL) {
+                       if (!IS_ERR(t)) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
@@ -1457,14 +1457,15 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        else
                                err = ip6_tnl_update(t, &p1);
                }
-               if (t) {
+               if (!IS_ERR(t)) {
                        err = 0;
                        ip6_tnl_parm_to_user(&p, &t->parms);
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                                err = -EFAULT;
 
-               } else
-                       err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
+               } else {
+                       err = PTR_ERR(t);
+               }
                break;
        case SIOCDELTUNNEL:
                err = -EPERM;
@@ -1478,7 +1479,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
                        err = -ENOENT;
                        ip6_tnl_parm_from_user(&p1, &p);
                        t = ip6_tnl_locate(net, &p1, 0);
-                       if (t == NULL)
+                       if (IS_ERR(t))
                                break;
                        err = -EPERM;
                        if (t->dev == ip6n->fb_tnl_dev)
@@ -1672,12 +1673,13 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
                           struct nlattr *tb[], struct nlattr *data[])
 {
        struct net *net = dev_net(dev);
-       struct ip6_tnl *nt;
+       struct ip6_tnl *nt, *t;
 
        nt = netdev_priv(dev);
        ip6_tnl_netlink_parms(data, &nt->parms);
 
-       if (ip6_tnl_locate(net, &nt->parms, 0))
+       t = ip6_tnl_locate(net, &nt->parms, 0);
+       if (!IS_ERR(t))
                return -EEXIST;
 
        return ip6_tnl_create2(dev);
@@ -1697,8 +1699,7 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
        ip6_tnl_netlink_parms(data, &p);
 
        t = ip6_tnl_locate(net, &p, 0);
-
-       if (t) {
+       if (!IS_ERR(t)) {
                if (t->dev != dev)
                        return -EEXIST;
        } else
index 34b6826..312e0ff 100644 (file)
@@ -252,7 +252,7 @@ static int __net_init ip6mr_rules_init(struct net *net)
        return 0;
 
 err2:
-       kfree(mrt);
+       ip6mr_free_table(mrt);
 err1:
        fib_rules_unregister(ops);
        return err;
@@ -267,8 +267,8 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
                list_del(&mrt->list);
                ip6mr_free_table(mrt);
        }
-       rtnl_unlock();
        fib_rules_unregister(net->ipv6.mr6_rules_ops);
+       rtnl_unlock();
 }
 #else
 #define ip6mr_for_each_table(mrt, net) \
@@ -336,7 +336,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 
 static void ip6mr_free_table(struct mr6_table *mrt)
 {
-       del_timer(&mrt->ipmr_expire_timer);
+       del_timer_sync(&mrt->ipmr_expire_timer);
        mroute_clean_tables(mrt);
        kfree(mrt);
 }
index 471ed24..14ecdaf 100644 (file)
@@ -1218,7 +1218,14 @@ static void ndisc_router_discovery(struct sk_buff *skb)
        if (rt)
                rt6_set_expires(rt, jiffies + (HZ * lifetime));
        if (ra_msg->icmph.icmp6_hop_limit) {
-               in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+               /* Only set hop_limit on the interface if it is higher than
+                * the current hop_limit.
+                */
+               if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) {
+                       in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+               } else {
+                       ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n");
+               }
                if (rt)
                        dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
                                       ra_msg->icmph.icmp6_hop_limit);
index e080fbb..bb00c6f 100644 (file)
@@ -298,9 +298,9 @@ static void trace_packet(const struct sk_buff *skb,
                    &chainname, &comment, &rulenum) != 0)
                        break;
 
-       nf_log_packet(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
-                     "TRACE: %s:%s:%s:%u ",
-                     tablename, chainname, comment, rulenum);
+       nf_log_trace(net, AF_INET6, hook, skb, in, out, &trace_loginfo,
+                    "TRACE: %s:%s:%s:%u ",
+                    tablename, chainname, comment, rulenum);
 }
 #endif
 
index bd46f73..a2dfff6 100644 (file)
@@ -102,9 +102,10 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
        if (msg->msg_name) {
                DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);
-               if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||
-                   u->sin6_family != AF_INET6) {
+               if (msg->msg_namelen < sizeof(*u))
                        return -EINVAL;
+               if (u->sin6_family != AF_INET6) {
+                       return -EAFNOSUPPORT;
                }
                if (sk->sk_bound_dev_if &&
                    sk->sk_bound_dev_if != u->sin6_scope_id) {
index 5d46832..1f5e622 100644 (file)
@@ -1411,6 +1411,15 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
        TCP_SKB_CB(skb)->sacked = 0;
 }
 
+static void tcp_v6_restore_cb(struct sk_buff *skb)
+{
+       /* We need to move header back to the beginning if xfrm6_policy_check()
+        * and tcp_v6_fill_cb() are going to be called again.
+        */
+       memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
+               sizeof(struct inet6_skb_parm));
+}
+
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
        const struct tcphdr *th;
@@ -1543,6 +1552,7 @@ do_time_wait:
                        inet_twsk_deschedule(tw, &tcp_death_row);
                        inet_twsk_put(tw);
                        sk = sk2;
+                       tcp_v6_restore_cb(skb);
                        goto process;
                }
                /* Fall through to ACK */
@@ -1551,6 +1561,7 @@ do_time_wait:
                tcp_v6_timewait_ack(sk, skb);
                break;
        case TCP_TW_RST:
+               tcp_v6_restore_cb(skb);
                goto no_tcp_socket;
        case TCP_TW_SUCCESS:
                ;
@@ -1585,7 +1596,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
                skb->sk = sk;
                skb->destructor = sock_edemux;
                if (sk->sk_state != TCP_TIME_WAIT) {
-                       struct dst_entry *dst = sk->sk_rx_dst;
+                       struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
 
                        if (dst)
                                dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie);
index ab889bb..be2c0ba 100644 (file)
@@ -112,11 +112,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
                fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
                fptr->nexthdr = nexthdr;
                fptr->reserved = 0;
-               if (skb_shinfo(skb)->ip6_frag_id)
-                       fptr->identification = skb_shinfo(skb)->ip6_frag_id;
-               else
-                       ipv6_select_ident(fptr,
-                                         (struct rt6_info *)skb_dst(skb));
+               if (!skb_shinfo(skb)->ip6_frag_id)
+                       ipv6_proxy_select_ident(skb);
+               fptr->identification = skb_shinfo(skb)->ip6_frag_id;
 
                /* Fragment the skb. ipv6 header and the remaining fields of the
                 * fragment header are updated in ipv6_gso_segment()
index ca3f29b..010f8bd 100644 (file)
@@ -114,6 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
                return err;
 
        skb->ignore_df = 1;
+       skb->protocol = htons(ETH_P_IPV6);
 
        return x->outer_mode->output2(x, skb);
 }
@@ -122,7 +123,6 @@ EXPORT_SYMBOL(xfrm6_prepare_output);
 int xfrm6_output_finish(struct sk_buff *skb)
 {
        memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-       skb->protocol = htons(ETH_P_IPV6);
 
 #ifdef CONFIG_NETFILTER
        IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
index 48bf5a0..8d2d01b 100644 (file)
@@ -200,6 +200,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
                case IPPROTO_MH:
+                       offset += ipv6_optlen(exthdr);
                        if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
                                struct ip6_mh *mh;
 
index 9940a41..683346d 100644 (file)
@@ -798,7 +798,9 @@ static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout)
        orig_jiffies = jiffies;
 
        /* Set poll time to 200 ms */
-       poll_time = IRDA_MIN(timeout, msecs_to_jiffies(200));
+       poll_time = msecs_to_jiffies(200);
+       if (timeout)
+               poll_time = min_t(unsigned long, timeout, poll_time);
 
        spin_lock_irqsave(&self->spinlock, flags);
        while (self->tx_skb && self->tx_skb->len) {
index 2e9953b..53d9311 100644 (file)
@@ -1114,10 +1114,8 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
                        noblock, &err);
        else
                skb = sock_alloc_send_skb(sk, len, noblock, &err);
-       if (!skb) {
-               err = -ENOMEM;
+       if (!skb)
                goto out;
-       }
        if (iucv->transport == AF_IUCV_TRANS_HIPER)
                skb_reserve(skb, sizeof(struct af_iucv_trans_hdr) + ETH_HLEN);
        if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
index 895348e..a29a504 100644 (file)
@@ -1871,6 +1871,7 @@ static int __init l2tp_init(void)
        l2tp_wq = alloc_workqueue("l2tp", WQ_UNBOUND, 0);
        if (!l2tp_wq) {
                pr_err("alloc_workqueue failed\n");
+               unregister_pernet_device(&l2tp_net_ops);
                rc = -ENOMEM;
                goto out;
        }
index a48bad4..7702978 100644 (file)
@@ -49,8 +49,6 @@ static void ieee80211_free_tid_rx(struct rcu_head *h)
                container_of(h, struct tid_ampdu_rx, rcu_head);
        int i;
 
-       del_timer_sync(&tid_rx->reorder_timer);
-
        for (i = 0; i < tid_rx->buf_size; i++)
                __skb_queue_purge(&tid_rx->reorder_buf[i]);
        kfree(tid_rx->reorder_buf);
@@ -93,6 +91,12 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 
        del_timer_sync(&tid_rx->session_timer);
 
+       /* make sure ieee80211_sta_reorder_release() doesn't re-arm the timer */
+       spin_lock_bh(&tid_rx->reorder_lock);
+       tid_rx->removed = true;
+       spin_unlock_bh(&tid_rx->reorder_lock);
+       del_timer_sync(&tid_rx->reorder_timer);
+
        call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
 }
 
index 3afe368..8d53d65 100644 (file)
@@ -58,13 +58,24 @@ struct ieee80211_local;
 #define IEEE80211_UNSET_POWER_LEVEL    INT_MIN
 
 /*
- * Some APs experience problems when working with U-APSD. Decrease the
- * probability of that happening by using legacy mode for all ACs but VO.
- * The AP that caused us trouble was a Cisco 4410N. It ignores our
- * setting, and always treats non-VO ACs as legacy.
+ * Some APs experience problems when working with U-APSD. Decreasing the
+ * probability of that happening by using legacy mode for all ACs but VO isn't
+ * enough.
+ *
+ * Cisco 4410N originally forced us to enable VO by default only because it
+ * treated non-VO ACs as legacy.
+ *
+ * However some APs (notably Netgear R7000) silently reclassify packets to
+ * different ACs. Since u-APSD ACs require trigger frames for frame retrieval
+ * clients would never see some frames (e.g. ARP responses) or would fetch them
+ * accidentally after a long time.
+ *
+ * It makes little sense to enable u-APSD queues by default because it needs
+ * userspace applications to be aware of it to actually take advantage of the
+ * possible additional powersavings. Implicitly depending on driver autotrigger
+ * frame support doesn't make much sense.
  */
-#define IEEE80211_DEFAULT_UAPSD_QUEUES \
-       IEEE80211_WMM_IE_STA_QOSINFO_AC_VO
+#define IEEE80211_DEFAULT_UAPSD_QUEUES 0
 
 #define IEEE80211_DEFAULT_MAX_SP_LEN           \
        IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL
@@ -453,6 +464,7 @@ struct ieee80211_if_managed {
        unsigned int flags;
 
        bool csa_waiting_bcn;
+       bool csa_ignored_same_chan;
 
        bool beacon_crc_valid;
        u32 beacon_crc;
index 10ac632..142f66a 100644 (file)
@@ -1150,6 +1150,17 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
                return;
        }
 
+       if (cfg80211_chandef_identical(&csa_ie.chandef,
+                                      &sdata->vif.bss_conf.chandef)) {
+               if (ifmgd->csa_ignored_same_chan)
+                       return;
+               sdata_info(sdata,
+                          "AP %pM tries to chanswitch to same channel, ignore\n",
+                          ifmgd->associated->bssid);
+               ifmgd->csa_ignored_same_chan = true;
+               return;
+       }
+
        mutex_lock(&local->mtx);
        mutex_lock(&local->chanctx_mtx);
        conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
@@ -1210,6 +1221,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
        sdata->vif.csa_active = true;
        sdata->csa_chandef = csa_ie.chandef;
        sdata->csa_block_tx = csa_ie.mode;
+       ifmgd->csa_ignored_same_chan = false;
 
        if (sdata->csa_block_tx)
                ieee80211_stop_vif_queues(local, sdata,
@@ -2090,6 +2102,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
        sdata->vif.csa_active = false;
        ifmgd->csa_waiting_bcn = false;
+       ifmgd->csa_ignored_same_chan = false;
        if (sdata->csa_block_tx) {
                ieee80211_wake_vif_queues(local, sdata,
                                          IEEE80211_QUEUE_STOP_REASON_CSA);
@@ -3204,7 +3217,8 @@ static const u64 care_about_ies =
        (1ULL << WLAN_EID_CHANNEL_SWITCH) |
        (1ULL << WLAN_EID_PWR_CONSTRAINT) |
        (1ULL << WLAN_EID_HT_CAPABILITY) |
-       (1ULL << WLAN_EID_HT_OPERATION);
+       (1ULL << WLAN_EID_HT_OPERATION) |
+       (1ULL << WLAN_EID_EXT_CHANSWITCH_ANN);
 
 static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
                                     struct ieee80211_mgmt *mgmt, size_t len,
index 1101563..1eb730b 100644 (file)
@@ -873,9 +873,10 @@ static void ieee80211_sta_reorder_release(struct ieee80211_sub_if_data *sdata,
 
  set_release_timer:
 
-               mod_timer(&tid_agg_rx->reorder_timer,
-                         tid_agg_rx->reorder_time[j] + 1 +
-                         HT_RX_REORDER_BUF_TIMEOUT);
+               if (!tid_agg_rx->removed)
+                       mod_timer(&tid_agg_rx->reorder_timer,
+                                 tid_agg_rx->reorder_time[j] + 1 +
+                                 HT_RX_REORDER_BUF_TIMEOUT);
        } else {
                del_timer(&tid_agg_rx->reorder_timer);
        }
@@ -2214,6 +2215,9 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
        hdr = (struct ieee80211_hdr *) skb->data;
        mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
 
+       if (ieee80211_drop_unencrypted(rx, hdr->frame_control))
+               return RX_DROP_MONITOR;
+
        /* frame is in RMC, don't forward */
        if (ieee80211_is_data(hdr->frame_control) &&
            is_multicast_ether_addr(hdr->addr1) &&
index 925e68f..fb0fc13 100644 (file)
@@ -175,6 +175,7 @@ struct tid_ampdu_tx {
  * @reorder_lock: serializes access to reorder buffer, see below.
  * @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
  *     and ssn.
+ * @removed: this session is removed (but might have been found due to RCU)
  *
  * This structure's lifetime is managed by RCU, assignments to
  * the array holding it must hold the aggregation mutex.
@@ -199,6 +200,7 @@ struct tid_ampdu_rx {
        u16 timeout;
        u8 dialog_token;
        bool auto_seq;
+       bool removed;
 };
 
 /**
index 8428f4a..747bdcf 100644 (file)
@@ -3178,7 +3178,7 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
                wdev_iter = &sdata_iter->wdev;
 
                if (sdata_iter == sdata ||
-                   rcu_access_pointer(sdata_iter->vif.chanctx_conf) == NULL ||
+                   !ieee80211_sdata_running(sdata_iter) ||
                    local->hw.wiphy->software_iftypes & BIT(wdev_iter->iftype))
                        continue;
 
index c47ffd7..d93ceeb 100644 (file)
@@ -896,6 +896,8 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
                        IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
                        return;
                }
+               if (!(flags & IP_VS_CONN_F_TEMPLATE))
+                       kfree(param->pe_data);
        }
 
        if (opt)
@@ -1169,6 +1171,7 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
                                (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
                                );
 #endif
+       ip_vs_pe_put(param.pe);
        return 0;
        /* Error exit */
 out:
index 0d8448f..675d12c 100644 (file)
@@ -212,6 +212,30 @@ void nf_log_packet(struct net *net,
 }
 EXPORT_SYMBOL(nf_log_packet);
 
+void nf_log_trace(struct net *net,
+                 u_int8_t pf,
+                 unsigned int hooknum,
+                 const struct sk_buff *skb,
+                 const struct net_device *in,
+                 const struct net_device *out,
+                 const struct nf_loginfo *loginfo, const char *fmt, ...)
+{
+       va_list args;
+       char prefix[NF_LOG_PREFIXLEN];
+       const struct nf_logger *logger;
+
+       rcu_read_lock();
+       logger = rcu_dereference(net->nf.nf_loggers[pf]);
+       if (logger) {
+               va_start(args, fmt);
+               vsnprintf(prefix, sizeof(prefix), fmt, args);
+               va_end(args);
+               logger->logfn(net, pf, hooknum, skb, in, out, loginfo, prefix);
+       }
+       rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_trace);
+
 #define S_SIZE (1024 - (sizeof(unsigned int) + 1))
 
 struct nf_log_buf {
index 199fd0f..ac1a952 100644 (file)
@@ -227,7 +227,7 @@ nft_rule_deactivate_next(struct net *net, struct nft_rule *rule)
 
 static inline void nft_rule_clear(struct net *net, struct nft_rule *rule)
 {
-       rule->genmask = 0;
+       rule->genmask &= ~(1 << gencursor_next(net));
 }
 
 static int
@@ -1225,7 +1225,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 
        if (nla[NFTA_CHAIN_POLICY]) {
                if ((chain != NULL &&
-                   !(chain->flags & NFT_BASE_CHAIN)) ||
+                   !(chain->flags & NFT_BASE_CHAIN)))
+                       return -EOPNOTSUPP;
+
+               if (chain == NULL &&
                    nla[NFTA_CHAIN_HOOK] == NULL)
                        return -EOPNOTSUPP;
 
@@ -1711,9 +1714,12 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
        }
        nla_nest_end(skb, list);
 
-       if (rule->ulen &&
-           nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule)))
-               goto nla_put_failure;
+       if (rule->udata) {
+               struct nft_userdata *udata = nft_userdata(rule);
+               if (nla_put(skb, NFTA_RULE_USERDATA, udata->len + 1,
+                           udata->data) < 0)
+                       goto nla_put_failure;
+       }
 
        nlmsg_end(skb, nlh);
        return 0;
@@ -1896,11 +1902,12 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
        struct nft_table *table;
        struct nft_chain *chain;
        struct nft_rule *rule, *old_rule = NULL;
+       struct nft_userdata *udata;
        struct nft_trans *trans = NULL;
        struct nft_expr *expr;
        struct nft_ctx ctx;
        struct nlattr *tmp;
-       unsigned int size, i, n, ulen = 0;
+       unsigned int size, i, n, ulen = 0, usize = 0;
        int err, rem;
        bool create;
        u64 handle, pos_handle;
@@ -1968,12 +1975,19 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
                        n++;
                }
        }
+       /* Check for overflow of dlen field */
+       err = -EFBIG;
+       if (size >= 1 << 12)
+               goto err1;
 
-       if (nla[NFTA_RULE_USERDATA])
+       if (nla[NFTA_RULE_USERDATA]) {
                ulen = nla_len(nla[NFTA_RULE_USERDATA]);
+               if (ulen > 0)
+                       usize = sizeof(struct nft_userdata) + ulen;
+       }
 
        err = -ENOMEM;
-       rule = kzalloc(sizeof(*rule) + size + ulen, GFP_KERNEL);
+       rule = kzalloc(sizeof(*rule) + size + usize, GFP_KERNEL);
        if (rule == NULL)
                goto err1;
 
@@ -1981,10 +1995,13 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
        rule->handle = handle;
        rule->dlen   = size;
-       rule->ulen   = ulen;
+       rule->udata  = ulen ? 1 : 0;
 
-       if (ulen)
-               nla_memcpy(nft_userdata(rule), nla[NFTA_RULE_USERDATA], ulen);
+       if (ulen) {
+               udata = nft_userdata(rule);
+               udata->len = ulen - 1;
+               nla_memcpy(udata->data, nla[NFTA_RULE_USERDATA], ulen);
+       }
 
        expr = nft_expr_first(rule);
        for (i = 0; i < n; i++) {
@@ -2031,12 +2048,6 @@ static int nf_tables_newrule(struct sock *nlsk, struct sk_buff *skb,
 
 err3:
        list_del_rcu(&rule->list);
-       if (trans) {
-               list_del_rcu(&nft_trans_rule(trans)->list);
-               nft_rule_clear(net, nft_trans_rule(trans));
-               nft_trans_destroy(trans);
-               chain->use++;
-       }
 err2:
        nf_tables_rule_destroy(&ctx, rule);
 err1:
@@ -3612,12 +3623,11 @@ static int nf_tables_commit(struct sk_buff *skb)
                                                 &te->elem,
                                                 NFT_MSG_DELSETELEM, 0);
                        te->set->ops->get(te->set, &te->elem);
-                       te->set->ops->remove(te->set, &te->elem);
                        nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
-                       if (te->elem.flags & NFT_SET_MAP) {
-                               nft_data_uninit(&te->elem.data,
-                                               te->set->dtype);
-                       }
+                       if (te->set->flags & NFT_SET_MAP &&
+                           !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
+                               nft_data_uninit(&te->elem.data, te->set->dtype);
+                       te->set->ops->remove(te->set, &te->elem);
                        nft_trans_destroy(trans);
                        break;
                }
@@ -3658,7 +3668,7 @@ static int nf_tables_abort(struct sk_buff *skb)
 {
        struct net *net = sock_net(skb->sk);
        struct nft_trans *trans, *next;
-       struct nft_set *set;
+       struct nft_trans_elem *te;
 
        list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
                switch (trans->msg_type) {
@@ -3719,9 +3729,13 @@ static int nf_tables_abort(struct sk_buff *skb)
                        break;
                case NFT_MSG_NEWSETELEM:
                        nft_trans_elem_set(trans)->nelems--;
-                       set = nft_trans_elem_set(trans);
-                       set->ops->get(set, &nft_trans_elem(trans));
-                       set->ops->remove(set, &nft_trans_elem(trans));
+                       te = (struct nft_trans_elem *)trans->data;
+                       te->set->ops->get(te->set, &te->elem);
+                       nft_data_uninit(&te->elem.key, NFT_DATA_VALUE);
+                       if (te->set->flags & NFT_SET_MAP &&
+                           !(te->elem.flags & NFT_SET_ELEM_INTERVAL_END))
+                               nft_data_uninit(&te->elem.data, te->set->dtype);
+                       te->set->ops->remove(te->set, &te->elem);
                        nft_trans_destroy(trans);
                        break;
                case NFT_MSG_DELSETELEM:
index 3b90eb2..2d298dc 100644 (file)
@@ -94,10 +94,10 @@ static void nft_trace_packet(const struct nft_pktinfo *pkt,
 {
        struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
 
-       nf_log_packet(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
-                     pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
-                     chain->table->name, chain->name, comments[type],
-                     rulenum);
+       nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+                    pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
+                    chain->table->name, chain->name, comments[type],
+                    rulenum);
 }
 
 unsigned int
index a5599fc..54330fb 100644 (file)
@@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
        if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
                return -EINVAL;
 
+       /* Not all fields are initialized so first zero the tuple */
+       memset(tuple, 0, sizeof(struct nf_conntrack_tuple));
+
        tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
        tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);
 
index 1279cd8..65f3e2b 100644 (file)
@@ -123,7 +123,7 @@ static void
 nft_target_set_tgchk_param(struct xt_tgchk_param *par,
                           const struct nft_ctx *ctx,
                           struct xt_target *target, void *info,
-                          union nft_entry *entry, u8 proto, bool inv)
+                          union nft_entry *entry, u16 proto, bool inv)
 {
        par->net        = ctx->net;
        par->table      = ctx->table->name;
@@ -133,11 +133,14 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
                entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
                break;
        case AF_INET6:
+               if (proto)
+                       entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
                entry->e6.ipv6.proto = proto;
                entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
                break;
        case NFPROTO_BRIDGE:
-               entry->ebt.ethproto = proto;
+               entry->ebt.ethproto = (__force __be16)proto;
                entry->ebt.invflags = inv ? EBT_IPROTO : 0;
                break;
        }
@@ -171,7 +174,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
        [NFTA_RULE_COMPAT_FLAGS]        = { .type = NLA_U32 },
 };
 
-static int nft_parse_compat(const struct nlattr *attr, u8 *proto, bool *inv)
+static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
 {
        struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
        u32 flags;
@@ -203,7 +206,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        struct xt_target *target = expr->ops->data;
        struct xt_tgchk_param par;
        size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
-       u8 proto = 0;
+       u16 proto = 0;
        bool inv = false;
        union nft_entry e = {};
        int ret;
@@ -334,7 +337,7 @@ static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
 static void
 nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
                          struct xt_match *match, void *info,
-                         union nft_entry *entry, u8 proto, bool inv)
+                         union nft_entry *entry, u16 proto, bool inv)
 {
        par->net        = ctx->net;
        par->table      = ctx->table->name;
@@ -344,11 +347,14 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
                entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
                break;
        case AF_INET6:
+               if (proto)
+                       entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
                entry->e6.ipv6.proto = proto;
                entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
                break;
        case NFPROTO_BRIDGE:
-               entry->ebt.ethproto = proto;
+               entry->ebt.ethproto = (__force __be16)proto;
                entry->ebt.invflags = inv ? EBT_IPROTO : 0;
                break;
        }
@@ -385,7 +391,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
        struct xt_match *match = expr->ops->data;
        struct xt_mtchk_param par;
        size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
-       u8 proto = 0;
+       u16 proto = 0;
        bool inv = false;
        union nft_entry e = {};
        int ret;
index c82df0a..37c15e6 100644 (file)
@@ -153,6 +153,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
                                iter->err = err;
                                goto out;
                        }
+
+                       continue;
                }
 
                if (iter->count < iter->skip)
index ef8a926..50e1e5a 100644 (file)
@@ -513,8 +513,8 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par)
 {
        const struct ip6t_ip6 *i = par->entryinfo;
 
-       if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
-           && !(i->flags & IP6T_INV_PROTO))
+       if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) &&
+           !(i->invflags & IP6T_INV_PROTO))
                return 0;
 
        pr_info("Can be used only in combination with "
index ec2954f..067a3ff 100644 (file)
@@ -274,10 +274,8 @@ void ovs_vport_del(struct vport *vport)
        ASSERT_OVSL();
 
        hlist_del_rcu(&vport->hash_node);
-
-       vport->ops->destroy(vport);
-
        module_put(vport->ops->owner);
+       vport->ops->destroy(vport);
 }
 
 /**
index 5bf1e96..f8db706 100644 (file)
@@ -3123,11 +3123,18 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
        return 0;
 }
 
-static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
+static void packet_dev_mclist_delete(struct net_device *dev,
+                                    struct packet_mclist **mlp)
 {
-       for ( ; i; i = i->next) {
-               if (i->ifindex == dev->ifindex)
-                       packet_dev_mc(dev, i, what);
+       struct packet_mclist *ml;
+
+       while ((ml = *mlp) != NULL) {
+               if (ml->ifindex == dev->ifindex) {
+                       packet_dev_mc(dev, ml, -1);
+                       *mlp = ml->next;
+                       kfree(ml);
+               } else
+                       mlp = &ml->next;
        }
 }
 
@@ -3204,12 +3211,11 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
                                        packet_dev_mc(dev, ml, -1);
                                kfree(ml);
                        }
-                       rtnl_unlock();
-                       return 0;
+                       break;
                }
        }
        rtnl_unlock();
-       return -EADDRNOTAVAIL;
+       return 0;
 }
 
 static void packet_flush_mclist(struct sock *sk)
@@ -3559,7 +3565,7 @@ static int packet_notifier(struct notifier_block *this,
                switch (msg) {
                case NETDEV_UNREGISTER:
                        if (po->mclist)
-                               packet_dev_mclist(dev, po->mclist, -1);
+                               packet_dev_mclist_delete(dev, &po->mclist);
                        /* fallthrough */
 
                case NETDEV_DOWN:
index a817705..dba8d08 100644 (file)
@@ -88,7 +88,9 @@ static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
                        int *unpinned);
 static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
 
-static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id)
+static int rds_iw_get_device(struct sockaddr_in *src, struct sockaddr_in *dst,
+                            struct rds_iw_device **rds_iwdev,
+                            struct rdma_cm_id **cm_id)
 {
        struct rds_iw_device *iwdev;
        struct rds_iw_cm_id *i_cm_id;
@@ -112,15 +114,15 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd
                                src_addr->sin_port,
                                dst_addr->sin_addr.s_addr,
                                dst_addr->sin_port,
-                               rs->rs_bound_addr,
-                               rs->rs_bound_port,
-                               rs->rs_conn_addr,
-                               rs->rs_conn_port);
+                               src->sin_addr.s_addr,
+                               src->sin_port,
+                               dst->sin_addr.s_addr,
+                               dst->sin_port);
 #ifdef WORKING_TUPLE_DETECTION
-                       if (src_addr->sin_addr.s_addr == rs->rs_bound_addr &&
-                           src_addr->sin_port == rs->rs_bound_port &&
-                           dst_addr->sin_addr.s_addr == rs->rs_conn_addr &&
-                           dst_addr->sin_port == rs->rs_conn_port) {
+                       if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr &&
+                           src_addr->sin_port == src->sin_port &&
+                           dst_addr->sin_addr.s_addr == dst->sin_addr.s_addr &&
+                           dst_addr->sin_port == dst->sin_port) {
 #else
                        /* FIXME - needs to compare the local and remote
                         * ipaddr/port tuple, but the ipaddr is the only
@@ -128,7 +130,7 @@ static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwd
                         * zero'ed.  It doesn't appear to be properly populated
                         * during connection setup...
                         */
-                       if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) {
+                       if (src_addr->sin_addr.s_addr == src->sin_addr.s_addr) {
 #endif
                                spin_unlock_irq(&iwdev->spinlock);
                                *rds_iwdev = iwdev;
@@ -180,19 +182,13 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i
 {
        struct sockaddr_in *src_addr, *dst_addr;
        struct rds_iw_device *rds_iwdev_old;
-       struct rds_sock rs;
        struct rdma_cm_id *pcm_id;
        int rc;
 
        src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
        dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
 
-       rs.rs_bound_addr = src_addr->sin_addr.s_addr;
-       rs.rs_bound_port = src_addr->sin_port;
-       rs.rs_conn_addr = dst_addr->sin_addr.s_addr;
-       rs.rs_conn_port = dst_addr->sin_port;
-
-       rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id);
+       rc = rds_iw_get_device(src_addr, dst_addr, &rds_iwdev_old, &pcm_id);
        if (rc)
                rds_iw_remove_cm_id(rds_iwdev, cm_id);
 
@@ -598,9 +594,17 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
        struct rds_iw_device *rds_iwdev;
        struct rds_iw_mr *ibmr = NULL;
        struct rdma_cm_id *cm_id;
+       struct sockaddr_in src = {
+               .sin_addr.s_addr = rs->rs_bound_addr,
+               .sin_port = rs->rs_bound_port,
+       };
+       struct sockaddr_in dst = {
+               .sin_addr.s_addr = rs->rs_conn_addr,
+               .sin_port = rs->rs_conn_port,
+       };
        int ret;
 
-       ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id);
+       ret = rds_iw_get_device(&src, &dst, &rds_iwdev, &cm_id);
        if (ret || !cm_id) {
                ret = -ENODEV;
                goto out;
index 5394b6b..0610efa 100644 (file)
@@ -42,7 +42,8 @@ void rxrpc_UDP_error_report(struct sock *sk)
                _leave("UDP socket errqueue empty");
                return;
        }
-       if (!skb->len) {
+       serr = SKB_EXT_ERR(skb);
+       if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
                _leave("UDP empty message");
                kfree_skb(skb);
                return;
@@ -50,7 +51,6 @@ void rxrpc_UDP_error_report(struct sock *sk)
 
        rxrpc_new_skb(skb);
 
-       serr = SKB_EXT_ERR(skb);
        addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset);
        port = serr->port;
 
index 4575485..19a5606 100644 (file)
@@ -87,7 +87,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
                if (!skb) {
                        /* nothing remains on the queue */
                        if (copied &&
-                           (msg->msg_flags & MSG_PEEK || timeo == 0))
+                           (flags & MSG_PEEK || timeo == 0))
                                goto out;
 
                        /* wait for a message to turn up */
index 82c5d7f..5f6288f 100644 (file)
@@ -25,21 +25,41 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
                   struct tcf_result *res)
 {
        struct tcf_bpf *b = a->priv;
-       int action;
-       int filter_res;
+       int action, filter_res;
 
        spin_lock(&b->tcf_lock);
+
        b->tcf_tm.lastuse = jiffies;
        bstats_update(&b->tcf_bstats, skb);
-       action = b->tcf_action;
 
        filter_res = BPF_PROG_RUN(b->filter, skb);
-       if (filter_res == 0) {
-               /* Return code 0 from the BPF program
-                * is being interpreted as a drop here.
-                */
-               action = TC_ACT_SHOT;
+
+       /* A BPF program may overwrite the default action opcode.
+        * Similarly as in cls_bpf, if filter_res == -1 we use the
+        * default action specified from tc.
+        *
+        * In case a different well-known TC_ACT opcode has been
+        * returned, it will overwrite the default one.
+        *
+        * For everything else that is unknown, TC_ACT_UNSPEC is
+        * returned.
+        */
+       switch (filter_res) {
+       case TC_ACT_PIPE:
+       case TC_ACT_RECLASSIFY:
+       case TC_ACT_OK:
+               action = filter_res;
+               break;
+       case TC_ACT_SHOT:
+               action = filter_res;
                b->tcf_qstats.drops++;
+               break;
+       case TC_ACT_UNSPEC:
+               action = b->tcf_action;
+               break;
+       default:
+               action = TC_ACT_UNSPEC;
+               break;
        }
 
        spin_unlock(&b->tcf_lock);
index 09487af..95fdf4e 100644 (file)
@@ -78,8 +78,11 @@ struct tc_u_hnode {
        struct tc_u_common      *tp_c;
        int                     refcnt;
        unsigned int            divisor;
-       struct tc_u_knode __rcu *ht[1];
        struct rcu_head         rcu;
+       /* The 'ht' field MUST be the last field in structure to allow for
+        * more entries allocated at end of structure.
+        */
+       struct tc_u_knode __rcu *ht[1];
 };
 
 struct tc_u_common {
index bbedbfc..245330c 100644 (file)
@@ -1702,6 +1702,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
 
        if (len > INT_MAX)
                len = INT_MAX;
+       if (unlikely(!access_ok(VERIFY_READ, buff, len)))
+               return -EFAULT;
        sock = sockfd_lookup_light(fd, &err, &fput_needed);
        if (!sock)
                goto out;
@@ -1760,6 +1762,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
 
        if (size > INT_MAX)
                size = INT_MAX;
+       if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size)))
+               return -EFAULT;
        sock = sockfd_lookup_light(fd, &err, &fput_needed);
        if (!sock)
                goto out;
index 33fb105..5199bb1 100644 (file)
@@ -921,7 +921,7 @@ static unsigned int cache_poll(struct file *filp, poll_table *wait,
        poll_wait(filp, &queue_wait, wait);
 
        /* alway allow write */
-       mask = POLL_OUT | POLLWRNORM;
+       mask = POLLOUT | POLLWRNORM;
 
        if (!rp)
                return mask;
index 612aa73..e6ce151 100644 (file)
@@ -303,9 +303,7 @@ static int rpc_client_register(struct rpc_clnt *clnt,
        struct super_block *pipefs_sb;
        int err;
 
-       err = rpc_clnt_debugfs_register(clnt);
-       if (err)
-               return err;
+       rpc_clnt_debugfs_register(clnt);
 
        pipefs_sb = rpc_get_sb_net(net);
        if (pipefs_sb) {
index e811f39..82962f7 100644 (file)
@@ -129,48 +129,52 @@ static const struct file_operations tasks_fops = {
        .release        = tasks_release,
 };
 
-int
+void
 rpc_clnt_debugfs_register(struct rpc_clnt *clnt)
 {
-       int len, err;
+       int len;
        char name[24]; /* enough for "../../rpc_xprt/ + 8 hex digits + NULL */
+       struct rpc_xprt *xprt;
 
        /* Already registered? */
-       if (clnt->cl_debugfs)
-               return 0;
+       if (clnt->cl_debugfs || !rpc_clnt_dir)
+               return;
 
        len = snprintf(name, sizeof(name), "%x", clnt->cl_clid);
        if (len >= sizeof(name))
-               return -EINVAL;
+               return;
 
        /* make the per-client dir */
        clnt->cl_debugfs = debugfs_create_dir(name, rpc_clnt_dir);
        if (!clnt->cl_debugfs)
-               return -ENOMEM;
+               return;
 
        /* make tasks file */
-       err = -ENOMEM;
        if (!debugfs_create_file("tasks", S_IFREG | S_IRUSR, clnt->cl_debugfs,
                                 clnt, &tasks_fops))
                goto out_err;
 
-       err = -EINVAL;
        rcu_read_lock();
+       xprt = rcu_dereference(clnt->cl_xprt);
+       /* no "debugfs" dentry? Don't bother with the symlink. */
+       if (!xprt->debugfs) {
+               rcu_read_unlock();
+               return;
+       }
        len = snprintf(name, sizeof(name), "../../rpc_xprt/%s",
-                       rcu_dereference(clnt->cl_xprt)->debugfs->d_name.name);
+                       xprt->debugfs->d_name.name);
        rcu_read_unlock();
+
        if (len >= sizeof(name))
                goto out_err;
 
-       err = -ENOMEM;
        if (!debugfs_create_symlink("xprt", clnt->cl_debugfs, name))
                goto out_err;
 
-       return 0;
+       return;
 out_err:
        debugfs_remove_recursive(clnt->cl_debugfs);
        clnt->cl_debugfs = NULL;
-       return err;
 }
 
 void
@@ -226,33 +230,33 @@ static const struct file_operations xprt_info_fops = {
        .release        = xprt_info_release,
 };
 
-int
+void
 rpc_xprt_debugfs_register(struct rpc_xprt *xprt)
 {
        int len, id;
        static atomic_t cur_id;
        char            name[9]; /* 8 hex digits + NULL term */
 
+       if (!rpc_xprt_dir)
+               return;
+
        id = (unsigned int)atomic_inc_return(&cur_id);
 
        len = snprintf(name, sizeof(name), "%x", id);
        if (len >= sizeof(name))
-               return -EINVAL;
+               return;
 
        /* make the per-client dir */
        xprt->debugfs = debugfs_create_dir(name, rpc_xprt_dir);
        if (!xprt->debugfs)
-               return -ENOMEM;
+               return;
 
        /* make tasks file */
        if (!debugfs_create_file("info", S_IFREG | S_IRUSR, xprt->debugfs,
                                 xprt, &xprt_info_fops)) {
                debugfs_remove_recursive(xprt->debugfs);
                xprt->debugfs = NULL;
-               return -ENOMEM;
        }
-
-       return 0;
 }
 
 void
@@ -266,14 +270,17 @@ void __exit
 sunrpc_debugfs_exit(void)
 {
        debugfs_remove_recursive(topdir);
+       topdir = NULL;
+       rpc_clnt_dir = NULL;
+       rpc_xprt_dir = NULL;
 }
 
-int __init
+void __init
 sunrpc_debugfs_init(void)
 {
        topdir = debugfs_create_dir("sunrpc", NULL);
        if (!topdir)
-               goto out;
+               return;
 
        rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir);
        if (!rpc_clnt_dir)
@@ -283,10 +290,9 @@ sunrpc_debugfs_init(void)
        if (!rpc_xprt_dir)
                goto out_remove;
 
-       return 0;
+       return;
 out_remove:
        debugfs_remove_recursive(topdir);
        topdir = NULL;
-out:
-       return -ENOMEM;
+       rpc_clnt_dir = NULL;
 }
index e37fbed..ee5d3d2 100644 (file)
@@ -98,10 +98,7 @@ init_sunrpc(void)
        if (err)
                goto out4;
 
-       err = sunrpc_debugfs_init();
-       if (err)
-               goto out5;
-
+       sunrpc_debugfs_init();
 #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        rpc_register_sysctl();
 #endif
@@ -109,8 +106,6 @@ init_sunrpc(void)
        init_socket_xprt();     /* clnt sock transport */
        return 0;
 
-out5:
-       unregister_rpc_pipefs();
 out4:
        unregister_pernet_subsys(&sunrpc_net_ops);
 out3:
index e3015ae..9949722 100644 (file)
@@ -1331,7 +1331,6 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net)
  */
 struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
 {
-       int err;
        struct rpc_xprt *xprt;
        struct xprt_class *t;
 
@@ -1372,11 +1371,7 @@ found:
                return ERR_PTR(-ENOMEM);
        }
 
-       err = rpc_xprt_debugfs_register(xprt);
-       if (err) {
-               xprt_destroy(xprt);
-               return ERR_PTR(err);
-       }
+       rpc_xprt_debugfs_register(xprt);
 
        dprintk("RPC:       created transport %p with %u slots\n", xprt,
                        xprt->max_reqs);
index 935205e..be1c9fa 100644 (file)
@@ -152,11 +152,11 @@ out_netlink:
 static void __exit tipc_exit(void)
 {
        tipc_bearer_cleanup();
+       unregister_pernet_subsys(&tipc_net_ops);
        tipc_netlink_stop();
        tipc_netlink_compat_stop();
        tipc_socket_stop();
        tipc_unregister_sysctl();
-       unregister_pernet_subsys(&tipc_net_ops);
 
        pr_info("Deactivated\n");
 }
index a4cf364..14f09b3 100644 (file)
@@ -464,10 +464,11 @@ void tipc_link_reset(struct tipc_link *l_ptr)
        /* Clean up all queues, except inputq: */
        __skb_queue_purge(&l_ptr->outqueue);
        __skb_queue_purge(&l_ptr->deferred_queue);
-       skb_queue_splice_init(&l_ptr->wakeupq, &l_ptr->inputq);
-       if (!skb_queue_empty(&l_ptr->inputq))
+       if (!owner->inputq)
+               owner->inputq = &l_ptr->inputq;
+       skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq);
+       if (!skb_queue_empty(owner->inputq))
                owner->action_flags |= TIPC_MSG_EVT;
-       owner->inputq = &l_ptr->inputq;
        l_ptr->next_out = NULL;
        l_ptr->unacked_window = 0;
        l_ptr->checkpoint = 1;
index be25015..b6f84f6 100644 (file)
@@ -4400,6 +4400,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
        if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
                return -EINVAL;
 
+       /* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT
+        * as userspace might just pass through the capabilities from the IEs
+        * directly, rather than enforcing this restriction and returning an
+        * error in this case.
+        */
+       if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) {
+               params.ht_capa = NULL;
+               params.vht_capa = NULL;
+       }
+
        /* When you run into this, adjust the code below for the new flag */
        BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
 
index cee479b..638af06 100644 (file)
@@ -2269,11 +2269,9 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
                 * have the xfrm_state's. We need to wait for KM to
                 * negotiate new SA's or bail out with error.*/
                if (net->xfrm.sysctl_larval_drop) {
-                       dst_release(dst);
-                       xfrm_pols_put(pols, drop_pols);
                        XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-
-                       return ERR_PTR(-EREMOTE);
+                       err = -EREMOTE;
+                       goto error;
                }
 
                err = -EAGAIN;
@@ -2324,7 +2322,8 @@ nopol:
 error:
        dst_release(dst);
 dropdst:
-       dst_release(dst_orig);
+       if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
+               dst_release(dst_orig);
        xfrm_pols_put(pols, drop_pols);
        return ERR_PTR(err);
 }
@@ -2338,7 +2337,8 @@ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
                                    struct sock *sk, int flags)
 {
        struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
-                                           flags | XFRM_LOOKUP_QUEUE);
+                                           flags | XFRM_LOOKUP_QUEUE |
+                                           XFRM_LOOKUP_KEEP_DST_REF);
 
        if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
                return make_blackhole(net, dst_orig->ops->family, dst_orig);
index 1684bcc..5fde343 100644 (file)
@@ -152,7 +152,7 @@ static ssize_t sel_write_enforce(struct file *file, const char __user *buf,
                goto out;
 
        /* No partial writes. */
-       length = EINVAL;
+       length = -EINVAL;
        if (*ppos != 0)
                goto out;
 
index 35324a8..eeb691d 100644 (file)
@@ -1170,6 +1170,10 @@ static int snd_ctl_elem_add(struct snd_ctl_file *file,
 
        if (info->count < 1)
                return -EINVAL;
+       if (!*info->id.name)
+               return -EINVAL;
+       if (strnlen(info->id.name, sizeof(info->id.name)) >= sizeof(info->id.name))
+               return -EINVAL;
        access = info->access == 0 ? SNDRV_CTL_ELEM_ACCESS_READWRITE :
                (info->access & (SNDRV_CTL_ELEM_ACCESS_READWRITE|
                                 SNDRV_CTL_ELEM_ACCESS_INACTIVE|
index a422aaa..9ee25a6 100644 (file)
@@ -96,10 +96,10 @@ int snd_bebob_maudio_load_firmware(struct fw_unit *unit)
        struct fw_device *device = fw_parent_device(unit);
        int err, rcode;
        u64 date;
-       __be32 cues[3] = {
-               MAUDIO_BOOTLOADER_CUE1,
-               MAUDIO_BOOTLOADER_CUE2,
-               MAUDIO_BOOTLOADER_CUE3
+       __le32 cues[3] = {
+               cpu_to_le32(MAUDIO_BOOTLOADER_CUE1),
+               cpu_to_le32(MAUDIO_BOOTLOADER_CUE2),
+               cpu_to_le32(MAUDIO_BOOTLOADER_CUE3)
        };
 
        /* check date of software used to build */
index de7602b..27b044f 100644 (file)
  */
 #define RX_ISOCHRONOUS                 0x008
 
+/*
+ * Index of first quadlet to be interpreted; read/write.  If > 0, that many
+ * quadlets at the beginning of each data block will be ignored, and all the
+ * audio and MIDI quadlets will follow.
+ */
+#define RX_SEQ_START                   0x00c
+
 /*
  * The number of audio channels; read-only.  There will be one quadlet per
  * channel.
  */
-#define RX_NUMBER_AUDIO                        0x00c
+#define RX_NUMBER_AUDIO                        0x010
 
 /*
  * The number of MIDI ports, 0-8; read-only.  If > 0, there will be one
  * additional quadlet in each data block, following the audio quadlets.
  */
-#define RX_NUMBER_MIDI                 0x010
-
-/*
- * Index of first quadlet to be interpreted; read/write.  If > 0, that many
- * quadlets at the beginning of each data block will be ignored, and all the
- * audio and MIDI quadlets will follow.
- */
-#define RX_SEQ_START                   0x014
+#define RX_NUMBER_MIDI                 0x014
 
 /*
  * Names of all audio channels; read-only.  Quadlets are byte-swapped.  Names
index ecfe20f..f5c1d1b 100644 (file)
@@ -99,9 +99,9 @@ static void dice_proc_read(struct snd_info_entry *entry,
                } tx;
                struct {
                        u32 iso;
+                       u32 seq_start;
                        u32 number_audio;
                        u32 number_midi;
-                       u32 seq_start;
                        char names[RX_NAMES_SIZE];
                        u32 ac3_caps;
                        u32 ac3_enable;
@@ -204,10 +204,10 @@ static void dice_proc_read(struct snd_info_entry *entry,
                        break;
                snd_iprintf(buffer, "rx %u:\n", stream);
                snd_iprintf(buffer, "  iso channel: %d\n", (int)buf.rx.iso);
+               snd_iprintf(buffer, "  sequence start: %u\n", buf.rx.seq_start);
                snd_iprintf(buffer, "  audio channels: %u\n",
                            buf.rx.number_audio);
                snd_iprintf(buffer, "  midi ports: %u\n", buf.rx.number_midi);
-               snd_iprintf(buffer, "  sequence start: %u\n", buf.rx.seq_start);
                if (quadlets >= 68) {
                        dice_proc_fixup_string(buf.rx.names, RX_NAMES_SIZE);
                        snd_iprintf(buffer, "  names: %s\n", buf.rx.names);
index 5f17b77..f0e4d50 100644 (file)
@@ -26,7 +26,7 @@
 int fw_iso_resources_init(struct fw_iso_resources *r, struct fw_unit *unit)
 {
        r->channels_mask = ~0uLL;
-       r->unit = fw_unit_get(unit);
+       r->unit = unit;
        mutex_init(&r->mutex);
        r->allocated = false;
 
@@ -42,7 +42,6 @@ void fw_iso_resources_destroy(struct fw_iso_resources *r)
 {
        WARN_ON(r->allocated);
        mutex_destroy(&r->mutex);
-       fw_unit_put(r->unit);
 }
 EXPORT_SYMBOL(fw_iso_resources_destroy);
 
index a2ce773..17c2637 100644 (file)
@@ -1164,7 +1164,7 @@ static unsigned int azx_rirb_get_response(struct hda_bus *bus,
                }
        }
 
-       if (!bus->no_response_fallback)
+       if (bus->no_response_fallback)
                return -1;
 
        if (!chip->polling_mode && chip->poll_count < 2) {
index b680b4e..8ec5289 100644 (file)
@@ -687,12 +687,45 @@ static int get_amp_val_to_activate(struct hda_codec *codec, hda_nid_t nid,
        return val;
 }
 
+/* is this a stereo widget or a stereo-to-mono mix? */
+static bool is_stereo_amps(struct hda_codec *codec, hda_nid_t nid, int dir)
+{
+       unsigned int wcaps = get_wcaps(codec, nid);
+       hda_nid_t conn;
+
+       if (wcaps & AC_WCAP_STEREO)
+               return true;
+       if (dir != HDA_INPUT || get_wcaps_type(wcaps) != AC_WID_AUD_MIX)
+               return false;
+       if (snd_hda_get_num_conns(codec, nid) != 1)
+               return false;
+       if (snd_hda_get_connections(codec, nid, &conn, 1) < 0)
+               return false;
+       return !!(get_wcaps(codec, conn) & AC_WCAP_STEREO);
+}
+
 /* initialize the amp value (only at the first time) */
 static void init_amp(struct hda_codec *codec, hda_nid_t nid, int dir, int idx)
 {
        unsigned int caps = query_amp_caps(codec, nid, dir);
        int val = get_amp_val_to_activate(codec, nid, dir, caps, false);
-       snd_hda_codec_amp_init_stereo(codec, nid, dir, idx, 0xff, val);
+
+       if (is_stereo_amps(codec, nid, dir))
+               snd_hda_codec_amp_init_stereo(codec, nid, dir, idx, 0xff, val);
+       else
+               snd_hda_codec_amp_init(codec, nid, 0, dir, idx, 0xff, val);
+}
+
+/* update the amp, doing in stereo or mono depending on NID */
+static int update_amp(struct hda_codec *codec, hda_nid_t nid, int dir, int idx,
+                     unsigned int mask, unsigned int val)
+{
+       if (is_stereo_amps(codec, nid, dir))
+               return snd_hda_codec_amp_stereo(codec, nid, dir, idx,
+                                               mask, val);
+       else
+               return snd_hda_codec_amp_update(codec, nid, 0, dir, idx,
+                                               mask, val);
 }
 
 /* calculate amp value mask we can modify;
@@ -732,7 +765,7 @@ static void activate_amp(struct hda_codec *codec, hda_nid_t nid, int dir,
                return;
 
        val &= mask;
-       snd_hda_codec_amp_stereo(codec, nid, dir, idx, mask, val);
+       update_amp(codec, nid, dir, idx, mask, val);
 }
 
 static void activate_amp_out(struct hda_codec *codec, struct nid_path *path,
@@ -4424,13 +4457,11 @@ static void mute_all_mixer_nid(struct hda_codec *codec, hda_nid_t mix)
        has_amp = nid_has_mute(codec, mix, HDA_INPUT);
        for (i = 0; i < nums; i++) {
                if (has_amp)
-                       snd_hda_codec_amp_stereo(codec, mix,
-                                                HDA_INPUT, i,
-                                                0xff, HDA_AMP_MUTE);
+                       update_amp(codec, mix, HDA_INPUT, i,
+                                  0xff, HDA_AMP_MUTE);
                else if (nid_has_volume(codec, conn[i], HDA_OUTPUT))
-                       snd_hda_codec_amp_stereo(codec, conn[i],
-                                                HDA_OUTPUT, 0,
-                                                0xff, HDA_AMP_MUTE);
+                       update_amp(codec, conn[i], HDA_OUTPUT, 0,
+                                  0xff, HDA_AMP_MUTE);
        }
 }
 
index 4ca3d5d..a8a1e14 100644 (file)
@@ -1989,7 +1989,7 @@ static const struct pci_device_id azx_ids[] = {
          .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH },
        /* Sunrise Point */
        { PCI_DEVICE(0x8086, 0xa170),
-         .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH },
+         .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
        /* Sunrise Point-LP */
        { PCI_DEVICE(0x8086, 0x9d70),
          .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
index ce5a6da..05e19f7 100644 (file)
@@ -134,13 +134,38 @@ static void print_amp_caps(struct snd_info_buffer *buffer,
                    (caps & AC_AMPCAP_MUTE) >> AC_AMPCAP_MUTE_SHIFT);
 }
 
+/* is this a stereo widget or a stereo-to-mono mix? */
+static bool is_stereo_amps(struct hda_codec *codec, hda_nid_t nid,
+                          int dir, unsigned int wcaps, int indices)
+{
+       hda_nid_t conn;
+
+       if (wcaps & AC_WCAP_STEREO)
+               return true;
+       /* check for a stereo-to-mono mix; it must be:
+        * only a single connection, only for input, and only a mixer widget
+        */
+       if (indices != 1 || dir != HDA_INPUT ||
+           get_wcaps_type(wcaps) != AC_WID_AUD_MIX)
+               return false;
+
+       if (snd_hda_get_raw_connections(codec, nid, &conn, 1) < 0)
+               return false;
+       /* the connection source is a stereo? */
+       wcaps = snd_hda_param_read(codec, conn, AC_PAR_AUDIO_WIDGET_CAP);
+       return !!(wcaps & AC_WCAP_STEREO);
+}
+
 static void print_amp_vals(struct snd_info_buffer *buffer,
                           struct hda_codec *codec, hda_nid_t nid,
-                          int dir, int stereo, int indices)
+                          int dir, unsigned int wcaps, int indices)
 {
        unsigned int val;
+       bool stereo;
        int i;
 
+       stereo = is_stereo_amps(codec, nid, dir, wcaps, indices);
+
        dir = dir == HDA_OUTPUT ? AC_AMP_GET_OUTPUT : AC_AMP_GET_INPUT;
        for (i = 0; i < indices; i++) {
                snd_iprintf(buffer, " [");
@@ -757,12 +782,10 @@ static void print_codec_info(struct snd_info_entry *entry,
                            (codec->single_adc_amp &&
                             wid_type == AC_WID_AUD_IN))
                                print_amp_vals(buffer, codec, nid, HDA_INPUT,
-                                              wid_caps & AC_WCAP_STEREO,
-                                              1);
+                                              wid_caps, 1);
                        else
                                print_amp_vals(buffer, codec, nid, HDA_INPUT,
-                                              wid_caps & AC_WCAP_STEREO,
-                                              conn_len);
+                                              wid_caps, conn_len);
                }
                if (wid_caps & AC_WCAP_OUT_AMP) {
                        snd_iprintf(buffer, "  Amp-Out caps: ");
@@ -771,11 +794,10 @@ static void print_codec_info(struct snd_info_entry *entry,
                        if (wid_type == AC_WID_PIN &&
                            codec->pin_amp_workaround)
                                print_amp_vals(buffer, codec, nid, HDA_OUTPUT,
-                                              wid_caps & AC_WCAP_STEREO,
-                                              conn_len);
+                                              wid_caps, conn_len);
                        else
                                print_amp_vals(buffer, codec, nid, HDA_OUTPUT,
-                                              wid_caps & AC_WCAP_STEREO, 1);
+                                              wid_caps, 1);
                }
 
                switch (wid_type) {
index 1589c9b..dd2b3d9 100644 (file)
@@ -393,6 +393,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = {
        SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81),
        SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122),
        SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101),
+       SND_PCI_QUIRK(0x106b, 0x5600, "MacBookAir 5,2", CS420X_MBP81),
        SND_PCI_QUIRK(0x106b, 0x5b00, "MacBookAir 4,2", CS420X_MBA42),
        SND_PCI_QUIRK_VENDOR(0x106b, "Apple", CS420X_APPLE),
        {} /* terminator */
@@ -584,6 +585,7 @@ static int patch_cs420x(struct hda_codec *codec)
                return -ENOMEM;
 
        spec->gen.automute_hook = cs_automute;
+       codec->single_adc_amp = 1;
 
        snd_hda_pick_fixup(codec, cs420x_models, cs420x_fixup_tbl,
                           cs420x_fixups);
index fd3ed18..da67ea8 100644 (file)
@@ -223,6 +223,7 @@ enum {
        CXT_PINCFG_LENOVO_TP410,
        CXT_PINCFG_LEMOTE_A1004,
        CXT_PINCFG_LEMOTE_A1205,
+       CXT_PINCFG_COMPAQ_CQ60,
        CXT_FIXUP_STEREO_DMIC,
        CXT_FIXUP_INC_MIC_BOOST,
        CXT_FIXUP_HEADPHONE_MIC_PIN,
@@ -660,6 +661,15 @@ static const struct hda_fixup cxt_fixups[] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = cxt_pincfg_lemote,
        },
+       [CXT_PINCFG_COMPAQ_CQ60] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       /* 0x17 was falsely set up as a mic, it should be 0x1d */
+                       { 0x17, 0x400001f0 },
+                       { 0x1d, 0x97a70120 },
+                       { }
+               }
+       },
        [CXT_FIXUP_STEREO_DMIC] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cxt_fixup_stereo_dmic,
@@ -769,6 +779,7 @@ static const struct hda_model_fixup cxt5047_fixup_models[] = {
 };
 
 static const struct snd_pci_quirk cxt5051_fixups[] = {
+       SND_PCI_QUIRK(0x103c, 0x360b, "Compaq CQ60", CXT_PINCFG_COMPAQ_CQ60),
        SND_PCI_QUIRK(0x17aa, 0x20f2, "Lenovo X200", CXT_PINCFG_LENOVO_X200),
        {}
 };
index 526398a..f9d12c0 100644 (file)
@@ -396,7 +396,7 @@ static void alc_auto_setup_eapd(struct hda_codec *codec, bool on)
 {
        /* We currently only handle front, HP */
        static hda_nid_t pins[] = {
-               0x0f, 0x10, 0x14, 0x15, 0
+               0x0f, 0x10, 0x14, 0x15, 0x17, 0
        };
        hda_nid_t *p;
        for (p = pins; *p; p++)
@@ -2912,6 +2912,8 @@ static void alc283_init(struct hda_codec *codec)
 
        if (!hp_pin)
                return;
+
+       msleep(30);
        hp_pin_sense = snd_hda_jack_detect(codec, hp_pin);
 
        /* Index 0x43 Direct Drive HP AMP LPM Control 1 */
@@ -3607,6 +3609,7 @@ static void alc_headset_mode_unplugged(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                break;
        case 0x10ec0233:
@@ -3662,6 +3665,7 @@ static void alc_headset_mode_mic_in(struct hda_codec *codec, hda_nid_t hp_pin,
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_write_coef_idx(codec, 0x45, 0xc489);
                snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
                alc_process_coef_fw(codec, coef0255);
@@ -3731,6 +3735,7 @@ static void alc_headset_mode_default(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                break;
        case 0x10ec0233:
@@ -3785,6 +3790,7 @@ static void alc_headset_mode_ctia(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                break;
        case 0x10ec0233:
@@ -3839,6 +3845,7 @@ static void alc_headset_mode_omtp(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                break;
        case 0x10ec0233:
@@ -3884,6 +3891,7 @@ static void alc_determine_headset_type(struct hda_codec *codec)
 
        switch (codec->vendor_id) {
        case 0x10ec0255:
+       case 0x10ec0256:
                alc_process_coef_fw(codec, coef0255);
                msleep(300);
                val = alc_read_coef_idx(codec, 0x46);
@@ -4364,6 +4372,7 @@ enum {
        ALC269_FIXUP_QUANTA_MUTE,
        ALC269_FIXUP_LIFEBOOK,
        ALC269_FIXUP_LIFEBOOK_EXTMIC,
+       ALC269_FIXUP_LIFEBOOK_HP_PIN,
        ALC269_FIXUP_AMIC,
        ALC269_FIXUP_DMIC,
        ALC269VB_FIXUP_AMIC,
@@ -4517,6 +4526,13 @@ static const struct hda_fixup alc269_fixups[] = {
                        { }
                },
        },
+       [ALC269_FIXUP_LIFEBOOK_HP_PIN] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x21, 0x0221102f }, /* HP out */
+                       { }
+               },
+       },
        [ALC269_FIXUP_AMIC] = {
                .type = HDA_FIXUP_PINS,
                .v.pins = (const struct hda_pintbl[]) {
@@ -5010,6 +5026,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x104d, 0x9084, "Sony VAIO", ALC275_FIXUP_SONY_HWEQ),
        SND_PCI_QUIRK(0x104d, 0x9099, "Sony VAIO S13", ALC275_FIXUP_SONY_DISABLE_AAMIX),
        SND_PCI_QUIRK(0x10cf, 0x1475, "Lifebook", ALC269_FIXUP_LIFEBOOK),
+       SND_PCI_QUIRK(0x10cf, 0x15dc, "Lifebook T731", ALC269_FIXUP_LIFEBOOK_HP_PIN),
        SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC),
        SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC),
@@ -5036,6 +5053,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x501a, "Thinkpad", ALC283_FIXUP_INT_MIC),
        SND_PCI_QUIRK(0x17aa, 0x501e, "Thinkpad L440", ALC292_FIXUP_TPT440_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x5026, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+       SND_PCI_QUIRK(0x17aa, 0x5036, "Thinkpad T450s", ALC292_FIXUP_TPT440_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x5109, "Thinkpad", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x17aa, 0x3bf8, "Quanta FL1", ALC269_FIXUP_PCM_44K),
        SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD),
@@ -5216,6 +5234,16 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
                {0x17, 0x40000000},
                {0x1d, 0x40700001},
                {0x21, 0x02211050}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x12, 0x90a60140},
+               {0x13, 0x40000000},
+               {0x14, 0x90170110},
+               {0x19, 0x411111f0},
+               {0x1a, 0x411111f0},
+               {0x1b, 0x411111f0},
+               {0x1d, 0x40700001},
+               {0x1e, 0x411111f0},
+               {0x21, 0x02211020}),
        SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
                {0x12, 0x90a60130},
                {0x13, 0x40000000},
index b67480f..4373ada 100644 (file)
@@ -317,7 +317,7 @@ static int adav80x_put_deemph(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct adav80x *adav80x = snd_soc_codec_get_drvdata(codec);
-       unsigned int deemph = ucontrol->value.enumerated.item[0];
+       unsigned int deemph = ucontrol->value.integer.value[0];
 
        if (deemph > 1)
                return -EINVAL;
@@ -333,7 +333,7 @@ static int adav80x_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct adav80x *adav80x = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = adav80x->deemph;
+       ucontrol->value.integer.value[0] = adav80x->deemph;
        return 0;
 };
 
index 70861c7..81b54a2 100644 (file)
@@ -76,7 +76,7 @@ static int ak4641_put_deemph(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct ak4641_priv *ak4641 = snd_soc_codec_get_drvdata(codec);
-       int deemph = ucontrol->value.enumerated.item[0];
+       int deemph = ucontrol->value.integer.value[0];
 
        if (deemph > 1)
                return -EINVAL;
@@ -92,7 +92,7 @@ static int ak4641_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct ak4641_priv *ak4641 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = ak4641->deemph;
+       ucontrol->value.integer.value[0] = ak4641->deemph;
        return 0;
 };
 
index 632e89f..2a58b1d 100644 (file)
@@ -343,25 +343,25 @@ static const struct snd_soc_dapm_widget ak4671_dapm_widgets[] = {
 };
 
 static const struct snd_soc_dapm_route ak4671_intercon[] = {
-       {"DAC Left", "NULL", "PMPLL"},
-       {"DAC Right", "NULL", "PMPLL"},
-       {"ADC Left", "NULL", "PMPLL"},
-       {"ADC Right", "NULL", "PMPLL"},
+       {"DAC Left", NULL, "PMPLL"},
+       {"DAC Right", NULL, "PMPLL"},
+       {"ADC Left", NULL, "PMPLL"},
+       {"ADC Right", NULL, "PMPLL"},
 
        /* Outputs */
-       {"LOUT1", "NULL", "LOUT1 Mixer"},
-       {"ROUT1", "NULL", "ROUT1 Mixer"},
-       {"LOUT2", "NULL", "LOUT2 Mix Amp"},
-       {"ROUT2", "NULL", "ROUT2 Mix Amp"},
-       {"LOUT3", "NULL", "LOUT3 Mixer"},
-       {"ROUT3", "NULL", "ROUT3 Mixer"},
+       {"LOUT1", NULL, "LOUT1 Mixer"},
+       {"ROUT1", NULL, "ROUT1 Mixer"},
+       {"LOUT2", NULL, "LOUT2 Mix Amp"},
+       {"ROUT2", NULL, "ROUT2 Mix Amp"},
+       {"LOUT3", NULL, "LOUT3 Mixer"},
+       {"ROUT3", NULL, "ROUT3 Mixer"},
 
        {"LOUT1 Mixer", "DACL", "DAC Left"},
        {"ROUT1 Mixer", "DACR", "DAC Right"},
        {"LOUT2 Mixer", "DACHL", "DAC Left"},
        {"ROUT2 Mixer", "DACHR", "DAC Right"},
-       {"LOUT2 Mix Amp", "NULL", "LOUT2 Mixer"},
-       {"ROUT2 Mix Amp", "NULL", "ROUT2 Mixer"},
+       {"LOUT2 Mix Amp", NULL, "LOUT2 Mixer"},
+       {"ROUT2 Mix Amp", NULL, "ROUT2 Mixer"},
        {"LOUT3 Mixer", "DACSL", "DAC Left"},
        {"ROUT3 Mixer", "DACSR", "DAC Right"},
 
@@ -381,18 +381,18 @@ static const struct snd_soc_dapm_route ak4671_intercon[] = {
        {"LIN2", NULL, "Mic Bias"},
        {"RIN2", NULL, "Mic Bias"},
 
-       {"ADC Left", "NULL", "LIN MUX"},
-       {"ADC Right", "NULL", "RIN MUX"},
+       {"ADC Left", NULL, "LIN MUX"},
+       {"ADC Right", NULL, "RIN MUX"},
 
        /* Analog Loops */
-       {"LIN1 Mixing Circuit", "NULL", "LIN1"},
-       {"RIN1 Mixing Circuit", "NULL", "RIN1"},
-       {"LIN2 Mixing Circuit", "NULL", "LIN2"},
-       {"RIN2 Mixing Circuit", "NULL", "RIN2"},
-       {"LIN3 Mixing Circuit", "NULL", "LIN3"},
-       {"RIN3 Mixing Circuit", "NULL", "RIN3"},
-       {"LIN4 Mixing Circuit", "NULL", "LIN4"},
-       {"RIN4 Mixing Circuit", "NULL", "RIN4"},
+       {"LIN1 Mixing Circuit", NULL, "LIN1"},
+       {"RIN1 Mixing Circuit", NULL, "RIN1"},
+       {"LIN2 Mixing Circuit", NULL, "LIN2"},
+       {"RIN2 Mixing Circuit", NULL, "RIN2"},
+       {"LIN3 Mixing Circuit", NULL, "LIN3"},
+       {"RIN3 Mixing Circuit", NULL, "RIN3"},
+       {"LIN4 Mixing Circuit", NULL, "LIN4"},
+       {"RIN4 Mixing Circuit", NULL, "RIN4"},
 
        {"LOUT1 Mixer", "LINL1", "LIN1 Mixing Circuit"},
        {"ROUT1 Mixer", "RINR1", "RIN1 Mixing Circuit"},
index 79a4efc..7d3a6ac 100644 (file)
@@ -286,7 +286,7 @@ static int cs4271_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct cs4271_private *cs4271 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = cs4271->deemph;
+       ucontrol->value.integer.value[0] = cs4271->deemph;
        return 0;
 }
 
@@ -296,7 +296,7 @@ static int cs4271_put_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct cs4271_private *cs4271 = snd_soc_codec_get_drvdata(codec);
 
-       cs4271->deemph = ucontrol->value.enumerated.item[0];
+       cs4271->deemph = ucontrol->value.integer.value[0];
        return cs4271_set_deemph(codec);
 }
 
index ffe9617..911c26c 100644 (file)
@@ -876,11 +876,11 @@ static const struct snd_soc_dapm_widget da732x_dapm_widgets[] = {
 
 static const struct snd_soc_dapm_route da732x_dapm_routes[] = {
        /* Inputs */
-       {"AUX1L PGA", "NULL", "AUX1L"},
-       {"AUX1R PGA", "NULL", "AUX1R"},
+       {"AUX1L PGA", NULL, "AUX1L"},
+       {"AUX1R PGA", NULL, "AUX1R"},
        {"MIC1 PGA", NULL, "MIC1"},
-       {"MIC2 PGA", "NULL", "MIC2"},
-       {"MIC3 PGA", "NULL", "MIC3"},
+       {"MIC2 PGA", NULL, "MIC2"},
+       {"MIC3 PGA", NULL, "MIC3"},
 
        /* Capture Path */
        {"ADC1 Left MUX", "MIC1", "MIC1 PGA"},
index f273251..c5f35a0 100644 (file)
@@ -120,7 +120,7 @@ static int es8328_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = es8328->deemph;
+       ucontrol->value.integer.value[0] = es8328->deemph;
        return 0;
 }
 
@@ -129,7 +129,7 @@ static int es8328_put_deemph(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct es8328_priv *es8328 = snd_soc_codec_get_drvdata(codec);
-       int deemph = ucontrol->value.enumerated.item[0];
+       int deemph = ucontrol->value.integer.value[0];
        int ret;
 
        if (deemph > 1)
index a722a02..477e13d 100644 (file)
@@ -118,7 +118,7 @@ static int pcm1681_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct pcm1681_private *priv = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = priv->deemph;
+       ucontrol->value.integer.value[0] = priv->deemph;
 
        return 0;
 }
@@ -129,7 +129,7 @@ static int pcm1681_put_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct pcm1681_private *priv = snd_soc_codec_get_drvdata(codec);
 
-       priv->deemph = ucontrol->value.enumerated.item[0];
+       priv->deemph = ucontrol->value.integer.value[0];
 
        return pcm1681_set_deemph(codec);
 }
index 9974f20..474cae8 100644 (file)
@@ -1156,25 +1156,6 @@ static int pcm512x_hw_params(struct snd_pcm_substream *substream,
                                ret, pcm512x->pll_out);
                        return ret;
                }
-
-               gpio = PCM512x_G1OE << (4 - 1);
-               ret = regmap_update_bits(pcm512x->regmap, PCM512x_GPIO_EN,
-                                        gpio, gpio);
-               if (ret != 0) {
-                       dev_err(codec->dev, "Failed to enable gpio %d: %d\n",
-                               4, ret);
-                       return ret;
-               }
-
-               gpio = PCM512x_GPIO_OUTPUT_1 + 4 - 1;
-               ret = regmap_update_bits(pcm512x->regmap, gpio,
-                                        PCM512x_GxSL, PCM512x_GxSL_PLLLK);
-               if (ret != 0) {
-                       dev_err(codec->dev,
-                               "Failed to output pll lock on %d: %d\n",
-                               ret, 4);
-                       return ret;
-               }
        }
 
        ret = regmap_update_bits(pcm512x->regmap, PCM512x_SYNCHRONIZE,
index f374840..9b541e5 100644 (file)
@@ -1198,7 +1198,7 @@ static struct dmi_system_id dmi_dell_dino[] = {
                .ident = "Dell Dino",
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
-                       DMI_MATCH(DMI_BOARD_NAME, "0144P8")
+                       DMI_MATCH(DMI_PRODUCT_NAME, "XPS 13 9343")
                }
        },
        { }
index e182e65..3593a14 100644 (file)
@@ -1151,13 +1151,7 @@ static int sgtl5000_set_power_regs(struct snd_soc_codec *codec)
                /* Enable VDDC charge pump */
                ana_pwr |= SGTL5000_VDDC_CHRGPMP_POWERUP;
        } else if (vddio >= 3100 && vdda >= 3100) {
-               /*
-                * if vddio and vddd > 3.1v,
-                * charge pump should be clean before set ana_pwr
-                */
-               snd_soc_update_bits(codec, SGTL5000_CHIP_ANA_POWER,
-                               SGTL5000_VDDC_CHRGPMP_POWERUP, 0);
-
+               ana_pwr &= ~SGTL5000_VDDC_CHRGPMP_POWERUP;
                /* VDDC use VDDIO rail */
                lreg_ctrl |= SGTL5000_VDDC_ASSN_OVRD;
                lreg_ctrl |= SGTL5000_VDDC_MAN_ASSN_VDDIO <<
index 47b257e..82095d6 100644 (file)
@@ -538,8 +538,8 @@ static const struct snd_soc_dapm_route sn95031_audio_map[] = {
        /* speaker map */
        { "IHFOUTL", NULL, "Speaker Rail"},
        { "IHFOUTR", NULL, "Speaker Rail"},
-       { "IHFOUTL", "NULL", "Speaker Left Playback"},
-       { "IHFOUTR", "NULL", "Speaker Right Playback"},
+       { "IHFOUTL", NULL, "Speaker Left Playback"},
+       { "IHFOUTR", NULL, "Speaker Right Playback"},
        { "Speaker Left Playback", NULL, "Speaker Left Filter"},
        { "Speaker Right Playback", NULL, "Speaker Right Filter"},
        { "Speaker Left Filter", NULL, "IHFDAC Left"},
index 249ef5c..32942be 100644 (file)
@@ -281,7 +281,7 @@ static int tas5086_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct tas5086_private *priv = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = priv->deemph;
+       ucontrol->value.integer.value[0] = priv->deemph;
 
        return 0;
 }
@@ -292,7 +292,7 @@ static int tas5086_put_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct tas5086_private *priv = snd_soc_codec_get_drvdata(codec);
 
-       priv->deemph = ucontrol->value.enumerated.item[0];
+       priv->deemph = ucontrol->value.integer.value[0];
 
        return tas5086_set_deemph(codec);
 }
index 8d9de49..21d5402 100644 (file)
@@ -610,7 +610,7 @@ static int wm2000_anc_mode_get(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev);
 
-       ucontrol->value.enumerated.item[0] = wm2000->anc_active;
+       ucontrol->value.integer.value[0] = wm2000->anc_active;
 
        return 0;
 }
@@ -620,7 +620,7 @@ static int wm2000_anc_mode_put(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev);
-       int anc_active = ucontrol->value.enumerated.item[0];
+       int anc_active = ucontrol->value.integer.value[0];
        int ret;
 
        if (anc_active > 1)
@@ -643,7 +643,7 @@ static int wm2000_speaker_get(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev);
 
-       ucontrol->value.enumerated.item[0] = wm2000->spk_ena;
+       ucontrol->value.integer.value[0] = wm2000->spk_ena;
 
        return 0;
 }
@@ -653,7 +653,7 @@ static int wm2000_speaker_put(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm2000_priv *wm2000 = dev_get_drvdata(codec->dev);
-       int val = ucontrol->value.enumerated.item[0];
+       int val = ucontrol->value.integer.value[0];
        int ret;
 
        if (val > 1)
index 098c143..c6d1053 100644 (file)
@@ -125,7 +125,7 @@ static int wm8731_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8731_priv *wm8731 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = wm8731->deemph;
+       ucontrol->value.integer.value[0] = wm8731->deemph;
 
        return 0;
 }
@@ -135,7 +135,7 @@ static int wm8731_put_deemph(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8731_priv *wm8731 = snd_soc_codec_get_drvdata(codec);
-       int deemph = ucontrol->value.enumerated.item[0];
+       int deemph = ucontrol->value.integer.value[0];
        int ret = 0;
 
        if (deemph > 1)
index dde462c..04b04f8 100644 (file)
@@ -442,7 +442,7 @@ static int wm8903_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8903_priv *wm8903 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = wm8903->deemph;
+       ucontrol->value.integer.value[0] = wm8903->deemph;
 
        return 0;
 }
@@ -452,7 +452,7 @@ static int wm8903_put_deemph(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8903_priv *wm8903 = snd_soc_codec_get_drvdata(codec);
-       int deemph = ucontrol->value.enumerated.item[0];
+       int deemph = ucontrol->value.integer.value[0];
        int ret = 0;
 
        if (deemph > 1)
index d3b3f57..215e93c 100644 (file)
@@ -525,7 +525,7 @@ static int wm8904_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = wm8904->deemph;
+       ucontrol->value.integer.value[0] = wm8904->deemph;
        return 0;
 }
 
@@ -534,7 +534,7 @@ static int wm8904_put_deemph(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8904_priv *wm8904 = snd_soc_codec_get_drvdata(codec);
-       int deemph = ucontrol->value.enumerated.item[0];
+       int deemph = ucontrol->value.integer.value[0];
 
        if (deemph > 1)
                return -EINVAL;
index 1ab2d46..00bec91 100644 (file)
@@ -393,7 +393,7 @@ static int wm8955_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = wm8955->deemph;
+       ucontrol->value.integer.value[0] = wm8955->deemph;
        return 0;
 }
 
@@ -402,7 +402,7 @@ static int wm8955_put_deemph(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8955_priv *wm8955 = snd_soc_codec_get_drvdata(codec);
-       int deemph = ucontrol->value.enumerated.item[0];
+       int deemph = ucontrol->value.integer.value[0];
 
        if (deemph > 1)
                return -EINVAL;
index cf8fecf..3035d98 100644 (file)
@@ -184,7 +184,7 @@ static int wm8960_get_deemph(struct snd_kcontrol *kcontrol,
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8960_priv *wm8960 = snd_soc_codec_get_drvdata(codec);
 
-       ucontrol->value.enumerated.item[0] = wm8960->deemph;
+       ucontrol->value.integer.value[0] = wm8960->deemph;
        return 0;
 }
 
@@ -193,7 +193,7 @@ static int wm8960_put_deemph(struct snd_kcontrol *kcontrol,
 {
        struct snd_soc_codec *codec = snd_soc_kcontrol_codec(kcontrol);
        struct wm8960_priv *wm8960 = snd_soc_codec_get_drvdata(codec);
-       int deemph = ucontrol->value.enumerated.item[0];
+       int deemph = ucontrol->value.integer.value[0];
 
        if (deemph > 1)
                return -EINVAL;
index 9517571..98c9525 100644 (file)
@@ -180,7 +180,7 @@ static int wm9712_hp_mixer_put(struct snd_kcontrol *kcontrol,
        struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol);
        struct snd_soc_codec *codec = snd_soc_dapm_to_codec(dapm);
        struct wm9712_priv *wm9712 = snd_soc_codec_get_drvdata(codec);
-       unsigned int val = ucontrol->value.enumerated.item[0];
+       unsigned int val = ucontrol->value.integer.value[0];
        struct soc_mixer_control *mc =
                (struct soc_mixer_control *)kcontrol->private_value;
        unsigned int mixer, mask, shift, old;
@@ -193,7 +193,7 @@ static int wm9712_hp_mixer_put(struct snd_kcontrol *kcontrol,
 
        mutex_lock(&wm9712->lock);
        old = wm9712->hp_mixer[mixer];
-       if (ucontrol->value.enumerated.item[0])
+       if (ucontrol->value.integer.value[0])
                wm9712->hp_mixer[mixer] |= mask;
        else
                wm9712->hp_mixer[mixer] &= ~mask;
@@ -231,7 +231,7 @@ static int wm9712_hp_mixer_get(struct snd_kcontrol *kcontrol,
        mixer = mc->shift >> 8;
        shift = mc->shift & 0xff;
 
-       ucontrol->value.enumerated.item[0] =
+       ucontrol->value.integer.value[0] =
                (wm9712->hp_mixer[mixer] >> shift) & 1;
 
        return 0;
index 6822291..7955295 100644 (file)
@@ -255,7 +255,7 @@ static int wm9713_hp_mixer_put(struct snd_kcontrol *kcontrol,
        struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kcontrol);
        struct snd_soc_codec *codec = snd_soc_dapm_to_codec(dapm);
        struct wm9713_priv *wm9713 = snd_soc_codec_get_drvdata(codec);
-       unsigned int val = ucontrol->value.enumerated.item[0];
+       unsigned int val = ucontrol->value.integer.value[0];
        struct soc_mixer_control *mc =
                (struct soc_mixer_control *)kcontrol->private_value;
        unsigned int mixer, mask, shift, old;
@@ -268,7 +268,7 @@ static int wm9713_hp_mixer_put(struct snd_kcontrol *kcontrol,
 
        mutex_lock(&wm9713->lock);
        old = wm9713->hp_mixer[mixer];
-       if (ucontrol->value.enumerated.item[0])
+       if (ucontrol->value.integer.value[0])
                wm9713->hp_mixer[mixer] |= mask;
        else
                wm9713->hp_mixer[mixer] &= ~mask;
@@ -306,7 +306,7 @@ static int wm9713_hp_mixer_get(struct snd_kcontrol *kcontrol,
        mixer = mc->shift >> 8;
        shift = mc->shift & 0xff;
 
-       ucontrol->value.enumerated.item[0] =
+       ucontrol->value.integer.value[0] =
                (wm9713->hp_mixer[mixer] >> shift) & 1;
 
        return 0;
index 75870c0..91eb3ae 100644 (file)
@@ -1049,7 +1049,7 @@ static u32 fsl_spdif_txclk_caldiv(struct fsl_spdif_priv *spdif_priv,
                                enum spdif_txrate index, bool round)
 {
        const u32 rate[] = { 32000, 44100, 48000, 96000, 192000 };
-       bool is_sysclk = clk == spdif_priv->sysclk;
+       bool is_sysclk = clk_is_match(clk, spdif_priv->sysclk);
        u64 rate_ideal, rate_actual, sub;
        u32 sysclk_dfmin, sysclk_dfmax;
        u32 txclk_df, sysclk_df, arate;
@@ -1143,7 +1143,7 @@ static int fsl_spdif_probe_txclk(struct fsl_spdif_priv *spdif_priv,
                        spdif_priv->txclk_src[index], rate[index]);
        dev_dbg(&pdev->dev, "use txclk df %d for %dHz sample rate\n",
                        spdif_priv->txclk_df[index], rate[index]);
-       if (spdif_priv->txclk[index] == spdif_priv->sysclk)
+       if (clk_is_match(spdif_priv->txclk[index], spdif_priv->sysclk))
                dev_dbg(&pdev->dev, "use sysclk df %d for %dHz sample rate\n",
                                spdif_priv->sysclk_df[index], rate[index]);
        dev_dbg(&pdev->dev, "the best rate for %dHz sample rate is %dHz\n",
index b9fabbf..6b0c8f7 100644 (file)
@@ -603,7 +603,7 @@ static int fsl_ssi_set_bclk(struct snd_pcm_substream *substream,
        factor = (div2 + 1) * (7 * psr + 1) * 2;
 
        for (i = 0; i < 255; i++) {
-               tmprate = freq * factor * (i + 2);
+               tmprate = freq * factor * (i + 1);
 
                if (baudclk_is_used)
                        clkrate = clk_get_rate(ssi_private->baudclk);
@@ -1227,7 +1227,7 @@ static int fsl_ssi_imx_probe(struct platform_device *pdev,
        ssi_private->dma_params_tx.addr = ssi_private->ssi_phys + CCSR_SSI_STX0;
        ssi_private->dma_params_rx.addr = ssi_private->ssi_phys + CCSR_SSI_SRX0;
 
-       ret = !of_property_read_u32_array(np, "dmas", dmas, 4);
+       ret = of_property_read_u32_array(np, "dmas", dmas, 4);
        if (ssi_private->use_dma && !ret && dmas[2] == IMX_DMATYPE_SSI_DUAL) {
                ssi_private->use_dual_fifo = true;
                /* When using dual fifo mode, we need to keep watermark
index c42ffae..402b728 100644 (file)
@@ -207,9 +207,6 @@ static int hsw_parse_fw_image(struct sst_fw *sst_fw)
                module = (void *)module + sizeof(*module) + module->mod_size;
        }
 
-       /* allocate scratch mem regions */
-       sst_block_alloc_scratch(dsp);
-
        return 0;
 }
 
index 394af56..863a9ca 100644 (file)
@@ -1732,6 +1732,7 @@ static void sst_hsw_drop_all(struct sst_hsw *hsw)
 int sst_hsw_dsp_load(struct sst_hsw *hsw)
 {
        struct sst_dsp *dsp = hsw->dsp;
+       struct sst_fw *sst_fw, *t;
        int ret;
 
        dev_dbg(hsw->dev, "loading audio DSP....");
@@ -1748,12 +1749,17 @@ int sst_hsw_dsp_load(struct sst_hsw *hsw)
                return ret;
        }
 
-       ret = sst_fw_reload(hsw->sst_fw);
-       if (ret < 0) {
-               dev_err(hsw->dev, "error: SST FW reload failed\n");
-               sst_dsp_dma_put_channel(dsp);
-               return -ENOMEM;
+       list_for_each_entry_safe_reverse(sst_fw, t, &dsp->fw_list, list) {
+               ret = sst_fw_reload(sst_fw);
+               if (ret < 0) {
+                       dev_err(hsw->dev, "error: SST FW reload failed\n");
+                       sst_dsp_dma_put_channel(dsp);
+                       return -ENOMEM;
+               }
        }
+       ret = sst_block_alloc_scratch(hsw->dsp);
+       if (ret < 0)
+               return -EINVAL;
 
        sst_dsp_dma_put_channel(dsp);
        return 0;
@@ -1809,12 +1815,17 @@ int sst_hsw_dsp_runtime_suspend(struct sst_hsw *hsw)
 
 int sst_hsw_dsp_runtime_sleep(struct sst_hsw *hsw)
 {
-       sst_fw_unload(hsw->sst_fw);
-       sst_block_free_scratch(hsw->dsp);
+       struct sst_fw *sst_fw, *t;
+       struct sst_dsp *dsp = hsw->dsp;
+
+       list_for_each_entry_safe(sst_fw, t, &dsp->fw_list, list) {
+               sst_fw_unload(sst_fw);
+       }
+       sst_block_free_scratch(dsp);
 
        hsw->boot_complete = false;
 
-       sst_dsp_sleep(hsw->dsp);
+       sst_dsp_sleep(dsp);
 
        return 0;
 }
@@ -1943,6 +1954,11 @@ int sst_hsw_dsp_init(struct device *dev, struct sst_pdata *pdata)
                goto fw_err;
        }
 
+       /* allocate scratch mem regions */
+       ret = sst_block_alloc_scratch(hsw->dsp);
+       if (ret < 0)
+               goto boot_err;
+
        /* wait for DSP boot completion */
        sst_dsp_boot(hsw->dsp);
        ret = wait_event_timeout(hsw->boot_wait, hsw->boot_complete,
index def7d82..d194830 100644 (file)
@@ -579,7 +579,7 @@ static int kirkwood_i2s_dev_probe(struct platform_device *pdev)
                if (PTR_ERR(priv->extclk) == -EPROBE_DEFER)
                        return -EPROBE_DEFER;
        } else {
-               if (priv->extclk == priv->clk) {
+               if (clk_is_match(priv->extclk, priv->clk)) {
                        devm_clk_put(&pdev->dev, priv->extclk);
                        priv->extclk = ERR_PTR(-EINVAL);
                } else {
index 30579ca..e5c9908 100644 (file)
@@ -347,6 +347,8 @@ static ssize_t codec_list_read_file(struct file *file, char __user *user_buf,
        if (!buf)
                return -ENOMEM;
 
+       mutex_lock(&client_mutex);
+
        list_for_each_entry(codec, &codec_list, list) {
                len = snprintf(buf + ret, PAGE_SIZE - ret, "%s\n",
                               codec->component.name);
@@ -358,6 +360,8 @@ static ssize_t codec_list_read_file(struct file *file, char __user *user_buf,
                }
        }
 
+       mutex_unlock(&client_mutex);
+
        if (ret >= 0)
                ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret);
 
@@ -382,6 +386,8 @@ static ssize_t dai_list_read_file(struct file *file, char __user *user_buf,
        if (!buf)
                return -ENOMEM;
 
+       mutex_lock(&client_mutex);
+
        list_for_each_entry(component, &component_list, list) {
                list_for_each_entry(dai, &component->dai_list, list) {
                        len = snprintf(buf + ret, PAGE_SIZE - ret, "%s\n",
@@ -395,6 +401,8 @@ static ssize_t dai_list_read_file(struct file *file, char __user *user_buf,
                }
        }
 
+       mutex_unlock(&client_mutex);
+
        ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret);
 
        kfree(buf);
@@ -418,6 +426,8 @@ static ssize_t platform_list_read_file(struct file *file,
        if (!buf)
                return -ENOMEM;
 
+       mutex_lock(&client_mutex);
+
        list_for_each_entry(platform, &platform_list, list) {
                len = snprintf(buf + ret, PAGE_SIZE - ret, "%s\n",
                               platform->component.name);
@@ -429,6 +439,8 @@ static ssize_t platform_list_read_file(struct file *file,
                }
        }
 
+       mutex_unlock(&client_mutex);
+
        ret = simple_read_from_buffer(user_buf, count, ppos, buf, ret);
 
        kfree(buf);
@@ -836,6 +848,8 @@ static struct snd_soc_component *soc_find_component(
 {
        struct snd_soc_component *component;
 
+       lockdep_assert_held(&client_mutex);
+
        list_for_each_entry(component, &component_list, list) {
                if (of_node) {
                        if (component->dev->of_node == of_node)
@@ -854,6 +868,8 @@ static struct snd_soc_dai *snd_soc_find_dai(
        struct snd_soc_component *component;
        struct snd_soc_dai *dai;
 
+       lockdep_assert_held(&client_mutex);
+
        /* Find CPU DAI from registered DAIs*/
        list_for_each_entry(component, &component_list, list) {
                if (dlc->of_node && component->dev->of_node != dlc->of_node)
@@ -1508,6 +1524,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
        struct snd_soc_codec *codec;
        int ret, i, order;
 
+       mutex_lock(&client_mutex);
        mutex_lock_nested(&card->mutex, SND_SOC_CARD_CLASS_INIT);
 
        /* bind DAIs */
@@ -1662,6 +1679,7 @@ static int snd_soc_instantiate_card(struct snd_soc_card *card)
        card->instantiated = 1;
        snd_soc_dapm_sync(&card->dapm);
        mutex_unlock(&card->mutex);
+       mutex_unlock(&client_mutex);
 
        return 0;
 
@@ -1680,6 +1698,7 @@ card_probe_error:
 
 base_error:
        mutex_unlock(&card->mutex);
+       mutex_unlock(&client_mutex);
 
        return ret;
 }
@@ -2713,13 +2732,6 @@ static void snd_soc_component_del_unlocked(struct snd_soc_component *component)
        list_del(&component->list);
 }
 
-static void snd_soc_component_del(struct snd_soc_component *component)
-{
-       mutex_lock(&client_mutex);
-       snd_soc_component_del_unlocked(component);
-       mutex_unlock(&client_mutex);
-}
-
 int snd_soc_register_component(struct device *dev,
                               const struct snd_soc_component_driver *cmpnt_drv,
                               struct snd_soc_dai_driver *dai_drv,
@@ -2767,14 +2779,17 @@ void snd_soc_unregister_component(struct device *dev)
 {
        struct snd_soc_component *cmpnt;
 
+       mutex_lock(&client_mutex);
        list_for_each_entry(cmpnt, &component_list, list) {
                if (dev == cmpnt->dev && cmpnt->registered_as_component)
                        goto found;
        }
+       mutex_unlock(&client_mutex);
        return;
 
 found:
-       snd_soc_component_del(cmpnt);
+       snd_soc_component_del_unlocked(cmpnt);
+       mutex_unlock(&client_mutex);
        snd_soc_component_cleanup(cmpnt);
        kfree(cmpnt);
 }
@@ -2882,10 +2897,14 @@ struct snd_soc_platform *snd_soc_lookup_platform(struct device *dev)
 {
        struct snd_soc_platform *platform;
 
+       mutex_lock(&client_mutex);
        list_for_each_entry(platform, &platform_list, list) {
-               if (dev == platform->dev)
+               if (dev == platform->dev) {
+                       mutex_unlock(&client_mutex);
                        return platform;
+               }
        }
+       mutex_unlock(&client_mutex);
 
        return NULL;
 }
@@ -3090,15 +3109,15 @@ void snd_soc_unregister_codec(struct device *dev)
 {
        struct snd_soc_codec *codec;
 
+       mutex_lock(&client_mutex);
        list_for_each_entry(codec, &codec_list, list) {
                if (dev == codec->dev)
                        goto found;
        }
+       mutex_unlock(&client_mutex);
        return;
 
 found:
-
-       mutex_lock(&client_mutex);
        list_del(&codec->list);
        snd_soc_component_del_unlocked(&codec->component);
        mutex_unlock(&client_mutex);
index dc9df00..337c317 100644 (file)
@@ -192,6 +192,7 @@ static const struct rc_config {
        { USB_ID(0x041e, 0x3040), 2, 2, 6, 6,  2,  0x6e91 }, /* Live! 24-bit */
        { USB_ID(0x041e, 0x3042), 0, 1, 1, 1,  1,  0x000d }, /* Usb X-Fi S51 */
        { USB_ID(0x041e, 0x30df), 0, 1, 1, 1,  1,  0x000d }, /* Usb X-Fi S51 Pro */
+       { USB_ID(0x041e, 0x3237), 0, 1, 1, 1,  1,  0x000d }, /* Usb X-Fi S51 Pro */
        { USB_ID(0x041e, 0x3048), 2, 2, 6, 6,  2,  0x6e91 }, /* Toshiba SB0500 */
 };
 
index 67d4765..07f984d 100644 (file)
@@ -1773,6 +1773,36 @@ YAMAHA_DEVICE(0x7010, "UB99"),
                }
        }
 },
+{
+       USB_DEVICE(0x0582, 0x0159),
+       .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) {
+               /* .vendor_name = "Roland", */
+               /* .product_name = "UA-22", */
+               .ifnum = QUIRK_ANY_INTERFACE,
+               .type = QUIRK_COMPOSITE,
+               .data = (const struct snd_usb_audio_quirk[]) {
+                       {
+                               .ifnum = 0,
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE
+                       },
+                       {
+                               .ifnum = 1,
+                               .type = QUIRK_AUDIO_STANDARD_INTERFACE
+                       },
+                       {
+                               .ifnum = 2,
+                               .type = QUIRK_MIDI_FIXED_ENDPOINT,
+                               .data = & (const struct snd_usb_midi_endpoint_info) {
+                                       .out_cables = 0x0001,
+                                       .in_cables = 0x0001
+                               }
+                       },
+                       {
+                               .ifnum = -1
+                       }
+               }
+       }
+},
 /* this catches most recent vendor-specific Roland devices */
 {
        .match_flags = USB_DEVICE_ID_MATCH_VENDOR |
index 753a47d..9a28365 100644 (file)
@@ -1113,8 +1113,13 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs,
 
 bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
 {
-       /* MS Lifecam HD-5000 doesn't support reading the sample rate. */
-       return chip->usb_id == USB_ID(0x045E, 0x076D);
+       /* devices which do not support reading the sample rate. */
+       switch (chip->usb_id) {
+       case USB_ID(0x045E, 0x076D): /* MS Lifecam HD-5000 */
+       case USB_ID(0x04D8, 0xFEEA): /* Benchmark DAC1 Pre */
+               return true;
+       }
+       return false;
 }
 
 /* Marantz/Denon USB DACs need a vendor cmd to switch
index d66ab79..8c0c1a2 100644 (file)
@@ -1,12 +1,12 @@
 
-MEMCPY_FN(__memcpy,
+MEMCPY_FN(memcpy_orig,
        "x86-64-unrolled",
        "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
 
-MEMCPY_FN(memcpy_c,
+MEMCPY_FN(__memcpy,
        "x86-64-movsq",
        "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
 
-MEMCPY_FN(memcpy_c_e,
+MEMCPY_FN(memcpy_erms,
        "x86-64-movsb",
        "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
index fcd9cf0..e4c2c30 100644 (file)
@@ -1,8 +1,6 @@
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
-#define Lmemcpy_c globl memcpy_c; memcpy_c
-#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
 #include "../../../arch/x86/lib/memcpy_64.S"
 /*
  * We need to provide note.GNU-stack section, saying that we want
index db1d3a2..d3dfb79 100644 (file)
@@ -36,7 +36,7 @@ static const struct option options[] = {
                    "Specify length of memory to copy. "
                    "Available units: B, KB, MB, GB and TB (upper and lower)"),
        OPT_STRING('r', "routine", &routine, "default",
-                   "Specify routine to copy"),
+                   "Specify routine to copy, \"all\" runs all available routines"),
        OPT_INTEGER('i', "iterations", &iterations,
                    "repeat memcpy() invocation this number of times"),
        OPT_BOOLEAN('c', "cycle", &use_cycle,
@@ -135,55 +135,16 @@ struct bench_mem_info {
        const char *const *usage;
 };
 
-static int bench_mem_common(int argc, const char **argv,
-                    const char *prefix __maybe_unused,
-                    struct bench_mem_info *info)
+static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen)
 {
-       int i;
-       size_t len;
-       double totallen;
+       const struct routine *r = &info->routines[r_idx];
        double result_bps[2];
        u64 result_cycle[2];
 
-       argc = parse_options(argc, argv, options,
-                            info->usage, 0);
-
-       if (no_prefault && only_prefault) {
-               fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
-               return 1;
-       }
-
-       if (use_cycle)
-               init_cycle();
-
-       len = (size_t)perf_atoll((char *)length_str);
-       totallen = (double)len * iterations;
-
        result_cycle[0] = result_cycle[1] = 0ULL;
        result_bps[0] = result_bps[1] = 0.0;
 
-       if ((s64)len <= 0) {
-               fprintf(stderr, "Invalid length:%s\n", length_str);
-               return 1;
-       }
-
-       /* same to without specifying either of prefault and no-prefault */
-       if (only_prefault && no_prefault)
-               only_prefault = no_prefault = false;
-
-       for (i = 0; info->routines[i].name; i++) {
-               if (!strcmp(info->routines[i].name, routine))
-                       break;
-       }
-       if (!info->routines[i].name) {
-               printf("Unknown routine:%s\n", routine);
-               printf("Available routines...\n");
-               for (i = 0; info->routines[i].name; i++) {
-                       printf("\t%s ... %s\n",
-                              info->routines[i].name, info->routines[i].desc);
-               }
-               return 1;
-       }
+       printf("Routine %s (%s)\n", r->name, r->desc);
 
        if (bench_format == BENCH_FORMAT_DEFAULT)
                printf("# Copying %s Bytes ...\n\n", length_str);
@@ -191,28 +152,17 @@ static int bench_mem_common(int argc, const char **argv,
        if (!only_prefault && !no_prefault) {
                /* show both of results */
                if (use_cycle) {
-                       result_cycle[0] =
-                               info->do_cycle(&info->routines[i], len, false);
-                       result_cycle[1] =
-                               info->do_cycle(&info->routines[i], len, true);
+                       result_cycle[0] = info->do_cycle(r, len, false);
+                       result_cycle[1] = info->do_cycle(r, len, true);
                } else {
-                       result_bps[0] =
-                               info->do_gettimeofday(&info->routines[i],
-                                               len, false);
-                       result_bps[1] =
-                               info->do_gettimeofday(&info->routines[i],
-                                               len, true);
+                       result_bps[0]   = info->do_gettimeofday(r, len, false);
+                       result_bps[1]   = info->do_gettimeofday(r, len, true);
                }
        } else {
-               if (use_cycle) {
-                       result_cycle[pf] =
-                               info->do_cycle(&info->routines[i],
-                                               len, only_prefault);
-               } else {
-                       result_bps[pf] =
-                               info->do_gettimeofday(&info->routines[i],
-                                               len, only_prefault);
-               }
+               if (use_cycle)
+                       result_cycle[pf] = info->do_cycle(r, len, only_prefault);
+               else
+                       result_bps[pf] = info->do_gettimeofday(r, len, only_prefault);
        }
 
        switch (bench_format) {
@@ -265,6 +215,60 @@ static int bench_mem_common(int argc, const char **argv,
                die("unknown format: %d\n", bench_format);
                break;
        }
+}
+
+static int bench_mem_common(int argc, const char **argv,
+                    const char *prefix __maybe_unused,
+                    struct bench_mem_info *info)
+{
+       int i;
+       size_t len;
+       double totallen;
+
+       argc = parse_options(argc, argv, options,
+                            info->usage, 0);
+
+       if (no_prefault && only_prefault) {
+               fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
+               return 1;
+       }
+
+       if (use_cycle)
+               init_cycle();
+
+       len = (size_t)perf_atoll((char *)length_str);
+       totallen = (double)len * iterations;
+
+       if ((s64)len <= 0) {
+               fprintf(stderr, "Invalid length:%s\n", length_str);
+               return 1;
+       }
+
+       /* same to without specifying either of prefault and no-prefault */
+       if (only_prefault && no_prefault)
+               only_prefault = no_prefault = false;
+
+       if (!strcmp(routine, "all")) { /* exact match, not a prefix match */
+               for (i = 0; info->routines[i].name; i++)
+                       __bench_mem_routine(info, i, len, totallen);
+               return 0;
+       }
+
+       for (i = 0; info->routines[i].name; i++) {
+               if (!strcmp(info->routines[i].name, routine))
+                       break;
+       }
+       if (!info->routines[i].name) {
+               printf("Unknown routine:%s\n", routine);
+               printf("Available routines...\n");
+               for (i = 0; info->routines[i].name; i++) {
+                       printf("\t%s ... %s\n",
+                              info->routines[i].name, info->routines[i].desc);
+               }
+               return 1;
+       }
+
+       __bench_mem_routine(info, i, len, totallen);
 
        return 0;
 }
index a71dff9..f02d028 100644 (file)
@@ -1,12 +1,12 @@
 
-MEMSET_FN(__memset,
+MEMSET_FN(memset_orig,
        "x86-64-unrolled",
        "unrolled memset() in arch/x86/lib/memset_64.S")
 
-MEMSET_FN(memset_c,
+MEMSET_FN(__memset,
        "x86-64-stosq",
        "movsq-based memset() in arch/x86/lib/memset_64.S")
 
-MEMSET_FN(memset_c_e,
+MEMSET_FN(memset_erms,
        "x86-64-stosb",
        "movsb-based memset() in arch/x86/lib/memset_64.S")
index 9e5af89..de27878 100644 (file)
@@ -1,8 +1,6 @@
 #define memset MEMSET /* don't hide glibc's memset() */
 #define altinstr_replacement text
 #define globl p2align 4; .globl
-#define Lmemset_c globl memset_c; memset_c
-#define Lmemset_c_e globl memset_c_e; memset_c_e
 #include "../../../arch/x86/lib/memset_64.S"
 
 /*
index 61bf912..9d9db3b 100644 (file)
@@ -30,6 +30,8 @@ static int disasm_line__parse(char *line, char **namep, char **rawp);
 
 static void ins__delete(struct ins_operands *ops)
 {
+       if (ops == NULL)
+               return;
        zfree(&ops->source.raw);
        zfree(&ops->source.name);
        zfree(&ops->target.raw);
index 6789d78..3a3a0f1 100644 (file)
@@ -4,5 +4,6 @@
 /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
 
 #define altinstruction_entry #
+#define ALTERNATIVE_2 #
 
 #endif
index 3ed7c04..2e2ba2e 100644 (file)
@@ -209,7 +209,7 @@ $(OUTPUT)%.o: %.c
 
 $(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
        $(ECHO) "  CC      " $@
-       $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -Wl,-rpath=./ -lrt -lpci -L$(OUTPUT) -o $@
+       $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -lrt -lpci -L$(OUTPUT) -o $@
        $(QUIET) $(STRIPCMD) $@
 
 $(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC)
index 4e51122..95abddc 100644 (file)
@@ -17,11 +17,20 @@ TARGETS += sysctl
 TARGETS += timers
 TARGETS += user
 TARGETS += vm
+TARGETS += x86
 #Please keep the TARGETS list alphabetically sorted
 
 TARGETS_HOTPLUG = cpu-hotplug
 TARGETS_HOTPLUG += memory-hotplug
 
+# Clear LDFLAGS and MAKEFLAGS if called from main
+# Makefile to avoid test build failures when test
+# Makefile doesn't have explicit build rules.
+ifeq (1,$(MAKELEVEL))
+undefine LDFLAGS
+override MAKEFLAGS =
+endif
+
 all:
        for TARGET in $(TARGETS); do \
                make -C $$TARGET; \
@@ -47,7 +56,40 @@ clean_hotplug:
                make -C $$TARGET clean; \
        done;
 
+INSTALL_PATH ?= install
+INSTALL_PATH := $(abspath $(INSTALL_PATH))
+ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+
+install:
+ifdef INSTALL_PATH
+       @# Ask all targets to install their files
+       mkdir -p $(INSTALL_PATH)
+       for TARGET in $(TARGETS); do \
+               mkdir -p $(INSTALL_PATH)/$$TARGET ; \
+               make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+       done;
+
+       @# Ask all targets to emit their test scripts
+       echo "#!/bin/bash" > $(ALL_SCRIPT)
+       echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT)
+       echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
+
+       for TARGET in $(TARGETS); do \
+               echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+               echo "echo ========================================" >> $(ALL_SCRIPT); \
+               echo "cd $$TARGET" >> $(ALL_SCRIPT); \
+               make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+               echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+       done;
+
+       chmod u+x $(ALL_SCRIPT)
+else
+       $(error Error: set INSTALL_PATH to use install)
+endif
+
 clean:
        for TARGET in $(TARGETS); do \
                make -C $$TARGET clean; \
        done;
+
+.PHONY: install
index e18b42b..1822356 100644 (file)
@@ -16,8 +16,9 @@ else
        echo "Not an x86 target, can't build breakpoints selftests"
 endif
 
-run_tests:
-       @./breakpoint_test || echo "breakpoints selftests: [FAIL]"
+TEST_PROGS := breakpoint_test
+
+include ../lib.mk
 
 clean:
        rm -fr breakpoint_test
index e9c28d8..fe1f991 100644 (file)
@@ -1,9 +1,10 @@
 all:
 
-run_tests:
-       @/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]"
+TEST_PROGS := cpu-on-off-test.sh
+
+include ../lib.mk
 
 run_full_test:
-       @/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+       @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
 
 clean:
index 29e8c6b..736c3dd 100644 (file)
@@ -1,12 +1,13 @@
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall
 
 test_objs = open-unlink create-read
 
 all: $(test_objs)
 
-run_tests: all
-       @/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]"
+TEST_PROGS := efivarfs.sh
+TEST_FILES := $(test_objs)
+
+include ../lib.mk
 
 clean:
        rm -f $(test_objs)
old mode 100644 (file)
new mode 100755 (executable)
index 66dfc2c..4edb7d0 100644 (file)
@@ -1,4 +1,3 @@
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall
 BINARIES = execveat
 DEPS = execveat.symlink execveat.denatured script subdir
@@ -18,8 +17,12 @@ execveat.denatured: execveat
 %: %.c
        $(CC) $(CFLAGS) -o $@ $^
 
-run_tests: all
-       ./execveat
+TEST_PROGS := execveat
+TEST_FILES := $(DEPS)
+
+include ../lib.mk
+
+override EMIT_TESTS := echo "mkdir -p subdir; (./execveat && echo \"selftests: execveat [PASS]\") || echo \"selftests: execveat [FAIL]\""
 
 clean:
        rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx*
index e238c95..8d5d1d2 100644 (file)
@@ -30,7 +30,7 @@ static int execveat_(int fd, const char *path, char **argv, char **envp,
 #ifdef __NR_execveat
        return syscall(__NR_execveat, fd, path, argv, envp, flags);
 #else
-       errno = -ENOSYS;
+       errno = ENOSYS;
        return -1;
 #endif
 }
@@ -234,6 +234,14 @@ static int run_tests(void)
        int fd_cloexec = open_or_die("execveat", O_RDONLY|O_CLOEXEC);
        int fd_script_cloexec = open_or_die("script", O_RDONLY|O_CLOEXEC);
 
+       /* Check if we have execveat at all, and bail early if not */
+       errno = 0;
+       execveat_(-1, NULL, NULL, NULL, 0);
+       if (errno == ENOSYS) {
+               printf("[FAIL] ENOSYS calling execveat - no kernel support?\n");
+               return 1;
+       }
+
        /* Change file position to confirm it doesn't affect anything */
        lseek(fd, 10, SEEK_SET);
 
index e23cce0..9bf8223 100644 (file)
@@ -3,25 +3,9 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-fw_filesystem:
-       @if /bin/sh ./fw_filesystem.sh ; then \
-                echo "fw_filesystem: ok"; \
-        else \
-                echo "fw_filesystem: [FAIL]"; \
-                exit 1; \
-        fi
+TEST_PROGS := fw_filesystem.sh fw_userhelper.sh
 
-fw_userhelper:
-       @if /bin/sh ./fw_userhelper.sh ; then \
-                echo "fw_userhelper: ok"; \
-        else \
-                echo "fw_userhelper: [FAIL]"; \
-                exit 1; \
-        fi
-
-run_tests: all fw_filesystem fw_userhelper
+include ../lib.mk
 
 # Nothing to clean up.
 clean:
-
-.PHONY: all clean run_tests fw_filesystem fw_userhelper
index 76cc9f1..3467206 100644 (file)
@@ -1,7 +1,8 @@
 all:
 
-run_tests:
-       @/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]"
+TEST_PROGS := ftracetest
+
+include ../lib.mk
 
 clean:
        rm -rf logs/*
index fd9c49a..aa51f6c 100644 (file)
@@ -2,4 +2,4 @@
 # description: Basic event tracing check
 test -f available_events -a -f set_event -a -d events
 # check scheduler events are available
-grep -q sched available_events && exit 0 || exit -1
\ No newline at end of file
+grep -q sched available_events && exit 0 || exit $FAIL
index 668616d..87eb9d6 100644 (file)
@@ -9,7 +9,11 @@ do_reset() {
 fail() { #msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
 if [ ! -f set_event -o ! -d events/sched ]; then
@@ -21,7 +25,8 @@ reset_tracer
 do_reset
 
 echo 'sched:sched_switch' > set_event
-usleep 1
+
+yield
 
 count=`cat trace | grep sched_switch | wc -l`
 if [ $count -eq 0 ]; then
@@ -31,7 +36,8 @@ fi
 do_reset
 
 echo 1 > events/sched/sched_switch/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep sched_switch | wc -l`
 if [ $count -eq 0 ]; then
@@ -41,7 +47,8 @@ fi
 do_reset
 
 echo 0 > events/sched/sched_switch/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep sched_switch | wc -l`
 if [ $count -ne 0 ]; then
index 655c415..ced27ef 100644 (file)
@@ -9,7 +9,11 @@ do_reset() {
 fail() { #msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
 if [ ! -f set_event -o ! -d events/sched ]; then
@@ -21,7 +25,8 @@ reset_tracer
 do_reset
 
 echo 'sched:*' > set_event
-usleep 1
+
+yield
 
 count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
@@ -31,7 +36,8 @@ fi
 do_reset
 
 echo 1 > events/sched/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
@@ -41,7 +47,8 @@ fi
 do_reset
 
 echo 0 > events/sched/enable
-usleep 1
+
+yield
 
 count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -ne 0 ]; then
index 4808457..0bb5df3 100644 (file)
@@ -9,7 +9,11 @@ do_reset() {
 fail() { #msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
 }
 
 if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
@@ -21,6 +25,9 @@ reset_tracer
 do_reset
 
 echo '*:*' > set_event
+
+yield
+
 count=`cat trace | grep -v ^# | wc -l`
 if [ $count -eq 0 ]; then
     fail "none of events are recorded"
@@ -29,6 +36,9 @@ fi
 do_reset
 
 echo 1 > events/enable
+
+yield
+
 count=`cat trace | grep -v ^# | wc -l`
 if [ $count -eq 0 ]; then
     fail "none of events are recorded"
@@ -37,6 +47,9 @@ fi
 do_reset
 
 echo 0 > events/enable
+
+yield
+
 count=`cat trace | grep -v ^# | wc -l`
 if [ $count -ne 0 ]; then
     fail "any of events should not be recorded"
index c15e018..15c2dba 100644 (file)
@@ -16,7 +16,9 @@ fi
 
 do_reset() {
     reset_tracer
-    echo 0 > /proc/sys/kernel/stack_tracer_enabled
+    if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
+           echo 0 > /proc/sys/kernel/stack_tracer_enabled
+    fi
     enable_tracing
     clear_trace
     echo > set_ftrace_filter
@@ -25,7 +27,7 @@ do_reset() {
 fail() { # msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
 }
 
 disable_tracing
index 6af5f63..0ab2189 100644 (file)
@@ -17,7 +17,7 @@ do_reset() {
 fail() { # msg
     do_reset
     echo $1
-    exit -1
+    exit $FAIL
 }
 
 disable_tracing
index 2e719cb..7808336 100644 (file)
@@ -31,7 +31,7 @@ fail() { # mesg
     reset_tracer
     echo > set_ftrace_filter
     echo $1
-    exit -1
+    exit $FAIL
 }
 
 echo "Testing function tracer with profiler:"
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
new file mode 100755 (executable)
index 0000000..17d5bd0
--- /dev/null
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# gen_kselftest_tar
+# Generate kselftest tarball
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# This software may be freely redistributed under the terms of the GNU
+# General Public License (GPLv2).
+
+# main: pick the tar options from $1, install the tests, then archive them
+main()
+{
+       if [ "$#" -eq 0 ]; then
+               echo "$0: Generating default compression gzip"
+               copts="cvzf"
+               ext=".tar.gz"
+       else
+               case "$1" in
+                       tar)
+                               copts="cvf"
+                               ext=".tar"
+                               ;;
+                       targz)
+                               copts="cvzf"
+                               ext=".tar.gz"
+                               ;;
+                       tarbz2)
+                               copts="cvjf"
+                               ext=".tar.bz2"
+                               ;;
+                       tarxz)
+                               copts="cvJf"
+                               ext=".tar.xz"
+                               ;;
+                       *)
+                               echo "Unknown tarball format $1"
+                               exit 1
+                               ;;
+               esac
+       fi
+
+       install_dir=./kselftest
+
+       # Run kselftest_install.sh to generate the install directory;
+       # stop here if it fails rather than archiving a broken tree
+       ./kselftest_install.sh || exit 1
+       tar "$copts" "kselftest${ext}" "$install_dir" || exit 1
+       echo "Kselftest archive kselftest${ext} created!"
+
+       # clean up install directory
+       rm -rf "$install_dir"
+}
+
+main "$@"
index 74bbefd..25d2e70 100644 (file)
@@ -12,14 +12,11 @@ endif
 CFLAGS += -I../../../../usr/include/
 
 all:
-ifeq ($(ARCH),x86)
-       gcc $(CFLAGS) msgque.c -o msgque_test
-else
-       echo "Not an x86 target, can't build msgque selftest"
-endif
+       $(CC) $(CFLAGS) msgque.c -o msgque_test
+
+TEST_PROGS := msgque_test
 
-run_tests: all
-       ./msgque_test
+include ../lib.mk
 
 clean:
        rm -fr ./msgque_test
index ff0eefd..2ae7450 100644 (file)
@@ -1,10 +1,10 @@
-CC := $(CROSS_COMPILE)$(CC)
 CFLAGS += -I../../../../usr/include/
 
 all: kcmp_test
 
-run_tests: all
-       @./kcmp_test || echo "kcmp_test: [FAIL]"
+TEST_PROGS := kcmp_test
+
+include ../lib.mk
 
 clean:
        $(RM) kcmp_test kcmp-test-file
diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
new file mode 100755 (executable)
index 0000000..1555fbd
--- /dev/null
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Kselftest Install
+# Install kselftest tests
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# This software may be freely redistributed under the terms of the GNU
+# General Public License (GPLv2).
+
+install_loc=`pwd`
+
+main()
+{
+       if [ "$(basename "$install_loc")" !=  "selftests" ]; then
+               echo "$0: Please run it in selftests directory ..."
+               exit 1;
+       fi
+       if [ "$#" -eq 0 ]; then
+               echo "$0: Installing in default location - $install_loc ..."
+       elif [ ! -d "$1" ]; then
+               echo "$0: $1 doesn't exist!!"
+               exit 1;
+       else
+               install_loc="$1"
+               echo "$0: Installing in specified location - $install_loc ..."
+       fi
+
+       install_dir="$install_loc/kselftest"
+
+# Create install directory
+       mkdir -p "$install_dir"
+# Build and install the tests into $install_dir
+       INSTALL_PATH="$install_dir" make install
+}
+
+main "$@"
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
new file mode 100644 (file)
index 0000000..2194155
--- /dev/null
@@ -0,0 +1,35 @@
+# This mimics the top-level Makefile. We do it explicitly here so that this
+# Makefile can operate with or without the kbuild infrastructure.
+CC := $(CROSS_COMPILE)gcc
+
+define RUN_TESTS
+       @for TEST in $(TEST_PROGS); do \
+               (./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \
+       done;
+endef
+
+run_tests: all
+       $(RUN_TESTS)
+
+define INSTALL_RULE
+       mkdir -p $(INSTALL_PATH)
+       install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+endef
+
+install: all
+ifdef INSTALL_PATH
+       $(INSTALL_RULE)
+else
+       $(error Error: set INSTALL_PATH to use install)
+endif
+
+define EMIT_TESTS
+       @for TEST in $(TEST_PROGS); do \
+               echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \
+       done;
+endef
+
+emit_tests:
+       $(EMIT_TESTS)
+
+.PHONY: run_tests all clean install emit_tests
index b80cd10..3e7eb79 100644 (file)
@@ -1,17 +1,19 @@
+CC = $(CROSS_COMPILE)gcc
 CFLAGS += -D_FILE_OFFSET_BITS=64
 CFLAGS += -I../../../../include/uapi/
 CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
 
 all:
-       gcc $(CFLAGS) memfd_test.c -o memfd_test
+       $(CC) $(CFLAGS) memfd_test.c -o memfd_test
 
-run_tests: all
-       gcc $(CFLAGS) memfd_test.c -o memfd_test
-       @./memfd_test || echo "memfd_test: [FAIL]"
+TEST_PROGS := memfd_test
+
+include ../lib.mk
 
 build_fuse:
-       gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
-       gcc $(CFLAGS) fuse_test.c -o fuse_test
+       $(CC) $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
+       $(CC) $(CFLAGS) fuse_test.c -o fuse_test
 
 run_fuse: build_fuse
        @./run_fuse_test.sh || echo "fuse_test: [FAIL]"
index d46b8d4..afb2624 100644 (file)
@@ -1,9 +1,12 @@
 all:
 
-run_tests:
-       @/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]"
+include ../lib.mk
+
+TEST_PROGS := mem-on-off-test.sh
+override RUN_TESTS := ./mem-on-off-test.sh -r 2 || echo "selftests: memory-hotplug [FAIL]"
+override EMIT_TESTS := echo "$(RUN_TESTS)"
 
 run_full_test:
-       @/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
+       @/bin/bash ./mem-on-off-test.sh || echo "memory-hotplug selftests: [FAIL]"
 
 clean:
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
new file mode 100644 (file)
index 0000000..856ad41
--- /dev/null
@@ -0,0 +1 @@
+unprivileged-remount-test
index 337d853..95580a9 100644 (file)
@@ -1,17 +1,16 @@
 # Makefile for mount selftests.
-
+CFLAGS = -Wall \
+         -O2
 all: unprivileged-remount-test
 
 unprivileged-remount-test: unprivileged-remount-test.c
-       gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test
+       $(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test
 
-# Allow specific tests to be selected.
-test_unprivileged_remount: unprivileged-remount-test
-       @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+include ../lib.mk
 
-run_tests: all test_unprivileged_remount
+TEST_PROGS := unprivileged-remount-test
+override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+override EMIT_TESTS := echo "$(RUN_TESTS)"
 
 clean:
        rm -f unprivileged-remount-test
-
-.PHONY: all test_unprivileged_remount
index 8056e2e..0e3b41e 100644 (file)
@@ -1,10 +1,22 @@
+CFLAGS = -O2
+
 all:
-       gcc -O2 mq_open_tests.c -o mq_open_tests -lrt
-       gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
+       $(CC) $(CFLAGS) mq_open_tests.c -o mq_open_tests -lrt
+       $(CC) $(CFLAGS) -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt
+
+include ../lib.mk
+
+override define RUN_TESTS
+       @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]"
+       @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]"
+endef
+
+TEST_PROGS := mq_open_tests mq_perf_tests
 
-run_tests:
-       @./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]"
-       @./mq_perf_tests || echo "mq_perf_tests: [FAIL]"
+override define EMIT_TESTS
+       echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\""
+       echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\""
+endef
 
 clean:
        rm -f mq_open_tests mq_perf_tests
index 62f22cc..fac4782 100644 (file)
@@ -1,6 +1,5 @@
 # Makefile for net selftests
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall -O2 -g
 
 CFLAGS += -I../../../../usr/include/
@@ -11,9 +10,10 @@ all: $(NET_PROGS)
 %: %.c
        $(CC) $(CFLAGS) -o $@ $^
 
-run_tests: all
-       @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]"
-       @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]"
-       ./test_bpf.sh
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh
+TEST_FILES := $(NET_PROGS)
+
+include ../lib.mk
+
 clean:
        $(RM) $(NET_PROGS)
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index 1d5e7ad..2958fe9 100644 (file)
@@ -8,10 +8,9 @@ ifeq ($(ARCH),powerpc)
 
 GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
 
-CC := $(CROSS_COMPILE)$(CC)
 CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
 
-export CC CFLAGS
+export CFLAGS
 
 TARGETS = pmu copyloops mm tm primitives stringloops
 
@@ -22,10 +21,25 @@ all: $(TARGETS)
 $(TARGETS):
        $(MAKE) -k -C $@ all
 
-run_tests: all
+include ../lib.mk
+
+override define RUN_TESTS
        @for TARGET in $(TARGETS); do \
                $(MAKE) -C $$TARGET run_tests; \
        done;
+endef
+
+override define INSTALL_RULE
+       @for TARGET in $(TARGETS); do \
+               $(MAKE) -C $$TARGET install; \
+       done;
+endef
+
+override define EMIT_TESTS
+       @for TARGET in $(TARGETS); do \
+               $(MAKE) -s -C $$TARGET emit_tests; \
+       done;
+endef
 
 clean:
        @for TARGET in $(TARGETS); do \
@@ -36,4 +50,4 @@ clean:
 tags:
        find . -name '*.c' -o -name '*.h' | xargs ctags
 
-.PHONY: all run_tests clean tags $(TARGETS)
+.PHONY: tags $(TARGETS)
index 6f2d3be..c050235 100644 (file)
@@ -6,24 +6,19 @@ CFLAGS += -D SELFTEST
 # Use our CFLAGS for the implicit .S rule
 ASFLAGS = $(CFLAGS)
 
-PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
+TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7
 EXTRA_SOURCES := validate.c ../harness.c
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
 copyuser_64:     CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base
 copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7
 memcpy_64:       CPPFLAGS += -D COPY_LOOP=test_memcpy
 memcpy_power7:   CPPFLAGS += -D COPY_LOOP=test_memcpy_power7
 
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 clean:
-       rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) *.o
index a14c538..41cc3ed 100644 (file)
@@ -1,21 +1,16 @@
 noarg:
        $(MAKE) -C ../
 
-PROGS := hugetlb_vs_thp_test subpage_prot
+TEST_PROGS := hugetlb_vs_thp_test subpage_prot
 
-all: $(PROGS) tempfile
+all: $(TEST_PROGS) tempfile
 
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 tempfile:
        dd if=/dev/zero of=tempfile bs=64k count=1
 
 clean:
-       rm -f $(PROGS) tempfile
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) tempfile
index c9f4263..5a16117 100644 (file)
@@ -1,38 +1,42 @@
 noarg:
        $(MAKE) -C ../
 
-PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_PROGS := count_instructions l3_bank_test per_event_excludes
 EXTRA_SOURCES := ../harness.c event.c lib.c
 
-SUB_TARGETS = ebb
+all: $(TEST_PROGS) ebb
 
-all: $(PROGS) $(SUB_TARGETS)
-
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
 
 # loop.S can only be built 64-bit
 count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
        $(CC) $(CFLAGS) -m64 -o $@ $^
 
-run_tests: all sub_run_tests
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
-clean: sub_clean
-       rm -f $(PROGS) loop.o
+DEFAULT_RUN_TESTS := $(RUN_TESTS)
+override define RUN_TESTS
+       $(DEFAULT_RUN_TESTS)
+       $(MAKE) -C ebb run_tests
+endef
 
-$(SUB_TARGETS):
-       $(MAKE) -k -C $@ all
+DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
+override define EMIT_TESTS
+       $(DEFAULT_EMIT_TESTS)
+       $(MAKE) -s -C ebb emit_tests
+endef
 
-sub_run_tests: all
-       @for TARGET in $(SUB_TARGETS); do \
-               $(MAKE) -C $$TARGET run_tests; \
-       done;
+# Save the previously defined INSTALL_RULE under the same name that the
+# override below expands, so the default install step is still performed
+# before descending into ebb.
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+       $(DEFAULT_INSTALL_RULE)
+       $(MAKE) -C ebb install
+endef
 
-sub_clean:
-       @for TARGET in $(SUB_TARGETS); do \
-               $(MAKE) -C $$TARGET clean; \
-       done;
+clean:
+       rm -f $(TEST_PROGS) loop.o
+       $(MAKE) -C ebb clean
+
+ebb:
+       $(MAKE) -k -C $@ all
 
-.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS)
+.PHONY: all run_tests clean ebb
index 3dc4332..5cdc9db 100644 (file)
@@ -4,7 +4,7 @@ noarg:
 # The EBB handler is 64-bit code and everything links against it
 CFLAGS += -m64
 
-PROGS := reg_access_test event_attributes_test cycles_test     \
+TEST_PROGS := reg_access_test event_attributes_test cycles_test        \
         cycles_with_freeze_test pmc56_overflow_test            \
         ebb_vs_cpu_event_test cpu_event_vs_ebb_test            \
         cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test    \
@@ -16,18 +16,15 @@ PROGS := reg_access_test event_attributes_test cycles_test  \
         lost_exception_test no_handler_test                    \
         cycles_with_mmcr2_test
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
+$(TEST_PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S
 
 instruction_count_test: ../loop.S
 
 lost_exception_test: ../lib.c
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../../lib.mk
 
 clean:
-       rm -f $(PROGS)
+       rm -f $(TEST_PROGS)
index ea737ca..b68c622 100644 (file)
@@ -1,17 +1,12 @@
 CFLAGS += -I$(CURDIR)
 
-PROGS := load_unaligned_zeropad
+TEST_PROGS := load_unaligned_zeropad
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 clean:
-       rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) *.o
index 506d773..2a728f4 100644 (file)
@@ -2,19 +2,14 @@
 CFLAGS += -m64
 CFLAGS += -I$(CURDIR)
 
-PROGS := memcmp
+TEST_PROGS := memcmp
 EXTRA_SOURCES := memcmp_64.S ../harness.c
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): $(EXTRA_SOURCES)
+$(TEST_PROGS): $(EXTRA_SOURCES)
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 clean:
-       rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) *.o
index 2cede23..34f2ec6 100644 (file)
@@ -1,15 +1,10 @@
-PROGS := tm-resched-dscr
+TEST_PROGS := tm-resched-dscr
 
-all: $(PROGS)
+all: $(TEST_PROGS)
 
-$(PROGS): ../harness.c
+$(TEST_PROGS): ../harness.c
 
-run_tests: all
-       @-for PROG in $(PROGS); do \
-               ./$$PROG; \
-       done;
+include ../../lib.mk
 
 clean:
-       rm -f $(PROGS) *.o
-
-.PHONY: all run_tests clean
+       rm -f $(TEST_PROGS) *.o
index 47ae2d3..453927f 100644 (file)
@@ -6,5 +6,6 @@ all: peeksiginfo
 clean:
        rm -f peeksiginfo
 
-run_tests: all
-       @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]"
+TEST_PROGS := peeksiginfo
+
+include ../lib.mk
index 04dc25e..bbd0b53 100644 (file)
@@ -1,12 +1,11 @@
-CC = $(CROSS_COMPILE)gcc
-
 all: get_size
 
 get_size: get_size.c
        $(CC) -static -ffreestanding -nostartfiles -s $< -o $@
 
-run_tests: all
-       ./get_size
+TEST_PROGS := get_size
+
+include ../lib.mk
 
 clean:
        $(RM) get_size
index 0a92ada..b3c33e0 100644 (file)
@@ -4,16 +4,10 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
 all:
 
-# Allow specific tests to be selected.
-test_num:
-       @/bin/sh ./run_numerictests
+TEST_PROGS := run_numerictests run_stringtests
+TEST_FILES := common_tests
 
-test_string:
-       @/bin/sh ./run_stringtests
-
-run_tests: all test_num test_string
+include ../lib.mk
 
 # Nothing to clean up.
 clean:
-
-.PHONY: all run_tests clean test_num test_string
old mode 100644 (file)
new mode 100755 (executable)
old mode 100644 (file)
new mode 100755 (executable)
index eb2859f..89a3f44 100644 (file)
@@ -1,8 +1,36 @@
-all:
-       gcc posix_timers.c -o posix_timers -lrt
+CC = $(CROSS_COMPILE)gcc
+BUILD_FLAGS = -DKTEST
+CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS)
+LDFLAGS += -lrt -lpthread
 
-run_tests: all
-       ./posix_timers
+# these are all "safe" tests that don't modify
+# system time or require escalated privileges
+TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
+            inconsistency-check raw_skew threadtest rtctest
+
+TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex change_skew \
+                     skew_consistency clocksource-switch leap-a-day \
+                     leapcrash set-tai set-2038
+
+bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED)
+
+all: ${bins}
+
+include ../lib.mk
+
+# these tests require escalated privileges
+# and may modify the system time or trigger
+# other behavior like suspend
+run_destructive_tests: run_tests
+       ./alarmtimer-suspend
+       ./valid-adjtimex
+       ./change_skew
+       ./skew_consistency
+       ./clocksource-switch
+       ./leap-a-day -s -i 10
+       ./leapcrash
+       ./set-tai
+       ./set-2038
 
 clean:
-       rm -f ./posix_timers
+       rm -f ${bins}
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
new file mode 100644 (file)
index 0000000..aaffbde
--- /dev/null
@@ -0,0 +1,185 @@
+/* alarmtimer suspend test
+ *             John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *   This test makes sure the alarmtimer & RTC wakeup code is
+ *   functioning.
+ *
+ *  To build:
+ *     $ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+/*
+ * Fallbacks used when KTEST is not defined and ../kselftest.h is therefore
+ * not included: report the result through the plain process exit status.
+ * Both helpers call exit() and never return to the caller.
+ */
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+/* Terminate the process with exit status 1 (test failed). */
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNREASONABLE_LAT (NSEC_PER_SEC * 4) /* hopefully we resume in 4secs */
+
+#define SUSPEND_SECS 15
+int alarmcount;
+int alarm_clock_id;
+struct timespec start_time;
+
+
+/*
+ * Translate a clock id into its symbolic name.
+ *
+ * Ids without an entry in the table (including CLOCK_HWSPECIFIC and any
+ * value outside 0..NR_CLOCKIDS-1) yield "UNKNOWN_CLOCKID".
+ */
+char *clockstring(int clockid)
+{
+       static char *const names[NR_CLOCKIDS] = {
+               [CLOCK_REALTIME]           = "CLOCK_REALTIME",
+               [CLOCK_MONOTONIC]          = "CLOCK_MONOTONIC",
+               [CLOCK_PROCESS_CPUTIME_ID] = "CLOCK_PROCESS_CPUTIME_ID",
+               [CLOCK_THREAD_CPUTIME_ID]  = "CLOCK_THREAD_CPUTIME_ID",
+               [CLOCK_MONOTONIC_RAW]      = "CLOCK_MONOTONIC_RAW",
+               [CLOCK_REALTIME_COARSE]    = "CLOCK_REALTIME_COARSE",
+               [CLOCK_MONOTONIC_COARSE]   = "CLOCK_MONOTONIC_COARSE",
+               [CLOCK_BOOTTIME]           = "CLOCK_BOOTTIME",
+               [CLOCK_REALTIME_ALARM]     = "CLOCK_REALTIME_ALARM",
+               [CLOCK_BOOTTIME_ALARM]     = "CLOCK_BOOTTIME_ALARM",
+               [CLOCK_TAI]                = "CLOCK_TAI",
+       };
+
+       if (clockid < 0 || clockid >= NR_CLOCKIDS || !names[clockid])
+               return "UNKNOWN_CLOCKID";
+       return names[clockid];
+}
+
+
+/*
+ * Return b - a in nanoseconds.
+ *
+ * Each timespec is flattened to nanoseconds in unsigned 64-bit arithmetic
+ * (NSEC_PER_SEC is unsigned long long) and the difference is converted to
+ * a signed value on return.
+ */
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+       unsigned long long end_ns = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+       unsigned long long begin_ns = NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+
+       return (long long)(end_ns - begin_ns);
+}
+
+int final_ret = 0;
+
+/*
+ * Handler for the alarm-timer signal installed by main().
+ *
+ * Counts the expiry, measures how far past its nominal deadline
+ * (start_time + alarmcount * SUSPEND_SECS) the signal was delivered,
+ * prints the result and records a failure in final_ret when the latency
+ * exceeds UNREASONABLE_LAT.
+ */
+void sigalarm(int signo)
+{
+       long long delta_ns;
+       struct timespec ts;
+
+       clock_gettime(alarm_clock_id, &ts);
+       alarmcount++;
+
+       delta_ns = timespec_sub(start_time, ts);
+       delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount;
+
+       printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec,
+                                                       ts.tv_nsec, delta_ns);
+
+       /*
+        * UNREASONABLE_LAT is unsigned long long.  Compare as signed so
+        * that a negative latency is not converted to a huge unsigned
+        * value and misreported as a failure.
+        */
+       if (delta_ns > (long long)UNREASONABLE_LAT) {
+               printf("[FAIL]\n");
+               final_ret = -1;
+       } else
+               printf("[OK]\n");
+
+}
+
+/*
+ * For each of CLOCK_REALTIME_ALARM and CLOCK_BOOTTIME_ALARM: arm a
+ * periodic SUSPEND_SECS timer, let five alarms fire normally, then keep
+ * suspending the machine to RAM until ten alarms have been seen.  The
+ * signal handler records excessive wakeup latency in final_ret.
+ *
+ * Exits through ksft_exit_fail() if any alarm was late or a timer could
+ * not be armed, otherwise through ksft_exit_pass().
+ */
+int main(void)
+{
+       timer_t tm1;
+       struct itimerspec its1, its2;
+       struct sigevent se;
+       struct sigaction act;
+       int signum = SIGRTMAX;
+
+       /* Set up signal handler: */
+       sigfillset(&act.sa_mask);
+       act.sa_flags = 0;
+       act.sa_handler = sigalarm;
+       sigaction(signum, &act, NULL);
+
+       /* Set up timer: */
+       memset(&se, 0, sizeof(se));
+       se.sigev_notify = SIGEV_SIGNAL;
+       se.sigev_signo = signum;
+       se.sigev_value.sival_int = 0;
+
+       for (alarm_clock_id = CLOCK_REALTIME_ALARM;
+                       alarm_clock_id <= CLOCK_BOOTTIME_ALARM;
+                       alarm_clock_id++) {
+
+               alarmcount = 0;
+               /*
+                * Without a timer no alarm would ever be delivered and the
+                * wait loops below would spin forever, so skip this clock.
+                */
+               if (timer_create(alarm_clock_id, &se, &tm1) == -1) {
+                       printf("timer_create failed, %s unsupported?\n",
+                                       clockstring(alarm_clock_id));
+                       continue;
+               }
+
+               clock_gettime(alarm_clock_id, &start_time);
+               printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id),
+                               start_time.tv_sec, start_time.tv_nsec);
+               printf("Setting alarm for every %i seconds\n", SUSPEND_SECS);
+               its1.it_value = start_time;
+               its1.it_value.tv_sec += SUSPEND_SECS;
+               its1.it_interval.tv_sec = SUSPEND_SECS;
+               its1.it_interval.tv_nsec = 0;
+
+               /* Same reasoning: an unarmed timer must not reach the loops */
+               if (timer_settime(tm1, TIMER_ABSTIME, &its1, &its2) == -1) {
+                       printf("timer_settime failed for %s\n",
+                                       clockstring(alarm_clock_id));
+                       final_ret = -1;
+                       timer_delete(tm1);
+                       continue;
+               }
+
+               while (alarmcount < 5)
+                       sleep(1); /* First 5 alarms, do nothing */
+
+               printf("Starting suspend loops\n");
+               while (alarmcount < 10) {
+                       int ret;
+
+                       sleep(1);
+                       ret = system("echo mem > /sys/power/state");
+                       if (ret)
+                               break;
+               }
+               timer_delete(tm1);
+       }
+       if (final_ret)
+               return ksft_exit_fail();
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
new file mode 100644 (file)
index 0000000..cb19689
--- /dev/null
@@ -0,0 +1,107 @@
+/* ADJ_FREQ Skew change test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2012
+ *             Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which cranks the ADJ_FREQ knob and
+ *  then uses other tests to detect problems. Thus this test requires
+ *  that the raw_skew, inconsistency-check and nanosleep tests be
+ *  present in the same directory it is run from.
+ *
+ *  To build:
+ *     $ gcc change_skew.c -o change_skew -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+/*
+ * Fallbacks used when KTEST is not defined and ../kselftest.h is therefore
+ * not included: report the result through the plain process exit status.
+ * Both helpers call exit() and never return to the caller.
+ */
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+/* Terminate the process with exit status 1 (test failed). */
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+
+/*
+ * Apply a frequency adjustment of @ppm parts-per-million via adjtimex()
+ * and then run the raw_skew, inconsistency-check and nanosleep tests from
+ * the current directory.
+ *
+ * Returns the negative adjtimex() result if the adjustment failed,
+ * otherwise the OR of the three system() statuses (0 when all passed).
+ */
+int change_skew_test(int ppm)
+{
+       struct timex tx = {0};
+       int ret;
+
+       tx.modes = ADJ_FREQUENCY;
+       /*
+        * tx.freq carries ppm with a 16-bit fractional part.  Multiply
+        * rather than shift: left-shifting a negative ppm is undefined.
+        */
+       tx.freq = (long)ppm * 65536;
+
+       ret = adjtimex(&tx);
+       if (ret < 0) {
+               printf("Error adjusting freq\n");
+               return ret;
+       }
+
+       ret = system("./raw_skew");
+       ret |= system("./inconsistency-check");
+       ret |= system("./nanosleep");
+
+       return ret;
+}
+
+
+/*
+ * Stop ntpd, clear any pending offset adjustment, then run
+ * change_skew_test() for each frequency offset in ppm[], stopping at the
+ * first failure.  The frequency adjustment is reset to zero afterwards.
+ *
+ * Returns -1 if the initial adjtimex() call fails; otherwise exits through
+ * ksft_exit_fail() or ksft_exit_pass().
+ */
+int main(int argv, char **argc)
+{
+       struct timex tx = {0};
+       int i, ret;
+
+       int ppm[5] = {0, 250, 500, -250, -500};
+
+       /* Kill ntpd */
+       ret = system("killall -9 ntpd");
+
+       /* Make sure there's no offset adjustment going on */
+       tx.modes = ADJ_OFFSET;
+       tx.offset = 0;
+       ret = adjtimex(&tx);
+
+       if (ret < 0) {
+               printf("Maybe you're not running as root?\n");
+               return -1;
+       }
+
+       for (i = 0; i < 5; i++) {
+               printf("Using %i ppm adjustment\n", ppm[i]);
+               ret = change_skew_test(ppm[i]);
+               if (ret)
+                       break;
+       }
+
+       /*
+        * Set things back.  ADJ_FREQUENCY consumes tx.freq (not tx.offset),
+        * so that is the field that has to be cleared here.
+        */
+       tx.modes = ADJ_FREQUENCY;
+       tx.freq = 0;
+       adjtimex(&tx);
+
+       if (ret) {
+               printf("[FAIL]");
+               return ksft_exit_fail();
+       }
+       printf("[OK]");
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
new file mode 100644 (file)
index 0000000..627ec74
--- /dev/null
@@ -0,0 +1,179 @@
+/* Clocksource change test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2012
+ *             Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which quickly changes the clocksource and
+ *  then uses other tests to detect problems. Thus this test requires
+ *  that the inconsistency-check and nanosleep tests be present in the
+ *  same directory it is run from.
+ *
+ *  To build:
+ *     $ gcc clocksource-switch.c -o clocksource-switch -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/wait.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+/*
+ * Fallbacks used when KTEST is not defined and ../kselftest.h is therefore
+ * not included: report the result through the plain process exit status.
+ * Both helpers call exit() and never return to the caller.
+ */
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+/* Terminate the process with exit status 1 (test failed). */
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+
+/*
+ * Read the names of the available clocksources from sysfs into @list.
+ *
+ * The names in the sysfs file are separated by spaces and/or a newline.
+ * Each name is copied into one row of @list, truncated to fit the
+ * 30-byte row.  Unused rows are left as empty strings.
+ *
+ * Returns the number of names stored, or -1 if the file could not be
+ * opened or read.
+ */
+int get_clocksources(char list[][30])
+{
+       /* Row count of the caller's array (clocksource_list[10][30]) */
+       const int max_entries = 10;
+       int fd, i, count;
+       ssize_t size;
+       char buf[512];
+       char *head, *tmp;
+
+       for (i = 0; i < max_entries; i++)
+               list[i][0] = '\0';
+
+       fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY);
+       if (fd < 0)
+               return -1;
+
+       /* Leave room for the terminator so the parser can rely on it */
+       size = read(fd, buf, sizeof(buf) - 1);
+       close(fd);
+       if (size < 0)
+               return -1;
+       buf[size] = '\0';
+
+       count = 0;
+       head = buf;
+       while (count < max_entries) {
+               /* Skip separators in front of the next name */
+               while (*head == ' ' || *head == '\n')
+                       head++;
+               if (*head == '\0')
+                       break;
+
+               /* Find the end of this name */
+               tmp = head;
+               while (*tmp != ' ' && *tmp != '\n' && *tmp != '\0')
+                       tmp++;
+
+               snprintf(list[count], sizeof(list[count]), "%.*s",
+                               (int)(tmp - head), head);
+               count++;
+               head = tmp;
+       }
+
+       return count;
+}
+
+/*
+ * Read the name of the current clocksource from sysfs into @buf.
+ *
+ * At most @size - 1 bytes are stored and the result is always
+ * NUL-terminated, so it can be handed to string functions.  On failure
+ * @buf holds an empty string (when @size is non-zero).
+ *
+ * Returns 0 on success, -1 if @size is 0 or the file could not be
+ * opened or read.
+ */
+int get_cur_clocksource(char *buf, size_t size)
+{
+       int fd;
+       ssize_t len;
+
+       if (size == 0)
+               return -1;
+       buf[0] = '\0';
+
+       fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY);
+       if (fd < 0)
+               return -1;
+
+       len = read(fd, buf, size - 1);
+       close(fd);
+       if (len < 0)
+               return -1;
+
+       buf[len] = '\0';
+       return 0;
+}
+
+/*
+ * Select @clocksource as the current clocksource by writing its name to
+ * the sysfs current_clocksource file.
+ *
+ * Returns 0 on success, -1 if the file could not be opened for writing
+ * or the write failed.
+ */
+int change_clocksource(char *clocksource)
+{
+       int fd;
+       ssize_t size;   /* signed, so that a failed write() is detectable */
+
+       fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);
+
+       if (fd < 0)
+               return -1;
+
+       size = write(fd, clocksource, strlen(clocksource));
+
+       /* Close on every path so a failed write does not leak the fd */
+       close(fd);
+
+       if (size < 0)
+               return -1;
+
+       return 0;
+}
+
+
+/*
+ * Run ./inconsistency-check for @secs seconds and, only if that
+ * succeeds, ./nanosleep.  Returns the system() status of the first
+ * command that failed, or 0 when both passed.
+ */
+int run_tests(int secs)
+{
+       char cmd[255];
+       int status;
+
+       sprintf(cmd, "./inconsistency-check -t %i", secs);
+       status = system(cmd);
+       if (!status)
+               status = system("./nanosleep");
+       return status;
+}
+
+
+char clocksource_list[10][30];
+
+/*
+ * Validate every available clocksource in turn, then keep cycling
+ * through all of them while a forked child runs the consistency tests
+ * for 60 seconds.  The original clocksource is restored before exiting.
+ *
+ * Returns -1 if the first clocksource cannot be selected; otherwise exits
+ * through ksft_exit_fail() or ksft_exit_pass().
+ */
+int main(int argv, char **argc)
+{
+       char orig_clk[512];
+       int count, i, status = 0;
+       pid_t pid;
+
+       get_cur_clocksource(orig_clk, 512);
+
+       count = get_clocksources(clocksource_list);
+
+       if (change_clocksource(clocksource_list[0])) {
+               printf("Error: You probably need to run this as root\n");
+               return -1;
+       }
+
+       /* Check everything is sane before we start switching asynchronously */
+       for (i = 0; i < count; i++) {
+               printf("Validating clocksource %s\n", clocksource_list[i]);
+               if (change_clocksource(clocksource_list[i])) {
+                       status = -1;
+                       goto out;
+               }
+               if (run_tests(5)) {
+                       status = -1;
+                       goto out;
+               }
+       }
+
+
+       printf("Running Asyncrhonous Switching Tests...\n");
+       /* Flush now so buffered output is not emitted twice after fork() */
+       fflush(stdout);
+       pid = fork();
+       if (pid < 0) {
+               status = -1;
+               goto out;
+       }
+       if (!pid) {
+               /*
+                * run_tests() returns a raw system() wait status.  Only the
+                * low 8 bits of main's return value reach the parent, so a
+                * status such as 256 would look like success; collapse it
+                * to 0/1 first.
+                */
+               return run_tests(60) ? 1 : 0;
+       }
+
+       while (pid != waitpid(pid, &status, WNOHANG))
+               for (i = 0; i < count; i++)
+                       if (change_clocksource(clocksource_list[i])) {
+                               status = -1;
+                               goto out;
+                       }
+out:
+       change_clocksource(orig_clk);
+
+       if (status)
+               return ksft_exit_fail();
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
new file mode 100644 (file)
index 0000000..caf1bc9
--- /dev/null
@@ -0,0 +1,204 @@
+/* Time inconsistency check test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2003, 2004, 2005, 2012
+ *             (C) Copyright Linaro Limited 2015
+ *             Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc inconsistency-check.c -o inconsistency-check -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+/*
+ * Fallbacks used when KTEST is not defined and ../kselftest.h is therefore
+ * not included: report the result through the plain process exit status.
+ * Both helpers call exit() and never return to the caller.
+ */
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+/* Terminate the process with exit status 1 (test failed). */
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define CALLS_PER_LOOP 64
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+/*
+ * Return the symbolic name of @clockid, or "UNKNOWN_CLOCKID" for any id
+ * that has no case below (CLOCK_HWSPECIFIC included).
+ */
+char *clockstring(int clockid)
+{
+       char *name = "UNKNOWN_CLOCKID";
+
+       switch (clockid) {
+       case CLOCK_REALTIME:
+               name = "CLOCK_REALTIME";
+               break;
+       case CLOCK_MONOTONIC:
+               name = "CLOCK_MONOTONIC";
+               break;
+       case CLOCK_PROCESS_CPUTIME_ID:
+               name = "CLOCK_PROCESS_CPUTIME_ID";
+               break;
+       case CLOCK_THREAD_CPUTIME_ID:
+               name = "CLOCK_THREAD_CPUTIME_ID";
+               break;
+       case CLOCK_MONOTONIC_RAW:
+               name = "CLOCK_MONOTONIC_RAW";
+               break;
+       case CLOCK_REALTIME_COARSE:
+               name = "CLOCK_REALTIME_COARSE";
+               break;
+       case CLOCK_MONOTONIC_COARSE:
+               name = "CLOCK_MONOTONIC_COARSE";
+               break;
+       case CLOCK_BOOTTIME:
+               name = "CLOCK_BOOTTIME";
+               break;
+       case CLOCK_REALTIME_ALARM:
+               name = "CLOCK_REALTIME_ALARM";
+               break;
+       case CLOCK_BOOTTIME_ALARM:
+               name = "CLOCK_BOOTTIME_ALARM";
+               break;
+       case CLOCK_TAI:
+               name = "CLOCK_TAI";
+               break;
+       default:
+               break;
+       }
+       return name;
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+/*
+ * Report whether timestamp a is not later than timestamp b.
+ * Returns 1 if a <= b, 0 otherwise.
+ */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+       /* compare seconds as unsigned to avoid false positives on 2038 rollover */
+       unsigned long sec_a = (unsigned long)a.tv_sec;
+       unsigned long sec_b = (unsigned long)b.tv_sec;
+
+       if (sec_a != sec_b)
+               return sec_a < sec_b;
+       return a.tv_nsec <= b.tv_nsec;
+}
+
+
+
+/*
+ * Repeatedly sample @clock_type in batches of CALLS_PER_LOOP readings and
+ * verify that every batch is non-decreasing.
+ *
+ * @seconds is how long to keep sampling, measured on @clock_type itself;
+ * a value of -1 means run until an inconsistency is found.
+ *
+ * On an inconsistency the whole batch is printed with the offending pair
+ * marked, followed by the size of the backwards step, and -1 is returned.
+ * Returns 0 if no inconsistency was seen.
+ */
+int consistency_test(int clock_type, unsigned long seconds)
+{
+       struct timespec list[CALLS_PER_LOOP];
+       int i, inconsistent;
+       long now, then;
+       time_t t;
+       char *start_str;
+
+       clock_gettime(clock_type, &list[0]);
+       now = then = list[0].tv_sec;
+
+       /* timestamp start of test */
+       t = time(0);
+       start_str = ctime(&t);
+
+       while (seconds == -1 || now - then < seconds) {
+               /*
+                * -1 means "none found".  0 cannot be the sentinel because
+                * it is a valid index: an inversion between samples 0 and 1
+                * would otherwise go unnoticed.
+                */
+               inconsistent = -1;
+
+               /* Fill list */
+               for (i = 0; i < CALLS_PER_LOOP; i++)
+                       clock_gettime(clock_type, &list[i]);
+
+               /* Check for inconsistencies */
+               for (i = 0; i < CALLS_PER_LOOP - 1; i++)
+                       if (!in_order(list[i], list[i+1]))
+                               inconsistent = i;
+
+               /* display inconsistency */
+               if (inconsistent >= 0) {
+                       unsigned long long delta;
+
+                       printf("%s\n", start_str);
+                       for (i = 0; i < CALLS_PER_LOOP; i++) {
+                               if (i == inconsistent)
+                                       printf("--------------------\n");
+                               printf("%lu:%lu\n", list[i].tv_sec,
+                                                       list[i].tv_nsec);
+                               if (i == inconsistent + 1)
+                                       printf("--------------------\n");
+                       }
+                       delta = list[inconsistent].tv_sec * NSEC_PER_SEC;
+                       delta += list[inconsistent].tv_nsec;
+                       delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC;
+                       delta -= list[inconsistent+1].tv_nsec;
+                       printf("Delta: %llu ns\n", delta);
+                       fflush(0);
+                       /* timestamp inconsistency*/
+                       t = time(0);
+                       printf("%s\n", ctime(&t));
+                       printf("[FAILED]\n");
+                       return -1;
+               }
+               now = list[0].tv_sec;
+       }
+       printf("[OK]\n");
+       return 0;
+}
+
+
+/*
+ * Parse the -t (runtime in seconds) and -c (single clock id) options and
+ * run consistency_test() on each selected clock id that clock_gettime()
+ * accepts.  CLOCK_HWSPECIFIC is always skipped.
+ *
+ * Exits through ksft_exit_fail() on the first failing clock, otherwise
+ * through ksft_exit_pass().
+ */
+int main(int argc, char *argv[])
+{
+       struct timespec probe;
+       int first_clock = CLOCK_REALTIME;
+       int end_clock = NR_CLOCKIDS;
+       int secs = 10;
+       int id, c;
+
+       /* Process arguments */
+       for (c = getopt(argc, argv, "t:c:"); c != -1;
+                       c = getopt(argc, argv, "t:c:")) {
+               if (c == 't') {
+                       secs = atoi(optarg);
+               } else if (c == 'c') {
+                       /* restrict the run to this one clock id */
+                       first_clock = atoi(optarg);
+                       end_clock = first_clock + 1;
+               } else {
+                       printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]);
+                       printf("        -t: Number of seconds to run\n");
+                       printf("        -c: clockid to use (default, all clockids)\n");
+                       exit(-1);
+               }
+       }
+
+       setbuf(stdout, NULL);
+
+       for (id = first_clock; id < end_clock; id++) {
+               /* skip the reserved id and any clock that cannot be read */
+               if (id == CLOCK_HWSPECIFIC || clock_gettime(id, &probe))
+                       continue;
+
+               printf("Consistent %-30s ", clockstring(id));
+               if (consistency_test(id, secs))
+                       return ksft_exit_fail();
+       }
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
new file mode 100644 (file)
index 0000000..b8272e6
--- /dev/null
@@ -0,0 +1,319 @@
+/* Leap second stress test
+ *              by: John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright 2013, 2015 Linaro Limited
+ *              Licensed under the GPLv2
+ *
+ *  This test signals the kernel to insert a leap second
+ *  every day at midnight GMT. This allows for stressing the
+ *  kernel's leap-second behavior, as well as how well applications
+ *  handle the leap-second discontinuity.
+ *
+ *  Usage: leap-a-day [-s] [-t] [-i <num>]
+ *
+ *  Options:
+ *     -s:     Each iteration, set the date to 10 seconds before midnight GMT.
+ *             This speeds up the number of leapsecond transitions tested,
+ *             but because it calls settimeofday frequently, advancing the
+ *             time by 24 hours every ~16 seconds, it may cause application
+ *             disruption.
+ *
+ *     -i:     Number of iterations to run (default: infinite)
+ *
+ *     -t:     Print CLOCK_TAI time instead of the adjtimex() time
+ *
+ *  Other notes: Disabling NTP prior to running this is advised, as the two
+ *              may conflict in their commands to the kernel.
+ *
+ *  To build:
+ *     $ gcc leap-a-day.c -o leap-a-day -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void) /* stand-in used when KTEST is not defined */
+{
+       exit(0);        /* terminates the process with status 0 */
+}
+static inline int ksft_exit_fail(void) /* stand-in used when KTEST is not defined */
+{
+       exit(1);        /* terminates the process with status 1 */
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+#define CLOCK_TAI 11
+
+/* returns 1 if a <= b, 0 otherwise; seconds are compared first, nanoseconds only break ties */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+       if (a.tv_sec < b.tv_sec)
+               return 1;
+       if (a.tv_sec > b.tv_sec)
+               return 0;
+       if (a.tv_nsec > b.tv_nsec)      /* same second: decide on the nanosecond part */
+               return 0;
+       return 1;       /* equal timestamps count as in order */
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{      /* returns ts advanced by ns nanoseconds */
+       ts.tv_nsec += ns;       /* add the whole offset to the nanosecond field first */
+       while (ts.tv_nsec >= NSEC_PER_SEC) {    /* then carry whole seconds into tv_sec */
+               ts.tv_nsec -= NSEC_PER_SEC;
+               ts.tv_sec++;
+       }
+       return ts;      /* ts was received by value, so the caller's struct is not modified */
+}
+
+char *time_state_str(int state)
+{      /* name of a TIME_* clock state; main() passes adjtimex() return values here */
+       switch (state) {
+       case TIME_OK:   return "TIME_OK";
+       case TIME_INS:  return "TIME_INS";
+       case TIME_DEL:  return "TIME_DEL";
+       case TIME_OOP:  return "TIME_OOP";
+       case TIME_WAIT: return "TIME_WAIT";
+       case TIME_BAD:  return "TIME_BAD";
+       }
+       return "ERROR"; /* any value not listed above */
+}
+
+/* clear NTP time_status & time_state; returns the result of the final adjtimex() call only */
+int clear_time_state(void)
+{
+       struct timex tx;
+       int ret;
+
+       /*
+        * We have to call adjtimex twice here, as kernels
+        * prior to 6b1859dba01c7 (included in 3.5 and
+        * -stable), had an issue with the state machine
+        * and wouldn't clear the STA_INS/DEL flag directly.
+        */
+       tx.modes = ADJ_STATUS;
+       tx.status = STA_PLL;
+       ret = adjtimex(&tx);
+
+       /* Clear maxerror, as it can cause UNSYNC to be set */
+       tx.modes = ADJ_MAXERROR;
+       tx.maxerror = 0;
+       ret = adjtimex(&tx);
+
+       /* Clear the status */
+       tx.modes = ADJ_STATUS;
+       tx.status = 0;
+       ret = adjtimex(&tx);    /* the two earlier results stored in ret are overwritten here */
+
+       return ret;
+}
+
+/* Make sure we cleanup on ctrl-c: reset the NTP state, then terminate the program */
+void handler(int unused)
+{
+       clear_time_state();
+       exit(0);        /* an interrupted run still exits with status 0 */
+}
+
+/*
+ * Test for known hrtimer failure: sleep until an absolute CLOCK_REALTIME
+ * target half a second ahead and print an error if the clock reads earlier
+ * than that target after waking up. Nothing is returned; the result is
+ * only reported on stdout.
+ */
+void test_hrtimer_failure(void)
+{
+       struct timespec now, target;
+
+       clock_gettime(CLOCK_REALTIME, &now);
+       target = timespec_add(now, NSEC_PER_SEC/2);     /* wake-up time: now + 0.5s */
+       clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
+       clock_gettime(CLOCK_REALTIME, &now);    /* re-read the clock after the sleep */
+
+       if (!in_order(target, now))     /* now < target means the sleep ended early */
+               printf("ERROR: hrtimer early expiration failure observed.\n");
+}
+
+/*
+ * Repeatedly arm a leap second for the next midnight GMT, alternating between
+ * insertion (STA_INS) and deletion (STA_DEL), and print the kernel's view of
+ * time every half second while the leap is applied.
+ *
+ * Options:
+ *     -s      set the clock to 10 seconds before midnight on every iteration
+ *     -t      print CLOCK_TAI readings instead of the adjtimex() time
+ *     -i <n>  stop after n iterations; a negative or absent count runs
+ *             until interrupted, and 0 runs no iteration at all
+ *
+ * Exits through ksft_exit_fail() when -t is given but CLOCK_TAI cannot be
+ * read, or when the leap flag cannot be set; otherwise through
+ * ksft_exit_pass() once the requested iterations are done.
+ */
+int main(int argc, char **argv)
+{
+       int settime = 0;
+       int tai_time = 0;
+       int insert = 1;         /* 1: next leap is an insertion, 0: a deletion */
+       int iterations = -1;    /* negative means "run until interrupted" */
+       int opt;
+
+       /* Process arguments */
+       while ((opt = getopt(argc, argv, "sti:")) != -1) {
+               switch (opt) {
+               case 's':
+                       printf("Setting time to speed up testing\n");
+                       settime = 1;
+                       break;
+               case 'i':
+                       iterations = atoi(optarg);
+                       break;
+               case 't':
+                       tai_time = 1;
+                       break;
+               default:
+                       /* Keep the synopsis in step with the option list below */
+                       printf("Usage: %s [-s] [-t] [-i <iterations>]\n", argv[0]);
+                       printf("        -s: Set time to right before leap second each iteration\n");
+                       printf("        -i: Number of iterations\n");
+                       printf("        -t: Print TAI time\n");
+                       exit(-1);
+               }
+       }
+
+       /* Make sure TAI support is present if -t was used */
+       if (tai_time) {
+               struct timespec ts;
+
+               if (clock_gettime(CLOCK_TAI, &ts)) {
+                       printf("System doesn't support CLOCK_TAI\n");
+                       ksft_exit_fail();
+               }
+       }
+
+       signal(SIGINT, handler);
+       /*
+        * SIGKILL can never be caught, so registering a handler for it
+        * always fails. Hook SIGTERM instead so that a plain kill also
+        * resets the NTP state before the program exits.
+        */
+       signal(SIGTERM, handler);
+
+       if (iterations < 0)
+               printf("This runs continuously. Press ctrl-c to stop\n");
+       else
+               printf("Running for %i iterations. Press ctrl-c to stop\n", iterations);
+
+       printf("\n");
+       /* A count of zero ends the loop; negative counts never reach zero */
+       while (iterations != 0) {
+               int ret;
+               struct timespec ts;
+               struct timex tx;
+               time_t now, next_leap;
+
+               /* Get the current time */
+               clock_gettime(CLOCK_REALTIME, &ts);
+
+               /* Calculate the next possible leap second 23:59:60 GMT */
+               next_leap = ts.tv_sec;
+               next_leap += 86400 - (next_leap % 86400);
+
+               if (settime) {
+                       struct timeval tv;
+
+                       tv.tv_sec = next_leap - 10;
+                       tv.tv_usec = 0;
+                       settimeofday(&tv, NULL);
+                       printf("Setting time to %s", ctime(&tv.tv_sec));
+               }
+
+               /* Reset NTP time state */
+               clear_time_state();
+
+               /* Set the leap second insert flag */
+               tx.modes = ADJ_STATUS;
+               if (insert)
+                       tx.status = STA_INS;
+               else
+                       tx.status = STA_DEL;
+               ret = adjtimex(&tx);
+               if (ret < 0) {
+                       printf("Error: Problem setting STA_INS/STA_DEL!: %s\n",
+                                       time_state_str(ret));
+                       return ksft_exit_fail();
+               }
+
+               /* Validate STA_INS was set */
+               tx.modes = 0;
+               ret = adjtimex(&tx);
+               if (tx.status != STA_INS && tx.status != STA_DEL) {
+                       printf("Error: STA_INS/STA_DEL not set!: %s\n",
+                                       time_state_str(ret));
+                       return ksft_exit_fail();
+               }
+
+               if (tai_time) {
+                       printf("Using TAI time,"
+                               " no inconsistencies should be seen!\n");
+               }
+
+               printf("Scheduling leap second for %s", ctime(&next_leap));
+
+               /* Wake up 3 seconds before leap */
+               ts.tv_sec = next_leap - 3;
+               ts.tv_nsec = 0;
+
+               while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
+                       printf("Something woke us up, returning to sleep\n");
+
+               /* Validate STA_INS is still set */
+               tx.modes = 0;
+               ret = adjtimex(&tx);
+               if (tx.status != STA_INS && tx.status != STA_DEL) {
+                       printf("Something cleared STA_INS/STA_DEL, setting it again.\n");
+                       tx.modes = ADJ_STATUS;
+                       if (insert)
+                               tx.status = STA_INS;
+                       else
+                               tx.status = STA_DEL;
+                       ret = adjtimex(&tx);
+               }
+
+               /* Check adjtimex output every half second */
+               now = tx.time.tv_sec;
+               while (now < next_leap + 2) {
+                       char buf[26];
+                       struct timespec tai;
+
+                       tx.modes = 0;
+                       ret = adjtimex(&tx);
+
+                       if (tai_time) {
+                               clock_gettime(CLOCK_TAI, &tai);
+                               printf("%ld sec, %9ld ns\t%s\n",
+                                               tai.tv_sec,
+                                               tai.tv_nsec,
+                                               time_state_str(ret));
+                       } else {
+                               ctime_r(&tx.time.tv_sec, buf);
+                               buf[strlen(buf)-1] = 0; /* remove trailing '\n' */
+
+                               printf("%s + %6ld us (%i)\t%s\n",
+                                               buf,
+                                               tx.time.tv_usec,
+                                               tx.tai,
+                                               time_state_str(ret));
+                       }
+                       now = tx.time.tv_sec;
+                       /* Sleep for another half second */
+                       ts.tv_sec = 0;
+                       ts.tv_nsec = NSEC_PER_SEC / 2;
+                       clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
+               }
+               /* Switch to using other mode */
+               insert = !insert;
+
+               /* Note if kernel has known hrtimer failure */
+               test_hrtimer_failure();
+
+               printf("Leap complete\n\n");
+
+               /*
+                * Only a positive count is counted down. Negative counts are
+                * left untouched so they keep meaning "run continuously", as
+                * announced before the loop.
+                */
+               if (iterations > 0)
+                       iterations--;
+       }
+
+       clear_time_state();
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
new file mode 100644 (file)
index 0000000..a1071bd
--- /dev/null
@@ -0,0 +1,120 @@
+/* Demo leapsecond deadlock
+ *              by: John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright 2013, 2015 Linaro Limited
+ *              Licensed under the GPL
+ *
+ * This test demonstrates leapsecond deadlock that is possible
+ * on kernels from 2.6.26 to 3.3.
+ *
+ * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * RUN AT YOUR OWN RISK!
+ *  To build:
+ *     $ gcc leapcrash.c -o leapcrash -lrt
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void) /* stand-in used when KTEST is not defined */
+{
+       exit(0);        /* terminates the process with status 0 */
+}
+static inline int ksft_exit_fail(void) /* stand-in used when KTEST is not defined */
+{
+       exit(1);        /* terminates the process with status 1 */
+}
+#endif
+
+
+
+/* clear NTP time_status & time_state; returns the result of the second adjtimex() call only */
+int clear_time_state(void)
+{
+       struct timex tx;
+       int ret;
+
+       /*
+        * We have to call adjtimex twice here, as kernels
+        * prior to 6b1859dba01c7 (included in 3.5 and
+        * -stable), had an issue with the state machine
+        * and wouldn't clear the STA_INS/DEL flag directly.
+        */
+       tx.modes = ADJ_STATUS;
+       tx.status = STA_PLL;
+       ret = adjtimex(&tx);
+
+       tx.modes = ADJ_STATUS;
+       tx.status = 0;          /* second pass: clear every status flag */
+       ret = adjtimex(&tx);    /* overwrites the result of the first call */
+
+       return ret;
+}
+
+/* Make sure we cleanup on ctrl-c: reset the NTP state, then terminate the program */
+void handler(int unused)
+{
+       clear_time_state();
+       exit(0);        /* an interrupted run still exits with status 0 */
+}
+
+
+/*
+ * Twenty times over: set the clock to two seconds before the next midnight
+ * GMT, then call adjtimex() with STA_INS in a tight loop until the reported
+ * time has passed the leap second. Prints one '.' per completed round.
+ *
+ * Exits through ksft_exit_fail() if settimeofday() is refused, otherwise
+ * through ksft_exit_pass() after the last round.
+ */
+int main(void)
+{
+       struct timex tx;
+       struct timespec ts;
+       time_t next_leap;
+       int count = 0;
+
+       setbuf(stdout, NULL);
+
+       signal(SIGINT, handler);
+       /*
+        * SIGKILL can never be caught, so registering a handler for it
+        * always fails. Hook SIGTERM instead so that a plain kill also
+        * resets the NTP state before the program exits.
+        */
+       signal(SIGTERM, handler);
+       printf("This runs for a few minutes. Press ctrl-c to stop\n");
+
+       clear_time_state();
+
+       /* Get the current time */
+       clock_gettime(CLOCK_REALTIME, &ts);
+
+       /* Calculate the next possible leap second 23:59:60 GMT */
+       next_leap = ts.tv_sec;
+       next_leap += 86400 - (next_leap % 86400);
+
+       for (count = 0; count < 20; count++) {
+               struct timeval tv;
+
+               /* set the time to 2 seconds before the leap */
+               tv.tv_sec = next_leap - 2;
+               tv.tv_usec = 0;
+               if (settimeofday(&tv, NULL)) {
+                       printf("Error: You're likely not running with proper (ie: root) permissions\n");
+                       return ksft_exit_fail();
+               }
+               /* Read-only query so tx.time is valid before the loop test */
+               tx.modes = 0;
+               adjtimex(&tx);
+
+               /* hammer on adjtimex w/ STA_INS */
+               while (tx.time.tv_sec < next_leap + 1) {
+                       /* Set the leap second insert flag */
+                       tx.modes = ADJ_STATUS;
+                       tx.status = STA_INS;
+                       adjtimex(&tx);
+               }
+               clear_time_state();
+               printf(".");
+       }
+       printf("[OK]\n");
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
new file mode 100644 (file)
index 0000000..a2a3924
--- /dev/null
@@ -0,0 +1,124 @@
+/* Measure mqueue timeout latency
+ *              by: john stultz (john.stultz@linaro.org)
+ *             (C) Copyright Linaro 2013
+ *
+ *             Inspired with permission from example test by:
+ *                     Romain Francoise <romain@orebokech.com>
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc mqueue-lat.c -o mqueue-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <mqueue.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void) /* stand-in used when KTEST is not defined */
+{
+       exit(0);        /* terminates the process with status 0 */
+}
+static inline int ksft_exit_fail(void) /* stand-in used when KTEST is not defined */
+{
+       exit(1);        /* terminates the process with status 1 */
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define TARGET_TIMEOUT         100000000       /* 100ms in nanoseconds */
+#define UNRESONABLE_LATENCY    40000000        /* 40ms in nanosecs */
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{      /* returns b - a in nanoseconds; note the operand order (later time goes second) */
+       long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;    /* b as a nanosecond count */
+
+       ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;     /* minus a as a nanosecond count */
+       return ret;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{      /* returns ts advanced by ns nanoseconds */
+       ts.tv_nsec += ns;       /* add the whole offset to the nanosecond field first */
+       while (ts.tv_nsec >= NSEC_PER_SEC) {    /* then carry whole seconds into tv_sec */
+               ts.tv_nsec -= NSEC_PER_SEC;
+               ts.tv_sec++;
+       }
+       return ts;      /* ts was received by value, so the caller's struct is not modified */
+}
+
+/*
+ * Measure how long mq_timedreceive() takes to time out.
+ *
+ * Opens (creating if needed) the queue "/foo" read-only and performs 100
+ * receives, each with an absolute CLOCK_REALTIME deadline TARGET_TIMEOUT
+ * nanoseconds ahead. The elapsed CLOCK_MONOTONIC time is averaged over
+ * the 100 calls.
+ *
+ * Returns 0 when the average stays within TARGET_TIMEOUT plus
+ * UNRESONABLE_LATENCY, and -1 when it does not or when a queue operation
+ * fails (the failing call is reported with perror()).
+ */
+int mqueue_lat_test(void)
+{
+       mqd_t q;
+       struct mq_attr attr;
+       struct timespec start, end, now, target;
+       int i, count, ret;
+
+       q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL);
+       if (q < 0) {
+               perror("mq_open");
+               return -1;
+       }
+       /* attr.mq_msgsize sizes the receive buffer below, so it must be valid */
+       if (mq_getattr(q, &attr)) {
+               perror("mq_getattr");
+               mq_close(q);
+               return -1;
+       }
+
+       count = 100;
+       clock_gettime(CLOCK_MONOTONIC, &start);
+
+       for (i = 0; i < count; i++) {
+               char buf[attr.mq_msgsize];
+
+               clock_gettime(CLOCK_REALTIME, &now);
+               target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */
+
+               ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target);
+               if (ret < 0 && errno != ETIMEDOUT) {
+                       /* Report first: mq_close() may overwrite errno */
+                       perror("mq_timedreceive");
+                       mq_close(q);
+                       return -1;
+               }
+       }
+       clock_gettime(CLOCK_MONOTONIC, &end);
+
+       mq_close(q);
+
+       /* timespec_sub(a, b) yields b - a, i.e. the elapsed time here */
+       if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY)
+               return -1;
+
+       return 0;
+}
+
+int main(int argc, char **argv)
+{      /* Run mqueue_lat_test() once and report [OK] or [FAILED]; argc and argv are unused. */
+       int ret;
+
+       printf("Mqueue latency :                          ");
+
+       ret = mqueue_lat_test();
+       if (ret < 0) {  /* mqueue_lat_test() returns -1 on any failure */
+               printf("[FAILED]\n");
+               return ksft_exit_fail();
+       }
+       printf("[OK]\n");
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
new file mode 100644 (file)
index 0000000..8a3c29d
--- /dev/null
@@ -0,0 +1,174 @@
+/* Make sure timers don't return early
+ *              by: john stultz (johnstul@us.ibm.com)
+ *                 John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright Linaro 2013 2015
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc nanosleep.c -o nanosleep -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void) /* stand-in used when KTEST is not defined */
+{
+       exit(0);        /* terminates the process with status 0 */
+}
+static inline int ksft_exit_fail(void) /* stand-in used when KTEST is not defined */
+{
+       exit(1);        /* terminates the process with status 1 */
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{      /* printable name of a clockid, taken from the CLOCK_* table defined above */
+       switch (clockid) {
+       case CLOCK_REALTIME:
+               return "CLOCK_REALTIME";
+       case CLOCK_MONOTONIC:
+               return "CLOCK_MONOTONIC";
+       case CLOCK_PROCESS_CPUTIME_ID:
+               return "CLOCK_PROCESS_CPUTIME_ID";
+       case CLOCK_THREAD_CPUTIME_ID:
+               return "CLOCK_THREAD_CPUTIME_ID";
+       case CLOCK_MONOTONIC_RAW:
+               return "CLOCK_MONOTONIC_RAW";
+       case CLOCK_REALTIME_COARSE:
+               return "CLOCK_REALTIME_COARSE";
+       case CLOCK_MONOTONIC_COARSE:
+               return "CLOCK_MONOTONIC_COARSE";
+       case CLOCK_BOOTTIME:
+               return "CLOCK_BOOTTIME";
+       case CLOCK_REALTIME_ALARM:
+               return "CLOCK_REALTIME_ALARM";
+       case CLOCK_BOOTTIME_ALARM:
+               return "CLOCK_BOOTTIME_ALARM";
+       case CLOCK_TAI:
+               return "CLOCK_TAI";
+       };
+       return "UNKNOWN_CLOCKID";       /* CLOCK_HWSPECIFIC has no case, nor does any id outside the table */
+}
+
+/* returns 1 if a <= b, 0 otherwise; seconds are compared first, nanoseconds only break ties */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+       if (a.tv_sec < b.tv_sec)
+               return 1;
+       if (a.tv_sec > b.tv_sec)
+               return 0;
+       if (a.tv_nsec > b.tv_nsec)      /* same second: decide on the nanosecond part */
+               return 0;
+       return 1;       /* equal timestamps count as in order */
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{      /* returns ts advanced by ns nanoseconds */
+       ts.tv_nsec += ns;       /* add the whole offset to the nanosecond field first */
+       while (ts.tv_nsec >= NSEC_PER_SEC) {    /* then carry whole seconds into tv_sec */
+               ts.tv_nsec -= NSEC_PER_SEC;
+               ts.tv_sec++;
+       }
+       return ts;      /* ts was received by value, so the caller's struct is not modified */
+}
+
+int nanosleep_test(int clockid, long long ns)
+{      /* 0: neither sleep of ns nanoseconds ended early; -1: one did; UNSUPPORTED: clockid unusable */
+       struct timespec now, target, rel;
+
+       /* First check abs time */
+       if (clock_gettime(clockid, &now))
+               return UNSUPPORTED;
+       target = timespec_add(now, ns); /* absolute wake-up time on this clock */
+
+       if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL))
+               return UNSUPPORTED;
+       clock_gettime(clockid, &now);
+
+       if (!in_order(target, now))     /* clock reads earlier than the target: woke up early */
+               return -1;
+
+       /* Second check reltime */
+       clock_gettime(clockid, &now);
+       rel.tv_sec = 0;
+       rel.tv_nsec = 0;
+       rel = timespec_add(rel, ns);    /* ns expressed as a normalized relative timespec */
+       target = timespec_add(now, ns); /* earliest time the relative sleep may end */
+       clock_nanosleep(clockid, 0, &rel, NULL);        /* return value is not checked here */
+       clock_gettime(clockid, &now);
+
+       if (!in_order(target, now))
+               return -1;
+       return 0;
+}
+
+int main(int argc, char **argv)
+{      /* Run nanosleep_test() over a range of sleep lengths on each clockid; argc and argv are unused. */
+       long long length;
+       int clockid, ret;
+
+       for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+               /* Skip cputime clockids since nanosleep won't increment cputime */
+               if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+                               clockid == CLOCK_THREAD_CPUTIME_ID ||
+                               clockid == CLOCK_HWSPECIFIC)
+                       continue;
+
+               printf("Nanosleep %-31s ", clockstring(clockid));
+
+               length = 10;    /* sleep lengths in ns: start at 10 and grow 100x per step */
+               while (length <= (NSEC_PER_SEC * 10)) {
+                       ret = nanosleep_test(clockid, length);
+                       if (ret == UNSUPPORTED) {
+                               printf("[UNSUPPORTED]\n");
+                               goto next;      /* move on to the next clockid without printing [OK] */
+                       }
+                       if (ret < 0) {
+                               printf("[FAILED]\n");
+                               return ksft_exit_fail();
+                       }
+                       length *= 100;
+               }
+               printf("[OK]\n");
+next:
+               ret = 0;        /* gives the next: label a statement to attach to */
+       }
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
new file mode 100644 (file)
index 0000000..2d7898f
--- /dev/null
@@ -0,0 +1,190 @@
+/* Measure nanosleep timer latency
+ *              by: john stultz (john.stultz@linaro.org)
+ *             (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc nsleep-lat.c -o nsleep-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void) /* stand-in used when KTEST is not defined */
+{
+       exit(0);        /* terminates the process with status 0 */
+}
+static inline int ksft_exit_fail(void) /* stand-in used when KTEST is not defined */
+{
+       exit(1);        /* terminates the process with status 1 */
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{      /* printable name of a clockid, taken from the CLOCK_* table defined above */
+       switch (clockid) {
+       case CLOCK_REALTIME:
+               return "CLOCK_REALTIME";
+       case CLOCK_MONOTONIC:
+               return "CLOCK_MONOTONIC";
+       case CLOCK_PROCESS_CPUTIME_ID:
+               return "CLOCK_PROCESS_CPUTIME_ID";
+       case CLOCK_THREAD_CPUTIME_ID:
+               return "CLOCK_THREAD_CPUTIME_ID";
+       case CLOCK_MONOTONIC_RAW:
+               return "CLOCK_MONOTONIC_RAW";
+       case CLOCK_REALTIME_COARSE:
+               return "CLOCK_REALTIME_COARSE";
+       case CLOCK_MONOTONIC_COARSE:
+               return "CLOCK_MONOTONIC_COARSE";
+       case CLOCK_BOOTTIME:
+               return "CLOCK_BOOTTIME";
+       case CLOCK_REALTIME_ALARM:
+               return "CLOCK_REALTIME_ALARM";
+       case CLOCK_BOOTTIME_ALARM:
+               return "CLOCK_BOOTTIME_ALARM";
+       case CLOCK_TAI:
+               return "CLOCK_TAI";
+       };
+       return "UNKNOWN_CLOCKID";       /* CLOCK_HWSPECIFIC has no case, nor does any id outside the table */
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{      /* returns ts advanced by ns nanoseconds */
+       ts.tv_nsec += ns;       /* add the whole offset to the nanosecond field first */
+       while (ts.tv_nsec >= NSEC_PER_SEC) {    /* then carry whole seconds into tv_sec */
+               ts.tv_nsec -= NSEC_PER_SEC;
+               ts.tv_sec++;
+       }
+       return ts;      /* ts was received by value, so the caller's struct is not modified */
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{      /* returns b - a in nanoseconds; note the operand order (later time goes second) */
+       long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;    /* b as a nanosecond count */
+
+       ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;     /* minus a as a nanosecond count */
+       return ret;
+}
+
+int nanosleep_lat_test(int clockid, long long ns)
+{      /* 0: average wake-up latency for ns-long sleeps is acceptable; -1: too large; UNSUPPORTED: clockid unusable */
+       struct timespec start, end, target;
+       long long latency = 0;  /* summed lateness of the absolute sleeps, in ns */
+       int i, count;
+
+       target.tv_sec = ns/NSEC_PER_SEC;
+       target.tv_nsec = ns%NSEC_PER_SEC;
+
+       if (clock_gettime(clockid, &start))
+               return UNSUPPORTED;
+       if (clock_nanosleep(clockid, 0, &target, NULL)) /* one untimed sleep to find out whether this clockid can sleep */
+               return UNSUPPORTED;
+
+       count = 10;
+
+       /* First check relative latency */
+       clock_gettime(clockid, &start);
+       for (i = 0; i < count; i++)
+               clock_nanosleep(clockid, 0, &target, NULL);
+       clock_gettime(clockid, &end);
+
+       if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) {      /* average time per sleep beyond the requested ns */
+               printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+               return -1;
+       }
+
+       /* Next check absolute latency */
+       for (i = 0; i < count; i++) {
+               clock_gettime(clockid, &start);
+               target = timespec_add(start, ns);       /* absolute wake-up time for this round */
+               clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL);
+               clock_gettime(clockid, &end);
+               latency += timespec_sub(target, end);   /* end - target: how late the wake-up was */
+       }
+
+       if (latency/count > UNRESONABLE_LATENCY) {
+               printf("Large abs latency: %lld ns :", latency/count);
+               return -1;
+       }
+
+       return 0;
+}
+
+
+
+int main(int argc, char **argv)
+{      /* Run nanosleep_lat_test() over a range of sleep lengths on each clockid; argc and argv are unused. */
+       long long length;
+       int clockid, ret;
+
+       for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+               /* Skip cputime clockids since nanosleep won't increment cputime */
+               if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+                               clockid == CLOCK_THREAD_CPUTIME_ID ||
+                               clockid == CLOCK_HWSPECIFIC)
+                       continue;
+
+               printf("nsleep latency %-26s ", clockstring(clockid));
+
+               length = 10;    /* sleep lengths in ns: start at 10 and grow 100x per step */
+               while (length <= (NSEC_PER_SEC * 10)) {
+                       ret = nanosleep_lat_test(clockid, length);
+                       if (ret)        /* stop at the first non-zero result; it is classified below */
+                               break;
+                       length *= 100;
+
+               }
+
+               if (ret == UNSUPPORTED) {
+                       printf("[UNSUPPORTED]\n");
+                       continue;
+               }
+               if (ret < 0) {
+                       printf("[FAILED]\n");
+                       return ksft_exit_fail();
+               }
+               printf("[OK]\n");
+       }
+       return ksft_exit_pass();
+}
index f87d970..5a246a0 100644 (file)
@@ -35,10 +35,11 @@ static void user_loop(void)
 static void kernel_loop(void)
 {
        void *addr = sbrk(0);
+       int err = 0;
 
-       while (!done) {
-               brk(addr + 4096);
-               brk(addr);
+       while (!done && !err) {
+               err = brk(addr + 4096);
+               err |= brk(addr);
        }
 }
 
@@ -190,8 +191,6 @@ static int check_timer_create(int which)
 
 int main(int argc, char **argv)
 {
-       int err;
-
        printf("Testing posix timers. False negative may happen on CPU execution \n");
        printf("based timers if other threads run on the CPU...\n");
 
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
new file mode 100644 (file)
index 0000000..30906bf
--- /dev/null
@@ -0,0 +1,154 @@
+/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *                 John Stultz <john.stultz@linaro.org>
+ *             (C) Copyright IBM 2012
+ *             (C) Copyright Linaro Limited 2015
+ *             Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc raw_skew.c -o raw_skew -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+/* Fallback used when KTEST is not defined: end the program with status 0. */
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+/* Fallback used when KTEST is not defined: end the program with status 1. */
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+
+#define CLOCK_MONOTONIC_RAW            4
+#define NSEC_PER_SEC 1000000000LL
+
+/*
+ * Right-shift x by s bits.  For a negative x the shift is applied to -x
+ * and the result is negated again, so the >> operator itself is never
+ * applied to a negative value.  Each argument is evaluated once, into a
+ * local of its own type.
+ */
+#define shift_right(x, s) ({           \
+       __typeof__(x) __x = (x);        \
+       __typeof__(s) __s = (s);        \
+       __x < 0 ? -(-__x >> __s) : __x >> __s; \
+})
+
+/* Return val with its sign dropped (val itself when it is not negative). */
+long long llabs(long long val)
+{
+       return (val < 0) ? -val : val;
+}
+
+/* Flatten a timespec into one count: tv_sec * NSEC_PER_SEC + tv_nsec. */
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+       long long whole_secs = ts.tv_sec * NSEC_PER_SEC;
+
+       return whole_secs + ts.tv_nsec;
+}
+
+/* Split ns into a timespec: quotient and remainder of ns by NSEC_PER_SEC. */
+struct timespec nsec_to_ts(long long ns)
+{
+       struct timespec ts = {
+               .tv_sec = ns / NSEC_PER_SEC,
+               .tv_nsec = ns % NSEC_PER_SEC,
+       };
+
+       return ts;
+}
+
+/* Return end - start, both converted with ts_to_nsec(). */
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+       long long from = ts_to_nsec(start);
+       long long to = ts_to_nsec(end);
+
+       return to - from;
+}
+
+/*
+ * Sample CLOCK_MONOTONIC_RAW between two CLOCK_MONOTONIC reads, three
+ * times over, and keep the attempt whose two CLOCK_MONOTONIC reads were
+ * closest together.  *raw receives that attempt's raw reading and *mon the
+ * midpoint of its two CLOCK_MONOTONIC readings.
+ */
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+       struct timespec before, sample, after;
+       long long best = 0;
+       int attempt;
+
+       for (attempt = 0; attempt < 3; attempt++) {
+               long long span, midpoint;
+
+               clock_gettime(CLOCK_MONOTONIC, &before);
+               clock_gettime(CLOCK_MONOTONIC_RAW, &sample);
+               clock_gettime(CLOCK_MONOTONIC, &after);
+
+               span = diff_timespec(before, after);
+
+               /* best == 0 means nothing has been recorded yet */
+               if (best != 0 && span >= best)
+                       continue;
+
+               best = span;
+               *raw = sample;
+               midpoint = (ts_to_nsec(before) + ts_to_nsec(after))/2;
+               *mon = nsec_to_ts(midpoint);
+       }
+}
+
+/*
+ * Measure how far CLOCK_MONOTONIC drifts from CLOCK_MONOTONIC_RAW across a
+ * 120 second sleep and compare that with the frequency adjustment reported
+ * by adjtimex() before and after the sleep.
+ *
+ * Both values are kept scaled by 1000 relative to what is printed (they are
+ * shown as <value/1000>.<three digit fraction>).  The test fails when they
+ * differ by more than 1000, i.e. by more than 1.000 as printed.
+ *
+ * Returns -1 when CLOCK_MONOTONIC_RAW cannot be read; otherwise exits
+ * through ksft_exit_pass() or ksft_exit_fail().
+ */
+int main(int argc, char **argv)
+{
+       struct timespec mon, raw, start, end;
+       long long delta1, delta2, interval, eppm, ppm;
+       struct timex tx1 = { 0 }, tx2 = { 0 };
+
+       setbuf(stdout, NULL);
+
+       if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+               printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+               return -1;
+       }
+
+       /* modes == 0: only read the current adjtimex state */
+       tx1.modes = 0;
+       adjtimex(&tx1);
+       get_monotonic_and_raw(&mon, &raw);
+       start = mon;
+       delta1 = diff_timespec(mon, raw);
+
+       if (tx1.offset)
+               printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n");
+
+       printf("Estimating clock drift: ");
+       sleep(120);
+
+       get_monotonic_and_raw(&mon, &raw);
+       end = mon;
+       tx2.modes = 0;
+       adjtimex(&tx2);
+       delta2 = diff_timespec(mon, raw);
+
+       interval = diff_timespec(start, end);
+
+       /* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */
+       eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval;
+       eppm = -eppm;
+       /* %03i: the fraction is thousandths, so 5 must print as ".005" */
+       printf("%lld.%03i(est)", eppm/1000, abs((int)(eppm%1000)));
+
+       /*
+        * Avg the two actual freq samples adjtimex gave us.  The sum is
+        * widened to long long first so that the multiplication cannot
+        * overflow where long is 32 bits.
+        */
+       ppm = ((long long)tx1.freq + tx2.freq) * 1000 / 2;
+       ppm = shift_right(ppm, 16);
+       printf(" %lld.%03i(act)", ppm/1000, abs((int)(ppm%1000)));
+
+       if (llabs(eppm - ppm) > 1000) {
+               printf("        [FAILED]\n");
+               return ksft_exit_fail();
+       }
+       printf("        [OK]\n");
+       return  ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c
new file mode 100644 (file)
index 0000000..d80ae85
--- /dev/null
@@ -0,0 +1,271 @@
+/*
+ *      Real Time Clock Driver Test/Example Program
+ *
+ *      Compile with:
+ *                  gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest
+ *
+ *      Copyright (C) 1996, Paul Gortmaker.
+ *
+ *      Released under the GNU General Public License, version 2,
+ *      included herein by reference.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+
+/*
+ * Exercise an RTC device: argv[1] when given, default_rtc otherwise.
+ *
+ * Stages, in order: update interrupts consumed with read() and then with
+ * select(), reading the RTC time, a 5 second alarm, and periodic
+ * interrupts at 2Hz up to 64Hz.  A stage whose set-up ioctl fails with
+ * ENOTTY is reported as unsupported and skipped; any other failure prints
+ * a diagnostic and terminates the program.  All progress output goes to
+ * stderr.
+ *
+ * Returns 0 once every attempted stage has completed, 1 on a usage error.
+ */
+int main(int argc, char **argv)
+{
+       int i, fd, retval, irqcount = 0;
+       unsigned long tmp, data;
+       struct rtc_time rtc_tm;
+       const char *rtc = default_rtc;
+       struct timeval start, end, diff;
+
+       switch (argc) {
+       case 2:
+               rtc = argv[1];
+               /* FALLTHROUGH */
+       case 1:
+               break;
+       default:
+               fprintf(stderr, "usage:  rtctest [rtcdev]\n");
+               return 1;
+       }
+
+       fd = open(rtc, O_RDONLY);
+
+       if (fd ==  -1) {
+               perror(rtc);
+               exit(errno);
+       }
+
+       fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n");
+
+       /* Turn on update interrupts (one per second) */
+       retval = ioctl(fd, RTC_UIE_ON, 0);
+       if (retval == -1) {
+               if (errno == ENOTTY) {
+                       fprintf(stderr,
+                               "\n...Update IRQs not supported.\n");
+                       goto test_READ;
+               }
+               perror("RTC_UIE_ON ioctl");
+               exit(errno);
+       }
+
+       fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:",
+                       rtc);
+       fflush(stderr);
+       for (i=1; i<6; i++) {
+               /* This read will block */
+               retval = read(fd, &data, sizeof(unsigned long));
+               if (retval == -1) {
+                       perror("read");
+                       exit(errno);
+               }
+               fprintf(stderr, " %d",i);
+               fflush(stderr);
+               irqcount++;
+       }
+
+       fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:");
+       fflush(stderr);
+       for (i=1; i<6; i++) {
+               struct timeval tv = {5, 0};     /* 5 second timeout on select */
+               fd_set readfds;
+
+               FD_ZERO(&readfds);
+               FD_SET(fd, &readfds);
+               /* The select will wait until an RTC interrupt happens. */
+               retval = select(fd+1, &readfds, NULL, NULL, &tv);
+               if (retval == -1) {
+                       perror("select");
+                       exit(errno);
+               }
+               /*
+                * A return of 0 means the timeout expired with nothing to
+                * read; the read below would then block instead of
+                * returning at once, so treat it as a failure.
+                */
+               if (retval == 0) {
+                       fprintf(stderr,
+                               "\nselect timed out waiting for an update interrupt\n");
+                       exit(1);
+               }
+               /* This read won't block unlike the select-less case above. */
+               retval = read(fd, &data, sizeof(unsigned long));
+               if (retval == -1) {
+                       perror("read");
+                       exit(errno);
+               }
+               fprintf(stderr, " %d",i);
+               fflush(stderr);
+               irqcount++;
+       }
+
+       /* Turn off update interrupts */
+       retval = ioctl(fd, RTC_UIE_OFF, 0);
+       if (retval == -1) {
+               perror("RTC_UIE_OFF ioctl");
+               exit(errno);
+       }
+
+test_READ:
+       /* Read the RTC time/date */
+       retval = ioctl(fd, RTC_RD_TIME, &rtc_tm);
+       if (retval == -1) {
+               perror("RTC_RD_TIME ioctl");
+               exit(errno);
+       }
+
+       fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+               rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+               rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+
+       /* Set the alarm to 5 sec in the future, and check for rollover */
+       rtc_tm.tm_sec += 5;
+       if (rtc_tm.tm_sec >= 60) {
+               rtc_tm.tm_sec %= 60;
+               rtc_tm.tm_min++;
+       }
+       if (rtc_tm.tm_min == 60) {
+               rtc_tm.tm_min = 0;
+               rtc_tm.tm_hour++;
+       }
+       if (rtc_tm.tm_hour == 24)
+               rtc_tm.tm_hour = 0;
+
+       retval = ioctl(fd, RTC_ALM_SET, &rtc_tm);
+       if (retval == -1) {
+               if (errno == ENOTTY) {
+                       fprintf(stderr,
+                               "\n...Alarm IRQs not supported.\n");
+                       goto test_PIE;
+               }
+               perror("RTC_ALM_SET ioctl");
+               exit(errno);
+       }
+
+       /* Read the current alarm settings */
+       retval = ioctl(fd, RTC_ALM_READ, &rtc_tm);
+       if (retval == -1) {
+               perror("RTC_ALM_READ ioctl");
+               exit(errno);
+       }
+
+       fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n",
+               rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+
+       /* Enable alarm interrupts */
+       retval = ioctl(fd, RTC_AIE_ON, 0);
+       if (retval == -1) {
+               perror("RTC_AIE_ON ioctl");
+               exit(errno);
+       }
+
+       fprintf(stderr, "Waiting 5 seconds for alarm...");
+       fflush(stderr);
+       /* This blocks until the alarm ring causes an interrupt */
+       retval = read(fd, &data, sizeof(unsigned long));
+       if (retval == -1) {
+               perror("read");
+               exit(errno);
+       }
+       irqcount++;
+       fprintf(stderr, " okay. Alarm rang.\n");
+
+       /* Disable alarm interrupts */
+       retval = ioctl(fd, RTC_AIE_OFF, 0);
+       if (retval == -1) {
+               perror("RTC_AIE_OFF ioctl");
+               exit(errno);
+       }
+
+test_PIE:
+       /* Read periodic IRQ rate */
+       retval = ioctl(fd, RTC_IRQP_READ, &tmp);
+       if (retval == -1) {
+               /* not all RTCs support periodic IRQs */
+               if (errno == ENOTTY) {
+                       fprintf(stderr, "\nNo periodic IRQ support\n");
+                       goto done;
+               }
+               perror("RTC_IRQP_READ ioctl");
+               exit(errno);
+       }
+       /* tmp is unsigned long, hence %lu */
+       fprintf(stderr, "\nPeriodic IRQ rate is %luHz.\n", tmp);
+
+       fprintf(stderr, "Counting 20 interrupts at:");
+       fflush(stderr);
+
+       /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+       for (tmp=2; tmp<=64; tmp*=2) {
+
+               retval = ioctl(fd, RTC_IRQP_SET, tmp);
+               if (retval == -1) {
+                       /* not all RTCs can change their periodic IRQ rate */
+                       if (errno == ENOTTY) {
+                               fprintf(stderr,
+                                       "\n...Periodic IRQ rate is fixed\n");
+                               goto done;
+                       }
+                       perror("RTC_IRQP_SET ioctl");
+                       exit(errno);
+               }
+
+               fprintf(stderr, "\n%luHz:\t", tmp);
+               fflush(stderr);
+
+               /* Enable periodic interrupts */
+               retval = ioctl(fd, RTC_PIE_ON, 0);
+               if (retval == -1) {
+                       perror("RTC_PIE_ON ioctl");
+                       exit(errno);
+               }
+
+               for (i=1; i<21; i++) {
+                       gettimeofday(&start, NULL);
+                       /* This blocks */
+                       retval = read(fd, &data, sizeof(unsigned long));
+                       if (retval == -1) {
+                               perror("read");
+                               exit(errno);
+                       }
+                       gettimeofday(&end, NULL);
+                       timersub(&end, &start, &diff);
+                       /* allow each period to run up to 10% over 1/tmp seconds */
+                       if (diff.tv_sec > 0 ||
+                           diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+                               fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06lu\n",
+                                      diff.tv_sec, diff.tv_usec,
+                                      (1000000L / tmp));
+                               /* the message went to stderr, so flush that stream */
+                               fflush(stderr);
+                               exit(-1);
+                       }
+
+                       fprintf(stderr, " %d",i);
+                       fflush(stderr);
+                       irqcount++;
+               }
+
+               /* Disable periodic interrupts */
+               retval = ioctl(fd, RTC_PIE_OFF, 0);
+               if (retval == -1) {
+                       perror("RTC_PIE_OFF ioctl");
+                       exit(errno);
+               }
+       }
+
+done:
+       fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+       close(fd);
+
+       return 0;
+}
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
new file mode 100644 (file)
index 0000000..c8a7e14
--- /dev/null
@@ -0,0 +1,144 @@
+/* Time bounds setting test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2012
+ *             Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which sets the time to edge cases then
+ *  uses other tests to detect problems. Thus this test requires that
+ *  the inconsistency-check, nanosleep and nsleep-lat tests be present
+ *  in the same directory it is run from.
+ *
+ *  To build:
+ *     $ gcc set-2038.c -o set-2038 -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+/* Fallback used when KTEST is not defined: end the program with status 0. */
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+/* Fallback used when KTEST is not defined: end the program with status 1. */
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+#define KTIME_MAX      ((long long)~((unsigned long long)1 << 63))
+#define KTIME_SEC_MAX  (KTIME_MAX / NSEC_PER_SEC)
+
+#define YEAR_1901 (-0x7fffffffL)
+#define YEAR_1970 1
+#define YEAR_2038 0x7fffffffL                  /*overflows 32bit time_t */
+#define YEAR_2262 KTIME_SEC_MAX                        /*overflows 64bit ktime_t */
+#define YEAR_MAX  ((long long)((1ULL<<63)-1))  /*overflows 64bit time_t */
+
+/* Return non-zero when long is 4 bytes wide, zero otherwise. */
+int is32bits(void)
+{
+       int long_is_4_bytes = (sizeof(long) == 4);
+
+       return long_is_4_bytes;
+}
+
+/*
+ * Set the system time to `time` seconds (converted to time_t) with zero
+ * microseconds, print the attempt together with settimeofday()'s result,
+ * and return that result.
+ */
+int settime(long long time)
+{
+       struct timeval now = {
+               .tv_sec = (time_t)time,
+               .tv_usec = 0,
+       };
+       int ret = settimeofday(&now, NULL);
+
+       printf("Setting time to 0x%lx: %d\n", (long)time, ret);
+       return ret;
+}
+
+/*
+ * Print the date, then run the sibling test programs from the current
+ * directory.  Returns the OR of the three tests' system() results, so zero
+ * means all of them returned zero; the result of "date" is not part of it.
+ */
+int do_tests(void)
+{
+       int status;
+
+       /* the assignment only consumes the result; it is overwritten below */
+       status = system("date");
+
+       status = system("./inconsistency-check -c 0 -t 20");
+       status |= system("./nanosleep");
+       status |= system("./nsleep-lat");
+
+       return status;
+}
+
+/*
+ * Set the system clock to a series of edge values and run do_tests() at
+ * each one.
+ *
+ * First checks that YEAR_1901, YEAR_MAX and (when long is not 4 bytes)
+ * YEAR_2262 are rejected by settime().  Then tests at YEAR_1970 and
+ * YEAR_2038 - 600.  When long is 4 bytes the remaining steps
+ * (YEAR_2038 - 10 and YEAR_2262 - 600) only run if -d was given.
+ *
+ * The clock is put back before exiting.  Exits through ksft_exit_fail()
+ * when an invalid value was accepted or a do_tests() run returned
+ * non-zero, through ksft_exit_pass() otherwise.
+ */
+int main(int argc, char *argv[])
+{
+       int ret = 0;
+       int opt, dangerous = 0;
+       time_t start;
+       struct timespec mono_start, mono_end;
+       int have_mono;
+
+       /* Process arguments */
+       while ((opt = getopt(argc, argv, "d")) != -1) {
+               switch (opt) {
+               case 'd':
+                       dangerous = 1;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       start = time(0);
+       /*
+        * Also note CLOCK_MONOTONIC so the time spent testing can be added
+        * back when the clock is restored.
+        * NOTE(review): this relies on CLOCK_MONOTONIC not being moved by
+        * settimeofday() - confirm for the target kernel.
+        */
+       have_mono = !clock_gettime(CLOCK_MONOTONIC, &mono_start);
+
+       /* First test that crazy values don't work */
+       if (!settime(YEAR_1901)) {
+               ret = -1;
+               goto out;
+       }
+       if (!settime(YEAR_MAX)) {
+               ret = -1;
+               goto out;
+       }
+       if (!is32bits() && !settime(YEAR_2262)) {
+               ret = -1;
+               goto out;
+       }
+
+       /* Now test behavior near edges */
+       settime(YEAR_1970);
+       ret = do_tests();
+       if (ret)
+               goto out;
+
+       settime(YEAR_2038 - 600);
+       ret = do_tests();
+       if (ret)
+               goto out;
+
+       /* The rest of the tests can blowup on 32bit systems */
+       if (is32bits() && !dangerous)
+               goto out;
+       /* Test rollover behavior 32bit edge */
+       settime(YEAR_2038 - 10);
+       ret = do_tests();
+       if (ret)
+               goto out;
+
+       settime(YEAR_2262 - 600);
+       ret = do_tests();
+
+out:
+       /*
+        * restore clock: the original reading plus however long the tests
+        * ran, so the wall clock does not end up behind
+        */
+       if (have_mono && !clock_gettime(CLOCK_MONOTONIC, &mono_end) &&
+           mono_end.tv_sec > mono_start.tv_sec)
+               start += mono_end.tv_sec - mono_start.tv_sec;
+       settime(start);
+       if (ret)
+               return ksft_exit_fail();
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
new file mode 100644 (file)
index 0000000..dc88dbc
--- /dev/null
@@ -0,0 +1,79 @@
+/* Set tai offset
+ *              by: John Stultz <john.stultz@linaro.org>
+ *              (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+/* Fallback used when KTEST is not defined: end the program with status 0. */
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+/* Fallback used when KTEST is not defined: end the program with status 1. */
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+/*
+ * Issue an ADJ_TAI adjtimex() request carrying `offset` in the constant
+ * field and return adjtimex()'s result unchanged.
+ */
+int set_tai(int offset)
+{
+       struct timex req;
+       int result;
+
+       /* every field other than the two set below is zero */
+       memset(&req, 0, sizeof(req));
+       req.constant = offset;
+       req.modes = ADJ_TAI;
+
+       result = adjtimex(&req);
+       return result;
+}
+
+/*
+ * Call adjtimex() with an all-zero request and return the tai field it
+ * leaves in the structure.  The return value of adjtimex() is not looked
+ * at.
+ */
+int get_tai(void)
+{
+       struct timex query;
+       int tai;
+
+       memset(&query, 0, sizeof(query));
+       adjtimex(&query);
+
+       tai = query.tai;
+       return tai;
+}
+
+/*
+ * Print the current tai offset, then set it to each value from 1 to 60
+ * and read it back after every step.
+ *
+ * Exits through ksft_exit_fail() as soon as a set request fails or the
+ * value read back differs from the one just set; exits through
+ * ksft_exit_pass() when all 60 values round-trip.
+ */
+int main(int argc, char **argv)
+{
+       int i, ret;
+
+       ret = get_tai();
+       printf("tai offset started at %i\n", ret);
+
+       printf("Checking tai offsets can be properly set: ");
+       for (i = 1; i <= 60; i++) {
+               /*
+                * set_tai() hands back adjtimex()'s result, which is
+                * negative on failure; report that directly instead of as
+                * a confusing read-back mismatch.
+                */
+               ret = set_tai(i);
+               if (ret < 0) {
+                       printf("[FAILED] could not set tai offset to %i\n", i);
+                       return ksft_exit_fail();
+               }
+               ret = get_tai();
+               if (ret != i) {
+                       printf("[FAILED] expected: %i got %i\n", i, ret);
+                       return ksft_exit_fail();
+               }
+       }
+       printf("[OK]\n");
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
new file mode 100644 (file)
index 0000000..4fc98c5
--- /dev/null
@@ -0,0 +1,216 @@
+/* set_timer latency test
+ *             John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright Linaro 2014
+ *              Licensed under the GPLv2
+ *
+ *   This test makes sure the set_timer api is correct
+ *
+ *  To build:
+ *     $ gcc set-timer-lat.c -o set-timer-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+/* Fallback used when KTEST is not defined: end the program with status 0. */
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+/* Fallback used when KTEST is not defined: end the program with status 1. */
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define CLOCK_REALTIME                 0
+#define CLOCK_MONOTONIC                        1
+#define CLOCK_PROCESS_CPUTIME_ID       2
+#define CLOCK_THREAD_CPUTIME_ID                3
+#define CLOCK_MONOTONIC_RAW            4
+#define CLOCK_REALTIME_COARSE          5
+#define CLOCK_MONOTONIC_COARSE         6
+#define CLOCK_BOOTTIME                 7
+#define CLOCK_REALTIME_ALARM           8
+#define CLOCK_BOOTTIME_ALARM           9
+#define CLOCK_HWSPECIFIC               10
+#define CLOCK_TAI                      11
+#define NR_CLOCKIDS                    12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+#define TIMER_SECS 1
+int alarmcount;
+int clock_id;
+struct timespec start_time;
+long long max_latency_ns;
+
+/*
+ * Map a clockid to its name.  Any value without an entry in the table
+ * below yields "UNKNOWN_CLOCKID".
+ */
+char *clockstring(int clockid)
+{
+       /* indexed by clockid; slots not listed here stay NULL */
+       static char *const names[] = {
+               [CLOCK_REALTIME]                = "CLOCK_REALTIME",
+               [CLOCK_MONOTONIC]               = "CLOCK_MONOTONIC",
+               [CLOCK_PROCESS_CPUTIME_ID]      = "CLOCK_PROCESS_CPUTIME_ID",
+               [CLOCK_THREAD_CPUTIME_ID]       = "CLOCK_THREAD_CPUTIME_ID",
+               [CLOCK_MONOTONIC_RAW]           = "CLOCK_MONOTONIC_RAW",
+               [CLOCK_REALTIME_COARSE]         = "CLOCK_REALTIME_COARSE",
+               [CLOCK_MONOTONIC_COARSE]        = "CLOCK_MONOTONIC_COARSE",
+               [CLOCK_BOOTTIME]                = "CLOCK_BOOTTIME",
+               [CLOCK_REALTIME_ALARM]          = "CLOCK_REALTIME_ALARM",
+               [CLOCK_BOOTTIME_ALARM]          = "CLOCK_BOOTTIME_ALARM",
+               [CLOCK_TAI]                     = "CLOCK_TAI",
+       };
+       const int count = (int)(sizeof(names) / sizeof(names[0]));
+
+       if (clockid >= 0 && clockid < count && names[clockid])
+               return names[clockid];
+       return "UNKNOWN_CLOCKID";
+}
+
+
+/*
+ * Return b - a, with each side flattened to
+ * NSEC_PER_SEC * tv_sec + tv_nsec.
+ */
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+       unsigned long long from = NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+       unsigned long long to = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+       return to - from;
+}
+
+
+/*
+ * Signal handler for the timer signal.  Reads the current time on the
+ * global clock_id, counts the expiration in alarmcount, and records in
+ * max_latency_ns the largest amount by which an expiration arrived after
+ * start_time + TIMER_SECS seconds * alarmcount.  An expiration that arrives
+ * before that point is reported on stdout.
+ * NOTE(review): printf() is called from signal context here - confirm that
+ * is acceptable for this test.
+ */
+void sigalarm(int signo)
+{
+       long long delta_ns;
+       struct timespec ts;
+
+       /* take the timestamp first so the work below is not counted as latency */
+       clock_gettime(clock_id, &ts);
+       alarmcount++;
+
+       /* time since the timer was armed, minus when this expiration was due */
+       delta_ns = timespec_sub(start_time, ts);
+       delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount;
+
+       if (delta_ns < 0)
+               printf("%s timer fired early: FAIL\n", clockstring(clock_id));
+
+       if (delta_ns > max_latency_ns)
+               max_latency_ns = delta_ns;
+}
+
+/*
+ * Arm a periodic timer of TIMER_SECS seconds on clock_id, wait for five
+ * expirations and print the largest latency recorded by sigalarm().
+ *
+ * clock_id: clock to create the timer on.  This parameter hides the global
+ *           of the same name inside this function; sigalarm() reads the
+ *           global.
+ * flags:    passed to timer_settime().  When non-zero the first expiration
+ *           is given as start_time + TIMER_SECS, otherwise as a plain
+ *           TIMER_SECS value.
+ *
+ * Returns 0 when the maximum latency is below UNRESONABLE_LATENCY, and also
+ * when timer_create() fails on CLOCK_REALTIME_ALARM or CLOCK_BOOTTIME_ALARM
+ * (printed as unsupported).  Returns -1 on any other failure.
+ */
+int do_timer(int clock_id, int flags)
+{
+       struct sigevent se;
+       timer_t tm1;
+       struct itimerspec its1, its2;
+       int err;
+
+       /* Set up timer: */
+       memset(&se, 0, sizeof(se));
+       se.sigev_notify = SIGEV_SIGNAL;
+       se.sigev_signo = SIGRTMAX;
+       se.sigev_value.sival_int = 0;
+
+       /* reset the state sigalarm() accumulates into */
+       max_latency_ns = 0;
+       alarmcount = 0;
+
+       err = timer_create(clock_id, &se, &tm1);
+       if (err) {
+               if ((clock_id == CLOCK_REALTIME_ALARM) ||
+                   (clock_id == CLOCK_BOOTTIME_ALARM)) {
+                       printf("%-22s %s missing CAP_WAKE_ALARM?    : [UNSUPPORTED]\n",
+                                       clockstring(clock_id),
+                                       flags ? "ABSTIME":"RELTIME");
+                       return 0;
+               }
+               printf("%s - timer_create() failed\n", clockstring(clock_id));
+               return -1;
+       }
+
+       clock_gettime(clock_id, &start_time);
+       if (flags) {
+               its1.it_value = start_time;
+               its1.it_value.tv_sec += TIMER_SECS;
+       } else {
+               its1.it_value.tv_sec = TIMER_SECS;
+               its1.it_value.tv_nsec = 0;
+       }
+       its1.it_interval.tv_sec = TIMER_SECS;
+       its1.it_interval.tv_nsec = 0;
+
+       err = timer_settime(tm1, flags, &its1, &its2);
+       if (err) {
+               printf("%s - timer_settime() failed\n", clockstring(clock_id));
+               /* do not leave the timer created above behind */
+               timer_delete(tm1);
+               return -1;
+       }
+
+       /* alarmcount is advanced by sigalarm() on every expiration */
+       while (alarmcount < 5)
+               sleep(1);
+
+       printf("%-22s %s max latency: %10lld ns : ",
+                       clockstring(clock_id),
+                       flags ? "ABSTIME":"RELTIME",
+                       max_latency_ns);
+
+       timer_delete(tm1);
+       if (max_latency_ns < UNRESONABLE_LATENCY) {
+               printf("[OK]\n");
+               return 0;
+       }
+       printf("[FAILED]\n");
+       return -1;
+}
+
+/*
+ * Install sigalarm() as the SIGRTMAX handler, then run do_timer() in both
+ * absolute and relative mode on every clockid that is not skipped below.
+ * Exits through ksft_exit_fail() if any run returned non-zero, through
+ * ksft_exit_pass() otherwise.
+ */
+int main(void)
+{
+       struct sigaction handler_cfg;
+       int failed = 0;
+
+       /* Set up signal handler: */
+       handler_cfg.sa_handler = sigalarm;
+       handler_cfg.sa_flags = 0;
+       sigfillset(&handler_cfg.sa_mask);
+       sigaction(SIGRTMAX, &handler_cfg, NULL);
+
+       printf("Setting timers for every %i seconds\n", TIMER_SECS);
+       /* the loop variable is the global clock_id, which sigalarm() reads */
+       for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) {
+               switch (clock_id) {
+               case CLOCK_PROCESS_CPUTIME_ID:
+               case CLOCK_THREAD_CPUTIME_ID:
+               case CLOCK_MONOTONIC_RAW:
+               case CLOCK_REALTIME_COARSE:
+               case CLOCK_MONOTONIC_COARSE:
+               case CLOCK_HWSPECIFIC:
+                       /* not exercised by this test */
+                       continue;
+               default:
+                       break;
+               }
+
+               failed |= do_timer(clock_id, TIMER_ABSTIME);
+               failed |= do_timer(clock_id, 0);
+       }
+
+       return failed ? ksft_exit_fail() : ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
new file mode 100644 (file)
index 0000000..5562f84
--- /dev/null
@@ -0,0 +1,89 @@
+/* ADJ_FREQ Skew consistency test
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2012
+ *             Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which cranks the ADJ_FREQ knob back
+ *  and forth and watches for consistency problems. Thus this test requires
+ *  that the inconsistency-check tests be present in the same directory it
+ *  is run from.
+ *
+ *  To build:
+ *     $ gcc skew_consistency.c -o skew_consistency -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+/* Fallback used when KTEST is not defined: end the program with status 0. */
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+/* Fallback used when KTEST is not defined: end the program with status 1. */
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000LL
+
+/*
+ * Run ./inconsistency-check in a child process while the parent flips the
+ * adjtimex() frequency between +500 and -500 (shifted into the freq
+ * field's 16 bit fraction) every half second until the child finishes.
+ *
+ * The frequency is set to zero afterwards.  Exits through
+ * ksft_exit_fail() when fork() fails or the child reports a non-zero
+ * status, through ksft_exit_pass() otherwise.
+ */
+int main(int argc, char **argv)
+{
+       struct timex tx;
+       int ret, ppm;
+       pid_t pid;
+
+
+       printf("Running Asyncrhonous Frequency Changing Tests...\n");
+
+       pid = fork();
+       if (pid < 0) {
+               /* with no child, waitpid() below would end the loop at once */
+               printf("fork() failed\n");
+               printf("[FAILED]\n");
+               return ksft_exit_fail();
+       }
+       if (!pid) {
+               /*
+                * system() returns a wait status, which keeps the exit
+                * code in its upper bits; passing it on as this process's
+                * exit status would truncate an exit code of 1 to 0.
+                * Collapse it to 0/1 instead.
+                */
+               ret = system("./inconsistency-check -c 1 -t 600");
+               exit(ret ? 1 : 0);
+       }
+
+       ppm = 500;
+       ret = 0;
+
+       while (pid != waitpid(pid, &ret, WNOHANG)) {
+               ppm = -ppm;
+               memset(&tx, 0, sizeof(tx));
+               tx.modes = ADJ_FREQUENCY;
+               /* same value as ppm << 16, without left-shifting a negative */
+               tx.freq = ppm * 65536;
+               adjtimex(&tx);
+               usleep(500000);
+       }
+
+       /* Set things back: ADJ_FREQUENCY acts on freq, so that is what is cleared */
+       memset(&tx, 0, sizeof(tx));
+       tx.modes = ADJ_FREQUENCY;
+       tx.freq = 0;
+       adjtimex(&tx);
+
+
+       if (ret) {
+               printf("[FAILED]\n");
+               return ksft_exit_fail();
+       }
+       printf("[OK]\n");
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
new file mode 100644 (file)
index 0000000..e632e11
--- /dev/null
@@ -0,0 +1,204 @@
+/* threadtest.c
+ *             by: john stultz (johnstul@us.ibm.com)
+ *             (C) Copyright IBM 2004, 2005, 2006, 2012
+ *             Licensed under the GPLv2
+ *
+ *  To build:
+ *     $ gcc threadtest.c -o threadtest -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+
+/* serializes shared list access */
+pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
+/* serializes console output */
+pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+#define MAX_THREADS 128
+#define LISTSIZE 128
+
+int done = 0;
+
+struct timespec global_list[LISTSIZE];
+int listcount = 0;
+
+
+/*
+ * Walk a list of timestamps that were sampled in order and report every
+ * neighbouring pair where the later sample is earlier than the one
+ * before it.  For each such pair the global `done` flag is set and the
+ * whole list is dumped (under print_lock) with the offending pair
+ * bracketed by dashed lines, followed by "[FAILED]".
+ *
+ * list: array of samples
+ * size: number of entries in list
+ */
+void checklist(struct timespec *list, int size)
+{
+       int i, j;
+       struct timespec *a, *b;
+
+       /* scan the list */
+       for (i = 0; i < size-1; i++) {
+               a = &list[i];
+               b = &list[i+1];
+
+               /*
+                * look for any time inconsistencies: b went backwards if
+                * its seconds are smaller, or the seconds are equal and
+                * its nanoseconds are smaller
+                */
+               if ((b->tv_sec < a->tv_sec) ||
+                       ((b->tv_sec == a->tv_sec) &&
+                        (b->tv_nsec < a->tv_nsec))) {
+
+                       /* flag other threads */
+                       done = 1;
+
+                       /*serialize printing to avoid junky output*/
+                       pthread_mutex_lock(&print_lock);
+
+                       /* dump the list */
+                       printf("\n");
+                       for (j = 0; j < size; j++) {
+                               if (j == i)
+                                       printf("---------------\n");
+                               /* casts make the arguments match %lu */
+                               printf("%lu:%lu\n",
+                                       (unsigned long)list[j].tv_sec,
+                                       (unsigned long)list[j].tv_nsec);
+                               if (j == i+1)
+                                       printf("---------------\n");
+                       }
+                       printf("[FAILED]\n");
+
+                       pthread_mutex_unlock(&print_lock);
+               }
+       }
+}
+
+/* The shared thread shares a global list
+ * that each thread fills while holding the lock.
+ * This stresses clock synchronization across cpus.
+ *
+ * Thread entry point; arg is unused.  Loops until the global `done`
+ * flag is set and always returns NULL.
+ */
+void *shared_thread(void *arg)
+{
+       while (!done) {
+               /* protect the list */
+               pthread_mutex_lock(&list_lock);
+
+               /* see if we're ready to check the list */
+               if (listcount >= LISTSIZE) {
+                       checklist(global_list, LISTSIZE);
+                       listcount = 0;
+               }
+               /* append one CLOCK_MONOTONIC sample to the shared list */
+               clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]);
+
+               pthread_mutex_unlock(&list_lock);
+       }
+       return NULL;
+}
+
+
+/* Thread entry point for the -i mode: every thread samples
+ * CLOCK_MONOTONIC into a private LISTSIZE-entry array and hands the
+ * full array to checklist(), repeating until `done` is set.
+ * This stresses clock_gettime() lock contention.
+ */
+void *independent_thread(void *arg)
+{
+       struct timespec samples[LISTSIZE];
+       int idx;
+
+       for (;;) {
+               if (done)
+                       break;
+
+               /* fill the list */
+               idx = 0;
+               while (idx < LISTSIZE) {
+                       clock_gettime(CLOCK_MONOTONIC, &samples[idx]);
+                       idx++;
+               }
+
+               checklist(samples, LISTSIZE);
+       }
+       return NULL;
+}
+
+#define DEFAULT_THREAD_COUNT 8
+#define DEFAULT_RUNTIME 30
+
+/*
+ * Spawn thread_count worker threads (shared_thread by default,
+ * independent_thread with -i), let them run for `runtime` seconds or
+ * until one of them sets `done`, then join them and exit through
+ * ksft_exit_pass()/ksft_exit_fail().
+ */
+int main(int argc, char **argv)
+{
+       int thread_count, i;
+       int created = 0;        /* threads that were actually started */
+       int err;
+       time_t start, now, runtime;
+       char buf[255];
+       pthread_t pth[MAX_THREADS];
+       int opt;
+       void *tret;
+       int ret = 0;
+       void *(*thread)(void *) = shared_thread;
+
+       thread_count = DEFAULT_THREAD_COUNT;
+       runtime = DEFAULT_RUNTIME;
+
+       /* Process arguments */
+       while ((opt = getopt(argc, argv, "t:n:i")) != -1) {
+               switch (opt) {
+               case 't':
+                       runtime = atoi(optarg);
+                       break;
+               case 'n':
+                       thread_count = atoi(optarg);
+                       break;
+               case 'i':
+                       thread = independent_thread;
+                       printf("using independent threads\n");
+                       break;
+               default:
+                       printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]);
+                       printf("        -t: time to run\n");
+                       printf("        -n: number of threads\n");
+                       printf("        -i: use independent threads\n");
+                       return -1;
+               }
+       }
+
+       if (thread_count > MAX_THREADS)
+               thread_count = MAX_THREADS;
+
+
+       setbuf(stdout, NULL);
+
+       start = time(0);
+       strftime(buf, sizeof(buf), "%a, %d %b %Y %T %z", localtime(&start));
+       printf("%s\n", buf);
+       /* time_t is not guaranteed to be long; cast to match %ld */
+       printf("Testing consistency with %i threads for %ld seconds: ", thread_count, (long)runtime);
+
+       /* spawn */
+       for (i = 0; i < thread_count; i++) {
+               err = pthread_create(&pth[i], 0, thread, 0);
+               if (err) {
+                       /*
+                        * pth[i] holds no valid handle; stop the threads
+                        * already running and join only those.
+                        */
+                       printf("[FAILED]\n");
+                       printf("Error: pthread_create failed (%d)\n", err);
+                       done = 1;
+                       ret = 1;
+                       goto out;
+               }
+               created++;
+       }
+
+       while (time(&now) < start + runtime) {
+               sleep(1);
+               if (done) {
+                       ret = 1;
+                       strftime(buf, sizeof(buf), "%a, %d %b %Y %T %z", localtime(&now));
+                       printf("%s\n", buf);
+                       goto out;
+               }
+       }
+       printf("[OK]\n");
+       done = 1;
+
+out:
+       /* wait */
+       for (i = 0; i < created; i++)
+               pthread_join(pth[i], &tret);
+
+       /* die */
+       if (ret)
+               return ksft_exit_fail();
+       return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
new file mode 100644 (file)
index 0000000..e86d937
--- /dev/null
@@ -0,0 +1,202 @@
+/* valid adjtimex test
+ *              by: John Stultz <john.stultz@linaro.org>
+ *              (C) Copyright Linaro 2015
+ *              Licensed under the GPLv2
+ *
+ *  This test validates adjtimex interface with valid
+ *  and invalid test data.
+ *
+ *  Usage: valid-adjtimex
+ *
+ *  To build:
+ *     $ gcc valid-adjtimex.c -o valid-adjtimex -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#ifdef KTEST
+#include "../kselftest.h"
+#else
+static inline int ksft_exit_pass(void)
+{
+       exit(0);
+}
+static inline int ksft_exit_fail(void)
+{
+       exit(1);
+}
+#endif
+
+#define NSEC_PER_SEC 1000000000L
+
+/*
+ * clear NTP time_status & time_state
+ *
+ * Issues adjtimex() with ADJ_STATUS and a zero status word and hands
+ * back whatever adjtimex() returned.
+ */
+int clear_time_state(void)
+{
+       struct timex tx;
+
+       tx.status = 0;
+       tx.modes = ADJ_STATUS;
+
+       return adjtimex(&tx);
+}
+
+#define NUM_FREQ_VALID 32
+#define NUM_FREQ_OUTOFRANGE 4
+#define NUM_FREQ_INVALID 2
+
+/*
+ * The tables below hold tx.freq values; validate_freq() prints
+ * value >> 16 as ppm, i.e. one ppm is 1 << 16.  Negative entries are
+ * built by multiplication because left-shifting a negative value is
+ * undefined behavior in C.
+ */
+#define SHIFTED_PPM (1L << 16)
+
+/* Frequencies adjtimex() is expected to accept as given */
+long valid_freq[NUM_FREQ_VALID] = {
+       -499 * SHIFTED_PPM,
+       -450 * SHIFTED_PPM,
+       -400 * SHIFTED_PPM,
+       -350 * SHIFTED_PPM,
+       -300 * SHIFTED_PPM,
+       -250 * SHIFTED_PPM,
+       -200 * SHIFTED_PPM,
+       -150 * SHIFTED_PPM,
+       -100 * SHIFTED_PPM,
+       -75 * SHIFTED_PPM,
+       -50 * SHIFTED_PPM,
+       -25 * SHIFTED_PPM,
+       -10 * SHIFTED_PPM,
+       -5 * SHIFTED_PPM,
+       -1 * SHIFTED_PPM,
+       -1000,
+       1 * SHIFTED_PPM,
+       5 * SHIFTED_PPM,
+       10 * SHIFTED_PPM,
+       25 * SHIFTED_PPM,
+       50 * SHIFTED_PPM,
+       75 * SHIFTED_PPM,
+       100 * SHIFTED_PPM,
+       150 * SHIFTED_PPM,
+       200 * SHIFTED_PPM,
+       250 * SHIFTED_PPM,
+       300 * SHIFTED_PPM,
+       350 * SHIFTED_PPM,
+       400 * SHIFTED_PPM,
+       450 * SHIFTED_PPM,
+       499 * SHIFTED_PPM,
+       0,      /* 32nd slot; was implicitly zero-initialized before */
+};
+
+/* Frequencies the test expects not to be stored unchanged */
+long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
+       -1000 * SHIFTED_PPM,
+       -550 * SHIFTED_PPM,
+       550 * SHIFTED_PPM,
+       1000 * SHIFTED_PPM,
+};
+
+#define LONG_MAX (~0UL>>1)
+#define LONG_MIN (-LONG_MAX - 1)
+
+/* Frequencies adjtimex() is expected to reject (checked on 64-bit only) */
+long invalid_freq[NUM_FREQ_INVALID] = {
+       LONG_MAX,
+       LONG_MIN,
+};
+
+/*
+ * Exercise adjtimex(ADJ_FREQUENCY) with three value tables:
+ *  - valid_freq: the call must not fail; a read-back mismatch only
+ *    produces a warning.
+ *  - outofrange_freq: the call must not fail, but the read-back value
+ *    must differ from what was requested.
+ *  - invalid_freq (only when long is 8 bytes): the call must fail.
+ *
+ * The frequency is reset to zero before returning.
+ * Returns 0 when every check passed, -1 otherwise.
+ */
+int validate_freq(void)
+{
+       struct timex tx;
+       int ret, pass = 0;
+       int i;
+
+       clear_time_state();
+
+       memset(&tx, 0, sizeof(struct timex));
+       /* tx starts zeroed; each iteration fills in modes and freq */
+
+       printf("Testing ADJ_FREQ... ");
+       for (i = 0; i < NUM_FREQ_VALID; i++) {
+               tx.modes = ADJ_FREQUENCY;
+               tx.freq = valid_freq[i];
+
+               ret = adjtimex(&tx);
+               if (ret < 0) {
+                       printf("[FAIL]\n");
+                       printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+                               valid_freq[i], valid_freq[i]>>16);
+                       pass = -1;
+                       goto out;
+               }
+               /* modes == 0 changes nothing; used to read the value back */
+               tx.modes = 0;
+               ret = adjtimex(&tx);
+               if (tx.freq != valid_freq[i]) {
+                       printf("Warning: freq value %ld not what we set it (%ld)!\n",
+                                       tx.freq, valid_freq[i]);
+               }
+       }
+       for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) {
+               tx.modes = ADJ_FREQUENCY;
+               tx.freq = outofrange_freq[i];
+
+               ret = adjtimex(&tx);
+               if (ret < 0) {
+                       printf("[FAIL]\n");
+                       printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+                               outofrange_freq[i], outofrange_freq[i]>>16);
+                       pass = -1;
+                       goto out;
+               }
+               tx.modes = 0;
+               ret = adjtimex(&tx);
+               /* an out-of-range request must not be stored verbatim */
+               if (tx.freq == outofrange_freq[i]) {
+                       printf("[FAIL]\n");
+                       printf("ERROR: out of range value %ld actually set!\n",
+                                       tx.freq);
+                       pass = -1;
+                       goto out;
+               }
+       }
+
+
+       if (sizeof(long) == 8) { /* this case only applies to 64bit systems */
+               for (i = 0; i < NUM_FREQ_INVALID; i++) {
+                       tx.modes = ADJ_FREQUENCY;
+                       tx.freq = invalid_freq[i];
+                       ret = adjtimex(&tx);
+                       if (ret >= 0) {
+                               printf("[FAIL]\n");
+                               printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n",
+                                       invalid_freq[i]);
+                               pass = -1;
+                               goto out;
+                       }
+               }
+       }
+
+       printf("[OK]\n");
+out:
+       /* reset freq to zero */
+       tx.modes = ADJ_FREQUENCY;
+       tx.freq = 0;
+       ret = adjtimex(&tx);
+
+       return pass;
+}
+
+
+/* Run the frequency validation and turn its result into the exit status. */
+int main(int argc, char **argv)
+{
+       int failed = validate_freq();
+
+       if (!failed)
+               return ksft_exit_pass();
+
+       return ksft_exit_fail();
+}
index 12c9d15..d401b63 100644 (file)
@@ -3,5 +3,6 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-run_tests: all
-       ./test_user_copy.sh
+TEST_PROGS := test_user_copy.sh
+
+include ../lib.mk
index 077828c..a5ce953 100644 (file)
@@ -1,6 +1,5 @@
 # Makefile for vm selftests
 
-CC = $(CROSS_COMPILE)gcc
 CFLAGS = -Wall
 BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest
 BINARIES += transhuge-stress
@@ -9,8 +8,10 @@ all: $(BINARIES)
 %: %.c
        $(CC) $(CFLAGS) -o $@ $^ -lrt
 
-run_tests: all
-       @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1)
+TEST_PROGS := run_vmtests
+TEST_FILES := $(BINARIES)
+
+include ../lib.mk
 
 clean:
        $(RM) $(BINARIES)
old mode 100644 (file)
new mode 100755 (executable)
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
new file mode 100644 (file)
index 0000000..15034fe
--- /dev/null
@@ -0,0 +1,2 @@
+*_32
+*_64
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
new file mode 100644 (file)
index 0000000..f0a7918
--- /dev/null
@@ -0,0 +1,48 @@
+.PHONY: all all_32 all_64 check_build32 clean run_tests
+
+TARGETS_C_BOTHBITS := sigreturn
+
+BINARIES_32 := $(TARGETS_C_BOTHBITS:%=%_32)
+BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64)
+
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+# Queried once here and reused below instead of running uname again
+UNAME_P := $(shell uname -p)
+
+# Always build 32-bit tests
+all: all_32
+
+# If we're on a 64-bit host, build 64-bit tests as well
+ifeq ($(UNAME_P),x86_64)
+all: all_64
+endif
+
+all_32: check_build32 $(BINARIES_32)
+
+all_64: $(BINARIES_64)
+
+clean:
+       $(RM) $(BINARIES_32) $(BINARIES_64)
+
+run_tests:
+       ./run_x86_tests.sh
+
+$(TARGETS_C_BOTHBITS:%=%_32): %_32: %.c
+       $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c
+       $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+# Fail early, with installation hints, when -m32 builds do not work
+check_build32:
+       @if ! $(CC) -m32 -o /dev/null trivial_32bit_program.c; then     \
+         echo "Warning: you seem to have a broken 32-bit build" 2>&1;  \
+         echo "environment.  If you are using a Debian-like";          \
+         echo " distribution, try:";                                   \
+         echo "";                                                      \
+         echo "  apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+         echo "";                                                      \
+         echo "If you are using a Fedora-like distribution, try:";     \
+         echo "";                                                      \
+         echo "  yum install glibc-devel.*i686";                       \
+         exit 1;                                                       \
+       fi
diff --git a/tools/testing/selftests/x86/run_x86_tests.sh b/tools/testing/selftests/x86/run_x86_tests.sh
new file mode 100644 (file)
index 0000000..3d3ec65
--- /dev/null
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# This is deliberately minimal.  IMO kselftests should provide a standard
+# script here.
+./sigreturn_32 || exit 1
+
+# Run the 64-bit binary only when "uname -p" reports x86_64.  This needs
+# command substitution and a string comparison: "$uname" is an unset
+# variable, and -eq compares arithmetically, which made the old test
+# always true.
+if [[ "$(uname -p)" == "x86_64" ]]; then
+    ./sigreturn_64 || exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
new file mode 100644 (file)
index 0000000..b5aa1ba
--- /dev/null
@@ -0,0 +1,684 @@
+/*
+ * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This is a series of tests that exercises the sigreturn(2) syscall and
+ * the IRET / SYSRET paths in the kernel.
+ *
+ * For now, this focuses on the effects of unusual CS and SS values,
+ * and it has a bunch of tests to make sure that ESP/RSP is restored
+ * properly.
+ *
+ * The basic idea behind these tests is to raise(SIGUSR1) to create a
+ * sigcontext frame, plug in the values to be tested, and then return,
+ * which implicitly invokes sigreturn(2) and programs the user context
+ * as desired.
+ *
+ * For tests for which we expect sigreturn and the subsequent return to
+ * user mode to succeed, we return to a short trampoline that generates
+ * SIGTRAP so that the meat of the tests can be ordinary C code in a
+ * SIGTRAP handler.
+ *
+ * The inner workings of each test is documented below.
+ *
+ * Do not run on outdated, unpatched kernels at risk of nasty crashes.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+/*
+ * In principle, this test can run on Linux emulation layers (e.g.
+ * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
+ * entries 0-5 for their own internal purposes, so start our LDT
+ * allocations above that reservation.  (The tests don't pass on LX
+ * branded zones, but at least this lets them run.)
+ */
+#define LDT_OFFSET 6
+
+/* An aligned stack accessible through some of our segments. */
+static unsigned char stack16[65536] __attribute__((aligned(4096)));
+
+/*
+ * An aligned int3 instruction used as a trampoline.  Some of the tests
+ * want to fish out their ss values, so this trampoline copies ss to eax
+ * before the int3.
+ */
+asm (".pushsection .text\n\t"
+     ".type int3, @function\n\t"
+     ".align 4096\n\t"
+     "int3:\n\t"
+     "mov %ss,%eax\n\t"
+     "int3\n\t"
+     ".size int3, . - int3\n\t"
+     ".align 4096, 0xcc\n\t"
+     ".popsection");
+extern char int3[4096];
+
+/*
+ * At startup, we prepare:
+ *
+ * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
+ *   descriptor or out of bounds).
+ * - code16_sel: A 16-bit LDT code segment pointing to int3.
+ * - data16_sel: A 16-bit LDT data segment pointing to stack16.
+ * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
+ * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
+ * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
+ * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
+ *   stack16.
+ *
+ * For no particularly good reason, xyz_sel is a selector value with the
+ * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
+ * descriptor table.  These variables will be zero if their respective
+ * segments could not be allocated.
+ */
+static unsigned short ldt_nonexistent_sel;
+static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
+
+static unsigned short gdt_data16_idx, gdt_npdata32_idx;
+
+/* Selector for GDT entry idx: index shifted up by 3, low bits set to 3. */
+static unsigned short GDT3(int idx)
+{
+       int sel = idx << 3;
+
+       sel |= 3;
+       return sel;
+}
+
+/*
+ * Selector for LDT entry idx: index shifted up by 3, with the LDT bit
+ * (4) and the low bits (3) set.
+ */
+static unsigned short LDT3(int idx)
+{
+       int sel = idx << 3;
+
+       sel |= 4 | 3;
+       return sel;
+}
+
+/* Our sigaltstack scratch space. */
+static char altstack_data[SIGSTKSZ];
+
+/*
+ * Install handler as the SA_SIGINFO-style action for sig, with an empty
+ * signal mask and the caller's extra sa_flags.  Exits through err() if
+ * sigaction() fails.
+ */
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+                      int flags)
+{
+       struct sigaction act;
+
+       memset(&act, 0, sizeof(act));
+       sigemptyset(&act.sa_mask);
+       act.sa_flags = SA_SIGINFO | flags;
+       act.sa_sigaction = handler;
+
+       if (sigaction(sig, &act, 0) != 0)
+               err(1, "sigaction");
+}
+
+/* Put sig back to SIG_DFL; exits through err() if sigaction() fails. */
+static void clearhandler(int sig)
+{
+       struct sigaction act;
+
+       memset(&act, 0, sizeof(act));
+       sigemptyset(&act.sa_mask);
+       act.sa_handler = SIG_DFL;
+
+       if (sigaction(sig, &act, 0) != 0)
+               err(1, "sigaction");
+}
+
+/*
+ * Hand desc to modify_ldt(2) (function code 1).  On success *var gets
+ * the LDT3() selector for desc->entry_number; on failure a [NOTE] line
+ * naming the segment is printed and *var is set to 0.
+ */
+static void add_ldt(const struct user_desc *desc, unsigned short *var,
+                   const char *name)
+{
+       long rc = syscall(SYS_modify_ldt, 1, desc, sizeof(*desc));
+
+       if (rc != 0) {
+               printf("[NOTE]\tFailed to create %s segment\n", name);
+               *var = 0;
+               return;
+       }
+
+       *var = LDT3(desc->entry_number);
+}
+
+/*
+ * Create the descriptors the tests use: four LDT entries (16-bit code
+ * and data, not-present 32-bit code and data) through add_ldt(), then
+ * two GDT entries requested through set_thread_area().  Also picks the
+ * selector used as the "nonexistent" LDT entry.  Exits through errx()
+ * if stack16 or int3 cannot be addressed with a 32-bit base.
+ */
+static void setup_ldt(void)
+{
+       if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
+               errx(1, "stack16 is too high\n");
+       if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
+               errx(1, "int3 is too high\n");
+
+       /* LDT_OFFSET + 2 is deliberately never populated below */
+       ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
+
+       const struct user_desc code16_desc = {
+               .entry_number    = LDT_OFFSET + 0,
+               .base_addr       = (unsigned long)int3,
+               .limit           = 4095,
+               .seg_32bit       = 0,
+               .contents        = 2, /* Code, not conforming */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 0,
+               .useable         = 0
+       };
+       add_ldt(&code16_desc, &code16_sel, "code16");
+
+       const struct user_desc data16_desc = {
+               .entry_number    = LDT_OFFSET + 1,
+               .base_addr       = (unsigned long)stack16,
+               .limit           = 0xffff,
+               .seg_32bit       = 0,
+               .contents        = 0, /* Data, grow-up */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 0,
+               .useable         = 0
+       };
+       add_ldt(&data16_desc, &data16_sel, "data16");
+
+       const struct user_desc npcode32_desc = {
+               .entry_number    = LDT_OFFSET + 3,
+               .base_addr       = (unsigned long)int3,
+               .limit           = 4095,
+               .seg_32bit       = 1,
+               .contents        = 2, /* Code, not conforming */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 1,
+               .useable         = 0
+       };
+       add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
+
+       const struct user_desc npdata32_desc = {
+               .entry_number    = LDT_OFFSET + 4,
+               .base_addr       = (unsigned long)stack16,
+               .limit           = 0xffff,
+               .seg_32bit       = 1,
+               .contents        = 0, /* Data, grow-up */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 1,
+               .useable         = 0
+       };
+       add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
+
+       /* entry_number -1: set_thread_area reports the slot it picked */
+       struct user_desc gdt_data16_desc = {
+               .entry_number    = -1,
+               .base_addr       = (unsigned long)stack16,
+               .limit           = 0xffff,
+               .seg_32bit       = 0,
+               .contents        = 0, /* Data, grow-up */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 0,
+               .useable         = 0
+       };
+
+       if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
+               /*
+                * This probably indicates vulnerability to CVE-2014-8133.
+                * Merely getting here isn't definitive, though, and we'll
+                * diagnose the problem for real later on.
+                */
+               printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
+                      gdt_data16_desc.entry_number);
+               gdt_data16_idx = gdt_data16_desc.entry_number;
+       } else {
+               printf("[OK]\tset_thread_area refused 16-bit data\n");
+       }
+
+       struct user_desc gdt_npdata32_desc = {
+               .entry_number    = -1,
+               .base_addr       = (unsigned long)stack16,
+               .limit           = 0xffff,
+               .seg_32bit       = 1,
+               .contents        = 0, /* Data, grow-up */
+               .read_exec_only  = 0,
+               .limit_in_pages  = 0,
+               .seg_not_present = 1,
+               .useable         = 0
+       };
+
+       if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
+               /*
+                * As a hardening measure, newer kernels don't allow this.
+                */
+               printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
+                      gdt_npdata32_desc.entry_number);
+               gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
+       } else {
+               /* This descriptor is 32-bit and not present, not 16-bit */
+               printf("[OK]\tset_thread_area refused not-present 32-bit data\n");
+       }
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs, requested_regs, resulting_regs;
+
+/* Instructions for the SIGUSR1 handler. */
+static volatile unsigned short sig_cs, sig_ss;
+static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
+
+/* Abstractions for some 32-bit vs 64-bit differences. */
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define REG_SP REG_RSP
+# define REG_AX REG_RAX
+
+struct selectors {
+       unsigned short cs, gs, fs, ss;
+};
+
+static unsigned short *ssptr(ucontext_t *ctx)
+{
+       struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+       return &sels->ss;
+}
+
+static unsigned short *csptr(ucontext_t *ctx)
+{
+       struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+       return &sels->cs;
+}
+#else
+# define REG_IP REG_EIP
+# define REG_SP REG_ESP
+# define REG_AX REG_EAX
+
+static greg_t *ssptr(ucontext_t *ctx)
+{
+       return &ctx->uc_mcontext.gregs[REG_SS];
+}
+
+static greg_t *csptr(ucontext_t *ctx)
+{
+       return &ctx->uc_mcontext.gregs[REG_CS];
+}
+#endif
+
+/* Number of errors in the current test case. */
+static volatile sig_atomic_t nerrs;
+
+/*
+ * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
+ * int3 trampoline.  Sets SP to a large known value so that we can see
+ * whether the value round-trips back to user mode correctly.
+ *
+ * The incoming register set is saved in initial_regs and the modified
+ * one in requested_regs, so later code can compare them against what
+ * actually arrived.
+ */
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+       ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+       /* Remember the pre-test registers so they can be restored later */
+       memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+       *csptr(ctx) = sig_cs;
+       *ssptr(ctx) = sig_ss;
+
+       /* IP is 0 for the code16 selector, the address of int3 otherwise */
+       ctx->uc_mcontext.gregs[REG_IP] =
+               sig_cs == code16_sel ? 0 : (unsigned long)&int3;
+       ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+       ctx->uc_mcontext.gregs[REG_AX] = 0;
+
+       memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+       requested_regs[REG_AX] = *ssptr(ctx);   /* The asm code does this. */
+
+       return;
+}
+
+/*
+ * Called after a successful sigreturn.  Restores our state so that
+ * the original raise(SIGUSR1) returns.
+ *
+ * Records the error code and trap number from the signal frame, saves
+ * the frame's registers in resulting_regs, puts initial_regs back into
+ * the frame and stores the signal number in sig_trapped.
+ */
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+       ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+       sig_err = ctx->uc_mcontext.gregs[REG_ERR];
+       sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
+
+       /* SS as seen inside this handler; only used in the message below */
+       unsigned short ss;
+       asm ("mov %%ss,%0" : "=r" (ss));
+
+       /* AX holds the SS value that the int3 trampoline copied into eax */
+       greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX];
+       if (asm_ss != sig_ss && sig == SIGTRAP) {
+               /* Sanity check failure. */
+               printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+                      ss, *ssptr(ctx), (unsigned long long)asm_ss);
+               nerrs++;
+       }
+
+       memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+       memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+       sig_trapped = sig;
+}
+
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ *
+ * Returns 16, 32 or 64 according to bits 21 and 22 of the value LAR
+ * produces for cs, or -1 when LAR fails, bit 11 of that value is clear,
+ * or both of bits 21 and 22 are set.
+ */
+int cs_bitness(unsigned short cs)
+{
+       uint32_t valid = 0, ar;
+       /* valid is set to 1 only when the jnz after lar is not taken */
+       asm ("lar %[cs], %[ar]\n\t"
+            "jnz 1f\n\t"
+            "mov $1, %[valid]\n\t"
+            "1:"
+            : [ar] "=r" (ar), [valid] "+rm" (valid)
+            : [cs] "r" (cs));
+
+       if (!valid)
+               return -1;
+
+       bool db = (ar & (1 << 22));
+       bool l = (ar & (1 << 21));
+
+       if (!(ar & (1<<11)))
+           return -1;  /* Not code. */
+
+       if (l && !db)
+               return 64;
+       else if (!l && db)
+               return 32;
+       else if (!l && !db)
+               return 16;
+       else
+               return -1;      /* Unknown bitness. */
+}
+
+/*
+ * Finds a usable code segment of the requested bitness.
+ *
+ * Candidates are tried in this order: the current CS, the selector two
+ * descriptor slots above it, the selector two slots below it, and
+ * finally code16_sel.  Returns the first selector whose cs_bitness()
+ * matches, or -1 (after printing a [WARN] line) if none does.
+ */
+int find_cs(int bitness)
+{
+       unsigned short my_cs;
+
+       asm ("mov %%cs,%0" :  "=r" (my_cs));
+
+       if (cs_bitness(my_cs) == bitness)
+               return my_cs;
+       if (cs_bitness(my_cs + (2 << 3)) == bitness)
+               return my_cs + (2 << 3);
+       /* the my_cs > (2<<3) guard keeps the subtraction from going negative */
+       if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
+           return my_cs - (2 << 3);
+       if (cs_bitness(code16_sel) == bitness)
+               return code16_sel;
+
+       printf("[WARN]\tCould not find %d-bit CS\n", bitness);
+       return -1;
+}
+
+/*
+ * Run one sigreturn round trip that is expected to succeed and compare
+ * requested_regs against resulting_regs.
+ *
+ * cs_bits:      bitness of the code segment to return to (see find_cs())
+ * use_16bit_ss: use data16_sel as SS instead of the current SS
+ * force_ss:     SS selector to use as-is, or -1 to pick one as above
+ *
+ * Returns the number of mismatches found; 0 also when the test had to
+ * be skipped because a needed segment is unavailable.
+ */
+static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
+{
+       int cs = find_cs(cs_bits);
+       if (cs == -1) {
+               printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
+                      cs_bits, use_16bit_ss ? 16 : 32);
+               return 0;
+       }
+
+       if (force_ss != -1) {
+               sig_ss = force_ss;
+       } else {
+               if (use_16bit_ss) {
+                       if (!data16_sel) {
+                               printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
+                                      cs_bits);
+                               return 0;
+                       }
+                       sig_ss = data16_sel;
+               } else {
+                       asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
+               }
+       }
+
+       sig_cs = cs;
+
+       printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
+              cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
+              (sig_ss & 4) ? "" : ", GDT");
+
+       raise(SIGUSR1);
+
+       /*
+        * NOTE(review): nerrs is cleared only after raise() has returned,
+        * so an nerrs++ done by sigtrap() during the round trip is
+        * discarded here - confirm that this is intended.
+        */
+       nerrs = 0;
+
+       /*
+        * Check that each register had an acceptable value when the
+        * int3 trampoline was invoked.
+        */
+       for (int i = 0; i < NGREG; i++) {
+               greg_t req = requested_regs[i], res = resulting_regs[i];
+               if (i == REG_TRAPNO || i == REG_IP)
+                       continue;       /* don't care */
+               if (i == REG_SP) {
+                       printf("\tSP: %llx -> %llx\n", (unsigned long long)req,
+                              (unsigned long long)res);
+
+                       /*
+                        * In many circumstances, the high 32 bits of rsp
+                        * are zeroed.  For example, we could be a real
+                        * 32-bit program, or we could hit any of a number
+                        * of poorly-documented IRET or segmented ESP
+                        * oddities.  If this happens, it's okay.
+                        */
+                       if (res == (req & 0xFFFFFFFF))
+                               continue;  /* OK; not expected to work */
+               }
+
+               bool ignore_reg = false;
+#if __i386__
+               if (i == REG_UESP)
+                       ignore_reg = true;
+#else
+               /* CS and SS share one greg slot here; compare them by field */
+               if (i == REG_CSGSFS) {
+                       struct selectors *req_sels =
+                               (void *)&requested_regs[REG_CSGSFS];
+                       struct selectors *res_sels =
+                               (void *)&resulting_regs[REG_CSGSFS];
+                       if (req_sels->cs != res_sels->cs) {
+                               printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
+                                      req_sels->cs, res_sels->cs);
+                               nerrs++;
+                       }
+
+                       if (req_sels->ss != res_sels->ss) {
+                               printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
+                                      req_sels->ss, res_sels->ss);
+                               nerrs++;
+                       }
+
+                       continue;
+               }
+#endif
+
+               /*
+                * Sanity check on the kernel
+                * NOTE(review): sigusr1() stores the SS selector in
+                * requested_regs[REG_AX], so "saved SP" in the message
+                * below looks like it should read "saved SS" - confirm.
+                */
+               if (i == REG_AX && requested_regs[i] != resulting_regs[i]) {
+                       printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
+                              (unsigned long long)requested_regs[i],
+                              (unsigned long long)resulting_regs[i]);
+                       nerrs++;
+                       continue;
+               }
+
+               if (requested_regs[i] != resulting_regs[i] && !ignore_reg) {
+                       /*
+                        * SP is particularly interesting here.  The
+                        * usual cause of failures is that we hit the
+                        * nasty IRET case of returning to a 16-bit SS,
+                        * in which case bits 16:31 of the *kernel*
+                        * stack pointer persist in ESP.
+                        */
+                       printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
+                              i, (unsigned long long)requested_regs[i],
+                              (unsigned long long)resulting_regs[i]);
+                       nerrs++;
+               }
+       }
+
+       if (nerrs == 0)
+               printf("[OK]\tall registers okay\n");
+
+       return nerrs;
+}
+
+/*
+ * Run one "bad IRET" case: publish the requested code and stack selectors
+ * through sig_cs and sig_ss, raise SIGUSR1, and then check that a signal was
+ * recorded in sig_trapped.
+ *
+ * cs_bits:  bitness of the code segment to test; passed to find_cs() to pick
+ *           a selector unless force_cs overrides it.
+ * ss:       stack segment selector to request.
+ * force_cs: code segment selector to use as-is, or -1 to use find_cs().
+ *
+ * Returns 0 if a signal was trapped or if find_cs() found no usable selector
+ * (the case is skipped silently); returns 1 if no signal was trapped.
+ */
+static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
+{
+       int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
+       if (cs == -1)
+               return 0;
+
+       sig_cs = cs;
+       sig_ss = ss;
+
+       printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
+              cs_bits, sig_cs, sig_ss);
+
+       sig_trapped = 0;
+       raise(SIGUSR1);
+       if (!sig_trapped) {
+               printf("[FAIL]\tDid not get SIGSEGV\n");
+               return 1;
+       }
+
+       /*
+        * Describe a nonzero error code: bit 0 is reported as " EXT",
+        * bits 1-2 name the descriptor table, the rest is the index.
+        */
+       char errdesc[32] = "";
+       if (sig_err) {
+               const char *src = (sig_err & 1) ? " EXT" : "";
+               const char *table;
+               if ((sig_err & 0x6) == 0x0)
+                       table = "GDT";
+               else if ((sig_err & 0x6) == 0x4)
+                       table = "LDT";
+               else if ((sig_err & 0x6) == 0x2)
+                       table = "IDT";
+               else
+                       table = "???";
+
+               /* Bounded write: never run past the end of errdesc. */
+               snprintf(errdesc, sizeof(errdesc), "%s%s index %d, ",
+                        table, src, sig_err >> 3);
+       }
+
+       /* Known trap numbers get a mnemonic, anything else is printed raw. */
+       char trapnum[32];
+       const char *trapname;
+       if (sig_trapno == 13) {
+               trapname = "GP";
+       } else if (sig_trapno == 11) {
+               trapname = "NP";
+       } else if (sig_trapno == 12) {
+               trapname = "SS";
+       } else if (sig_trapno == 32) {
+               trapname = "IRET";  /* X86_TRAP_IRET */
+       } else {
+               snprintf(trapnum, sizeof(trapnum), "%d", sig_trapno);
+               trapname = trapnum;
+       }
+
+       printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
+              trapname, (unsigned long)sig_err,
+              errdesc, strsignal(sig_trapped));
+       return 0;
+}
+
+/*
+ * Test driver: set up the LDT and an alternate signal stack, run the valid
+ * sigreturn cases followed by the bad-IRET cases, and exit with status 1 if
+ * any case reported a failure (0 otherwise).
+ */
+int main(void)
+{
+       int total_nerrs = 0;
+       unsigned short my_cs, my_ss;
+
+       asm volatile ("mov %%cs,%0" : "=r" (my_cs));
+       asm volatile ("mov %%ss,%0" : "=r" (my_ss));
+       setup_ldt();
+
+       stack_t stack = {
+               .ss_sp = altstack_data,
+               .ss_size = SIGSTKSZ,
+       };
+       if (sigaltstack(&stack, NULL) != 0)
+               err(1, "sigaltstack");
+
+       sethandler(SIGUSR1, sigusr1, 0);
+       sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+       /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
+       total_nerrs += test_valid_sigreturn(64, false, -1);
+       total_nerrs += test_valid_sigreturn(32, false, -1);
+       total_nerrs += test_valid_sigreturn(16, false, -1);
+
+       /*
+        * Test easy espfix cases: return to a 16-bit LDT SS in each possible
+        * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
+        *
+        * This catches the original missing-espfix-on-64-bit-kernels issue
+        * as well as CVE-2014-8134.
+        */
+       total_nerrs += test_valid_sigreturn(64, true, -1);
+       total_nerrs += test_valid_sigreturn(32, true, -1);
+       total_nerrs += test_valid_sigreturn(16, true, -1);
+
+       if (gdt_data16_idx) {
+               /*
+                * For performance reasons, Linux skips espfix if SS points
+                * to the GDT.  If we were able to allocate a 16-bit SS in
+                * the GDT, see if it leaks parts of the kernel stack pointer.
+                *
+                * This tests for CVE-2014-8133.
+                */
+               total_nerrs += test_valid_sigreturn(64, true,
+                                                   GDT3(gdt_data16_idx));
+               total_nerrs += test_valid_sigreturn(32, true,
+                                                   GDT3(gdt_data16_idx));
+               total_nerrs += test_valid_sigreturn(16, true,
+                                                   GDT3(gdt_data16_idx));
+       }
+
+       /*
+        * We're done testing valid sigreturn cases.  Now we test states
+        * for which sigreturn itself will succeed but the subsequent
+        * entry to user mode will fail.
+        *
+        * Depending on the failure mode and the kernel bitness, these
+        * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
+        *
+        * test_bad_iret() returns 1 when no signal was trapped; add that
+        * to total_nerrs so a printed [FAIL] also shows in the exit status.
+        */
+       clearhandler(SIGTRAP);
+       sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
+       sethandler(SIGBUS, sigtrap, SA_ONSTACK);
+       sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
+
+       /* Easy failures: invalid SS, resulting in #GP(0) */
+       total_nerrs += test_bad_iret(64, ldt_nonexistent_sel, -1);
+       total_nerrs += test_bad_iret(32, ldt_nonexistent_sel, -1);
+       total_nerrs += test_bad_iret(16, ldt_nonexistent_sel, -1);
+
+       /* These fail because SS isn't a data segment, resulting in #GP(SS) */
+       total_nerrs += test_bad_iret(64, my_cs, -1);
+       total_nerrs += test_bad_iret(32, my_cs, -1);
+       total_nerrs += test_bad_iret(16, my_cs, -1);
+
+       /* Try to return to a not-present code segment, triggering #NP(SS). */
+       total_nerrs += test_bad_iret(32, my_ss, npcode32_sel);
+
+       /*
+        * Try to return to a not-present but otherwise valid data segment.
+        * This will cause IRET to fail with #SS on the espfix stack.  This
+        * exercises CVE-2014-9322.
+        *
+        * Note that, if espfix is enabled, 64-bit Linux will lose track
+        * of the actual cause of failure and report #GP(0) instead.
+        * This would be very difficult for Linux to avoid, because
+        * espfix64 causes IRET failures to be promoted to #DF, so the
+        * original exception frame is never pushed onto the stack.
+        */
+       total_nerrs += test_bad_iret(32, npdata32_sel, -1);
+
+       /*
+        * Try to return to a not-present but otherwise valid data
+        * segment without invoking espfix.  Newer kernels don't allow
+        * this to happen in the first place.  On older kernels, though,
+        * this can trigger CVE-2014-9322.
+        */
+       if (gdt_npdata32_idx)
+               total_nerrs += test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
+
+       return total_nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c
new file mode 100644 (file)
index 0000000..2e231be
--- /dev/null
@@ -0,0 +1,14 @@
+/*
+ * Trivial program to check that we have a valid 32-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#include <stdio.h>
+
+int main(void)
+{
+       /* Emit a bare newline; building and running at all is the point. */
+       puts("");
+       return 0;
+}
index 6e54f35..98c95f2 100644 (file)
@@ -85,13 +85,22 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
        return IRQ_HANDLED;
 }
 
+/*
+ * Work function for handling the backup timer that we schedule when a vcpu is
+ * no longer running, but had a timer programmed to fire in the future.
+ */
 static void kvm_timer_inject_irq_work(struct work_struct *work)
 {
        struct kvm_vcpu *vcpu;
 
        vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
        vcpu->arch.timer_cpu.armed = false;
-       kvm_timer_inject_irq(vcpu);
+
+       /*
+        * If the vcpu is blocked we want to wake it up so that it will see
+        * the timer has expired when entering the guest.
+        */
+       kvm_vcpu_kick(vcpu);
 }
 
 static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
@@ -102,6 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
        return HRTIMER_NORESTART;
 }
 
+/* True iff ENABLE is set, IT_MASK is clear and cntv_cval <= current count. */
+bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
+{
+       struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+       cycle_t deadline, guest_now;
+
+       if (!(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
+               return false;   /* ENABLE bit clear */
+       if (timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK)
+               return false;   /* IT_MASK bit set */
+       deadline = timer->cntv_cval;
+       guest_now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+       return deadline <= guest_now;
+}
+
 /**
  * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
  * @vcpu: The vcpu pointer
@@ -119,6 +143,13 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
         * populate the CPU timer again.
         */
        timer_disarm(timer);
+
+       /*
+        * If the timer expired while we were not scheduled, now is the time
+        * to inject it.
+        */
+       if (kvm_timer_should_fire(vcpu))
+               kvm_timer_inject_irq(vcpu);
 }
 
 /**
@@ -134,16 +165,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
        cycle_t cval, now;
        u64 ns;
 
-       if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-               !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
-               return;
-
-       cval = timer->cntv_cval;
-       now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
        BUG_ON(timer_is_armed(timer));
 
-       if (cval <= now) {
+       if (kvm_timer_should_fire(vcpu)) {
                /*
                 * Timer has already expired while we were not
                 * looking. Inject the interrupt and carry on.
@@ -152,6 +176,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
                return;
        }
 
+       cval = timer->cntv_cval;
+       now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
        ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
                                 &timecounter->frac);
        timer_arm(timer, ns);
index 19c6210..1390797 100644 (file)
@@ -107,6 +107,22 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu,
                                             vcpu->vcpu_id);
 }
 
+/*
+ * MMIO handler installed for the GIC_DIST_ACTIVE_SET range in
+ * vgic_dist_ranges: forwards the access to vgic_handle_set_active_reg()
+ * together with vcpu->vcpu_id and returns that function's result.
+ */
+static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu,
+                                      struct kvm_exit_mmio *mmio,
+                                      phys_addr_t offset)
+{
+       return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset,
+                                         vcpu->vcpu_id);
+}
+
+/*
+ * MMIO handler installed for the GIC_DIST_ACTIVE_CLEAR range in
+ * vgic_dist_ranges: forwards the access to vgic_handle_clear_active_reg()
+ * together with vcpu->vcpu_id and returns that function's result.
+ */
+static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu,
+                                        struct kvm_exit_mmio *mmio,
+                                        phys_addr_t offset)
+{
+       return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset,
+                                           vcpu->vcpu_id);
+}
+
 static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu,
                                     struct kvm_exit_mmio *mmio,
                                     phys_addr_t offset)
@@ -303,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
                return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
 }
 
-static const struct kvm_mmio_range vgic_dist_ranges[] = {
+static const struct vgic_io_range vgic_dist_ranges[] = {
        {
                .base           = GIC_DIST_CTRL,
                .len            = 12,
@@ -344,13 +360,13 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = {
                .base           = GIC_DIST_ACTIVE_SET,
                .len            = VGIC_MAX_IRQS / 8,
                .bits_per_irq   = 1,
-               .handle_mmio    = handle_mmio_raz_wi,
+               .handle_mmio    = handle_mmio_set_active_reg,
        },
        {
                .base           = GIC_DIST_ACTIVE_CLEAR,
                .len            = VGIC_MAX_IRQS / 8,
                .bits_per_irq   = 1,
-               .handle_mmio    = handle_mmio_raz_wi,
+               .handle_mmio    = handle_mmio_clear_active_reg,
        },
        {
                .base           = GIC_DIST_PRI,
@@ -388,24 +404,6 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = {
        {}
 };
 
-static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                               struct kvm_exit_mmio *mmio)
-{
-       unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base;
-
-       if (!is_in_range(mmio->phys_addr, mmio->len, base,
-                        KVM_VGIC_V2_DIST_SIZE))
-               return false;
-
-       /* GICv2 does not support accesses wider than 32 bits */
-       if (mmio->len > 4) {
-               kvm_inject_dabt(vcpu, mmio->phys_addr);
-               return true;
-       }
-
-       return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base);
-}
-
 static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
 {
        struct kvm *kvm = vcpu->kvm;
@@ -490,6 +488,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 static int vgic_v2_map_resources(struct kvm *kvm,
                                 const struct vgic_params *params)
 {
+       struct vgic_dist *dist = &kvm->arch.vgic;
        int ret = 0;
 
        if (!irqchip_in_kernel(kvm))
@@ -500,13 +499,17 @@ static int vgic_v2_map_resources(struct kvm *kvm,
        if (vgic_ready(kvm))
                goto out;
 
-       if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
-           IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) {
+       if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
+           IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
                kvm_err("Need to set vgic cpu and dist addresses first\n");
                ret = -ENXIO;
                goto out;
        }
 
+       vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
+                                KVM_VGIC_V2_DIST_SIZE,
+                                vgic_dist_ranges, -1, &dist->dist_iodev);
+
        /*
         * Initialize the vgic if this hasn't already been done on demand by
         * accessing the vgic state from userspace.
@@ -514,18 +517,23 @@ static int vgic_v2_map_resources(struct kvm *kvm,
        ret = vgic_init(kvm);
        if (ret) {
                kvm_err("Unable to allocate maps\n");
-               goto out;
+               goto out_unregister;
        }
 
-       ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base,
+       ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
                                    params->vcpu_base, KVM_VGIC_V2_CPU_SIZE,
                                    true);
        if (ret) {
                kvm_err("Unable to remap VGIC CPU to VCPU\n");
-               goto out;
+               goto out_unregister;
        }
 
-       kvm->arch.vgic.ready = true;
+       dist->ready = true;
+       goto out;
+
+out_unregister:
+       kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
+
 out:
        if (ret)
                kvm_vgic_destroy(kvm);
@@ -554,7 +562,6 @@ void vgic_v2_init_emulation(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
 
-       dist->vm_ops.handle_mmio = vgic_v2_handle_mmio;
        dist->vm_ops.queue_sgi = vgic_v2_queue_sgi;
        dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source;
        dist->vm_ops.init_model = vgic_v2_init_model;
@@ -631,7 +638,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
  * CPU Interface Register accesses - these are not accessed by the VM, but by
  * user space for saving and restoring VGIC state.
  */
-static const struct kvm_mmio_range vgic_cpu_ranges[] = {
+static const struct vgic_io_range vgic_cpu_ranges[] = {
        {
                .base           = GIC_CPU_CTRL,
                .len            = 12,
@@ -658,12 +665,13 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
                                 struct kvm_device_attr *attr,
                                 u32 *reg, bool is_write)
 {
-       const struct kvm_mmio_range *r = NULL, *ranges;
+       const struct vgic_io_range *r = NULL, *ranges;
        phys_addr_t offset;
        int ret, cpuid, c;
        struct kvm_vcpu *vcpu, *tmp_vcpu;
        struct vgic_dist *vgic;
        struct kvm_exit_mmio mmio;
+       u32 data;
 
        offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
        cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
@@ -685,6 +693,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
 
        mmio.len = 4;
        mmio.is_write = is_write;
+       mmio.data = &data;
        if (is_write)
                mmio_data_write(&mmio, ~0, *reg);
        switch (attr->group) {
@@ -699,7 +708,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev,
        default:
                BUG();
        }
-       r = vgic_find_range(ranges, &mmio, offset);
+       r = vgic_find_range(ranges, 4, offset);
 
        if (unlikely(!r || !r->handle_mmio)) {
                ret = -ENXIO;
index a0a7b5d..f9b9c7c 100644 (file)
@@ -72,6 +72,8 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
 {
        if (!(lr_desc.state & LR_STATE_MASK))
                vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
+       else
+               vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr);
 }
 
 static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -84,6 +86,11 @@ static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
        return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
 }
 
+/* Zero the vgic_eisr field kept in this vcpu's vgic_v2 state. */
+static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0;
+}
+
 static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
 {
        u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr;
@@ -148,6 +155,7 @@ static const struct vgic_ops vgic_v2_ops = {
        .sync_lr_elrsr          = vgic_v2_sync_lr_elrsr,
        .get_elrsr              = vgic_v2_get_elrsr,
        .get_eisr               = vgic_v2_get_eisr,
+       .clear_eisr             = vgic_v2_clear_eisr,
        .get_interrupt_status   = vgic_v2_get_interrupt_status,
        .enable_underflow       = vgic_v2_enable_underflow,
        .disable_underflow      = vgic_v2_disable_underflow,
index b3f1546..e9c3a7a 100644 (file)
@@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu,
        return false;
 }
 
-static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
+static const struct vgic_io_range vgic_v3_dist_ranges[] = {
        {
                .base           = GICD_CTLR,
                .len            = 0x04,
@@ -502,6 +502,43 @@ static const struct kvm_mmio_range vgic_v3_dist_ranges[] = {
        {},
 };
 
+/*
+ * MMIO handler installed for GICR_CTLR in vgic_redist_ranges.  No register
+ * backing store is passed to vgic_reg_access(); the access flags request
+ * read-as-zero / write-ignored behaviour.  Always returns false.
+ */
+static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
+                                   struct kvm_exit_mmio *mmio,
+                                   phys_addr_t offset)
+{
+       /* since we don't support LPIs, this register is zero for now */
+       vgic_reg_access(mmio, NULL, offset,
+                       ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
+       return false;
+}
+
+/*
+ * MMIO handler installed for the 8-byte GICR_TYPER range in
+ * vgic_redist_ranges.  The redistributor's vcpu is taken from mmio->private.
+ * The word at offset 4 reads as the compressed MPIDR affinity of that vcpu;
+ * the word at offset 0 reads as vcpu_id << 8, with GICR_TYPER_LAST added
+ * when vcpu_id equals online_vcpus - 1.  Writes are ignored.  Always
+ * returns false.
+ */
+static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
+                                    struct kvm_exit_mmio *mmio,
+                                    phys_addr_t offset)
+{
+       struct kvm_vcpu *redist_vcpu = mmio->private;
+       u32 reg;
+
+       if ((offset & ~3) == 4) {
+               /* the upper 32 bits contain the affinity value */
+               u64 affinity = kvm_vcpu_get_mpidr_aff(redist_vcpu);
+
+               reg = compress_mpidr(affinity);
+       } else {
+               int id = redist_vcpu->vcpu_id;
+
+               reg = redist_vcpu->vcpu_id << 8;
+               if (id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
+                       reg |= GICR_TYPER_LAST;
+       }
+
+       /* Both halves are served the same way: readable, writes dropped. */
+       vgic_reg_access(mmio, &reg, offset,
+                       ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
+       return false;
+}
+
 static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu,
                                              struct kvm_exit_mmio *mmio,
                                              phys_addr_t offset)
@@ -570,186 +607,107 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu,
        return vgic_handle_cfg_reg(reg, mmio, offset);
 }
 
-static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = {
+#define SGI_base(x) ((x) + SZ_64K)
+
+static const struct vgic_io_range vgic_redist_ranges[] = {
+       {
+               .base           = GICR_CTLR,
+               .len            = 0x04,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_ctlr_redist,
+       },
+       {
+               .base           = GICR_TYPER,
+               .len            = 0x08,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_typer_redist,
+       },
+       {
+               .base           = GICR_IIDR,
+               .len            = 0x04,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_iidr,
+       },
+       {
+               .base           = GICR_WAKER,
+               .len            = 0x04,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_raz_wi,
+       },
        {
-               .base           = GICR_IGROUPR0,
+               .base           = GICR_IDREGS,
+               .len            = 0x30,
+               .bits_per_irq   = 0,
+               .handle_mmio    = handle_mmio_idregs,
+       },
+       {
+               .base           = SGI_base(GICR_IGROUPR0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_rao_wi,
        },
        {
-               .base           = GICR_ISENABLER0,
+               .base           = SGI_base(GICR_ISENABLER0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_set_enable_reg_redist,
        },
        {
-               .base           = GICR_ICENABLER0,
+               .base           = SGI_base(GICR_ICENABLER0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_clear_enable_reg_redist,
        },
        {
-               .base           = GICR_ISPENDR0,
+               .base           = SGI_base(GICR_ISPENDR0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_set_pending_reg_redist,
        },
        {
-               .base           = GICR_ICPENDR0,
+               .base           = SGI_base(GICR_ICPENDR0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_clear_pending_reg_redist,
        },
        {
-               .base           = GICR_ISACTIVER0,
+               .base           = SGI_base(GICR_ISACTIVER0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_raz_wi,
        },
        {
-               .base           = GICR_ICACTIVER0,
+               .base           = SGI_base(GICR_ICACTIVER0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_raz_wi,
        },
        {
-               .base           = GICR_IPRIORITYR0,
+               .base           = SGI_base(GICR_IPRIORITYR0),
                .len            = 0x20,
                .bits_per_irq   = 8,
                .handle_mmio    = handle_mmio_priority_reg_redist,
        },
        {
-               .base           = GICR_ICFGR0,
+               .base           = SGI_base(GICR_ICFGR0),
                .len            = 0x08,
                .bits_per_irq   = 2,
                .handle_mmio    = handle_mmio_cfg_reg_redist,
        },
        {
-               .base           = GICR_IGRPMODR0,
+               .base           = SGI_base(GICR_IGRPMODR0),
                .len            = 0x04,
                .bits_per_irq   = 1,
                .handle_mmio    = handle_mmio_raz_wi,
        },
        {
-               .base           = GICR_NSACR,
+               .base           = SGI_base(GICR_NSACR),
                .len            = 0x04,
                .handle_mmio    = handle_mmio_raz_wi,
        },
        {},
 };
 
-static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu,
-                                   struct kvm_exit_mmio *mmio,
-                                   phys_addr_t offset)
-{
-       /* since we don't support LPIs, this register is zero for now */
-       vgic_reg_access(mmio, NULL, offset,
-                       ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED);
-       return false;
-}
-
-static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu,
-                                    struct kvm_exit_mmio *mmio,
-                                    phys_addr_t offset)
-{
-       u32 reg;
-       u64 mpidr;
-       struct kvm_vcpu *redist_vcpu = mmio->private;
-       int target_vcpu_id = redist_vcpu->vcpu_id;
-
-       /* the upper 32 bits contain the affinity value */
-       if ((offset & ~3) == 4) {
-               mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu);
-               reg = compress_mpidr(mpidr);
-
-               vgic_reg_access(mmio, &reg, offset,
-                               ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-               return false;
-       }
-
-       reg = redist_vcpu->vcpu_id << 8;
-       if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
-               reg |= GICR_TYPER_LAST;
-       vgic_reg_access(mmio, &reg, offset,
-                       ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
-       return false;
-}
-
-static const struct kvm_mmio_range vgic_redist_ranges[] = {
-       {
-               .base           = GICR_CTLR,
-               .len            = 0x04,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_ctlr_redist,
-       },
-       {
-               .base           = GICR_TYPER,
-               .len            = 0x08,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_typer_redist,
-       },
-       {
-               .base           = GICR_IIDR,
-               .len            = 0x04,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_iidr,
-       },
-       {
-               .base           = GICR_WAKER,
-               .len            = 0x04,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_raz_wi,
-       },
-       {
-               .base           = GICR_IDREGS,
-               .len            = 0x30,
-               .bits_per_irq   = 0,
-               .handle_mmio    = handle_mmio_idregs,
-       },
-       {},
-};
-
-/*
- * This function splits accesses between the distributor and the two
- * redistributor parts (private/SPI). As each redistributor is accessible
- * from any CPU, we have to determine the affected VCPU by taking the faulting
- * address into account. We then pass this VCPU to the handler function via
- * the private parameter.
- */
-#define SGI_BASE_OFFSET SZ_64K
-static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                               struct kvm_exit_mmio *mmio)
-{
-       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-       unsigned long dbase = dist->vgic_dist_base;
-       unsigned long rdbase = dist->vgic_redist_base;
-       int nrcpus = atomic_read(&vcpu->kvm->online_vcpus);
-       int vcpu_id;
-       const struct kvm_mmio_range *mmio_range;
-
-       if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) {
-               return vgic_handle_mmio_range(vcpu, run, mmio,
-                                             vgic_v3_dist_ranges, dbase);
-       }
-
-       if (!is_in_range(mmio->phys_addr, mmio->len, rdbase,
-           GIC_V3_REDIST_SIZE * nrcpus))
-               return false;
-
-       vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE;
-       rdbase += (vcpu_id * GIC_V3_REDIST_SIZE);
-       mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id);
-
-       if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) {
-               rdbase += SGI_BASE_OFFSET;
-               mmio_range = vgic_redist_sgi_ranges;
-       } else {
-               mmio_range = vgic_redist_ranges;
-       }
-       return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase);
-}
-
 static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq)
 {
        if (vgic_queue_irq(vcpu, 0, irq)) {
@@ -766,6 +724,9 @@ static int vgic_v3_map_resources(struct kvm *kvm,
 {
        int ret = 0;
        struct vgic_dist *dist = &kvm->arch.vgic;
+       gpa_t rdbase = dist->vgic_redist_base;
+       struct vgic_io_device *iodevs = NULL;
+       int i;
 
        if (!irqchip_in_kernel(kvm))
                return 0;
@@ -791,7 +752,41 @@ static int vgic_v3_map_resources(struct kvm *kvm,
                goto out;
        }
 
-       kvm->arch.vgic.ready = true;
+       ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base,
+                                      GIC_V3_DIST_SIZE, vgic_v3_dist_ranges,
+                                      -1, &dist->dist_iodev);
+       if (ret)
+               goto out;
+
+       iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL);
+       if (!iodevs) {
+               ret = -ENOMEM;
+               goto out_unregister;
+       }
+
+       for (i = 0; i < dist->nr_cpus; i++) {
+               ret = vgic_register_kvm_io_dev(kvm, rdbase,
+                                              SZ_128K, vgic_redist_ranges,
+                                              i, &iodevs[i]);
+               if (ret)
+                       goto out_unregister;
+               rdbase += GIC_V3_REDIST_SIZE;
+       }
+
+       dist->redist_iodevs = iodevs;
+       dist->ready = true;
+       goto out;
+
+out_unregister:
+       kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev);
+       if (iodevs) {
+               for (i = 0; i < dist->nr_cpus; i++) {
+                       if (iodevs[i].dev.ops)
+                               kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+                                                         &iodevs[i].dev);
+               }
+       }
+
 out:
        if (ret)
                kvm_vgic_destroy(kvm);
@@ -832,7 +827,6 @@ void vgic_v3_init_emulation(struct kvm *kvm)
 {
        struct vgic_dist *dist = &kvm->arch.vgic;
 
-       dist->vm_ops.handle_mmio = vgic_v3_handle_mmio;
        dist->vm_ops.queue_sgi = vgic_v3_queue_sgi;
        dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source;
        dist->vm_ops.init_model = vgic_v3_init_model;
index 3a62d8a..dff0602 100644 (file)
@@ -104,6 +104,8 @@ static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
 {
        if (!(lr_desc.state & LR_STATE_MASK))
                vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
+       else
+               vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr);
 }
 
 static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu)
@@ -116,6 +118,11 @@ static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu)
        return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr;
 }
 
+static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu)
+{
+       vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0;
+}
+
 static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu)
 {
        u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr;
@@ -192,6 +199,7 @@ static const struct vgic_ops vgic_v3_ops = {
        .sync_lr_elrsr          = vgic_v3_sync_lr_elrsr,
        .get_elrsr              = vgic_v3_get_elrsr,
        .get_eisr               = vgic_v3_get_eisr,
+       .clear_eisr             = vgic_v3_clear_eisr,
        .get_interrupt_status   = vgic_v3_get_interrupt_status,
        .enable_underflow       = vgic_v3_enable_underflow,
        .disable_underflow      = vgic_v3_disable_underflow,
index 0cc6ab6..8d550ff 100644 (file)
@@ -31,6 +31,9 @@
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
+#include <trace/events/kvm.h>
+#include <asm/kvm.h>
+#include <kvm/iodev.h>
 
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
@@ -263,6 +266,13 @@ static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq)
        return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq);
 }
 
+static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+       return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq);
+}
+
 static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq)
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -277,6 +287,20 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq)
        vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0);
 }
 
+static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+       vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
+}
+
+static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+       vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
+}
+
 static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq)
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -520,6 +544,44 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
        return false;
 }
 
+bool vgic_handle_set_active_reg(struct kvm *kvm,
+                               struct kvm_exit_mmio *mmio,
+                               phys_addr_t offset, int vcpu_id)
+{
+       u32 *reg;
+       struct vgic_dist *dist = &kvm->arch.vgic;
+
+       reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+       vgic_reg_access(mmio, reg, offset,
+                       ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT);
+
+       if (mmio->is_write) {
+               vgic_update_state(kvm);
+               return true;
+       }
+
+       return false;
+}
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+                                 struct kvm_exit_mmio *mmio,
+                                 phys_addr_t offset, int vcpu_id)
+{
+       u32 *reg;
+       struct vgic_dist *dist = &kvm->arch.vgic;
+
+       reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset);
+       vgic_reg_access(mmio, reg, offset,
+                       ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
+
+       if (mmio->is_write) {
+               vgic_update_state(kvm);
+               return true;
+       }
+
+       return false;
+}
+
 static u32 vgic_cfg_expand(u16 val)
 {
        u32 res = 0;
@@ -588,16 +650,12 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 }
 
 /**
- * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor
  * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
  *
- * Move any pending IRQs that have already been assigned to LRs back to the
+ * Move any IRQs that have already been assigned to LRs back to the
  * emulated distributor state so that the complete emulated state can be read
  * from the main emulation structures without investigating the LRs.
- *
- * Note that IRQs in the active state in the LRs get their pending state moved
- * to the distributor but the active state stays in the LRs, because we don't
- * track the active state on the distributor side.
  */
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 {
@@ -613,12 +671,22 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
                 * 01: pending
                 * 10: active
                 * 11: pending and active
-                *
-                * If the LR holds only an active interrupt (not pending) then
-                * just leave it alone.
                 */
-               if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE)
-                       continue;
+               BUG_ON(!(lr.state & LR_STATE_MASK));
+
+               /* Reestablish SGI source for pending and active IRQs */
+               if (lr.irq < VGIC_NR_SGIS)
+                       add_sgi_source(vcpu, lr.irq, lr.source);
+
+               /*
+                * If the LR holds an active (10) or a pending and active (11)
+                * interrupt then move the active state to the
+                * distributor tracking bit.
+                */
+               if (lr.state & LR_STATE_ACTIVE) {
+                       vgic_irq_set_active(vcpu, lr.irq);
+                       lr.state &= ~LR_STATE_ACTIVE;
+               }
 
                /*
                 * Reestablish the pending state on the distributor and the
@@ -626,21 +694,19 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
                 * is fine, then we are only setting a few bits that were
                 * already set.
                 */
-               vgic_dist_irq_set_pending(vcpu, lr.irq);
-               if (lr.irq < VGIC_NR_SGIS)
-                       add_sgi_source(vcpu, lr.irq, lr.source);
-               lr.state &= ~LR_STATE_PENDING;
+               if (lr.state & LR_STATE_PENDING) {
+                       vgic_dist_irq_set_pending(vcpu, lr.irq);
+                       lr.state &= ~LR_STATE_PENDING;
+               }
+
                vgic_set_lr(vcpu, i, lr);
 
                /*
-                * If there's no state left on the LR (it could still be
-                * active), then the LR does not hold any useful info and can
-                * be marked as free for other use.
+                * Mark the LR as free for other use.
                 */
-               if (!(lr.state & LR_STATE_MASK)) {
-                       vgic_retire_lr(i, lr.irq, vcpu);
-                       vgic_irq_clear_queued(vcpu, lr.irq);
-               }
+               BUG_ON(lr.state & LR_STATE_MASK);
+               vgic_retire_lr(i, lr.irq, vcpu);
+               vgic_irq_clear_queued(vcpu, lr.irq);
 
                /* Finally update the VGIC state. */
                vgic_update_state(vcpu->kvm);
@@ -648,24 +714,21 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 }
 
 const
-struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
-                                      struct kvm_exit_mmio *mmio,
-                                      phys_addr_t offset)
-{
-       const struct kvm_mmio_range *r = ranges;
-
-       while (r->len) {
-               if (offset >= r->base &&
-                   (offset + mmio->len) <= (r->base + r->len))
-                       return r;
-               r++;
+struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
+                                     int len, gpa_t offset)
+{
+       while (ranges->len) {
+               if (offset >= ranges->base &&
+                   (offset + len) <= (ranges->base + ranges->len))
+                       return ranges;
+               ranges++;
        }
 
        return NULL;
 }
 
 static bool vgic_validate_access(const struct vgic_dist *dist,
-                                const struct kvm_mmio_range *range,
+                                const struct vgic_io_range *range,
                                 unsigned long offset)
 {
        int irq;
@@ -693,9 +756,8 @@ static bool vgic_validate_access(const struct vgic_dist *dist,
 static bool call_range_handler(struct kvm_vcpu *vcpu,
                               struct kvm_exit_mmio *mmio,
                               unsigned long offset,
-                              const struct kvm_mmio_range *range)
+                              const struct vgic_io_range *range)
 {
-       u32 *data32 = (void *)mmio->data;
        struct kvm_exit_mmio mmio32;
        bool ret;
 
@@ -712,91 +774,142 @@ static bool call_range_handler(struct kvm_vcpu *vcpu,
        mmio32.private = mmio->private;
 
        mmio32.phys_addr = mmio->phys_addr + 4;
-       if (mmio->is_write)
-               *(u32 *)mmio32.data = data32[1];
+       mmio32.data = &((u32 *)mmio->data)[1];
        ret = range->handle_mmio(vcpu, &mmio32, offset + 4);
-       if (!mmio->is_write)
-               data32[1] = *(u32 *)mmio32.data;
 
        mmio32.phys_addr = mmio->phys_addr;
-       if (mmio->is_write)
-               *(u32 *)mmio32.data = data32[0];
+       mmio32.data = &((u32 *)mmio->data)[0];
        ret |= range->handle_mmio(vcpu, &mmio32, offset);
-       if (!mmio->is_write)
-               data32[0] = *(u32 *)mmio32.data;
 
        return ret;
 }
 
 /**
- * vgic_handle_mmio_range - handle an in-kernel MMIO access
+ * vgic_handle_mmio_access - handle an in-kernel MMIO access
+ * This is called by the read/write KVM IO device wrappers below.
  * @vcpu:      pointer to the vcpu performing the access
- * @run:       pointer to the kvm_run structure
- * @mmio:      pointer to the data describing the access
- * @ranges:    array of MMIO ranges in a given region
- * @mmio_base: base address of that region
+ * @this:      pointer to the KVM IO device in charge
+ * @addr:      guest physical address of the access
+ * @len:       size of the access
+ * @val:       pointer to the data region
+ * @is_write:  read or write access
  *
  * returns true if the MMIO access could be performed
  */
-bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                           struct kvm_exit_mmio *mmio,
-                           const struct kvm_mmio_range *ranges,
-                           unsigned long mmio_base)
+static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
+                                  struct kvm_io_device *this, gpa_t addr,
+                                  int len, void *val, bool is_write)
 {
-       const struct kvm_mmio_range *range;
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       struct vgic_io_device *iodev = container_of(this,
+                                                   struct vgic_io_device, dev);
+       struct kvm_run *run = vcpu->run;
+       const struct vgic_io_range *range;
+       struct kvm_exit_mmio mmio;
        bool updated_state;
-       unsigned long offset;
+       gpa_t offset;
 
-       offset = mmio->phys_addr - mmio_base;
-       range = vgic_find_range(ranges, mmio, offset);
+       offset = addr - iodev->addr;
+       range = vgic_find_range(iodev->reg_ranges, len, offset);
        if (unlikely(!range || !range->handle_mmio)) {
-               pr_warn("Unhandled access %d %08llx %d\n",
-                       mmio->is_write, mmio->phys_addr, mmio->len);
-               return false;
+               pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len);
+               return -ENXIO;
        }
 
-       spin_lock(&vcpu->kvm->arch.vgic.lock);
+       mmio.phys_addr = addr;
+       mmio.len = len;
+       mmio.is_write = is_write;
+       mmio.data = val;
+       mmio.private = iodev->redist_vcpu;
+
+       spin_lock(&dist->lock);
        offset -= range->base;
        if (vgic_validate_access(dist, range, offset)) {
-               updated_state = call_range_handler(vcpu, mmio, offset, range);
+               updated_state = call_range_handler(vcpu, &mmio, offset, range);
        } else {
-               if (!mmio->is_write)
-                       memset(mmio->data, 0, mmio->len);
+               if (!is_write)
+                       memset(val, 0, len);
                updated_state = false;
        }
-       spin_unlock(&vcpu->kvm->arch.vgic.lock);
-       kvm_prepare_mmio(run, mmio);
+       spin_unlock(&dist->lock);
+       run->mmio.is_write      = is_write;
+       run->mmio.len           = len;
+       run->mmio.phys_addr     = addr;
+       memcpy(run->mmio.data, val, len);
+
        kvm_handle_mmio_return(vcpu, run);
 
        if (updated_state)
                vgic_kick_vcpus(vcpu->kvm);
 
-       return true;
+       return 0;
 }
 
+static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu,
+                                struct kvm_io_device *this,
+                                gpa_t addr, int len, void *val)
+{
+       return vgic_handle_mmio_access(vcpu, this, addr, len, val, false);
+}
+
+static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu,
+                                 struct kvm_io_device *this,
+                                 gpa_t addr, int len, const void *val)
+{
+       return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val,
+                                      true);
+}
+
+struct kvm_io_device_ops vgic_io_ops = {
+       .read   = vgic_handle_mmio_read,
+       .write  = vgic_handle_mmio_write,
+};
+
 /**
- * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation
- * @vcpu:      pointer to the vcpu performing the access
- * @run:       pointer to the kvm_run structure
- * @mmio:      pointer to the data describing the access
+ * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus
+ * @kvm:            The VM structure pointer
+ * @base:           The (guest) base address for the register frame
+ * @len:            Length of the register frame window
+ * @ranges:         Describing the handler functions for each register
+ * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call
+ * @iodev:          Points to memory to be passed on to the handler
  *
- * returns true if the MMIO access has been performed in kernel space,
- * and false if it needs to be emulated in user space.
- * Calls the actual handling routine for the selected VGIC model.
+ * @iodev stores the parameters of this function so that the handler and
+ * the dispatcher function can use them (since the KVM I/O bus framework lacks
+ * an opaque parameter). Initialization is done in this function, but the
+ * reference should be valid and unique for the whole VGIC lifetime.
+ * If the register frame is not mapped for a specific VCPU, pass -1 to
+ * @redist_vcpu_id.
  */
-bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                     struct kvm_exit_mmio *mmio)
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+                            const struct vgic_io_range *ranges,
+                            int redist_vcpu_id,
+                            struct vgic_io_device *iodev)
 {
-       if (!irqchip_in_kernel(vcpu->kvm))
-               return false;
+       struct kvm_vcpu *vcpu = NULL;
+       int ret;
 
-       /*
-        * This will currently call either vgic_v2_handle_mmio() or
-        * vgic_v3_handle_mmio(), which in turn will call
-        * vgic_handle_mmio_range() defined above.
-        */
-       return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio);
+       if (redist_vcpu_id >= 0)
+               vcpu = kvm_get_vcpu(kvm, redist_vcpu_id);
+
+       iodev->addr             = base;
+       iodev->len              = len;
+       iodev->reg_ranges       = ranges;
+       iodev->redist_vcpu      = vcpu;
+
+       kvm_iodevice_init(&iodev->dev, &vgic_io_ops);
+
+       mutex_lock(&kvm->slots_lock);
+
+       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len,
+                                     &iodev->dev);
+       mutex_unlock(&kvm->slots_lock);
+
+       /* Mark the iodev as invalid if registration fails. */
+       if (ret)
+               iodev->dev.ops = NULL;
+
+       return ret;
 }
 
 static int vgic_nr_shared_irqs(struct vgic_dist *dist)
@@ -804,6 +917,36 @@ static int vgic_nr_shared_irqs(struct vgic_dist *dist)
        return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS;
 }
 
+static int compute_active_for_cpu(struct kvm_vcpu *vcpu)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       unsigned long *active, *enabled, *act_percpu, *act_shared;
+       unsigned long active_private, active_shared;
+       int nr_shared = vgic_nr_shared_irqs(dist);
+       int vcpu_id;
+
+       vcpu_id = vcpu->vcpu_id;
+       act_percpu = vcpu->arch.vgic_cpu.active_percpu;
+       act_shared = vcpu->arch.vgic_cpu.active_shared;
+
+       active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id);
+       enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
+       bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS);
+
+       active = vgic_bitmap_get_shared_map(&dist->irq_active);
+       enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled);
+       bitmap_and(act_shared, active, enabled, nr_shared);
+       bitmap_and(act_shared, act_shared,
+                  vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]),
+                  nr_shared);
+
+       active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS);
+       active_shared = find_first_bit(act_shared, nr_shared);
+
+       return (active_private < VGIC_NR_PRIVATE_IRQS ||
+               active_shared < nr_shared);
+}
+
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 {
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
@@ -835,7 +978,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
 
 /*
  * Update the interrupt state and determine which CPUs have pending
- * interrupts. Must be called with distributor lock held.
+ * or active interrupts. Must be called with distributor lock held.
  */
 void vgic_update_state(struct kvm *kvm)
 {
@@ -849,10 +992,13 @@ void vgic_update_state(struct kvm *kvm)
        }
 
        kvm_for_each_vcpu(c, vcpu, kvm) {
-               if (compute_pending_for_cpu(vcpu)) {
-                       pr_debug("CPU%d has pending interrupts\n", c);
+               if (compute_pending_for_cpu(vcpu))
                        set_bit(c, dist->irq_pending_on_cpu);
-               }
+
+               if (compute_active_for_cpu(vcpu))
+                       set_bit(c, dist->irq_active_on_cpu);
+               else
+                       clear_bit(c, dist->irq_active_on_cpu);
        }
 }
 
@@ -883,6 +1029,11 @@ static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu)
        return vgic_ops->get_eisr(vcpu);
 }
 
+static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu)
+{
+       vgic_ops->clear_eisr(vcpu);
+}
+
 static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu)
 {
        return vgic_ops->get_interrupt_status(vcpu);
@@ -922,6 +1073,7 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
        vgic_set_lr(vcpu, lr_nr, vlr);
        clear_bit(lr_nr, vgic_cpu->lr_used);
        vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+       vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 
 /*
@@ -949,6 +1101,26 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
        }
 }
 
+static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
+                                int lr_nr, struct vgic_lr vlr)
+{
+       if (vgic_irq_is_active(vcpu, irq)) {
+               vlr.state |= LR_STATE_ACTIVE;
+               kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
+               vgic_irq_clear_active(vcpu, irq);
+               vgic_update_state(vcpu->kvm);
+       } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
+               vlr.state |= LR_STATE_PENDING;
+               kvm_debug("Set pending: 0x%x\n", vlr.state);
+       }
+
+       if (!vgic_irq_is_edge(vcpu, irq))
+               vlr.state |= LR_EOI_INT;
+
+       vgic_set_lr(vcpu, lr_nr, vlr);
+       vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
+}
+
 /*
  * Queue an interrupt to a CPU virtual interface. Return true on success,
  * or false if it wasn't possible to queue it.
@@ -976,8 +1148,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
                if (vlr.source == sgi_source_id) {
                        kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
                        BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
-                       vlr.state |= LR_STATE_PENDING;
-                       vgic_set_lr(vcpu, lr, vlr);
+                       vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
                        return true;
                }
        }
@@ -994,11 +1165,8 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
        vlr.irq = irq;
        vlr.source = sgi_source_id;
-       vlr.state = LR_STATE_PENDING;
-       if (!vgic_irq_is_edge(vcpu, irq))
-               vlr.state |= LR_EOI_INT;
-
-       vgic_set_lr(vcpu, lr, vlr);
+       vlr.state = 0;
+       vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
 
        return true;
 }
@@ -1030,39 +1198,49 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
        struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+       unsigned long *pa_percpu, *pa_shared;
        int i, vcpu_id;
        int overflow = 0;
+       int nr_shared = vgic_nr_shared_irqs(dist);
 
        vcpu_id = vcpu->vcpu_id;
 
+       pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu;
+       pa_shared = vcpu->arch.vgic_cpu.pend_act_shared;
+
+       bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu,
+                 VGIC_NR_PRIVATE_IRQS);
+       bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared,
+                 nr_shared);
        /*
         * We may not have any pending interrupt, or the interrupts
         * may have been serviced from another vcpu. In all cases,
         * move along.
         */
-       if (!kvm_vgic_vcpu_pending_irq(vcpu)) {
-               pr_debug("CPU%d has no pending interrupt\n", vcpu_id);
+       if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu))
                goto epilog;
-       }
 
        /* SGIs */
-       for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) {
+       for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) {
                if (!queue_sgi(vcpu, i))
                        overflow = 1;
        }
 
        /* PPIs */
-       for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) {
+       for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) {
                if (!vgic_queue_hwirq(vcpu, i))
                        overflow = 1;
        }
 
        /* SPIs */
-       for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) {
+       for_each_set_bit(i, pa_shared, nr_shared) {
                if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS))
                        overflow = 1;
        }
 
+
+
+
 epilog:
        if (overflow) {
                vgic_enable_underflow(vcpu);
@@ -1081,7 +1259,9 @@ epilog:
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
        u32 status = vgic_get_interrupt_status(vcpu);
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
        bool level_pending = false;
+       struct kvm *kvm = vcpu->kvm;
 
        kvm_debug("STATUS = %08x\n", status);
 
@@ -1098,6 +1278,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
                        struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
                        WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 
+                       spin_lock(&dist->lock);
                        vgic_irq_clear_queued(vcpu, vlr.irq);
                        WARN_ON(vlr.state & LR_STATE_MASK);
                        vlr.state = 0;
@@ -1116,6 +1297,17 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
                         */
                        vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
 
+                       /*
+                        * kvm_notify_acked_irq calls kvm_set_irq()
+                        * to reset the IRQ level. Need to release the
+                        * lock for kvm_set_irq to grab it.
+                        */
+                       spin_unlock(&dist->lock);
+
+                       kvm_notify_acked_irq(kvm, 0,
+                                            vlr.irq - VGIC_NR_PRIVATE_IRQS);
+                       spin_lock(&dist->lock);
+
                        /* Any additional pending interrupt? */
                        if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
                                vgic_cpu_irq_set(vcpu, vlr.irq);
@@ -1125,6 +1317,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
                                vgic_cpu_irq_clear(vcpu, vlr.irq);
                        }
 
+                       spin_unlock(&dist->lock);
+
                        /*
                         * Despite being EOIed, the LR may not have
                         * been marked as empty.
@@ -1136,13 +1330,18 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
        if (status & INT_STATUS_UNDERFLOW)
                vgic_disable_underflow(vcpu);
 
+       /*
+        * In the next iterations of the vcpu loop, if we sync the vgic state
+        * after flushing it, but before entering the guest (this happens for
+        * pending signals and vmid rollovers), then make sure we don't pick
+        * up any old maintenance interrupts here.
+        */
+       vgic_clear_eisr(vcpu);
+
        return level_pending;
 }
 
-/*
- * Sync back the VGIC state after a guest run. The distributor lock is
- * needed so we don't get preempted in the middle of the state processing.
- */
+/* Sync back the VGIC state after a guest run */
 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1189,14 +1388,10 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 
 void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-
        if (!irqchip_in_kernel(vcpu->kvm))
                return;
 
-       spin_lock(&dist->lock);
        __kvm_vgic_sync_hwstate(vcpu);
-       spin_unlock(&dist->lock);
 }
 
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
@@ -1209,6 +1404,17 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
        return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
 }
 
+int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu)
+{
+       struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+
+       if (!irqchip_in_kernel(vcpu->kvm))
+               return 0;
+
+       return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
+}
+
+
 void vgic_kick_vcpus(struct kvm *kvm)
 {
        struct kvm_vcpu *vcpu;
@@ -1381,8 +1587,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
        struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 
        kfree(vgic_cpu->pending_shared);
+       kfree(vgic_cpu->active_shared);
+       kfree(vgic_cpu->pend_act_shared);
        kfree(vgic_cpu->vgic_irq_lr_map);
        vgic_cpu->pending_shared = NULL;
+       vgic_cpu->active_shared = NULL;
+       vgic_cpu->pend_act_shared = NULL;
        vgic_cpu->vgic_irq_lr_map = NULL;
 }
 
@@ -1392,9 +1602,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 
        int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
        vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
+       vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
+       vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
        vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
-       if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
+       if (!vgic_cpu->pending_shared
+               || !vgic_cpu->active_shared
+               || !vgic_cpu->pend_act_shared
+               || !vgic_cpu->vgic_irq_lr_map) {
                kvm_vgic_vcpu_destroy(vcpu);
                return -ENOMEM;
        }
@@ -1447,10 +1662,12 @@ void kvm_vgic_destroy(struct kvm *kvm)
        kfree(dist->irq_spi_mpidr);
        kfree(dist->irq_spi_target);
        kfree(dist->irq_pending_on_cpu);
+       kfree(dist->irq_active_on_cpu);
        dist->irq_sgi_sources = NULL;
        dist->irq_spi_cpu = NULL;
        dist->irq_spi_target = NULL;
        dist->irq_pending_on_cpu = NULL;
+       dist->irq_active_on_cpu = NULL;
        dist->nr_cpus = 0;
 }
 
@@ -1486,6 +1703,7 @@ int vgic_init(struct kvm *kvm)
        ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs);
        ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs);
        ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs);
+       ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs);
        ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs);
        ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs);
 
@@ -1498,10 +1716,13 @@ int vgic_init(struct kvm *kvm)
                                       GFP_KERNEL);
        dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
                                           GFP_KERNEL);
+       dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long),
+                                          GFP_KERNEL);
        if (!dist->irq_sgi_sources ||
            !dist->irq_spi_cpu ||
            !dist->irq_spi_target ||
-           !dist->irq_pending_on_cpu) {
+           !dist->irq_pending_on_cpu ||
+           !dist->irq_active_on_cpu) {
                ret = -ENOMEM;
                goto out;
        }
@@ -1583,8 +1804,10 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
         * emulation. So check this here again. KVM_CREATE_DEVICE does
         * the proper checks already.
         */
-       if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2)
-               return -ENODEV;
+       if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) {
+               ret = -ENODEV;
+               goto out;
+       }
 
        /*
         * Any time a vcpu is run, vcpu_load is called which tries to grab the
@@ -1827,12 +2050,9 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
        return r;
 }
 
-int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset)
+int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset)
 {
-       struct kvm_exit_mmio dev_attr_mmio;
-
-       dev_attr_mmio.len = 4;
-       if (vgic_find_range(ranges, &dev_attr_mmio, offset))
+       if (vgic_find_range(ranges, 4, offset))
                return 0;
        else
                return -ENXIO;
@@ -1865,8 +2085,10 @@ static struct notifier_block vgic_cpu_nb = {
 };
 
 static const struct of_device_id vgic_ids[] = {
-       { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
-       { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
+       { .compatible = "arm,cortex-a15-gic",   .data = vgic_v2_probe, },
+       { .compatible = "arm,cortex-a7-gic",    .data = vgic_v2_probe, },
+       { .compatible = "arm,gic-400",          .data = vgic_v2_probe, },
+       { .compatible = "arm,gic-v3",           .data = vgic_v3_probe, },
        {},
 };
 
@@ -1914,3 +2136,38 @@ out_free_irq:
        free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus());
        return ret;
 }
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+                   struct kvm_kernel_irq_routing_entry *entries,
+                   int gsi)
+{
+       return gsi;
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
+       return pin;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id,
+               u32 irq, int level, bool line_status)
+{
+       unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
+
+       trace_kvm_set_irq(irq, level, irq_source_id);
+
+       BUG_ON(!vgic_initialized(kvm));
+
+       if (spi > kvm->arch.vgic.nr_irqs)
+               return -EINVAL;
+       return kvm_vgic_inject_irq(kvm, 0, spi, level);
+
+}
+
+/* MSI not implemented yet */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+               struct kvm *kvm, int irq_source_id,
+               int level, bool line_status)
+{
+       return 0;
+}
index 1e83bdf..0df74cb 100644 (file)
@@ -20,6 +20,8 @@
 #ifndef __KVM_VGIC_H__
 #define __KVM_VGIC_H__
 
+#include <kvm/iodev.h>
+
 #define VGIC_ADDR_UNDEF                (-1)
 #define IS_VGIC_ADDR_UNDEF(_x)  ((_x) == VGIC_ADDR_UNDEF)
 
@@ -57,6 +59,14 @@ void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
 bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq);
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu);
 
+struct kvm_exit_mmio {
+       phys_addr_t     phys_addr;
+       void            *data;
+       u32             len;
+       bool            is_write;
+       void            *private;
+};
+
 void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg,
                     phys_addr_t offset, int mode);
 bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio,
@@ -74,7 +84,7 @@ void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value)
        *((u32 *)mmio->data) = cpu_to_le32(value) & mask;
 }
 
-struct kvm_mmio_range {
+struct vgic_io_range {
        phys_addr_t base;
        unsigned long len;
        int bits_per_irq;
@@ -82,6 +92,11 @@ struct kvm_mmio_range {
                            phys_addr_t offset);
 };
 
+int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len,
+                            const struct vgic_io_range *ranges,
+                            int redist_id,
+                            struct vgic_io_device *iodev);
+
 static inline bool is_in_range(phys_addr_t addr, unsigned long len,
                               phys_addr_t baseaddr, unsigned long size)
 {
@@ -89,14 +104,8 @@ static inline bool is_in_range(phys_addr_t addr, unsigned long len,
 }
 
 const
-struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges,
-                                      struct kvm_exit_mmio *mmio,
-                                      phys_addr_t offset);
-
-bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run,
-                           struct kvm_exit_mmio *mmio,
-                           const struct kvm_mmio_range *ranges,
-                           unsigned long mmio_base);
+struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges,
+                                     int len, gpa_t offset);
 
 bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
                            phys_addr_t offset, int vcpu_id, int access);
@@ -107,12 +116,20 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
 bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio,
                                   phys_addr_t offset, int vcpu_id);
 
+bool vgic_handle_set_active_reg(struct kvm *kvm,
+                               struct kvm_exit_mmio *mmio,
+                               phys_addr_t offset, int vcpu_id);
+
+bool vgic_handle_clear_active_reg(struct kvm *kvm,
+                                 struct kvm_exit_mmio *mmio,
+                                 phys_addr_t offset, int vcpu_id);
+
 bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
                         phys_addr_t offset);
 
 void vgic_kick_vcpus(struct kvm *kvm);
 
-int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset);
+int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset);
 int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
 int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr);
 
index 00d8642..571c1ce 100644 (file)
@@ -8,7 +8,7 @@
  *
  */
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 #include <linux/slab.h>
@@ -60,8 +60,9 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
        return 1;
 }
 
-static int coalesced_mmio_write(struct kvm_io_device *this,
-                               gpa_t addr, int len, const void *val)
+static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
+                               struct kvm_io_device *this, gpa_t addr,
+                               int len, const void *val)
 {
        struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
        struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
index 148b239..9ff4193 100644 (file)
@@ -36,7 +36,7 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
 /*
@@ -311,6 +311,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
        unsigned int events;
        int idx;
 
+       if (!kvm_arch_intc_initialized(kvm))
+               return -EAGAIN;
+
        irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
        if (!irqfd)
                return -ENOMEM;
@@ -712,8 +715,8 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
 
 /* MMIO/PIO writes trigger an event if the addr/val match */
 static int
-ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
-               const void *val)
+ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
+               int len, const void *val)
 {
        struct _ioeventfd *p = to_ioeventfd(this);
 
index 7f256f3..1d56a90 100644 (file)
@@ -105,7 +105,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
        i = kvm_irq_map_gsi(kvm, irq_set, irq);
        srcu_read_unlock(&kvm->irq_srcu, idx);
 
-       while(i--) {
+       while (i--) {
                int r;
                r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level,
                                   line_status);
index a109370..d3fc939 100644 (file)
@@ -16,7 +16,7 @@
  *
  */
 
-#include "iodev.h"
+#include <kvm/iodev.h>
 
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
 
-unsigned int halt_poll_ns = 0;
+static unsigned int halt_poll_ns;
 module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
 
 /*
  * Ordering of locks:
  *
- *             kvm->lock --> kvm->slots_lock --> kvm->irq_lock
+ *     kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */
 
 DEFINE_SPINLOCK(kvm_lock);
@@ -80,7 +80,7 @@ static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);
 
 static cpumask_var_t cpus_hardware_enabled;
-static int kvm_usage_count = 0;
+static int kvm_usage_count;
 static atomic_t hardware_enable_failed;
 
 struct kmem_cache *kvm_vcpu_cache;
@@ -471,7 +471,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
        BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
 
        r = -ENOMEM;
-       kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+       kvm->memslots = kvm_kvzalloc(sizeof(struct kvm_memslots));
        if (!kvm->memslots)
                goto out_err_no_srcu;
 
@@ -522,7 +522,7 @@ out_err_no_srcu:
 out_err_no_disable:
        for (i = 0; i < KVM_NR_BUSES; i++)
                kfree(kvm->buses[i]);
-       kfree(kvm->memslots);
+       kvfree(kvm->memslots);
        kvm_arch_free_vm(kvm);
        return ERR_PTR(r);
 }
@@ -539,20 +539,12 @@ void *kvm_kvzalloc(unsigned long size)
                return kzalloc(size, GFP_KERNEL);
 }
 
-void kvm_kvfree(const void *addr)
-{
-       if (is_vmalloc_addr(addr))
-               vfree(addr);
-       else
-               kfree(addr);
-}
-
 static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 {
        if (!memslot->dirty_bitmap)
                return;
 
-       kvm_kvfree(memslot->dirty_bitmap);
+       kvfree(memslot->dirty_bitmap);
        memslot->dirty_bitmap = NULL;
 }
 
@@ -578,7 +570,7 @@ static void kvm_free_physmem(struct kvm *kvm)
        kvm_for_each_memslot(memslot, slots)
                kvm_free_physmem_slot(kvm, memslot, NULL);
 
-       kfree(kvm->memslots);
+       kvfree(kvm->memslots);
 }
 
 static void kvm_destroy_devices(struct kvm *kvm)
@@ -871,10 +863,10 @@ int __kvm_set_memory_region(struct kvm *kvm,
                        goto out_free;
        }
 
-       slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-                       GFP_KERNEL);
+       slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
        if (!slots)
                goto out_free;
+       memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
 
        if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
                slot = id_to_memslot(slots, mem->slot);
@@ -888,8 +880,8 @@ int __kvm_set_memory_region(struct kvm *kvm,
                 * or moved, memslot will be created.
                 *
                 * validation of sp->gfn happens in:
-                *      - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
-                *      - kvm_is_visible_gfn (mmu_check_roots)
+                *      - gfn_to_hva (kvm_read_guest, gfn_to_pfn)
+                *      - kvm_is_visible_gfn (mmu_check_roots)
                 */
                kvm_arch_flush_shadow_memslot(kvm, slot);
 
@@ -917,7 +909,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
        kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
        kvm_free_physmem_slot(kvm, &old, &new);
-       kfree(old_memslots);
+       kvfree(old_memslots);
 
        /*
         * IOMMU mapping:  New slots need to be mapped.  Old slots need to be
@@ -936,7 +928,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
        return 0;
 
 out_slots:
-       kfree(slots);
+       kvfree(slots);
 out_free:
        kvm_free_physmem_slot(kvm, &new, &old);
 out:
@@ -1061,9 +1053,11 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
                mask = xchg(&dirty_bitmap[i], 0);
                dirty_bitmap_buffer[i] = mask;
 
-               offset = i * BITS_PER_LONG;
-               kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset,
-                                                               mask);
+               if (mask) {
+                       offset = i * BITS_PER_LONG;
+                       kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
+                                                               offset, mask);
+               }
        }
 
        spin_unlock(&kvm->mmu_lock);
@@ -1193,16 +1187,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
        return gfn_to_hva_memslot_prot(slot, gfn, writable);
 }
 
-static int kvm_read_hva(void *data, void __user *hva, int len)
-{
-       return __copy_from_user(data, hva, len);
-}
-
-static int kvm_read_hva_atomic(void *data, void __user *hva, int len)
-{
-       return __copy_from_user_inatomic(data, hva, len);
-}
-
 static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
        unsigned long start, int write, struct page **page)
 {
@@ -1481,7 +1465,6 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
        return kvm_pfn_to_page(pfn);
 }
-
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
@@ -1517,6 +1500,7 @@ void kvm_set_pfn_dirty(pfn_t pfn)
 {
        if (!kvm_is_reserved_pfn(pfn)) {
                struct page *page = pfn_to_page(pfn);
+
                if (!PageReserved(page))
                        SetPageDirty(page);
        }
@@ -1554,7 +1538,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
        addr = gfn_to_hva_prot(kvm, gfn, NULL);
        if (kvm_is_error_hva(addr))
                return -EFAULT;
-       r = kvm_read_hva(data, (void __user *)addr + offset, len);
+       r = __copy_from_user(data, (void __user *)addr + offset, len);
        if (r)
                return -EFAULT;
        return 0;
@@ -1593,7 +1577,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
        if (kvm_is_error_hva(addr))
                return -EFAULT;
        pagefault_disable();
-       r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len);
+       r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len);
        pagefault_enable();
        if (r)
                return -EFAULT;
@@ -1653,8 +1637,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
        ghc->generation = slots->generation;
        ghc->len = len;
        ghc->memslot = gfn_to_memslot(kvm, start_gfn);
-       ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail);
-       if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) {
+       ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL);
+       if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) {
                ghc->hva += offset;
        } else {
                /*
@@ -1742,7 +1726,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len)
        int offset = offset_in_page(gpa);
        int ret;
 
-        while ((seg = next_segment(len, offset)) != 0) {
+       while ((seg = next_segment(len, offset)) != 0) {
                ret = kvm_clear_guest_page(kvm, gfn, offset, seg);
                if (ret < 0)
                        return ret;
@@ -1800,6 +1784,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
        start = cur = ktime_get();
        if (halt_poll_ns) {
                ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+
                do {
                        /*
                         * This sets KVM_REQ_UNHALT if an interrupt
@@ -2118,7 +2103,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
         * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
         * so vcpu_load() would break it.
         */
-       if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
+       if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
                return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
 #endif
 
@@ -2135,6 +2120,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
                        /* The thread running this VCPU changed. */
                        struct pid *oldpid = vcpu->pid;
                        struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+
                        rcu_assign_pointer(vcpu->pid, newpid);
                        if (oldpid)
                                synchronize_rcu();
@@ -2205,7 +2191,7 @@ out_free1:
                if (r)
                        goto out;
                r = -EFAULT;
-               if (copy_to_user(argp, &mp_state, sizeof mp_state))
+               if (copy_to_user(argp, &mp_state, sizeof(mp_state)))
                        goto out;
                r = 0;
                break;
@@ -2214,7 +2200,7 @@ out_free1:
                struct kvm_mp_state mp_state;
 
                r = -EFAULT;
-               if (copy_from_user(&mp_state, argp, sizeof mp_state))
+               if (copy_from_user(&mp_state, argp, sizeof(mp_state)))
                        goto out;
                r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state);
                break;
@@ -2223,13 +2209,13 @@ out_free1:
                struct kvm_translation tr;
 
                r = -EFAULT;
-               if (copy_from_user(&tr, argp, sizeof tr))
+               if (copy_from_user(&tr, argp, sizeof(tr)))
                        goto out;
                r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
                if (r)
                        goto out;
                r = -EFAULT;
-               if (copy_to_user(argp, &tr, sizeof tr))
+               if (copy_to_user(argp, &tr, sizeof(tr)))
                        goto out;
                r = 0;
                break;
@@ -2238,7 +2224,7 @@ out_free1:
                struct kvm_guest_debug dbg;
 
                r = -EFAULT;
-               if (copy_from_user(&dbg, argp, sizeof dbg))
+               if (copy_from_user(&dbg, argp, sizeof(dbg)))
                        goto out;
                r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg);
                break;
@@ -2252,14 +2238,14 @@ out_free1:
                if (argp) {
                        r = -EFAULT;
                        if (copy_from_user(&kvm_sigmask, argp,
-                                          sizeof kvm_sigmask))
+                                          sizeof(kvm_sigmask)))
                                goto out;
                        r = -EINVAL;
-                       if (kvm_sigmask.len != sizeof sigset)
+                       if (kvm_sigmask.len != sizeof(sigset))
                                goto out;
                        r = -EFAULT;
                        if (copy_from_user(&sigset, sigmask_arg->sigset,
-                                          sizeof sigset))
+                                          sizeof(sigset)))
                                goto out;
                        p = &sigset;
                }
@@ -2321,14 +2307,14 @@ static long kvm_vcpu_compat_ioctl(struct file *filp,
                if (argp) {
                        r = -EFAULT;
                        if (copy_from_user(&kvm_sigmask, argp,
-                                          sizeof kvm_sigmask))
+                                          sizeof(kvm_sigmask)))
                                goto out;
                        r = -EINVAL;
-                       if (kvm_sigmask.len != sizeof csigset)
+                       if (kvm_sigmask.len != sizeof(csigset))
                                goto out;
                        r = -EFAULT;
                        if (copy_from_user(&csigset, sigmask_arg->sigset,
-                                          sizeof csigset))
+                                          sizeof(csigset)))
                                goto out;
                        sigset_from_compat(&sigset, &csigset);
                        r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset);
@@ -2492,6 +2478,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
        case KVM_CAP_SIGNAL_MSI:
 #endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
+       case KVM_CAP_IRQFD:
        case KVM_CAP_IRQFD_RESAMPLE:
 #endif
        case KVM_CAP_CHECK_EXTENSION_VM:
@@ -2524,7 +2511,7 @@ static long kvm_vm_ioctl(struct file *filp,
 
                r = -EFAULT;
                if (copy_from_user(&kvm_userspace_mem, argp,
-                                               sizeof kvm_userspace_mem))
+                                               sizeof(kvm_userspace_mem)))
                        goto out;
 
                r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem);
@@ -2534,7 +2521,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_dirty_log log;
 
                r = -EFAULT;
-               if (copy_from_user(&log, argp, sizeof log))
+               if (copy_from_user(&log, argp, sizeof(log)))
                        goto out;
                r = kvm_vm_ioctl_get_dirty_log(kvm, &log);
                break;
@@ -2542,16 +2529,18 @@ static long kvm_vm_ioctl(struct file *filp,
 #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET
        case KVM_REGISTER_COALESCED_MMIO: {
                struct kvm_coalesced_mmio_zone zone;
+
                r = -EFAULT;
-               if (copy_from_user(&zone, argp, sizeof zone))
+               if (copy_from_user(&zone, argp, sizeof(zone)))
                        goto out;
                r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone);
                break;
        }
        case KVM_UNREGISTER_COALESCED_MMIO: {
                struct kvm_coalesced_mmio_zone zone;
+
                r = -EFAULT;
-               if (copy_from_user(&zone, argp, sizeof zone))
+               if (copy_from_user(&zone, argp, sizeof(zone)))
                        goto out;
                r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone);
                break;
@@ -2561,7 +2550,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_irqfd data;
 
                r = -EFAULT;
-               if (copy_from_user(&data, argp, sizeof data))
+               if (copy_from_user(&data, argp, sizeof(data)))
                        goto out;
                r = kvm_irqfd(kvm, &data);
                break;
@@ -2570,7 +2559,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_ioeventfd data;
 
                r = -EFAULT;
-               if (copy_from_user(&data, argp, sizeof data))
+               if (copy_from_user(&data, argp, sizeof(data)))
                        goto out;
                r = kvm_ioeventfd(kvm, &data);
                break;
@@ -2591,7 +2580,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_msi msi;
 
                r = -EFAULT;
-               if (copy_from_user(&msi, argp, sizeof msi))
+               if (copy_from_user(&msi, argp, sizeof(msi)))
                        goto out;
                r = kvm_send_userspace_msi(kvm, &msi);
                break;
@@ -2603,7 +2592,7 @@ static long kvm_vm_ioctl(struct file *filp,
                struct kvm_irq_level irq_event;
 
                r = -EFAULT;
-               if (copy_from_user(&irq_event, argp, sizeof irq_event))
+               if (copy_from_user(&irq_event, argp, sizeof(irq_event)))
                        goto out;
 
                r = kvm_vm_ioctl_irq_line(kvm, &irq_event,
@@ -2613,7 +2602,7 @@ static long kvm_vm_ioctl(struct file *filp,
 
                r = -EFAULT;
                if (ioctl == KVM_IRQ_LINE_STATUS) {
-                       if (copy_to_user(argp, &irq_event, sizeof irq_event))
+                       if (copy_to_user(argp, &irq_event, sizeof(irq_event)))
                                goto out;
                }
 
@@ -2646,7 +2635,7 @@ static long kvm_vm_ioctl(struct file *filp,
                        goto out_free_irq_routing;
                r = kvm_set_irq_routing(kvm, entries, routing.nr,
                                        routing.flags);
-       out_free_irq_routing:
+out_free_irq_routing:
                vfree(entries);
                break;
        }
@@ -2821,8 +2810,7 @@ static void hardware_enable_nolock(void *junk)
        if (r) {
                cpumask_clear_cpu(cpu, cpus_hardware_enabled);
                atomic_inc(&hardware_enable_failed);
-               printk(KERN_INFO "kvm: enabling virtualization on "
-                                "CPU%d failed\n", cpu);
+               pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu);
        }
 }
 
@@ -2898,12 +2886,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
        val &= ~CPU_TASKS_FROZEN;
        switch (val) {
        case CPU_DYING:
-               printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
+               pr_info("kvm: disabling virtualization on CPU%d\n",
                       cpu);
                hardware_disable();
                break;
        case CPU_STARTING:
-               printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
+               pr_info("kvm: enabling virtualization on CPU%d\n",
                       cpu);
                hardware_enable();
                break;
@@ -2920,7 +2908,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
         *
         * And Intel TXT required VMX off for all cpu when system shutdown.
         */
-       printk(KERN_INFO "kvm: exiting hardware virtualization\n");
+       pr_info("kvm: exiting hardware virtualization\n");
        kvm_rebooting = true;
        on_each_cpu(hardware_disable_nolock, NULL, 1);
        return NOTIFY_OK;
@@ -2944,7 +2932,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 }
 
 static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
-                                 const struct kvm_io_range *r2)
+                                const struct kvm_io_range *r2)
 {
        if (r1->addr < r2->addr)
                return -1;
@@ -2997,7 +2985,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
        return off;
 }
 
-static int __kvm_io_bus_write(struct kvm_io_bus *bus,
+static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
                              struct kvm_io_range *range, const void *val)
 {
        int idx;
@@ -3008,7 +2996,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
 
        while (idx < bus->dev_count &&
                kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
-               if (!kvm_iodevice_write(bus->range[idx].dev, range->addr,
+               if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr,
                                        range->len, val))
                        return idx;
                idx++;
@@ -3018,7 +3006,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus,
 }
 
 /* kvm_io_bus_write - called under kvm->slots_lock */
-int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
                     int len, const void *val)
 {
        struct kvm_io_bus *bus;
@@ -3030,14 +3018,14 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                .len = len,
        };
 
-       bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-       r = __kvm_io_bus_write(bus, &range, val);
+       bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       r = __kvm_io_bus_write(vcpu, bus, &range, val);
        return r < 0 ? r : 0;
 }
 
 /* kvm_io_bus_write_cookie - called under kvm->slots_lock */
-int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
-                           int len, const void *val, long cookie)
+int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx,
+                           gpa_t addr, int len, const void *val, long cookie)
 {
        struct kvm_io_bus *bus;
        struct kvm_io_range range;
@@ -3047,12 +3035,12 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                .len = len,
        };
 
-       bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
+       bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
 
        /* First try the device referenced by cookie. */
        if ((cookie >= 0) && (cookie < bus->dev_count) &&
            (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0))
-               if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len,
+               if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len,
                                        val))
                        return cookie;
 
@@ -3060,11 +3048,11 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
         * cookie contained garbage; fall back to search and return the
         * correct cookie value.
         */
-       return __kvm_io_bus_write(bus, &range, val);
+       return __kvm_io_bus_write(vcpu, bus, &range, val);
 }
 
-static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
-                            void *val)
+static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus,
+                            struct kvm_io_range *range, void *val)
 {
        int idx;
 
@@ -3074,7 +3062,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
 
        while (idx < bus->dev_count &&
                kvm_io_bus_cmp(range, &bus->range[idx]) == 0) {
-               if (!kvm_iodevice_read(bus->range[idx].dev, range->addr,
+               if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr,
                                       range->len, val))
                        return idx;
                idx++;
@@ -3085,7 +3073,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
 EXPORT_SYMBOL_GPL(kvm_io_bus_write);
 
 /* kvm_io_bus_read - called under kvm->slots_lock */
-int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr,
                    int len, void *val)
 {
        struct kvm_io_bus *bus;
@@ -3097,8 +3085,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                .len = len,
        };
 
-       bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-       r = __kvm_io_bus_read(bus, &range, val);
+       bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu);
+       r = __kvm_io_bus_read(vcpu, bus, &range, val);
        return r < 0 ? r : 0;
 }
 
@@ -3268,6 +3256,7 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
 static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
        struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
+
        if (vcpu->preempted)
                vcpu->preempted = false;
 
@@ -3349,7 +3338,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
        r = misc_register(&kvm_dev);
        if (r) {
-               printk(KERN_ERR "kvm: misc device register failed\n");
+               pr_err("kvm: misc device register failed\n");
                goto out_unreg;
        }
 
@@ -3360,7 +3349,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 
        r = kvm_init_debug();
        if (r) {
-               printk(KERN_ERR "kvm: create debugfs files failed\n");
+               pr_err("kvm: create debugfs files failed\n");
                goto out_undebugfs;
        }