Merge branch 'for-linus' of git://git.alsa-project.org/alsa-kernel
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 14 Jul 2008 20:26:07 +0000 (13:26 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 14 Jul 2008 20:26:07 +0000 (13:26 -0700)
* 'for-linus' of git://git.alsa-project.org/alsa-kernel: (179 commits)
  ALSA: Release v1.0.17
  ALSA: correct kcalloc usage
  ALSA: ALSA driver for SGI O2 audio board
  ALSA: asoc: kbuild - only show menus for the current ASoC CPU platform.
  ALSA: ALSA driver for SGI HAL2 audio device
  ALSA: hda - Fix FSC V5505 model
  ALSA: hda - Fix missing init for unsol events on micsense model
  ALSA: hda - Fix internal mic vref pin setup
  ALSA: hda: 92hd71bxx PC Beep
  ALSA: HDA - HP dc7600 with pci sub IDs 0x103c/0x3011 belongs to hp-3013 model
  ALSA: usb-audio: add some Yamaha USB MIDI quirks
  ALSA: usb-audio: fix Yamaha KX quirk
  ALSA: ASoC: Au12x0/Au1550 PSC Audio support
  ALSA: Add Yamaha KX49 (USB MIDI controller) to usbquirks.h
  ALSA: ASoC: pxa2xx-ac97: fix warning due to missing argument in function declaration
  ALSA: tosa: fix compilation with new DAPM API
  ALSA: wavefront - add const
  ALSA: remove CONFIG_KMOD from sound
  ALSA: Fix a const to non-const assignment in the Digigram VXpocket sound driver
  ALSA: Fix a const pointer usage warning in the Digigram VX soundcard driver
  ...

219 files changed:
Documentation/ABI/testing/sysfs-block
Documentation/ABI/testing/sysfs-bus-css [new file with mode: 0644]
Documentation/block/data-integrity.txt [new file with mode: 0644]
Documentation/ioctl-number.txt
arch/s390/Kconfig
arch/s390/appldata/appldata.h
arch/s390/appldata/appldata_base.c
arch/s390/appldata/appldata_mem.c
arch/s390/appldata/appldata_net_sum.c
arch/s390/appldata/appldata_os.c
arch/s390/crypto/prng.c
arch/s390/hypfs/inode.c
arch/s390/kernel/Makefile
arch/s390/kernel/binfmt_elf32.c [deleted file]
arch/s390/kernel/compat_ptrace.h
arch/s390/kernel/debug.c
arch/s390/kernel/early.c
arch/s390/kernel/ipl.c
arch/s390/kernel/kprobes.c
arch/s390/kernel/machine_kexec.c
arch/s390/kernel/mem_detect.c [new file with mode: 0644]
arch/s390/kernel/process.c
arch/s390/kernel/ptrace.c
arch/s390/kernel/setup.c
arch/s390/kernel/time.c
arch/s390/kernel/topology.c
arch/s390/kernel/vtime.c
arch/s390/mm/init.c
arch/x86/kernel/traps_64.c
block/Kconfig
block/Makefile
block/as-iosched.c
block/blk-core.c
block/blk-integrity.c [new file with mode: 0644]
block/blk-map.c
block/blk-merge.c
block/blk-settings.c
block/blk.h
block/blktrace.c
block/bsg.c
block/cfq-iosched.c
block/cmd-filter.c [new file with mode: 0644]
block/elevator.c
block/genhd.c
block/scsi_ioctl.c
drivers/ata/libata-scsi.c
drivers/block/DAC960.c
drivers/block/aoe/aoecmd.c
drivers/block/paride/pt.c
drivers/block/pktcdvd.c
drivers/block/xen-blkfront.c
drivers/cdrom/cdrom.c
drivers/char/pcmcia/cm4000_cs.c
drivers/char/pcmcia/cm4040_cs.c
drivers/char/pcmcia/ipwireless/main.c
drivers/ide/legacy/ide-cs.c
drivers/md/linear.c
drivers/md/raid0.c
drivers/md/raid10.c
drivers/md/raid5.c
drivers/mtd/ftl.c
drivers/mtd/maps/pcmciamtd.c
drivers/net/xen-netfront.c
drivers/pcmcia/Kconfig
drivers/pcmcia/Makefile
drivers/pcmcia/au1000_generic.h
drivers/pcmcia/au1000_pb1x00.c
drivers/pcmcia/au1000_xxs1500.c
drivers/pcmcia/bfin_cf_pcmcia.c [new file with mode: 0644]
drivers/pcmcia/cardbus.c
drivers/pcmcia/cistpl.c
drivers/pcmcia/cs.c
drivers/pcmcia/cs_internal.h
drivers/pcmcia/ds.c
drivers/pcmcia/hd64465_ss.c
drivers/pcmcia/i82092.c
drivers/pcmcia/i82092aa.h
drivers/pcmcia/i82365.c
drivers/pcmcia/m8xx_pcmcia.c
drivers/pcmcia/pcmcia_ioctl.c
drivers/pcmcia/pcmcia_resource.c
drivers/pcmcia/pxa2xx_base.c
drivers/pcmcia/rsrc_mgr.c
drivers/pcmcia/rsrc_nonstatic.c
drivers/pcmcia/soc_common.h
drivers/pcmcia/socket_sysfs.c
drivers/pcmcia/ti113x.h
drivers/s390/block/dasd.c
drivers/s390/block/dasd_3990_erp.c
drivers/s390/block/dasd_eckd.c
drivers/s390/block/dasd_fba.c
drivers/s390/block/dcssblk.c
drivers/s390/block/xpram.c
drivers/s390/char/con3215.c
drivers/s390/char/con3270.c
drivers/s390/char/fs3270.c
drivers/s390/char/monreader.c
drivers/s390/char/raw3270.c
drivers/s390/char/sclp.c
drivers/s390/char/sclp_cmd.c
drivers/s390/char/sclp_con.c
drivers/s390/char/sclp_config.c
drivers/s390/char/sclp_cpi_sys.c
drivers/s390/char/sclp_quiesce.c
drivers/s390/char/sclp_rw.c
drivers/s390/char/sclp_sdias.c
drivers/s390/char/sclp_tty.c
drivers/s390/char/sclp_tty.h
drivers/s390/char/sclp_vt220.c
drivers/s390/char/tape_34xx.c
drivers/s390/char/tape_3590.c
drivers/s390/char/tape_core.c
drivers/s390/char/tty3270.c
drivers/s390/char/vmcp.c
drivers/s390/char/vmlogrdr.c
drivers/s390/char/vmur.c
drivers/s390/char/vmwatchdog.c
drivers/s390/char/zcore.c
drivers/s390/cio/Makefile
drivers/s390/cio/airq.c
drivers/s390/cio/chp.c
drivers/s390/cio/chp.h
drivers/s390/cio/chsc.c
drivers/s390/cio/chsc.h
drivers/s390/cio/chsc_sch.c [new file with mode: 0644]
drivers/s390/cio/chsc_sch.h [new file with mode: 0644]
drivers/s390/cio/cio.c
drivers/s390/cio/cio.h
drivers/s390/cio/cmf.c
drivers/s390/cio/css.c
drivers/s390/cio/css.h
drivers/s390/cio/device.c
drivers/s390/cio/device.h
drivers/s390/cio/device_fsm.c
drivers/s390/cio/device_id.c
drivers/s390/cio/device_ops.c
drivers/s390/cio/device_pgid.c
drivers/s390/cio/device_status.c
drivers/s390/cio/fcx.c [new file with mode: 0644]
drivers/s390/cio/idset.h
drivers/s390/cio/io_sch.h
drivers/s390/cio/ioasm.h
drivers/s390/cio/isc.c [new file with mode: 0644]
drivers/s390/cio/itcw.c [new file with mode: 0644]
drivers/s390/cio/qdio.c
drivers/s390/cio/qdio.h
drivers/s390/cio/scsw.c [new file with mode: 0644]
drivers/s390/crypto/ap_bus.c
drivers/s390/crypto/ap_bus.h
drivers/s390/crypto/zcrypt_api.c
drivers/s390/crypto/zcrypt_api.h
drivers/s390/crypto/zcrypt_cex2a.c
drivers/s390/crypto/zcrypt_error.h
drivers/s390/crypto/zcrypt_pcica.c
drivers/s390/crypto/zcrypt_pcicc.c
drivers/s390/crypto/zcrypt_pcixcc.c
drivers/s390/net/claw.c
drivers/s390/net/ctcm_fsms.c
drivers/s390/net/ctcm_main.c
drivers/s390/net/cu3088.c
drivers/s390/net/cu3088.h
drivers/s390/net/lcs.c
drivers/s390/net/netiucv.c
drivers/s390/net/qeth_core_main.c
drivers/s390/net/qeth_l3_main.c
drivers/s390/net/smsgiucv.c
drivers/s390/s390mach.c
drivers/s390/s390mach.h
drivers/scsi/sg.c
drivers/scsi/sr.c
drivers/xen/xenbus/xenbus_client.c
drivers/xen/xenbus/xenbus_xs.c
fs/Makefile
fs/bio-integrity.c [new file with mode: 0644]
fs/bio.c
fs/ramfs/file-mmu.c
fs/ramfs/file-nommu.c
fs/splice.c
include/asm-s390/Kbuild
include/asm-s390/airq.h
include/asm-s390/ccwdev.h
include/asm-s390/chpid.h
include/asm-s390/chsc.h [new file with mode: 0644]
include/asm-s390/cio.h
include/asm-s390/elf.h
include/asm-s390/etr.h
include/asm-s390/fcx.h [new file with mode: 0644]
include/asm-s390/ipl.h
include/asm-s390/isc.h [new file with mode: 0644]
include/asm-s390/itcw.h [new file with mode: 0644]
include/asm-s390/pgtable.h
include/asm-s390/processor.h
include/asm-s390/ptrace.h
include/asm-s390/schid.h [moved from drivers/s390/cio/schid.h with 65% similarity]
include/asm-s390/sclp.h
include/asm-s390/setup.h
include/asm-s390/sparsemem.h
include/asm-s390/timer.h
include/asm-s390/zcrypt.h
include/linux/bio.h
include/linux/blkdev.h
include/linux/blktrace_api.h
include/linux/genhd.h
include/linux/iocontext.h
include/linux/mod_devicetable.h
include/pcmcia/bulkmem.h [deleted file]
include/pcmcia/cistpl.h
include/pcmcia/cs.h
include/pcmcia/cs_types.h
include/pcmcia/ds.h
include/pcmcia/ss.h
include/pcmcia/version.h [deleted file]
kernel/exit.c
kernel/fork.c
mm/Kconfig
mm/slub.c
net/iucv/af_iucv.c
net/iucv/iucv.c
scripts/mod/file2alias.c

index 4bd9ea5..44f52a4 100644 (file)
@@ -26,3 +26,37 @@ Description:
                I/O statistics of partition <part>. The format is the
                same as the above-written /sys/block/<disk>/stat
                format.
+
+
+What:          /sys/block/<disk>/integrity/format
+Date:          June 2008
+Contact:       Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+               Metadata format for integrity capable block device.
+               E.g. T10-DIF-TYPE1-CRC.
+
+
+What:          /sys/block/<disk>/integrity/read_verify
+Date:          June 2008
+Contact:       Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+               Indicates whether the block layer should verify the
+               integrity of read requests serviced by devices that
+               support sending integrity metadata.
+
+
+What:          /sys/block/<disk>/integrity/tag_size
+Date:          June 2008
+Contact:       Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+               Number of bytes of integrity tag space available per
+               512 bytes of data.
+
+
+What:          /sys/block/<disk>/integrity/write_generate
+Date:          June 2008
+Contact:       Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+               Indicates whether the block layer should automatically
+               generate checksums for write requests bound for
+               devices that support receiving integrity metadata.
diff --git a/Documentation/ABI/testing/sysfs-bus-css b/Documentation/ABI/testing/sysfs-bus-css
new file mode 100644 (file)
index 0000000..b585ec2
--- /dev/null
@@ -0,0 +1,35 @@
+What:          /sys/bus/css/devices/.../type
+Date:          March 2008
+Contact:       Cornelia Huck <cornelia.huck@de.ibm.com>
+               linux-s390@vger.kernel.org
+Description:   Contains the subchannel type, as reported by the hardware.
+               This attribute is present for all subchannel types.
+
+What:          /sys/bus/css/devices/.../modalias
+Date:          March 2008
+Contact:       Cornelia Huck <cornelia.huck@de.ibm.com>
+               linux-s390@vger.kernel.org
+Description:   Contains the module alias as reported with uevents.
+               It is of the format css:t<type> and present for all
+               subchannel types.
+
+What:          /sys/bus/css/drivers/io_subchannel/.../chpids
+Date:          December 2002
+Contact:       Cornelia Huck <cornelia.huck@de.ibm.com>
+               linux-s390@vger.kernel.org
+Description:   Contains the ids of the channel paths used by this
+               subchannel, as reported by the channel subsystem
+               during subchannel recognition.
+               Note: This is an I/O-subchannel specific attribute.
+Users:         s390-tools, HAL
+
+What:          /sys/bus/css/drivers/io_subchannel/.../pimpampom
+Date:          December 2002
+Contact:       Cornelia Huck <cornelia.huck@de.ibm.com>
+               linux-s390@vger.kernel.org
+Description:   Contains the PIM/PAM/POM values, as reported by the
+               channel subsystem when last queried by the common I/O
+               layer (this implies that this attribute is not necessarily
+               in sync with the values current in the channel subsystem).
+               Note: This is an I/O-subchannel specific attribute.
+Users:         s390-tools, HAL
diff --git a/Documentation/block/data-integrity.txt b/Documentation/block/data-integrity.txt
new file mode 100644 (file)
index 0000000..e9dc8d8
--- /dev/null
@@ -0,0 +1,327 @@
+----------------------------------------------------------------------
+1. INTRODUCTION
+
+Modern filesystems feature checksumming of data and metadata to
+protect against data corruption.  However, the detection of the
+corruption is done at read time which could potentially be months
+after the data was written.  At that point the original data that the
+application tried to write is most likely lost.
+
+The solution is to ensure that the disk is actually storing what the
+application meant it to.  Recent additions to both the SCSI family
+protocols (SBC Data Integrity Field, SCC protection proposal) as well
+as SATA/T13 (External Path Protection) try to remedy this by adding
+support for appending integrity metadata to an I/O.  The integrity
+metadata (or protection information in SCSI terminology) includes a
+checksum for each sector as well as an incrementing counter that
+ensures the individual sectors are written in the right order, and,
+for some protection schemes, that the I/O is written to the right
+place on disk.
+
+Current storage controllers and devices implement various protective
+measures, for instance checksumming and scrubbing.  But these
+technologies are working in their own isolated domains or at best
+between adjacent nodes in the I/O path.  The interesting thing about
+DIF and the other integrity extensions is that the protection format
+is well defined and every node in the I/O path can verify the
+integrity of the I/O and reject it if corruption is detected.  This
+allows not only corruption prevention but also isolation of the point
+of failure.
+
+----------------------------------------------------------------------
+2. THE DATA INTEGRITY EXTENSIONS
+
+As written, the protocol extensions only protect the path between
+controller and storage device.  However, many controllers actually
+allow the operating system to interact with the integrity metadata
+(IMD).  We have been working with several FC/SAS HBA vendors to enable
+the protection information to be transferred to and from their
+controllers.
+
+The SCSI Data Integrity Field works by appending 8 bytes of protection
+information to each sector.  The data + integrity metadata is stored
+in 520 byte sectors on disk.  Data + IMD are interleaved when
+transferred between the controller and target.  The T13 proposal is
+similar.
+
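+For orientation, the 8 bytes of protection information per sector are
+commonly laid out as sketched below (the struct and field names here
+are illustrative only; the exact semantics depend on the protection
+type and are defined by the relevant specs):
+
+        /* __be16/__be32 as in <linux/types.h>; layout is a sketch only */
+        struct dif_tuple {
+                __be16 guard_tag;       /* checksum of the 512-byte data sector */
+                __be16 app_tag;         /* application tag space */
+                __be32 ref_tag;         /* incrementing counter / target sector */
+        };
+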
+Because it is highly inconvenient for operating systems to deal with
+520 (and 4104) byte sectors, we approached several HBA vendors and
+encouraged them to allow separation of the data and integrity metadata
+scatter-gather lists.
+
+The controller will interleave the buffers on write and split them on
+read.  This means that Linux can DMA the data buffers to and from
+host memory without changes to the page cache.
+
+Also, the 16-bit CRC checksum mandated by both the SCSI and SATA specs
+is somewhat heavy to compute in software.  Benchmarks found that
+calculating this checksum had a significant impact on system
+performance for a number of workloads.  Some controllers allow a
+lighter-weight checksum to be used when interfacing with the operating
+system.  Emulex, for instance, supports the TCP/IP checksum instead.
+The IP checksum received from the OS is converted to the 16-bit CRC
+when writing and vice versa.  This allows the integrity metadata to be
+generated by Linux or the application at very low cost (comparable to
+software RAID5).
+
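+To illustrate the lighter-weight alternative, a simple (unoptimized)
+version of the 16-bit one's complement sum over one 512-byte sector is
+sketched below.  This is for illustration only; in practice the
+kernel's existing helpers (e.g. ip_compute_csum()) would be used:
+
+        static __u16 sector_ip_csum(const void *buf)
+        {
+                const __be16 *p = buf;
+                __u32 sum = 0;
+                int i;
+
+                /* sum 256 16-bit words, then fold the carries back in */
+                for (i = 0; i < 512 / 2; i++)
+                        sum += be16_to_cpu(p[i]);
+                sum = (sum & 0xffff) + (sum >> 16);
+                sum = (sum & 0xffff) + (sum >> 16);
+                return (__u16)~sum;
+        }
+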
+The IP checksum is weaker than the CRC in terms of detecting bit
+errors.  However, the strength is really in the separation of the data
+buffers and the integrity metadata.  These two distinct buffers must
+match up for an I/O to complete.
+
+The separation of the data and integrity metadata buffers as well as
+the choice in checksums is referred to as the Data Integrity
+Extensions.  As these extensions are outside the scope of the protocol
+bodies (T10, T13), Oracle and its partners are trying to standardize
+them within the Storage Networking Industry Association.
+
+----------------------------------------------------------------------
+3. KERNEL CHANGES
+
+The data integrity framework in Linux enables protection information
+to be pinned to I/Os and sent to/received from controllers that
+support it.
+
+The advantage to the integrity extensions in SCSI and SATA is that
+they enable us to protect the entire path from application to storage
+device.  However, at the same time this is also the biggest
+disadvantage. It means that the protection information must be in a
+format that can be understood by the disk.
+
+Generally Linux/POSIX applications are agnostic to the intricacies of
+the storage devices they are accessing.  The virtual filesystem switch
+and the block layer make things like hardware sector size and
+transport protocols completely transparent to the application.
+
+However, this level of detail is required when preparing the
+protection information to send to a disk.  Consequently, the very
+concept of an end-to-end protection scheme is a layering violation.
+It is completely unreasonable for an application to be aware of
+whether it is accessing a SCSI or SATA disk.
+
+The data integrity support implemented in Linux attempts to hide this
+from the application.  As far as the application (and to some extent
+the kernel) is concerned, the integrity metadata is opaque information
+that's attached to the I/O.
+
+The current implementation allows the block layer to automatically
+generate the protection information for any I/O.  Eventually the
+intent is to move the integrity metadata calculation to userspace for
+user data.  Metadata and other I/O that originates within the kernel
+will still use the automatic generation interface.
+
+Some storage devices allow each hardware sector to be tagged with a
+16-bit value.  The owner of this tag space is the owner of the block
+device.  I.e. the filesystem in most cases.  The filesystem can use
+this extra space to tag sectors as it sees fit.  Because the tag
+space is limited, the block interface allows tagging bigger chunks by
+way of interleaving.  This way, 8*16 bits of information can be
+attached to a typical 4KB filesystem block.
+
+This also means that applications such as fsck and mkfs will need
+access to manipulate the tags from user space.  A passthrough
+interface for this is being worked on.
+
+
+----------------------------------------------------------------------
+4. BLOCK LAYER IMPLEMENTATION DETAILS
+
+4.1 BIO
+
+The data integrity patches add a new field to struct bio when
+CONFIG_BLK_DEV_INTEGRITY is enabled.  bio->bi_integrity is a pointer
+to a struct bip which contains the bio integrity payload.  Essentially
+a bip is a trimmed down struct bio which holds a bio_vec containing
+the integrity metadata and the required housekeeping information (bvec
+pool, vector count, etc.)
+
+A kernel subsystem can enable data integrity protection on a bio by
+calling bio_integrity_alloc(bio).  This will allocate and attach the
+bip to the bio.
+
+Individual pages containing integrity metadata can subsequently be
+attached using bio_integrity_add_page().
+
+bio_free() will automatically free the bip.
+
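+Putting the calls above together, a minimal sketch of attaching
+integrity metadata from a kernel subsystem might look like this.
+struct bio_integrity_payload is the full name of the "bip" mentioned
+above, the full argument list is given in the API sections later in
+this document, error handling is abbreviated, and the return value of
+bio_integrity_add_page() is assumed to follow the bio_add_page()
+convention of returning 0 on failure:
+
+        struct bio_integrity_payload *bip;
+
+        bip = bio_integrity_alloc(bio, GFP_NOIO, 1);    /* room for one vec */
+        if (!bip)
+                return -ENOMEM;
+
+        /* imd_page/imd_len hold the integrity metadata for this bio */
+        if (!bio_integrity_add_page(bio, imd_page, imd_len, 0))
+                return -EIO;
+
+        submit_bio(rw, bio);    /* the bip is freed later by bio_free() */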
+
+4.2 BLOCK DEVICE
+
+Because the format of the protection data is tied to the physical
+disk, each block device has been extended with a block integrity
+profile (struct blk_integrity).  This optional profile is registered
+with the block layer using blk_integrity_register().
+
+The profile contains callback functions for generating and verifying
+the protection data, as well as getting and setting application tags.
+The profile also contains a few constants to aid in completing,
+merging and splitting the integrity metadata.
+
+Layered block devices will need to pick a profile that's appropriate
+for all subdevices.  blk_integrity_compare() can help with that.  DM
+and MD linear, RAID0 and RAID1 are currently supported.  RAID4/5/6
+will require extra work due to the application tag.
+
+
+----------------------------------------------------------------------
+5.0 BLOCK LAYER INTEGRITY API
+
+5.1 NORMAL FILESYSTEM
+
+    The normal filesystem is unaware that the underlying block device
+    is capable of sending/receiving integrity metadata.  The IMD will
+    be automatically generated by the block layer at submit_bio() time
+    in case of a WRITE.  A READ request will cause the I/O integrity
+    to be verified upon completion.
+
+    IMD generation and verification can be toggled using the
+
+      /sys/block/<bdev>/integrity/write_generate
+
+    and
+
+      /sys/block/<bdev>/integrity/read_verify
+
+    flags.
+
+
+5.2 INTEGRITY-AWARE FILESYSTEM
+
+    A filesystem that is integrity-aware can prepare I/Os with IMD
+    attached.  It can also use the application tag space if this is
+    supported by the block device.
+
+
+    int bdev_integrity_enabled(block_device, int rw);
+
+      bdev_integrity_enabled() will return 1 if the block device
+      supports integrity metadata transfer for the data direction
+      specified in 'rw'.
+
+      bdev_integrity_enabled() honors the write_generate and
+      read_verify flags in sysfs and will respond accordingly.
+
+
+    int bio_integrity_prep(bio);
+
+      To generate IMD for WRITE and to set up buffers for READ, the
+      filesystem must call bio_integrity_prep(bio).
+
+      Prior to calling this function, the bio data direction and start
+      sector must be set, and the bio should have all data pages
+      added.  It is up to the caller to ensure that the bio does not
+      change while I/O is in progress.
+
+      bio_integrity_prep() should only be called if
+      bio_integrity_enabled() returned 1.
+
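+      Taken together, the WRITE-side flow for an integrity-aware
+      filesystem might look like the sketch below.  The bio is assumed
+      to be fully set up with its data pages, direction and start
+      sector; error handling is abbreviated and a non-zero return from
+      bio_integrity_prep() is treated as failure here:
+
+          if (bdev_integrity_enabled(bio->bi_bdev, WRITE)) {
+                  if (bio_integrity_prep(bio))
+                          return -EIO;
+          }
+          submit_bio(WRITE, bio);
+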
+
+    int bio_integrity_tag_size(bio);
+
+      If the filesystem wants to use the application tag space it will
+      first have to find out how much storage space is available.
+      Because tag space is generally limited (usually 2 bytes per
+      sector regardless of sector size), the integrity framework
+      supports interleaving the information between the sectors in an
+      I/O.
+
+      Filesystems can call bio_integrity_tag_size(bio) to find out how
+      many bytes of storage are available for that particular bio.
+
+      Another option is bdev_get_tag_size(block_device) which will
+      return the number of available bytes per hardware sector.
+
+
+    int bio_integrity_set_tag(bio, void *tag_buf, len);
+
+      After a successful return from bio_integrity_prep(),
+      bio_integrity_set_tag() can be used to attach an opaque tag
+      buffer to a bio.  Obviously this only makes sense if the I/O is
+      a WRITE.
+
+
+    int bio_integrity_get_tag(bio, void *tag_buf, len);
+
+      Similarly, at READ I/O completion time the filesystem can
+      retrieve the tag buffer using bio_integrity_get_tag().
+
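+      A sketch of how a filesystem might use the tag interface
+      (fs_tag_buf is a hypothetical filesystem-owned buffer; error
+      handling is omitted):
+
+          int tag_len = bio_integrity_tag_size(bio);
+
+          if (tag_len) {
+                  /* WRITE: attach tag data after bio_integrity_prep() */
+                  bio_integrity_set_tag(bio, fs_tag_buf, tag_len);
+          }
+
+          /* READ completion: retrieve the tags the device returned */
+          bio_integrity_get_tag(bio, fs_tag_buf, tag_len);
+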
+
+5.3 PASSING EXISTING INTEGRITY METADATA
+
+    Filesystems that either generate their own integrity metadata or
+    are capable of transferring IMD from user space can use the
+    following calls:
+
+
+    struct bip * bio_integrity_alloc(bio, gfp_mask, nr_pages);
+
+      Allocates the bio integrity payload and hangs it off of the bio.
+      nr_pages indicates how many pages of protection data need to be
+      stored in the integrity bio_vec list (similar to bio_alloc()).
+
+      The integrity payload will be freed at bio_free() time.
+
+
+    int bio_integrity_add_page(bio, page, len, offset);
+
+      Attaches a page containing integrity metadata to an existing
+      bio.  The bio must have an existing bip,
+      i.e. bio_integrity_alloc() must have been called.  For a WRITE,
+      the integrity metadata in the pages must be in a format
+      understood by the target device with the notable exception that
+      the sector numbers will be remapped as the request traverses the
+      I/O stack.  This implies that the pages added using this call
+      will be modified during I/O!  The first reference tag in the
+      integrity metadata must have a value of bip->bip_sector.
+
+      Pages can be added using bio_integrity_add_page() as long as
+      there is room in the bip bio_vec array (nr_pages).
+
+      Upon completion of a READ operation, the attached pages will
+      contain the integrity metadata received from the storage device.
+      It is up to the receiver to process them and verify data
+      integrity upon completion.
+
+
+5.4 REGISTERING A BLOCK DEVICE AS CAPABLE OF EXCHANGING INTEGRITY
+    METADATA
+
+    To enable integrity exchange on a block device the gendisk must be
+    registered as capable:
+
+    int blk_integrity_register(gendisk, blk_integrity);
+
+      The blk_integrity struct is a template and should contain the
+      following:
+
+        static struct blk_integrity my_profile = {
+            .name                   = "STANDARDSBODY-TYPE-VARIANT-CSUM",
+            .generate_fn            = my_generate_fn,
+            .verify_fn              = my_verify_fn,
+            .get_tag_fn             = my_get_tag_fn,
+            .set_tag_fn             = my_set_tag_fn,
+            .tuple_size             = sizeof(struct my_tuple_size),
+            .tag_size               = <tag bytes per hw sector>,
+        };
+
+      'name' is a text string which will be visible in sysfs.  This is
+      part of the userland API so choose it carefully and never change
+      it.  The format is standards body-type-variant.
+      E.g. T10-DIF-TYPE1-IP or T13-EPP-0-CRC.
+
+      'generate_fn' generates appropriate integrity metadata (for WRITE).
+
+      'verify_fn' verifies that the data buffer matches the integrity
+      metadata.
+
+      'tuple_size' must be set to match the size of the integrity
+      metadata per sector.  I.e. 8 for DIF and EPP.
+
+      'tag_size' must be set to identify how many bytes of tag space
+      are available per hardware sector.  For DIF this is either 2 or
+      0 depending on the value of the Control Mode Page ATO bit.
+
+      See 5.2 for a description of get_tag_fn and set_tag_fn.
+
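+      As an illustration, the skeleton of a CRC-style 'generate_fn' is
+      sketched below.  The layout of the exchange descriptor (data
+      buffer, protection buffer, data size, sector size, start sector)
+      and the tuple/checksum helpers are assumptions made here for
+      readability; the authoritative definitions live in the block
+      layer headers.
+
+        /* Sketch only: descriptor and tuple field names are assumed,
+         * not authoritative. */
+        static void my_generate_fn(struct blk_integrity_exchg *bix)
+        {
+                void *data = bix->data_buf;
+                struct my_tuple *t = bix->prot_buf;
+                sector_t sector = bix->sector;
+                unsigned int i;
+
+                for (i = 0; i < bix->data_size; i += bix->sector_size) {
+                        t->guard_tag = my_csum_fn(data + i, bix->sector_size);
+                        t->app_tag   = 0;
+                        t->ref_tag   = cpu_to_be32(sector++);
+                        t++;
+                }
+        }
+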
+----------------------------------------------------------------------
+2007-12-24 Martin K. Petersen <martin.petersen@oracle.com>
index 240ce7a..3bb5f46 100644 (file)
@@ -117,6 +117,7 @@ Code        Seq#    Include File            Comments
                                        <mailto:natalia@nikhefk.nikhef.nl>
 'c'    00-7F   linux/comstats.h        conflict!
 'c'    00-7F   linux/coda.h            conflict!
+'c'    80-9F   asm-s390/chsc.h
 'd'    00-FF   linux/char/drm/drm/h    conflict!
 'd'    00-DF   linux/video_decoder.h   conflict!
 'd'    F0-FF   linux/digi1.h
index 107e492..5dc8f80 100644 (file)
@@ -146,6 +146,7 @@ config MATHEMU
 config COMPAT
        bool "Kernel support for 31 bit emulation"
        depends on 64BIT
+       select COMPAT_BINFMT_ELF
        help
          Select this option if you want to enable your system kernel to
          handle system-calls from ELF binaries for 31 bit ESA.  This option
@@ -312,6 +313,10 @@ config ARCH_SPARSEMEM_DEFAULT
 config ARCH_SELECT_MEMORY_MODEL
        def_bool y
 
+config ARCH_ENABLE_MEMORY_HOTPLUG
+       def_bool y
+       depends on SPARSEMEM
+
 source "mm/Kconfig"
 
 comment "I/O subsystem configuration"
@@ -344,6 +349,22 @@ config QDIO_DEBUG
 
          If unsure, say N.
 
+config CHSC_SCH
+       tristate "Support for CHSC subchannels"
+       help
+         This driver allows usage of CHSC subchannels. A CHSC subchannel
+         is usually present on LPAR only.
+         The driver creates a device /dev/chsc, which may be used to
+         obtain I/O configuration information about the machine and
+         to issue asynchronous chsc commands (DANGEROUS).
+         You will usually only want to use this interface on a special
+         LPAR designated for system management.
+
+         To compile this driver as a module, choose M here: the
+         module will be called chsc_sch.
+
+         If unsure, say N.
+
 comment "Misc"
 
 config IPL
index db3ae85..17a2636 100644 (file)
@@ -3,13 +3,11 @@
  *
  * Definitions and interface for Linux - z/VM Monitor Stream.
  *
- * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2003, 2008
  *
  * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
  */
 
-//#define APPLDATA_DEBUG                       /* Debug messages on/off */
-
 #define APPLDATA_MAX_REC_SIZE    4024  /* Maximum size of the */
                                        /* data buffer */
 #define APPLDATA_MAX_PROCS 100
 #define P_ERROR(x...)  printk(KERN_ERR MY_PRINT_NAME " error: " x)
 #define P_WARNING(x...)        printk(KERN_WARNING MY_PRINT_NAME " status: " x)
 
-#ifdef APPLDATA_DEBUG
-#define P_DEBUG(x...)   printk(KERN_DEBUG MY_PRINT_NAME " debug: " x)
-#else
-#define P_DEBUG(x...)   do {} while (0)
-#endif
-
 struct appldata_ops {
        struct list_head list;
        struct ctl_table_header *sysctl_header;
index ad40729..9cb3d92 100644 (file)
@@ -5,7 +5,7 @@
  * Exports appldata_register_ops() and appldata_unregister_ops() for the
  * data gathering modules.
  *
- * Copyright (C) 2003,2006 IBM Corporation, IBM Deutschland Entwicklung GmbH.
+ * Copyright IBM Corp. 2003, 2008
  *
  * Author: Gerald Schaefer <gerald.schaefer@de.ibm.com>
  */
@@ -108,9 +108,6 @@ static LIST_HEAD(appldata_ops_list);
  */
 static void appldata_timer_function(unsigned long data)
 {
-       P_DEBUG("   -= Timer =-\n");
-       P_DEBUG("CPU: %i, expire_count: %i\n", smp_processor_id(),
-               atomic_read(&appldata_expire_count));
        if (atomic_dec_and_test(&appldata_expire_count)) {
                atomic_set(&appldata_expire_count, num_online_cpus());
                queue_work(appldata_wq, (struct work_struct *) data);
@@ -128,14 +125,11 @@ static void appldata_work_fn(struct work_struct *work)
        struct appldata_ops *ops;
        int i;
 
-       P_DEBUG("  -= Work Queue =-\n");
        i = 0;
        get_online_cpus();
        spin_lock(&appldata_ops_lock);
        list_for_each(lh, &appldata_ops_list) {
                ops = list_entry(lh, struct appldata_ops, list);
-               P_DEBUG("list_for_each loop: %i) active = %u, name = %s\n",
-                       ++i, ops->active, ops->name);
                if (ops->active == 1) {
                        ops->callback(ops->data);
                }
@@ -212,7 +206,6 @@ __appldata_vtimer_setup(int cmd)
                                                 0, 1);
                }
                appldata_timer_active = 1;
-               P_INFO("Monitoring timer started.\n");
                break;
        case APPLDATA_DEL_TIMER:
                for_each_online_cpu(i)
@@ -221,7 +214,6 @@ __appldata_vtimer_setup(int cmd)
                        break;
                appldata_timer_active = 0;
                atomic_set(&appldata_expire_count, num_online_cpus());
-               P_INFO("Monitoring timer stopped.\n");
                break;
        case APPLDATA_MOD_TIMER:
                per_cpu_interval = (u64) (appldata_interval*1000 /
@@ -313,10 +305,8 @@ appldata_interval_handler(ctl_table *ctl, int write, struct file *filp,
        }
        interval = 0;
        sscanf(buf, "%i", &interval);
-       if (interval <= 0) {
-               P_ERROR("Timer CPU interval has to be > 0!\n");
+       if (interval <= 0)
                return -EINVAL;
-       }
 
        get_online_cpus();
        spin_lock(&appldata_timer_lock);
@@ -324,9 +314,6 @@ appldata_interval_handler(ctl_table *ctl, int write, struct file *filp,
        __appldata_vtimer_setup(APPLDATA_MOD_TIMER);
        spin_unlock(&appldata_timer_lock);
        put_online_cpus();
-
-       P_INFO("Monitoring CPU interval set to %u milliseconds.\n",
-                interval);
 out:
        *lenp = len;
        *ppos += len;
@@ -406,23 +393,16 @@ appldata_generic_handler(ctl_table *ctl, int write, struct file *filp,
                        P_ERROR("START DIAG 0xDC for %s failed, "
                                "return code: %d\n", ops->name, rc);
                        module_put(ops->owner);
-               } else {
-                       P_INFO("Monitoring %s data enabled, "
-                               "DIAG 0xDC started.\n", ops->name);
+               } else
                        ops->active = 1;
-               }
        } else if ((buf[0] == '0') && (ops->active == 1)) {
                ops->active = 0;
                rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC,
                                (unsigned long) ops->data, ops->size,
                                ops->mod_lvl);
-               if (rc != 0) {
+               if (rc != 0)
                        P_ERROR("STOP DIAG 0xDC for %s failed, "
                                "return code: %d\n", ops->name, rc);
-               } else {
-                       P_INFO("Monitoring %s data disabled, "
-                               "DIAG 0xDC stopped.\n", ops->name);
-               }
                module_put(ops->owner);
        }
        spin_unlock(&appldata_ops_lock);
@@ -468,7 +448,6 @@ int appldata_register_ops(struct appldata_ops *ops)
        ops->sysctl_header = register_sysctl_table(ops->ctl_table);
        if (!ops->sysctl_header)
                goto out;
-       P_INFO("%s-ops registered!\n", ops->name);
        return 0;
 out:
        spin_lock(&appldata_ops_lock);
@@ -490,7 +469,6 @@ void appldata_unregister_ops(struct appldata_ops *ops)
        spin_unlock(&appldata_ops_lock);
        unregister_sysctl_table(ops->sysctl_header);
        kfree(ops->ctl_table);
-       P_INFO("%s-ops unregistered!\n", ops->name);
 }
 /********************** module-ops management <END> **************************/
 
@@ -553,14 +531,9 @@ static int __init appldata_init(void)
 {
        int i;
 
-       P_DEBUG("sizeof(parameter_list) = %lu\n",
-               sizeof(struct appldata_parameter_list));
-
        appldata_wq = create_singlethread_workqueue("appldata");
-       if (!appldata_wq) {
-               P_ERROR("Could not create work queue\n");
+       if (!appldata_wq)
                return -ENOMEM;
-       }
 
        get_online_cpus();
        for_each_online_cpu(i)
@@ -571,8 +544,6 @@ static int __init appldata_init(void)
        register_hotcpu_notifier(&appldata_nb);
 
        appldata_sysctl_header = register_sysctl_table(appldata_dir_table);
-
-       P_DEBUG("Base interface initialized.\n");
        return 0;
 }
 
@@ -584,7 +555,9 @@ EXPORT_SYMBOL_GPL(appldata_register_ops);
 EXPORT_SYMBOL_GPL(appldata_unregister_ops);
 EXPORT_SYMBOL_GPL(appldata_diag);
 
+#ifdef CONFIG_SWAP
 EXPORT_SYMBOL_GPL(si_swapinfo);
+#endif
 EXPORT_SYMBOL_GPL(nr_threads);
 EXPORT_SYMBOL_GPL(nr_running);
 EXPORT_SYMBOL_GPL(nr_iowait);
index 51181cc..3ed56b7 100644 (file)
 #include <linux/slab.h>
 #include <linux/errno.h>
 #include <linux/kernel_stat.h>
-#include <asm/io.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
+#include <asm/io.h>
 
 #include "appldata.h"
 
 
-#define MY_PRINT_NAME "appldata_mem"           /* for debug messages, etc. */
 #define P2K(x) ((x) << (PAGE_SHIFT - 10))      /* Converts #Pages to KB */
 
 /*
@@ -70,30 +69,6 @@ static struct appldata_mem_data {
 } __attribute__((packed)) appldata_mem_data;
 
 
-static inline void appldata_debug_print(struct appldata_mem_data *mem_data)
-{
-       P_DEBUG("--- MEM - RECORD ---\n");
-       P_DEBUG("pgpgin     = %8lu KB\n", mem_data->pgpgin);
-       P_DEBUG("pgpgout    = %8lu KB\n", mem_data->pgpgout);
-       P_DEBUG("pswpin     = %8lu Pages\n", mem_data->pswpin);
-       P_DEBUG("pswpout    = %8lu Pages\n", mem_data->pswpout);
-       P_DEBUG("pgalloc    = %8lu \n", mem_data->pgalloc);
-       P_DEBUG("pgfault    = %8lu \n", mem_data->pgfault);
-       P_DEBUG("pgmajfault = %8lu \n", mem_data->pgmajfault);
-       P_DEBUG("sharedram  = %8lu KB\n", mem_data->sharedram);
-       P_DEBUG("totalram   = %8lu KB\n", mem_data->totalram);
-       P_DEBUG("freeram    = %8lu KB\n", mem_data->freeram);
-       P_DEBUG("totalhigh  = %8lu KB\n", mem_data->totalhigh);
-       P_DEBUG("freehigh   = %8lu KB\n", mem_data->freehigh);
-       P_DEBUG("bufferram  = %8lu KB\n", mem_data->bufferram);
-       P_DEBUG("cached     = %8lu KB\n", mem_data->cached);
-       P_DEBUG("totalswap  = %8lu KB\n", mem_data->totalswap);
-       P_DEBUG("freeswap   = %8lu KB\n", mem_data->freeswap);
-       P_DEBUG("sync_count_1 = %u\n", mem_data->sync_count_1);
-       P_DEBUG("sync_count_2 = %u\n", mem_data->sync_count_2);
-       P_DEBUG("timestamp    = %lX\n", mem_data->timestamp);
-}
-
 /*
  * appldata_get_mem_data()
  *
@@ -140,9 +115,6 @@ static void appldata_get_mem_data(void *data)
 
        mem_data->timestamp = get_clock();
        mem_data->sync_count_2++;
-#ifdef APPLDATA_DEBUG
-       appldata_debug_print(mem_data);
-#endif
 }
 
 
@@ -164,17 +136,7 @@ static struct appldata_ops ops = {
  */
 static int __init appldata_mem_init(void)
 {
-       int rc;
-
-       P_DEBUG("sizeof(mem) = %lu\n", sizeof(struct appldata_mem_data));
-
-       rc = appldata_register_ops(&ops);
-       if (rc != 0) {
-               P_ERROR("Error registering ops, rc = %i\n", rc);
-       } else {
-               P_DEBUG("%s-ops registered!\n", ops.name);
-       }
-       return rc;
+       return appldata_register_ops(&ops);
 }
 
 /*
@@ -185,7 +147,6 @@ static int __init appldata_mem_init(void)
 static void __exit appldata_mem_exit(void)
 {
        appldata_unregister_ops(&ops);
-       P_DEBUG("%s-ops unregistered!\n", ops.name);
 }
 
 
index 4d83443..3b74655 100644 (file)
@@ -21,9 +21,6 @@
 #include "appldata.h"
 
 
-#define MY_PRINT_NAME  "appldata_net_sum"      /* for debug messages, etc. */
-
-
 /*
  * Network data
  *
@@ -60,26 +57,6 @@ static struct appldata_net_sum_data {
 } __attribute__((packed)) appldata_net_sum_data;
 
 
-static inline void appldata_print_debug(struct appldata_net_sum_data *net_data)
-{
-       P_DEBUG("--- NET - RECORD ---\n");
-
-       P_DEBUG("nr_interfaces = %u\n", net_data->nr_interfaces);
-       P_DEBUG("rx_packets    = %8lu\n", net_data->rx_packets);
-       P_DEBUG("tx_packets    = %8lu\n", net_data->tx_packets);
-       P_DEBUG("rx_bytes      = %8lu\n", net_data->rx_bytes);
-       P_DEBUG("tx_bytes      = %8lu\n", net_data->tx_bytes);
-       P_DEBUG("rx_errors     = %8lu\n", net_data->rx_errors);
-       P_DEBUG("tx_errors     = %8lu\n", net_data->tx_errors);
-       P_DEBUG("rx_dropped    = %8lu\n", net_data->rx_dropped);
-       P_DEBUG("tx_dropped    = %8lu\n", net_data->tx_dropped);
-       P_DEBUG("collisions    = %8lu\n", net_data->collisions);
-
-       P_DEBUG("sync_count_1 = %u\n", net_data->sync_count_1);
-       P_DEBUG("sync_count_2 = %u\n", net_data->sync_count_2);
-       P_DEBUG("timestamp    = %lX\n", net_data->timestamp);
-}
-
 /*
  * appldata_get_net_sum_data()
  *
@@ -135,9 +112,6 @@ static void appldata_get_net_sum_data(void *data)
 
        net_data->timestamp = get_clock();
        net_data->sync_count_2++;
-#ifdef APPLDATA_DEBUG
-       appldata_print_debug(net_data);
-#endif
 }
 
 
@@ -159,17 +133,7 @@ static struct appldata_ops ops = {
  */
 static int __init appldata_net_init(void)
 {
-       int rc;
-
-       P_DEBUG("sizeof(net) = %lu\n", sizeof(struct appldata_net_sum_data));
-
-       rc = appldata_register_ops(&ops);
-       if (rc != 0) {
-               P_ERROR("Error registering ops, rc = %i\n", rc);
-       } else {
-               P_DEBUG("%s-ops registered!\n", ops.name);
-       }
-       return rc;
+       return appldata_register_ops(&ops);
 }
 
 /*
@@ -180,7 +144,6 @@ static int __init appldata_net_init(void)
 static void __exit appldata_net_exit(void)
 {
        appldata_unregister_ops(&ops);
-       P_DEBUG("%s-ops unregistered!\n", ops.name);
 }
 
 
index 6b3eafe..eb44f9f 100644 (file)
@@ -89,44 +89,6 @@ static struct appldata_ops ops = {
 };
 
 
-static inline void appldata_print_debug(struct appldata_os_data *os_data)
-{
-       int a0, a1, a2, i;
-
-       P_DEBUG("--- OS - RECORD ---\n");
-       P_DEBUG("nr_threads   = %u\n", os_data->nr_threads);
-       P_DEBUG("nr_running   = %u\n", os_data->nr_running);
-       P_DEBUG("nr_iowait    = %u\n", os_data->nr_iowait);
-       P_DEBUG("avenrun(int) = %8x / %8x / %8x\n", os_data->avenrun[0],
-               os_data->avenrun[1], os_data->avenrun[2]);
-       a0 = os_data->avenrun[0];
-       a1 = os_data->avenrun[1];
-       a2 = os_data->avenrun[2];
-       P_DEBUG("avenrun(float) = %d.%02d / %d.%02d / %d.%02d\n",
-               LOAD_INT(a0), LOAD_FRAC(a0), LOAD_INT(a1), LOAD_FRAC(a1),
-               LOAD_INT(a2), LOAD_FRAC(a2));
-
-       P_DEBUG("nr_cpus = %u\n", os_data->nr_cpus);
-       for (i = 0; i < os_data->nr_cpus; i++) {
-               P_DEBUG("cpu%u : user = %u, nice = %u, system = %u, "
-                       "idle = %u, irq = %u, softirq = %u, iowait = %u, "
-                       "steal = %u\n",
-                               os_data->os_cpu[i].cpu_id,
-                               os_data->os_cpu[i].per_cpu_user,
-                               os_data->os_cpu[i].per_cpu_nice,
-                               os_data->os_cpu[i].per_cpu_system,
-                               os_data->os_cpu[i].per_cpu_idle,
-                               os_data->os_cpu[i].per_cpu_irq,
-                               os_data->os_cpu[i].per_cpu_softirq,
-                               os_data->os_cpu[i].per_cpu_iowait,
-                               os_data->os_cpu[i].per_cpu_steal);
-       }
-
-       P_DEBUG("sync_count_1 = %u\n", os_data->sync_count_1);
-       P_DEBUG("sync_count_2 = %u\n", os_data->sync_count_2);
-       P_DEBUG("timestamp    = %lX\n", os_data->timestamp);
-}
-
 /*
  * appldata_get_os_data()
  *
@@ -180,13 +142,10 @@ static void appldata_get_os_data(void *data)
                                           APPLDATA_START_INTERVAL_REC,
                                           (unsigned long) ops.data, new_size,
                                           ops.mod_lvl);
-                       if (rc != 0) {
+                       if (rc != 0)
                                P_ERROR("os: START NEW DIAG 0xDC failed, "
                                        "return code: %d, new size = %i\n", rc,
                                        new_size);
-                               P_INFO("os: stopping old record now\n");
-                       } else
-                               P_INFO("os: new record size = %i\n", new_size);
 
                        rc = appldata_diag(APPLDATA_RECORD_OS_ID,
                                           APPLDATA_STOP_REC,
@@ -204,9 +163,6 @@ static void appldata_get_os_data(void *data)
        }
        os_data->timestamp = get_clock();
        os_data->sync_count_2++;
-#ifdef APPLDATA_DEBUG
-       appldata_print_debug(os_data);
-#endif
 }
 
 
@@ -227,12 +183,9 @@ static int __init appldata_os_init(void)
                rc = -ENOMEM;
                goto out;
        }
-       P_DEBUG("max. sizeof(os) = %i, sizeof(os_cpu) = %lu\n", max_size,
-               sizeof(struct appldata_os_per_cpu));
 
        appldata_os_data = kzalloc(max_size, GFP_DMA);
        if (appldata_os_data == NULL) {
-               P_ERROR("No memory for %s!\n", ops.name);
                rc = -ENOMEM;
                goto out;
        }
@@ -240,17 +193,12 @@ static int __init appldata_os_init(void)
        appldata_os_data->per_cpu_size = sizeof(struct appldata_os_per_cpu);
        appldata_os_data->cpu_offset   = offsetof(struct appldata_os_data,
                                                        os_cpu);
-       P_DEBUG("cpu offset = %u\n", appldata_os_data->cpu_offset);
 
        ops.data = appldata_os_data;
        ops.callback  = &appldata_get_os_data;
        rc = appldata_register_ops(&ops);
-       if (rc != 0) {
-               P_ERROR("Error registering ops, rc = %i\n", rc);
+       if (rc != 0)
                kfree(appldata_os_data);
-       } else {
-               P_DEBUG("%s-ops registered!\n", ops.name);
-       }
 out:
        return rc;
 }
@@ -264,7 +212,6 @@ static void __exit appldata_os_exit(void)
 {
        appldata_unregister_ops(&ops);
        kfree(appldata_os_data);
-       P_DEBUG("%s-ops unregistered!\n", ops.name);
 }
 
 
index 0cfefdd..6a4300b 100644 (file)
@@ -185,11 +185,8 @@ static int __init prng_init(void)
        prng_seed(16);
 
        ret = misc_register(&prng_dev);
-       if (ret) {
-               printk(KERN_WARNING
-                      "Could not register misc device for PRNG.\n");
+       if (ret)
                goto out_buf;
-       }
        return 0;
 
 out_buf:
index 4b010ff..7383781 100644 (file)
@@ -150,33 +150,24 @@ static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov,
                              unsigned long nr_segs, loff_t offset)
 {
        char *data;
-       size_t len;
+       ssize_t ret;
        struct file *filp = iocb->ki_filp;
        /* XXX: temporary */
        char __user *buf = iov[0].iov_base;
        size_t count = iov[0].iov_len;
 
-       if (nr_segs != 1) {
-               count = -EINVAL;
-               goto out;
-       }
+       if (nr_segs != 1)
+               return -EINVAL;
 
        data = filp->private_data;
-       len = strlen(data);
-       if (offset > len) {
-               count = 0;
-               goto out;
-       }
-       if (count > len - offset)
-               count = len - offset;
-       if (copy_to_user(buf, data + offset, count)) {
-               count = -EFAULT;
-               goto out;
-       }
-       iocb->ki_pos += count;
+       ret = simple_read_from_buffer(buf, count, &offset, data, strlen(data));
+       if (ret <= 0)
+               return ret;
+
+       iocb->ki_pos += ret;
        file_accessed(filp);
-out:
-       return count;
+
+       return ret;
 }
 static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov,
                              unsigned long nr_segs, loff_t offset)
index 6302f50..50f657e 100644 (file)
@@ -7,9 +7,14 @@
 #
 CFLAGS_smp.o   := -Wno-nonnull
 
+#
+# Pass UTS_MACHINE for user_regset definition
+#
+CFLAGS_ptrace.o                += -DUTS_MACHINE='"$(UTS_MACHINE)"'
+
 obj-y  :=  bitmap.o traps.o time.o process.o base.o early.o \
             setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
-           s390_ext.o debug.o irq.o ipl.o dis.o diag.o
+           s390_ext.o debug.o irq.o ipl.o dis.o diag.o mem_detect.o
 
 obj-y  += $(if $(CONFIG_64BIT),entry64.o,entry.o)
 obj-y  += $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
@@ -23,7 +28,7 @@ obj-$(CONFIG_AUDIT)           += audit.o
 compat-obj-$(CONFIG_AUDIT)     += compat_audit.o
 obj-$(CONFIG_COMPAT)           += compat_linux.o compat_signal.o \
                                        compat_wrapper.o compat_exec_domain.o \
-                                       binfmt_elf32.o $(compat-obj-y)
+                                       $(compat-obj-y)
 
 obj-$(CONFIG_VIRT_TIMER)       += vtime.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c
deleted file mode 100644 (file)
index 3e1c315..0000000
+++ /dev/null
@@ -1,214 +0,0 @@
-/*
- * Support for 32-bit Linux for S390 ELF binaries.
- *
- * Copyright (C) 2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
- * Author(s): Gerhard Tonn (ton@de.ibm.com)
- *
- * Heavily inspired by the 32-bit Sparc compat code which is
- * Copyright (C) 1995, 1996, 1997, 1998 David S. Miller (davem@redhat.com)
- * Copyright (C) 1995, 1996, 1997, 1998 Jakub Jelinek   (jj@ultra.linux.cz)
- */
-
-#define __ASMS390_ELF_H
-
-#include <linux/time.h>
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS      ELFCLASS32
-#define ELF_DATA       ELFDATA2MSB
-#define ELF_ARCH       EM_S390
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) \
-       (((x)->e_machine == EM_S390 || (x)->e_machine == EM_S390_OLD) \
-         && (x)->e_ident[EI_CLASS] == ELF_CLASS)
-
-/* ELF register definitions */
-#define NUM_GPRS      16
-#define NUM_FPRS      16
-#define NUM_ACRS      16    
-
-/* For SVR4/S390 the function pointer to be registered with `atexit` is
-   passed in R14. */
-#define ELF_PLAT_INIT(_r, load_addr) \
-       do { \
-               _r->gprs[14] = 0; \
-       } while(0)
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE       4096
-
-/* This is the location that an ET_DYN program is loaded if exec'ed.  Typical
-   use of this is to invoke "./ld.so someprog" to test out a new version of
-   the loader.  We need to make sure that it is out of the way of the program
-   that it will "exec", and that there is sufficient room for the brk.  */
-
-#define ELF_ET_DYN_BASE         (TASK_SIZE / 3 * 2)
-
-/* Wow, the "main" arch needs arch dependent functions too.. :) */
-
-/* regs is struct pt_regs, pr_reg is elf_gregset_t (which is
-   now struct_user_regs, they are different) */
-
-#define ELF_CORE_COPY_REGS(pr_reg, regs) dump_regs32(regs, &pr_reg);
-
-#define ELF_CORE_COPY_TASK_REGS(tsk, regs) dump_task_regs32(tsk, regs)
-
-#define ELF_CORE_COPY_FPREGS(tsk, fpregs) dump_task_fpu(tsk, fpregs)
-
-/* This yields a mask that user programs can use to figure out what
-   instruction set this CPU supports. */
-
-#define ELF_HWCAP (0)
-
-/* This yields a string that ld.so will use to load implementation
-   specific libraries for optimization.  This is more specific in
-   intent than poking at uname or /proc/cpuinfo.
-
-   For the moment, we have only optimizations for the Intel generations,
-   but that could change... */
-
-#define ELF_PLATFORM (NULL)
-
-#define SET_PERSONALITY(ex, ibcs2)                     \
-do {                                                   \
-       if (ibcs2)                                      \
-               set_personality(PER_SVR4);              \
-       else if (current->personality != PER_LINUX32)   \
-               set_personality(PER_LINUX);             \
-       set_thread_flag(TIF_31BIT);                     \
-} while (0)
-
-#include "compat_linux.h"
-
-typedef _s390_fp_regs32 elf_fpregset_t;
-
-typedef struct
-{
-       
-       _psw_t32        psw;
-       __u32           gprs[__NUM_GPRS]; 
-       __u32           acrs[__NUM_ACRS]; 
-       __u32           orig_gpr2;
-} s390_regs32;
-typedef s390_regs32 elf_gregset_t;
-
-static inline int dump_regs32(struct pt_regs *ptregs, elf_gregset_t *regs)
-{
-       int i;
-
-       memcpy(&regs->psw.mask, &ptregs->psw.mask, 4);
-       memcpy(&regs->psw.addr, (char *)&ptregs->psw.addr + 4, 4);
-       for (i = 0; i < NUM_GPRS; i++)
-               regs->gprs[i] = ptregs->gprs[i];
-       save_access_regs(regs->acrs);
-       regs->orig_gpr2 = ptregs->orig_gpr2;
-       return 1;
-}
-
-static inline int dump_task_regs32(struct task_struct *tsk, elf_gregset_t *regs)
-{
-       struct pt_regs *ptregs = task_pt_regs(tsk);
-       int i;
-
-       memcpy(&regs->psw.mask, &ptregs->psw.mask, 4);
-       memcpy(&regs->psw.addr, (char *)&ptregs->psw.addr + 4, 4);
-       for (i = 0; i < NUM_GPRS; i++)
-               regs->gprs[i] = ptregs->gprs[i];
-       memcpy(regs->acrs, tsk->thread.acrs, sizeof(regs->acrs));
-       regs->orig_gpr2 = ptregs->orig_gpr2;
-       return 1;
-}
-
-static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
-{
-       if (tsk == current)
-               save_fp_regs((s390_fp_regs *) fpregs);
-       else
-               memcpy(fpregs, &tsk->thread.fp_regs, sizeof(elf_fpregset_t));
-       return 1;
-}
-
-#include <asm/processor.h>
-#include <asm/pgalloc.h>
-#include <linux/module.h>
-#include <linux/elfcore.h>
-#include <linux/binfmts.h>
-#include <linux/compat.h>
-
-#define elf_prstatus elf_prstatus32
-struct elf_prstatus32
-{
-       struct elf_siginfo pr_info;     /* Info associated with signal */
-       short   pr_cursig;              /* Current signal */
-       u32     pr_sigpend;     /* Set of pending signals */
-       u32     pr_sighold;     /* Set of held signals */
-       pid_t   pr_pid;
-       pid_t   pr_ppid;
-       pid_t   pr_pgrp;
-       pid_t   pr_sid;
-       struct compat_timeval pr_utime; /* User time */
-       struct compat_timeval pr_stime; /* System time */
-       struct compat_timeval pr_cutime;        /* Cumulative user time */
-       struct compat_timeval pr_cstime;        /* Cumulative system time */
-       elf_gregset_t pr_reg;   /* GP registers */
-       int pr_fpvalid;         /* True if math co-processor being used.  */
-};
-
-#define elf_prpsinfo elf_prpsinfo32
-struct elf_prpsinfo32
-{
-       char    pr_state;       /* numeric process state */
-       char    pr_sname;       /* char for pr_state */
-       char    pr_zomb;        /* zombie */
-       char    pr_nice;        /* nice val */
-       u32     pr_flag;        /* flags */
-       u16     pr_uid;
-       u16     pr_gid;
-       pid_t   pr_pid, pr_ppid, pr_pgrp, pr_sid;
-       /* Lots missing */
-       char    pr_fname[16];   /* filename of executable */
-       char    pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
-};
-
-#include <linux/highuid.h>
-
-/*
-#define init_elf_binfmt init_elf32_binfmt
-*/
-
-#undef start_thread
-#define start_thread                    start_thread31 
-
-static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw,
-                                 unsigned long new_stackp)
-{
-       set_fs(USER_DS);
-       regs->psw.mask  = psw_user32_bits;
-       regs->psw.addr  = new_psw;
-       regs->gprs[15]  = new_stackp;
-       crst_table_downgrade(current->mm, 1UL << 31);
-}
-
-MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries,"
-                   " Copyright 2000 IBM Corporation"); 
-MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>");
-
-#undef MODULE_DESCRIPTION
-#undef MODULE_AUTHOR
-
-#undef cputime_to_timeval
-#define cputime_to_timeval cputime_to_compat_timeval
-static inline void
-cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value)
-{
-       value->tv_usec = cputime % 1000000;
-       value->tv_sec = cputime / 1000000;
-}
-
-#include "../../../fs/binfmt_elf.c"
-
index 419aef9..cde81fa 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef _PTRACE32_H
 #define _PTRACE32_H
 
-#include "compat_linux.h"  /* needed for _psw_t32 */
+#include "compat_linux.h"  /* needed for psw_compat_t */
 
 typedef struct {
        __u32 cr[3];
@@ -38,7 +38,7 @@ typedef struct {
 
 struct user_regs_struct32
 {
-       _psw_t32 psw;
+       psw_compat_t psw;
        u32 gprs[NUM_GPRS];
        u32 acrs[NUM_ACRS];
        u32 orig_gpr2;
index c93d129..d80fcd4 100644 (file)
@@ -1079,7 +1079,6 @@ __init debug_init(void)
        s390dbf_sysctl_header = register_sysctl_table(s390dbf_dir_table);
        mutex_lock(&debug_mutex);
        debug_debugfs_root_entry = debugfs_create_dir(DEBUG_DIR_ROOT,NULL);
-       printk(KERN_INFO "debug: Initialization complete\n");
        initialized = 1;
        mutex_unlock(&debug_mutex);
 
@@ -1193,7 +1192,6 @@ debug_get_uint(char *buf)
        for(; isspace(*buf); buf++);
        rc = simple_strtoul(buf, &buf, 10);
        if(*buf){
-               printk("debug: no integer specified!\n");
                rc = -EINVAL;
        }
        return rc;
@@ -1340,19 +1338,12 @@ static void debug_flush(debug_info_t* id, int area)
                                memset(id->areas[i][j], 0, PAGE_SIZE);
                        }
                }
-                printk(KERN_INFO "debug: %s: all areas flushed\n",id->name);
         } else if(area >= 0 && area < id->nr_areas) {
                 id->active_entries[area] = 0;
                id->active_pages[area] = 0;
                for(i = 0; i < id->pages_per_area; i++) {
                        memset(id->areas[area][i],0,PAGE_SIZE);
                }
-                printk(KERN_INFO "debug: %s: area %i has been flushed\n",
-                        id->name, area);
-        } else {
-                printk(KERN_INFO
-                      "debug: %s: area %i cannot be flushed (range: %i - %i)\n",
-                        id->name, area, 0, id->nr_areas-1);
         }
         spin_unlock_irqrestore(&id->lock,flags);
 }
index d0e0968..2a2ca26 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
+#include <asm/ebcdic.h>
 #include <asm/ipl.h>
 #include <asm/lowcore.h>
 #include <asm/processor.h>
 /*
  * Create a Kernel NSS if the SAVESYS= parameter is defined
  */
-#define DEFSYS_CMD_SIZE                96
+#define DEFSYS_CMD_SIZE                128
 #define SAVESYS_CMD_SIZE       32
 
 char kernel_nss_name[NSS_NAME_SIZE + 1];
 
+static void __init setup_boot_command_line(void);
+
+
 #ifdef CONFIG_SHARED_KERNEL
+int __init savesys_ipl_nss(char *cmd, const int cmdlen);
+
+asm(
+       "       .section .init.text,\"ax\",@progbits\n"
+       "       .align  4\n"
+       "       .type   savesys_ipl_nss, @function\n"
+       "savesys_ipl_nss:\n"
+#ifdef CONFIG_64BIT
+       "       stmg    6,15,48(15)\n"
+       "       lgr     14,3\n"
+       "       sam31\n"
+       "       diag    2,14,0x8\n"
+       "       sam64\n"
+       "       lgr     2,14\n"
+       "       lmg     6,15,48(15)\n"
+#else
+       "       stm     6,15,24(15)\n"
+       "       lr      14,3\n"
+       "       diag    2,14,0x8\n"
+       "       lr      2,14\n"
+       "       lm      6,15,24(15)\n"
+#endif
+       "       br      14\n"
+       "       .size   savesys_ipl_nss, .-savesys_ipl_nss\n");
+
 static noinline __init void create_kernel_nss(void)
 {
        unsigned int i, stext_pfn, eshared_pfn, end_pfn, min_size;
@@ -39,6 +68,7 @@ static noinline __init void create_kernel_nss(void)
        unsigned int sinitrd_pfn, einitrd_pfn;
 #endif
        int response;
+       size_t len;
        char *savesys_ptr;
        char upper_command_line[COMMAND_LINE_SIZE];
        char defsys_cmd[DEFSYS_CMD_SIZE];
@@ -49,8 +79,8 @@ static noinline __init void create_kernel_nss(void)
                return;
 
        /* Convert COMMAND_LINE to upper case */
-       for (i = 0; i < strlen(COMMAND_LINE); i++)
-               upper_command_line[i] = toupper(COMMAND_LINE[i]);
+       for (i = 0; i < strlen(boot_command_line); i++)
+               upper_command_line[i] = toupper(boot_command_line[i]);
 
        savesys_ptr = strstr(upper_command_line, "SAVESYS=");
 
@@ -83,7 +113,8 @@ static noinline __init void create_kernel_nss(void)
        }
 #endif
 
-       sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK", defsys_cmd, min_size);
+       sprintf(defsys_cmd, "%s EW MINSIZE=%.7iK PARMREGS=0-13",
+               defsys_cmd, min_size);
        sprintf(savesys_cmd, "SAVESYS %s \n IPL %s",
                kernel_nss_name, kernel_nss_name);
 
@@ -94,13 +125,24 @@ static noinline __init void create_kernel_nss(void)
                return;
        }
 
-       __cpcmd(savesys_cmd, NULL, 0, &response);
+       len = strlen(savesys_cmd);
+       ASCEBC(savesys_cmd, len);
+       response = savesys_ipl_nss(savesys_cmd, len);
 
-       if (response != strlen(savesys_cmd)) {
+       /* On success: response is equal to the command size,
+        *             max SAVESYS_CMD_SIZE
+        * On error: response contains the numeric portion of the CP error message;
+        *           for SAVESYS it will be >= 263
+        */
+       if (response > SAVESYS_CMD_SIZE) {
                kernel_nss_name[0] = '\0';
                return;
        }
 
+       /* re-setup boot command line with new ipl vm parms */
+       ipl_update_parameters();
+       setup_boot_command_line();
+
        ipl_flags = IPL_NSS_VALID;
 }
 
@@ -141,109 +183,11 @@ static noinline __init void detect_machine_type(void)
        if (cpuinfo->cpu_id.version == 0xff)
                machine_flags |= MACHINE_FLAG_VM;
 
-       /* Running on a P/390 ? */
-       if (cpuinfo->cpu_id.machine == 0x7490)
-               machine_flags |= MACHINE_FLAG_P390;
-
        /* Running under KVM ? */
        if (cpuinfo->cpu_id.version == 0xfe)
                machine_flags |= MACHINE_FLAG_KVM;
 }
 
-#ifdef CONFIG_64BIT
-static noinline __init int memory_fast_detect(void)
-{
-       unsigned long val0 = 0;
-       unsigned long val1 = 0xc;
-       int ret = -ENOSYS;
-
-       if (ipl_flags & IPL_NSS_VALID)
-               return -ENOSYS;
-
-       asm volatile(
-               "       diag    %1,%2,0x260\n"
-               "0:     lhi     %0,0\n"
-               "1:\n"
-               EX_TABLE(0b,1b)
-               : "+d" (ret), "+d" (val0), "+d" (val1) : : "cc");
-
-       if (ret || val0 != val1)
-               return -ENOSYS;
-
-       memory_chunk[0].size = val0 + 1;
-       return 0;
-}
-#else
-static inline int memory_fast_detect(void)
-{
-       return -ENOSYS;
-}
-#endif
-
-static inline __init unsigned long __tprot(unsigned long addr)
-{
-       int cc = -1;
-
-       asm volatile(
-               "       tprot   0(%1),0\n"
-               "0:     ipm     %0\n"
-               "       srl     %0,28\n"
-               "1:\n"
-               EX_TABLE(0b,1b)
-               : "+d" (cc) : "a" (addr) : "cc");
-       return (unsigned long)cc;
-}
-
-/* Checking memory in 128KB increments. */
-#define CHUNK_INCR     (1UL << 17)
-#define ADDR2G         (1UL << 31)
-
-static noinline __init void find_memory_chunks(unsigned long memsize)
-{
-       unsigned long addr = 0, old_addr = 0;
-       unsigned long old_cc = CHUNK_READ_WRITE;
-       unsigned long cc;
-       int chunk = 0;
-
-       while (chunk < MEMORY_CHUNKS) {
-               cc = __tprot(addr);
-               while (cc == old_cc) {
-                       addr += CHUNK_INCR;
-                       if (memsize && addr >= memsize)
-                               break;
-#ifndef CONFIG_64BIT
-                       if (addr == ADDR2G)
-                               break;
-#endif
-                       cc = __tprot(addr);
-               }
-
-               if (old_addr != addr &&
-                   (old_cc == CHUNK_READ_WRITE || old_cc == CHUNK_READ_ONLY)) {
-                       memory_chunk[chunk].addr = old_addr;
-                       memory_chunk[chunk].size = addr - old_addr;
-                       memory_chunk[chunk].type = old_cc;
-                       chunk++;
-               }
-
-               old_addr = addr;
-               old_cc = cc;
-
-#ifndef CONFIG_64BIT
-               if (addr == ADDR2G)
-                       break;
-#endif
-               /*
-                * Finish memory detection at the first hole
-                * if storage size is unknown.
-                */
-               if (cc == -1UL && !memsize)
-                       break;
-               if (memsize && addr >= memsize)
-                       break;
-       }
-}
-
 static __init void early_pgm_check_handler(void)
 {
        unsigned long addr;
@@ -380,23 +324,61 @@ static __init void detect_machine_facilities(void)
 #endif
 }
 
+static __init void rescue_initrd(void)
+{
+#ifdef CONFIG_BLK_DEV_INITRD
+       /*
+        * Move the initrd right behind the bss section in case it starts
+        * within the bss section. So we don't overwrite it when the bss
+        * section gets cleared.
+        */
+       if (!INITRD_START || !INITRD_SIZE)
+               return;
+       if (INITRD_START >= (unsigned long) __bss_stop)
+               return;
+       memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE);
+       INITRD_START = (unsigned long) __bss_stop;
+#endif
+}
+
+/* Set up boot command line */
+static void __init setup_boot_command_line(void)
+{
+       char *parm = NULL;
+
+       /* copy arch command line */
+       strlcpy(boot_command_line, COMMAND_LINE, ARCH_COMMAND_LINE_SIZE);
+       boot_command_line[ARCH_COMMAND_LINE_SIZE - 1] = 0;
+
+       /* append IPL PARM data to the boot command line */
+       if (MACHINE_IS_VM) {
+               parm = boot_command_line + strlen(boot_command_line);
+               *parm++ = ' ';
+               get_ipl_vmparm(parm);
+               if (parm[0] == '=')
+                       memmove(boot_command_line, parm + 1, strlen(parm));
+       }
+}
+
+
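
setup_boot_command_line() is what makes the kernel command line and the z/VM PARM field agree on a single string: the PARM is appended after a blank, and a PARM that starts with '=' throws the built-in command line away entirely. A stand-alone sketch of that string handling (plain user-space C; function and variable names invented for illustration):

    #include <stdio.h>
    #include <string.h>

    /* Mimics the append/replace logic of setup_boot_command_line(). */
    static void apply_vmparm(char *cmdline, const char *vmparm)
    {
            char *parm = cmdline + strlen(cmdline);

            *parm++ = ' ';
            strcpy(parm, vmparm);           /* stands in for get_ipl_vmparm() */
            if (parm[0] == '=')
                    memmove(cmdline, parm + 1, strlen(parm));
    }

    int main(void)
    {
            char a[128] = "root=/dev/dasda1";
            char b[128] = "root=/dev/dasda1";

            apply_vmparm(a, "dasd=0150");       /* appended */
            apply_vmparm(b, "=dasd=0150 ro");   /* '=' replaces everything */
            printf("%s\n%s\n", a, b);
            return 0;
    }

This prints "root=/dev/dasda1 dasd=0150" and "dasd=0150 ro".
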
 /*
  * Save ipl parameters, clear bss memory, initialize storage keys
  * and create a kernel NSS at startup if the SAVESYS= parm is defined
  */
 void __init startup_init(void)
 {
-       unsigned long long memsize;
-
        ipl_save_parameters();
+       rescue_initrd();
        clear_bss_section();
        init_kernel_storage_key();
        lockdep_init();
        lockdep_off();
-       detect_machine_type();
-       create_kernel_nss();
        sort_main_extable();
        setup_lowcore_early();
+       detect_machine_type();
+       ipl_update_parameters();
+       setup_boot_command_line();
+       create_kernel_nss();
        detect_mvpg();
        detect_ieee();
        detect_csp();
@@ -404,18 +386,7 @@ void __init startup_init(void)
        detect_diag44();
        detect_machine_facilities();
        setup_hpage();
-       sclp_read_info_early();
        sclp_facilities_detect();
-       memsize = sclp_memory_detect();
-#ifndef CONFIG_64BIT
-       /*
-        * Can't deal with more than 2G in 31 bit addressing mode, so
-        * limit the value in order to avoid strange side effects.
-        */
-       if (memsize > ADDR2G)
-               memsize = ADDR2G;
-#endif
-       if (memory_fast_detect() < 0)
-               find_memory_chunks((unsigned long) memsize);
+       detect_memory_layout(memory_chunk);
        lockdep_on();
 }
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 5325424..54b2779 100644
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/reboot.h>
 #include <linux/ctype.h>
+#include <linux/fs.h>
 #include <asm/ipl.h>
 #include <asm/smp.h>
 #include <asm/setup.h>
@@ -22,6 +23,7 @@
 #include <asm/ebcdic.h>
 #include <asm/reset.h>
 #include <asm/sclp.h>
+#include <asm/setup.h>
 
 #define IPL_PARM_BLOCK_VERSION 0
 
@@ -121,6 +123,7 @@ enum ipl_method {
        REIPL_METHOD_FCP_RO_VM,
        REIPL_METHOD_FCP_DUMP,
        REIPL_METHOD_NSS,
+       REIPL_METHOD_NSS_DIAG,
        REIPL_METHOD_DEFAULT,
 };
 
@@ -134,14 +137,15 @@ enum dump_method {
 
 static int diag308_set_works = 0;
 
+static struct ipl_parameter_block ipl_block;
+
 static int reipl_capabilities = IPL_TYPE_UNKNOWN;
 
 static enum ipl_type reipl_type = IPL_TYPE_UNKNOWN;
 static enum ipl_method reipl_method = REIPL_METHOD_DEFAULT;
 static struct ipl_parameter_block *reipl_block_fcp;
 static struct ipl_parameter_block *reipl_block_ccw;
-
-static char reipl_nss_name[NSS_NAME_SIZE + 1];
+static struct ipl_parameter_block *reipl_block_nss;
 
 static int dump_capabilities = DUMP_TYPE_NONE;
 static enum dump_type dump_type = DUMP_TYPE_NONE;
@@ -263,6 +267,56 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
 
 static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
 
+/* VM IPL PARM routines */
+static void reipl_get_ascii_vmparm(char *dest,
+                                  const struct ipl_parameter_block *ipb)
+{
+       int i;
+       int len = 0;
+       char has_lowercase = 0;
+
+       if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
+           (ipb->ipl_info.ccw.vm_parm_len > 0)) {
+
+               len = ipb->ipl_info.ccw.vm_parm_len;
+               memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
+               /* If at least one character is lowercase, we assume mixed
+                * case; otherwise we convert everything to lowercase.
+                */
+               for (i = 0; i < len; i++)
+                       if ((dest[i] > 0x80 && dest[i] < 0x8a) || /* a-i */
+                           (dest[i] > 0x90 && dest[i] < 0x9a) || /* j-r */
+                           (dest[i] > 0xa1 && dest[i] < 0xaa)) { /* s-z */
+                               has_lowercase = 1;
+                               break;
+                       }
+               if (!has_lowercase)
+                       EBC_TOLOWER(dest, len);
+               EBCASC(dest, len);
+       }
+       dest[len] = 0;
+}
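
The three hex ranges tested above are the EBCDIC code points of the lowercase letters: 0x81-0x89 (a-i), 0x91-0x99 (j-r) and 0xa2-0xa9 (s-z). The idea is that CP typically delivers the PARM in upper case, so an all-uppercase string is folded to lower case before the EBCDIC-to-ASCII conversion (typical kernel parameters are lowercase), while a string that already contains a lowercase letter is taken as deliberately mixed case. The same test as a small helper, purely for illustration:

    /* sketch: is c a lowercase letter in EBCDIC? */
    static int ebcdic_islower(unsigned char c)
    {
            return (c >= 0x81 && c <= 0x89) ||      /* a-i */
                   (c >= 0x91 && c <= 0x99) ||      /* j-r */
                   (c >= 0xa2 && c <= 0xa9);        /* s-z */
    }
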
+
+void get_ipl_vmparm(char *dest)
+{
+       if (diag308_set_works && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
+               reipl_get_ascii_vmparm(dest, &ipl_block);
+       else
+               dest[0] = 0;
+}
+
+static ssize_t ipl_vm_parm_show(struct kobject *kobj,
+                               struct kobj_attribute *attr, char *page)
+{
+       char parm[DIAG308_VMPARM_SIZE + 1] = {};
+
+       get_ipl_vmparm(parm);
+       return sprintf(page, "%s\n", parm);
+}
+
+static struct kobj_attribute sys_ipl_vm_parm_attr =
+       __ATTR(parm, S_IRUGO, ipl_vm_parm_show, NULL);
+
 static ssize_t sys_ipl_device_show(struct kobject *kobj,
                                   struct kobj_attribute *attr, char *page)
 {
@@ -285,14 +339,8 @@ static struct kobj_attribute sys_ipl_device_attr =
 static ssize_t ipl_parameter_read(struct kobject *kobj, struct bin_attribute *attr,
                                  char *buf, loff_t off, size_t count)
 {
-       unsigned int size = IPL_PARMBLOCK_SIZE;
-
-       if (off > size)
-               return 0;
-       if (off + count > size)
-               count = size - off;
-       memcpy(buf, (void *)IPL_PARMBLOCK_START + off, count);
-       return count;
+       return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
+                                       IPL_PARMBLOCK_SIZE);
 }
 
 static struct bin_attribute ipl_parameter_attr = {
@@ -310,12 +358,7 @@ static ssize_t ipl_scp_data_read(struct kobject *kobj, struct bin_attribute *att
        unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
        void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
 
-       if (off > size)
-               return 0;
-       if (off + count > size)
-               count = size - off;
-       memcpy(buf, scp_data + off, count);
-       return count;
+       return memory_read_from_buffer(buf, count, &off, scp_data, size);
 }
 
 static struct bin_attribute ipl_scp_data_attr = {
@@ -370,15 +413,27 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
 static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
        __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
 
-static struct attribute *ipl_ccw_attrs[] = {
+static struct attribute *ipl_ccw_attrs_vm[] = {
        &sys_ipl_type_attr.attr,
        &sys_ipl_device_attr.attr,
        &sys_ipl_ccw_loadparm_attr.attr,
+       &sys_ipl_vm_parm_attr.attr,
        NULL,
 };
 
-static struct attribute_group ipl_ccw_attr_group = {
-       .attrs = ipl_ccw_attrs,
+static struct attribute *ipl_ccw_attrs_lpar[] = {
+       &sys_ipl_type_attr.attr,
+       &sys_ipl_device_attr.attr,
+       &sys_ipl_ccw_loadparm_attr.attr,
+       NULL,
+};
+
+static struct attribute_group ipl_ccw_attr_group_vm = {
+       .attrs = ipl_ccw_attrs_vm,
+};
+
+static struct attribute_group ipl_ccw_attr_group_lpar = {
+       .attrs = ipl_ccw_attrs_lpar
 };
 
 /* NSS ipl device attributes */
@@ -388,6 +443,8 @@ DEFINE_IPL_ATTR_RO(ipl_nss, name, "%s\n", kernel_nss_name);
 static struct attribute *ipl_nss_attrs[] = {
        &sys_ipl_type_attr.attr,
        &sys_ipl_nss_name_attr.attr,
+       &sys_ipl_ccw_loadparm_attr.attr,
+       &sys_ipl_vm_parm_attr.attr,
        NULL,
 };
 
@@ -450,7 +507,12 @@ static int __init ipl_init(void)
        }
        switch (ipl_info.type) {
        case IPL_TYPE_CCW:
-               rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group);
+               if (MACHINE_IS_VM)
+                       rc = sysfs_create_group(&ipl_kset->kobj,
+                                               &ipl_ccw_attr_group_vm);
+               else
+                       rc = sysfs_create_group(&ipl_kset->kobj,
+                                               &ipl_ccw_attr_group_lpar);
                break;
        case IPL_TYPE_FCP:
        case IPL_TYPE_FCP_DUMP:
@@ -481,6 +543,83 @@ static struct shutdown_action __refdata ipl_action = {
  * reipl shutdown action: Reboot Linux on shutdown.
  */
 
+/* VM IPL PARM attributes */
+static ssize_t reipl_generic_vmparm_show(struct ipl_parameter_block *ipb,
+                                         char *page)
+{
+       char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+
+       reipl_get_ascii_vmparm(vmparm, ipb);
+       return sprintf(page, "%s\n", vmparm);
+}
+
+static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
+                                         size_t vmparm_max,
+                                         const char *buf, size_t len)
+{
+       int i, ip_len;
+
+       /* ignore trailing newline */
+       ip_len = len;
+       if ((len > 0) && (buf[len - 1] == '\n'))
+               ip_len--;
+
+       if (ip_len > vmparm_max)
+               return -EINVAL;
+
+       /* parm is used to store kernel options, check for common chars */
+       for (i = 0; i < ip_len; i++)
+               if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i])))
+                       return -EINVAL;
+
+       memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+       ipb->ipl_info.ccw.vm_parm_len = ip_len;
+       if (ip_len > 0) {
+               ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+               memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len);
+               ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len);
+       } else {
+               ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID;
+       }
+
+       return len;
+}
+
+/* NSS wrapper */
+static ssize_t reipl_nss_vmparm_show(struct kobject *kobj,
+                                    struct kobj_attribute *attr, char *page)
+{
+       return reipl_generic_vmparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_vmparm_store(struct kobject *kobj,
+                                     struct kobj_attribute *attr,
+                                     const char *buf, size_t len)
+{
+       return reipl_generic_vmparm_store(reipl_block_nss, 56, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_vmparm_show(struct kobject *kobj,
+                                    struct kobj_attribute *attr, char *page)
+{
+       return reipl_generic_vmparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_vmparm_store(struct kobject *kobj,
+                                     struct kobj_attribute *attr,
+                                     const char *buf, size_t len)
+{
+       return reipl_generic_vmparm_store(reipl_block_ccw, 64, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_nss_vmparm_attr =
+       __ATTR(parm, S_IRUGO | S_IWUSR, reipl_nss_vmparm_show,
+                                       reipl_nss_vmparm_store);
+static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
+       __ATTR(parm, S_IRUGO | S_IWUSR, reipl_ccw_vmparm_show,
+                                       reipl_ccw_vmparm_store);
+
 /* FCP reipl device attributes */
 
 DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%016llx\n",
@@ -513,27 +652,26 @@ static struct attribute_group reipl_fcp_attr_group = {
 DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
        reipl_block_ccw->ipl_info.ccw.devno);
 
-static void reipl_get_ascii_loadparm(char *loadparm)
+static void reipl_get_ascii_loadparm(char *loadparm,
+                                    struct ipl_parameter_block *ibp)
 {
-       memcpy(loadparm, &reipl_block_ccw->ipl_info.ccw.load_param,
-              LOADPARM_LEN);
+       memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN);
        EBCASC(loadparm, LOADPARM_LEN);
        loadparm[LOADPARM_LEN] = 0;
        strstrip(loadparm);
 }
 
-static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
-                                      struct kobj_attribute *attr, char *page)
+static ssize_t reipl_generic_loadparm_show(struct ipl_parameter_block *ipb,
+                                          char *page)
 {
        char buf[LOADPARM_LEN + 1];
 
-       reipl_get_ascii_loadparm(buf);
+       reipl_get_ascii_loadparm(buf, ipb);
        return sprintf(page, "%s\n", buf);
 }
 
-static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
-                                       struct kobj_attribute *attr,
-                                       const char *buf, size_t len)
+static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
+                                           const char *buf, size_t len)
 {
        int i, lp_len;
 
@@ -552,35 +690,128 @@ static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
                return -EINVAL;
        }
        /* initialize loadparm with blanks */
-       memset(&reipl_block_ccw->ipl_info.ccw.load_param, ' ', LOADPARM_LEN);
+       memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN);
        /* copy and convert to ebcdic */
-       memcpy(&reipl_block_ccw->ipl_info.ccw.load_param, buf, lp_len);
-       ASCEBC(reipl_block_ccw->ipl_info.ccw.load_param, LOADPARM_LEN);
+       memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len);
+       ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN);
        return len;
 }
 
+/* NSS wrapper */
+static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
+                                      struct kobj_attribute *attr, char *page)
+{
+       return reipl_generic_loadparm_show(reipl_block_nss, page);
+}
+
+static ssize_t reipl_nss_loadparm_store(struct kobject *kobj,
+                                       struct kobj_attribute *attr,
+                                       const char *buf, size_t len)
+{
+       return reipl_generic_loadparm_store(reipl_block_nss, buf, len);
+}
+
+/* CCW wrapper */
+static ssize_t reipl_ccw_loadparm_show(struct kobject *kobj,
+                                      struct kobj_attribute *attr, char *page)
+{
+       return reipl_generic_loadparm_show(reipl_block_ccw, page);
+}
+
+static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj,
+                                       struct kobj_attribute *attr,
+                                       const char *buf, size_t len)
+{
+       return reipl_generic_loadparm_store(reipl_block_ccw, buf, len);
+}
+
 static struct kobj_attribute sys_reipl_ccw_loadparm_attr =
-       __ATTR(loadparm, 0644, reipl_ccw_loadparm_show,
-              reipl_ccw_loadparm_store);
+       __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_ccw_loadparm_show,
+                                           reipl_ccw_loadparm_store);
 
-static struct attribute *reipl_ccw_attrs[] = {
+static struct attribute *reipl_ccw_attrs_vm[] = {
        &sys_reipl_ccw_device_attr.attr,
        &sys_reipl_ccw_loadparm_attr.attr,
+       &sys_reipl_ccw_vmparm_attr.attr,
        NULL,
 };
 
-static struct attribute_group reipl_ccw_attr_group = {
+static struct attribute *reipl_ccw_attrs_lpar[] = {
+       &sys_reipl_ccw_device_attr.attr,
+       &sys_reipl_ccw_loadparm_attr.attr,
+       NULL,
+};
+
+static struct attribute_group reipl_ccw_attr_group_vm = {
+       .name  = IPL_CCW_STR,
+       .attrs = reipl_ccw_attrs_vm,
+};
+
+static struct attribute_group reipl_ccw_attr_group_lpar = {
        .name  = IPL_CCW_STR,
-       .attrs = reipl_ccw_attrs,
+       .attrs = reipl_ccw_attrs_lpar,
 };
 
 
 /* NSS reipl device attributes */
+static void reipl_get_ascii_nss_name(char *dst,
+                                    struct ipl_parameter_block *ipb)
+{
+       memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+       EBCASC(dst, NSS_NAME_SIZE);
+       dst[NSS_NAME_SIZE] = 0;
+}
+
+static ssize_t reipl_nss_name_show(struct kobject *kobj,
+                                  struct kobj_attribute *attr, char *page)
+{
+       char nss_name[NSS_NAME_SIZE + 1] = {};
 
-DEFINE_IPL_ATTR_STR_RW(reipl_nss, name, "%s\n", "%s\n", reipl_nss_name);
+       reipl_get_ascii_nss_name(nss_name, reipl_block_nss);
+       return sprintf(page, "%s\n", nss_name);
+}
+
+static ssize_t reipl_nss_name_store(struct kobject *kobj,
+                                   struct kobj_attribute *attr,
+                                   const char *buf, size_t len)
+{
+       int nss_len;
+
+       /* ignore trailing newline */
+       nss_len = len;
+       if ((len > 0) && (buf[len - 1] == '\n'))
+               nss_len--;
+
+       if (nss_len > NSS_NAME_SIZE)
+               return -EINVAL;
+
+       memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE);
+       if (nss_len > 0) {
+               reipl_block_nss->ipl_info.ccw.vm_flags |=
+                       DIAG308_VM_FLAGS_NSS_VALID;
+               memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len);
+               ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+               EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+       } else {
+               reipl_block_nss->ipl_info.ccw.vm_flags &=
+                       ~DIAG308_VM_FLAGS_NSS_VALID;
+       }
+
+       return len;
+}
+
+static struct kobj_attribute sys_reipl_nss_name_attr =
+       __ATTR(name, S_IRUGO | S_IWUSR, reipl_nss_name_show,
+                                       reipl_nss_name_store);
+
+static struct kobj_attribute sys_reipl_nss_loadparm_attr =
+       __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_nss_loadparm_show,
+                                           reipl_nss_loadparm_store);
 
 static struct attribute *reipl_nss_attrs[] = {
        &sys_reipl_nss_name_attr.attr,
+       &sys_reipl_nss_loadparm_attr.attr,
+       &sys_reipl_nss_vmparm_attr.attr,
        NULL,
 };
 
@@ -617,7 +848,10 @@ static int reipl_set_type(enum ipl_type type)
                reipl_method = REIPL_METHOD_FCP_DUMP;
                break;
        case IPL_TYPE_NSS:
-               reipl_method = REIPL_METHOD_NSS;
+               if (diag308_set_works)
+                       reipl_method = REIPL_METHOD_NSS_DIAG;
+               else
+                       reipl_method = REIPL_METHOD_NSS;
                break;
        case IPL_TYPE_UNKNOWN:
                reipl_method = REIPL_METHOD_DEFAULT;
@@ -655,11 +889,38 @@ static struct kobj_attribute reipl_type_attr =
 
 static struct kset *reipl_kset;
 
+static void get_ipl_string(char *dst, struct ipl_parameter_block *ipb,
+                          const enum ipl_method m)
+{
+       char loadparm[LOADPARM_LEN + 1] = {};
+       char vmparm[DIAG308_VMPARM_SIZE + 1] = {};
+       char nss_name[NSS_NAME_SIZE + 1] = {};
+       size_t pos = 0;
+
+       reipl_get_ascii_loadparm(loadparm, ipb);
+       reipl_get_ascii_nss_name(nss_name, ipb);
+       reipl_get_ascii_vmparm(vmparm, ipb);
+
+       switch (m) {
+       case REIPL_METHOD_CCW_VM:
+               pos = sprintf(dst, "IPL %X CLEAR", ipb->ipl_info.ccw.devno);
+               break;
+       case REIPL_METHOD_NSS:
+               pos = sprintf(dst, "IPL %s", nss_name);
+               break;
+       default:
+               break;
+       }
+       if (strlen(loadparm) > 0)
+               pos += sprintf(dst + pos, " LOADPARM '%s'", loadparm);
+       if (strlen(vmparm) > 0)
+               sprintf(dst + pos, " PARM %s", vmparm);
+}
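
get_ipl_string() builds the CP command that reipl_run() issues under z/VM. With illustrative values (device number 0x0150, loadparm "LINUX", a non-empty VM parm; none of these come from this patch) the CCW case would produce:

    IPL 150 CLEAR LOADPARM 'LINUX' PARM dasd=0150 root=/dev/dasda1

and an NSS re-IPL with NSS name LNXNSS and an empty loadparm:

    IPL LNXNSS PARM dasd=0150 root=/dev/dasda1
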
+
 static void reipl_run(struct shutdown_trigger *trigger)
 {
        struct ccw_dev_id devid;
-       static char buf[100];
-       char loadparm[LOADPARM_LEN + 1];
+       static char buf[128];
 
        switch (reipl_method) {
        case REIPL_METHOD_CCW_CIO:
@@ -668,13 +929,7 @@ static void reipl_run(struct shutdown_trigger *trigger)
                reipl_ccw_dev(&devid);
                break;
        case REIPL_METHOD_CCW_VM:
-               reipl_get_ascii_loadparm(loadparm);
-               if (strlen(loadparm) == 0)
-                       sprintf(buf, "IPL %X CLEAR",
-                               reipl_block_ccw->ipl_info.ccw.devno);
-               else
-                       sprintf(buf, "IPL %X CLEAR LOADPARM '%s'",
-                               reipl_block_ccw->ipl_info.ccw.devno, loadparm);
+               get_ipl_string(buf, reipl_block_ccw, REIPL_METHOD_CCW_VM);
                __cpcmd(buf, NULL, 0, NULL);
                break;
        case REIPL_METHOD_CCW_DIAG:
@@ -691,8 +946,12 @@ static void reipl_run(struct shutdown_trigger *trigger)
        case REIPL_METHOD_FCP_RO_VM:
                __cpcmd("IPL", NULL, 0, NULL);
                break;
+       case REIPL_METHOD_NSS_DIAG:
+               diag308(DIAG308_SET, reipl_block_nss);
+               diag308(DIAG308_IPL, NULL);
+               break;
        case REIPL_METHOD_NSS:
-               sprintf(buf, "IPL %s", reipl_nss_name);
+               get_ipl_string(buf, reipl_block_nss, REIPL_METHOD_NSS);
                __cpcmd(buf, NULL, 0, NULL);
                break;
        case REIPL_METHOD_DEFAULT:
@@ -707,16 +966,36 @@ static void reipl_run(struct shutdown_trigger *trigger)
        disabled_wait((unsigned long) __builtin_return_address(0));
 }
 
-static void __init reipl_probe(void)
+static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
 {
-       void *buffer;
+       ipb->hdr.len = IPL_PARM_BLK_CCW_LEN;
+       ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
+       ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
+       ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+}
 
-       buffer = (void *) get_zeroed_page(GFP_KERNEL);
-       if (!buffer)
-               return;
-       if (diag308(DIAG308_STORE, buffer) == DIAG308_RC_OK)
-               diag308_set_works = 1;
-       free_page((unsigned long)buffer);
+static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
+{
+       /* LOADPARM */
+       /* check if read scp info worked and set loadparm */
+       if (sclp_ipl_info.is_valid)
+               memcpy(ipb->ipl_info.ccw.load_parm,
+                               &sclp_ipl_info.loadparm, LOADPARM_LEN);
+       else
+               /* read scp info failed: set empty loadparm (EBCDIC blanks) */
+               memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN);
+       ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
+
+       /* VM PARM */
+       if (MACHINE_IS_VM && diag308_set_works &&
+           (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
+
+               ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
+               ipb->ipl_info.ccw.vm_parm_len =
+                                       ipl_block.ipl_info.ccw.vm_parm_len;
+               memcpy(ipb->ipl_info.ccw.vm_parm,
+                      ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+       }
 }
 
 static int __init reipl_nss_init(void)
@@ -725,10 +1004,31 @@ static int __init reipl_nss_init(void)
 
        if (!MACHINE_IS_VM)
                return 0;
+
+       reipl_block_nss = (void *) get_zeroed_page(GFP_KERNEL);
+       if (!reipl_block_nss)
+               return -ENOMEM;
+
+       if (!diag308_set_works)
+               sys_reipl_nss_vmparm_attr.attr.mode = S_IRUGO;
+
        rc = sysfs_create_group(&reipl_kset->kobj, &reipl_nss_attr_group);
        if (rc)
                return rc;
-       strncpy(reipl_nss_name, kernel_nss_name, NSS_NAME_SIZE + 1);
+
+       reipl_block_ccw_init(reipl_block_nss);
+       if (ipl_info.type == IPL_TYPE_NSS) {
+               memset(reipl_block_nss->ipl_info.ccw.nss_name,
+                       ' ', NSS_NAME_SIZE);
+               memcpy(reipl_block_nss->ipl_info.ccw.nss_name,
+                       kernel_nss_name, strlen(kernel_nss_name));
+               ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+               reipl_block_nss->ipl_info.ccw.vm_flags |=
+                       DIAG308_VM_FLAGS_NSS_VALID;
+
+               reipl_block_ccw_fill_parms(reipl_block_nss);
+       }
+
        reipl_capabilities |= IPL_TYPE_NSS;
        return 0;
 }
@@ -740,28 +1040,27 @@ static int __init reipl_ccw_init(void)
        reipl_block_ccw = (void *) get_zeroed_page(GFP_KERNEL);
        if (!reipl_block_ccw)
                return -ENOMEM;
-       rc = sysfs_create_group(&reipl_kset->kobj, &reipl_ccw_attr_group);
-       if (rc) {
-               free_page((unsigned long)reipl_block_ccw);
-               return rc;
+
+       if (MACHINE_IS_VM) {
+               if (!diag308_set_works)
+                       sys_reipl_ccw_vmparm_attr.attr.mode = S_IRUGO;
+               rc = sysfs_create_group(&reipl_kset->kobj,
+                                       &reipl_ccw_attr_group_vm);
+       } else {
+               if(!diag308_set_works)
+                       sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
+               rc = sysfs_create_group(&reipl_kset->kobj,
+                                       &reipl_ccw_attr_group_lpar);
        }
-       reipl_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
-       reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
-       reipl_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
-       reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
-       reipl_block_ccw->hdr.flags = DIAG308_FLAGS_LP_VALID;
-       /* check if read scp info worked and set loadparm */
-       if (sclp_ipl_info.is_valid)
-               memcpy(reipl_block_ccw->ipl_info.ccw.load_param,
-                      &sclp_ipl_info.loadparm, LOADPARM_LEN);
-       else
-               /* read scp info failed: set empty loadparm (EBCDIC blanks) */
-               memset(reipl_block_ccw->ipl_info.ccw.load_param, 0x40,
-                      LOADPARM_LEN);
-       if (!MACHINE_IS_VM && !diag308_set_works)
-               sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
-       if (ipl_info.type == IPL_TYPE_CCW)
+       if (rc)
+               return rc;
+
+       reipl_block_ccw_init(reipl_block_ccw);
+       if (ipl_info.type == IPL_TYPE_CCW) {
                reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
+               reipl_block_ccw_fill_parms(reipl_block_ccw);
+       }
+
        reipl_capabilities |= IPL_TYPE_CCW;
        return 0;
 }
@@ -1298,7 +1597,6 @@ static void __init shutdown_actions_init(void)
 
 static int __init s390_ipl_init(void)
 {
-       reipl_probe();
        sclp_get_ipl_info(&sclp_ipl_info);
        shutdown_actions_init();
        shutdown_triggers_init();
@@ -1405,6 +1703,12 @@ void __init setup_ipl(void)
        atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
 }
 
+void __init ipl_update_parameters(void)
+{
+       if (diag308(DIAG308_STORE, &ipl_block) == DIAG308_RC_OK)
+               diag308_set_works = 1;
+}
+
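
ipl_update_parameters() takes over what reipl_probe() used to do, but keeps the result: a single diag 308 STORE both serves as the indicator that diag 308 can be used for re-IPL later (diag308_set_works) and fills the static ipl_block, which get_ipl_vmparm() and reipl_block_ccw_fill_parms() read to recover the original IPL parameters. A rough usage sketch of how the early boot code relies on it (buffer name chosen for illustration):

    char vmparm[DIAG308_VMPARM_SIZE + 1] = {};

    ipl_update_parameters();    /* diag 308 STORE into ipl_block */
    get_ipl_vmparm(vmparm);     /* empty unless a CCW-type block was stored */
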
 void __init ipl_save_parameters(void)
 {
        struct cio_iplinfo iplinfo;
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index ed04d13..288ad49 100644
@@ -41,10 +41,8 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
        if (is_prohibited_opcode((kprobe_opcode_t *) p->addr))
                return -EINVAL;
 
-       if ((unsigned long)p->addr & 0x01) {
-               printk("Attempt to register kprobe at an unaligned address\n");
+       if ((unsigned long)p->addr & 0x01)
                return -EINVAL;
-               }
 
        /* Use the get_insn_slot() facility for correctness */
        if (!(p->ainsn.insn = get_insn_slot()))
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3c77dd3..131d7ee 100644
@@ -52,7 +52,6 @@ void machine_kexec_cleanup(struct kimage *image)
 
 void machine_shutdown(void)
 {
-       printk(KERN_INFO "kexec: machine_shutdown called\n");
 }
 
 void machine_kexec(struct kimage *image)
diff --git a/arch/s390/kernel/mem_detect.c b/arch/s390/kernel/mem_detect.c
new file mode 100644
index 0000000..18ed7ab
--- /dev/null
+++ b/arch/s390/kernel/mem_detect.c
@@ -0,0 +1,100 @@
+/*
+ *    Copyright IBM Corp. 2008
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <asm/ipl.h>
+#include <asm/sclp.h>
+#include <asm/setup.h>
+
+static int memory_fast_detect(struct mem_chunk *chunk)
+{
+       unsigned long val0 = 0;
+       unsigned long val1 = 0xc;
+       int rc = -EOPNOTSUPP;
+
+       if (ipl_flags & IPL_NSS_VALID)
+               return -EOPNOTSUPP;
+       asm volatile(
+               "       diag    %1,%2,0x260\n"
+               "0:     lhi     %0,0\n"
+               "1:\n"
+               EX_TABLE(0b,1b)
+               : "+d" (rc), "+d" (val0), "+d" (val1) : : "cc");
+
+       if (rc || val0 != val1)
+               return -EOPNOTSUPP;
+       chunk->size = val0 + 1;
+       return 0;
+}
+
+static inline int tprot(unsigned long addr)
+{
+       int rc = -EFAULT;
+
+       asm volatile(
+               "       tprot   0(%1),0\n"
+               "0:     ipm     %0\n"
+               "       srl     %0,28\n"
+               "1:\n"
+               EX_TABLE(0b,1b)
+               : "+d" (rc) : "a" (addr) : "cc");
+       return rc;
+}
+
+#define ADDR2G (1ULL << 31)
+
+static void find_memory_chunks(struct mem_chunk chunk[])
+{
+       unsigned long long memsize, rnmax, rzm;
+       unsigned long addr = 0, size;
+       int i = 0, type;
+
+       rzm = sclp_get_rzm();
+       rnmax = sclp_get_rnmax();
+       memsize = rzm * rnmax;
+       if (!rzm)
+               rzm = 1ULL << 17;
+       if (sizeof(long) == 4) {
+               rzm = min(ADDR2G, rzm);
+               memsize = memsize ? min(ADDR2G, memsize) : ADDR2G;
+       }
+       do {
+               size = 0;
+               type = tprot(addr);
+               do {
+                       size += rzm;
+                       if (memsize && addr + size >= memsize)
+                               break;
+               } while (type == tprot(addr + size));
+               if (type == CHUNK_READ_WRITE || type == CHUNK_READ_ONLY) {
+                       chunk[i].addr = addr;
+                       chunk[i].size = size;
+                       chunk[i].type = type;
+                       i++;
+               }
+               addr += size;
+       } while (addr < memsize && i < MEMORY_CHUNKS);
+}
+
+void detect_memory_layout(struct mem_chunk chunk[])
+{
+       unsigned long flags, cr0;
+
+       memset(chunk, 0, MEMORY_CHUNKS * sizeof(struct mem_chunk));
+       if (memory_fast_detect(&chunk[0]) == 0)
+               return;
+       /* Disable IRQs, DAT and low address protection so tprot does the
+        * right thing and we don't get scheduled away with low address
+        * protection disabled.
+        */
+       flags = __raw_local_irq_stnsm(0xf8);
+       __ctl_store(cr0, 0, 0);
+       __ctl_clear_bit(0, 28);
+       find_memory_chunks(chunk);
+       __ctl_load(cr0, 0, 0);
+       __raw_local_irq_ssm(flags);
+}
+EXPORT_SYMBOL(detect_memory_layout);
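
detect_memory_layout() centralizes what early.c used to do inline: try the diag 0x260 fast path first, otherwise walk storage with tprot in steps of the SCLP storage increment size (rzm, with a 128KB fallback), recording contiguous read-write or read-only ranges as chunks. Interrupts, DAT and low address protection are switched off around the scan so tprot reports the real state of each increment. A usage sketch (struct mem_chunk and MEMORY_CHUNKS as declared in asm/setup.h; the printk is only for illustration):

    struct mem_chunk chunks[MEMORY_CHUNKS];
    int i;

    detect_memory_layout(chunks);
    for (i = 0; i < MEMORY_CHUNKS && chunks[i].size; i++)
            printk(KERN_INFO "chunk %d: %016lx-%016lx type %d\n", i,
                   chunks[i].addr,
                   chunks[i].addr + chunks[i].size - 1, chunks[i].type);
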
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 7920861..85defd0 100644
@@ -75,46 +75,19 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
        return sf->gprs[8];
 }
 
-/*
- * Need to know about CPUs going idle?
- */
-static ATOMIC_NOTIFIER_HEAD(idle_chain);
 DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
 
-int register_idle_notifier(struct notifier_block *nb)
-{
-       return atomic_notifier_chain_register(&idle_chain, nb);
-}
-EXPORT_SYMBOL(register_idle_notifier);
-
-int unregister_idle_notifier(struct notifier_block *nb)
-{
-       return atomic_notifier_chain_unregister(&idle_chain, nb);
-}
-EXPORT_SYMBOL(unregister_idle_notifier);
-
 static int s390_idle_enter(void)
 {
        struct s390_idle_data *idle;
-       int nr_calls = 0;
-       void *hcpu;
-       int rc;
 
-       hcpu = (void *)(long)smp_processor_id();
-       rc = __atomic_notifier_call_chain(&idle_chain, S390_CPU_IDLE, hcpu, -1,
-                                         &nr_calls);
-       if (rc == NOTIFY_BAD) {
-               nr_calls--;
-               __atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
-                                            hcpu, nr_calls, NULL);
-               return rc;
-       }
        idle = &__get_cpu_var(s390_idle);
        spin_lock(&idle->lock);
        idle->idle_count++;
        idle->in_idle = 1;
        idle->idle_enter = get_clock();
        spin_unlock(&idle->lock);
+       vtime_stop_cpu_timer();
        return NOTIFY_OK;
 }
 
@@ -122,13 +95,12 @@ void s390_idle_leave(void)
 {
        struct s390_idle_data *idle;
 
+       vtime_start_cpu_timer();
        idle = &__get_cpu_var(s390_idle);
        spin_lock(&idle->lock);
        idle->idle_time += get_clock() - idle->idle_enter;
        idle->in_idle = 0;
        spin_unlock(&idle->lock);
-       atomic_notifier_call_chain(&idle_chain, S390_CPU_NOT_IDLE,
-                                  (void *)(long) smp_processor_id());
 }
 
 extern void s390_handle_mcck(void);
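
The idle notifier chain is replaced by direct calls into the virtual CPU timer code, apparently its remaining user, which also removes the NOTIFY_BAD unwind logic from s390_idle_enter(). The resulting idle path, as a sketch:

    s390_idle_enter();      /* idle accounting, then vtime_stop_cpu_timer() */
    /* ... wait for work ... */
    s390_idle_leave();      /* vtime_start_cpu_timer(), then accounting */
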
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 35827b9..2815bfe 100644
@@ -33,6 +33,8 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/signal.h>
+#include <linux/elf.h>
+#include <linux/regset.h>
 
 #include <asm/segment.h>
 #include <asm/page.h>
 #include "compat_ptrace.h"
 #endif
 
+enum s390_regset {
+       REGSET_GENERAL,
+       REGSET_FP,
+};
+
 static void
 FixPerRegisters(struct task_struct *task)
 {
@@ -126,24 +133,10 @@ ptrace_disable(struct task_struct *child)
  * struct user contain pad bytes that should be read as zeroes.
  * Lovely...
  */
-static int
-peek_user(struct task_struct *child, addr_t addr, addr_t data)
+static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 {
        struct user *dummy = NULL;
-       addr_t offset, tmp, mask;
-
-       /*
-        * Stupid gdb peeks/pokes the access registers in 64 bit with
-        * an alignment of 4. Programmers from hell...
-        */
-       mask = __ADDR_MASK;
-#ifdef CONFIG_64BIT
-       if (addr >= (addr_t) &dummy->regs.acrs &&
-           addr < (addr_t) &dummy->regs.orig_gpr2)
-               mask = 3;
-#endif
-       if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
-               return -EIO;
+       addr_t offset, tmp;
 
        if (addr < (addr_t) &dummy->regs.acrs) {
                /*
@@ -197,24 +190,18 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
        } else
                tmp = 0;
 
-       return put_user(tmp, (addr_t __user *) data);
+       return tmp;
 }
 
-/*
- * Write a word to the user area of a process at location addr. This
- * operation does have an additional problem compared to peek_user.
- * Stores to the program status word and on the floating point
- * control register needs to get checked for validity.
- */
 static int
-poke_user(struct task_struct *child, addr_t addr, addr_t data)
+peek_user(struct task_struct *child, addr_t addr, addr_t data)
 {
        struct user *dummy = NULL;
-       addr_t offset, mask;
+       addr_t tmp, mask;
 
        /*
         * Stupid gdb peeks/pokes the access registers in 64 bit with
-        * an alignment of 4. Programmers from hell indeed...
+        * an alignment of 4. Programmers from hell...
         */
        mask = __ADDR_MASK;
 #ifdef CONFIG_64BIT
@@ -225,6 +212,21 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
        if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
                return -EIO;
 
+       tmp = __peek_user(child, addr);
+       return put_user(tmp, (addr_t __user *) data);
+}
+
+/*
+ * Write a word to the user area of a process at location addr. This
+ * operation does have an additional problem compared to peek_user.
+ * Stores to the program status word and on the floating point
+ * control register needs to get checked for validity.
+ */
+static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+       struct user *dummy = NULL;
+       addr_t offset;
+
        if (addr < (addr_t) &dummy->regs.acrs) {
                /*
                 * psw and gprs are stored on the stack
@@ -292,6 +294,28 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
        return 0;
 }
 
+static int
+poke_user(struct task_struct *child, addr_t addr, addr_t data)
+{
+       struct user *dummy = NULL;
+       addr_t mask;
+
+       /*
+        * Stupid gdb peeks/pokes the access registers in 64 bit with
+        * an alignment of 4. Programmers from hell indeed...
+        */
+       mask = __ADDR_MASK;
+#ifdef CONFIG_64BIT
+       if (addr >= (addr_t) &dummy->regs.acrs &&
+           addr < (addr_t) &dummy->regs.orig_gpr2)
+               mask = 3;
+#endif
+       if ((addr & mask) || addr > sizeof(struct user) - __ADDR_MASK)
+               return -EIO;
+
+       return __poke_user(child, addr, data);
+}
+
 long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 {
        ptrace_area parea; 
@@ -367,18 +391,13 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 /*
  * Same as peek_user but for a 31 bit program.
  */
-static int
-peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
+static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 {
        struct user32 *dummy32 = NULL;
        per_struct32 *dummy_per32 = NULL;
        addr_t offset;
        __u32 tmp;
 
-       if (!test_thread_flag(TIF_31BIT) ||
-           (addr & 3) || addr > sizeof(struct user) - 3)
-               return -EIO;
-
        if (addr < (addr_t) &dummy32->regs.acrs) {
                /*
                 * psw and gprs are stored on the stack
@@ -435,25 +454,32 @@ peek_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
        } else
                tmp = 0;
 
+       return tmp;
+}
+
+static int peek_user_compat(struct task_struct *child,
+                           addr_t addr, addr_t data)
+{
+       __u32 tmp;
+
+       if (!test_thread_flag(TIF_31BIT) ||
+           (addr & 3) || addr > sizeof(struct user) - 3)
+               return -EIO;
+
+       tmp = __peek_user_compat(child, addr);
        return put_user(tmp, (__u32 __user *) data);
 }
 
 /*
  * Same as poke_user but for a 31 bit program.
  */
-static int
-poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
+static int __poke_user_compat(struct task_struct *child,
+                             addr_t addr, addr_t data)
 {
        struct user32 *dummy32 = NULL;
        per_struct32 *dummy_per32 = NULL;
+       __u32 tmp = (__u32) data;
        addr_t offset;
-       __u32 tmp;
-
-       if (!test_thread_flag(TIF_31BIT) ||
-           (addr & 3) || addr > sizeof(struct user32) - 3)
-               return -EIO;
-
-       tmp = (__u32) data;
 
        if (addr < (addr_t) &dummy32->regs.acrs) {
                /*
@@ -528,6 +554,16 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
        return 0;
 }
 
+static int poke_user_compat(struct task_struct *child,
+                           addr_t addr, addr_t data)
+{
+       if (!test_thread_flag(TIF_31BIT) ||
+           (addr & 3) || addr > sizeof(struct user32) - 3)
+               return -EIO;
+
+       return __poke_user_compat(child, addr, data);
+}
+
 long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                        compat_ulong_t caddr, compat_ulong_t cdata)
 {
@@ -539,11 +575,11 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
        switch (request) {
        case PTRACE_PEEKUSR:
                /* read the word at location addr in the USER area. */
-               return peek_user_emu31(child, addr, data);
+               return peek_user_compat(child, addr, data);
 
        case PTRACE_POKEUSR:
                /* write the word at location addr in the USER area */
-               return poke_user_emu31(child, addr, data);
+               return poke_user_compat(child, addr, data);
 
        case PTRACE_PEEKUSR_AREA:
        case PTRACE_POKEUSR_AREA:
@@ -555,13 +591,13 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
                copied = 0;
                while (copied < parea.len) {
                        if (request == PTRACE_PEEKUSR_AREA)
-                               ret = peek_user_emu31(child, addr, data);
+                               ret = peek_user_compat(child, addr, data);
                        else {
                                __u32 utmp;
                                if (get_user(utmp,
                                             (__u32 __force __user *) data))
                                        return -EFAULT;
-                               ret = poke_user_emu31(child, addr, utmp);
+                               ret = poke_user_compat(child, addr, utmp);
                        }
                        if (ret)
                                return ret;
@@ -610,3 +646,240 @@ syscall_trace(struct pt_regs *regs, int entryexit)
                                    regs->gprs[2], regs->orig_gpr2, regs->gprs[3],
                                    regs->gprs[4], regs->gprs[5]);
 }
+
+/*
+ * user_regset definitions.
+ */
+
+static int s390_regs_get(struct task_struct *target,
+                        const struct user_regset *regset,
+                        unsigned int pos, unsigned int count,
+                        void *kbuf, void __user *ubuf)
+{
+       if (target == current)
+               save_access_regs(target->thread.acrs);
+
+       if (kbuf) {
+               unsigned long *k = kbuf;
+               while (count > 0) {
+                       *k++ = __peek_user(target, pos);
+                       count -= sizeof(*k);
+                       pos += sizeof(*k);
+               }
+       } else {
+               unsigned long __user *u = ubuf;
+               while (count > 0) {
+                       if (__put_user(__peek_user(target, pos), u++))
+                               return -EFAULT;
+                       count -= sizeof(*u);
+                       pos += sizeof(*u);
+               }
+       }
+       return 0;
+}
+
+static int s390_regs_set(struct task_struct *target,
+                        const struct user_regset *regset,
+                        unsigned int pos, unsigned int count,
+                        const void *kbuf, const void __user *ubuf)
+{
+       int rc = 0;
+
+       if (target == current)
+               save_access_regs(target->thread.acrs);
+
+       if (kbuf) {
+               const unsigned long *k = kbuf;
+               while (count > 0 && !rc) {
+                       rc = __poke_user(target, pos, *k++);
+                       count -= sizeof(*k);
+                       pos += sizeof(*k);
+               }
+       } else {
+               const unsigned long  __user *u = ubuf;
+               while (count > 0 && !rc) {
+                       unsigned long word;
+                       rc = __get_user(word, u++);
+                       if (rc)
+                               break;
+                       rc = __poke_user(target, pos, word);
+                       count -= sizeof(*u);
+                       pos += sizeof(*u);
+               }
+       }
+
+       if (rc == 0 && target == current)
+               restore_access_regs(target->thread.acrs);
+
+       return rc;
+}
+
+static int s390_fpregs_get(struct task_struct *target,
+                          const struct user_regset *regset, unsigned int pos,
+                          unsigned int count, void *kbuf, void __user *ubuf)
+{
+       if (target == current)
+               save_fp_regs(&target->thread.fp_regs);
+
+       return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+                                  &target->thread.fp_regs, 0, -1);
+}
+
+static int s390_fpregs_set(struct task_struct *target,
+                          const struct user_regset *regset, unsigned int pos,
+                          unsigned int count, const void *kbuf,
+                          const void __user *ubuf)
+{
+       int rc = 0;
+
+       if (target == current)
+               save_fp_regs(&target->thread.fp_regs);
+
+       /* If setting FPC, must validate it first. */
+       if (count > 0 && pos < offsetof(s390_fp_regs, fprs)) {
+               u32 fpc[2] = { target->thread.fp_regs.fpc, 0 };
+               rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpc,
+                                       0, offsetof(s390_fp_regs, fprs));
+               if (rc)
+                       return rc;
+               if ((fpc[0] & ~FPC_VALID_MASK) != 0 || fpc[1] != 0)
+                       return -EINVAL;
+               target->thread.fp_regs.fpc = fpc[0];
+       }
+
+       if (rc == 0 && count > 0)
+               rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                       target->thread.fp_regs.fprs,
+                                       offsetof(s390_fp_regs, fprs), -1);
+
+       if (rc == 0 && target == current)
+               restore_fp_regs(&target->thread.fp_regs);
+
+       return rc;
+}
+
+static const struct user_regset s390_regsets[] = {
+       [REGSET_GENERAL] = {
+               .core_note_type = NT_PRSTATUS,
+               .n = sizeof(s390_regs) / sizeof(long),
+               .size = sizeof(long),
+               .align = sizeof(long),
+               .get = s390_regs_get,
+               .set = s390_regs_set,
+       },
+       [REGSET_FP] = {
+               .core_note_type = NT_PRFPREG,
+               .n = sizeof(s390_fp_regs) / sizeof(long),
+               .size = sizeof(long),
+               .align = sizeof(long),
+               .get = s390_fpregs_get,
+               .set = s390_fpregs_set,
+       },
+};
+
+static const struct user_regset_view user_s390_view = {
+       .name = UTS_MACHINE,
+       .e_machine = EM_S390,
+       .regsets = s390_regsets,
+       .n = ARRAY_SIZE(s390_regsets)
+};
+
+#ifdef CONFIG_COMPAT
+static int s390_compat_regs_get(struct task_struct *target,
+                               const struct user_regset *regset,
+                               unsigned int pos, unsigned int count,
+                               void *kbuf, void __user *ubuf)
+{
+       if (target == current)
+               save_access_regs(target->thread.acrs);
+
+       if (kbuf) {
+               compat_ulong_t *k = kbuf;
+               while (count > 0) {
+                       *k++ = __peek_user_compat(target, pos);
+                       count -= sizeof(*k);
+                       pos += sizeof(*k);
+               }
+       } else {
+               compat_ulong_t __user *u = ubuf;
+               while (count > 0) {
+                       if (__put_user(__peek_user_compat(target, pos), u++))
+                               return -EFAULT;
+                       count -= sizeof(*u);
+                       pos += sizeof(*u);
+               }
+       }
+       return 0;
+}
+
+static int s390_compat_regs_set(struct task_struct *target,
+                               const struct user_regset *regset,
+                               unsigned int pos, unsigned int count,
+                               const void *kbuf, const void __user *ubuf)
+{
+       int rc = 0;
+
+       if (target == current)
+               save_access_regs(target->thread.acrs);
+
+       if (kbuf) {
+               const compat_ulong_t *k = kbuf;
+               while (count > 0 && !rc) {
+                       rc = __poke_user_compat(target, pos, *k++);
+                       count -= sizeof(*k);
+                       pos += sizeof(*k);
+               }
+       } else {
+               const compat_ulong_t  __user *u = ubuf;
+               while (count > 0 && !rc) {
+                       compat_ulong_t word;
+                       rc = __get_user(word, u++);
+                       if (rc)
+                               break;
+                       rc = __poke_user_compat(target, pos, word);
+                       count -= sizeof(*u);
+                       pos += sizeof(*u);
+               }
+       }
+
+       if (rc == 0 && target == current)
+               restore_access_regs(target->thread.acrs);
+
+       return rc;
+}
+
+static const struct user_regset s390_compat_regsets[] = {
+       [REGSET_GENERAL] = {
+               .core_note_type = NT_PRSTATUS,
+               .n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
+               .size = sizeof(compat_long_t),
+               .align = sizeof(compat_long_t),
+               .get = s390_compat_regs_get,
+               .set = s390_compat_regs_set,
+       },
+       [REGSET_FP] = {
+               .core_note_type = NT_PRFPREG,
+               .n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
+               .size = sizeof(compat_long_t),
+               .align = sizeof(compat_long_t),
+               .get = s390_fpregs_get,
+               .set = s390_fpregs_set,
+       },
+};
+
+static const struct user_regset_view user_s390_compat_view = {
+       .name = "s390",
+       .e_machine = EM_S390,
+       .regsets = s390_compat_regsets,
+       .n = ARRAY_SIZE(s390_compat_regsets)
+};
+#endif
+
+const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+{
+#ifdef CONFIG_COMPAT
+       if (test_tsk_thread_flag(task, TIF_31BIT))
+               return &user_s390_compat_view;
+#endif
+       return &user_s390_view;
+}
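
The new regset tables plug the s390 general-purpose and floating-point register sets into the generic user_regset interface: the .get/.set callbacks walk the area one register-sized word at a time through the same __peek_user()/__poke_user() (or *_compat) accessors that PTRACE_PEEKUSR/POKEUSR use, FPC writes are validated against FPC_VALID_MASK before any floating-point state is stored, and task_user_regset_view() hands 31-bit tasks the compat view. A consumer-side sketch (kernel-buffer path; identifiers as defined above, local names invented):

    const struct user_regset_view *view = task_user_regset_view(task);
    const struct user_regset *rs = &view->regsets[REGSET_GENERAL];
    long gprs_area[sizeof(s390_regs) / sizeof(long)];

    /* pos and count are byte offsets/lengths into the register area */
    rs->get(task, rs, 0, sizeof(gprs_area), gprs_area, NULL);
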
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2bc70b6..b358e18 100644
@@ -77,7 +77,7 @@ unsigned long machine_flags;
 unsigned long elf_hwcap = 0;
 char elf_platform[ELF_PLATFORM_SIZE];
 
-struct mem_chunk __meminitdata memory_chunk[MEMORY_CHUNKS];
+struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
 volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
 static unsigned long __initdata memory_end;
 
@@ -205,12 +205,6 @@ static void __init conmode_default(void)
                        SET_CONSOLE_SCLP;
 #endif
                }
-        } else if (MACHINE_IS_P390) {
-#if defined(CONFIG_TN3215_CONSOLE)
-               SET_CONSOLE_3215;
-#elif defined(CONFIG_TN3270_CONSOLE)
-               SET_CONSOLE_3270;
-#endif
        } else {
 #if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                SET_CONSOLE_SCLP;
@@ -221,18 +215,17 @@ static void __init conmode_default(void)
 #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
 static void __init setup_zfcpdump(unsigned int console_devno)
 {
-       static char str[64];
+       static char str[41];
 
        if (ipl_info.type != IPL_TYPE_FCP_DUMP)
                return;
        if (console_devno != -1)
-               sprintf(str, "cio_ignore=all,!0.0.%04x,!0.0.%04x",
+               sprintf(str, " cio_ignore=all,!0.0.%04x,!0.0.%04x",
                        ipl_info.data.fcp.dev_id.devno, console_devno);
        else
-               sprintf(str, "cio_ignore=all,!0.0.%04x",
+               sprintf(str, " cio_ignore=all,!0.0.%04x",
                        ipl_info.data.fcp.dev_id.devno);
-       strcat(COMMAND_LINE, " ");
-       strcat(COMMAND_LINE, str);
+       strcat(boot_command_line, str);
        console_loglevel = 2;
 }
 #else
@@ -289,32 +282,6 @@ static int __init early_parse_mem(char *p)
 }
 early_param("mem", early_parse_mem);
 
-/*
- * "ipldelay=XXX[sm]" sets ipl delay in seconds or minutes
- */
-static int __init early_parse_ipldelay(char *p)
-{
-       unsigned long delay = 0;
-
-       delay = simple_strtoul(p, &p, 0);
-
-       switch (*p) {
-       case 's':
-       case 'S':
-               delay *= 1000000;
-               break;
-       case 'm':
-       case 'M':
-               delay *= 60 * 1000000;
-       }
-
-       /* now wait for the requested amount of time */
-       udelay(delay);
-
-       return 0;
-}
-early_param("ipldelay", early_parse_ipldelay);
-
 #ifdef CONFIG_S390_SWITCH_AMODE
 #ifdef CONFIG_PGSTE
 unsigned int switch_amode = 1;
@@ -804,11 +771,9 @@ setup_arch(char **cmdline_p)
                printk("We are running native (64 bit mode)\n");
 #endif /* CONFIG_64BIT */
 
-       /* Save unparsed command line copy for /proc/cmdline */
-       strlcpy(boot_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
-
-       *cmdline_p = COMMAND_LINE;
-       *(*cmdline_p + COMMAND_LINE_SIZE - 1) = '\0';
+       /* Have one command line that is parsed and saved in /proc/cmdline */
+       /* boot_command_line has already been set up in early.c */
+       *cmdline_p = boot_command_line;
 
         ROOT_DEV = Root_RAM0;
 
index 7aec676..7418beb 100644 (file)
@@ -3,7 +3,7 @@
  *    Time of day based timer functions.
  *
  *  S390 version
- *    Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ *    Copyright IBM Corp. 1999, 2008
  *    Author(s): Hartmut Penner (hp@de.ibm.com),
  *               Martin Schwidefsky (schwidefsky@de.ibm.com),
  *               Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
@@ -31,6 +31,7 @@
 #include <linux/notifier.h>
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
+#include <linux/bootmem.h>
 #include <asm/uaccess.h>
 #include <asm/delay.h>
 #include <asm/s390_ext.h>
@@ -162,7 +163,7 @@ void init_cpu_timer(void)
        /* Enable clock comparator timer interrupt. */
        __ctl_set_bit(0,11);
 
-       /* Always allow ETR external interrupts, even without an ETR. */
+       /* Always allow the timing alert external interrupt. */
        __ctl_set_bit(0, 4);
 }
 
@@ -170,8 +171,21 @@ static void clock_comparator_interrupt(__u16 code)
 {
 }
 
+static void etr_timing_alert(struct etr_irq_parm *);
+static void stp_timing_alert(struct stp_irq_parm *);
+
+static void timing_alert_interrupt(__u16 code)
+{
+       if (S390_lowcore.ext_params & 0x00c40000)
+               etr_timing_alert((struct etr_irq_parm *)
+                                &S390_lowcore.ext_params);
+       if (S390_lowcore.ext_params & 0x00038000)
+               stp_timing_alert((struct stp_irq_parm *)
+                                &S390_lowcore.ext_params);
+}
+
 static void etr_reset(void);
-static void etr_ext_handler(__u16);
+static void stp_reset(void);
 
 /*
  * Get the TOD clock running.
@@ -181,6 +195,7 @@ static u64 __init reset_tod_clock(void)
        u64 time;
 
        etr_reset();
+       stp_reset();
        if (store_clock(&time) == 0)
                return time;
        /* TOD clock not running. Set the clock to Unix Epoch. */
@@ -231,8 +246,9 @@ void __init time_init(void)
        if (clocksource_register(&clocksource_tod) != 0)
                panic("Could not register TOD clock source");
 
-       /* request the etr external interrupt */
-       if (register_early_external_interrupt(0x1406, etr_ext_handler,
+       /* request the timing alert external interrupt */
+       if (register_early_external_interrupt(0x1406,
+                                             timing_alert_interrupt,
                                              &ext_int_etr_cc) != 0)
                panic("Couldn't request external interrupt 0x1406");
 
@@ -244,11 +260,113 @@ void __init time_init(void)
 #endif
 }
 
+/*
+ * The time is "clock". old is what we think the time is.
+ * Adjust the value by a multiple of jiffies and add the delta to ntp.
+ * "delay" is an approximation how long the synchronization took. If
+ * the time correction is positive, then "delay" is subtracted from
+ * the time difference and only the remaining part is passed to ntp.
+ */
+static unsigned long long adjust_time(unsigned long long old,
+                                     unsigned long long clock,
+                                     unsigned long long delay)
+{
+       unsigned long long delta, ticks;
+       struct timex adjust;
+
+       if (clock > old) {
+               /* It is later than we thought. */
+               delta = ticks = clock - old;
+               delta = ticks = (delta < delay) ? 0 : delta - delay;
+               delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+               adjust.offset = ticks * (1000000 / HZ);
+       } else {
+               /* It is earlier than we thought. */
+               delta = ticks = old - clock;
+               delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
+               delta = -delta;
+               adjust.offset = -ticks * (1000000 / HZ);
+       }
+       jiffies_timer_cc += delta;
+       if (adjust.offset != 0) {
+               printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
+                      adjust.offset);
+               adjust.modes = ADJ_OFFSET_SINGLESHOT;
+               do_adjtimex(&adjust);
+       }
+       return delta;
+}
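
A worked illustration, not part of this patch: adjust_time() splits the measured difference into whole jiffies, which are handed to ntp as a single-shot offset, and a sub-jiffy remainder, which is left out of jiffies_timer_cc. A standalone sketch of the same arithmetic with made-up numbers (CLK_TICKS_PER_JIFFY is an s390 TOD constant; 100 is used here only for readability):

#include <stdio.h>

int main(void)
{
        unsigned long long clk_ticks_per_jiffy = 100;   /* made up for the example */
        unsigned long long diff = 7 * clk_ticks_per_jiffy + 42;        /* clock - old */
        unsigned long long ticks = diff / clk_ticks_per_jiffy;         /* 7 whole jiffies -> ntp */
        unsigned long long delta = diff - diff % clk_ticks_per_jiffy;  /* 700 -> jiffies_timer_cc */

        printf("ticks=%llu delta=%llu remainder=%llu\n",
               ticks, delta, diff % clk_ticks_per_jiffy);
        return 0;
}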
+
+static DEFINE_PER_CPU(atomic_t, clock_sync_word);
+static unsigned long clock_sync_flags;
+
+#define CLOCK_SYNC_HAS_ETR     0
+#define CLOCK_SYNC_HAS_STP     1
+#define CLOCK_SYNC_ETR         2
+#define CLOCK_SYNC_STP         3
+
+/*
+ * The synchronous get_clock function. It will write the current clock
+ * value to the clock pointer and return 0 if the clock is in sync with
+ * the external time source. If the clock mode is local it will return
+ * -ENOSYS, and -EAGAIN if the clock is not in sync with the external
+ * reference.
+ */
+int get_sync_clock(unsigned long long *clock)
+{
+       atomic_t *sw_ptr;
+       unsigned int sw0, sw1;
+
+       sw_ptr = &get_cpu_var(clock_sync_word);
+       sw0 = atomic_read(sw_ptr);
+       *clock = get_clock();
+       sw1 = atomic_read(sw_ptr);
+       put_cpu_var(clock_sync_word);
+       if (sw0 == sw1 && (sw0 & 0x80000000U))
+               /* Success: time is in sync. */
+               return 0;
+       if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
+           !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+               return -ENOSYS;
+       if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
+           !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+               return -EACCES;
+       return -EAGAIN;
+}
+EXPORT_SYMBOL(get_sync_clock);
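
Illustrative only, not from this patch: a caller is expected to distinguish the three error codes documented above. A hedged sketch (the helper name is hypothetical) that falls back to the local TOD clock when no usable external reference exists:

/* Hedged example of the calling convention for get_sync_clock(). */
static unsigned long long example_timestamp(void)
{
        unsigned long long clk;
        int rc;

        rc = get_sync_clock(&clk);
        if (rc == 0)
                return clk;             /* in sync with ETR/STP */
        /* -ENOSYS: no ETR/STP, -EACCES: none usable, -EAGAIN: sync in progress */
        return get_clock();             /* fall back to the local TOD clock */
}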
+
+/*
+ * Make get_sync_clock return -EAGAIN.
+ */
+static void disable_sync_clock(void *dummy)
+{
+       atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+       /*
+        * Clear the in-sync bit 2^31. All get_sync_clock calls will
+        * fail until the sync bit is turned back on. In addition
+        * increase the "sequence" counter to avoid the race of an
+        * etr event and the complete recovery against get_sync_clock.
+        */
+       atomic_clear_mask(0x80000000, sw_ptr);
+       atomic_inc(sw_ptr);
+}
+
+/*
+ * Make get_sync_clock return 0 again.
+ * Needs to be called from a context disabled for preemption.
+ */
+static void enable_sync_clock(void)
+{
+       atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+       atomic_set_mask(0x80000000, sw_ptr);
+}
+
 /*
  * External Time Reference (ETR) code.
  */
 static int etr_port0_online;
 static int etr_port1_online;
+static int etr_steai_available;
 
 static int __init early_parse_etr(char *p)
 {
@@ -273,12 +391,6 @@ enum etr_event {
        ETR_EVENT_UPDATE,
 };
 
-enum etr_flags {
-       ETR_FLAG_ENOSYS,
-       ETR_FLAG_EACCES,
-       ETR_FLAG_STEAI,
-};
-
 /*
  * Valid bit combinations of the eacr register are (x = don't care):
  * e0 e1 dp p0 p1 ea es sl
@@ -305,73 +417,17 @@ enum etr_flags {
  */
 static struct etr_eacr etr_eacr;
 static u64 etr_tolec;                  /* time of last eacr update */
-static unsigned long etr_flags;
 static struct etr_aib etr_port0;
 static int etr_port0_uptodate;
 static struct etr_aib etr_port1;
 static int etr_port1_uptodate;
 static unsigned long etr_events;
 static struct timer_list etr_timer;
-static DEFINE_PER_CPU(atomic_t, etr_sync_word);
 
 static void etr_timeout(unsigned long dummy);
 static void etr_work_fn(struct work_struct *work);
 static DECLARE_WORK(etr_work, etr_work_fn);
 
-/*
- * The etr get_clock function. It will write the current clock value
- * to the clock pointer and return 0 if the clock is in sync with the
- * external time source. If the clock mode is local it will return
- * -ENOSYS and -EAGAIN if the clock is not in sync with the external
- * reference. This function is what ETR is all about..
- */
-int get_sync_clock(unsigned long long *clock)
-{
-       atomic_t *sw_ptr;
-       unsigned int sw0, sw1;
-
-       sw_ptr = &get_cpu_var(etr_sync_word);
-       sw0 = atomic_read(sw_ptr);
-       *clock = get_clock();
-       sw1 = atomic_read(sw_ptr);
-       put_cpu_var(etr_sync_sync);
-       if (sw0 == sw1 && (sw0 & 0x80000000U))
-               /* Success: time is in sync. */
-               return 0;
-       if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
-               return -ENOSYS;
-       if (test_bit(ETR_FLAG_EACCES, &etr_flags))
-               return -EACCES;
-       return -EAGAIN;
-}
-EXPORT_SYMBOL(get_sync_clock);
-
-/*
- * Make get_sync_clock return -EAGAIN.
- */
-static void etr_disable_sync_clock(void *dummy)
-{
-       atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word);
-       /*
-        * Clear the in-sync bit 2^31. All get_sync_clock calls will
-        * fail until the sync bit is turned back on. In addition
-        * increase the "sequence" counter to avoid the race of an
-        * etr event and the complete recovery against get_sync_clock.
-        */
-       atomic_clear_mask(0x80000000, sw_ptr);
-       atomic_inc(sw_ptr);
-}
-
-/*
- * Make get_sync_clock return 0 again.
- * Needs to be called from a context disabled for preemption.
- */
-static void etr_enable_sync_clock(void)
-{
-       atomic_t *sw_ptr = &__get_cpu_var(etr_sync_word);
-       atomic_set_mask(0x80000000, sw_ptr);
-}
-
 /*
  * Reset ETR attachment.
  */
@@ -381,15 +437,13 @@ static void etr_reset(void)
                .e0 = 0, .e1 = 0, ._pad0 = 4, .dp = 0,
                .p0 = 0, .p1 = 0, ._pad1 = 0, .ea = 0,
                .es = 0, .sl = 0 };
-       if (etr_setr(&etr_eacr) == 0)
+       if (etr_setr(&etr_eacr) == 0) {
                etr_tolec = get_clock();
-       else {
-               set_bit(ETR_FLAG_ENOSYS, &etr_flags);
-               if (etr_port0_online || etr_port1_online) {
-                       printk(KERN_WARNING "Running on non ETR capable "
-                              "machine, only local mode available.\n");
-                       etr_port0_online = etr_port1_online = 0;
-               }
+               set_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags);
+       } else if (etr_port0_online || etr_port1_online) {
+               printk(KERN_WARNING "Running on non ETR capable "
+                      "machine, only local mode available.\n");
+               etr_port0_online = etr_port1_online = 0;
        }
 }
 
@@ -397,14 +451,12 @@ static int __init etr_init(void)
 {
        struct etr_aib aib;
 
-       if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
+       if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
                return 0;
        /* Check if this machine has the steai instruction. */
        if (etr_steai(&aib, ETR_STEAI_STEPPING_PORT) == 0)
-               set_bit(ETR_FLAG_STEAI, &etr_flags);
+               etr_steai_available = 1;
        setup_timer(&etr_timer, etr_timeout, 0UL);
-       if (!etr_port0_online && !etr_port1_online)
-               set_bit(ETR_FLAG_EACCES, &etr_flags);
        if (etr_port0_online) {
                set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
                schedule_work(&etr_work);
@@ -435,7 +487,8 @@ void etr_switch_to_local(void)
 {
        if (!etr_eacr.sl)
                return;
-       etr_disable_sync_clock(NULL);
+       if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+               disable_sync_clock(NULL);
        set_bit(ETR_EVENT_SWITCH_LOCAL, &etr_events);
        schedule_work(&etr_work);
 }
@@ -450,23 +503,21 @@ void etr_sync_check(void)
 {
        if (!etr_eacr.es)
                return;
-       etr_disable_sync_clock(NULL);
+       if (test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+               disable_sync_clock(NULL);
        set_bit(ETR_EVENT_SYNC_CHECK, &etr_events);
        schedule_work(&etr_work);
 }
 
 /*
- * ETR external interrupt. There are two causes:
+ * ETR timing alert. There are two causes:
  * 1) port state change, check the usability of the port
  * 2) port alert, one of the ETR-data-validity bits (v1-v2 bits of the
  *    sldr-status word) or ETR-data word 1 (edf1) or ETR-data word 3 (edf3)
  *    or ETR-data word 4 (edf4) has changed.
  */
-static void etr_ext_handler(__u16 code)
+static void etr_timing_alert(struct etr_irq_parm *intparm)
 {
-       struct etr_interruption_parameter *intparm =
-               (struct etr_interruption_parameter *) &S390_lowcore.ext_params;
-
        if (intparm->pc0)
                /* ETR port 0 state change. */
                set_bit(ETR_EVENT_PORT0_CHANGE, &etr_events);
@@ -591,58 +642,23 @@ static int etr_aib_follows(struct etr_aib *a1, struct etr_aib *a2, int p)
        return 1;
 }
 
-/*
- * The time is "clock". old is what we think the time is.
- * Adjust the value by a multiple of jiffies and add the delta to ntp.
- * "delay" is an approximation how long the synchronization took. If
- * the time correction is positive, then "delay" is subtracted from
- * the time difference and only the remaining part is passed to ntp.
- */
-static unsigned long long etr_adjust_time(unsigned long long old,
-                                         unsigned long long clock,
-                                         unsigned long long delay)
-{
-       unsigned long long delta, ticks;
-       struct timex adjust;
-
-       if (clock > old) {
-               /* It is later than we thought. */
-               delta = ticks = clock - old;
-               delta = ticks = (delta < delay) ? 0 : delta - delay;
-               delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-               adjust.offset = ticks * (1000000 / HZ);
-       } else {
-               /* It is earlier than we thought. */
-               delta = ticks = old - clock;
-               delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-               delta = -delta;
-               adjust.offset = -ticks * (1000000 / HZ);
-       }
-       jiffies_timer_cc += delta;
-       if (adjust.offset != 0) {
-               printk(KERN_NOTICE "etr: time adjusted by %li micro-seconds\n",
-                      adjust.offset);
-               adjust.modes = ADJ_OFFSET_SINGLESHOT;
-               do_adjtimex(&adjust);
-       }
-       return delta;
-}
-
-static struct {
+struct clock_sync_data {
        int in_sync;
        unsigned long long fixup_cc;
-} etr_sync;
+};
 
-static void etr_sync_cpu_start(void *dummy)
+static void clock_sync_cpu_start(void *dummy)
 {
-       etr_enable_sync_clock();
+       struct clock_sync_data *sync = dummy;
+
+       enable_sync_clock();
        /*
         * This looks like a busy wait loop but it isn't. etr_sync_cpus
         * is called on all other cpus while the TOD clock is stopped.
         * __udelay will stop the cpu on an enabled wait psw until the
         * TOD is running again.
         */
-       while (etr_sync.in_sync == 0) {
+       while (sync->in_sync == 0) {
                __udelay(1);
                /*
                 * A different cpu changes *in_sync. Therefore use
@@ -650,17 +666,17 @@ static void etr_sync_cpu_start(void *dummy)
                 */
                barrier();
        }
-       if (etr_sync.in_sync != 1)
+       if (sync->in_sync != 1)
                /* Didn't work. Clear per-cpu in sync bit again. */
-               etr_disable_sync_clock(NULL);
+               disable_sync_clock(NULL);
        /*
         * This round of TOD syncing is done. Set the clock comparator
         * to the next tick and let the processor continue.
         */
-       fixup_clock_comparator(etr_sync.fixup_cc);
+       fixup_clock_comparator(sync->fixup_cc);
 }
 
-static void etr_sync_cpu_end(void *dummy)
+static void clock_sync_cpu_end(void *dummy)
 {
 }
 
@@ -672,6 +688,7 @@ static void etr_sync_cpu_end(void *dummy)
 static int etr_sync_clock(struct etr_aib *aib, int port)
 {
        struct etr_aib *sync_port;
+       struct clock_sync_data etr_sync;
        unsigned long long clock, old_clock, delay, delta;
        int follows;
        int rc;
@@ -690,9 +707,9 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
         */
        memset(&etr_sync, 0, sizeof(etr_sync));
        preempt_disable();
-       smp_call_function(etr_sync_cpu_start, NULL, 0, 0);
+       smp_call_function(clock_sync_cpu_start, &etr_sync, 0, 0);
        local_irq_disable();
-       etr_enable_sync_clock();
+       enable_sync_clock();
 
        /* Set clock to next OTE. */
        __ctl_set_bit(14, 21);
@@ -707,13 +724,13 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
                /* Adjust Linux timing variables. */
                delay = (unsigned long long)
                        (aib->edf2.etv - sync_port->edf2.etv) << 32;
-               delta = etr_adjust_time(old_clock, clock, delay);
+               delta = adjust_time(old_clock, clock, delay);
                etr_sync.fixup_cc = delta;
                fixup_clock_comparator(delta);
                /* Verify that the clock is properly set. */
                if (!etr_aib_follows(sync_port, aib, port)) {
                        /* Didn't work. */
-                       etr_disable_sync_clock(NULL);
+                       disable_sync_clock(NULL);
                        etr_sync.in_sync = -EAGAIN;
                        rc = -EAGAIN;
                } else {
@@ -724,12 +741,12 @@ static int etr_sync_clock(struct etr_aib *aib, int port)
                /* Could not set the clock ?!? */
                __ctl_clear_bit(0, 29);
                __ctl_clear_bit(14, 21);
-               etr_disable_sync_clock(NULL);
+               disable_sync_clock(NULL);
                etr_sync.in_sync = -EAGAIN;
                rc = -EAGAIN;
        }
        local_irq_enable();
-       smp_call_function(etr_sync_cpu_end,NULL,0,0);
+       smp_call_function(clock_sync_cpu_end, NULL, 0, 0);
        preempt_enable();
        return rc;
 }
@@ -832,7 +849,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib,
         * Do not try to get the alternate port aib if the clock
         * is not in sync yet.
         */
-       if (!eacr.es)
+       if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags) && !eacr.es)
                return eacr;
 
        /*
@@ -840,7 +857,7 @@ static struct etr_eacr etr_handle_update(struct etr_aib *aib,
         * the other port immediately. If only stetr is available the
         * data-port bit toggle has to be used.
         */
-       if (test_bit(ETR_FLAG_STEAI, &etr_flags)) {
+       if (etr_steai_available) {
                if (eacr.p0 && !etr_port0_uptodate) {
                        etr_steai_cv(&etr_port0, ETR_STEAI_PORT_0);
                        etr_port0_uptodate = 1;
@@ -909,10 +926,10 @@ static void etr_work_fn(struct work_struct *work)
        if (!eacr.ea) {
                /* Both ports offline. Reset everything. */
                eacr.dp = eacr.es = eacr.sl = 0;
-               on_each_cpu(etr_disable_sync_clock, NULL, 0, 1);
+               on_each_cpu(disable_sync_clock, NULL, 0, 1);
                del_timer_sync(&etr_timer);
                etr_update_eacr(eacr);
-               set_bit(ETR_FLAG_EACCES, &etr_flags);
+               clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
                return;
        }
 
@@ -953,7 +970,6 @@ static void etr_work_fn(struct work_struct *work)
                        eacr.e1 = 1;
                sync_port = (etr_port0_uptodate &&
                             etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-               clear_bit(ETR_FLAG_EACCES, &etr_flags);
        } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_pps_mode) {
                eacr.sl = 0;
                eacr.e0 = 0;
@@ -962,7 +978,6 @@ static void etr_work_fn(struct work_struct *work)
                        eacr.es = 0;
                sync_port = (etr_port1_uptodate &&
                             etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-               clear_bit(ETR_FLAG_EACCES, &etr_flags);
        } else if (eacr.p0 && aib.esw.psc0 == etr_lpsc_operational_step) {
                eacr.sl = 1;
                eacr.e0 = 1;
@@ -976,7 +991,6 @@ static void etr_work_fn(struct work_struct *work)
                        eacr.e1 = 1;
                sync_port = (etr_port0_uptodate &&
                             etr_port_valid(&etr_port0, 0)) ? 0 : -1;
-               clear_bit(ETR_FLAG_EACCES, &etr_flags);
        } else if (eacr.p1 && aib.esw.psc1 == etr_lpsc_operational_step) {
                eacr.sl = 1;
                eacr.e0 = 0;
@@ -985,19 +999,22 @@ static void etr_work_fn(struct work_struct *work)
                        eacr.es = 0;
                sync_port = (etr_port1_uptodate &&
                             etr_port_valid(&etr_port1, 1)) ? 1 : -1;
-               clear_bit(ETR_FLAG_EACCES, &etr_flags);
        } else {
                /* Both ports not usable. */
                eacr.es = eacr.sl = 0;
                sync_port = -1;
-               set_bit(ETR_FLAG_EACCES, &etr_flags);
+               clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
        }
 
+       if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+               eacr.es = 0;
+
        /*
         * If the clock is in sync just update the eacr and return.
         * If there is no valid sync port wait for a port update.
         */
-       if (eacr.es || sync_port < 0) {
+       if (test_bit(CLOCK_SYNC_STP, &clock_sync_flags) ||
+           eacr.es || sync_port < 0) {
                etr_update_eacr(eacr);
                etr_set_tolec_timeout(now);
                return;
@@ -1018,11 +1035,13 @@ static void etr_work_fn(struct work_struct *work)
         * and set up a timer to try again after 0.5 seconds
         */
        etr_update_eacr(eacr);
+       set_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
        if (now < etr_tolec + (1600000 << 12) ||
            etr_sync_clock(&aib, sync_port) != 0) {
                /* Sync failed. Try again in 1/2 second. */
                eacr.es = 0;
                etr_update_eacr(eacr);
+               clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags);
                etr_set_sync_timeout();
        } else
                etr_set_tolec_timeout(now);
@@ -1097,8 +1116,8 @@ static ssize_t etr_online_store(struct sys_device *dev,
        value = simple_strtoul(buf, NULL, 0);
        if (value != 0 && value != 1)
                return -EINVAL;
-       if (test_bit(ETR_FLAG_ENOSYS, &etr_flags))
-               return -ENOSYS;
+       if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags))
+               return -EOPNOTSUPP;
        if (dev == &etr_port0_dev) {
                if (etr_port0_online == value)
                        return count;   /* Nothing to do. */
@@ -1292,3 +1311,318 @@ out:
 }
 
 device_initcall(etr_init_sysfs);
+
+/*
+ * Server Time Protocol (STP) code.
+ */
+static int stp_online;
+static struct stp_sstpi stp_info;
+static void *stp_page;
+
+static void stp_work_fn(struct work_struct *work);
+static DECLARE_WORK(stp_work, stp_work_fn);
+
+static int __init early_parse_stp(char *p)
+{
+       if (strncmp(p, "off", 3) == 0)
+               stp_online = 0;
+       else if (strncmp(p, "on", 2) == 0)
+               stp_online = 1;
+       return 0;
+}
+early_param("stp", early_parse_stp);
+
+/*
+ * Reset STP attachment.
+ */
+static void stp_reset(void)
+{
+       int rc;
+
+       stp_page = alloc_bootmem_pages(PAGE_SIZE);
+       rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+       if (rc == 1)
+               set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags);
+       else if (stp_online) {
+               printk(KERN_WARNING "Running on non STP capable machine.\n");
+               free_bootmem((unsigned long) stp_page, PAGE_SIZE);
+               stp_page = NULL;
+               stp_online = 0;
+       }
+}
+
+static int __init stp_init(void)
+{
+       if (test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags) && stp_online)
+               schedule_work(&stp_work);
+       return 0;
+}
+
+arch_initcall(stp_init);
+
+/*
+ * STP timing alert. There are three causes:
+ * 1) timing status change
+ * 2) link availability change
+ * 3) time control parameter change
+ * In all three cases we are only interested in the clock source state.
+ * If a STP clock source is now available use it.
+ */
+static void stp_timing_alert(struct stp_irq_parm *intparm)
+{
+       if (intparm->tsc || intparm->lac || intparm->tcpc)
+               schedule_work(&stp_work);
+}
+
+/*
+ * STP sync check machine check. This is called when the timing state
+ * changes from the synchronized state to the unsynchronized state.
+ * After a STP sync check the clock is not in sync. The machine check
+ * is broadcast to all cpus at the same time.
+ */
+void stp_sync_check(void)
+{
+       if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+               return;
+       disable_sync_clock(NULL);
+       schedule_work(&stp_work);
+}
+
+/*
+ * STP island condition machine check. This is called when an attached
+ * server attempts to communicate over an STP link and the servers
+ * have matching CTN ids and have a valid stratum-1 configuration
+ * but the configurations do not match.
+ */
+void stp_island_check(void)
+{
+       if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+               return;
+       disable_sync_clock(NULL);
+       schedule_work(&stp_work);
+}
+
+/*
+ * STP work function. Check the STP state and take over the clock
+ * synchronization if the STP clock source is usable.
+ */
+static void stp_work_fn(struct work_struct *work)
+{
+       struct clock_sync_data stp_sync;
+       unsigned long long old_clock, delta;
+       int rc;
+
+       if (!stp_online) {
+               chsc_sstpc(stp_page, STP_OP_CTRL, 0x0000);
+               return;
+       }
+
+       rc = chsc_sstpc(stp_page, STP_OP_CTRL, 0xb0e0);
+       if (rc)
+               return;
+
+       rc = chsc_sstpi(stp_page, &stp_info, sizeof(struct stp_sstpi));
+       if (rc || stp_info.c == 0)
+               return;
+
+       /*
+        * Catch all other cpus and make them wait until we have
+        * successfully synced the clock. smp_call_function will
+        * return after all other cpus are in clock_sync_cpu_start.
+        */
+       memset(&stp_sync, 0, sizeof(stp_sync));
+       preempt_disable();
+       smp_call_function(clock_sync_cpu_start, &stp_sync, 0, 0);
+       local_irq_disable();
+       enable_sync_clock();
+
+       set_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+       if (test_and_clear_bit(CLOCK_SYNC_ETR, &clock_sync_flags))
+               schedule_work(&etr_work);
+
+       rc = 0;
+       if (stp_info.todoff[0] || stp_info.todoff[1] ||
+           stp_info.todoff[2] || stp_info.todoff[3] ||
+           stp_info.tmd != 2) {
+               old_clock = get_clock();
+               rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0);
+               if (rc == 0) {
+                       delta = adjust_time(old_clock, get_clock(), 0);
+                       fixup_clock_comparator(delta);
+                       rc = chsc_sstpi(stp_page, &stp_info,
+                                       sizeof(struct stp_sstpi));
+                       if (rc == 0 && stp_info.tmd != 2)
+                               rc = -EAGAIN;
+               }
+       }
+       if (rc) {
+               disable_sync_clock(NULL);
+               stp_sync.in_sync = -EAGAIN;
+               clear_bit(CLOCK_SYNC_STP, &clock_sync_flags);
+               if (etr_port0_online || etr_port1_online)
+                       schedule_work(&etr_work);
+       } else
+               stp_sync.in_sync = 1;
+
+       local_irq_enable();
+       smp_call_function(clock_sync_cpu_end, NULL, 0, 0);
+       preempt_enable();
+}
+
+/*
+ * STP class sysfs interface functions
+ */
+static struct sysdev_class stp_sysclass = {
+       .name   = "stp",
+};
+
+static ssize_t stp_ctn_id_show(struct sysdev_class *class, char *buf)
+{
+       if (!stp_online)
+               return -ENODATA;
+       return sprintf(buf, "%016llx\n",
+                      *(unsigned long long *) stp_info.ctnid);
+}
+
+static SYSDEV_CLASS_ATTR(ctn_id, 0400, stp_ctn_id_show, NULL);
+
+static ssize_t stp_ctn_type_show(struct sysdev_class *class, char *buf)
+{
+       if (!stp_online)
+               return -ENODATA;
+       return sprintf(buf, "%i\n", stp_info.ctn);
+}
+
+static SYSDEV_CLASS_ATTR(ctn_type, 0400, stp_ctn_type_show, NULL);
+
+static ssize_t stp_dst_offset_show(struct sysdev_class *class, char *buf)
+{
+       if (!stp_online || !(stp_info.vbits & 0x2000))
+               return -ENODATA;
+       return sprintf(buf, "%i\n", (int)(s16) stp_info.dsto);
+}
+
+static SYSDEV_CLASS_ATTR(dst_offset, 0400, stp_dst_offset_show, NULL);
+
+static ssize_t stp_leap_seconds_show(struct sysdev_class *class, char *buf)
+{
+       if (!stp_online || !(stp_info.vbits & 0x8000))
+               return -ENODATA;
+       return sprintf(buf, "%i\n", (int)(s16) stp_info.leaps);
+}
+
+static SYSDEV_CLASS_ATTR(leap_seconds, 0400, stp_leap_seconds_show, NULL);
+
+static ssize_t stp_stratum_show(struct sysdev_class *class, char *buf)
+{
+       if (!stp_online)
+               return -ENODATA;
+       return sprintf(buf, "%i\n", (int)(s16) stp_info.stratum);
+}
+
+static SYSDEV_CLASS_ATTR(stratum, 0400, stp_stratum_show, NULL);
+
+static ssize_t stp_time_offset_show(struct sysdev_class *class, char *buf)
+{
+       if (!stp_online || !(stp_info.vbits & 0x0800))
+               return -ENODATA;
+       return sprintf(buf, "%i\n", (int) stp_info.tto);
+}
+
+static SYSDEV_CLASS_ATTR(time_offset, 0400, stp_time_offset_show, NULL);
+
+static ssize_t stp_time_zone_offset_show(struct sysdev_class *class, char *buf)
+{
+       if (!stp_online || !(stp_info.vbits & 0x4000))
+               return -ENODATA;
+       return sprintf(buf, "%i\n", (int)(s16) stp_info.tzo);
+}
+
+static SYSDEV_CLASS_ATTR(time_zone_offset, 0400,
+                        stp_time_zone_offset_show, NULL);
+
+static ssize_t stp_timing_mode_show(struct sysdev_class *class, char *buf)
+{
+       if (!stp_online)
+               return -ENODATA;
+       return sprintf(buf, "%i\n", stp_info.tmd);
+}
+
+static SYSDEV_CLASS_ATTR(timing_mode, 0400, stp_timing_mode_show, NULL);
+
+static ssize_t stp_timing_state_show(struct sysdev_class *class, char *buf)
+{
+       if (!stp_online)
+               return -ENODATA;
+       return sprintf(buf, "%i\n", stp_info.tst);
+}
+
+static SYSDEV_CLASS_ATTR(timing_state, 0400, stp_timing_state_show, NULL);
+
+static ssize_t stp_online_show(struct sysdev_class *class, char *buf)
+{
+       return sprintf(buf, "%i\n", stp_online);
+}
+
+static ssize_t stp_online_store(struct sysdev_class *class,
+                               const char *buf, size_t count)
+{
+       unsigned int value;
+
+       value = simple_strtoul(buf, NULL, 0);
+       if (value != 0 && value != 1)
+               return -EINVAL;
+       if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+               return -EOPNOTSUPP;
+       stp_online = value;
+       schedule_work(&stp_work);
+       return count;
+}
+
+/*
+ * Can't use SYSDEV_CLASS_ATTR because the attribute should be named
+ * stp/online but attr_online already exists in this file.
+ */
+static struct sysdev_class_attribute attr_stp_online = {
+       .attr = { .name = "online", .mode = 0600 },
+       .show   = stp_online_show,
+       .store  = stp_online_store,
+};
+
+static struct sysdev_class_attribute *stp_attributes[] = {
+       &attr_ctn_id,
+       &attr_ctn_type,
+       &attr_dst_offset,
+       &attr_leap_seconds,
+       &attr_stp_online,
+       &attr_stratum,
+       &attr_time_offset,
+       &attr_time_zone_offset,
+       &attr_timing_mode,
+       &attr_timing_state,
+       NULL
+};
+
+static int __init stp_init_sysfs(void)
+{
+       struct sysdev_class_attribute **attr;
+       int rc;
+
+       rc = sysdev_class_register(&stp_sysclass);
+       if (rc)
+               goto out;
+       for (attr = stp_attributes; *attr; attr++) {
+               rc = sysdev_class_create_file(&stp_sysclass, *attr);
+               if (rc)
+                       goto out_unreg;
+       }
+       return 0;
+out_unreg:
+       for (; attr >= stp_attributes; attr--)
+               sysdev_class_remove_file(&stp_sysclass, *attr);
+       sysdev_class_unregister(&stp_sysclass);
+out:
+       return rc;
+}
+
+device_initcall(stp_init_sysfs);
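
Not part of the patch — a hedged userspace sketch of the new sysfs interface. The path assumes the sysdev class appears as /sys/devices/system/stp, the usual location for sysdev classes:

#include <stdio.h>

int main(void)
{
        FILE *f;
        char line[64];

        /* Request STP synchronization; in the kernel this schedules stp_work. */
        f = fopen("/sys/devices/system/stp/online", "w");
        if (!f) {
                perror("stp/online");
                return 1;
        }
        fputs("1\n", f);
        fclose(f);

        /* ctn_id is readable once STP is online; reads fail with ENODATA otherwise. */
        f = fopen("/sys/devices/system/stp/ctn_id", "r");
        if (f) {
                if (fgets(line, sizeof(line), f))
                        printf("ctn_id: %s", line);
                fclose(f);
        }
        return 0;
}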
index 661a072..212d618 100644 (file)
@@ -313,8 +313,6 @@ void __init s390_init_cpu_topology(void)
                machine_has_topology_irq = 1;
 
        tl_info = alloc_bootmem_pages(PAGE_SIZE);
-       if (!tl_info)
-               goto error;
        info = tl_info;
        stsi(info, 15, 1, 2);
 
index ca90ee3..0fa5dc5 100644 (file)
@@ -136,7 +136,7 @@ static inline void set_vtimer(__u64 expires)
 }
 #endif
 
-static void start_cpu_timer(void)
+void vtime_start_cpu_timer(void)
 {
        struct vtimer_queue *vt_list;
 
@@ -150,7 +150,7 @@ static void start_cpu_timer(void)
                set_vtimer(vt_list->idle);
 }
 
-static void stop_cpu_timer(void)
+void vtime_stop_cpu_timer(void)
 {
        struct vtimer_queue *vt_list;
 
@@ -318,8 +318,7 @@ static void internal_add_vtimer(struct vtimer_list *timer)
        vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
        spin_lock_irqsave(&vt_list->lock, flags);
 
-       if (timer->cpu != smp_processor_id())
-               printk("internal_add_vtimer: BUG, running on wrong CPU");
+       BUG_ON(timer->cpu != smp_processor_id());
 
        /* if list is empty we only have to set the timer */
        if (list_empty(&vt_list->list)) {
@@ -353,25 +352,12 @@ static void internal_add_vtimer(struct vtimer_list *timer)
        put_cpu();
 }
 
-static inline int prepare_vtimer(struct vtimer_list *timer)
+static inline void prepare_vtimer(struct vtimer_list *timer)
 {
-       if (!timer->function) {
-               printk("add_virt_timer: uninitialized timer\n");
-               return -EINVAL;
-       }
-
-       if (!timer->expires || timer->expires > VTIMER_MAX_SLICE) {
-               printk("add_virt_timer: invalid timer expire value!\n");
-               return -EINVAL;
-       }
-
-       if (vtimer_pending(timer)) {
-               printk("add_virt_timer: timer pending\n");
-               return -EBUSY;
-       }
-
+       BUG_ON(!timer->function);
+       BUG_ON(!timer->expires || timer->expires > VTIMER_MAX_SLICE);
+       BUG_ON(vtimer_pending(timer));
        timer->cpu = get_cpu();
-       return 0;
 }
 
 /*
@@ -382,10 +368,7 @@ void add_virt_timer(void *new)
        struct vtimer_list *timer;
 
        timer = (struct vtimer_list *)new;
-
-       if (prepare_vtimer(timer) < 0)
-               return;
-
+       prepare_vtimer(timer);
        timer->interval = 0;
        internal_add_vtimer(timer);
 }
@@ -399,10 +382,7 @@ void add_virt_timer_periodic(void *new)
        struct vtimer_list *timer;
 
        timer = (struct vtimer_list *)new;
-
-       if (prepare_vtimer(timer) < 0)
-               return;
-
+       prepare_vtimer(timer);
        timer->interval = timer->expires;
        internal_add_vtimer(timer);
 }
@@ -423,15 +403,8 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
        unsigned long flags;
        int cpu;
 
-       if (!timer->function) {
-               printk("mod_virt_timer: uninitialized timer\n");
-               return  -EINVAL;
-       }
-
-       if (!expires || expires > VTIMER_MAX_SLICE) {
-               printk("mod_virt_timer: invalid expire range\n");
-               return -EINVAL;
-       }
+       BUG_ON(!timer->function);
+       BUG_ON(!expires || expires > VTIMER_MAX_SLICE);
 
        /*
         * This is a common optimization triggered by the
@@ -444,6 +417,9 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
        cpu = get_cpu();
        vt_list = &per_cpu(virt_cpu_timer, cpu);
 
+       /* check if we run on the right CPU */
+       BUG_ON(timer->cpu != cpu);
+
        /* disable interrupts before test if timer is pending */
        spin_lock_irqsave(&vt_list->lock, flags);
 
@@ -458,14 +434,6 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
                return 0;
        }
 
-       /* check if we run on the right CPU */
-       if (timer->cpu != cpu) {
-               printk("mod_virt_timer: running on wrong CPU, check your code\n");
-               spin_unlock_irqrestore(&vt_list->lock, flags);
-               put_cpu();
-               return -EINVAL;
-       }
-
        list_del_init(&timer->entry);
        timer->expires = expires;
 
@@ -536,24 +504,6 @@ void init_cpu_vtimer(void)
 
 }
 
-static int vtimer_idle_notify(struct notifier_block *self,
-                             unsigned long action, void *hcpu)
-{
-       switch (action) {
-       case S390_CPU_IDLE:
-               stop_cpu_timer();
-               break;
-       case S390_CPU_NOT_IDLE:
-               start_cpu_timer();
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block vtimer_idle_nb = {
-       .notifier_call = vtimer_idle_notify,
-};
-
 void __init vtime_init(void)
 {
        /* request the cpu timer external interrupt */
@@ -561,9 +511,6 @@ void __init vtime_init(void)
                                              &ext_int_info_timer) != 0)
                panic("Couldn't request external interrupt 0x1005");
 
-       if (register_idle_notifier(&vtimer_idle_nb))
-               panic("Couldn't register idle notifier");
-
        /* Enable cpu timer interrupts on the boot cpu. */
        init_cpu_vtimer();
 }
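
Illustrative only, not part of this patch: the checks that were just turned into BUG_ON()s define the contract for vtimer users. A hedged sketch of a well-formed caller; init_virt_timer() and the function/expires fields are taken to exist as used elsewhere in vtime.c, and the example_ names are hypothetical:

static struct vtimer_list example_vtimer;

static void example_vtimer_fn(unsigned long data)
{
        /* runs once the task has consumed 'expires' units of cpu time */
}

static void example_vtimer_setup(void)
{
        init_virt_timer(&example_vtimer);               /* assumed initializer */
        example_vtimer.function = example_vtimer_fn;    /* must not be NULL */
        example_vtimer.expires = 10000;                 /* 0 < expires <= VTIMER_MAX_SLICE */
        add_virt_timer(&example_vtimer);                /* one-shot; _periodic variant exists */
}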
index 0559864..388cc74 100644 (file)
@@ -202,3 +202,22 @@ void free_initrd_mem(unsigned long start, unsigned long end)
         }
 }
 #endif
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+int arch_add_memory(int nid, u64 start, u64 size)
+{
+       struct pglist_data *pgdat;
+       struct zone *zone;
+       int rc;
+
+       pgdat = NODE_DATA(nid);
+       zone = pgdat->node_zones + ZONE_NORMAL;
+       rc = vmem_add_mapping(start, size);
+       if (rc)
+               return rc;
+       rc = __add_pages(zone, PFN_DOWN(start), PFN_DOWN(size));
+       if (rc)
+               vmem_remove_mapping(start, size);
+       return rc;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
index adff76e..f1a95d1 100644 (file)
@@ -104,30 +104,7 @@ int kstack_depth_to_print = 12;
 
 void printk_address(unsigned long address, int reliable)
 {
-#ifdef CONFIG_KALLSYMS
-       unsigned long offset = 0, symsize;
-       const char *symname;
-       char *modname;
-       char *delim = ":";
-       char namebuf[KSYM_NAME_LEN];
-       char reliab[4] = "";
-
-       symname = kallsyms_lookup(address, &symsize, &offset,
-                                       &modname, namebuf);
-       if (!symname) {
-               printk(" [<%016lx>]\n", address);
-               return;
-       }
-       if (!reliable)
-               strcpy(reliab, "? ");
-
-       if (!modname)
-               modname = delim = "";
-       printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
-               address, reliab, delim, modname, delim, symname, offset, symsize);
-#else
-       printk(" [<%016lx>]\n", address);
-#endif
+       printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
 }
 
 static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
index 3e97f2b..1ab7c15 100644 (file)
@@ -81,6 +81,18 @@ config BLK_DEV_BSG
 
          If unsure, say N.
 
+config BLK_DEV_INTEGRITY
+       bool "Block layer data integrity support"
+       ---help---
+       Some storage devices allow extra information to be
+       stored/retrieved to help protect the data.  The block layer
+       data integrity option provides hooks which can be used by
+       filesystems to ensure better data integrity.
+
+       Say yes here if you have a storage device that provides the
+       T10/SCSI Data Integrity Field or the T13/ATA External Path
+       Protection.  If in doubt, say N.
+
 endif # BLOCK
 
 config BLOCK_COMPAT
index 5a43c7d..208000b 100644 (file)
@@ -4,7 +4,8 @@
 
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
                        blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
-                       blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o
+                       blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
+                       cmd-filter.o
 
 obj-$(CONFIG_BLK_DEV_BSG)      += bsg.o
 obj-$(CONFIG_IOSCHED_NOOP)     += noop-iosched.o
@@ -14,3 +15,4 @@ obj-$(CONFIG_IOSCHED_CFQ)     += cfq-iosched.o
 
 obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
 obj-$(CONFIG_BLOCK_COMPAT)     += compat_ioctl.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY)        += blk-integrity.o
index 743f33a..9735acb 100644 (file)
@@ -151,6 +151,7 @@ enum arq_state {
 
 static DEFINE_PER_CPU(unsigned long, ioc_count);
 static struct completion *ioc_gone;
+static DEFINE_SPINLOCK(ioc_gone_lock);
 
 static void as_move_to_dispatch(struct as_data *ad, struct request *rq);
 static void as_antic_stop(struct as_data *ad);
@@ -164,8 +165,19 @@ static void free_as_io_context(struct as_io_context *aic)
 {
        kfree(aic);
        elv_ioc_count_dec(ioc_count);
-       if (ioc_gone && !elv_ioc_count_read(ioc_count))
-               complete(ioc_gone);
+       if (ioc_gone) {
+               /*
+                * AS scheduler is exiting, grab exit lock and check
+                * the pending io context count. If it hits zero,
+                * complete ioc_gone and set it back to NULL.
+                */
+               spin_lock(&ioc_gone_lock);
+               if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+                       complete(ioc_gone);
+                       ioc_gone = NULL;
+               }
+               spin_unlock(&ioc_gone_lock);
+       }
 }
 
 static void as_trim(struct io_context *ioc)
@@ -1493,7 +1505,7 @@ static void __exit as_exit(void)
        /* ioc_gone's update must be visible before reading ioc_count */
        smp_wmb();
        if (elv_ioc_count_read(ioc_count))
-               wait_for_completion(ioc_gone);
+               wait_for_completion(&all_gone);
        synchronize_rcu();
 }
 
index 1905aab..dbc7f42 100644 (file)
@@ -143,6 +143,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 
                bio->bi_size -= nbytes;
                bio->bi_sector += (nbytes >> 9);
+
+               if (bio_integrity(bio))
+                       bio_integrity_advance(bio, nbytes);
+
                if (bio->bi_size == 0)
                        bio_endio(bio, error);
        } else {
@@ -201,8 +205,7 @@ void blk_plug_device(struct request_queue *q)
        if (blk_queue_stopped(q))
                return;
 
-       if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) {
-               __set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+       if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) {
                mod_timer(&q->unplug_timer, jiffies + q->unplug_delay);
                blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG);
        }
@@ -217,10 +220,9 @@ int blk_remove_plug(struct request_queue *q)
 {
        WARN_ON(!irqs_disabled());
 
-       if (!test_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags))
+       if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q))
                return 0;
 
-       queue_flag_clear(QUEUE_FLAG_PLUGGED, q);
        del_timer(&q->unplug_timer);
        return 1;
 }
@@ -324,8 +326,7 @@ void blk_start_queue(struct request_queue *q)
         * one level of recursion is ok and is much faster than kicking
         * the unplug handling
         */
-       if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
-               queue_flag_set(QUEUE_FLAG_REENTER, q);
+       if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
                q->request_fn(q);
                queue_flag_clear(QUEUE_FLAG_REENTER, q);
        } else {
@@ -390,8 +391,7 @@ void __blk_run_queue(struct request_queue *q)
         * handling reinvoke the handler shortly if we already got there.
         */
        if (!elv_queue_empty(q)) {
-               if (!test_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) {
-                       queue_flag_set(QUEUE_FLAG_REENTER, q);
+               if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
                        q->request_fn(q);
                        queue_flag_clear(QUEUE_FLAG_REENTER, q);
                } else {
@@ -1381,6 +1381,9 @@ end_io:
                 */
                blk_partition_remap(bio);
 
+               if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
+                       goto end_io;
+
                if (old_sector != -1)
                        blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
                                            old_sector);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
new file mode 100644 (file)
index 0000000..3f1a847
--- /dev/null
@@ -0,0 +1,381 @@
+/*
+ * blk-integrity.c - Block layer data integrity extensions
+ *
+ * Copyright (C) 2007, 2008 Oracle Corporation
+ * Written by: Martin K. Petersen <martin.petersen@oracle.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; see the file COPYING.  If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
+ * USA.
+ *
+ */
+
+#include <linux/blkdev.h>
+#include <linux/mempool.h>
+#include <linux/bio.h>
+#include <linux/scatterlist.h>
+
+#include "blk.h"
+
+static struct kmem_cache *integrity_cachep;
+
+/**
+ * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
+ * @rq:                request with integrity metadata attached
+ *
+ * Description: Returns the number of elements required in a
+ * scatterlist corresponding to the integrity metadata in a request.
+ */
+int blk_rq_count_integrity_sg(struct request *rq)
+{
+       struct bio_vec *iv, *ivprv;
+       struct req_iterator iter;
+       unsigned int segments;
+
+       ivprv = NULL;
+       segments = 0;
+
+       rq_for_each_integrity_segment(iv, rq, iter) {
+
+               if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+                       segments++;
+
+               ivprv = iv;
+       }
+
+       return segments;
+}
+EXPORT_SYMBOL(blk_rq_count_integrity_sg);
+
+/**
+ * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
+ * @rq:                request with integrity metadata attached
+ * @sglist:    target scatterlist
+ *
+ * Description: Map the integrity vectors in the request into a
+ * scatterlist.  The scatterlist must be big enough to hold all
+ * elements, i.e. sized using blk_rq_count_integrity_sg().
+ */
+int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
+{
+       struct bio_vec *iv, *ivprv;
+       struct req_iterator iter;
+       struct scatterlist *sg;
+       unsigned int segments;
+
+       ivprv = NULL;
+       sg = NULL;
+       segments = 0;
+
+       rq_for_each_integrity_segment(iv, rq, iter) {
+
+               if (ivprv) {
+                       if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+                               goto new_segment;
+
+                       sg->length += iv->bv_len;
+               } else {
+new_segment:
+                       if (!sg)
+                               sg = sglist;
+                       else {
+                               sg->page_link &= ~0x02;
+                               sg = sg_next(sg);
+                       }
+
+                       sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset);
+                       segments++;
+               }
+
+               ivprv = iv;
+       }
+
+       if (sg)
+               sg_mark_end(sg);
+
+       return segments;
+}
+EXPORT_SYMBOL(blk_rq_map_integrity_sg);
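
A hedged sketch, not part of the patch, of how a controller driver is expected to combine the two exports above: count first, allocate a scatterlist of that size, then map. The example_ helper is hypothetical:

#include <linux/blkdev.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>

static int example_map_prot_sg(struct request *rq, struct scatterlist **sgl_out)
{
        struct scatterlist *sgl;
        int count;

        count = blk_rq_count_integrity_sg(rq);          /* upper bound on entries */
        if (!count)
                return 0;

        sgl = kcalloc(count, sizeof(*sgl), GFP_ATOMIC);
        if (!sgl)
                return -ENOMEM;
        sg_init_table(sgl, count);

        count = blk_rq_map_integrity_sg(rq, sgl);       /* actual (merged) entry count */
        *sgl_out = sgl;
        return count;
}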
+
+/**
+ * blk_integrity_compare - Compare integrity profile of two block devices
+ * @b1:                Device to compare
+ * @b2:                Device to compare
+ *
+ * Description: Meta-devices like DM and MD need to verify that all
+ * sub-devices use the same integrity format before advertising to
+ * upper layers that they can send/receive integrity metadata.  This
+ * function can be used to check whether two block devices have
+ * compatible integrity formats.
+ */
+int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
+{
+       struct blk_integrity *b1, *b2;
+
+       /* Verify the disks exist before dereferencing them. */
+       BUG_ON(bd1->bd_disk == NULL);
+       BUG_ON(bd2->bd_disk == NULL);
+
+       b1 = bd1->bd_disk->integrity;
+       b2 = bd2->bd_disk->integrity;
+
+       if (!b1 || !b2)
+               return 0;
+
+       if (b1->sector_size != b2->sector_size) {
+               printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
+                      bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+                      b1->sector_size, b2->sector_size);
+               return -1;
+       }
+
+       if (b1->tuple_size != b2->tuple_size) {
+               printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
+                      bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+                      b1->tuple_size, b2->tuple_size);
+               return -1;
+       }
+
+       if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
+               printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
+                      bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+                      b1->tag_size, b2->tag_size);
+               return -1;
+       }
+
+       if (strcmp(b1->name, b2->name)) {
+               printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
+                      bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
+                      b1->name, b2->name);
+               return -1;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(blk_integrity_compare);
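
Not part of the patch — a hedged sketch of the MD/DM-style use described above: compare every member against the first and only advertise integrity support if all of them match.

/* Hypothetical meta-device check built on blk_integrity_compare(). */
static int example_members_integrity_ok(struct block_device **members, int n)
{
        int i;

        for (i = 1; i < n; i++)
                if (blk_integrity_compare(members[0], members[i]) < 0)
                        return 0;       /* profiles differ: no pass-through */
        return 1;
}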
+
+struct integrity_sysfs_entry {
+       struct attribute attr;
+       ssize_t (*show)(struct blk_integrity *, char *);
+       ssize_t (*store)(struct blk_integrity *, const char *, size_t);
+};
+
+static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
+                                  char *page)
+{
+       struct blk_integrity *bi =
+               container_of(kobj, struct blk_integrity, kobj);
+       struct integrity_sysfs_entry *entry =
+               container_of(attr, struct integrity_sysfs_entry, attr);
+
+       return entry->show(bi, page);
+}
+
+static ssize_t integrity_attr_store(struct kobject *kobj,
+                                   struct attribute *attr, const char *page,
+                                   size_t count)
+{
+       struct blk_integrity *bi =
+               container_of(kobj, struct blk_integrity, kobj);
+       struct integrity_sysfs_entry *entry =
+               container_of(attr, struct integrity_sysfs_entry, attr);
+       ssize_t ret = 0;
+
+       if (entry->store)
+               ret = entry->store(bi, page, count);
+
+       return ret;
+}
+
+static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
+{
+       if (bi != NULL && bi->name != NULL)
+               return sprintf(page, "%s\n", bi->name);
+       else
+               return sprintf(page, "none\n");
+}
+
+static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
+{
+       if (bi != NULL)
+               return sprintf(page, "%u\n", bi->tag_size);
+       else
+               return sprintf(page, "0\n");
+}
+
+static ssize_t integrity_read_store(struct blk_integrity *bi,
+                                   const char *page, size_t count)
+{
+       char *p = (char *) page;
+       unsigned long val = simple_strtoul(p, &p, 10);
+
+       if (val)
+               bi->flags |= INTEGRITY_FLAG_READ;
+       else
+               bi->flags &= ~INTEGRITY_FLAG_READ;
+
+       return count;
+}
+
+static ssize_t integrity_read_show(struct blk_integrity *bi, char *page)
+{
+       return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_READ) != 0);
+}
+
+static ssize_t integrity_write_store(struct blk_integrity *bi,
+                                    const char *page, size_t count)
+{
+       char *p = (char *) page;
+       unsigned long val = simple_strtoul(p, &p, 10);
+
+       if (val)
+               bi->flags |= INTEGRITY_FLAG_WRITE;
+       else
+               bi->flags &= ~INTEGRITY_FLAG_WRITE;
+
+       return count;
+}
+
+static ssize_t integrity_write_show(struct blk_integrity *bi, char *page)
+{
+       return sprintf(page, "%d\n", (bi->flags & INTEGRITY_FLAG_WRITE) != 0);
+}
+
+static struct integrity_sysfs_entry integrity_format_entry = {
+       .attr = { .name = "format", .mode = S_IRUGO },
+       .show = integrity_format_show,
+};
+
+static struct integrity_sysfs_entry integrity_tag_size_entry = {
+       .attr = { .name = "tag_size", .mode = S_IRUGO },
+       .show = integrity_tag_size_show,
+};
+
+static struct integrity_sysfs_entry integrity_read_entry = {
+       .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
+       .show = integrity_read_show,
+       .store = integrity_read_store,
+};
+
+static struct integrity_sysfs_entry integrity_write_entry = {
+       .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
+       .show = integrity_write_show,
+       .store = integrity_write_store,
+};
+
+static struct attribute *integrity_attrs[] = {
+       &integrity_format_entry.attr,
+       &integrity_tag_size_entry.attr,
+       &integrity_read_entry.attr,
+       &integrity_write_entry.attr,
+       NULL,
+};
+
+static struct sysfs_ops integrity_ops = {
+       .show   = &integrity_attr_show,
+       .store  = &integrity_attr_store,
+};
+
+static int __init blk_dev_integrity_init(void)
+{
+       integrity_cachep = kmem_cache_create("blkdev_integrity",
+                                            sizeof(struct blk_integrity),
+                                            0, SLAB_PANIC, NULL);
+       return 0;
+}
+subsys_initcall(blk_dev_integrity_init);
+
+static void blk_integrity_release(struct kobject *kobj)
+{
+       struct blk_integrity *bi =
+               container_of(kobj, struct blk_integrity, kobj);
+
+       kmem_cache_free(integrity_cachep, bi);
+}
+
+static struct kobj_type integrity_ktype = {
+       .default_attrs  = integrity_attrs,
+       .sysfs_ops      = &integrity_ops,
+       .release        = blk_integrity_release,
+};
+
+/**
+ * blk_integrity_register - Register a gendisk as being integrity-capable
+ * @disk:      struct gendisk pointer to make integrity-aware
+ * @template:  integrity profile
+ *
+ * Description: When a device needs to advertise itself as being able
+ * to send/receive integrity metadata it must use this function to
+ * register the capability with the block layer.  The template is a
+ * blk_integrity struct with values appropriate for the underlying
+ * hardware.  See Documentation/block/data-integrity.txt.
+ */
+int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
+{
+       struct blk_integrity *bi;
+
+       BUG_ON(disk == NULL);
+       BUG_ON(template == NULL);
+
+       if (disk->integrity == NULL) {
+               bi = kmem_cache_alloc(integrity_cachep,
+                                               GFP_KERNEL | __GFP_ZERO);
+               if (!bi)
+                       return -1;
+
+               if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
+                                        &disk->dev.kobj, "%s", "integrity")) {
+                       kmem_cache_free(integrity_cachep, bi);
+                       return -1;
+               }
+
+               kobject_uevent(&bi->kobj, KOBJ_ADD);
+
+               bi->flags |= INTEGRITY_FLAG_READ | INTEGRITY_FLAG_WRITE;
+               bi->sector_size = disk->queue->hardsect_size;
+               disk->integrity = bi;
+       } else
+               bi = disk->integrity;
+
+       /* Use the provided profile as template */
+       bi->name = template->name;
+       bi->generate_fn = template->generate_fn;
+       bi->verify_fn = template->verify_fn;
+       bi->tuple_size = template->tuple_size;
+       bi->set_tag_fn = template->set_tag_fn;
+       bi->get_tag_fn = template->get_tag_fn;
+       bi->tag_size = template->tag_size;
+
+       return 0;
+}
+EXPORT_SYMBOL(blk_integrity_register);
+
+/**
+ * blk_integrity_unregister - Remove block integrity profile
+ * @disk:      disk whose integrity profile to deallocate
+ *
+ * Description: This function frees all memory used by the block
+ * integrity profile.  To be called at device teardown.
+ */
+void blk_integrity_unregister(struct gendisk *disk)
+{
+       struct blk_integrity *bi;
+
+       if (!disk || !disk->integrity)
+               return;
+
+       bi = disk->integrity;
+
+       kobject_uevent(&bi->kobj, KOBJ_REMOVE);
+       kobject_del(&bi->kobj);
+       kobject_put(&disk->dev.kobj);
+       kmem_cache_free(integrity_cachep, bi);
+}
+EXPORT_SYMBOL(blk_integrity_unregister);
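Illustrative sketch, not part of the patch: a hypothetical driver advertising an integrity profile at probe time. The profile name, the my_generate_fn/my_verify_fn callbacks and the 8-byte tuple size are assumptions picked for the example; the real values depend on the protection format the hardware implements.

    #include <linux/blkdev.h>
    #include <linux/genhd.h>

    /* Hypothetical protection callbacks; prototypes assumed per the new API. */
    extern void my_generate_fn(struct blk_integrity_exchg *bix);
    extern int my_verify_fn(struct blk_integrity_exchg *bix);

    static struct blk_integrity my_integrity_profile = {
    	.name		= "EXAMPLE-DIF",	/* reported via the sysfs "format" file */
    	.generate_fn	= my_generate_fn,	/* fill protection tuples on write */
    	.verify_fn	= my_verify_fn,		/* check protection tuples on read */
    	.tuple_size	= 8,			/* metadata bytes per hardware sector */
    	.tag_size	= 2,			/* opaque tag bytes within each tuple */
    };

    static int my_register_integrity(struct gendisk *disk)
    {
    	/* Creates /sys/block/<disk>/integrity/ and copies the template fields. */
    	return blk_integrity_register(disk, &my_integrity_profile);
    }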
index 0b1af5a..ddd96fb 100644 (file)
@@ -210,6 +210,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
        if (!bio_flagged(bio, BIO_USER_MAPPED))
                rq->cmd_flags |= REQ_COPY_USER;
 
+       blk_queue_bounce(q, &bio);
        bio_get(bio);
        blk_rq_bio_prep(q, rq, bio);
        rq->buffer = rq->data = NULL;
@@ -268,6 +269,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        int reading = rq_data_dir(rq) == READ;
        int do_copy = 0;
        struct bio *bio;
+       unsigned long stack_mask = ~(THREAD_SIZE - 1);
 
        if (len > (q->max_hw_sectors << 9))
                return -EINVAL;
@@ -278,6 +280,10 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
        alignment = queue_dma_alignment(q) | q->dma_pad_mask;
        do_copy = ((kaddr & alignment) || (len & alignment));
 
+       if (!((kaddr & stack_mask) ^
+             ((unsigned long)current->stack & stack_mask)))
+               do_copy = 1;
+
        if (do_copy)
                bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
        else
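For orientation, not part of the patch: callers of blk_rq_map_kern() sometimes hand in buffers that live on their own kernel stack, and such memory is not safe to map into a bio for DMA. The new test above forces the copy path whenever the buffer falls inside the THREAD_SIZE-aligned region occupied by current's stack. A minimal sketch of the comparison, assuming 8 KiB stacks:

    #include <linux/sched.h>
    #include <linux/thread_info.h>

    /* Illustrative only: does kbuf live on the current task's kernel stack?
     * With THREAD_SIZE == 8192, stack_mask is ~0x1fffUL, so two addresses in
     * the same 8 KiB-aligned block compare equal after masking. */
    static int kbuf_on_stack(const void *kbuf)
    {
    	unsigned long stack_mask = ~(THREAD_SIZE - 1);

    	return ((unsigned long)kbuf & stack_mask) ==
    	       ((unsigned long)current->stack & stack_mask);
    }
    /* A non-zero result selects bio_copy_kern() (bounce) over bio_map_kern(). */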
index 651136a..5efc9e7 100644 (file)
@@ -441,6 +441,9 @@ static int attempt_merge(struct request_queue *q, struct request *req,
            || next->special)
                return 0;
 
+       if (blk_integrity_rq(req) != blk_integrity_rq(next))
+               return 0;
+
        /*
         * If we are allowed to merge, then append bio list
         * from next to rq and release next. merge_requests_fn
index 8dd8641..dfc7701 100644 (file)
@@ -302,11 +302,10 @@ EXPORT_SYMBOL(blk_queue_stack_limits);
  * @q:     the request queue for the device
  * @mask:  pad mask
  *
- * Set pad mask.  Direct IO requests are padded to the mask specified.
+ * Set dma pad mask.
  *
- * Appending pad buffer to a request modifies ->data_len such that it
- * includes the pad buffer.  The original requested data length can be
- * obtained using blk_rq_raw_data_len().
+ * Appending pad buffer to a request modifies the last entry of a
+ * scatter list such that it includes the pad buffer.
  **/
 void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
 {
@@ -314,6 +313,23 @@ void blk_queue_dma_pad(struct request_queue *q, unsigned int mask)
 }
 EXPORT_SYMBOL(blk_queue_dma_pad);
 
+/**
+ * blk_queue_update_dma_pad - update pad mask
+ * @q:     the request queue for the device
+ * @mask:  pad mask
+ *
+ * Update dma pad mask.
+ *
+ * Appending pad buffer to a request modifies the last entry of a
+ * scatter list such that it includes the pad buffer.
+ **/
+void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
+{
+       if (mask > q->dma_pad_mask)
+               q->dma_pad_mask = mask;
+}
+EXPORT_SYMBOL(blk_queue_update_dma_pad);
+
 /**
  * blk_queue_dma_drain - Set up a drain buffer for excess dma.
  * @q:  the request queue for the device
index 59776ab..c79f30e 100644 (file)
@@ -51,4 +51,12 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
        return q->nr_congestion_off;
 }
 
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+
+#define rq_for_each_integrity_segment(bvl, _rq, _iter)         \
+       __rq_for_each_bio(_iter.bio, _rq)                       \
+               bip_for_each_vec(bvl, _iter.bio->bi_integrity, _iter.i)
+
+#endif /* BLK_DEV_INTEGRITY */
+
 #endif
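Illustrative only, not part of the patch: rq_for_each_integrity_segment() is defined in the private block/blk.h header, so only block-layer code sees it. A hypothetical helper using it, with types mirroring the existing rq_for_each_segment() convention:

    #include <linux/blkdev.h>
    #include <linux/bio.h>
    /* rq_for_each_integrity_segment() itself comes from the private block/blk.h */

    /* Hypothetical example: total protection bytes attached to a request. */
    static unsigned int rq_integrity_bytes(struct request *rq)
    {
    	struct req_iterator iter;
    	struct bio_vec *bvec;
    	unsigned int bytes = 0;

    	rq_for_each_integrity_segment(bvec, rq, iter)
    		bytes += bvec->bv_len;

    	return bytes;
    }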
index 8d3a277..eb9651c 100644 (file)
@@ -244,6 +244,7 @@ err:
 static void blk_trace_cleanup(struct blk_trace *bt)
 {
        relay_close(bt->rchan);
+       debugfs_remove(bt->msg_file);
        debugfs_remove(bt->dropped_file);
        blk_remove_tree(bt->dir);
        free_percpu(bt->sequence);
@@ -291,6 +292,44 @@ static const struct file_operations blk_dropped_fops = {
        .read =         blk_dropped_read,
 };
 
+static int blk_msg_open(struct inode *inode, struct file *filp)
+{
+       filp->private_data = inode->i_private;
+
+       return 0;
+}
+
+static ssize_t blk_msg_write(struct file *filp, const char __user *buffer,
+                               size_t count, loff_t *ppos)
+{
+       char *msg;
+       struct blk_trace *bt;
+
+       if (count > BLK_TN_MAX_MSG)
+               return -EINVAL;
+
+       msg = kmalloc(count, GFP_KERNEL);
+       if (msg == NULL)
+               return -ENOMEM;
+
+       if (copy_from_user(msg, buffer, count)) {
+               kfree(msg);
+               return -EFAULT;
+       }
+
+       bt = filp->private_data;
+       __trace_note_message(bt, "%s", msg);
+       kfree(msg);
+
+       return count;
+}
+
+static const struct file_operations blk_msg_fops = {
+       .owner =        THIS_MODULE,
+       .open =         blk_msg_open,
+       .write =        blk_msg_write,
+};
+
 /*
  * Keep track of how many times we encountered a full subbuffer, to aid
  * the user space app in telling how many lost events there were.
@@ -380,6 +419,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
        if (!bt->dropped_file)
                goto err;
 
+       bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
+       if (!bt->msg_file)
+               goto err;
+
        bt->rchan = relay_open("trace", dir, buts->buf_size,
                                buts->buf_nr, &blk_relay_callbacks, bt);
        if (!bt->rchan)
@@ -409,6 +452,8 @@ err:
        if (dir)
                blk_remove_tree(dir);
        if (bt) {
+               if (bt->msg_file)
+                       debugfs_remove(bt->msg_file);
                if (bt->dropped_file)
                        debugfs_remove(bt->dropped_file);
                free_percpu(bt->sequence);
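Aside, not part of the patch: the new "msg" debugfs entry lets user space drop short free-form annotations into a running trace, and kernel code gets the same effect through the blk_add_trace_msg() helper that cfq starts using further down. A hedged in-kernel sketch with a hypothetical event:

    #include <linux/blkdev.h>
    #include <linux/blktrace_api.h>

    /* Illustrative only: annotate the blktrace stream of a request queue. */
    static void note_capacity_change(struct request_queue *q, unsigned long sectors)
    {
    	blk_add_trace_msg(q, "capacity change to %lu sectors", sectors);
    }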
index 54d617f..93e757d 100644 (file)
@@ -44,11 +44,12 @@ struct bsg_device {
        char name[BUS_ID_SIZE];
        int max_queue;
        unsigned long flags;
+       struct blk_scsi_cmd_filter *cmd_filter;
+       mode_t *f_mode;
 };
 
 enum {
        BSG_F_BLOCK             = 1,
-       BSG_F_WRITE_PERM        = 2,
 };
 
 #define BSG_DEFAULT_CMDS       64
@@ -172,7 +173,7 @@ unlock:
 }
 
 static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
-                               struct sg_io_v4 *hdr, int has_write_perm)
+                               struct sg_io_v4 *hdr, struct bsg_device *bd)
 {
        if (hdr->request_len > BLK_MAX_CDB) {
                rq->cmd = kzalloc(hdr->request_len, GFP_KERNEL);
@@ -185,7 +186,8 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
                return -EFAULT;
 
        if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) {
-               if (blk_verify_command(rq->cmd, has_write_perm))
+               if (blk_cmd_filter_verify_command(bd->cmd_filter, rq->cmd,
+                                                bd->f_mode))
                        return -EPERM;
        } else if (!capable(CAP_SYS_RAWIO))
                return -EPERM;
@@ -263,8 +265,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
        rq = blk_get_request(q, rw, GFP_KERNEL);
        if (!rq)
                return ERR_PTR(-ENOMEM);
-       ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
-                                                      &bd->flags));
+       ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd);
        if (ret)
                goto out;
 
@@ -566,12 +567,23 @@ static inline void bsg_set_block(struct bsg_device *bd, struct file *file)
                set_bit(BSG_F_BLOCK, &bd->flags);
 }
 
-static inline void bsg_set_write_perm(struct bsg_device *bd, struct file *file)
+static void bsg_set_cmd_filter(struct bsg_device *bd,
+                          struct file *file)
 {
-       if (file->f_mode & FMODE_WRITE)
-               set_bit(BSG_F_WRITE_PERM, &bd->flags);
-       else
-               clear_bit(BSG_F_WRITE_PERM, &bd->flags);
+       struct inode *inode;
+       struct gendisk *disk;
+
+       if (!file)
+               return;
+
+       inode = file->f_dentry->d_inode;
+       if (!inode)
+               return;
+
+       disk = inode->i_bdev->bd_disk;
+
+       bd->cmd_filter = &disk->cmd_filter;
+       bd->f_mode = &file->f_mode;
 }
 
 /*
@@ -595,6 +607,8 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
        dprintk("%s: read %Zd bytes\n", bd->name, count);
 
        bsg_set_block(bd, file);
+       bsg_set_cmd_filter(bd, file);
+
        bytes_read = 0;
        ret = __bsg_read(buf, count, bd, NULL, &bytes_read);
        *ppos = bytes_read;
@@ -668,7 +682,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
        dprintk("%s: write %Zd bytes\n", bd->name, count);
 
        bsg_set_block(bd, file);
-       bsg_set_write_perm(bd, file);
+       bsg_set_cmd_filter(bd, file);
 
        bytes_written = 0;
        ret = __bsg_write(bd, buf, count, &bytes_written);
@@ -772,7 +786,9 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
        }
 
        bd->queue = rq;
+
        bsg_set_block(bd, file);
+       bsg_set_cmd_filter(bd, file);
 
        atomic_set(&bd->ref_count, 1);
        mutex_lock(&bsg_mutex);
index d01b411..1e2aff8 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/elevator.h>
 #include <linux/rbtree.h>
 #include <linux/ioprio.h>
+#include <linux/blktrace_api.h>
 
 /*
  * tunables
@@ -41,13 +42,14 @@ static int cfq_slice_idle = HZ / 125;
 
 #define RQ_CIC(rq)             \
        ((struct cfq_io_context *) (rq)->elevator_private)
-#define RQ_CFQQ(rq)            ((rq)->elevator_private2)
+#define RQ_CFQQ(rq)            (struct cfq_queue *) ((rq)->elevator_private2)
 
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_ioc_pool;
 
 static DEFINE_PER_CPU(unsigned long, ioc_count);
 static struct completion *ioc_gone;
+static DEFINE_SPINLOCK(ioc_gone_lock);
 
 #define CFQ_PRIO_LISTS         IOPRIO_BE_NR
 #define cfq_class_idle(cfqq)   ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE)
@@ -155,6 +157,7 @@ struct cfq_queue {
        unsigned short ioprio, org_ioprio;
        unsigned short ioprio_class, org_ioprio_class;
 
+       pid_t pid;
 };
 
 enum cfqq_state_flags {
@@ -198,6 +201,11 @@ CFQ_CFQQ_FNS(slice_new);
 CFQ_CFQQ_FNS(sync);
 #undef CFQ_CFQQ_FNS
 
+#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
+       blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
+#define cfq_log(cfqd, fmt, args...)    \
+       blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
+
 static void cfq_dispatch_insert(struct request_queue *, struct request *);
 static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
                                       struct io_context *, gfp_t);
@@ -234,8 +242,10 @@ static inline int cfq_bio_sync(struct bio *bio)
  */
 static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
 {
-       if (cfqd->busy_queues)
+       if (cfqd->busy_queues) {
+               cfq_log(cfqd, "schedule dispatch");
                kblockd_schedule_work(&cfqd->unplug_work);
+       }
 }
 
 static int cfq_queue_empty(struct request_queue *q)
@@ -270,6 +280,7 @@ static inline void
 cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies;
+       cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
 }
 
 /*
@@ -539,6 +550,7 @@ static void cfq_resort_rr_list(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  */
 static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
+       cfq_log_cfqq(cfqd, cfqq, "add_to_rr");
        BUG_ON(cfq_cfqq_on_rr(cfqq));
        cfq_mark_cfqq_on_rr(cfqq);
        cfqd->busy_queues++;
@@ -552,6 +564,7 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  */
 static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
+       cfq_log_cfqq(cfqd, cfqq, "del_from_rr");
        BUG_ON(!cfq_cfqq_on_rr(cfqq));
        cfq_clear_cfqq_on_rr(cfqq);
 
@@ -638,6 +651,8 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
        struct cfq_data *cfqd = q->elevator->elevator_data;
 
        cfqd->rq_in_driver++;
+       cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
+                                               cfqd->rq_in_driver);
 
        /*
         * If the depth is larger 1, it really could be queueing. But lets
@@ -657,6 +672,8 @@ static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
 
        WARN_ON(!cfqd->rq_in_driver);
        cfqd->rq_in_driver--;
+       cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
+                                               cfqd->rq_in_driver);
 }
 
 static void cfq_remove_request(struct request *rq)
@@ -746,6 +763,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
                                   struct cfq_queue *cfqq)
 {
        if (cfqq) {
+               cfq_log_cfqq(cfqd, cfqq, "set_active");
                cfqq->slice_end = 0;
                cfq_clear_cfqq_must_alloc_slice(cfqq);
                cfq_clear_cfqq_fifo_expire(cfqq);
@@ -763,6 +781,8 @@ static void
 __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                    int timed_out)
 {
+       cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
+
        if (cfq_cfqq_wait_request(cfqq))
                del_timer(&cfqd->idle_slice_timer);
 
@@ -772,8 +792,10 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        /*
         * store what was left of this slice, if the queue idled/timed out
         */
-       if (timed_out && !cfq_cfqq_slice_new(cfqq))
+       if (timed_out && !cfq_cfqq_slice_new(cfqq)) {
                cfqq->slice_resid = cfqq->slice_end - jiffies;
+               cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
+       }
 
        cfq_resort_rr_list(cfqd, cfqq);
 
@@ -865,6 +887,12 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
        if (!cfqd->cfq_slice_idle || !cfq_cfqq_idle_window(cfqq))
                return;
 
+       /*
+        * requests are still in flight with the driver, don't idle
+        */
+       if (cfqd->rq_in_driver)
+               return;
+
        /*
         * task has exited, don't wait
         */
@@ -892,6 +920,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
                sl = min(sl, msecs_to_jiffies(CFQ_MIN_TT));
 
        mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
+       cfq_log(cfqd, "arm_idle: %lu", sl);
 }
 
 /*
@@ -902,6 +931,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
        struct cfq_data *cfqd = q->elevator->elevator_data;
        struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
+       cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
+
        cfq_remove_request(rq);
        cfqq->dispatched++;
        elv_dispatch_sort(q, rq);
@@ -931,8 +962,9 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
        rq = rq_entry_fifo(cfqq->fifo.next);
 
        if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
-               return NULL;
+               rq = NULL;
 
+       cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq);
        return rq;
 }
 
@@ -1072,6 +1104,7 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
 
        BUG_ON(cfqd->busy_queues);
 
+       cfq_log(cfqd, "forced_dispatch=%d\n", dispatched);
        return dispatched;
 }
 
@@ -1112,6 +1145,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
                dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch);
        }
 
+       cfq_log(cfqd, "dispatched=%d", dispatched);
        return dispatched;
 }
 
@@ -1130,6 +1164,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
        if (!atomic_dec_and_test(&cfqq->ref))
                return;
 
+       cfq_log_cfqq(cfqd, cfqq, "put_queue");
        BUG_ON(rb_first(&cfqq->sort_list));
        BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
        BUG_ON(cfq_cfqq_on_rr(cfqq));
@@ -1177,8 +1212,19 @@ static void cfq_cic_free_rcu(struct rcu_head *head)
        kmem_cache_free(cfq_ioc_pool, cic);
        elv_ioc_count_dec(ioc_count);
 
-       if (ioc_gone && !elv_ioc_count_read(ioc_count))
-               complete(ioc_gone);
+       if (ioc_gone) {
+               /*
+                * CFQ scheduler is exiting, grab exit lock and check
+                * the pending io context count. If it hits zero,
+                * complete ioc_gone and set it back to NULL
+                */
+               spin_lock(&ioc_gone_lock);
+               if (ioc_gone && !elv_ioc_count_read(ioc_count)) {
+                       complete(ioc_gone);
+                       ioc_gone = NULL;
+               }
+               spin_unlock(&ioc_gone_lock);
+       }
 }
 
 static void cfq_cic_free(struct cfq_io_context *cic)
@@ -1427,6 +1473,8 @@ retry:
                                cfq_mark_cfqq_idle_window(cfqq);
                        cfq_mark_cfqq_sync(cfqq);
                }
+               cfqq->pid = current->pid;
+               cfq_log_cfqq(cfqd, cfqq, "alloced");
        }
 
        if (new_cfqq)
@@ -1675,7 +1723,7 @@ static void
 cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                       struct cfq_io_context *cic)
 {
-       int enable_idle;
+       int old_idle, enable_idle;
 
        /*
         * Don't idle for async or idle io prio class
@@ -1683,7 +1731,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        if (!cfq_cfqq_sync(cfqq) || cfq_class_idle(cfqq))
                return;
 
-       enable_idle = cfq_cfqq_idle_window(cfqq);
+       enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
 
        if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
            (cfqd->hw_tag && CIC_SEEKY(cic)))
@@ -1695,10 +1743,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                        enable_idle = 1;
        }
 
-       if (enable_idle)
-               cfq_mark_cfqq_idle_window(cfqq);
-       else
-               cfq_clear_cfqq_idle_window(cfqq);
+       if (old_idle != enable_idle) {
+               cfq_log_cfqq(cfqd, cfqq, "idle=%d", enable_idle);
+               if (enable_idle)
+                       cfq_mark_cfqq_idle_window(cfqq);
+               else
+                       cfq_clear_cfqq_idle_window(cfqq);
+       }
 }
 
 /*
@@ -1757,6 +1808,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
  */
 static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
+       cfq_log_cfqq(cfqd, cfqq, "preempt");
        cfq_slice_expired(cfqd, 1);
 
        /*
@@ -1818,6 +1870,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
        struct cfq_data *cfqd = q->elevator->elevator_data;
        struct cfq_queue *cfqq = RQ_CFQQ(rq);
 
+       cfq_log_cfqq(cfqd, cfqq, "insert_request");
        cfq_init_prio_data(cfqq, RQ_CIC(rq)->ioc);
 
        cfq_add_rq_rb(rq);
@@ -1835,6 +1888,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        unsigned long now;
 
        now = jiffies;
+       cfq_log_cfqq(cfqd, cfqq, "complete");
 
        WARN_ON(!cfqd->rq_in_driver);
        WARN_ON(!cfqq->dispatched);
@@ -2004,6 +2058,7 @@ queue_fail:
 
        cfq_schedule_dispatch(cfqd);
        spin_unlock_irqrestore(q->queue_lock, flags);
+       cfq_log(cfqd, "set_request fail");
        return 1;
 }
 
@@ -2029,6 +2084,8 @@ static void cfq_idle_slice_timer(unsigned long data)
        unsigned long flags;
        int timed_out = 1;
 
+       cfq_log(cfqd, "idle timer fired");
+
        spin_lock_irqsave(cfqd->queue->queue_lock, flags);
 
        cfqq = cfqd->active_queue;
@@ -2317,7 +2374,7 @@ static void __exit cfq_exit(void)
         * pending RCU callbacks
         */
        if (elv_ioc_count_read(ioc_count))
-               wait_for_completion(ioc_gone);
+               wait_for_completion(&all_gone);
        cfq_slab_kill();
 }
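Aside, not part of the patch: the ioc_gone/ioc_gone_lock change is an instance of a common teardown idiom -- a completion that must fire exactly once when a count of RCU-deferred objects drains to zero after the waiter has announced itself. A generic sketch of that idiom, with hypothetical names:

    #include <linux/spinlock.h>
    #include <linux/completion.h>
    #include <asm/atomic.h>

    static DEFINE_SPINLOCK(drain_lock);
    static struct completion *drain_done;	/* set by the exiting side before it waits */

    /* Called from each deferred free; the last one signals the waiter exactly once. */
    static void object_released(atomic_t *nr_objects)
    {
    	if (!atomic_dec_and_test(nr_objects))
    		return;

    	spin_lock(&drain_lock);
    	if (drain_done) {		/* re-check under the lock */
    		complete(drain_done);
    		drain_done = NULL;	/* never complete() the same waiter twice */
    	}
    	spin_unlock(&drain_lock);
    }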
 
diff --git a/block/cmd-filter.c b/block/cmd-filter.c
new file mode 100644 (file)
index 0000000..eec4404
--- /dev/null
@@ -0,0 +1,334 @@
+/*
+ * Copyright 2004 Peter M. Jones <pjones@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
+ *
+ */
+
+#include <linux/list.h>
+#include <linux/genhd.h>
+#include <linux/spinlock.h>
+#include <linux/parser.h>
+#include <linux/capability.h>
+#include <linux/bitops.h>
+
+#include <scsi/scsi.h>
+#include <linux/cdrom.h>
+
+int blk_cmd_filter_verify_command(struct blk_scsi_cmd_filter *filter,
+                                 unsigned char *cmd, mode_t *f_mode)
+{
+       /* root can do any command. */
+       if (capable(CAP_SYS_RAWIO))
+               return 0;
+
+       /* if there's no filter set, assume we're filtering everything out */
+       if (!filter)
+               return -EPERM;
+
+       /* Anybody who can open the device can do a read-safe command */
+       if (test_bit(cmd[0], filter->read_ok))
+               return 0;
+
+       /* Write-safe commands require a writable open */
+       if (test_bit(cmd[0], filter->write_ok) && (*f_mode & FMODE_WRITE))
+               return 0;
+
+       return -EPERM;
+}
+EXPORT_SYMBOL(blk_cmd_filter_verify_command);
+
+int blk_verify_command(struct file *file, unsigned char *cmd)
+{
+       struct gendisk *disk;
+       struct inode *inode;
+
+       if (!file)
+               return -EINVAL;
+
+       inode = file->f_dentry->d_inode;
+       if (!inode)
+               return -EINVAL;
+
+       disk = inode->i_bdev->bd_disk;
+
+       return blk_cmd_filter_verify_command(&disk->cmd_filter,
+                                                cmd, &file->f_mode);
+}
+EXPORT_SYMBOL(blk_verify_command);
+
+/* and now, the sysfs stuff */
+static ssize_t rcf_cmds_show(struct blk_scsi_cmd_filter *filter, char *page,
+                            int rw)
+{
+       char *npage = page;
+       unsigned long *okbits;
+       int i;
+
+       if (rw == READ)
+               okbits = filter->read_ok;
+       else
+               okbits = filter->write_ok;
+
+       for (i = 0; i < BLK_SCSI_MAX_CMDS; i++) {
+               if (test_bit(i, okbits)) {
+                       sprintf(npage, "%02x", i);
+                       npage += 2;
+                       if (i < BLK_SCSI_MAX_CMDS - 1)
+                               sprintf(npage++, " ");
+               }
+       }
+
+       if (npage != page)
+               npage += sprintf(npage, "\n");
+
+       return npage - page;
+}
+
+static ssize_t rcf_readcmds_show(struct blk_scsi_cmd_filter *filter, char *page)
+{
+       return rcf_cmds_show(filter, page, READ);
+}
+
+static ssize_t rcf_writecmds_show(struct blk_scsi_cmd_filter *filter,
+                                char *page)
+{
+       return rcf_cmds_show(filter, page, WRITE);
+}
+
+static ssize_t rcf_cmds_store(struct blk_scsi_cmd_filter *filter,
+                             const char *page, size_t count, int rw)
+{
+       ssize_t ret = 0;
+       unsigned long okbits[BLK_SCSI_CMD_PER_LONG], *target_okbits;
+       int cmd, status, len;
+       substring_t ss;
+
+       memset(&okbits, 0, sizeof(okbits));
+
+       for (len = strlen(page); len > 0; len -= 3) {
+               if (len < 2)
+                       break;
+               ss.from = (char *) page + ret;
+               ss.to = (char *) page + ret + 2;
+               ret += 3;
+               status = match_hex(&ss, &cmd);
+               /* either of these cases means invalid input, so do nothing. */
+               if (status || cmd >= BLK_SCSI_MAX_CMDS)
+                       return -EINVAL;
+
+               __set_bit(cmd, okbits);
+       }
+
+       if (rw == READ)
+               target_okbits = filter->read_ok;
+       else
+               target_okbits = filter->write_ok;
+
+       memmove(target_okbits, okbits, sizeof(okbits));
+       return count;
+}
+
+static ssize_t rcf_readcmds_store(struct blk_scsi_cmd_filter *filter,
+                                 const char *page, size_t count)
+{
+       return rcf_cmds_store(filter, page, count, READ);
+}
+
+static ssize_t rcf_writecmds_store(struct blk_scsi_cmd_filter *filter,
+                                  const char *page, size_t count)
+{
+       return rcf_cmds_store(filter, page, count, WRITE);
+}
+
+struct rcf_sysfs_entry {
+       struct attribute attr;
+       ssize_t (*show)(struct blk_scsi_cmd_filter *, char *);
+       ssize_t (*store)(struct blk_scsi_cmd_filter *, const char *, size_t);
+};
+
+static struct rcf_sysfs_entry rcf_readcmds_entry = {
+       .attr = { .name = "read_table", .mode = S_IRUGO | S_IWUSR },
+       .show = rcf_readcmds_show,
+       .store = rcf_readcmds_store,
+};
+
+static struct rcf_sysfs_entry rcf_writecmds_entry = {
+       .attr = {.name = "write_table", .mode = S_IRUGO | S_IWUSR },
+       .show = rcf_writecmds_show,
+       .store = rcf_writecmds_store,
+};
+
+static struct attribute *default_attrs[] = {
+       &rcf_readcmds_entry.attr,
+       &rcf_writecmds_entry.attr,
+       NULL,
+};
+
+#define to_rcf(atr) container_of((atr), struct rcf_sysfs_entry, attr)
+
+static ssize_t
+rcf_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
+{
+       struct rcf_sysfs_entry *entry = to_rcf(attr);
+       struct blk_scsi_cmd_filter *filter;
+
+       filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+       if (entry->show)
+               return entry->show(filter, page);
+
+       return 0;
+}
+
+static ssize_t
+rcf_attr_store(struct kobject *kobj, struct attribute *attr,
+                       const char *page, size_t length)
+{
+       struct rcf_sysfs_entry *entry = to_rcf(attr);
+       struct blk_scsi_cmd_filter *filter;
+
+       if (!capable(CAP_SYS_RAWIO))
+               return -EPERM;
+
+       if (!entry->store)
+               return -EINVAL;
+
+       filter = container_of(kobj, struct blk_scsi_cmd_filter, kobj);
+       return entry->store(filter, page, length);
+}
+
+static struct sysfs_ops rcf_sysfs_ops = {
+       .show = rcf_attr_show,
+       .store = rcf_attr_store,
+};
+
+static struct kobj_type rcf_ktype = {
+       .sysfs_ops = &rcf_sysfs_ops,
+       .default_attrs = default_attrs,
+};
+
+#ifndef MAINTENANCE_IN_CMD
+#define MAINTENANCE_IN_CMD 0xa3
+#endif
+
+static void rcf_set_defaults(struct blk_scsi_cmd_filter *filter)
+{
+       /* Basic read-only commands */
+       __set_bit(TEST_UNIT_READY, filter->read_ok);
+       __set_bit(REQUEST_SENSE, filter->read_ok);
+       __set_bit(READ_6, filter->read_ok);
+       __set_bit(READ_10, filter->read_ok);
+       __set_bit(READ_12, filter->read_ok);
+       __set_bit(READ_16, filter->read_ok);
+       __set_bit(READ_BUFFER, filter->read_ok);
+       __set_bit(READ_DEFECT_DATA, filter->read_ok);
+       __set_bit(READ_CAPACITY, filter->read_ok);
+       __set_bit(READ_LONG, filter->read_ok);
+       __set_bit(INQUIRY, filter->read_ok);
+       __set_bit(MODE_SENSE, filter->read_ok);
+       __set_bit(MODE_SENSE_10, filter->read_ok);
+       __set_bit(LOG_SENSE, filter->read_ok);
+       __set_bit(START_STOP, filter->read_ok);
+       __set_bit(GPCMD_VERIFY_10, filter->read_ok);
+       __set_bit(VERIFY_16, filter->read_ok);
+       __set_bit(REPORT_LUNS, filter->read_ok);
+       __set_bit(SERVICE_ACTION_IN, filter->read_ok);
+       __set_bit(RECEIVE_DIAGNOSTIC, filter->read_ok);
+       __set_bit(MAINTENANCE_IN_CMD, filter->read_ok);
+       __set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
+
+       /* Audio CD commands */
+       __set_bit(GPCMD_PLAY_CD, filter->read_ok);
+       __set_bit(GPCMD_PLAY_AUDIO_10, filter->read_ok);
+       __set_bit(GPCMD_PLAY_AUDIO_MSF, filter->read_ok);
+       __set_bit(GPCMD_PLAY_AUDIO_TI, filter->read_ok);
+       __set_bit(GPCMD_PAUSE_RESUME, filter->read_ok);
+
+       /* CD/DVD data reading */
+       __set_bit(GPCMD_READ_CD, filter->read_ok);
+       __set_bit(GPCMD_READ_CD_MSF, filter->read_ok);
+       __set_bit(GPCMD_READ_DISC_INFO, filter->read_ok);
+       __set_bit(GPCMD_READ_CDVD_CAPACITY, filter->read_ok);
+       __set_bit(GPCMD_READ_DVD_STRUCTURE, filter->read_ok);
+       __set_bit(GPCMD_READ_HEADER, filter->read_ok);
+       __set_bit(GPCMD_READ_TRACK_RZONE_INFO, filter->read_ok);
+       __set_bit(GPCMD_READ_SUBCHANNEL, filter->read_ok);
+       __set_bit(GPCMD_READ_TOC_PMA_ATIP, filter->read_ok);
+       __set_bit(GPCMD_REPORT_KEY, filter->read_ok);
+       __set_bit(GPCMD_SCAN, filter->read_ok);
+       __set_bit(GPCMD_GET_CONFIGURATION, filter->read_ok);
+       __set_bit(GPCMD_READ_FORMAT_CAPACITIES, filter->read_ok);
+       __set_bit(GPCMD_GET_EVENT_STATUS_NOTIFICATION, filter->read_ok);
+       __set_bit(GPCMD_GET_PERFORMANCE, filter->read_ok);
+       __set_bit(GPCMD_SEEK, filter->read_ok);
+       __set_bit(GPCMD_STOP_PLAY_SCAN, filter->read_ok);
+
+       /* Basic writing commands */
+       __set_bit(WRITE_6, filter->write_ok);
+       __set_bit(WRITE_10, filter->write_ok);
+       __set_bit(WRITE_VERIFY, filter->write_ok);
+       __set_bit(WRITE_12, filter->write_ok);
+       __set_bit(WRITE_VERIFY_12, filter->write_ok);
+       __set_bit(WRITE_16, filter->write_ok);
+       __set_bit(WRITE_LONG, filter->write_ok);
+       __set_bit(WRITE_LONG_2, filter->write_ok);
+       __set_bit(ERASE, filter->write_ok);
+       __set_bit(GPCMD_MODE_SELECT_10, filter->write_ok);
+       __set_bit(MODE_SELECT, filter->write_ok);
+       __set_bit(LOG_SELECT, filter->write_ok);
+       __set_bit(GPCMD_BLANK, filter->write_ok);
+       __set_bit(GPCMD_CLOSE_TRACK, filter->write_ok);
+       __set_bit(GPCMD_FLUSH_CACHE, filter->write_ok);
+       __set_bit(GPCMD_FORMAT_UNIT, filter->write_ok);
+       __set_bit(GPCMD_REPAIR_RZONE_TRACK, filter->write_ok);
+       __set_bit(GPCMD_RESERVE_RZONE_TRACK, filter->write_ok);
+       __set_bit(GPCMD_SEND_DVD_STRUCTURE, filter->write_ok);
+       __set_bit(GPCMD_SEND_EVENT, filter->write_ok);
+       __set_bit(GPCMD_SEND_KEY, filter->write_ok);
+       __set_bit(GPCMD_SEND_OPC, filter->write_ok);
+       __set_bit(GPCMD_SEND_CUE_SHEET, filter->write_ok);
+       __set_bit(GPCMD_SET_SPEED, filter->write_ok);
+       __set_bit(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL, filter->write_ok);
+       __set_bit(GPCMD_LOAD_UNLOAD, filter->write_ok);
+       __set_bit(GPCMD_SET_STREAMING, filter->write_ok);
+}
+
+int blk_register_filter(struct gendisk *disk)
+{
+       int ret;
+       struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+       struct kobject *parent = kobject_get(disk->holder_dir->parent);
+
+       if (!parent)
+               return -ENODEV;
+
+       ret = kobject_init_and_add(&filter->kobj, &rcf_ktype, parent,
+                                "%s", "cmd_filter");
+
+       if (ret < 0)
+               return ret;
+
+       rcf_set_defaults(filter);
+       return 0;
+}
+
+void blk_unregister_filter(struct gendisk *disk)
+{
+       struct blk_scsi_cmd_filter *filter = &disk->cmd_filter;
+
+       kobject_put(&filter->kobj);
+       kobject_put(disk->holder_dir->parent);
+}
+
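Illustrative only, not part of the patch: with the per-disk filter, the SG_IO and bsg paths share one permission check that combines the opcode bitmaps with the caller's open mode, and the bitmaps are tunable through the read_table/write_table attributes of the disk's new cmd_filter kobject. A sketch of the caller-side check, with hypothetical names:

    #include <linux/fs.h>
    #include <linux/genhd.h>

    /* Hypothetical: validate a user-supplied CDB against a disk's filter.
     * blk_cmd_filter_verify_command() is exported by block/cmd-filter.c. */
    static int my_validate_cdb(struct gendisk *disk, struct file *file,
    			   unsigned char *cdb)
    {
    	/* Returns 0 if cdb[0] is read-safe, or write-safe with FMODE_WRITE set
    	 * (or if the caller has CAP_SYS_RAWIO); -EPERM otherwise. */
    	return blk_cmd_filter_verify_command(&disk->cmd_filter, cdb,
    					     &file->f_mode);
    }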
index 902dd13..ed6f8f3 100644 (file)
@@ -86,6 +86,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
        if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
                return 0;
 
+       /*
+        * only merge an integrity-protected bio into an integrity-protected rq
+        */
+       if (bio_integrity(bio) != blk_integrity_rq(rq))
+               return 0;
+
        if (!elv_iosched_allow_merge(rq, bio))
                return 0;
 
@@ -144,7 +150,7 @@ static struct elevator_type *elevator_get(const char *name)
                else
                        sprintf(elv, "%s-iosched", name);
 
-               request_module(elv);
+               request_module("%s", elv);
                spin_lock(&elv_list_lock);
                e = elevator_find(name);
        }
index b922d48..9074f38 100644 (file)
@@ -189,6 +189,7 @@ void add_disk(struct gendisk *disk)
                            disk->minors, NULL, exact_match, exact_lock, disk);
        register_disk(disk);
        blk_register_queue(disk);
+       blk_register_filter(disk);
 
        bdi = &disk->queue->backing_dev_info;
        bdi_register_dev(bdi, MKDEV(disk->major, disk->first_minor));
@@ -200,6 +201,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
 
 void unlink_gendisk(struct gendisk *disk)
 {
+       blk_unregister_filter(disk);
        sysfs_remove_link(&disk->dev.kobj, "bdi");
        bdi_unregister(&disk->queue->backing_dev_info);
        blk_unregister_queue(disk);
@@ -400,6 +402,14 @@ static ssize_t disk_removable_show(struct device *dev,
                       (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
 }
 
+static ssize_t disk_ro_show(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct gendisk *disk = dev_to_disk(dev);
+
+       return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
+}
+
 static ssize_t disk_size_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
@@ -472,6 +482,7 @@ static ssize_t disk_fail_store(struct device *dev,
 
 static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
 static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
+static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
 static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
@@ -483,6 +494,7 @@ static struct device_attribute dev_attr_fail =
 static struct attribute *disk_attrs[] = {
        &dev_attr_range.attr,
        &dev_attr_removable.attr,
+       &dev_attr_ro.attr,
        &dev_attr_size.attr,
        &dev_attr_capability.attr,
        &dev_attr_stat.attr,
index 78199c0..c5b9bcf 100644 (file)
@@ -105,120 +105,12 @@ static int sg_emulated_host(struct request_queue *q, int __user *p)
        return put_user(1, p);
 }
 
-#define CMD_READ_SAFE  0x01
-#define CMD_WRITE_SAFE 0x02
-#define CMD_WARNED     0x04
-#define safe_for_read(cmd)     [cmd] = CMD_READ_SAFE
-#define safe_for_write(cmd)    [cmd] = CMD_WRITE_SAFE
-
-int blk_verify_command(unsigned char *cmd, int has_write_perm)
-{
-       static unsigned char cmd_type[256] = {
-
-               /* Basic read-only commands */
-               safe_for_read(TEST_UNIT_READY),
-               safe_for_read(REQUEST_SENSE),
-               safe_for_read(READ_6),
-               safe_for_read(READ_10),
-               safe_for_read(READ_12),
-               safe_for_read(READ_16),
-               safe_for_read(READ_BUFFER),
-               safe_for_read(READ_DEFECT_DATA),
-               safe_for_read(READ_LONG),
-               safe_for_read(INQUIRY),
-               safe_for_read(MODE_SENSE),
-               safe_for_read(MODE_SENSE_10),
-               safe_for_read(LOG_SENSE),
-               safe_for_read(START_STOP),
-               safe_for_read(GPCMD_VERIFY_10),
-               safe_for_read(VERIFY_16),
-
-               /* Audio CD commands */
-               safe_for_read(GPCMD_PLAY_CD),
-               safe_for_read(GPCMD_PLAY_AUDIO_10),
-               safe_for_read(GPCMD_PLAY_AUDIO_MSF),
-               safe_for_read(GPCMD_PLAY_AUDIO_TI),
-               safe_for_read(GPCMD_PAUSE_RESUME),
-
-               /* CD/DVD data reading */
-               safe_for_read(GPCMD_READ_BUFFER_CAPACITY),
-               safe_for_read(GPCMD_READ_CD),
-               safe_for_read(GPCMD_READ_CD_MSF),
-               safe_for_read(GPCMD_READ_DISC_INFO),
-               safe_for_read(GPCMD_READ_CDVD_CAPACITY),
-               safe_for_read(GPCMD_READ_DVD_STRUCTURE),
-               safe_for_read(GPCMD_READ_HEADER),
-               safe_for_read(GPCMD_READ_TRACK_RZONE_INFO),
-               safe_for_read(GPCMD_READ_SUBCHANNEL),
-               safe_for_read(GPCMD_READ_TOC_PMA_ATIP),
-               safe_for_read(GPCMD_REPORT_KEY),
-               safe_for_read(GPCMD_SCAN),
-               safe_for_read(GPCMD_GET_CONFIGURATION),
-               safe_for_read(GPCMD_READ_FORMAT_CAPACITIES),
-               safe_for_read(GPCMD_GET_EVENT_STATUS_NOTIFICATION),
-               safe_for_read(GPCMD_GET_PERFORMANCE),
-               safe_for_read(GPCMD_SEEK),
-               safe_for_read(GPCMD_STOP_PLAY_SCAN),
-
-               /* Basic writing commands */
-               safe_for_write(WRITE_6),
-               safe_for_write(WRITE_10),
-               safe_for_write(WRITE_VERIFY),
-               safe_for_write(WRITE_12),
-               safe_for_write(WRITE_VERIFY_12),
-               safe_for_write(WRITE_16),
-               safe_for_write(WRITE_LONG),
-               safe_for_write(WRITE_LONG_2),
-               safe_for_write(ERASE),
-               safe_for_write(GPCMD_MODE_SELECT_10),
-               safe_for_write(MODE_SELECT),
-               safe_for_write(LOG_SELECT),
-               safe_for_write(GPCMD_BLANK),
-               safe_for_write(GPCMD_CLOSE_TRACK),
-               safe_for_write(GPCMD_FLUSH_CACHE),
-               safe_for_write(GPCMD_FORMAT_UNIT),
-               safe_for_write(GPCMD_REPAIR_RZONE_TRACK),
-               safe_for_write(GPCMD_RESERVE_RZONE_TRACK),
-               safe_for_write(GPCMD_SEND_DVD_STRUCTURE),
-               safe_for_write(GPCMD_SEND_EVENT),
-               safe_for_write(GPCMD_SEND_KEY),
-               safe_for_write(GPCMD_SEND_OPC),
-               safe_for_write(GPCMD_SEND_CUE_SHEET),
-               safe_for_write(GPCMD_SET_SPEED),
-               safe_for_write(GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL),
-               safe_for_write(GPCMD_LOAD_UNLOAD),
-               safe_for_write(GPCMD_SET_STREAMING),
-       };
-       unsigned char type = cmd_type[cmd[0]];
-
-       /* Anybody who can open the device can do a read-safe command */
-       if (type & CMD_READ_SAFE)
-               return 0;
-
-       /* Write-safe commands just require a writable open.. */
-       if ((type & CMD_WRITE_SAFE) && has_write_perm)
-               return 0;
-
-       /* And root can do any command.. */
-       if (capable(CAP_SYS_RAWIO))
-               return 0;
-
-       if (!type) {
-               cmd_type[cmd[0]] = CMD_WARNED;
-               printk(KERN_WARNING "scsi: unknown opcode 0x%02x\n", cmd[0]);
-       }
-
-       /* Otherwise fail it with an "Operation not permitted" */
-       return -EPERM;
-}
-EXPORT_SYMBOL_GPL(blk_verify_command);
-
 static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq,
-                            struct sg_io_hdr *hdr, int has_write_perm)
+                            struct sg_io_hdr *hdr, struct file *file)
 {
        if (copy_from_user(rq->cmd, hdr->cmdp, hdr->cmd_len))
                return -EFAULT;
-       if (blk_verify_command(rq->cmd, has_write_perm))
+       if (blk_verify_command(file, rq->cmd))
                return -EPERM;
 
        /*
@@ -287,7 +179,7 @@ static int sg_io(struct file *file, struct request_queue *q,
                struct gendisk *bd_disk, struct sg_io_hdr *hdr)
 {
        unsigned long start_time;
-       int writing = 0, ret = 0, has_write_perm = 0;
+       int writing = 0, ret = 0;
        struct request *rq;
        char sense[SCSI_SENSE_BUFFERSIZE];
        struct bio *bio;
@@ -316,10 +208,7 @@ static int sg_io(struct file *file, struct request_queue *q,
        if (!rq)
                return -ENOMEM;
 
-       if (file)
-               has_write_perm = file->f_mode & FMODE_WRITE;
-
-       if (blk_fill_sghdr_rq(q, rq, hdr, has_write_perm)) {
+       if (blk_fill_sghdr_rq(q, rq, hdr, file)) {
                blk_put_request(rq);
                return -EFAULT;
        }
@@ -451,7 +340,7 @@ int sg_scsi_ioctl(struct file *file, struct request_queue *q,
        if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len))
                goto error;
 
-       err = blk_verify_command(rq->cmd, file->f_mode & FMODE_WRITE);
+       err = blk_verify_command(file, rq->cmd);
        if (err)
                goto error;
 
index 57a4364..499ccc6 100644 (file)
@@ -885,7 +885,8 @@ static int ata_scsi_dev_config(struct scsi_device *sdev,
                /* set the min alignment and padding */
                blk_queue_update_dma_alignment(sdev->request_queue,
                                               ATA_DMA_PAD_SZ - 1);
-               blk_queue_dma_pad(sdev->request_queue, ATA_DMA_PAD_SZ - 1);
+               blk_queue_update_dma_pad(sdev->request_queue,
+                                        ATA_DMA_PAD_SZ - 1);
 
                /* configure draining */
                buf = kmalloc(ATAPI_MAX_DRAIN, q->bounce_gfp | GFP_KERNEL);
index cd03473..a002a38 100644 (file)
@@ -6628,15 +6628,18 @@ static void DAC960_DestroyProcEntries(DAC960_Controller_T *Controller)
  * DAC960_gam_ioctl is the ioctl function for performing RAID operations.
 */
 
-static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
-                           unsigned int Request, unsigned long Argument)
+static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
+                                               unsigned long Argument)
 {
-  int ErrorCode = 0;
+  long ErrorCode = 0;
   if (!capable(CAP_SYS_ADMIN)) return -EACCES;
+
+  lock_kernel();
   switch (Request)
     {
     case DAC960_IOCTL_GET_CONTROLLER_COUNT:
-      return DAC960_ControllerCount;
+      ErrorCode = DAC960_ControllerCount;
+      break;
     case DAC960_IOCTL_GET_CONTROLLER_INFO:
       {
        DAC960_ControllerInfo_T __user *UserSpaceControllerInfo =
@@ -6644,15 +6647,20 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
        DAC960_ControllerInfo_T ControllerInfo;
        DAC960_Controller_T *Controller;
        int ControllerNumber;
-       if (UserSpaceControllerInfo == NULL) return -EINVAL;
-       ErrorCode = get_user(ControllerNumber,
+       if (UserSpaceControllerInfo == NULL)
+               ErrorCode = -EINVAL;
+       else ErrorCode = get_user(ControllerNumber,
                             &UserSpaceControllerInfo->ControllerNumber);
-       if (ErrorCode != 0) return ErrorCode;
+       if (ErrorCode != 0)
+               break;
+       ErrorCode = -ENXIO;
        if (ControllerNumber < 0 ||
-           ControllerNumber > DAC960_ControllerCount - 1)
-         return -ENXIO;
+           ControllerNumber > DAC960_ControllerCount - 1) {
+         break;
+       }
        Controller = DAC960_Controllers[ControllerNumber];
-       if (Controller == NULL) return -ENXIO;
+       if (Controller == NULL)
+               break;
        memset(&ControllerInfo, 0, sizeof(DAC960_ControllerInfo_T));
        ControllerInfo.ControllerNumber = ControllerNumber;
        ControllerInfo.FirmwareType = Controller->FirmwareType;
@@ -6665,8 +6673,9 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
        ControllerInfo.PCI_Address = Controller->PCI_Address;
        strcpy(ControllerInfo.ModelName, Controller->ModelName);
        strcpy(ControllerInfo.FirmwareVersion, Controller->FirmwareVersion);
-       return (copy_to_user(UserSpaceControllerInfo, &ControllerInfo,
+       ErrorCode = (copy_to_user(UserSpaceControllerInfo, &ControllerInfo,
                             sizeof(DAC960_ControllerInfo_T)) ? -EFAULT : 0);
+       break;
       }
     case DAC960_IOCTL_V1_EXECUTE_COMMAND:
       {
@@ -6684,30 +6693,39 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
        int ControllerNumber, DataTransferLength;
        unsigned char *DataTransferBuffer = NULL;
        dma_addr_t DataTransferBufferDMA;
-       if (UserSpaceUserCommand == NULL) return -EINVAL;
+       if (UserSpaceUserCommand == NULL) {
+               ErrorCode = -EINVAL;
+               break;
+       }
        if (copy_from_user(&UserCommand, UserSpaceUserCommand,
                                   sizeof(DAC960_V1_UserCommand_T))) {
                ErrorCode = -EFAULT;
-               goto Failure1a;
+               break;
        }
        ControllerNumber = UserCommand.ControllerNumber;
+       ErrorCode = -ENXIO;
        if (ControllerNumber < 0 ||
            ControllerNumber > DAC960_ControllerCount - 1)
-         return -ENXIO;
+               break;
        Controller = DAC960_Controllers[ControllerNumber];
-       if (Controller == NULL) return -ENXIO;
-       if (Controller->FirmwareType != DAC960_V1_Controller) return -EINVAL;
+       if (Controller == NULL)
+               break;
+       ErrorCode = -EINVAL;
+       if (Controller->FirmwareType != DAC960_V1_Controller)
+               break;
        CommandOpcode = UserCommand.CommandMailbox.Common.CommandOpcode;
        DataTransferLength = UserCommand.DataTransferLength;
-       if (CommandOpcode & 0x80) return -EINVAL;
+       if (CommandOpcode & 0x80)
+               break;
        if (CommandOpcode == DAC960_V1_DCDB)
          {
            if (copy_from_user(&DCDB, UserCommand.DCDB,
                               sizeof(DAC960_V1_DCDB_T))) {
                ErrorCode = -EFAULT;
-               goto Failure1a;
+               break;
            }
-           if (DCDB.Channel >= DAC960_V1_MaxChannels) return -EINVAL;
+           if (DCDB.Channel >= DAC960_V1_MaxChannels)
+                       break;
            if (!((DataTransferLength == 0 &&
                   DCDB.Direction
                   == DAC960_V1_DCDB_NoDataTransfer) ||
@@ -6717,38 +6735,37 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
                  (DataTransferLength < 0 &&
                   DCDB.Direction
                   == DAC960_V1_DCDB_DataTransferSystemToDevice)))
-             return -EINVAL;
+                       break;
            if (((DCDB.TransferLengthHigh4 << 16) | DCDB.TransferLength)
                != abs(DataTransferLength))
-             return -EINVAL;
+                       break;
            DCDB_IOBUF = pci_alloc_consistent(Controller->PCIDevice,
                        sizeof(DAC960_V1_DCDB_T), &DCDB_IOBUFDMA);
-           if (DCDB_IOBUF == NULL)
-                       return -ENOMEM;
+           if (DCDB_IOBUF == NULL) {
+                       ErrorCode = -ENOMEM;
+                       break;
+               }
          }
+       ErrorCode = -ENOMEM;
        if (DataTransferLength > 0)
          {
            DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
                                DataTransferLength, &DataTransferBufferDMA);
-           if (DataTransferBuffer == NULL) {
-               ErrorCode = -ENOMEM;
-               goto Failure1;
-           }
+           if (DataTransferBuffer == NULL)
+               break;
            memset(DataTransferBuffer, 0, DataTransferLength);
          }
        else if (DataTransferLength < 0)
          {
            DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
                                -DataTransferLength, &DataTransferBufferDMA);
-           if (DataTransferBuffer == NULL) {
-               ErrorCode = -ENOMEM;
-               goto Failure1;
-           }
+           if (DataTransferBuffer == NULL)
+               break;
            if (copy_from_user(DataTransferBuffer,
                               UserCommand.DataTransferBuffer,
                               -DataTransferLength)) {
                ErrorCode = -EFAULT;
-               goto Failure1;
+               break;
            }
          }
        if (CommandOpcode == DAC960_V1_DCDB)
@@ -6825,8 +6842,7 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
        if (DCDB_IOBUF != NULL)
          pci_free_consistent(Controller->PCIDevice, sizeof(DAC960_V1_DCDB_T),
                        DCDB_IOBUF, DCDB_IOBUFDMA);
-      Failure1a:
-       return ErrorCode;
+       break;
       }
     case DAC960_IOCTL_V2_EXECUTE_COMMAND:
       {
@@ -6844,32 +6860,43 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
        dma_addr_t DataTransferBufferDMA;
        unsigned char *RequestSenseBuffer = NULL;
        dma_addr_t RequestSenseBufferDMA;
-       if (UserSpaceUserCommand == NULL) return -EINVAL;
+
+       ErrorCode = -EINVAL;
+       if (UserSpaceUserCommand == NULL)
+               break;
        if (copy_from_user(&UserCommand, UserSpaceUserCommand,
                           sizeof(DAC960_V2_UserCommand_T))) {
                ErrorCode = -EFAULT;
-               goto Failure2a;
+               break;
        }
+       ErrorCode = -ENXIO;
        ControllerNumber = UserCommand.ControllerNumber;
        if (ControllerNumber < 0 ||
            ControllerNumber > DAC960_ControllerCount - 1)
-         return -ENXIO;
+               break;
        Controller = DAC960_Controllers[ControllerNumber];
-       if (Controller == NULL) return -ENXIO;
-       if (Controller->FirmwareType != DAC960_V2_Controller) return -EINVAL;
+       if (Controller == NULL)
+               break;
+       if (Controller->FirmwareType != DAC960_V2_Controller){
+               ErrorCode = -EINVAL;
+               break;
+       }
        DataTransferLength = UserCommand.DataTransferLength;
+       ErrorCode = -ENOMEM;
        if (DataTransferLength > 0)
          {
            DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
                                DataTransferLength, &DataTransferBufferDMA);
-           if (DataTransferBuffer == NULL) return -ENOMEM;
+           if (DataTransferBuffer == NULL)
+               break;
            memset(DataTransferBuffer, 0, DataTransferLength);
          }
        else if (DataTransferLength < 0)
          {
            DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
                                -DataTransferLength, &DataTransferBufferDMA);
-           if (DataTransferBuffer == NULL) return -ENOMEM;
+           if (DataTransferBuffer == NULL)
+               break;
            if (copy_from_user(DataTransferBuffer,
                               UserCommand.DataTransferBuffer,
                               -DataTransferLength)) {
@@ -6979,8 +7006,7 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
        if (RequestSenseBuffer != NULL)
          pci_free_consistent(Controller->PCIDevice, RequestSenseLength,
                RequestSenseBuffer, RequestSenseBufferDMA);
-      Failure2a:
-       return ErrorCode;
+        break;
       }
     case DAC960_IOCTL_V2_GET_HEALTH_STATUS:
       {
@@ -6990,21 +7016,33 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
        DAC960_V2_HealthStatusBuffer_T HealthStatusBuffer;
        DAC960_Controller_T *Controller;
        int ControllerNumber;
-       if (UserSpaceGetHealthStatus == NULL) return -EINVAL;
+       if (UserSpaceGetHealthStatus == NULL) {
+               ErrorCode = -EINVAL;
+               break;
+       }
        if (copy_from_user(&GetHealthStatus, UserSpaceGetHealthStatus,
-                          sizeof(DAC960_V2_GetHealthStatus_T)))
-               return -EFAULT;
+                          sizeof(DAC960_V2_GetHealthStatus_T))) {
+               ErrorCode = -EFAULT;
+               break;
+       }
+       ErrorCode = -ENXIO;
        ControllerNumber = GetHealthStatus.ControllerNumber;
        if (ControllerNumber < 0 ||
            ControllerNumber > DAC960_ControllerCount - 1)
-         return -ENXIO;
+                   break;
        Controller = DAC960_Controllers[ControllerNumber];
-       if (Controller == NULL) return -ENXIO;
-       if (Controller->FirmwareType != DAC960_V2_Controller) return -EINVAL;
+       if (Controller == NULL)
+               break;
+       if (Controller->FirmwareType != DAC960_V2_Controller) {
+               ErrorCode = -EINVAL;
+               break;
+       }
        if (copy_from_user(&HealthStatusBuffer,
                           GetHealthStatus.HealthStatusBuffer,
-                          sizeof(DAC960_V2_HealthStatusBuffer_T)))
-               return -EFAULT;
+                          sizeof(DAC960_V2_HealthStatusBuffer_T))) {
+               ErrorCode = -EFAULT;
+               break;
+       }
        while (Controller->V2.HealthStatusBuffer->StatusChangeCounter
               == HealthStatusBuffer.StatusChangeCounter &&
               Controller->V2.HealthStatusBuffer->NextEventSequenceNumber
@@ -7012,21 +7050,28 @@ static int DAC960_gam_ioctl(struct inode *inode, struct file *file,
          {
            interruptible_sleep_on_timeout(&Controller->HealthStatusWaitQueue,
                                           DAC960_MonitoringTimerInterval);
-           if (signal_pending(current)) return -EINTR;
+           if (signal_pending(current)) {
+               ErrorCode = -EINTR;
+               break;
+           }
          }
        if (copy_to_user(GetHealthStatus.HealthStatusBuffer,
                         Controller->V2.HealthStatusBuffer,
                         sizeof(DAC960_V2_HealthStatusBuffer_T)))
-               return -EFAULT;
-       return 0;
+               ErrorCode = -EFAULT;
+       else
+               ErrorCode =  0;
       }
+      default:
+       ErrorCode = -ENOTTY;
     }
-  return -EINVAL;
+  unlock_kernel();
+  return ErrorCode;
 }
 
 static const struct file_operations DAC960_gam_fops = {
        .owner          = THIS_MODULE,
-       .ioctl          = DAC960_gam_ioctl
+       .unlocked_ioctl = DAC960_gam_ioctl
 };
 
 static struct miscdevice DAC960_gam_dev = {
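For context, not part of the patch: the DAC960 conversion (and the pt driver below) follow the usual .ioctl to .unlocked_ioctl pushdown -- the handler now takes the big kernel lock itself and funnels every exit through a single unlock/return instead of the old early returns. A minimal sketch of the shape, with a hypothetical command number:

    #include <linux/module.h>
    #include <linux/fs.h>
    #include <linux/smp_lock.h>

    static long my_unlocked_ioctl(struct file *file, unsigned int cmd,
    			      unsigned long arg)
    {
    	long ret;

    	lock_kernel();		/* the old .ioctl entry point held this implicitly */
    	switch (cmd) {
    	case 0x1234:		/* hypothetical command */
    		ret = 0;
    		break;
    	default:
    		ret = -ENOTTY;
    	}
    	unlock_kernel();	/* single exit path */
    	return ret;
    }

    static const struct file_operations my_fops = {
    	.owner		= THIS_MODULE,
    	.unlocked_ioctl	= my_unlocked_ioctl,
    };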
index 41f818b..2f17462 100644 (file)
@@ -1003,7 +1003,7 @@ aoecmd_cfg_rsp(struct sk_buff *skb)
         * Enough people have their dip switches set backwards to
         * warrant a loud message for this special case.
         */
-       aoemajor = be16_to_cpu(get_unaligned(&h->major));
+       aoemajor = get_unaligned_be16(&h->major);
        if (aoemajor == 0xfff) {
                printk(KERN_ERR "aoe: Warning: shelf address is all ones.  "
                        "Check shelf dip switches.\n");
index 8b9549a..27455ee 100644 (file)
@@ -146,6 +146,7 @@ static int (*drives[4])[6] = {&drive0, &drive1, &drive2, &drive3};
 #include <linux/mtio.h>
 #include <linux/device.h>
 #include <linux/sched.h>       /* current, TASK_*, schedule_timeout() */
+#include <linux/smp_lock.h>
 
 #include <asm/uaccess.h>
 
@@ -189,8 +190,7 @@ module_param_array(drive3, int, NULL, 0);
 #define ATAPI_LOG_SENSE                0x4d
 
 static int pt_open(struct inode *inode, struct file *file);
-static int pt_ioctl(struct inode *inode, struct file *file,
-                   unsigned int cmd, unsigned long arg);
+static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 static int pt_release(struct inode *inode, struct file *file);
 static ssize_t pt_read(struct file *filp, char __user *buf,
                       size_t count, loff_t * ppos);
@@ -236,7 +236,7 @@ static const struct file_operations pt_fops = {
        .owner = THIS_MODULE,
        .read = pt_read,
        .write = pt_write,
-       .ioctl = pt_ioctl,
+       .unlocked_ioctl = pt_ioctl,
        .open = pt_open,
        .release = pt_release,
 };
@@ -685,8 +685,7 @@ out:
        return err;
 }
 
-static int pt_ioctl(struct inode *inode, struct file *file,
-        unsigned int cmd, unsigned long arg)
+static long pt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
        struct pt_unit *tape = file->private_data;
        struct mtop __user *p = (void __user *)arg;
@@ -700,23 +699,26 @@ static int pt_ioctl(struct inode *inode, struct file *file,
                switch (mtop.mt_op) {
 
                case MTREW: