Merge git://git.kernel.org/pub/scm/linux/kernel/git/bart/ide-2.6
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 27 Jan 2008 06:54:32 +0000 (22:54 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 27 Jan 2008 06:54:32 +0000 (22:54 -0800)
* git://git.kernel.org/pub/scm/linux/kernel/git/bart/ide-2.6: (63 commits)
  ide: remove REQ_TYPE_ATA_CMD
  ide: switch ide_cmd_ioctl() to use REQ_TYPE_ATA_TASKFILE requests
  ide: switch set_xfer_rate() to use REQ_TYPE_ATA_TASKFILE requests
  ide: fix final status check in drive_cmd_intr()
  ide: check BUSY and ERROR status bits before reading data in drive_cmd_intr()
  ide: don't enable local IRQs for PIO-in in driver_cmd_intr() (take 2)
  ide: convert "empty" REQ_TYPE_ATA_CMD requests to use REQ_TYPE_ATA_TASKFILE
  ide: initialize rq->cmd_type in ide_init_drive_cmd() callers
  ide: use wait_drive_not_busy() in drive_cmd_intr() (take 2)
  ide: kill DATA_READY define
  ide: task_end_request() fix
  ide: use rq->nr_sectors in task_end_request()
  ide: remove needless ->cursg clearing from task_end_request()
  ide: set IDE_TFLAG_IN_* flags before queuing/executing command
  ide-tape: fix handling of non-special requests in ->end_request method
  ide: fix final status check in task_in_intr()
  ide: clear HOB bit for REQ_TYPE_ATA_CMD requests in ide_end_drive_cmd()
  ide: fix ->io_32bit race in ide_taskfile_ioctl()
  cmd64x: remove /proc/ide/cmd64x
  ide: remove broken disk byte-swapping support
  ...

106 files changed:
Documentation/DocBook/s390-drivers.tmpl
Documentation/cpu-hotplug.txt
Documentation/kernel-parameters.txt
Documentation/s390/CommonIO
arch/s390/Kconfig
arch/s390/crypto/Kconfig [deleted file]
arch/s390/crypto/aes_s390.c
arch/s390/crypto/prng.c
arch/s390/kernel/Makefile
arch/s390/kernel/early.c
arch/s390/kernel/head64.S
arch/s390/kernel/ipl.c
arch/s390/kernel/process.c
arch/s390/kernel/ptrace.c
arch/s390/kernel/setup.c
arch/s390/kernel/signal.c
arch/s390/kernel/smp.c
arch/s390/kernel/traps.c
arch/s390/kernel/vmlinux.lds.S
arch/s390/lib/spinlock.c
arch/s390/mm/extmem.c
arch/s390/mm/vmem.c
drivers/crypto/Kconfig
drivers/s390/block/Makefile
drivers/s390/block/dasd.c
drivers/s390/block/dasd_3370_erp.c [deleted file]
drivers/s390/block/dasd_3990_erp.c
drivers/s390/block/dasd_9336_erp.c [deleted file]
drivers/s390/block/dasd_9343_erp.c [deleted file]
drivers/s390/block/dasd_alias.c [new file with mode: 0644]
drivers/s390/block/dasd_devmap.c
drivers/s390/block/dasd_diag.c
drivers/s390/block/dasd_eckd.c
drivers/s390/block/dasd_eckd.h
drivers/s390/block/dasd_eer.c
drivers/s390/block/dasd_erp.c
drivers/s390/block/dasd_fba.c
drivers/s390/block/dasd_genhd.c
drivers/s390/block/dasd_int.h
drivers/s390/block/dasd_ioctl.c
drivers/s390/block/dasd_proc.c
drivers/s390/block/dcssblk.c
drivers/s390/char/Makefile
drivers/s390/char/monwriter.c
drivers/s390/char/raw3270.c
drivers/s390/char/sclp.h
drivers/s390/char/sclp_chp.c [deleted file]
drivers/s390/char/sclp_cmd.c [new file with mode: 0644]
drivers/s390/char/sclp_cpi.c
drivers/s390/char/sclp_cpi_sys.c [new file with mode: 0644]
drivers/s390/char/sclp_cpi_sys.h [new file with mode: 0644]
drivers/s390/char/sclp_info.c [deleted file]
drivers/s390/char/sclp_rw.c
drivers/s390/char/tape_3590.c
drivers/s390/char/tape_core.c
drivers/s390/char/tape_proc.c
drivers/s390/char/vmlogrdr.c
drivers/s390/char/vmur.c
drivers/s390/char/zcore.c
drivers/s390/cio/airq.c
drivers/s390/cio/airq.h [deleted file]
drivers/s390/cio/blacklist.c
drivers/s390/cio/ccwgroup.c
drivers/s390/cio/chsc.c
drivers/s390/cio/cio.c
drivers/s390/cio/cio.h
drivers/s390/cio/cio_debug.h
drivers/s390/cio/css.c
drivers/s390/cio/css.h
drivers/s390/cio/device.c
drivers/s390/cio/device.h
drivers/s390/cio/device_fsm.c
drivers/s390/cio/device_id.c
drivers/s390/cio/device_ops.c
drivers/s390/cio/device_pgid.c
drivers/s390/cio/device_status.c
drivers/s390/cio/io_sch.h [new file with mode: 0644]
drivers/s390/cio/ioasm.h
drivers/s390/cio/qdio.c
drivers/s390/cio/qdio.h
drivers/s390/net/claw.c
drivers/s390/net/lcs.c
drivers/s390/net/netiucv.c
drivers/s390/net/qeth_proc.c
drivers/s390/net/smsgiucv.c
drivers/s390/scsi/zfcp_erp.c
drivers/s390/scsi/zfcp_fsf.c
drivers/s390/scsi/zfcp_qdio.c
include/asm-s390/airq.h [new file with mode: 0644]
include/asm-s390/cio.h
include/asm-s390/dasd.h
include/asm-s390/ipl.h
include/asm-s390/mmu_context.h
include/asm-s390/pgtable.h
include/asm-s390/processor.h
include/asm-s390/ptrace.h
include/asm-s390/qdio.h
include/asm-s390/rwsem.h
include/asm-s390/sclp.h
include/asm-s390/smp.h
include/asm-s390/spinlock.h
include/asm-s390/spinlock_types.h
include/asm-s390/tlbflush.h
include/asm-s390/zcrypt.h
kernel/sysctl_check.c
security/selinux/ss/services.c

index 254e769..3d2f31b 100644 (file)
 !Iinclude/asm-s390/ccwdev.h
 !Edrivers/s390/cio/device.c
 !Edrivers/s390/cio/device_ops.c
+!Edrivers/s390/cio/airq.c
     </sect1>
     <sect1 id="cmf">
      <title>The channel-measurement facility</title>
index fb94f5a..ba0aacd 100644 (file)
@@ -50,7 +50,7 @@ additional_cpus=n (*) Use this to limit hotpluggable cpus. This option sets
                        cpu_possible_map = cpu_present_map + additional_cpus
 
 (*) Option valid only for following architectures
-- x86_64, ia64, s390
+- x86_64, ia64
 
 ia64 and x86_64 use the number of disabled local apics in ACPI tables MADT
 to determine the number of potentially hot-pluggable cpus. The implementation
index 65de5ba..880f882 100644 (file)
@@ -370,7 +370,8 @@ and is between 256 and 4096 characters. It is defined in the file
                        configured.  Potentially dangerous and should only be
                        used if you are entirely sure of the consequences.
 
-       chandev=        [HW,NET] Generic channel device initialisation
+       ccw_timeout_log [S390]
+                       See Documentation/s390/CommonIO for details.
 
        checkreqprot    [SELINUX] Set initial checkreqprot flag value.
                        Format: { "0" | "1" }
@@ -382,6 +383,12 @@ and is between 256 and 4096 characters. It is defined in the file
                        Value can be changed at runtime via
                                /selinux/checkreqprot.
 
+       cio_ignore=     [S390]
+                       See Documentation/s390/CommonIO for details.
+
+       cio_msg=        [S390]
+                       See Documentation/s390/CommonIO for details.
+
        clock=          [BUGS=X86-32, HW] gettimeofday clocksource override.
                        [Deprecated]
                        Forces specified clocksource (if available) to be used
index 86320aa..8fbc0a8 100644 (file)
@@ -4,6 +4,11 @@ S/390 common I/O-Layer - command line parameters, procfs and debugfs entries
 Command line parameters
 -----------------------
 
+* ccw_timeout_log
+
+  Enable logging of debug information in case of ccw device timeouts.
+
+
 * cio_msg = yes | no
   
   Determines whether information on found devices and sensed device 
index 1330061..6ef54d2 100644 (file)
@@ -276,9 +276,6 @@ source "kernel/Kconfig.preempt"
 
 source "mm/Kconfig"
 
-config HOLES_IN_ZONE
-       def_bool y
-
 comment "I/O subsystem configuration"
 
 config MACHCHK_WARNING
diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig
deleted file mode 100644 (file)
index d1defbb..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-config CRYPTO_SHA1_S390
-       tristate "SHA1 digest algorithm"
-       depends on S390
-       select CRYPTO_ALGAPI
-       help
-         This is the s390 hardware accelerated implementation of the
-         SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
-
-config CRYPTO_SHA256_S390
-       tristate "SHA256 digest algorithm"
-       depends on S390
-       select CRYPTO_ALGAPI
-       help
-         This is the s390 hardware accelerated implementation of the
-         SHA256 secure hash standard (DFIPS 180-2).
-
-         This version of SHA implements a 256 bit hash with 128 bits of
-         security against collision attacks.
-
-config CRYPTO_DES_S390
-       tristate "DES and Triple DES cipher algorithms"
-       depends on S390
-       select CRYPTO_ALGAPI
-       select CRYPTO_BLKCIPHER
-       help
-         This us the s390 hardware accelerated implementation of the
-         DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
-
-config CRYPTO_AES_S390
-       tristate "AES cipher algorithms"
-       depends on S390
-       select CRYPTO_ALGAPI
-       select CRYPTO_BLKCIPHER
-       help
-         This is the s390 hardware accelerated implementation of the
-         AES cipher algorithms (FIPS-197). AES uses the Rijndael
-         algorithm.
-
-         Rijndael appears to be consistently a very good performer in
-         both hardware and software across a wide range of computing
-         environments regardless of its use in feedback or non-feedback
-         modes. Its key setup time is excellent, and its key agility is
-         good. Rijndael's very low memory requirements make it very well
-         suited for restricted-space environments, in which it also
-         demonstrates excellent performance. Rijndael's operations are
-         among the easiest to defend against power and timing attacks.
-
-         On s390 the System z9-109 currently only supports the key size
-         of 128 bit.
-
-config S390_PRNG
-       tristate "Pseudo random number generator device driver"
-       depends on S390
-       default "m"
-       help
-         Select this option if you want to use the s390 pseudo random number
-         generator. The PRNG is part of the cryptographic processor functions
-         and uses triple-DES to generate secure random numbers like the
-         ANSI X9.17 standard. The PRNG is usable via the char device
-         /dev/prandom.
index 46c9705..a3f67f8 100644 (file)
@@ -516,7 +516,7 @@ static int __init aes_init(void)
        /* z9 109 and z9 BC/EC only support 128 bit key length */
        if (keylen_flag == AES_KEYLEN_128)
                printk(KERN_INFO
-                      "aes_s390: hardware acceleration only available for"
+                      "aes_s390: hardware acceleration only available for "
                       "128 bit keys\n");
 
        ret = crypto_register_alg(&aes_alg);
index 8eb3a1a..0cfefdd 100644 (file)
@@ -90,7 +90,7 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
        int ret = 0;
        int tmp;
 
-       /* nbytes can be arbitrary long, we spilt it into chunks */
+       /* nbytes can be arbitrary length, we split it into chunks */
        while (nbytes) {
                /* same as in extract_entropy_user in random.c */
                if (need_resched()) {
@@ -146,7 +146,7 @@ static ssize_t prng_read(struct file *file, char __user *ubuf, size_t nbytes,
        return ret;
 }
 
-static struct file_operations prng_fops = {
+static const struct file_operations prng_fops = {
        .owner          = THIS_MODULE,
        .open           = &prng_open,
        .release        = NULL,
index 56cb710..b3b650a 100644 (file)
@@ -31,7 +31,3 @@ S390_KEXEC_OBJS := machine_kexec.o crash.o
 S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o)
 obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS)
 
-#
-# This is just to get the dependencies...
-#
-binfmt_elf32.o:        $(TOPDIR)/fs/binfmt_elf.c
index 1b3af7d..9f7b73b 100644 (file)
@@ -276,7 +276,7 @@ void __init startup_init(void)
        create_kernel_nss();
        sort_main_extable();
        setup_lowcore_early();
-       sclp_readinfo_early();
+       sclp_read_info_early();
        sclp_facilities_detect();
        memsize = sclp_memory_detect();
 #ifndef CONFIG_64BIT
index a87b197..79dccd2 100644 (file)
@@ -157,7 +157,7 @@ startup_continue:
        .long   0xb2b10000              # store facility list
        tm      0xc8,0x08               # check bit for clearing-by-ASCE
        bno     0f-.LPG1(%r13)
-       lhi     %r1,2094
+       lhi     %r1,2048
        lhi     %r2,0
        .long   0xb98e2001
        oi      7(%r12),0x80            # set IDTE flag
index b97694f..db28cca 100644 (file)
@@ -2,7 +2,7 @@
  *  arch/s390/kernel/ipl.c
  *    ipl/reipl/dump support for Linux on s390.
  *
- *    Copyright (C) IBM Corp. 2005,2006
+ *    Copyright IBM Corp. 2005,2007
  *    Author(s): Michael Holzheu <holzheu@de.ibm.com>
  *              Heiko Carstens <heiko.carstens@de.ibm.com>
  *              Volker Sameske <sameske@de.ibm.com>
 #define IPL_FCP_DUMP_STR       "fcp_dump"
 #define IPL_NSS_STR            "nss"
 
+#define DUMP_CCW_STR           "ccw"
+#define DUMP_FCP_STR           "fcp"
+#define DUMP_NONE_STR          "none"
+
+/*
+ * Four shutdown trigger types are supported:
+ * - panic
+ * - halt
+ * - power off
+ * - reipl
+ */
+#define ON_PANIC_STR           "on_panic"
+#define ON_HALT_STR            "on_halt"
+#define ON_POFF_STR            "on_poff"
+#define ON_REIPL_STR           "on_reboot"
+
+struct shutdown_action;
+struct shutdown_trigger {
+       char *name;
+       struct shutdown_action *action;
+};
+
+/*
+ * Five shutdown action types are supported:
+ */
+#define SHUTDOWN_ACTION_IPL_STR                "ipl"
+#define SHUTDOWN_ACTION_REIPL_STR      "reipl"
+#define SHUTDOWN_ACTION_DUMP_STR       "dump"
+#define SHUTDOWN_ACTION_VMCMD_STR      "vmcmd"
+#define SHUTDOWN_ACTION_STOP_STR       "stop"
+
+struct shutdown_action {
+       char *name;
+       void (*fn) (struct shutdown_trigger *trigger);
+       int (*init) (void);
+};
+
 static char *ipl_type_str(enum ipl_type type)
 {
        switch (type) {
@@ -54,10 +91,6 @@ enum dump_type {
        DUMP_TYPE_FCP   = 4,
 };
 
-#define DUMP_NONE_STR   "none"
-#define DUMP_CCW_STR    "ccw"
-#define DUMP_FCP_STR    "fcp"
-
 static char *dump_type_str(enum dump_type type)
 {
        switch (type) {
@@ -99,30 +132,6 @@ enum dump_method {
        DUMP_METHOD_FCP_DIAG,
 };
 
-enum shutdown_action {
-       SHUTDOWN_REIPL,
-       SHUTDOWN_DUMP,
-       SHUTDOWN_STOP,
-};
-
-#define SHUTDOWN_REIPL_STR "reipl"
-#define SHUTDOWN_DUMP_STR  "dump"
-#define SHUTDOWN_STOP_STR  "stop"
-
-static char *shutdown_action_str(enum shutdown_action action)
-{
-       switch (action) {
-       case SHUTDOWN_REIPL:
-               return SHUTDOWN_REIPL_STR;
-       case SHUTDOWN_DUMP:
-               return SHUTDOWN_DUMP_STR;
-       case SHUTDOWN_STOP:
-               return SHUTDOWN_STOP_STR;
-       default:
-               return NULL;
-       }
-}
-
 static int diag308_set_works = 0;
 
 static int reipl_capabilities = IPL_TYPE_UNKNOWN;
@@ -140,8 +149,6 @@ static enum dump_method dump_method = DUMP_METHOD_NONE;
 static struct ipl_parameter_block *dump_block_fcp;
 static struct ipl_parameter_block *dump_block_ccw;
 
-static enum shutdown_action on_panic_action = SHUTDOWN_STOP;
-
 static struct sclp_ipl_info sclp_ipl_info;
 
 int diag308(unsigned long subcode, void *addr)
@@ -205,8 +212,8 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj,        \
                struct kobj_attribute *attr,                            \
                const char *buf, size_t len)                            \
 {                                                                      \
-       if (sscanf(buf, _fmt_in, _value) != 1)                          \
-               return -EINVAL;                                         \
+       strncpy(_value, buf, sizeof(_value) - 1);                       \
+       strstrip(_value);                                               \
        return len;                                                     \
 }                                                                      \
 static struct kobj_attribute sys_##_prefix##_##_name##_attr =          \
@@ -245,33 +252,6 @@ static __init enum ipl_type get_ipl_type(void)
        return IPL_TYPE_FCP;
 }
 
-void __init setup_ipl_info(void)
-{
-       ipl_info.type = get_ipl_type();
-       switch (ipl_info.type) {
-       case IPL_TYPE_CCW:
-               ipl_info.data.ccw.dev_id.devno = ipl_devno;
-               ipl_info.data.ccw.dev_id.ssid = 0;
-               break;
-       case IPL_TYPE_FCP:
-       case IPL_TYPE_FCP_DUMP:
-               ipl_info.data.fcp.dev_id.devno =
-                       IPL_PARMBLOCK_START->ipl_info.fcp.devno;
-               ipl_info.data.fcp.dev_id.ssid = 0;
-               ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
-               ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
-               break;
-       case IPL_TYPE_NSS:
-               strncpy(ipl_info.data.nss.name, kernel_nss_name,
-                       sizeof(ipl_info.data.nss.name));
-               break;
-       case IPL_TYPE_UNKNOWN:
-       default:
-               /* We have no info to copy */
-               break;
-       }
-}
-
 struct ipl_info ipl_info;
 EXPORT_SYMBOL_GPL(ipl_info);
 
@@ -428,8 +408,74 @@ static struct attribute_group ipl_unknown_attr_group = {
 
 static struct kset *ipl_kset;
 
+static int __init ipl_register_fcp_files(void)
+{
+       int rc;
+
+       rc = sysfs_create_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
+       if (rc)
+               goto out;
+       rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_parameter_attr);
+       if (rc)
+               goto out_ipl_parm;
+       rc = sysfs_create_bin_file(&ipl_kset->kobj, &ipl_scp_data_attr);
+       if (!rc)
+               goto out;
+
+       sysfs_remove_bin_file(&ipl_kset->kobj, &ipl_parameter_attr);
+
+out_ipl_parm:
+       sysfs_remove_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
+out:
+       return rc;
+}
+
+static void ipl_run(struct shutdown_trigger *trigger)
+{
+       diag308(DIAG308_IPL, NULL);
+       if (MACHINE_IS_VM)
+               __cpcmd("IPL", NULL, 0, NULL);
+       else if (ipl_info.type == IPL_TYPE_CCW)
+               reipl_ccw_dev(&ipl_info.data.ccw.dev_id);
+}
+
+static int ipl_init(void)
+{
+       int rc;
+
+       ipl_kset = kset_create_and_add("ipl", NULL, firmware_kobj);
+       if (!ipl_kset) {
+               rc = -ENOMEM;
+               goto out;
+       }
+       switch (ipl_info.type) {
+       case IPL_TYPE_CCW:
+               rc = sysfs_create_group(&ipl_kset->kobj, &ipl_ccw_attr_group);
+               break;
+       case IPL_TYPE_FCP:
+       case IPL_TYPE_FCP_DUMP:
+               rc = ipl_register_fcp_files();
+               break;
+       case IPL_TYPE_NSS:
+               rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nss_attr_group);
+               break;
+       default:
+               rc = sysfs_create_group(&ipl_kset->kobj,
+                                       &ipl_unknown_attr_group);
+               break;
+       }
+out:
+       if (rc)
+               panic("ipl_init failed: rc = %i\n", rc);
+
+       return 0;
+}
+
+static struct shutdown_action ipl_action = {SHUTDOWN_ACTION_IPL_STR, ipl_run,
+                                           ipl_init};
+
 /*
- * reipl section
+ * reipl shutdown action: Reboot Linux on shutdown.
  */
 
 /* FCP reipl device attributes */
@@ -549,7 +595,9 @@ static int reipl_set_type(enum ipl_type type)
 
        switch(type) {
        case IPL_TYPE_CCW:
-               if (MACHINE_IS_VM)
+               if (diag308_set_works)
+                       reipl_method = REIPL_METHOD_CCW_DIAG;
+               else if (MACHINE_IS_VM)
                        reipl_method = REIPL_METHOD_CCW_VM;
                else
                        reipl_method = REIPL_METHOD_CCW_CIO;
@@ -600,143 +648,11 @@ static ssize_t reipl_type_store(struct kobject *kobj,
 }
 
 static struct kobj_attribute reipl_type_attr =
-               __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
+       __ATTR(reipl_type, 0644, reipl_type_show, reipl_type_store);
 
 static struct kset *reipl_kset;
 
-/*
- * dump section
- */
-
-/* FCP dump device attributes */
-
-DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n",
-                  dump_block_fcp->ipl_info.fcp.wwpn);
-DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n",
-                  dump_block_fcp->ipl_info.fcp.lun);
-DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
-                  dump_block_fcp->ipl_info.fcp.bootprog);
-DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
-                  dump_block_fcp->ipl_info.fcp.br_lba);
-DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
-                  dump_block_fcp->ipl_info.fcp.devno);
-
-static struct attribute *dump_fcp_attrs[] = {
-       &sys_dump_fcp_device_attr.attr,
-       &sys_dump_fcp_wwpn_attr.attr,
-       &sys_dump_fcp_lun_attr.attr,
-       &sys_dump_fcp_bootprog_attr.attr,
-       &sys_dump_fcp_br_lba_attr.attr,
-       NULL,
-};
-
-static struct attribute_group dump_fcp_attr_group = {
-       .name  = IPL_FCP_STR,
-       .attrs = dump_fcp_attrs,
-};
-
-/* CCW dump device attributes */
-
-DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
-                  dump_block_ccw->ipl_info.ccw.devno);
-
-static struct attribute *dump_ccw_attrs[] = {
-       &sys_dump_ccw_device_attr.attr,
-       NULL,
-};
-
-static struct attribute_group dump_ccw_attr_group = {
-       .name  = IPL_CCW_STR,
-       .attrs = dump_ccw_attrs,
-};
-
-/* dump type */
-
-static int dump_set_type(enum dump_type type)
-{
-       if (!(dump_capabilities & type))
-               return -EINVAL;
-       switch(type) {
-       case DUMP_TYPE_CCW:
-               if (MACHINE_IS_VM)
-                       dump_method = DUMP_METHOD_CCW_VM;
-               else if (diag308_set_works)
-                       dump_method = DUMP_METHOD_CCW_DIAG;
-               else
-                       dump_method = DUMP_METHOD_CCW_CIO;
-               break;
-       case DUMP_TYPE_FCP:
-               dump_method = DUMP_METHOD_FCP_DIAG;
-               break;
-       default:
-               dump_method = DUMP_METHOD_NONE;
-       }
-       dump_type = type;
-       return 0;
-}
-
-static ssize_t dump_type_show(struct kobject *kobj,
-                             struct kobj_attribute *attr, char *page)
-{
-       return sprintf(page, "%s\n", dump_type_str(dump_type));
-}
-
-static ssize_t dump_type_store(struct kobject *kobj,
-                              struct kobj_attribute *attr,
-                              const char *buf, size_t len)
-{
-       int rc = -EINVAL;
-
-       if (strncmp(buf, DUMP_NONE_STR, strlen(DUMP_NONE_STR)) == 0)
-               rc = dump_set_type(DUMP_TYPE_NONE);
-       else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0)
-               rc = dump_set_type(DUMP_TYPE_CCW);
-       else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
-               rc = dump_set_type(DUMP_TYPE_FCP);
-       return (rc != 0) ? rc : len;
-}
-
-static struct kobj_attribute dump_type_attr =
-               __ATTR(dump_type, 0644, dump_type_show, dump_type_store);
-
-static struct kset *dump_kset;
-
-/*
- * Shutdown actions section
- */
-
-static struct kset *shutdown_actions_kset;
-
-/* on panic */
-
-static ssize_t on_panic_show(struct kobject *kobj,
-                            struct kobj_attribute *attr, char *page)
-{
-       return sprintf(page, "%s\n", shutdown_action_str(on_panic_action));
-}
-
-static ssize_t on_panic_store(struct kobject *kobj,
-                             struct kobj_attribute *attr,
-                             const char *buf, size_t len)
-{
-       if (strncmp(buf, SHUTDOWN_REIPL_STR, strlen(SHUTDOWN_REIPL_STR)) == 0)
-               on_panic_action = SHUTDOWN_REIPL;
-       else if (strncmp(buf, SHUTDOWN_DUMP_STR,
-                        strlen(SHUTDOWN_DUMP_STR)) == 0)
-               on_panic_action = SHUTDOWN_DUMP;
-       else if (strncmp(buf, SHUTDOWN_STOP_STR,
-                        strlen(SHUTDOWN_STOP_STR)) == 0)
-               on_panic_action = SHUTDOWN_STOP;
-       else
-               return -EINVAL;
-
-       return len;
-}
-
-static struct kobj_attribute on_panic_attr =
-               __ATTR(on_panic, 0644, on_panic_show, on_panic_store);
-
-void do_reipl(void)
+void reipl_run(struct shutdown_trigger *trigger)
 {
        struct ccw_dev_id devid;
        static char buf[100];
@@ -745,8 +661,6 @@ void do_reipl(void)
        switch (reipl_method) {
        case REIPL_METHOD_CCW_CIO:
                devid.devno = reipl_block_ccw->ipl_info.ccw.devno;
-               if (ipl_info.type == IPL_TYPE_CCW && devid.devno == ipl_devno)
-                       diag308(DIAG308_IPL, NULL);
                devid.ssid  = 0;
                reipl_ccw_dev(&devid);
                break;
@@ -787,113 +701,21 @@ void do_reipl(void)
        default:
                break;
        }
-       signal_processor(smp_processor_id(), sigp_stop_and_store_status);
 }
 
-static void do_dump(void)
+static void __init reipl_probe(void)
 {
-       struct ccw_dev_id devid;
-       static char buf[100];
+       void *buffer;
 
-       switch (dump_method) {
-       case DUMP_METHOD_CCW_CIO:
-               smp_send_stop();
-               devid.devno = dump_block_ccw->ipl_info.ccw.devno;
-               devid.ssid  = 0;
-               reipl_ccw_dev(&devid);
-               break;
-       case DUMP_METHOD_CCW_VM:
-               smp_send_stop();
-               sprintf(buf, "STORE STATUS");
-               __cpcmd(buf, NULL, 0, NULL);
-               sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
-               __cpcmd(buf, NULL, 0, NULL);
-               break;
-       case DUMP_METHOD_CCW_DIAG:
-               diag308(DIAG308_SET, dump_block_ccw);
-               diag308(DIAG308_DUMP, NULL);
-               break;
-       case DUMP_METHOD_FCP_DIAG:
-               diag308(DIAG308_SET, dump_block_fcp);
-               diag308(DIAG308_DUMP, NULL);
-               break;
-       case DUMP_METHOD_NONE:
-       default:
+       buffer = (void *) get_zeroed_page(GFP_KERNEL);
+       if (!buffer)
                return;
-       }
-       printk(KERN_EMERG "Dump failed!\n");
+       if (diag308(DIAG308_STORE, buffer) == DIAG308_RC_OK)
+               diag308_set_works = 1;
+       free_page((unsigned long)buffer);
 }
 
-/* init functions */
-
-static int __init ipl_register_fcp_files(void)
-{
-       int rc;
-
-       rc = sysfs_create_group(&ipl_kset->kobj,
-                               &ipl_fcp_attr_group);
-       if (rc)
-               goto out;
-       rc = sysfs_create_bin_file(&ipl_kset->kobj,
-                                  &ipl_parameter_attr);
-       if (rc)
-               goto out_ipl_parm;
-       rc = sysfs_create_bin_file(&ipl_kset->kobj,
-                                  &ipl_scp_data_attr);
-       if (!rc)
-               goto out;
-
-       sysfs_remove_bin_file(&ipl_kset->kobj, &ipl_parameter_attr);
-
-out_ipl_parm:
-       sysfs_remove_group(&ipl_kset->kobj, &ipl_fcp_attr_group);
-out:
-       return rc;
-}
-
-static int __init ipl_init(void)
-{
-       int rc;
-
-       ipl_kset = kset_create_and_add("ipl", NULL, firmware_kobj);
-       if (!ipl_kset)
-               return -ENOMEM;
-       switch (ipl_info.type) {
-       case IPL_TYPE_CCW:
-               rc = sysfs_create_group(&ipl_kset->kobj,
-                                       &ipl_ccw_attr_group);
-               break;
-       case IPL_TYPE_FCP:
-       case IPL_TYPE_FCP_DUMP:
-               rc = ipl_register_fcp_files();
-               break;
-       case IPL_TYPE_NSS:
-               rc = sysfs_create_group(&ipl_kset->kobj,
-                                       &ipl_nss_attr_group);
-               break;
-       default:
-               rc = sysfs_create_group(&ipl_kset->kobj,
-                                       &ipl_unknown_attr_group);
-               break;
-       }
-       if (rc)
-               kset_unregister(ipl_kset);
-       return rc;
-}
-
-static void __init reipl_probe(void)
-{
-       void *buffer;
-
-       buffer = (void *) get_zeroed_page(GFP_KERNEL);
-       if (!buffer)
-               return;
-       if (diag308(DIAG308_STORE, buffer) == DIAG308_RC_OK)
-               diag308_set_works = 1;
-       free_page((unsigned long)buffer);
-}
-
-static int __init reipl_nss_init(void)
+static int __init reipl_nss_init(void)
 {
        int rc;
 
@@ -923,6 +745,7 @@ static int __init reipl_ccw_init(void)
        reipl_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
        reipl_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
        reipl_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+       reipl_block_ccw->hdr.flags = DIAG308_FLAGS_LP_VALID;
        /* check if read scp info worked and set loadparm */
        if (sclp_ipl_info.is_valid)
                memcpy(reipl_block_ccw->ipl_info.ccw.load_param,
@@ -931,8 +754,7 @@ static int __init reipl_ccw_init(void)
                /* read scp info failed: set empty loadparm (EBCDIC blanks) */
                memset(reipl_block_ccw->ipl_info.ccw.load_param, 0x40,
                       LOADPARM_LEN);
-       /* FIXME: check for diag308_set_works when enabling diag ccw reipl */
-       if (!MACHINE_IS_VM)
+       if (!MACHINE_IS_VM && !diag308_set_works)
                sys_reipl_ccw_loadparm_attr.attr.mode = S_IRUGO;
        if (ipl_info.type == IPL_TYPE_CCW)
                reipl_block_ccw->ipl_info.ccw.devno = ipl_devno;
@@ -970,7 +792,7 @@ static int __init reipl_fcp_init(void)
        return 0;
 }
 
-static int __init reipl_init(void)
+static int reipl_init(void)
 {
        int rc;
 
@@ -997,6 +819,140 @@ static int __init reipl_init(void)
        return 0;
 }
 
+static struct shutdown_action reipl_action = {SHUTDOWN_ACTION_REIPL_STR,
+                                             reipl_run, reipl_init};
+
+/*
+ * dump shutdown action: Dump Linux on shutdown.
+ */
+
+/* FCP dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%016llx\n",
+                  dump_block_fcp->ipl_info.fcp.wwpn);
+DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%016llx\n",
+                  dump_block_fcp->ipl_info.fcp.lun);
+DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
+                  dump_block_fcp->ipl_info.fcp.bootprog);
+DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
+                  dump_block_fcp->ipl_info.fcp.br_lba);
+DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
+                  dump_block_fcp->ipl_info.fcp.devno);
+
+static struct attribute *dump_fcp_attrs[] = {
+       &sys_dump_fcp_device_attr.attr,
+       &sys_dump_fcp_wwpn_attr.attr,
+       &sys_dump_fcp_lun_attr.attr,
+       &sys_dump_fcp_bootprog_attr.attr,
+       &sys_dump_fcp_br_lba_attr.attr,
+       NULL,
+};
+
+static struct attribute_group dump_fcp_attr_group = {
+       .name  = IPL_FCP_STR,
+       .attrs = dump_fcp_attrs,
+};
+
+/* CCW dump device attributes */
+
+DEFINE_IPL_ATTR_RW(dump_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+                  dump_block_ccw->ipl_info.ccw.devno);
+
+static struct attribute *dump_ccw_attrs[] = {
+       &sys_dump_ccw_device_attr.attr,
+       NULL,
+};
+
+static struct attribute_group dump_ccw_attr_group = {
+       .name  = IPL_CCW_STR,
+       .attrs = dump_ccw_attrs,
+};
+
+/* dump type */
+
+static int dump_set_type(enum dump_type type)
+{
+       if (!(dump_capabilities & type))
+               return -EINVAL;
+       switch (type) {
+       case DUMP_TYPE_CCW:
+               if (diag308_set_works)
+                       dump_method = DUMP_METHOD_CCW_DIAG;
+               else if (MACHINE_IS_VM)
+                       dump_method = DUMP_METHOD_CCW_VM;
+               else
+                       dump_method = DUMP_METHOD_CCW_CIO;
+               break;
+       case DUMP_TYPE_FCP:
+               dump_method = DUMP_METHOD_FCP_DIAG;
+               break;
+       default:
+               dump_method = DUMP_METHOD_NONE;
+       }
+       dump_type = type;
+       return 0;
+}
+
+static ssize_t dump_type_show(struct kobject *kobj,
+                             struct kobj_attribute *attr, char *page)
+{
+       return sprintf(page, "%s\n", dump_type_str(dump_type));
+}
+
+static ssize_t dump_type_store(struct kobject *kobj,
+                              struct kobj_attribute *attr,
+                              const char *buf, size_t len)
+{
+       int rc = -EINVAL;
+
+       if (strncmp(buf, DUMP_NONE_STR, strlen(DUMP_NONE_STR)) == 0)
+               rc = dump_set_type(DUMP_TYPE_NONE);
+       else if (strncmp(buf, DUMP_CCW_STR, strlen(DUMP_CCW_STR)) == 0)
+               rc = dump_set_type(DUMP_TYPE_CCW);
+       else if (strncmp(buf, DUMP_FCP_STR, strlen(DUMP_FCP_STR)) == 0)
+               rc = dump_set_type(DUMP_TYPE_FCP);
+       return (rc != 0) ? rc : len;
+}
+
+static struct kobj_attribute dump_type_attr =
+       __ATTR(dump_type, 0644, dump_type_show, dump_type_store);
+
+static struct kset *dump_kset;
+
+static void dump_run(struct shutdown_trigger *trigger)
+{
+       struct ccw_dev_id devid;
+       static char buf[100];
+
+       switch (dump_method) {
+       case DUMP_METHOD_CCW_CIO:
+               smp_send_stop();
+               devid.devno = dump_block_ccw->ipl_info.ccw.devno;
+               devid.ssid  = 0;
+               reipl_ccw_dev(&devid);
+               break;
+       case DUMP_METHOD_CCW_VM:
+               smp_send_stop();
+               sprintf(buf, "STORE STATUS");
+               __cpcmd(buf, NULL, 0, NULL);
+               sprintf(buf, "IPL %X", dump_block_ccw->ipl_info.ccw.devno);
+               __cpcmd(buf, NULL, 0, NULL);
+               break;
+       case DUMP_METHOD_CCW_DIAG:
+               diag308(DIAG308_SET, dump_block_ccw);
+               diag308(DIAG308_DUMP, NULL);
+               break;
+       case DUMP_METHOD_FCP_DIAG:
+               diag308(DIAG308_SET, dump_block_fcp);
+               diag308(DIAG308_DUMP, NULL);
+               break;
+       case DUMP_METHOD_NONE:
+       default:
+               return;
+       }
+       printk(KERN_EMERG "Dump failed!\n");
+}
+
 static int __init dump_ccw_init(void)
 {
        int rc;
@@ -1042,31 +998,14 @@ static int __init dump_fcp_init(void)
        return 0;
 }
 
-#define SHUTDOWN_ON_PANIC_PRIO 0
-
-static int shutdown_on_panic_notify(struct notifier_block *self,
-                                   unsigned long event, void *data)
-{
-       if (on_panic_action == SHUTDOWN_DUMP)
-               do_dump();
-       else if (on_panic_action == SHUTDOWN_REIPL)
-               do_reipl();
-       return NOTIFY_OK;
-}
-
-static struct notifier_block shutdown_on_panic_nb = {
-       .notifier_call = shutdown_on_panic_notify,
-       .priority = SHUTDOWN_ON_PANIC_PRIO
-};
-
-static int __init dump_init(void)
+static int dump_init(void)
 {
        int rc;
 
        dump_kset = kset_create_and_add("dump", NULL, firmware_kobj);
        if (!dump_kset)
                return -ENOMEM;
-       rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr);
+       rc = sysfs_create_file(&dump_kset->kobj, &dump_type_attr.attr);
        if (rc) {
                kset_unregister(dump_kset);
                return rc;
@@ -1081,47 +1020,381 @@ static int __init dump_init(void)
        return 0;
 }
 
-static int __init shutdown_actions_init(void)
+static struct shutdown_action dump_action = {SHUTDOWN_ACTION_DUMP_STR,
+                                            dump_run, dump_init};
+
+/*
+ * vmcmd shutdown action: Trigger vm command on shutdown.
+ */
+
+static char vmcmd_on_reboot[128];
+static char vmcmd_on_panic[128];
+static char vmcmd_on_halt[128];
+static char vmcmd_on_poff[128];
+
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_reboot, "%s\n", "%s\n", vmcmd_on_reboot);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_panic, "%s\n", "%s\n", vmcmd_on_panic);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_halt, "%s\n", "%s\n", vmcmd_on_halt);
+DEFINE_IPL_ATTR_STR_RW(vmcmd, on_poff, "%s\n", "%s\n", vmcmd_on_poff);
+
+static struct attribute *vmcmd_attrs[] = {
+       &sys_vmcmd_on_reboot_attr.attr,
+       &sys_vmcmd_on_panic_attr.attr,
+       &sys_vmcmd_on_halt_attr.attr,
+       &sys_vmcmd_on_poff_attr.attr,
+       NULL,
+};
+
+static struct attribute_group vmcmd_attr_group = {
+       .attrs = vmcmd_attrs,
+};
+
+static struct kset *vmcmd_kset;
+
+static void vmcmd_run(struct shutdown_trigger *trigger)
 {
-       int rc;
+       char *cmd, *next_cmd;
+
+       if (strcmp(trigger->name, ON_REIPL_STR) == 0)
+               cmd = vmcmd_on_reboot;
+       else if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+               cmd = vmcmd_on_panic;
+       else if (strcmp(trigger->name, ON_HALT_STR) == 0)
+               cmd = vmcmd_on_halt;
+       else if (strcmp(trigger->name, ON_POFF_STR) == 0)
+               cmd = vmcmd_on_poff;
+       else
+               return;
+
+       if (strlen(cmd) == 0)
+               return;
+       do {
+               next_cmd = strchr(cmd, '\n');
+               if (next_cmd) {
+                       next_cmd[0] = 0;
+                       next_cmd += 1;
+               }
+               __cpcmd(cmd, NULL, 0, NULL);
+               cmd = next_cmd;
+       } while (cmd != NULL);
+}
+
+static int vmcmd_init(void)
+{
+       if (!MACHINE_IS_VM)
+               return -ENOTSUPP;
+       vmcmd_kset = kset_create_and_add("vmcmd", NULL, firmware_kobj);
+       if (!vmcmd_kset)
+               return -ENOMEM;
+       return sysfs_create_group(&vmcmd_kset->kobj, &vmcmd_attr_group);
+}
+
+static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR,
+                                             vmcmd_run, vmcmd_init};
+
+/*
+ * stop shutdown action: Stop Linux on shutdown.
+ */
+
+static void stop_run(struct shutdown_trigger *trigger)
+{
+       if (strcmp(trigger->name, ON_PANIC_STR) == 0)
+               disabled_wait((unsigned long) __builtin_return_address(0));
+       else {
+               signal_processor(smp_processor_id(), sigp_stop);
+               for (;;);
+       }
+}
+
+static struct shutdown_action stop_action = {SHUTDOWN_ACTION_STOP_STR,
+                                            stop_run, NULL};
+
+/* action list */
+
+static struct shutdown_action *shutdown_actions_list[] = {
+       &ipl_action, &reipl_action, &dump_action, &vmcmd_action, &stop_action};
+#define SHUTDOWN_ACTIONS_COUNT (sizeof(shutdown_actions_list) / sizeof(void *))
+
+/*
+ * Trigger section
+ */
+
+static struct kset *shutdown_actions_kset;
+
+static int set_trigger(const char *buf, struct shutdown_trigger *trigger,
+                      size_t len)
+{
+       int i;
+       for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
+               if (!shutdown_actions_list[i])
+                       continue;
+               if (strncmp(buf, shutdown_actions_list[i]->name,
+                           strlen(shutdown_actions_list[i]->name)) == 0) {
+                       trigger->action = shutdown_actions_list[i];
+                       return len;
+               }
+       }
+       return -EINVAL;
+}
+
+/* on reipl */
+
+static struct shutdown_trigger on_reboot_trigger = {ON_REIPL_STR,
+                                                   &reipl_action};
+
+static ssize_t on_reboot_show(struct kobject *kobj,
+                             struct kobj_attribute *attr, char *page)
+{
+       return sprintf(page, "%s\n", on_reboot_trigger.action->name);
+}
+
+static ssize_t on_reboot_store(struct kobject *kobj,
+                              struct kobj_attribute *attr,
+                              const char *buf, size_t len)
+{
+       return set_trigger(buf, &on_reboot_trigger, len);
+}
+
+static struct kobj_attribute on_reboot_attr =
+       __ATTR(on_reboot, 0644, on_reboot_show, on_reboot_store);
+
+static void do_machine_restart(char *__unused)
+{
+       smp_send_stop();
+       on_reboot_trigger.action->fn(&on_reboot_trigger);
+       reipl_run(NULL);
+}
+void (*_machine_restart)(char *command) = do_machine_restart;
+
+/* on panic */
+
+static struct shutdown_trigger on_panic_trigger = {ON_PANIC_STR, &stop_action};
+
+static ssize_t on_panic_show(struct kobject *kobj,
+                            struct kobj_attribute *attr, char *page)
+{
+       return sprintf(page, "%s\n", on_panic_trigger.action->name);
+}
+
+static ssize_t on_panic_store(struct kobject *kobj,
+                             struct kobj_attribute *attr,
+                             const char *buf, size_t len)
+{
+       return set_trigger(buf, &on_panic_trigger, len);
+}
+
+static struct kobj_attribute on_panic_attr =
+       __ATTR(on_panic, 0644, on_panic_show, on_panic_store);
+
+static void do_panic(void)
+{
+       on_panic_trigger.action->fn(&on_panic_trigger);
+       stop_run(&on_panic_trigger);
+}
+
+/* on halt */
+
+static struct shutdown_trigger on_halt_trigger = {ON_HALT_STR, &stop_action};
+
+static ssize_t on_halt_show(struct kobject *kobj,
+                           struct kobj_attribute *attr, char *page)
+{
+       return sprintf(page, "%s\n", on_halt_trigger.action->name);
+}
+
+static ssize_t on_halt_store(struct kobject *kobj,
+                            struct kobj_attribute *attr,
+                            const char *buf, size_t len)
+{
+       return set_trigger(buf, &on_halt_trigger, len);
+}
+
+static struct kobj_attribute on_halt_attr =
+       __ATTR(on_halt, 0644, on_halt_show, on_halt_store);
+
 
+static void do_machine_halt(void)
+{
+       smp_send_stop();
+       on_halt_trigger.action->fn(&on_halt_trigger);
+       stop_run(&on_halt_trigger);
+}
+void (*_machine_halt)(void) = do_machine_halt;
+
+/* on power off */
+
+static struct shutdown_trigger on_poff_trigger = {ON_POFF_STR, &stop_action};
+
+static ssize_t on_poff_show(struct kobject *kobj,
+                           struct kobj_attribute *attr, char *page)
+{
+       return sprintf(page, "%s\n", on_poff_trigger.action->name);
+}
+
+static ssize_t on_poff_store(struct kobject *kobj,
+                            struct kobj_attribute *attr,
+                            const char *buf, size_t len)
+{
+       return set_trigger(buf, &on_poff_trigger, len);
+}
+
+static struct kobj_attribute on_poff_attr =
+       __ATTR(on_poff, 0644, on_poff_show, on_poff_store);
+
+
+static void do_machine_power_off(void)
+{
+       smp_send_stop();
+       on_poff_trigger.action->fn(&on_poff_trigger);
+       stop_run(&on_poff_trigger);
+}
+void (*_machine_power_off)(void) = do_machine_power_off;
+
+static void __init shutdown_triggers_init(void)
+{
        shutdown_actions_kset = kset_create_and_add("shutdown_actions", NULL,
                                                    firmware_kobj);
        if (!shutdown_actions_kset)
-               return -ENOMEM;
-       rc = sysfs_create_file(&shutdown_actions_kset->kobj, &on_panic_attr);
-       if (rc) {
-               kset_unregister(shutdown_actions_kset);
-               return rc;
+               goto fail;
+       if (sysfs_create_file(&shutdown_actions_kset->kobj,
+                             &on_reboot_attr.attr))
+               goto fail;
+       if (sysfs_create_file(&shutdown_actions_kset->kobj,
+                             &on_panic_attr.attr))
+               goto fail;
+       if (sysfs_create_file(&shutdown_actions_kset->kobj,
+                             &on_halt_attr.attr))
+               goto fail;
+       if (sysfs_create_file(&shutdown_actions_kset->kobj,
+                             &on_poff_attr.attr))
+               goto fail;
+
+       return;
+fail:
+       panic("shutdown_triggers_init failed\n");
+}
+
+static void __init shutdown_actions_init(void)
+{
+       int i;
+
+       for (i = 0; i < SHUTDOWN_ACTIONS_COUNT; i++) {
+               if (!shutdown_actions_list[i]->init)
+                       continue;
+               if (shutdown_actions_list[i]->init())
+                       shutdown_actions_list[i] = NULL;
        }
-       atomic_notifier_chain_register(&panic_notifier_list,
-                                      &shutdown_on_panic_nb);
-       return 0;
 }
 
 static int __init s390_ipl_init(void)
 {
-       int rc;
-
-       sclp_get_ipl_info(&sclp_ipl_info);
        reipl_probe();
-       rc = ipl_init();
-       if (rc)
-               return rc;
-       rc = reipl_init();
-       if (rc)
-               return rc;
-       rc = dump_init();
-       if (rc)
-               return rc;
-       rc = shutdown_actions_init();
-       if (rc)
-               return rc;
+       sclp_get_ipl_info(&sclp_ipl_info);
+       shutdown_actions_init();
+       shutdown_triggers_init();
        return 0;
 }
 
 __initcall(s390_ipl_init);
 
+static void __init strncpy_skip_quote(char *dst, char *src, int n)
+{
+       int sx, dx;
+
+       dx = 0;
+       for (sx = 0; src[sx] != 0; sx++) {
+               if (src[sx] == '"')
+                       continue;
+               dst[dx++] = src[sx];
+               if (dx >= n)
+                       break;
+       }
+}
+
+static int __init vmcmd_on_reboot_setup(char *str)
+{
+       if (!MACHINE_IS_VM)
+               return 1;
+       strncpy_skip_quote(vmcmd_on_reboot, str, 127);
+       vmcmd_on_reboot[127] = 0;
+       on_reboot_trigger.action = &vmcmd_action;
+       return 1;
+}
+__setup("vmreboot=", vmcmd_on_reboot_setup);
+
+static int __init vmcmd_on_panic_setup(char *str)
+{
+       if (!MACHINE_IS_VM)
+               return 1;
+       strncpy_skip_quote(vmcmd_on_panic, str, 127);
+       vmcmd_on_panic[127] = 0;
+       on_panic_trigger.action = &vmcmd_action;
+       return 1;
+}
+__setup("vmpanic=", vmcmd_on_panic_setup);
+
+static int __init vmcmd_on_halt_setup(char *str)
+{
+       if (!MACHINE_IS_VM)
+               return 1;
+       strncpy_skip_quote(vmcmd_on_halt, str, 127);
+       vmcmd_on_halt[127] = 0;
+       on_halt_trigger.action = &vmcmd_action;
+       return 1;
+}
+__setup("vmhalt=", vmcmd_on_halt_setup);
+
+static int __init vmcmd_on_poff_setup(char *str)
+{
+       if (!MACHINE_IS_VM)
+               return 1;
+       strncpy_skip_quote(vmcmd_on_poff, str, 127);
+       vmcmd_on_poff[127] = 0;
+       on_poff_trigger.action = &vmcmd_action;
+       return 1;
+}
+__setup("vmpoff=", vmcmd_on_poff_setup);
+
+static int on_panic_notify(struct notifier_block *self,
+                          unsigned long event, void *data)
+{
+       do_panic();
+       return NOTIFY_OK;
+}
+
+static struct notifier_block on_panic_nb = {
+       .notifier_call = on_panic_notify,
+       .priority = 0,
+};
+
+void __init setup_ipl(void)
+{
+       ipl_info.type = get_ipl_type();
+       switch (ipl_info.type) {
+       case IPL_TYPE_CCW:
+               ipl_info.data.ccw.dev_id.devno = ipl_devno;
+               ipl_info.data.ccw.dev_id.ssid = 0;
+               break;
+       case IPL_TYPE_FCP:
+       case IPL_TYPE_FCP_DUMP:
+               ipl_info.data.fcp.dev_id.devno =
+                       IPL_PARMBLOCK_START->ipl_info.fcp.devno;
+               ipl_info.data.fcp.dev_id.ssid = 0;
+               ipl_info.data.fcp.wwpn = IPL_PARMBLOCK_START->ipl_info.fcp.wwpn;
+               ipl_info.data.fcp.lun = IPL_PARMBLOCK_START->ipl_info.fcp.lun;
+               break;
+       case IPL_TYPE_NSS:
+               strncpy(ipl_info.data.nss.name, kernel_nss_name,
+                       sizeof(ipl_info.data.nss.name));
+               break;
+       case IPL_TYPE_UNKNOWN:
+       default:
+               /* We have no info to copy */
+               break;
+       }
+       atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
+}
+
 void __init ipl_save_parameters(void)
 {
        struct cio_iplinfo iplinfo;
@@ -1202,3 +1475,4 @@ void s390_reset_system(void)
 
        do_reset_calls();
 }
+
index 29f7884..0e7aca0 100644 (file)
@@ -36,7 +36,7 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/notifier.h>
-
+#include <linux/utsname.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -182,13 +182,15 @@ void cpu_idle(void)
 
 void show_regs(struct pt_regs *regs)
 {
-       struct task_struct *tsk = current;
-
-        printk("CPU:    %d    %s\n", task_thread_info(tsk)->cpu, print_tainted());
-        printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
-              current->comm, task_pid_nr(current), (void *) tsk,
-              (void *) tsk->thread.ksp);
-
+       print_modules();
+       printk("CPU: %d %s %s %.*s\n",
+              task_thread_info(current)->cpu, print_tainted(),
+              init_utsname()->release,
+              (int)strcspn(init_utsname()->version, " "),
+              init_utsname()->version);
+       printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
+              current->comm, current->pid, current,
+              (void *) current->thread.ksp);
        show_registers(regs);
        /* Show stack backtrace if pt_regs is from kernel mode */
        if (!(regs->psw.mask & PSW_MASK_PSTATE))
index 1d81bf9..6e036ba 100644 (file)
@@ -86,13 +86,13 @@ FixPerRegisters(struct task_struct *task)
                per_info->control_regs.bits.storage_alt_space_ctl = 0;
 }
 
-static void set_single_step(struct task_struct *task)
+void user_enable_single_step(struct task_struct *task)
 {
        task->thread.per_info.single_step = 1;
        FixPerRegisters(task);
 }
 
-static void clear_single_step(struct task_struct *task)
+void user_disable_single_step(struct task_struct *task)
 {
        task->thread.per_info.single_step = 0;
        FixPerRegisters(task);
@@ -107,7 +107,7 @@ void
 ptrace_disable(struct task_struct *child)
 {
        /* make sure the single step bit is not set. */
-       clear_single_step(child);
+       user_disable_single_step(child);
 }
 
 #ifndef CONFIG_64BIT
@@ -651,7 +651,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data)
                        clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
                child->exit_code = data;
                /* make sure the single step bit is not set. */
-               clear_single_step(child);
+               user_disable_single_step(child);
                wake_up_process(child);
                return 0;
 
@@ -665,7 +665,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data)
                        return 0;
                child->exit_code = SIGKILL;
                /* make sure the single step bit is not set. */
-               clear_single_step(child);
+               user_disable_single_step(child);
                wake_up_process(child);
                return 0;
 
@@ -675,10 +675,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data)
                        return -EIO;
                clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
                child->exit_code = data;
-               if (data)
-                       set_tsk_thread_flag(child, TIF_SINGLE_STEP);
-               else
-                       set_single_step(child);
+               user_enable_single_step(child);
                /* give it a chance to run. */
                wake_up_process(child);
                return 0;
index 577aa7d..766c783 100644 (file)
@@ -125,75 +125,6 @@ void __cpuinit cpu_init(void)
         enter_lazy_tlb(&init_mm, current);
 }
 
-/*
- * VM halt and poweroff setup routines
- */
-char vmhalt_cmd[128] = "";
-char vmpoff_cmd[128] = "";
-static char vmpanic_cmd[128] = "";
-
-static void strncpy_skip_quote(char *dst, char *src, int n)
-{
-        int sx, dx;
-
-        dx = 0;
-        for (sx = 0; src[sx] != 0; sx++) {
-                if (src[sx] == '"') continue;
-                dst[dx++] = src[sx];
-                if (dx >= n) break;
-        }
-}
-
-static int __init vmhalt_setup(char *str)
-{
-        strncpy_skip_quote(vmhalt_cmd, str, 127);
-        vmhalt_cmd[127] = 0;
-        return 1;
-}
-
-__setup("vmhalt=", vmhalt_setup);
-
-static int __init vmpoff_setup(char *str)
-{
-        strncpy_skip_quote(vmpoff_cmd, str, 127);
-        vmpoff_cmd[127] = 0;
-        return 1;
-}
-
-__setup("vmpoff=", vmpoff_setup);
-
-static int vmpanic_notify(struct notifier_block *self, unsigned long event,
-                         void *data)
-{
-       if (MACHINE_IS_VM && strlen(vmpanic_cmd) > 0)
-               cpcmd(vmpanic_cmd, NULL, 0, NULL);
-
-       return NOTIFY_OK;
-}
-
-#define PANIC_PRI_VMPANIC      0
-
-static struct notifier_block vmpanic_nb = {
-       .notifier_call = vmpanic_notify,
-       .priority = PANIC_PRI_VMPANIC
-};
-
-static int __init vmpanic_setup(char *str)
-{
-       static int register_done __initdata = 0;
-
-       strncpy_skip_quote(vmpanic_cmd, str, 127);
-       vmpanic_cmd[127] = 0;
-       if (!register_done) {
-               register_done = 1;
-               atomic_notifier_chain_register(&panic_notifier_list,
-                                              &vmpanic_nb);
-       }
-       return 1;
-}
-
-__setup("vmpanic=", vmpanic_setup);
-
 /*
  * condev= and conmode= setup parameter.
  */
@@ -308,38 +239,6 @@ static void __init setup_zfcpdump(unsigned int console_devno)
 static inline void setup_zfcpdump(unsigned int console_devno) {}
 #endif /* CONFIG_ZFCPDUMP */
 
-#ifdef CONFIG_SMP
-void (*_machine_restart)(char *command) = machine_restart_smp;
-void (*_machine_halt)(void) = machine_halt_smp;
-void (*_machine_power_off)(void) = machine_power_off_smp;
-#else
-/*
- * Reboot, halt and power_off routines for non SMP.
- */
-static void do_machine_restart_nonsmp(char * __unused)
-{
-       do_reipl();
-}
-
-static void do_machine_halt_nonsmp(void)
-{
-        if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0)
-               __cpcmd(vmhalt_cmd, NULL, 0, NULL);
-        signal_processor(smp_processor_id(), sigp_stop_and_store_status);
-}
-
-static void do_machine_power_off_nonsmp(void)
-{
-        if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0)
-               __cpcmd(vmpoff_cmd, NULL, 0, NULL);
-        signal_processor(smp_processor_id(), sigp_stop_and_store_status);
-}
-
-void (*_machine_restart)(char *command) = do_machine_restart_nonsmp;
-void (*_machine_halt)(void) = do_machine_halt_nonsmp;
-void (*_machine_power_off)(void) = do_machine_power_off_nonsmp;
-#endif
-
  /*
  * Reboot, halt and power_off stubs. They just call _machine_restart,
  * _machine_halt or _machine_power_off. 
@@ -559,7 +458,9 @@ setup_resources(void)
        data_resource.start = (unsigned long) &_etext;
        data_resource.end = (unsigned long) &_edata - 1;
 
-       for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+       for (i = 0; i < MEMORY_CHUNKS; i++) {
+               if (!memory_chunk[i].size)
+                       continue;
                res = alloc_bootmem_low(sizeof(struct resource));
                res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
                switch (memory_chunk[i].type) {
@@ -617,7 +518,7 @@ EXPORT_SYMBOL_GPL(real_memory_size);
 static void __init setup_memory_end(void)
 {
        unsigned long memory_size;
-       unsigned long max_mem, max_phys;
+       unsigned long max_mem;
        int i;
 
 #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
@@ -625,10 +526,31 @@ static void __init setup_memory_end(void)
                memory_end = ZFCPDUMP_HSA_SIZE;
 #endif
        memory_size = 0;
-       max_phys = VMALLOC_END_INIT - VMALLOC_MIN_SIZE;
        memory_end &= PAGE_MASK;
 
-       max_mem = memory_end ? min(max_phys, memory_end) : max_phys;
+       max_mem = memory_end ? min(VMALLOC_START, memory_end) : VMALLOC_START;
+       memory_end = min(max_mem, memory_end);
+
+       /*
+        * Make sure all chunks are MAX_ORDER aligned so we don't need the
+        * extra checks that HOLES_IN_ZONE would require.
+        */
+       for (i = 0; i < MEMORY_CHUNKS; i++) {
+               unsigned long start, end;
+               struct mem_chunk *chunk;
+               unsigned long align;
+
+               chunk = &memory_chunk[i];
+               align = 1UL << (MAX_ORDER + PAGE_SHIFT - 1);
+               start = (chunk->addr + align - 1) & ~(align - 1);
+               end = (chunk->addr + chunk->size) & ~(align - 1);
+               if (start >= end)
+                       memset(chunk, 0, sizeof(*chunk));
+               else {
+                       chunk->addr = start;
+                       chunk->size = end - start;
+               }
+       }
 
        for (i = 0; i < MEMORY_CHUNKS; i++) {
                struct mem_chunk *chunk = &memory_chunk[i];
@@ -890,7 +812,7 @@ setup_arch(char **cmdline_p)
 
        parse_early_param();
 
-       setup_ipl_info();
+       setup_ipl();
        setup_memory_end();
        setup_addressing_mode();
        setup_memory();
@@ -899,7 +821,6 @@ setup_arch(char **cmdline_p)
 
         cpu_init();
         __cpu_logical_map[0] = S390_lowcore.cpu_data.cpu_addr;
-       smp_setup_cpu_possible_map();
 
        /*
         * Setup capabilities (ELF_HWCAP & ELF_PLATFORM).
@@ -920,7 +841,7 @@ setup_arch(char **cmdline_p)
 
 void __cpuinit print_cpu_info(struct cpuinfo_S390 *cpuinfo)
 {
-   printk("cpu %d "
+   printk(KERN_INFO "cpu %d "
 #ifdef CONFIG_SMP
            "phys_idx=%d "
 #endif
@@ -996,7 +917,7 @@ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 static void c_stop(struct seq_file *m, void *v)
 {
 }
-struct seq_operations cpuinfo_op = {
+const struct seq_operations cpuinfo_op = {
        .start  = c_start,
        .next   = c_next,
        .stop   = c_stop,
index d264671..4449bf3 100644 (file)
@@ -471,6 +471,7 @@ void do_signal(struct pt_regs *regs)
 
        if (signr > 0) {
                /* Whee!  Actually deliver the signal.  */
+               int ret;
 #ifdef CONFIG_COMPAT
                if (test_thread_flag(TIF_31BIT)) {
                        extern int handle_signal32(unsigned long sig,
@@ -478,15 +479,12 @@ void do_signal(struct pt_regs *regs)
                                                   siginfo_t *info,
                                                   sigset_t *oldset,
                                                   struct pt_regs *regs);
-                       if (handle_signal32(
-                                   signr, &ka, &info, oldset, regs) == 0) {
-                               if (test_thread_flag(TIF_RESTORE_SIGMASK))
-                                       clear_thread_flag(TIF_RESTORE_SIGMASK);
-                       }
-                       return;
+                       ret = handle_signal32(signr, &ka, &info, oldset, regs);
                }
+               else
 #endif
-               if (handle_signal(signr, &ka, &info, oldset, regs) == 0) {
+                       ret = handle_signal(signr, &ka, &info, oldset, regs);
+               if (!ret) {
                        /*
                         * A signal was successfully delivered; the saved
                         * sigmask will have been stored in the signal frame,
@@ -495,6 +493,14 @@ void do_signal(struct pt_regs *regs)
                         */
                        if (test_thread_flag(TIF_RESTORE_SIGMASK))
                                clear_thread_flag(TIF_RESTORE_SIGMASK);
+
+                       /*
+                        * If we would have taken a single-step trap
+                        * for a normal instruction, act like we took
+                        * one for the handler setup.
+                        */
+                       if (current->thread.per_info.single_step)
+                               set_thread_flag(TIF_SINGLE_STEP);
                }
                return;
        }
index 264ea90..aa37fa1 100644 (file)
@@ -42,6 +42,7 @@
 #include <asm/tlbflush.h>
 #include <asm/timer.h>
 #include <asm/lowcore.h>
+#include <asm/sclp.h>
 #include <asm/cpu.h>
 
 /*
@@ -53,11 +54,27 @@ EXPORT_SYMBOL(lowcore_ptr);
 cpumask_t cpu_online_map = CPU_MASK_NONE;
 EXPORT_SYMBOL(cpu_online_map);
 
-cpumask_t cpu_possible_map = CPU_MASK_NONE;
+cpumask_t cpu_possible_map = CPU_MASK_ALL;
 EXPORT_SYMBOL(cpu_possible_map);
 
 static struct task_struct *current_set[NR_CPUS];
 
+static u8 smp_cpu_type;
+static int smp_use_sigp_detection;
+
+enum s390_cpu_state {
+       CPU_STATE_STANDBY,
+       CPU_STATE_CONFIGURED,
+};
+
+#ifdef CONFIG_HOTPLUG_CPU
+static DEFINE_MUTEX(smp_cpu_state_mutex);
+#endif
+static int smp_cpu_state[NR_CPUS];
+
+static DEFINE_PER_CPU(struct cpu, cpu_devices);
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+
 static void smp_ext_bitcall(int, ec_bit_sig);
 
 /*
@@ -193,6 +210,33 @@ int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
 }
 EXPORT_SYMBOL(smp_call_function_single);
 
+/**
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
+ * @mask: The set of cpus to run on.  Must not include the current cpu.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * If @wait is true, then returns once @func has returned; otherwise
+ * it returns just before the target cpu calls @func.
+ *
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.
+ */
+int
+smp_call_function_mask(cpumask_t mask,
+                       void (*func)(void *), void *info,
+                       int wait)
+{
+       preempt_disable();
+       __smp_call_function_map(func, info, 0, wait, mask);
+       preempt_enable();
+       return 0;
+}
+EXPORT_SYMBOL(smp_call_function_mask);
+
 void smp_send_stop(void)
 {
        int cpu, rc;
@@ -216,33 +260,6 @@ void smp_send_stop(void)
        }
 }
 
-/*
- * Reboot, halt and power_off routines for SMP.
- */
-void machine_restart_smp(char *__unused)
-{
-       smp_send_stop();
-       do_reipl();
-}
-
-void machine_halt_smp(void)
-{
-       smp_send_stop();
-       if (MACHINE_IS_VM && strlen(vmhalt_cmd) > 0)
-               __cpcmd(vmhalt_cmd, NULL, 0, NULL);
-       signal_processor(smp_processor_id(), sigp_stop_and_store_status);
-       for (;;);
-}
-
-void machine_power_off_smp(void)
-{
-       smp_send_stop();
-       if (MACHINE_IS_VM && strlen(vmpoff_cmd) > 0)
-               __cpcmd(vmpoff_cmd, NULL, 0, NULL);
-       signal_processor(smp_processor_id(), sigp_stop_and_store_status);
-       for (;;);
-}
-
 /*
  * This is the main routine where commands issued by other
  * cpus are handled.
@@ -355,6 +372,13 @@ void smp_ctl_clear_bit(int cr, int bit)
 }
 EXPORT_SYMBOL(smp_ctl_clear_bit);
 
+/*
+ * In early ipl state a temporary logical cpu number is needed, so the sigp
+ * functions can be used to sense other cpus. Since NR_CPUS is >= 2 on
+ * CONFIG_SMP and the ipl cpu is logical cpu 0, it must be 1.
+ */
+#define CPU_INIT_NO    1
+
 #if defined(CONFIG_ZFCPDUMP) || defined(CONFIG_ZFCPDUMP_MODULE)
 
 /*
@@ -375,9 +399,10 @@ static void __init smp_get_save_area(unsigned int cpu, unsigned int phy_cpu)
                       "kernel was compiled with NR_CPUS=%i\n", cpu, NR_CPUS);
                return;
        }
-       zfcpdump_save_areas[cpu] = alloc_bootmem(sizeof(union save_area));
-       __cpu_logical_map[1] = (__u16) phy_cpu;
-       while (signal_processor(1, sigp_stop_and_store_status) == sigp_busy)
+       zfcpdump_save_areas[cpu] = kmalloc(sizeof(union save_area), GFP_KERNEL);
+       __cpu_logical_map[CPU_INIT_NO] = (__u16) phy_cpu;
+       while (signal_processor(CPU_INIT_NO, sigp_stop_and_store_status) ==
+              sigp_busy)
                cpu_relax();
        memcpy(zfcpdump_save_areas[cpu],
               (void *)(unsigned long) store_prefix() + SAVE_AREA_BASE,
@@ -397,32 +422,155 @@ static inline void smp_get_save_area(unsigned int cpu, unsigned int phy_cpu) { }
 
 #endif /* CONFIG_ZFCPDUMP || CONFIG_ZFCPDUMP_MODULE */
 
-/*
- * Lets check how many CPUs we have.
- */
-static unsigned int __init smp_count_cpus(void)
+static int cpu_stopped(int cpu)
 {
-       unsigned int cpu, num_cpus;
-       __u16 boot_cpu_addr;
+       __u32 status;
 
-       /*
-        * cpu 0 is the boot cpu. See smp_prepare_boot_cpu.
-        */
+       /* Check for stopped state */
+       if (signal_processor_ps(&status, 0, cpu, sigp_sense) ==
+           sigp_status_stored) {
+               if (status & 0x40)
+                       return 1;
+       }
+       return 0;
+}
+
+static int cpu_known(int cpu_id)
+{
+       int cpu;
+
+       for_each_present_cpu(cpu) {
+               if (__cpu_logical_map[cpu] == cpu_id)
+                       return 1;
+       }
+       return 0;
+}
+
+static int smp_rescan_cpus_sigp(cpumask_t avail)
+{
+       int cpu_id, logical_cpu;
+
+       logical_cpu = first_cpu(avail);
+       if (logical_cpu == NR_CPUS)
+               return 0;
+       for (cpu_id = 0; cpu_id <= 65535; cpu_id++) {
+               if (cpu_known(cpu_id))
+                       continue;
+               __cpu_logical_map[logical_cpu] = cpu_id;
+               if (!cpu_stopped(logical_cpu))
+                       continue;
+               cpu_set(logical_cpu, cpu_present_map);
+               smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
+               logical_cpu = next_cpu(logical_cpu, avail);
+               if (logical_cpu == NR_CPUS)
+                       break;
+       }
+       return 0;
+}
+
+static int smp_rescan_cpus_sclp(cpumask_t avail)
+{
+       struct sclp_cpu_info *info;
+       int cpu_id, logical_cpu, cpu;
+       int rc;
+
+       logical_cpu = first_cpu(avail);
+       if (logical_cpu == NR_CPUS)
+               return 0;
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               return -ENOMEM;
+       rc = sclp_get_cpu_info(info);
+       if (rc)
+               goto out;
+       for (cpu = 0; cpu < info->combined; cpu++) {
+               if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type)
+                       continue;
+               cpu_id = info->cpu[cpu].address;
+               if (cpu_known(cpu_id))
+                       continue;
+               __cpu_logical_map[logical_cpu] = cpu_id;
+               cpu_set(logical_cpu, cpu_present_map);
+               if (cpu >= info->configured)
+                       smp_cpu_state[logical_cpu] = CPU_STATE_STANDBY;
+               else
+                       smp_cpu_state[logical_cpu] = CPU_STATE_CONFIGURED;
+               logical_cpu = next_cpu(logical_cpu, avail);
+               if (logical_cpu == NR_CPUS)
+                       break;
+       }
+out:
+       kfree(info);
+       return rc;
+}
+
+static int smp_rescan_cpus(void)
+{
+       cpumask_t avail;
+
+       cpus_xor(avail, cpu_possible_map, cpu_present_map);
+       if (smp_use_sigp_detection)
+               return smp_rescan_cpus_sigp(avail);
+       else
+               return smp_rescan_cpus_sclp(avail);
+}
+
+static void __init smp_detect_cpus(void)
+{
+       unsigned int cpu, c_cpus, s_cpus;
+       struct sclp_cpu_info *info;
+       u16 boot_cpu_addr, cpu_addr;
+
+       c_cpus = 1;
+       s_cpus = 0;
        boot_cpu_addr = S390_lowcore.cpu_data.cpu_addr;
-       current_thread_info()->cpu = 0;
-       num_cpus = 1;
-       for (cpu = 0; cpu <= 65535; cpu++) {
-               if ((__u16) cpu == boot_cpu_addr)
+       info = kmalloc(sizeof(*info), GFP_KERNEL);
+       if (!info)
+               panic("smp_detect_cpus failed to allocate memory\n");
+       /* Use sigp detection algorithm if sclp doesn't work. */
+       if (sclp_get_cpu_info(info)) {
+               smp_use_sigp_detection = 1;
+               for (cpu = 0; cpu <= 65535; cpu++) {
+                       if (cpu == boot_cpu_addr)
+                               continue;
+                       __cpu_logical_map[CPU_INIT_NO] = cpu;
+                       if (!cpu_stopped(CPU_INIT_NO))
+                               continue;
+                       smp_get_save_area(c_cpus, cpu);
+                       c_cpus++;
+               }
+               goto out;
+       }
+
+       if (info->has_cpu_type) {
+               for (cpu = 0; cpu < info->combined; cpu++) {
+                       if (info->cpu[cpu].address == boot_cpu_addr) {
+                               smp_cpu_type = info->cpu[cpu].type;
+                               break;
+                       }
+               }
+       }
+
+       for (cpu = 0; cpu < info->combined; cpu++) {
+               if (info->has_cpu_type && info->cpu[cpu].type != smp_cpu_type)
+                       continue;
+               cpu_addr = info->cpu[cpu].address;
+               if (cpu_addr == boot_cpu_addr)
                        continue;
-               __cpu_logical_map[1] = (__u16) cpu;
-               if (signal_processor(1, sigp_sense) == sigp_not_operational)
+               __cpu_logical_map[CPU_INIT_NO] = cpu_addr;
+               if (!cpu_stopped(CPU_INIT_NO)) {
+                       s_cpus++;
                        continue;
-               smp_get_save_area(num_cpus, cpu);
-               num_cpus++;
+               }
+               smp_get_save_area(c_cpus, cpu_addr);
+               c_cpus++;
        }
-       printk("Detected %d CPU's\n", (int) num_cpus);
-       printk("Boot cpu address %2X\n", boot_cpu_addr);
-       return num_cpus;
+out:
+       kfree(info);
+       printk(KERN_INFO "CPUs: %d configured, %d standby\n", c_cpus, s_cpus);
+       get_online_cpus();
+       smp_rescan_cpus();
+       put_online_cpus();
 }
 
 /*
@@ -453,8 +601,6 @@ int __cpuinit start_secondary(void *cpuvoid)
        return 0;
 }
 
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
-
 static void __init smp_create_idle(unsigned int cpu)
 {
        struct task_struct *p;
@@ -470,37 +616,82 @@ static void __init smp_create_idle(unsigned int cpu)
        spin_lock_init(&(&per_cpu(s390_idle, cpu))->lock);
 }
 
-static int cpu_stopped(int cpu)
+static int __cpuinit smp_alloc_lowcore(int cpu)
 {
-       __u32 status;
+       unsigned long async_stack, panic_stack;
+       struct _lowcore *lowcore;
+       int lc_order;
+
+       lc_order = sizeof(long) == 8 ? 1 : 0;
+       lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
+       if (!lowcore)
+               return -ENOMEM;
+       async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+       if (!async_stack)
+               goto out_async_stack;
+       panic_stack = __get_free_page(GFP_KERNEL);
+       if (!panic_stack)
+               goto out_panic_stack;
+
+       *lowcore = S390_lowcore;
+       lowcore->async_stack = async_stack + ASYNC_SIZE;
+       lowcore->panic_stack = panic_stack + PAGE_SIZE;
 
-       /* Check for stopped state */
-       if (signal_processor_ps(&status, 0, cpu, sigp_sense) ==
-           sigp_status_stored) {
-               if (status & 0x40)
-                       return 1;
+#ifndef CONFIG_64BIT
+       if (MACHINE_HAS_IEEE) {
+               unsigned long save_area;
+
+               save_area = get_zeroed_page(GFP_KERNEL);
+               if (!save_area)
+                       goto out_save_area;
+               lowcore->extended_save_area_addr = (u32) save_area;
        }
+#endif
+       lowcore_ptr[cpu] = lowcore;
        return 0;
+
+#ifndef CONFIG_64BIT
+out_save_area:
+       free_page(panic_stack);
+#endif
+out_panic_stack:
+       free_pages(async_stack, ASYNC_ORDER);
+out_async_stack:
+       free_pages((unsigned long) lowcore, lc_order);
+       return -ENOMEM;
 }
 
-/* Upping and downing of CPUs */
+#ifdef CONFIG_HOTPLUG_CPU
+static void smp_free_lowcore(int cpu)
+{
+       struct _lowcore *lowcore;
+       int lc_order;
+
+       lc_order = sizeof(long) == 8 ? 1 : 0;
+       lowcore = lowcore_ptr[cpu];
+#ifndef CONFIG_64BIT
+       if (MACHINE_HAS_IEEE)
+               free_page((unsigned long) lowcore->extended_save_area_addr);
+#endif
+       free_page(lowcore->panic_stack - PAGE_SIZE);
+       free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
+       free_pages((unsigned long) lowcore, lc_order);
+       lowcore_ptr[cpu] = NULL;
+}
+#endif /* CONFIG_HOTPLUG_CPU */
 
-int __cpu_up(unsigned int cpu)
+/* Upping and downing of CPUs */
+int __cpuinit __cpu_up(unsigned int cpu)
 {
        struct task_struct *idle;
        struct _lowcore *cpu_lowcore;
        struct stack_frame *sf;
        sigp_ccode ccode;
-       int curr_cpu;
 
-       for (curr_cpu = 0; curr_cpu <= 65535; curr_cpu++) {
-               __cpu_logical_map[cpu] = (__u16) curr_cpu;
-               if (cpu_stopped(cpu))
-                       break;
-       }
-
-       if (!cpu_stopped(cpu))
-               return -ENODEV;
+       if (smp_cpu_state[cpu] != CPU_STATE_CONFIGURED)
+               return -EIO;
+       if (smp_alloc_lowcore(cpu))
+               return -ENOMEM;
 
        ccode = signal_processor_p((__u32)(unsigned long)(lowcore_ptr[cpu]),
                                   cpu, sigp_set_prefix);
@@ -515,6 +706,7 @@ int __cpu_up(unsigned int cpu)
        cpu_lowcore = lowcore_ptr[cpu];
        cpu_lowcore->kernel_stack = (unsigned long)
                task_stack_page(idle) + THREAD_SIZE;
+       cpu_lowcore->thread_info = (unsigned long) task_thread_info(idle);
        sf = (struct stack_frame *) (cpu_lowcore->kernel_stack
                                     - sizeof(struct pt_regs)
                                     - sizeof(struct stack_frame));
@@ -528,6 +720,8 @@ int __cpu_up(unsigned int cpu)
        cpu_lowcore->percpu_offset = __per_cpu_offset[cpu];
        cpu_lowcore->current_task = (unsigned long) idle;
        cpu_lowcore->cpu_data.cpu_nr = cpu;
+       cpu_lowcore->softirq_pending = 0;
+       cpu_lowcore->ext_call_fast = 0;
        eieio();
 
        while (signal_processor(cpu, sigp_restart) == sigp_busy)
@@ -538,44 +732,20 @@ int __cpu_up(unsigned int cpu)
        return 0;
 }
 
-static unsigned int __initdata additional_cpus;
-static unsigned int __initdata possible_cpus;
-
-void __init smp_setup_cpu_possible_map(void)
+static int __init setup_possible_cpus(char *s)
 {
-       unsigned int phy_cpus, pos_cpus, cpu;
-
-       phy_cpus = smp_count_cpus();
-       pos_cpus = min(phy_cpus + additional_cpus, (unsigned int) NR_CPUS);
-
-       if (possible_cpus)
-               pos_cpus = min(possible_cpus, (unsigned int) NR_CPUS);
+       int pcpus, cpu;
 
-       for (cpu = 0; cpu < pos_cpus; cpu++)
+       pcpus = simple_strtoul(s, NULL, 0);
+       cpu_possible_map = cpumask_of_cpu(0);
+       for (cpu = 1; cpu < pcpus && cpu < NR_CPUS; cpu++)
                cpu_set(cpu, cpu_possible_map);
-
-       phy_cpus = min(phy_cpus, pos_cpus);
-
-       for (cpu = 0; cpu < phy_cpus; cpu++)
-               cpu_set(cpu, cpu_present_map);
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-
-static int __init setup_additional_cpus(char *s)
-{
-       additional_cpus = simple_strtoul(s, NULL, 0);
-       return 0;
-}
-early_param("additional_cpus", setup_additional_cpus);
-
-static int __init setup_possible_cpus(char *s)
-{
-       possible_cpus = simple_strtoul(s, NULL, 0);
        return 0;
 }
 early_param("possible_cpus", setup_possible_cpus);
 
+#ifdef CONFIG_HOTPLUG_CPU
+
 int __cpu_disable(void)
 {
        struct ec_creg_mask_parms cr_parms;
@@ -612,7 +782,8 @@ void __cpu_die(unsigned int cpu)
        /* Wait until target cpu is down */
        while (!smp_cpu_not_running(cpu))
                cpu_relax();
-       printk("Processor %d spun down\n", cpu);
+       smp_free_lowcore(cpu);
+       printk(KERN_INFO "Processor %d spun down\n", cpu);
 }
 
 void cpu_die(void)
@@ -625,49 +796,19 @@ void cpu_die(void)
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
-/*
- *     Cycle through the processors and setup structures.
- */
-
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-       unsigned long stack;
        unsigned int cpu;
-       int i;
+
+       smp_detect_cpus();
 
        /* request the 0x1201 emergency signal external interrupt */
        if (register_external_interrupt(0x1201, do_ext_call_interrupt) != 0)
                panic("Couldn't request external interrupt 0x1201");
        memset(lowcore_ptr, 0, sizeof(lowcore_ptr));
-       /*
-        *  Initialize prefix pages and stacks for all possible cpus
-        */
        print_cpu_info(&S390_lowcore.cpu_data);
+       smp_alloc_lowcore(smp_processor_id());
 
-       for_each_possible_cpu(i) {
-               lowcore_ptr[i] = (struct _lowcore *)
-                       __get_free_pages(GFP_KERNEL | GFP_DMA,
-                                        sizeof(void*) == 8 ? 1 : 0);
-               stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
-               if (!lowcore_ptr[i] || !stack)
-                       panic("smp_boot_cpus failed to allocate memory\n");
-
-               *(lowcore_ptr[i]) = S390_lowcore;
-               lowcore_ptr[i]->async_stack = stack + ASYNC_SIZE;
-               stack = __get_free_pages(GFP_KERNEL, 0);
-               if (!stack)
-                       panic("smp_boot_cpus failed to allocate memory\n");
-               lowcore_ptr[i]->panic_stack = stack + PAGE_SIZE;
-#ifndef CONFIG_64BIT
-               if (MACHINE_HAS_IEEE) {
-                       lowcore_ptr[i]->extended_save_area_addr =
-                               (__u32) __get_free_pages(GFP_KERNEL, 0);
-                       if (!lowcore_ptr[i]->extended_save_area_addr)
-                               panic("smp_boot_cpus failed to "
-                                     "allocate memory\n");
-               }
-#endif
-       }
 #ifndef CONFIG_64BIT
        if (MACHINE_HAS_IEEE)
                ctl_set_bit(14, 29); /* enable extended save area */
@@ -683,15 +824,17 @@ void __init smp_prepare_boot_cpu(void)
 {
        BUG_ON(smp_processor_id() != 0);
 
+       current_thread_info()->cpu = 0;
+       cpu_set(0, cpu_present_map);
        cpu_set(0, cpu_online_map);
        S390_lowcore.percpu_offset = __per_cpu_offset[0];
        current_set[0] = current;
+       smp_cpu_state[0] = CPU_STATE_CONFIGURED;
        spin_lock_init(&(&__get_cpu_var(s390_idle))->lock);
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
 {
-       cpu_present_map = cpu_possible_map;
 }
 
 /*
@@ -705,7 +848,79 @@ int setup_profiling_timer(unsigned int multiplier)
        return 0;
 }
 
-static DEFINE_PER_CPU(struct cpu, cpu_devices);
+#ifdef CONFIG_HOTPLUG_CPU
+static ssize_t cpu_configure_show(struct sys_device *dev, char *buf)
+{
+       ssize_t count;
+
+       mutex_lock(&smp_cpu_state_mutex);
+       count = sprintf(buf, "%d\n", smp_cpu_state[dev->id]);
+       mutex_unlock(&smp_cpu_state_mutex);
+       return count;
+}
+
+static ssize_t cpu_configure_store(struct sys_device *dev, const char *buf,
+                                  size_t count)
+{
+       int cpu = dev->id;
+       int val, rc;
+       char delim;
+
+       if (sscanf(buf, "%d %c", &val, &delim) != 1)
+               return -EINVAL;
+       if (val != 0 && val != 1)
+               return -EINVAL;
+
+       mutex_lock(&smp_cpu_state_mutex);
+       get_online_cpus();
+       rc = -EBUSY;
+       if (cpu_online(cpu))
+               goto out;
+       rc = 0;
+       switch (val) {
+       case 0:
+               if (smp_cpu_state[cpu] == CPU_STATE_CONFIGURED) {
+                       rc = sclp_cpu_deconfigure(__cpu_logical_map[cpu]);
+                       if (!rc)
+                               smp_cpu_state[cpu] = CPU_STATE_STANDBY;
+               }
+               break;
+       case 1:
+               if (smp_cpu_state[cpu] == CPU_STATE_STANDBY) {
+                       rc = sclp_cpu_configure(__cpu_logical_map[cpu]);
+                       if (!rc)
+                               smp_cpu_state[cpu] = CPU_STATE_CONFIGURED;
+               }
+               break;
+       default:
+               break;
+       }
+out:
+       put_online_cpus();
+       mutex_unlock(&smp_cpu_state_mutex);
+       return rc ? rc : count;
+}
+static SYSDEV_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static ssize_t show_cpu_address(struct sys_device *dev, char *buf)
+{
+       return sprintf(buf, "%d\n", __cpu_logical_map[dev->id]);
+}
+static SYSDEV_ATTR(address, 0444, show_cpu_address, NULL);
+
+
+static struct attribute *cpu_common_attrs[] = {
+#ifdef CONFIG_HOTPLUG_CPU
+       &attr_configure.attr,
+#endif
+       &attr_address.attr,
+       NULL,
+};
+
+static struct attribute_group cpu_common_attr_group = {
+       .attrs = cpu_common_attrs,
+};
 
 static ssize_t show_capability(struct sys_device *dev, char *buf)
 {
@@ -750,15 +965,15 @@ static ssize_t show_idle_time(struct sys_device *dev, char *buf)
 }
 static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
 
-static struct attribute *cpu_attrs[] = {
+static struct attribute *cpu_online_attrs[] = {
        &attr_capability.attr,
        &attr_idle_count.attr,
        &attr_idle_time_us.attr,
        NULL,
 };
 
-static struct attribute_group cpu_attr_group = {
-       .attrs = cpu_attrs,
+static struct attribute_group cpu_online_attr_group = {
+       .attrs = cpu_online_attrs,
 };
 
 static int __cpuinit smp_cpu_notify(struct notifier_block *self,
@@ -778,12 +993,12 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,
                idle->idle_time = 0;
                idle->idle_count = 0;
                spin_unlock_irq(&idle->lock);
-               if (sysfs_create_group(&s->kobj, &cpu_attr_group))
+               if (sysfs_create_group(&s->kobj, &cpu_online_attr_group))
                        return NOTIFY_BAD;
                break;
        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
-               sysfs_remove_group(&s->kobj, &cpu_attr_group);
+               sysfs_remove_group(&s->kobj, &cpu_online_attr_group);
                break;
        }
        return NOTIFY_OK;
@@ -793,6 +1008,62 @@ static struct notifier_block __cpuinitdata smp_cpu_nb = {
        .notifier_call = smp_cpu_notify,
 };
 
+static int smp_add_present_cpu(int cpu)
+{
+       struct cpu *c = &per_cpu(cpu_devices, cpu);
+       struct sys_device *s = &c->sysdev;
+       int rc;
+
+       c->hotpluggable = 1;
+       rc = register_cpu(c, cpu);
+       if (rc)
+               goto out;
+       rc = sysfs_create_group(&s->kobj, &cpu_common_attr_group);
+       if (rc)
+               goto out_cpu;
+       if (!cpu_online(cpu))
+               goto out;
+       rc = sysfs_create_group(&s->kobj, &cpu_online_attr_group);
+       if (!rc)
+               return 0;
+       sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
+out_cpu:
+#ifdef CONFIG_HOTPLUG_CPU
+       unregister_cpu(c);
+#endif
+out:
+       return rc;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static ssize_t rescan_store(struct sys_device *dev, const char *buf,
+                           size_t count)
+{
+       cpumask_t newcpus;
+       int cpu;
+       int rc;
+
+       mutex_lock(&smp_cpu_state_mutex);
+       get_online_cpus();
+       newcpus = cpu_present_map;
+       rc = smp_rescan_cpus();
+       if (rc)
+               goto out;
+       cpus_andnot(newcpus, cpu_present_map, newcpus);
+       for_each_cpu_mask(cpu, newcpus) {
+               rc = smp_add_present_cpu(cpu);
+               if (rc)
+                       cpu_clear(cpu, cpu_present_map);
+       }
+       rc = 0;
+out:
+       put_online_cpus();
+       mutex_unlock(&smp_cpu_state_mutex);
+       return rc ? rc : count;
+}
+static SYSDEV_ATTR(rescan, 0200, NULL, rescan_store);
+#endif /* CONFIG_HOTPLUG_CPU */
+
 static int __init topology_init(void)
 {
        int cpu;
@@ -800,16 +1071,14 @@ static int __init topology_init(void)
 
        register_cpu_notifier(&smp_cpu_nb);
 
-       for_each_possible_cpu(cpu) {
-               struct cpu *c = &per_cpu(cpu_devices, cpu);
-               struct sys_device *s = &c->sysdev;
-
-               c->hotpluggable = 1;
-               register_cpu(c, cpu);
-               if (!cpu_online(cpu))
-                       continue;
-               s = &c->sysdev;
-               rc = sysfs_create_group(&s->kobj, &cpu_attr_group);
+#ifdef CONFIG_HOTPLUG_CPU
+       rc = sysfs_create_file(&cpu_sysdev_class.kset.kobj,
+                              &attr_rescan.attr);
+       if (rc)
+               return rc;
+#endif
+       for_each_present_cpu(cpu) {
+               rc = smp_add_present_cpu(cpu);
                if (rc)
                        return rc;
        }
index 8ed16a8..52b8342 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/reboot.h>
 #include <linux/kprobes.h>
 #include <linux/bug.h>
+#include <linux/utsname.h>
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -168,9 +169,16 @@ void show_stack(struct task_struct *task, unsigned long *sp)
  */
 void dump_stack(void)
 {
+       printk("CPU: %d %s %s %.*s\n",
+              task_thread_info(current)->cpu, print_tainted(),
+              init_utsname()->release,
+              (int)strcspn(init_utsname()->version, " "),
+              init_utsname()->version);
+       printk("Process %s (pid: %d, task: %p, ksp: %p)\n",
+              current->comm, current->pid, current,
+              (void *) current->thread.ksp);
        show_stack(NULL, NULL);
 }
-
 EXPORT_SYMBOL(dump_stack);
 
 static inline int mask_bits(struct pt_regs *regs, unsigned long bits)
@@ -258,8 +266,14 @@ void die(const char * str, struct pt_regs * regs, long err)
        console_verbose();
        spin_lock_irq(&die_lock);
        bust_spinlocks(1);
-       printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
-       print_modules();
+       printk("%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
+#ifdef CONFIG_PREEMPT
+       printk("PREEMPT ");
+#endif
+#ifdef CONFIG_SMP
+       printk("SMP");
+#endif
+       printk("\n");
        notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV);
        show_regs(regs);
        bust_spinlocks(0);
index 849120e..9361591 100644 (file)
@@ -17,6 +17,12 @@ ENTRY(_start)
 jiffies = jiffies_64;
 #endif
 
+PHDRS {
+       text PT_LOAD FLAGS(5);  /* R_E */
+       data PT_LOAD FLAGS(7);  /* RWE */
+       note PT_NOTE FLAGS(0);  /* ___ */
+}
+
 SECTIONS
 {
        . = 0x00000000;
@@ -33,6 +39,9 @@ SECTIONS
 
        _etext = .;             /* End of text section */
 
+       NOTES :text :note
+       BUG_TABLE :text
+
        RODATA
 
 #ifdef CONFIG_SHARED_KERNEL
@@ -49,9 +58,6 @@ SECTIONS
                __stop___ex_table = .;
        }
 
-       NOTES
-       BUG_TABLE
-
        .data : {               /* Data */
                DATA_DATA
                CONSTRUCTORS
index 8d76403..e41f400 100644 (file)
@@ -39,7 +39,7 @@ static inline void _raw_yield_cpu(int cpu)
                _raw_yield();
 }
 
-void _raw_spin_lock_wait(raw_spinlock_t *lp, unsigned int pc)
+void _raw_spin_lock_wait(raw_spinlock_t *lp)
 {
        int count = spin_retry;
        unsigned int cpu = ~smp_processor_id();
@@ -53,15 +53,36 @@ void _raw_spin_lock_wait(raw_spinlock_t *lp, unsigned int pc)
                }
                if (__raw_spin_is_locked(lp))
                        continue;
-               if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) {
-                       lp->owner_pc = pc;
+               if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
                        return;
-               }
        }
 }
 EXPORT_SYMBOL(_raw_spin_lock_wait);
 
-int _raw_spin_trylock_retry(raw_spinlock_t *lp, unsigned int pc)
+void _raw_spin_lock_wait_flags(raw_spinlock_t *lp, unsigned long flags)
+{
+       int count = spin_retry;
+       unsigned int cpu = ~smp_processor_id();
+
+       local_irq_restore(flags);
+       while (1) {
+               if (count-- <= 0) {
+                       unsigned int owner = lp->owner_cpu;
+                       if (owner != 0)
+                               _raw_yield_cpu(~owner);
+                       count = spin_retry;
+               }
+               if (__raw_spin_is_locked(lp))
+                       continue;
+               local_irq_disable();
+               if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
+                       return;
+               local_irq_restore(flags);
+       }
+}
+EXPORT_SYMBOL(_raw_spin_lock_wait_flags);
+
+int _raw_spin_trylock_retry(raw_spinlock_t *lp)
 {
        unsigned int cpu = ~smp_processor_id();
        int count;
@@ -69,10 +90,8 @@ int _raw_spin_trylock_retry(raw_spinlock_t *lp, unsigned int pc)
        for (count = spin_retry; count > 0; count--) {
                if (__raw_spin_is_locked(lp))
                        continue;
-               if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) {
-                       lp->owner_pc = pc;
+               if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0)
                        return 1;
-               }
        }
        return 0;
 }
index 394980b..880b0eb 100644 (file)
@@ -83,7 +83,7 @@ struct dcss_segment {
 };
 
 static DEFINE_MUTEX(dcss_lock);
-static struct list_head dcss_list = LIST_HEAD_INIT(dcss_list);
+static LIST_HEAD(dcss_list);
 static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC",
                                        "EW/EN-MIXED" };
 
index fb9c5a8..79d13a1 100644 (file)
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
 
-unsigned long vmalloc_end;
-EXPORT_SYMBOL(vmalloc_end);
-
-static struct page *vmem_map;
 static DEFINE_MUTEX(vmem_mutex);
 
 struct memory_segment {
@@ -188,8 +184,8 @@ static int vmem_add_mem_map(unsigned long start, unsigned long size)
        pte_t  pte;
        int ret = -ENOMEM;
 
-       map_start = vmem_map + PFN_DOWN(start);
-       map_end = vmem_map + PFN_DOWN(start + size);
+       map_start = VMEM_MAP + PFN_DOWN(start);
+       map_end = VMEM_MAP + PFN_DOWN(start + size);
 
        start_addr = (unsigned long) map_start & PAGE_MASK;
        end_addr = PFN_ALIGN((unsigned long) map_end);
@@ -240,10 +236,10 @@ static int vmem_add_mem(unsigned long start, unsigned long size)
 {
        int ret;
 
-       ret = vmem_add_range(start, size);
+       ret = vmem_add_mem_map(start, size);
        if (ret)
                return ret;
-       return vmem_add_mem_map(start, size);
+       return vmem_add_range(start, size);
 }
 
 /*
@@ -254,7 +250,7 @@ static int insert_memory_segment(struct memory_segment *seg)
 {
        struct memory_segment *tmp;
 
-       if (PFN_DOWN(seg->start + seg->size) > max_pfn ||
+       if (seg->start + seg->size >= VMALLOC_START ||
            seg->start + seg->size < seg->start)
                return -ERANGE;
 
@@ -357,17 +353,15 @@ out:
 
 /*
  * map whole physical memory to virtual memory (identity mapping)
+ * we reserve enough space in the vmalloc area for vmemmap to hotplug
+ * additional memory segments.
  */
 void __init vmem_map_init(void)
 {
-       unsigned long map_size;
        int i;
 
-       map_size = ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page);
-       vmalloc_end = PFN_ALIGN(VMALLOC_END_INIT) - PFN_ALIGN(map_size);
-       vmem_map = (struct page *) vmalloc_end;
-       NODE_DATA(0)->node_mem_map = vmem_map;
-
+       BUILD_BUG_ON((unsigned long)VMEM_MAP + VMEM_MAP_SIZE > VMEM_MAP_MAX);
+       NODE_DATA(0)->node_mem_map = VMEM_MAP;
        for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
                vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
 }
@@ -382,7 +376,7 @@ static int __init vmem_convert_memory_chunk(void)
        int i;
 
        mutex_lock(&vmem_mutex);
-       for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+       for (i = 0; i < MEMORY_CHUNKS; i++) {
                if (!memory_chunk[i].size)
                        continue;
                seg = kzalloc(sizeof(*seg), GFP_KERNEL);
index 8a70a9e..6b658d8 100644 (file)
@@ -48,8 +48,6 @@ config CRYPTO_DEV_PADLOCK_SHA
          If unsure say M. The compiled module will be
          called padlock-sha.ko
 
-source "arch/s390/crypto/Kconfig"
-
 config CRYPTO_DEV_GEODE
        tristate "Support for the Geode LX AES engine"
        depends on X86_32 && PCI
@@ -83,6 +81,67 @@ config ZCRYPT_MONOLITHIC
          that contains all parts of the crypto device driver (ap bus,
          request router and all the card drivers).
 
+config CRYPTO_SHA1_S390
+       tristate "SHA1 digest algorithm"
+       depends on S390
+       select CRYPTO_ALGAPI
+       help
+         This is the s390 hardware accelerated implementation of the
+         SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
+
+config CRYPTO_SHA256_S390
+       tristate "SHA256 digest algorithm"
+       depends on S390
+       select CRYPTO_ALGAPI
+       help
+         This is the s390 hardware accelerated implementation of the
+         SHA256 secure hash standard (DFIPS 180-2).
+
+         This version of SHA implements a 256 bit hash with 128 bits of
+         security against collision attacks.
+
+config CRYPTO_DES_S390
+       tristate "DES and Triple DES cipher algorithms"
+       depends on S390
+       select CRYPTO_ALGAPI
+       select CRYPTO_BLKCIPHER
+       help
+         This is the s390 hardware accelerated implementation of the
+         DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
+
+config CRYPTO_AES_S390
+       tristate "AES cipher algorithms"
+       depends on S390
+       select CRYPTO_ALGAPI
+       select CRYPTO_BLKCIPHER
+       help
+         This is the s390 hardware accelerated implementation of the
+         AES cipher algorithms (FIPS-197). AES uses the Rijndael
+         algorithm.
+
+         Rijndael appears to be consistently a very good performer in
+         both hardware and software across a wide range of computing
+         environments regardless of its use in feedback or non-feedback
+         modes. Its key setup time is excellent, and its key agility is
+         good. Rijndael's very low memory requirements make it very well
+         suited for restricted-space environments, in which it also
+         demonstrates excellent performance. Rijndael's operations are
+         among the easiest to defend against power and timing attacks.
+
+         On s390 the System z9-109 currently only supports the key size
+         of 128 bit.
+
+config S390_PRNG
+       tristate "Pseudo random number generator device driver"
+       depends on S390
+       default "m"
+       help
+         Select this option if you want to use the s390 pseudo random number
+         generator. The PRNG is part of the cryptographic processor functions
+         and uses triple-DES to generate secure random numbers like the
+         ANSI X9.17 standard. The PRNG is usable via the char device
+         /dev/prandom.
+
 config CRYPTO_DEV_HIFN_795X
        tristate "Driver HIFN 795x crypto accelerator chips"
        select CRYPTO_DES
index be9f22d..0a89e08 100644 (file)
@@ -2,8 +2,8 @@
 # S/390 block devices
 #
 
-dasd_eckd_mod-objs := dasd_eckd.o dasd_3990_erp.o dasd_9343_erp.o
-dasd_fba_mod-objs  := dasd_fba.o dasd_3370_erp.o dasd_9336_erp.o
+dasd_eckd_mod-objs := dasd_eckd.o dasd_3990_erp.o dasd_alias.o
+dasd_fba_mod-objs  := dasd_fba.o
 dasd_diag_mod-objs := dasd_diag.o
 dasd_mod-objs      := dasd.o dasd_ioctl.o dasd_proc.o dasd_devmap.o \
                        dasd_genhd.o dasd_erp.o
index e6bfce6..1db15f3 100644 (file)
@@ -48,13 +48,15 @@ MODULE_LICENSE("GPL");
 /*
  * SECTION: prototypes for static functions of dasd.c
  */
-static int  dasd_alloc_queue(struct dasd_device * device);
-static void dasd_setup_queue(struct dasd_device * device);
-static void dasd_free_queue(struct dasd_device * device);
-static void dasd_flush_request_queue(struct dasd_device *);
-static int dasd_flush_ccw_queue(struct dasd_device *, int);
-static void dasd_tasklet(struct dasd_device *);
+static int  dasd_alloc_queue(struct dasd_block *);
+static void dasd_setup_queue(struct dasd_block *);
+static void dasd_free_queue(struct dasd_block *);
+static void dasd_flush_request_queue(struct dasd_block *);
+static int dasd_flush_block_queue(struct dasd_block *);
+static void dasd_device_tasklet(struct dasd_device *);
+static void dasd_block_tasklet(struct dasd_block *);
 static void do_kick_device(struct work_struct *);
+static void dasd_return_cqr_cb(struct dasd_ccw_req *, void *);
 
 /*
  * SECTION: Operations on the device structure.
@@ -65,26 +67,23 @@ static wait_queue_head_t dasd_flush_wq;
 /*
  * Allocate memory for a new device structure.
  */
-struct dasd_device *
-dasd_alloc_device(void)
+struct dasd_device *dasd_alloc_device(void)
 {
        struct dasd_device *device;
 
-       device = kzalloc(sizeof (struct dasd_device), GFP_ATOMIC);
-       if (device == NULL)
+       device = kzalloc(sizeof(struct dasd_device), GFP_ATOMIC);
+       if (!device)
                return ERR_PTR(-ENOMEM);
-       /* open_count = 0 means device online but not in use */
-       atomic_set(&device->open_count, -1);
 
        /* Get two pages for normal block device operations. */
        device->ccw_mem = (void *) __get_free_pages(GFP_ATOMIC | GFP_DMA, 1);
-       if (device->ccw_mem == NULL) {
+       if (!device->ccw_mem) {
                kfree(device);
                return ERR_PTR(-ENOMEM);
        }
        /* Get one page for error recovery. */
        device->erp_mem = (void *) get_zeroed_page(GFP_ATOMIC | GFP_DMA);
-       if (device->erp_mem == NULL) {
+       if (!device->erp_mem) {
                free_pages((unsigned long) device->ccw_mem, 1);
                kfree(device);
                return ERR_PTR(-ENOMEM);
@@ -93,10 +92,9 @@ dasd_alloc_device(void)
        dasd_init_chunklist(&device->ccw_chunks, device->ccw_mem, PAGE_SIZE*2);
        dasd_init_chunklist(&device->erp_chunks, device->erp_mem, PAGE_SIZE);
        spin_lock_init(&device->mem_lock);
-       spin_lock_init(&device->request_queue_lock);
-       atomic_set (&device->tasklet_scheduled, 0);
+       atomic_set(&device->tasklet_scheduled, 0);
        tasklet_init(&device->tasklet,
-                    (void (*)(unsigned long)) dasd_tasklet,
+                    (void (*)(unsigned long)) dasd_device_tasklet,
                     (unsigned long) device);
        INIT_LIST_HEAD(&device->ccw_queue);
        init_timer(&device->timer);
@@ -110,8 +108,7 @@ dasd_alloc_device(void)
 /*
  * Free memory of a device structure.
  */
-void
-dasd_free_device(struct dasd_device *device)
+void dasd_free_device(struct dasd_device *device)
 {
        kfree(device->private);
        free_page((unsigned long) device->erp_mem);
@@ -119,11 +116,43 @@ dasd_free_device(struct dasd_device *device)
        kfree(device);
 }
 
+/*
+ * Allocate memory for a new block structure.
+ */
+struct dasd_block *dasd_alloc_block(void)
+{
+       struct dasd_block *block;
+
+       block = kzalloc(sizeof(*block), GFP_ATOMIC);
+       if (!block)
+               return ERR_PTR(-ENOMEM);
+       /* open_count = 0 means device online but not in use */
+       atomic_set(&block->open_count, -1);
+
+       spin_lock_init(&block->request_queue_lock);
+       atomic_set(&block->tasklet_scheduled, 0);
+       tasklet_init(&block->tasklet,
+                    (void (*)(unsigned long)) dasd_block_tasklet,
+                    (unsigned long) block);
+       INIT_LIST_HEAD(&block->ccw_queue);
+       spin_lock_init(&block->queue_lock);
+       init_timer(&block->timer);
+
+       return block;
+}
+
+/*
+ * Free memory of a block structure.
+ */
+void dasd_free_block(struct dasd_block *block)
+{
+       kfree(block);
+}
+
 /*
  * Make a new device known to the system.
  */
-static int
-dasd_state_new_to_known(struct dasd_device *device)
+static int dasd_state_new_to_known(struct dasd_device *device)
 {
        int rc;
 
@@ -133,12 +162,13 @@ dasd_state_new_to_known(struct dasd_device *device)
         */
        dasd_get_device(device);
 
-       rc = dasd_alloc_queue(device);
-       if (rc) {
-               dasd_put_device(device);
-               return rc;
+       if (device->block) {
+               rc = dasd_alloc_queue(device->block);
+               if (rc) {
+                       dasd_put_device(device);
+                       return rc;
+               }
        }
-
        device->state = DASD_STATE_KNOWN;
        return 0;
 }
@@ -146,21 +176,24 @@ dasd_state_new_to_known(struct dasd_device *device)
 /*
  * Let the system forget about a device.
  */
-static int
-dasd_state_known_to_new(struct dasd_device * device)
+static int dasd_state_known_to_new(struct dasd_device *device)
 {
        /* Disable extended error reporting for this device. */
        dasd_eer_disable(device);
        /* Forget the discipline information. */
-       if (device->discipline)
+       if (device->discipline) {
+               if (device->discipline->uncheck_device)
+                       device->discipline->uncheck_device(device);
                module_put(device->discipline->owner);
+       }
        device->discipline = NULL;
        if (device->base_discipline)
                module_put(device->base_discipline->owner);
        device->base_discipline = NULL;
        device->state = DASD_STATE_NEW;
 
-       dasd_free_queue(device);
+       if (device->block)
+               dasd_free_queue(device->block);
 
        /* Give up reference we took in dasd_state_new_to_known. */
        dasd_put_device(device);
@@ -170,19 +203,19 @@ dasd_state_known_to_new(struct dasd_device * device)
 /*
  * Request the irq line for the device.
  */
-static int
-dasd_state_known_to_basic(struct dasd_device * device)
+static int dasd_state_known_to_basic(struct dasd_device *device)
 {
        int rc;
 
        /* Allocate and register gendisk structure. */
-       rc = dasd_gendisk_alloc(device);
-       if (rc)
-               return rc;
-
+       if (device->block) {
+               rc = dasd_gendisk_alloc(device->block);
+               if (rc)
+                       return rc;
+       }
        /* register 'device' debug area, used for all DBF_DEV_XXX calls */
-       device->debug_area = debug_register(device->cdev->dev.bus_id, 1, 2,
-                                           8 * sizeof (long));
+       device->debug_area = debug_register(device->cdev->dev.bus_id, 1, 1,
+                                           8 * sizeof(long));
        debug_register_view(device->debug_area, &debug_sprintf_view);
        debug_set_level(device->debug_area, DBF_WARNING);
        DBF_DEV_EVENT(DBF_EMERG, device, "%s", "debug area created");
@@ -194,16 +227,17 @@ dasd_state_known_to_basic(struct dasd_device * device)
 /*
  * Release the irq line for the device. Terminate any running i/o.
  */
-static int
-dasd_state_basic_to_known(struct dasd_device * device)
+static int dasd_state_basic_to_known(struct dasd_device *device)
 {
        int rc;
-
-       dasd_gendisk_free(device);
-       rc = dasd_flush_ccw_queue(device, 1);
+       if (device->block) {
+               dasd_gendisk_free(device->block);
+               dasd_block_clear_timer(device->block);
+       }
+       rc = dasd_flush_device_queue(device);
        if (rc)
                return rc;
-       dasd_clear_timer(device);
+       dasd_device_clear_timer(device);
 
        DBF_DEV_EVENT(DBF_EMERG, device, "%p debug area deleted", device);
        if (device->debug_area != NULL) {
@@ -228,26 +262,32 @@ dasd_state_basic_to_known(struct dasd_device * device)
  * In case the analysis returns an error, the device setup is stopped
  * (a fake disk was already added to allow formatting).
  */
-static int
-dasd_state_basic_to_ready(struct dasd_device * device)
+static int dasd_state_basic_to_ready(struct dasd_device *device)
 {
        int rc;
+       struct dasd_block *block;
 
        rc = 0;
-       if (device->discipline->do_analysis != NULL)
-               rc = device->discipline->do_analysis(device);
-       if (rc) {
-               if (rc != -EAGAIN)
-                       device->state = DASD_STATE_UNFMT;
-               return rc;
-       }
+       block = device->block;
        /* make disk known with correct capacity */
-       dasd_setup_queue(device);
-       set_capacity(device->gdp, device->blocks << device->s2b_shift);
-       device->state = DASD_STATE_READY;
-       rc = dasd_scan_partitions(device);
-       if (rc)
-               device->state = DASD_STATE_BASIC;
+       if (block) {
+               if (block->base->discipline->do_analysis != NULL)
+                       rc = block->base->discipline->do_analysis(block);
+               if (rc) {
+                       if (rc != -EAGAIN)
+                               device->state = DASD_STATE_UNFMT;
+                       return rc;
+               }
+               dasd_setup_queue(block);
+               set_capacity(block->gdp,
+                            block->blocks << block->s2b_shift);
+               device->state = DASD_STATE_READY;
+               rc = dasd_scan_partitions(block);
+               if (rc)
+                       device->state = DASD_STATE_BASIC;
+       } else {
+               device->state = DASD_STATE_READY;
+       }
        return rc;
 }
 
@@ -256,28 +296,31 @@ dasd_state_basic_to_ready(struct dasd_device * device)
  * Forget format information. Check if the target level is basic
  * and if it is create fake disk for formatting.
  */
-static int
-dasd_state_ready_to_basic(struct dasd_device * device)
+static int dasd_state_ready_to_basic(struct dasd_device *device)
 {
        int rc;
 
-       rc = dasd_flush_ccw_queue(device, 0);
-       if (rc)
-               return rc;
-       dasd_destroy_partitions(device);
-       dasd_flush_request_queue(device);
-       device->blocks = 0;
-       device->bp_block = 0;
-       device->s2b_shift = 0;
        device->state = DASD_STATE_BASIC;
+       if (device->block) {
+               struct dasd_block *block = device->block;
+               rc = dasd_flush_block_queue(block);
+               if (rc) {
+                       device->state = DASD_STATE_READY;
+                       return rc;
+               }
+               dasd_destroy_partitions(block);
+               dasd_flush_request_queue(block);
+               block->blocks = 0;
+               block->bp_block = 0;
+               block->s2b_shift = 0;
+       }
        return 0;
 }
 
 /*
  * Back to basic.
  */
-static int
-dasd_state_unfmt_to_basic(struct dasd_device * device)
+static int dasd_state_unfmt_to_basic(struct dasd_device *device)
 {
        device->state = DASD_STATE_BASIC;
        return 0;
@@ -291,17 +334,31 @@ dasd_state_unfmt_to_basic(struct dasd_device * device)
 static int
 dasd_state_ready_to_online(struct dasd_device * device)
 {
+       int rc;
+
+       if (device->discipline->ready_to_online) {
+               rc = device->discipline->ready_to_online(device);
+               if (rc)
+                       return rc;
+       }
        device->state = DASD_STATE_ONLINE;
-       dasd_schedule_bh(device);
+       if (device->block)
+               dasd_schedule_block_bh(device->block);
        return 0;
 }
 
 /*
  * Stop the requeueing of requests again.
  */
-static int
-dasd_state_online_to_ready(struct dasd_device * device)
+static int dasd_state_online_to_ready(struct dasd_device *device)
 {
+       int rc;
+
+       if (device->discipline->online_to_ready) {
+               rc = device->discipline->online_to_ready(device);
+               if (rc)
+                       return rc;
+       }
        device->state = DASD_STATE_READY;
        return 0;
 }
@@ -309,8 +366,7 @@ dasd_state_online_to_ready(struct dasd_device * device)
 /*
  * Device startup state changes.
  */
-static int
-dasd_increase_state(struct dasd_device *device)
+static int dasd_increase_state(struct dasd_device *device)
 {
        int rc;
 
@@ -345,8 +401,7 @@ dasd_increase_state(struct dasd_device *device)
 /*
  * Device shutdown state changes.
  */
-static int
-dasd_decrease_state(struct dasd_device *device)
+static int dasd_decrease_state(struct dasd_device *device)
 {
        int rc;
 
@@ -381,8 +436,7 @@ dasd_decrease_state(struct dasd_device *device)
 /*
  * This is the main startup/shutdown routine.
  */
-static void
-dasd_change_state(struct dasd_device *device)
+static void dasd_change_state(struct dasd_device *device)
 {
         int rc;
 
@@ -409,17 +463,15 @@ dasd_change_state(struct dasd_device *device)
  * dasd_kick_device will schedule a call to do_kick_device to the kernel
  * event daemon.
  */
-static void
-do_kick_device(struct work_struct *work)
+static void do_kick_device(struct work_struct *work)
 {
        struct dasd_device *device = container_of(work, struct dasd_device, kick_work);
        dasd_change_state(device);
-       dasd_schedule_bh(device);
+       dasd_schedule_device_bh(device);
        dasd_put_device(device);
 }
 
-void
-dasd_kick_device(struct dasd_device *device)
+void dasd_kick_device(struct dasd_device *device)
 {
        dasd_get_device(device);
        /* queue call to do_kick_device to the kernel event daemon. */
@@ -429,8 +481,7 @@ dasd_kick_device(struct dasd_device *device)
 /*
  * Set the target state for a device and starts the state change.
  */
-void
-dasd_set_target_state(struct dasd_device *device, int target)
+void dasd_set_target_state(struct dasd_device *device, int target)
 {
        /* If we are in probeonly mode stop at DASD_STATE_READY. */
        if (dasd_probeonly && target > DASD_STATE_READY)
@@ -447,14 +498,12 @@ dasd_set_target_state(struct dasd_device *device, int target)
 /*
  * Enable devices with device numbers in [from..to].
  */
-static inline int
-_wait_for_device(struct dasd_device *device)
+static inline int _wait_for_device(struct dasd_device *device)
 {
        return (device->state == device->target);
 }
 
-void
-dasd_enable_device(struct dasd_device *device)
+void dasd_enable_device(struct dasd_device *device)
 {
        dasd_set_target_state(device, DASD_STATE_ONLINE);
        if (device->state <= DASD_STATE_KNOWN)
@@ -475,20 +524,20 @@ unsigned int dasd_profile_level = DASD_PROFILE_OFF;
 /*
  * Increments counter in global and local profiling structures.
  */
-#define dasd_profile_counter(value, counter, device) \
+#define dasd_profile_counter(value, counter, block) \
 { \
        int index; \
        for (index = 0; index < 31 && value >> (2+index); index++); \
        dasd_global_profile.counter[index]++; \
-       device->profile.counter[index]++; \
+       block->profile.counter[index]++; \
 }
 
 /*
  * Add profiling information for cqr before execution.
  */
-static void
-dasd_profile_start(struct dasd_device *device, struct dasd_ccw_req * cqr,
-                  struct request *req)
+static void dasd_profile_start(struct dasd_block *block,
+                              struct dasd_ccw_req *cqr,
+                              struct request *req)
 {
        struct list_head *l;
        unsigned int counter;
@@ -498,19 +547,19 @@ dasd_profile_start(struct dasd_device *device, struct dasd_ccw_req * cqr,
 
        /* count the length of the chanq for statistics */
        counter = 0;
-       list_for_each(l, &device->ccw_queue)
+       list_for_each(l, &block->ccw_queue)
                if (++counter >= 31)
                        break;
        dasd_global_profile.dasd_io_nr_req[counter]++;
-       device->profile.dasd_io_nr_req[counter]++;
+       block->profile.dasd_io_nr_req[counter]++;
 }
 
 /*
  * Add profiling information for cqr after execution.
  */
-static void
-dasd_profile_end(struct dasd_device *device, struct dasd_ccw_req * cqr,
-                struct request *req)
+static void dasd_profile_end(struct dasd_block *block,
+                            struct dasd_ccw_req *cqr,
+                            struct request *req)
 {
        long strtime, irqtime, endtime, tottime;        /* in microseconds */
        long tottimeps, sectors;
@@ -532,27 +581,27 @@ dasd_profile_end(struct dasd_device *device, struct dasd_ccw_req * cqr,
 
        if (!dasd_global_profile.dasd_io_reqs)
                memset(&dasd_global_profile, 0,
-                      sizeof (struct dasd_profile_info_t));
+                      sizeof(struct dasd_profile_info_t));
        dasd_global_profile.dasd_io_reqs++;
        dasd_global_profile.dasd_io_sects += sectors;
 
-       if (!device->profile.dasd_io_reqs)
-               memset(&device->profile, 0,
-                      sizeof (struct dasd_profile_info_t));
-       device->profile.dasd_io_reqs++;
-       device->profile.dasd_io_sects += sectors;
+       if (!block->profile.dasd_io_reqs)
+               memset(&block->profile, 0,
+                      sizeof(struct dasd_profile_info_t));
+       block->profile.dasd_io_reqs++;
+       block->profile.dasd_io_sects += sectors;
 
-       dasd_profile_counter(sectors, dasd_io_secs, device);
-       dasd_profile_counter(tottime, dasd_io_times, device);
-       dasd_profile_counter(tottimeps, dasd_io_timps, device);
-       dasd_profile_counter(strtime, dasd_io_time1, device);
-       dasd_profile_counter(irqtime, dasd_io_time2, device);
-       dasd_profile_counter(irqtime / sectors, dasd_io_time2ps, device);
-       dasd_profile_counter(endtime, dasd_io_time3, device);
+       dasd_profile_counter(sectors, dasd_io_secs, block);
+       dasd_profile_counter(tottime, dasd_io_times, block);
+       dasd_profile_counter(tottimeps, dasd_io_timps, block);
+       dasd_profile_counter(strtime, dasd_io_time1, block);
+       dasd_profile_counter(irqtime, dasd_io_time2, block);
+       dasd_profile_counter(irqtime / sectors, dasd_io_time2ps, block);
+       dasd_profile_counter(endtime, dasd_io_time3, block);
 }
 #else
-#define dasd_profile_start(device, cqr, req) do {} while (0)
-#define dasd_profile_end(device, cqr, req) do {} while (0)
+#define dasd_profile_start(block, cqr, req) do {} while (0)
+#define dasd_profile_end(block, cqr, req) do {} while (0)
 #endif                         /* CONFIG_DASD_PROFILE */
 
 /*
@@ -562,9 +611,9 @@ dasd_profile_end(struct dasd_device *device, struct dasd_ccw_req * cqr,
  * memory and 2) dasd_smalloc_request uses the static ccw memory
  * that gets allocated for each device.
  */
-struct dasd_ccw_req *
-dasd_kmalloc_request(char *magic, int cplength, int datasize,
-                  struct dasd_device * device)
+struct dasd_ccw_req *dasd_kmalloc_request(char *magic, int cplength,
+                                         int datasize,
+                                         struct dasd_device *device)
 {
        struct dasd_ccw_req *cqr;
 
@@ -600,9 +649,9 @@ dasd_kmalloc_request(char *magic, int cplength, int datasize,
        return cqr;
 }
 
-struct dasd_ccw_req *
-dasd_smalloc_request(char *magic, int cplength, int datasize,
-                  struct dasd_device * device)
+struct dasd_ccw_req *dasd_smalloc_request(char *magic, int cplength,
+                                         int datasize,
+                                         struct dasd_device *device)
 {
        unsigned long flags;
        struct dasd_ccw_req *cqr;
@@ -649,8 +698,7 @@ dasd_smalloc_request(char *magic, int cplength, int datasize,
  * idal lists that might have been created by dasd_set_cda and the
  * struct dasd_ccw_req itself.
  */
-void
-dasd_kfree_request(struct dasd_ccw_req * cqr, struct dasd_device * device)
+void dasd_kfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device)
 {
 #ifdef CONFIG_64BIT
        struct ccw1 *ccw;
@@ -667,8 +715,7 @@ dasd_kfree_request(struct dasd_ccw_req * cqr, struct dasd_device * device)
        dasd_put_device(device);
 }
 
-void
-dasd_sfree_request(struct dasd_ccw_req * cqr, struct dasd_device * device)
+void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device)
 {
        unsigned long flags;
 
@@ -681,14 +728,13 @@ dasd_sfree_request(struct dasd_ccw_req * cqr, struct dasd_device * device)
 /*
  * Check discipline magic in cqr.
  */
-static inline int
-dasd_check_cqr(struct dasd_ccw_req *cqr)
+static inline int dasd_check_cqr(struct dasd_ccw_req *cqr)
 {
        struct dasd_device *device;
 
        if (cqr == NULL)
                return -EINVAL;
-       device = cqr->device;
+       device = cqr->startdev;
        if (strncmp((char *) &cqr->magic, device->discipline->ebcname, 4)) {
                DEV_MESSAGE(KERN_WARNING, device,
                            " dasd_ccw_req 0x%08x magic doesn't match"
@@ -706,8 +752,7 @@ dasd_check_cqr(struct dasd_ccw_req *cqr)
  * ccw_device_clear can fail if the i/o subsystem
  * is in a bad mood.
  */
-int
-dasd_term_IO(struct dasd_ccw_req * cqr)
+int dasd_term_IO(struct dasd_ccw_req *cqr)
 {
        struct dasd_device *device;
        int retries, rc;
@@ -717,13 +762,13 @@ dasd_term_IO(struct dasd_ccw_req * cqr)
        if (rc)
                return rc;
        retries = 0;
-       device = (struct dasd_device *) cqr->device;
+       device = (struct dasd_device *) cqr->startdev;
        while ((retries < 5) && (cqr->status == DASD_CQR_IN_IO)) {
                rc = ccw_device_clear(device->cdev, (long) cqr);
                switch (rc) {
                case 0: /* termination successful */
                        cqr->retries--;
-                       cqr->status = DASD_CQR_CLEAR;
+                       cqr->status = DASD_CQR_CLEAR_PENDING;
                        cqr->stopclk = get_clock();
                        cqr->starttime = 0;
                        DBF_DEV_EVENT(DBF_DEBUG, device,
@@ -753,7 +798,7 @@ dasd_term_IO(struct dasd_ccw_req * cqr)
                }
                retries++;
        }
-       dasd_schedule_bh(device);
+       dasd_schedule_device_bh(device);
        return rc;
 }
 
@@ -761,8 +806,7 @@ dasd_term_IO(struct dasd_ccw_req * cqr)
  * Start the i/o. This start_IO can fail if the channel is really busy.
  * In that case set up a timer to start the request later.
  */
-int
-dasd_start_IO(struct dasd_ccw_req * cqr)
+int dasd_start_IO(struct dasd_ccw_req *cqr)
 {
        struct dasd_device *device;
        int rc;
@@ -771,12 +815,12 @@ dasd_start_IO(struct dasd_ccw_req * cqr)
        rc = dasd_check_cqr(cqr);
        if (rc)
                return rc;
-       device = (struct dasd_device *) cqr->device;
+       device = (struct dasd_device *) cqr->startdev;
        if (cqr->retries < 0) {
                DEV_MESSAGE(KERN_DEBUG, device,
                            "start_IO: request %p (%02x/%i) - no retry left.",
                            cqr, cqr->status, cqr->retries);
-               cqr->status = DASD_CQR_FAILED;
+               cqr->status = DASD_CQR_ERROR;
                return -EIO;
        }
        cqr->startclk = get_clock();
@@ -833,8 +877,7 @@ dasd_start_IO(struct dasd_ccw_req * cqr)
  * The head of the ccw queue will have status DASD_CQR_IN_IO for 1),
  * DASD_CQR_QUEUED for 2) and 3).
  */
-static void
-dasd_timeout_device(unsigned long ptr)
+static void dasd_device_timeout(unsigned long ptr)
 {
        unsigned long flags;
        struct dasd_device *device;
@@ -844,14 +887,13 @@ dasd_timeout_device(unsigned long ptr)
        /* re-activate request queue */
         device->stopped &= ~DASD_STOPPED_PENDING;
        spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
-       dasd_schedule_bh(device);
+       dasd_schedule_device_bh(device);
 }
 
 /*
  * Setup timeout for a device in jiffies.
  */
-void
-dasd_set_timer(struct dasd_device *device, int expires)
+void dasd_device_set_timer(struct dasd_device *device, int expires)
 {
        if (expires == 0) {
                if (timer_pending(&device->timer))
@@ -862,7 +904,7 @@ dasd_set_timer(struct dasd_device *device, int expires)
                if (mod_timer(&device->timer, jiffies + expires))
                        return;
        }
-       device->timer.function = dasd_timeout_device;
+       device->timer.function = dasd_device_timeout;
        device->timer.data = (unsigned long) device;
        device->timer.expires = jiffies + expires;
        add_timer(&device->timer);
@@ -871,15 +913,14 @@ dasd_set_timer(struct dasd_device *device, int expires)
 /*
  * Clear timeout for a device.
  */
-void
-dasd_clear_timer(struct dasd_device *device)
+void dasd_device_clear_timer(struct dasd_device *device)
 {
        if (timer_pending(&device->timer))
                del_timer(&device->timer);
 }
 
-static void
-dasd_handle_killed_request(struct ccw_device *cdev, unsigned long intparm)
+static void dasd_handle_killed_request(struct ccw_device *cdev,
+                                      unsigned long intparm)
 {
        struct dasd_ccw_req *cqr;
        struct dasd_device *device;
@@ -893,7 +934,7 @@ dasd_handle_killed_request(struct ccw_device *cdev, unsigned long intparm)
                return;
        }
 
-       device = (struct dasd_device *) cqr->device;
+       device = (struct dasd_device *) cqr->startdev;
        if (device == NULL ||
            device != dasd_device_from_cdev_locked(cdev) ||
            strncmp(device->discipline->ebcname, (char *) &cqr->magic, 4)) {
@@ -905,46 +946,32 @@ dasd_handle_killed_request(struct ccw_device *cdev, unsigned long intparm)
        /* Schedule request to be retried. */
        cqr->status = DASD_CQR_QUEUED;
 
-       dasd_clear_timer(device);
-       dasd_schedule_bh(device);
+       dasd_device_clear_timer(device);
+       dasd_schedule_device_bh(device);
        dasd_put_device(device);
 }
 
-static void
-dasd_handle_state_change_pending(struct dasd_device *device)
+void dasd_generic_handle_state_change(struct dasd_device *device)
 {
-       struct dasd_ccw_req *cqr;
-       struct list_head *l, *n;
-
        /* First of all start sense subsystem status request. */
        dasd_eer_snss(device);
 
        device->stopped &= ~DASD_STOPPED_PENDING;
-
-        /* restart all 'running' IO on queue */
-       list_for_each_safe(l, n, &device->ccw_queue) {
-               cqr = list_entry(l, struct dasd_ccw_req, list);
-                if (cqr->status == DASD_CQR_IN_IO) {
-                        cqr->status = DASD_CQR_QUEUED;
-               }
-        }
-       dasd_clear_timer(device);
-       dasd_schedule_bh(device);
+       dasd_schedule_device_bh(device);
+       if (device->block)
+               dasd_schedule_block_bh(device->block);
 }
 
 /*
  * Interrupt handler for "normal" ssch-io based dasd devices.
  */
-void
-dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
-                struct irb *irb)
+void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
+                     struct irb *irb)
 {
        struct dasd_ccw_req *cqr, *next;
        struct dasd_device *device;
        unsigned long long now;
        int expires;
-       dasd_era_t era;
-       char mask;
 
        if (IS_ERR(irb)) {
                switch (PTR_ERR(irb)) {
@@ -969,29 +996,25 @@ dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
                  cdev->dev.bus_id, ((irb->scsw.cstat<<8)|irb->scsw.dstat),
                  (unsigned int) intparm);
 
-       /* first of all check for state change pending interrupt */
-       mask = DEV_STAT_ATTENTION | DEV_STAT_DEV_END | DEV_STAT_UNIT_EXCEP;
-       if ((irb->scsw.dstat & mask) == mask) {
+       /* check for unsolicited interrupts */
+       cqr = (struct dasd_ccw_req *) intparm;
+       if (!cqr || ((irb->scsw.cc == 1) &&
+                    (irb->scsw.fctl & SCSW_FCTL_START_FUNC) &&
+                    (irb->scsw.stctl & SCSW_STCTL_STATUS_PEND)) ) {
+               if (cqr && cqr->status == DASD_CQR_IN_IO)
+                       cqr->status = DASD_CQR_QUEUED;
                device = dasd_device_from_cdev_locked(cdev);
                if (!IS_ERR(device)) {
-                       dasd_handle_state_change_pending(device);
+                       dasd_device_clear_timer(device);
+                       device->discipline->handle_unsolicited_interrupt(device,
+                                                                        irb);
                        dasd_put_device(device);
                }
                return;
        }
 
-       cqr = (struct dasd_ccw_req *) intparm;
-
-       /* check for unsolicited interrupts */
-       if (cqr == NULL) {
-               MESSAGE(KERN_DEBUG,
-                       "unsolicited interrupt received: bus_id %s",
-                       cdev->dev.bus_id);
-               return;
-       }
-
-       device = (struct dasd_device *) cqr->device;
-       if (device == NULL ||
+       device = (struct dasd_device *) cqr->startdev;
+       if (!device ||
            strncmp(device->discipline->ebcname, (char *) &cqr->magic, 4)) {
                MESSAGE(KERN_DEBUG, "invalid device in request: bus_id %s",
                        cdev->dev.bus_id);
@@ -999,12 +1022,12 @@ dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
        }
 
        /* Check for clear pending */
-       if (cqr->status == DASD_CQR_CLEAR &&
+       if (cqr->status == DASD_CQR_CLEAR_PENDING &&
            irb->scsw.fctl & SCSW_FCTL_CLEAR_FUNC) {
-               cqr->status = DASD_CQR_QUEUED;
-               dasd_clear_timer(device);
+               cqr->status = DASD_CQR_CLEARED;
+               dasd_device_clear_timer(device);
                wake_up(&dasd_flush_wq);
-               dasd_schedule_bh(device);
+               dasd_schedule_device_bh(device);
                return;
        }
 
@@ -1017,277 +1040,170 @@ dasd_int_handler(struct ccw_device *cdev, unsigned long intparm,
        }
        DBF_DEV_EVENT(DBF_DEBUG, device, "Int: CS/DS 0x%04x for cqr %p",
                      ((irb->scsw.cstat << 8) | irb->scsw.dstat), cqr);
-
-       /* Find out the appropriate era_action. */
-       if (irb->scsw.fctl & SCSW_FCTL_HALT_FUNC)
-               era = dasd_era_fatal;
-       else if (irb->scsw.dstat == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) &&
-                irb->scsw.cstat == 0 &&
-                !irb->esw.esw0.erw.cons)
-               era = dasd_era_none;
-       else if (irb->esw.esw0.erw.cons)
-               era = device->discipline->examine_error(cqr, irb);
-       else
-               era = dasd_era_recover;
-
-       DBF_DEV_EVENT(DBF_DEBUG, device, "era_code %d", era);
+       next = NULL;
        expires = 0;
-       if (era == dasd_era_none) {
-               cqr->status = DASD_CQR_DONE;
+       if (irb->scsw.dstat == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) &&
+           irb->scsw.cstat == 0 && !irb->esw.esw0.erw.cons) {
+               /* request was completed successfully */
+               cqr->status = DASD_CQR_SUCCESS;
                cqr->stopclk = now;
                /* Start first request on queue if possible -> fast_io. */
-               if (cqr->list.next != &device->ccw_queue) {
-                       next = list_entry(cqr->list.next,
-                                         struct dasd_ccw_req, list);
-                       if ((next->status == DASD_CQR_QUEUED) &&
-                           (!device->stopped)) {
-                               if (device->discipline->start_IO(next) == 0)
-                                       expires = next->expires;
-                               else
-                                       DEV_MESSAGE(KERN_DEBUG, device, "%s",
-                                                   "Interrupt fastpath "
-                                                   "failed!");
-                       }
+               if (cqr->devlist.next != &device->ccw_queue) {
+                       next = list_entry(cqr->devlist.next,
+                                         struct dasd_ccw_req, devlist);
                }
-       } else {                /* error */
-               memcpy(&cqr->irb, irb, sizeof (struct irb));
+       } else {  /* error */
+               memcpy(&cqr->irb, irb, sizeof(struct irb));
                if (device->features & DASD_FEATURE_ERPLOG) {
-                       /* dump sense data */
                        dasd_log_sense(cqr, irb);
                }
-               switch (era) {
-               case dasd_era_fatal:
-                       cqr->status = DASD_CQR_FAILED;
-                       cqr->stopclk = now;
-                       break;
-               case dasd_era_recover:
+               /* If we have no sense data, or we just don't want complex ERP
+                * for this request, and we still have retries left, then just
+                * reset this request and retry it in the fastpath
+                */
+               if (!(cqr->irb.esw.esw0.erw.cons &&
+                     test_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags)) &&
+                   cqr->retries > 0) {
+                       DEV_MESSAGE(KERN_DEBUG, device,
+                                   "default ERP in fastpath (%i retries left)",
+                                   cqr->retries);
+                       cqr->lpm    = LPM_ANYPATH;
+                       cqr->status = DASD_CQR_QUEUED;
+                       next = cqr;
+               } else
                        cqr->status = DASD_CQR_ERROR;
-                       break;
-               default:
-                       BUG();
-               }
+       }
+       if (next && (next->status == DASD_CQR_QUEUED) &&
+           (!device->stopped)) {
+               if (device->discipline->start_IO(next) == 0)
+                       expires = next->expires;
+               else
+                       DEV_MESSAGE(KERN_DEBUG, device, "%s",
+                                   "Interrupt fastpath "
+                                   "failed!");
        }
        if (expires != 0)
-               dasd_set_timer(device, expires);
+               dasd_device_set_timer(device, expires);
        else
-               dasd_clear_timer(device);
-       dasd_schedule_bh(device);
+               dasd_device_clear_timer(device);
+       dasd_schedule_device_bh(device);
 }
 
 /*
- * posts the buffer_cache about a finalized request
+ * If we have an error on a dasd_block layer request then we cancel
+ * and return all further requests from the same dasd_block as well.
  */
-static inline void
-dasd_end_request(struct request *req, int uptodate)
+static void __dasd_device_recovery(struct dasd_device *device,
+                                  struct dasd_ccw_req *ref_cqr)
 {
-       if (end_that_request_first(req, uptodate, req->hard_nr_sectors))
-               BUG();
-       add_disk_randomness(req->rq_disk);
-       end_that_request_last(req, uptodate);
-}
+       struct list_head *l, *n;
+       struct dasd_ccw_req *cqr;
 
-/*
- * Process finished error recovery ccw.
- */
-static inline void
-__dasd_process_erp(struct dasd_device *device, struct dasd_ccw_req *cqr)
-{
-       dasd_erp_fn_t erp_fn;
+       /*
+        * only clear queued requests that share ref_cqr's dasd_block
+        */
+       if (!ref_cqr->block)
+               return;
 
-       if (cqr->status == DASD_CQR_DONE)
-               DBF_DEV_EVENT(DBF_NOTICE, device, "%s", "ERP successful");
-       else
-               DEV_MESSAGE(KERN_ERR, device, "%s", "ERP unsuccessful");
-       erp_fn = device->discipline->erp_postaction(cqr);
-       erp_fn(cqr);
-}
+       list_for_each_safe(l, n, &device->ccw_queue) {
+               cqr = list_entry(l, struct dasd_ccw_req, devlist);
+               if (cqr->status == DASD_CQR_QUEUED &&
+                   ref_cqr->block == cqr->block) {
+                       cqr->status = DASD_CQR_CLEARED;
+               }
+       }
+}
 
 /*
- * Process ccw request queue.
+ * Remove those ccw requests from the queue that need to be returned
+ * to the upper layer.
  */
-static void
-__dasd_process_ccw_queue(struct dasd_device * device,
-                        struct list_head *final_queue)
+static void __dasd_device_process_ccw_queue(struct dasd_device *device,
+                                           struct list_head *final_queue)
 {
        struct list_head *l, *n;
        struct dasd_ccw_req *cqr;
-       dasd_erp_fn_t erp_fn;
 
-restart:
        /* Process request with final status. */
        list_for_each_safe(l, n, &device->ccw_queue) {
-               cqr = list_entry(l, struct dasd_ccw_req, list);
+               cqr = list_entry(l, struct dasd_ccw_req, devlist);
+
                /* Stop list processing at the first non-final request. */
-               if (cqr->status != DASD_CQR_DONE &&
-                   cqr->status != DASD_CQR_FAILED &&
-                   cqr->status != DASD_CQR_ERROR)
+               if (cqr->status == DASD_CQR_QUEUED ||
+                   cqr->status == DASD_CQR_IN_IO ||
+                   cqr->status == DASD_CQR_CLEAR_PENDING)
                        break;
-               /*  Process requests with DASD_CQR_ERROR */
                if (cqr->status == DASD_CQR_ERROR) {
-                       if (cqr->irb.scsw.fctl & SCSW_FCTL_HALT_FUNC) {
-                               cqr->status = DASD_CQR_FAILED;
-                               cqr->stopclk = get_clock();
-                       } else {
-                               if (cqr->irb.esw.esw0.erw.cons &&
-                                   test_bit(DASD_CQR_FLAGS_USE_ERP,
-                                            &cqr->flags)) {
-                                       erp_fn = device->discipline->
-                                               erp_action(cqr);
-                                       erp_fn(cqr);
-                               } else
-                                       dasd_default_erp_action(cqr);
-                       }
-                       goto restart;
-               }
-
-               /* First of all call extended error reporting. */
-               if (dasd_eer_enabled(device) &&
-                   cqr->status == DASD_CQR_FAILED) {
-                       dasd_eer_write(device, cqr, DASD_EER_FATALERROR);
-
-                       /* restart request  */
-                       cqr->status = DASD_CQR_QUEUED;
-                       cqr->retries = 255;
-                       device->stopped |= DASD_STOPPED_QUIESCE;
-                       goto restart;
+                       __dasd_device_recovery(device, cqr);
                }
-
-               /* Process finished ERP request. */
-               if (cqr->refers) {
-                       __dasd_process_erp(device, cqr);
-                       goto restart;
-               }
-
                /* Rechain finished requests to final queue */
-               cqr->endclk = get_clock();
-               list_move_tail(&cqr->list, final_queue);
+               list_move_tail(&cqr->devlist, final_queue);
        }
 }
 
-static void
-dasd_end_request_cb(struct dasd_ccw_req * cqr, void *data)
-{
-       struct request *req;
-       struct dasd_device *device;
-       int status;
-
-       req = (struct request *) data;
-       device = cqr->device;
-       dasd_profile_end(device, cqr, req);
-       status = cqr->device->discipline->free_cp(cqr,req);
-       spin_lock_irq(&device->request_queue_lock);
-       dasd_end_request(req, status);
-       spin_unlock_irq(&device->request_queue_lock);
-}
-
-
 /*
- * Fetch requests from the block device queue.
+ * the cqrs from the final queue are returned to the upper layer
+ * by setting a dasd_block state and calling the callback function
  */
-static void
-__dasd_process_blk_queue(struct dasd_device * device)
+static void __dasd_device_process_final_queue(struct dasd_device *device,
+                                             struct list_head *final_queue)
 {
-       struct request_queue *queue;
-       struct request *req;
+       struct list_head *l, *n;
        struct dasd_ccw_req *cqr;
-       int nr_queued;
-
-       queue = device->request_queue;
-       /* No queue ? Then there is nothing to do. */
-       if (queue == NULL)
-               return;
-
-       /*
-        * We requeue request from the block device queue to the ccw
-        * queue only in two states. In state DASD_STATE_READY the
-        * partition detection is done and we need to requeue requests
-        * for that. State DASD_STATE_ONLINE is normal block device
-        * operation.
-        */
-       if (device->state != DASD_STATE_READY &&
-           device->state != DASD_STATE_ONLINE)
-               return;
-       nr_queued = 0;
-       /* Now we try to fetch requests from the request queue */
-       list_for_each_entry(cqr, &device->ccw_queue, list)
-               if (cqr->status == DASD_CQR_QUEUED)
-                       nr_queued++;
-       while (!blk_queue_plugged(queue) &&
-              elv_next_request(queue) &&
-               nr_queued < DASD_CHANQ_MAX_SIZE) {
-               req = elv_next_request(queue);
 
-               if (device->features & DASD_FEATURE_READONLY &&
-                   rq_data_dir(req) == WRITE) {
-                       DBF_DEV_EVENT(DBF_ERR, device,
-                                     "Rejecting write request %p",
-                                     req);
-                       blkdev_dequeue_request(req);
-                       dasd_end_request(req, 0);
-                       continue;
-               }
-               if (device->stopped & DASD_STOPPED_DC_EIO) {
-                       blkdev_dequeue_request(req);
-                       dasd_end_request(req, 0);
-                       continue;
-               }
-               cqr = device->discipline->build_cp(device, req);
-               if (IS_ERR(cqr)) {
-                       if (PTR_ERR(cqr) == -ENOMEM)
-                               break;  /* terminate request queue loop */
-                       if (PTR_ERR(cqr) == -EAGAIN) {
-                               /*
-                                * The current request cannot be build right
-                                * now, we have to try later. If this request
-                                * is the head-of-queue we stop the device
-                                * for 1/2 second.
-                                */
-                               if (!list_empty(&device->ccw_queue))
-                                       break;
-                               device->stopped |= DASD_STOPPED_PENDING;
-                               dasd_set_timer(device, HZ/2);
-                               break;
-                       }
-                       DBF_DEV_EVENT(DBF_ERR, device,
-                                     "CCW creation failed (rc=%ld) "
-                                     "on request %p",
-                                     PTR_ERR(cqr), req);
-                       blkdev_dequeue_request(req);
-                       dasd_end_request(req, 0);
-                       continue;
+       list_for_each_safe(l, n, final_queue) {
+               cqr = list_entry(l, struct dasd_ccw_req, devlist);
+               list_del_init(&cqr->devlist);
+               if (cqr->block)
+                       spin_lock_bh(&cqr->block->queue_lock);
+               switch (cqr->status) {
+               case DASD_CQR_SUCCESS:
+                       cqr->status = DASD_CQR_DONE;
+                       break;
+               case DASD_CQR_ERROR:
+                       cqr->status = DASD_CQR_NEED_ERP;
+                       break;
+               case DASD_CQR_CLEARED:
+                       cqr->status = DASD_CQR_TERMINATED;
+                       break;
+               default:
+                       DEV_MESSAGE(KERN_ERR, device,
+                                   "wrong cqr status in __dasd_process_final_queue "
+                                   "for cqr %p, status %x",
+                                   cqr, cqr->status);
+                       BUG();
                }
-               cqr->callback = dasd_end_request_cb;
-               cqr->callback_data = (void *) req;
-               cqr->status = DASD_CQR_QUEUED;
-               blkdev_dequeue_request(req);
-               list_add_tail(&cqr->list, &device->ccw_queue);
-               dasd_profile_start(device, cqr, req);
-               nr_queued++;
+               if (cqr->block)
+                       spin_unlock_bh(&cqr->block->queue_lock);
+               if (cqr->callback != NULL)
+                       (cqr->callback)(cqr, cqr->callback_data);
        }
 }
 
+
+
 /*
  * Take a look at the first request on the ccw queue and check
  * if it reached its expire time. If so, terminate the IO.
  */
-static void
-__dasd_check_expire(struct dasd_device * device)
+static void __dasd_device_check_expire(struct dasd_device *device)
 {
        struct dasd_ccw_req *cqr;
 
        if (list_empty(&device->ccw_queue))
                return;
-       cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list);
+       cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist);
        if ((cqr->status == DASD_CQR_IN_IO && cqr->expires != 0) &&
            (time_after_eq(jiffies, cqr->expires + cqr->starttime))) {
                if (device->discipline->term_IO(cqr) != 0) {
                        /* Hmpf, try again in 5 sec */
-                       dasd_set_timer(device, 5*HZ);
                        DEV_MESSAGE(KERN_ERR, device,
                                    "internal error - timeout (%is) expired "
                                    "for cqr %p, termination failed, "
                                    "retrying in 5s",
                                    (cqr->expires/HZ), cqr);
+                       cqr->expires += 5*HZ;
+                       dasd_device_set_timer(device, 5*HZ);
                } else {
                        DEV_MESSAGE(KERN_ERR, device,
                                    "internal error - timeout (%is) expired "
@@ -1301,77 +1217,53 @@ __dasd_check_expire(struct dasd_device * device)
  * Take a look at the first request on the ccw queue and check
  * if it needs to be started.
  */
-static void
-__dasd_start_head(struct dasd_device * device)
+static void __dasd_device_start_head(struct dasd_device *device)
 {
        struct dasd_ccw_req *cqr;
        int rc;
 
        if (list_empty(&device->ccw_queue))
                return;
-       cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list);
+       cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist);
        if (cqr->status != DASD_CQR_QUEUED)
                return;
-       /* Non-temporary stop condition will trigger fail fast */
-       if (device->stopped & ~DASD_STOPPED_PENDING &&
-           test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
-           (!dasd_eer_enabled(device))) {
-               cqr->status = DASD_CQR_FAILED;
-               dasd_schedule_bh(device);
+       /* when device is stopped, return request to previous layer */
+       if (device->stopped) {
+               cqr->status = DASD_CQR_CLEARED;
+               dasd_schedule_device_bh(device);
                return;
        }
-       /* Don't try to start requests if device is stopped */
-       if (device->stopped)
-               return;
 
        rc = device->discipline->start_IO(cqr);
        if (rc == 0)
-               dasd_set_timer(device, cqr->expires);
+               dasd_device_set_timer(device, cqr->expires);
        else if (rc == -EACCES) {
-               dasd_schedule_bh(device);
+               dasd_schedule_device_bh(device);
        } else
                /* Hmpf, try again in 1/2 sec */
-               dasd_set_timer(device, 50);
-}
-
-static inline int
-_wait_for_clear(struct dasd_ccw_req *cqr)
-{
-       return (cqr->status == DASD_CQR_QUEUED);
+               dasd_device_set_timer(device, 50);
 }
 
 /*
- * Remove all requests from the ccw queue (all = '1') or only block device
- * requests in case all = '0'.
- * Take care of the erp-chain (chained via cqr->refers) and remove either
- * the whole erp-chain or none of the erp-requests.
- * If a request is currently running, term_IO is called and the request
- * is re-queued. Prior to removing the terminated request we need to wait
- * for the clear-interrupt.
- * In case termination is not possible we stop processing and just finishing
- * the already moved requests.
+ * Go through all requests on the dasd_device request queue,
+ * terminate them on the cdev if necessary, and return them to the
+ * submitting layer via callback.
+ * Note:
+ * Make sure that all 'submitting layers' still exist when
+ * this function is called! In other words, when 'device' is a base
+ * device then all block layer requests must have been removed before
+ * via dasd_flush_block_queue.
  */
-static int
-dasd_flush_ccw_queue(struct dasd_device * device, int all)
+int dasd_flush_device_queue(struct dasd_device *device)
 {
-       struct dasd_ccw_req *cqr, *orig, *n;
-       int rc, i;
-
+       struct dasd_ccw_req *cqr, *n;
+       int rc;
        struct list_head flush_queue;
 
        INIT_LIST_HEAD(&flush_queue);
        spin_lock_irq(get_ccwdev_lock(device->cdev));
        rc = 0;
-restart:
-       list_for_each_entry_safe(cqr, n, &device->ccw_queue, list) {
-               /* get original request of erp request-chain */
-               for (orig = cqr; orig->refers != NULL; orig = orig->refers);
-
-               /* Flush all request or only block device requests? */
-               if (all == 0 && cqr->callback != dasd_end_request_cb &&
-                   orig->callback != dasd_end_request_cb) {
-                       continue;
-               }
+       list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
                /* Check status and move request to flush_queue */
                switch (cqr->status) {
                case DASD_CQR_IN_IO:
@@ -1387,90 +1279,60 @@ restart:
                        }
                        break;
                case DASD_CQR_QUEUED:
-               case DASD_CQR_ERROR:
-                       /* set request to FAILED */
                        cqr->stopclk = get_clock();
-                       cqr->status = DASD_CQR_FAILED;
+                       cqr->status = DASD_CQR_CLEARED;
                        break;
-               default: /* do not touch the others */
+               default: /* no need to modify the others */
                        break;
                }
-               /* Rechain request (including erp chain) */
-               for (i = 0; cqr != NULL; cqr = cqr->refers, i++) {
-                       cqr->endclk = get_clock();
-                       list_move_tail(&cqr->list, &flush_queue);
-               }
-               if (i > 1)
-                       /* moved more than one request - need to restart */
-                       goto restart;
+               list_move_tail(&cqr->devlist, &flush_queue);
        }
-
 finished:
        spin_unlock_irq(get_ccwdev_lock(device->cdev));
-       /* Now call the callback function of flushed requests */
-restart_cb:
-       list_for_each_entry_safe(cqr, n, &flush_queue, list) {
-               if (cqr->status == DASD_CQR_CLEAR) {
-                       /* wait for clear interrupt! */
-                       wait_event(dasd_flush_wq, _wait_for_clear(cqr));
-                       cqr->status = DASD_CQR_FAILED;
-               }
-               /* Process finished ERP request. */
-               if (cqr->refers) {
-                       __dasd_process_erp(device, cqr);
-                       /* restart list_for_xx loop since dasd_process_erp
-                        * might remove multiple elements */
-                       goto restart_cb;
-               }
-               /* call the callback function */
-               cqr->endclk = get_clock();
-               if (cqr->callback != NULL)
-                       (cqr->callback)(cqr, cqr->callback_data);
-       }
+       /*
+        * After this point all requests must be in state CLEAR_PENDING,
+        * CLEARED, SUCCESS or ERROR. Now wait for CLEAR_PENDING to become
+        * one of the others.
+        */
+       list_for_each_entry_safe(cqr, n, &flush_queue, devlist)
+               wait_event(dasd_flush_wq,
+                          (cqr->status != DASD_CQR_CLEAR_PENDING));
+       /*
+        * Now set each request back to TERMINATED, DONE or NEED_ERP
+        * and call the callback function of flushed requests
+        */
+       __dasd_device_process_final_queue(device, &flush_queue);
        return rc;
 }
 
 /*
  * Acquire the device lock and process queues for the device.
  */
-static void
-dasd_tasklet(struct dasd_device * device)
+static void dasd_device_tasklet(struct dasd_device *device)
 {
        struct list_head final_queue;
-       struct list_head *l, *n;
-       struct dasd_ccw_req *cqr;
 
        atomic_set (&device->tasklet_scheduled, 0);
        INIT_LIST_HEAD(&final_queue);
        spin_lock_irq(get_ccwdev_lock(device->cdev));
        /* Check expire time of first request on the ccw queue. */
-       __dasd_check_expire(device);
-       /* Finish off requests on ccw queue */
-       __dasd_process_ccw_queue(device, &final_queue);
+       __dasd_device_check_expire(device);
+       /* find final requests on ccw queue */
+       __dasd_device_process_ccw_queue(device, &final_queue);
        spin_unlock_irq(get_ccwdev_lock(device->cdev));
        /* Now call the callback function of requests with final status */
-       list_for_each_safe(l, n, &final_queue) {
-               cqr = list_entry(l, struct dasd_ccw_req, list);
-               list_del_init(&cqr->list);
-               if (cqr->callback != NULL)
-                       (cqr->callback)(cqr, cqr->callback_data);
-       }
-       spin_lock_irq(&device->request_queue_lock);
-       spin_lock(get_ccwdev_lock(device->cdev));
-       /* Get new request from the block device request queue */
-       __dasd_process_blk_queue(device);
+       __dasd_device_process_final_queue(device, &final_queue);
+       spin_lock_irq(get_ccwdev_lock(device->cdev));
        /* Now check if the head of the ccw queue needs to be started. */
-       __dasd_start_head(device);
-       spin_unlock(get_ccwdev_lock(device->cdev));
-       spin_unlock_irq(&device->request_queue_lock);
+       __dasd_device_start_head(device);
+       spin_unlock_irq(get_ccwdev_lock(device->cdev));
        dasd_put_device(device);
 }
 
 /*
  * Schedules a call to dasd_tasklet over the device tasklet.
  */
-void
-dasd_schedule_bh(struct dasd_device * device)
+void dasd_schedule_device_bh(struct dasd_device *device)
 {
        /* Protect against rescheduling. */
        if (atomic_cmpxchg (&device->tasklet_scheduled, 0, 1) != 0)
@@ -1480,160 +1342,109 @@ dasd_schedule_bh(struct dasd_device * device)
 }
 
 /*
- * Queue a request to the head of the ccw_queue. Start the I/O if
- * possible.
+ * Queue a request to the head of the device ccw_queue.
+ * Start the I/O if possible.
  */
-void
-dasd_add_request_head(struct dasd_ccw_req *req)
+void dasd_add_request_head(struct dasd_ccw_req *cqr)
 {
        struct dasd_device *device;
        unsigned long flags;
 
-       device = req->device;
+       device = cqr->startdev;
        spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
-       req->status = DASD_CQR_QUEUED;
-       req->device = device;
-       list_add(&req->list, &device->ccw_queue);
+       cqr->status = DASD_CQR_QUEUED;
+       list_add(&cqr->devlist, &device->ccw_queue);
        /* let the bh start the request to keep them in order */
-       dasd_schedule_bh(device);
+       dasd_schedule_device_bh(device);
        spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
 }
 
 /*
- * Queue a request to the tail of the ccw_queue. Start the I/O if
- * possible.
+ * Queue a request to the tail of the device ccw_queue.
+ * Start the I/O if possible.
  */
-void
-dasd_add_request_tail(struct dasd_ccw_req *req)
+void dasd_add_request_tail(struct dasd_ccw_req *cqr)
 {
        struct dasd_device *device;
        unsigned long flags;
 
-       device = req->device;
+       device = cqr->startdev;
        spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
-       req->status = DASD_CQR_QUEUED;
-       req->device = device;
-       list_add_tail(&req->list, &device->ccw_queue);
+       cqr->status = DASD_CQR_QUEUED;
+       list_add_tail(&cqr->devlist, &device->ccw_queue);
        /* let the bh start the request to keep them in order */
-       dasd_schedule_bh(device);
+       dasd_schedule_device_bh(device);
        spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
 }
 
 /*
- * Wakeup callback.
+ * Wakeup helper for the 'sleep_on' functions.
  */
-static void
-dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data)
+static void dasd_wakeup_cb(struct dasd_ccw_req *cqr, void *data)
 {
        wake_up((wait_queue_head_t *) data);
 }
 
-static inline int
-_wait_for_wakeup(struct dasd_ccw_req *cqr)
+static inline int _wait_for_wakeup(struct dasd_ccw_req *cqr)
 {
        struct dasd_device *device;
        int rc;
 
-       device = cqr->device;
+       device = cqr->startdev;
        spin_lock_irq(get_ccwdev_lock(device->cdev));
        rc = ((cqr->status == DASD_CQR_DONE ||
-              cqr->status == DASD_CQR_FAILED) &&
-             list_empty(&cqr->list));
+              cqr->status == DASD_CQR_NEED_ERP ||
+              cqr->status == DASD_CQR_TERMINATED) &&
+             list_empty(&cqr->devlist));
        spin_unlock_irq(get_ccwdev_lock(device->cdev));
        return rc;
 }
 
 /*
- * Attempts to start a special ccw queue and waits for its completion.
+ * Queue a request to the tail of the device ccw_queue and wait for
+ * its completion.
  */
-int
-dasd_sleep_on(struct dasd_ccw_req * cqr)
+int dasd_sleep_on(struct dasd_ccw_req *cqr)
 {
        wait_queue_head_t wait_q;
        struct dasd_device *device;
        int rc;
 
-       device = cqr->device;
-       spin_lock_irq(get_ccwdev_lock(device->cdev));
+       device = cqr->startdev;
 
        init_waitqueue_head (&wait_q);
        cqr->callback = dasd_wakeup_cb;
        cqr->callback_data = (void *) &wait_q;
-       cqr->status = DASD_CQR_QUEUED;
-       list_add_tail(&cqr->list, &device->ccw_queue);
-
-       /* let the bh start the request to keep them in order */
-       dasd_schedule_bh(device);
-
-       spin_unlock_irq(get_ccwdev_lock(device->cdev));
-
+       dasd_add_request_tail(cqr);
        wait_event(wait_q, _wait_for_wakeup(cqr));
 
        /* Request status is either done or failed. */
-       rc = (cqr->status == DASD_CQR_FAILED) ? -EIO : 0;
+       rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
        return rc;
 }
 
 /*
- * Attempts to start a special ccw queue and wait interruptible
- * for its completion.
+ * Queue a request to the tail of the device ccw_queue and wait
+ * interruptible for its completion.
  */
-int
-dasd_sleep_on_interruptible(struct dasd_ccw_req * cqr)
+int dasd_sleep_on_interruptible(struct dasd_ccw_req *cqr)
 {
        wait_queue_head_t wait_q;
        struct dasd_device *device;
-       int rc, finished;
-
-       device = cqr->device;
-       spin_lock_irq(get_ccwdev_lock(device->cdev));
+       int rc;
 
+       device = cqr->startdev;
        init_waitqueue_head (&wait_q);
        cqr->callback = dasd_wakeup_cb;
        cqr->callback_data = (void *) &wait_q;
-       cqr->status = DASD_CQR_QUEUED;
-       list_add_tail(&cqr->list, &device->ccw_queue);
-
-       /* let the bh start the request to keep them in order */
-       dasd_schedule_bh(device);
-       spin_unlock_irq(get_ccwdev_lock(device->cdev));
-
-       finished = 0;
-       while (!finished) {
-               rc = wait_event_interruptible(wait_q, _wait_for_wakeup(cqr));
-               if (rc != -ERESTARTSYS) {
-                       /* Request is final (done or failed) */
-                       rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
-                       break;
-               }
-               spin_lock_irq(get_ccwdev_lock(device->cdev));
-               switch (cqr->status) {
-               case DASD_CQR_IN_IO:
-                        /* terminate runnig cqr */
-                       if (device->discipline->term_IO) {
-                               cqr->retries = -1;
-                               device->discipline->term_IO(cqr);
-                               /* wait (non-interruptible) for final status
-                                * because signal ist still pending */
-                               spin_unlock_irq(get_ccwdev_lock(device->cdev));
-                               wait_event(wait_q, _wait_for_wakeup(cqr));
-                               spin_lock_irq(get_ccwdev_lock(device->cdev));
-                               rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
-                               finished = 1;
-                       }
-                       break;
-               case DASD_CQR_QUEUED:
-                       /* request  */
-                       list_del_init(&cqr->list);
-                       rc = -EIO;
-                       finished = 1;
-                       break;
-               default:
-                       /* cqr with 'non-interruptable' status - just wait */
-                       break;
-               }
-               spin_unlock_irq(get_ccwdev_lock(device->cdev));
+       dasd_add_request_tail(cqr);
+       rc = wait_event_interruptible(wait_q, _wait_for_wakeup(cqr));
+       if (rc == -ERESTARTSYS) {
+               dasd_cancel_req(cqr);
+               /* wait (non-interruptible) for final status */
+               wait_event(wait_q, _wait_for_wakeup(cqr));
        }
+       rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
        return rc;
 }
 
@@ -1643,25 +1454,23 @@ dasd_sleep_on_interruptible(struct dasd_ccw_req * cqr)
  * and be put back to status queued, before the special request is added
  * to the head of the queue. Then the special request is waited on normally.
  */
-static inline int
-_dasd_term_running_cqr(struct dasd_device *device)
+static inline int _dasd_term_running_cqr(struct dasd_device *device)
 {
        struct dasd_ccw_req *cqr;
 
        if (list_empty(&device->ccw_queue))
                return 0;
-       cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, list);
+       cqr = list_entry(device->ccw_queue.next, struct dasd_ccw_req, devlist);
        return device->discipline->term_IO(cqr);
 }
 
-int
-dasd_sleep_on_immediatly(struct dasd_ccw_req * cqr)
+int dasd_sleep_on_immediatly(struct dasd_ccw_req *cqr)
 {
        wait_queue_head_t wait_q;
        struct dasd_device *device;
        int rc;
 
-       device = cqr->device;
+       device = cqr->startdev;
        spin_lock_irq(get_ccwdev_lock(device->cdev));
        rc = _dasd_term_running_cqr(device);
        if (rc) {
@@ -1673,17 +1482,17 @@ dasd_sleep_on_immediatly(struct dasd_ccw_req * cqr)
        cqr->callback = dasd_wakeup_cb;
        cqr->callback_data = (void *) &wait_q;
        cqr->status = DASD_CQR_QUEUED;
-       list_add(&cqr->list, &device->ccw_queue);
+       list_add(&cqr->devlist, &device->ccw_queue);
 
        /* let the bh start the request to keep them in order */
-       dasd_schedule_bh(device);
+       dasd_schedule_device_bh(device);
 
        spin_unlock_irq(get_ccwdev_lock(device->cdev));
 
        wait_event(wait_q, _wait_for_wakeup(cqr));
 
        /* Request status is either done or failed. */
-       rc = (cqr->status == DASD_CQR_FAILED) ? -EIO : 0;
+       rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
        return rc;
 }
 
@@ -1692,11 +1501,14 @@ dasd_sleep_on_immediatly(struct dasd_ccw_req * cqr)
  * This is useful to timeout requests. The request will be
  * terminated if it is currently in i/o.
  * Returns 1 if the request has been terminated.
+ *        0 if there was no need to terminate the request (not started yet)
+ *        negative error code if termination failed
+ * Cancellation of a request is an asynchronous operation! The calling
+ * function has to wait until the request is properly returned via callback.
  */
-int
-dasd_cancel_req(struct dasd_ccw_req *cqr)
+int dasd_cancel_req(struct dasd_ccw_req *cqr)
 {
-       struct dasd_device *device = cqr->device;
+       struct dasd_device *device = cqr->startdev;
        unsigned long flags;
        int rc;
 
@@ -1704,74 +1516,453 @@ dasd_cancel_req(struct dasd_ccw_req *cqr)
        spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
        switch (cqr->status) {
        case DASD_CQR_QUEUED:
-               /* request was not started - just set to failed */
-               cqr->status = DASD_CQR_FAILED;
+               /* request was not started - just set to cleared */
+               cqr->status = DASD_CQR_CLEARED;
                break;
        case DASD_CQR_IN_IO:
                /* request in IO - terminate IO and release again */
-               if (device->discipline->term_IO(cqr) != 0)
-                       /* what to do if unable to terminate ??????
-                          e.g. not _IN_IO */
-                       cqr->status = DASD_CQR_FAILED;
-               cqr->stopclk = get_clock();
-               rc = 1;
+               rc = device->discipline->term_IO(cqr);
+               if (rc) {
+                       DEV_MESSAGE(KERN_ERR, device,
+                                   "dasd_cancel_req is unable "
+                                   " to terminate request %p, rc = %d",
+                                   cqr, rc);
+               } else {
+                       cqr->stopclk = get_clock();
+                       rc = 1;
+               }
                break;
-       case DASD_CQR_DONE:
-       case DASD_CQR_FAILED:
-               /* already finished - do nothing */
+       default: /* already finished or clear pending - do nothing */
                break;
-       default:
-               DEV_MESSAGE(KERN_ALERT, device,
-                           "invalid status %02x in request",
-                           cqr->status);
+       }
+       spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
+       dasd_schedule_device_bh(device);
+       return rc;
+}
+
+
+/*
+ * SECTION: Operations of the dasd_block layer.
+ */
+
+/*
+ * Timeout function for dasd_block. This is used when the block layer
+ * is waiting for something that may not come reliably, (e.g. a state
+ * change interrupt)
+ */
+static void dasd_block_timeout(unsigned long ptr)
+{
+       unsigned long flags;
+       struct dasd_block *block;
+
+       block = (struct dasd_block *) ptr;
+       spin_lock_irqsave(get_ccwdev_lock(block->base->cdev), flags);
+       /* re-activate request queue */
+       block->base->stopped &= ~DASD_STOPPED_PENDING;
+       spin_unlock_irqrestore(get_ccwdev_lock(block->base->cdev), flags);
+       dasd_schedule_block_bh(block);
+}
+
+/*
+ * Setup timeout for a dasd_block in jiffies.
+ */
+void dasd_block_set_timer(struct dasd_block *block, int expires)
+{
+       if (expires == 0) {
+               if (timer_pending(&block->timer))
+                       del_timer(&block->timer);
+               return;
+       }
+       if (timer_pending(&block->timer)) {
+               if (mod_timer(&block->timer, jiffies + expires))
+                       return;
+       }
+       block->timer.function = dasd_block_timeout;
+       block->timer.data = (unsigned long) block;
+       block->timer.expires = jiffies + expires;
+       add_timer(&block->timer);
+}
+
+/*
+ * Clear timeout for a dasd_block.
+ */
+void dasd_block_clear_timer(struct dasd_block *block)
+{
+       if (timer_pending(&block->timer))
+               del_timer(&block->timer);
+}
+
+/*
+ * posts the buffer_cache about a finalized request
+ */
+static inline void dasd_end_request(struct request *req, int uptodate)
+{
+       if (end_that_request_first(req, uptodate, req->hard_nr_sectors))
                BUG();
+       add_disk_randomness(req->rq_disk);
+       end_that_request_last(req, uptodate);
+}
+
+/*
+ * Process finished error recovery ccw.
+ */
+static inline void __dasd_block_process_erp(struct dasd_block *block,
+                                           struct dasd_ccw_req *cqr)
+{
+       dasd_erp_fn_t erp_fn;
+       struct dasd_device *device = block->base;
+
+       if (cqr->status == DASD_CQR_DONE)
+               DBF_DEV_EVENT(DBF_NOTICE, device, "%s", "ERP successful");
+       else
+               DEV_MESSAGE(KERN_ERR, device, "%s", "ERP unsuccessful");
+       erp_fn = device->discipline->erp_postaction(cqr);
+       erp_fn(cqr);
+}
 
+/*
+ * Fetch requests from the block device queue.
+ */
+static void __dasd_process_request_queue(struct dasd_block *block)
+{
+       struct request_queue *queue;
+       struct request *req;
+       struct dasd_ccw_req *cqr;
+       struct dasd_device *basedev;
+       unsigned long flags;
+       queue = block->request_queue;
+       basedev = block->base;
+       /* No queue ? Then there is nothing to do. */
+       if (queue == NULL)
+               return;
+
+       /*
+        * We requeue request from the block device queue to the ccw
+        * queue only in two states. In state DASD_STATE_READY the
+        * partition detection is done and we need to requeue requests
+        * for that. State DASD_STATE_ONLINE is normal block device
+        * operation.
+        */
+       if (basedev->state < DASD_STATE_READY)
+               return;
+       /* Now we try to fetch requests from the request queue */
+       while (!blk_queue_plugged(queue) &&
+              elv_next_request(queue)) {
+
+               req = elv_next_request(queue);
+
+               if (basedev->features & DASD_FEATURE_READONLY &&
+                   rq_data_dir(req) == WRITE) {
+                       DBF_DEV_EVENT(DBF_ERR, basedev,
+                                     "Rejecting write request %p",
+                                     req);
+                       blkdev_dequeue_request(req);
+                       dasd_end_request(req, 0);
+                       continue;
+               }
+               cqr = basedev->discipline->build_cp(basedev, block, req);
+               if (IS_ERR(cqr)) {
+                       if (PTR_ERR(cqr) == -EBUSY)
+                               break;  /* normal end condition */
+                       if (PTR_ERR(cqr) == -ENOMEM)
+                               break;  /* terminate request queue loop */
+                       if (PTR_ERR(cqr) == -EAGAIN) {
+                               /*
+                                * The current request cannot be built right
+                                * now, we have to try later. If this request
+                                * is the head-of-queue we stop the device
+                                * for 1/2 second.
+                                */
+                               if (!list_empty(&block->ccw_queue))
+                                       break;
+                               spin_lock_irqsave(get_ccwdev_lock(basedev->cdev), flags);
+                               basedev->stopped |= DASD_STOPPED_PENDING;
+                               spin_unlock_irqrestore(get_ccwdev_lock(basedev->cdev), flags);
+                               dasd_block_set_timer(block, HZ/2);
+                               break;
+                       }
+                       DBF_DEV_EVENT(DBF_ERR, basedev,
+                                     "CCW creation failed (rc=%ld) "
+                                     "on request %p",
+                                     PTR_ERR(cqr), req);
+                       blkdev_dequeue_request(req);
+                       dasd_end_request(req, 0);
+                       continue;
+               }
+               /*
+                *  Note: callback is set to dasd_return_cqr_cb in
+                * __dasd_block_start_head to cover erp requests as well
+                */
+               cqr->callback_data = (void *) req;
+               cqr->status = DASD_CQR_FILLED;
+               blkdev_dequeue_request(req);
+               list_add_tail(&cqr->blocklist, &block->ccw_queue);
+               dasd_profile_start(block, cqr, req);
+       }
+}
+
+static void __dasd_cleanup_cqr(struct dasd_ccw_req *cqr)
+{
+       struct request *req;
+       int status;
+
+       req = (struct request *) cqr->callback_data;
+       dasd_profile_end(cqr->block, cqr, req);
+       status = cqr->memdev->discipline->free_cp(cqr, req);
+       dasd_end_request(req, status);
+}
+
+/*
+ * Process ccw request queue.
+ */
+static void __dasd_process_block_ccw_queue(struct dasd_block *block,
+                                          struct list_head *final_queue)
+{
+       struct list_head *l, *n;
+       struct dasd_ccw_req *cqr;
+       dasd_erp_fn_t erp_fn;
+       unsigned long flags;
+       struct dasd_device *base = block->base;
+
+restart:
+       /* Process request with final status. */
+       list_for_each_safe(l, n, &block->ccw_queue) {
+               cqr = list_entry(l, struct dasd_ccw_req, blocklist);
+               if (cqr->status != DASD_CQR_DONE &&
+                   cqr->status != DASD_CQR_FAILED &&
+                   cqr->status != DASD_CQR_NEED_ERP &&
+                   cqr->status != DASD_CQR_TERMINATED)
+                       continue;
+
+               if (cqr->status == DASD_CQR_TERMINATED) {
+                       base->discipline->handle_terminated_request(cqr);
+                       goto restart;
+               }
+
+               /*  Process requests that may be recovered */
+               if (cqr->status == DASD_CQR_NEED_ERP) {
+                       if (cqr->irb.esw.esw0.erw.cons &&
+                           test_bit(DASD_CQR_FLAGS_USE_ERP,
+                                    &cqr->flags)) {
+                               erp_fn = base->discipline->erp_action(cqr);
+                               erp_fn(cqr);
+                       }
+                       goto restart;
+               }
+
+               /* First of all call extended error reporting. */
+               if (dasd_eer_enabled(base) &&
+                   cqr->status == DASD_CQR_FAILED) {
+                       dasd_eer_write(base, cqr, DASD_EER_FATALERROR);
+
+                       /* restart request  */
+                       cqr->status = DASD_CQR_FILLED;
+                       cqr->retries = 255;
+                       spin_lock_irqsave(get_ccwdev_lock(base->cdev), flags);
+                       base->stopped |= DASD_STOPPED_QUIESCE;
+                       spin_unlock_irqrestore(get_ccwdev_lock(base->cdev),
+                                              flags);
+                       goto restart;
+               }
+
+               /* Process finished ERP request. */
+               if (cqr->refers) {
+                       __dasd_block_process_erp(block, cqr);
+                       goto restart;
+               }
+
+               /* Rechain finished requests to final queue */
+               cqr->endclk = get_clock();
+               list_move_tail(&cqr->blocklist, final_queue);
+       }
+}
+
+static void dasd_return_cqr_cb(struct dasd_ccw_req *cqr, void *data)
+{
+       dasd_schedule_block_bh(cqr->block);
+}
+
+static void __dasd_block_start_head(struct dasd_block *block)
+{
+       struct dasd_ccw_req *cqr;
+
+       if (list_empty(&block->ccw_queue))
+               return;
+       /* We always begin with the first requests on the queue, as some
+        * of previously started requests have to be enqueued on a
+        * dasd_device again for error recovery.
+        */
+       list_for_each_entry(cqr, &block->ccw_queue, blocklist) {
+               if (cqr->status != DASD_CQR_FILLED)
+                       continue;
+               /* Non-temporary stop condition will trigger fail fast */
+               if (block->base->stopped & ~DASD_STOPPED_PENDING &&
+                   test_bit(DASD_CQR_FLAGS_FAILFAST, &cqr->flags) &&
+                   (!dasd_eer_enabled(block->base))) {
+                       cqr->status = DASD_CQR_FAILED;
+                       dasd_schedule_block_bh(block);
+                       continue;
+               }
+               /* Don't try to start requests if device is stopped */
+               if (block->base->stopped)
+                       return;
+
+               /* just a fail safe check, should not happen */
+               if (!cqr->startdev)
+                       cqr->startdev = block->base;
+
+               /* make sure that the requests we submit find their way back */
+               cqr->callback = dasd_return_cqr_cb;
+
+               dasd_add_request_tail(cqr);
+       }
+}
+
+/*
+ * Central dasd_block layer routine. Takes requests from the generic
+ * block layer request queue, creates ccw requests, enqueues them on
+ * a dasd_device and processes ccw requests that have been returned.
+ */
+static void dasd_block_tasklet(struct dasd_block *block)
+{
+       struct list_head final_queue;
+       struct list_head *l, *n;
+       struct dasd_ccw_req *cqr;
+
+       atomic_set(&block->tasklet_scheduled, 0);
+       INIT_LIST_HEAD(&final_queue);
+       spin_lock(&block->queue_lock);
+       /* Finish off requests on ccw queue */
+       __dasd_process_block_ccw_queue(block, &final_queue);
+       spin_unlock(&block->queue_lock);
+       /* Now call the callback function of requests with final status */
+       spin_lock_irq(&block->request_queue_lock);
+       list_for_each_safe(l, n, &final_queue) {
+               cqr = list_entry(l, struct dasd_ccw_req, blocklist);
+               list_del_init(&cqr->blocklist);
+               __dasd_cleanup_cqr(cqr);
+       }
+       spin_lock(&block->queue_lock);
+       /* Get new request from the block device request queue */
+       __dasd_process_request_queue(block);
+       /* Now check if the head of the ccw queue needs to be started. */
+       __dasd_block_start_head(block);
+       spin_unlock(&block->queue_lock);
+       spin_unlock_irq(&block->request_queue_lock);
+       dasd_put_device(block->base);
+}
+
+static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data)
+{
+       wake_up(&dasd_flush_wq);
+}
+
+/*
+ * Go through all requests on the dasd_block request queue, cancel them
+ * on the respective dasd_device, and return them to the generic
+ * block layer.
+ */
+static int dasd_flush_block_queue(struct dasd_block *block)
+{
+       struct dasd_ccw_req *cqr, *n;
+       int rc, i;
+       struct list_head flush_queue;
+
+       INIT_LIST_HEAD(&flush_queue);
+       spin_lock_bh(&block->queue_lock);
+       rc = 0;
+restart:
+       list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) {
+               /* cancel this request if a dasd_device currently owns it */
+               if (cqr->status >= DASD_CQR_QUEUED)
+                       rc = dasd_cancel_req(cqr);
+               if (rc < 0)
+                       break;
+               /* Rechain request (including erp chain) so it won't be
+                * touched by the dasd_block_tasklet anymore.
+                * Replace the callback so we notice when the request
+                * is returned from the dasd_device layer.
+                */
+               cqr->callback = _dasd_wake_block_flush_cb;
+               for (i = 0; cqr != NULL; cqr = cqr->refers, i++)
+                       list_move_tail(&cqr->blocklist, &flush_queue);
+               if (i > 1)
+                       /* moved more than one request - need to restart */
+                       goto restart;
+       }
+       spin_unlock_bh(&block->queue_lock);
+       /* Now call the callback function of flushed requests */
+restart_cb:
+       list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) {
+               wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED));
+               /* Process finished ERP request. */
+               if (cqr->refers) {
+                       __dasd_block_process_erp(block, cqr);
+                       /* restart list_for_xx loop since dasd_process_erp
+                        * might remove multiple elements */
+                       goto restart_cb;
+               }
+               /* call the callback function */
+               cqr->endclk = get_clock();
+               list_del_init(&cqr->blocklist);
+               __dasd_cleanup_cqr(cqr);
        }
-       spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
-       dasd_schedule_bh(device);
        return rc;
 }
 
 /*
- * SECTION: Block device operations (request queue, partitions, open, release).
+ * Schedules a call to dasd_tasklet over the device tasklet.
+ */
+void dasd_schedule_block_bh(struct dasd_block *block)
+{
+       /* Protect against rescheduling. */
+       if (atomic_cmpxchg(&block->tasklet_scheduled, 0, 1) != 0)
+               return;
+       /* life cycle of block is bound to its base device */
+       dasd_get_device(block->base);
+       tasklet_hi_schedule(&block->tasklet);
+}
+
+
+/*
+ * SECTION: external block device operations
+ * (request queue handling, open, release, etc.)
  */
 
 /*
  * Dasd request queue function. Called from ll_rw_blk.c
  */
-static void
-do_dasd_request(struct request_queue * queue)
+static void do_dasd_request(struct request_queue *queue)
 {
-       struct dasd_device *device;
+       struct dasd_block *block;
 
-       device = (struct dasd_device *) queue->queuedata;
-       spin_lock(get_ccwdev_lock(device->cdev));
+       block = queue->queuedata;
+       spin_lock(&block->queue_lock);
        /* Get new request from the block device request queue */
-       __dasd_process_blk_queue(device);
+       __dasd_process_request_queue(block);
        /* Now check if the head of the ccw queue needs to be started. */
-       __dasd_start_head(device);
-       spin_unlock(get_ccwdev_lock(device->cdev));
+       __dasd_block_start_head(block);
+       spin_unlock(&block->queue_lock);
 }
 
 /*
  * Allocate and initialize request queue and default I/O scheduler.
  */
-static int
-dasd_alloc_queue(struct dasd_device * device)
+static int dasd_alloc_queue(struct dasd_block *block)
 {
        int rc;
 
-       device->request_queue = blk_init_queue(do_dasd_request,
-                                              &device->request_queue_lock);
-       if (device->request_queue == NULL)
+       block->request_queue = blk_init_queue(do_dasd_request,
+                                              &block->request_queue_lock);
+       if (block->request_queue == NULL)
                return -ENOMEM;
 
-       device->request_queue->queuedata = device;
+       block->request_queue->queuedata = block;
 
-       elevator_exit(device->request_queue->elevator);
-       rc = elevator_init(device->request_queue, "deadline");
+       elevator_exit(block->request_queue->elevator);
+       rc = elevator_init(block->request_queue, "deadline");
        if (rc) {
-               blk_cleanup_queue(device->request_queue);
+               blk_cleanup_queue(block->request_queue);
                return rc;
        }
        return 0;
@@ -1780,79 +1971,76 @@ dasd_alloc_queue(struct dasd_device * device)
 /*
  * Allocate and initialize request queue.
  */
-static void
-dasd_setup_queue(struct dasd_device * device)
+static void dasd_setup_queue(struct dasd_block *block)
 {
        int max;
 
-       blk_queue_hardsect_size(device->request_queue, device->bp_block);
-       max = device->discipline->max_blocks << device->s2b_shift;
-       blk_queue_max_sectors(device->request_queue, max);
-       blk_queue_max_phys_segments(device->request_queue, -1L);
-       blk_queue_max_hw_segments(device->request_queue, -1L);
-       blk_queue_max_segment_size(device->request_queue, -1L);
-       blk_queue_segment_boundary(device->request_queue, -1L);
-       blk_queue_ordered(device->request_queue, QUEUE_ORDERED_TAG, NULL);
+       blk_queue_hardsect_size(block->request_queue, block->bp_block);
+       max = block->base->discipline->max_blocks << block->s2b_shift;
+       blk_queue_max_sectors(block->request_queue, max);
+       blk_queue_max_phys_segments(block->request_queue, -1L);
+       blk_queue_max_hw_segments(block->request_queue, -1L);
+       blk_queue_max_segment_size(block->request_queue, -1L);
+       blk_queue_segment_boundary(block->request_queue, -1L);
+       blk_queue_ordered(block->request_queue, QUEUE_ORDERED_DRAIN, NULL);
 }
 
 /*
  * Deactivate and free request queue.
  */
-static void
-dasd_free_queue(struct dasd_device * device)
+static void dasd_free_queue(struct dasd_block *block)
 {
-       if (device->request_queue) {
-               blk_cleanup_queue(device->request_queue);
-               device->request_queue = NULL;
+       if (block->request_queue) {
+               blk_cleanup_queue(block->request_queue);
+               block->request_queue = NULL;
        }
 }
 
 /*
  * Flush request on the request queue.
  */
-static void
-dasd_flush_request_queue(struct dasd_device * device)
+static void dasd_flush_request_queue(struct dasd_block *block)
 {
        struct request *req;
 
-       if (!device->request_queue)
+       if (!block->request_queue)
                return;
 
-       spin_lock_irq(&device->request_queue_lock);
-       while ((req = elv_next_request(device->request_queue))) {
+       spin_lock_irq(&block->request_queue_lock);
+       while ((req = elv_next_request(block->request_queue))) {
                blkdev_dequeue_request(req);
                dasd_end_request(req, 0);
        }
-       spin_unlock_irq(&device->request_queue_lock);
+       spin_unlock_irq(&block->request_queue_lock);
 }
 
-static int
-dasd_open(struct inode *inp, struct file *filp)
+static int dasd_open(struct inode *inp, struct file *filp)
 {
        struct gendisk *disk = inp->i_bdev->bd_disk;
-       struct dasd_device *device = disk->private_data;
+       struct dasd_block *block = disk->private_data;
+       struct dasd_device *base = block->base;
        int rc;
 
-        atomic_inc(&device->open_count);
-       if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
+       atomic_inc(&block->open_count);
+       if (test_bit(DASD_FLAG_OFFLINE, &base->flags)) {
                rc = -ENODEV;
                goto unlock;
        }
 
-       if (!try_module_get(device->discipline->owner)) {
+       if (!try_module_get(base->discipline->owner)) {
                rc = -EINVAL;
                goto unlock;
        }
 
        if (dasd_probeonly) {
-               DEV_MESSAGE(KERN_INFO, device, "%s",
+               DEV_MESSAGE(KERN_INFO, base, "%s",
                            "No access to device due to probeonly mode");
                rc = -EPERM;
                goto out;
        }
 
-       if (device->state <= DASD_STATE_BASIC) {
-               DBF_DEV_EVENT(DBF_ERR, device, " %s",
+       if (base->state <= DASD_STATE_BASIC) {
+               DBF_DEV_EVENT(DBF_ERR, base, " %s",
                              " Cannot open unrecognized device");
                rc = -ENODEV;
                goto out;
@@ -1861,41 +2049,41 @@ dasd_open(struct inode *inp, struct file *filp)
        return 0;
 
 out:
-       module_put(device->discipline->owner);
+       module_put(base->discipline->owner);
 unlock:
-       atomic_dec(&device->open_count);
+       atomic_dec(&block->open_count);
        return rc;
 }
 
-static int
-dasd_release(struct inode *inp, struct file *filp)
+static int dasd_release(struct inode *inp, struct file *filp)
 {
        struct gendisk *disk = inp->i_bdev->bd_disk;
-       struct dasd_device *device = disk->private_data;
+       struct dasd_block *block = disk->private_data;
 
-       atomic_dec(&device->open_count);
-       module_put(device->discipline->owner);
+       atomic_dec(&block->open_count);
+       module_put(block->base->discipline->owner);
        return 0;
 }
 
 /*
  * Return disk geometry.
  */
-static int
-dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+static int dasd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-       struct dasd_device *device;
+       struct dasd_block *block;
+       struct dasd_device *base;
 
-       device = bdev->bd_disk->private_data;
-       if (!device)
+       block = bdev->bd_disk->private_data;
+       if (!block)
                return -ENODEV;
+       base = block->base;     /* only after block was NULL-checked */
 
-       if (!device->discipline ||
-           !device->discipline->fill_geometry)
+       if (!base->discipline ||
+           !base->discipline->fill_geometry)
                return -EINVAL;
 
-       device->discipline->fill_geometry(device, geo);
-       geo->start = get_start_sect(bdev) >> device->s2b_shift;
+       base->discipline->fill_geometry(block, geo);
+       geo->start = get_start_sect(bdev) >> block->s2b_shift;
        return 0;
 }
 
 
@@ -1909,6 +2097,9 @@ dasd_device_operations = {
        .getgeo         = dasd_getgeo,
 };
 
+/*******************************************************************************
+ * end of block device operations
+ */
 
 static void
 dasd_exit(void)
@@ -1937,9 +2128,8 @@ dasd_exit(void)
  * Initial attempt at a probe function. this can be simplified once
  * the other detection code is gone.
  */
-int
-dasd_generic_probe (struct ccw_device *cdev,
-                   struct dasd_discipline *discipline)
+int dasd_generic_probe(struct ccw_device *cdev,
+                      struct dasd_discipline *discipline)
 {
        int ret;
 
@@ -1969,19 +2159,20 @@ dasd_generic_probe (struct ccw_device *cdev,
                ret = ccw_device_set_online(cdev);
        if (ret)
                printk(KERN_WARNING
-                      "dasd_generic_probe: could not initially online "
-                      "ccw-device %s\n", cdev->dev.bus_id);
-       return ret;
+                      "dasd_generic_probe: could not initially "
+                      "online ccw-device %s; return code: %d\n",
+                      cdev->dev.bus_id, ret);
+       return 0;
 }
 
 /*
  * This will one day be called from a global not_oper handler.
  * It is also used by driver_unregister during module unload.
  */
-void
-dasd_generic_remove (struct ccw_device *cdev)
+void dasd_generic_remove(struct ccw_device *cdev)
 {
        struct dasd_device *device;
+       struct dasd_block *block;
 
        cdev->handler = NULL;
 
@@ -2001,7 +2192,15 @@ dasd_generic_remove (struct ccw_device *cdev)
         */
        dasd_set_target_state(device, DASD_STATE_NEW);
        /* dasd_delete_device destroys the device reference. */
+       block = device->block;
+       device->block = NULL;
        dasd_delete_device(device);
+       /*
+        * life cycle of block is bound to device, so delete it after
+        * device was safely removed
+        */
+       if (block)
+               dasd_free_block(block);
 }
 
 /*
@@ -2009,10 +2208,8 @@ dasd_generic_remove (struct ccw_device *cdev)
  * the device is detected for the first time and is supposed to be used
  * or the user has started activation through sysfs.
  */
-int
-dasd_generic_set_online (struct ccw_device *cdev,
-                        struct dasd_discipline *base_discipline)
-
+int dasd_generic_set_online(struct ccw_device *cdev,
+                           struct dasd_discipline *base_discipline)
 {
        struct dasd_discipline *discipline;
        struct dasd_device *device;
@@ -2048,6 +2245,7 @@ dasd_generic_set_online (struct ccw_device *cdev,
        device->base_discipline = base_discipline;
        device->discipline = discipline;
 
+       /* check_device will allocate block device if necessary */
        rc = discipline->check_device(device);
        if (rc) {
                printk (KERN_WARNING
@@ -2067,6 +2265,8 @@ dasd_generic_set_online (struct ccw_device *cdev,
                        cdev->dev.bus_id);
                rc = -ENODEV;
                dasd_set_target_state(device, DASD_STATE_NEW);
+               if (device->block)
+                       dasd_free_block(device->block);
                dasd_delete_device(device);
        } else
                pr_debug("dasd_generic device %s found\n",
@@ -2081,10 +2281,10 @@ dasd_generic_set_online (struct ccw_device *cdev,
        return rc;
 }
 
-int
-dasd_generic_set_offline (struct ccw_device *cdev)
+int dasd_generic_set_offline(struct ccw_device *cdev)
 {
        struct dasd_device *device;
+       struct dasd_block *block;
        int max_count, open_count;
 
        device = dasd_device_from_cdev(cdev);
@@ -2101,30 +2301,39 @@ dasd_generic_set_offline (struct ccw_device *cdev)
         * the blkdev_get in dasd_scan_partitions. We are only interested
         * in the other openers.
         */
-       max_count = device->bdev ? 0 : -1;
-       open_count = (int) atomic_read(&device->open_count);
-       if (open_count > max_count) {
-               if (open_count > 0)
-                       printk (KERN_WARNING "Can't offline dasd device with "
-                               "open count = %i.\n",
-                               open_count);
-               else
-                       printk (KERN_WARNING "%s",
-                               "Can't offline dasd device due to internal "
-                               "use\n");
-               clear_bit(DASD_FLAG_OFFLINE, &device->flags);
-               dasd_put_device(device);
-               return -EBUSY;
+       if (device->block) {
+               struct dasd_block *block = device->block;
+               max_count = block->bdev ? 0 : -1;
+               open_count = (int) atomic_read(&block->open_count);
+               if (open_count > max_count) {
+                       if (open_count > 0)
+                               printk(KERN_WARNING "Can't offline dasd "
+                                      "device with open count = %i.\n",
+                                      open_count);
+                       else
+                               printk(KERN_WARNING "%s",
+                                      "Can't offline dasd device due "
+                                      "to internal use\n");
+                       clear_bit(DASD_FLAG_OFFLINE, &device->flags);
+                       dasd_put_device(device);
+                       return -EBUSY;
+               }
        }
        dasd_set_target_state(device, DASD_STATE_NEW);
        /* dasd_delete_device destroys the device reference. */
+       block = device->block;
+       device->block = NULL;
        dasd_delete_device(device);
-
+       /*
+        * life cycle of block is bound to device, so delete it after
+        * device was safely removed
+        */
+       if (block)
+               dasd_free_block(block);
        return 0;
 }
 
-int
-dasd_generic_notify(struct ccw_device *cdev, int event)
+int dasd_generic_notify(struct ccw_device *cdev, int event)
 {
        struct dasd_device *device;
        struct dasd_ccw_req *cqr;
@@ -2145,27 +2354,22 @@ dasd_generic_notify(struct ccw_device *cdev, int event)
                if (device->state < DASD_STATE_BASIC)
                        break;
                /* Device is active. We want to keep it. */
-               if (test_bit(DASD_FLAG_DSC_ERROR, &device->flags)) {
-                       list_for_each_entry(cqr, &device->ccw_queue, list)
-                               if (cqr->status == DASD_CQR_IN_IO)
-                                       cqr->status = DASD_CQR_FAILED;
-                       device->stopped |= DASD_STOPPED_DC_EIO;
-               } else {
-                       list_for_each_entry(cqr, &device->ccw_queue, list)
-                               if (cqr->status == DASD_CQR_IN_IO) {
-                                       cqr->status = DASD_CQR_QUEUED;
-                                       cqr->retries++;
-                               }
-                       device->stopped |= DASD_STOPPED_DC_WAIT;
-                       dasd_set_timer(device, 0);
-               }
-               dasd_schedule_bh(device);
+               list_for_each_entry(cqr, &device->ccw_queue, devlist)
+                       if (cqr->status == DASD_CQR_IN_IO) {
+                               cqr->status = DASD_CQR_QUEUED;
+                               cqr->retries++;
+                       }
+               device->stopped |= DASD_STOPPED_DC_WAIT;
+               dasd_device_clear_timer(device);
+               dasd_schedule_device_bh(device);
                ret = 1;
                break;
        case CIO_OPER:
                /* FIXME: add a sanity check. */
-               device->stopped &= ~(DASD_STOPPED_DC_WAIT|DASD_STOPPED_DC_EIO);
-               dasd_schedule_bh(device);
+               device->stopped &= ~DASD_STOPPED_DC_WAIT;
+               dasd_schedule_device_bh(device);
+               if (device->block)
+                       dasd_schedule_block_bh(device->block);
                ret = 1;
                break;
        }
@@ -2195,7 +2399,8 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
        ccw->cda = (__u32)(addr_t)rdc_buffer;
        ccw->count = rdc_buffer_size;
 
-       cqr->device = device;
+       cqr->startdev = device;
+       cqr->memdev = device;
        cqr->expires = 10*HZ;
        clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
        cqr->retries = 2;
@@ -2217,13 +2422,12 @@ int dasd_generic_read_dev_chars(struct dasd_device *device, char *magic,
                return PTR_ERR(cqr);
 
        ret = dasd_sleep_on(cqr);
-       dasd_sfree_request(cqr, cqr->device);
+       dasd_sfree_request(cqr, cqr->memdev);
        return ret;
 }
 EXPORT_SYMBOL_GPL(dasd_generic_read_dev_chars);
 
-static int __init
-dasd_init(void)
+static int __init dasd_init(void)
 {
        int rc;
 
@@ -2231,7 +2435,7 @@ dasd_init(void)
        init_waitqueue_head(&dasd_flush_wq);
 
        /* register 'common' DASD debug area, used for all DBF_XXX calls */
-       dasd_debug_area = debug_register("dasd", 1, 2, 8 * sizeof (long));
+       dasd_debug_area = debug_register("dasd", 1, 1, 8 * sizeof(long));
        if (dasd_debug_area == NULL) {
                rc = -ENOMEM;
                goto failed;
@@ -2277,15 +2481,18 @@ EXPORT_SYMBOL(dasd_diag_discipline_pointer);
 EXPORT_SYMBOL(dasd_add_request_head);
 EXPORT_SYMBOL(dasd_add_request_tail);
 EXPORT_SYMBOL(dasd_cancel_req);
-EXPORT_SYMBOL(dasd_clear_timer);
+EXPORT_SYMBOL(dasd_device_clear_timer);
+EXPORT_SYMBOL(dasd_block_clear_timer);
 EXPORT_SYMBOL(dasd_enable_device);
 EXPORT_SYMBOL(dasd_int_handler);
 EXPORT_SYMBOL(dasd_kfree_request);
 EXPORT_SYMBOL(dasd_kick_device);
 EXPORT_SYMBOL(dasd_kmalloc_request);
-EXPORT_SYMBOL(dasd_schedule_bh);
+EXPORT_SYMBOL(dasd_schedule_device_bh);
+EXPORT_SYMBOL(dasd_schedule_block_bh);
 EXPORT_SYMBOL(dasd_set_target_state);
-EXPORT_SYMBOL(dasd_set_timer);
+EXPORT_SYMBOL(dasd_device_set_timer);
+EXPORT_SYMBOL(dasd_block_set_timer);
 EXPORT_SYMBOL(dasd_sfree_request);
 EXPORT_SYMBOL(dasd_sleep_on);
 EXPORT_SYMBOL(dasd_sleep_on_immediatly);
@@ -2299,4 +2506,7 @@ EXPORT_SYMBOL_GPL(dasd_generic_remove);
 EXPORT_SYMBOL_GPL(dasd_generic_notify);
 EXPORT_SYMBOL_GPL(dasd_generic_set_online);
 EXPORT_SYMBOL_GPL(dasd_generic_set_offline);
-
+EXPORT_SYMBOL_GPL(dasd_generic_handle_state_change);
+EXPORT_SYMBOL_GPL(dasd_flush_device_queue);
+EXPORT_SYMBOL_GPL(dasd_alloc_block);
+EXPORT_SYMBOL_GPL(dasd_free_block);
diff --git a/drivers/s390/block/dasd_3370_erp.c b/drivers/s390/block/dasd_3370_erp.c
deleted file mode 100644 (file)
index 1ddab89..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * File...........: linux/drivers/s390/block/dasd_3370_erp.c
- * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
- * Bugreports.to..: <Linux390@de.ibm.com>
- * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 2000
- *
- */
-
-#define PRINTK_HEADER "dasd_erp(3370)"
-
-#include "dasd_int.h"
-
-
-/*
- * DASD_3370_ERP_EXAMINE
- *
- * DESCRIPTION
- *   Checks only for fatal/no/recover error.
- *   A detailed examination of the sense data is done later outside
- *   the interrupt handler.
- *
- *   The logic is based on the 'IBM 3880 Storage Control Reference' manual
- *   'Chapter 7. 3370 Sense Data'.
- *
- * RETURN VALUES
- *   dasd_era_none     no error
- *   dasd_era_fatal    for all fatal (unrecoverable errors)
- *   dasd_era_recover  for all others.
- */
-dasd_era_t
-dasd_3370_erp_examine(struct dasd_ccw_req * cqr, struct irb * irb)
-{
-       char *sense = irb->ecw;
-
-       /* check for successful execution first */
-       if (irb->scsw.cstat == 0x00 &&
-           irb->scsw.dstat == (DEV_STAT_CHN_END | DEV_STAT_DEV_END))
-               return dasd_era_none;
-       if (sense[0] & 0x80) {  /* CMD reject */
-               return dasd_era_fatal;
-       }
-       if (sense[0] & 0x40) {  /* Drive offline */
-               return dasd_era_recover;
-       }
-       if (sense[0] & 0x20) {  /* Bus out parity */
-               return dasd_era_recover;
-       }
-       if (sense[0] & 0x10) {  /* equipment check */
-               if (sense[1] & 0x80) {
-                       return dasd_era_fatal;
-               }
-               return dasd_era_recover;
-       }
-       if (sense[0] & 0x08) {  /* data check */
-               if (sense[1] & 0x80) {
-                       return dasd_era_fatal;
-               }
-               return dasd_era_recover;
-       }
-       if (sense[0] & 0x04) {  /* overrun */
-               if (sense[1] & 0x80) {
-                       return dasd_era_fatal;
-               }
-               return dasd_era_recover;
-       }
-       if (sense[1] & 0x40) {  /* invalid blocksize */
-               return dasd_era_fatal;
-       }
-       if (sense[1] & 0x04) {  /* file protected */
-               return dasd_era_recover;
-       }
-       if (sense[1] & 0x01) {  /* operation incomplete */
-               return dasd_era_recover;
-       }
-       if (sense[2] & 0x80) {  /* check data erroor */
-               return dasd_era_recover;
-       }
-       if (sense[2] & 0x10) {  /* Env. data present */
-               return dasd_era_recover;
-       }
-       /* examine the 24 byte sense data */
-       return dasd_era_recover;
-
-}                              /* END dasd_3370_erp_examine */
index 5b7385e..c361ab6 100644 (file)
@@ -24,158 +24,6 @@ struct DCTL_data {
        unsigned short res;        /* reserved */
 } __attribute__ ((packed));
 
-/*
- *****************************************************************************
- * SECTION ERP EXAMINATION
- *****************************************************************************
- */
-
-/*
- * DASD_3990_ERP_EXAMINE_24
- *
- * DESCRIPTION
- *   Checks only for fatal (unrecoverable) error.
- *   A detailed examination of the sense data is done later outside
- *   the interrupt handler.
- *
- *   Each bit configuration leading to an action code 2 (Exit with
- *   programming error or unusual condition indication)
- *   are handled as fatal errors.
- *
- *   All other configurations are handled as recoverable errors.
- *
- * RETURN VALUES
- *   dasd_era_fatal    for all fatal (unrecoverable errors)
- *   dasd_era_recover  for all others.
- */
-static dasd_era_t
-dasd_3990_erp_examine_24(struct dasd_ccw_req * cqr, char *sense)
-{
-
-       struct dasd_device *device = cqr->device;
-
-       /* check for 'Command Reject' */
-       if ((sense[0] & SNS0_CMD_REJECT) &&
-           (!(sense[2] & SNS2_ENV_DATA_PRESENT))) {
-
-               DEV_MESSAGE(KERN_ERR, device, "%s",
-                           "EXAMINE 24: Command Reject detected - "
-                           "fatal error");
-
-               return dasd_era_fatal;
-       }
-
-       /* check for 'Invalid Track Format' */
-       if ((sense[1] & SNS1_INV_TRACK_FORMAT) &&
-           (!(sense[2] & SNS2_ENV_DATA_PRESENT))) {
-
-               DEV_MESSAGE(KERN_ERR, device, "%s",
-                           "EXAMINE 24: Invalid Track Format detected "
-                           "- fatal error");
-
-               return dasd_era_fatal;
-       }
-
-       /* check for 'No Record Found' */
-       if (sense[1] & SNS1_NO_REC_FOUND) {
-
-                /* FIXME: fatal error ?!? */
-               DEV_MESSAGE(KERN_ERR, device,
-                           "EXAMINE 24: No Record Found detected %s",
-                            device->state <= DASD_STATE_BASIC ?
-                           " " : "- fatal error");
-
-               return dasd_era_fatal;
-       }
-
-       /* return recoverable for all others */
-       return dasd_era_recover;
-}                              /* END dasd_3990_erp_examine_24 */
-
-/*
- * DASD_3990_ERP_EXAMINE_32
- *
- * DESCRIPTION
- *   Checks only for fatal/no/recoverable error.
- *   A detailed examination of the sense data is done later outside
- *   the interrupt handler.
- *
- * RETURN VALUES
- *   dasd_era_none     no error
- *   dasd_era_fatal    for all fatal (unrecoverable errors)
- *   dasd_era_recover  for recoverable others.
- */
-static dasd_era_t
-dasd_3990_erp_examine_32(struct dasd_ccw_req * cqr, char *sense)
-{
-
-       struct dasd_device *device = cqr->device;
-
-       switch (sense[25]) {
-       case 0x00:
-               return dasd_era_none;
-
-       case 0x01:
-               DEV_MESSAGE(KERN_ERR, device, "%s", "EXAMINE 32: fatal error");
-
-               return dasd_era_fatal;
-
-       default:
-
-               return dasd_era_recover;
-       }
-
-}                              /* end dasd_3990_erp_examine_32 */
-
-/*
- * DASD_3990_ERP_EXAMINE
- *
- * DESCRIPTION
- *   Checks only for fatal/no/recover error.
- *   A detailed examination of the sense data is done later outside
- *   the interrupt handler.
- *
- *   The logic is based on the 'IBM 3990 Storage Control  Reference' manual
- *   'Chapter 7. Error Recovery Procedures'.
- *
- * RETURN VALUES
- *   dasd_era_none     no error
- *   dasd_era_fatal    for all fatal (unrecoverable errors)
- *   dasd_era_recover  for all others.
- */
-dasd_era_t
-dasd_3990_erp_examine(struct dasd_ccw_req * cqr, struct irb * irb)
-{
-
-       char *sense = irb->ecw;
-       dasd_era_t era = dasd_era_recover;
-       struct dasd_device *device = cqr->device;
-
-       /* check for successful execution first */
-       if (irb->scsw.cstat == 0x00 &&
-           irb->scsw.dstat == (DEV_STAT_CHN_END | DEV_STAT_DEV_END))
-               return dasd_era_none;
-
-       /* distinguish between 24 and 32 byte sense data */
-       if (sense[27] & DASD_SENSE_BIT_0) {
-
-               era = dasd_3990_erp_examine_24(cqr, sense);
-
-       } else {
-
-               era = dasd_3990_erp_examine_32(cqr, sense);
-
-       }
-
-       /* log the erp chain if fatal error occurred */
-       if ((era == dasd_era_fatal) && (device->state >= DASD_STATE_READY)) {
-               dasd_log_sense(cqr, irb);
-       }
-
-       return era;
-
-}                              /* END dasd_3990_erp_examine */
-
 /*
  *****************************************************************************
  * SECTION ERP HANDLING
@@ -206,7 +54,7 @@ dasd_3990_erp_cleanup(struct dasd_ccw_req * erp, char final_status)
 {
        struct dasd_ccw_req *cqr = erp->refers;
 
-       dasd_free_erp_request(erp, erp->device);
+       dasd_free_erp_request(erp, erp->memdev);
        cqr->status = final_status;
        return cqr;
 
@@ -224,15 +72,17 @@ static void
 dasd_3990_erp_block_queue(struct dasd_ccw_req * erp, int expires)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
+       unsigned long flags;
 
        DEV_MESSAGE(KERN_INFO, device,
                    "blocking request queue for %is", expires/HZ);
 
+       spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
        device->stopped |= DASD_STOPPED_PENDING;
-       erp->status = DASD_CQR_QUEUED;
-
-       dasd_set_timer(device, expires);
+       spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
+       erp->status = DASD_CQR_FILLED;
+       dasd_block_set_timer(device->block, expires);
 }
 
 /*
@@ -251,7 +101,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_int_req(struct dasd_ccw_req * erp)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        /* first time set initial retry counter and erp_function */
        /* and retry once without blocking queue                 */
@@ -292,11 +142,14 @@ dasd_3990_erp_int_req(struct dasd_ccw_req * erp)
 static void
 dasd_3990_erp_alternate_path(struct dasd_ccw_req * erp)
 {
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
        __u8 opm;
+       unsigned long flags;
 
        /* try alternate valid path */
+       spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
        opm = ccw_device_get_path_mask(device->cdev);
+       spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
        //FIXME: start with get_opm ?
        if (erp->lpm == 0)
                erp->lpm = LPM_ANYPATH & ~(erp->irb.esw.esw0.sublog.lpum);
@@ -309,9 +162,8 @@ dasd_3990_erp_alternate_path(struct dasd_ccw_req * erp)
                            "try alternate lpm=%x (lpum=%x / opm=%x)",
                            erp->lpm, erp->irb.esw.esw0.sublog.lpum, opm);
 
-               /* reset status to queued to handle the request again... */
-               if (erp->status > DASD_CQR_QUEUED)
-                       erp->status = DASD_CQR_QUEUED;
+               /* reset status to submit the request again... */
+               erp->status = DASD_CQR_FILLED;
                erp->retries = 1;
        } else {
                DEV_MESSAGE(KERN_ERR, device,
@@ -320,8 +172,7 @@ dasd_3990_erp_alternate_path(struct dasd_ccw_req * erp)
                            erp->irb.esw.esw0.sublog.lpum, opm);
 
                /* post request with permanent error */
-               if (erp->status > DASD_CQR_QUEUED)
-                       erp->status = DASD_CQR_FAILED;
+               erp->status = DASD_CQR_FAILED;
        }
 }                              /* end dasd_3990_erp_alternate_path */
 
@@ -344,14 +195,14 @@ static struct dasd_ccw_req *
 dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
        struct DCTL_data *DCTL_data;
        struct ccw1 *ccw;
        struct dasd_ccw_req *dctl_cqr;
 
        dctl_cqr = dasd_alloc_erp_request((char *) &erp->magic, 1,
-                                         sizeof (struct DCTL_data),
-                                         erp->device);
+                                         sizeof(struct DCTL_data),
+                                         device);
        if (IS_ERR(dctl_cqr)) {
                DEV_MESSAGE(KERN_ERR, device, "%s",
                            "Unable to allocate DCTL-CQR");
@@ -365,13 +216,14 @@ dasd_3990_erp_DCTL(struct dasd_ccw_req * erp, char modifier)
        DCTL_data->modifier = modifier;
 
        ccw = dctl_cqr->cpaddr;
-       memset(ccw, 0, sizeof (struct ccw1));
+       memset(ccw, 0, sizeof(struct ccw1));
        ccw->cmd_code = CCW_CMD_DCTL;
        ccw->count = 4;
        ccw->cda = (__u32)(addr_t) DCTL_data;
        dctl_cqr->function = dasd_3990_erp_DCTL;
        dctl_cqr->refers = erp;
-       dctl_cqr->device = erp->device;
+       dctl_cqr->startdev = device;
+       dctl_cqr->memdev = device;
        dctl_cqr->magic = erp->magic;
        dctl_cqr->expires = 5 * 60 * HZ;
        dctl_cqr->retries = 2;
@@ -435,7 +287,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_action_4(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        /* first time set initial retry counter and erp_function    */
        /* and retry once without waiting for state change pending  */
@@ -472,7 +324,7 @@ dasd_3990_erp_action_4(struct dasd_ccw_req * erp, char *sense)
                                     "redriving request immediately, "
                                     "%d retries left",
                                     erp->retries);
-                       erp->status = DASD_CQR_QUEUED;
+                       erp->status = DASD_CQR_FILLED;
                }
        }
 
@@ -530,7 +382,7 @@ static void
 dasd_3990_handle_env_data(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
        char msg_format = (sense[7] & 0xF0);
        char msg_no = (sense[7] & 0x0F);
 
@@ -1157,7 +1009,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_com_rej(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        erp->function = dasd_3990_erp_com_rej;
 
@@ -1198,7 +1050,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_bus_out(struct dasd_ccw_req * erp)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        /* first time set initial retry counter and erp_function */
        /* and retry once without blocking queue                 */
@@ -1237,7 +1089,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_equip_check(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        erp->function = dasd_3990_erp_equip_check;
 
@@ -1279,7 +1131,6 @@ dasd_3990_erp_equip_check(struct dasd_ccw_req * erp, char *sense)
 
                erp = dasd_3990_erp_action_5(erp);
        }
-
        return erp;
 
 }                              /* end dasd_3990_erp_equip_check */
@@ -1299,7 +1150,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_data_check(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        erp->function = dasd_3990_erp_data_check;
 
@@ -1358,7 +1209,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_overrun(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        erp->function = dasd_3990_erp_overrun;
 
@@ -1387,7 +1238,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_inv_format(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        erp->function = dasd_3990_erp_inv_format;
 
@@ -1403,8 +1254,7 @@ dasd_3990_erp_inv_format(struct dasd_ccw_req * erp, char *sense)
 
        } else {
                DEV_MESSAGE(KERN_ERR, device, "%s",
-                           "Invalid Track Format - Fatal error should have "
-                           "been handled within the interrupt handler");
+                           "Invalid Track Format - Fatal error");
 
                erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
        }
@@ -1428,7 +1278,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_EOC(struct dasd_ccw_req * default_erp, char *sense)
 {
 
-       struct dasd_device *device = default_erp->device;
+       struct dasd_device *device = default_erp->startdev;
 
        DEV_MESSAGE(KERN_ERR, device, "%s",
                    "End-of-Cylinder - must never happen");
@@ -1453,7 +1303,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_env_data(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        erp->function = dasd_3990_erp_env_data;
 
@@ -1463,11 +1313,9 @@ dasd_3990_erp_env_data(struct dasd_ccw_req * erp, char *sense)
 
        /* don't retry on disabled interface */
        if (sense[7] != 0x0F) {
-
                erp = dasd_3990_erp_action_4(erp, sense);
        } else {
-
-               erp = dasd_3990_erp_cleanup(erp, DASD_CQR_IN_IO);
+               erp->status = DASD_CQR_FILLED;
        }
 
        return erp;
@@ -1490,11 +1338,10 @@ static struct dasd_ccw_req *
 dasd_3990_erp_no_rec(struct dasd_ccw_req * default_erp, char *sense)
 {
 
-       struct dasd_device *device = default_erp->device;
+       struct dasd_device *device = default_erp->startdev;
 
        DEV_MESSAGE(KERN_ERR, device, "%s",
-                   "No Record Found - Fatal error should "
-                   "have been handled within the interrupt handler");
+                   "No Record Found - Fatal error ");
 
        return dasd_3990_erp_cleanup(default_erp, DASD_CQR_FAILED);
 
@@ -1517,7 +1364,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_file_prot(struct dasd_ccw_req * erp)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        DEV_MESSAGE(KERN_ERR, device, "%s", "File Protected");
 
@@ -1525,6 +1372,43 @@ dasd_3990_erp_file_prot(struct dasd_ccw_req * erp)
 
 }                              /* end dasd_3990_erp_file_prot */
 
+/*
+ * DASD_3990_ERP_INSPECT_ALIAS
+ *
+ * DESCRIPTION
+ *   Checks if the original request (erp->refers) was started on an alias
+ *   device, i.e. on a device other than the base device of its block.
+ *   If yes, the original request is handed to
+ *   dasd_eckd_reset_ccw_to_base_io() and the erp request is redirected
+ *   so that it can be started on the base device.
+ * PARAMETER
+ *   erp               pointer to the currently created default ERP
+ *
+ * RETURN VALUES
+ *   erp               the redirected ERP, or NULL if nothing was changed
+ */
+
+static struct dasd_ccw_req *dasd_3990_erp_inspect_alias(
+                                               struct dasd_ccw_req *erp)
+{
+       struct dasd_ccw_req *cqr = erp->refers; /* original request */
+
+       if (cqr->block &&
+           (cqr->block->base != cqr->startdev)) {
+               if (cqr->startdev->features & DASD_FEATURE_ERPLOG) {
+                       DEV_MESSAGE(KERN_ERR, cqr->startdev,
+                                   "ERP on alias device for request %p,"
+                                   " recover on base device %s", cqr,
+                                   cqr->block->base->cdev->dev.bus_id);
+               }
+               dasd_eckd_reset_ccw_to_base_io(cqr);
+               erp->startdev = cqr->block->base; /* run ERP on base */
+               erp->function = dasd_3990_erp_inspect_alias;
+               return erp;
+       } else
+               return NULL; /* no block, or started on its base */
+}
+
+
 /*
  * DASD_3990_ERP_INSPECT_24
  *
@@ -1623,7 +1507,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_action_10_32(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        erp->retries = 256;
        erp->function = dasd_3990_erp_action_10_32;
@@ -1657,13 +1541,14 @@ static struct dasd_ccw_req *
 dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense)
 {
 
-       struct dasd_device *device = default_erp->device;
+       struct dasd_device *device = default_erp->startdev;
        __u32 cpa = 0;
        struct dasd_ccw_req *cqr;
        struct dasd_ccw_req *erp;
        struct DE_eckd_data *DE_data;
+       struct PFX_eckd_data *PFX_data;
        char *LO_data;          /* LO_eckd_data_t */
-       struct ccw1 *ccw;
+       struct ccw1 *ccw, *oldccw;
 
        DEV_MESSAGE(KERN_DEBUG, device, "%s",
                    "Write not finished because of unexpected condition");
@@ -1702,8 +1587,8 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense)
        /* Build new ERP request including DE/LO */
        erp = dasd_alloc_erp_request((char *) &cqr->magic,
                                     2 + 1,/* DE/LO + TIC */
-                                    sizeof (struct DE_eckd_data) +
-                                    sizeof (struct LO_eckd_data), device);
+                                    sizeof(struct DE_eckd_data) +
+                                    sizeof(struct LO_eckd_data), device);
 
        if (IS_ERR(erp)) {
                DEV_MESSAGE(KERN_ERR, device, "%s", "Unable to allocate ERP");
@@ -1712,10 +1597,16 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense)
 
        /* use original DE */
        DE_data = erp->data;
-       memcpy(DE_data, cqr->data, sizeof (struct DE_eckd_data));
+       oldccw = cqr->cpaddr;
+       if (oldccw->cmd_code == DASD_ECKD_CCW_PFX) {
+               PFX_data = cqr->data;
+               memcpy(DE_data, &PFX_data->define_extend,
+                      sizeof(struct DE_eckd_data));
+       } else
+               memcpy(DE_data, cqr->data, sizeof(struct DE_eckd_data));
 
        /* create LO */
-       LO_data = erp->data + sizeof (struct DE_eckd_data);
+       LO_data = erp->data + sizeof(struct DE_eckd_data);
 
        if ((sense[3] == 0x01) && (LO_data[1] & 0x01)) {
 
@@ -1748,7 +1639,7 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense)
 
        /* create DE ccw */
        ccw = erp->cpaddr;
-       memset(ccw, 0, sizeof (struct ccw1));
+       memset(ccw, 0, sizeof(struct ccw1));
        ccw->cmd_code = DASD_ECKD_CCW_DEFINE_EXTENT;
        ccw->flags = CCW_FLAG_CC;
        ccw->count = 16;
@@ -1756,7 +1647,7 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense)
 
        /* create LO ccw */
        ccw++;
-       memset(ccw, 0, sizeof (struct ccw1));
+       memset(ccw, 0, sizeof(struct ccw1));
        ccw->cmd_code = DASD_ECKD_CCW_LOCATE_RECORD;
        ccw->flags = CCW_FLAG_CC;
        ccw->count = 16;
@@ -1770,7 +1661,8 @@ dasd_3990_erp_action_1B_32(struct dasd_ccw_req * default_erp, char *sense)
        /* fill erp related fields */
        erp->function = dasd_3990_erp_action_1B_32;
        erp->refers = default_erp->refers;
-       erp->device = device;
+       erp->startdev = device;
+       erp->memdev = device;
        erp->magic = default_erp->magic;
        erp->expires = 0;
        erp->retries = 256;
@@ -1803,7 +1695,7 @@ static struct dasd_ccw_req *
 dasd_3990_update_1B(struct dasd_ccw_req * previous_erp, char *sense)
 {
 
-       struct dasd_device *device = previous_erp->device;
+       struct dasd_device *device = previous_erp->startdev;
        __u32 cpa = 0;
        struct dasd_ccw_req *cqr;
        struct dasd_ccw_req *erp;
@@ -1827,7 +1719,7 @@ dasd_3990_update_1B(struct dasd_ccw_req * previous_erp, char *sense)
                DEV_MESSAGE(KERN_DEBUG, device, "%s",
                            "Imprecise ending is set - just retry");
 
-               previous_erp->status = DASD_CQR_QUEUED;
+               previous_erp->status = DASD_CQR_FILLED;
 
                return previous_erp;
        }
@@ -1850,7 +1742,7 @@ dasd_3990_update_1B(struct dasd_ccw_req * previous_erp, char *sense)
        erp = previous_erp;
 
        /* update the LO with the new returned sense data  */
-       LO_data = erp->data + sizeof (struct DE_eckd_data);
+       LO_data = erp->data + sizeof(struct DE_eckd_data);
 
        if ((sense[3] == 0x01) && (LO_data[1] & 0x01)) {
 
@@ -1889,7 +1781,7 @@ dasd_3990_update_1B(struct dasd_ccw_req * previous_erp, char *sense)
        ccw++;                  /* addr of TIC ccw */
        ccw->cda = cpa;
 
-       erp->status = DASD_CQR_QUEUED;
+       erp->status = DASD_CQR_FILLED;
 
        return erp;
 
@@ -1968,9 +1860,7 @@ dasd_3990_erp_compound_path(struct dasd_ccw_req * erp, char *sense)
                         * try further actions. */
 
                        erp->lpm = 0;
-
-                       erp->status = DASD_CQR_ERROR;
-
+                       erp->status = DASD_CQR_NEED_ERP;
                }
        }
 
@@ -2047,7 +1937,7 @@ dasd_3990_erp_compound_config(struct dasd_ccw_req * erp, char *sense)
        if ((sense[25] & DASD_SENSE_BIT_1) && (sense[26] & DASD_SENSE_BIT_2)) {
 
                /* set to suspended duplex state then restart */
-               struct dasd_device *device = erp->device;
+               struct dasd_device *device = erp->startdev;
 
                DEV_MESSAGE(KERN_ERR, device, "%s",
                            "Set device to suspended duplex state should be "
@@ -2081,28 +1971,26 @@ dasd_3990_erp_compound(struct dasd_ccw_req * erp, char *sense)
 {
 
        if ((erp->function == dasd_3990_erp_compound_retry) &&
-           (erp->status == DASD_CQR_ERROR)) {
+           (erp->status == DASD_CQR_NEED_ERP)) {
 
                dasd_3990_erp_compound_path(erp, sense);
        }
 
        if ((erp->function == dasd_3990_erp_compound_path) &&
-           (erp->status == DASD_CQR_ERROR)) {
+           (erp->status == DASD_CQR_NEED_ERP)) {
 
                erp = dasd_3990_erp_compound_code(erp, sense);
        }
 
        if ((erp->function == dasd_3990_erp_compound_code) &&
-           (erp->status == DASD_CQR_ERROR)) {
+           (erp->status == DASD_CQR_NEED_ERP)) {
 
                dasd_3990_erp_compound_config(erp, sense);
        }
 
        /* if no compound action ERP specified, the request failed */
-       if (erp->status == DASD_CQR_ERROR) {
-
+       if (erp->status == DASD_CQR_NEED_ERP)
                erp->status = DASD_CQR_FAILED;
-       }
 
        return erp;
 
@@ -2127,7 +2015,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_inspect_32(struct dasd_ccw_req * erp, char *sense)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
 
        erp->function = dasd_3990_erp_inspect_32;
 
@@ -2149,8 +2037,7 @@ dasd_3990_erp_inspect_32(struct dasd_ccw_req * erp, char *sense)
 
                case 0x01:      /* fatal error */
                        DEV_MESSAGE(KERN_ERR, device, "%s",
-                                   "Fatal error should have been "
-                                   "handled within the interrupt handler");
+                                   "Retry not recommended - Fatal error");
 
                        erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
                        break;
@@ -2253,6 +2140,11 @@ dasd_3990_erp_inspect(struct dasd_ccw_req * erp)
        /* already set up new ERP !                           */
        char *sense = erp->refers->irb.ecw;
 
+       /* if this problem occurred on an alias retry on base */
+       erp_new = dasd_3990_erp_inspect_alias(erp);
+       if (erp_new)
+               return erp_new;
+
        /* distinguish between 24 and 32 byte sense data */
        if (sense[27] & DASD_SENSE_BIT_0) {
 
@@ -2287,13 +2179,13 @@ static struct dasd_ccw_req *
 dasd_3990_erp_add_erp(struct dasd_ccw_req * cqr)
 {
 
-       struct dasd_device *device = cqr->device;
+       struct dasd_device *device = cqr->startdev;
        struct ccw1 *ccw;
 
        /* allocate additional request block */
        struct dasd_ccw_req *erp;
 
-       erp = dasd_alloc_erp_request((char *) &cqr->magic, 2, 0, cqr->device);
+       erp = dasd_alloc_erp_request((char *) &cqr->magic, 2, 0, device);
        if (IS_ERR(erp)) {
                 if (cqr->retries <= 0) {
                        DEV_MESSAGE(KERN_ERR, device, "%s",
@@ -2305,7 +2197,7 @@ dasd_3990_erp_add_erp(struct dasd_ccw_req * cqr)
                                      "Unable to allocate ERP request "
                                     "(%i retries left)",
                                      cqr->retries);
-                       dasd_set_timer(device, (HZ << 3));
+                       dasd_block_set_timer(device->block, (HZ << 3));
                 }
                return cqr;
        }
@@ -2319,7 +2211,9 @@ dasd_3990_erp_add_erp(struct dasd_ccw_req * cqr)
        ccw->cda      = (long)(cqr->cpaddr);
        erp->function = dasd_3990_erp_add_erp;
        erp->refers   = cqr;
-       erp->device   = cqr->device;
+       erp->startdev = device;
+       erp->memdev   = device;
+       erp->block    = cqr->block;
        erp->magic    = cqr->magic;
        erp->expires  = 0;
        erp->retries  = 256;
@@ -2466,7 +2360,7 @@ static struct dasd_ccw_req *
 dasd_3990_erp_further_erp(struct dasd_ccw_req *erp)
 {
 
-       struct dasd_device *device = erp->device;
+       struct dasd_device *device = erp->startdev;
        char *sense = erp->irb.ecw;
 
        /* check for 24 byte sense ERP */
@@ -2557,7 +2451,7 @@ dasd_3990_erp_handle_match_erp(struct dasd_ccw_req *erp_head,
                               struct dasd_ccw_req *erp)
 {
 
-       struct dasd_device *device = erp_head->device;
+       struct dasd_device *device = erp_head->startdev;
        struct dasd_ccw_req *erp_done = erp_head;       /* finished req */
        struct dasd_ccw_req *erp_free = NULL;   /* req to be freed */
 
@@ -2569,13 +2463,13 @@ dasd_3990_erp_handle_match_erp(struct dasd_ccw_req *erp_head,
                              "original request was lost\n");
 
                /* remove the request from the device queue */
-               list_del(&erp_done->list);
+               list_del(&erp_done->blocklist);
 
                erp_free = erp_done;
                erp_done = erp_done->refers;
 
                /* free the finished erp request */
-               dasd_free_erp_request(erp_free, erp_free->device);
+               dasd_free_erp_request(erp_free, erp_free->memdev);
 
        }                       /* end while */
 
@@ -2603,7 +2497,7 @@ dasd_3990_erp_handle_match_erp(struct dasd_ccw_req *erp_head,
                                    erp->retries, erp);
 
                        /* handle the request again... */
-                       erp->status = DASD_CQR_QUEUED;
+                       erp->status = DASD_CQR_FILLED;
                }
 
        } else {
@@ -2620,7 +2514,7 @@ dasd_3990_erp_handle_match_erp(struct dasd_ccw_req *erp_head,
  * DASD_3990_ERP_ACTION
  *
  * DESCRIPTION
- *   controll routine for 3990 erp actions.
+ *   control routine for 3990 erp actions.
  *   Has to be called with the queue lock (namely the s390_irq_lock) acquired.
  *
  * PARAMETER
@@ -2636,9 +2530,8 @@ dasd_3990_erp_handle_match_erp(struct dasd_ccw_req *erp_head,
 struct dasd_ccw_req *
 dasd_3990_erp_action(struct dasd_ccw_req * cqr)
 {
-
        struct dasd_ccw_req *erp = NULL;
-       struct dasd_device *device = cqr->device;
+       struct dasd_device *device = cqr->startdev;
        struct dasd_ccw_req *temp_erp = NULL;
 
        if (device->features & DASD_FEATURE_ERPLOG) {
@@ -2704,10 +2597,11 @@ dasd_3990_erp_action(struct dasd_ccw_req * cqr)
                }
        }
 
-       /* enqueue added ERP request */
-       if (erp->status == DASD_CQR_FILLED) {
-               erp->status = DASD_CQR_QUEUED;
-               list_add(&erp->list, &device->ccw_queue);
+       /* enqueue ERP request if it's a new one */
+       if (list_empty(&erp->blocklist)) {
+               cqr->status = DASD_CQR_IN_ERP;
+               /* add erp request before the cqr */
+               list_add_tail(&erp->blocklist, &cqr->blocklist);
        }
 
        return erp;
diff --git a/drivers/s390/block/dasd_9336_erp.c b/drivers/s390/block/dasd_9336_erp.c
deleted file mode 100644 (file)
index 6e08268..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * File...........: linux/drivers/s390/block/dasd_9336_erp.c
- * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
- * Bugreports.to..: <Linux390@de.ibm.com>
- * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 2000
- *
- */
-
-#define PRINTK_HEADER "dasd_erp(9336)"
-
-#include "dasd_int.h"
-
-
-/*
- * DASD_9336_ERP_EXAMINE
- *
- * DESCRIPTION
- *   Checks only for fatal/no/recover error.
- *   A detailed examination of the sense data is done later outside
- *   the interrupt handler.
- *
- *   The logic is based on the 'IBM 3880 Storage Control Reference' manual
- *   'Chapter 7. 9336 Sense Data'.
- *
- * RETURN VALUES
- *   dasd_era_none     no error
- *   dasd_era_fatal    for all fatal (unrecoverable errors)
- *   dasd_era_recover  for all others.
- */
-dasd_era_t
-dasd_9336_erp_examine(struct dasd_ccw_req * cqr, struct irb * irb)
-{
-       /* check for successful execution first */
-       if (irb->scsw.cstat == 0x00 &&
-           irb->scsw.dstat == (DEV_STAT_CHN_END | DEV_STAT_DEV_END))
-               return dasd_era_none;
-
-       /* examine the 24 byte sense data */
-       return dasd_era_recover;
-
-}                              /* END dasd_9336_erp_examine */
diff --git a/drivers/s390/block/dasd_9343_erp.c b/drivers/s390/block/dasd_9343_erp.c
deleted file mode 100644 (file)
index ddecb98..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * File...........: linux/drivers/s390/block/dasd_9345_erp.c
- * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com>
- * Bugreports.to..: <Linux390@de.ibm.com>
- * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 2000
- *
- */
-
-#define PRINTK_HEADER "dasd_erp(9343)"
-
-#include "dasd_int.h"
-
-dasd_era_t
-dasd_9343_erp_examine(struct dasd_ccw_req * cqr, struct irb * irb)
-{
-       if (irb->scsw.cstat == 0x00 &&
-           irb->scsw.dstat == (DEV_STAT_CHN_END | DEV_STAT_DEV_END))
-               return dasd_era_none;
-
-       return dasd_era_recover;
-}
diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c
new file mode 100644 (file)
index 0000000..3a40bee
--- /dev/null
@@ -0,0 +1,903 @@
+/*
+ * PAV alias management for the DASD ECKD discipline
+ *
+ * Copyright IBM Corporation, 2007
+ * Author(s): Stefan Weinhuber <wein@de.ibm.com>
+ */
+
+#include <linux/list.h>
+#include <asm/ebcdic.h>
+#include "dasd_int.h"
+#include "dasd_eckd.h"
+
+#ifdef PRINTK_HEADER
+#undef PRINTK_HEADER
+#endif                         /* PRINTK_HEADER */
+#define PRINTK_HEADER "dasd(eckd):"
+
+
+/*
+ * General concept of alias management:
+ * - PAV and DASD alias management is specific to the eckd discipline.
+ * - A device is connected to an lcu as long as the device exists.
+ *   dasd_alias_make_device_known_to_lcu will be called when the
+ *   device is checked by the eckd discipline and
+ *   dasd_alias_disconnect_device_from_lcu will be called
+ *   before the device is deleted.
+ * - The dasd_alias_add_device / dasd_alias_remove_device
+ *   functions mark the point when a device is 'ready for service'.
+ * - A summary unit check is a rare occasion, but it is mandatory to
+ *   support it. It requires some complex recovery actions before the
+ *   devices can be used again (see dasd_alias_handle_summary_unit_check).
+ * - dasd_alias_get_start_dev will find an alias device that can be used
+ *   instead of the base device and does some (very simple) load balancing.
+ *   This is the function that gets called for each I/O, so when improving
+ *   something, this function should get faster or better, the rest has just
+ *   to be correct.
+ */
+
+
+/* forward declarations for functions that are referenced before they are defined */
+static void summary_unit_check_handling_work(struct work_struct *);
+static void lcu_update_work(struct work_struct *);
+static int _schedule_lcu_update(struct alias_lcu *, struct dasd_device *);
+
+/*
+ * Root of the alias bookkeeping: the list of known servers, each of which
+ * carries a list of its lcus. The functions in this file that search or
+ * change these lists do so while holding aliastree.lock.
+ */
+static struct alias_root aliastree = {
+       .serverlist = LIST_HEAD_INIT(aliastree.serverlist),
+       .lock = __SPIN_LOCK_UNLOCKED(aliastree.lock),
+};
+
+/*
+ * Search aliastree.serverlist for the server whose vendor and serial
+ * match those in uid.
+ * Returns the matching entry or NULL if there is none.
+ * The callers in this file hold aliastree.lock.
+ */
+static struct alias_server *_find_server(struct dasd_uid *uid)
+{
+       struct alias_server *pos;
+
+       list_for_each_entry(pos, &aliastree.serverlist, server) {
+               if (!strncmp(pos->uid.vendor, uid->vendor,
+                            sizeof(uid->vendor))
+                   && !strncmp(pos->uid.serial, uid->serial,
+                               sizeof(uid->serial)))
+                       return pos;
+       }
+       return NULL;
+}
+
+/*
+ * Search the lculist of server for the lcu with the same ssid as uid.
+ * Returns the matching entry or NULL if there is none.
+ * The caller in this file holds aliastree.lock.
+ */
+static struct alias_lcu *_find_lcu(struct alias_server *server,
+                                  struct dasd_uid *uid)
+{
+       struct alias_lcu *pos;
+
+       list_for_each_entry(pos, &server->lculist, lcu) {
+               if (pos->uid.ssid == uid->ssid)
+                       return pos;
+       }
+       return NULL;
+}
+
+/*
+ * Find the pav group on lcu that the device described by uid belongs to.
+ * For hyper pav this is the first (only) group of the lcu, for base pav
+ * it is the group with the matching base unit address.
+ * Returns the group or NULL if no such group exists yet.
+ */
+static struct alias_pav_group *_find_group(struct alias_lcu *lcu,
+                                          struct dasd_uid *uid)
+{
+       struct alias_pav_group *pos;
+       __u8 search_unit_addr;
+
+       /* for hyper pav there is only one group */
+       if (lcu->pav == HYPER_PAV) {
+               if (list_empty(&lcu->grouplist))
+                       return NULL;
+               return list_first_entry(&lcu->grouplist,
+                                       struct alias_pav_group, group);
+       }
+
+       /* for base pav we have to find the group that matches the base */
+       if (uid->type == UA_BASE_DEVICE)
+               search_unit_addr = uid->real_unit_addr;
+       else
+               search_unit_addr = uid->base_unit_addr;
+       list_for_each_entry(pos, &lcu->grouplist, group) {
+               if (pos->uid.base_unit_addr == search_unit_addr)
+                       return pos;
+       }
+       return NULL;
+}
+
+/*
+ * Allocate a zeroed server entry and copy vendor and serial from uid.
+ * Returns the new entry, which is not yet linked into
+ * aliastree.serverlist, or ERR_PTR(-ENOMEM).
+ * The allocation uses GFP_KERNEL; the caller in this file releases
+ * aliastree.lock before calling.
+ */
+static struct alias_server *_allocate_server(struct dasd_uid *uid)
+{
+       struct alias_server *server;
+
+       server = kzalloc(sizeof(*server), GFP_KERNEL);
+       if (!server)
+               return ERR_PTR(-ENOMEM);
+       memcpy(server->uid.vendor, uid->vendor, sizeof(uid->vendor));
+       memcpy(server->uid.serial, uid->serial, sizeof(uid->serial));
+       INIT_LIST_HEAD(&server->server);
+       INIT_LIST_HEAD(&server->lculist);
+       return server;
+}
+
+/* Free a server entry; the callers unlink it (or never linked it) before. */
+static void _free_server(struct alias_server *server)
+{
+       kfree(server);
+}
+
+/*
+ * Allocate and initialize a new lcu entry for the ssid given in uid.
+ * Besides the lcu itself this allocates the buffer for the unit address
+ * configuration (uac) and a preallocated request (rsu_cqr) with one ccw
+ * and 16 bytes of data, which reset_summary_unit_check uses.
+ * The new lcu starts as NO_PAV with NEED_UAC_UPDATE and UPDATE_PENDING
+ * set.
+ * Returns the lcu, not yet linked into a server's lculist, or
+ * ERR_PTR(-ENOMEM) after freeing whatever had been allocated so far.
+ */
+static struct alias_lcu *_allocate_lcu(struct dasd_uid *uid)
+{
+       struct alias_lcu *lcu;
+
+       lcu = kzalloc(sizeof(*lcu), GFP_KERNEL);
+       if (!lcu)
+               return ERR_PTR(-ENOMEM);
+       /* everything that is handed to the channel program is GFP_DMA */
+       lcu->uac = kzalloc(sizeof(*(lcu->uac)), GFP_KERNEL | GFP_DMA);
+       if (!lcu->uac)
+               goto out_err1;
+       lcu->rsu_cqr = kzalloc(sizeof(*lcu->rsu_cqr), GFP_KERNEL | GFP_DMA);
+       if (!lcu->rsu_cqr)
+               goto out_err2;
+       lcu->rsu_cqr->cpaddr = kzalloc(sizeof(struct ccw1),
+                                      GFP_KERNEL | GFP_DMA);
+       if (!lcu->rsu_cqr->cpaddr)
+               goto out_err3;
+       /* 16 is the ccw count that reset_summary_unit_check sets up */
+       lcu->rsu_cqr->data = kzalloc(16, GFP_KERNEL | GFP_DMA);
+       if (!lcu->rsu_cqr->data)
+               goto out_err4;
+
+       memcpy(lcu->uid.vendor, uid->vendor, sizeof(uid->vendor));
+       memcpy(lcu->uid.serial, uid->serial, sizeof(uid->serial));
+       lcu->uid.ssid = uid->ssid;
+       lcu->pav = NO_PAV;
+       /* with UPDATE_PENDING set, dasd_alias_add_device schedules an update */
+       lcu->flags = NEED_UAC_UPDATE | UPDATE_PENDING;
+       INIT_LIST_HEAD(&lcu->lcu);
+       INIT_LIST_HEAD(&lcu->inactive_devices);
+       INIT_LIST_HEAD(&lcu->active_devices);
+       INIT_LIST_HEAD(&lcu->grouplist);
+       INIT_WORK(&lcu->suc_data.worker, summary_unit_check_handling_work);
+       INIT_DELAYED_WORK(&lcu->ruac_data.dwork, lcu_update_work);
+       spin_lock_init(&lcu->lock);
+       return lcu;
+
+       /* unwind in reverse order of allocation */
+out_err4:
+       kfree(lcu->rsu_cqr->cpaddr);
+out_err3:
+       kfree(lcu->rsu_cqr);
+out_err2:
+       kfree(lcu->uac);
+out_err1:
+       kfree(lcu);
+       return ERR_PTR(-ENOMEM);
+}
+
+/*
+ * Free an lcu together with all buffers allocated by _allocate_lcu.
+ * The members of rsu_cqr are freed before rsu_cqr itself.
+ */
+static void _free_lcu(struct alias_lcu *lcu)
+{
+       kfree(lcu->rsu_cqr->data);
+       kfree(lcu->rsu_cqr->cpaddr);
+       kfree(lcu->rsu_cqr);
+       kfree(lcu->uac);
+       kfree(lcu);
+}
+
+/*
+ * This is the function that will allocate all the server and lcu data,
+ * so this function must be called first for a new device.
+ * It looks up (or creates) the server and lcu entries that match the uid
+ * of the device, puts the device on the lcu's list of inactive devices
+ * and sets private->lcu.
+ * If the return value is 1, the lcu was already known before, if it
+ * is 0, this is a new lcu.
+ * Negative return code indicates that something went wrong (e.g. -ENOMEM)
+ */
+int dasd_alias_make_device_known_to_lcu(struct dasd_device *device)
+{
+       struct dasd_eckd_private *private;
+       unsigned long flags;
+       struct alias_server *server, *newserver;
+       struct alias_lcu *lcu, *newlcu;
+       int is_lcu_known;
+       struct dasd_uid *uid;
+
+       private = (struct dasd_eckd_private *) device->private;
+       uid = &private->uid;
+       spin_lock_irqsave(&aliastree.lock, flags);
+       is_lcu_known = 1;
+       server = _find_server(uid);
+       if (!server) {
+               /* allocate without holding the lock, then look again */
+               spin_unlock_irqrestore(&aliastree.lock, flags);
+               newserver = _allocate_server(uid);
+               if (IS_ERR(newserver))
+                       return PTR_ERR(newserver);
+               spin_lock_irqsave(&aliastree.lock, flags);
+               server = _find_server(uid);
+               if (!server) {
+                       list_add(&newserver->server, &aliastree.serverlist);
+                       server = newserver;
+                       is_lcu_known = 0;
+               } else {
+                       /* someone was faster */
+                       _free_server(newserver);
+               }
+       }
+
+       lcu = _find_lcu(server, uid);
+       if (!lcu) {
+               spin_unlock_irqrestore(&aliastree.lock, flags);
+               newlcu = _allocate_lcu(uid);
+               /*
+                * The error code is carried by newlcu; lcu is NULL here, so
+                * PTR_ERR(lcu) would be 0 and report the failure as a
+                * successfully added new lcu.
+                */
+               if (IS_ERR(newlcu))
+                       return PTR_ERR(newlcu);
+               spin_lock_irqsave(&aliastree.lock, flags);
+               lcu = _find_lcu(server, uid);
+               if (!lcu) {
+                       list_add(&newlcu->lcu, &server->lculist);
+                       lcu = newlcu;
+               } else {
+                       /* someone was faster */
+                       _free_lcu(newlcu);
+               }
+               /*
+                * the lcu is reported as new whenever the first lookup
+                * failed, even if someone else added it in the meantime
+                */
+               is_lcu_known = 0;
+       }
+       /* lock order: aliastree.lock first, then lcu->lock */
+       spin_lock(&lcu->lock);
+       list_add(&device->alias_list, &lcu->inactive_devices);
+       private->lcu = lcu;
+       spin_unlock(&lcu->lock);
+       spin_unlock_irqrestore(&aliastree.lock, flags);
+
+       return is_lcu_known;
+}
+
+/*
+ * This function removes a device from the scope of alias management.
+ * The complicated part is to make sure that it is not in use by
+ * any of the workers. If necessary cancel the work.
+ * When the lcu, and after that its server, have no entries left, they
+ * are removed from the tree and freed.
+ */
+void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device)
+{
+       struct dasd_eckd_private *private;
+       unsigned long flags;
+       struct alias_lcu *lcu;
+       struct alias_server *server;
+       int was_pending;
+
+       private = (struct dasd_eckd_private *) device->private;
+       lcu = private->lcu;
+       spin_lock_irqsave(&lcu->lock, flags);
+       list_del_init(&device->alias_list);
+       /* make sure that the workers don't use this device */
+       if (device == lcu->suc_data.device) {
+               /* lcu->lock is not held while waiting for the worker */
+               spin_unlock_irqrestore(&lcu->lock, flags);
+               cancel_work_sync(&lcu->suc_data.worker);
+               spin_lock_irqsave(&lcu->lock, flags);
+               if (device == lcu->suc_data.device)
+                       lcu->suc_data.device = NULL;
+       }
+       was_pending = 0;
+       if (device == lcu->ruac_data.device) {
+               /* lcu_update_work takes lcu->lock itself, so drop it here */
+               spin_unlock_irqrestore(&lcu->lock, flags);
+               was_pending = 1;
+               cancel_delayed_work_sync(&lcu->ruac_data.dwork);
+               spin_lock_irqsave(&lcu->lock, flags);
+               if (device == lcu->ruac_data.device)
+                       lcu->ruac_data.device = NULL;
+       }
+       private->lcu = NULL;
+       spin_unlock_irqrestore(&lcu->lock, flags);
+
+       /* same lock order as in dasd_alias_make_device_known_to_lcu */
+       spin_lock_irqsave(&aliastree.lock, flags);
+       spin_lock(&lcu->lock);
+       if (list_empty(&lcu->grouplist) &&
+           list_empty(&lcu->active_devices) &&
+           list_empty(&lcu->inactive_devices)) {
+               /* this was the last device, the lcu is no longer needed */
+               list_del(&lcu->lcu);
+               spin_unlock(&lcu->lock);
+               _free_lcu(lcu);
+               lcu = NULL;
+       } else {
+               /* the cancelled update is rescheduled with another device */
+               if (was_pending)
+                       _schedule_lcu_update(lcu, NULL);
+               spin_unlock(&lcu->lock);
+       }
+       /* drop the server as well if it has no lcu left */
+       server = _find_server(&private->uid);
+       if (server && list_empty(&server->lculist)) {
+               list_del(&server->server);
+               _free_server(server);
+       }
+       spin_unlock_irqrestore(&aliastree.lock, flags);
+}
+
+/*
+ * This function assumes that the unit address configuration stored
+ * in the lcu is up to date and will update the device uid before
+ * adding it to a pav group.
+ * Without PAV the device is just moved to the list of active devices.
+ * Otherwise it is moved to the base or alias list of its pav group,
+ * which is created if it does not exist yet.
+ * Returns 0 on success or -ENOMEM if a new group could not be allocated.
+ * The callers in this file hold lcu->lock.
+ */
+static int _add_device_to_lcu(struct alias_lcu *lcu,
+                             struct dasd_device *device)
+{
+
+       struct dasd_eckd_private *private;
+       struct alias_pav_group *group;
+       struct dasd_uid *uid;
+
+       private = (struct dasd_eckd_private *) device->private;
+       uid = &private->uid;
+       uid->type = lcu->uac->unit[uid->real_unit_addr].ua_type;
+       uid->base_unit_addr = lcu->uac->unit[uid->real_unit_addr].base_ua;
+       dasd_set_uid(device->cdev, &private->uid);
+
+       /* if we have no PAV anyway, we don't need to bother with PAV groups */
+       if (lcu->pav == NO_PAV) {
+               list_move(&device->alias_list, &lcu->active_devices);
+               return 0;
+       }
+
+       group = _find_group(lcu, uid);
+       if (!group) {
+               /* GFP_ATOMIC: the callers hold the lcu spinlock */
+               group = kzalloc(sizeof(*group), GFP_ATOMIC);
+               if (!group)
+                       return -ENOMEM;
+               memcpy(group->uid.vendor, uid->vendor, sizeof(uid->vendor));
+               memcpy(group->uid.serial, uid->serial, sizeof(uid->serial));
+               group->uid.ssid = uid->ssid;
+               /* a group is identified by the unit address of its base */
+               if (uid->type == UA_BASE_DEVICE)
+                       group->uid.base_unit_addr = uid->real_unit_addr;
+               else
+                       group->uid.base_unit_addr = uid->base_unit_addr;
+               INIT_LIST_HEAD(&group->group);
+               INIT_LIST_HEAD(&group->baselist);
+               INIT_LIST_HEAD(&group->aliaslist);
+               list_add(&group->group, &lcu->grouplist);
+       }
+       if (uid->type == UA_BASE_DEVICE)
+               list_move(&device->alias_list, &group->baselist);
+       else
+               list_move(&device->alias_list, &group->aliaslist);
+       private->pavgroup = group;
+       return 0;
+}
+
+/*
+ * Move the device back to the list of inactive devices and detach it
+ * from its pav group, if it has one. A group that has neither base nor
+ * alias devices left is freed; otherwise group->next is reset if it
+ * pointed to the removed device.
+ * The caller in this file holds lcu->lock.
+ */
+static void _remove_device_from_lcu(struct alias_lcu *lcu,
+                                   struct dasd_device *device)
+{
+       struct dasd_eckd_private *private;
+       struct alias_pav_group *group;
+
+       private = (struct dasd_eckd_private *) device->private;
+       list_move(&device->alias_list, &lcu->inactive_devices);
+       group = private->pavgroup;
+       if (!group)
+               return;
+       private->pavgroup = NULL;
+       if (list_empty(&group->baselist) && list_empty(&group->aliaslist)) {
+               list_del(&group->group);
+               kfree(group);
+               return;
+       }
+       /* don't leave a pointer to the removed device behind */
+       if (group->next == device)
+               group->next = NULL;
+}
+
+/*
+ * Build and run a channel program of two ccws (PSF followed by RSSD) on
+ * device that reads the unit address configuration into lcu->uac.
+ * NEED_UAC_UPDATE is cleared before the request is started and set again
+ * if the request fails.
+ * Returns 0 on success, otherwise the error of dasd_kmalloc_request or
+ * of the last dasd_sleep_on call.
+ */
+static int read_unit_address_configuration(struct dasd_device *device,
+                                          struct alias_lcu *lcu)
+{
+       struct dasd_psf_prssd_data *prssdp;
+       struct dasd_ccw_req *cqr;
+       struct ccw1 *ccw;
+       int rc;
+       unsigned long flags;
+
+       cqr = dasd_kmalloc_request("ECKD",
+                                  1 /* PSF */  + 1 /* RSSD */ ,
+                                  (sizeof(struct dasd_psf_prssd_data)),
+                                  device);
+       if (IS_ERR(cqr))
+               return PTR_ERR(cqr);
+       cqr->startdev = device;
+       cqr->memdev = device;
+       clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
+       cqr->retries = 10;
+       cqr->expires = 20 * HZ;
+
+       /* Prepare for Read Subsystem Data */
+       prssdp = (struct dasd_psf_prssd_data *) cqr->data;
+       memset(prssdp, 0, sizeof(struct dasd_psf_prssd_data));
+       prssdp->order = PSF_ORDER_PRSSD;
+       prssdp->suborder = 0x0e;        /* Read unit address configuration */
+       /* all other bytes of prssdp must be zero */
+
+       ccw = cqr->cpaddr;
+       ccw->cmd_code = DASD_ECKD_CCW_PSF;
+       ccw->count = sizeof(struct dasd_psf_prssd_data);
+       ccw->flags |= CCW_FLAG_CC;
+       ccw->cda = (__u32)(addr_t) prssdp;
+
+       /* Read Subsystem Data - unit address configuration */
+       memset(lcu->uac, 0, sizeof(*(lcu->uac)));
+
+       ccw++;
+       ccw->cmd_code = DASD_ECKD_CCW_RSSD;
+       ccw->count = sizeof(*(lcu->uac));
+       ccw->cda = (__u32)(addr_t) lcu->uac;
+
+       cqr->buildclk = get_clock();
+       cqr->status = DASD_CQR_FILLED;
+
+       /* need to unset flag here to detect race with summary unit check */
+       spin_lock_irqsave(&lcu->lock, flags);
+       lcu->flags &= ~NEED_UAC_UPDATE;
+       spin_unlock_irqrestore(&lcu->lock, flags);
+
+       /*
+        * repeat while the request fails and has retries left; presumably
+        * cqr->retries is counted down outside of this function
+        */
+       do {
+               rc = dasd_sleep_on(cqr);
+       } while (rc && (cqr->retries > 0));
+       if (rc) {
+               /* the data in lcu->uac is not valid, ask for another update */
+               spin_lock_irqsave(&lcu->lock, flags);
+               lcu->flags |= NEED_UAC_UPDATE;
+               spin_unlock_irqrestore(&lcu->lock, flags);
+       }
+       dasd_kfree_request(cqr, cqr->memdev);
+       return rc;
+}
+
+/*
+ * Rebuild the pav grouping of an lcu:
+ * - dissolve all pav groups and move their devices to active_devices
+ * - read the unit address configuration again, using refdev for the I/O
+ * - set lcu->pav according to the first alias type found in that data
+ * - sort all active devices into pav groups again
+ * Returns 0 on success or the error of read_unit_address_configuration;
+ * in the error case the devices stay on active_devices without a group.
+ */
+static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu)
+{
+       unsigned long flags;
+       struct alias_pav_group *pavgroup, *tempgroup;
+       struct dasd_device *device, *tempdev;
+       int i, rc;
+       struct dasd_eckd_private *private;
+
+       spin_lock_irqsave(&lcu->lock, flags);
+       list_for_each_entry_safe(pavgroup, tempgroup, &lcu->grouplist, group) {
+               list_for_each_entry_safe(device, tempdev, &pavgroup->baselist,
+                                        alias_list) {
+                       list_move(&device->alias_list, &lcu->active_devices);
+                       private = (struct dasd_eckd_private *) device->private;
+                       private->pavgroup = NULL;
+               }
+               list_for_each_entry_safe(device, tempdev, &pavgroup->aliaslist,
+                                        alias_list) {
+                       list_move(&device->alias_list, &lcu->active_devices);
+                       private = (struct dasd_eckd_private *) device->private;
+                       private->pavgroup = NULL;
+               }
+               list_del(&pavgroup->group);
+               kfree(pavgroup);
+       }
+       spin_unlock_irqrestore(&lcu->lock, flags);
+
+       /* the read waits for I/O, so it is done without lcu->lock */
+       rc = read_unit_address_configuration(refdev, lcu);
+       if (rc)
+               return rc;
+
+       spin_lock_irqsave(&lcu->lock, flags);
+       lcu->pav = NO_PAV;
+       /* the first unit that is an alias decides about the pav mode */
+       for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) {
+               switch (lcu->uac->unit[i].ua_type) {
+               case UA_BASE_PAV_ALIAS:
+                       lcu->pav = BASE_PAV;
+                       break;
+               case UA_HYPER_PAV_ALIAS:
+                       lcu->pav = HYPER_PAV;
+                       break;
+               }
+               if (lcu->pav != NO_PAV)
+                       break;
+       }
+
+       /* NOTE(review): the return code of _add_device_to_lcu is ignored */
+       list_for_each_entry_safe(device, tempdev, &lcu->active_devices,
+                                alias_list) {
+               _add_device_to_lcu(lcu, device);
+       }
+       spin_unlock_irqrestore(&lcu->lock, flags);
+       return 0;
+}
+
+/*
+ * Work function of lcu->ruac_data.dwork: runs _lcu_update with the device
+ * stored in ruac_data. If the update failed or another update was
+ * requested in the meantime, the work is scheduled again in 30 seconds,
+ * otherwise the pending update is marked as done.
+ */
+static void lcu_update_work(struct work_struct *work)
+{
+       struct alias_lcu *lcu;
+       struct read_uac_work_data *ruac_data;
+       struct dasd_device *device;
+       unsigned long flags;
+       int rc;
+
+       ruac_data = container_of(work, struct read_uac_work_data, dwork.work);
+       lcu = container_of(ruac_data, struct alias_lcu, ruac_data);
+       device = ruac_data->device;
+       rc = _lcu_update(device, lcu);
+       /*
+        * Need to check flags again, as there could have been another
+        * prepare_update or a new device while we were still
+        * processing the data
+        */
+       spin_lock_irqsave(&lcu->lock, flags);
+       if (rc || (lcu->flags & NEED_UAC_UPDATE)) {
+               DEV_MESSAGE(KERN_WARNING, device, "could not update"
+                           " alias data in lcu (rc = %d), retry later", rc);
+               /* ruac_data.device stays set, the retry uses the same device */
+               schedule_delayed_work(&lcu->ruac_data.dwork, 30*HZ);
+       } else {
+               lcu->ruac_data.device = NULL;
+               lcu->flags &= ~UPDATE_PENDING;
+       }
+       spin_unlock_irqrestore(&lcu->lock, flags);
+}
+
+/*
+ * Mark the lcu as in need of a unit address configuration update and,
+ * unless an update is already scheduled or running, choose a device for
+ * the I/O and schedule lcu_update_work.
+ * The device is chosen in this order: the given device if it is still on
+ * a list, the first base or alias device of the first pav group, the
+ * first of the active devices.
+ * Returns 0 if an update is or has been scheduled, -EINVAL if no usable
+ * device was found (NEED_UAC_UPDATE stays set in that case).
+ * The callers in this file hold lcu->lock.
+ */
+static int _schedule_lcu_update(struct alias_lcu *lcu,
+                               struct dasd_device *device)
+{
+       struct dasd_device *usedev = NULL;
+       struct alias_pav_group *group;
+
+       lcu->flags |= NEED_UAC_UPDATE;
+       if (lcu->ruac_data.device) {
+               /* already scheduled or running */
+               return 0;
+       }
+       /* a device that was taken off its list is not used */
+       if (device && !list_empty(&device->alias_list))
+               usedev = device;
+
+       if (!usedev && !list_empty(&lcu->grouplist)) {
+               group = list_first_entry(&lcu->grouplist,
+                                        struct alias_pav_group, group);
+               if (!list_empty(&group->baselist))
+                       usedev = list_first_entry(&group->baselist,
+                                                 struct dasd_device,
+                                                 alias_list);
+               else if (!list_empty(&group->aliaslist))
+                       usedev = list_first_entry(&group->aliaslist,
+                                                 struct dasd_device,
+                                                 alias_list);
+       }
+       if (!usedev && !list_empty(&lcu->active_devices)) {
+               usedev = list_first_entry(&lcu->active_devices,
+                                         struct dasd_device, alias_list);
+       }
+       /*
+        * if we haven't found a proper device yet, give up for now, the next
+        * device that will be set active will trigger an lcu update
+        */
+       if (!usedev)
+               return -EINVAL;
+       lcu->ruac_data.device = usedev;
+       schedule_delayed_work(&lcu->ruac_data.dwork, 0);
+       return 0;
+}
+
+/*
+ * Add a device to the alias handling of its lcu.
+ * If no lcu update is pending, the device is sorted into its pav group
+ * right away. If an update is pending, or the sorting failed, the device
+ * is put on the list of active devices and an lcu update is scheduled,
+ * which does the grouping later.
+ * Returns 0 or the error code of _add_device_to_lcu.
+ */
+int dasd_alias_add_device(struct dasd_device *device)
+{
+       struct dasd_eckd_private *private;
+       struct alias_lcu *lcu;
+       unsigned long flags;
+       int rc;
+
+       private = (struct dasd_eckd_private *) device->private;
+       lcu = private->lcu;
+       rc = 0;
+       spin_lock_irqsave(&lcu->lock, flags);
+       if (!(lcu->flags & UPDATE_PENDING)) {
+               rc = _add_device_to_lcu(lcu, device);
+               /* on failure fall through to the update path below */
+               if (rc)
+                       lcu->flags |= UPDATE_PENDING;
+       }
+       if (lcu->flags & UPDATE_PENDING) {
+               list_move(&device->alias_list, &lcu->active_devices);
+               _schedule_lcu_update(lcu, device);
+       }
+       spin_unlock_irqrestore(&lcu->lock, flags);
+       return rc;
+}
+
+/*
+ * Take a device out of the alias handling of its lcu: under lcu->lock it
+ * is moved to the list of inactive devices and removed from its pav
+ * group. The device stays connected to the lcu. Always returns 0.
+ */
+int dasd_alias_remove_device(struct dasd_device *device)
+{
+       struct dasd_eckd_private *private;
+       struct alias_lcu *lcu;
+       unsigned long flags;
+
+       private = (struct dasd_eckd_private *) device->private;
+       lcu = private->lcu;
+       spin_lock_irqsave(&lcu->lock, flags);
+       _remove_device_from_lcu(lcu, device);
+       spin_unlock_irqrestore(&lcu->lock, flags);
+       return 0;
+}
+
+/*
+ * Find an alias device that can be used instead of base_device.
+ * The alias devices of the pav group are handed out in turn: group->next
+ * is the candidate for this call and is advanced to the following alias
+ * (wrapping around at the end of the list).
+ * Returns the alias device, or NULL if the base device has no group or
+ * lcu, PAV is not available, an lcu update is outstanding, the group has
+ * no alias, or the candidate is stopped or its count is not lower than
+ * the count of the base device.
+ */
+struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device)
+{
+
+       struct dasd_device *alias_device;
+       struct alias_pav_group *group;
+       struct alias_lcu *lcu;
+       struct dasd_eckd_private *private, *alias_priv;
+       unsigned long flags;
+
+       private = (struct dasd_eckd_private *) base_device->private;
+       group = private->pavgroup;
+       lcu = private->lcu;
+       if (!group || !lcu)
+               return NULL;
+       /* NOTE(review): pav and flags are read here without lcu->lock */
+       if (lcu->pav == NO_PAV ||
+           lcu->flags & (NEED_UAC_UPDATE | UPDATE_PENDING))
+               return NULL;
+
+       spin_lock_irqsave(&lcu->lock, flags);
+       alias_device = group->next;
+       if (!alias_device) {
+               /* no candidate remembered, start with the first alias */
+               if (list_empty(&group->aliaslist)) {
+                       spin_unlock_irqrestore(&lcu->lock, flags);
+                       return NULL;
+               } else {
+                       alias_device = list_first_entry(&group->aliaslist,
+                                                       struct dasd_device,
+                                                       alias_list);
+               }
+       }
+       /* remember the alias that follows as candidate for the next call */
+       if (list_is_last(&alias_device->alias_list, &group->aliaslist))
+               group->next = list_first_entry(&group->aliaslist,
+                                              struct dasd_device, alias_list);
+       else
+               group->next = list_first_entry(&alias_device->alias_list,
+                                              struct dasd_device, alias_list);
+       spin_unlock_irqrestore(&lcu->lock, flags);
+       alias_priv = (struct dasd_eckd_private *) alias_device->private;
+       /* presumably count is the current load of a device - TODO confirm */
+       if ((alias_priv->count < private->count) && !alias_device->stopped)
+               return alias_device;
+       else
+               return NULL;
+}
+
+/*
+ * Summary unit check handling depends on the way alias devices
+ * are handled, so it is done here rather than in dasd_eckd.c.
+ *
+ * Reuses the request stored in lcu->rsu_cqr: it is set up as a single
+ * DASD_ECKD_CCW_RSCK channel command with 16 bytes of data whose first
+ * byte is @reason, bound to @device as start and memory device, and
+ * then run through dasd_sleep_on_immediatly().
+ *
+ * Returns the result of dasd_sleep_on_immediatly().
+ */
+static int reset_summary_unit_check(struct alias_lcu *lcu,
+                                   struct dasd_device *device,
+                                   char reason)
+{
+       struct dasd_ccw_req *req = lcu->rsu_cqr;
+
+       /* request magic: the four characters "ECKD", converted by ASCEBC */
+       strncpy((char *) &req->magic, "ECKD", 4);
+       ASCEBC((char *) &req->magic, 4);
+
+       /* the one channel command word of this request */
+       req->cpaddr->cmd_code = DASD_ECKD_CCW_RSCK;
+       req->cpaddr->flags = 0;
+       req->cpaddr->count = 16;
+       req->cpaddr->cda = (__u32)(addr_t) req->data;
+       ((char *) req->data)[0] = reason;
+
+       clear_bit(DASD_CQR_FLAGS_USE_ERP, &req->flags);
+       /* set retry counter to enable basic ERP */
+       req->retries = 255;
+       req->startdev = device;
+       req->memdev = device;
+       req->block = NULL;
+       req->expires = 5 * HZ;
+       req->buildclk = get_clock();
+       req->status = DASD_CQR_FILLED;
+
+       return dasd_sleep_on_immediatly(req);
+}
+
+/*
+ * Walk @devices (linked through dasd_device.alias_list) and call
+ * dasd_schedule_block_bh() and dasd_schedule_device_bh() for every entry
+ * whose uid type is UA_BASE_DEVICE. All other entries are skipped.
+ */
+static void _restart_base_devices_on_list(struct list_head *devices)
+{
+       struct dasd_eckd_private *private;
+       struct dasd_device *device;
+
+       list_for_each_entry(device, devices, alias_list) {
+               private = (struct dasd_eckd_private *) device->private;
+               if (private->uid.type != UA_BASE_DEVICE)
+                       continue;
+               dasd_schedule_block_bh(device->block);
+               dasd_schedule_device_bh(device);
+       }
+}
+
+/*
+ * Schedule the block and device bh of every base device known to @lcu:
+ * the base devices on the active and inactive lists, and every device on
+ * the baselist of each PAV group.
+ */
+static void _restart_all_base_devices_on_lcu(struct alias_lcu *lcu)
+{
+       struct alias_pav_group *pavgroup;
+       struct dasd_device *device;
+
+       /* active and inactive list can contain alias as well as base devices */
+       _restart_base_devices_on_list(&lcu->active_devices);
+       _restart_base_devices_on_list(&lcu->inactive_devices);
+
+       /* entries on a group's baselist are restarted without a type check */
+       list_for_each_entry(pavgroup, &lcu->grouplist, group) {
+               list_for_each_entry(device, &pavgroup->baselist, alias_list) {
+                       dasd_schedule_block_bh(device->block);
+                       dasd_schedule_device_bh(device);
+               }
+       }
+}
+
+static void flush_all_alias_devices_on_lcu(struct alias_lcu *lcu)
+{
+       struct alias_pav_group *pavgroup;
+       struct dasd_device *device, *temp;
+       struct dasd_eckd_private *private;
+       int rc;
+       unsigned long flags;
+       LIST_HEAD(active);
+
+       /*
+        * Problem here is that dasd_flush_device_queue may wait
+        * for termination of a request to complete. We can't keep
+        * the lcu lock during that time, so we must assume that