Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
author Linus Torvalds <torvalds@g5.osdl.org>
Sat, 23 Sep 2006 23:49:31 +0000 (16:49 -0700)
committer Linus Torvalds <torvalds@g5.osdl.org>
Sat, 23 Sep 2006 23:49:31 +0000 (16:49 -0700)
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (353 commits)
  [IPV6] ADDRCONF: Mobile IPv6 Home Address support.
  [IPV6] ADDRCONF: Allow non-DAD'able addresses.
  [IPV6] NDISC: Fix is_router flag setting.
  [IPV6] ADDRCONF: Convert addrconf_lock to RCU.
  [IPV6] NDISC: Add proxy_ndp sysctl.
  [IPV6] NDISC: Set per-entry is_router flag in Proxy NA.
  [IPV6] NDISC: Avoid updating neighbor cache for proxied address in receiving NA.
  [IPV6]: Don't forward packets to proxied link-local address.
  [IPV6] NDISC: Handle NDP messages to proxied addresses.
  [NETFILTER]: PPTP conntrack: fix another GRE keymap leak
  [NETFILTER]: PPTP conntrack: fix GRE keymap leak
  [NETFILTER]: PPTP conntrack: fix PPTP_IN_CALL message types
  [NETFILTER]: PPTP conntrack: check call ID before changing state
  [NETFILTER]: PPTP conntrack: clean up debugging cruft
  [NETFILTER]: PPTP conntrack: consolidate header parsing
  [NETFILTER]: PPTP conntrack: consolidate header size checks
  [NETFILTER]: PPTP conntrack: simplify expectation handling
  [NETFILTER]: PPTP conntrack: remove unnecessary cid/pcid header pointers
  [NETFILTER]: PPTP conntrack: fix header definitions
  [NETFILTER]: PPTP conntrack: remove more dead code
  ...

182 files changed:
MAINTAINERS
arch/frv/Makefile
arch/frv/boot/Makefile
arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
arch/i386/kernel/cpu/cpufreq/longhaul.c
arch/i386/kernel/cpu/cpufreq/longhaul.h
arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
arch/powerpc/platforms/powermac/feature.c
arch/powerpc/platforms/powermac/smp.c
arch/sparc/kernel/ebus.c
arch/sparc/kernel/ioport.c
crypto/hmac.c
drivers/char/agp/agp.h
drivers/char/agp/backend.c
drivers/char/agp/efficeon-agp.c
drivers/char/agp/frontend.c
drivers/char/agp/generic.c
drivers/char/agp/intel-agp.c
drivers/char/agp/via-agp.c
drivers/char/briq_panel.c
drivers/char/istallion.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_ondemand.c
drivers/cpufreq/cpufreq_stats.c
drivers/infiniband/Kconfig
drivers/infiniband/Makefile
drivers/infiniband/core/Makefile
drivers/infiniband/core/addr.c
drivers/infiniband/core/cache.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/cma.c
drivers/infiniband/core/device.c
drivers/infiniband/core/iwcm.c [new file with mode: 0644]
drivers/infiniband/core/iwcm.h [new file with mode: 0644]
drivers/infiniband/core/mad.c
drivers/infiniband/core/mad_priv.h
drivers/infiniband/core/mad_rmpp.c
drivers/infiniband/core/sa_query.c
drivers/infiniband/core/smi.c
drivers/infiniband/core/sysfs.c
drivers/infiniband/core/ucm.c
drivers/infiniband/core/user_mad.c
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/amso1100/Kbuild [new file with mode: 0644]
drivers/infiniband/hw/amso1100/Kconfig [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_ae.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_ae.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_alloc.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_cm.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_cq.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_intr.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_mm.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_mq.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_mq.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_pd.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_provider.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_provider.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_qp.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_rnic.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_status.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_user.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_vq.c [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_vq.h [new file with mode: 0644]
drivers/infiniband/hw/amso1100/c2_wr.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/Kconfig [new file with mode: 0644]
drivers/infiniband/hw/ehca/Makefile [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_av.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_classes.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_classes_pSeries.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_cq.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_eq.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_hca.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_irq.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_irq.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_iverbs.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_main.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_mcast.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_mrmw.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_mrmw.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_pd.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_qes.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_qp.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_reqs.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_sqp.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_tools.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ehca_uverbs.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/hcp_if.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/hcp_if.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/hcp_phyp.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/hcp_phyp.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/hipz_fns.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/hipz_fns_core.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/hipz_hw.h [new file with mode: 0644]
drivers/infiniband/hw/ehca/ipz_pt_fn.c [new file with mode: 0644]
drivers/infiniband/hw/ehca/ipz_pt_fn.h [new file with mode: 0644]
drivers/infiniband/hw/ipath/Kconfig
drivers/infiniband/hw/ipath/Makefile
drivers/infiniband/hw/ipath/ipath_common.h
drivers/infiniband/hw/ipath/ipath_cq.c
drivers/infiniband/hw/ipath/ipath_debug.h
drivers/infiniband/hw/ipath/ipath_diag.c
drivers/infiniband/hw/ipath/ipath_driver.c
drivers/infiniband/hw/ipath/ipath_file_ops.c
drivers/infiniband/hw/ipath/ipath_fs.c
drivers/infiniband/hw/ipath/ipath_iba6110.c [moved from drivers/infiniband/hw/ipath/ipath_ht400.c with 97% similarity]
drivers/infiniband/hw/ipath/ipath_iba6120.c [moved from drivers/infiniband/hw/ipath/ipath_pe800.c with 95% similarity]
drivers/infiniband/hw/ipath/ipath_init_chip.c
drivers/infiniband/hw/ipath/ipath_intr.c
drivers/infiniband/hw/ipath/ipath_kernel.h
drivers/infiniband/hw/ipath/ipath_keys.c
drivers/infiniband/hw/ipath/ipath_layer.c
drivers/infiniband/hw/ipath/ipath_layer.h
drivers/infiniband/hw/ipath/ipath_mad.c
drivers/infiniband/hw/ipath/ipath_mmap.c [new file with mode: 0644]
drivers/infiniband/hw/ipath/ipath_mr.c
drivers/infiniband/hw/ipath/ipath_qp.c
drivers/infiniband/hw/ipath/ipath_rc.c
drivers/infiniband/hw/ipath/ipath_registers.h
drivers/infiniband/hw/ipath/ipath_ruc.c
drivers/infiniband/hw/ipath/ipath_srq.c
drivers/infiniband/hw/ipath/ipath_stats.c
drivers/infiniband/hw/ipath/ipath_sysfs.c
drivers/infiniband/hw/ipath/ipath_uc.c
drivers/infiniband/hw/ipath/ipath_ud.c
drivers/infiniband/hw/ipath/ipath_verbs.c
drivers/infiniband/hw/ipath/ipath_verbs.h
drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
drivers/infiniband/hw/ipath/ipath_wc_ppc64.c [new file with mode: 0644]
drivers/infiniband/hw/ipath/verbs_debug.h [deleted file]
drivers/infiniband/hw/mthca/mthca_av.c
drivers/infiniband/hw/mthca/mthca_catas.c
drivers/infiniband/hw/mthca/mthca_cmd.c
drivers/infiniband/hw/mthca/mthca_cq.c
drivers/infiniband/hw/mthca/mthca_dev.h
drivers/infiniband/hw/mthca/mthca_mad.c
drivers/infiniband/hw/mthca/mthca_main.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/mthca/mthca_qp.c
drivers/infiniband/hw/mthca/mthca_srq.c
drivers/infiniband/hw/mthca/mthca_uar.c
drivers/infiniband/ulp/ipoib/ipoib.h
drivers/infiniband/ulp/ipoib/ipoib_ib.c
drivers/infiniband/ulp/ipoib/ipoib_main.c
drivers/infiniband/ulp/ipoib/ipoib_multicast.c
drivers/infiniband/ulp/iser/Kconfig
drivers/infiniband/ulp/iser/iscsi_iser.c
drivers/infiniband/ulp/iser/iscsi_iser.h
drivers/infiniband/ulp/iser/iser_memory.c
drivers/infiniband/ulp/iser/iser_verbs.c
drivers/infiniband/ulp/srp/ib_srp.c
drivers/macintosh/adbhid.c
drivers/net/lp486e.c
drivers/net/mv643xx_eth.c
drivers/usb/input/hid-core.c
drivers/video/console/fbcon.c
drivers/video/riva/fbdev.c
fs/cifs/CHANGES
fs/cifs/cifs_fs_sb.h
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/cifspdu.h
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/xattr.c
include/asm-generic/audit_change_attr.h
include/asm-generic/audit_dir_write.h
include/asm-ppc/ibm4xx.h
include/linux/kernel.h
include/linux/mm.h
include/rdma/ib_addr.h
include/rdma/ib_sa.h
include/rdma/ib_user_verbs.h
include/rdma/ib_verbs.h
include/rdma/iw_cm.h [new file with mode: 0644]
include/rdma/rdma_cm.h
lib/audit.c
mm/mmap.c
sound/aoa/Kconfig

diff --git a/MAINTAINERS b/MAINTAINERS
index ed2a83c..b08c537 100644
@@ -298,6 +298,14 @@ L: info-linux@geode.amd.com
 W:     http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
 S:     Supported
 
+AMSO1100 RNIC DRIVER
+P:     Tom Tucker
+M:     tom@opengridcomputing.com
+P:     Steve Wise
+M:     swise@opengridcomputing.com
+L:     openib-general@openib.org
+S:     Maintained
+
 AOA (Apple Onboard Audio) ALSA DRIVER
 P:     Johannes Berg
 M:     johannes@sipsolutions.net
@@ -991,6 +999,14 @@ EFS FILESYSTEM
 W:     http://aeschi.ch.eu.org/efs/
 S:     Orphan
 
+EHCA (IBM GX bus InfiniBand adapter) DRIVER:
+P:     Hoang-Nam Nguyen
+M:     hnguyen@de.ibm.com
+P:     Christoph Raisch
+M:     raisch@de.ibm.com
+L:     openib-general@openib.org
+S:     Supported
+
 EMU10K1 SOUND DRIVER
 P:     James Courtier-Dutton
 M:     James@superbug.demon.co.uk
diff --git a/arch/frv/Makefile b/arch/frv/Makefile
index d163747..038e3a8 100644
@@ -108,11 +108,8 @@ Image: vmlinux
 bootstrap:
        $(Q)$(MAKEBOOT) bootstrap
 
-archmrproper:
-       $(Q)$(MAKE) $(build)=arch/frv/boot mrproper
-
 archclean:
-       $(Q)$(MAKE) $(build)=arch/frv/boot clean
+       $(Q)$(MAKE) $(clean)=arch/frv/boot
 
 archdep: scripts/mkdep symlinks
        $(Q)$(MAKE) $(build)=arch/frv/boot dep
diff --git a/arch/frv/boot/Makefile b/arch/frv/boot/Makefile
index 5dfc93f..dc6f038 100644
@@ -8,6 +8,8 @@
 # Copyright (C) 1995-2000 Russell King
 #
 
+targets := Image zImage bootpImage
+
 SYSTEM =$(TOPDIR)/$(LINUX)
 
 ZTEXTADDR       = 0x02080000
@@ -66,7 +68,6 @@ zinstall: $(CONFIGURE) zImage
 # miscellany
 #
 mrproper clean:
-       $(RM) Image zImage bootpImage
 #      @$(MAKE) -C compressed clean
 #      @$(MAKE) -C bootp clean
 
diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c
index e6ea00e..ea19d09 100644
@@ -32,6 +32,7 @@
 #include <linux/seq_file.h>
 #include <linux/compiler.h>
 #include <linux/sched.h>       /* current */
+#include <linux/dmi.h>
 #include <asm/io.h>
 #include <asm/delay.h>
 #include <asm/uaccess.h>
@@ -387,6 +388,33 @@ static int acpi_cpufreq_early_init_acpi(void)
        return acpi_processor_preregister_performance(acpi_perf_data);
 }
 
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+       bios_with_sw_any_bug = 1;
+       return 0;
+}
+
+static struct dmi_system_id __initdata sw_any_bug_dmi_table[] = {
+       {
+               .callback = sw_any_bug_found,
+               .ident = "Supermicro Server X6DLP",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+                       DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+               },
+       },
+       { }
+};
+
 static int
 acpi_cpufreq_cpu_init (
        struct cpufreq_policy   *policy)
@@ -422,8 +450,17 @@ acpi_cpufreq_cpu_init (
         * coordination is required.
         */
        if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-           policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+           policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
                policy->cpus = perf->shared_cpu_map;
+       }
+
+#ifdef CONFIG_SMP
+       dmi_check_system(sw_any_bug_dmi_table);
+       if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+               policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+               policy->cpus = cpu_core_map[cpu];
+       }
+#endif
 
        if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) {
                acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS;
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c
index 4f2c3ae..f5cc9f5 100644
@@ -27,6 +27,7 @@
 #include <linux/moduleparam.h>
 #include <linux/init.h>
 #include <linux/cpufreq.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 
 #define        CPU_NEHEMIAH    5
 
 static int cpu_model;
-static unsigned int numscales=16, numvscales;
+static unsigned int numscales=16;
 static unsigned int fsb;
-static int minvid, maxvid;
+
+static struct mV_pos *vrm_mV_table;
+static unsigned char *mV_vrm_table;
+struct f_msr {
+       unsigned char vrm;
+};
+static struct f_msr f_msr_table[32];
+
+static unsigned int highest_speed, lowest_speed; /* kHz */
 static unsigned int minmult, maxmult;
 static int can_scale_voltage;
-static int vrmrev;
 static struct acpi_processor *pr = NULL;
 static struct acpi_processor_cx *cx = NULL;
+static int port22_en;
 
 /* Module parameters */
-static int dont_scale_voltage;
-
+static int scale_voltage;
+static int ignore_latency;
 
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "longhaul", msg)
 
@@ -71,7 +80,6 @@ static int dont_scale_voltage;
 /* Clock ratios multiplied by 10 */
 static int clock_ratio[32];
 static int eblcr_table[32];
-static int voltage_table[32];
 static unsigned int highest_speed, lowest_speed; /* kHz */
 static int longhaul_version;
 static struct cpufreq_frequency_table *longhaul_table;
@@ -124,10 +132,9 @@ static int longhaul_get_cpu_mult(void)
 
 /* For processor with BCR2 MSR */
 
-static void do_longhaul1(int cx_address, unsigned int clock_ratio_index)
+static void do_longhaul1(unsigned int clock_ratio_index)
 {
        union msr_bcr2 bcr2;
-       u32 t;
 
        rdmsrl(MSR_VIA_BCR2, bcr2.val);
        /* Enable software clock multiplier */
@@ -136,13 +143,11 @@ static void do_longhaul1(int cx_address, unsigned int clock_ratio_index)
 
        /* Sync to timer tick */
        safe_halt();
-       ACPI_FLUSH_CPU_CACHE();
        /* Change frequency on next halt or sleep */
        wrmsrl(MSR_VIA_BCR2, bcr2.val);
-       /* Invoke C3 */
-       inb(cx_address);
-       /* Dummy op - must do something useless after P_LVL3 read */
-       t = inl(acpi_fadt.xpm_tmr_blk.address);
+       /* Invoke transition */
+       ACPI_FLUSH_CPU_CACHE();
+       halt();
 
        /* Disable software clock multiplier */
        local_irq_disable();
@@ -164,11 +169,16 @@ static void do_powersaver(int cx_address, unsigned int clock_ratio_index)
        longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4;
        longhaul.bits.EnableSoftBusRatio = 1;
 
+       if (can_scale_voltage) {
+               longhaul.bits.SoftVID = f_msr_table[clock_ratio_index].vrm;
+               longhaul.bits.EnableSoftVID = 1;
+       }
+
        /* Sync to timer tick */
        safe_halt();
-       ACPI_FLUSH_CPU_CACHE();
        /* Change frequency on next halt or sleep */
        wrmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+       ACPI_FLUSH_CPU_CACHE();
        /* Invoke C3 */
        inb(cx_address);
        /* Dummy op - must do something useless after P_LVL3 read */
@@ -227,10 +237,13 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
        outb(0xFF,0xA1);        /* Overkill */
        outb(0xFE,0x21);        /* TMR0 only */
 
-       /* Disable bus master arbitration */
-       if (pr->flags.bm_check) {
+       if (pr->flags.bm_control) {
+               /* Disable bus master arbitration */
                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1,
                                  ACPI_MTX_DO_NOT_LOCK);
+       } else if (port22_en) {
+               /* Disable AGP and PCI arbiters */
+               outb(3, 0x22);
        }
 
        switch (longhaul_version) {
@@ -244,7 +257,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
         */
        case TYPE_LONGHAUL_V1:
        case TYPE_LONGHAUL_V2:
-               do_longhaul1(cx->address, clock_ratio_index);
+               do_longhaul1(clock_ratio_index);
                break;
 
        /*
@@ -259,14 +272,20 @@ static void longhaul_setstate(unsigned int clock_ratio_index)
         * to work in practice.
         */
        case TYPE_POWERSAVER:
+               /* Don't allow wakeup */
+               acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0,
+                                 ACPI_MTX_DO_NOT_LOCK);
                do_powersaver(cx->address, clock_ratio_index);
                break;
        }
 
-       /* Enable bus master arbitration */
-       if (pr->flags.bm_check) {
+       if (pr->flags.bm_control) {
+               /* Enable bus master arbitration */
                acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0,
                                  ACPI_MTX_DO_NOT_LOCK);
+       } else if (port22_en) {
+               /* Enable arbiters */
+               outb(0, 0x22);
        }
 
        outb(pic2_mask,0xA1);   /* restore mask */
@@ -446,53 +465,57 @@ static int __init longhaul_get_ranges(void)
 static void __init longhaul_setup_voltagescaling(void)
 {
        union msr_longhaul longhaul;
+       struct mV_pos minvid, maxvid;
+       unsigned int j, speed, pos, kHz_step, numvscales;
 
-       rdmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-       if (!(longhaul.bits.RevisionID & 1))
+       rdmsrl(MSR_VIA_LONGHAUL, longhaul.val);
+       if (!(longhaul.bits.RevisionID & 1)) {
+               printk(KERN_INFO PFX "Voltage scaling not supported by CPU.\n");
                return;
+       }
+
+       if (!longhaul.bits.VRMRev) {
+               printk (KERN_INFO PFX "VRM 8.5\n");
+               vrm_mV_table = &vrm85_mV[0];
+               mV_vrm_table = &mV_vrm85[0];
+       } else {
+               printk (KERN_INFO PFX "Mobile VRM\n");
+               vrm_mV_table = &mobilevrm_mV[0];
+               mV_vrm_table = &mV_mobilevrm[0];
+       }
 
-       minvid = longhaul.bits.MinimumVID;
-       maxvid = longhaul.bits.MaximumVID;
-       vrmrev = longhaul.bits.VRMRev;
+       minvid = vrm_mV_table[longhaul.bits.MinimumVID];
+       maxvid = vrm_mV_table[longhaul.bits.MaximumVID];
+       numvscales = maxvid.pos - minvid.pos + 1;
+       kHz_step = (highest_speed - lowest_speed) / numvscales;
 
-       if (minvid == 0 || maxvid == 0) {
+       if (minvid.mV == 0 || maxvid.mV == 0 || minvid.mV > maxvid.mV) {
                printk (KERN_INFO PFX "Bogus values Min:%d.%03d Max:%d.%03d. "
                                        "Voltage scaling disabled.\n",
-                                       minvid/1000, minvid%1000, maxvid/1000, maxvid%1000);
+                                       minvid.mV/1000, minvid.mV%1000, maxvid.mV/1000, maxvid.mV%1000);
                return;
        }
 
-       if (minvid == maxvid) {
+       if (minvid.mV == maxvid.mV) {
                printk (KERN_INFO PFX "Claims to support voltage scaling but min & max are "
                                "both %d.%03d. Voltage scaling disabled\n",
-                               maxvid/1000, maxvid%1000);
+                               maxvid.mV/1000, maxvid.mV%1000);
                return;
        }
 
-       if (vrmrev==0) {
-               dprintk ("VRM 8.5\n");
-               memcpy (voltage_table, vrm85scales, sizeof(voltage_table));
-               numvscales = (voltage_table[maxvid]-voltage_table[minvid])/25;
-       } else {
-               dprintk ("Mobile VRM\n");
-               memcpy (voltage_table, mobilevrmscales, sizeof(voltage_table));
-               numvscales = (voltage_table[maxvid]-voltage_table[minvid])/5;
+       printk(KERN_INFO PFX "Max VID=%d.%03d  Min VID=%d.%03d, %d possible voltage scales\n",
+               maxvid.mV/1000, maxvid.mV%1000,
+               minvid.mV/1000, minvid.mV%1000,
+               numvscales);
+       
+       j = 0;
+       while (longhaul_table[j].frequency != CPUFREQ_TABLE_END) {
+               speed = longhaul_table[j].frequency;
+               pos = (speed - lowest_speed) / kHz_step + minvid.pos;
+               f_msr_table[longhaul_table[j].index].vrm = mV_vrm_table[pos];
+               j++;
        }
 
-       /* Current voltage isn't readable at first, so we need to
-          set it to a known value. The spec says to use maxvid */
-       longhaul.bits.RevisionKey = longhaul.bits.RevisionID;   /* FIXME: This is bad. */
-       longhaul.bits.EnableSoftVID = 1;
-       longhaul.bits.SoftVID = maxvid;
-       wrmsrl (MSR_VIA_LONGHAUL, longhaul.val);
-
-       minvid = voltage_table[minvid];
-       maxvid = voltage_table[maxvid];
-
-       dprintk ("Min VID=%d.%03d Max VID=%d.%03d, %d possible voltage scales\n",
-               maxvid/1000, maxvid%1000, minvid/1000, minvid%1000, numvscales);
-
        can_scale_voltage = 1;
 }
 
@@ -540,21 +563,33 @@ static acpi_status longhaul_walk_callback(acpi_handle obj_handle,
        return 1;
 }
 
+/* VIA don't support PM2 reg, but have something similar */
+static int enable_arbiter_disable(void)
+{
+       struct pci_dev *dev;
+       u8 pci_cmd;
+
+       /* Find PLE133 host bridge */
+       dev = pci_find_device(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8601_0, NULL);
+       if (dev != NULL) {
+               /* Enable access to port 0x22 */
+               pci_read_config_byte(dev, 0x78, &pci_cmd);
+               if ( !(pci_cmd & 1<<7) ) {
+                       pci_cmd |= 1<<7;
+                       pci_write_config_byte(dev, 0x78, pci_cmd);
+               }
+               return 1;
+       }
+       return 0;
+}
+
 static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
 {
        struct cpuinfo_x86 *c = cpu_data;
        char *cpuname=NULL;
        int ret;
 
-       /* Check ACPI support for C3 state */
-       acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
-                        &longhaul_walk_callback, NULL, (void *)&pr);
-       if (pr == NULL) goto err_acpi;
-
-       cx = &pr->power.states[ACPI_STATE_C3];
-       if (cx->address == 0 || cx->latency > 1000) goto err_acpi;
-
-       /* Now check what we have on this motherboard */
+       /* Check what we have on this motherboard */
        switch (c->x86_model) {
        case 6:
                cpu_model = CPU_SAMUEL;
@@ -636,12 +671,36 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy)
                break;
        };
 
+       /* Find ACPI data for processor */
+       acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX,
+                           &longhaul_walk_callback, NULL, (void *)&pr);
+       if (pr == NULL)
+               goto err_acpi;
+
+       if (longhaul_version == TYPE_POWERSAVER) {
+               /* Check ACPI support for C3 state */
+               cx = &pr->power.states[ACPI_STATE_C3];
+               if (cx->address == 0 ||
+                  (cx->latency > 1000 && ignore_latency == 0) )
+                       goto err_acpi;
+
+       } else {
+               /* Check ACPI support for bus master arbiter disable */
+               if (!pr->flags.bm_control) {
+                       if (!enable_arbiter_disable()) {
+                               printk(KERN_ERR PFX "No ACPI support. No VT8601 host bridge. Aborting.\n");
+                               return -ENODEV;
+                       } else
+                               port22_en = 1;
+               }
+       }
+
        ret = longhaul_get_ranges();
        if (ret != 0)
                return ret;
 
        if ((longhaul_version==TYPE_LONGHAUL_V2 || longhaul_version==TYPE_POWERSAVER) &&
-                (dont_scale_voltage==0))
+                (scale_voltage != 0))
                longhaul_setup_voltagescaling();
 
        policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
@@ -729,8 +788,10 @@ static void __exit longhaul_exit(void)
        kfree(longhaul_table);
 }
 
-module_param (dont_scale_voltage, int, 0644);
-MODULE_PARM_DESC(dont_scale_voltage, "Don't scale voltage of processor");
+module_param (scale_voltage, int, 0644);
+MODULE_PARM_DESC(scale_voltage, "Scale voltage of processor");
+module_param(ignore_latency, int, 0644);
+MODULE_PARM_DESC(ignore_latency, "Skip ACPI C3 latency test");
 
 MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>");
 MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors.");
@@ -738,4 +799,3 @@ MODULE_LICENSE ("GPL");
 
 late_initcall(longhaul_init);
 module_exit(longhaul_exit);
-
diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h
index d3a95d7..bc4682a 100644
@@ -450,17 +450,45 @@ static int __initdata nehemiah_c_eblcr[32] = {
  * Voltage scales. Div/Mod by 1000 to get actual voltage.
  * Which scale to use depends on the VRM type in use.
  */
-static int __initdata vrm85scales[32] = {
-       1250, 1200, 1150, 1100, 1050, 1800, 1750, 1700,
-       1650, 1600, 1550, 1500, 1450, 1400, 1350, 1300,
-       1275, 1225, 1175, 1125, 1075, 1825, 1775, 1725,
-       1675, 1625, 1575, 1525, 1475, 1425, 1375, 1325,
+
+struct mV_pos {
+       unsigned short mV;
+       unsigned short pos;
+};
+
+static struct mV_pos __initdata vrm85_mV[32] = {
+       {1250, 8},      {1200, 6},      {1150, 4},      {1100, 2},
+       {1050, 0},      {1800, 30},     {1750, 28},     {1700, 26},
+       {1650, 24},     {1600, 22},     {1550, 20},     {1500, 18},
+       {1450, 16},     {1400, 14},     {1350, 12},     {1300, 10},
+       {1275, 9},      {1225, 7},      {1175, 5},      {1125, 3},
+       {1075, 1},      {1825, 31},     {1775, 29},     {1725, 27},
+       {1675, 25},     {1625, 23},     {1575, 21},     {1525, 19},
+       {1475, 17},     {1425, 15},     {1375, 13},     {1325, 11}
+};
+
+static unsigned char __initdata mV_vrm85[32] = {
+       0x04,   0x14,   0x03,   0x13,   0x02,   0x12,   0x01,   0x11,
+       0x00,   0x10,   0x0f,   0x1f,   0x0e,   0x1e,   0x0d,   0x1d,
+       0x0c,   0x1c,   0x0b,   0x1b,   0x0a,   0x1a,   0x09,   0x19,
+       0x08,   0x18,   0x07,   0x17,   0x06,   0x16,   0x05,   0x15
+};
+
+static struct mV_pos __initdata mobilevrm_mV[32] = {
+       {1750, 31},     {1700, 30},     {1650, 29},     {1600, 28},
+       {1550, 27},     {1500, 26},     {1450, 25},     {1400, 24},
+       {1350, 23},     {1300, 22},     {1250, 21},     {1200, 20},
+       {1150, 19},     {1100, 18},     {1050, 17},     {1000, 16},
+       {975, 15},      {950, 14},      {925, 13},      {900, 12},
+       {875, 11},      {850, 10},      {825, 9},       {800, 8},
+       {775, 7},       {750, 6},       {725, 5},       {700, 4},
+       {675, 3},       {650, 2},       {625, 1},       {600, 0}
 };
 
-static int __initdata mobilevrmscales[32] = {
-       2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
-       1600, 1550, 1500, 1450, 1500, 1350, 1300, -1,
-       1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
-       1075, 1050, 1025, 1000, 975, 950, 925, -1,
+static unsigned char __initdata mV_mobilevrm[32] = {
+       0x1f,   0x1e,   0x1d,   0x1c,   0x1b,   0x1a,   0x19,   0x18,
+       0x17,   0x16,   0x15,   0x14,   0x13,   0x12,   0x11,   0x10,
+       0x0f,   0x0e,   0x0d,   0x0c,   0x0b,   0x0a,   0x09,   0x08,
+       0x07,   0x06,   0x05,   0x04,   0x03,   0x02,   0x01,   0x00
 };
 
diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
index b77f135..7a93253 100644
@@ -23,6 +23,7 @@
 
 #ifdef CONFIG_X86_SPEEDSTEP_CENTRINO_ACPI
 #include <linux/acpi.h>
+#include <linux/dmi.h>
 #include <acpi/processor.h>
 #endif
 
@@ -377,6 +378,35 @@ static int centrino_cpu_early_init_acpi(void)
        return 0;
 }
 
+
+/*
+ * Some BIOSes do SW_ANY coordination internally, either set it up in hw
+ * or do it in BIOS firmware and won't inform about it to OS. If not
+ * detected, this has a side effect of making CPU run at a different speed
+ * than OS intended it to run at. Detect it and handle it cleanly.
+ */
+static int bios_with_sw_any_bug;
+static int __init sw_any_bug_found(struct dmi_system_id *d)
+{
+       bios_with_sw_any_bug = 1;
+       return 0;
+}
+
+
+static struct dmi_system_id sw_any_bug_dmi_table[] = {
+       {
+               .callback = sw_any_bug_found,
+               .ident = "Supermicro Server X6DLP",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Supermicro"),
+                       DMI_MATCH(DMI_BIOS_VERSION, "080010"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "X6DLP"),
+               },
+       },
+       { }
+};
+
+
 /*
  * centrino_cpu_init_acpi - register with ACPI P-States library
  *
@@ -398,14 +428,24 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy)
                dprintk(PFX "obtaining ACPI data failed\n");
                return -EIO;
        }
+
        policy->shared_type = p->shared_type;
        /*
         * Will let policy->cpus know about dependency only when software 
         * coordination is required.
         */
        if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
-           policy->shared_type == CPUFREQ_SHARED_TYPE_ANY)
+           policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
                policy->cpus = p->shared_cpu_map;
+       }
+
+#ifdef CONFIG_SMP
+       dmi_check_system(sw_any_bug_dmi_table);
+       if (bios_with_sw_any_bug && cpus_weight(policy->cpus) == 1) {
+               policy->shared_type = CPUFREQ_SHARED_TYPE_ALL;
+               policy->cpus = cpu_core_map[cpu];
+       }
+#endif
 
        /* verify the acpi_data */
        if (p->state_count <= 1) {
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 13fcaf5..e49621b 100644
@@ -1058,8 +1058,8 @@ core99_reset_cpu(struct device_node *node, long param, long value)
        if (np == NULL)
                return -ENODEV;
        for (np = np->child; np != NULL; np = np->sibling) {
-               u32 *num = get_property(np, "reg", NULL);
-               u32 *rst = get_property(np, "soft-reset", NULL);
+               const u32 *num = get_property(np, "reg", NULL);
+               const u32 *rst = get_property(np, "soft-reset", NULL);
                if (num == NULL || rst == NULL)
                        continue;
                if (param == *num) {
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 653eeb6..1949b65 100644
@@ -702,7 +702,7 @@ static void __init smp_core99_setup(int ncpus)
        /* GPIO based HW sync on ppc32 Core99 */
        if (pmac_tb_freeze == NULL && !machine_is_compatible("MacRISC4")) {
                struct device_node *cpu;
-               u32 *tbprop = NULL;
+               const u32 *tbprop = NULL;
 
                core99_tb_gpio = KL_GPIO_TB_ENABLE;     /* default value */
                cpu = of_find_node_by_type(NULL, "cpu");
diff --git a/arch/sparc/kernel/ebus.c b/arch/sparc/kernel/ebus.c
index 81c0cbd..75ac24d 100644
@@ -277,7 +277,7 @@ void __init ebus_init(void)
        struct pci_dev *pdev;
        struct pcidev_cookie *cookie;
        struct device_node *dp;
-       unsigned long addr, *base;
+       struct resource *p;
        unsigned short pci_command;
        int len, reg, nreg;
        int num_ebus = 0;
@@ -321,13 +321,12 @@ void __init ebus_init(void)
                }
                nreg = len / sizeof(struct linux_prom_pci_registers);
 
-               base = &ebus->self->resource[0].start;
+               p = &ebus->self->resource[0];
                for (reg = 0; reg < nreg; reg++) {
                        if (!(regs[reg].which_io & 0x03000000))
                                continue;
 
-                       addr = regs[reg].phys_lo;
-                       *base++ = addr;
+                       (p++)->start = regs[reg].phys_lo;
                }
 
                ebus->ofdev.node = dp;
diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c
index 8654b44..d33f8a0 100644
@@ -508,6 +508,7 @@ void __init sbus_arch_bus_ranges_init(struct device_node *pn, struct sbus_bus *s
 
 void __init sbus_setup_iommu(struct sbus_bus *sbus, struct device_node *dp)
 {
+#ifndef CONFIG_SUN4
        struct device_node *parent = dp->parent;
 
        if (sparc_cpu_model != sun4d &&
@@ -524,6 +525,7 @@ void __init sbus_setup_iommu(struct sbus_bus *sbus, struct device_node *dp)
 
                iounit_init(dp->node, parent->node, sbus);
        }
+#endif
 }
 
 void __init sbus_setup_arch_props(struct sbus_bus *sbus, struct device_node *dp)
diff --git a/crypto/hmac.c b/crypto/hmac.c
index f403b69..b521bcd 100644
@@ -92,13 +92,17 @@ static int hmac_init(struct hash_desc *pdesc)
        struct hmac_ctx *ctx = align_ptr(ipad + bs * 2 + ds, sizeof(void *));
        struct hash_desc desc;
        struct scatterlist tmp;
+       int err;
 
        desc.tfm = ctx->child;
        desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
        sg_set_buf(&tmp, ipad, bs);
 
-       return unlikely(crypto_hash_init(&desc)) ?:
-              crypto_hash_update(&desc, &tmp, 1);
+       err = crypto_hash_init(&desc);
+       if (unlikely(err))
+               return err;
+
+       return crypto_hash_update(&desc, &tmp, bs);
 }
 
 static int hmac_update(struct hash_desc *pdesc,
@@ -123,13 +127,17 @@ static int hmac_final(struct hash_desc *pdesc, u8 *out)
        struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *));
        struct hash_desc desc;
        struct scatterlist tmp;
+       int err;
 
        desc.tfm = ctx->child;
        desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
        sg_set_buf(&tmp, opad, bs + ds);
 
-       return unlikely(crypto_hash_final(&desc, digest)) ?:
-              crypto_hash_digest(&desc, &tmp, bs + ds, out);
+       err = crypto_hash_final(&desc, digest);
+       if (unlikely(err))
+               return err;
+
+       return crypto_hash_digest(&desc, &tmp, bs + ds, out);
 }
 
 static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
@@ -145,6 +153,7 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
        struct hash_desc desc;
        struct scatterlist sg1[2];
        struct scatterlist sg2[1];
+       int err;
 
        desc.tfm = ctx->child;
        desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
@@ -154,8 +163,11 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg,
        sg1[1].length = 0;
        sg_set_buf(sg2, opad, bs + ds);
 
-       return unlikely(crypto_hash_digest(&desc, sg1, nbytes + bs, digest)) ?:
-              crypto_hash_digest(&desc, sg2, bs + ds, out);
+       err = crypto_hash_digest(&desc, sg1, nbytes + bs, digest);
+       if (unlikely(err))
+               return err;
+
+       return crypto_hash_digest(&desc, sg2, bs + ds, out);
 }
 
 static int hmac_init_tfm(struct crypto_tfm *tfm)
diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h
index 3c623b6..8b3317f 100644
@@ -117,7 +117,7 @@ struct agp_bridge_driver {
 };
 
 struct agp_bridge_data {
-       struct agp_version *version;
+       const struct agp_version *version;
        struct agp_bridge_driver *driver;
        struct vm_operations_struct *vm_ops;
        void *previous_size;
diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 509adc4..d59e037 100644
@@ -44,7 +44,7 @@
  * past 0.99 at all due to some boolean logic error. */
 #define AGPGART_VERSION_MAJOR 0
 #define AGPGART_VERSION_MINOR 101
-static struct agp_version agp_current_version =
+static const struct agp_version agp_current_version =
 {
        .major = AGPGART_VERSION_MAJOR,
        .minor = AGPGART_VERSION_MINOR,
diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
index b788b0a..30f730f 100644
@@ -337,13 +337,6 @@ static struct agp_bridge_driver efficeon_driver = {
        .agp_destroy_page       = agp_generic_destroy_page,
 };
 
-
-static int agp_efficeon_resume(struct pci_dev *pdev)
-{
-       printk(KERN_DEBUG PFX "agp_efficeon_resume()\n");
-       return efficeon_configure();
-}
-
 static int __devinit agp_efficeon_probe(struct pci_dev *pdev,
                                     const struct pci_device_id *ent)
 {
@@ -414,11 +407,18 @@ static void __devexit agp_efficeon_remove(struct pci_dev *pdev)
        agp_put_bridge(bridge);
 }
 
+#ifdef CONFIG_PM
 static int agp_efficeon_suspend(struct pci_dev *dev, pm_message_t state)
 {
        return 0;
 }
 
+static int agp_efficeon_resume(struct pci_dev *pdev)
+{
+       printk(KERN_DEBUG PFX "agp_efficeon_resume()\n");
+       return efficeon_configure();
+}
+#endif
 
 static struct pci_device_id agp_efficeon_pci_table[] = {
        {
@@ -439,8 +439,10 @@ static struct pci_driver agp_efficeon_pci_driver = {
        .id_table       = agp_efficeon_pci_table,
        .probe          = agp_efficeon_probe,
        .remove         = agp_efficeon_remove,
+#ifdef CONFIG_PM
        .suspend        = agp_efficeon_suspend,
        .resume         = agp_efficeon_resume,
+#endif
 };
 
 static int __init agp_efficeon_init(void)
diff --git a/drivers/char/agp/frontend.c b/drivers/char/agp/frontend.c
index d9c5a91..0f2ed2a 100644
@@ -151,35 +151,12 @@ static void agp_add_seg_to_client(struct agp_client *client,
        client->segments = seg;
 }
 
-/* Originally taken from linux/mm/mmap.c from the array
- * protection_map.
- * The original really should be exported to modules, or
- * some routine which does the conversion for you
- */
-
-static const pgprot_t my_protect_map[16] =
-{
-       __P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
-       __S000, __S001, __S010, __S011, __S100, __S101, __S110, __S111
-};
-
 static pgprot_t agp_convert_mmap_flags(int prot)
 {
-#define _trans(x,bit1,bit2) \
-((bit1==bit2)?(x&bit1):(x&bit1)?bit2:0)
-
        unsigned long prot_bits;
-       pgprot_t temp;
-
-       prot_bits = _trans(prot, PROT_READ, VM_READ) |
-           _trans(prot, PROT_WRITE, VM_WRITE) |
-           _trans(prot, PROT_EXEC, VM_EXEC);
-
-       prot_bits |= VM_SHARED;
 
-       temp = my_protect_map[prot_bits & 0x0000000f];
-
-       return temp;
+       prot_bits = calc_vm_prot_bits(prot) | VM_SHARED;
+       return vm_get_page_prot(prot_bits);
 }
 
 static int agp_create_segment(struct agp_client *client, struct agp_region *region)
diff --git a/drivers/char/agp/generic.c b/drivers/char/agp/generic.c
index cc5ea34..0dcdb36 100644
@@ -568,25 +568,34 @@ static void agp_v3_parse_one(u32 *requested_mode, u32 *bridge_agpstat, u32 *vga_
                *bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
                goto done;
 
+       } else if (*requested_mode & AGPSTAT3_4X) {
+               *bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+               *bridge_agpstat |= AGPSTAT3_4X;
+               goto done;
+
        } else {
 
                /*
-                * If we didn't specify AGPx8, we can only do x4.
-                * If the hardware can't do x4, we're up shit creek, and never
-                *  should have got this far.
+                * If we didn't specify an AGP mode, we see if both
+                * the graphics card, and the bridge can do x8, and use if so.
+                * If not, we fall back to x4 mode.
                 */
-               *bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
-               if ((*bridge_agpstat & AGPSTAT3_4X) && (*vga_agpstat & AGPSTAT3_4X))
-                       *bridge_agpstat |= AGPSTAT3_4X;
-               else {
-                       printk(KERN_INFO PFX "Badness. Don't know which AGP mode to set. "
-                                                       "[bridge_agpstat:%x vga_agpstat:%x fell back to:- bridge_agpstat:%x vga_agpstat:%x]\n",
-                                                       origbridge, origvga, *bridge_agpstat, *vga_agpstat);
-                       if (!(*bridge_agpstat & AGPSTAT3_4X))
-                               printk(KERN_INFO PFX "Bridge couldn't do AGP x4.\n");
-                       if (!(*vga_agpstat & AGPSTAT3_4X))
-                               printk(KERN_INFO PFX "Graphic card couldn't do AGP x4.\n");
-                       return;
+               if ((*bridge_agpstat & AGPSTAT3_8X) && (*vga_agpstat & AGPSTAT3_8X)) {
+                       printk(KERN_INFO PFX "No AGP mode specified. Setting to highest mode supported by bridge & card (x8).\n");
+                       *bridge_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
+                       *vga_agpstat &= ~(AGPSTAT3_4X | AGPSTAT3_RSVD);
+               } else {
+                       printk(KERN_INFO PFX "Fell back to AGPx4 mode because");
+                       if (!(*bridge_agpstat & AGPSTAT3_8X)) {
+                               printk("bridge couldn't do x8. bridge_agpstat:%x (orig=%x)\n", *bridge_agpstat, origbridge);
+                               *bridge_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+                               *bridge_agpstat |= AGPSTAT3_4X;
+                       }
+                       if (!(*vga_agpstat & AGPSTAT3_8X)) {
+                               printk("graphics card couldn't do x8. vga_agpstat:%x (orig=%x)\n", *vga_agpstat, origvga);
+                               *vga_agpstat &= ~(AGPSTAT3_8X | AGPSTAT3_RSVD);
+                               *vga_agpstat |= AGPSTAT3_4X;
+                       }
                }
        }
 
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 61ac380..d1ede7d 100644
@@ -2,14 +2,6 @@
  * Intel AGPGART routines.
  */
 
-/*
- * Intel(R) 855GM/852GM and 865G support added by David Dawes
- * <dawes@tungstengraphics.com>.
- *
- * Intel(R) 915G/915GM support added by Alan Hourihane
- * <alanh@tungstengraphics.com>.
- */
-
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/init.h>
@@ -17,6 +9,21 @@
 #include <linux/agp_backend.h>
 #include "agp.h"
 
+#define PCI_DEVICE_ID_INTEL_82946GZ_HB      0x2970
+#define PCI_DEVICE_ID_INTEL_82946GZ_IG      0x2972
+#define PCI_DEVICE_ID_INTEL_82965G_1_HB     0x2980
+#define PCI_DEVICE_ID_INTEL_82965G_1_IG     0x2982
+#define PCI_DEVICE_ID_INTEL_82965Q_HB       0x2990
+#define PCI_DEVICE_ID_INTEL_82965Q_IG       0x2992
+#define PCI_DEVICE_ID_INTEL_82965G_HB       0x29A0
+#define PCI_DEVICE_ID_INTEL_82965G_IG       0x29A2
+
+#define IS_I965 (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82946GZ_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_1_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965Q_HB || \
+                 agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82965G_HB)
+
+
 /* Intel 815 register */
 #define INTEL_815_APCONT       0x51
 #define INTEL_815_ATTBASE_MASK ~0x1FFFFFFF
@@ -40,6 +47,8 @@
 #define I915_GMCH_GMS_STOLEN_48M       (0x6 << 4)
 #define I915_GMCH_GMS_STOLEN_64M       (0x7 << 4)
 
+/* Intel 965G registers */
+#define I965_MSAC 0x62
 
 /* Intel 7505 registers */
 #define INTEL_I7505_APSIZE     0x74
@@ -354,6 +363,7 @@ static struct aper_size_info_fixed intel_i830_sizes[] =
        /* The 64M mode still requires a 128k gatt */
        {64, 16384, 5},
        {256, 65536, 6},
+       {512, 131072, 7},
 };
 
 static struct _intel_i830_private {
@@ -377,7 +387,11 @@ static void intel_i830_init_gtt_entries(void)
        /* We obtain the size of the GTT, which is also stored (for some
         * reason) at the top of stolen memory. Then we add 4KB to that
         * for the video BIOS popup, which is also stored in there. */
-       size = agp_bridge->driver->fetch_size() + 4;
+
+       if (IS_I965)
+               size = 512 + 4;
+       else
+               size = agp_bridge->driver->fetch_size() + 4;
 
        if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82830_HB ||
            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82845G_HB) {
@@ -423,7 +437,7 @@ static void intel_i830_init_gtt_entries(void)
                        if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
                            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
                            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-                           agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
+                           agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965 )
                                gtt_entries = MB(48) - KB(size);
                        else
                                gtt_entries = 0;
@@ -433,7 +447,7 @@ static void intel_i830_init_gtt_entries(void)
                        if (agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915G_HB ||
                            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82915GM_HB ||
                            agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945G_HB ||
-                           agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB)
+                           agp_bridge->dev->device == PCI_DEVICE_ID_INTEL_82945GM_HB || IS_I965)
                                gtt_entries = MB(64) - KB(size);
                        else
                                gtt_entries = 0;
@@ -791,6 +805,77 @@ static int intel_i915_create_gatt_table(struct agp_bridge_data *bridge)
 
        return 0;
 }
+static int intel_i965_fetch_size(void)
+{
+       struct aper_size_info_fixed *values;
+       u32 offset = 0;
+       u8 temp;
+
+#define I965_512MB_ADDRESS_MASK (3<<1)
+
+       values = A_SIZE_FIX(agp_bridge->driver->aperture_sizes);
+
+       pci_read_config_byte(intel_i830_private.i830_dev, I965_MSAC, &temp);
+       temp &= I965_512MB_ADDRESS_MASK;
+       switch (temp) {
+       case 0x00:
+               offset = 0; /* 128MB */
+               break;
+       case 0x06:
+               offset = 3; /* 512MB */
+               break;
+       default:
+       case 0x02:
+               offset = 2; /* 256MB */
+               break;
+       }
+
+       agp_bridge->previous_size = agp_bridge->current_size = (void *)(values + offset);
+
+       return values[offset].size;
+}
+
+/* The intel i965 automatically initializes the agp aperture during POST.
++ * Use the memory already set aside for in the GTT.
++ */
+static int intel_i965_create_gatt_table(struct agp_bridge_data *bridge)
+{
+       int page_order;
+       struct aper_size_info_fixed *size;
+       int num_entries;
+       u32 temp;
+
+       size = agp_bridge->current_size;
+       page_order = size->page_order;
+       num_entries = size->num_entries;
+       agp_bridge->gatt_table_real = NULL;
+
+       pci_read_config_dword(intel_i830_private.i830_dev, I915_MMADDR, &temp);
+
+       temp &= 0xfff00000;
+       intel_i830_private.gtt = ioremap((temp + (512 * 1024)) , 512 * 1024);
+
+       if (!intel_i830_private.gtt)
+               return -ENOMEM;
+
+
+       intel_i830_private.registers = ioremap(temp,128 * 4096);
+       if (!intel_i830_private.registers)
+               return -ENOMEM;
+
+       temp = readl(intel_i830_private.registers+I810_PGETBL_CTL) & 0xfffff000;
+       global_cache_flush();   /* FIXME: ? */
+
+       /* we have to call this as early as possible after the MMIO base address is known */
+       intel_i830_init_gtt_entries();
+
+       agp_bridge->gatt_table = NULL;
+
+       agp_bridge->gatt_bus_addr = temp;
+
+       return 0;
+}
+
 
 static int intel_fetch_size(void)
 {
@@ -1307,7 +1392,7 @@ static struct agp_bridge_driver intel_830_driver = {
        .owner                  = THIS_MODULE,
        .aperture_sizes         = intel_i830_sizes,
        .size_type              = FIXED_APER_SIZE,
-       .num_aperture_sizes     = 3,
+       .num_aperture_sizes     = 4,
        .needs_scratch_page     = TRUE,
        .configure              = intel_i830_configure,
        .fetch_size             = intel_i830_fetch_size,
@@ -1469,7 +1554,7 @@ static struct agp_bridge_driver intel_915_driver = {
        .owner                  = THIS_MODULE,
        .aperture_sizes         = intel_i830_sizes,
        .size_type              = FIXED_APER_SIZE,
-       .num_aperture_sizes     = 3,
+       .num_aperture_sizes     = 4,
        .needs_scratch_page     = TRUE,
        .configure              = intel_i915_configure,
        .fetch_size             = intel_i915_fetch_size,
@@ -1489,6 +1574,29 @@ static struct agp_bridge_driver intel_915_driver = {
        .agp_destroy_page       = agp_generic_destroy_page,
 };
 
+static struct agp_bridge_driver intel_i965_driver = {
+       .owner                  = THIS_MODULE,
+       .aperture_sizes         = intel_i830_sizes,
+       .size_type              = FIXED_APER_SIZE,
+       .num_aperture_sizes     = 4,
+       .needs_scratch_page     = TRUE,
+       .configure              = intel_i915_configure,
+       .fetch_size             = intel_i965_fetch_size,
+       .cleanup                = intel_i915_cleanup,
+       .tlb_flush              = intel_i810_tlbflush,
+       .mask_memory            = intel_i810_mask_memory,
+       .masks                  = intel_i810_masks,
+       .agp_enable             = intel_i810_agp_enable,
+       .cache_flush            = global_cache_flush,
+       .create_gatt_table      = intel_i965_create_gatt_table,
+       .free_gatt_table        = intel_i830_free_gatt_table,
+       .insert_memory          = intel_i915_insert_entries,
+       .remove_memory          = intel_i915_remove_entries,
+       .alloc_by_type          = intel_i830_alloc_by_type,
+       .free_by_type           = intel_i810_free_by_type,
+       .agp_alloc_page         = agp_generic_alloc_page,
+       .agp_destroy_page       = agp_generic_destroy_page,
+};
 
 static struct agp_bridge_driver intel_7505_driver = {
        .owner                  = THIS_MODULE,
@@ -1684,6 +1792,35 @@ static int __devinit agp_intel_probe(struct pci_dev *pdev,
                        bridge->driver = &intel_845_driver;
                name = "945GM";
                break;
+       case PCI_DEVICE_ID_INTEL_82946GZ_HB:
+               if (find_i830(PCI_DEVICE_ID_INTEL_82946GZ_IG))
+                       bridge->driver = &intel_i965_driver;
+               else
+                       bridge->driver = &intel_845_driver;
+               name = "946GZ";
+               break;
+       case PCI_DEVICE_ID_INTEL_82965G_1_HB:
+               if (find_i830(PCI_DEVICE_ID_INTEL_82965G_1_IG))
+                       bridge->driver = &intel_i965_driver;
+               else
+                       bridge->driver = &intel_845_driver;
+               name = "965G";
+               break;
+       case PCI_DEVICE_ID_INTEL_82965Q_HB:
+               if (find_i830(PCI_DEVICE_ID_INTEL_82965Q_IG))
+                       bridge->driver = &intel_i965_driver;
+               else
+                       bridge->driver = &intel_845_driver;
+               name = "965Q";
+               break;
+       case PCI_DEVICE_ID_INTEL_82965G_HB:
+               if (find_i830(PCI_DEVICE_ID_INTEL_82965G_IG))
+                       bridge->driver = &intel_i965_driver;
+               else
+                       bridge->driver = &intel_845_driver;
+               name = "965G";
+               break;
+
        case PCI_DEVICE_ID_INTEL_7505_0:
                bridge->driver = &intel_7505_driver;
                name = "E7505";
@@ -1766,6 +1903,7 @@ static void __devexit agp_intel_remove(struct pci_dev *pdev)
        agp_put_bridge(bridge);
 }
 
+#ifdef CONFIG_PM
 static int agp_intel_resume(struct pci_dev *pdev)
 {
        struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
@@ -1786,9 +1924,12 @@ static int agp_intel_resume(struct pci_dev *pdev)
                intel_i830_configure();
        else if (bridge->driver == &intel_810_driver)
                intel_i810_configure();
+       else if (bridge->driver == &intel_i965_driver)
+               intel_i915_configure();
 
        return 0;
 }
+#endif
 
 static struct pci_device_id agp_intel_pci_table[] = {
 #define ID(x)                                          \
@@ -1825,6 +1966,10 @@ static struct pci_device_id agp_intel_pci_table[] = {
        ID(PCI_DEVICE_ID_INTEL_82915GM_HB),
        ID(PCI_DEVICE_ID_INTEL_82945G_HB),
        ID(PCI_DEVICE_ID_INTEL_82945GM_HB),
+       ID(PCI_DEVICE_ID_INTEL_82946GZ_HB),
+       ID(PCI_DEVICE_ID_INTEL_82965G_1_HB),
+       ID(PCI_DEVICE_ID_INTEL_82965Q_HB),
+       ID(PCI_DEVICE_ID_INTEL_82965G_HB),
        { }
 };
 
@@ -1835,7 +1980,9 @@ static struct pci_driver agp_intel_pci_driver = {
        .id_table       = agp_intel_pci_table,
        .probe          = agp_intel_probe,
        .remove         = __devexit_p(agp_intel_remove),
+#ifdef CONFIG_PM
        .resume         = agp_intel_resume,
+#endif
 };
 
 static int __init agp_intel_init(void)
diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index b8ec25d..c149ac9 100644
@@ -9,7 +9,7 @@
 #include <linux/agp_backend.h>
 #include "agp.h"
 
-static struct pci_device_id agp_via_pci_table[];
+static const struct pci_device_id agp_via_pci_table[];
 
 #define VIA_GARTCTRL   0x80
 #define VIA_APSIZE     0x84
@@ -485,7 +485,7 @@ static int agp_via_resume(struct pci_dev *pdev)
 #endif /* CONFIG_PM */
 
 /* must be the same order as name table above */
-static struct pci_device_id agp_via_pci_table[] = {
+static const struct pci_device_id agp_via_pci_table[] = {
 #define ID(x) \
        {                                               \
        .class          = (PCI_CLASS_BRIDGE_HOST << 8), \
diff --git a/drivers/char/briq_panel.c b/drivers/char/briq_panel.c
index a0e5eac..b8c2225 100644
@@ -87,7 +87,7 @@ static int briq_panel_release(struct inode *ino, struct file *filep)
        return 0;
 }
 
-static ssize_t briq_panel_read(struct file *file, char *buf, size_t count,
+static ssize_t briq_panel_read(struct file *file, char __user *buf, size_t count,
                         loff_t *ppos)
 {
        unsigned short c;
@@ -135,7 +135,7 @@ static void scroll_vfd( void )
        vfd_cursor = 20;
 }
 
-static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len,
+static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_t len,
                          loff_t *ppos)
 {
        size_t indx = len;
@@ -150,19 +150,22 @@ static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len,
                return -EBUSY;
 
        for (;;) {
+               char c;
                if (!indx)
                        break;
+               if (get_user(c, buf))
+                       return -EFAULT;
                if (esc) {
-                       set_led(*buf);
+                       set_led(c);
                        esc = 0;
-               } else if (*buf == 27) {
+               } else if (c == 27) {
                        esc = 1;
-               } else if (*buf == 12) {
+               } else if (c == 12) {
                        /* do a form feed */
                        for (i=0; i<40; i++)
                                vfd[i] = ' ';
                        vfd_cursor = 0;
-               } else if (*buf == 10) {
+               } else if (c == 10) {
                        if (vfd_cursor < 20)
                                vfd_cursor = 20;
                        else if (vfd_cursor < 40)
@@ -175,7 +178,7 @@ static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len,
                        /* just a character */
                        if (vfd_cursor > 39)
                                scroll_vfd();
-                       vfd[vfd_cursor++] = *buf;
+                       vfd[vfd_cursor++] = c;
                }
                indx--;
                buf++;
@@ -202,7 +205,7 @@ static struct miscdevice briq_panel_miscdev = {
 static int __init briq_panel_init(void)
 {
        struct device_node *root = find_path_device("/");
-       char *machine;
+       const char *machine;
        int i;
 
        machine = get_property(root, "model", NULL);
diff --git a/drivers/char/istallion.c b/drivers/char/istallion.c
index 84dfc42..8c09997 100644
@@ -3488,7 +3488,7 @@ static int stli_initecp(stlibrd_t *brdp)
  */
        EBRDENABLE(brdp);
        sigsp = (cdkecpsig_t __iomem *) EBRDGETMEMPTR(brdp, CDK_SIGADDR);
-       memcpy(&sig, sigsp, sizeof(cdkecpsig_t));
+       memcpy_fromio(&sig, sigsp, sizeof(cdkecpsig_t));
        EBRDDISABLE(brdp);
 
        if (sig.magic != cpu_to_le32(ECP_MAGIC))
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index b3df613..d35a9f0 100644
@@ -32,7 +32,7 @@
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "cpufreq-core", msg)
 
 /**
- * The "cpufreq driver" - the arch- or hardware-dependend low
+ * The "cpufreq driver" - the arch- or hardware-dependent low
  * level driver of CPUFreq support, and its spinlock. This lock
  * also protects the cpufreq_cpu_data array.
  */
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index 52cf1f0..bf8aa45 100644
@@ -55,6 +55,10 @@ struct cpu_dbs_info_s {
        struct cpufreq_policy *cur_policy;
        struct work_struct work;
        unsigned int enable;
+       struct cpufreq_frequency_table *freq_table;
+       unsigned int freq_lo;
+       unsigned int freq_lo_jiffies;
+       unsigned int freq_hi_jiffies;
 };
 static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info);
 
@@ -72,15 +76,15 @@ static DEFINE_MUTEX(dbs_mutex);
 
 static struct workqueue_struct *kondemand_wq;
 
-struct dbs_tuners {
+static struct dbs_tuners {
        unsigned int sampling_rate;
        unsigned int up_threshold;
        unsigned int ignore_nice;
-};
-
-static struct dbs_tuners dbs_tuners_ins = {
+       unsigned int powersave_bias;
+} dbs_tuners_ins = {
        .up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
        .ignore_nice = 0,
+       .powersave_bias = 0,
 };
 
 static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
@@ -96,6 +100,70 @@ static inline cputime64_t get_cpu_idle_time(unsigned int cpu)
        return retval;
 }
 
+/*
+ * Find the right freq to be set now with powersave_bias on.
+ * Returns the freq_hi to be used right now and will set freq_hi_jiffies,
+ * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
+ */
+static unsigned int powersave_bias_target(struct cpufreq_policy *policy,
+                                         unsigned int freq_next,
+                                         unsigned int relation)
+{
+       unsigned int freq_req, freq_reduc, freq_avg;
+       unsigned int freq_hi, freq_lo;
+       unsigned int index = 0;
+       unsigned int jiffies_total, jiffies_hi, jiffies_lo;
+       struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu);
+
+       if (!dbs_info->freq_table) {
+               dbs_info->freq_lo = 0;
+               dbs_info->freq_lo_jiffies = 0;
+               return freq_next;
+       }
+
+       cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next,
+                       relation, &index);
+       freq_req = dbs_info->freq_table[index].frequency;
+       freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000;
+       freq_avg = freq_req - freq_reduc;
+
+       /* Find freq bounds for freq_avg in freq_table */
+       index = 0;
+       cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+                       CPUFREQ_RELATION_H, &index);
+       freq_lo = dbs_info->freq_table[index].frequency;
+       index = 0;
+       cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg,
+                       CPUFREQ_RELATION_L, &index);
+       freq_hi = dbs_info->freq_table[index].frequency;
+
+       /* Find out how long we have to be in hi and lo freqs */
+       if (freq_hi == freq_lo) {
+               dbs_info->freq_lo = 0;
+               dbs_info->freq_lo_jiffies = 0;
+               return freq_lo;
+       }
+       jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+       jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
+       jiffies_hi += ((freq_hi - freq_lo) / 2);
+       jiffies_hi /= (freq_hi - freq_lo);
+       jiffies_lo = jiffies_total - jiffies_hi;
+       dbs_info->freq_lo = freq_lo;
+       dbs_info->freq_lo_jiffies = jiffies_lo;
+       dbs_info->freq_hi_jiffies = jiffies_hi;
+       return freq_hi;
+}
+
+static void ondemand_powersave_bias_init(void)
+{
+       int i;
+       for_each_online_cpu(i) {
+               struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i);
+               dbs_info->freq_table = cpufreq_frequency_get_table(i);
+               dbs_info->freq_lo = 0;
+       }
+}
+
 /************************** sysfs interface ************************/
 static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf)
 {
@@ -124,6 +192,7 @@ static ssize_t show_##file_name                                             \
 show_one(sampling_rate, sampling_rate);
 show_one(up_threshold, up_threshold);
 show_one(ignore_nice_load, ignore_nice);
+show_one(powersave_bias, powersave_bias);
 
 static ssize_t store_sampling_rate(struct cpufreq_policy *unused,
                const char *buf, size_t count)
@@ -198,6 +267,27 @@ static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy,
        return count;
 }
 
+static ssize_t store_powersave_bias(struct cpufreq_policy *unused,
+               const char *buf, size_t count)
+{
+       unsigned int input;
+       int ret;
+       ret = sscanf(buf, "%u", &input);
+
+       if (ret != 1)
+               return -EINVAL;
+
+       if (input > 1000)
+               input = 1000;
+
+       mutex_lock(&dbs_mutex);
+       dbs_tuners_ins.powersave_bias = input;
+       ondemand_powersave_bias_init();
+       mutex_unlock(&dbs_mutex);
+
+       return count;
+}
+
 #define define_one_rw(_name) \
 static struct freq_attr _name = \
 __ATTR(_name, 0644, show_##_name, store_##_name)
@@ -205,6 +295,7 @@ __ATTR(_name, 0644, show_##_name, store_##_name)
 define_one_rw(sampling_rate);
 define_one_rw(up_threshold);
 define_one_rw(ignore_nice_load);
+define_one_rw(powersave_bias);
 
 static struct attribute * dbs_attributes[] = {
        &sampling_rate_max.attr,
@@ -212,6 +303,7 @@ static struct attribute * dbs_attributes[] = {
        &sampling_rate.attr,
        &up_threshold.attr,
        &ignore_nice_load.attr,
+       &powersave_bias.attr,
        NULL
 };
 
@@ -234,6 +326,7 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
        if (!this_dbs_info->enable)
                return;
 
+       this_dbs_info->freq_lo = 0;
        policy = this_dbs_info->cur_policy;
        cur_jiffies = jiffies64_to_cputime64(get_jiffies_64());
        total_ticks = (unsigned int) cputime64_sub(cur_jiffies,
@@ -274,11 +367,18 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
        /* Check for frequency increase */
        if (load > dbs_tuners_ins.up_threshold) {
                /* if we are already at full speed then break out early */
-               if (policy->cur == policy->max)
-                       return;
-
-               __cpufreq_driver_target(policy, policy->max,
-                       CPUFREQ_RELATION_H);
+               if (!dbs_tuners_ins.powersave_bias) {
+                       if (policy->cur == policy->max)
+                               return;
+
+                       __cpufreq_driver_target(policy, policy->max,
+                               CPUFREQ_RELATION_H);
+               } else {
+                       int freq = powersave_bias_target(policy, policy->max,
+                                       CPUFREQ_RELATION_H);
+                       __cpufreq_driver_target(policy, freq,
+                               CPUFREQ_RELATION_L);
+               }
                return;
        }
 
@@ -293,37 +393,64 @@ static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
         * policy. To be safe, we focus 10 points under the threshold.
         */
        if (load < (dbs_tuners_ins.up_threshold - 10)) {
-               unsigned int freq_next;
-               freq_next = (policy->cur * load) /
+               unsigned int freq_next = (policy->cur * load) /
                        (dbs_tuners_ins.up_threshold - 10);
-
-               __cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
+               if (!dbs_tuners_ins.powersave_bias) {
+                       __cpufreq_driver_target(policy, freq_next,
+                                       CPUFREQ_RELATION_L);
+               } else {
+                       int freq = powersave_bias_target(policy, freq_next,
+                                       CPUFREQ_RELATION_L);
+                       __cpufreq_driver_target(policy, freq,
+                               CPUFREQ_RELATION_L);
+               }
        }
 }
 
+/* Sampling types */
+enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
+
 static void do_dbs_timer(void *data)
 {
        unsigned int cpu = smp_processor_id();
        struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+       /* We want all CPUs to do sampling nearly on the same jiffy */
+       int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+       delay -= jiffies % delay;
 
        if (!dbs_info->enable)
                return;
-
-       lock_cpu_hotplug();
-       dbs_check_cpu(dbs_info);
-       unlock_cpu_hotplug();
-       queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-                       usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
+       /* Common NORMAL_SAMPLE setup */
+       INIT_WORK(&dbs_info->work, do_dbs_timer, (void *)DBS_NORMAL_SAMPLE);
+       if (!dbs_tuners_ins.powersave_bias ||
+           (unsigned long) data == DBS_NORMAL_SAMPLE) {
+               lock_cpu_hotplug();
+               dbs_check_cpu(dbs_info);
+               unlock_cpu_hotplug();
+               if (dbs_info->freq_lo) {
+                       /* Setup timer for SUB_SAMPLE */
+                       INIT_WORK(&dbs_info->work, do_dbs_timer,
+                                       (void *)DBS_SUB_SAMPLE);
+                       delay = dbs_info->freq_hi_jiffies;
+               }
+       } else {
+               __cpufreq_driver_target(dbs_info->cur_policy,
+                                       dbs_info->freq_lo,
+                                       CPUFREQ_RELATION_H);
+       }
+       queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_init(unsigned int cpu)
 {
        struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, cpu);
+       /* We want all CPUs to do sampling nearly on the same jiffy */
+       int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate);
+       delay -= jiffies % delay;
 
-       INIT_WORK(&dbs_info->work, do_dbs_timer, 0);
-       queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work,
-                       usecs_to_jiffies(dbs_tuners_ins.sampling_rate));
-       return;
+       ondemand_powersave_bias_init();
+       INIT_WORK(&dbs_info->work, do_dbs_timer, NULL);
+       queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay);
 }
 
 static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info)
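The powersave_bias logic added above alternates between two frequency-table entries so that their time-weighted average approximates the bias-reduced target. The following is a minimal userspace C sketch of that splitting arithmetic (an editor's illustration, not kernel code; all names and units are hypothetical, and it assumes freq_lo <= freq_avg <= freq_hi, which the kernel's table lookups guarantee):

/*
 * Illustrative userspace sketch: given a requested frequency, a
 * powersave_bias in thousandths, and the two table frequencies that
 * bracket the reduced target, split a sampling period so the
 * time-weighted average approximates freq_avg. Mirrors the arithmetic
 * in powersave_bias_target(), including the +(hi-lo)/2 rounding term.
 */
#include <stdio.h>

struct split {
    unsigned int hi_units;  /* time to spend at freq_hi */
    unsigned int lo_units;  /* time to spend at freq_lo */
};

static struct split split_period(unsigned int freq_req, unsigned int bias,
                                 unsigned int freq_lo, unsigned int freq_hi,
                                 unsigned int period_units)
{
    struct split s;
    unsigned int freq_avg = freq_req - freq_req * bias / 1000;

    if (freq_hi == freq_lo) {          /* no bracketing pair: run at freq_lo */
        s.hi_units = 0;
        s.lo_units = period_units;
        return s;
    }
    /* round to nearest, as the kernel does */
    s.hi_units = ((freq_avg - freq_lo) * period_units +
                  (freq_hi - freq_lo) / 2) / (freq_hi - freq_lo);
    s.lo_units = period_units - s.hi_units;
    return s;
}

int main(void)
{
    /* e.g. 2000 MHz requested, bias 200 (20%), table steps 1500/1800 MHz */
    struct split s = split_period(2000, 200, 1500, 1800, 100);
    printf("hi: %u units, lo: %u units\n", s.hi_units, s.lo_units);
    return 0;
}

With those sample values the split comes out to 33/67 units, a time-weighted average of roughly 1599 MHz against the 1600 MHz bias-reduced target.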
index 25eee53..c2ecc59 100644 (file)
@@ -350,12 +350,10 @@ __init cpufreq_stats_init(void)
        }
 
        register_hotcpu_notifier(&cpufreq_stat_cpu_notifier);
-       lock_cpu_hotplug();
        for_each_online_cpu(cpu) {
                cpufreq_stat_cpu_callback(&cpufreq_stat_cpu_notifier, CPU_ONLINE,
                        (void *)(long)cpu);
        }
-       unlock_cpu_hotplug();
        return 0;
 }
 static void
index 69a53d4..9edface 100644 (file)
@@ -14,7 +14,7 @@ config INFINIBAND_USER_MAD
        ---help---
          Userspace InfiniBand Management Datagram (MAD) support.  This
          is the kernel side of the userspace MAD support, which allows
-         userspace processes to send and receive MADs. You will also 
+         userspace processes to send and receive MADs. You will also
          need libibumad from <http://www.openib.org>.
 
 config INFINIBAND_USER_ACCESS
@@ -36,6 +36,8 @@ config INFINIBAND_ADDR_TRANS
 
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
+source "drivers/infiniband/hw/ehca/Kconfig"
+source "drivers/infiniband/hw/amso1100/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 
index c7ff58c..2b5d109 100644 (file)
@@ -1,6 +1,8 @@
 obj-$(CONFIG_INFINIBAND)               += core/
 obj-$(CONFIG_INFINIBAND_MTHCA)         += hw/mthca/
-obj-$(CONFIG_IPATH_CORE)               += hw/ipath/
+obj-$(CONFIG_INFINIBAND_IPATH)         += hw/ipath/
+obj-$(CONFIG_INFINIBAND_EHCA)          += hw/ehca/
+obj-$(CONFIG_INFINIBAND_AMSO1100)      += hw/amso1100/
 obj-$(CONFIG_INFINIBAND_IPOIB)         += ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)           += ulp/srp/
 obj-$(CONFIG_INFINIBAND_ISER)          += ulp/iser/
index 68e73ec..163d991 100644 (file)
@@ -1,7 +1,7 @@
 infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)     := ib_addr.o rdma_cm.o
 
 obj-$(CONFIG_INFINIBAND) +=            ib_core.o ib_mad.o ib_sa.o \
-                                       ib_cm.o $(infiniband-y)
+                                       ib_cm.o iw_cm.o $(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=   ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=        ib_uverbs.o ib_ucm.o
 
@@ -14,6 +14,8 @@ ib_sa-y :=                    sa_query.o
 
 ib_cm-y :=                     cm.o
 
+iw_cm-y :=                     iwcm.o
+
 rdma_cm-y :=                   cma.o
 
 ib_addr-y :=                   addr.o
index 1205e80..9cbf09e 100644 (file)
@@ -61,12 +61,15 @@ static LIST_HEAD(req_list);
 static DECLARE_WORK(work, process_req, NULL);
 static struct workqueue_struct *addr_wq;
 
-static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
-                    unsigned char *dst_dev_addr)
+int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
+                    const unsigned char *dst_dev_addr)
 {
        switch (dev->type) {
        case ARPHRD_INFINIBAND:
-               dev_addr->dev_type = IB_NODE_CA;
+               dev_addr->dev_type = RDMA_NODE_IB_CA;
+               break;
+       case ARPHRD_ETHER:
+               dev_addr->dev_type = RDMA_NODE_RNIC;
                break;
        default:
                return -EADDRNOTAVAIL;
@@ -78,6 +81,7 @@ static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
                memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
        return 0;
 }
+EXPORT_SYMBOL(rdma_copy_addr);
 
 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 {
@@ -89,7 +93,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
        if (!dev)
                return -EADDRNOTAVAIL;
 
-       ret = copy_addr(dev_addr, dev, NULL);
+       ret = rdma_copy_addr(dev_addr, dev, NULL);
        dev_put(dev);
        return ret;
 }
@@ -161,7 +165,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
 
        /* If the device does ARP internally, return 'done' */
        if (rt->idev->dev->flags & IFF_NOARP) {
-               copy_addr(addr, rt->idev->dev, NULL);
+               rdma_copy_addr(addr, rt->idev->dev, NULL);
                goto put;
        }
 
@@ -181,7 +185,7 @@ static int addr_resolve_remote(struct sockaddr_in *src_in,
                src_in->sin_addr.s_addr = rt->rt_src;
        }
 
-       ret = copy_addr(addr, neigh->dev, neigh->ha);
+       ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
 release:
        neigh_release(neigh);
 put:
@@ -245,7 +249,7 @@ static int addr_resolve_local(struct sockaddr_in *src_in,
        if (ZERONET(src_ip)) {
                src_in->sin_family = dst_in->sin_family;
                src_in->sin_addr.s_addr = dst_ip;
-               ret = copy_addr(addr, dev, dev->dev_addr);
+               ret = rdma_copy_addr(addr, dev, dev->dev_addr);
        } else if (LOOPBACK(src_ip)) {
                ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
                if (!ret)
@@ -327,10 +331,10 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
-static int netevent_callback(struct notifier_block *self, unsigned long event, 
+static int netevent_callback(struct notifier_block *self, unsigned long event,
        void *ctx)
 {
-       if (event == NETEVENT_NEIGH_UPDATE) {  
+       if (event == NETEVENT_NEIGH_UPDATE) {
                struct neighbour *neigh = ctx;
 
                if (neigh->dev->type == ARPHRD_INFINIBAND &&
index 75313ad..20e9f64 100644 (file)
@@ -62,12 +62,13 @@ struct ib_update_work {
 
 static inline int start_port(struct ib_device *device)
 {
-       return device->node_type == IB_NODE_SWITCH ? 0 : 1;
+       return (device->node_type == RDMA_NODE_IB_SWITCH) ? 0 : 1;
 }
 
 static inline int end_port(struct ib_device *device)
 {
-       return device->node_type == IB_NODE_SWITCH ? 0 : device->phys_port_cnt;
+       return (device->node_type == RDMA_NODE_IB_SWITCH) ?
+               0 : device->phys_port_cnt;
 }
 
 int ib_get_cached_gid(struct ib_device *device,
index 0de335b..f35fcc4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004-2006 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
@@ -41,6 +41,7 @@
 #include <linux/idr.h>
 #include <linux/interrupt.h>
 #include <linux/pci.h>
+#include <linux/random.h>
 #include <linux/rbtree.h>
 #include <linux/spinlock.h>
 #include <linux/workqueue.h>
@@ -73,6 +74,7 @@ static struct ib_cm {
        struct rb_root remote_id_table;
        struct rb_root remote_sidr_table;
        struct idr local_id_table;
+       __be32 random_id_operand;
        struct workqueue_struct *wq;
 } cm;
 
@@ -177,7 +179,7 @@ static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
        if (IS_ERR(ah))
                return PTR_ERR(ah);
 
-       m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn, 
+       m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
                               cm_id_priv->av.pkey_index,
                               0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
                               GFP_ATOMIC);
@@ -299,15 +301,17 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 {
        unsigned long flags;
-       int ret;
+       int ret, id;
        static int next_id;
 
        do {
                spin_lock_irqsave(&cm.lock, flags);
-               ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, next_id++,
-                                       (__force int *) &cm_id_priv->id.local_id);
+               ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
+                                       next_id++, &id);
                spin_unlock_irqrestore(&cm.lock, flags);
        } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
+
+       cm_id_priv->id.local_id = (__force __be32) (id ^ cm.random_id_operand);
        return ret;
 }
 
@@ -316,7 +320,8 @@ static void cm_free_id(__be32 local_id)
        unsigned long flags;
 
        spin_lock_irqsave(&cm.lock, flags);
-       idr_remove(&cm.local_id_table, (__force int) local_id);
+       idr_remove(&cm.local_id_table,
+                  (__force int) (local_id ^ cm.random_id_operand));
        spin_unlock_irqrestore(&cm.lock, flags);
 }
 
@@ -324,7 +329,8 @@ static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
 {
        struct cm_id_private *cm_id_priv;
 
-       cm_id_priv = idr_find(&cm.local_id_table, (__force int) local_id);
+       cm_id_priv = idr_find(&cm.local_id_table,
+                             (__force int) (local_id ^ cm.random_id_operand));
        if (cm_id_priv) {
                if (cm_id_priv->id.remote_id == remote_id)
                        atomic_inc(&cm_id_priv->refcount);
@@ -679,6 +685,8 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
 {
        int wait_time;
 
+       cm_cleanup_timewait(cm_id_priv->timewait_info);
+
        /*
         * The cm_id could be destroyed by the user before we exit timewait.
         * To protect against this, we search for the cm_id after exiting
@@ -1354,7 +1362,7 @@ static int cm_req_handler(struct cm_work *work)
                                                            id.local_id);
        if (IS_ERR(cm_id_priv->timewait_info)) {
                ret = PTR_ERR(cm_id_priv->timewait_info);
-               goto error1;
+               goto destroy;
        }
        cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
        cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
@@ -1363,7 +1371,8 @@ static int cm_req_handler(struct cm_work *work)
        listen_cm_id_priv = cm_match_req(work, cm_id_priv);
        if (!listen_cm_id_priv) {
                ret = -EINVAL;
-               goto error2;
+               kfree(cm_id_priv->timewait_info);
+               goto destroy;
        }
 
        cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
@@ -1373,12 +1382,22 @@ static int cm_req_handler(struct cm_work *work)
 
        cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
        ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
-       if (ret)
-               goto error3;
+       if (ret) {
+               ib_get_cached_gid(work->port->cm_dev->device,
+                                 work->port->port_num, 0, &work->path[0].sgid);
+               ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
+                              &work->path[0].sgid, sizeof work->path[0].sgid,
+                              NULL, 0);
+               goto rejected;
+       }
        if (req_msg->alt_local_lid) {
                ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av);
-               if (ret)
-                       goto error3;
+               if (ret) {
+                       ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
+                                      &work->path[0].sgid,
+                                      sizeof work->path[0].sgid, NULL, 0);
+                       goto rejected;
+               }
        }
        cm_id_priv->tid = req_msg->hdr.tid;
        cm_id_priv->timeout_ms = cm_convert_to_ms(
@@ -1400,12 +1419,11 @@ static int cm_req_handler(struct cm_work *work)
        cm_deref_id(listen_cm_id_priv);
        return 0;
 
-error3:        atomic_dec(&cm_id_priv->refcount);
+rejected:
+       atomic_dec(&cm_id_priv->refcount);
        cm_deref_id(listen_cm_id_priv);
-       cm_cleanup_timewait(cm_id_priv->timewait_info);
-error2:        kfree(cm_id_priv->timewait_info);
-       cm_id_priv->timewait_info = NULL;
-error1:        ib_destroy_cm_id(&cm_id_priv->id);
+destroy:
+       ib_destroy_cm_id(cm_id);
        return ret;
 }
 
@@ -2072,8 +2090,9 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
                        spin_unlock_irqrestore(&cm.lock, flags);
                        return NULL;
                }
-               cm_id_priv = idr_find(&cm.local_id_table,
-                                     (__force int) timewait_info->work.local_id);
+               cm_id_priv = idr_find(&cm.local_id_table, (__force int)
+                                     (timewait_info->work.local_id ^
+                                      cm.random_id_operand));
                if (cm_id_priv) {
                        if (cm_id_priv->id.remote_id == remote_id)
                                atomic_inc(&cm_id_priv->refcount);
@@ -3125,7 +3144,8 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
                qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
                                           IB_ACCESS_REMOTE_WRITE;
                if (cm_id_priv->responder_resources)
-                       qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ;
+                       qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
+                                                   IB_ACCESS_REMOTE_ATOMIC;
                qp_attr->pkey_index = cm_id_priv->av.pkey_index;
                qp_attr->port_num = cm_id_priv->av.port->port_num;
                ret = 0;
@@ -3262,6 +3282,9 @@ static void cm_add_one(struct ib_device *device)
        int ret;
        u8 i;
 
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
        cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
                         device->phys_port_cnt, GFP_KERNEL);
        if (!cm_dev)
@@ -3349,6 +3372,7 @@ static int __init ib_cm_init(void)
        cm.remote_qp_table = RB_ROOT;
        cm.remote_sidr_table = RB_ROOT;
        idr_init(&cm.local_id_table);
+       get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
        idr_pre_get(&cm.local_id_table, GFP_KERNEL);
 
        cm.wq = create_workqueue("ib_cm");
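The cm.c hunks above stop exposing raw IDR indices as local communication IDs: a random operand is drawn once at module init, XOR-ed into every id on allocation, and XOR-ed back out on lookup and removal. A small userspace sketch of that mask/unmask round trip follows (editor's illustration only; names are hypothetical and nothing here matches the kernel's types):

/*
 * Userspace sketch: obfuscate sequential indices with a random XOR
 * operand so externally visible ids are unpredictable, while lookups
 * recover the original index by XOR-ing with the same operand.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static unsigned int random_id_operand;

static unsigned int publish_id(unsigned int idr_index)
{
    return idr_index ^ random_id_operand;   /* value handed to remote peers */
}

static unsigned int lookup_index(unsigned int public_id)
{
    return public_id ^ random_id_operand;   /* back to the table index */
}

int main(void)
{
    srand((unsigned int)time(NULL));
    random_id_operand = (unsigned int)rand();

    for (unsigned int idx = 0; idx < 4; idx++) {
        unsigned int pub = publish_id(idx);
        printf("index %u -> public id 0x%08x -> index %u\n",
               idx, pub, lookup_index(pub));
    }
    return 0;
}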
index 5d625a8..1178bd4 100644 (file)
@@ -35,6 +35,7 @@
 #include <linux/mutex.h>
 #include <linux/random.h>
 #include <linux/idr.h>
+#include <linux/inetdevice.h>
 
 #include <net/tcp.h>
 
@@ -43,6 +44,7 @@
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
 #include <rdma/ib_sa.h>
+#include <rdma/iw_cm.h>
 
 MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("Generic RDMA CM Agent");
@@ -60,6 +62,7 @@ static struct ib_client cma_client = {
        .remove = cma_remove_one
 };
 
+static struct ib_sa_client sa_client;
 static LIST_HEAD(dev_list);
 static LIST_HEAD(listen_any_list);
 static DEFINE_MUTEX(lock);
@@ -124,6 +127,7 @@ struct rdma_id_private {
        int                     query_id;
        union {
                struct ib_cm_id *ib;
+               struct iw_cm_id *iw;
        } cm_id;
 
        u32                     seq_num;
@@ -259,15 +263,24 @@ static void cma_detach_from_dev(struct rdma_id_private *id_priv)
        id_priv->cma_dev = NULL;
 }
 
-static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
+static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
+       enum rdma_node_type dev_type = id_priv->id.route.addr.dev_addr.dev_type;
        struct cma_device *cma_dev;
        union ib_gid gid;
        int ret = -ENODEV;
 
-       ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid),
+       switch (rdma_node_get_transport(dev_type)) {
+       case RDMA_TRANSPORT_IB:
+               ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+               break;
+       case RDMA_TRANSPORT_IWARP:
+               iw_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
+               break;
+       default:
+               return -ENODEV;
+       }
 
-       mutex_lock(&lock);
        list_for_each_entry(cma_dev, &dev_list, list) {
                ret = ib_find_cached_gid(cma_dev->device, &gid,
                                         &id_priv->id.port_num, NULL);
@@ -276,20 +289,9 @@ static int cma_acquire_ib_dev(struct rdma_id_private *id_priv)
                        break;
                }
        }
-       mutex_unlock(&lock);
        return ret;
 }
 
-static int cma_acquire_dev(struct rdma_id_private *id_priv)
-{
-       switch (id_priv->id.route.addr.dev_addr.dev_type) {
-       case IB_NODE_CA:
-               return cma_acquire_ib_dev(id_priv);
-       default:
-               return -ENODEV;
-       }
-}
-
 static void cma_deref_id(struct rdma_id_private *id_priv)
 {
        if (atomic_dec_and_test(&id_priv->refcount))
@@ -347,6 +349,16 @@ static int cma_init_ib_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
                                          IB_QP_PKEY_INDEX | IB_QP_PORT);
 }
 
+static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+       struct ib_qp_attr qp_attr;
+
+       qp_attr.qp_state = IB_QPS_INIT;
+       qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+
+       return ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_ACCESS_FLAGS);
+}
+
 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
                   struct ib_qp_init_attr *qp_init_attr)
 {
@@ -362,10 +374,13 @@ int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
        if (IS_ERR(qp))
                return PTR_ERR(qp);
 
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = cma_init_ib_qp(id_priv, qp);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = cma_init_iw_qp(id_priv, qp);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -451,13 +466,17 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
        int ret;
 
        id_priv = container_of(id, struct rdma_id_private, id);
-       switch (id_priv->id.device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
                                         qp_attr_mask);
                if (qp_attr->qp_state == IB_QPS_RTR)
                        qp_attr->rq_psn = id_priv->seq_num;
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
+                                       qp_attr_mask);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -590,8 +609,8 @@ static int cma_notify_user(struct rdma_id_private *id_priv,
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
 {
-       switch (id_priv->id.device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                if (id_priv->query)
                        ib_sa_cancel_query(id_priv->query_id, id_priv->query);
                break;
@@ -611,11 +630,15 @@ static void cma_destroy_listen(struct rdma_id_private *id_priv)
        cma_exch(id_priv, CMA_DESTROYING);
 
        if (id_priv->cma_dev) {
-               switch (id_priv->id.device->node_type) {
-               case IB_NODE_CA:
-                       if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+               switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
+               case RDMA_TRANSPORT_IB:
+                       if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
                                ib_destroy_cm_id(id_priv->cm_id.ib);
                        break;
+               case RDMA_TRANSPORT_IWARP:
+                       if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+                               iw_destroy_cm_id(id_priv->cm_id.iw);
+                       break;
                default:
                        break;
                }
@@ -689,19 +712,25 @@ void rdma_destroy_id(struct rdma_cm_id *id)
        state = cma_exch(id_priv, CMA_DESTROYING);
        cma_cancel_operation(id_priv, state);
 
+       mutex_lock(&lock);
        if (id_priv->cma_dev) {
-               switch (id->device->node_type) {
-               case IB_NODE_CA:
-                       if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+               mutex_unlock(&lock);
+               switch (rdma_node_get_transport(id->device->node_type)) {
+               case RDMA_TRANSPORT_IB:
+                       if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
                                ib_destroy_cm_id(id_priv->cm_id.ib);
                        break;
+               case RDMA_TRANSPORT_IWARP:
+                       if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
+                               iw_destroy_cm_id(id_priv->cm_id.iw);
+                       break;
                default:
                        break;
                }
-               mutex_lock(&lock);
+               mutex_lock(&lock);
                cma_detach_from_dev(id_priv);
-               mutex_unlock(&lock);
        }
+       mutex_unlock(&lock);
 
        cma_release_port(id_priv);
        cma_deref_id(id_priv);
@@ -869,7 +898,7 @@ static struct rdma_id_private *cma_new_id(struct rdma_cm_id *listen_id,
        ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
        ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
        ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
-       rt->addr.dev_addr.dev_type = IB_NODE_CA;
+       rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
 
        id_priv = container_of(id, struct rdma_id_private, id);
        id_priv->state = CMA_CONNECT;
@@ -898,7 +927,9 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
        }
 
        atomic_inc(&conn_id->dev_remove);
-       ret = cma_acquire_ib_dev(conn_id);
+       mutex_lock(&lock);
+       ret = cma_acquire_dev(conn_id);
+       mutex_unlock(&lock);
        if (ret) {
                ret = -ENODEV;
                cma_release_remove(conn_id);
@@ -982,6 +1013,130 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
        }
 }
 
+static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
+{
+       struct rdma_id_private *id_priv = iw_id->context;
+       enum rdma_cm_event_type event = 0;
+       struct sockaddr_in *sin;
+       int ret = 0;
+
+       atomic_inc(&id_priv->dev_remove);
+
+       switch (iw_event->event) {
+       case IW_CM_EVENT_CLOSE:
+               event = RDMA_CM_EVENT_DISCONNECTED;
+               break;
+       case IW_CM_EVENT_CONNECT_REPLY:
+               sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+               *sin = iw_event->local_addr;
+               sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
+               *sin = iw_event->remote_addr;
+               if (iw_event->status)
+                       event = RDMA_CM_EVENT_REJECTED;
+               else
+                       event = RDMA_CM_EVENT_ESTABLISHED;
+               break;
+       case IW_CM_EVENT_ESTABLISHED:
+               event = RDMA_CM_EVENT_ESTABLISHED;
+               break;
+       default:
+               BUG_ON(1);
+       }
+
+       ret = cma_notify_user(id_priv, event, iw_event->status,
+                             iw_event->private_data,
+                             iw_event->private_data_len);
+       if (ret) {
+               /* Destroy the CM ID by returning a non-zero value. */
+               id_priv->cm_id.iw = NULL;
+               cma_exch(id_priv, CMA_DESTROYING);
+               cma_release_remove(id_priv);
+               rdma_destroy_id(&id_priv->id);
+               return ret;
+       }
+
+       cma_release_remove(id_priv);
+       return ret;
+}
+
+static int iw_conn_req_handler(struct iw_cm_id *cm_id,
+                              struct iw_cm_event *iw_event)
+{
+       struct rdma_cm_id *new_cm_id;
+       struct rdma_id_private *listen_id, *conn_id;
+       struct sockaddr_in *sin;
+       struct net_device *dev = NULL;
+       int ret;
+
+       listen_id = cm_id->context;
+       atomic_inc(&listen_id->dev_remove);
+       if (!cma_comp(listen_id, CMA_LISTEN)) {
+               ret = -ECONNABORTED;
+               goto out;
+       }
+
+       /* Create a new RDMA id for the new IW CM ID */
+       new_cm_id = rdma_create_id(listen_id->id.event_handler,
+                                  listen_id->id.context,
+                                  RDMA_PS_TCP);
+       if (!new_cm_id) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       conn_id = container_of(new_cm_id, struct rdma_id_private, id);
+       atomic_inc(&conn_id->dev_remove);
+       conn_id->state = CMA_CONNECT;
+
+       dev = ip_dev_find(iw_event->local_addr.sin_addr.s_addr);
+       if (!dev) {
+               ret = -EADDRNOTAVAIL;
+               cma_release_remove(conn_id);
+               rdma_destroy_id(new_cm_id);
+               goto out;
+       }
+       ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
+       if (ret) {
+               cma_release_remove(conn_id);
+               rdma_destroy_id(new_cm_id);
+               goto out;
+       }
+
+       mutex_lock(&lock);
+       ret = cma_acquire_dev(conn_id);
+       mutex_unlock(&lock);
+       if (ret) {
+               cma_release_remove(conn_id);
+               rdma_destroy_id(new_cm_id);
+               goto out;
+       }
+
+       conn_id->cm_id.iw = cm_id;
+       cm_id->context = conn_id;
+       cm_id->cm_handler = cma_iw_handler;
+
+       sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
+       *sin = iw_event->local_addr;
+       sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
+       *sin = iw_event->remote_addr;
+
+       ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
+                             iw_event->private_data,
+                             iw_event->private_data_len);
+       if (ret) {
+               /* User wants to destroy the CM ID */
+               conn_id->cm_id.iw = NULL;
+               cma_exch(conn_id, CMA_DESTROYING);
+               cma_release_remove(conn_id);
+               rdma_destroy_id(&conn_id->id);
+       }
+
+out:
+       if (dev)
+               dev_put(dev);
+       cma_release_remove(listen_id);
+       return ret;
+}
+
 static int cma_ib_listen(struct rdma_id_private *id_priv)
 {
        struct ib_cm_compare_data compare_data;
@@ -1011,6 +1166,30 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
        return ret;
 }
 
+static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
+{
+       int ret;
+       struct sockaddr_in *sin;
+
+       id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device,
+                                           iw_conn_req_handler,
+                                           id_priv);
+       if (IS_ERR(id_priv->cm_id.iw))
+               return PTR_ERR(id_priv->cm_id.iw);
+
+       sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
+       id_priv->cm_id.iw->local_addr = *sin;
+
+       ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
+
+       if (ret) {
+               iw_destroy_cm_id(id_priv->cm_id.iw);
+               id_priv->cm_id.iw = NULL;
+       }
+
+       return ret;
+}
+
 static int cma_listen_handler(struct rdma_cm_id *id,
                              struct rdma_cm_event *event)
 {
@@ -1087,12 +1266,17 @@ int rdma_listen(struct rdma_cm_id *id, int backlog)
 
        id_priv->backlog = backlog;
        if (id->device) {
-               switch (id->device->node_type) {
-               case IB_NODE_CA:
+               switch (rdma_node_get_transport(id->device->node_type)) {
+               case RDMA_TRANSPORT_IB:
                        ret = cma_ib_listen(id_priv);
                        if (ret)
                                goto err;
                        break;
+               case RDMA_TRANSPORT_IWARP:
+                       ret = cma_iw_listen(id_priv, backlog);
+                       if (ret)
+                               goto err;
+                       break;
                default:
                        ret = -ENOSYS;
                        goto err;
@@ -1140,7 +1324,7 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
        path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
        path_rec.numb_path = 1;
 
-       id_priv->query_id = ib_sa_path_rec_get(id_priv->id.device,
+       id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
                                id_priv->id.port_num, &path_rec,
                                IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
                                IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH,
@@ -1231,6 +1415,23 @@ err:
 }
 EXPORT_SYMBOL(rdma_set_ib_paths);
 
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+       struct cma_work *work;
+
+       work = kzalloc(sizeof *work, GFP_KERNEL);
+       if (!work)
+               return -ENOMEM;
+
+       work->id = id_priv;
+       INIT_WORK(&work->work, cma_work_handler, work);
+       work->old_state = CMA_ROUTE_QUERY;
+       work->new_state = CMA_ROUTE_RESOLVED;
+       work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+       queue_work(cma_wq, &work->work);
+       return 0;
+}
+
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 {
        struct rdma_id_private *id_priv;
@@ -1241,10 +1442,13 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
                return -EINVAL;
 
        atomic_inc(&id_priv->refcount);
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = cma_resolve_ib_route(id_priv, timeout_ms);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = cma_resolve_iw_route(id_priv, timeout_ms);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -1309,16 +1513,26 @@ static void addr_handler(int status, struct sockaddr *src_addr,
        enum rdma_cm_event_type event;
 
        atomic_inc(&id_priv->dev_remove);
-       if (!id_priv->cma_dev && !status)
+
+       /*
+        * Grab mutex to block rdma_destroy_id() from removing the device while
+        * we're trying to acquire it.
+        */
+       mutex_lock(&lock);
+       if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
+               mutex_unlock(&lock);
+               goto out;
+       }
+
+       if (!status && !id_priv->cma_dev)
                status = cma_acquire_dev(id_priv);
+       mutex_unlock(&lock);
 
        if (status) {
-               if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND))
+               if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
                        goto out;
                event = RDMA_CM_EVENT_ADDR_ERROR;
        } else {
-               if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED))
-                       goto out;
                memcpy(&id_priv->id.route.addr.src_addr, src_addr,
                       ip_addr_size(src_addr));
                event = RDMA_CM_EVENT_ADDR_RESOLVED;
@@ -1492,7 +1706,7 @@ static int cma_use_port(struct idr *ps, struct rdma_id_private *id_priv)
        hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
                if (cma_any_addr(&cur_id->id.route.addr.src_addr))
                        return -EADDRNOTAVAIL;
-               
+
                cur_sin = (struct sockaddr_in *) &cur_id->id.route.addr.src_addr;
                if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
                        return -EADDRINUSE;
@@ -1542,8 +1756,11 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 
        if (!cma_any_addr(addr)) {
                ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
-               if (!ret)
+               if (!ret) {
+                       mutex_lock(&lock);
                        ret = cma_acquire_dev(id_priv);
+                       mutex_unlock(&lock);
+               }
                if (ret)
                        goto err;
        }
@@ -1649,6 +1866,47 @@ out:
        return ret;
 }
 
+static int cma_connect_iw(struct rdma_id_private *id_priv,
+                         struct rdma_conn_param *conn_param)
+{
+       struct iw_cm_id *cm_id;
+       struct sockaddr_in* sin;
+       int ret;
+       struct iw_cm_conn_param iw_param;
+
+       cm_id = iw_create_cm_id(id_priv->id.device, cma_iw_handler, id_priv);
+       if (IS_ERR(cm_id)) {
+               ret = PTR_ERR(cm_id);
+               goto out;
+       }
+
+       id_priv->cm_id.iw = cm_id;
+
+       sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
+       cm_id->local_addr = *sin;
+
+       sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
+       cm_id->remote_addr = *sin;
+
+       ret = cma_modify_qp_rtr(&id_priv->id);
+       if (ret) {
+               iw_destroy_cm_id(cm_id);
+               return ret;
+       }
+
+       iw_param.ord = conn_param->initiator_depth;
+       iw_param.ird = conn_param->responder_resources;
+       iw_param.private_data = conn_param->private_data;
+       iw_param.private_data_len = conn_param->private_data_len;
+       if (id_priv->id.qp)
+               iw_param.qpn = id_priv->qp_num;
+       else
+               iw_param.qpn = conn_param->qp_num;
+       ret = iw_cm_connect(cm_id, &iw_param);
+out:
+       return ret;
+}
+
 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
        struct rdma_id_private *id_priv;
@@ -1664,10 +1922,13 @@ int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
                id_priv->srq = conn_param->srq;
        }
 
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = cma_connect_ib(id_priv, conn_param);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = cma_connect_iw(id_priv, conn_param);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -1708,6 +1969,28 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
        return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
 }
 
+static int cma_accept_iw(struct rdma_id_private *id_priv,
+                 struct rdma_conn_param *conn_param)
+{
+       struct iw_cm_conn_param iw_param;
+       int ret;
+
+       ret = cma_modify_qp_rtr(&id_priv->id);
+       if (ret)
+               return ret;
+
+       iw_param.ord = conn_param->initiator_depth;
+       iw_param.ird = conn_param->responder_resources;
+       iw_param.private_data = conn_param->private_data;
+       iw_param.private_data_len = conn_param->private_data_len;
+       if (id_priv->id.qp) {
+               iw_param.qpn = id_priv->qp_num;
+       } else
+               iw_param.qpn = conn_param->qp_num;
+
+       return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
+}
+
 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
 {
        struct rdma_id_private *id_priv;
@@ -1723,13 +2006,16 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
                id_priv->srq = conn_param->srq;
        }
 
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                if (conn_param)
                        ret = cma_accept_ib(id_priv, conn_param);
                else
                        ret = cma_rep_recv(id_priv);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = cma_accept_iw(id_priv, conn_param);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -1756,12 +2042,16 @@ int rdma_reject(struct rdma_cm_id *id, const void *private_data,
        if (!cma_comp(id_priv, CMA_CONNECT))
                return -EINVAL;
 
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
                ret = ib_send_cm_rej(id_priv->cm_id.ib,
                                     IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
                                     private_data, private_data_len);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = iw_cm_reject(id_priv->cm_id.iw,
+                                  private_data, private_data_len);
+               break;
        default:
                ret = -ENOSYS;
                break;
@@ -1780,17 +2070,20 @@ int rdma_disconnect(struct rdma_cm_id *id)
            !cma_comp(id_priv, CMA_DISCONNECT))
                return -EINVAL;
 
-       ret = cma_modify_qp_err(id);
-       if (ret)
-               goto out;
-
-       switch (id->device->node_type) {
-       case IB_NODE_CA:
+       switch (rdma_node_get_transport(id->device->node_type)) {
+       case RDMA_TRANSPORT_IB:
+               ret = cma_modify_qp_err(id);
+               if (ret)
+                       goto out;
                /* Initiate or respond to a disconnect. */
                if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
                        ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
                break;
+       case RDMA_TRANSPORT_IWARP:
+               ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
+               break;
        default:
+               ret = -EINVAL;
                break;
        }
 out:
@@ -1907,12 +2200,15 @@ static int cma_init(void)
        if (!cma_wq)
                return -ENOMEM;
 
+       ib_sa_register_client(&sa_client);
+
        ret = ib_register_client(&cma_client);
        if (ret)
                goto err;
        return 0;
 
 err:
+       ib_sa_unregister_client(&sa_client);
        destroy_workqueue(cma_wq);
        return ret;
 }
@@ -1920,6 +2216,7 @@ err:
 static void cma_cleanup(void)
 {
        ib_unregister_client(&cma_client);
+       ib_sa_unregister_client(&sa_client);
        destroy_workqueue(cma_wq);
        idr_destroy(&sdp_ps);
        idr_destroy(&tcp_ps);
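Throughout the cma.c changes above, switches on raw node types are replaced by switches on the transport returned by rdma_node_get_transport(), so the IB and iWARP code paths hang off a single dispatch point. Below is a simplified, runnable sketch of the same pattern; the enum names and values are illustrative only and do not match the kernel's definitions:

/*
 * Simplified userspace sketch of dispatch-by-transport. Enum names and
 * values are invented for illustration, not the kernel's.
 */
#include <stdio.h>

enum node_type { NODE_IB_CA, NODE_IB_SWITCH, NODE_IB_ROUTER, NODE_RNIC };
enum transport { TRANSPORT_IB, TRANSPORT_IWARP, TRANSPORT_UNKNOWN };

static enum transport node_get_transport(enum node_type type)
{
    switch (type) {
    case NODE_IB_CA:
    case NODE_IB_SWITCH:
    case NODE_IB_ROUTER:
        return TRANSPORT_IB;
    case NODE_RNIC:
        return TRANSPORT_IWARP;
    default:
        return TRANSPORT_UNKNOWN;
    }
}

static const char *connect_path(enum node_type type)
{
    /* one arm per transport, as in the rdma_connect()-style switches above */
    switch (node_get_transport(type)) {
    case TRANSPORT_IB:
        return "connect via IB CM";
    case TRANSPORT_IWARP:
        return "connect via iWARP CM";
    default:
        return "ENOSYS";
    }
}

int main(void)
{
    printf("IB CA -> %s\n", connect_path(NODE_IB_CA));
    printf("RNIC  -> %s\n", connect_path(NODE_RNIC));
    return 0;
}

The payoff of this shape is that adding another transport touches the mapping function plus one case per dispatch site, instead of enumerating every node type at every call site.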
index b2f3cb9..63d2a39 100644 (file)
@@ -385,7 +385,7 @@ void *ib_get_client_data(struct ib_device *device, struct ib_client *client)
 EXPORT_SYMBOL(ib_get_client_data);
 
 /**
- * ib_set_client_data - Get IB client context
+ * ib_set_client_data - Set IB client context
  * @device:Device to set context for
  * @client:Client to set context for
  * @data:Context to set
@@ -505,7 +505,7 @@ int ib_query_port(struct ib_device *device,
                  u8 port_num,
                  struct ib_port_attr *port_attr)
 {
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                if (port_num)
                        return -EINVAL;
        } else if (port_num < 1 || port_num > device->phys_port_cnt)
@@ -580,7 +580,7 @@ int ib_modify_port(struct ib_device *device,
                   u8 port_num, int port_modify_mask,
                   struct ib_port_modify *port_modify)
 {
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                if (port_num)
                        return -EINVAL;
        } else if (port_num < 1 || port_num > device->phys_port_cnt)
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
new file mode 100644 (file)
index 0000000..c3fb304
--- /dev/null
@@ -0,0 +1,1019 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+#include <linux/completion.h>
+
+#include <rdma/iw_cm.h>
+#include <rdma/ib_addr.h>
+
+#include "iwcm.h"
+
+MODULE_AUTHOR("Tom Tucker");
+MODULE_DESCRIPTION("iWARP CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+static struct workqueue_struct *iwcm_wq;
+struct iwcm_work {
+       struct work_struct work;
+       struct iwcm_id_private *cm_id;
+       struct list_head list;
+       struct iw_cm_event event;
+       struct list_head free_list;
+};
+
+/*
+ * The following services provide a mechanism for pre-allocating iwcm_work
+ * elements.  The design pre-allocates them based on the cm_id type:
+ *     LISTENING IDS:  Get enough elements preallocated to handle the
+ *                     listen backlog.
+ *     ACTIVE IDS:     4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE
+ *     PASSIVE IDS:    3: ESTABLISHED, DISCONNECT, CLOSE
+ *
+ * Allocating them in connect and listen avoids having to deal
+ * with allocation failures on the event upcall from the provider (which
+ * is called in the interrupt context).
+ *
+ * One exception is when creating the cm_id for incoming connection requests.
+ * There are two cases:
+ * 1) in the event upcall, cm_event_handler(), for a listening cm_id.  If
+ *    the backlog is exceeded, then no more connection request events will
+ *    be processed.  cm_event_handler() returns -ENOMEM in this case.  It's up
+ *    to the provider to reject the connection request.
+ * 2) in the connection request workqueue handler, cm_conn_req_handler().
+ *    If work elements cannot be allocated for the new connect request cm_id,
+ *    then IWCM will call the provider reject method.  This is ok since
+ *    cm_conn_req_handler() runs in the workqueue thread context.
+ */
+
+static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv)
+{
+       struct iwcm_work *work;
+
+       if (list_empty(&cm_id_priv->work_free_list))
+               return NULL;
+       work = list_entry(cm_id_priv->work_free_list.next, struct iwcm_work,
+                         free_list);
+       list_del_init(&work->free_list);
+       return work;
+}
+
+static void put_work(struct iwcm_work *work)
+{
+       list_add(&work->free_list, &work->cm_id->work_free_list);
+}
+
+static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv)
+{
+       struct list_head *e, *tmp;
+
+       list_for_each_safe(e, tmp, &cm_id_priv->work_free_list)
+               kfree(list_entry(e, struct iwcm_work, free_list));
+}
+
+static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count)
+{
+       struct iwcm_work *work;
+
+       BUG_ON(!list_empty(&cm_id_priv->work_free_list));
+       while (count--) {
+               work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL);
+               if (!work) {
+                       dealloc_work_entries(cm_id_priv);
+                       return -ENOMEM;
+               }
+               work->cm_id = cm_id_priv;
+               INIT_LIST_HEAD(&work->list);
+               put_work(work);
+       }
+       return 0;
+}
+
+/*
+ * Save private data from incoming connection requests in the
+ * cm_id_priv so the low level driver doesn't have to.  Adjust
+ * the event ptr to point to the local copy.
+ */
+static int copy_private_data(struct iwcm_id_private *cm_id_priv,
+                      struct iw_cm_event *event)
+{
+       void *p;
+
+       p = kmalloc(event->private_data_len, GFP_ATOMIC);
+       if (!p)
+               return -ENOMEM;
+       memcpy(p, event->private_data, event->private_data_len);
+       event->private_data = p;
+       return 0;
+}
+
+/*
+ * Release a reference on cm_id. If the last reference is being removed
+ * and iw_destroy_cm_id is waiting, wake up the waiting thread.
+ */
+static int iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+{
+       int ret = 0;
+
+       BUG_ON(atomic_read(&cm_id_priv->refcount)==0);
+       if (atomic_dec_and_test(&cm_id_priv->refcount)) {
+               BUG_ON(!list_empty(&cm_id_priv->work_list));
+               if (waitqueue_active(&cm_id_priv->destroy_comp.wait)) {
+                       BUG_ON(cm_id_priv->state != IW_CM_STATE_DESTROYING);
+                       BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY,
+                                       &cm_id_priv->flags));
+                       ret = 1;
+               }
+               complete(&cm_id_priv->destroy_comp);
+       }
+
+       return ret;
+}
+
+static void add_ref(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       atomic_inc(&cm_id_priv->refcount);
+}
+
+static void rem_ref(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       iwcm_deref_id(cm_id_priv);
+}
+
+static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event);
+
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+                                iw_cm_handler cm_handler,
+                                void *context)
+{
+       struct iwcm_id_private *cm_id_priv;
+
+       cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL);
+       if (!cm_id_priv)
+               return ERR_PTR(-ENOMEM);
+
+       cm_id_priv->state = IW_CM_STATE_IDLE;
+       cm_id_priv->id.device = device;
+       cm_id_priv->id.cm_handler = cm_handler;
+       cm_id_priv->id.context = context;
+       cm_id_priv->id.event_handler = cm_event_handler;
+       cm_id_priv->id.add_ref = add_ref;
+       cm_id_priv->id.rem_ref = rem_ref;
+       spin_lock_init(&cm_id_priv->lock);
+       atomic_set(&cm_id_priv->refcount, 1);
+       init_waitqueue_head(&cm_id_priv->connect_wait);
+       init_completion(&cm_id_priv->destroy_comp);
+       INIT_LIST_HEAD(&cm_id_priv->work_list);
+       INIT_LIST_HEAD(&cm_id_priv->work_free_list);
+
+       return &cm_id_priv->id;
+}
+EXPORT_SYMBOL(iw_create_cm_id);
+
+static int iwcm_modify_qp_err(struct ib_qp *qp)
+{
+       struct ib_qp_attr qp_attr;
+
+       if (!qp)
+               return -EINVAL;
+
+       qp_attr.qp_state = IB_QPS_ERR;
+       return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * This is really the RDMAC CLOSING state. It is most similar to the
+ * IB SQD QP state.
+ */
+static int iwcm_modify_qp_sqd(struct ib_qp *qp)
+{
+       struct ib_qp_attr qp_attr;
+
+       BUG_ON(qp == NULL);
+       qp_attr.qp_state = IB_QPS_SQD;
+       return ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * Block if a passive or active connection is currently being processed. Then
+ * process the event as follows:
+ * - If we are ESTABLISHED, move to CLOSING and modify the QP state
+ *   based on the abrupt flag
+ * - If the connection is already in the CLOSING or IDLE state, the peer is
+ *   disconnecting concurrently with us and we've already seen the
+ *   DISCONNECT event -- ignore the request and return 0
+ * - Disconnect on a listening endpoint returns -EINVAL
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret = 0;
+       struct ib_qp *qp = NULL;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       /* Wait if we're currently in a connect or accept downcall */
+       wait_event(cm_id_priv->connect_wait,
+                  !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_ESTABLISHED:
+               cm_id_priv->state = IW_CM_STATE_CLOSING;
+
+               /* QP could be NULL for a user-mode client */
+               if (cm_id_priv->qp)
+                       qp = cm_id_priv->qp;
+               else
+                       ret = -EINVAL;
+               break;
+       case IW_CM_STATE_LISTEN:
+               ret = -EINVAL;
+               break;
+       case IW_CM_STATE_CLOSING:
+               /* remote peer closed first */
+       case IW_CM_STATE_IDLE:
+               /* accept or connect returned !0 */
+               break;
+       case IW_CM_STATE_CONN_RECV:
+               /*
+                * App called disconnect before/without calling accept after
+                * connect_request event delivered.
+                */
+               break;
+       case IW_CM_STATE_CONN_SENT:
+               /* Can only get here if wait above fails */
+       default:
+               BUG();
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       if (qp) {
+               if (abrupt)
+                       ret = iwcm_modify_qp_err(qp);
+               else
+                       ret = iwcm_modify_qp_sqd(qp);
+
+               /*
+                * If both sides are disconnecting the QP could
+                * already be in ERR or SQD states
+                */
+               ret = 0;
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_disconnect);
+
+/*
+ * CM_ID <-- DESTROYING
+ *
+ * Clean up all resources associated with the connection and release
+ * the initial reference taken by iw_create_cm_id.
+ */
+static void destroy_cm_id(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       /*
+        * Wait if we're currently in a connect or accept downcall. A
+        * listening endpoint should never block here.
+        */
+       wait_event(cm_id_priv->connect_wait,
+                  !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags));
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_LISTEN:
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               /* destroy the listening endpoint */
+               ret = cm_id->device->iwcm->destroy_listen(cm_id);
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_ESTABLISHED:
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               /* Abrupt close of the connection */
+               (void)iwcm_modify_qp_err(cm_id_priv->qp);
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_IDLE:
+       case IW_CM_STATE_CLOSING:
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               break;
+       case IW_CM_STATE_CONN_RECV:
+               /*
+                * App called destroy before/without calling accept after
+                * receiving connection request event notification.
+                */
+               cm_id_priv->state = IW_CM_STATE_DESTROYING;
+               break;
+       case IW_CM_STATE_CONN_SENT:
+       case IW_CM_STATE_DESTROYING:
+       default:
+               BUG();
+               break;
+       }
+       if (cm_id_priv->qp) {
+               cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+               cm_id_priv->qp = NULL;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       (void)iwcm_deref_id(cm_id_priv);
+}
+
+/*
+ * This function is only called by the application thread and cannot
+ * be called by the event thread. The function will wait for all
+ * references to be released on the cm_id and then kfree the cm_id
+ * object.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
+{
+       struct iwcm_id_private *cm_id_priv;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       BUG_ON(test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags));
+
+       destroy_cm_id(cm_id);
+
+       wait_for_completion(&cm_id_priv->destroy_comp);
+
+       dealloc_work_entries(cm_id_priv);
+
+       kfree(cm_id_priv);
+}
+EXPORT_SYMBOL(iw_destroy_cm_id);
+
+/*
+ * CM_ID <-- LISTEN
+ *
+ * Start listening for connect requests. Generates one CONNECT_REQUEST
+ * event for each inbound connect request.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret = 0;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
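+       /* One work element per inbound connect request that may be queued */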
+       ret = alloc_work_entries(cm_id_priv, backlog);
+       if (ret)
+               return ret;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_IDLE:
+               cm_id_priv->state = IW_CM_STATE_LISTEN;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+               if (ret)
+                       cm_id_priv->state = IW_CM_STATE_IDLE;
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               break;
+       default:
+               ret = -EINVAL;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_listen);
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * Rejects an inbound connection request. No events are generated.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id,
+                const void *private_data,
+                u8 private_data_len)
+{
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+               return -EINVAL;
+       }
+       cm_id_priv->state = IW_CM_STATE_IDLE;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       ret = cm_id->device->iwcm->reject(cm_id, private_data,
+                                         private_data_len);
+
+       clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       wake_up_all(&cm_id_priv->connect_wait);
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_reject);
+
+/*
+ * CM_ID <-- ESTABLISHED
+ *
+ * Accepts an inbound connection request and generates an ESTABLISHED
+ * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block
+ * until the ESTABLISHED event is received from the provider.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id,
+                struct iw_cm_conn_param *iw_param)
+{
+       struct iwcm_id_private *cm_id_priv;
+       struct ib_qp *qp;
+       unsigned long flags;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+               return -EINVAL;
+       }
+       /* Get the ib_qp given the QPN */
+       qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+       if (!qp) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               return -EINVAL;
+       }
+       cm_id->device->iwcm->add_ref(qp);
+       cm_id_priv->qp = qp;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       ret = cm_id->device->iwcm->accept(cm_id, iw_param);
+       if (ret) {
+               /* An error on accept precludes provider events */
+               BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               if (cm_id_priv->qp) {
+                       cm_id->device->iwcm->rem_ref(qp);
+                       cm_id_priv->qp = NULL;
+               }
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_accept);
+
+/*
+ * Active Side: CM_ID <-- CONN_SENT
+ *
+ * If successful, results in the generation of a CONNECT_REPLY
+ * event. iw_cm_disconnect and iw_destroy_cm_id will block until the
+ * CONNECT_REPLY event is received from the provider.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
+{
+       struct iwcm_id_private *cm_id_priv;
+       int ret = 0;
+       unsigned long flags;
+       struct ib_qp *qp;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+       ret = alloc_work_entries(cm_id_priv, 4);
+       if (ret)
+               return ret;
+
+       set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+       if (cm_id_priv->state != IW_CM_STATE_IDLE) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+               return -EINVAL;
+       }
+
+       /* Get the ib_qp given the QPN */
+       qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
+       if (!qp) {
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               return -EINVAL;
+       }
+       cm_id->device->iwcm->add_ref(qp);
+       cm_id_priv->qp = qp;
+       cm_id_priv->state = IW_CM_STATE_CONN_SENT;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+       if (ret) {
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               if (cm_id_priv->qp) {
+                       cm_id->device->iwcm->rem_ref(qp);
+                       cm_id_priv->qp = NULL;
+               }
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+               clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+               wake_up_all(&cm_id_priv->connect_wait);
+       }
+
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_connect);
+
+/*
+ * Passive Side: new CM_ID <-- CONN_RECV
+ *
+ * Handles an inbound connect request. The function creates a new
+ * iw_cm_id to represent the new connection and inherits the client
+ * callback function and other attributes from the listening parent.
+ *
+ * The work item contains a pointer to the listen_cm_id and the event. The
+ * listen_cm_id contains the client cm_handler, context and
+ * device. These are copied when the cm_id is cloned. The event
+ * contains the new connection's four-tuple.
+ *
+ * An error on the child should not affect the parent, so this
+ * function does not return a value.
+ */
+static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
+                               struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       struct iw_cm_id *cm_id;
+       struct iwcm_id_private *cm_id_priv;
+       int ret;
+
+       /*
+        * The provider should never generate a connection request
+        * event with a bad status.
+        */
+       BUG_ON(iw_event->status);
+
+       /*
+        * We could be destroying the listening id. If so, ignore this
+        * upcall.
+        */
+       spin_lock_irqsave(&listen_id_priv->lock, flags);
+       if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
+               spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+               return;
+       }
+       spin_unlock_irqrestore(&listen_id_priv->lock, flags);
+
+       cm_id = iw_create_cm_id(listen_id_priv->id.device,
+                               listen_id_priv->id.cm_handler,
+                               listen_id_priv->id.context);
+       /* If the cm_id could not be created, ignore the request */
+       if (IS_ERR(cm_id))
+               return;
+
+       cm_id->provider_data = iw_event->provider_data;
+       cm_id->local_addr = iw_event->local_addr;
+       cm_id->remote_addr = iw_event->remote_addr;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       cm_id_priv->state = IW_CM_STATE_CONN_RECV;
+
+       ret = alloc_work_entries(cm_id_priv, 3);
+       if (ret) {
+               iw_cm_reject(cm_id, NULL, 0);
+               iw_destroy_cm_id(cm_id);
+               return;
+       }
+
+       /* Call the client CM handler */
+       ret = cm_id->cm_handler(cm_id, iw_event);
+       if (ret) {
+               set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+               destroy_cm_id(cm_id);
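+               /*
+                * The iw_cm_id is the first member of iwcm_id_private,
+                * so freeing cm_id releases the whole private structure.
+                */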
+               if (atomic_read(&cm_id_priv->refcount) == 0)
+                       kfree(cm_id);
+       }
+
+       if (iw_event->private_data_len)
+               kfree(iw_event->private_data);
+}
+
+/*
+ * Passive Side: CM_ID <-- ESTABLISHED
+ *
+ * The provider generated an ESTABLISHED event which means that
+ * the MPA negotiation has completed successfully and we are now in MPA
+ * FPDU mode.
+ *
+ * This event can only be received in the CONN_RECV state. If the
+ * remote peer closed, the ESTABLISHED event would be received followed
+ * by the CLOSE event. If the app closes, it will block until we wake
+ * it up after processing this event.
+ */
+static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv,
+                              struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+       /*
+        * We clear the CONNECT_WAIT bit here to allow the callback
+        * function to call iw_cm_disconnect. Calling iw_destroy_cm_id
+        * from a callback handler is not allowed.
+        */
+       clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV);
+       cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+       wake_up_all(&cm_id_priv->connect_wait);
+
+       return ret;
+}
+
+/*
+ * Active Side: CM_ID <-- ESTABLISHED
+ *
+ * The app has called connect and is waiting for the established event
+ * before posting its requests to the server. This event will wake up
+ * anyone blocked in iw_cm_disconnect or iw_destroy_cm_id.
+ */
+static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
+                              struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       /*
+        * Clear the connect wait bit so a callback function calling
+        * iw_cm_disconnect will not wait and deadlock this thread
+        */
+       clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+       BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
+       if (iw_event->status == IW_CM_EVENT_STATUS_ACCEPTED) {
+               cm_id_priv->id.local_addr = iw_event->local_addr;
+               cm_id_priv->id.remote_addr = iw_event->remote_addr;
+               cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
+       } else {
+               /* REJECTED or RESET */
+               cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+               cm_id_priv->qp = NULL;
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+
+       if (iw_event->private_data_len)
+               kfree(iw_event->private_data);
+
+       /* Wake up waiters on connect complete */
+       wake_up_all(&cm_id_priv->connect_wait);
+
+       return ret;
+}
+
+/*
+ * CM_ID <-- CLOSING
+ *
+ * If in the ESTABLISHED state, move to CLOSING.
+ */
+static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv,
+                                 struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED)
+               cm_id_priv->state = IW_CM_STATE_CLOSING;
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * CM_ID <-- IDLE
+ *
+ * If in the ESTABLISHED or CLOSING states, the QP will have been
+ * moved by the provider to the ERR state. Disassociate the CM_ID from
+ * the QP, move to IDLE, and remove the 'connected' reference.
+ *
+ * If in some other state, the cm_id was destroyed asynchronously.
+ * This is the last reference that will result in waking up
+ * the app thread blocked in iw_destroy_cm_id.
+ */
+static int cm_close_handler(struct iwcm_id_private *cm_id_priv,
+                                 struct iw_cm_event *iw_event)
+{
+       unsigned long flags;
+       int ret = 0;
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+
+       if (cm_id_priv->qp) {
+               cm_id_priv->id.device->iwcm->rem_ref(cm_id_priv->qp);
+               cm_id_priv->qp = NULL;
+       }
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_ESTABLISHED:
+       case IW_CM_STATE_CLOSING:
+               cm_id_priv->state = IW_CM_STATE_IDLE;
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+               ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event);
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+               break;
+       case IW_CM_STATE_DESTROYING:
+               break;
+       default:
+               BUG();
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+       return ret;
+}
+
+static int process_event(struct iwcm_id_private *cm_id_priv,
+                        struct iw_cm_event *iw_event)
+{
+       int ret = 0;
+
+       switch (iw_event->event) {
+       case IW_CM_EVENT_CONNECT_REQUEST:
+               cm_conn_req_handler(cm_id_priv, iw_event);
+               break;
+       case IW_CM_EVENT_CONNECT_REPLY:
+               ret = cm_conn_rep_handler(cm_id_priv, iw_event);
+               break;
+       case IW_CM_EVENT_ESTABLISHED:
+               ret = cm_conn_est_handler(cm_id_priv, iw_event);
+               break;
+       case IW_CM_EVENT_DISCONNECT:
+               cm_disconnect_handler(cm_id_priv, iw_event);
+               break;
+       case IW_CM_EVENT_CLOSE:
+               ret = cm_close_handler(cm_id_priv, iw_event);
+               break;
+       default:
+               BUG();
+       }
+
+       return ret;
+}
+
+/*
+ * Process events on the work_list for the cm_id. If the callback
+ * function requests that the cm_id be deleted, a flag is set in the
+ * cm_id flags to indicate that when the last reference is
+ * removed, the cm_id is to be destroyed. This is necessary to
+ * distinguish between an object that will be destroyed by the app
+ * thread asleep waiting on destroy_comp vs. an object destroyed
+ * here synchronously when the last reference is removed.
+ */
+static void cm_work_handler(void *arg)
+{
+       struct iwcm_work *work = arg, lwork;
+       struct iwcm_id_private *cm_id_priv = work->cm_id;
+       unsigned long flags;
+       int empty;
+       int ret = 0;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       empty = list_empty(&cm_id_priv->work_list);
+       while (!empty) {
+               work = list_entry(cm_id_priv->work_list.next,
+                                 struct iwcm_work, list);
+               list_del_init(&work->list);
+               empty = list_empty(&cm_id_priv->work_list);
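+               /*
+                * Copy the work element to the stack and return it to
+                * the free list before dropping the lock; the event is
+                * processed from this local copy.
+                */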
+               lwork = *work;
+               put_work(work);
+               spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+               ret = process_event(cm_id_priv, &lwork.event);
+               if (ret) {
+                       set_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags);
+                       destroy_cm_id(&cm_id_priv->id);
+               }
+               BUG_ON(atomic_read(&cm_id_priv->refcount) == 0);
+               if (iwcm_deref_id(cm_id_priv))
+                       return;
+
+               if (atomic_read(&cm_id_priv->refcount) == 0 &&
+                   test_bit(IWCM_F_CALLBACK_DESTROY, &cm_id_priv->flags)) {
+                       dealloc_work_entries(cm_id_priv);
+                       kfree(cm_id_priv);
+                       return;
+               }
+               spin_lock_irqsave(&cm_id_priv->lock, flags);
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+}
+
+/*
+ * This function is called in interrupt context. Schedule events on
+ * the iwcm_wq thread to allow callback functions to downcall into
+ * the CM and/or block.  Events are queued to a per-CM_ID
+ * work_list. If this is the first event on the work_list, the work
+ * element is also queued on the iwcm_wq thread.
+ *
+ * Each event holds a reference on the cm_id. Until the last posted
+ * event has been delivered and processed, the cm_id cannot be
+ * deleted.
+ *
+ * Returns:
+ *           0 - the event was handled.
+ *     -ENOMEM - the event was not handled due to lack of resources.
+ */
+static int cm_event_handler(struct iw_cm_id *cm_id,
+                            struct iw_cm_event *iw_event)
+{
+       struct iwcm_work *work;
+       struct iwcm_id_private *cm_id_priv;
+       unsigned long flags;
+       int ret = 0;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       work = get_work(cm_id_priv);
+       if (!work) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       INIT_WORK(&work->work, cm_work_handler, work);
+       work->cm_id = cm_id_priv;
+       work->event = *iw_event;
+
+       if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST ||
+            work->event.event == IW_CM_EVENT_CONNECT_REPLY) &&
+           work->event.private_data_len) {
+               ret = copy_private_data(cm_id_priv, &work->event);
+               if (ret) {
+                       put_work(work);
+                       goto out;
+               }
+       }
+
+       atomic_inc(&cm_id_priv->refcount);
+       if (list_empty(&cm_id_priv->work_list)) {
+               list_add_tail(&work->list, &cm_id_priv->work_list);
+               queue_work(iwcm_wq, &work->work);
+       } else
+               list_add_tail(&work->list, &cm_id_priv->work_list);
+out:
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       return ret;
+}
+
+static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv,
+                                 struct ib_qp_attr *qp_attr,
+                                 int *qp_attr_mask)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_IDLE:
+       case IW_CM_STATE_CONN_SENT:
+       case IW_CM_STATE_CONN_RECV:
+       case IW_CM_STATE_ESTABLISHED:
+               *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+               qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE |
+                                          IB_ACCESS_REMOTE_WRITE |
+                                          IB_ACCESS_REMOTE_READ;
+               ret = 0;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       return ret;
+}
+
+static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv,
+                                 struct ib_qp_attr *qp_attr,
+                                 int *qp_attr_mask)
+{
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&cm_id_priv->lock, flags);
+       switch (cm_id_priv->state) {
+       case IW_CM_STATE_IDLE:
+       case IW_CM_STATE_CONN_SENT:
+       case IW_CM_STATE_CONN_RECV:
+       case IW_CM_STATE_ESTABLISHED:
+               *qp_attr_mask = 0;
+               ret = 0;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+       spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+       return ret;
+}
+
+int iw_cm_init_qp_attr(struct iw_cm_id *cm_id,
+                      struct ib_qp_attr *qp_attr,
+                      int *qp_attr_mask)
+{
+       struct iwcm_id_private *cm_id_priv;
+       int ret;
+
+       cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+       switch (qp_attr->qp_state) {
+       case IB_QPS_INIT:
+       case IB_QPS_RTR:
+               ret = iwcm_init_qp_init_attr(cm_id_priv,
+                                            qp_attr, qp_attr_mask);
+               break;
+       case IB_QPS_RTS:
+               ret = iwcm_init_qp_rts_attr(cm_id_priv,
+                                           qp_attr, qp_attr_mask);
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+       return ret;
+}
+EXPORT_SYMBOL(iw_cm_init_qp_attr);
+
+static int __init iw_cm_init(void)
+{
+       iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
+       if (!iwcm_wq)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void __exit iw_cm_cleanup(void)
+{
+       destroy_workqueue(iwcm_wq);
+}
+
+module_init(iw_cm_init);
+module_exit(iw_cm_cleanup);
diff --git a/drivers/infiniband/core/iwcm.h b/drivers/infiniband/core/iwcm.h
new file mode 100644 (file)
index 0000000..3f6cc82
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef IWCM_H
+#define IWCM_H
+
+enum iw_cm_state {
+       IW_CM_STATE_IDLE,             /* unbound, inactive */
+       IW_CM_STATE_LISTEN,           /* listen waiting for connect */
+       IW_CM_STATE_CONN_RECV,        /* inbound waiting for user accept */
+       IW_CM_STATE_CONN_SENT,        /* outbound waiting for peer accept */
+       IW_CM_STATE_ESTABLISHED,      /* established */
+       IW_CM_STATE_CLOSING,          /* disconnect */
+       IW_CM_STATE_DESTROYING        /* object being deleted */
+};
+
+struct iwcm_id_private {
+       struct iw_cm_id id;
+       enum iw_cm_state state;
+       unsigned long flags;
+       struct ib_qp *qp;
+       struct completion destroy_comp;
+       wait_queue_head_t connect_wait;
+       struct list_head work_list;
+       spinlock_t lock;
+       atomic_t refcount;
+       struct list_head work_free_list;
+};
+
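+/* Bit numbers for iwcm_id_private.flags (used with set_bit/test_bit) */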
+#define IWCM_F_CALLBACK_DESTROY   1
+#define IWCM_F_CONNECT_WAIT       2
+
+#endif /* IWCM_H */
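
For reference, a minimal, hypothetical consumer of the iw_cm API added above might look like the sketch below. It is not part of this patch; it assumes the <rdma/iw_cm.h> header added elsewhere in this series, and my_qpn plus the local address binding are placeholders a real ULP would fill in.

#include <linux/err.h>
#include <rdma/iw_cm.h>

static u32 my_qpn;	/* placeholder: QPN of a QP the ULP created earlier */

static int my_cm_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event)
{
	struct iw_cm_conn_param param = { .qpn = my_qpn };

	switch (event->event) {
	case IW_CM_EVENT_CONNECT_REQUEST:
		/* cm_id is a new id cloned from the listening endpoint */
		return iw_cm_accept(cm_id, &param);
	case IW_CM_EVENT_ESTABLISHED:
		return 0;	/* MPA negotiation done, connection is up */
	case IW_CM_EVENT_DISCONNECT:
	case IW_CM_EVENT_CLOSE:
		/* state tracked by the IWCM; destroy via iw_destroy_cm_id later */
		return 0;
	default:
		return 0;
	}
}

static int my_start_listening(struct ib_device *dev)
{
	struct iw_cm_id *id;

	id = iw_create_cm_id(dev, my_cm_handler, NULL);
	if (IS_ERR(id))
		return PTR_ERR(id);
	/* id->local_addr would be bound here before listening */
	return iw_cm_listen(id, 8);	/* pre-allocates 8 work elements */
}
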
index 1c3cfbb..082f03c 100644 (file)
@@ -1246,8 +1246,8 @@ static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
        int i;
 
        for (i = 0; i < MAX_MGMT_OUI; i++)
-                /* Is there matching OUI for this vendor class ? */
-                if (!memcmp(vendor_class->oui[i], oui, 3))
+               /* Is there matching OUI for this vendor class ? */
+               if (!memcmp(vendor_class->oui[i], oui, 3))
                        return i;
 
        return -1;
@@ -2237,7 +2237,7 @@ static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
        list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
                                 &mad_agent_priv->send_list, agent_list) {
                if (mad_send_wr->status == IB_WC_SUCCESS) {
-                       mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
+                       mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
                        mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
                }
        }
@@ -2528,10 +2528,10 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
                        }
                }
                sg_list.addr = dma_map_single(qp_info->port_priv->
-                                               device->dma_device,
+                                               device->dma_device,
                                              &mad_priv->grh,
                                              sizeof *mad_priv -
-                                               sizeof mad_priv->header,
+                                               sizeof mad_priv->header,
                                              DMA_FROM_DEVICE);
                pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
                recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
@@ -2606,7 +2606,7 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
        struct ib_qp *qp;
 
        attr = kmalloc(sizeof *attr, GFP_KERNEL);
-       if (!attr) {
+       if (!attr) {
                printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
                return -ENOMEM;
        }
@@ -2876,7 +2876,10 @@ static void ib_mad_init_device(struct ib_device *device)
 {
        int start, end, i;
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                start = 0;
                end   = 0;
        } else {
@@ -2923,7 +2926,7 @@ static void ib_mad_remove_device(struct ib_device *device)
 {
        int i, num_ports, cur_port;
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                num_ports = 1;
                cur_port = 0;
        } else {
index d147f3b..d06b590 100644 (file)
@@ -38,8 +38,8 @@
 #define __IB_MAD_PRIV_H__
 
 #include <linux/completion.h>
+#include <linux/err.h>
 #include <linux/pci.h>
-#include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
index ebcd5b1..1ef79d0 100644 (file)
@@ -33,8 +33,6 @@
  * $Id: mad_rmpp.c 1921 2005-03-02 22:58:44Z sean.hefty $
  */
 
-#include <linux/dma-mapping.h>
-
 #include "mad_priv.h"
 #include "mad_rmpp.h"
 
@@ -60,6 +58,7 @@ struct mad_rmpp_recv {
        int last_ack;
        int seg_num;
        int newwin;
+       int repwin;
 
        __be64 tid;
        u32 src_qp;
@@ -170,6 +169,32 @@ static struct ib_mad_send_buf *alloc_response_msg(struct ib_mad_agent *agent,
        return msg;
 }
 
+static void ack_ds_ack(struct ib_mad_agent_private *agent,
+                      struct ib_mad_recv_wc *recv_wc)
+{
+       struct ib_mad_send_buf *msg;
+       struct ib_rmpp_mad *rmpp_mad;
+       int ret;
+
+       msg = alloc_response_msg(&agent->agent, recv_wc);
+       if (IS_ERR(msg))
+               return;
+
+       rmpp_mad = msg->mad;
+       memcpy(rmpp_mad, recv_wc->recv_buf.mad, msg->hdr_len);
+
+       rmpp_mad->mad_hdr.method ^= IB_MGMT_METHOD_RESP;
+       ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
+       rmpp_mad->rmpp_hdr.seg_num = 0;
+       rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(1);
+
+       ret = ib_post_send_mad(msg, NULL);
+       if (ret) {
+               ib_destroy_ah(msg->ah);
+               ib_free_send_mad(msg);
+       }
+}
+
 void ib_rmpp_send_handler(struct ib_mad_send_wc *mad_send_wc)
 {
        struct ib_rmpp_mad *rmpp_mad = mad_send_wc->send_buf->mad;
@@ -271,6 +296,7 @@ create_rmpp_recv(struct ib_mad_agent_private *agent,
        rmpp_recv->newwin = 1;
        rmpp_recv->seg_num = 1;
        rmpp_recv->last_ack = 0;
+       rmpp_recv->repwin = 1;
 
        mad_hdr = &mad_recv_wc->recv_buf.mad->mad_hdr;
        rmpp_recv->tid = mad_hdr->tid;
@@ -365,7 +391,7 @@ static inline int window_size(struct ib_mad_agent_private *agent)
 static struct ib_mad_recv_buf * find_seg_location(struct list_head *rmpp_list,
                                                  int seg_num)
 {
-        struct ib_mad_recv_buf *seg_buf;
+       struct ib_mad_recv_buf *seg_buf;
        int cur_seg_num;
 
        list_for_each_entry_reverse(seg_buf, rmpp_list, list) {
@@ -591,6 +617,16 @@ static inline void adjust_last_ack(struct ib_mad_send_wr_private *wr,
                        break;
 }
 
+static void process_ds_ack(struct ib_mad_agent_private *agent,
+                          struct ib_mad_recv_wc *mad_recv_wc, int newwin)
+{
+       struct mad_rmpp_recv *rmpp_recv;
+
+       rmpp_recv = find_rmpp_recv(agent, mad_recv_wc);
+       if (rmpp_recv && rmpp_recv->state == RMPP_STATE_COMPLETE)
+               rmpp_recv->repwin = newwin;
+}
+
 static void process_rmpp_ack(struct ib_mad_agent_private *agent,
                             struct ib_mad_recv_wc *mad_recv_wc)
 {
@@ -616,8 +652,18 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
 
        spin_lock_irqsave(&agent->lock, flags);
        mad_send_wr = ib_find_send_mad(agent, mad_recv_wc);
-       if (!mad_send_wr)
-               goto out;       /* Unmatched ACK */
+       if (!mad_send_wr) {
+               if (!seg_num)
+                       process_ds_ack(agent, mad_recv_wc, newwin);
+               goto out;       /* Unmatched or DS RMPP ACK */
+       }
+
+       if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) &&
+           (mad_send_wr->timeout)) {
+               spin_unlock_irqrestore(&agent->lock, flags);
+               ack_ds_ack(agent, mad_recv_wc);
+               return;         /* Repeated ACK for DS RMPP transaction */
+       }
 
        if ((mad_send_wr->last_ack == mad_send_wr->send_buf.seg_count) ||
            (!mad_send_wr->timeout) || (mad_send_wr->status != IB_WC_SUCCESS))
@@ -656,6 +702,9 @@ static void process_rmpp_ack(struct ib_mad_agent_private *agent,
                if (mad_send_wr->refcount == 1)
                        ib_reset_mad_timeout(mad_send_wr,
                                             mad_send_wr->send_buf.timeout_ms);
+               spin_unlock_irqrestore(&agent->lock, flags);
+               ack_ds_ack(agent, mad_recv_wc);
+               return;
        } else if (mad_send_wr->refcount == 1 &&
                   mad_send_wr->seg_num < mad_send_wr->newwin &&
                   mad_send_wr->seg_num < mad_send_wr->send_buf.seg_count) {
@@ -772,6 +821,39 @@ out:
        return NULL;
 }
 
+static int init_newwin(struct ib_mad_send_wr_private *mad_send_wr)
+{
+       struct ib_mad_agent_private *agent = mad_send_wr->mad_agent_priv;
+       struct ib_mad_hdr *mad_hdr = mad_send_wr->send_buf.mad;
+       struct mad_rmpp_recv *rmpp_recv;
+       struct ib_ah_attr ah_attr;
+       unsigned long flags;
+       int newwin = 1;
+
+       if (!(mad_hdr->method & IB_MGMT_METHOD_RESP))
+               goto out;
+
+       spin_lock_irqsave(&agent->lock, flags);
+       list_for_each_entry(rmpp_recv, &agent->rmpp_list, list) {
+               if (rmpp_recv->tid != mad_hdr->tid ||
+                   rmpp_recv->mgmt_class != mad_hdr->mgmt_class ||
+                   rmpp_recv->class_version != mad_hdr->class_version ||
+                   (rmpp_recv->method & IB_MGMT_METHOD_RESP))
+                       continue;
+
+               if (ib_query_ah(mad_send_wr->send_buf.ah, &ah_attr))
+                       continue;
+
+               if (rmpp_recv->slid == ah_attr.dlid) {
+                       newwin = rmpp_recv->repwin;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&agent->lock, flags);
+out:
+       return newwin;
+}
+
 int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
 {
        struct ib_rmpp_mad *rmpp_mad;
@@ -787,7 +869,7 @@ int ib_send_rmpp_mad(struct ib_mad_send_wr_private *mad_send_wr)
                return IB_RMPP_RESULT_INTERNAL;
        }
 
-       mad_send_wr->newwin = 1;
+       mad_send_wr->newwin = init_newwin(mad_send_wr);
 
        /* We need to wait for the final ACK even if there isn't a response */
        mad_send_wr->refcount += (mad_send_wr->timeout == 0);
index d6b8422..1706d3c 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU
@@ -75,6 +76,7 @@ struct ib_sa_device {
 struct ib_sa_query {
        void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
        void (*release)(struct ib_sa_query *);
+       struct ib_sa_client    *client;
        struct ib_sa_port      *port;
        struct ib_mad_send_buf *mad_buf;
        struct ib_sa_sm_ah     *sm_ah;
@@ -415,6 +417,31 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
        }
 }
 
+void ib_sa_register_client(struct ib_sa_client *client)
+{
+       atomic_set(&client->users, 1);
+       init_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_register_client);
+
+static inline void ib_sa_client_get(struct ib_sa_client *client)
+{
+       atomic_inc(&client->users);
+}
+
+static inline void ib_sa_client_put(struct ib_sa_client *client)
+{
+       if (atomic_dec_and_test(&client->users))
+               complete(&client->comp);
+}
+
+void ib_sa_unregister_client(struct ib_sa_client *client)
+{
+       ib_sa_client_put(client);
+       wait_for_completion(&client->comp);
+}
+EXPORT_SYMBOL(ib_sa_unregister_client);
+
 /**
  * ib_sa_cancel_query - try to cancel an SA query
  * @id:ID of query to cancel
@@ -557,6 +584,7 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
 
 /**
  * ib_sa_path_rec_get - Start a Path get query
+ * @client:SA client
  * @device:device to send query on
  * @port_num: port number to send query on
  * @rec:Path Record to send in query
@@ -579,7 +607,8 @@ static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
  * error code.  Otherwise it is a query ID that can be used to cancel
  * the query.
  */
-int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
+int ib_sa_path_rec_get(struct ib_sa_client *client,
+                      struct ib_device *device, u8 port_num,
                       struct ib_sa_path_rec *rec,
                       ib_sa_comp_mask comp_mask,
                       int timeout_ms, gfp_t gfp_mask,
@@ -614,8 +643,10 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
                goto err1;
        }
 
-       query->callback = callback;
-       query->context  = context;
+       ib_sa_client_get(client);
+       query->sa_query.client = client;
+       query->callback        = callback;
+       query->context         = context;
 
        mad = query->sa_query.mad_buf->mad;
        init_mad(mad, agent);
@@ -639,6 +670,7 @@ int ib_sa_path_rec_get(struct ib_device *device, u8 port_num,
 
 err2:
        *sa_query = NULL;
+       ib_sa_client_put(query->sa_query.client);
        ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -671,6 +703,7 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
 
 /**
  * ib_sa_service_rec_query - Start Service Record operation
+ * @client:SA client
  * @device:device to send request on
  * @port_num: port number to send request on
  * @method:SA method - should be get, set, or delete
@@ -695,7 +728,8 @@ static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
  * error code.  Otherwise it is a request ID that can be used to cancel
  * the query.
  */
-int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
+int ib_sa_service_rec_query(struct ib_sa_client *client,
+                           struct ib_device *device, u8 port_num, u8 method,
                            struct ib_sa_service_rec *rec,
                            ib_sa_comp_mask comp_mask,
                            int timeout_ms, gfp_t gfp_mask,
@@ -735,8 +769,10 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
                goto err1;
        }
 
-       query->callback = callback;
-       query->context  = context;
+       ib_sa_client_get(client);
+       query->sa_query.client = client;
+       query->callback        = callback;
+       query->context         = context;
 
        mad = query->sa_query.mad_buf->mad;
        init_mad(mad, agent);
@@ -761,6 +797,7 @@ int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, u8 method,
 
 err2:
        *sa_query = NULL;
+       ib_sa_client_put(query->sa_query.client);
        ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -791,7 +828,8 @@ static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
        kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
 }
 
-int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
+int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
+                            struct ib_device *device, u8 port_num,
                             u8 method,
                             struct ib_sa_mcmember_rec *rec,
                             ib_sa_comp_mask comp_mask,
@@ -827,8 +865,10 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
                goto err1;
        }
 
-       query->callback = callback;
-       query->context  = context;
+       ib_sa_client_get(client);
+       query->sa_query.client = client;
+       query->callback        = callback;
+       query->context         = context;
 
        mad = query->sa_query.mad_buf->mad;
        init_mad(mad, agent);
@@ -853,6 +893,7 @@ int ib_sa_mcmember_rec_query(struct ib_device *device, u8 port_num,
 
 err2:
        *sa_query = NULL;
+       ib_sa_client_put(query->sa_query.client);
        ib_free_send_mad(query->sa_query.mad_buf);
 
 err1:
@@ -887,8 +928,9 @@ static void send_handler(struct ib_mad_agent *agent,
        idr_remove(&query_idr, query->id);
        spin_unlock_irqrestore(&idr_lock, flags);
 
-        ib_free_send_mad(mad_send_wc->send_buf);
+       ib_free_send_mad(mad_send_wc->send_buf);
        kref_put(&query->sm_ah->ref, free_sm_ah);
+       ib_sa_client_put(query->client);
        query->release(query);
 }
 
@@ -919,7 +961,10 @@ static void ib_sa_add_one(struct ib_device *device)
        struct ib_sa_device *sa_dev;
        int s, e, i;
 
-       if (device->node_type == IB_NODE_SWITCH)
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH)
                s = e = 0;
        else {
                s = 1;
index 35852e7..54b81e1 100644 (file)
@@ -64,7 +64,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 
                /* C14-9:2 */
                if (hop_ptr && hop_ptr < hop_cnt) {
-                       if (node_type != IB_NODE_SWITCH)
+                       if (node_type != RDMA_NODE_IB_SWITCH)
                                return 0;
 
                        /* smp->return_path set when received */
@@ -77,7 +77,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
                if (hop_ptr == hop_cnt) {
                        /* smp->return_path set when received */
                        smp->hop_ptr++;
-                       return (node_type == IB_NODE_SWITCH ||
+                       return (node_type == RDMA_NODE_IB_SWITCH ||
                                smp->dr_dlid == IB_LID_PERMISSIVE);
                }
 
@@ -95,7 +95,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
 
                /* C14-13:2 */
                if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-                       if (node_type != IB_NODE_SWITCH)
+                       if (node_type != RDMA_NODE_IB_SWITCH)
                                return 0;
 
                        smp->hop_ptr--;
@@ -107,7 +107,7 @@ int smi_handle_dr_smp_send(struct ib_smp *smp,
                if (hop_ptr == 1) {
                        smp->hop_ptr--;
                        /* C14-13:3 -- SMPs destined for SM shouldn't be here */
-                       return (node_type == IB_NODE_SWITCH ||
+                       return (node_type == RDMA_NODE_IB_SWITCH ||
                                smp->dr_slid == IB_LID_PERMISSIVE);
                }
 
@@ -142,7 +142,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 
                /* C14-9:2 -- intermediate hop */
                if (hop_ptr && hop_ptr < hop_cnt) {
-                       if (node_type != IB_NODE_SWITCH)
+                       if (node_type != RDMA_NODE_IB_SWITCH)
                                return 0;
 
                        smp->return_path[hop_ptr] = port_num;
@@ -156,7 +156,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
                                smp->return_path[hop_ptr] = port_num;
                        /* smp->hop_ptr updated when sending */
 
-                       return (node_type == IB_NODE_SWITCH ||
+                       return (node_type == RDMA_NODE_IB_SWITCH ||
                                smp->dr_dlid == IB_LID_PERMISSIVE);
                }
 
@@ -175,7 +175,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
 
                /* C14-13:2 */
                if (2 <= hop_ptr && hop_ptr <= hop_cnt) {
-                       if (node_type != IB_NODE_SWITCH)
+                       if (node_type != RDMA_NODE_IB_SWITCH)
                                return 0;
 
                        /* smp->hop_ptr updated when sending */
@@ -190,7 +190,7 @@ int smi_handle_dr_smp_recv(struct ib_smp *smp,
                                return 1;
                        }
                        /* smp->hop_ptr updated when sending */
-                       return (node_type == IB_NODE_SWITCH);
+                       return (node_type == RDMA_NODE_IB_SWITCH);
                }
 
                /* C14-13:4 -- hop_ptr = 0 -> give to SM */
index 21f9282..709323c 100644 (file)
@@ -68,7 +68,7 @@ struct port_table_attribute {
        int                     index;
 };
 
-static inline int ibdev_is_alive(const struct ib_device *dev) 
+static inline int ibdev_is_alive(const struct ib_device *dev)
 {
        return dev->reg_state == IB_DEV_REGISTERED;
 }
@@ -589,10 +589,11 @@ static ssize_t show_node_type(struct class_device *cdev, char *buf)
                return -ENODEV;
 
        switch (dev->node_type) {
-       case IB_NODE_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
-       case IB_NODE_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
-       case IB_NODE_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
-       default:             return sprintf(buf, "%d: <unknown>\n", dev->node_type);
+       case RDMA_NODE_IB_CA:     return sprintf(buf, "%d: CA\n", dev->node_type);
+       case RDMA_NODE_RNIC:      return sprintf(buf, "%d: RNIC\n", dev->node_type);
+       case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
+       case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
+       default:                  return sprintf(buf, "%d: <unknown>\n", dev->node_type);
        }
 }
 
@@ -708,7 +709,7 @@ int ib_device_register_sysfs(struct ib_device *device)
        if (ret)
                goto err_put;
 
-       if (device->node_type == IB_NODE_SWITCH) {
+       if (device->node_type == RDMA_NODE_IB_SWITCH) {
                ret = add_port(device, 0);
                if (ret)
                        goto err_put;
index c1c6fda..ad4f4d5 100644 (file)
@@ -309,9 +309,9 @@ static int ib_ucm_event_process(struct ib_cm_event *evt,
                info          = evt->param.apr_rcvd.apr_info;
                break;
        case IB_CM_SIDR_REQ_RECEIVED:
-               uvt->resp.u.sidr_req_resp.pkey = 
+               uvt->resp.u.sidr_req_resp.pkey =
                                        evt->param.sidr_req_rcvd.pkey;
-               uvt->resp.u.sidr_req_resp.port = 
+               uvt->resp.u.sidr_req_resp.port =
                                        evt->param.sidr_req_rcvd.port;
                uvt->data_len = IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE;
                break;
@@ -1237,7 +1237,7 @@ static struct class ucm_class = {
 static ssize_t show_ibdev(struct class_device *class_dev, char *buf)
 {
        struct ib_ucm_device *dev;
-       
+
        dev = container_of(class_dev, struct ib_ucm_device, class_dev);
        return sprintf(buf, "%s\n", dev->ib_dev->name);
 }
@@ -1247,7 +1247,8 @@ static void ib_ucm_add_one(struct ib_device *device)
 {
        struct ib_ucm_device *ucm_dev;
 
-       if (!device->alloc_ucontext)
+       if (!device->alloc_ucontext ||
+           rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
                return;
 
        ucm_dev = kzalloc(sizeof *ucm_dev, GFP_KERNEL);
index 1273f88..807fbd6 100644 (file)
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc. All rights reserved. 
+ * Copyright (c) 2005 Voltaire, Inc. All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -1032,7 +1032,10 @@ static void ib_umad_add_one(struct ib_device *device)
        struct ib_umad_device *umad_dev;
        int s, e, i;
 
-       if (device->node_type == IB_NODE_SWITCH)
+       if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+               return;
+
+       if (device->node_type == RDMA_NODE_IB_SWITCH)
                s = e = 0;
        else {
                s = 1;
index 30923eb..b72c7f6 100644 (file)
@@ -155,7 +155,7 @@ static struct ib_uobject *__idr_get_uobj(struct idr *idr, int id,
 }
 
 static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
-                                       struct ib_ucontext *context)
+                                       struct ib_ucontext *context, int nested)
 {
        struct ib_uobject *uobj;
 
@@ -163,7 +163,10 @@ static struct ib_uobject *idr_read_uobj(struct idr *idr, int id,
        if (!uobj)
                return NULL;
 
-       down_read(&uobj->mutex);
+       if (nested)
+               down_read_nested(&uobj->mutex, SINGLE_DEPTH_NESTING);
+       else
+               down_read(&uobj->mutex);
        if (!uobj->live) {
                put_uobj_read(uobj);
                return NULL;
@@ -190,17 +193,18 @@ static struct ib_uobject *idr_write_uobj(struct idr *idr, int id,
        return uobj;
 }
 
-static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context)
+static void *idr_read_obj(struct idr *idr, int id, struct ib_ucontext *context,
+                         int nested)
 {
        struct ib_uobject *uobj;
 
-       uobj = idr_read_uobj(idr, id, context);
+       uobj = idr_read_uobj(idr, id, context, nested);
        return uobj ? uobj->object : NULL;
 }
 
 static struct ib_pd *idr_read_pd(int pd_handle, struct ib_ucontext *context)
 {
-       return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context);
+       return idr_read_obj(&ib_uverbs_pd_idr, pd_handle, context, 0);
 }
 
 static void put_pd_read(struct ib_pd *pd)
@@ -208,9 +212,9 @@ static void put_pd_read(struct ib_pd *pd)
        put_uobj_read(pd->uobject);
 }
 
-static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context)
+static struct ib_cq *idr_read_cq(int cq_handle, struct ib_ucontext *context, int nested)
 {
-       return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context);
+       return idr_read_obj(&ib_uverbs_cq_idr, cq_handle, context, nested);
 }
 
 static void put_cq_read(struct ib_cq *cq)
@@ -220,7 +224,7 @@ static void put_cq_read(struct ib_cq *cq)
 
 static struct ib_ah *idr_read_ah(int ah_handle, struct ib_ucontext *context)
 {
-       return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context);
+       return idr_read_obj(&ib_uverbs_ah_idr, ah_handle, context, 0);
 }
 
 static void put_ah_read(struct ib_ah *ah)
@@ -230,7 +234,7 @@ static void put_ah_read(struct ib_ah *ah)
 
 static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
 {
-       return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context);
+       return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
 }
 
 static void put_qp_read(struct ib_qp *qp)
@@ -240,7 +244,7 @@ static void put_qp_read(struct ib_qp *qp)
 
 static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
 {
-       return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context);
+       return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
 }
 
 static void put_srq_read(struct ib_srq *srq)
@@ -837,7 +841,6 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
 err_copy:
        idr_remove_uobj(&ib_uverbs_cq_idr, &obj->uobject);
 
-
 err_free:
        ib_destroy_cq(cq);
 
@@ -867,7 +870,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
                   (unsigned long) cmd.response + sizeof resp,
                   in_len - sizeof cmd, out_len - sizeof resp);
 
-       cq = idr_read_cq(cmd.cq_handle, file->ucontext);
+       cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
        if (!cq)
                return -EINVAL;
 
@@ -875,11 +878,10 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
        if (ret)
                goto out;
 
-       memset(&resp, 0, sizeof resp);
        resp.cqe = cq->cqe;
 
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
-                        &resp, sizeof resp))
+                        &resp, sizeof resp.cqe))
                ret = -EFAULT;
 
 out:
@@ -894,7 +896,6 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
 {
        struct ib_uverbs_poll_cq       cmd;
        struct ib_uverbs_poll_cq_resp *resp;
-       struct ib_uobject             *uobj;
        struct ib_cq                  *cq;
        struct ib_wc                  *wc;
        int                            ret = 0;
@@ -915,16 +916,15 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
                goto out_wc;
        }
 
-       uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-       if (!uobj) {
+       cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+       if (!cq) {
                ret = -EINVAL;
                goto out;
        }
-       cq = uobj->object;
 
        resp->count = ib_poll_cq(cq, cmd.ne, wc);
 
-       put_uobj_read(uobj);
+       put_cq_read(cq);
 
        for (i = 0; i < resp->count; i++) {
                resp->wc[i].wr_id          = wc[i].wr_id;
@@ -959,21 +959,19 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
                                int out_len)
 {
        struct ib_uverbs_req_notify_cq cmd;
-       struct ib_uobject             *uobj;
        struct ib_cq                  *cq;
 
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
-       uobj = idr_read_uobj(&ib_uverbs_cq_idr, cmd.cq_handle, file->ucontext);
-       if (!uobj)
+       cq = idr_read_cq(cmd.cq_handle, file->ucontext, 0);
+       if (!cq)
                return -EINVAL;
-       cq = uobj->object;
 
        ib_req_notify_cq(cq, cmd.solicited_only ?
                         IB_CQ_SOLICITED : IB_CQ_NEXT_COMP);
 
-       put_uobj_read(uobj);
+       put_cq_read(cq);
 
        return in_len;
 }
@@ -1064,9 +1062,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 
        srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
        pd  = idr_read_pd(cmd.pd_handle, file->ucontext);
-       scq = idr_read_cq(cmd.send_cq_handle, file->ucontext);
+       scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
        rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
-               scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext);
+               scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
 
        if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
                ret = -EINVAL;
@@ -1274,6 +1272,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
                            int out_len)
 {
        struct ib_uverbs_modify_qp cmd;
+       struct ib_udata            udata;
        struct ib_qp              *qp;
        struct ib_qp_attr         *attr;
        int                        ret;
@@ -1281,6 +1280,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
+       INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+                  out_len);
+
        attr = kmalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;
@@ -1337,7 +1339,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
        attr->alt_ah_attr.ah_flags          = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
        attr->alt_ah_attr.port_num          = cmd.alt_dest.port_num;
 
-       ret = ib_modify_qp(qp, attr, cmd.attr_mask);
+       ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
 
        put_qp_read(qp);
 
@@ -1674,7 +1676,6 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
                                break;
                }
 
-
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp))
                ret = -EFAULT;
@@ -1724,7 +1725,6 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
                                break;
                }
 
-
        if (copy_to_user((void __user *) (unsigned long) cmd.response,
                         &resp, sizeof resp))
                ret = -EFAULT;
@@ -2055,6 +2055,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
                             int out_len)
 {
        struct ib_uverbs_modify_srq cmd;
+       struct ib_udata             udata;
        struct ib_srq              *srq;
        struct ib_srq_attr          attr;
        int                         ret;
@@ -2062,6 +2063,9 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
        if (copy_from_user(&cmd, buf, sizeof cmd))
                return -EFAULT;
 
+       INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
+                  out_len);
+
        srq = idr_read_srq(cmd.srq_handle, file->ucontext);
        if (!srq)
                return -EINVAL;
@@ -2069,7 +2073,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
        attr.max_wr    = cmd.max_wr;
        attr.srq_limit = cmd.srq_limit;
 
-       ret = ib_modify_srq(srq, &attr, cmd.attr_mask);
+       ret = srq->device->modify_srq(srq, &attr, cmd.attr_mask, &udata);
 
        put_srq_read(srq);
 
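
The new "nested" argument matters because ib_uverbs_create_qp() now read-locks two CQ uobjects of the same lock class when the send and receive CQs differ, and lockdep requires the second acquisition to be annotated. A minimal sketch of that locking pattern, with hypothetical semaphore names and not taken from the patch:

/*
 * Illustrative sketch only.  When two rw_semaphores of the same lock
 * class are read-locked together, the second acquisition is annotated
 * with SINGLE_DEPTH_NESTING, which is what idr_read_cq(..., nested = 1)
 * does for the receive CQ above.
 */
#include <linux/rwsem.h>
#include <linux/lockdep.h>

static struct rw_semaphore send_cq_sem;	/* init_rwsem() before use */
static struct rw_semaphore recv_cq_sem;	/* init_rwsem() before use */

static void lock_send_and_recv_cq(void)
{
	down_read(&send_cq_sem);
	down_read_nested(&recv_cq_sem, SINGLE_DEPTH_NESTING);
}

static void unlock_send_and_recv_cq(void)
{
	up_read(&recv_cq_sem);
	up_read(&send_cq_sem);
}
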
index 468999c..8b5dd36 100644 (file)
@@ -79,6 +79,23 @@ enum ib_rate mult_to_ib_rate(int mult)
 }
 EXPORT_SYMBOL(mult_to_ib_rate);
 
+enum rdma_transport_type
+rdma_node_get_transport(enum rdma_node_type node_type)
+{
+       switch (node_type) {
+       case RDMA_NODE_IB_CA:
+       case RDMA_NODE_IB_SWITCH:
+       case RDMA_NODE_IB_ROUTER:
+               return RDMA_TRANSPORT_IB;
+       case RDMA_NODE_RNIC:
+               return RDMA_TRANSPORT_IWARP;
+       default:
+               BUG();
+               return 0;
+       }
+}
+EXPORT_SYMBOL(rdma_node_get_transport);
+
 /* Protection domains */
 
 struct ib_pd *ib_alloc_pd(struct ib_device *device)
@@ -231,7 +248,7 @@ int ib_modify_srq(struct ib_srq *srq,
                  struct ib_srq_attr *srq_attr,
                  enum ib_srq_attr_mask srq_attr_mask)
 {
-       return srq->device->modify_srq(srq, srq_attr, srq_attr_mask);
+       return srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_srq);
 
@@ -547,7 +564,7 @@ int ib_modify_qp(struct ib_qp *qp,
                 struct ib_qp_attr *qp_attr,
                 int qp_attr_mask)
 {
-       return qp->device->modify_qp(qp, qp_attr, qp_attr_mask);
+       return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);
 
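
rdma_node_get_transport() above lets consumers test the link transport instead of enumerating node types, which is how ib_umad now skips non-IB devices. A minimal usage sketch with a hypothetical client callback, not taken from the patch:

#include <rdma/ib_verbs.h>

/* Hypothetical ib_client add callback that only handles InfiniBand
 * links and therefore ignores iWARP RNICs such as the amso1100 added
 * below. */
static void my_add_one(struct ib_device *device)
{
	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	/* ... InfiniBand-specific setup ... */
}
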
diff --git a/drivers/infiniband/hw/amso1100/Kbuild b/drivers/infiniband/hw/amso1100/Kbuild
new file mode 100644 (file)
index 0000000..06964c4
--- /dev/null
@@ -0,0 +1,8 @@
+ifdef CONFIG_INFINIBAND_AMSO1100_DEBUG
+EXTRA_CFLAGS += -DDEBUG
+endif
+
+obj-$(CONFIG_INFINIBAND_AMSO1100) += iw_c2.o
+
+iw_c2-y := c2.o c2_provider.o c2_rnic.o c2_alloc.o c2_mq.o c2_ae.o c2_vq.o \
+       c2_intr.o c2_cq.o c2_qp.o c2_cm.o c2_mm.o c2_pd.o
diff --git a/drivers/infiniband/hw/amso1100/Kconfig b/drivers/infiniband/hw/amso1100/Kconfig
new file mode 100644 (file)
index 0000000..809cb14
--- /dev/null
@@ -0,0 +1,15 @@
+config INFINIBAND_AMSO1100
+       tristate "Ammasso 1100 HCA support"
+       depends on PCI && INET && INFINIBAND
+       ---help---
+         This is a low-level driver for the Ammasso 1100 host
+         channel adapter (HCA).
+
+config INFINIBAND_AMSO1100_DEBUG
+       bool "Verbose debugging output"
+       depends on INFINIBAND_AMSO1100
+       default n
+       ---help---
+         This option causes the amso1100 driver to produce verbose
+         debugging output.  Select this if you are developing the driver
+         or trying to diagnose a problem.
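
The debug option works at build time only: enabling it makes the Kbuild fragment above add -DDEBUG, so the driver's pr_debug() calls become real KERN_DEBUG printks rather than compiling away. A minimal sketch of the effect, with a hypothetical function name:

#include <linux/kernel.h>

/* Illustrative only.  With CONFIG_INFINIBAND_AMSO1100_DEBUG=y (and thus
 * -DDEBUG), pr_debug() expands to printk(KERN_DEBUG ...); without it,
 * this call compiles away to nothing. */
static void c2_debug_example(const char *name, int irq)
{
	pr_debug("c2: %s bound to IRQ %d\n", name, irq);
}
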
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
new file mode 100644 (file)
index 0000000..9e9120f
--- /dev/null
@@ -0,0 +1,1255 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/delay.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/init.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+
+#include <rdma/ib_smi.h>
+#include "c2.h"
+#include "c2_provider.h"
+
+MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
+MODULE_DESCRIPTION("Ammasso AMSO1100 Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK
+    | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN;
+
+static int debug = -1;         /* defaults above */
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+static int c2_up(struct net_device *netdev);
+static int c2_down(struct net_device *netdev);
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+static void c2_tx_interrupt(struct net_device *netdev);
+static void c2_rx_interrupt(struct net_device *netdev);
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs);
+static void c2_tx_timeout(struct net_device *netdev);
+static int c2_change_mtu(struct net_device *netdev, int new_mtu);
+static void c2_reset(struct c2_port *c2_port);
+static struct net_device_stats *c2_get_stats(struct net_device *netdev);
+
+static struct pci_device_id c2_pci_table[] = {
+       { PCI_DEVICE(0x18b8, 0xb001) },
+       { 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, c2_pci_table);
+
+static void c2_print_macaddr(struct net_device *netdev)
+{
+       pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, "
+               "IRQ %u\n", netdev->name,
+               netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
+               netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5],
+               netdev->irq);
+}
+
+static void c2_set_rxbufsize(struct c2_port *c2_port)
+{
+       struct net_device *netdev = c2_port->netdev;
+
+       if (netdev->mtu > RX_BUF_SIZE)
+               c2_port->rx_buf_size =
+                   netdev->mtu + ETH_HLEN + sizeof(struct c2_rxp_hdr) +
+                   NET_IP_ALIGN;
+       else
+               c2_port->rx_buf_size = sizeof(struct c2_rxp_hdr) + RX_BUF_SIZE;
+}
+
+/*
+ * Allocate TX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_tx_ring_alloc(struct c2_ring *tx_ring, void *vaddr,
+                           dma_addr_t base, void __iomem * mmio_txp_ring)
+{
+       struct c2_tx_desc *tx_desc;
+       struct c2_txp_desc __iomem *txp_desc;
+       struct c2_element *elem;
+       int i;
+
+       tx_ring->start = kmalloc(sizeof(*elem) * tx_ring->count, GFP_KERNEL);
+       if (!tx_ring->start)
+               return -ENOMEM;
+
+       elem = tx_ring->start;
+       tx_desc = vaddr;
+       txp_desc = mmio_txp_ring;
+       for (i = 0; i < tx_ring->count; i++, elem++, tx_desc++, txp_desc++) {
+               tx_desc->len = 0;
+               tx_desc->status = 0;
+
+               /* Set TXP_HTXD_UNINIT */
+               __raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+                            (void __iomem *) txp_desc + C2_TXP_ADDR);
+               __raw_writew(0, (void __iomem *) txp_desc + C2_TXP_LEN);
+               __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+                            (void __iomem *) txp_desc + C2_TXP_FLAGS);
+
+               elem->skb = NULL;
+               elem->ht_desc = tx_desc;
+               elem->hw_desc = txp_desc;
+
+               if (i == tx_ring->count - 1) {
+                       elem->next = tx_ring->start;
+                       tx_desc->next_offset = base;
+               } else {
+                       elem->next = elem + 1;
+                       tx_desc->next_offset =
+                           base + (i + 1) * sizeof(*tx_desc);
+               }
+       }
+
+       tx_ring->to_use = tx_ring->to_clean = tx_ring->start;
+
+       return 0;
+}
+
+/*
+ * Allocate RX ring elements and chain them together.
+ * One-to-one association of adapter descriptors with ring elements.
+ */
+static int c2_rx_ring_alloc(struct c2_ring *rx_ring, void *vaddr,
+                           dma_addr_t base, void __iomem * mmio_rxp_ring)
+{
+       struct c2_rx_desc *rx_desc;
+       struct c2_rxp_desc __iomem *rxp_desc;
+       struct c2_element *elem;
+       int i;
+
+       rx_ring->start = kmalloc(sizeof(*elem) * rx_ring->count, GFP_KERNEL);
+       if (!rx_ring->start)
+               return -ENOMEM;
+
+       elem = rx_ring->start;
+       rx_desc = vaddr;
+       rxp_desc = mmio_rxp_ring;
+       for (i = 0; i < rx_ring->count; i++, elem++, rx_desc++, rxp_desc++) {
+               rx_desc->len = 0;
+               rx_desc->status = 0;
+
+               /* Set RXP_HRXD_UNINIT */
+               __raw_writew(cpu_to_be16(RXP_HRXD_OK),
+                      (void __iomem *) rxp_desc + C2_RXP_STATUS);
+               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_COUNT);
+               __raw_writew(0, (void __iomem *) rxp_desc + C2_RXP_LEN);
+               __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+                            (void __iomem *) rxp_desc + C2_RXP_ADDR);
+               __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+                            (void __iomem *) rxp_desc + C2_RXP_FLAGS);
+
+               elem->skb = NULL;
+               elem->ht_desc = rx_desc;
+               elem->hw_desc = rxp_desc;
+
+               if (i == rx_ring->count - 1) {
+                       elem->next = rx_ring->start;
+                       rx_desc->next_offset = base;
+               } else {
+                       elem->next = elem + 1;
+                       rx_desc->next_offset =
+                           base + (i + 1) * sizeof(*rx_desc);
+               }
+       }
+
+       rx_ring->to_use = rx_ring->to_clean = rx_ring->start;
+
+       return 0;
+}
+
+/* Set up a buffer for receiving */
+static inline int c2_rx_alloc(struct c2_port *c2_port, struct c2_element *elem)
+{
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_rx_desc *rx_desc = elem->ht_desc;
+       struct sk_buff *skb;
+       dma_addr_t mapaddr;
+       u32 maplen;
+       struct c2_rxp_hdr *rxp_hdr;
+
+       skb = dev_alloc_skb(c2_port->rx_buf_size);
+       if (unlikely(!skb)) {
+               pr_debug("%s: out of memory for receive\n",
+                       c2_port->netdev->name);
+               return -ENOMEM;
+       }
+
+       /* Zero out the rxp hdr in the sk_buff */
+       memset(skb->data, 0, sizeof(*rxp_hdr));
+
+       skb->dev = c2_port->netdev;
+
+       maplen = c2_port->rx_buf_size;
+       mapaddr =
+           pci_map_single(c2dev->pcidev, skb->data, maplen,
+                          PCI_DMA_FROMDEVICE);
+
+       /* Set the sk_buff RXP_header to RXP_HRXD_READY */
+       rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+       rxp_hdr->flags = RXP_HRXD_READY;
+
+       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+       __raw_writew(cpu_to_be16((u16) maplen - sizeof(*rxp_hdr)),
+                    elem->hw_desc + C2_RXP_LEN);
+       __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_RXP_ADDR);
+       __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+       elem->skb = skb;
+       elem->mapaddr = mapaddr;
+       elem->maplen = maplen;
+       rx_desc->len = maplen;
+
+       return 0;
+}
+
+/*
+ * Allocate buffers for the Rx ring
+ * For receive:  rx_ring.to_clean is the next received frame
+ */
+static int c2_rx_fill(struct c2_port *c2_port)
+{
+       struct c2_ring *rx_ring = &c2_port->rx_ring;
+       struct c2_element *elem;
+       int ret = 0;
+
+       elem = rx_ring->start;
+       do {
+               if (c2_rx_alloc(c2_port, elem)) {
+                       ret = 1;
+                       break;
+               }
+       } while ((elem = elem->next) != rx_ring->start);
+
+       rx_ring->to_clean = rx_ring->start;
+       return ret;
+}
+
+/* Free all buffers in RX ring, assumes receiver stopped */
+static void c2_rx_clean(struct c2_port *c2_port)
+{
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_ring *rx_ring = &c2_port->rx_ring;
+       struct c2_element *elem;
+       struct c2_rx_desc *rx_desc;
+
+       elem = rx_ring->start;
+       do {
+               rx_desc = elem->ht_desc;
+               rx_desc->len = 0;
+
+               __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+               __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+               __raw_writew(0, elem->hw_desc + C2_RXP_LEN);
+               __raw_writeq(cpu_to_be64(0x99aabbccddeeffULL),
+                            elem->hw_desc + C2_RXP_ADDR);
+               __raw_writew(cpu_to_be16(RXP_HRXD_UNINIT),
+                            elem->hw_desc + C2_RXP_FLAGS);
+
+               if (elem->skb) {
+                       pci_unmap_single(c2dev->pcidev, elem->mapaddr,
+                                        elem->maplen, PCI_DMA_FROMDEVICE);
+                       dev_kfree_skb(elem->skb);
+                       elem->skb = NULL;
+               }
+       } while ((elem = elem->next) != rx_ring->start);
+}
+
+static inline int c2_tx_free(struct c2_dev *c2dev, struct c2_element *elem)
+{
+       struct c2_tx_desc *tx_desc = elem->ht_desc;
+
+       tx_desc->len = 0;
+
+       pci_unmap_single(c2dev->pcidev, elem->mapaddr, elem->maplen,
+                        PCI_DMA_TODEVICE);
+
+       if (elem->skb) {
+               dev_kfree_skb_any(elem->skb);
+               elem->skb = NULL;
+       }
+
+       return 0;
+}
+
+/* Free all buffers in TX ring, assumes transmitter stopped */
+static void c2_tx_clean(struct c2_port *c2_port)
+{
+       struct c2_ring *tx_ring = &c2_port->tx_ring;
+       struct c2_element *elem;
+       struct c2_txp_desc txp_htxd;
+       int retry;
+       unsigned long flags;
+
+       spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+       elem = tx_ring->start;
+
+       do {
+               retry = 0;
+               do {
+                       txp_htxd.flags =
+                           readw(elem->hw_desc + C2_TXP_FLAGS);
+
+                       if (txp_htxd.flags == TXP_HTXD_READY) {
+                               retry = 1;
+                               __raw_writew(0,
+                                            elem->hw_desc + C2_TXP_LEN);
+                               __raw_writeq(0,
+                                            elem->hw_desc + C2_TXP_ADDR);
+                               __raw_writew(cpu_to_be16(TXP_HTXD_DONE),
+                                            elem->hw_desc + C2_TXP_FLAGS);
+                               c2_port->netstats.tx_dropped++;
+                               break;
+                       } else {
+                               __raw_writew(0,
+                                            elem->hw_desc + C2_TXP_LEN);
+                               __raw_writeq(cpu_to_be64(0x1122334455667788ULL),
+                                            elem->hw_desc + C2_TXP_ADDR);
+                               __raw_writew(cpu_to_be16(TXP_HTXD_UNINIT),
+                                            elem->hw_desc + C2_TXP_FLAGS);
+                       }
+
+                       c2_tx_free(c2_port->c2dev, elem);
+
+               } while ((elem = elem->next) != tx_ring->start);
+       } while (retry);
+
+       c2_port->tx_avail = c2_port->tx_ring.count - 1;
+       c2_port->c2dev->cur_tx = tx_ring->to_use - tx_ring->start;
+
+       if (c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+               netif_wake_queue(c2_port->netdev);
+
+       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+}
+
+/*
+ * Process transmit descriptors marked 'DONE' by the firmware,
+ * freeing up their unneeded sk_buffs.
+ */
+static void c2_tx_interrupt(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_ring *tx_ring = &c2_port->tx_ring;
+       struct c2_element *elem;
+       struct c2_txp_desc txp_htxd;
+
+       spin_lock(&c2_port->tx_lock);
+
+       for (elem = tx_ring->to_clean; elem != tx_ring->to_use;
+            elem = elem->next) {
+               txp_htxd.flags =
+                   be16_to_cpu(readw(elem->hw_desc + C2_TXP_FLAGS));
+
+               if (txp_htxd.flags != TXP_HTXD_DONE)
+                       break;
+
+               if (netif_msg_tx_done(c2_port)) {
+                       /* PCI reads are expensive in fast path */
+                       txp_htxd.len =
+                           be16_to_cpu(readw(elem->hw_desc + C2_TXP_LEN));
+                       pr_debug("%s: tx done slot %3Zu status 0x%x len "
+                               "%5u bytes\n",
+                               netdev->name, elem - tx_ring->start,
+                               txp_htxd.flags, txp_htxd.len);
+               }
+
+               c2_tx_free(c2dev, elem);
+               ++(c2_port->tx_avail);
+       }
+
+       tx_ring->to_clean = elem;
+
+       if (netif_queue_stopped(netdev)
+           && c2_port->tx_avail > MAX_SKB_FRAGS + 1)
+               netif_wake_queue(netdev);
+
+       spin_unlock(&c2_port->tx_lock);
+}
+
+static void c2_rx_error(struct c2_port *c2_port, struct c2_element *elem)
+{
+       struct c2_rx_desc *rx_desc = elem->ht_desc;
+       struct c2_rxp_hdr *rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+
+       if (rxp_hdr->status != RXP_HRXD_OK ||
+           rxp_hdr->len > (rx_desc->len - sizeof(*rxp_hdr))) {
+               pr_debug("BAD RXP_HRXD\n");
+               pr_debug("  rx_desc : %p\n", rx_desc);
+               pr_debug("    index : %Zu\n",
+                       elem - c2_port->rx_ring.start);
+               pr_debug("    len   : %u\n", rx_desc->len);
+               pr_debug("  rxp_hdr : %p [PA %p]\n", rxp_hdr,
+                       (void *) __pa((unsigned long) rxp_hdr));
+               pr_debug("    flags : 0x%x\n", rxp_hdr->flags);
+               pr_debug("    status: 0x%x\n", rxp_hdr->status);
+               pr_debug("    len   : %u\n", rxp_hdr->len);
+               pr_debug("    rsvd  : 0x%x\n", rxp_hdr->rsvd);
+       }
+
+       /* Set up the skb for reuse since we're dropping this packet */
+       elem->skb->tail = elem->skb->data = elem->skb->head;
+
+       /* Zero out the rxp hdr in the sk_buff */
+       memset(elem->skb->data, 0, sizeof(*rxp_hdr));
+
+       /* Write the descriptor to the adapter's rx ring */
+       __raw_writew(0, elem->hw_desc + C2_RXP_STATUS);
+       __raw_writew(0, elem->hw_desc + C2_RXP_COUNT);
+       __raw_writew(cpu_to_be16((u16) elem->maplen - sizeof(*rxp_hdr)),
+                    elem->hw_desc + C2_RXP_LEN);
+       __raw_writeq(cpu_to_be64(elem->mapaddr), elem->hw_desc + C2_RXP_ADDR);
+       __raw_writew(cpu_to_be16(RXP_HRXD_READY), elem->hw_desc + C2_RXP_FLAGS);
+
+       pr_debug("packet dropped\n");
+       c2_port->netstats.rx_dropped++;
+}
+
+static void c2_rx_interrupt(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_ring *rx_ring = &c2_port->rx_ring;
+       struct c2_element *elem;
+       struct c2_rx_desc *rx_desc;
+       struct c2_rxp_hdr *rxp_hdr;
+       struct sk_buff *skb;
+       dma_addr_t mapaddr;
+       u32 maplen, buflen;
+       unsigned long flags;
+
+       spin_lock_irqsave(&c2dev->lock, flags);
+
+       /* Begin where we left off */
+       rx_ring->to_clean = rx_ring->start + c2dev->cur_rx;
+
+       for (elem = rx_ring->to_clean; elem->next != rx_ring->to_clean;
+            elem = elem->next) {
+               rx_desc = elem->ht_desc;
+               mapaddr = elem->mapaddr;
+               maplen = elem->maplen;
+               skb = elem->skb;
+               rxp_hdr = (struct c2_rxp_hdr *) skb->data;
+
+               if (rxp_hdr->flags != RXP_HRXD_DONE)
+                       break;
+               buflen = rxp_hdr->len;
+
+               /* Sanity check the RXP header */
+               if (rxp_hdr->status != RXP_HRXD_OK ||
+                   buflen > (rx_desc->len - sizeof(*rxp_hdr))) {
+                       c2_rx_error(c2_port, elem);
+                       continue;
+               }
+
+               /*
+                * Allocate and map a new skb for replenishing the host
+                * RX desc
+                */
+               if (c2_rx_alloc(c2_port, elem)) {
+                       c2_rx_error(c2_port, elem);
+                       continue;
+               }
+
+               /* Unmap the old skb */
+               pci_unmap_single(c2dev->pcidev, mapaddr, maplen,
+                                PCI_DMA_FROMDEVICE);
+
+               prefetch(skb->data);
+
+               /*
+                * Skip past the leading 8 bytes comprising the
+                * "struct c2_rxp_hdr" prepended by the adapter
+                * to the usual Ethernet header ("struct ethhdr"),
+                * to reach the start of the raw Ethernet packet.
+                *
+                * Fix up the various fields in the sk_buff before
+                * passing it up to netif_rx(). The transfer size
+                * (in bytes) specified by the adapter's len field in
+                * "struct c2_rxp_hdr" does NOT include
+                * "sizeof(struct c2_rxp_hdr)".
+                */
+               skb->data += sizeof(*rxp_hdr);
+               skb->tail = skb->data + buflen;
+               skb->len = buflen;
+               skb->dev = netdev;
+               skb->protocol = eth_type_trans(skb, netdev);
+
+               netif_rx(skb);
+
+               netdev->last_rx = jiffies;
+               c2_port->netstats.rx_packets++;
+               c2_port->netstats.rx_bytes += buflen;
+       }
+
+       /* Save where we left off */
+       rx_ring->to_clean = elem;
+       c2dev->cur_rx = elem - rx_ring->start;
+       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+       spin_unlock_irqrestore(&c2dev->lock, flags);
+}
+
+/*
+ * Handle netisr0 TX & RX interrupts.
+ */
+static irqreturn_t c2_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+{
+       unsigned int netisr0, dmaisr;
+       int handled = 0;
+       struct c2_dev *c2dev = (struct c2_dev *) dev_id;
+
+       /* Process CCILNET interrupts */
+       netisr0 = readl(c2dev->regs + C2_NISR0);
+       if (netisr0) {
+
+               /*
+                * There is an issue with the firmware that always
+                * provides the status of RX for both TX & RX
+                * interrupts.  So process both queues here.
+                */
+               c2_rx_interrupt(c2dev->netdev);
+               c2_tx_interrupt(c2dev->netdev);
+
+               /* Clear the interrupt */
+               writel(netisr0, c2dev->regs + C2_NISR0);
+               handled++;
+       }
+
+       /* Process RNIC interrupts */
+       dmaisr = readl(c2dev->regs + C2_DISR);
+       if (dmaisr) {
+               writel(dmaisr, c2dev->regs + C2_DISR);
+               c2_rnic_interrupt(c2dev);
+               handled++;
+       }
+
+       if (handled) {
+               return IRQ_HANDLED;
+       } else {
+               return IRQ_NONE;
+       }
+}
+
+static int c2_up(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_element *elem;
+       struct c2_rxp_hdr *rxp_hdr;
+       struct in_device *in_dev;
+       size_t rx_size, tx_size;
+       int ret, i;
+       unsigned int netimr0;
+
+       if (netif_msg_ifup(c2_port))
+               pr_debug("%s: enabling interface\n", netdev->name);
+
+       /* Set the Rx buffer size based on MTU */
+       c2_set_rxbufsize(c2_port);
+
+       /* Allocate DMA'able memory for Tx/Rx host descriptor rings */
+       rx_size = c2_port->rx_ring.count * sizeof(struct c2_rx_desc);
+       tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
+
+       c2_port->mem_size = tx_size + rx_size;
+       c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
+                                           &c2_port->dma);
+       if (c2_port->mem == NULL) {
+               pr_debug("Unable to allocate memory for "
+                       "host descriptor rings\n");
+               return -ENOMEM;
+       }
+
+       memset(c2_port->mem, 0, c2_port->mem_size);
+
+       /* Create the Rx host descriptor ring */
+       if ((ret =
+            c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
+                             c2dev->mmio_rxp_ring))) {
+               pr_debug("Unable to create RX ring\n");
+               goto bail0;
+       }
+
+       /* Allocate Rx buffers for the host descriptor ring */
+       if (c2_rx_fill(c2_port)) {
+               pr_debug("Unable to fill RX ring\n");
+               goto bail1;
+       }
+
+       /* Create the Tx host descriptor ring */
+       if ((ret = c2_tx_ring_alloc(&c2_port->tx_ring, c2_port->mem + rx_size,
+                                   c2_port->dma + rx_size,
+                                   c2dev->mmio_txp_ring))) {
+               pr_debug("Unable to create TX ring\n");
+               goto bail1;
+       }
+
+       /* Set the TX pointer to where we left off */
+       c2_port->tx_avail = c2_port->tx_ring.count - 1;
+       c2_port->tx_ring.to_use = c2_port->tx_ring.to_clean =
+           c2_port->tx_ring.start + c2dev->cur_tx;
+
+       /* missing: Initialize MAC */
+
+       BUG_ON(c2_port->tx_ring.to_use != c2_port->tx_ring.to_clean);
+
+       /* Reset the adapter, ensures the driver is in sync with the RXP */
+       c2_reset(c2_port);
+
+       /* Reset the READY bit in the sk_buff RXP headers & adapter HRXDQ */
+       for (i = 0, elem = c2_port->rx_ring.start; i < c2_port->rx_ring.count;
+            i++, elem++) {
+               rxp_hdr = (struct c2_rxp_hdr *) elem->skb->data;
+               rxp_hdr->flags = 0;
+               __raw_writew(cpu_to_be16(RXP_HRXD_READY),
+                            elem->hw_desc + C2_RXP_FLAGS);
+       }
+
+       /* Enable network packets */
+       netif_start_queue(netdev);
+
+       /* Enable IRQ */
+       writel(0, c2dev->regs + C2_IDIS);
+       netimr0 = readl(c2dev->regs + C2_NIMR0);
+       netimr0 &= ~(C2_PCI_HTX_INT | C2_PCI_HRX_INT);
+       writel(netimr0, c2dev->regs + C2_NIMR0);
+
+       /* Tell the stack to ignore ARP requests for IP addresses bound to
+        * other interfaces.  This is needed to prevent the host stack
+        * from responding to ARP requests for the IP address bound on the
+        * RDMA interface.
+        */
+       in_dev = in_dev_get(netdev);
+       in_dev->cnf.arp_ignore = 1;
+       in_dev_put(in_dev);
+
+       return 0;
+
+      bail1:
+       c2_rx_clean(c2_port);
+       kfree(c2_port->rx_ring.start);
+
+      bail0:
+       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+                           c2_port->dma);
+
+       return ret;
+}
+
+static int c2_down(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+
+       if (netif_msg_ifdown(c2_port))
+               pr_debug("%s: disabling interface\n",
+                       netdev->name);
+
+       /* Wait for all the queued packets to get sent */
+       c2_tx_interrupt(netdev);
+
+       /* Disable network packets */
+       netif_stop_queue(netdev);
+
+       /* Disable IRQs by clearing the interrupt mask */
+       writel(1, c2dev->regs + C2_IDIS);
+       writel(0, c2dev->regs + C2_NIMR0);
+
+       /* missing: Stop transmitter */
+
+       /* missing: Stop receiver */
+
+       /* Reset the adapter, ensures the driver is in sync with the RXP */
+       c2_reset(c2_port);
+
+       /* missing: Turn off LEDs here */
+
+       /* Free all buffers in the host descriptor rings */
+       c2_tx_clean(c2_port);
+       c2_rx_clean(c2_port);
+
+       /* Free the host descriptor rings */
+       kfree(c2_port->rx_ring.start);
+       kfree(c2_port->tx_ring.start);
+       pci_free_consistent(c2dev->pcidev, c2_port->mem_size, c2_port->mem,
+                           c2_port->dma);
+
+       return 0;
+}
+
+static void c2_reset(struct c2_port *c2_port)
+{
+       struct c2_dev *c2dev = c2_port->c2dev;
+       unsigned int cur_rx = c2dev->cur_rx;
+
+       /* Tell the hardware to quiesce */
+       C2_SET_CUR_RX(c2dev, cur_rx | C2_PCI_HRX_QUI);
+
+       /*
+        * The hardware will reset the C2_PCI_HRX_QUI bit once
+        * the RXP is quiesced.  Wait 2 seconds for this.
+        */
+       ssleep(2);
+
+       cur_rx = C2_GET_CUR_RX(c2dev);
+
+       if (cur_rx & C2_PCI_HRX_QUI)
+               pr_debug("c2_reset: failed to quiesce the hardware!\n");
+
+       cur_rx &= ~C2_PCI_HRX_QUI;
+
+       c2dev->cur_rx = cur_rx;
+
+       pr_debug("Current RX: %u\n", c2dev->cur_rx);
+}
+
+static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+       struct c2_dev *c2dev = c2_port->c2dev;
+       struct c2_ring *tx_ring = &c2_port->tx_ring;
+       struct c2_element *elem;
+       dma_addr_t mapaddr;
+       u32 maplen;
+       unsigned long flags;
+       unsigned int i;
+
+       spin_lock_irqsave(&c2_port->tx_lock, flags);
+
+       if (unlikely(c2_port->tx_avail < (skb_shinfo(skb)->nr_frags + 1))) {
+               netif_stop_queue(netdev);
+               spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+               pr_debug("%s: Tx ring full when queue awake!\n",
+                       netdev->name);
+               return NETDEV_TX_BUSY;
+       }
+
+       maplen = skb_headlen(skb);
+       mapaddr =
+           pci_map_single(c2dev->pcidev, skb->data, maplen, PCI_DMA_TODEVICE);
+
+       elem = tx_ring->to_use;
+       elem->skb = skb;
+       elem->mapaddr = mapaddr;
+       elem->maplen = maplen;
+
+       /* Tell HW to xmit */
+       __raw_writeq(cpu_to_be64(mapaddr), elem->hw_desc + C2_TXP_ADDR);
+       __raw_writew(cpu_to_be16(maplen), elem->hw_desc + C2_TXP_LEN);
+       __raw_writew(cpu_to_be16(TXP_HTXD_READY), elem->hw_desc + C2_TXP_FLAGS);
+
+       c2_port->netstats.tx_packets++;
+       c2_port->netstats.tx_bytes += maplen;
+
+       /* Loop through additional data fragments and queue them */
+       if (skb_shinfo(skb)->nr_frags) {
+               for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                       maplen = frag->size;
+                       mapaddr =
+                           pci_map_page(c2dev->pcidev, frag->page,
+                                        frag->page_offset, maplen,
+                                        PCI_DMA_TODEVICE);
+
+                       elem = elem->next;
+                       elem->skb = NULL;
+                       elem->mapaddr = mapaddr;
+                       elem->maplen = maplen;
+
+                       /* Tell HW to xmit */
+                       __raw_writeq(cpu_to_be64(mapaddr),
+                                    elem->hw_desc + C2_TXP_ADDR);
+                       __raw_writew(cpu_to_be16(maplen),
+                                    elem->hw_desc + C2_TXP_LEN);
+                       __raw_writew(cpu_to_be16(TXP_HTXD_READY),
+                                    elem->hw_desc + C2_TXP_FLAGS);
+
+                       c2_port->netstats.tx_packets++;
+                       c2_port->netstats.tx_bytes += maplen;
+               }
+       }
+
+       tx_ring->to_use = elem->next;
+       c2_port->tx_avail -= (skb_shinfo(skb)->nr_frags + 1);
+
+       if (c2_port->tx_avail <= MAX_SKB_FRAGS + 1) {
+               netif_stop_queue(netdev);
+               if (netif_msg_tx_queued(c2_port))
+                       pr_debug("%s: transmit queue full\n",
+                               netdev->name);
+       }
+
+       spin_unlock_irqrestore(&c2_port->tx_lock, flags);
+
+       netdev->trans_start = jiffies;
+
+       return NETDEV_TX_OK;
+}
+
+static struct net_device_stats *c2_get_stats(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+
+       return &c2_port->netstats;
+}
+
+static void c2_tx_timeout(struct net_device *netdev)
+{
+       struct c2_port *c2_port = netdev_priv(netdev);
+
+       if (netif_msg_timer(c2_port))
+               pr_debug("%s: tx timeout\n", netdev->name);
+
+       c2_tx_clean(c2_port);
+}
+
+static int c2_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       int ret = 0;
+
+       if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
+               return -EINVAL;
+
+       netdev->mtu = new_mtu;
+
+       if (netif_running(netdev)) {
+               c2_down(netdev);
+
+               c2_up(netdev);
+       }
+
+       return ret;
+}
+
+/* Initialize network device */
+static struct net_device *c2_devinit(struct c2_dev *c2dev,
+                                    void __iomem * mmio_addr)
+{
+       struct c2_port *c2_port = NULL;
+       struct net_device *netdev = alloc_etherdev(sizeof(*c2_port));
+
+       if (!netdev) {
+               pr_debug("c2_port etherdev alloc failed");
+               return NULL;
+       }
+
+       SET_MODULE_OWNER(netdev);
+       SET_NETDEV_DEV(netdev, &c2dev->pcidev->dev);
+
+       netdev->open = c2_up;
+       netdev->stop = c2_down;
+       netdev->hard_start_xmit = c2_xmit_frame;
+       netdev->get_stats = c2_get_stats;
+       netdev->tx_timeout = c2_tx_timeout;
+       netdev->change_mtu = c2_change_mtu;
+       netdev->watchdog_timeo = C2_TX_TIMEOUT;
+       netdev->irq = c2dev->pcidev->irq;
+
+       c2_port = netdev_priv(netdev);
+       c2_port->netdev = netdev;
+       c2_port->c2dev = c2dev;
+       c2_port->msg_enable = netif_msg_init(debug, default_msg);
+       c2_port->tx_ring.count = C2_NUM_TX_DESC;
+       c2_port->rx_ring.count = C2_NUM_RX_DESC;
+
+       spin_lock_init(&c2_port->tx_lock);
+
+       /* Copy our 48-bit ethernet hardware address */
+       memcpy_fromio(netdev->dev_addr, mmio_addr + C2_REGS_ENADDR, 6);
+
+       /* Validate the MAC address */
+       if (!is_valid_ether_addr(netdev->dev_addr)) {
+               pr_debug("Invalid MAC Address\n");
+               c2_print_macaddr(netdev);
+               free_netdev(netdev);
+               return NULL;
+       }
+
+       c2dev->netdev = netdev;
+
+       return netdev;
+}
+
+static int __devinit c2_probe(struct pci_dev *pcidev,
+                             const struct pci_device_id *ent)
+{
+       int ret = 0, i;
+       unsigned long reg0_start, reg0_flags, reg0_len;
+       unsigned long reg2_start, reg2_flags, reg2_len;
+       unsigned long reg4_start, reg4_flags, reg4_len;
+       unsigned kva_map_size;
+       struct net_device *netdev = NULL;
+       struct c2_dev *c2dev = NULL;
+       void __iomem *mmio_regs = NULL;
+
+       printk(KERN_INFO PFX "AMSO1100 Gigabit Ethernet driver v%s loaded\n",
+               DRV_VERSION);
+
+       /* Enable PCI device */
+       ret = pci_enable_device(pcidev);
+       if (ret) {
+               printk(KERN_ERR PFX "%s: Unable to enable PCI device\n",
+                       pci_name(pcidev));
+               goto bail0;
+       }
+
+       reg0_start = pci_resource_start(pcidev, BAR_0);
+       reg0_len = pci_resource_len(pcidev, BAR_0);
+       reg0_flags = pci_resource_flags(pcidev, BAR_0);
+
+       reg2_start = pci_resource_start(pcidev, BAR_2);
+       reg2_len = pci_resource_len(pcidev, BAR_2);
+       reg2_flags = pci_resource_flags(pcidev, BAR_2);
+
+       reg4_start = pci_resource_start(pcidev, BAR_4);
+       reg4_len = pci_resource_len(pcidev, BAR_4);
+       reg4_flags = pci_resource_flags(pcidev, BAR_4);
+
+       pr_debug("BAR0 size = 0x%lX bytes\n", reg0_len);
+       pr_debug("BAR2 size = 0x%lX bytes\n", reg2_len);
+       pr_debug("BAR4 size = 0x%lX bytes\n", reg4_len);
+
+       /* Make sure the PCI base addresses are MMIO resources */
+       if (!(reg0_flags & IORESOURCE_MEM) ||
+           !(reg2_flags & IORESOURCE_MEM) || !(reg4_flags & IORESOURCE_MEM)) {
+               printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+               ret = -ENODEV;
+               goto bail1;
+       }
+
+       /* Check for weird/broken PCI region reporting */
+       if ((reg0_len < C2_REG0_SIZE) ||
+           (reg2_len < C2_REG2_SIZE) || (reg4_len < C2_REG4_SIZE)) {
+               printk(KERN_ERR PFX "Invalid PCI region sizes\n");
+               ret = -ENODEV;
+               goto bail1;
+       }
+
+       /* Reserve PCI I/O and memory resources */
+       ret = pci_request_regions(pcidev, DRV_NAME);
+       if (ret) {
+               printk(KERN_ERR PFX "%s: Unable to request regions\n",
+                       pci_name(pcidev));
+               goto bail1;
+       }
+
+       if ((sizeof(dma_addr_t) > 4)) {
+               ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+               if (ret < 0) {
+                       printk(KERN_ERR PFX "64b DMA configuration failed\n");
+                       goto bail2;
+               }
+       } else {
+               ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+               if (ret < 0) {
+                       printk(KERN_ERR PFX "32b DMA configuration failed\n");
+                       goto bail2;
+               }
+       }
+
+       /* Enables bus-mastering on the device */
+       pci_set_master(pcidev);
+
+       /* Remap the adapter PCI registers in BAR4 */
+       mmio_regs = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+                                   sizeof(struct c2_adapter_pci_regs));
+       if (mmio_regs == 0UL) {
+               printk(KERN_ERR PFX
+                       "Unable to remap adapter PCI registers in BAR4\n");
+               ret = -EIO;
+               goto bail2;
+       }
+
+       /* Validate PCI regs magic */
+       for (i = 0; i < sizeof(c2_magic); i++) {
+               if (c2_magic[i] != readb(mmio_regs + C2_REGS_MAGIC + i)) {
+                       printk(KERN_ERR PFX "Downlevel Firmware boot loader "
+                               "[%d/%Zd: got 0x%x, exp 0x%x]. Use the cc_flash "
+                              "utility to update your boot loader\n",
+                               i + 1, sizeof(c2_magic),
+                               readb(mmio_regs + C2_REGS_MAGIC + i),
+                               c2_magic[i]);
+                       printk(KERN_ERR PFX "Adapter not claimed\n");
+                       iounmap(mmio_regs);
+                       ret = -EIO;
+                       goto bail2;
+               }
+       }
+
+       /* Validate the adapter version */
+       if (be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)) != C2_VERSION) {
+               printk(KERN_ERR PFX "Version mismatch "
+                       "[fw=%u, c2=%u], Adapter not claimed\n",
+                       be32_to_cpu(readl(mmio_regs + C2_REGS_VERS)),
+                       C2_VERSION);
+               ret = -EINVAL;
+               iounmap(mmio_regs);
+               goto bail2;
+       }
+
+       /* Validate the adapter IVN */
+       if (be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)) != C2_IVN) {
+               printk(KERN_ERR PFX "Downlevel FIrmware level. You should be using "
+                      "the OpenIB device support kit. "
+                      "[fw=0x%x, c2=0x%x], Adapter not claimed\n",
+                       be32_to_cpu(readl(mmio_regs + C2_REGS_IVN)),
+                       C2_IVN);
+               ret = -EINVAL;
+               iounmap(mmio_regs);
+               goto bail2;
+       }
+
+       /* Allocate hardware structure */
+       c2dev = (struct c2_dev *) ib_alloc_device(sizeof(*c2dev));
+       if (!c2dev) {
+               printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n",
+                       pci_name(pcidev));
+               ret = -ENOMEM;
+               iounmap(mmio_regs);
+               goto bail2;
+       }
+
+       memset(c2dev, 0, sizeof(*c2dev));
+       spin_lock_init(&c2dev->lock);
+       c2dev->pcidev = pcidev;
+       c2dev->cur_tx = 0;
+
+       /* Get the last RX index */
+       c2dev->cur_rx =
+           (be32_to_cpu(readl(mmio_regs + C2_REGS_HRX_CUR)) -
+            0xffffc000) / sizeof(struct c2_rxp_desc);
+
+       /* Request an interrupt line for the driver */
+       ret = request_irq(pcidev->irq, c2_interrupt, SA_SHIRQ, DRV_NAME, c2dev);
+       if (ret) {
+               printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n",
+                       pci_name(pcidev), pcidev->irq);
+               iounmap(mmio_regs);
+               goto bail3;
+       }
+
+       /* Set driver specific data */
+       pci_set_drvdata(pcidev, c2dev);
+
+       /* Initialize network device */
+       if ((netdev = c2_devinit(c2dev, mmio_regs)) == NULL) {
+               iounmap(mmio_regs);
+               goto bail4;
+       }
+
+       /* Save off the actual size prior to unmapping mmio_regs */
+       kva_map_size = be32_to_cpu(readl(mmio_regs + C2_REGS_PCI_WINSIZE));
+
+       /* Unmap the adapter PCI registers in BAR4 */
+       iounmap(mmio_regs);
+
+       /* Register network device */
+       ret = register_netdev(netdev);
+       if (ret) {
+               printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n",
+                       ret);
+               goto bail5;
+       }
+
+       /* Disable network packets */
+       netif_stop_queue(netdev);
+
+       /* Remap the adapter HRXDQ PA space to kernel VA space */
+       c2dev->mmio_rxp_ring = ioremap_nocache(reg4_start + C2_RXP_HRXDQ_OFFSET,
+                                              C2_RXP_HRXDQ_SIZE);
+       if (c2dev->mmio_rxp_ring == 0UL) {
+               printk(KERN_ERR PFX "Unable to remap MMIO HRXDQ region\n");
+               ret = -EIO;
+               goto bail6;
+       }
+
+       /* Remap the adapter HTXDQ PA space to kernel VA space */
+       c2dev->mmio_txp_ring = ioremap_nocache(reg4_start + C2_TXP_HTXDQ_OFFSET,
+                                              C2_TXP_HTXDQ_SIZE);
+       if (c2dev->mmio_txp_ring == 0UL) {
+               printk(KERN_ERR PFX "Unable to remap MMIO HTXDQ region\n");
+               ret = -EIO;
+               goto bail7;
+       }
+
+       /* Save off the current RX index in the last 4 bytes of the TXP Ring */
+       C2_SET_CUR_RX(c2dev, c2dev->cur_rx);
+
+       /* Remap the PCI registers in adapter BAR0 to kernel VA space */
+       c2dev->regs = ioremap_nocache(reg0_start, reg0_len);
+       if (c2dev->regs == 0UL) {
+               printk(KERN_ERR PFX "Unable to remap BAR0\n");
+               ret = -EIO;
+               goto bail8;
+       }
+
+       /* Remap the PCI registers in adapter BAR4 to kernel VA space */
+       c2dev->pa = reg4_start + C2_PCI_REGS_OFFSET;
+       c2dev->kva = ioremap_nocache(reg4_start + C2_PCI_REGS_OFFSET,
+                                    kva_map_size);
+       if (c2dev->kva == 0UL) {
+               printk(KERN_ERR PFX "Unable to remap BAR4\n");
+               ret = -EIO;
+               goto bail9;
+       }
+
+       /* Print out the MAC address */
+       c2_print_macaddr(netdev);
+
+       ret = c2_rnic_init(c2dev);
+       if (ret) {
+               printk(KERN_ERR PFX "c2_rnic_init failed: %d\n", ret);
+               goto bail10;
+       }
+
+       c2_register_device(c2dev);
+
+       return 0;
+
+ bail10:
+       iounmap(c2dev->kva);
+
+ bail9:
+       iounmap(c2dev->regs);
+
+ bail8:
+       iounmap(c2dev->mmio_txp_ring);
+
+ bail7:
+       iounmap(c2dev->mmio_rxp_ring);
+
+ bail6:
+       unregister_netdev(netdev);
+
+ bail5:
+       free_netdev(netdev);
+
+ bail4:
+       free_irq(pcidev->irq, c2dev);
+
+ bail3:
+       ib_dealloc_device(&c2dev->ibdev);
+
+ bail2:
+       pci_release_regions(pcidev);
+
+ bail1:
+       pci_disable_device(pcidev);
+
+ bail0:
+       return ret;
+}
+
+static void __devexit c2_remove(struct pci_dev *pcidev)
+{
+       struct c2_dev *c2dev = pci_get_drvdata(pcidev);
+       struct net_device *netdev = c2dev->netdev;
+
+       /* Unregister with OpenIB */
+       c2_unregister_device(c2dev);
+
+       /* Clean up the RNIC resources */
+       c2_rnic_term(c2dev);
+
+       /* Remove network device from the kernel */
+       unregister_netdev(netdev);
+
+       /* Free network device */
+       free_netdev(netdev);
+
+       /* Free the interrupt line */
+       free_irq(pcidev->irq, c2dev);
+
+       /* missing: Turn LEDs off here */
+
+       /* Unmap adapter PA space */
+       iounmap(c2dev->kva);
+       iounmap(c2dev->regs);
+       iounmap(c2dev->mmio_txp_ring);
+       iounmap(c2dev->mmio_rxp_ring);
+
+       /* Free the hardware structure */
+       ib_dealloc_device(&c2dev->ibdev);
+
+       /* Release reserved PCI I/O and memory resources */
+       pci_release_regions(pcidev);
+
+       /* Disable PCI device */
+       pci_disable_device(pcidev);
+
+       /* Clear driver specific data */
+       pci_set_drvdata(pcidev, NULL);
+}
+
+static struct pci_driver c2_pci_driver = {
+       .name = DRV_NAME,
+       .id_table = c2_pci_table,
+       .probe = c2_probe,
+       .remove = __devexit_p(c2_remove),
+};
+
+static int __init c2_init_module(void)
+{
+       return pci_module_init(&c2_pci_driver);
+}
+
+static void __exit c2_exit_module(void)
+{
+       pci_unregister_driver(&c2_pci_driver);
+}
+
+module_init(c2_init_module);
+module_exit(c2_exit_module);
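
The receive path in c2_rx_interrupt() above depends on a fixed framing convention: the adapter writes an 8-byte struct c2_rxp_hdr (defined in c2.h below) at the start of every host RX buffer, followed by the raw Ethernet frame, and the header's len field excludes the header itself. A condensed sketch of that layout and of the strip-and-deliver step, using a hypothetical helper that mirrors the driver code:

#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include "c2.h"

/*
 * Illustrative layout of one host RX buffer (not driver code):
 *
 *   skb->head
 *   +---------------------+-------------------------------+
 *   | struct c2_rxp_hdr   | raw Ethernet frame            |
 *   | flags,status,len,.. | (rxp_hdr->len bytes)          |
 *   +---------------------+-------------------------------+
 */
static void c2_deliver_frame(struct net_device *netdev, struct sk_buff *skb,
			     struct c2_rxp_hdr *rxp_hdr)
{
	u32 buflen = rxp_hdr->len;	/* frame length, header excluded */

	skb->data += sizeof(*rxp_hdr);	/* skip the 8-byte adapter header */
	skb->tail  = skb->data + buflen;
	skb->len   = buflen;
	skb->dev   = netdev;
	skb->protocol = eth_type_trans(skb, netdev);
	netif_rx(skb);
}
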
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
new file mode 100644 (file)
index 0000000..1b17dcd
--- /dev/null
@@ -0,0 +1,551 @@
+/*
+ * Copyright (c) 2005 Ammasso, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __C2_H
+#define __C2_H
+
+#include <linux/netdevice.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <asm/semaphore.h>
+
+#include "c2_provider.h"
+#include "c2_mq.h"
+#include "c2_status.h"
+
+#define DRV_NAME     "c2"
+#define DRV_VERSION  "1.1"
+#define PFX          DRV_NAME ": "
+
+#define BAR_0                0
+#define BAR_2                2
+#define BAR_4                4
+
+#define RX_BUF_SIZE         (1536 + 8)
+#define ETH_JUMBO_MTU        9000
+#define C2_MAGIC            "CEPHEUS"
+#define C2_VERSION           4
+#define C2_IVN              (18 & 0x7fffffff)
+
+#define C2_REG0_SIZE        (16 * 1024)
+#define C2_REG2_SIZE        (2 * 1024 * 1024)
+#define C2_REG4_SIZE        (256 * 1024 * 1024)
+#define C2_NUM_TX_DESC       341
+#define C2_NUM_RX_DESC       256
+#define C2_PCI_REGS_OFFSET  (0x10000)
+#define C2_RXP_HRXDQ_OFFSET (((C2_REG4_SIZE)/2))
+#define C2_RXP_HRXDQ_SIZE   (4096)
+#define C2_TXP_HTXDQ_OFFSET (((C2_REG4_SIZE)/2) + C2_RXP_HRXDQ_SIZE)
+#define C2_TXP_HTXDQ_SIZE   (4096)
+#define C2_TX_TIMEOUT      (6*HZ)
+
+/* CEPHEUS */
+static const u8 c2_magic[] = {
+       0x43, 0x45, 0x50, 0x48, 0x45, 0x55, 0x53
+};
+
+enum adapter_pci_regs {
+       C2_REGS_MAGIC = 0x0000,
+       C2_REGS_VERS = 0x0008,
+       C2_REGS_IVN = 0x000C,
+       C2_REGS_PCI_WINSIZE = 0x0010,
+       C2_REGS_Q0_QSIZE = 0x0014,
+       C2_REGS_Q0_MSGSIZE = 0x0018,
+       C2_REGS_Q0_POOLSTART = 0x001C,
+       C2_REGS_Q0_SHARED = 0x0020,
+       C2_REGS_Q1_QSIZE = 0x0024,
+       C2_REGS_Q1_MSGSIZE = 0x0028,
+       C2_REGS_Q1_SHARED = 0x0030,
+       C2_REGS_Q2_QSIZE = 0x0034,
+       C2_REGS_Q2_MSGSIZE = 0x0038,
+       C2_REGS_Q2_SHARED = 0x0040,
+       C2_REGS_ENADDR = 0x004C,
+       C2_REGS_RDMA_ENADDR = 0x0054,
+       C2_REGS_HRX_CUR = 0x006C,
+};
+
+struct c2_adapter_pci_regs {
+       char reg_magic[8];
+       u32 version;
+       u32 ivn;
+       u32 pci_window_size;
+       u32 q0_q_size;
+       u32 q0_msg_size;
+       u32 q0_pool_start;
+       u32 q0_shared;
+       u32 q1_q_size;
+       u32 q1_msg_size;
+       u32 q1_pool_start;
+       u32 q1_shared;
+       u32 q2_q_size;
+       u32 q2_msg_size;
+       u32 q2_pool_start;
+       u32 q2_shared;
+       u32 log_start;
+       u32 log_size;
+       u8 host_enaddr[8];
+       u8 rdma_enaddr[8];
+       u32 crash_entry;
+       u32 crash_ready[2];
+       u32 fw_txd_cur;
+       u32 fw_hrxd_cur;
+       u32 fw_rxd_cur;
+};
+
+enum pci_regs {
+       C2_HISR = 0x0000,
+       C2_DISR = 0x0004,
+       C2_HIMR = 0x0008,
+       C2_DIMR = 0x000C,
+       C2_NISR0 = 0x0010,
+       C2_NISR1 = 0x0014,
+       C2_NIMR0 = 0x0018,
+       C2_NIMR1 = 0x001C,
+       C2_IDIS = 0x0020,
+};
+
+enum {
+       C2_PCI_HRX_INT = 1 << 8,
+       C2_PCI_HTX_INT = 1 << 17,
+       C2_PCI_HRX_QUI = 1 << 31,
+};
+
+/*
+ * Cepheus registers in BAR0.
+ */
+struct c2_pci_regs {
+       u32 hostisr;
+       u32 dmaisr;
+       u32 hostimr;
+       u32 dmaimr;
+       u32 netisr0;
+       u32 netisr1;
+       u32 netimr0;
+       u32 netimr1;
+       u32 int_disable;
+};
+
+/* TXP flags */
+enum c2_txp_flags {
+       TXP_HTXD_DONE = 0,
+       TXP_HTXD_READY = 1 << 0,
+       TXP_HTXD_UNINIT = 1 << 1,
+};
+
+/* RXP flags */
+enum c2_rxp_flags {
+       RXP_HRXD_UNINIT = 0,
+       RXP_HRXD_READY = 1 << 0,
+       RXP_HRXD_DONE = 1 << 1,
+};
+
+/* RXP status */
+enum c2_rxp_status {
+       RXP_HRXD_ZERO = 0,
+       RXP_HRXD_OK = 1 << 0,
+       RXP_HRXD_BUF_OV = 1 << 1,
+};
+
+/* TXP descriptor fields */
+enum txp_desc {
+       C2_TXP_FLAGS = 0x0000,
+       C2_TXP_LEN = 0x0002,
+       C2_TXP_ADDR = 0x0004,
+};
+
+/* RXP descriptor fields */
+enum rxp_desc {
+       C2_RXP_FLAGS = 0x0000,
+       C2_RXP_STATUS = 0x0002,
+       C2_RXP_COUNT = 0x0004,
+       C2_RXP_LEN = 0x0006,
+       C2_RXP_ADDR = 0x0008,
+};
+
+struct c2_txp_desc {
+       u16 flags;
+       u16 len;
+       u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_desc {
+       u16 flags;
+       u16 status;
+       u16 count;
+       u16 len;
+       u64 addr;
+} __attribute__ ((packed));
+
+struct c2_rxp_hdr {
+       u16 flags;
+       u16 status;
+       u16 len;
+       u16 rsvd;
+} __attribute__ ((packed));
+
+struct c2_tx_desc {
+       u32 len;
+       u32 status;
+       dma_addr_t next_offset;
+};
+
+struct c2_rx_desc {
+       u32 len;
+       u32 status;
+       dma_addr_t next_offset;
+};
+
+struct c2_alloc {
+       u32 last;
+       u32 max;
+       spinlock_t lock;
+       unsigned long *table;
+};
+
+struct c2_array {
+       struct {
+               void **page;
+               int used;
+       } *page_list;
+};
+
+/*
+ * The MQ shared pointer pool is organized as a linked list of
+ * chunks.  Each chunk contains a linked list of free shared pointers
+ * that can be allocated to a given user-mode client.
+ */
+struct sp_chunk {
+       struct sp_chunk *next;
+       dma_addr_t dma_addr;
+       DECLARE_PCI_UNMAP_ADDR(mapping);
+       u16 head;
+       u16 shared_ptr[0];
+};
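+
+/*
+ * Illustrative sketch (an assumption about how the pool is consumed,
+ * not something defined in this header): shared_ptr[] likely doubles
+ * as an index-linked free list, with head naming the first free slot.
+ * An allocator would hand out shared_ptr[head], advance head to the
+ * index stored there, and fall through to chunk->next once the chunk
+ * is exhausted.
+ */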
+
+struct c2_pd_table {
+       u32 last;
+       u32 max;
+       spinlock_t lock;
+       unsigned long *table;
+};
+
+struct c2_qp_table {
+       struct idr idr;
+       spinlock_t lock;
+       int last;
+};
+
+struct c2_element {
+       struct c2_element *next;
+       void *ht_desc;          /* host     descriptor */
+       void __iomem *hw_desc;  /* hardware descriptor */
+       struct sk_buff *skb;
+       dma_addr_t mapaddr;
+       u32 maplen;
+};
+
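+/*
+ * Host-side descriptor ring bookkeeping.  The naming suggests that
+ * to_use marks the next element to hand to the adapter, to_clean the
+ * oldest element still outstanding, and start/count describe the ring
+ * storage itself.
+ */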
+struct c2_ring {
+       struct c2_element *to_clean;
+       struct c2_element *to_use;
+       struct c2_element *start;
+       unsigned long count;
+};
+
+struct c2_dev {
+       struct ib_device ibdev;
+       void __iomem *regs;
+       void __iomem *mmio_txp_ring; /* remapped adapter memory for hw rings */
+       void __iomem *mmio_rxp_ring;
+       spinlock_t lock;
+       struct pci_dev *pcidev;
+       struct net_device *netdev;
+       struct net_device *pseudo_netdev;
+       unsigned int cur_tx;
+       unsigned int cur_rx;
+       u32 adapter_handle;
+       int device_cap_flags;
+       void __iomem *kva;      /* KVA device memory */
+       unsigned long pa;       /* PA device memory */
+       void **qptr_array;
+
+       kmem_cache_t *host_msg_cache;
+
+       struct list_head cca_link;              /* adapter list */
+       struct list_head eh_wakeup_list;        /* event wakeup list */
+       wait_queue_head_t req_vq_wo;
+
+       /* Cached RNIC properties */
+       struct ib_device_attr props;
+
+       struct c2_pd_table pd_table;
+       struct c2_qp_table qp_table;
+       int ports;              /* num of GigE ports */
+       int devnum;
+       spinlock_t vqlock;      /* sync vbs req MQ */
+
+       /* Verbs Queues */
+       struct c2_mq req_vq;    /* Verbs Request MQ */
+       struct c2_mq rep_vq;    /* Verbs Reply MQ */
+       struct c2_mq aeq;       /* Async Events MQ */
+
+       /* Kernel client MQs */
+       struct sp_chunk *kern_mqsp_pool;
+
+       /* Device updates these values when posting messages to a host
+        * target queue */
+       u16 req_vq_shared;
+       u16 rep_vq_shared;
+       u16 aeq_shared;
+       u16 irq_claimed;
+
+       /*
+        * Shared host target pages for user-accessible MQs.
+        */
+       int hthead;             /* index of first free entry */
+       void *htpages;          /* kernel vaddr */
+       int htlen;              /* length of htpages memory */
+       void *htuva;            /* user mapped vaddr */
+       spinlock_t htlock;      /* serialize allocation */
+
+       u64 adapter_hint_uva;   /* access to the activity FIFO */
+
+       u16 *hint_count;
+       dma_addr_t hint_count_dma;
+       u16 hints_read;
+
+       int init;               /* TRUE if it's ready */
+       char ae_cache_name[16];
+       char vq_cache_name[16];
+};
+
+struct c2_port {
+       u32 msg_enable;
+       struct c2_dev *c2dev;
+       struct net_device *netdev;
+
+       spinlock_t tx_lock;
+       u32 tx_avail;
+       struct c2_ring tx_ring;
+       struct c2_ring rx_ring;
+
+       void *mem;              /* PCI memory for host rings */
+       dma_addr_t dma;
+       unsigned long mem_size;
+
+       u32 rx_buf_size;
+
+       struct net_device_stats netstats;
+};
+
+/*
+ * Activity FIFO registers in BAR0.
+ */
+#define PCI_BAR0_HOST_HINT     0x100
+#define PCI_BAR0_ADAPTER_HINT  0x2000
+
+/*
+ * Completion queue flags.
+ */
+#define CQ_ARMED       0x01
+#define CQ_WAIT_FOR_DMA        0x80
+
+/*
+ * The format of a hint is as follows:
+ * The lower 16 bits are the count of hints for the queue.
+ * The next 15 bits are the qp_index.
+ * The uppermost bit depends on who reads it:
+ *    If read by the producer, it means Full (1) or Not-Full (0).
+ *    If read by the consumer, it means Empty (1) or Not-Empty (0).
+ */
+#define C2_HINT_MAKE(q_index, hint_count) (((q_index) << 16) | (hint_count))
+#define C2_HINT_GET_INDEX(hint) (((hint) & 0x7FFF0000) >> 16)
+#define C2_HINT_GET_COUNT(hint) ((hint) & 0x0000FFFF)
+
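+/*
+ * Worked example (illustrative only, not part of the hardware
+ * definition): for queue index 3 with a hint count of 5,
+ * C2_HINT_MAKE(3, 5) yields 0x00030005.  Decoding the same word,
+ * C2_HINT_GET_INDEX(0x00030005) recovers 3 and
+ * C2_HINT_GET_COUNT(0x00030005) recovers 5; the Full/Empty bit
+ * (0x80000000) is masked off by C2_HINT_GET_INDEX.
+ */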
+
+/*
+ * The following defines the offset in SDRAM of the struct
+ * c2_adapter_pci_regs layout.
+ */
+#define C2_ADAPTER_PCI_REGS_OFFSET 0x10000
+
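+/*
+ * 64-bit MMIO fallbacks for architectures that do not provide
+ * readq/writeq: each 64-bit access is composed of two 32-bit
+ * accesses (low word at addr, high word at addr + 4), so it is not
+ * atomic with respect to the adapter.
+ */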
+#ifndef readq
+static inline u64 readq(const void __iomem * addr)
+{
+       u64 ret = readl(addr + 4);
+       ret <<= 32;
+       ret |= readl(addr);
+
+       return ret;
+}
+#endif
+
+#ifndef writeq
+static inline void __raw_writeq(u64 val, void __iomem * addr)
+{
+       __raw_writel((u32) (val), addr);
+       __raw_writel((u32) (val >> 32), (addr + 4));
+}
+#endif
+
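+/*
+ * The current RX index appears to be kept in the last 32-bit word
+ * (offset 4092) of the 4 KB TXP descriptor queue window, stored
+ * big-endian; these macros write and read that shared location.
+ */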
+#define C2_SET_CUR_RX(c2dev, cur_rx) \
+       __raw_writel(cpu_to_be32(cur_rx), c2dev->mmio_txp_ring + 4092)
+
+#define C2_GET_CUR_RX(c2dev) \
+       be32_to_cpu(readl(c2dev->mmio_txp_ring + 4092))
+
+static inline struct c2_dev *to_c2dev(struct ib_device *ibdev)
+{
+       return container_of(ibdev, struct c2_dev, ibdev);
+}
+
+static inline int c2_errno(void *r