+/* Fill in bucket_map[], given nsgs (the max number of
+ * scatter gather elements supported) and bucket[],
+ * which is an array of 8 integers. The bucket[] array
+ * contains 8 different DMA transfer sizes (in 16
+ * byte increments) which the controller uses to fetch
+ * commands. This function fills in bucket_map[], which
+ * maps a given number of scatter gather elements to one of
+ * the 8 DMA transfer sizes. The point of it is to allow the
+ * controller to only do as much DMA as needed to fetch the
+ * command, with the DMA transfer size encoded in the lower
+ * bits of the command address.
+ */
+static void calc_bucket_map(int bucket[], int num_buckets,
+ int nsgs, int *bucket_map)
+{
+ int i, j, b, size;
+
+ /* even a command with 0 SGs requires 4 blocks */
+#define MINIMUM_TRANSFER_BLOCKS 4
+#define NUM_BUCKETS 8
+ /* Note, bucket_map must have nsgs+1 entries. */
+ for (i = 0; i <= nsgs; i++) {
+ /* Compute size of a command with i SG entries */
+ size = i + MINIMUM_TRANSFER_BLOCKS;
+ b = num_buckets; /* Assume the biggest bucket */
+ /* Find the bucket that is just big enough */
+ for (j = 0; j < 8; j++) {
+ if (bucket[j] >= size) {
+ b = j;
+ break;
+ }
+ }
+ /* for a command with i SG entries, use bucket b. */
+ bucket_map[i] = b;
+ }
+}
+
+static void __devinit cciss_wait_for_mode_change_ack(ctlr_info_t *h)
+{
+ int i;
+
+ /* under certain very rare conditions, this can take awhile.
+ * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
+ * as we enter this code.) */
+ for (i = 0; i < MAX_CONFIG_WAIT; i++) {
+ if (!(readl(h->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
+ break;
+ msleep(10);
+ }
+}
+
+static __devinit void cciss_enter_performant_mode(ctlr_info_t *h)
+{
+ /* This is a bit complicated. There are 8 registers on
+ * the controller which we write to to tell it 8 different
+ * sizes of commands which there may be. It's a way of
+ * reducing the DMA done to fetch each command. Encoded into
+ * each command's tag are 3 bits which communicate to the controller
+ * which of the eight sizes that command fits within. The size of
+ * each command depends on how many scatter gather entries there are.
+ * Each SG entry requires 16 bytes. The eight registers are programmed
+ * with the number of 16-byte blocks a command of that size requires.
+ * The smallest command possible requires 5 such 16 byte blocks.
+ * the largest command possible requires MAXSGENTRIES + 4 16-byte
+ * blocks. Note, this only extends to the SG entries contained
+ * within the command block, and does not extend to chained blocks
+ * of SG elements. bft[] contains the eight values we write to
+ * the registers. They are not evenly distributed, but have more
+ * sizes for small commands, and fewer sizes for larger commands.
+ */
+ __u32 trans_offset;
+ int bft[8] = { 5, 6, 8, 10, 12, 20, 28, MAXSGENTRIES + 4};
+ /*
+ * 5 = 1 s/g entry or 4k
+ * 6 = 2 s/g entry or 8k
+ * 8 = 4 s/g entry or 16k
+ * 10 = 6 s/g entry or 24k
+ */
+ unsigned long register_value;
+ BUILD_BUG_ON(28 > MAXSGENTRIES + 4);
+
+ h->reply_pool_wraparound = 1; /* spec: init to 1 */
+
+ /* Controller spec: zero out this buffer. */
+ memset(h->reply_pool, 0, h->max_commands * sizeof(__u64));
+ h->reply_pool_head = h->reply_pool;
+
+ trans_offset = readl(&(h->cfgtable->TransMethodOffset));
+ calc_bucket_map(bft, ARRAY_SIZE(bft), h->maxsgentries,
+ h->blockFetchTable);
+ writel(bft[0], &h->transtable->BlockFetch0);
+ writel(bft[1], &h->transtable->BlockFetch1);
+ writel(bft[2], &h->transtable->BlockFetch2);
+ writel(bft[3], &h->transtable->BlockFetch3);
+ writel(bft[4], &h->transtable->BlockFetch4);
+ writel(bft[5], &h->transtable->BlockFetch5);
+ writel(bft[6], &h->transtable->BlockFetch6);
+ writel(bft[7], &h->transtable->BlockFetch7);
+
+ /* size of controller ring buffer */
+ writel(h->max_commands, &h->transtable->RepQSize);
+ writel(1, &h->transtable->RepQCount);
+ writel(0, &h->transtable->RepQCtrAddrLow32);
+ writel(0, &h->transtable->RepQCtrAddrHigh32);
+ writel(h->reply_pool_dhandle, &h->transtable->RepQAddr0Low32);
+ writel(0, &h->transtable->RepQAddr0High32);
+ writel(CFGTBL_Trans_Performant,
+ &(h->cfgtable->HostWrite.TransportRequest));
+
+ writel(CFGTBL_ChangeReq, h->vaddr + SA5_DOORBELL);
+ cciss_wait_for_mode_change_ack(h);
+ register_value = readl(&(h->cfgtable->TransportActive));
+ if (!(register_value & CFGTBL_Trans_Performant))
+ dev_warn(&h->pdev->dev, "cciss: unable to get board into"
+ " performant mode\n");
+}
+
+static void __devinit cciss_put_controller_into_performant_mode(ctlr_info_t *h)
+{
+ __u32 trans_support;
+
+ dev_dbg(&h->pdev->dev, "Trying to put board into Performant mode\n");
+ /* Attempt to put controller into performant mode if supported */
+ /* Does board support performant mode? */
+ trans_support = readl(&(h->cfgtable->TransportSupport));
+ if (!(trans_support & PERFORMANT_MODE))
+ return;
+
+ dev_dbg(&h->pdev->dev, "Placing controller into performant mode\n");
+ /* Performant mode demands commands on a 32 byte boundary
+ * pci_alloc_consistent aligns on page boundarys already.
+ * Just need to check if divisible by 32
+ */
+ if ((sizeof(CommandList_struct) % 32) != 0) {
+ dev_warn(&h->pdev->dev, "%s %d %s\n",
+ "cciss info: command size[",
+ (int)sizeof(CommandList_struct),
+ "] not divisible by 32, no performant mode..\n");
+ return;
+ }
+
+ /* Performant mode ring buffer and supporting data structures */
+ h->reply_pool = (__u64 *)pci_alloc_consistent(
+ h->pdev, h->max_commands * sizeof(__u64),
+ &(h->reply_pool_dhandle));
+
+ /* Need a block fetch table for performant mode */
+ h->blockFetchTable = kmalloc(((h->maxsgentries+1) *
+ sizeof(__u32)), GFP_KERNEL);
+
+ if ((h->reply_pool == NULL) || (h->blockFetchTable == NULL))
+ goto clean_up;
+
+ cciss_enter_performant_mode(h);
+
+ /* Change the access methods to the performant access methods */
+ h->access = SA5_performant_access;
+ h->transMethod = CFGTBL_Trans_Performant;
+
+ return;
+clean_up:
+ kfree(h->blockFetchTable);
+ if (h->reply_pool)
+ pci_free_consistent(h->pdev,
+ h->max_commands * sizeof(__u64),
+ h->reply_pool,
+ h->reply_pool_dhandle);
+ return;
+
+} /* cciss_put_controller_into_performant_mode */
+