Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 68d64f0..eda2042 100644
@@ -26,6 +26,7 @@
 #include <linux/scatterlist.h>
 #include <linux/if_vlan.h>
 #include <linux/slab.h>
+#include <linux/cpu.h>
 
 static int napi_weight = 128;
 module_param(napi_weight, int, 0444);
@@ -123,6 +124,12 @@ struct virtnet_info {
 
        /* Is the affinity hint set for virtqueues? */
        bool affinity_hint_set;
+
+       /* Per-cpu variable that records the mapping from CPU to virtqueue */
+       int __percpu *vq_index;
+
+       /* CPU hotplug notifier */
+       struct notifier_block nb;
 };
 
 struct skb_vnet_hdr {
@@ -130,7 +137,6 @@ struct skb_vnet_hdr {
                struct virtio_net_hdr hdr;
                struct virtio_net_hdr_mrg_rxbuf mhdr;
        };
-       unsigned int num_sg;
 };
 
 struct padded_vnet_hdr {
@@ -221,6 +227,7 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page,
        skb->len += size;
        skb->truesize += PAGE_SIZE;
        skb_shinfo(skb)->nr_frags++;
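+       /* The appended page may be shared, so flag the skb so later code
+        * treats the frag data as unstable.
+        */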
+       skb_shinfo(skb)->gso_type |= SKB_GSO_SHARED_FRAG;
        *len -= size;
 }
 
@@ -380,16 +387,18 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
                 ntohs(skb->protocol), skb->len, skb->pkt_type);
 
        if (hdr->hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
+               unsigned short gso_type = 0;
+
                pr_debug("GSO!\n");
                switch (hdr->hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
                case VIRTIO_NET_HDR_GSO_TCPV4:
-                       skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+                       gso_type = SKB_GSO_TCPV4;
                        break;
                case VIRTIO_NET_HDR_GSO_UDP:
-                       skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
+                       gso_type = SKB_GSO_UDP;
                        break;
                case VIRTIO_NET_HDR_GSO_TCPV6:
-                       skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+                       gso_type = SKB_GSO_TCPV6;
                        break;
                default:
                        net_warn_ratelimited("%s: bad gso type %u.\n",
@@ -398,7 +407,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
                }
 
                if (hdr->hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
-                       skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
+                       gso_type |= SKB_GSO_TCP_ECN;
 
                skb_shinfo(skb)->gso_size = hdr->hdr.gso_size;
                if (skb_shinfo(skb)->gso_size == 0) {
@@ -406,6 +415,7 @@ static void receive_buf(struct receive_queue *rq, void *buf, unsigned int len)
                        goto frame_err;
                }
 
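+               /* OR rather than assign, so the SKB_GSO_SHARED_FRAG flag
+                * set in set_skb_frag() is not clobbered.
+                */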
+               skb_shinfo(skb)->gso_type |= gso_type;
                /* Header must be checked, and gso_segs computed. */
                skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
                skb_shinfo(skb)->gso_segs = 0;
@@ -530,10 +540,10 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp)
                        err = add_recvbuf_small(rq, gfp);
 
                oom = err == -ENOMEM;
-               if (err < 0)
+               if (err)
                        break;
                ++rq->num;
-       } while (err > 0);
+       } while (rq->vq->num_free);
        if (unlikely(rq->num > rq->max))
                rq->max = rq->num;
        virtqueue_kick(rq->vq);
@@ -640,10 +650,10 @@ static int virtnet_open(struct net_device *dev)
        return 0;
 }
 
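+/* Nothing useful to return any more: callers check sq->vq->num_free for
+ * capacity instead of summing per-skb descriptor counts.
+ */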
-static unsigned int free_old_xmit_skbs(struct send_queue *sq)
+static void free_old_xmit_skbs(struct send_queue *sq)
 {
        struct sk_buff *skb;
-       unsigned int len, tot_sgs = 0;
+       unsigned int len;
        struct virtnet_info *vi = sq->vq->vdev->priv;
        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 
@@ -655,10 +665,8 @@ static unsigned int free_old_xmit_skbs(struct send_queue *sq)
                stats->tx_packets++;
                u64_stats_update_end(&stats->tx_syncp);
 
-               tot_sgs += skb_vnet_hdr(skb)->num_sg;
                dev_kfree_skb_any(skb);
        }
-       return tot_sgs;
 }
 
 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
@@ -666,6 +674,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
        struct skb_vnet_hdr *hdr = skb_vnet_hdr(skb);
        const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
        struct virtnet_info *vi = sq->vq->vdev->priv;
+       unsigned int num_sg;
 
        pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
 
@@ -704,8 +713,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
        else
                sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
 
-       hdr->num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
-       return virtqueue_add_buf(sq->vq, sq->sg, hdr->num_sg,
+       num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
+       return virtqueue_add_buf(sq->vq, sq->sg, num_sg,
                                 0, skb, GFP_ATOMIC);
 }
 
@@ -714,28 +723,20 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
        struct virtnet_info *vi = netdev_priv(dev);
        int qnum = skb_get_queue_mapping(skb);
        struct send_queue *sq = &vi->sq[qnum];
-       int capacity;
+       int err;
 
        /* Free up any pending old buffers before queueing new ones. */
        free_old_xmit_skbs(sq);
 
        /* Try to transmit */
-       capacity = xmit_skb(sq, skb);
-
-       /* This can happen with OOM and indirect buffers. */
-       if (unlikely(capacity < 0)) {
-               if (likely(capacity == -ENOMEM)) {
-                       if (net_ratelimit())
-                               dev_warn(&dev->dev,
-                                        "TXQ (%d) failure: out of memory\n",
-                                        qnum);
-               } else {
-                       dev->stats.tx_fifo_errors++;
-                       if (net_ratelimit())
-                               dev_warn(&dev->dev,
-                                        "Unexpected TXQ (%d) failure: %d\n",
-                                        qnum, capacity);
-               }
+       err = xmit_skb(sq, skb);
+
+       /* This should not happen! */
+       if (unlikely(err)) {
+               dev->stats.tx_fifo_errors++;
+               if (net_ratelimit())
+                       dev_warn(&dev->dev,
+                                "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
                dev->stats.tx_dropped++;
                kfree_skb(skb);
                return NETDEV_TX_OK;
@@ -748,12 +749,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /* Apparently nice girls don't return TX_BUSY; stop the queue
         * before it gets out of hand.  Naturally, this wastes entries. */
-       if (capacity < 2+MAX_SKB_FRAGS) {
+       if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
                netif_stop_subqueue(dev, qnum);
                if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
                        /* More just got used, free them then recheck. */
-                       capacity += free_old_xmit_skbs(sq);
-                       if (capacity >= 2+MAX_SKB_FRAGS) {
+                       free_old_xmit_skbs(sq);
+                       if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
                                netif_start_subqueue(dev, qnum);
                                virtqueue_disable_cb(sq->vq);
                        }
@@ -763,19 +764,77 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
        return NETDEV_TX_OK;
 }
 
+/*
+ * Send command via the control virtqueue and check status.  Commands
+ * supported by the hypervisor, as indicated by feature bits, should
+ * never fail unless improperly formatted.
+ */
+static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
+                                struct scatterlist *data, int out, int in)
+{
+       struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
+       struct virtio_net_ctrl_hdr ctrl;
+       virtio_net_ctrl_ack status = ~0;
+       unsigned int tmp;
+       int i;
+
+       /* Caller should know better */
+       BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
+               (out + in > VIRTNET_SEND_COMMAND_SG_MAX));
+
+       out++; /* Add header */
+       in++; /* Add return status */
+
+       ctrl.class = class;
+       ctrl.cmd = cmd;
+
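+       /* sg layout: [0] command header, then the caller's out and in
+        * buffers, with the device-written status byte last.
+        */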
+       sg_init_table(sg, out + in);
+
+       sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
+       for_each_sg(data, s, out + in - 2, i)
+               sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
+       sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
+
+       BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0);
+
+       virtqueue_kick(vi->cvq);
+
+       /* Spin for a response; the kick causes an ioport write, trapping
+        * into the hypervisor, so the request should be handled immediately.
+        */
+       while (!virtqueue_get_buf(vi->cvq, &tmp))
+               cpu_relax();
+
+       return status == VIRTIO_NET_OK;
+}
+
 static int virtnet_set_mac_address(struct net_device *dev, void *p)
 {
        struct virtnet_info *vi = netdev_priv(dev);
        struct virtio_device *vdev = vi->vdev;
        int ret;
+       struct sockaddr *addr = p;
+       struct scatterlist sg;
 
-       ret = eth_mac_addr(dev, p);
+       ret = eth_prepare_mac_addr_change(dev, p);
        if (ret)
                return ret;
 
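+       /* Prefer the control-vq command when the host offers it: the device
+        * acks the change, unlike silent config space writes.
+        */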
-       if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
+               sg_init_one(&sg, addr->sa_data, dev->addr_len);
+               if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
+                                         VIRTIO_NET_CTRL_MAC_ADDR_SET,
+                                         &sg, 1, 0)) {
+                       dev_warn(&vdev->dev,
+                                "Failed to set mac address by vq command.\n");
+                       return -EINVAL;
+               }
+       } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC)) {
                vdev->config->set(vdev, offsetof(struct virtio_net_config, mac),
-                                 dev->dev_addr, dev->addr_len);
+                                 addr->sa_data, dev->addr_len);
+       }
+
+       eth_commit_mac_addr_change(dev, p);
 
        return 0;
 }
@@ -829,51 +888,6 @@ static void virtnet_netpoll(struct net_device *dev)
 }
 #endif
 
-/*
- * Send command via the control virtqueue and check status.  Commands
- * supported by the hypervisor, as indicated by feature bits, should
- * never fail unless improperly formatted.
- */
-static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
-                                struct scatterlist *data, int out, int in)
-{
-       struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
-       struct virtio_net_ctrl_hdr ctrl;
-       virtio_net_ctrl_ack status = ~0;
-       unsigned int tmp;
-       int i;
-
-       /* Caller should know better */
-       BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
-               (out + in > VIRTNET_SEND_COMMAND_SG_MAX));
-
-       out++; /* Add header */
-       in++; /* Add return status */
-
-       ctrl.class = class;
-       ctrl.cmd = cmd;
-
-       sg_init_table(sg, out + in);
-
-       sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
-       for_each_sg(data, s, out + in - 2, i)
-               sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
-       sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
-
-       BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0);
-
-       virtqueue_kick(vi->cvq);
-
-       /*
-        * Spin for a response, the kick causes an ioport write, trapping
-        * into the hypervisor, so the request should be handled immediately.
-        */
-       while (!virtqueue_get_buf(vi->cvq, &tmp))
-               cpu_relax();
-
-       return status == VIRTIO_NET_OK;
-}
-
 static void virtnet_ack_link_announce(struct virtnet_info *vi)
 {
        rtnl_lock();
@@ -1023,32 +1037,75 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
        return 0;
 }
 
-static void virtnet_set_affinity(struct virtnet_info *vi, bool set)
+static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
 {
        int i;
+       int cpu;
+
+       if (vi->affinity_hint_set) {
+               for (i = 0; i < vi->max_queue_pairs; i++) {
+                       virtqueue_set_affinity(vi->rq[i].vq, -1);
+                       virtqueue_set_affinity(vi->sq[i].vq, -1);
+               }
+
+               vi->affinity_hint_set = false;
+       }
+
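+       /* Rebuild the per-cpu txq map; hcpu (the CPU going down, or -1 for
+        * none) is marked -1 so virtnet_select_queue() falls back to txq 0.
+        */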
+       i = 0;
+       for_each_online_cpu(cpu) {
+               if (cpu == hcpu) {
+                       *per_cpu_ptr(vi->vq_index, cpu) = -1;
+               } else {
+                       *per_cpu_ptr(vi->vq_index, cpu) =
+                               ++i % vi->curr_queue_pairs;
+               }
+       }
+}
+
+static void virtnet_set_affinity(struct virtnet_info *vi)
+{
+       int i;
+       int cpu;
 
        /* In multiqueue mode, when the number of CPUs equals the number of
         * queue pairs, we let each queue pair be private to one CPU by
         * setting the affinity hint to eliminate the contention.
         */
-       if ((vi->curr_queue_pairs == 1 ||
-            vi->max_queue_pairs != num_online_cpus()) && set) {
-               if (vi->affinity_hint_set)
-                       set = false;
-               else
-                       return;
+       if (vi->curr_queue_pairs == 1 ||
+           vi->max_queue_pairs != num_online_cpus()) {
+               virtnet_clean_affinity(vi, -1);
+               return;
        }
 
-       for (i = 0; i < vi->max_queue_pairs; i++) {
-               int cpu = set ? i : -1;
+       i = 0;
+       for_each_online_cpu(cpu) {
                virtqueue_set_affinity(vi->rq[i].vq, cpu);
                virtqueue_set_affinity(vi->sq[i].vq, cpu);
+               *per_cpu_ptr(vi->vq_index, cpu) = i;
+               i++;
        }
 
-       if (set)
-               vi->affinity_hint_set = true;
-       else
-               vi->affinity_hint_set = false;
+       vi->affinity_hint_set = true;
+}
+
+static int virtnet_cpu_callback(struct notifier_block *nfb,
+                               unsigned long action, void *hcpu)
+{
+       struct virtnet_info *vi = container_of(nfb, struct virtnet_info, nb);
+
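+       /* Recompute the queue spread whenever a CPU comes or goes;
+        * DOWN_PREPARE lets us drop the dying CPU from the map early.
+        */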
+       switch (action & ~CPU_TASKS_FROZEN) {
+       case CPU_ONLINE:
+       case CPU_DOWN_FAILED:
+       case CPU_DEAD:
+               virtnet_set_affinity(vi);
+               break;
+       case CPU_DOWN_PREPARE:
+               virtnet_clean_affinity(vi, (long)hcpu);
+               break;
+       default:
+               break;
+       }
+       return NOTIFY_OK;
 }
 
 static void virtnet_get_ringparam(struct net_device *dev,
@@ -1092,13 +1149,15 @@ static int virtnet_set_channels(struct net_device *dev,
        if (queue_pairs > vi->max_queue_pairs)
                return -EINVAL;
 
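+       /* Pin the set of online CPUs while the affinity hints are
+        * (re)assigned.
+        */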
+       get_online_cpus();
        err = virtnet_set_queues(vi, queue_pairs);
        if (!err) {
                netif_set_real_num_tx_queues(dev, queue_pairs);
                netif_set_real_num_rx_queues(dev, queue_pairs);
 
-               virtnet_set_affinity(vi, true);
+               virtnet_set_affinity(vi);
        }
+       put_online_cpus();
 
        return err;
 }
@@ -1137,12 +1196,19 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
 
 /* To avoid contending a lock held by a vcpu that might exit to the host, select the
  * txq based on the processor id.
- * TODO: handle cpu hotplug.
  */
 static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
 {
-       int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
-                 smp_processor_id();
+       int txq;
+       struct virtnet_info *vi = netdev_priv(dev);
+
+       if (skb_rx_queue_recorded(skb)) {
+               txq = skb_get_rx_queue(skb);
+       } else {
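+               /* Per-cpu map kept by virtnet_set_affinity(); -1 means this
+                * CPU has no assigned queue (e.g. during hotplug).
+                */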
+               txq = *__this_cpu_ptr(vi->vq_index);
+               if (txq == -1)
+                       txq = 0;
+       }
 
        while (unlikely(txq >= dev->real_num_tx_queues))
                txq -= dev->real_num_tx_queues;
@@ -1258,7 +1324,7 @@ static void virtnet_del_vqs(struct virtnet_info *vi)
 {
        struct virtio_device *vdev = vi->vdev;
 
-       virtnet_set_affinity(vi, false);
+       virtnet_clean_affinity(vi, -1);
 
        vdev->config->del_vqs(vdev);
 
@@ -1381,7 +1447,10 @@ static int init_vqs(struct virtnet_info *vi)
        if (ret)
                goto err_free;
 
-       virtnet_set_affinity(vi, true);
+       get_online_cpus();
+       virtnet_set_affinity(vi);
+       put_online_cpus();
+
        return 0;
 
 err_free:
@@ -1463,6 +1532,10 @@ static int virtnet_probe(struct virtio_device *vdev)
        if (vi->stats == NULL)
                goto free;
 
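+       /* Per-cpu CPU-to-txq map used by virtnet_select_queue(). */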
+       vi->vq_index = alloc_percpu(int);
+       if (vi->vq_index == NULL)
+               goto free_stats;
+
        mutex_init(&vi->config_lock);
        vi->config_enable = true;
        INIT_WORK(&vi->config_work, virtnet_config_changed_work);
@@ -1486,7 +1559,7 @@ static int virtnet_probe(struct virtio_device *vdev)
        /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
        err = init_vqs(vi);
        if (err)
-               goto free_stats;
+               goto free_index;
 
        netif_set_real_num_tx_queues(dev, 1);
        netif_set_real_num_rx_queues(dev, 1);
@@ -1509,6 +1582,13 @@ static int virtnet_probe(struct virtio_device *vdev)
                }
        }
 
+       vi->nb.notifier_call = &virtnet_cpu_callback;
+       err = register_hotcpu_notifier(&vi->nb);
+       if (err) {
+               pr_debug("virtio_net: registering cpu notifier failed\n");
+               goto free_recv_bufs;
+       }
+
        /* Assume link up if device can't report link status,
           otherwise get link status from config. */
        if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
@@ -1530,6 +1610,8 @@ free_recv_bufs:
 free_vqs:
        cancel_delayed_work_sync(&vi->refill);
        virtnet_del_vqs(vi);
+free_index:
+       free_percpu(vi->vq_index);
 free_stats:
        free_percpu(vi->stats);
 free:
@@ -1553,6 +1635,8 @@ static void virtnet_remove(struct virtio_device *vdev)
 {
        struct virtnet_info *vi = vdev->priv;
 
+       unregister_hotcpu_notifier(&vi->nb);
+
        /* Prevent config work handler from accessing the device. */
        mutex_lock(&vi->config_lock);
        vi->config_enable = false;
@@ -1564,6 +1648,7 @@ static void virtnet_remove(struct virtio_device *vdev)
 
        flush_work(&vi->config_work);
 
+       free_percpu(vi->vq_index);
        free_percpu(vi->stats);
        free_netdev(vi->dev);
 }
@@ -1638,6 +1723,7 @@ static unsigned int features[] = {
        VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
        VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
        VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ,
+       VIRTIO_NET_F_CTRL_MAC_ADDR,
 };
 
 static struct virtio_driver virtio_net_driver = {