unsigned int tp_reserve;
unsigned int tp_loss:1;
unsigned int tp_tstamp;
+ struct net_device __rcu *cached_dev;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
};
static void register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
+
if (!po->running) {
- if (po->fanout)
+ if (po->fanout) {
__fanout_link(sk, po);
- else
+ } else {
dev_add_pack(&po->prot_hook);
+ rcu_assign_pointer(po->cached_dev, po->prot_hook.dev);
+ }
+
sock_hold(sk);
po->running = 1;
}
struct packet_sock *po = pkt_sk(sk);
po->running = 0;
- if (po->fanout)
+ if (po->fanout) {
__fanout_unlink(sk, po);
- else
+ } else {
__dev_remove_pack(&po->prot_hook);
+ RCU_INIT_POINTER(po->cached_dev, NULL);
+ }
+
__sock_put(sk);
if (sync) {
pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
- spin_lock(&rb_queue->lock);
+ spin_lock_bh(&rb_queue->lock);
pkc->delete_blk_timer = 1;
- spin_unlock(&rb_queue->lock);
+ spin_unlock_bh(&rb_queue->lock);
prb_del_retire_blk_timer(pkc);
}
sk_refcnt_debug_dec(sk);
}
-static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
-{
- int x = atomic_read(&f->rr_cur) + 1;
-
- if (x >= num)
- x = 0;
-
- return x;
-}
-
static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
{
u32 idx, hash = skb->rxhash;
static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
{
- int cur, old;
+ unsigned int val = atomic_inc_return(&f->rr_cur);
- cur = atomic_read(&f->rr_cur);
- while ((old = atomic_cmpxchg(&f->rr_cur, cur,
- fanout_rr_next(f, num))) != cur)
- cur = old;
- return f->arr[cur];
+ return f->arr[val % num];
}
static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
struct packet_type *pt, struct net_device *orig_dev)
{
struct packet_fanout *f = pt->af_packet_priv;
- unsigned int num = f->num_members;
+ unsigned int num = ACCESS_ONCE(f->num_members);
struct packet_sock *po;
struct sock *sk;
bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
{
- if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
- return true;
+ if (sk->sk_family != PF_PACKET)
+ return false;
- return false;
+ return ptype->af_packet_priv == pkt_sk(sk)->fanout;
}
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
return -EINVAL;
}
+ mutex_lock(&fanout_mutex);
+
+ err = -EINVAL;
if (!po->running)
- return -EINVAL;
+ goto out;
+ err = -EALREADY;
if (po->fanout)
- return -EALREADY;
+ goto out;
- mutex_lock(&fanout_mutex);
match = NULL;
list_for_each_entry(f, &fanout_list, list) {
if (f->id == id &&
struct packet_sock *po = pkt_sk(sk);
struct packet_fanout *f;
+ mutex_lock(&fanout_mutex);
f = po->fanout;
- if (!f)
- return;
-
- po->fanout = NULL;
+ if (f) {
+ po->fanout = NULL;
- mutex_lock(&fanout_mutex);
- if (atomic_dec_and_test(&f->sk_ref)) {
- list_del(&f->list);
- dev_remove_pack(&f->prot_hook);
- kfree(f);
+ if (atomic_dec_and_test(&f->sk_ref)) {
+ list_del(&f->list);
+ dev_remove_pack(&f->prot_hook);
+ kfree(f);
+ }
}
mutex_unlock(&fanout_mutex);
}
return tp_len;
}
+static struct net_device *packet_cached_dev_get(struct packet_sock *po)
+{
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = rcu_dereference(po->cached_dev);
+ if (dev)
+ dev_hold(dev);
+ rcu_read_unlock();
+
+ return dev;
+}
+
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- bool need_rls_dev = false;
int err, reserve = 0;
void *ph;
struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
err = -EBUSY;
if (saddr == NULL) {
- dev = po->prot_hook.dev;
+ dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
} else {
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
- need_rls_dev = true;
}
err = -ENXIO;
if (unlikely(dev == NULL))
goto out;
-
- reserve = dev->hard_header_len;
-
err = -ENETDOWN;
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
+ reserve = dev->hard_header_len;
+
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
__packet_set_status(po, ph, status);
kfree_skb(skb);
out_put:
- if (need_rls_dev)
- dev_put(dev);
+ dev_put(dev);
out:
mutex_unlock(&po->pg_vec_lock);
return err;
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- bool need_rls_dev = false;
unsigned char *addr;
int err, reserve = 0;
struct virtio_net_hdr vnet_hdr = { 0 };
*/
if (saddr == NULL) {
- dev = po->prot_hook.dev;
+ dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
} else {
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
- need_rls_dev = true;
}
err = -ENXIO;
- if (dev == NULL)
+ if (unlikely(dev == NULL))
goto out_unlock;
- if (sock->type == SOCK_RAW)
- reserve = dev->hard_header_len;
-
err = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
+ if (unlikely(!(dev->flags & IFF_UP)))
goto out_unlock;
+ if (sock->type == SOCK_RAW)
+ reserve = dev->hard_header_len;
if (po->has_vnet_hdr) {
vnet_hdr_len = sizeof(vnet_hdr);
if (err > 0 && (err = net_xmit_errno(err)) != 0)
goto out_unlock;
- if (need_rls_dev)
- dev_put(dev);
+ dev_put(dev);
return len;
out_free:
kfree_skb(skb);
out_unlock:
- if (dev && need_rls_dev)
+ if (dev)
dev_put(dev);
out:
return err;
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
po->num = proto;
+ RCU_INIT_POINTER(po->cached_dev, NULL);
sk->sk_destruct = packet_sock_destruct;
sk_refcnt_debug_inc(sk);
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
- struct sockaddr_ll *sll;
int vnet_hdr_len = 0;
err = -EINVAL;
goto out_free;
}
- /*
- * If the address length field is there to be filled in, we fill
- * it in now.
- */
-
- sll = &PACKET_SKB_CB(skb)->sa.ll;
- if (sock->type == SOCK_PACKET)
- msg->msg_namelen = sizeof(struct sockaddr_pkt);
- else
- msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
-
- /*
- * You lose any data beyond the buffer you gave. If it worries a
- * user program they can ask the device for its MTU anyway.
+ /* You lose any data beyond the buffer you gave. If it worries
+ * a user program they can ask the device for its MTU
+ * anyway.
*/
-
copied = skb->len;
if (copied > len) {
copied = len;
sock_recv_ts_and_drops(msg, sk, skb);
- if (msg->msg_name)
+ if (msg->msg_name) {
+ /* If the address length field is there to be filled
+ * in, we fill it in now.
+ */
+ if (sock->type == SOCK_PACKET) {
+ msg->msg_namelen = sizeof(struct sockaddr_pkt);
+ } else {
+ struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+ msg->msg_namelen = sll->sll_halen +
+ offsetof(struct sockaddr_ll, sll_addr);
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
msg->msg_namelen);
+ }
if (pkt_sk(sk)->auxdata) {
struct tpacket_auxdata aux;
if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
switch (val) {
case TPACKET_V1:
case TPACKET_V2:
case TPACKET_V3:
- po->tp_version = val;
- return 0;
+ break;
default:
return -EINVAL;
}
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_version = val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_RESERVE:
{
/* Added to avoid minimal code churn */
struct tpacket_req *req = &req_u->req;
+ lock_sock(sk);
/* Opening a Tx-ring is NOT supported in TPACKET_V3 */
if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) {
WARN(1, "Tx-ring is not supported.\n");
goto out;
}
- lock_sock(sk);
/* Detach socket from network */
spin_lock(&po->bind_lock);
if (!tx_ring)
prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
}
- release_sock(sk);
if (pg_vec)
free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
+ release_sock(sk);
return err;
}