unsigned int tp_reserve;
unsigned int tp_loss:1;
unsigned int tp_tstamp;
+ struct net_device __rcu *cached_dev;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
};
static void register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
+
if (!po->running) {
- if (po->fanout)
+ if (po->fanout) {
__fanout_link(sk, po);
- else
+ } else {
dev_add_pack(&po->prot_hook);
+ rcu_assign_pointer(po->cached_dev, po->prot_hook.dev);
+ }
+
sock_hold(sk);
po->running = 1;
}
struct packet_sock *po = pkt_sk(sk);
po->running = 0;
- if (po->fanout)
+ if (po->fanout) {
__fanout_unlink(sk, po);
- else
+ } else {
__dev_remove_pack(&po->prot_hook);
+ RCU_INIT_POINTER(po->cached_dev, NULL);
+ }
+
__sock_put(sk);
if (sync) {
pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
- spin_lock(&rb_queue->lock);
+ spin_lock_bh(&rb_queue->lock);
pkc->delete_blk_timer = 1;
- spin_unlock(&rb_queue->lock);
+ spin_unlock_bh(&rb_queue->lock);
prb_del_retire_blk_timer(pkc);
}
smp_rmb();
- if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) {
-
- /* We could have just memset this but we will lose the
- * flexibility of making the priv area sticky
- */
- BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
- BLOCK_NUM_PKTS(pbd1) = 0;
- BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
- getnstimeofday(&ts);
- h1->ts_first_pkt.ts_sec = ts.tv_sec;
- h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
- pkc1->pkblk_start = (char *)pbd1;
- pkc1->nxt_offset = (char *)(pkc1->pkblk_start +
- BLK_PLUS_PRIV(pkc1->blk_sizeof_priv));
- BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
- BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
- pbd1->version = pkc1->version;
- pkc1->prev = pkc1->nxt_offset;
- pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
- prb_thaw_queue(pkc1);
- _prb_refresh_rx_retire_blk_timer(pkc1);
-
- smp_wmb();
-
- return;
- }
+ /* We could have just memset this but we will lose the
+ * flexibility of making the priv area sticky
+ */
+ BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
+ BLOCK_NUM_PKTS(pbd1) = 0;
+ BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
+ getnstimeofday(&ts);
+ h1->ts_first_pkt.ts_sec = ts.tv_sec;
+ h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
+ pkc1->pkblk_start = (char *)pbd1;
+ pkc1->nxt_offset = (char *)(pkc1->pkblk_start +
+ BLK_PLUS_PRIV(pkc1->blk_sizeof_priv));
+ BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
+ BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
+ pbd1->version = pkc1->version;
+ pkc1->prev = pkc1->nxt_offset;
+ pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
+ prb_thaw_queue(pkc1);
+ _prb_refresh_rx_retire_blk_timer(pkc1);
- WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n",
- pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num);
- dump_stack();
- BUG();
+ smp_wmb();
}
/*
prb_close_block(pkc, pbd, po, status);
return;
}
-
- WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd);
- dump_stack();
- BUG();
}
static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc,
sk_refcnt_debug_dec(sk);
}
-static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
-{
- int x = atomic_read(&f->rr_cur) + 1;
-
- if (x >= num)
- x = 0;
-
- return x;
-}
-
static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
{
u32 idx, hash = skb->rxhash;
static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
{
- int cur, old;
+ unsigned int val = atomic_inc_return(&f->rr_cur);
- cur = atomic_read(&f->rr_cur);
- while ((old = atomic_cmpxchg(&f->rr_cur, cur,
- fanout_rr_next(f, num))) != cur)
- cur = old;
- return f->arr[cur];
+ return f->arr[val % num];
}
static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num)
struct packet_type *pt, struct net_device *orig_dev)
{
struct packet_fanout *f = pt->af_packet_priv;
- unsigned int num = f->num_members;
+ unsigned int num = ACCESS_ONCE(f->num_members);
struct packet_sock *po;
struct sock *sk;
spin_unlock(&f->lock);
}
+bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
+{
+ if (sk->sk_family != PF_PACKET)
+ return false;
+
+ return ptype->af_packet_priv == pkt_sk(sk)->fanout;
+}
+
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
struct packet_sock *po = pkt_sk(sk);
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
match->prot_hook.af_packet_priv = match;
+ match->prot_hook.id_match = match_fanout_group;
dev_add_pack(&match->prot_hook);
list_add(&match->list, &fanout_list);
}
if (likely(po->tx_ring.pg_vec)) {
ph = skb_shinfo(skb)->destructor_arg;
- BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
atomic_dec(&po->tx_ring.pending);
__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
return tp_len;
}
+static struct net_device *packet_cached_dev_get(struct packet_sock *po)
+{
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = rcu_dereference(po->cached_dev);
+ if (dev)
+ dev_hold(dev);
+ rcu_read_unlock();
+
+ return dev;
+}
+
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- bool need_rls_dev = false;
int err, reserve = 0;
void *ph;
struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
err = -EBUSY;
if (saddr == NULL) {
- dev = po->prot_hook.dev;
+ dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
} else {
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
- need_rls_dev = true;
}
err = -ENXIO;
if (unlikely(dev == NULL))
goto out;
-
- reserve = dev->hard_header_len;
-
err = -ENETDOWN;
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
+ reserve = dev->hard_header_len;
+
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
__packet_set_status(po, ph, status);
kfree_skb(skb);
out_put:
- if (need_rls_dev)
- dev_put(dev);
+ dev_put(dev);
out:
mutex_unlock(&po->pg_vec_lock);
return err;
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- bool need_rls_dev = false;
unsigned char *addr;
int err, reserve = 0;
struct virtio_net_hdr vnet_hdr = { 0 };
*/
if (saddr == NULL) {
- dev = po->prot_hook.dev;
+ dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
} else {
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
- need_rls_dev = true;
}
err = -ENXIO;
- if (dev == NULL)
+ if (unlikely(dev == NULL))
goto out_unlock;
- if (sock->type == SOCK_RAW)
- reserve = dev->hard_header_len;
-
err = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
+ if (unlikely(!(dev->flags & IFF_UP)))
goto out_unlock;
+ if (sock->type == SOCK_RAW)
+ reserve = dev->hard_header_len;
if (po->has_vnet_hdr) {
vnet_hdr_len = sizeof(vnet_hdr);
if (err > 0 && (err = net_xmit_errno(err)) != 0)
goto out_unlock;
- if (need_rls_dev)
- dev_put(dev);
+ dev_put(dev);
return len;
out_free:
kfree_skb(skb);
out_unlock:
- if (dev && need_rls_dev)
+ if (dev)
dev_put(dev);
out:
return err;
packet_flush_mclist(sk);
- memset(&req_u, 0, sizeof(req_u));
-
- if (po->rx_ring.pg_vec)
+ if (po->rx_ring.pg_vec) {
+ memset(&req_u, 0, sizeof(req_u));
packet_set_ring(sk, &req_u, 1, 0);
+ }
- if (po->tx_ring.pg_vec)
+ if (po->tx_ring.pg_vec) {
+ memset(&req_u, 0, sizeof(req_u));
packet_set_ring(sk, &req_u, 1, 1);
+ }
fanout_release(sk);
po = pkt_sk(sk);
sk->sk_family = PF_PACKET;
po->num = proto;
+ RCU_INIT_POINTER(po->cached_dev, NULL);
sk->sk_destruct = packet_sock_destruct;
sk_refcnt_debug_inc(sk);
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
- struct sockaddr_ll *sll;
int vnet_hdr_len = 0;
err = -EINVAL;
goto out_free;
}
- /*
- * If the address length field is there to be filled in, we fill
- * it in now.
+ /* You lose any data beyond the buffer you gave. If it worries
+ * a user program they can ask the device for its MTU
+ * anyway.
*/
-
- sll = &PACKET_SKB_CB(skb)->sa.ll;
- if (sock->type == SOCK_PACKET)
- msg->msg_namelen = sizeof(struct sockaddr_pkt);
- else
- msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
-
- /*
- * You lose any data beyond the buffer you gave. If it worries a
- * user program they can ask the device for its MTU anyway.
- */
-
copied = skb->len;
if (copied > len) {
copied = len;
sock_recv_ts_and_drops(msg, sk, skb);
- if (msg->msg_name)
+ if (msg->msg_name) {
+ /* If the address length field is there to be filled
+ * in, we fill it in now.
+ */
+ if (sock->type == SOCK_PACKET) {
+ msg->msg_namelen = sizeof(struct sockaddr_pkt);
+ } else {
+ struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+ msg->msg_namelen = sll->sll_halen +
+ offsetof(struct sockaddr_ll, sll_addr);
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
msg->msg_namelen);
+ }
if (pkt_sk(sk)->auxdata) {
struct tpacket_auxdata aux;
return -EOPNOTSUPP;
uaddr->sa_family = AF_PACKET;
+ memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data));
rcu_read_lock();
dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
if (dev)
- strncpy(uaddr->sa_data, dev->name, 14);
- else
- memset(uaddr->sa_data, 0, 14);
+ strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data));
rcu_read_unlock();
*uaddr_len = sizeof(*uaddr);