X-Git-Url: https://git.openpandora.org/cgi-bin/gitweb.cgi?p=pandora-kernel.git;a=blobdiff_plain;f=net%2Fpacket%2Faf_packet.c;h=92d57aac9e955839b7394b11ba67fd6918c94446;hp=d9d4970b9b07c0da22f97e29a8ac0f38ec4caff1;hb=1634172286550a62d8a0a98cf8bec5cd975fa09c;hpb=c7f46b7aa4ae5cbef32eb5e016512a14f936affa diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d9d4970b9b07..92d57aac9e95 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -195,6 +195,7 @@ struct tpacket_kbdq_core { char *pkblk_start; char *pkblk_end; int kblk_size; + unsigned int max_frame_len; unsigned int knum_blocks; uint64_t knxt_seq_num; char *prev; @@ -295,6 +296,7 @@ struct packet_sock { unsigned int tp_reserve; unsigned int tp_loss:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; @@ -350,11 +352,15 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { - if (po->fanout) + if (po->fanout) { __fanout_link(sk, po); - else + } else { dev_add_pack(&po->prot_hook); + rcu_assign_pointer(po->cached_dev, po->prot_hook.dev); + } + sock_hold(sk); po->running = 1; } @@ -372,10 +378,13 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; - if (po->fanout) + if (po->fanout) { __fanout_unlink(sk, po); - else + } else { __dev_remove_pack(&po->prot_hook); + RCU_INIT_POINTER(po->cached_dev, NULL); + } + __sock_put(sk); if (sync) { @@ -497,9 +506,9 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } @@ -608,6 +617,7 @@ static void init_prb_bdqc(struct packet_sock *po, p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; + p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); prb_init_ft_ops(p1, req_u); prb_setup_retire_blk_timer(po, tx_ring); prb_open_block(p1, pbd); @@ -813,37 +823,27 @@ static void prb_open_block(struct tpacket_kbdq_core *pkc1, smp_rmb(); - if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd1))) { - - /* We could have just memset this but we will lose the - * flexibility of making the priv area sticky - */ - BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; - BLOCK_NUM_PKTS(pbd1) = 0; - BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - getnstimeofday(&ts); - h1->ts_first_pkt.ts_sec = ts.tv_sec; - h1->ts_first_pkt.ts_nsec = ts.tv_nsec; - pkc1->pkblk_start = (char *)pbd1; - pkc1->nxt_offset = (char *)(pkc1->pkblk_start + - BLK_PLUS_PRIV(pkc1->blk_sizeof_priv)); - BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); - BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; - pbd1->version = pkc1->version; - pkc1->prev = pkc1->nxt_offset; - pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; - prb_thaw_queue(pkc1); - _prb_refresh_rx_retire_blk_timer(pkc1); - - smp_wmb(); - - return; - } + /* We could have just memset this but we will lose the + * flexibility of making the priv area sticky + */ + BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; + BLOCK_NUM_PKTS(pbd1) = 0; + BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + getnstimeofday(&ts); + h1->ts_first_pkt.ts_sec = ts.tv_sec; + h1->ts_first_pkt.ts_nsec = ts.tv_nsec; + pkc1->pkblk_start = (char *)pbd1; + pkc1->nxt_offset = (char *)(pkc1->pkblk_start + + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv)); + BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); + BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; + pbd1->version = pkc1->version; + pkc1->prev = pkc1->nxt_offset; + pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; + prb_thaw_queue(pkc1); + _prb_refresh_rx_retire_blk_timer(pkc1); - WARN(1, "ERROR block:%p is NOT FREE status:%d kactive_blk_num:%d\n", - pbd1, BLOCK_STATUS(pbd1), pkc1->kactive_blk_num); - dump_stack(); - BUG(); + smp_wmb(); } /* @@ -934,10 +934,6 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *pkc, prb_close_block(pkc, pbd, po, status); return; } - - WARN(1, "ERROR-pbd[%d]:%p\n", pkc->kactive_blk_num, pbd); - dump_stack(); - BUG(); } static int prb_curr_blk_in_use(struct tpacket_kbdq_core *pkc, @@ -1176,16 +1172,6 @@ static void packet_sock_destruct(struct sock *sk) sk_refcnt_debug_dec(sk); } -static int fanout_rr_next(struct packet_fanout *f, unsigned int num) -{ - int x = atomic_read(&f->rr_cur) + 1; - - if (x >= num) - x = 0; - - return x; -} - static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { u32 idx, hash = skb->rxhash; @@ -1197,13 +1183,9 @@ static struct sock *fanout_demux_hash(struct packet_fanout *f, struct sk_buff *s static struct sock *fanout_demux_lb(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) { - int cur, old; + unsigned int val = atomic_inc_return(&f->rr_cur); - cur = atomic_read(&f->rr_cur); - while ((old = atomic_cmpxchg(&f->rr_cur, cur, - fanout_rr_next(f, num))) != cur) - cur = old; - return f->arr[cur]; + return f->arr[val % num]; } static struct sock *fanout_demux_cpu(struct packet_fanout *f, struct sk_buff *skb, unsigned int num) @@ -1217,7 +1199,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct packet_fanout *f = pt->af_packet_priv; - unsigned int num = f->num_members; + unsigned int num = ACCESS_ONCE(f->num_members); struct packet_sock *po; struct sock *sk; @@ -1262,6 +1244,8 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po) f->arr[f->num_members] = sk; smp_wmb(); f->num_members++; + if (f->num_members == 1) + dev_add_pack(&f->prot_hook); spin_unlock(&f->lock); } @@ -1278,9 +1262,19 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po) BUG_ON(i >= f->num_members); f->arr[i] = f->arr[f->num_members - 1]; f->num_members--; + if (f->num_members == 0) + __dev_remove_pack(&f->prot_hook); spin_unlock(&f->lock); } +bool match_fanout_group(struct packet_type *ptype, struct sock * sk) +{ + if (sk->sk_family != PF_PACKET) + return false; + + return ptype->af_packet_priv == pkt_sk(sk)->fanout; +} + static int fanout_add(struct sock *sk, u16 id, u16 type_flags) { struct packet_sock *po = pkt_sk(sk); @@ -1298,13 +1292,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) return -EINVAL; } + mutex_lock(&fanout_mutex); + + err = -EINVAL; if (!po->running) - return -EINVAL; + goto out; + err = -EALREADY; if (po->fanout) - return -EALREADY; + goto out; - mutex_lock(&fanout_mutex); match = NULL; list_for_each_entry(f, &fanout_list, list) { if (f->id == id && @@ -1333,7 +1330,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; - dev_add_pack(&match->prot_hook); + match->prot_hook.id_match = match_fanout_group; list_add(&match->list, &fanout_list); } err = -EINVAL; @@ -1354,24 +1351,29 @@ out: return err; } -static void fanout_release(struct sock *sk) +/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes + * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. + * It is the responsibility of the caller to call fanout_release_data() and + * free the returned packet_fanout (after synchronize_net()) + */ +static struct packet_fanout *fanout_release(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); struct packet_fanout *f; + mutex_lock(&fanout_mutex); f = po->fanout; - if (!f) - return; - - po->fanout = NULL; + if (f) { + po->fanout = NULL; - mutex_lock(&fanout_mutex); - if (atomic_dec_and_test(&f->sk_ref)) { - list_del(&f->list); - dev_remove_pack(&f->prot_hook); - kfree(f); + if (atomic_dec_and_test(&f->sk_ref)) + list_del(&f->list); + else + f = NULL; } mutex_unlock(&fanout_mutex); + + return f; } static const struct proto_ops packet_ops; @@ -1775,6 +1777,18 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if ((int)snaplen < 0) snaplen = 0; } + } else if (unlikely(macoff + snaplen > + GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { + u32 nval; + + nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff; + pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n", + snaplen, nval, macoff); + snaplen = nval; + if (unlikely((int)snaplen < 0)) { + snaplen = 0; + macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; + } } spin_lock(&sk->sk_receive_queue.lock); h.raw = packet_current_rx_frame(po, skb, @@ -1931,7 +1945,6 @@ static void tpacket_destruct_skb(struct sk_buff *skb) if (likely(po->tx_ring.pg_vec)) { ph = skb_shinfo(skb)->destructor_arg; - BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING); BUG_ON(atomic_read(&po->tx_ring.pending) == 0); atomic_dec(&po->tx_ring.pending); __packet_set_status(po, ph, TP_STATUS_AVAILABLE); @@ -2038,12 +2051,24 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, return tp_len; } +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (dev) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) { struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -2056,7 +2081,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) err = -EBUSY; if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2070,19 +2095,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2158,8 +2181,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2197,7 +2219,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2211,7 +2232,7 @@ static int packet_snd(struct socket *sock, */ if (saddr == NULL) { - dev = po->prot_hook.dev; + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2223,19 +2244,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) + if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2356,15 +2375,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2390,6 +2408,7 @@ static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; struct packet_sock *po; + struct packet_fanout *f; struct net *net; union tpacket_req_u req_u; @@ -2414,17 +2433,22 @@ static int packet_release(struct socket *sock) packet_flush_mclist(sk); - memset(&req_u, 0, sizeof(req_u)); - - if (po->rx_ring.pg_vec) + if (po->rx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 0); + } - if (po->tx_ring.pg_vec) + if (po->tx_ring.pg_vec) { + memset(&req_u, 0, sizeof(req_u)); packet_set_ring(sk, &req_u, 1, 1); + } - fanout_release(sk); + f = fanout_release(sk); synchronize_net(); + + kfree(f); + /* * Now the socket is dead. No more input will appear. */ @@ -2579,6 +2603,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po = pkt_sk(sk); sk->sk_family = PF_PACKET; po->num = proto; + RCU_INIT_POINTER(po->cached_dev, NULL); sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -2667,7 +2692,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; - struct sockaddr_ll *sll; int vnet_hdr_len = 0; err = -EINVAL; @@ -2750,22 +2774,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_free; } - /* - * If the address length field is there to be filled in, we fill - * it in now. + /* You lose any data beyond the buffer you gave. If it worries + * a user program they can ask the device for its MTU + * anyway. */ - - sll = &PACKET_SKB_CB(skb)->sa.ll; - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. - */ - copied = skb->len; if (copied > len) { copied = len; @@ -2778,9 +2790,20 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); - if (msg->msg_name) + if (msg->msg_name) { + /* If the address length field is there to be filled + * in, we fill it in now. + */ + if (sock->type == SOCK_PACKET) { + msg->msg_namelen = sizeof(struct sockaddr_pkt); + } else { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + + offsetof(struct sockaddr_ll, sll_addr); + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, msg->msg_namelen); + } if (pkt_sk(sk)->auxdata) { struct tpacket_auxdata aux; @@ -2824,12 +2847,11 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, return -EOPNOTSUPP; uaddr->sa_family = AF_PACKET; + memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); if (dev) - strncpy(uaddr->sa_data, dev->name, 14); - else - memset(uaddr->sa_data, 0, 14); + strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); *uaddr_len = sizeof(*uaddr); @@ -3084,19 +3106,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; switch (val) { case TPACKET_V1: case TPACKET_V2: case TPACKET_V3: - po->tp_version = val; - return 0; + break; default: return -EINVAL; } + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_version = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_RESERVE: { @@ -3567,6 +3595,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, /* Added to avoid minimal code churn */ struct tpacket_req *req = &req_u->req; + lock_sock(sk); /* Opening a Tx-ring is NOT supported in TPACKET_V3 */ if (!closing && tx_ring && (po->tp_version > TPACKET_V2)) { WARN(1, "Tx-ring is not supported.\n"); @@ -3607,6 +3636,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) goto out; + if (po->tp_version >= TPACKET_V3 && + req->tp_block_size <= + BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv)) + goto out; if (unlikely(req->tp_frame_size < po->tp_hdrlen + po->tp_reserve)) goto out; @@ -3616,6 +3649,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, rb->frames_per_block = req->tp_block_size/req->tp_frame_size; if (unlikely(rb->frames_per_block <= 0)) goto out; + if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) + goto out; if (unlikely((rb->frames_per_block * req->tp_block_nr) != req->tp_frame_nr)) goto out; @@ -3644,7 +3679,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, goto out; } - lock_sock(sk); /* Detach socket from network */ spin_lock(&po->bind_lock); @@ -3693,11 +3727,11 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, if (!tx_ring) prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue); } - release_sock(sk); if (pg_vec) free_pg_vec(pg_vec, order, req->tp_block_nr); out: + release_sock(sk); return err; }