net: Kill link between CSUM and SG features.
[pandora-kernel.git] / net / ipv4 / tcp_probe.c
1 /*
2  * tcpprobe - Observe the TCP flow with kprobes.
3  *
4  * The idea for this came from Werner Almesberger's umlsim
5  * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19  */
20
21 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
22
23 #include <linux/kernel.h>
24 #include <linux/kprobes.h>
25 #include <linux/socket.h>
26 #include <linux/tcp.h>
27 #include <linux/slab.h>
28 #include <linux/proc_fs.h>
29 #include <linux/module.h>
30 #include <linux/ktime.h>
31 #include <linux/time.h>
32 #include <net/net_namespace.h>
33
34 #include <net/tcp.h>
35
36 MODULE_AUTHOR("Stephen Hemminger <shemminger@linux-foundation.org>");
37 MODULE_DESCRIPTION("TCP cwnd snooper");
38 MODULE_LICENSE("GPL");
39 MODULE_VERSION("1.1");
40
41 static int port __read_mostly = 0;
42 MODULE_PARM_DESC(port, "Port to match (0=all)");
43 module_param(port, int, 0);
44
45 static unsigned int bufsize __read_mostly = 4096;
46 MODULE_PARM_DESC(bufsize, "Log buffer size in packets (4096)");
47 module_param(bufsize, uint, 0);
48
49 static int full __read_mostly;
50 MODULE_PARM_DESC(full, "Full log (1=every ack packet received,  0=only cwnd changes)");
51 module_param(full, int, 0);
52
53 static const char procname[] = "tcpprobe";
54
55 struct tcp_log {
56         ktime_t tstamp;
57         __be32  saddr, daddr;
58         __be16  sport, dport;
59         u16     length;
60         u32     snd_nxt;
61         u32     snd_una;
62         u32     snd_wnd;
63         u32     snd_cwnd;
64         u32     ssthresh;
65         u32     srtt;
66 };
67
68 static struct {
69         spinlock_t      lock;
70         wait_queue_head_t wait;
71         ktime_t         start;
72         u32             lastcwnd;
73
74         unsigned long   head, tail;
75         struct tcp_log  *log;
76 } tcp_probe;
77
78
79 static inline int tcp_probe_used(void)
80 {
81         return (tcp_probe.head - tcp_probe.tail) & (bufsize - 1);
82 }
83
84 static inline int tcp_probe_avail(void)
85 {
86         return bufsize - tcp_probe_used() - 1;
87 }
88
89 /*
90  * Hook inserted to be called before each receive packet.
91  * Note: arguments must match tcp_rcv_established()!
92  */
93 static int jtcp_rcv_established(struct sock *sk, struct sk_buff *skb,
94                                struct tcphdr *th, unsigned int len)
95 {
96         const struct tcp_sock *tp = tcp_sk(sk);
97         const struct inet_sock *inet = inet_sk(sk);
98
99         /* Only update if port matches */
100         if ((port == 0 || ntohs(inet->inet_dport) == port ||
101              ntohs(inet->inet_sport) == port) &&
102             (full || tp->snd_cwnd != tcp_probe.lastcwnd)) {
103
104                 spin_lock(&tcp_probe.lock);
105                 /* If log fills, just silently drop */
106                 if (tcp_probe_avail() > 1) {
107                         struct tcp_log *p = tcp_probe.log + tcp_probe.head;
108
109                         p->tstamp = ktime_get();
110                         p->saddr = inet->inet_saddr;
111                         p->sport = inet->inet_sport;
112                         p->daddr = inet->inet_daddr;
113                         p->dport = inet->inet_dport;
114                         p->length = skb->len;
115                         p->snd_nxt = tp->snd_nxt;
116                         p->snd_una = tp->snd_una;
117                         p->snd_cwnd = tp->snd_cwnd;
118                         p->snd_wnd = tp->snd_wnd;
119                         p->ssthresh = tcp_current_ssthresh(sk);
120                         p->srtt = tp->srtt >> 3;
121
122                         tcp_probe.head = (tcp_probe.head + 1) & (bufsize - 1);
123                 }
124                 tcp_probe.lastcwnd = tp->snd_cwnd;
125                 spin_unlock(&tcp_probe.lock);
126
127                 wake_up(&tcp_probe.wait);
128         }
129
130         jprobe_return();
131         return 0;
132 }
133
134 static struct jprobe tcp_jprobe = {
135         .kp = {
136                 .symbol_name    = "tcp_rcv_established",
137         },
138         .entry  = jtcp_rcv_established,
139 };
140
141 static int tcpprobe_open(struct inode *inode, struct file *file)
142 {
143         /* Reset (empty) log */
144         spin_lock_bh(&tcp_probe.lock);
145         tcp_probe.head = tcp_probe.tail = 0;
146         tcp_probe.start = ktime_get();
147         spin_unlock_bh(&tcp_probe.lock);
148
149         return 0;
150 }
151
152 static int tcpprobe_sprint(char *tbuf, int n)
153 {
154         const struct tcp_log *p
155                 = tcp_probe.log + tcp_probe.tail;
156         struct timespec tv
157                 = ktime_to_timespec(ktime_sub(p->tstamp, tcp_probe.start));
158
159         return scnprintf(tbuf, n,
160                         "%lu.%09lu %pI4:%u %pI4:%u %d %#x %#x %u %u %u %u\n",
161                         (unsigned long) tv.tv_sec,
162                         (unsigned long) tv.tv_nsec,
163                         &p->saddr, ntohs(p->sport),
164                         &p->daddr, ntohs(p->dport),
165                         p->length, p->snd_nxt, p->snd_una,
166                         p->snd_cwnd, p->ssthresh, p->snd_wnd, p->srtt);
167 }
168
169 static ssize_t tcpprobe_read(struct file *file, char __user *buf,
170                              size_t len, loff_t *ppos)
171 {
172         int error = 0;
173         size_t cnt = 0;
174
175         if (!buf)
176                 return -EINVAL;
177
178         while (cnt < len) {
179                 char tbuf[164];
180                 int width;
181
182                 /* Wait for data in buffer */
183                 error = wait_event_interruptible(tcp_probe.wait,
184                                                  tcp_probe_used() > 0);
185                 if (error)
186                         break;
187
188                 spin_lock_bh(&tcp_probe.lock);
189                 if (tcp_probe.head == tcp_probe.tail) {
190                         /* multiple readers race? */
191                         spin_unlock_bh(&tcp_probe.lock);
192                         continue;
193                 }
194
195                 width = tcpprobe_sprint(tbuf, sizeof(tbuf));
196
197                 if (cnt + width < len)
198                         tcp_probe.tail = (tcp_probe.tail + 1) & (bufsize - 1);
199
200                 spin_unlock_bh(&tcp_probe.lock);
201
202                 /* if record greater than space available
203                    return partial buffer (so far) */
204                 if (cnt + width >= len)
205                         break;
206
207                 if (copy_to_user(buf + cnt, tbuf, width))
208                         return -EFAULT;
209                 cnt += width;
210         }
211
212         return cnt == 0 ? error : cnt;
213 }
214
215 static const struct file_operations tcpprobe_fops = {
216         .owner   = THIS_MODULE,
217         .open    = tcpprobe_open,
218         .read    = tcpprobe_read,
219         .llseek  = noop_llseek,
220 };
221
222 static __init int tcpprobe_init(void)
223 {
224         int ret = -ENOMEM;
225
226         init_waitqueue_head(&tcp_probe.wait);
227         spin_lock_init(&tcp_probe.lock);
228
229         if (bufsize == 0)
230                 return -EINVAL;
231
232         bufsize = roundup_pow_of_two(bufsize);
233         tcp_probe.log = kcalloc(bufsize, sizeof(struct tcp_log), GFP_KERNEL);
234         if (!tcp_probe.log)
235                 goto err0;
236
237         if (!proc_create(procname, S_IRUSR, init_net.proc_net, &tcpprobe_fops))
238                 goto err0;
239
240         ret = register_jprobe(&tcp_jprobe);
241         if (ret)
242                 goto err1;
243
244         pr_info("probe registered (port=%d) bufsize=%u\n", port, bufsize);
245         return 0;
246  err1:
247         remove_proc_entry(procname, init_net.proc_net);
248  err0:
249         kfree(tcp_probe.log);
250         return ret;
251 }
252 module_init(tcpprobe_init);
253
254 static __exit void tcpprobe_exit(void)
255 {
256         remove_proc_entry(procname, init_net.proc_net);
257         unregister_jprobe(&tcp_jprobe);
258         kfree(tcp_probe.log);
259 }
260 module_exit(tcpprobe_exit);