net: fix race on decreasing number of TX queues
[pandora-kernel.git] / net/core/link_watch.c
/*
 * Linux network device link state notification
 *
 * Author:
 *     Stefan Rompf <sux@loplof.de>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <linux/rtnetlink.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
#include <linux/bitops.h>
#include <asm/types.h>


enum lw_bits {
        LW_URGENT = 0,
};

static unsigned long linkwatch_flags;
static unsigned long linkwatch_nextevent;

static void linkwatch_event(struct work_struct *dummy);
static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event);

static LIST_HEAD(lweventlist);
static DEFINE_SPINLOCK(lweventlist_lock);

static unsigned char default_operstate(const struct net_device *dev)
{
        if (!netif_carrier_ok(dev))
                return (dev->ifindex != dev->iflink ?
                        IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN);

        if (netif_dormant(dev))
                return IF_OPER_DORMANT;

        return IF_OPER_UP;
}
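
/*
 * Note: dev->ifindex != dev->iflink means the device is stacked on a
 * lower device (e.g. a VLAN or a tunnel, where iflink identifies the
 * device below).  Example with hypothetical interface names: if eth0
 * carries eth0.100 and eth0 loses carrier, eth0.100 reports
 * IF_OPER_LOWERLAYERDOWN rather than IF_OPER_DOWN, telling userspace
 * the fault is in the layer below.
 */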


static void rfc2863_policy(struct net_device *dev)
{
        unsigned char operstate = default_operstate(dev);

        if (operstate == dev->operstate)
                return;

        write_lock_bh(&dev_base_lock);

        switch (dev->link_mode) {
        case IF_LINK_MODE_DORMANT:
                if (operstate == IF_OPER_UP)
                        operstate = IF_OPER_DORMANT;
                break;

        case IF_LINK_MODE_DEFAULT:
        default:
                break;
        }

        dev->operstate = operstate;

        write_unlock_bh(&dev_base_lock);
}


static bool linkwatch_urgent_event(struct net_device *dev)
{
        if (!netif_running(dev))
                return false;

        if (dev->ifindex != dev->iflink)
                return true;

        return netif_carrier_ok(dev) && qdisc_tx_changing(dev);
}
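
/*
 * For reference: qdisc_tx_changing() (include/net/sch_generic.h) reports
 * whether any TX queue is in transition, i.e. its active qdisc differs
 * from its sleeping one, as happens between dev_deactivate() and
 * dev_activate().  A simplified sketch of the check (details may differ
 * across trees):
 *
 *      static inline bool qdisc_tx_changing(struct net_device *dev)
 *      {
 *              unsigned int i;
 *
 *              for (i = 0; i < dev->num_tx_queues; i++) {
 *                      struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 *
 *                      if (txq->qdisc != txq->qdisc_sleeping)
 *                              return true;
 *              }
 *              return false;
 *      }
 *
 * Treating carrier-up with a changing qdisc as urgent lets the queues be
 * reactivated without waiting out the one-second rate limit below.
 */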


/*
 * Queue the device on the event list at most once; the reference taken
 * here is dropped by linkwatch_do_dev().
 */
static void linkwatch_add_event(struct net_device *dev)
{
        unsigned long flags;

        spin_lock_irqsave(&lweventlist_lock, flags);
        if (list_empty(&dev->link_watch_list)) {
                list_add_tail(&dev->link_watch_list, &lweventlist);
                dev_hold(dev);
        }
        spin_unlock_irqrestore(&lweventlist_lock, flags);
}


static void linkwatch_schedule_work(int urgent)
{
        unsigned long delay = linkwatch_nextevent - jiffies;

        if (test_bit(LW_URGENT, &linkwatch_flags))
                return;

        /* Minimise down-time: drop delay for up event. */
        if (urgent) {
                if (test_and_set_bit(LW_URGENT, &linkwatch_flags))
                        return;
                delay = 0;
        }

        /* If we wrap around we'll delay it by at most HZ. */
        if (delay > HZ)
                delay = 0;

        /*
         * This is true if we've scheduled it immediately or if we don't
         * need an immediate execution and it's already pending.
         */
        if (schedule_delayed_work(&linkwatch_work, delay) == !delay)
                return;

        /* Don't bother if there is nothing urgent. */
        if (!test_bit(LW_URGENT, &linkwatch_flags))
                return;

        /* It's already running which is good enough. */
        if (!__cancel_delayed_work(&linkwatch_work))
                return;

        /* Otherwise we reschedule it again for immediate execution. */
        schedule_delayed_work(&linkwatch_work, 0);
}
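
/*
 * Worked example of the delay arithmetic above, with hypothetical values
 * and HZ == 1000: if jiffies == 5000 and linkwatch_nextevent == 4900
 * (the holdoff already expired), the unsigned subtraction
 * "linkwatch_nextevent - jiffies" underflows to a huge value, and the
 * "delay > HZ" test clamps it so the work runs immediately.  If instead
 * linkwatch_nextevent == 5400, delay is 400 jiffies (0.4s).
 */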


static void linkwatch_do_dev(struct net_device *dev)
{
        /*
         * Make sure the caller's read of the event list entry is
         * complete, since the entry can be rewritten as soon as we
         * clear the bit below.
         */
        smp_mb__before_clear_bit();

        /* We are about to handle this device,
         * so new events can be accepted
         */
        clear_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state);

        rfc2863_policy(dev);
        if (dev->flags & IFF_UP) {
                if (netif_carrier_ok(dev))
                        dev_activate(dev);
                else
                        dev_deactivate(dev);

                netdev_state_change(dev);
        }
        dev_put(dev);
}

static void __linkwatch_run_queue(int urgent_only)
{
        struct net_device *dev;
        LIST_HEAD(wrk);

        /*
         * Limit the number of linkwatch events to one
         * per second so that a runaway driver does not
         * cause a storm of messages on the netlink
         * socket.  This limit does not apply to up events
         * while the device qdisc is down.
         */
        if (!urgent_only)
                linkwatch_nextevent = jiffies + HZ;
        /* Limit wrap-around effect on delay. */
        else if (time_after(linkwatch_nextevent, jiffies + HZ))
                linkwatch_nextevent = jiffies;

        clear_bit(LW_URGENT, &linkwatch_flags);

        /*
         * Splice the pending events onto a private list so that
         * lweventlist_lock can be dropped while each device is handled.
         */
        spin_lock_irq(&lweventlist_lock);
        list_splice_init(&lweventlist, &wrk);

        while (!list_empty(&wrk)) {

                dev = list_first_entry(&wrk, struct net_device, link_watch_list);
                list_del_init(&dev->link_watch_list);

                /* On an urgent-only pass, put non-urgent events back. */
                if (urgent_only && !linkwatch_urgent_event(dev)) {
                        list_add_tail(&dev->link_watch_list, &lweventlist);
                        continue;
                }
                spin_unlock_irq(&lweventlist_lock);
                linkwatch_do_dev(dev);
                spin_lock_irq(&lweventlist_lock);
        }

        if (!list_empty(&lweventlist))
                linkwatch_schedule_work(0);
        spin_unlock_irq(&lweventlist_lock);
}

void linkwatch_forget_dev(struct net_device *dev)
{
        unsigned long flags;
        int clean = 0;

        spin_lock_irqsave(&lweventlist_lock, flags);
        if (!list_empty(&dev->link_watch_list)) {
                list_del_init(&dev->link_watch_list);
                clean = 1;
        }
        spin_unlock_irqrestore(&lweventlist_lock, flags);
        if (clean)
                linkwatch_do_dev(dev);
}
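
/*
 * linkwatch_forget_dev() is used on the device dismantle path (see
 * net/core/dev.c): a pending event for an unregistering device is pulled
 * off the list and handled synchronously, so teardown does not have to
 * wait for the delayed work to run before the last reference drops.
 */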

/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
{
        __linkwatch_run_queue(0);
}


static void linkwatch_event(struct work_struct *dummy)
{
        rtnl_lock();
        __linkwatch_run_queue(time_after(linkwatch_nextevent, jiffies));
        rtnl_unlock();
}
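
/*
 * Note on the urgent_only argument above: if the work runs while
 * linkwatch_nextevent is still in the future, it was rescheduled early
 * for an urgent event, so only urgent events are processed and the rest
 * stay queued until the rate limit expires.
 */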


/*
 * Called by the carrier/dormant helpers whenever a device's link state
 * may have changed.  __LINK_STATE_LINKWATCH_PENDING ensures the device
 * is queued at most once; an urgent event may still need to pull the
 * already-scheduled work forward.
 */
void linkwatch_fire_event(struct net_device *dev)
{
        bool urgent = linkwatch_urgent_event(dev);

        if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) {
                linkwatch_add_event(dev);
        } else if (!urgent)
                return;

        linkwatch_schedule_work(urgent);
}
EXPORT_SYMBOL(linkwatch_fire_event);
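
/*
 * Usage sketch (illustrative, not part of this file): drivers normally
 * do not call linkwatch_fire_event() directly; they report link changes
 * through netif_carrier_on()/netif_carrier_off(), which flip
 * __LINK_STATE_NOCARRIER and fire the event.  The names below are
 * hypothetical driver helpers:
 *
 *      static void mydrv_link_change(struct mydrv_priv *priv)
 *      {
 *              if (mydrv_phy_link_up(priv))
 *                      netif_carrier_on(priv->netdev);
 *              else
 *                      netif_carrier_off(priv->netdev);
 *      }
 *
 * From there, this file rate-limits the resulting operstate updates and
 * (de)activates the device's TX queues.
 */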