/* File veth.c created by Kyle A. Lucke on Mon Aug  7 2000. */
/*
 * IBM eServer iSeries Virtual Ethernet Device Driver
 * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp.
 * Substantially cleaned up by:
 * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 *
 * This module implements the virtual ethernet device for iSeries LPAR
 * Linux.  It uses hypervisor message passing to implement an
 * ethernet-like network device communicating between partitions on
 * the iSeries.
 *
 * The iSeries LPAR hypervisor currently allows for up to 16 different
 * virtual ethernets.  These are all dynamically configurable on
 * OS/400 partitions, but dynamic configuration is not supported under
 * Linux yet.  An ethXX network device will be created for each
 * virtual ethernet this partition is connected to.
 *
 * - This driver is responsible for routing packets to and from other
 *   partitions.  The MAC addresses used by the virtual ethernets
 *   contain meaning and must not be modified.
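 *   (For reference, veth_probe_one() below builds them as
 *   02:01:ff:<vlan>:ff:<LPAR index>.)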
 *
 * - Having 2 virtual ethernets to the same remote partition DOES NOT
 *   double the available bandwidth.  The 2 devices will share the
 *   available hypervisor bandwidth.
 *
 * - If you send a packet to your own MAC address, it will just be
 *   dropped; you won't get it on the receive side.
 *
 * - Multicast is implemented by sending the frame to every
 *   other partition.  It is the responsibility of the receiving
 *   partition to filter the addresses desired.
 *
 * Tunable parameters:
 *
 * VETH_NUMBUFFERS: This compile time option defaults to 120.  It
 * controls how much memory Linux will allocate per remote partition
 * it is communicating with.  It can be thought of as the maximum
 * number of packets outstanding to a remote partition at a time.
 */

#include <linux/config.h>
#include <linux/module.h>
#include <linux/version.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/ethtool.h>
#include <asm/iSeries/mf.h>
#include <asm/iSeries/iSeries_pci.h>
#include <asm/uaccess.h>

#include <asm/iSeries/HvLpConfig.h>
#include <asm/iSeries/HvTypes.h>
#include <asm/iSeries/HvLpEvent.h>
#include <asm/iommu.h>
#include <asm/vio.h>

#undef DEBUG

#include "iseries_veth.h"

MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");
MODULE_DESCRIPTION("iSeries Virtual ethernet driver");
MODULE_LICENSE("GPL");

#define VETH_NUMBUFFERS         (120)
#define VETH_ACKTIMEOUT         (1000000) /* microseconds */
#define VETH_MAX_MCAST          (12)

#define VETH_MAX_MTU            (9000)

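/*
 * How many received frames can accumulate before they are acked in a
 * single FramesAck event.  This is advertised to the other end in our
 * capabilities; with the default VETH_NUMBUFFERS of 120 it works out
 * to 20.
 */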
#if VETH_NUMBUFFERS < 10
#define ACK_THRESHOLD           (1)
#elif VETH_NUMBUFFERS < 20
#define ACK_THRESHOLD           (4)
#elif VETH_NUMBUFFERS < 40
#define ACK_THRESHOLD           (10)
#else
#define ACK_THRESHOLD           (20)
#endif

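/*
 * Connection state flags.  A connection is brought up by opening the
 * LP event path, sending a monitor event and our capabilities, and
 * acking the capabilities received from the other end; only once caps
 * have been exchanged and acked in both directions does the
 * connection reach VETH_STATE_READY (see veth_statemachine()).
 */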
#define VETH_STATE_SHUTDOWN     (0x0001)
#define VETH_STATE_OPEN         (0x0002)
#define VETH_STATE_RESET        (0x0004)
#define VETH_STATE_SENTMON      (0x0008)
#define VETH_STATE_SENTCAPS     (0x0010)
#define VETH_STATE_GOTCAPACK    (0x0020)
#define VETH_STATE_GOTCAPS      (0x0040)
#define VETH_STATE_SENTCAPACK   (0x0080)
#define VETH_STATE_READY        (0x0100)

struct veth_msg {
        struct veth_msg *next;
        struct VethFramesData data;
        int token;
        int in_use;
        struct sk_buff *skb;
        struct device *dev;
};

struct veth_lpar_connection {
        HvLpIndex remote_lp;
        struct work_struct statemachine_wq;
        struct veth_msg *msgs;
        int num_events;
        struct VethCapData local_caps;

        struct kobject kobject;
        struct timer_list ack_timer;

        struct timer_list reset_timer;
        unsigned int reset_timeout;
        unsigned long last_contact;
        int outstanding_tx;

        spinlock_t lock;
        unsigned long state;
        HvLpInstanceId src_inst;
        HvLpInstanceId dst_inst;
        struct VethLpEvent cap_event, cap_ack_event;
        u16 pending_acks[VETH_MAX_ACKS_PER_MSG];
        u32 num_pending_acks;

        int num_ack_events;
        struct VethCapData remote_caps;
        u32 ack_timeout;

        struct veth_msg *msg_stack_head;
};

struct veth_port {
        struct device *dev;
        struct net_device_stats stats;
        u64 mac_addr;
        HvLpIndexMap lpar_map;

        /* queue_lock protects the stopped_map and dev's queue. */
        spinlock_t queue_lock;
        HvLpIndexMap stopped_map;

        /* mcast_gate protects promiscuous, num_mcast & mcast_addr. */
        rwlock_t mcast_gate;
        int promiscuous;
        int num_mcast;
        u64 mcast_addr[VETH_MAX_MCAST];
};

static HvLpIndex this_lp;
static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */
static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */

static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
static void veth_wake_queues(struct veth_lpar_connection *cnx);
static void veth_stop_queues(struct veth_lpar_connection *cnx);
static void veth_receive(struct veth_lpar_connection *, struct VethLpEvent *);
static void veth_release_connection(struct kobject *kobject);
static void veth_timed_ack(unsigned long ptr);
static void veth_timed_reset(unsigned long ptr);

/*
 * Utility functions
 */

#define veth_info(fmt, args...) \
        printk(KERN_INFO "iseries_veth: " fmt, ## args)

#define veth_error(fmt, args...) \
        printk(KERN_ERR "iseries_veth: Error: " fmt, ## args)

#ifdef DEBUG
#define veth_debug(fmt, args...) \
        printk(KERN_DEBUG "iseries_veth: " fmt, ## args)
#else
#define veth_debug(fmt, args...) do {} while (0)
#endif

/* You must hold the connection's lock when you call this function. */
static inline void veth_stack_push(struct veth_lpar_connection *cnx,
                                   struct veth_msg *msg)
{
        msg->next = cnx->msg_stack_head;
        cnx->msg_stack_head = msg;
}

/* You must hold the connection's lock when you call this function. */
static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx)
{
        struct veth_msg *msg;

        msg = cnx->msg_stack_head;
        if (msg)
                cnx->msg_stack_head = cnx->msg_stack_head->next;

        return msg;
}

/* You must hold the connection's lock when you call this function. */
static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx)
{
        return cnx->msg_stack_head == NULL;
}

static inline HvLpEvent_Rc
veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype,
                 HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype,
                 u64 token,
                 u64 data1, u64 data2, u64 data3, u64 data4, u64 data5)
{
        return HvCallEvent_signalLpEventFast(cnx->remote_lp,
                                             HvLpEvent_Type_VirtualLan,
                                             subtype, ackind, acktype,
                                             cnx->src_inst,
                                             cnx->dst_inst,
                                             token, data1, data2, data3,
                                             data4, data5);
}

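/*
 * Pack a 40-byte payload (a struct VethFramesData, or the pending_acks
 * array) into the five u64 data words of a single LP event.
 */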
static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx,
                                           u16 subtype, u64 token, void *data)
{
        u64 *p = (u64 *) data;

        return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck,
                                HvLpEvent_AckType_ImmediateAck,
                                token, p[0], p[1], p[2], p[3], p[4]);
}

struct veth_allocation {
        struct completion c;
        int num;
};

static void veth_complete_allocation(void *parm, int number)
{
        struct veth_allocation *vc = (struct veth_allocation *)parm;

        vc->num = number;
        complete(&vc->c);
}

static int veth_allocate_events(HvLpIndex rlp, int number)
{
        struct veth_allocation vc = { COMPLETION_INITIALIZER(vc.c), 0 };

        mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan,
                            sizeof(struct VethLpEvent), number,
                            &veth_complete_allocation, &vc);
        wait_for_completion(&vc.c);

        return vc.num;
}

/*
 * sysfs support
 */

struct veth_cnx_attribute {
        struct attribute attr;
        ssize_t (*show)(struct veth_lpar_connection *, char *buf);
        ssize_t (*store)(struct veth_lpar_connection *, const char *buf);
};

static ssize_t veth_cnx_attribute_show(struct kobject *kobj,
                struct attribute *attr, char *buf)
{
        struct veth_cnx_attribute *cnx_attr;
        struct veth_lpar_connection *cnx;

        cnx_attr = container_of(attr, struct veth_cnx_attribute, attr);
        cnx = container_of(kobj, struct veth_lpar_connection, kobject);

        if (!cnx_attr->show)
                return -EIO;

        return cnx_attr->show(cnx, buf);
}

#define CUSTOM_CNX_ATTR(_name, _format, _expression)                    \
static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\
{                                                                       \
        return sprintf(buf, _format, _expression);                      \
}                                                                       \
struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name)

#define SIMPLE_CNX_ATTR(_name)  \
        CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name)

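/*
 * For illustration, SIMPLE_CNX_ATTR(remote_lp) expands to roughly:
 *
 *   static ssize_t remote_lp_show(struct veth_lpar_connection *cnx,
 *                                 char *buf)
 *   {
 *           return sprintf(buf, "%lu\n", (unsigned long)cnx->remote_lp);
 *   }
 *   struct veth_cnx_attribute veth_cnx_attr_remote_lp =
 *           __ATTR_RO(remote_lp);
 */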
SIMPLE_CNX_ATTR(outstanding_tx);
SIMPLE_CNX_ATTR(remote_lp);
SIMPLE_CNX_ATTR(num_events);
SIMPLE_CNX_ATTR(src_inst);
SIMPLE_CNX_ATTR(dst_inst);
SIMPLE_CNX_ATTR(num_pending_acks);
SIMPLE_CNX_ATTR(num_ack_events);
CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout));
CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout));
CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state);
CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ?
                jiffies_to_msecs(jiffies - cnx->last_contact) : 0);

#define GET_CNX_ATTR(_name)     (&veth_cnx_attr_##_name.attr)

static struct attribute *veth_cnx_default_attrs[] = {
        GET_CNX_ATTR(outstanding_tx),
        GET_CNX_ATTR(remote_lp),
        GET_CNX_ATTR(num_events),
        GET_CNX_ATTR(reset_timeout),
        GET_CNX_ATTR(last_contact),
        GET_CNX_ATTR(state),
        GET_CNX_ATTR(src_inst),
        GET_CNX_ATTR(dst_inst),
        GET_CNX_ATTR(num_pending_acks),
        GET_CNX_ATTR(num_ack_events),
        GET_CNX_ATTR(ack_timeout),
        NULL
};

static struct sysfs_ops veth_cnx_sysfs_ops = {
        .show = veth_cnx_attribute_show
};

static struct kobj_type veth_lpar_connection_ktype = {
        .release        = veth_release_connection,
        .sysfs_ops      = &veth_cnx_sysfs_ops,
        .default_attrs  = veth_cnx_default_attrs
};

/*
 * LPAR connection code
 */

static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx)
{
        schedule_work(&cnx->statemachine_wq);
}

static void veth_take_cap(struct veth_lpar_connection *cnx,
                          struct VethLpEvent *event)
{
        unsigned long flags;

        spin_lock_irqsave(&cnx->lock, flags);
        /* Receiving caps may mean the other end has just come up, so
         * we need to reload the instance ID of the far end */
        cnx->dst_inst =
                HvCallEvent_getTargetLpInstanceId(cnx->remote_lp,
                                                  HvLpEvent_Type_VirtualLan);

        if (cnx->state & VETH_STATE_GOTCAPS) {
                veth_error("Received a second capabilities message from LPAR %d.\n",
                           cnx->remote_lp);
                event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable;
                HvCallEvent_ackLpEvent((struct HvLpEvent *) event);
        } else {
                memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event));
                cnx->state |= VETH_STATE_GOTCAPS;
                veth_kick_statemachine(cnx);
        }
        spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_take_cap_ack(struct veth_lpar_connection *cnx,
                              struct VethLpEvent *event)
{
        unsigned long flags;

        spin_lock_irqsave(&cnx->lock, flags);
        if (cnx->state & VETH_STATE_GOTCAPACK) {
                veth_error("Received a second capabilities ack from LPAR %d.\n",
                           cnx->remote_lp);
        } else {
                memcpy(&cnx->cap_ack_event, event,
                       sizeof(cnx->cap_ack_event));
                cnx->state |= VETH_STATE_GOTCAPACK;
                veth_kick_statemachine(cnx);
        }
        spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_take_monitor_ack(struct veth_lpar_connection *cnx,
                                  struct VethLpEvent *event)
{
        unsigned long flags;

        spin_lock_irqsave(&cnx->lock, flags);
        veth_debug("cnx %d: lost connection.\n", cnx->remote_lp);

        /* Avoid kicking the statemachine once we're shutdown.
         * It's unnecessary and it could break veth_stop_connection(). */

        if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
                cnx->state |= VETH_STATE_RESET;
                veth_kick_statemachine(cnx);
        }
        spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_handle_ack(struct VethLpEvent *event)
{
        HvLpIndex rlp = event->base_event.xTargetLp;
        struct veth_lpar_connection *cnx = veth_cnx[rlp];

        BUG_ON(! cnx);

        switch (event->base_event.xSubtype) {
        case VethEventTypeCap:
                veth_take_cap_ack(cnx, event);
                break;
        case VethEventTypeMonitor:
                veth_take_monitor_ack(cnx, event);
                break;
        default:
                veth_error("Unknown ack type %d from LPAR %d.\n",
                                event->base_event.xSubtype, rlp);
        }
}

static void veth_handle_int(struct VethLpEvent *event)
{
        HvLpIndex rlp = event->base_event.xSourceLp;
        struct veth_lpar_connection *cnx = veth_cnx[rlp];
        unsigned long flags;
        int i, acked = 0;

        BUG_ON(! cnx);

        switch (event->base_event.xSubtype) {
        case VethEventTypeCap:
                veth_take_cap(cnx, event);
                break;
        case VethEventTypeMonitor:
                /* do nothing... this'll hang out here til we're dead,
                 * and the hypervisor will return it for us. */
                break;
        case VethEventTypeFramesAck:
                spin_lock_irqsave(&cnx->lock, flags);

                for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) {
                        u16 msgnum = event->u.frames_ack_data.token[i];

                        if (msgnum < VETH_NUMBUFFERS) {
                                veth_recycle_msg(cnx, cnx->msgs + msgnum);
                                cnx->outstanding_tx--;
                                acked++;
                        }
                }

                if (acked > 0) {
                        cnx->last_contact = jiffies;
                        veth_wake_queues(cnx);
                }

                spin_unlock_irqrestore(&cnx->lock, flags);
                break;
        case VethEventTypeFrames:
                veth_receive(cnx, event);
                break;
        default:
                veth_error("Unknown interrupt type %d from LPAR %d.\n",
                                event->base_event.xSubtype, rlp);
        }
}

static void veth_handle_event(struct HvLpEvent *event, struct pt_regs *regs)
{
        struct VethLpEvent *veth_event = (struct VethLpEvent *)event;

        if (event->xFlags.xFunction == HvLpEvent_Function_Ack)
                veth_handle_ack(veth_event);
        else if (event->xFlags.xFunction == HvLpEvent_Function_Int)
                veth_handle_int(veth_event);
}

static int veth_process_caps(struct veth_lpar_connection *cnx)
{
        struct VethCapData *remote_caps = &cnx->remote_caps;
        int num_acks_needed;

        /* Convert timer to jiffies */
        cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000;

        if ( (remote_caps->num_buffers == 0)
             || (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG)
             || (remote_caps->ack_threshold == 0)
             || (cnx->ack_timeout == 0) ) {
                veth_error("Received incompatible capabilities from LPAR %d.\n",
                                cnx->remote_lp);
                return HvLpEvent_Rc_InvalidSubtypeData;
        }

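        /*
         * Example: a remote end advertising our own defaults (120
         * buffers, ack_threshold of 20) needs 120/20 + 1 = 7 ack
         * events allocated here.
         */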
        num_acks_needed = (remote_caps->num_buffers
                           / remote_caps->ack_threshold) + 1;

        /* FIXME: locking on num_ack_events? */
        if (cnx->num_ack_events < num_acks_needed) {
                int num;

                num = veth_allocate_events(cnx->remote_lp,
                                           num_acks_needed-cnx->num_ack_events);
                if (num > 0)
                        cnx->num_ack_events += num;

                if (cnx->num_ack_events < num_acks_needed) {
                        veth_error("Couldn't allocate enough ack events "
                                        "for LPAR %d.\n", cnx->remote_lp);

                        return HvLpEvent_Rc_BufferNotAvailable;
                }
        }

        return HvLpEvent_Rc_Good;
}

/* FIXME: The gotos here are a bit dubious */
static void veth_statemachine(void *p)
{
        struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)p;
        int rlp = cnx->remote_lp;
        int rc;

        spin_lock_irq(&cnx->lock);

 restart:
        if (cnx->state & VETH_STATE_RESET) {
                if (cnx->state & VETH_STATE_OPEN)
                        HvCallEvent_closeLpEventPath(cnx->remote_lp,
                                                     HvLpEvent_Type_VirtualLan);

                /*
                 * Reset ack data. This prevents the ack_timer actually
                 * doing anything, even if it runs one more time when
                 * we drop the lock below.
                 */
                memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));
                cnx->num_pending_acks = 0;

                cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON
                                | VETH_STATE_OPEN | VETH_STATE_SENTCAPS
                                | VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS
                                | VETH_STATE_SENTCAPACK | VETH_STATE_READY);

                /* Clean up any leftover messages */
                if (cnx->msgs) {
                        int i;
                        for (i = 0; i < VETH_NUMBUFFERS; ++i)
                                veth_recycle_msg(cnx, cnx->msgs + i);
                }

                cnx->outstanding_tx = 0;
                veth_wake_queues(cnx);

                /* Drop the lock so we can do stuff that might sleep or
                 * take other locks. */
                spin_unlock_irq(&cnx->lock);

                del_timer_sync(&cnx->ack_timer);
                del_timer_sync(&cnx->reset_timer);

                spin_lock_irq(&cnx->lock);

                if (cnx->state & VETH_STATE_RESET)
                        goto restart;

                /* Hack, wait for the other end to reset itself. */
                if (! (cnx->state & VETH_STATE_SHUTDOWN)) {
                        schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ);
                        goto out;
                }
        }

        if (cnx->state & VETH_STATE_SHUTDOWN)
                /* It's all over, do nothing */
                goto out;

        if ( !(cnx->state & VETH_STATE_OPEN) ) {
                if (! cnx->msgs || (cnx->num_events < (2 + VETH_NUMBUFFERS)) )
                        goto cant_cope;

                HvCallEvent_openLpEventPath(rlp, HvLpEvent_Type_VirtualLan);
                cnx->src_inst =
                        HvCallEvent_getSourceLpInstanceId(rlp,
                                                          HvLpEvent_Type_VirtualLan);
                cnx->dst_inst =
                        HvCallEvent_getTargetLpInstanceId(rlp,
                                                          HvLpEvent_Type_VirtualLan);
                cnx->state |= VETH_STATE_OPEN;
        }

        if ( (cnx->state & VETH_STATE_OPEN)
             && !(cnx->state & VETH_STATE_SENTMON) ) {
                rc = veth_signalevent(cnx, VethEventTypeMonitor,
                                      HvLpEvent_AckInd_DoAck,
                                      HvLpEvent_AckType_DeferredAck,
                                      0, 0, 0, 0, 0, 0);

                if (rc == HvLpEvent_Rc_Good) {
                        cnx->state |= VETH_STATE_SENTMON;
                } else {
                        if ( (rc != HvLpEvent_Rc_PartitionDead)
                             && (rc != HvLpEvent_Rc_PathClosed) )
                                veth_error("Error sending monitor to LPAR %d, "
                                                "rc = %d\n", rlp, rc);

                        /* Oh well, hope we get a cap from the other
                         * end and do better when that kicks us */
                        goto out;
                }
        }

        if ( (cnx->state & VETH_STATE_OPEN)
             && !(cnx->state & VETH_STATE_SENTCAPS)) {
                u64 *rawcap = (u64 *)&cnx->local_caps;

                rc = veth_signalevent(cnx, VethEventTypeCap,
                                      HvLpEvent_AckInd_DoAck,
                                      HvLpEvent_AckType_ImmediateAck,
                                      0, rawcap[0], rawcap[1], rawcap[2],
                                      rawcap[3], rawcap[4]);

                if (rc == HvLpEvent_Rc_Good) {
                        cnx->state |= VETH_STATE_SENTCAPS;
                } else {
                        if ( (rc != HvLpEvent_Rc_PartitionDead)
                             && (rc != HvLpEvent_Rc_PathClosed) )
                                veth_error("Error sending caps to LPAR %d, "
                                                "rc = %d\n", rlp, rc);

                        /* Oh well, hope we get a cap from the other
                         * end and do better when that kicks us */
                        goto out;
                }
        }

        if ((cnx->state & VETH_STATE_GOTCAPS)
            && !(cnx->state & VETH_STATE_SENTCAPACK)) {
                struct VethCapData *remote_caps = &cnx->remote_caps;

                memcpy(remote_caps, &cnx->cap_event.u.caps_data,
                       sizeof(*remote_caps));

                spin_unlock_irq(&cnx->lock);
                rc = veth_process_caps(cnx);
                spin_lock_irq(&cnx->lock);

                /* We dropped the lock, so recheck for anything which
                 * might mess us up */
                if (cnx->state & (VETH_STATE_RESET|VETH_STATE_SHUTDOWN))
                        goto restart;

                cnx->cap_event.base_event.xRc = rc;
                HvCallEvent_ackLpEvent((struct HvLpEvent *)&cnx->cap_event);
                if (rc == HvLpEvent_Rc_Good)
                        cnx->state |= VETH_STATE_SENTCAPACK;
                else
                        goto cant_cope;
        }

        if ((cnx->state & VETH_STATE_GOTCAPACK)
            && (cnx->state & VETH_STATE_GOTCAPS)
            && !(cnx->state & VETH_STATE_READY)) {
                if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) {
                        /* Start the ACK timer */
                        cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
                        add_timer(&cnx->ack_timer);
                        cnx->state |= VETH_STATE_READY;
                } else {
                        veth_error("Caps rejected by LPAR %d, rc = %d\n",
                                        rlp, cnx->cap_ack_event.base_event.xRc);
                        goto cant_cope;
                }
        }

 out:
        spin_unlock_irq(&cnx->lock);
        return;

 cant_cope:
        /* FIXME: we get here if something happens we really can't
         * cope with.  The link will never work once we get here, and
         * all we can do is not lock the rest of the system up */
        veth_error("Unrecoverable error on connection to LPAR %d, shutting down"
                        " (state = 0x%04lx)\n", rlp, cnx->state);
        cnx->state |= VETH_STATE_SHUTDOWN;
        spin_unlock_irq(&cnx->lock);
}

static int veth_init_connection(u8 rlp)
{
        struct veth_lpar_connection *cnx;
        struct veth_msg *msgs;
        int i, rc;

        if ( (rlp == this_lp)
             || ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) )
                return 0;

        cnx = kmalloc(sizeof(*cnx), GFP_KERNEL);
        if (! cnx)
                return -ENOMEM;
        memset(cnx, 0, sizeof(*cnx));

        cnx->remote_lp = rlp;
        spin_lock_init(&cnx->lock);
        INIT_WORK(&cnx->statemachine_wq, veth_statemachine, cnx);

        init_timer(&cnx->ack_timer);
        cnx->ack_timer.function = veth_timed_ack;
        cnx->ack_timer.data = (unsigned long) cnx;

        init_timer(&cnx->reset_timer);
        cnx->reset_timer.function = veth_timed_reset;
        cnx->reset_timer.data = (unsigned long) cnx;
        cnx->reset_timeout = 5 * HZ * (VETH_ACKTIMEOUT / 1000000);

        memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks));

        veth_cnx[rlp] = cnx;

        /* This gets us 1 reference, which is held on behalf of the driver
         * infrastructure. It's released at module unload. */
        kobject_init(&cnx->kobject);
        cnx->kobject.ktype = &veth_lpar_connection_ktype;
        rc = kobject_set_name(&cnx->kobject, "cnx%.2d", rlp);
        if (rc != 0)
                return rc;

        msgs = kmalloc(VETH_NUMBUFFERS * sizeof(struct veth_msg), GFP_KERNEL);
        if (! msgs) {
                veth_error("Can't allocate buffers for LPAR %d.\n", rlp);
                return -ENOMEM;
        }

        cnx->msgs = msgs;
        memset(msgs, 0, VETH_NUMBUFFERS * sizeof(struct veth_msg));

        for (i = 0; i < VETH_NUMBUFFERS; i++) {
                msgs[i].token = i;
                veth_stack_push(cnx, msgs + i);
        }

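        /* The two extra events appear to cover the monitor and
         * capabilities events, on top of one event per frame buffer. */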
        cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS);

        if (cnx->num_events < (2 + VETH_NUMBUFFERS)) {
                veth_error("Can't allocate enough events for LPAR %d.\n", rlp);
                return -ENOMEM;
        }

        cnx->local_caps.num_buffers = VETH_NUMBUFFERS;
        cnx->local_caps.ack_threshold = ACK_THRESHOLD;
        cnx->local_caps.ack_timeout = VETH_ACKTIMEOUT;

        return 0;
}

static void veth_stop_connection(struct veth_lpar_connection *cnx)
{
        if (!cnx)
                return;

        spin_lock_irq(&cnx->lock);
        cnx->state |= VETH_STATE_RESET | VETH_STATE_SHUTDOWN;
        veth_kick_statemachine(cnx);
        spin_unlock_irq(&cnx->lock);

        /* There's a slim chance the reset code has just queued the
         * statemachine to run in five seconds. If so we need to cancel
         * that and requeue the work to run now. */
        if (cancel_delayed_work(&cnx->statemachine_wq)) {
                spin_lock_irq(&cnx->lock);
                veth_kick_statemachine(cnx);
                spin_unlock_irq(&cnx->lock);
        }

        /* Wait for the state machine to run. */
        flush_scheduled_work();
}

static void veth_destroy_connection(struct veth_lpar_connection *cnx)
{
        if (!cnx)
                return;

        if (cnx->num_events > 0)
                mf_deallocate_lp_events(cnx->remote_lp,
                                      HvLpEvent_Type_VirtualLan,
                                      cnx->num_events,
                                      NULL, NULL);
        if (cnx->num_ack_events > 0)
                mf_deallocate_lp_events(cnx->remote_lp,
                                      HvLpEvent_Type_VirtualLan,
                                      cnx->num_ack_events,
                                      NULL, NULL);

        kfree(cnx->msgs);
        veth_cnx[cnx->remote_lp] = NULL;
        kfree(cnx);
}

static void veth_release_connection(struct kobject *kobj)
{
        struct veth_lpar_connection *cnx;
        cnx = container_of(kobj, struct veth_lpar_connection, kobject);
        veth_stop_connection(cnx);
        veth_destroy_connection(cnx);
}

/*
 * net_device code
 */

static int veth_open(struct net_device *dev)
{
        struct veth_port *port = (struct veth_port *) dev->priv;

        memset(&port->stats, 0, sizeof (port->stats));
        netif_start_queue(dev);
        return 0;
}

static int veth_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}

static struct net_device_stats *veth_get_stats(struct net_device *dev)
{
        struct veth_port *port = (struct veth_port *) dev->priv;

        return &port->stats;
}

static int veth_change_mtu(struct net_device *dev, int new_mtu)
{
        if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU))
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}

static void veth_set_multicast_list(struct net_device *dev)
{
        struct veth_port *port = (struct veth_port *) dev->priv;
        unsigned long flags;

        write_lock_irqsave(&port->mcast_gate, flags);

        if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) ||
                        (dev->mc_count > VETH_MAX_MCAST)) {
                port->promiscuous = 1;
        } else {
                struct dev_mc_list *dmi = dev->mc_list;
                int i;

                port->promiscuous = 0;

                /* Update table */
                port->num_mcast = 0;

                for (i = 0; i < dev->mc_count; i++) {
                        u8 *addr = dmi->dmi_addr;
                        u64 xaddr = 0;

                        if (addr[0] & 0x01) {/* multicast address? */
                                memcpy(&xaddr, addr, ETH_ALEN);
                                port->mcast_addr[port->num_mcast] = xaddr;
                                port->num_mcast++;
                        }
                        dmi = dmi->next;
                }
        }

        write_unlock_irqrestore(&port->mcast_gate, flags);
}

static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
        strncpy(info->driver, "veth", sizeof(info->driver) - 1);
        info->driver[sizeof(info->driver) - 1] = '\0';
        strncpy(info->version, "1.0", sizeof(info->version) - 1);
}

static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
{
        ecmd->supported = (SUPPORTED_1000baseT_Full
                          | SUPPORTED_Autoneg | SUPPORTED_FIBRE);
        ecmd->advertising = (ADVERTISED_1000baseT_Full
                            | ADVERTISED_Autoneg | ADVERTISED_FIBRE);
        ecmd->port = PORT_FIBRE;
        ecmd->transceiver = XCVR_INTERNAL;
        ecmd->phy_address = 0;
        ecmd->speed = SPEED_1000;
        ecmd->duplex = DUPLEX_FULL;
        ecmd->autoneg = AUTONEG_ENABLE;
        ecmd->maxtxpkt = 120;
        ecmd->maxrxpkt = 120;
        return 0;
}

static u32 veth_get_link(struct net_device *dev)
{
        return 1;
}

static struct ethtool_ops ops = {
        .get_drvinfo = veth_get_drvinfo,
        .get_settings = veth_get_settings,
        .get_link = veth_get_link,
};

static struct net_device * __init veth_probe_one(int vlan, struct device *vdev)
{
        struct net_device *dev;
        struct veth_port *port;
        int i, rc;

        dev = alloc_etherdev(sizeof (struct veth_port));
        if (! dev) {
                veth_error("Unable to allocate net_device structure!\n");
                return NULL;
        }

        port = (struct veth_port *) dev->priv;

        spin_lock_init(&port->queue_lock);
        rwlock_init(&port->mcast_gate);
        port->stopped_map = 0;

        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
                HvLpVirtualLanIndexMap map;

                if (i == this_lp)
                        continue;
                map = HvLpConfig_getVirtualLanIndexMapForLp(i);
                if (map & (0x8000 >> vlan))
                        port->lpar_map |= (1 << i);
        }
        port->dev = vdev;

        dev->dev_addr[0] = 0x02;
        dev->dev_addr[1] = 0x01;
        dev->dev_addr[2] = 0xff;
        dev->dev_addr[3] = vlan;
        dev->dev_addr[4] = 0xff;
        dev->dev_addr[5] = this_lp;

        dev->mtu = VETH_MAX_MTU;

        memcpy(&port->mac_addr, dev->dev_addr, 6);

        dev->open = veth_open;
        dev->hard_start_xmit = veth_start_xmit;
        dev->stop = veth_close;
        dev->get_stats = veth_get_stats;
        dev->change_mtu = veth_change_mtu;
        dev->set_mac_address = NULL;
        dev->set_multicast_list = veth_set_multicast_list;
        SET_ETHTOOL_OPS(dev, &ops);

        SET_NETDEV_DEV(dev, vdev);

        rc = register_netdev(dev);
        if (rc != 0) {
                veth_error("Failed registering net device for vlan%d.\n", vlan);
                free_netdev(dev);
                return NULL;
        }

        veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n",
                        dev->name, vlan, port->lpar_map);

        return dev;
}

/*
 * Tx path
 */

static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp,
                                struct net_device *dev)
{
        struct veth_lpar_connection *cnx = veth_cnx[rlp];
        struct veth_port *port = (struct veth_port *) dev->priv;
        HvLpEvent_Rc rc;
        struct veth_msg *msg = NULL;
        unsigned long flags;

        if (! cnx)
                return 0;

        spin_lock_irqsave(&cnx->lock, flags);

        if (! (cnx->state & VETH_STATE_READY))
                goto no_error;

        if ((skb->len - ETH_HLEN) > VETH_MAX_MTU)
                goto drop;

        msg = veth_stack_pop(cnx);
        if (! msg)
                goto drop;

        msg->in_use = 1;
        msg->skb = skb_get(skb);

        msg->data.addr[0] = dma_map_single(port->dev, skb->data,
                                skb->len, DMA_TO_DEVICE);

        if (dma_mapping_error(msg->data.addr[0]))
                goto recycle_and_drop;

        msg->dev = port->dev;
        msg->data.len[0] = skb->len;
        msg->data.eofmask = 1 << VETH_EOF_SHIFT;

        rc = veth_signaldata(cnx, VethEventTypeFrames, msg->token, &msg->data);

        if (rc != HvLpEvent_Rc_Good)
                goto recycle_and_drop;

        /* If the timer's not already running, start it now. */
        if (0 == cnx->outstanding_tx)
                mod_timer(&cnx->reset_timer, jiffies + cnx->reset_timeout);

        cnx->last_contact = jiffies;
        cnx->outstanding_tx++;

        if (veth_stack_is_empty(cnx))
                veth_stop_queues(cnx);

 no_error:
        spin_unlock_irqrestore(&cnx->lock, flags);
        return 0;

 recycle_and_drop:
        veth_recycle_msg(cnx, msg);
 drop:
        spin_unlock_irqrestore(&cnx->lock, flags);
        return 1;
}

static void veth_transmit_to_many(struct sk_buff *skb,
                                          HvLpIndexMap lpmask,
                                          struct net_device *dev)
{
        struct veth_port *port = (struct veth_port *) dev->priv;
        int i, success, error;

        success = error = 0;

        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
                if ((lpmask & (1 << i)) == 0)
                        continue;

                if (veth_transmit_to_one(skb, i, dev))
                        error = 1;
                else
                        success = 1;
        }

        if (error)
                port->stats.tx_errors++;

        if (success) {
                port->stats.tx_packets++;
                port->stats.tx_bytes += skb->len;
        }
}

static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
        unsigned char *frame = skb->data;
        struct veth_port *port = (struct veth_port *) dev->priv;
        HvLpIndexMap lpmask;

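        /*
         * The low byte of a virtual ethernet MAC address is the LPAR
         * index (see veth_probe_one()), so for unicast frames the
         * destination's frame[5] names the target partition directly.
         */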
        if (! (frame[0] & 0x01)) {
                /* unicast packet */
                HvLpIndex rlp = frame[5];

                if ( ! ((1 << rlp) & port->lpar_map) ) {
                        dev_kfree_skb(skb);
                        return 0;
                }

                lpmask = 1 << rlp;
        } else {
                lpmask = port->lpar_map;
        }

        veth_transmit_to_many(skb, lpmask, dev);

        dev_kfree_skb(skb);

        return 0;
}

/* You must hold the connection's lock when you call this function. */
static void veth_recycle_msg(struct veth_lpar_connection *cnx,
                             struct veth_msg *msg)
{
        u32 dma_address, dma_length;

        if (msg->in_use) {
                msg->in_use = 0;
                dma_address = msg->data.addr[0];
                dma_length = msg->data.len[0];

                if (!dma_mapping_error(dma_address))
                        dma_unmap_single(msg->dev, dma_address, dma_length,
                                        DMA_TO_DEVICE);

                if (msg->skb) {
                        dev_kfree_skb_any(msg->skb);
                        msg->skb = NULL;
                }

                memset(&msg->data, 0, sizeof(msg->data));
                veth_stack_push(cnx, msg);
        } else if (cnx->state & VETH_STATE_OPEN) {
                veth_error("Non-pending frame (# %d) acked by LPAR %d.\n",
                                msg->token, cnx->remote_lp);
        }
}

static void veth_wake_queues(struct veth_lpar_connection *cnx)
{
        int i;

        for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
                struct net_device *dev = veth_dev[i];
                struct veth_port *port;
                unsigned long flags;

                if (! dev)
                        continue;

                port = (struct veth_port *)dev->priv;

                if (! (port->lpar_map & (1<<cnx->remote_lp)))
                        continue;

                spin_lock_irqsave(&port->queue_lock, flags);

                port->stopped_map &= ~(1 << cnx->remote_lp);

                if (0 == port->stopped_map && netif_queue_stopped(dev)) {
                        veth_debug("cnx %d: woke queue for %s.\n",
                                        cnx->remote_lp, dev->name);
                        netif_wake_queue(dev);
                }
                spin_unlock_irqrestore(&port->queue_lock, flags);
        }
}

static void veth_stop_queues(struct veth_lpar_connection *cnx)
{
        int i;

        for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
                struct net_device *dev = veth_dev[i];
                struct veth_port *port;

                if (! dev)
                        continue;

                port = (struct veth_port *)dev->priv;

                /* If this cnx is not on the vlan for this port, continue */
                if (! (port->lpar_map & (1 << cnx->remote_lp)))
                        continue;

                spin_lock(&port->queue_lock);

                netif_stop_queue(dev);
                port->stopped_map |= (1 << cnx->remote_lp);

                veth_debug("cnx %d: stopped queue for %s, map = 0x%x.\n",
                                cnx->remote_lp, dev->name, port->stopped_map);

                spin_unlock(&port->queue_lock);
        }
}

static void veth_timed_reset(unsigned long ptr)
{
        struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)ptr;
        unsigned long trigger_time, flags;

        /* FIXME is it possible this fires after veth_stop_connection()?
         * That would reschedule the statemachine for 5 seconds and probably
         * execute it after the module's been unloaded. Hmm. */

        spin_lock_irqsave(&cnx->lock, flags);

        if (cnx->outstanding_tx > 0) {
                trigger_time = cnx->last_contact + cnx->reset_timeout;

                if (trigger_time < jiffies) {
                        cnx->state |= VETH_STATE_RESET;
                        veth_kick_statemachine(cnx);
                        veth_error("%d packets not acked by LPAR %d within %d "
                                        "seconds, resetting.\n",
                                        cnx->outstanding_tx, cnx->remote_lp,
                                        cnx->reset_timeout / HZ);
                } else {
                        /* Reschedule the timer */
                        trigger_time = jiffies + cnx->reset_timeout;
                        mod_timer(&cnx->reset_timer, trigger_time);
                }
        }

        spin_unlock_irqrestore(&cnx->lock, flags);
}

/*
 * Rx path
 */

static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr)
{
        int wanted = 0;
        int i;
        unsigned long flags;

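        /* MAC addresses are handled here as u64s with the six address
         * bytes in the high-order bytes (this is a big-endian platform),
         * so the all-ones broadcast address is 0xffffffffffff0000. */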
        if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) )
                return 1;

        read_lock_irqsave(&port->mcast_gate, flags);

        if (port->promiscuous) {
                wanted = 1;
                goto out;
        }

        for (i = 0; i < port->num_mcast; ++i) {
                if (port->mcast_addr[i] == mac_addr) {
                        wanted = 1;
                        break;
                }
        }

 out:
        read_unlock_irqrestore(&port->mcast_gate, flags);

        return wanted;
}

struct dma_chunk {
        u64 addr;
        u64 size;
};

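/*
 * Worst case a frame starts mid-page and straddles page boundaries;
 * assuming 4K pages and the 9000 byte maximum MTU, this evaluates to
 * 4 chunks per frame.
 */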
#define VETH_MAX_PAGES_PER_FRAME ( (VETH_MAX_MTU+PAGE_SIZE-2)/PAGE_SIZE + 1 )

static inline void veth_build_dma_list(struct dma_chunk *list,
                                       unsigned char *p, unsigned long length)
{
        unsigned long done;
        int i = 1;

        /* FIXME: skbs are contiguous in real addresses.  Do we
         * really need to break it into PAGE_SIZE chunks, or can we do
         * it just at the granularity of iSeries real->absolute
         * mapping?  Indeed, given the way the allocator works, can we
         * count on them being absolutely contiguous? */
        list[0].addr = ISERIES_HV_ADDR(p);
        list[0].size = min(length,
                           PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK));

        done = list[0].size;
        while (done < length) {
                list[i].addr = ISERIES_HV_ADDR(p + done);
                list[i].size = min(length-done, PAGE_SIZE);
                done += list[i].size;
                i++;
        }
}

static void veth_flush_acks(struct veth_lpar_connection *cnx)
{
        HvLpEvent_Rc rc;

        rc = veth_signaldata(cnx, VethEventTypeFramesAck,
                             0, &cnx->pending_acks);

        if (rc != HvLpEvent_Rc_Good)
                veth_error("Failed acking frames from LPAR %d, rc = %d\n",
                                cnx->remote_lp, (int)rc);

        cnx->num_pending_acks = 0;
        memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks));
}

static void veth_receive(struct veth_lpar_connection *cnx,
                         struct VethLpEvent *event)
{
        struct VethFramesData *senddata = &event->u.frames_data;
        int startchunk = 0;
        int nchunks;
        unsigned long flags;
        HvLpDma_Rc rc;

        do {
                u16 length = 0;
                struct sk_buff *skb;
                struct dma_chunk local_list[VETH_MAX_PAGES_PER_FRAME];
                struct dma_chunk remote_list[VETH_MAX_FRAMES_PER_MSG];
                u64 dest;
                HvLpVirtualLanIndex vlan;
                struct net_device *dev;
                struct veth_port *port;

                /* FIXME: do we need this? */
                memset(local_list, 0, sizeof(local_list));
                memset(remote_list, 0, sizeof(remote_list));

                /* a 0 address marks the end of the valid entries */
                if (senddata->addr[startchunk] == 0)
                        break;

                /* make sure that we have at least 1 EOF entry in the
                 * remaining entries */
                if (! (senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) {
                        veth_error("Missing EOF fragment in event "
                                        "eofmask = 0x%x startchunk = %d\n",
                                        (unsigned)senddata->eofmask,
                                        startchunk);
                        break;
                }

                /* build list of chunks in this frame */
                nchunks = 0;
                do {
                        remote_list[nchunks].addr =
                                (u64) senddata->addr[startchunk+nchunks] << 32;
                        remote_list[nchunks].size =
                                senddata->len[startchunk+nchunks];
                        length += remote_list[nchunks].size;
                } while (! (senddata->eofmask &
                            (1 << (VETH_EOF_SHIFT + startchunk + nchunks++))));

                /* length == total length of all chunks */
                /* nchunks == # of chunks in this frame */

                if ((length - ETH_HLEN) > VETH_MAX_MTU) {
                        veth_error("Received oversize frame from LPAR %d "
                                        "(length = %d)\n",
                                        cnx->remote_lp, length);
                        continue;
                }

                skb = alloc_skb(length, GFP_ATOMIC);
                if (!skb)
                        continue;

                veth_build_dma_list(local_list, skb->data, length);

                rc = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan,
                                            event->base_event.xSourceLp,
                                            HvLpDma_Direction_RemoteToLocal,
                                            cnx->src_inst,
                                            cnx->dst_inst,
                                            HvLpDma_AddressType_RealAddress,
                                            HvLpDma_AddressType_TceIndex,
                                            ISERIES_HV_ADDR(&local_list),
                                            ISERIES_HV_ADDR(&remote_list),
                                            length);
                if (rc != HvLpDma_Rc_Good) {
                        dev_kfree_skb_irq(skb);
                        continue;
                }

                vlan = skb->data[9];
                if (vlan >= HVMAXARCHITECTEDVIRTUALLANS) {
                        dev_kfree_skb_irq(skb);
                        continue;
                }

                dev = veth_dev[vlan];
                if (! dev) {
                        /*
                         * Some earlier versions of the driver sent
                         * broadcasts down all connections, even to lpars
                         * that weren't on the relevant vlan. So ignore
                         * packets belonging to a vlan we're not on.
                         * We can also be here if we receive packets while
                         * the driver is going down, because then dev is NULL.
                         */
                        dev_kfree_skb_irq(skb);
                        continue;
                }

                port = (struct veth_port *)dev->priv;
                dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000;

                if (! veth_frame_wanted(port, dest)) {
                        dev_kfree_skb_irq(skb);
                        continue;
                }

                skb_put(skb, length);
                skb->dev = dev;
                skb->protocol = eth_type_trans(skb, dev);
                skb->ip_summed = CHECKSUM_NONE;
                netif_rx(skb);  /* send it up */
                port->stats.rx_packets++;
                port->stats.rx_bytes += length;
        } while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG);

        /* Ack it */
        spin_lock_irqsave(&cnx->lock, flags);
        BUG_ON(cnx->num_pending_acks > VETH_MAX_ACKS_PER_MSG);

        cnx->pending_acks[cnx->num_pending_acks++] =
                event->base_event.xCorrelationToken;

        if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold)
             || (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) )
                veth_flush_acks(cnx);

        spin_unlock_irqrestore(&cnx->lock, flags);
}

static void veth_timed_ack(unsigned long ptr)
{
        struct veth_lpar_connection *cnx = (struct veth_lpar_connection *) ptr;
        unsigned long flags;

        /* Ack all the events */
        spin_lock_irqsave(&cnx->lock, flags);
        if (cnx->num_pending_acks > 0)
                veth_flush_acks(cnx);

        /* Reschedule the timer */
        cnx->ack_timer.expires = jiffies + cnx->ack_timeout;
        add_timer(&cnx->ack_timer);
        spin_unlock_irqrestore(&cnx->lock, flags);
}

static int veth_remove(struct vio_dev *vdev)
{
        struct veth_lpar_connection *cnx;
        struct net_device *dev;
        struct veth_port *port;
        int i;

        dev = veth_dev[vdev->unit_address];

        if (! dev)
                return 0;

        port = netdev_priv(dev);

        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
                cnx = veth_cnx[i];

                if (cnx && (port->lpar_map & (1 << i))) {
                        /* Drop our reference to connections on our VLAN */
                        kobject_put(&cnx->kobject);
                }
        }

        veth_dev[vdev->unit_address] = NULL;
        unregister_netdev(dev);
        free_netdev(dev);

        return 0;
}

static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
        int i = vdev->unit_address;
        struct net_device *dev;
        struct veth_port *port;

        dev = veth_probe_one(i, &vdev->dev);
        if (dev == NULL) {
                veth_remove(vdev);
                return 1;
        }
        veth_dev[i] = dev;

        port = (struct veth_port*)netdev_priv(dev);

        /* Start the state machine on each connection on this vlan. If we're
         * the first dev to do so this will commence link negotiation */
        for (i = 0; i < HVMAXARCHITECTEDLPS; i++) {
                struct veth_lpar_connection *cnx;

                if (! (port->lpar_map & (1 << i)))
                        continue;

                cnx = veth_cnx[i];
                if (!cnx)
                        continue;

                kobject_get(&cnx->kobject);
                veth_kick_statemachine(cnx);
        }

        return 0;
}

/**
 * veth_device_table: Used by vio.c to match devices that we
 * support.
 */
static struct vio_device_id veth_device_table[] __devinitdata = {
        { "vlan", "" },
        { "", "" }
};
MODULE_DEVICE_TABLE(vio, veth_device_table);

static struct vio_driver veth_driver = {
        .name = "iseries_veth",
        .id_table = veth_device_table,
        .probe = veth_probe,
        .remove = veth_remove
};

/*
 * Module initialization/cleanup
 */

void __exit veth_module_cleanup(void)
{
        int i;
        struct veth_lpar_connection *cnx;

        /* Disconnect our "irq" to stop events coming from the Hypervisor. */
        HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan);

        /* Make sure any work queued from Hypervisor callbacks is finished. */
        flush_scheduled_work();

        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
                cnx = veth_cnx[i];

                if (!cnx)
                        continue;

                /* Remove the connection from sysfs */
                kobject_del(&cnx->kobject);
                /* Drop the driver's reference to the connection */
                kobject_put(&cnx->kobject);
        }

        /* Unregister the driver, which will close all the netdevs and stop
         * the connections when they're no longer referenced. */
        vio_unregister_driver(&veth_driver);
}
module_exit(veth_module_cleanup);

int __init veth_module_init(void)
{
        int i;
        int rc;

        this_lp = HvLpConfig_getLpIndex_outline();

        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
                rc = veth_init_connection(i);
                if (rc != 0)
                        goto error;
        }

        HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan,
                                  &veth_handle_event);

        rc = vio_register_driver(&veth_driver);
        if (rc != 0)
                goto error;

        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
                struct kobject *kobj;

                if (!veth_cnx[i])
                        continue;

                kobj = &veth_cnx[i]->kobject;
                kobj->parent = &veth_driver.driver.kobj;
                /* If the add fails, complain but otherwise continue */
                if (0 != kobject_add(kobj))
                        veth_error("cnx %d: Failed adding to sysfs.\n", i);
        }

        return 0;

error:
        for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) {
                veth_destroy_connection(veth_cnx[i]);
        }

        return rc;
}
module_init(veth_module_init);