net: adding memory barrier to the poll and receive callbacks
[pandora-kernel.git] / net / iucv / af_iucv.c
index a9b3a6f..49c15b4 100644 (file)
@@ -1,11 +1,12 @@
 /*
- *  linux/net/iucv/af_iucv.c
- *
  *  IUCV protocol stack for Linux on zSeries
  *
- *  Copyright 2006 IBM Corporation
+ *  Copyright IBM Corp. 2006, 2009
  *
  *  Author(s): Jennifer Hunt <jenhunt@us.ibm.com>
+ *             Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *  PM functions:
+ *             Ursula Braun <ursula.braun@de.ibm.com>
  */
 
 #define KMSG_COMPONENT "af_iucv"
@@ -53,6 +54,38 @@ static const u8 iprm_shutdown[8] =
 #define CB_TRGCLS(skb) ((skb)->cb + CB_TAG_LEN) /* iucv msg target class */
 #define CB_TRGCLS_LEN  (TRGCLS_SIZE)
 
+#define __iucv_sock_wait(sk, condition, timeo, ret)                    \
+do {                                                                   \
+       DEFINE_WAIT(__wait);                                            \
+       long __timeo = timeo;                                           \
+       ret = 0;                                                        \
+       while (!(condition)) {                                          \
+               prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE); \
+               if (!__timeo) {                                         \
+                       ret = -EAGAIN;                                  \
+                       break;                                          \
+               }                                                       \
+               if (signal_pending(current)) {                          \
+                       ret = sock_intr_errno(__timeo);                 \
+                       break;                                          \
+               }                                                       \
+               release_sock(sk);                                       \
+               __timeo = schedule_timeout(__timeo);                    \
+               lock_sock(sk);                                          \
+               ret = sock_error(sk);                                   \
+               if (ret)                                                \
+                       break;                                          \
+       }                                                               \
+       finish_wait(sk->sk_sleep, &__wait);                             \
+} while (0)
+
+#define iucv_sock_wait(sk, condition, timeo)                           \
+({                                                                     \
+       int __ret = 0;                                                  \
+       if (!(condition))                                               \
+               __iucv_sock_wait(sk, condition, timeo, __ret);          \
+       __ret;                                                          \
+})
 
 static void iucv_sock_kill(struct sock *sk);
 static void iucv_sock_close(struct sock *sk);
@@ -90,6 +123,122 @@ static inline void low_nmcpy(unsigned char *dst, char *src)
        memcpy(&dst[8], src, 8);
 }
 
+static int afiucv_pm_prepare(struct device *dev)
+{
+#ifdef CONFIG_PM_DEBUG
+       printk(KERN_WARNING "afiucv_pm_prepare\n");
+#endif
+       return 0;
+}
+
+static void afiucv_pm_complete(struct device *dev)
+{
+#ifdef CONFIG_PM_DEBUG
+       printk(KERN_WARNING "afiucv_pm_complete\n");
+#endif
+       return;
+}
+
+/**
+ * afiucv_pm_freeze() - Freeze PM callback
+ * @dev:       AFIUCV dummy device
+ *
+ * Sever all established IUCV communication paths
+ */
+static int afiucv_pm_freeze(struct device *dev)
+{
+       struct iucv_sock *iucv;
+       struct sock *sk;
+       struct hlist_node *node;
+       int err = 0;
+
+#ifdef CONFIG_PM_DEBUG
+       printk(KERN_WARNING "afiucv_pm_freeze\n");
+#endif
+       read_lock(&iucv_sk_list.lock);
+       sk_for_each(sk, node, &iucv_sk_list.head) {
+               iucv = iucv_sk(sk);
+               skb_queue_purge(&iucv->send_skb_q);
+               skb_queue_purge(&iucv->backlog_skb_q);
+               switch (sk->sk_state) {
+               case IUCV_SEVERED:
+               case IUCV_DISCONN:
+               case IUCV_CLOSING:
+               case IUCV_CONNECTED:
+                       if (iucv->path) {
+                               err = iucv_path_sever(iucv->path, NULL);
+                               iucv_path_free(iucv->path);
+                               iucv->path = NULL;
+                       }
+                       break;
+               case IUCV_OPEN:
+               case IUCV_BOUND:
+               case IUCV_LISTEN:
+               case IUCV_CLOSED:
+               default:
+                       break;
+               }
+       }
+       read_unlock(&iucv_sk_list.lock);
+       return err;
+}
+
+/**
+ * afiucv_pm_restore_thaw() - Thaw and restore PM callback
+ * @dev:       AFIUCV dummy device
+ *
+ * socket clean up after freeze
+ */
+static int afiucv_pm_restore_thaw(struct device *dev)
+{
+       struct iucv_sock *iucv;
+       struct sock *sk;
+       struct hlist_node *node;
+
+#ifdef CONFIG_PM_DEBUG
+       printk(KERN_WARNING "afiucv_pm_restore_thaw\n");
+#endif
+       read_lock(&iucv_sk_list.lock);
+       sk_for_each(sk, node, &iucv_sk_list.head) {
+               iucv = iucv_sk(sk);
+               switch (sk->sk_state) {
+               case IUCV_CONNECTED:
+                       sk->sk_err = EPIPE;
+                       sk->sk_state = IUCV_DISCONN;
+                       sk->sk_state_change(sk);
+                       break;
+               case IUCV_DISCONN:
+               case IUCV_SEVERED:
+               case IUCV_CLOSING:
+               case IUCV_LISTEN:
+               case IUCV_BOUND:
+               case IUCV_OPEN:
+               default:
+                       break;
+               }
+       }
+       read_unlock(&iucv_sk_list.lock);
+       return 0;
+}
+
+static struct dev_pm_ops afiucv_pm_ops = {
+       .prepare = afiucv_pm_prepare,
+       .complete = afiucv_pm_complete,
+       .freeze = afiucv_pm_freeze,
+       .thaw = afiucv_pm_restore_thaw,
+       .restore = afiucv_pm_restore_thaw,
+};
+
+static struct device_driver af_iucv_driver = {
+       .owner = THIS_MODULE,
+       .name = "afiucv",
+       .bus  = &iucv_bus,
+       .pm   = &afiucv_pm_ops,
+};
+
+/* dummy device used as trigger for PM functions */
+static struct device *af_iucv_dev;
+
 /**
  * iucv_msg_length() - Returns the length of an iucv message.
  * @msg:       Pointer to struct iucv_message, MUST NOT be NULL
@@ -121,6 +270,48 @@ static inline size_t iucv_msg_length(struct iucv_message *msg)
        return msg->length;
 }
 
+/**
+ * iucv_sock_in_state() - check for specific states
+ * @sk:                sock structure
+ * @state:     first iucv sk state
+ * @state2:    second iucv sk state
+ *
+ * Returns true if the socket is either in the first or second state.
+ */
+static int iucv_sock_in_state(struct sock *sk, int state, int state2)
+{
+       return (sk->sk_state == state || sk->sk_state == state2);
+}
+
+/**
+ * iucv_below_msglim() - function to check if messages can be sent
+ * @sk:                sock structure
+ *
+ * Returns true if the send queue length is lower than the message limit.
+ * Always returns true if the socket is not connected (no iucv path for
+ * checking the message limit).
+ */
+static inline int iucv_below_msglim(struct sock *sk)
+{
+       struct iucv_sock *iucv = iucv_sk(sk);
+
+       if (sk->sk_state != IUCV_CONNECTED)
+               return 1;
+       return (skb_queue_len(&iucv->send_skb_q) < iucv->path->msglim);
+}
+
+/**
+ * iucv_sock_wake_msglim() - Wake up thread waiting on msg limit
+ */
+static void iucv_sock_wake_msglim(struct sock *sk)
+{
+       read_lock(&sk->sk_callback_lock);
+       if (sk_has_sleeper(sk))
+               wake_up_interruptible_all(sk->sk_sleep);
+       sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+       read_unlock(&sk->sk_callback_lock);
+}
+
 /* Timers */
 static void iucv_sock_timeout(unsigned long arg)
 {
@@ -212,7 +403,9 @@ static void iucv_sock_close(struct sock *sk)
                                timeo = sk->sk_lingertime;
                        else
                                timeo = IUCV_DISCONN_TIMEOUT;
-                       err = iucv_sock_wait_state(sk, IUCV_CLOSED, 0, timeo);
+                       err = iucv_sock_wait(sk,
+                                       iucv_sock_in_state(sk, IUCV_CLOSED, 0),
+                                       timeo);
                }
 
        case IUCV_CLOSING:   /* fall through */
@@ -393,39 +586,6 @@ struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
        return NULL;
 }
 
-int iucv_sock_wait_state(struct sock *sk, int state, int state2,
-                        unsigned long timeo)
-{
-       DECLARE_WAITQUEUE(wait, current);
-       int err = 0;
-
-       add_wait_queue(sk->sk_sleep, &wait);
-       while (sk->sk_state != state && sk->sk_state != state2) {
-               set_current_state(TASK_INTERRUPTIBLE);
-
-               if (!timeo) {
-                       err = -EAGAIN;
-                       break;
-               }
-
-               if (signal_pending(current)) {
-                       err = sock_intr_errno(timeo);
-                       break;
-               }
-
-               release_sock(sk);
-               timeo = schedule_timeout(timeo);
-               lock_sock(sk);
-
-               err = sock_error(sk);
-               if (err)
-                       break;
-       }
-       set_current_state(TASK_RUNNING);
-       remove_wait_queue(sk->sk_sleep, &wait);
-       return err;
-}
-
 /* Bind an unbound socket */
 static int iucv_sock_bind(struct socket *sock, struct sockaddr *addr,
                          int addr_len)
@@ -570,8 +730,9 @@ static int iucv_sock_connect(struct socket *sock, struct sockaddr *addr,
        }
 
        if (sk->sk_state != IUCV_CONNECTED) {
-               err = iucv_sock_wait_state(sk, IUCV_CONNECTED, IUCV_DISCONN,
-                               sock_sndtimeo(sk, flags & O_NONBLOCK));
+               err = iucv_sock_wait(sk, iucv_sock_in_state(sk, IUCV_CONNECTED,
+                                                           IUCV_DISCONN),
+                                    sock_sndtimeo(sk, flags & O_NONBLOCK));
        }
 
        if (sk->sk_state == IUCV_DISCONN) {
@@ -725,9 +886,11 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
        struct iucv_message txmsg;
        struct cmsghdr *cmsg;
        int cmsg_done;
+       long timeo;
        char user_id[9];
        char appl_id[9];
        int err;
+       int noblock = msg->msg_flags & MSG_DONTWAIT;
 
        err = sock_error(sk);
        if (err)
@@ -747,108 +910,119 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
                goto out;
        }
 
-       if (sk->sk_state == IUCV_CONNECTED) {
-               /* initialize defaults */
-               cmsg_done   = 0;        /* check for duplicate headers */
-               txmsg.class = 0;
+       /* Return if the socket is not in connected state */
+       if (sk->sk_state != IUCV_CONNECTED) {
+               err = -ENOTCONN;
+               goto out;
+       }
 
-               /* iterate over control messages */
-               for (cmsg = CMSG_FIRSTHDR(msg); cmsg;
-                    cmsg = CMSG_NXTHDR(msg, cmsg)) {
+       /* initialize defaults */
+       cmsg_done   = 0;        /* check for duplicate headers */
+       txmsg.class = 0;
 
-                       if (!CMSG_OK(msg, cmsg)) {
-                               err = -EINVAL;
-                               goto out;
-                       }
+       /* iterate over control messages */
+       for (cmsg = CMSG_FIRSTHDR(msg); cmsg;
+               cmsg = CMSG_NXTHDR(msg, cmsg)) {
 
-                       if (cmsg->cmsg_level != SOL_IUCV)
-                               continue;
+               if (!CMSG_OK(msg, cmsg)) {
+                       err = -EINVAL;
+                       goto out;
+               }
 
-                       if (cmsg->cmsg_type & cmsg_done) {
+               if (cmsg->cmsg_level != SOL_IUCV)
+                       continue;
+
+               if (cmsg->cmsg_type & cmsg_done) {
+                       err = -EINVAL;
+                       goto out;
+               }
+               cmsg_done |= cmsg->cmsg_type;
+
+               switch (cmsg->cmsg_type) {
+               case SCM_IUCV_TRGCLS:
+                       if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) {
                                err = -EINVAL;
                                goto out;
                        }
-                       cmsg_done |= cmsg->cmsg_type;
-
-                       switch (cmsg->cmsg_type) {
-                       case SCM_IUCV_TRGCLS:
-                               if (cmsg->cmsg_len != CMSG_LEN(TRGCLS_SIZE)) {
-                                       err = -EINVAL;
-                                       goto out;
-                               }
 
-                               /* set iucv message target class */
-                               memcpy(&txmsg.class,
-                                       (void *) CMSG_DATA(cmsg), TRGCLS_SIZE);
+                       /* set iucv message target class */
+                       memcpy(&txmsg.class,
+                               (void *) CMSG_DATA(cmsg), TRGCLS_SIZE);
 
-                               break;
+                       break;
 
-                       default:
-                               err = -EINVAL;
-                               goto out;
-                               break;
-                       }
+               default:
+                       err = -EINVAL;
+                       goto out;
+                       break;
                }
+       }
 
-               /* allocate one skb for each iucv message:
-                * this is fine for SOCK_SEQPACKET (unless we want to support
-                * segmented records using the MSG_EOR flag), but
-                * for SOCK_STREAM we might want to improve it in future */
-               if (!(skb = sock_alloc_send_skb(sk, len,
-                                               msg->msg_flags & MSG_DONTWAIT,
-                                               &err)))
-                       goto out;
+       /* allocate one skb for each iucv message:
+        * this is fine for SOCK_SEQPACKET (unless we want to support
+        * segmented records using the MSG_EOR flag), but
+        * for SOCK_STREAM we might want to improve it in future */
+       skb = sock_alloc_send_skb(sk, len, noblock, &err);
+       if (!skb)
+               goto out;
+       if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
+               err = -EFAULT;
+               goto fail;
+       }
 
-               if (memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len)) {
-                       err = -EFAULT;
-                       goto fail;
-               }
+       /* wait if outstanding messages for iucv path has reached */
+       timeo = sock_sndtimeo(sk, noblock);
+       err = iucv_sock_wait(sk, iucv_below_msglim(sk), timeo);
+       if (err)
+               goto fail;
 
-               /* increment and save iucv message tag for msg_completion cbk */
-               txmsg.tag = iucv->send_tag++;
-               memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN);
-               skb_queue_tail(&iucv->send_skb_q, skb);
+       /* return -ECONNRESET if the socket is no longer connected */
+       if (sk->sk_state != IUCV_CONNECTED) {
+               err = -ECONNRESET;
+               goto fail;
+       }
 
-               if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
-                   && skb->len <= 7) {
-                       err = iucv_send_iprm(iucv->path, &txmsg, skb);
+       /* increment and save iucv message tag for msg_completion cbk */
+       txmsg.tag = iucv->send_tag++;
+       memcpy(CB_TAG(skb), &txmsg.tag, CB_TAG_LEN);
+       skb_queue_tail(&iucv->send_skb_q, skb);
 
-                       /* on success: there is no message_complete callback
-                        * for an IPRMDATA msg; remove skb from send queue */
-                       if (err == 0) {
-                               skb_unlink(skb, &iucv->send_skb_q);
-                               kfree_skb(skb);
-                       }
+       if (((iucv->path->flags & IUCV_IPRMDATA) & iucv->flags)
+             && skb->len <= 7) {
+               err = iucv_send_iprm(iucv->path, &txmsg, skb);
 
-                       /* this error should never happen since the
-                        * IUCV_IPRMDATA path flag is set... sever path */
-                       if (err == 0x15) {
-                               iucv_path_sever(iucv->path, NULL);
-                               skb_unlink(skb, &iucv->send_skb_q);
-                               err = -EPIPE;
-                               goto fail;
-                       }
-               } else
-                       err = iucv_message_send(iucv->path, &txmsg, 0, 0,
-                                               (void *) skb->data, skb->len);
-               if (err) {
-                       if (err == 3) {
-                               user_id[8] = 0;
-                               memcpy(user_id, iucv->dst_user_id, 8);
-                               appl_id[8] = 0;
-                               memcpy(appl_id, iucv->dst_name, 8);
-                               pr_err("Application %s on z/VM guest %s"
-                                      " exceeds message limit\n",
-                                      user_id, appl_id);
-                       }
+               /* on success: there is no message_complete callback
+                * for an IPRMDATA msg; remove skb from send queue */
+               if (err == 0) {
+                       skb_unlink(skb, &iucv->send_skb_q);
+                       kfree_skb(skb);
+               }
+
+               /* this error should never happen since the
+                * IUCV_IPRMDATA path flag is set... sever path */
+               if (err == 0x15) {
+                       iucv_path_sever(iucv->path, NULL);
                        skb_unlink(skb, &iucv->send_skb_q);
                        err = -EPIPE;
                        goto fail;
                }
-
-       } else {
-               err = -ENOTCONN;
-               goto out;
+       } else
+               err = iucv_message_send(iucv->path, &txmsg, 0, 0,
+                                       (void *) skb->data, skb->len);
+       if (err) {
+               if (err == 3) {
+                       user_id[8] = 0;
+                       memcpy(user_id, iucv->dst_user_id, 8);
+                       appl_id[8] = 0;
+                       memcpy(appl_id, iucv->dst_name, 8);
+                       pr_err("Application %s on z/VM guest %s"
+                               " exceeds message limit\n",
+                               appl_id, user_id);
+                       err = -EAGAIN;
+               } else
+                       err = -EPIPE;
+               skb_unlink(skb, &iucv->send_skb_q);
+               goto fail;
        }
 
        release_sock(sk);
@@ -1082,7 +1256,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
        struct sock *sk = sock->sk;
        unsigned int mask = 0;
 
-       poll_wait(file, sk->sk_sleep, wait);
+       sock_poll_wait(file, sk->sk_sleep, wait);
 
        if (sk->sk_state == IUCV_LISTEN)
                return iucv_accept_poll(sk);
@@ -1464,7 +1638,11 @@ static void iucv_callback_txdone(struct iucv_path *path,
 
                spin_unlock_irqrestore(&list->lock, flags);
 
-               kfree_skb(this);
+               if (this) {
+                       kfree_skb(this);
+                       /* wake up any process waiting for sending */
+                       iucv_sock_wake_msglim(sk);
+               }
        }
        BUG_ON(!this);
 
@@ -1556,8 +1734,30 @@ static int __init afiucv_init(void)
        err = sock_register(&iucv_sock_family_ops);
        if (err)
                goto out_proto;
+       /* establish dummy device */
+       err = driver_register(&af_iucv_driver);
+       if (err)
+               goto out_sock;
+       af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
+       if (!af_iucv_dev) {
+               err = -ENOMEM;
+               goto out_driver;
+       }
+       dev_set_name(af_iucv_dev, "af_iucv");
+       af_iucv_dev->bus = &iucv_bus;
+       af_iucv_dev->parent = iucv_root;
+       af_iucv_dev->release = (void (*)(struct device *))kfree;
+       af_iucv_dev->driver = &af_iucv_driver;
+       err = device_register(af_iucv_dev);
+       if (err)
+               goto out_driver;
+
        return 0;
 
+out_driver:
+       driver_unregister(&af_iucv_driver);
+out_sock:
+       sock_unregister(PF_IUCV);
 out_proto:
        proto_unregister(&iucv_proto);
 out_iucv:
@@ -1568,6 +1768,8 @@ out:
 
 static void __exit afiucv_exit(void)
 {
+       device_unregister(af_iucv_dev);
+       driver_unregister(&af_iucv_driver);
        sock_unregister(PF_IUCV);
        proto_unregister(&iucv_proto);
        iucv_unregister(&af_iucv_handler, 0);