[pandora-kernel.git] / block / blk-cgroup.c
1 /*
2  * Common Block IO controller cgroup interface
3  *
4  * Based on ideas and code from CFQ, CFS and BFQ:
5  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
6  *
7  * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
8  *                    Paolo Valente <paolo.valente@unimore.it>
9  *
10  * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
11  *                    Nauman Rafique <nauman@google.com>
12  */
13 #include <linux/ioprio.h>
14 #include <linux/seq_file.h>
15 #include <linux/kdev_t.h>
16 #include <linux/module.h>
17 #include <linux/err.h>
18 #include <linux/blkdev.h>
19 #include <linux/slab.h>
20 #include "blk-cgroup.h"
21 #include <linux/genhd.h>
22
23 #define MAX_KEY_LEN 100
24
25 static DEFINE_SPINLOCK(blkio_list_lock);
26 static LIST_HEAD(blkio_list);
27
28 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
29 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
30
31 static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
32                                                   struct cgroup *);
33 static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
34                               struct task_struct *, bool);
35 static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
36                            struct cgroup *, struct task_struct *, bool);
37 static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
38 static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
39
40 /* for encoding cft->private value on file */
41 #define BLKIOFILE_PRIVATE(x, val)       (((x) << 16) | (val))
42 /* Which policy owns the file: proportional weight or throttling */
43 #define BLKIOFILE_POLICY(val)           (((val) >> 16) & 0xffff)
44 #define BLKIOFILE_ATTR(val)             ((val) & 0xffff)
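/*
 * Illustrative example: the cftype table below builds cft->private with these
 * macros, e.g.
 *     .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
 *                                  BLKIO_THROTL_read_bps_device)
 * so that BLKIOFILE_POLICY(cft->private) recovers BLKIO_POLICY_THROTL and
 * BLKIOFILE_ATTR(cft->private) recovers BLKIO_THROTL_read_bps_device.
 */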
45
46 struct cgroup_subsys blkio_subsys = {
47         .name = "blkio",
48         .create = blkiocg_create,
49         .can_attach = blkiocg_can_attach,
50         .attach = blkiocg_attach,
51         .destroy = blkiocg_destroy,
52         .populate = blkiocg_populate,
53 #ifdef CONFIG_BLK_CGROUP
54         /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
55         .subsys_id = blkio_subsys_id,
56 #endif
57         .use_id = 1,
58         .module = THIS_MODULE,
59 };
60 EXPORT_SYMBOL_GPL(blkio_subsys);
61
62 static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
63                                             struct blkio_policy_node *pn)
64 {
65         list_add(&pn->node, &blkcg->policy_list);
66 }
67
68 static inline bool cftype_blkg_same_policy(struct cftype *cft,
69                         struct blkio_group *blkg)
70 {
71         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
72
73         if (blkg->plid == plid)
74                 return 1;
75
76         return 0;
77 }
78
79 /* Determines if policy node matches cgroup file being accessed */
80 static inline bool pn_matches_cftype(struct cftype *cft,
81                         struct blkio_policy_node *pn)
82 {
83         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
84         int fileid = BLKIOFILE_ATTR(cft->private);
85
86         return (plid == pn->plid && fileid == pn->fileid);
87 }
88
89 /* Must be called with blkcg->lock held */
90 static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
91 {
92         list_del(&pn->node);
93 }
94
95 /* Must be called with blkcg->lock held */
96 static struct blkio_policy_node *
97 blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev,
98                 enum blkio_policy_id plid, int fileid)
99 {
100         struct blkio_policy_node *pn;
101
102         list_for_each_entry(pn, &blkcg->policy_list, node) {
103                 if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid)
104                         return pn;
105         }
106
107         return NULL;
108 }
109
110 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
111 {
112         return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
113                             struct blkio_cgroup, css);
114 }
115 EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
116
117 struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
118 {
119         return container_of(task_subsys_state(tsk, blkio_subsys_id),
120                             struct blkio_cgroup, css);
121 }
122 EXPORT_SYMBOL_GPL(task_blkio_cgroup);
123
124 static inline void
125 blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
126 {
127         struct blkio_policy_type *blkiop;
128
129         list_for_each_entry(blkiop, &blkio_list, list) {
130                 /* If this policy does not own the blkg, do not send updates */
131                 if (blkiop->plid != blkg->plid)
132                         continue;
133                 if (blkiop->ops.blkio_update_group_weight_fn)
134                         blkiop->ops.blkio_update_group_weight_fn(blkg->key,
135                                                         blkg, weight);
136         }
137 }
138
139 static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
140                                 int fileid)
141 {
142         struct blkio_policy_type *blkiop;
143
144         list_for_each_entry(blkiop, &blkio_list, list) {
145
146                 /* If this policy does not own the blkg, do not send updates */
147                 if (blkiop->plid != blkg->plid)
148                         continue;
149
150                 if (fileid == BLKIO_THROTL_read_bps_device
151                     && blkiop->ops.blkio_update_group_read_bps_fn)
152                         blkiop->ops.blkio_update_group_read_bps_fn(blkg->key,
153                                                                 blkg, bps);
154
155                 if (fileid == BLKIO_THROTL_write_bps_device
156                     && blkiop->ops.blkio_update_group_write_bps_fn)
157                         blkiop->ops.blkio_update_group_write_bps_fn(blkg->key,
158                                                                 blkg, bps);
159         }
160 }
161
162 static inline void blkio_update_group_iops(struct blkio_group *blkg,
163                         unsigned int iops, int fileid)
164 {
165         struct blkio_policy_type *blkiop;
166
167         list_for_each_entry(blkiop, &blkio_list, list) {
168
169                 /* If this policy does not own the blkg, do not send updates */
170                 if (blkiop->plid != blkg->plid)
171                         continue;
172
173                 if (fileid == BLKIO_THROTL_read_iops_device
174                     && blkiop->ops.blkio_update_group_read_iops_fn)
175                         blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
176                                                                 blkg, iops);
177
178                 if (fileid == BLKIO_THROTL_write_iops_device
179                     && blkiop->ops.blkio_update_group_write_iops_fn)
180                         blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
181                                                                 blkg, iops);
182         }
183 }
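/*
 * Note: the update helpers above fan a configuration change out to whichever
 * registered policy owns the group. In this kernel the proportional-weight
 * policy (BLKIO_POLICY_PROP) is implemented by the CFQ scheduler and the
 * throttling policy (BLKIO_POLICY_THROTL) by blk-throttle; a blkg belongs to
 * exactly one of them via blkg->plid, so only that policy sees the update.
 */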
184
185 /*
186  * Add to the appropriate stat variable depending on the request type.
187  * This should be called with the blkg->stats_lock held.
188  */
189 static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
190                                 bool sync)
191 {
192         if (direction)
193                 stat[BLKIO_STAT_WRITE] += add;
194         else
195                 stat[BLKIO_STAT_READ] += add;
196         if (sync)
197                 stat[BLKIO_STAT_SYNC] += add;
198         else
199                 stat[BLKIO_STAT_ASYNC] += add;
200 }
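/*
 * Example: for a synchronous write, blkio_add_stat() bumps both
 * stat[BLKIO_STAT_WRITE] and stat[BLKIO_STAT_SYNC] by 'add', so the
 * Read/Write and Sync/Async breakdowns each sum to the same total.
 */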
201
202 /*
203  * Decrements the appropriate stat variable depending on the request type.
204  * BUGs if the value is already zero.
205  * This should be called with the blkg->stats_lock held.
206  */
207 static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
208 {
209         if (direction) {
210                 BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
211                 stat[BLKIO_STAT_WRITE]--;
212         } else {
213                 BUG_ON(stat[BLKIO_STAT_READ] == 0);
214                 stat[BLKIO_STAT_READ]--;
215         }
216         if (sync) {
217                 BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
218                 stat[BLKIO_STAT_SYNC]--;
219         } else {
220                 BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
221                 stat[BLKIO_STAT_ASYNC]--;
222         }
223 }
224
225 #ifdef CONFIG_DEBUG_BLK_CGROUP
226 /* This should be called with the blkg->stats_lock held. */
227 static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
228                                                 struct blkio_group *curr_blkg)
229 {
230         if (blkio_blkg_waiting(&blkg->stats))
231                 return;
232         if (blkg == curr_blkg)
233                 return;
234         blkg->stats.start_group_wait_time = sched_clock();
235         blkio_mark_blkg_waiting(&blkg->stats);
236 }
237
238 /* This should be called with the blkg->stats_lock held. */
239 static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
240 {
241         unsigned long long now;
242
243         if (!blkio_blkg_waiting(stats))
244                 return;
245
246         now = sched_clock();
247         if (time_after64(now, stats->start_group_wait_time))
248                 stats->group_wait_time += now - stats->start_group_wait_time;
249         blkio_clear_blkg_waiting(stats);
250 }
251
252 /* This should be called with the blkg->stats_lock held. */
253 static void blkio_end_empty_time(struct blkio_group_stats *stats)
254 {
255         unsigned long long now;
256
257         if (!blkio_blkg_empty(stats))
258                 return;
259
260         now = sched_clock();
261         if (time_after64(now, stats->start_empty_time))
262                 stats->empty_time += now - stats->start_empty_time;
263         blkio_clear_blkg_empty(stats);
264 }
265
266 void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
267 {
268         unsigned long flags;
269
270         spin_lock_irqsave(&blkg->stats_lock, flags);
271         BUG_ON(blkio_blkg_idling(&blkg->stats));
272         blkg->stats.start_idle_time = sched_clock();
273         blkio_mark_blkg_idling(&blkg->stats);
274         spin_unlock_irqrestore(&blkg->stats_lock, flags);
275 }
276 EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
277
278 void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
279 {
280         unsigned long flags;
281         unsigned long long now;
282         struct blkio_group_stats *stats;
283
284         spin_lock_irqsave(&blkg->stats_lock, flags);
285         stats = &blkg->stats;
286         if (blkio_blkg_idling(stats)) {
287                 now = sched_clock();
288                 if (time_after64(now, stats->start_idle_time))
289                         stats->idle_time += now - stats->start_idle_time;
290                 blkio_clear_blkg_idling(stats);
291         }
292         spin_unlock_irqrestore(&blkg->stats_lock, flags);
293 }
294 EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
295
296 void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
297 {
298         unsigned long flags;
299         struct blkio_group_stats *stats;
300
301         spin_lock_irqsave(&blkg->stats_lock, flags);
302         stats = &blkg->stats;
303         stats->avg_queue_size_sum +=
304                         stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
305                         stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
306         stats->avg_queue_size_samples++;
307         blkio_update_group_wait_time(stats);
308         spin_unlock_irqrestore(&blkg->stats_lock, flags);
309 }
310 EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
311
312 void blkiocg_set_start_empty_time(struct blkio_group *blkg)
313 {
314         unsigned long flags;
315         struct blkio_group_stats *stats;
316
317         spin_lock_irqsave(&blkg->stats_lock, flags);
318         stats = &blkg->stats;
319
320         if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
321                         stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
322                 spin_unlock_irqrestore(&blkg->stats_lock, flags);
323                 return;
324         }
325
326         /*
327          * The group is already marked empty. This can happen if cfqq got a new
328          * request in the parent group and moved to this group while being added
329          * to the service tree. Just ignore the event and move on.
330          */
331         if (blkio_blkg_empty(stats)) {
332                 spin_unlock_irqrestore(&blkg->stats_lock, flags);
333                 return;
334         }
335
336         stats->start_empty_time = sched_clock();
337         blkio_mark_blkg_empty(stats);
338         spin_unlock_irqrestore(&blkg->stats_lock, flags);
339 }
340 EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);
341
342 void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
343                         unsigned long dequeue)
344 {
345         blkg->stats.dequeue += dequeue;
346 }
347 EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
348 #else
349 static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
350                                         struct blkio_group *curr_blkg) {}
351 static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
352 #endif
353
354 void blkiocg_update_io_add_stats(struct blkio_group *blkg,
355                         struct blkio_group *curr_blkg, bool direction,
356                         bool sync)
357 {
358         unsigned long flags;
359
360         spin_lock_irqsave(&blkg->stats_lock, flags);
361         blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
362                         sync);
363         blkio_end_empty_time(&blkg->stats);
364         blkio_set_start_group_wait_time(blkg, curr_blkg);
365         spin_unlock_irqrestore(&blkg->stats_lock, flags);
366 }
367 EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
368
369 void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
370                                                 bool direction, bool sync)
371 {
372         unsigned long flags;
373
374         spin_lock_irqsave(&blkg->stats_lock, flags);
375         blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED],
376                                         direction, sync);
377         spin_unlock_irqrestore(&blkg->stats_lock, flags);
378 }
379 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
380
381 void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time,
382                                 unsigned long unaccounted_time)
383 {
384         unsigned long flags;
385
386         spin_lock_irqsave(&blkg->stats_lock, flags);
387         blkg->stats.time += time;
388         blkg->stats.unaccounted_time += unaccounted_time;
389         spin_unlock_irqrestore(&blkg->stats_lock, flags);
390 }
391 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
392
393 void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
394                                 uint64_t bytes, bool direction, bool sync)
395 {
396         struct blkio_group_stats *stats;
397         unsigned long flags;
398
399         spin_lock_irqsave(&blkg->stats_lock, flags);
400         stats = &blkg->stats;
401         stats->sectors += bytes >> 9;
402         blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
403                         sync);
404         blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
405                         direction, sync);
406         spin_unlock_irqrestore(&blkg->stats_lock, flags);
407 }
408 EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
409
410 void blkiocg_update_completion_stats(struct blkio_group *blkg,
411         uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
412 {
413         struct blkio_group_stats *stats;
414         unsigned long flags;
415         unsigned long long now = sched_clock();
416
417         spin_lock_irqsave(&blkg->stats_lock, flags);
418         stats = &blkg->stats;
419         if (time_after64(now, io_start_time))
420                 blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
421                                 now - io_start_time, direction, sync);
422         if (time_after64(io_start_time, start_time))
423                 blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
424                                 io_start_time - start_time, direction, sync);
425         spin_unlock_irqrestore(&blkg->stats_lock, flags);
426 }
427 EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
428
429 void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
430                                         bool sync)
431 {
432         unsigned long flags;
433
434         spin_lock_irqsave(&blkg->stats_lock, flags);
435         blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
436                         sync);
437         spin_unlock_irqrestore(&blkg->stats_lock, flags);
438 }
439 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
440
441 void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
442                 struct blkio_group *blkg, void *key, dev_t dev,
443                 enum blkio_policy_id plid)
444 {
445         unsigned long flags;
446
447         spin_lock_irqsave(&blkcg->lock, flags);
448         spin_lock_init(&blkg->stats_lock);
449         rcu_assign_pointer(blkg->key, key);
450         blkg->blkcg_id = css_id(&blkcg->css);
451         hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
452         blkg->plid = plid;
453         spin_unlock_irqrestore(&blkcg->lock, flags);
454         /* Need to take css reference? */
455         cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
456         blkg->dev = dev;
457 }
458 EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
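/*
 * Note: 'key' is an opaque per-device cookie supplied by the policy
 * (typically its request-queue private data); it is only compared by pointer
 * in blkiocg_lookup_group() and handed back verbatim to the policy callbacks.
 */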
459
460 static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
461 {
462         hlist_del_init_rcu(&blkg->blkcg_node);
463         blkg->blkcg_id = 0;
464 }
465
466 /*
467  * Returns 0 if the blkio_group was still on the cgroup list. Otherwise returns 1,
468  * indicating that the blkio_group was unhashed by the time we got to it.
469  */
470 int blkiocg_del_blkio_group(struct blkio_group *blkg)
471 {
472         struct blkio_cgroup *blkcg;
473         unsigned long flags;
474         struct cgroup_subsys_state *css;
475         int ret = 1;
476
477         rcu_read_lock();
478         css = css_lookup(&blkio_subsys, blkg->blkcg_id);
479         if (css) {
480                 blkcg = container_of(css, struct blkio_cgroup, css);
481                 spin_lock_irqsave(&blkcg->lock, flags);
482                 if (!hlist_unhashed(&blkg->blkcg_node)) {
483                         __blkiocg_del_blkio_group(blkg);
484                         ret = 0;
485                 }
486                 spin_unlock_irqrestore(&blkcg->lock, flags);
487         }
488
489         rcu_read_unlock();
490         return ret;
491 }
492 EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
493
494 /* called under rcu_read_lock(). */
495 struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
496 {
497         struct blkio_group *blkg;
498         struct hlist_node *n;
499         void *__key;
500
501         hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
502                 __key = blkg->key;
503                 if (__key == key)
504                         return blkg;
505         }
506
507         return NULL;
508 }
509 EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
510
511 static int
512 blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
513 {
514         struct blkio_cgroup *blkcg;
515         struct blkio_group *blkg;
516         struct blkio_group_stats *stats;
517         struct hlist_node *n;
518         uint64_t queued[BLKIO_STAT_TOTAL];
519         int i;
520 #ifdef CONFIG_DEBUG_BLK_CGROUP
521         bool idling, waiting, empty;
522         unsigned long long now = sched_clock();
523 #endif
524
525         blkcg = cgroup_to_blkio_cgroup(cgroup);
526         spin_lock_irq(&blkcg->lock);
527         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
528                 spin_lock(&blkg->stats_lock);
529                 stats = &blkg->stats;
530 #ifdef CONFIG_DEBUG_BLK_CGROUP
531                 idling = blkio_blkg_idling(stats);
532                 waiting = blkio_blkg_waiting(stats);
533                 empty = blkio_blkg_empty(stats);
534 #endif
535                 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
536                         queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
537                 memset(stats, 0, sizeof(struct blkio_group_stats));
538                 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
539                         stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
540 #ifdef CONFIG_DEBUG_BLK_CGROUP
541                 if (idling) {
542                         blkio_mark_blkg_idling(stats);
543                         stats->start_idle_time = now;
544                 }
545                 if (waiting) {
546                         blkio_mark_blkg_waiting(stats);
547                         stats->start_group_wait_time = now;
548                 }
549                 if (empty) {
550                         blkio_mark_blkg_empty(stats);
551                         stats->start_empty_time = now;
552                 }
553 #endif
554                 spin_unlock(&blkg->stats_lock);
555         }
556         spin_unlock_irq(&blkcg->lock);
557         return 0;
558 }
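/*
 * Note: reset_stats zeroes the per-group statistics but deliberately preserves
 * the QUEUED counters and, under CONFIG_DEBUG_BLK_CGROUP, re-arms the
 * idling/waiting/empty state with a fresh timestamp, since those describe
 * requests that are still in flight.
 */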
559
560 static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str,
561                                 int chars_left, bool diskname_only)
562 {
563         snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev));
564         chars_left -= strlen(str);
565         if (chars_left <= 0) {
566                 printk(KERN_WARNING
567                         "Possibly incorrect cgroup stat display format\n");
568                 return;
569         }
570         if (diskname_only)
571                 return;
572         switch (type) {
573         case BLKIO_STAT_READ:
574                 strlcat(str, " Read", chars_left);
575                 break;
576         case BLKIO_STAT_WRITE:
577                 strlcat(str, " Write", chars_left);
578                 break;
579         case BLKIO_STAT_SYNC:
580                 strlcat(str, " Sync", chars_left);
581                 break;
582         case BLKIO_STAT_ASYNC:
583                 strlcat(str, " Async", chars_left);
584                 break;
585         case BLKIO_STAT_TOTAL:
586                 strlcat(str, " Total", chars_left);
587                 break;
588         default:
589                 strlcat(str, " Invalid", chars_left);
590         }
591 }
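/*
 * Example: for dev_t 8:16 and sub_type BLKIO_STAT_READ, blkio_get_key_name()
 * produces the key "8:16 Read"; with diskname_only set it stops after "8:16".
 * These keys are what cb->fill() emits to userspace.
 */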
592
593 static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
594                                 struct cgroup_map_cb *cb, dev_t dev)
595 {
596         blkio_get_key_name(0, dev, str, chars_left, true);
597         cb->fill(cb, str, val);
598         return val;
599 }
600
601 /* This should be called with blkg->stats_lock held */
602 static uint64_t blkio_get_stat(struct blkio_group *blkg,
603                 struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
604 {
605         uint64_t disk_total;
606         char key_str[MAX_KEY_LEN];
607         enum stat_sub_type sub_type;
608
609         if (type == BLKIO_STAT_TIME)
610                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
611                                         blkg->stats.time, cb, dev);
612         if (type == BLKIO_STAT_SECTORS)
613                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
614                                         blkg->stats.sectors, cb, dev);
615 #ifdef CONFIG_DEBUG_BLK_CGROUP
616         if (type == BLKIO_STAT_UNACCOUNTED_TIME)
617                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
618                                         blkg->stats.unaccounted_time, cb, dev);
619         if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
620                 uint64_t sum = blkg->stats.avg_queue_size_sum;
621                 uint64_t samples = blkg->stats.avg_queue_size_samples;
622                 if (samples)
623                         do_div(sum, samples);
624                 else
625                         sum = 0;
626                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev);
627         }
628         if (type == BLKIO_STAT_GROUP_WAIT_TIME)
629                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
630                                         blkg->stats.group_wait_time, cb, dev);
631         if (type == BLKIO_STAT_IDLE_TIME)
632                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
633                                         blkg->stats.idle_time, cb, dev);
634         if (type == BLKIO_STAT_EMPTY_TIME)
635                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
636                                         blkg->stats.empty_time, cb, dev);
637         if (type == BLKIO_STAT_DEQUEUE)
638                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
639                                         blkg->stats.dequeue, cb, dev);
640 #endif
641
642         for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
643                         sub_type++) {
644                 blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
645                 cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]);
646         }
647         disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] +
648                         blkg->stats.stat_arr[type][BLKIO_STAT_WRITE];
649         blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
650         cb->fill(cb, key_str, disk_total);
651         return disk_total;
652 }
653
654 static int blkio_check_dev_num(dev_t dev)
655 {
656         int part = 0;
657         struct gendisk *disk;
658
659         disk = get_gendisk(dev, &part);
660         if (!disk || part)
661                 return -ENODEV;
662
663         return 0;
664 }
665
666 static int blkio_policy_parse_and_set(char *buf,
667         struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
668 {
669         char *s[4], *p, *major_s = NULL, *minor_s = NULL;
670         int ret;
671         unsigned long major, minor, temp;
672         int i = 0;
673         dev_t dev;
674         u64 bps, iops;
675
676         memset(s, 0, sizeof(s));
677
678         while ((p = strsep(&buf, " ")) != NULL) {
679                 if (!*p)
680                         continue;
681
682                 s[i++] = p;
683
684                 /* Prevent input of too many arguments */
685                 if (i == 3)
686                         break;
687         }
688
689         if (i != 2)
690                 return -EINVAL;
691
692         p = strsep(&s[0], ":");
693         if (p != NULL)
694                 major_s = p;
695         else
696                 return -EINVAL;
697
698         minor_s = s[0];
699         if (!minor_s)
700                 return -EINVAL;
701
702         ret = strict_strtoul(major_s, 10, &major);
703         if (ret)
704                 return -EINVAL;
705
706         ret = strict_strtoul(minor_s, 10, &minor);
707         if (ret)
708                 return -EINVAL;
709
710         dev = MKDEV(major, minor);
711
712         ret = blkio_check_dev_num(dev);
713         if (ret)
714                 return ret;
715
716         newpn->dev = dev;
717
718         if (s[1] == NULL)
719                 return -EINVAL;
720
721         switch (plid) {
722         case BLKIO_POLICY_PROP:
723                 ret = strict_strtoul(s[1], 10, &temp);
724                 if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
725                         temp > BLKIO_WEIGHT_MAX)
726                         return -EINVAL;
727
728                 newpn->plid = plid;
729                 newpn->fileid = fileid;
730                 newpn->val.weight = temp;
731                 break;
732         case BLKIO_POLICY_THROTL:
733                 switch(fileid) {
734                 case BLKIO_THROTL_read_bps_device:
735                 case BLKIO_THROTL_write_bps_device:
736                         ret = strict_strtoull(s[1], 10, &bps);
737                         if (ret)
738                                 return -EINVAL;
739
740                         newpn->plid = plid;
741                         newpn->fileid = fileid;
742                         newpn->val.bps = bps;
743                         break;
744                 case BLKIO_THROTL_read_iops_device:
745                 case BLKIO_THROTL_write_iops_device:
746                         ret = strict_strtoull(s[1], 10, &iops);
747                         if (ret)
748                                 return -EINVAL;
749
750                         if (iops > THROTL_IOPS_MAX)
751                                 return -EINVAL;
752
753                         newpn->plid = plid;
754                         newpn->fileid = fileid;
755                         newpn->val.iops = (unsigned int)iops;
756                         break;
757                 }
758                 break;
759         default:
760                 BUG();
761         }
762
763         return 0;
764 }
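/*
 * Illustrative usage, assuming the blkio controller is mounted at /cgroup and
 * a child group grp1 exists: rules are written as "major:minor value", and a
 * value of 0 deletes the rule (see blkio_delete_rule_command() below), e.g.
 *
 *     echo "8:0 500"     > /cgroup/grp1/blkio.weight_device
 *     echo "8:0 1048576" > /cgroup/grp1/blkio.throttle.read_bps_device
 *     echo "8:0 0"       > /cgroup/grp1/blkio.weight_device    (delete rule)
 */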
765
766 unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
767                               dev_t dev)
768 {
769         struct blkio_policy_node *pn;
770
771         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
772                                 BLKIO_PROP_weight_device);
773         if (pn)
774                 return pn->val.weight;
775         else
776                 return blkcg->weight;
777 }
778 EXPORT_SYMBOL_GPL(blkcg_get_weight);
779
780 uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
781 {
782         struct blkio_policy_node *pn;
783
784         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
785                                 BLKIO_THROTL_read_bps_device);
786         if (pn)
787                 return pn->val.bps;
788         else
789                 return -1;
790 }
791
792 uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
793 {
794         struct blkio_policy_node *pn;
795         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
796                                 BLKIO_THROTL_write_bps_device);
797         if (pn)
798                 return pn->val.bps;
799         else
800                 return -1;
801 }
802
803 unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
804 {
805         struct blkio_policy_node *pn;
806
807         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
808                                 BLKIO_THROTL_read_iops_device);
809         if (pn)
810                 return pn->val.iops;
811         else
812                 return -1;
813 }
814
815 unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
816 {
817         struct blkio_policy_node *pn;
818         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
819                                 BLKIO_THROTL_write_iops_device);
820         if (pn)
821                 return pn->val.iops;
822         else
823                 return -1;
824 }
825
826 /* Checks whether the user asked to delete a policy rule */
827 static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
828 {
829         switch(pn->plid) {
830         case BLKIO_POLICY_PROP:
831                 if (pn->val.weight == 0)
832                         return 1;
833                 break;
834         case BLKIO_POLICY_THROTL:
835                 switch(pn->fileid) {
836                 case BLKIO_THROTL_read_bps_device:
837                 case BLKIO_THROTL_write_bps_device:
838                         if (pn->val.bps == 0)
839                                 return 1;
840                         break;
841                 case BLKIO_THROTL_read_iops_device:
842                 case BLKIO_THROTL_write_iops_device:
843                         if (pn->val.iops == 0)
844                                 return 1;
845                 }
846                 break;
847         default:
848                 BUG();
849         }
850
851         return 0;
852 }
853
854 static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
855                                         struct blkio_policy_node *newpn)
856 {
857         switch(oldpn->plid) {
858         case BLKIO_POLICY_PROP:
859                 oldpn->val.weight = newpn->val.weight;
860                 break;
861         case BLKIO_POLICY_THROTL:
862                 switch(newpn->fileid) {
863                 case BLKIO_THROTL_read_bps_device:
864                 case BLKIO_THROTL_write_bps_device:
865                         oldpn->val.bps = newpn->val.bps;
866                         break;
867                 case BLKIO_THROTL_read_iops_device:
868                 case BLKIO_THROTL_write_iops_device:
869                         oldpn->val.iops = newpn->val.iops;
870                 }
871                 break;
872         default:
873                 BUG();
874         }
875 }
876
877 /*
878  * Some rules/values in blkg have changed. Propagate those to respective
879  * policies.
880  */
881 static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
882                 struct blkio_group *blkg, struct blkio_policy_node *pn)
883 {
884         unsigned int weight, iops;
885         u64 bps;
886
887         switch(pn->plid) {
888         case BLKIO_POLICY_PROP:
889                 weight = pn->val.weight ? pn->val.weight :
890                                 blkcg->weight;
891                 blkio_update_group_weight(blkg, weight);
892                 break;
893         case BLKIO_POLICY_THROTL:
894                 switch(pn->fileid) {
895                 case BLKIO_THROTL_read_bps_device:
896                 case BLKIO_THROTL_write_bps_device:
897                         bps = pn->val.bps ? pn->val.bps : (-1);
898                         blkio_update_group_bps(blkg, bps, pn->fileid);
899                         break;
900                 case BLKIO_THROTL_read_iops_device:
901                 case BLKIO_THROTL_write_iops_device:
902                         iops = pn->val.iops ? pn->val.iops : (-1);
903                         blkio_update_group_iops(blkg, iops, pn->fileid);
904                         break;
905                 }
906                 break;
907         default:
908                 BUG();
909         }
910 }
911
912 /*
913  * A policy node rule has been updated. Propagate the change to all the
914  * block groups that might be affected by it.
915  */
916 static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg,
917                                 struct blkio_policy_node *pn)
918 {
919         struct blkio_group *blkg;
920         struct hlist_node *n;
921
922         spin_lock(&blkio_list_lock);
923         spin_lock_irq(&blkcg->lock);
924
925         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
926                 if (pn->dev != blkg->dev || pn->plid != blkg->plid)
927                         continue;
928                 blkio_update_blkg_policy(blkcg, blkg, pn);
929         }
930
931         spin_unlock_irq(&blkcg->lock);
932         spin_unlock(&blkio_list_lock);
933 }
934
935 static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
936                                        const char *buffer)
937 {
938         int ret = 0;
939         char *buf;
940         struct blkio_policy_node *newpn, *pn;
941         struct blkio_cgroup *blkcg;
942         int keep_newpn = 0;
943         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
944         int fileid = BLKIOFILE_ATTR(cft->private);
945
946         buf = kstrdup(buffer, GFP_KERNEL);
947         if (!buf)
948                 return -ENOMEM;
949
950         newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
951         if (!newpn) {
952                 ret = -ENOMEM;
953                 goto free_buf;
954         }
955
956         ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
957         if (ret)
958                 goto free_newpn;
959
960         blkcg = cgroup_to_blkio_cgroup(cgrp);
961
962         spin_lock_irq(&blkcg->lock);
963
964         pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
965         if (!pn) {
966                 if (!blkio_delete_rule_command(newpn)) {
967                         blkio_policy_insert_node(blkcg, newpn);
968                         keep_newpn = 1;
969                 }
970                 spin_unlock_irq(&blkcg->lock);
971                 goto update_io_group;
972         }
973
974         if (blkio_delete_rule_command(newpn)) {
975                 blkio_policy_delete_node(pn);
976                 spin_unlock_irq(&blkcg->lock);
977                 goto update_io_group;
978         }
979         spin_unlock_irq(&blkcg->lock);
980
981         blkio_update_policy_rule(pn, newpn);
982
983 update_io_group:
984         blkio_update_policy_node_blkg(blkcg, newpn);
985
986 free_newpn:
987         if (!keep_newpn)
988                 kfree(newpn);
989 free_buf:
990         kfree(buf);
991         return ret;
992 }
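/*
 * Summary: blkiocg_file_write() parses the rule, then either inserts a new
 * policy node, updates the existing one, or removes it when the value is 0,
 * and finally calls blkio_update_policy_node_blkg() so every matching
 * blkio_group picks up the new setting.
 */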
993
994 static void
995 blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
996 {
997         switch(pn->plid) {
998                 case BLKIO_POLICY_PROP:
999                         if (pn->fileid == BLKIO_PROP_weight_device)
1000                                 seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
1001                                         MINOR(pn->dev), pn->val.weight);
1002                         break;
1003                 case BLKIO_POLICY_THROTL:
1004                         switch(pn->fileid) {
1005                         case BLKIO_THROTL_read_bps_device:
1006                         case BLKIO_THROTL_write_bps_device:
1007                                 seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
1008                                         MINOR(pn->dev), pn->val.bps);
1009                                 break;
1010                         case BLKIO_THROTL_read_iops_device:
1011                         case BLKIO_THROTL_write_iops_device:
1012                                 seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
1013                                         MINOR(pn->dev), pn->val.iops);
1014                                 break;
1015                         }
1016                         break;
1017                 default:
1018                         BUG();
1019         }
1020 }
1021
1022 /* cgroup files which read their data from policy nodes end up here */
1023 static void blkio_read_policy_node_files(struct cftype *cft,
1024                         struct blkio_cgroup *blkcg, struct seq_file *m)
1025 {
1026         struct blkio_policy_node *pn;
1027
1028         if (!list_empty(&blkcg->policy_list)) {
1029                 spin_lock_irq(&blkcg->lock);
1030                 list_for_each_entry(pn, &blkcg->policy_list, node) {
1031                         if (!pn_matches_cftype(cft, pn))
1032                                 continue;
1033                         blkio_print_policy_node(m, pn);
1034                 }
1035                 spin_unlock_irq(&blkcg->lock);
1036         }
1037 }
1038
1039 static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
1040                                 struct seq_file *m)
1041 {
1042         struct blkio_cgroup *blkcg;
1043         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1044         int name = BLKIOFILE_ATTR(cft->private);
1045
1046         blkcg = cgroup_to_blkio_cgroup(cgrp);
1047
1048         switch(plid) {
1049         case BLKIO_POLICY_PROP:
1050                 switch(name) {
1051                 case BLKIO_PROP_weight_device:
1052                         blkio_read_policy_node_files(cft, blkcg, m);
1053                         return 0;
1054                 default:
1055                         BUG();
1056                 }
1057                 break;
1058         case BLKIO_POLICY_THROTL:
1059                 switch(name){
1060                 case BLKIO_THROTL_read_bps_device:
1061                 case BLKIO_THROTL_write_bps_device:
1062                 case BLKIO_THROTL_read_iops_device:
1063                 case BLKIO_THROTL_write_iops_device:
1064                         blkio_read_policy_node_files(cft, blkcg, m);
1065                         return 0;
1066                 default:
1067                         BUG();
1068                 }
1069                 break;
1070         default:
1071                 BUG();
1072         }
1073
1074         return 0;
1075 }
1076
1077 static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
1078                 struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
1079                 bool show_total)
1080 {
1081         struct blkio_group *blkg;
1082         struct hlist_node *n;
1083         uint64_t cgroup_total = 0;
1084
1085         rcu_read_lock();
1086         hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
1087                 if (blkg->dev) {
1088                         if (!cftype_blkg_same_policy(cft, blkg))
1089                                 continue;
1090                         spin_lock_irq(&blkg->stats_lock);
1091                         cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
1092                                                 type);
1093                         spin_unlock_irq(&blkg->stats_lock);
1094                 }
1095         }
1096         if (show_total)
1097                 cb->fill(cb, "Total", cgroup_total);
1098         rcu_read_unlock();
1099         return 0;
1100 }
1101
1102 /* All map-type cgroup files are serviced by this function */
1103 static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
1104                                 struct cgroup_map_cb *cb)
1105 {
1106         struct blkio_cgroup *blkcg;
1107         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1108         int name = BLKIOFILE_ATTR(cft->private);
1109
1110         blkcg = cgroup_to_blkio_cgroup(cgrp);
1111
1112         switch(plid) {
1113         case BLKIO_POLICY_PROP:
1114                 switch(name) {
1115                 case BLKIO_PROP_time:
1116                         return blkio_read_blkg_stats(blkcg, cft, cb,
1117                                                 BLKIO_STAT_TIME, 0);
1118                 case BLKIO_PROP_sectors:
1119                         return blkio_read_blkg_stats(blkcg, cft, cb,
1120                                                 BLKIO_STAT_SECTORS, 0);
1121                 case BLKIO_PROP_io_service_bytes:
1122                         return blkio_read_blkg_stats(blkcg, cft, cb,
1123                                                 BLKIO_STAT_SERVICE_BYTES, 1);
1124                 case BLKIO_PROP_io_serviced:
1125                         return blkio_read_blkg_stats(blkcg, cft, cb,
1126                                                 BLKIO_STAT_SERVICED, 1);
1127                 case BLKIO_PROP_io_service_time:
1128                         return blkio_read_blkg_stats(blkcg, cft, cb,
1129                                                 BLKIO_STAT_SERVICE_TIME, 1);
1130                 case BLKIO_PROP_io_wait_time:
1131                         return blkio_read_blkg_stats(blkcg, cft, cb,
1132                                                 BLKIO_STAT_WAIT_TIME, 1);
1133                 case BLKIO_PROP_io_merged:
1134                         return blkio_read_blkg_stats(blkcg, cft, cb,
1135                                                 BLKIO_STAT_MERGED, 1);
1136                 case BLKIO_PROP_io_queued:
1137                         return blkio_read_blkg_stats(blkcg, cft, cb,
1138                                                 BLKIO_STAT_QUEUED, 1);
1139 #ifdef CONFIG_DEBUG_BLK_CGROUP
1140                 case BLKIO_PROP_unaccounted_time:
1141                         return blkio_read_blkg_stats(blkcg, cft, cb,
1142                                                 BLKIO_STAT_UNACCOUNTED_TIME, 0);
1143                 case BLKIO_PROP_dequeue:
1144                         return blkio_read_blkg_stats(blkcg, cft, cb,
1145                                                 BLKIO_STAT_DEQUEUE, 0);
1146                 case BLKIO_PROP_avg_queue_size:
1147                         return blkio_read_blkg_stats(blkcg, cft, cb,
1148                                                 BLKIO_STAT_AVG_QUEUE_SIZE, 0);
1149                 case BLKIO_PROP_group_wait_time:
1150                         return blkio_read_blkg_stats(blkcg, cft, cb,
1151                                                 BLKIO_STAT_GROUP_WAIT_TIME, 0);
1152                 case BLKIO_PROP_idle_time:
1153                         return blkio_read_blkg_stats(blkcg, cft, cb,
1154                                                 BLKIO_STAT_IDLE_TIME, 0);
1155                 case BLKIO_PROP_empty_time:
1156                         return blkio_read_blkg_stats(blkcg, cft, cb,
1157                                                 BLKIO_STAT_EMPTY_TIME, 0);
1158 #endif
1159                 default:
1160                         BUG();
1161                 }
1162                 break;
1163         case BLKIO_POLICY_THROTL:
1164                 switch(name){
1165                 case BLKIO_THROTL_io_service_bytes:
1166                         return blkio_read_blkg_stats(blkcg, cft, cb,
1167                                                 BLKIO_STAT_SERVICE_BYTES, 1);
1168                 case BLKIO_THROTL_io_serviced:
1169                         return blkio_read_blkg_stats(blkcg, cft, cb,
1170                                                 BLKIO_STAT_SERVICED, 1);
1171                 default:
1172                         BUG();
1173                 }
1174                 break;
1175         default:
1176                 BUG();
1177         }
1178
1179         return 0;
1180 }
1181
1182 static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val)
1183 {
1184         struct blkio_group *blkg;
1185         struct hlist_node *n;
1186         struct blkio_policy_node *pn;
1187
1188         if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
1189                 return -EINVAL;
1190
1191         spin_lock(&blkio_list_lock);
1192         spin_lock_irq(&blkcg->lock);
1193         blkcg->weight = (unsigned int)val;
1194
1195         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
1196                 pn = blkio_policy_search_node(blkcg, blkg->dev,
1197                                 BLKIO_POLICY_PROP, BLKIO_PROP_weight_device);
1198                 if (pn)
1199                         continue;
1200
1201                 blkio_update_group_weight(blkg, blkcg->weight);
1202         }
1203         spin_unlock_irq(&blkcg->lock);
1204         spin_unlock(&blkio_list_lock);
1205         return 0;
1206 }
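/*
 * Note: writing blkio.weight updates blkcg->weight and pushes it to every
 * group except those that have a per-device weight_device rule, which
 * continues to take precedence.
 */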
1207
1208 static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft) {
1209         struct blkio_cgroup *blkcg;
1210         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1211         int name = BLKIOFILE_ATTR(cft->private);
1212
1213         blkcg = cgroup_to_blkio_cgroup(cgrp);
1214
1215         switch(plid) {
1216         case BLKIO_POLICY_PROP:
1217                 switch(name) {
1218                 case BLKIO_PROP_weight:
1219                         return (u64)blkcg->weight;
1220                 }
1221                 break;
1222         default:
1223                 BUG();
1224         }
1225         return 0;
1226 }
1227
1228 static int
1229 blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1230 {
1231         struct blkio_cgroup *blkcg;
1232         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1233         int name = BLKIOFILE_ATTR(cft->private);
1234
1235         blkcg = cgroup_to_blkio_cgroup(cgrp);
1236
1237         switch(plid) {
1238         case BLKIO_POLICY_PROP:
1239                 switch(name) {
1240                 case BLKIO_PROP_weight:
1241                         return blkio_weight_write(blkcg, val);
1242                 }
1243                 break;
1244         default:
1245                 BUG();
1246         }
1247
1248         return 0;
1249 }
1250
1251 struct cftype blkio_files[] = {
1252         {
1253                 .name = "weight_device",
1254                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1255                                 BLKIO_PROP_weight_device),
1256                 .read_seq_string = blkiocg_file_read,
1257                 .write_string = blkiocg_file_write,
1258                 .max_write_len = 256,
1259         },
1260         {
1261                 .name = "weight",
1262                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1263                                 BLKIO_PROP_weight),
1264                 .read_u64 = blkiocg_file_read_u64,
1265                 .write_u64 = blkiocg_file_write_u64,
1266         },
1267         {
1268                 .name = "time",
1269                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1270                                 BLKIO_PROP_time),
1271                 .read_map = blkiocg_file_read_map,
1272         },
1273         {
1274                 .name = "sectors",
1275                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1276                                 BLKIO_PROP_sectors),
1277                 .read_map = blkiocg_file_read_map,
1278         },
1279         {
1280                 .name = "io_service_bytes",
1281                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1282                                 BLKIO_PROP_io_service_bytes),
1283                 .read_map = blkiocg_file_read_map,
1284         },
1285         {
1286                 .name = "io_serviced",
1287                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1288                                 BLKIO_PROP_io_serviced),
1289                 .read_map = blkiocg_file_read_map,
1290         },
1291         {
1292                 .name = "io_service_time",
1293                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1294                                 BLKIO_PROP_io_service_time),
1295                 .read_map = blkiocg_file_read_map,
1296         },
1297         {
1298                 .name = "io_wait_time",
1299                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1300                                 BLKIO_PROP_io_wait_time),
1301                 .read_map = blkiocg_file_read_map,
1302         },
1303         {
1304                 .name = "io_merged",
1305                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1306                                 BLKIO_PROP_io_merged),
1307                 .read_map = blkiocg_file_read_map,
1308         },
1309         {
1310                 .name = "io_queued",
1311                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1312                                 BLKIO_PROP_io_queued),
1313                 .read_map = blkiocg_file_read_map,
1314         },
1315         {
1316                 .name = "reset_stats",
1317                 .write_u64 = blkiocg_reset_stats,
1318         },
1319 #ifdef CONFIG_BLK_DEV_THROTTLING
1320         {
1321                 .name = "throttle.read_bps_device",
1322                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1323                                 BLKIO_THROTL_read_bps_device),
1324                 .read_seq_string = blkiocg_file_read,
1325                 .write_string = blkiocg_file_write,
1326                 .max_write_len = 256,
1327         },
1328
1329         {
1330                 .name = "throttle.write_bps_device",
1331                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1332                                 BLKIO_THROTL_write_bps_device),
1333                 .read_seq_string = blkiocg_file_read,
1334                 .write_string = blkiocg_file_write,
1335                 .max_write_len = 256,
1336         },
1337
1338         {
1339                 .name = "throttle.read_iops_device",
1340                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1341                                 BLKIO_THROTL_read_iops_device),
1342                 .read_seq_string = blkiocg_file_read,
1343                 .write_string = blkiocg_file_write,
1344                 .max_write_len = 256,
1345         },
1346
1347         {
1348                 .name = "throttle.write_iops_device",
1349                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1350                                 BLKIO_THROTL_write_iops_device),
1351                 .read_seq_string = blkiocg_file_read,
1352                 .write_string = blkiocg_file_write,
1353                 .max_write_len = 256,
1354         },
1355         {
1356                 .name = "throttle.io_service_bytes",
1357                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1358                                 BLKIO_THROTL_io_service_bytes),
1359                 .read_map = blkiocg_file_read_map,
1360         },
1361         {
1362                 .name = "throttle.io_serviced",
1363                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1364                                 BLKIO_THROTL_io_serviced),
1365                 .read_map = blkiocg_file_read_map,
1366         },
1367 #endif /* CONFIG_BLK_DEV_THROTTLING */
1368
1369 #ifdef CONFIG_DEBUG_BLK_CGROUP
1370         {
1371                 .name = "avg_queue_size",
1372                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1373                                 BLKIO_PROP_avg_queue_size),
1374                 .read_map = blkiocg_file_read_map,
1375         },
1376         {
1377                 .name = "group_wait_time",
1378                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1379                                 BLKIO_PROP_group_wait_time),
1380                 .read_map = blkiocg_file_read_map,
1381         },
1382         {
1383                 .name = "idle_time",
1384                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1385                                 BLKIO_PROP_idle_time),
1386                 .read_map = blkiocg_file_read_map,
1387         },
1388         {
1389                 .name = "empty_time",
1390                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1391                                 BLKIO_PROP_empty_time),
1392                 .read_map = blkiocg_file_read_map,
1393         },
1394         {
1395                 .name = "dequeue",
1396                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1397                                 BLKIO_PROP_dequeue),
1398                 .read_map = blkiocg_file_read_map,
1399         },
1400         {
1401                 .name = "unaccounted_time",
1402                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1403                                 BLKIO_PROP_unaccounted_time),
1404                 .read_map = blkiocg_file_read_map,
1405         },
1406 #endif
1407 };
1408
1409 static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1410 {
1411         return cgroup_add_files(cgroup, subsys, blkio_files,
1412                                 ARRAY_SIZE(blkio_files));
1413 }
1414
1415 static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1416 {
1417         struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
1418         unsigned long flags;
1419         struct blkio_group *blkg;
1420         void *key;
1421         struct blkio_policy_type *blkiop;
1422         struct blkio_policy_node *pn, *pntmp;
1423
1424         rcu_read_lock();
1425         do {
1426                 spin_lock_irqsave(&blkcg->lock, flags);
1427
1428                 if (hlist_empty(&blkcg->blkg_list)) {
1429                         spin_unlock_irqrestore(&blkcg->lock, flags);
1430                         break;
1431                 }
1432
1433                 blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
1434                                         blkcg_node);
1435                 key = rcu_dereference(blkg->key);
1436                 __blkiocg_del_blkio_group(blkg);
1437
1438                 spin_unlock_irqrestore(&blkcg->lock, flags);
1439
1440                 /*
1441                  * This blkio_group is being unlinked as the associated cgroup is
1442                  * going away. Let all the IO controlling policies know about
1443                  * this event.
1444                  */
1445                 spin_lock(&blkio_list_lock);
1446                 list_for_each_entry(blkiop, &blkio_list, list) {
1447                         if (blkiop->plid != blkg->plid)
1448                                 continue;
1449                         blkiop->ops.blkio_unlink_group_fn(key, blkg);
1450                 }
1451                 spin_unlock(&blkio_list_lock);
1452         } while (1);
1453
1454         list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
1455                 blkio_policy_delete_node(pn);
1456                 kfree(pn);
1457         }
1458
1459         free_css_id(&blkio_subsys, &blkcg->css);
1460         rcu_read_unlock();
1461         if (blkcg != &blkio_root_cgroup)
1462                 kfree(blkcg);
1463 }
1464
1465 static struct cgroup_subsys_state *
1466 blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1467 {
1468         struct blkio_cgroup *blkcg;
1469         struct cgroup *parent = cgroup->parent;
1470
1471         if (!parent) {
1472                 blkcg = &blkio_root_cgroup;
1473                 goto done;
1474         }
1475
1476         blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
1477         if (!blkcg)
1478                 return ERR_PTR(-ENOMEM);
1479
1480         blkcg->weight = BLKIO_WEIGHT_DEFAULT;
1481 done:
1482         spin_lock_init(&blkcg->lock);
1483         INIT_HLIST_HEAD(&blkcg->blkg_list);
1484
1485         INIT_LIST_HEAD(&blkcg->policy_list);
1486         return &blkcg->css;
1487 }
1488
1489 /*
1490  * We cannot support shared io contexts, as we have no means to support
1491  * two tasks with the same ioc in two different groups without major rework
1492  * of the main cic data structures.  For now we allow a task to change
1493  * its cgroup only if it's the only owner of its ioc.
1494  */
1495 static int blkiocg_can_attach(struct cgroup_subsys *subsys,
1496                                 struct cgroup *cgroup, struct task_struct *tsk,
1497                                 bool threadgroup)
1498 {
1499         struct io_context *ioc;
1500         int ret = 0;
1501
1502         /* task_lock() is needed to avoid races with exit_io_context() */
1503         task_lock(tsk);
1504         ioc = tsk->io_context;
1505         if (ioc && atomic_read(&ioc->nr_tasks) > 1)
1506                 ret = -EINVAL;
1507         task_unlock(tsk);
1508
1509         return ret;
1510 }
1511
1512 static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
1513                                 struct cgroup *prev, struct task_struct *tsk,
1514                                 bool threadgroup)
1515 {
1516         struct io_context *ioc;
1517
1518         task_lock(tsk);
1519         ioc = tsk->io_context;
1520         if (ioc)
1521                 ioc->cgroup_changed = 1;
1522         task_unlock(tsk);
1523 }
1524
1525 void blkio_policy_register(struct blkio_policy_type *blkiop)
1526 {
1527         spin_lock(&blkio_list_lock);
1528         list_add_tail(&blkiop->list, &blkio_list);
1529         spin_unlock(&blkio_list_lock);
1530 }
1531 EXPORT_SYMBOL_GPL(blkio_policy_register);
1532
1533 void blkio_policy_unregister(struct blkio_policy_type *blkiop)
1534 {
1535         spin_lock(&blkio_list_lock);
1536         list_del_init(&blkiop->list);
1537         spin_unlock(&blkio_list_lock);
1538 }
1539 EXPORT_SYMBOL_GPL(blkio_policy_unregister);
1540
1541 static int __init init_cgroup_blkio(void)
1542 {
1543         return cgroup_load_subsys(&blkio_subsys);
1544 }
1545
1546 static void __exit exit_cgroup_blkio(void)
1547 {
1548         cgroup_unload_subsys(&blkio_subsys);
1549 }
1550
1551 module_init(init_cgroup_blkio);
1552 module_exit(exit_cgroup_blkio);
1553 MODULE_LICENSE("GPL");