block/blk-cgroup.c
1 /*
2  * Common Block IO controller cgroup interface
3  *
4  * Based on ideas and code from CFQ, CFS and BFQ:
5  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
6  *
7  * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
8  *                    Paolo Valente <paolo.valente@unimore.it>
9  *
10  * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
11  *                    Nauman Rafique <nauman@google.com>
12  */
13 #include <linux/ioprio.h>
14 #include <linux/seq_file.h>
15 #include <linux/kdev_t.h>
16 #include <linux/module.h>
17 #include <linux/err.h>
18 #include <linux/blkdev.h>
19 #include <linux/slab.h>
20 #include "blk-cgroup.h"
21 #include <linux/genhd.h>
22
23 #define MAX_KEY_LEN 100
24
25 static DEFINE_SPINLOCK(blkio_list_lock);
26 static LIST_HEAD(blkio_list);
27
28 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
29 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
30
31 static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
32                                                   struct cgroup *);
33 static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
34                               struct task_struct *, bool);
35 static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
36                            struct cgroup *, struct task_struct *, bool);
37 static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
38 static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
39
40 /* Helpers for encoding a policy id and file attribute into cft->private */
41 #define BLKIOFILE_PRIVATE(x, val)       (((x) << 16) | (val))
42 /* Which policy owns the file: proportional weight or throttling */
43 #define BLKIOFILE_POLICY(val)           (((val) >> 16) & 0xffff)
44 #define BLKIOFILE_ATTR(val)             ((val) & 0xffff)
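/*
 * Illustrative example (not part of the original file): a throttling file
 * such as "throttle.read_bps_device" is registered further below with
 *
 *     .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
 *                                  BLKIO_THROTL_read_bps_device)
 *
 * so that the read/write handlers can recover the owning policy with
 * BLKIOFILE_POLICY(cft->private) and the file attribute with
 * BLKIOFILE_ATTR(cft->private).
 */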
45
46 struct cgroup_subsys blkio_subsys = {
47         .name = "blkio",
48         .create = blkiocg_create,
49         .can_attach = blkiocg_can_attach,
50         .attach = blkiocg_attach,
51         .destroy = blkiocg_destroy,
52         .populate = blkiocg_populate,
53 #ifdef CONFIG_BLK_CGROUP
54         /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
55         .subsys_id = blkio_subsys_id,
56 #endif
57         .use_id = 1,
58         .module = THIS_MODULE,
59 };
60 EXPORT_SYMBOL_GPL(blkio_subsys);
61
62 static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
63                                             struct blkio_policy_node *pn)
64 {
65         list_add(&pn->node, &blkcg->policy_list);
66 }
67
68 static inline bool cftype_blkg_same_policy(struct cftype *cft,
69                         struct blkio_group *blkg)
70 {
71         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
72
73         if (blkg->plid == plid)
74                 return 1;
75
76         return 0;
77 }
78
79 /* Determines if policy node matches cgroup file being accessed */
80 static inline bool pn_matches_cftype(struct cftype *cft,
81                         struct blkio_policy_node *pn)
82 {
83         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
84         int fileid = BLKIOFILE_ATTR(cft->private);
85
86         return (plid == pn->plid && fileid == pn->fileid);
87 }
88
89 /* Must be called with blkcg->lock held */
90 static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
91 {
92         list_del(&pn->node);
93 }
94
95 /* Must be called with blkcg->lock held */
96 static struct blkio_policy_node *
97 blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev,
98                 enum blkio_policy_id plid, int fileid)
99 {
100         struct blkio_policy_node *pn;
101
102         list_for_each_entry(pn, &blkcg->policy_list, node) {
103                 if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid)
104                         return pn;
105         }
106
107         return NULL;
108 }
109
110 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
111 {
112         return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
113                             struct blkio_cgroup, css);
114 }
115 EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
116
117 static inline void
118 blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
119 {
120         struct blkio_policy_type *blkiop;
121
122         list_for_each_entry(blkiop, &blkio_list, list) {
123                 /* If this policy does not own the blkg, do not send updates */
124                 if (blkiop->plid != blkg->plid)
125                         continue;
126                 if (blkiop->ops.blkio_update_group_weight_fn)
127                         blkiop->ops.blkio_update_group_weight_fn(blkg->key,
128                                                         blkg, weight);
129         }
130 }
131
132 static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
133                                 int fileid)
134 {
135         struct blkio_policy_type *blkiop;
136
137         list_for_each_entry(blkiop, &blkio_list, list) {
138
139                 /* If this policy does not own the blkg, do not send updates */
140                 if (blkiop->plid != blkg->plid)
141                         continue;
142
143                 if (fileid == BLKIO_THROTL_read_bps_device
144                     && blkiop->ops.blkio_update_group_read_bps_fn)
145                         blkiop->ops.blkio_update_group_read_bps_fn(blkg->key,
146                                                                 blkg, bps);
147
148                 if (fileid == BLKIO_THROTL_write_bps_device
149                     && blkiop->ops.blkio_update_group_write_bps_fn)
150                         blkiop->ops.blkio_update_group_write_bps_fn(blkg->key,
151                                                                 blkg, bps);
152         }
153 }
154
155 static inline void blkio_update_group_iops(struct blkio_group *blkg,
156                         unsigned int iops, int fileid)
157 {
158         struct blkio_policy_type *blkiop;
159
160         list_for_each_entry(blkiop, &blkio_list, list) {
161
162                 /* If this policy does not own the blkg, do not send updates */
163                 if (blkiop->plid != blkg->plid)
164                         continue;
165
166                 if (fileid == BLKIO_THROTL_read_iops_device
167                     && blkiop->ops.blkio_update_group_read_iops_fn)
168                         blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
169                                                                 blkg, iops);
170
171                 if (fileid == BLKIO_THROTL_write_iops_device
172                     && blkiop->ops.blkio_update_group_write_iops_fn)
173                         blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
174                                                                 blkg, iops);
175         }
176 }
177
178 /*
179  * Add to the appropriate stat variable depending on the request type.
180  * This should be called with the blkg->stats_lock held.
181  */
182 static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
183                                 bool sync)
184 {
185         if (direction)
186                 stat[BLKIO_STAT_WRITE] += add;
187         else
188                 stat[BLKIO_STAT_READ] += add;
189         if (sync)
190                 stat[BLKIO_STAT_SYNC] += add;
191         else
192                 stat[BLKIO_STAT_ASYNC] += add;
193 }
194
195 /*
196  * Decrements the appropriate stat variable depending on the request type.
197  * Triggers BUG_ON() if the value is already zero.
198  * This should be called with the blkg->stats_lock held.
199  */
200 static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
201 {
202         if (direction) {
203                 BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
204                 stat[BLKIO_STAT_WRITE]--;
205         } else {
206                 BUG_ON(stat[BLKIO_STAT_READ] == 0);
207                 stat[BLKIO_STAT_READ]--;
208         }
209         if (sync) {
210                 BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
211                 stat[BLKIO_STAT_SYNC]--;
212         } else {
213                 BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
214                 stat[BLKIO_STAT_ASYNC]--;
215         }
216 }
217
218 #ifdef CONFIG_DEBUG_BLK_CGROUP
219 /* This should be called with the blkg->stats_lock held. */
220 static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
221                                                 struct blkio_group *curr_blkg)
222 {
223         if (blkio_blkg_waiting(&blkg->stats))
224                 return;
225         if (blkg == curr_blkg)
226                 return;
227         blkg->stats.start_group_wait_time = sched_clock();
228         blkio_mark_blkg_waiting(&blkg->stats);
229 }
230
231 /* This should be called with the blkg->stats_lock held. */
232 static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
233 {
234         unsigned long long now;
235
236         if (!blkio_blkg_waiting(stats))
237                 return;
238
239         now = sched_clock();
240         if (time_after64(now, stats->start_group_wait_time))
241                 stats->group_wait_time += now - stats->start_group_wait_time;
242         blkio_clear_blkg_waiting(stats);
243 }
244
245 /* This should be called with the blkg->stats_lock held. */
246 static void blkio_end_empty_time(struct blkio_group_stats *stats)
247 {
248         unsigned long long now;
249
250         if (!blkio_blkg_empty(stats))
251                 return;
252
253         now = sched_clock();
254         if (time_after64(now, stats->start_empty_time))
255                 stats->empty_time += now - stats->start_empty_time;
256         blkio_clear_blkg_empty(stats);
257 }
258
259 void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
260 {
261         unsigned long flags;
262
263         spin_lock_irqsave(&blkg->stats_lock, flags);
264         BUG_ON(blkio_blkg_idling(&blkg->stats));
265         blkg->stats.start_idle_time = sched_clock();
266         blkio_mark_blkg_idling(&blkg->stats);
267         spin_unlock_irqrestore(&blkg->stats_lock, flags);
268 }
269 EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
270
271 void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
272 {
273         unsigned long flags;
274         unsigned long long now;
275         struct blkio_group_stats *stats;
276
277         spin_lock_irqsave(&blkg->stats_lock, flags);
278         stats = &blkg->stats;
279         if (blkio_blkg_idling(stats)) {
280                 now = sched_clock();
281                 if (time_after64(now, stats->start_idle_time))
282                         stats->idle_time += now - stats->start_idle_time;
283                 blkio_clear_blkg_idling(stats);
284         }
285         spin_unlock_irqrestore(&blkg->stats_lock, flags);
286 }
287 EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
288
289 void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
290 {
291         unsigned long flags;
292         struct blkio_group_stats *stats;
293
294         spin_lock_irqsave(&blkg->stats_lock, flags);
295         stats = &blkg->stats;
296         stats->avg_queue_size_sum +=
297                         stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
298                         stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
299         stats->avg_queue_size_samples++;
300         blkio_update_group_wait_time(stats);
301         spin_unlock_irqrestore(&blkg->stats_lock, flags);
302 }
303 EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
304
305 void blkiocg_set_start_empty_time(struct blkio_group *blkg)
306 {
307         unsigned long flags;
308         struct blkio_group_stats *stats;
309
310         spin_lock_irqsave(&blkg->stats_lock, flags);
311         stats = &blkg->stats;
312
313         if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
314                         stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
315                 spin_unlock_irqrestore(&blkg->stats_lock, flags);
316                 return;
317         }
318
319         /*
320          * The group is already marked empty. This can happen if cfqq got a new
321          * request in the parent group and moved to this group while being added
322          * to the service tree. Just ignore the event and move on.
323          */
324         if (blkio_blkg_empty(stats)) {
325                 spin_unlock_irqrestore(&blkg->stats_lock, flags);
326                 return;
327         }
328
329         stats->start_empty_time = sched_clock();
330         blkio_mark_blkg_empty(stats);
331         spin_unlock_irqrestore(&blkg->stats_lock, flags);
332 }
333 EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);
334
335 void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
336                         unsigned long dequeue)
337 {
338         blkg->stats.dequeue += dequeue;
339 }
340 EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
341 #else
342 static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
343                                         struct blkio_group *curr_blkg) {}
344 static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
345 #endif
346
347 void blkiocg_update_io_add_stats(struct blkio_group *blkg,
348                         struct blkio_group *curr_blkg, bool direction,
349                         bool sync)
350 {
351         unsigned long flags;
352
353         spin_lock_irqsave(&blkg->stats_lock, flags);
354         blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
355                         sync);
356         blkio_end_empty_time(&blkg->stats);
357         blkio_set_start_group_wait_time(blkg, curr_blkg);
358         spin_unlock_irqrestore(&blkg->stats_lock, flags);
359 }
360 EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
361
362 void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
363                                                 bool direction, bool sync)
364 {
365         unsigned long flags;
366
367         spin_lock_irqsave(&blkg->stats_lock, flags);
368         blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED],
369                                         direction, sync);
370         spin_unlock_irqrestore(&blkg->stats_lock, flags);
371 }
372 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
373
374 void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
375 {
376         unsigned long flags;
377
378         spin_lock_irqsave(&blkg->stats_lock, flags);
379         blkg->stats.time += time;
380         spin_unlock_irqrestore(&blkg->stats_lock, flags);
381 }
382 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
383
384 void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
385                                 uint64_t bytes, bool direction, bool sync)
386 {
387         struct blkio_group_stats *stats;
388         unsigned long flags;
389
390         spin_lock_irqsave(&blkg->stats_lock, flags);
391         stats = &blkg->stats;
392         stats->sectors += bytes >> 9;
393         blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
394                         sync);
395         blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
396                         direction, sync);
397         spin_unlock_irqrestore(&blkg->stats_lock, flags);
398 }
399 EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
400
401 void blkiocg_update_completion_stats(struct blkio_group *blkg,
402         uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
403 {
404         struct blkio_group_stats *stats;
405         unsigned long flags;
406         unsigned long long now = sched_clock();
407
408         spin_lock_irqsave(&blkg->stats_lock, flags);
409         stats = &blkg->stats;
410         if (time_after64(now, io_start_time))
411                 blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
412                                 now - io_start_time, direction, sync);
413         if (time_after64(io_start_time, start_time))
414                 blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
415                                 io_start_time - start_time, direction, sync);
416         spin_unlock_irqrestore(&blkg->stats_lock, flags);
417 }
418 EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
419
420 void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
421                                         bool sync)
422 {
423         unsigned long flags;
424
425         spin_lock_irqsave(&blkg->stats_lock, flags);
426         blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
427                         sync);
428         spin_unlock_irqrestore(&blkg->stats_lock, flags);
429 }
430 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
431
432 void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
433                 struct blkio_group *blkg, void *key, dev_t dev,
434                 enum blkio_policy_id plid)
435 {
436         unsigned long flags;
437
438         spin_lock_irqsave(&blkcg->lock, flags);
439         spin_lock_init(&blkg->stats_lock);
440         rcu_assign_pointer(blkg->key, key);
441         blkg->blkcg_id = css_id(&blkcg->css);
442         hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
443         blkg->plid = plid;
444         spin_unlock_irqrestore(&blkcg->lock, flags);
445         /* Need to take css reference ? */
446         cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
447         blkg->dev = dev;
448 }
449 EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
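/*
 * Sketch of expected usage (illustrative, not part of the original file):
 * an IO policy such as CFQ embeds a struct blkio_group in its own per-cgroup
 * structure and hashes it under the cgroup with something like
 *
 *     blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
 *                             MKDEV(major, minor), BLKIO_POLICY_PROP);
 *
 * where the opaque key (here the cfq_data pointer) is what
 * blkiocg_lookup_group() later uses to find the group again; cfqg, cfqd,
 * major and minor are hypothetical locals of the caller.
 */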
450
451 static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
452 {
453         hlist_del_init_rcu(&blkg->blkcg_node);
454         blkg->blkcg_id = 0;
455 }
456
457 /*
458  * Returns 0 if the blkio_group was still on the cgroup list. Otherwise returns
459  * 1, indicating that the blkio_group was unhashed by the time we got to it.
460  */
461 int blkiocg_del_blkio_group(struct blkio_group *blkg)
462 {
463         struct blkio_cgroup *blkcg;
464         unsigned long flags;
465         struct cgroup_subsys_state *css;
466         int ret = 1;
467
468         rcu_read_lock();
469         css = css_lookup(&blkio_subsys, blkg->blkcg_id);
470         if (css) {
471                 blkcg = container_of(css, struct blkio_cgroup, css);
472                 spin_lock_irqsave(&blkcg->lock, flags);
473                 if (!hlist_unhashed(&blkg->blkcg_node)) {
474                         __blkiocg_del_blkio_group(blkg);
475                         ret = 0;
476                 }
477                 spin_unlock_irqrestore(&blkcg->lock, flags);
478         }
479
480         rcu_read_unlock();
481         return ret;
482 }
483 EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
484
485 /* called under rcu_read_lock(). */
486 struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
487 {
488         struct blkio_group *blkg;
489         struct hlist_node *n;
490         void *__key;
491
492         hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
493                 __key = blkg->key;
494                 if (__key == key)
495                         return blkg;
496         }
497
498         return NULL;
499 }
500 EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
501
502 static int
503 blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
504 {
505         struct blkio_cgroup *blkcg;
506         struct blkio_group *blkg;
507         struct blkio_group_stats *stats;
508         struct hlist_node *n;
509         uint64_t queued[BLKIO_STAT_TOTAL];
510         int i;
511 #ifdef CONFIG_DEBUG_BLK_CGROUP
512         bool idling, waiting, empty;
513         unsigned long long now = sched_clock();
514 #endif
515
516         blkcg = cgroup_to_blkio_cgroup(cgroup);
517         spin_lock_irq(&blkcg->lock);
518         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
519                 spin_lock(&blkg->stats_lock);
520                 stats = &blkg->stats;
521 #ifdef CONFIG_DEBUG_BLK_CGROUP
522                 idling = blkio_blkg_idling(stats);
523                 waiting = blkio_blkg_waiting(stats);
524                 empty = blkio_blkg_empty(stats);
525 #endif
526                 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
527                         queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
528                 memset(stats, 0, sizeof(struct blkio_group_stats));
529                 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
530                         stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
531 #ifdef CONFIG_DEBUG_BLK_CGROUP
532                 if (idling) {
533                         blkio_mark_blkg_idling(stats);
534                         stats->start_idle_time = now;
535                 }
536                 if (waiting) {
537                         blkio_mark_blkg_waiting(stats);
538                         stats->start_group_wait_time = now;
539                 }
540                 if (empty) {
541                         blkio_mark_blkg_empty(stats);
542                         stats->start_empty_time = now;
543                 }
544 #endif
545                 spin_unlock(&blkg->stats_lock);
546         }
547         spin_unlock_irq(&blkcg->lock);
548         return 0;
549 }
550
551 static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str,
552                                 int chars_left, bool diskname_only)
553 {
554         snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev));
555         chars_left -= strlen(str);
556         if (chars_left <= 0) {
557                 printk(KERN_WARNING
558                         "Possibly incorrect cgroup stat display format\n");
559                 return;
560         }
561         if (diskname_only)
562                 return;
563         switch (type) {
564         case BLKIO_STAT_READ:
565                 strlcat(str, " Read", chars_left);
566                 break;
567         case BLKIO_STAT_WRITE:
568                 strlcat(str, " Write", chars_left);
569                 break;
570         case BLKIO_STAT_SYNC:
571                 strlcat(str, " Sync", chars_left);
572                 break;
573         case BLKIO_STAT_ASYNC:
574                 strlcat(str, " Async", chars_left);
575                 break;
576         case BLKIO_STAT_TOTAL:
577                 strlcat(str, " Total", chars_left);
578                 break;
579         default:
580                 strlcat(str, " Invalid", chars_left);
581         }
582 }
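/*
 * Illustrative note (not part of the original file): for dev 8:16 this helper
 * produces keys such as "8:16 Read", "8:16 Write", "8:16 Sync", "8:16 Async"
 * and "8:16 Total", or just "8:16" when diskname_only is true. This is the
 * key format seen when reading map files such as blkio.io_service_bytes.
 */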
583
584 static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
585                                 struct cgroup_map_cb *cb, dev_t dev)
586 {
587         blkio_get_key_name(0, dev, str, chars_left, true);
588         cb->fill(cb, str, val);
589         return val;
590 }
591
592 /* This should be called with blkg->stats_lock held */
593 static uint64_t blkio_get_stat(struct blkio_group *blkg,
594                 struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
595 {
596         uint64_t disk_total;
597         char key_str[MAX_KEY_LEN];
598         enum stat_sub_type sub_type;
599
600         if (type == BLKIO_STAT_TIME)
601                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
602                                         blkg->stats.time, cb, dev);
603         if (type == BLKIO_STAT_SECTORS)
604                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
605                                         blkg->stats.sectors, cb, dev);
606 #ifdef CONFIG_DEBUG_BLK_CGROUP
607         if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
608                 uint64_t sum = blkg->stats.avg_queue_size_sum;
609                 uint64_t samples = blkg->stats.avg_queue_size_samples;
610                 if (samples)
611                         do_div(sum, samples);
612                 else
613                         sum = 0;
614                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev);
615         }
616         if (type == BLKIO_STAT_GROUP_WAIT_TIME)
617                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
618                                         blkg->stats.group_wait_time, cb, dev);
619         if (type == BLKIO_STAT_IDLE_TIME)
620                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
621                                         blkg->stats.idle_time, cb, dev);
622         if (type == BLKIO_STAT_EMPTY_TIME)
623                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
624                                         blkg->stats.empty_time, cb, dev);
625         if (type == BLKIO_STAT_DEQUEUE)
626                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
627                                         blkg->stats.dequeue, cb, dev);
628 #endif
629
630         for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
631                         sub_type++) {
632                 blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
633                 cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]);
634         }
635         disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] +
636                         blkg->stats.stat_arr[type][BLKIO_STAT_WRITE];
637         blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
638         cb->fill(cb, key_str, disk_total);
639         return disk_total;
640 }
641
642 static int blkio_check_dev_num(dev_t dev)
643 {
644         int part = 0;
645         struct gendisk *disk;
646
647         disk = get_gendisk(dev, &part);
648         if (!disk || part)
649                 return -ENODEV;
650
651         return 0;
652 }
653
654 static int blkio_policy_parse_and_set(char *buf,
655         struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
656 {
657         char *s[4], *p, *major_s = NULL, *minor_s = NULL;
658         int ret;
659         unsigned long major, minor, temp;
660         int i = 0;
661         dev_t dev;
662         u64 bps, iops;
663
664         memset(s, 0, sizeof(s));
665
666         while ((p = strsep(&buf, " ")) != NULL) {
667                 if (!*p)
668                         continue;
669
670                 s[i++] = p;
671
672                 /* Prevent the user from passing in too many fields */
673                 if (i == 3)
674                         break;
675         }
676
677         if (i != 2)
678                 return -EINVAL;
679
680         p = strsep(&s[0], ":");
681         if (p != NULL)
682                 major_s = p;
683         else
684                 return -EINVAL;
685
686         minor_s = s[0];
687         if (!minor_s)
688                 return -EINVAL;
689
690         ret = strict_strtoul(major_s, 10, &major);
691         if (ret)
692                 return -EINVAL;
693
694         ret = strict_strtoul(minor_s, 10, &minor);
695         if (ret)
696                 return -EINVAL;
697
698         dev = MKDEV(major, minor);
699
700         ret = blkio_check_dev_num(dev);
701         if (ret)
702                 return ret;
703
704         newpn->dev = dev;
705
706         if (s[1] == NULL)
707                 return -EINVAL;
708
709         switch (plid) {
710         case BLKIO_POLICY_PROP:
711                 ret = strict_strtoul(s[1], 10, &temp);
712                 if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
713                         temp > BLKIO_WEIGHT_MAX)
714                         return -EINVAL;
715
716                 newpn->plid = plid;
717                 newpn->fileid = fileid;
718                 newpn->val.weight = temp;
719                 break;
720         case BLKIO_POLICY_THROTL:
721                 switch(fileid) {
722                 case BLKIO_THROTL_read_bps_device:
723                 case BLKIO_THROTL_write_bps_device:
724                         ret = strict_strtoull(s[1], 10, &bps);
725                         if (ret)
726                                 return -EINVAL;
727
728                         newpn->plid = plid;
729                         newpn->fileid = fileid;
730                         newpn->val.bps = bps;
731                         break;
732                 case BLKIO_THROTL_read_iops_device:
733                 case BLKIO_THROTL_write_iops_device:
734                         ret = strict_strtoull(s[1], 10, &iops);
735                         if (ret)
736                                 return -EINVAL;
737
738                         if (iops > THROTL_IOPS_MAX)
739                                 return -EINVAL;
740
741                         newpn->plid = plid;
742                         newpn->fileid = fileid;
743                         newpn->val.iops = (unsigned int)iops;
744                         break;
745                 }
746                 break;
747         default:
748                 BUG();
749         }
750
751         return 0;
752 }
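/*
 * Illustrative usage (not part of the original file): the parser above
 * expects a "major:minor value" pair, so from userspace one might write
 *
 *     echo "8:16 500"     > blkio.weight_device
 *     echo "8:16 1048576" > blkio.throttle.read_bps_device
 *
 * A value of 0 is interpreted further below as a request to delete the rule.
 */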
753
754 unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
755                               dev_t dev)
756 {
757         struct blkio_policy_node *pn;
758
759         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
760                                 BLKIO_PROP_weight_device);
761         if (pn)
762                 return pn->val.weight;
763         else
764                 return blkcg->weight;
765 }
766 EXPORT_SYMBOL_GPL(blkcg_get_weight);
767
768 uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
769 {
770         struct blkio_policy_node *pn;
771
772         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
773                                 BLKIO_THROTL_read_bps_device);
774         if (pn)
775                 return pn->val.bps;
776         else
777                 return -1;
778 }
779
780 uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
781 {
782         struct blkio_policy_node *pn;
783         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
784                                 BLKIO_THROTL_write_bps_device);
785         if (pn)
786                 return pn->val.bps;
787         else
788                 return -1;
789 }
790
791 unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
792 {
793         struct blkio_policy_node *pn;
794
795         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
796                                 BLKIO_THROTL_read_iops_device);
797         if (pn)
798                 return pn->val.iops;
799         else
800                 return -1;
801 }
802
803 unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
804 {
805         struct blkio_policy_node *pn;
806         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
807                                 BLKIO_THROTL_write_iops_device);
808         if (pn)
809                 return pn->val.iops;
810         else
811                 return -1;
812 }
813
814 /* Checks whether the user asked to delete a policy rule */
815 static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
816 {
817         switch(pn->plid) {
818         case BLKIO_POLICY_PROP:
819                 if (pn->val.weight == 0)
820                         return 1;
821                 break;
822         case BLKIO_POLICY_THROTL:
823                 switch(pn->fileid) {
824                 case BLKIO_THROTL_read_bps_device:
825                 case BLKIO_THROTL_write_bps_device:
826                         if (pn->val.bps == 0)
827                                 return 1;
828                         break;
829                 case BLKIO_THROTL_read_iops_device:
830                 case BLKIO_THROTL_write_iops_device:
831                         if (pn->val.iops == 0)
832                                 return 1;
833                 }
834                 break;
835         default:
836                 BUG();
837         }
838
839         return 0;
840 }
841
842 static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
843                                         struct blkio_policy_node *newpn)
844 {
845         switch(oldpn->plid) {
846         case BLKIO_POLICY_PROP:
847                 oldpn->val.weight = newpn->val.weight;
848                 break;
849         case BLKIO_POLICY_THROTL:
850                 switch(newpn->fileid) {
851                 case BLKIO_THROTL_read_bps_device:
852                 case BLKIO_THROTL_write_bps_device:
853                         oldpn->val.bps = newpn->val.bps;
854                         break;
855                 case BLKIO_THROTL_read_iops_device:
856                 case BLKIO_THROTL_write_iops_device:
857                         oldpn->val.iops = newpn->val.iops;
858                 }
859                 break;
860         default:
861                 BUG();
862         }
863 }
864
865 /*
866  * Some rules/values in blkg have changed. Propagate those to the
867  * respective policies.
868  */
869 static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
870                 struct blkio_group *blkg, struct blkio_policy_node *pn)
871 {
872         unsigned int weight, iops;
873         u64 bps;
874
875         switch(pn->plid) {
876         case BLKIO_POLICY_PROP:
877                 weight = pn->val.weight ? pn->val.weight :
878                                 blkcg->weight;
879                 blkio_update_group_weight(blkg, weight);
880                 break;
881         case BLKIO_POLICY_THROTL:
882                 switch(pn->fileid) {
883                 case BLKIO_THROTL_read_bps_device:
884                 case BLKIO_THROTL_write_bps_device:
885                         bps = pn->val.bps ? pn->val.bps : (-1);
886                         blkio_update_group_bps(blkg, bps, pn->fileid);
887                         break;
888                 case BLKIO_THROTL_read_iops_device:
889                 case BLKIO_THROTL_write_iops_device:
890                         iops = pn->val.iops ? pn->val.iops : (-1);
891                         blkio_update_group_iops(blkg, iops, pn->fileid);
892                         break;
893                 }
894                 break;
895         default:
896                 BUG();
897         }
898 }
899
900 /*
901  * A policy node rule has been updated. Propagate this update to all the
902  * block groups that might be affected by it.
903  */
904 static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg,
905                                 struct blkio_policy_node *pn)
906 {
907         struct blkio_group *blkg;
908         struct hlist_node *n;
909
910         spin_lock(&blkio_list_lock);
911         spin_lock_irq(&blkcg->lock);
912
913         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
914                 if (pn->dev != blkg->dev || pn->plid != blkg->plid)
915                         continue;
916                 blkio_update_blkg_policy(blkcg, blkg, pn);
917         }
918
919         spin_unlock_irq(&blkcg->lock);
920         spin_unlock(&blkio_list_lock);
921 }
922
923 static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
924                                        const char *buffer)
925 {
926         int ret = 0;
927         char *buf;
928         struct blkio_policy_node *newpn, *pn;
929         struct blkio_cgroup *blkcg;
930         int keep_newpn = 0;
931         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
932         int fileid = BLKIOFILE_ATTR(cft->private);
933
934         buf = kstrdup(buffer, GFP_KERNEL);
935         if (!buf)
936                 return -ENOMEM;
937
938         newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
939         if (!newpn) {
940                 ret = -ENOMEM;
941                 goto free_buf;
942         }
943
944         ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
945         if (ret)
946                 goto free_newpn;
947
948         blkcg = cgroup_to_blkio_cgroup(cgrp);
949
950         spin_lock_irq(&blkcg->lock);
951
952         pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
953         if (!pn) {
954                 if (!blkio_delete_rule_command(newpn)) {
955                         blkio_policy_insert_node(blkcg, newpn);
956                         keep_newpn = 1;
957                 }
958                 spin_unlock_irq(&blkcg->lock);
959                 goto update_io_group;
960         }
961
962         if (blkio_delete_rule_command(newpn)) {
963                 blkio_policy_delete_node(pn);
964                 spin_unlock_irq(&blkcg->lock);
965                 goto update_io_group;
966         }
967         spin_unlock_irq(&blkcg->lock);
968
969         blkio_update_policy_rule(pn, newpn);
970
971 update_io_group:
972         blkio_update_policy_node_blkg(blkcg, newpn);
973
974 free_newpn:
975         if (!keep_newpn)
976                 kfree(newpn);
977 free_buf:
978         kfree(buf);
979         return ret;
980 }
981
982 static void
983 blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
984 {
985         switch(pn->plid) {
986                 case BLKIO_POLICY_PROP:
987                         if (pn->fileid == BLKIO_PROP_weight_device)
988                                 seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
989                                         MINOR(pn->dev), pn->val.weight);
990                         break;
991                 case BLKIO_POLICY_THROTL:
992                         switch(pn->fileid) {
993                         case BLKIO_THROTL_read_bps_device:
994                         case BLKIO_THROTL_write_bps_device:
995                                 seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
996                                         MINOR(pn->dev), pn->val.bps);
997                                 break;
998                         case BLKIO_THROTL_read_iops_device:
999                         case BLKIO_THROTL_write_iops_device:
1000                                 seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
1001                                         MINOR(pn->dev), pn->val.iops);
1002                                 break;
1003                         }
1004                         break;
1005                 default:
1006                         BUG();
1007         }
1008 }
1009
1010 /* cgroup files which read their data from policy nodes end up here */
1011 static void blkio_read_policy_node_files(struct cftype *cft,
1012                         struct blkio_cgroup *blkcg, struct seq_file *m)
1013 {
1014         struct blkio_policy_node *pn;
1015
1016         if (!list_empty(&blkcg->policy_list)) {
1017                 spin_lock_irq(&blkcg->lock);
1018                 list_for_each_entry(pn, &blkcg->policy_list, node) {
1019                         if (!pn_matches_cftype(cft, pn))
1020                                 continue;
1021                         blkio_print_policy_node(m, pn);
1022                 }
1023                 spin_unlock_irq(&blkcg->lock);
1024         }
1025 }
1026
1027 static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
1028                                 struct seq_file *m)
1029 {
1030         struct blkio_cgroup *blkcg;
1031         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1032         int name = BLKIOFILE_ATTR(cft->private);
1033
1034         blkcg = cgroup_to_blkio_cgroup(cgrp);
1035
1036         switch(plid) {
1037         case BLKIO_POLICY_PROP:
1038                 switch(name) {
1039                 case BLKIO_PROP_weight_device:
1040                         blkio_read_policy_node_files(cft, blkcg, m);
1041                         return 0;
1042                 default:
1043                         BUG();
1044                 }
1045                 break;
1046         case BLKIO_POLICY_THROTL:
1047                 switch(name){
1048                 case BLKIO_THROTL_read_bps_device:
1049                 case BLKIO_THROTL_write_bps_device:
1050                 case BLKIO_THROTL_read_iops_device:
1051                 case BLKIO_THROTL_write_iops_device:
1052                         blkio_read_policy_node_files(cft, blkcg, m);
1053                         return 0;
1054                 default:
1055                         BUG();
1056                 }
1057                 break;
1058         default:
1059                 BUG();
1060         }
1061
1062         return 0;
1063 }
1064
1065 static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
1066                 struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
1067                 bool show_total)
1068 {
1069         struct blkio_group *blkg;
1070         struct hlist_node *n;
1071         uint64_t cgroup_total = 0;
1072
1073         rcu_read_lock();
1074         hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
1075                 if (blkg->dev) {
1076                         if (!cftype_blkg_same_policy(cft, blkg))
1077                                 continue;
1078                         spin_lock_irq(&blkg->stats_lock);
1079                         cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
1080                                                 type);
1081                         spin_unlock_irq(&blkg->stats_lock);
1082                 }
1083         }
1084         if (show_total)
1085                 cb->fill(cb, "Total", cgroup_total);
1086         rcu_read_unlock();
1087         return 0;
1088 }
1089
1090 /* All map-type cgroup files are serviced by this function */
1091 static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
1092                                 struct cgroup_map_cb *cb)
1093 {
1094         struct blkio_cgroup *blkcg;
1095         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1096         int name = BLKIOFILE_ATTR(cft->private);
1097
1098         blkcg = cgroup_to_blkio_cgroup(cgrp);
1099
1100         switch(plid) {
1101         case BLKIO_POLICY_PROP:
1102                 switch(name) {
1103                 case BLKIO_PROP_time:
1104                         return blkio_read_blkg_stats(blkcg, cft, cb,
1105                                                 BLKIO_STAT_TIME, 0);
1106                 case BLKIO_PROP_sectors:
1107                         return blkio_read_blkg_stats(blkcg, cft, cb,
1108                                                 BLKIO_STAT_SECTORS, 0);
1109                 case BLKIO_PROP_io_service_bytes:
1110                         return blkio_read_blkg_stats(blkcg, cft, cb,
1111                                                 BLKIO_STAT_SERVICE_BYTES, 1);
1112                 case BLKIO_PROP_io_serviced:
1113                         return blkio_read_blkg_stats(blkcg, cft, cb,
1114                                                 BLKIO_STAT_SERVICED, 1);
1115                 case BLKIO_PROP_io_service_time:
1116                         return blkio_read_blkg_stats(blkcg, cft, cb,
1117                                                 BLKIO_STAT_SERVICE_TIME, 1);
1118                 case BLKIO_PROP_io_wait_time:
1119                         return blkio_read_blkg_stats(blkcg, cft, cb,
1120                                                 BLKIO_STAT_WAIT_TIME, 1);
1121                 case BLKIO_PROP_io_merged:
1122                         return blkio_read_blkg_stats(blkcg, cft, cb,
1123                                                 BLKIO_STAT_MERGED, 1);
1124                 case BLKIO_PROP_io_queued:
1125                         return blkio_read_blkg_stats(blkcg, cft, cb,
1126                                                 BLKIO_STAT_QUEUED, 1);
1127 #ifdef CONFIG_DEBUG_BLK_CGROUP
1128                 case BLKIO_PROP_dequeue:
1129                         return blkio_read_blkg_stats(blkcg, cft, cb,
1130                                                 BLKIO_STAT_DEQUEUE, 0);
1131                 case BLKIO_PROP_avg_queue_size:
1132                         return blkio_read_blkg_stats(blkcg, cft, cb,
1133                                                 BLKIO_STAT_AVG_QUEUE_SIZE, 0);
1134                 case BLKIO_PROP_group_wait_time:
1135                         return blkio_read_blkg_stats(blkcg, cft, cb,
1136                                                 BLKIO_STAT_GROUP_WAIT_TIME, 0);
1137                 case BLKIO_PROP_idle_time:
1138                         return blkio_read_blkg_stats(blkcg, cft, cb,
1139                                                 BLKIO_STAT_IDLE_TIME, 0);
1140                 case BLKIO_PROP_empty_time:
1141                         return blkio_read_blkg_stats(blkcg, cft, cb,
1142                                                 BLKIO_STAT_EMPTY_TIME, 0);
1143 #endif
1144                 default:
1145                         BUG();
1146                 }
1147                 break;
1148         case BLKIO_POLICY_THROTL:
1149                 switch(name){
1150                 case BLKIO_THROTL_io_service_bytes:
1151                         return blkio_read_blkg_stats(blkcg, cft, cb,
1152                                                 BLKIO_STAT_SERVICE_BYTES, 1);
1153                 case BLKIO_THROTL_io_serviced:
1154                         return blkio_read_blkg_stats(blkcg, cft, cb,
1155                                                 BLKIO_STAT_SERVICED, 1);
1156                 default:
1157                         BUG();
1158                 }
1159                 break;
1160         default:
1161                 BUG();
1162         }
1163
1164         return 0;
1165 }
1166
1167 static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val)
1168 {
1169         struct blkio_group *blkg;
1170         struct hlist_node *n;
1171         struct blkio_policy_node *pn;
1172
1173         if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
1174                 return -EINVAL;
1175
1176         spin_lock(&blkio_list_lock);
1177         spin_lock_irq(&blkcg->lock);
1178         blkcg->weight = (unsigned int)val;
1179
1180         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
1181                 pn = blkio_policy_search_node(blkcg, blkg->dev,
1182                                 BLKIO_POLICY_PROP, BLKIO_PROP_weight_device);
1183                 if (pn)
1184                         continue;
1185
1186                 blkio_update_group_weight(blkg, blkcg->weight);
1187         }
1188         spin_unlock_irq(&blkcg->lock);
1189         spin_unlock(&blkio_list_lock);
1190         return 0;
1191 }
1192
1193 static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft) {
1194         struct blkio_cgroup *blkcg;
1195         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1196         int name = BLKIOFILE_ATTR(cft->private);
1197
1198         blkcg = cgroup_to_blkio_cgroup(cgrp);
1199
1200         switch(plid) {
1201         case BLKIO_POLICY_PROP:
1202                 switch(name) {
1203                 case BLKIO_PROP_weight:
1204                         return (u64)blkcg->weight;
1205                 }
1206                 break;
1207         default:
1208                 BUG();
1209         }
1210         return 0;
1211 }
1212
1213 static int
1214 blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1215 {
1216         struct blkio_cgroup *blkcg;
1217         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1218         int name = BLKIOFILE_ATTR(cft->private);
1219
1220         blkcg = cgroup_to_blkio_cgroup(cgrp);
1221
1222         switch(plid) {
1223         case BLKIO_POLICY_PROP:
1224                 switch(name) {
1225                 case BLKIO_PROP_weight:
1226                         return blkio_weight_write(blkcg, val);
1227                 }
1228                 break;
1229         default:
1230                 BUG();
1231         }
1232
1233         return 0;
1234 }
1235
1236 struct cftype blkio_files[] = {
1237         {
1238                 .name = "weight_device",
1239                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1240                                 BLKIO_PROP_weight_device),
1241                 .read_seq_string = blkiocg_file_read,
1242                 .write_string = blkiocg_file_write,
1243                 .max_write_len = 256,
1244         },
1245         {
1246                 .name = "weight",
1247                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1248                                 BLKIO_PROP_weight),
1249                 .read_u64 = blkiocg_file_read_u64,
1250                 .write_u64 = blkiocg_file_write_u64,
1251         },
1252         {
1253                 .name = "time",
1254                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1255                                 BLKIO_PROP_time),
1256                 .read_map = blkiocg_file_read_map,
1257         },
1258         {
1259                 .name = "sectors",
1260                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1261                                 BLKIO_PROP_sectors),
1262                 .read_map = blkiocg_file_read_map,
1263         },
1264         {
1265                 .name = "io_service_bytes",
1266                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1267                                 BLKIO_PROP_io_service_bytes),
1268                 .read_map = blkiocg_file_read_map,
1269         },
1270         {
1271                 .name = "io_serviced",
1272                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1273                                 BLKIO_PROP_io_serviced),
1274                 .read_map = blkiocg_file_read_map,
1275         },
1276         {
1277                 .name = "io_service_time",
1278                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1279                                 BLKIO_PROP_io_service_time),
1280                 .read_map = blkiocg_file_read_map,
1281         },
1282         {
1283                 .name = "io_wait_time",
1284                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1285                                 BLKIO_PROP_io_wait_time),
1286                 .read_map = blkiocg_file_read_map,
1287         },
1288         {
1289                 .name = "io_merged",
1290                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1291                                 BLKIO_PROP_io_merged),
1292                 .read_map = blkiocg_file_read_map,
1293         },
1294         {
1295                 .name = "io_queued",
1296                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1297                                 BLKIO_PROP_io_queued),
1298                 .read_map = blkiocg_file_read_map,
1299         },
1300         {
1301                 .name = "reset_stats",
1302                 .write_u64 = blkiocg_reset_stats,
1303         },
1304 #ifdef CONFIG_BLK_DEV_THROTTLING
1305         {
1306                 .name = "throttle.read_bps_device",
1307                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1308                                 BLKIO_THROTL_read_bps_device),
1309                 .read_seq_string = blkiocg_file_read,
1310                 .write_string = blkiocg_file_write,
1311                 .max_write_len = 256,
1312         },
1313
1314         {
1315                 .name = "throttle.write_bps_device",
1316                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1317                                 BLKIO_THROTL_write_bps_device),
1318                 .read_seq_string = blkiocg_file_read,
1319                 .write_string = blkiocg_file_write,
1320                 .max_write_len = 256,
1321         },
1322
1323         {
1324                 .name = "throttle.read_iops_device",
1325                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1326                                 BLKIO_THROTL_read_iops_device),
1327                 .read_seq_string = blkiocg_file_read,
1328                 .write_string = blkiocg_file_write,
1329                 .max_write_len = 256,
1330         },
1331
1332         {
1333                 .name = "throttle.write_iops_device",
1334                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1335                                 BLKIO_THROTL_write_iops_device),
1336                 .read_seq_string = blkiocg_file_read,
1337                 .write_string = blkiocg_file_write,
1338                 .max_write_len = 256,
1339         },
1340         {
1341                 .name = "throttle.io_service_bytes",
1342                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1343                                 BLKIO_THROTL_io_service_bytes),
1344                 .read_map = blkiocg_file_read_map,
1345         },
1346         {
1347                 .name = "throttle.io_serviced",
1348                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1349                                 BLKIO_THROTL_io_serviced),
1350                 .read_map = blkiocg_file_read_map,
1351         },
1352 #endif /* CONFIG_BLK_DEV_THROTTLING */
1353
1354 #ifdef CONFIG_DEBUG_BLK_CGROUP
1355         {
1356                 .name = "avg_queue_size",
1357                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1358                                 BLKIO_PROP_avg_queue_size),
1359                 .read_map = blkiocg_file_read_map,
1360         },
1361         {
1362                 .name = "group_wait_time",
1363                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1364                                 BLKIO_PROP_group_wait_time),
1365                 .read_map = blkiocg_file_read_map,
1366         },
1367         {
1368                 .name = "idle_time",
1369                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1370                                 BLKIO_PROP_idle_time),
1371                 .read_map = blkiocg_file_read_map,
1372         },
1373         {
1374                 .name = "empty_time",
1375                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1376                                 BLKIO_PROP_empty_time),
1377                 .read_map = blkiocg_file_read_map,
1378         },
1379         {
1380                 .name = "dequeue",
1381                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1382                                 BLKIO_PROP_dequeue),
1383                 .read_map = blkiocg_file_read_map,
1384         },
1385 #endif
1386 };
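/*
 * Illustrative note (not part of the original file): the cgroup core prefixes
 * each entry above with the subsystem name, so userspace sees files such as
 * blkio.weight, blkio.weight_device, blkio.io_service_bytes, blkio.reset_stats
 * and blkio.throttle.read_bps_device; the throttle.* and debug entries only
 * exist when CONFIG_BLK_DEV_THROTTLING / CONFIG_DEBUG_BLK_CGROUP are enabled.
 */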
1387
1388 static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1389 {
1390         return cgroup_add_files(cgroup, subsys, blkio_files,
1391                                 ARRAY_SIZE(blkio_files));
1392 }
1393
1394 static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1395 {
1396         struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
1397         unsigned long flags;
1398         struct blkio_group *blkg;
1399         void *key;
1400         struct blkio_policy_type *blkiop;
1401         struct blkio_policy_node *pn, *pntmp;
1402
1403         rcu_read_lock();
1404         do {
1405                 spin_lock_irqsave(&blkcg->lock, flags);
1406
1407                 if (hlist_empty(&blkcg->blkg_list)) {
1408                         spin_unlock_irqrestore(&blkcg->lock, flags);
1409                         break;
1410                 }
1411
1412                 blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
1413                                         blkcg_node);
1414                 key = rcu_dereference(blkg->key);
1415                 __blkiocg_del_blkio_group(blkg);
1416
1417                 spin_unlock_irqrestore(&blkcg->lock, flags);
1418
1419                 /*
1420                  * This blkio_group is being unlinked as the associated cgroup is
1421                  * going away. Let all the IO controlling policies know about
1422                  * this event.
1423                  */
1424                 spin_lock(&blkio_list_lock);
1425                 list_for_each_entry(blkiop, &blkio_list, list) {
1426                         if (blkiop->plid != blkg->plid)
1427                                 continue;
1428                         blkiop->ops.blkio_unlink_group_fn(key, blkg);
1429                 }
1430                 spin_unlock(&blkio_list_lock);
1431         } while (1);
1432
1433         list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
1434                 blkio_policy_delete_node(pn);
1435                 kfree(pn);
1436         }
1437
1438         free_css_id(&blkio_subsys, &blkcg->css);
1439         rcu_read_unlock();
1440         if (blkcg != &blkio_root_cgroup)
1441                 kfree(blkcg);
1442 }
1443
1444 static struct cgroup_subsys_state *
1445 blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1446 {
1447         struct blkio_cgroup *blkcg;
1448         struct cgroup *parent = cgroup->parent;
1449
1450         if (!parent) {
1451                 blkcg = &blkio_root_cgroup;
1452                 goto done;
1453         }
1454
1455         blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
1456         if (!blkcg)
1457                 return ERR_PTR(-ENOMEM);
1458
1459         blkcg->weight = BLKIO_WEIGHT_DEFAULT;
1460 done:
1461         spin_lock_init(&blkcg->lock);
1462         INIT_HLIST_HEAD(&blkcg->blkg_list);
1463
1464         INIT_LIST_HEAD(&blkcg->policy_list);
1465         return &blkcg->css;
1466 }
1467
1468 /*
1469  * We cannot support shared io contexts, as we have no means to support
1470  * two tasks with the same ioc in two different groups without major rework
1471  * of the main cic data structures.  For now we allow a task to change
1472  * its cgroup only if it's the only owner of its ioc.
1473  */
1474 static int blkiocg_can_attach(struct cgroup_subsys *subsys,
1475                                 struct cgroup *cgroup, struct task_struct *tsk,
1476                                 bool threadgroup)
1477 {
1478         struct io_context *ioc;
1479         int ret = 0;
1480
1481         /* task_lock() is needed to avoid races with exit_io_context() */
1482         task_lock(tsk);
1483         ioc = tsk->io_context;
1484         if (ioc && atomic_read(&ioc->nr_tasks) > 1)
1485                 ret = -EINVAL;
1486         task_unlock(tsk);
1487
1488         return ret;
1489 }
1490
1491 static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
1492                                 struct cgroup *prev, struct task_struct *tsk,
1493                                 bool threadgroup)
1494 {
1495         struct io_context *ioc;
1496
1497         task_lock(tsk);
1498         ioc = tsk->io_context;
1499         if (ioc)
1500                 ioc->cgroup_changed = 1;
1501         task_unlock(tsk);
1502 }
1503
1504 void blkio_policy_register(struct blkio_policy_type *blkiop)
1505 {
1506         spin_lock(&blkio_list_lock);
1507         list_add_tail(&blkiop->list, &blkio_list);
1508         spin_unlock(&blkio_list_lock);
1509 }
1510 EXPORT_SYMBOL_GPL(blkio_policy_register);
1511
1512 void blkio_policy_unregister(struct blkio_policy_type *blkiop)
1513 {
1514         spin_lock(&blkio_list_lock);
1515         list_del_init(&blkiop->list);
1516         spin_unlock(&blkio_list_lock);
1517 }
1518 EXPORT_SYMBOL_GPL(blkio_policy_unregister);
1519
1520 static int __init init_cgroup_blkio(void)
1521 {
1522         return cgroup_load_subsys(&blkio_subsys);
1523 }
1524
1525 static void __exit exit_cgroup_blkio(void)
1526 {
1527         cgroup_unload_subsys(&blkio_subsys);
1528 }
1529
1530 module_init(init_cgroup_blkio);
1531 module_exit(exit_cgroup_blkio);
1532 MODULE_LICENSE("GPL");