Merge omap-drivers
[pandora-kernel.git] / security / lowmem.c
1 #include <linux/module.h>
2 #include <linux/kernel.h>
3 #include <linux/mman.h>
4 #include <linux/init.h>
5 #include <linux/security.h>
6 #include <linux/sysctl.h>
7 #include <linux/swap.h>
8 #include <linux/kobject.h>
9 #include <linux/pagemap.h>
10 #include <linux/hugetlb.h>
11 #include <linux/sysfs.h>
12
/* Name used as the printk prefix and when registering with a primary LSM. */
#define MY_NAME "lowmem"

/* Capacity of the allowed_uids vector exported through sysctl. */
#define LOWMEM_MAX_UIDS 8
16
/* Binary sysctl identifiers (ctl_name) for the entries of lowmem_table. */
enum {
	VM_LOWMEM_DENY			= 1,
	VM_LOWMEM_LEVEL1_NOTIFY		= 2,
	VM_LOWMEM_LEVEL2_NOTIFY		= 3,
	VM_LOWMEM_NR_DECAY_PAGES	= 4,
	VM_LOWMEM_ALLOWED_UIDS		= 5,
	VM_LOWMEM_ALLOWED_PAGES		= 6,
	VM_LOWMEM_USED_PAGES		= 7,
};
26
27 static unsigned int deny_percentage;
28 static unsigned int l1_notify, l2_notify;
29 static unsigned int nr_decay_pages;
30 static unsigned long allowed_pages;
31 static long used_pages;
32 static unsigned int allowed_uids[LOWMEM_MAX_UIDS];
33 static unsigned int minuid = 1;
34 static unsigned int maxuid = 65535;
35
36 static ctl_table lowmem_table[] = {
37         {
38                 .ctl_name = VM_LOWMEM_DENY,
39                 .procname = "lowmem_deny_watermark",
40                 .data = &deny_percentage,
41                 .maxlen = sizeof(unsigned int),
42                 .mode = 0644,
43                 .child = NULL,
44                 .proc_handler = &proc_dointvec,
45                 .strategy = &sysctl_intvec,
46         }, {
47                 .ctl_name = VM_LOWMEM_LEVEL1_NOTIFY,
48                 .procname = "lowmem_notify_low",
49                 .data = &l1_notify,
50                 .maxlen = sizeof(unsigned int),
51                 .mode = 0644,
52                 .child = NULL,
53                 .proc_handler = &proc_dointvec,
54                 .strategy = &sysctl_intvec,
55         }, {
56                 .ctl_name = VM_LOWMEM_LEVEL2_NOTIFY,
57                 .procname = "lowmem_notify_high",
58                 .data = &l2_notify,
59                 .maxlen = sizeof(unsigned int),
60                 .mode = 0644,
61                 .child = NULL,
62                 .proc_handler = &proc_dointvec,
63                 .strategy = &sysctl_intvec,
64         }, {
65                 .ctl_name = VM_LOWMEM_NR_DECAY_PAGES,
66                 .procname = "lowmem_nr_decay_pages",
67                 .data = &nr_decay_pages,
68                 .maxlen = sizeof(unsigned int),
69                 .mode = 0644,
70                 .child = NULL,
71                 .proc_handler = &proc_dointvec_minmax,
72                 .strategy = &sysctl_intvec,
73         }, {
74                 .ctl_name = VM_LOWMEM_ALLOWED_UIDS,
75                 .procname = "lowmem_allowed_uids",
76                 .data = &allowed_uids,
77                 .maxlen = LOWMEM_MAX_UIDS * sizeof(unsigned int),
78                 .mode = 0644,
79                 .child = NULL,
80                 .proc_handler = &proc_dointvec_minmax,
81                 .strategy = &sysctl_intvec,
82                 .extra1 = &minuid,
83                 .extra2 = &maxuid,
84         }, {
85                 .ctl_name = VM_LOWMEM_ALLOWED_PAGES,
86                 .procname = "lowmem_allowed_pages",
87                 .data = &allowed_pages,
88                 .maxlen = sizeof(unsigned long),
89                 .mode = 0444,
90                 .child = NULL,
91                 .proc_handler = &proc_dointvec_minmax,
92                 .strategy = &sysctl_intvec,
93         }, {
94                 .ctl_name = VM_LOWMEM_USED_PAGES,
95                 .procname = "lowmem_used_pages",
96                 .data = &used_pages,
97                 .maxlen = sizeof(long),
98                 .mode = 0444,
99                 .child = NULL,
100                 .proc_handler = &proc_dointvec_minmax,
101                 .strategy = &sysctl_intvec,
102         }, {
103                 .ctl_name = 0
104         }
105 };
106
107 static ctl_table lowmem_root_table[] = {
108         {
109                 .ctl_name = CTL_VM,
110                 .procname = "vm",
111                 .mode = 0555,
112                 .child = lowmem_table,
113         }, {
114                 .ctl_name = 0
115         }
116 };
117
/*
 * Define a read-only subsystem attribute called <_name>_attr; __ATTR_RO
 * wires it to the <_name>_show() function.
 */
#define KERNEL_ATTR_RO(_name)						\
	static struct subsys_attribute _name##_attr = __ATTR_RO(_name)

/* Current watermark flags; non-zero while the level is exceeded. */
static int low_watermark_reached;
static int high_watermark_reached;
122
123 static ssize_t low_watermark_show(struct subsystem *subsys, char *page)
124 {
125         return sprintf(page, "%u\n", low_watermark_reached);
126 }
127
128 static ssize_t high_watermark_show(struct subsystem *subsys, char *page)
129 {
130         return sprintf(page, "%u\n", high_watermark_reached);
131 }
132
/* Instantiate low_watermark_attr and high_watermark_attr (see lowmem_attrs). */
KERNEL_ATTR_RO(low_watermark);
KERNEL_ATTR_RO(high_watermark);
135
136 static void low_watermark_state(int new_state)
137 {
138         int changed = 0;
139
140         if (low_watermark_reached != new_state) {
141                 low_watermark_reached = new_state;
142                 changed = 1;
143         }
144
145         if (changed)
146                 sysfs_notify(&kernel_subsys.kset.kobj, NULL, "low_watermark");
147 }
148
149 static void high_watermark_state(int new_state)
150 {
151         int changed = 0;
152
153         if (high_watermark_reached != new_state) {
154                 high_watermark_reached = new_state;
155                 changed = 1;
156         }
157
158         if (changed)
159                 sysfs_notify(&kernel_subsys.kset.kobj, NULL, "high_watermark");
160 }
161
162 static int low_vm_enough_memory(long pages)
163 {
164         unsigned long free, allowed;
165         long deny_threshold, level1, level2, used;
166         int cap_sys_admin = 0, notify;
167
168         if (cap_capable(current, CAP_SYS_ADMIN) == 0)
169                 cap_sys_admin = 1;
170
171         /* We activate ourselves only after both parameters have been
172          * configured. */
173         if (deny_percentage == 0 || l1_notify == 0 || l2_notify == 0)
174                 return __vm_enough_memory(pages, cap_sys_admin);
175
176         allowed = totalram_pages - hugetlb_total_pages();
177         deny_threshold = allowed * deny_percentage / 100;
178         level1 = allowed * l1_notify / 100;
179         level2 = allowed * l2_notify / 100;
180
181         vm_acct_memory(pages);
182
183         /* Easily freed pages when under VM pressure or direct reclaim */
184         free = global_page_state(NR_FILE_PAGES);
185         free += nr_swap_pages;
186         free += global_page_state(NR_SLAB_RECLAIMABLE);
187
188         used = allowed - free;
189         if (unlikely(used < 0))
190                 used = 0;
191
192         /* The hot path, plenty of memory */
193         if (likely(used < level1))
194                 goto enough_memory;
195
196         /* No luck, lets make it more expensive and try again.. */
197         used -= nr_free_pages();
198
199         if (used >= deny_threshold) {
200                 int i;
201
202                 allowed_pages = allowed;
203                 used_pages = used;
204                 low_watermark_state(1);
205                 high_watermark_state(1);
206                 /* Memory allocations by root are always allowed */
207                 if (cap_sys_admin)
208                         return 0;
209
210                 /* uids from allowed_uids vector are also allowed no matter what */
211                 for (i = 0; i < LOWMEM_MAX_UIDS && allowed_uids[i]; i++)
212                         if (current->uid == allowed_uids[i])
213                                 return 0;
214
215                 vm_unacct_memory(pages);
216                 if (printk_ratelimit()) {
217                         printk(MY_NAME ": denying memory allocation to process %d (%s)\n",
218                                current->pid, current->comm);
219                 }
220                 return -ENOMEM;
221         }
222
223 enough_memory:
224         /* See if we need to notify level 1 */
225         low_watermark_state(used >= level1);
226
227         /*
228          * In the level 2 notification case things are more complicated,
229          * as the level that we drop the state and send a notification
230          * should be lower than when it is first triggered. Having this
231          * on the same watermark level ends up bouncing back and forth
232          * when applications are being stupid.
233          */
234         notify = used >= level2;
235         if (notify || used + nr_decay_pages < level2)
236                 high_watermark_state(notify);
237
238         /* We have plenty of memory */
239         allowed_pages = allowed;
240         used_pages = used;
241         return 0;
242 }
243
244 static struct security_operations lowmem_security_ops = {
245         /* Use the capability functions for some of the hooks */
246         .ptrace = cap_ptrace,
247         .capget = cap_capget,
248         .capset_check = cap_capset_check,
249         .capset_set = cap_capset_set,
250         .capable = cap_capable,
251
252         .bprm_apply_creds = cap_bprm_apply_creds,
253         .bprm_set_security = cap_bprm_set_security,
254
255         .task_post_setuid = cap_task_post_setuid,
256         .task_reparent_to_init = cap_task_reparent_to_init,
257         .vm_enough_memory = low_vm_enough_memory,
258 };
259
/* Handle returned by register_sysctl_table(); needed to unregister on exit. */
static struct ctl_table_header *lowmem_table_header;
/*
 * Flag to keep track of how we were registered: set to 1 when
 * register_security() failed and we attached through mod_reg_security()
 * instead, so that lowmem_exit() undoes the matching registration.
 */
static int secondary;
263
/* NULL-terminated list of the watermark attributes created in sysfs. */
static struct attribute *lowmem_attrs[] = {
	&low_watermark_attr.attr,
	&high_watermark_attr.attr,
	NULL,
};
269
/* Attribute group created on the kernel subsystem kobject by lowmem_init(). */
static struct attribute_group lowmem_attr_group = {
	.attrs  = lowmem_attrs,
};
273
274 static int __init lowmem_init(void)
275 {
276         int r;
277
278         /* register ourselves with the security framework */
279         if (register_security(&lowmem_security_ops)) {
280                 printk(KERN_ERR MY_NAME ": Failure registering with the kernel\n");
281                 /* try registering with primary module */
282                 if (mod_reg_security(MY_NAME, &lowmem_security_ops)) {
283                         printk(KERN_ERR ": Failure registering with the primary"
284                                "security module.\n");
285                         return -EINVAL;
286                 }
287                 secondary = 1;
288         }
289
290         /* initialize the uids vector */
291         memset(allowed_uids, 0, sizeof(allowed_uids));
292
293         lowmem_table_header = register_sysctl_table(lowmem_root_table);
294         if (unlikely(!lowmem_table_header))
295                 return -EPERM;
296
297         kernel_subsys.kset.kobj.kset = &kernel_subsys.kset;
298
299         r = sysfs_create_group(&kernel_subsys.kset.kobj,
300                                &lowmem_attr_group);
301         if (unlikely(r))
302                 return r;
303
304         printk(KERN_INFO MY_NAME ": Module initialized.\n");
305
306         return 0;
307 }
308
309 static void __exit lowmem_exit(void)
310 {
311         /* remove ourselves from the security framework */
312         if (secondary) {
313                 if (mod_unreg_security(MY_NAME, &lowmem_security_ops))
314                         printk(KERN_ERR MY_NAME ": Failure unregistering "
315                                "with the primary security module.\n");
316         } else {
317                 if (unregister_security(&lowmem_security_ops)) {
318                         printk(KERN_ERR MY_NAME ": Failure unregistering "
319                                "with the kernel.\n");
320                 }
321         }
322
323         unregister_sysctl_table(lowmem_table_header);
324
325         sysfs_remove_group(&kernel_subsys.kset.kobj, &lowmem_attr_group);
326
327         printk(KERN_INFO MY_NAME ": Module removed.\n");
328 }
329
/* Hook lowmem_init()/lowmem_exit() up as the module entry and exit points. */
module_init(lowmem_init);
module_exit(lowmem_exit);

MODULE_DESCRIPTION("Low watermark LSM module");
MODULE_LICENSE("GPL");