1 #include <linux/module.h>
2 #include <linux/kernel.h>
3 #include <linux/mman.h>
4 #include <linux/init.h>
5 #include <linux/security.h>
6 #include <linux/sysctl.h>
7 #include <linux/swap.h>
8 #include <linux/kobject.h>
9 #include <linux/pagemap.h>
10 #include <linux/hugetlb.h>
11 #include <linux/sysfs.h>
12 #include <linux/oom.h>
/* Prefix used for all printk() output from this module */
#define MY_NAME "lowmem"

/* Size of the allowed_uids[] exemption vector exposed via sysctl */
#define LOWMEM_MAX_UIDS 8
/*
 * Sysctl IDs for the entries in lowmem_table below.
 * VM_LOWMEM_FREE_PAGES is referenced by the table and therefore must be
 * declared here even though it was missing from the damaged source.
 */
enum {
	VM_LOWMEM_DENY_PAGES = 1,
	VM_LOWMEM_NOTIFY_LOW_PAGES,
	VM_LOWMEM_NOTIFY_HIGH_PAGES,
	VM_LOWMEM_NR_DECAY_PAGES,
	VM_LOWMEM_ALLOWED_UIDS,
	VM_LOWMEM_ALLOWED_PAGES,
	VM_LOWMEM_FREE_PAGES,
};
28 static long deny_pages;
29 static long notify_low_pages, notify_high_pages;
30 static unsigned int nr_decay_pages;
31 static unsigned long allowed_pages;
32 static unsigned long lowmem_free_pages;
33 static unsigned int allowed_uids[LOWMEM_MAX_UIDS];
34 static unsigned int minuid = 1;
35 static unsigned int maxuid = 65535;
37 static ctl_table lowmem_table[] = {
39 .ctl_name = VM_LOWMEM_DENY_PAGES,
40 .procname = "lowmem_deny_watermark_pages",
42 .maxlen = sizeof(long),
45 .proc_handler = &proc_dointvec,
46 .strategy = &sysctl_intvec,
48 .ctl_name = VM_LOWMEM_NOTIFY_LOW_PAGES,
49 .procname = "lowmem_notify_low_pages",
50 .data = ¬ify_low_pages,
51 .maxlen = sizeof(long),
54 .proc_handler = &proc_dointvec,
55 .strategy = &sysctl_intvec,
57 .ctl_name = VM_LOWMEM_NOTIFY_HIGH_PAGES,
58 .procname = "lowmem_notify_high_pages",
59 .data = ¬ify_high_pages,
60 .maxlen = sizeof(long),
63 .proc_handler = &proc_dointvec,
64 .strategy = &sysctl_intvec,
66 .ctl_name = VM_LOWMEM_NR_DECAY_PAGES,
67 .procname = "lowmem_nr_decay_pages",
68 .data = &nr_decay_pages,
69 .maxlen = sizeof(unsigned int),
72 .proc_handler = &proc_dointvec,
73 .strategy = &sysctl_intvec,
75 .ctl_name = VM_LOWMEM_ALLOWED_UIDS,
76 .procname = "lowmem_allowed_uids",
77 .data = &allowed_uids,
78 .maxlen = LOWMEM_MAX_UIDS * sizeof(unsigned int),
81 .proc_handler = &proc_dointvec_minmax,
82 .strategy = &sysctl_intvec,
86 .ctl_name = VM_LOWMEM_ALLOWED_PAGES,
87 .procname = "lowmem_allowed_pages",
88 .data = &allowed_pages,
89 .maxlen = sizeof(unsigned long),
92 .proc_handler = &proc_dointvec,
93 .strategy = &sysctl_intvec,
95 .ctl_name = VM_LOWMEM_FREE_PAGES,
96 .procname = "lowmem_free_pages",
97 .data = &lowmem_free_pages,
98 .maxlen = sizeof(unsigned long),
101 .proc_handler = &proc_dointvec,
102 .strategy = &sysctl_intvec,
108 static ctl_table lowmem_root_table[] = {
113 .child = lowmem_table,
/* Declare a read-only sysfs attribute whose show handler is <name>_show() */
#define KERNEL_ATTR_RO(_name) \
static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
122 static int low_watermark_reached, high_watermark_reached;
124 static ssize_t low_watermark_show(struct subsystem *subsys, char *page)
126 return sprintf(page, "%u\n", low_watermark_reached);
129 static ssize_t high_watermark_show(struct subsystem *subsys, char *page)
131 return sprintf(page, "%u\n", high_watermark_reached);
/* Instantiate the read-only low_watermark and high_watermark attributes */
KERNEL_ATTR_RO(low_watermark);
KERNEL_ATTR_RO(high_watermark);
137 static void low_watermark_state(int new_state)
139 if (low_watermark_reached != new_state) {
140 low_watermark_reached = new_state;
141 sysfs_notify(&kernel_subsys.kset.kobj, NULL, "low_watermark");
145 static void high_watermark_state(int new_state)
147 if (high_watermark_reached != new_state) {
148 high_watermark_reached = new_state;
149 sysfs_notify(&kernel_subsys.kset.kobj, NULL, "high_watermark");
153 static int low_vm_enough_memory(long pages)
155 unsigned long free, allowed;
156 int cap_sys_admin = 0, notify;
158 if (cap_capable(current, CAP_SYS_ADMIN) == 0)
161 allowed = totalram_pages - hugetlb_total_pages();
162 allowed_pages = allowed;
164 /* We activate ourselves only after both parameters have been
166 if (deny_pages == 0 || notify_low_pages == 0 || notify_high_pages == 0)
167 return __vm_enough_memory(pages, cap_sys_admin);
169 vm_acct_memory(pages);
171 /* Easily freed pages when under VM pressure or direct reclaim */
172 free = global_page_state(NR_FILE_PAGES);
173 free += nr_swap_pages;
174 free += global_page_state(NR_SLAB_RECLAIMABLE);
176 if (likely(free > notify_low_pages))
179 /* No luck, lets make it more expensive and try again.. */
180 free += nr_free_pages();
182 if (free < deny_pages) {
185 lowmem_free_pages = free;
186 low_watermark_state(1);
187 high_watermark_state(1);
188 /* Memory allocations by root are always allowed */
192 /* OOM unkillable process is allowed to consume memory */
193 if (current->oomkilladj == OOM_DISABLE)
196 /* uids from allowed_uids vector are also allowed no matter what */
197 for (i = 0; i < LOWMEM_MAX_UIDS && allowed_uids[i]; i++)
198 if (current->uid == allowed_uids[i])
201 vm_unacct_memory(pages);
202 if (printk_ratelimit()) {
203 printk(MY_NAME ": denying memory allocation to process %d (%s)\n",
204 current->pid, current->comm);
210 /* See if we need to notify level 1 */
211 low_watermark_state(free < notify_low_pages);
214 * In the level 2 notification case things are more complicated,
215 * as the level that we drop the state and send a notification
216 * should be lower than when it is first triggered. Having this
217 * on the same watermark level ends up bouncing back and forth
218 * when applications are being stupid.
220 notify = free < notify_high_pages;
221 if (notify || free - nr_decay_pages > notify_high_pages)
222 high_watermark_state(notify);
224 /* We have plenty of memory */
225 lowmem_free_pages = free;
229 static struct security_operations lowmem_security_ops = {
230 /* Use the capability functions for some of the hooks */
231 .ptrace = cap_ptrace,
232 .capget = cap_capget,
233 .capset_check = cap_capset_check,
234 .capset_set = cap_capset_set,
235 .capable = cap_capable,
237 .bprm_apply_creds = cap_bprm_apply_creds,
238 .bprm_set_security = cap_bprm_set_security,
240 .task_post_setuid = cap_task_post_setuid,
241 .task_reparent_to_init = cap_task_reparent_to_init,
242 .vm_enough_memory = low_vm_enough_memory,
/* Handle returned by register_sysctl_table(); used for unregistration */
static struct ctl_table_header *lowmem_table_header;
/* flag to keep track of how we were registered */
static int secondary;
249 static struct attribute *lowmem_attrs[] = {
250 &low_watermark_attr.attr,
251 &high_watermark_attr.attr,
255 static struct attribute_group lowmem_attr_group = {
256 .attrs = lowmem_attrs,
259 static int __init lowmem_init(void)
263 /* register ourselves with the security framework */
264 if (register_security(&lowmem_security_ops)) {
265 printk(KERN_ERR MY_NAME ": Failure registering with the kernel\n");
266 /* try registering with primary module */
267 if (mod_reg_security(MY_NAME, &lowmem_security_ops)) {
268 printk(KERN_ERR ": Failure registering with the primary"
269 "security module.\n");
275 /* initialize the uids vector */
276 memset(allowed_uids, 0, sizeof(allowed_uids));
278 lowmem_table_header = register_sysctl_table(lowmem_root_table);
279 if (unlikely(!lowmem_table_header))
282 kernel_subsys.kset.kobj.kset = &kernel_subsys.kset;
284 r = sysfs_create_group(&kernel_subsys.kset.kobj,
289 printk(KERN_INFO MY_NAME ": Module initialized.\n");
294 static void __exit lowmem_exit(void)
296 /* remove ourselves from the security framework */
298 if (mod_unreg_security(MY_NAME, &lowmem_security_ops))
299 printk(KERN_ERR MY_NAME ": Failure unregistering "
300 "with the primary security module.\n");
302 if (unregister_security(&lowmem_security_ops)) {
303 printk(KERN_ERR MY_NAME ": Failure unregistering "
304 "with the kernel.\n");
308 unregister_sysctl_table(lowmem_table_header);
310 sysfs_remove_group(&kernel_subsys.kset.kobj, &lowmem_attr_group);
312 printk(KERN_INFO MY_NAME ": Module removed.\n");
315 module_init(lowmem_init);
316 module_exit(lowmem_exit);
318 MODULE_DESCRIPTION("Low watermark LSM module");
319 MODULE_LICENSE("GPL");