Pull 1024-cpu into release branch
[pandora-kernel.git] / fs / proc / base.c
1 /*
2  *  linux/fs/proc/base.c
3  *
4  *  Copyright (C) 1991, 1992 Linus Torvalds
5  *
6  *  proc base directory handling functions
7  *
8  *  1999, Al Viro. Rewritten. Now it covers the whole per-process part.
9  *  Instead of using magical inumbers to determine the kind of object
10  *  we allocate and fill in-core inodes upon lookup. They don't even
11  *  go into icache. We cache the reference to task_struct upon lookup too.
12  *  Eventually it should become a filesystem in its own. We don't use the
13  *  rest of procfs anymore.
14  *
15  *
16  *  Changelog:
17  *  17-Jan-2005
18  *  Allan Bezerra
19  *  Bruna Moreira <bruna.moreira@indt.org.br>
20  *  Edjard Mota <edjard.mota@indt.org.br>
21  *  Ilias Biris <ilias.biris@indt.org.br>
22  *  Mauricio Lin <mauricio.lin@indt.org.br>
23  *
24  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
25  *
26  *  A new process specific entry (smaps) included in /proc. It shows the
27  *  size of rss for each memory area. The maps entry lacks information
28  *  about physical memory size (rss) for each mapped file, i.e.,
29  *  rss information for executables and library files.
30  *  This additional information is useful for any tools that need to know
31  *  about physical memory consumption for a process specific library.
32  *
33  *  Changelog:
34  *  21-Feb-2005
35  *  Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT
36  *  Pud inclusion in the page table walking.
37  *
38  *  ChangeLog:
39  *  10-Mar-2005
40  *  10LE Instituto Nokia de Tecnologia - INdT:
41  *  A better way to walks through the page table as suggested by Hugh Dickins.
42  *
43  *  Simo Piiroinen <simo.piiroinen@nokia.com>:
44  *  Smaps information related to shared, private, clean and dirty pages.
45  *
46  *  Paul Mundt <paul.mundt@nokia.com>:
47  *  Overall revision about smaps.
48  */
49
50 #include <asm/uaccess.h>
51
52 #include <linux/config.h>
53 #include <linux/errno.h>
54 #include <linux/time.h>
55 #include <linux/proc_fs.h>
56 #include <linux/stat.h>
57 #include <linux/init.h>
58 #include <linux/file.h>
59 #include <linux/string.h>
60 #include <linux/seq_file.h>
61 #include <linux/namei.h>
62 #include <linux/namespace.h>
63 #include <linux/mm.h>
64 #include <linux/smp_lock.h>
65 #include <linux/rcupdate.h>
66 #include <linux/kallsyms.h>
67 #include <linux/mount.h>
68 #include <linux/security.h>
69 #include <linux/ptrace.h>
70 #include <linux/seccomp.h>
71 #include <linux/cpuset.h>
72 #include <linux/audit.h>
73 #include "internal.h"
74
75 /*
76  * For hysterical raisins we keep the same inumbers as in the old procfs.
77  * Feel free to change the macro below - just keep the range distinct from
78  * inumbers of the rest of procfs (currently those are in 0x0000--0xffff).
79  * As soon as we'll get a separate superblock we will be able to forget
80  * about magical ranges too.
81  */
82
83 #define fake_ino(pid,ino) (((pid)<<16)|(ino))
84
/*
 * Type codes for the entries of the per-process (PROC_TGID_*) and
 * per-thread (PROC_TID_*) directories.  They populate the `type` field of
 * struct pid_entry; presumably they are also the `ino` argument given to
 * fake_ino() (verify against the inode-creation code).
 *
 * Enumerators are numbered sequentially starting at 2, so every CONFIG_*
 * conditional below shifts the value of all later entries.  Note that some
 * codes (e.g. *_NUMA_MAPS, *_WCHAN) are defined unconditionally here even
 * though their table entries are config-dependent.
 */
enum pid_directory_inos {
        PROC_TGID_INO = 2,
        PROC_TGID_TASK,
        PROC_TGID_STATUS,
        PROC_TGID_MEM,
#ifdef CONFIG_SECCOMP
        PROC_TGID_SECCOMP,
#endif
        PROC_TGID_CWD,
        PROC_TGID_ROOT,
        PROC_TGID_EXE,
        PROC_TGID_FD,
        PROC_TGID_ENVIRON,
        PROC_TGID_AUXV,
        PROC_TGID_CMDLINE,
        PROC_TGID_STAT,
        PROC_TGID_STATM,
        PROC_TGID_MAPS,
        PROC_TGID_NUMA_MAPS,
        PROC_TGID_MOUNTS,
        PROC_TGID_WCHAN,
#ifdef CONFIG_MMU
        PROC_TGID_SMAPS,
#endif
#ifdef CONFIG_SCHEDSTATS
        PROC_TGID_SCHEDSTAT,
#endif
#ifdef CONFIG_CPUSETS
        PROC_TGID_CPUSET,
#endif
#ifdef CONFIG_SECURITY
        PROC_TGID_ATTR,
        PROC_TGID_ATTR_CURRENT,
        PROC_TGID_ATTR_PREV,
        PROC_TGID_ATTR_EXEC,
        PROC_TGID_ATTR_FSCREATE,
#endif
#ifdef CONFIG_AUDITSYSCALL
        PROC_TGID_LOGINUID,
#endif
        PROC_TGID_OOM_SCORE,
        PROC_TGID_OOM_ADJUST,
        /* Per-thread counterparts of the entries above. */
        PROC_TID_INO,
        PROC_TID_STATUS,
        PROC_TID_MEM,
#ifdef CONFIG_SECCOMP
        PROC_TID_SECCOMP,
#endif
        PROC_TID_CWD,
        PROC_TID_ROOT,
        PROC_TID_EXE,
        PROC_TID_FD,
        PROC_TID_ENVIRON,
        PROC_TID_AUXV,
        PROC_TID_CMDLINE,
        PROC_TID_STAT,
        PROC_TID_STATM,
        PROC_TID_MAPS,
        PROC_TID_NUMA_MAPS,
        PROC_TID_MOUNTS,
        PROC_TID_WCHAN,
#ifdef CONFIG_MMU
        PROC_TID_SMAPS,
#endif
#ifdef CONFIG_SCHEDSTATS
        PROC_TID_SCHEDSTAT,
#endif
#ifdef CONFIG_CPUSETS
        PROC_TID_CPUSET,
#endif
#ifdef CONFIG_SECURITY
        PROC_TID_ATTR,
        PROC_TID_ATTR_CURRENT,
        PROC_TID_ATTR_PREV,
        PROC_TID_ATTR_EXEC,
        PROC_TID_ATTR_FSCREATE,
#endif
#ifdef CONFIG_AUDITSYSCALL
        PROC_TID_LOGINUID,
#endif
        PROC_TID_OOM_SCORE,
        PROC_TID_OOM_ADJUST,

        /* Add new entries before this */
        /*
         * Base of the range used for open-file-descriptor entries:
         * proc_fd_link() recovers the fd as proc_type(inode) - PROC_TID_FD_DIR.
         */
        PROC_TID_FD_DIR = 0x8000,       /* 0x8000-0xffff */
};
171
/* One named entry of a per-process or per-thread directory table. */
struct pid_entry {
        int type;       /* enum pid_directory_inos code identifying the entry */
        int len;        /* length of name, not counting the terminating NUL */
        char *name;     /* entry name as it appears in the directory */
        mode_t mode;    /* file type and permission bits (S_IF* | S_I*) */
};

/*
 * Build a pid_entry initializer.  `name` must be a string literal (or
 * array) because len is computed with sizeof(name)-1.
 */
#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)}
180
/*
 * Entries of the thread-group (tgid) base directory: type code, name and
 * mode for each.  Config-dependent entries are compiled in only when the
 * corresponding CONFIG_* option is set.  The table is terminated by an
 * all-zero sentinel entry.
 */
static struct pid_entry tgid_base_stuff[] = {
        E(PROC_TGID_TASK,      "task",    S_IFDIR|S_IRUGO|S_IXUGO),
        E(PROC_TGID_FD,        "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
        E(PROC_TGID_ENVIRON,   "environ", S_IFREG|S_IRUSR),
        E(PROC_TGID_AUXV,      "auxv",    S_IFREG|S_IRUSR),
        E(PROC_TGID_STATUS,    "status",  S_IFREG|S_IRUGO),
        E(PROC_TGID_CMDLINE,   "cmdline", S_IFREG|S_IRUGO),
        E(PROC_TGID_STAT,      "stat",    S_IFREG|S_IRUGO),
        E(PROC_TGID_STATM,     "statm",   S_IFREG|S_IRUGO),
        E(PROC_TGID_MAPS,      "maps",    S_IFREG|S_IRUGO),
#ifdef CONFIG_NUMA
        E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO),
#endif
        E(PROC_TGID_MEM,       "mem",     S_IFREG|S_IRUSR|S_IWUSR),
#ifdef CONFIG_SECCOMP
        E(PROC_TGID_SECCOMP,   "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
#endif
        E(PROC_TGID_CWD,       "cwd",     S_IFLNK|S_IRWXUGO),
        E(PROC_TGID_ROOT,      "root",    S_IFLNK|S_IRWXUGO),
        E(PROC_TGID_EXE,       "exe",     S_IFLNK|S_IRWXUGO),
        E(PROC_TGID_MOUNTS,    "mounts",  S_IFREG|S_IRUGO),
#ifdef CONFIG_MMU
        E(PROC_TGID_SMAPS,     "smaps",   S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_SECURITY
        E(PROC_TGID_ATTR,      "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
#endif
#ifdef CONFIG_KALLSYMS
        E(PROC_TGID_WCHAN,     "wchan",   S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_SCHEDSTATS
        E(PROC_TGID_SCHEDSTAT, "schedstat", S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_CPUSETS
        E(PROC_TGID_CPUSET,    "cpuset",  S_IFREG|S_IRUGO),
#endif
        E(PROC_TGID_OOM_SCORE, "oom_score",S_IFREG|S_IRUGO),
        E(PROC_TGID_OOM_ADJUST,"oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
#ifdef CONFIG_AUDITSYSCALL
        E(PROC_TGID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
#endif
        {0,0,NULL,0}
};
/*
 * Entries of the per-thread (tid) base directory.  Same layout as
 * tgid_base_stuff but using the PROC_TID_* codes and without a "task"
 * subdirectory.  Terminated by an all-zero sentinel entry.
 */
static struct pid_entry tid_base_stuff[] = {
        E(PROC_TID_FD,         "fd",      S_IFDIR|S_IRUSR|S_IXUSR),
        E(PROC_TID_ENVIRON,    "environ", S_IFREG|S_IRUSR),
        E(PROC_TID_AUXV,       "auxv",    S_IFREG|S_IRUSR),
        E(PROC_TID_STATUS,     "status",  S_IFREG|S_IRUGO),
        E(PROC_TID_CMDLINE,    "cmdline", S_IFREG|S_IRUGO),
        E(PROC_TID_STAT,       "stat",    S_IFREG|S_IRUGO),
        E(PROC_TID_STATM,      "statm",   S_IFREG|S_IRUGO),
        E(PROC_TID_MAPS,       "maps",    S_IFREG|S_IRUGO),
#ifdef CONFIG_NUMA
        E(PROC_TID_NUMA_MAPS,  "numa_maps",    S_IFREG|S_IRUGO),
#endif
        E(PROC_TID_MEM,        "mem",     S_IFREG|S_IRUSR|S_IWUSR),
#ifdef CONFIG_SECCOMP
        E(PROC_TID_SECCOMP,    "seccomp", S_IFREG|S_IRUSR|S_IWUSR),
#endif
        E(PROC_TID_CWD,        "cwd",     S_IFLNK|S_IRWXUGO),
        E(PROC_TID_ROOT,       "root",    S_IFLNK|S_IRWXUGO),
        E(PROC_TID_EXE,        "exe",     S_IFLNK|S_IRWXUGO),
        E(PROC_TID_MOUNTS,     "mounts",  S_IFREG|S_IRUGO),
#ifdef CONFIG_MMU
        E(PROC_TID_SMAPS,      "smaps",   S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_SECURITY
        E(PROC_TID_ATTR,       "attr",    S_IFDIR|S_IRUGO|S_IXUGO),
#endif
#ifdef CONFIG_KALLSYMS
        E(PROC_TID_WCHAN,      "wchan",   S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_SCHEDSTATS
        E(PROC_TID_SCHEDSTAT, "schedstat",S_IFREG|S_IRUGO),
#endif
#ifdef CONFIG_CPUSETS
        E(PROC_TID_CPUSET,     "cpuset",  S_IFREG|S_IRUGO),
#endif
        E(PROC_TID_OOM_SCORE,  "oom_score",S_IFREG|S_IRUGO),
        E(PROC_TID_OOM_ADJUST, "oom_adj", S_IFREG|S_IRUGO|S_IWUSR),
#ifdef CONFIG_AUDITSYSCALL
        E(PROC_TID_LOGINUID, "loginuid", S_IFREG|S_IWUSR|S_IRUGO),
#endif
        {0,0,NULL,0}
};
266
267 #ifdef CONFIG_SECURITY
/*
 * Entries of the thread-group "attr" directory (only built with
 * CONFIG_SECURITY).  "prev" is read-only; the others are world-writable
 * by mode.  Terminated by an all-zero sentinel entry.
 */
static struct pid_entry tgid_attr_stuff[] = {
        E(PROC_TGID_ATTR_CURRENT,  "current",  S_IFREG|S_IRUGO|S_IWUGO),
        E(PROC_TGID_ATTR_PREV,     "prev",     S_IFREG|S_IRUGO),
        E(PROC_TGID_ATTR_EXEC,     "exec",     S_IFREG|S_IRUGO|S_IWUGO),
        E(PROC_TGID_ATTR_FSCREATE, "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
        {0,0,NULL,0}
};
/*
 * Per-thread counterpart of tgid_attr_stuff: same names and modes, using
 * the PROC_TID_ATTR_* codes.  Terminated by an all-zero sentinel entry.
 */
static struct pid_entry tid_attr_stuff[] = {
        E(PROC_TID_ATTR_CURRENT,   "current",  S_IFREG|S_IRUGO|S_IWUGO),
        E(PROC_TID_ATTR_PREV,      "prev",     S_IFREG|S_IRUGO),
        E(PROC_TID_ATTR_EXEC,      "exec",     S_IFREG|S_IRUGO|S_IWUGO),
        E(PROC_TID_ATTR_FSCREATE,  "fscreate", S_IFREG|S_IRUGO|S_IWUGO),
        {0,0,NULL,0}
};
282 #endif
283
284 #undef E
285
286 static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
287 {
288         struct task_struct *task = proc_task(inode);
289         struct files_struct *files;
290         struct file *file;
291         int fd = proc_type(inode) - PROC_TID_FD_DIR;
292
293         files = get_files_struct(task);
294         if (files) {
295                 rcu_read_lock();
296                 file = fcheck_files(files, fd);
297                 if (file) {
298                         *mnt = mntget(file->f_vfsmnt);
299                         *dentry = dget(file->f_dentry);
300                         rcu_read_unlock();
301                         put_files_struct(files);
302                         return 0;
303                 }
304                 rcu_read_unlock();
305                 put_files_struct(files);
306         }
307         return -ENOENT;
308 }
309
310 static struct fs_struct *get_fs_struct(struct task_struct *task)
311 {
312         struct fs_struct *fs;
313         task_lock(task);
314         fs = task->fs;
315         if(fs)
316                 atomic_inc(&fs->count);
317         task_unlock(task);
318         return fs;
319 }
320
321 static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
322 {
323         struct fs_struct *fs = get_fs_struct(proc_task(inode));
324         int result = -ENOENT;
325         if (fs) {
326                 read_lock(&fs->lock);
327                 *mnt = mntget(fs->pwdmnt);
328                 *dentry = dget(fs->pwd);
329                 read_unlock(&fs->lock);
330                 result = 0;
331                 put_fs_struct(fs);
332         }
333         return result;
334 }
335
336 static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt)
337 {
338         struct fs_struct *fs = get_fs_struct(proc_task(inode));
339         int result = -ENOENT;
340         if (fs) {
341                 read_lock(&fs->lock);
342                 *mnt = mntget(fs->rootmnt);
343                 *dentry = dget(fs->root);
344                 read_unlock(&fs->lock);
345                 result = 0;
346                 put_fs_struct(fs);
347         }
348         return result;
349 }
350
351
352 /* Same as proc_root_link, but this addionally tries to get fs from other
353  * threads in the group */
354 static int proc_task_root_link(struct inode *inode, struct dentry **dentry,
355                                 struct vfsmount **mnt)
356 {
357         struct fs_struct *fs;
358         int result = -ENOENT;
359         struct task_struct *leader = proc_task(inode);
360
361         task_lock(leader);
362         fs = leader->fs;
363         if (fs) {
364                 atomic_inc(&fs->count);
365                 task_unlock(leader);
366         } else {
367                 /* Try to get fs from other threads */
368                 task_unlock(leader);
369                 read_lock(&tasklist_lock);
370                 if (pid_alive(leader)) {
371                         struct task_struct *task = leader;
372
373                         while ((task = next_thread(task)) != leader) {
374                                 task_lock(task);
375                                 fs = task->fs;
376                                 if (fs) {
377                                         atomic_inc(&fs->count);
378                                         task_unlock(task);
379                                         break;
380                                 }
381                                 task_unlock(task);
382                         }
383                 }
384                 read_unlock(&tasklist_lock);
385         }
386
387         if (fs) {
388                 read_lock(&fs->lock);
389                 *mnt = mntget(fs->rootmnt);
390                 *dentry = dget(fs->root);
391                 read_unlock(&fs->lock);
392                 result = 0;
393                 put_fs_struct(fs);
394         }
395         return result;
396 }
397
398
/*
 * True when current may look into @task through the mem file: either
 * @task is current itself, or @task is a child of current that is being
 * ptraced, is stopped or traced, and security_ptrace() allows it.
 *
 * The argument is parenthesized so any pointer expression may be passed,
 * but it is still evaluated several times: pass only side-effect-free
 * expressions.
 */
#define MAY_PTRACE(task) \
        ((task) == current || \
        ((task)->parent == current && \
        ((task)->ptrace & PT_PTRACED) && \
         ((task)->state == TASK_STOPPED || (task)->state == TASK_TRACED) && \
         security_ptrace(current,(task)) == 0))
405
406 static int proc_pid_environ(struct task_struct *task, char * buffer)
407 {
408         int res = 0;
409         struct mm_struct *mm = get_task_mm(task);
410         if (mm) {
411                 unsigned int len = mm->env_end - mm->env_start;
412                 if (len > PAGE_SIZE)
413                         len = PAGE_SIZE;
414                 res = access_process_vm(task, mm->env_start, buffer, len, 0);
415                 if (!ptrace_may_attach(task))
416                         res = -ESRCH;
417                 mmput(mm);
418         }
419         return res;
420 }
421
422 static int proc_pid_cmdline(struct task_struct *task, char * buffer)
423 {
424         int res = 0;
425         unsigned int len;
426         struct mm_struct *mm = get_task_mm(task);
427         if (!mm)
428                 goto out;
429         if (!mm->arg_end)
430                 goto out_mm;    /* Shh! No looking before we're done */
431
432         len = mm->arg_end - mm->arg_start;
433  
434         if (len > PAGE_SIZE)
435                 len = PAGE_SIZE;
436  
437         res = access_process_vm(task, mm->arg_start, buffer, len, 0);
438
439         // If the nul at the end of args has been overwritten, then
440         // assume application is using setproctitle(3).
441         if (res > 0 && buffer[res-1] != '\0' && len < PAGE_SIZE) {
442                 len = strnlen(buffer, res);
443                 if (len < res) {
444                     res = len;
445                 } else {
446                         len = mm->env_end - mm->env_start;
447                         if (len > PAGE_SIZE - res)
448                                 len = PAGE_SIZE - res;
449                         res += access_process_vm(task, mm->env_start, buffer+res, len, 0);
450                         res = strnlen(buffer, res);
451                 }
452         }
453 out_mm:
454         mmput(mm);
455 out:
456         return res;
457 }
458
459 static int proc_pid_auxv(struct task_struct *task, char *buffer)
460 {
461         int res = 0;
462         struct mm_struct *mm = get_task_mm(task);
463         if (mm) {
464                 unsigned int nwords = 0;
465                 do
466                         nwords += 2;
467                 while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */
468                 res = nwords * sizeof(mm->saved_auxv[0]);
469                 if (res > PAGE_SIZE)
470                         res = PAGE_SIZE;
471                 memcpy(buffer, mm->saved_auxv, res);
472                 mmput(mm);
473         }
474         return res;
475 }
476
477
478 #ifdef CONFIG_KALLSYMS
479 /*
480  * Provides a wchan file via kallsyms in a proper one-value-per-file format.
481  * Returns the resolved symbol.  If that fails, simply return the address.
482  */
483 static int proc_pid_wchan(struct task_struct *task, char *buffer)
484 {
485         char *modname;
486         const char *sym_name;
487         unsigned long wchan, size, offset;
488         char namebuf[KSYM_NAME_LEN+1];
489
490         wchan = get_wchan(task);
491
492         sym_name = kallsyms_lookup(wchan, &size, &offset, &modname, namebuf);
493         if (sym_name)
494                 return sprintf(buffer, "%s", sym_name);
495         return sprintf(buffer, "%lu", wchan);
496 }
497 #endif /* CONFIG_KALLSYMS */
498
499 #ifdef CONFIG_SCHEDSTATS
500 /*
501  * Provides /proc/PID/schedstat
502  */
503 static int proc_pid_schedstat(struct task_struct *task, char *buffer)
504 {
505         return sprintf(buffer, "%lu %lu %lu\n",
506                         task->sched_info.cpu_time,
507                         task->sched_info.run_delay,
508                         task->sched_info.pcnt);
509 }
510 #endif
511
512 /* The badness from the OOM killer */
513 unsigned long badness(struct task_struct *p, unsigned long uptime);
514 static int proc_oom_score(struct task_struct *task, char *buffer)
515 {
516         unsigned long points;
517         struct timespec uptime;
518
519         do_posix_clock_monotonic_gettime(&uptime);
520         points = badness(task, uptime.tv_sec);
521         return sprintf(buffer, "%lu\n", points);
522 }
523
524 /************************************************************************/
525 /*                       Here the fs part begins                        */
526 /************************************************************************/
527
528 /* permission checks */
529
530 /* If the process being read is separated by chroot from the reading process,
531  * don't let the reader access the threads.
532  */
533 static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt)
534 {
535         struct dentry *de, *base;
536         struct vfsmount *our_vfsmnt, *mnt;
537         int res = 0;
538         read_lock(&current->fs->lock);
539         our_vfsmnt = mntget(current->fs->rootmnt);
540         base = dget(current->fs->root);
541         read_unlock(&current->fs->lock);
542
543         spin_lock(&vfsmount_lock);
544         de = root;
545         mnt = vfsmnt;
546
547         while (vfsmnt != our_vfsmnt) {
548                 if (vfsmnt == vfsmnt->mnt_parent)
549                         goto out;
550                 de = vfsmnt->mnt_mountpoint;
551                 vfsmnt = vfsmnt->mnt_parent;
552         }
553
554         if (!is_subdir(de, base))
555                 goto out;
556         spin_unlock(&vfsmount_lock);
557
558 exit:
559         dput(base);
560         mntput(our_vfsmnt);
561         dput(root);
562         mntput(mnt);
563         return res;
564 out:
565         spin_unlock(&vfsmount_lock);
566         res = -EACCES;
567         goto exit;
568 }
569
570 static int proc_check_root(struct inode *inode)
571 {
572         struct dentry *root;
573         struct vfsmount *vfsmnt;
574
575         if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */
576                 return -ENOENT;
577         return proc_check_chroot(root, vfsmnt);
578 }
579
580 static int proc_permission(struct inode *inode, int mask, struct nameidata *nd)
581 {
582         if (generic_permission(inode, mask, NULL) != 0)
583                 return -EACCES;
584         return proc_check_root(inode);
585 }
586
587 static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd)
588 {
589         struct dentry *root;
590         struct vfsmount *vfsmnt;
591
592         if (generic_permission(inode, mask, NULL) != 0)
593                 return -EACCES;
594
595         if (proc_task_root_link(inode, &root, &vfsmnt))
596                 return -ENOENT;
597
598         return proc_check_chroot(root, vfsmnt);
599 }
600
601 extern struct seq_operations proc_pid_maps_op;
602 static int maps_open(struct inode *inode, struct file *file)
603 {
604         struct task_struct *task = proc_task(inode);
605         int ret = seq_open(file, &proc_pid_maps_op);
606         if (!ret) {
607                 struct seq_file *m = file->private_data;
608                 m->private = task;
609         }
610         return ret;
611 }
612
/* seq_file-backed file operations whose open hook is maps_open(). */
static struct file_operations proc_maps_operations = {
        .open           = maps_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
619
620 #ifdef CONFIG_NUMA
621 extern struct seq_operations proc_pid_numa_maps_op;
622 static int numa_maps_open(struct inode *inode, struct file *file)
623 {
624         struct task_struct *task = proc_task(inode);
625         int ret = seq_open(file, &proc_pid_numa_maps_op);
626         if (!ret) {
627                 struct seq_file *m = file->private_data;
628                 m->private = task;
629         }
630         return ret;
631 }
632
/* seq_file-backed file operations whose open hook is numa_maps_open(). */
static struct file_operations proc_numa_maps_operations = {
        .open           = numa_maps_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
639 #endif
640
641 #ifdef CONFIG_MMU
642 extern struct seq_operations proc_pid_smaps_op;
643 static int smaps_open(struct inode *inode, struct file *file)
644 {
645         struct task_struct *task = proc_task(inode);
646         int ret = seq_open(file, &proc_pid_smaps_op);
647         if (!ret) {
648                 struct seq_file *m = file->private_data;
649                 m->private = task;
650         }
651         return ret;
652 }
653
/* seq_file-backed file operations whose open hook is smaps_open(). */
static struct file_operations proc_smaps_operations = {
        .open           = smaps_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = seq_release,
};
660 #endif
661
662 extern struct seq_operations mounts_op;
663 static int mounts_open(struct inode *inode, struct file *file)
664 {
665         struct task_struct *task = proc_task(inode);
666         int ret = seq_open(file, &mounts_op);
667
668         if (!ret) {
669                 struct seq_file *m = file->private_data;
670                 struct namespace *namespace;
671                 task_lock(task);
672                 namespace = task->namespace;
673                 if (namespace)
674                         get_namespace(namespace);
675                 task_unlock(task);
676
677                 if (namespace)
678                         m->private = namespace;
679                 else {
680                         seq_release(inode, file);
681                         ret = -EINVAL;
682                 }
683         }
684         return ret;
685 }
686
687 static int mounts_release(struct inode *inode, struct file *file)
688 {
689         struct seq_file *m = file->private_data;
690         struct namespace *namespace = m->private;
691         put_namespace(namespace);
692         return seq_release(inode, file);
693 }
694
/*
 * seq_file-backed file operations using mounts_open(); release goes
 * through mounts_release() so the namespace reference is dropped.
 */
static struct file_operations proc_mounts_operations = {
        .open           = mounts_open,
        .read           = seq_read,
        .llseek         = seq_lseek,
        .release        = mounts_release,
};
701
702 #define PROC_BLOCK_SIZE (3*1024)                /* 4K page size but our output routines use some slack for overruns */
703
704 static ssize_t proc_info_read(struct file * file, char __user * buf,
705                           size_t count, loff_t *ppos)
706 {
707         struct inode * inode = file->f_dentry->d_inode;
708         unsigned long page;
709         ssize_t length;
710         struct task_struct *task = proc_task(inode);
711
712         if (count > PROC_BLOCK_SIZE)
713                 count = PROC_BLOCK_SIZE;
714         if (!(page = __get_free_page(GFP_KERNEL)))
715                 return -ENOMEM;
716
717         length = PROC_I(inode)->op.proc_read(task, (char*)page);
718
719         if (length >= 0)
720                 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
721         free_page(page);
722         return length;
723 }
724
/* Read-only file operations: every read goes through proc_info_read(). */
static struct file_operations proc_info_file_operations = {
        .read           = proc_info_read,
};
728
729 static int mem_open(struct inode* inode, struct file* file)
730 {
731         file->private_data = (void*)((long)current->self_exec_id);
732         return 0;
733 }
734
735 static ssize_t mem_read(struct file * file, char __user * buf,
736                         size_t count, loff_t *ppos)
737 {
738         struct task_struct *task = proc_task(file->f_dentry->d_inode);
739         char *page;
740         unsigned long src = *ppos;
741         int ret = -ESRCH;
742         struct mm_struct *mm;
743
744         if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
745                 goto out;
746
747         ret = -ENOMEM;
748         page = (char *)__get_free_page(GFP_USER);
749         if (!page)
750                 goto out;
751
752         ret = 0;
753  
754         mm = get_task_mm(task);
755         if (!mm)
756                 goto out_free;
757
758         ret = -EIO;
759  
760         if (file->private_data != (void*)((long)current->self_exec_id))
761                 goto out_put;
762
763         ret = 0;
764  
765         while (count > 0) {
766                 int this_len, retval;
767
768                 this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
769                 retval = access_process_vm(task, src, page, this_len, 0);
770                 if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) {
771                         if (!ret)
772                                 ret = -EIO;
773                         break;
774                 }
775
776                 if (copy_to_user(buf, page, retval)) {
777                         ret = -EFAULT;
778                         break;
779                 }
780  
781                 ret += retval;
782                 src += retval;
783                 buf += retval;
784                 count -= retval;
785         }
786         *ppos = src;
787
788 out_put:
789         mmput(mm);
790 out_free:
791         free_page((unsigned long) page);
792 out:
793         return ret;
794 }
795
/*
 * Writing through the mem file is disabled: mem_write is defined as NULL,
 * which both compiles out the implementation below and leaves the .write
 * slot of proc_mem_operations empty.
 */
#define mem_write NULL

#ifndef mem_write
/* This is a security hazard */
/*
 * Write @count bytes from @buf into the target task's address space at
 * offset *ppos, one page-sized chunk at a time.
 *
 * Returns the number of bytes written, -ESRCH if access is not permitted,
 * -ENOMEM if no bounce page is available, -EFAULT if the user buffer
 * cannot be read, or -EIO if nothing at all could be written.
 */
static ssize_t mem_write(struct file * file, const char * buf,
                         size_t count, loff_t *ppos)
{
        int copied = 0;
        char *page;
        struct task_struct *task = proc_task(file->f_dentry->d_inode);
        unsigned long dst = *ppos;

        if (!MAY_PTRACE(task) || !ptrace_may_attach(task))
                return -ESRCH;

        page = (char *)__get_free_page(GFP_USER);
        if (!page)
                return -ENOMEM;

        while (count > 0) {
                int this_len, retval;

                this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
                if (copy_from_user(page, buf, this_len)) {
                        copied = -EFAULT;
                        break;
                }
                retval = access_process_vm(task, dst, page, this_len, 1);
                if (!retval) {
                        /* Only an error if nothing has been written so far. */
                        if (!copied)
                                copied = -EIO;
                        break;
                }
                copied += retval;
                buf += retval;
                dst += retval;
                count -= retval;
        }
        *ppos = dst;
        free_page((unsigned long) page);
        return copied;
}
#endif
839
840 static loff_t mem_lseek(struct file * file, loff_t offset, int orig)
841 {
842         switch (orig) {
843         case 0:
844                 file->f_pos = offset;
845                 break;
846         case 1:
847                 file->f_pos += offset;
848                 break;
849         default:
850                 return -EINVAL;
851         }
852         force_successful_syscall_return();
853         return file->f_pos;
854 }
855
/*
 * File operations for the mem file.  .write is mem_write, which is
 * currently #defined to NULL, so writes are not supported.
 */
static struct file_operations proc_mem_operations = {
        .llseek         = mem_lseek,
        .read           = mem_read,
        .write          = mem_write,
        .open           = mem_open,
};
862
863 static ssize_t oom_adjust_read(struct file *file, char __user *buf,
864                                 size_t count, loff_t *ppos)
865 {
866         struct task_struct *task = proc_task(file->f_dentry->d_inode);
867         char buffer[8];
868         size_t len;
869         int oom_adjust = task->oomkilladj;
870         loff_t __ppos = *ppos;
871
872         len = sprintf(buffer, "%i\n", oom_adjust);
873         if (__ppos >= len)
874                 return 0;
875         if (count > len-__ppos)
876                 count = len-__ppos;
877         if (copy_to_user(buf, buffer + __ppos, count))
878                 return -EFAULT;
879         *ppos = __ppos + count;
880         return count;
881 }
882
883 static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
884                                 size_t count, loff_t *ppos)
885 {
886         struct task_struct *task = proc_task(file->f_dentry->d_inode);
887         char buffer[8], *end;
888         int oom_adjust;
889
890         if (!capable(CAP_SYS_RESOURCE))
891                 return -EPERM;
892         memset(buffer, 0, 8);
893         if (count > 6)
894                 count = 6;
895         if (copy_from_user(buffer, buf, count))
896                 return -EFAULT;
897         oom_adjust = simple_strtol(buffer, &end, 0);
898         if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE)
899                 return -EINVAL;
900         if (*end == '\n')
901                 end++;
902         task->oomkilladj = oom_adjust;
903         if (end - buffer == 0)
904                 return -EIO;
905         return end - buffer;
906 }
907
/* File operations wiring read/write to oom_adjust_read()/oom_adjust_write(). */
static struct file_operations proc_oom_adjust_operations = {
        .read           = oom_adjust_read,
        .write          = oom_adjust_write,
};
912
/*
 * Inode operations that route permission checks through
 * proc_permission() (generic mode check plus chroot check).
 */
static struct inode_operations proc_mem_inode_operations = {
        .permission     = proc_permission,
};
916
917 #ifdef CONFIG_AUDITSYSCALL
918 #define TMPBUFLEN 21
919 static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
920                                   size_t count, loff_t *ppos)
921 {
922         struct inode * inode = file->f_dentry->d_inode;
923         struct task_struct *task = proc_task(inode);
924         ssize_t length;
925         char tmpbuf[TMPBUFLEN];
926
927         length = scnprintf(tmpbuf, TMPBUFLEN, "%u",
928                                 audit_get_loginuid(task->audit_context));
929         return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
930 }
931
932 static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
933                                    size_t count, loff_t *ppos)
934 {
935         struct inode * inode = file->f_dentry->d_inode;
936         char *page, *tmp;
937         ssize_t length;
938         struct task_struct *task = proc_task(inode);
939         uid_t loginuid;
940
941         if (!capable(CAP_AUDIT_CONTROL))
942                 return -EPERM;
943
944         if (current != task)
945                 return -EPERM;
946
947         if (count > PAGE_SIZE)
948                 count = PAGE_SIZE;
949
950         if (*ppos != 0) {
951                 /* No partial writes. */
952                 return -EINVAL;
953         }
954         page = (char*)__get_free_page(GFP_USER);
955         if (!page)
956                 return -ENOMEM;
957         length = -EFAULT;
958         if (copy_from_user(page, buf, count))
959                 goto out_free_page;
960
961         loginuid = simple_strtoul(page, &tmp, 10);
962         if (tmp == page) {
963                 length = -EINVAL;
964                 goto out_free_page;
965
966         }
967         length = audit_set_loginuid(task, loginuid);
968         if (likely(length == 0))
969                 length = count;
970
971 out_free_page:
972         free_page((unsigned long) page);
973         return length;
974 }
975
976 static struct file_operations proc_loginuid_operations = {
977         .read           = proc_loginuid_read,
978         .write          = proc_loginuid_write,
979 };
980 #endif
981
982 #ifdef CONFIG_SECCOMP
983 static ssize_t seccomp_read(struct file *file, char __user *buf,
984                             size_t count, loff_t *ppos)
985 {
986         struct task_struct *tsk = proc_task(file->f_dentry->d_inode);
987         char __buf[20];
988         loff_t __ppos = *ppos;
989         size_t len;
990
991         /* no need to print the trailing zero, so use only len */
992         len = sprintf(__buf, "%u\n", tsk->seccomp.mode);
993         if (__ppos >= len)
994                 return 0;
995         if (count > len - __ppos)
996                 count = len - __ppos;
997         if (copy_to_user(buf, __buf + __ppos, count))
998                 return -EFAULT;
999         *ppos = __ppos + count;
1000         return count;
1001 }
1002
1003 static ssize_t seccomp_write(struct file *file, const char __user *buf,
1004                              size_t count, loff_t *ppos)
1005 {
1006         struct task_struct *tsk = proc_task(file->f_dentry->d_inode);
1007         char __buf[20], *end;
1008         unsigned int seccomp_mode;
1009
1010         /* can set it only once to be even more secure */
1011         if (unlikely(tsk->seccomp.mode))
1012                 return -EPERM;
1013
1014         memset(__buf, 0, sizeof(__buf));
1015         count = min(count, sizeof(__buf) - 1);
1016         if (copy_from_user(__buf, buf, count))
1017                 return -EFAULT;
1018         seccomp_mode = simple_strtoul(__buf, &end, 0);
1019         if (*end == '\n')
1020                 end++;
1021         if (seccomp_mode && seccomp_mode <= NR_SECCOMP_MODES) {
1022                 tsk->seccomp.mode = seccomp_mode;
1023                 set_tsk_thread_flag(tsk, TIF_SECCOMP);
1024         } else
1025                 return -EINVAL;
1026         if (unlikely(!(end - __buf)))
1027                 return -EIO;
1028         return end - __buf;
1029 }
1030
1031 static struct file_operations proc_seccomp_operations = {
1032         .read           = seccomp_read,
1033         .write          = seccomp_write,
1034 };
1035 #endif /* CONFIG_SECCOMP */
1036
1037 static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
1038 {
1039         struct inode *inode = dentry->d_inode;
1040         int error = -EACCES;
1041
1042         /* We don't need a base pointer in the /proc filesystem */
1043         path_release(nd);
1044
1045         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
1046                 goto out;
1047         error = proc_check_root(inode);
1048         if (error)
1049                 goto out;
1050
1051         error = PROC_I(inode)->op.proc_get_link(inode, &nd->dentry, &nd->mnt);
1052         nd->last_type = LAST_BIND;
1053 out:
1054         return ERR_PTR(error);
1055 }
1056
1057 static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt,
1058                             char __user *buffer, int buflen)
1059 {
1060         struct inode * inode;
1061         char *tmp = (char*)__get_free_page(GFP_KERNEL), *path;
1062         int len;
1063
1064         if (!tmp)
1065                 return -ENOMEM;
1066                 
1067         inode = dentry->d_inode;
1068         path = d_path(dentry, mnt, tmp, PAGE_SIZE);
1069         len = PTR_ERR(path);
1070         if (IS_ERR(path))
1071                 goto out;
1072         len = tmp + PAGE_SIZE - 1 - path;
1073
1074         if (len > buflen)
1075                 len = buflen;
1076         if (copy_to_user(buffer, path, len))
1077                 len = -EFAULT;
1078  out:
1079         free_page((unsigned long)tmp);
1080         return len;
1081 }
1082
1083 static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
1084 {
1085         int error = -EACCES;
1086         struct inode *inode = dentry->d_inode;
1087         struct dentry *de;
1088         struct vfsmount *mnt = NULL;
1089
1090         lock_kernel();
1091
1092         if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE))
1093                 goto out;
1094         error = proc_check_root(inode);
1095         if (error)
1096                 goto out;
1097
1098         error = PROC_I(inode)->op.proc_get_link(inode, &de, &mnt);
1099         if (error)
1100                 goto out;
1101
1102         error = do_proc_readlink(de, mnt, buffer, buflen);
1103         dput(de);
1104         mntput(mnt);
1105 out:
1106         unlock_kernel();
1107         return error;
1108 }
1109
1110 static struct inode_operations proc_pid_link_inode_operations = {
1111         .readlink       = proc_pid_readlink,
1112         .follow_link    = proc_pid_follow_link
1113 };
1114
1115 #define NUMBUF 10
1116
1117 static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir)
1118 {
1119         struct inode *inode = filp->f_dentry->d_inode;
1120         struct task_struct *p = proc_task(inode);
1121         unsigned int fd, tid, ino;
1122         int retval;
1123         char buf[NUMBUF];
1124         struct files_struct * files;
1125         struct fdtable *fdt;
1126
1127         retval = -ENOENT;
1128         if (!pid_alive(p))
1129                 goto out;
1130         retval = 0;
1131         tid = p->pid;
1132
1133         fd = filp->f_pos;
1134         switch (fd) {
1135                 case 0:
1136                         if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
1137                                 goto out;
1138                         filp->f_pos++;
1139                 case 1:
1140                         ino = fake_ino(tid, PROC_TID_INO);
1141                         if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
1142                                 goto out;
1143                         filp->f_pos++;
1144                 default:
1145                         files = get_files_struct(p);
1146                         if (!files)
1147                                 goto out;
1148                         rcu_read_lock();
1149                         fdt = files_fdtable(files);
1150                         for (fd = filp->f_pos-2;
1151                              fd < fdt->max_fds;
1152                              fd++, filp->f_pos++) {
1153                                 unsigned int i,j;
1154
1155                                 if (!fcheck_files(files, fd))
1156                                         continue;
1157                                 rcu_read_unlock();
1158
1159                                 j = NUMBUF;
1160                                 i = fd;
1161                                 do {
1162                                         j--;
1163                                         buf[j] = '0' + (i % 10);
1164                                         i /= 10;
1165                                 } while (i);
1166
1167                                 ino = fake_ino(tid, PROC_TID_FD_DIR + fd);
1168                                 if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) {
1169                                         rcu_read_lock();
1170                                         break;
1171                                 }
1172                                 rcu_read_lock();
1173                         }
1174                         rcu_read_unlock();
1175                         put_files_struct(files);
1176         }
1177 out:
1178         return retval;
1179 }
1180
1181 static int proc_pident_readdir(struct file *filp,
1182                 void *dirent, filldir_t filldir,
1183                 struct pid_entry *ents, unsigned int nents)
1184 {
1185         int i;
1186         int pid;
1187         struct dentry *dentry = filp->f_dentry;
1188         struct inode *inode = dentry->d_inode;
1189         struct pid_entry *p;
1190         ino_t ino;
1191         int ret;
1192
1193         ret = -ENOENT;
1194         if (!pid_alive(proc_task(inode)))
1195                 goto out;
1196
1197         ret = 0;
1198         pid = proc_task(inode)->pid;
1199         i = filp->f_pos;
1200         switch (i) {
1201         case 0:
1202                 ino = inode->i_ino;
1203                 if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
1204                         goto out;
1205                 i++;
1206                 filp->f_pos++;
1207                 /* fall through */
1208         case 1:
1209                 ino = parent_ino(dentry);
1210                 if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
1211                         goto out;
1212                 i++;
1213                 filp->f_pos++;
1214                 /* fall through */
1215         default:
1216                 i -= 2;
1217                 if (i >= nents) {
1218                         ret = 1;
1219                         goto out;
1220                 }
1221                 p = ents + i;
1222                 while (p->name) {
1223                         if (filldir(dirent, p->name, p->len, filp->f_pos,
1224                                     fake_ino(pid, p->type), p->mode >> 12) < 0)
1225                                 goto out;
1226                         filp->f_pos++;
1227                         p++;
1228                 }
1229         }
1230
1231         ret = 1;
1232 out:
1233         return ret;
1234 }
1235
1236 static int proc_tgid_base_readdir(struct file * filp,
1237                              void * dirent, filldir_t filldir)
1238 {
1239         return proc_pident_readdir(filp,dirent,filldir,
1240                                    tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
1241 }
1242
1243 static int proc_tid_base_readdir(struct file * filp,
1244                              void * dirent, filldir_t filldir)
1245 {
1246         return proc_pident_readdir(filp,dirent,filldir,
1247                                    tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
1248 }
1249
1250 /* building an inode */
1251
1252 static int task_dumpable(struct task_struct *task)
1253 {
1254         int dumpable = 0;
1255         struct mm_struct *mm;
1256
1257         task_lock(task);
1258         mm = task->mm;
1259         if (mm)
1260                 dumpable = mm->dumpable;
1261         task_unlock(task);
1262         if(dumpable == 1)
1263                 return 1;
1264         return 0;
1265 }
1266
1267
1268 static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino)
1269 {
1270         struct inode * inode;
1271         struct proc_inode *ei;
1272
1273         /* We need a new inode */
1274         
1275         inode = new_inode(sb);
1276         if (!inode)
1277                 goto out;
1278
1279         /* Common stuff */
1280         ei = PROC_I(inode);
1281         ei->task = NULL;
1282         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1283         inode->i_ino = fake_ino(task->pid, ino);
1284
1285         if (!pid_alive(task))
1286                 goto out_unlock;
1287
1288         /*
1289          * grab the reference to task.
1290          */
1291         get_task_struct(task);
1292         ei->task = task;
1293         ei->type = ino;
1294         inode->i_uid = 0;
1295         inode->i_gid = 0;
1296         if (ino == PROC_TGID_INO || ino == PROC_TID_INO || task_dumpable(task)) {
1297                 inode->i_uid = task->euid;
1298                 inode->i_gid = task->egid;
1299         }
1300         security_task_to_inode(task, inode);
1301
1302 out:
1303         return inode;
1304
1305 out_unlock:
1306         ei->pde = NULL;
1307         iput(inode);
1308         return NULL;
1309 }
1310
1311 /* dentry stuff */
1312
1313 /*
1314  *      Exceptional case: normally we are not allowed to unhash a busy
1315  * directory. In this case, however, we can do it - no aliasing problems
1316  * due to the way we treat inodes.
1317  *
1318  * Rewrite the inode's ownerships here because the owning task may have
1319  * performed a setuid(), etc.
1320  */
1321 static int pid_revalidate(struct dentry *dentry, struct nameidata *nd)
1322 {
1323         struct inode *inode = dentry->d_inode;
1324         struct task_struct *task = proc_task(inode);
1325         if (pid_alive(task)) {
1326                 if (proc_type(inode) == PROC_TGID_INO || proc_type(inode) == PROC_TID_INO || task_dumpable(task)) {
1327                         inode->i_uid = task->euid;
1328                         inode->i_gid = task->egid;
1329                 } else {
1330                         inode->i_uid = 0;
1331                         inode->i_gid = 0;
1332                 }
1333                 security_task_to_inode(task, inode);
1334                 return 1;
1335         }
1336         d_drop(dentry);
1337         return 0;
1338 }
1339
1340 static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
1341 {
1342         struct inode *inode = dentry->d_inode;
1343         struct task_struct *task = proc_task(inode);
1344         int fd = proc_type(inode) - PROC_TID_FD_DIR;
1345         struct files_struct *files;
1346
1347         files = get_files_struct(task);
1348         if (files) {
1349                 rcu_read_lock();
1350                 if (fcheck_files(files, fd)) {
1351                         rcu_read_unlock();
1352                         put_files_struct(files);
1353                         if (task_dumpable(task)) {
1354                                 inode->i_uid = task->euid;
1355                                 inode->i_gid = task->egid;
1356                         } else {
1357                                 inode->i_uid = 0;
1358                                 inode->i_gid = 0;
1359                         }
1360                         security_task_to_inode(task, inode);
1361                         return 1;
1362                 }
1363                 rcu_read_unlock();
1364                 put_files_struct(files);
1365         }
1366         d_drop(dentry);
1367         return 0;
1368 }
1369
1370 static void pid_base_iput(struct dentry *dentry, struct inode *inode)
1371 {
1372         struct task_struct *task = proc_task(inode);
1373         spin_lock(&task->proc_lock);
1374         if (task->proc_dentry == dentry)
1375                 task->proc_dentry = NULL;
1376         spin_unlock(&task->proc_lock);
1377         iput(inode);
1378 }
1379
1380 static int pid_delete_dentry(struct dentry * dentry)
1381 {
1382         /* Is the task we represent dead?
1383          * If so, then don't put the dentry on the lru list,
1384          * kill it immediately.
1385          */
1386         return !pid_alive(proc_task(dentry->d_inode));
1387 }
1388
1389 static struct dentry_operations tid_fd_dentry_operations =
1390 {
1391         .d_revalidate   = tid_fd_revalidate,
1392         .d_delete       = pid_delete_dentry,
1393 };
1394
1395 static struct dentry_operations pid_dentry_operations =
1396 {
1397         .d_revalidate   = pid_revalidate,
1398         .d_delete       = pid_delete_dentry,
1399 };
1400
1401 static struct dentry_operations pid_base_dentry_operations =
1402 {
1403         .d_revalidate   = pid_revalidate,
1404         .d_iput         = pid_base_iput,
1405         .d_delete       = pid_delete_dentry,
1406 };
1407
1408 /* Lookups */
1409
1410 static unsigned name_to_int(struct dentry *dentry)
1411 {
1412         const char *name = dentry->d_name.name;
1413         int len = dentry->d_name.len;
1414         unsigned n = 0;
1415
1416         if (len > 1 && *name == '0')
1417                 goto out;
1418         while (len-- > 0) {
1419                 unsigned c = *name++ - '0';
1420                 if (c > 9)
1421                         goto out;
1422                 if (n >= (~0U-9)/10)
1423                         goto out;
1424                 n *= 10;
1425                 n += c;
1426         }
1427         return n;
1428 out:
1429         return ~0U;
1430 }
1431
1432 /* SMP-safe */
1433 static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry, struct nameidata *nd)
1434 {
1435         struct task_struct *task = proc_task(dir);
1436         unsigned fd = name_to_int(dentry);
1437         struct file * file;
1438         struct files_struct * files;
1439         struct inode *inode;
1440         struct proc_inode *ei;
1441
1442         if (fd == ~0U)
1443                 goto out;
1444         if (!pid_alive(task))
1445                 goto out;
1446
1447         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_FD_DIR+fd);
1448         if (!inode)
1449                 goto out;
1450         ei = PROC_I(inode);
1451         files = get_files_struct(task);
1452         if (!files)
1453                 goto out_unlock;
1454         inode->i_mode = S_IFLNK;
1455         rcu_read_lock();
1456         file = fcheck_files(files, fd);
1457         if (!file)
1458                 goto out_unlock2;
1459         if (file->f_mode & 1)
1460                 inode->i_mode |= S_IRUSR | S_IXUSR;
1461         if (file->f_mode & 2)
1462                 inode->i_mode |= S_IWUSR | S_IXUSR;
1463         rcu_read_unlock();
1464         put_files_struct(files);
1465         inode->i_op = &proc_pid_link_inode_operations;
1466         inode->i_size = 64;
1467         ei->op.proc_get_link = proc_fd_link;
1468         dentry->d_op = &tid_fd_dentry_operations;
1469         d_add(dentry, inode);
1470         return NULL;
1471
1472 out_unlock2:
1473         rcu_read_unlock();
1474         put_files_struct(files);
1475 out_unlock:
1476         iput(inode);
1477 out:
1478         return ERR_PTR(-ENOENT);
1479 }
1480
1481 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir);
1482 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd);
1483
1484 static struct file_operations proc_fd_operations = {
1485         .read           = generic_read_dir,
1486         .readdir        = proc_readfd,
1487 };
1488
1489 static struct file_operations proc_task_operations = {
1490         .read           = generic_read_dir,
1491         .readdir        = proc_task_readdir,
1492 };
1493
1494 /*
1495  * proc directories can do almost nothing..
1496  */
1497 static struct inode_operations proc_fd_inode_operations = {
1498         .lookup         = proc_lookupfd,
1499         .permission     = proc_permission,
1500 };
1501
1502 static struct inode_operations proc_task_inode_operations = {
1503         .lookup         = proc_task_lookup,
1504         .permission     = proc_task_permission,
1505 };
1506
1507 #ifdef CONFIG_SECURITY
1508 static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
1509                                   size_t count, loff_t *ppos)
1510 {
1511         struct inode * inode = file->f_dentry->d_inode;
1512         unsigned long page;
1513         ssize_t length;
1514         struct task_struct *task = proc_task(inode);
1515
1516         if (count > PAGE_SIZE)
1517                 count = PAGE_SIZE;
1518         if (!(page = __get_free_page(GFP_KERNEL)))
1519                 return -ENOMEM;
1520
1521         length = security_getprocattr(task, 
1522                                       (char*)file->f_dentry->d_name.name, 
1523                                       (void*)page, count);
1524         if (length >= 0)
1525                 length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
1526         free_page(page);
1527         return length;
1528 }
1529
1530 static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
1531                                    size_t count, loff_t *ppos)
1532
1533         struct inode * inode = file->f_dentry->d_inode;
1534         char *page; 
1535         ssize_t length; 
1536         struct task_struct *task = proc_task(inode); 
1537
1538         if (count > PAGE_SIZE) 
1539                 count = PAGE_SIZE; 
1540         if (*ppos != 0) {
1541                 /* No partial writes. */
1542                 return -EINVAL;
1543         }
1544         page = (char*)__get_free_page(GFP_USER); 
1545         if (!page) 
1546                 return -ENOMEM;
1547         length = -EFAULT; 
1548         if (copy_from_user(page, buf, count)) 
1549                 goto out;
1550
1551         length = security_setprocattr(task, 
1552                                       (char*)file->f_dentry->d_name.name, 
1553                                       (void*)page, count);
1554 out:
1555         free_page((unsigned long) page);
1556         return length;
1557
1558
1559 static struct file_operations proc_pid_attr_operations = {
1560         .read           = proc_pid_attr_read,
1561         .write          = proc_pid_attr_write,
1562 };
1563
1564 static struct file_operations proc_tid_attr_operations;
1565 static struct inode_operations proc_tid_attr_inode_operations;
1566 static struct file_operations proc_tgid_attr_operations;
1567 static struct inode_operations proc_tgid_attr_inode_operations;
1568 #endif
1569
1570 static int get_tid_list(int index, unsigned int *tids, struct inode *dir);
1571
1572 /* SMP-safe */
1573 static struct dentry *proc_pident_lookup(struct inode *dir, 
1574                                          struct dentry *dentry,
1575                                          struct pid_entry *ents)
1576 {
1577         struct inode *inode;
1578         int error;
1579         struct task_struct *task = proc_task(dir);
1580         struct pid_entry *p;
1581         struct proc_inode *ei;
1582
1583         error = -ENOENT;
1584         inode = NULL;
1585
1586         if (!pid_alive(task))
1587                 goto out;
1588
1589         for (p = ents; p->name; p++) {
1590                 if (p->len != dentry->d_name.len)
1591                         continue;
1592                 if (!memcmp(dentry->d_name.name, p->name, p->len))
1593                         break;
1594         }
1595         if (!p->name)
1596                 goto out;
1597
1598         error = -EINVAL;
1599         inode = proc_pid_make_inode(dir->i_sb, task, p->type);
1600         if (!inode)
1601                 goto out;
1602
1603         ei = PROC_I(inode);
1604         inode->i_mode = p->mode;
1605         /*
1606          * Yes, it does not scale. And it should not. Don't add
1607          * new entries into /proc/<tgid>/ without very good reasons.
1608          */
1609         switch(p->type) {
1610                 case PROC_TGID_TASK:
1611                         inode->i_nlink = 2 + get_tid_list(2, NULL, dir);
1612                         inode->i_op = &proc_task_inode_operations;
1613                         inode->i_fop = &proc_task_operations;
1614                         break;
1615                 case PROC_TID_FD:
1616                 case PROC_TGID_FD:
1617                         inode->i_nlink = 2;
1618                         inode->i_op = &proc_fd_inode_operations;
1619                         inode->i_fop = &proc_fd_operations;
1620                         break;
1621                 case PROC_TID_EXE:
1622                 case PROC_TGID_EXE:
1623                         inode->i_op = &proc_pid_link_inode_operations;
1624                         ei->op.proc_get_link = proc_exe_link;
1625                         break;
1626                 case PROC_TID_CWD:
1627                 case PROC_TGID_CWD:
1628                         inode->i_op = &proc_pid_link_inode_operations;
1629                         ei->op.proc_get_link = proc_cwd_link;
1630                         break;
1631                 case PROC_TID_ROOT:
1632                 case PROC_TGID_ROOT:
1633                         inode->i_op = &proc_pid_link_inode_operations;
1634                         ei->op.proc_get_link = proc_root_link;
1635                         break;
1636                 case PROC_TID_ENVIRON:
1637                 case PROC_TGID_ENVIRON:
1638                         inode->i_fop = &proc_info_file_operations;
1639                         ei->op.proc_read = proc_pid_environ;
1640                         break;
1641                 case PROC_TID_AUXV:
1642                 case PROC_TGID_AUXV:
1643                         inode->i_fop = &proc_info_file_operations;
1644                         ei->op.proc_read = proc_pid_auxv;
1645                         break;
1646                 case PROC_TID_STATUS:
1647                 case PROC_TGID_STATUS:
1648                         inode->i_fop = &proc_info_file_operations;
1649                         ei->op.proc_read = proc_pid_status;
1650                         break;
1651                 case PROC_TID_STAT:
1652                         inode->i_fop = &proc_info_file_operations;
1653                         ei->op.proc_read = proc_tid_stat;
1654                         break;
1655                 case PROC_TGID_STAT:
1656                         inode->i_fop = &proc_info_file_operations;
1657                         ei->op.proc_read = proc_tgid_stat;
1658                         break;
1659                 case PROC_TID_CMDLINE:
1660                 case PROC_TGID_CMDLINE:
1661                         inode->i_fop = &proc_info_file_operations;
1662                         ei->op.proc_read = proc_pid_cmdline;
1663                         break;
1664                 case PROC_TID_STATM:
1665                 case PROC_TGID_STATM:
1666                         inode->i_fop = &proc_info_file_operations;
1667                         ei->op.proc_read = proc_pid_statm;
1668                         break;
1669                 case PROC_TID_MAPS:
1670                 case PROC_TGID_MAPS:
1671                         inode->i_fop = &proc_maps_operations;
1672                         break;
1673 #ifdef CONFIG_NUMA
1674                 case PROC_TID_NUMA_MAPS:
1675                 case PROC_TGID_NUMA_MAPS:
1676                         inode->i_fop = &proc_numa_maps_operations;
1677                         break;
1678 #endif
1679                 case PROC_TID_MEM:
1680                 case PROC_TGID_MEM:
1681                         inode->i_op = &proc_mem_inode_operations;
1682                         inode->i_fop = &proc_mem_operations;
1683                         break;
1684 #ifdef CONFIG_SECCOMP
1685                 case PROC_TID_SECCOMP:
1686                 case PROC_TGID_SECCOMP:
1687                         inode->i_fop = &proc_seccomp_operations;
1688                         break;
1689 #endif /* CONFIG_SECCOMP */
1690                 case PROC_TID_MOUNTS:
1691                 case PROC_TGID_MOUNTS:
1692                         inode->i_fop = &proc_mounts_operations;
1693                         break;
1694 #ifdef CONFIG_MMU
1695                 case PROC_TID_SMAPS:
1696                 case PROC_TGID_SMAPS:
1697                         inode->i_fop = &proc_smaps_operations;
1698                         break;
1699 #endif
1700 #ifdef CONFIG_SECURITY
1701                 case PROC_TID_ATTR:
1702                         inode->i_nlink = 2;
1703                         inode->i_op = &proc_tid_attr_inode_operations;
1704                         inode->i_fop = &proc_tid_attr_operations;
1705                         break;
1706                 case PROC_TGID_ATTR:
1707                         inode->i_nlink = 2;
1708                         inode->i_op = &proc_tgid_attr_inode_operations;
1709                         inode->i_fop = &proc_tgid_attr_operations;
1710                         break;
1711                 case PROC_TID_ATTR_CURRENT:
1712                 case PROC_TGID_ATTR_CURRENT:
1713                 case PROC_TID_ATTR_PREV:
1714                 case PROC_TGID_ATTR_PREV:
1715                 case PROC_TID_ATTR_EXEC:
1716                 case PROC_TGID_ATTR_EXEC:
1717                 case PROC_TID_ATTR_FSCREATE:
1718                 case PROC_TGID_ATTR_FSCREATE:
1719                         inode->i_fop = &proc_pid_attr_operations;
1720                         break;
1721 #endif
1722 #ifdef CONFIG_KALLSYMS
1723                 case PROC_TID_WCHAN:
1724                 case PROC_TGID_WCHAN:
1725                         inode->i_fop = &proc_info_file_operations;
1726                         ei->op.proc_read = proc_pid_wchan;
1727                         break;
1728 #endif
1729 #ifdef CONFIG_SCHEDSTATS
1730                 case PROC_TID_SCHEDSTAT:
1731                 case PROC_TGID_SCHEDSTAT:
1732                         inode->i_fop = &proc_info_file_operations;
1733                         ei->op.proc_read = proc_pid_schedstat;
1734                         break;
1735 #endif
1736 #ifdef CONFIG_CPUSETS
1737                 case PROC_TID_CPUSET:
1738                 case PROC_TGID_CPUSET:
1739                         inode->i_fop = &proc_cpuset_operations;
1740                         break;
1741 #endif
1742                 case PROC_TID_OOM_SCORE:
1743                 case PROC_TGID_OOM_SCORE:
1744                         inode->i_fop = &proc_info_file_operations;
1745                         ei->op.proc_read = proc_oom_score;
1746                         break;
1747                 case PROC_TID_OOM_ADJUST:
1748                 case PROC_TGID_OOM_ADJUST:
1749                         inode->i_fop = &proc_oom_adjust_operations;
1750                         break;
1751 #ifdef CONFIG_AUDITSYSCALL
1752                 case PROC_TID_LOGINUID:
1753                 case PROC_TGID_LOGINUID:
1754                         inode->i_fop = &proc_loginuid_operations;
1755                         break;
1756 #endif
1757                 default:
1758                         printk("procfs: impossible type (%d)",p->type);
1759                         iput(inode);
1760                         return ERR_PTR(-EINVAL);
1761         }
1762         dentry->d_op = &pid_dentry_operations;
1763         d_add(dentry, inode);
1764         return NULL;
1765
1766 out:
1767         return ERR_PTR(error);
1768 }
1769
1770 static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1771         return proc_pident_lookup(dir, dentry, tgid_base_stuff);
1772 }
1773
1774 static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){
1775         return proc_pident_lookup(dir, dentry, tid_base_stuff);
1776 }
1777
1778 static struct file_operations proc_tgid_base_operations = {
1779         .read           = generic_read_dir,
1780         .readdir        = proc_tgid_base_readdir,
1781 };
1782
1783 static struct file_operations proc_tid_base_operations = {
1784         .read           = generic_read_dir,
1785         .readdir        = proc_tid_base_readdir,
1786 };
1787
1788 static struct inode_operations proc_tgid_base_inode_operations = {
1789         .lookup         = proc_tgid_base_lookup,
1790 };
1791
1792 static struct inode_operations proc_tid_base_inode_operations = {
1793         .lookup         = proc_tid_base_lookup,
1794 };
1795
1796 #ifdef CONFIG_SECURITY
1797 static int proc_tgid_attr_readdir(struct file * filp,
1798                              void * dirent, filldir_t filldir)
1799 {
1800         return proc_pident_readdir(filp,dirent,filldir,
1801                                    tgid_attr_stuff,ARRAY_SIZE(tgid_attr_stuff));
1802 }
1803
1804 static int proc_tid_attr_readdir(struct file * filp,
1805                              void * dirent, filldir_t filldir)
1806 {
1807         return proc_pident_readdir(filp,dirent,filldir,
1808                                    tid_attr_stuff,ARRAY_SIZE(tid_attr_stuff));
1809 }
1810
1811 static struct file_operations proc_tgid_attr_operations = {
1812         .read           = generic_read_dir,
1813         .readdir        = proc_tgid_attr_readdir,
1814 };
1815
1816 static struct file_operations proc_tid_attr_operations = {
1817         .read           = generic_read_dir,
1818         .readdir        = proc_tid_attr_readdir,
1819 };
1820
1821 static struct dentry *proc_tgid_attr_lookup(struct inode *dir,
1822                                 struct dentry *dentry, struct nameidata *nd)
1823 {
1824         return proc_pident_lookup(dir, dentry, tgid_attr_stuff);
1825 }
1826
1827 static struct dentry *proc_tid_attr_lookup(struct inode *dir,
1828                                 struct dentry *dentry, struct nameidata *nd)
1829 {
1830         return proc_pident_lookup(dir, dentry, tid_attr_stuff);
1831 }
1832
1833 static struct inode_operations proc_tgid_attr_inode_operations = {
1834         .lookup         = proc_tgid_attr_lookup,
1835 };
1836
1837 static struct inode_operations proc_tid_attr_inode_operations = {
1838         .lookup         = proc_tid_attr_lookup,
1839 };
1840 #endif
1841
1842 /*
1843  * /proc/self:
1844  */
1845 static int proc_self_readlink(struct dentry *dentry, char __user *buffer,
1846                               int buflen)
1847 {
1848         char tmp[30];
1849         sprintf(tmp, "%d", current->tgid);
1850         return vfs_readlink(dentry,buffer,buflen,tmp);
1851 }
1852
1853 static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
1854 {
1855         char tmp[30];
1856         sprintf(tmp, "%d", current->tgid);
1857         return ERR_PTR(vfs_follow_link(nd,tmp));
1858 }       
1859
1860 static struct inode_operations proc_self_inode_operations = {
1861         .readlink       = proc_self_readlink,
1862         .follow_link    = proc_self_follow_link,
1863 };
1864
1865 /**
1866  * proc_pid_unhash -  Unhash /proc/@pid entry from the dcache.
1867  * @p: task that should be flushed.
1868  *
1869  * Drops the /proc/@pid dcache entry from the hash chains.
1870  *
1871  * Dropping /proc/@pid entries and detach_pid must be synchroneous,
1872  * otherwise e.g. /proc/@pid/exe might point to the wrong executable,
1873  * if the pid value is immediately reused. This is enforced by
1874  * - caller must acquire spin_lock(p->proc_lock)
1875  * - must be called before detach_pid()
1876  * - proc_pid_lookup acquires proc_lock, and checks that
1877  *   the target is not dead by looking at the attach count
1878  *   of PIDTYPE_PID.
1879  */
1880
1881 struct dentry *proc_pid_unhash(struct task_struct *p)
1882 {
1883         struct dentry *proc_dentry;
1884
1885         proc_dentry = p->proc_dentry;
1886         if (proc_dentry != NULL) {
1887
1888                 spin_lock(&dcache_lock);
1889                 spin_lock(&proc_dentry->d_lock);
1890                 if (!d_unhashed(proc_dentry)) {
1891                         dget_locked(proc_dentry);
1892                         __d_drop(proc_dentry);
1893                         spin_unlock(&proc_dentry->d_lock);
1894                 } else {
1895                         spin_unlock(&proc_dentry->d_lock);
1896                         proc_dentry = NULL;
1897                 }
1898                 spin_unlock(&dcache_lock);
1899         }
1900         return proc_dentry;
1901 }
1902
/**
 * proc_pid_flush - recover memory used by stale /proc/@pid/x entries
 * @proc_dentry: directory to prune; may be NULL, in which case nothing
 *               is done.
 *
 * Shrink the /proc directory that was used by the just killed thread,
 * then drop one reference on @proc_dentry.  May sleep.
 */

void proc_pid_flush(struct dentry *proc_dentry)
{
	might_sleep();

	if (!proc_dentry)
		return;

	shrink_dcache_parent(proc_dentry);
	dput(proc_dentry);
}
1918
1919 /* SMP-safe */
1920 struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1921 {
1922         struct task_struct *task;
1923         struct inode *inode;
1924         struct proc_inode *ei;
1925         unsigned tgid;
1926         int died;
1927
1928         if (dentry->d_name.len == 4 && !memcmp(dentry->d_name.name,"self",4)) {
1929                 inode = new_inode(dir->i_sb);
1930                 if (!inode)
1931                         return ERR_PTR(-ENOMEM);
1932                 ei = PROC_I(inode);
1933                 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1934                 inode->i_ino = fake_ino(0, PROC_TGID_INO);
1935                 ei->pde = NULL;
1936                 inode->i_mode = S_IFLNK|S_IRWXUGO;
1937                 inode->i_uid = inode->i_gid = 0;
1938                 inode->i_size = 64;
1939                 inode->i_op = &proc_self_inode_operations;
1940                 d_add(dentry, inode);
1941                 return NULL;
1942         }
1943         tgid = name_to_int(dentry);
1944         if (tgid == ~0U)
1945                 goto out;
1946
1947         read_lock(&tasklist_lock);
1948         task = find_task_by_pid(tgid);
1949         if (task)
1950                 get_task_struct(task);
1951         read_unlock(&tasklist_lock);
1952         if (!task)
1953                 goto out;
1954
1955         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TGID_INO);
1956
1957
1958         if (!inode) {
1959                 put_task_struct(task);
1960                 goto out;
1961         }
1962         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
1963         inode->i_op = &proc_tgid_base_inode_operations;
1964         inode->i_fop = &proc_tgid_base_operations;
1965         inode->i_flags|=S_IMMUTABLE;
1966 #ifdef CONFIG_SECURITY
1967         inode->i_nlink = 5;
1968 #else
1969         inode->i_nlink = 4;
1970 #endif
1971
1972         dentry->d_op = &pid_base_dentry_operations;
1973
1974         died = 0;
1975         d_add(dentry, inode);
1976         spin_lock(&task->proc_lock);
1977         task->proc_dentry = dentry;
1978         if (!pid_alive(task)) {
1979                 dentry = proc_pid_unhash(task);
1980                 died = 1;
1981         }
1982         spin_unlock(&task->proc_lock);
1983
1984         put_task_struct(task);
1985         if (died) {
1986                 proc_pid_flush(dentry);
1987                 goto out;
1988         }
1989         return NULL;
1990 out:
1991         return ERR_PTR(-ENOENT);
1992 }
1993
1994 /* SMP-safe */
1995 static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
1996 {
1997         struct task_struct *task;
1998         struct task_struct *leader = proc_task(dir);
1999         struct inode *inode;
2000         unsigned tid;
2001
2002         tid = name_to_int(dentry);
2003         if (tid == ~0U)
2004                 goto out;
2005
2006         read_lock(&tasklist_lock);
2007         task = find_task_by_pid(tid);
2008         if (task)
2009                 get_task_struct(task);
2010         read_unlock(&tasklist_lock);
2011         if (!task)
2012                 goto out;
2013         if (leader->tgid != task->tgid)
2014                 goto out_drop_task;
2015
2016         inode = proc_pid_make_inode(dir->i_sb, task, PROC_TID_INO);
2017
2018
2019         if (!inode)
2020                 goto out_drop_task;
2021         inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
2022         inode->i_op = &proc_tid_base_inode_operations;
2023         inode->i_fop = &proc_tid_base_operations;
2024         inode->i_flags|=S_IMMUTABLE;
2025 #ifdef CONFIG_SECURITY
2026         inode->i_nlink = 4;
2027 #else
2028         inode->i_nlink = 3;
2029 #endif
2030
2031         dentry->d_op = &pid_base_dentry_operations;
2032
2033         d_add(dentry, inode);
2034
2035         put_task_struct(task);
2036         return NULL;
2037 out_drop_task:
2038         put_task_struct(task);
2039 out:
2040         return ERR_PTR(-ENOENT);
2041 }
2042
/*
 * Size of the on-stack character buffer in which the readdir routines
 * below build the decimal form of a pid (filled from the end, no NUL
 * terminator is stored).
 */
#define PROC_NUMBUF 10
/*
 * Capacity of the temporary pid arrays: the most ids that
 * get_tgid_list()/get_tid_list() collect in a single call.
 */
#define PROC_MAXPIDS 20
2045
2046 /*
2047  * Get a few tgid's to return for filldir - we need to hold the
2048  * tasklist lock while doing this, and we must release it before
2049  * we actually do the filldir itself, so we use a temp buffer..
2050  */
2051 static int get_tgid_list(int index, unsigned long version, unsigned int *tgids)
2052 {
2053         struct task_struct *p;
2054         int nr_tgids = 0;
2055
2056         index--;
2057         read_lock(&tasklist_lock);
2058         p = NULL;
2059         if (version) {
2060                 p = find_task_by_pid(version);
2061                 if (p && !thread_group_leader(p))
2062                         p = NULL;
2063         }
2064
2065         if (p)
2066                 index = 0;
2067         else
2068                 p = next_task(&init_task);
2069
2070         for ( ; p != &init_task; p = next_task(p)) {
2071                 int tgid = p->pid;
2072                 if (!pid_alive(p))
2073                         continue;
2074                 if (--index >= 0)
2075                         continue;
2076                 tgids[nr_tgids] = tgid;
2077                 nr_tgids++;
2078                 if (nr_tgids >= PROC_MAXPIDS)
2079                         break;
2080         }
2081         read_unlock(&tasklist_lock);
2082         return nr_tgids;
2083 }
2084
2085 /*
2086  * Get a few tid's to return for filldir - we need to hold the
2087  * tasklist lock while doing this, and we must release it before
2088  * we actually do the filldir itself, so we use a temp buffer..
2089  */
2090 static int get_tid_list(int index, unsigned int *tids, struct inode *dir)
2091 {
2092         struct task_struct *leader_task = proc_task(dir);
2093         struct task_struct *task = leader_task;
2094         int nr_tids = 0;
2095
2096         index -= 2;
2097         read_lock(&tasklist_lock);
2098         /*
2099          * The starting point task (leader_task) might be an already
2100          * unlinked task, which cannot be used to access the task-list
2101          * via next_thread().
2102          */
2103         if (pid_alive(task)) do {
2104                 int tid = task->pid;
2105
2106                 if (--index >= 0)
2107                         continue;
2108                 if (tids != NULL)
2109                         tids[nr_tids] = tid;
2110                 nr_tids++;
2111                 if (nr_tids >= PROC_MAXPIDS)
2112                         break;
2113         } while ((task = next_thread(task)) != leader_task);
2114         read_unlock(&tasklist_lock);
2115         return nr_tids;
2116 }
2117
2118 /* for the /proc/ directory itself, after non-process stuff has been done */
2119 int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
2120 {
2121         unsigned int tgid_array[PROC_MAXPIDS];
2122         char buf[PROC_NUMBUF];
2123         unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
2124         unsigned int nr_tgids, i;
2125         int next_tgid;
2126
2127         if (!nr) {
2128                 ino_t ino = fake_ino(0,PROC_TGID_INO);
2129                 if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
2130                         return 0;
2131                 filp->f_pos++;
2132                 nr++;
2133         }
2134
2135         /* f_version caches the tgid value that the last readdir call couldn't
2136          * return. lseek aka telldir automagically resets f_version to 0.
2137          */
2138         next_tgid = filp->f_version;
2139         filp->f_version = 0;
2140         for (;;) {
2141                 nr_tgids = get_tgid_list(nr, next_tgid, tgid_array);
2142                 if (!nr_tgids) {
2143                         /* no more entries ! */
2144                         break;
2145                 }
2146                 next_tgid = 0;
2147
2148                 /* do not use the last found pid, reserve it for next_tgid */
2149                 if (nr_tgids == PROC_MAXPIDS) {
2150                         nr_tgids--;
2151                         next_tgid = tgid_array[nr_tgids];
2152                 }
2153
2154                 for (i=0;i<nr_tgids;i++) {
2155                         int tgid = tgid_array[i];
2156                         ino_t ino = fake_ino(tgid,PROC_TGID_INO);
2157                         unsigned long j = PROC_NUMBUF;
2158
2159                         do
2160                                 buf[--j] = '0' + (tgid % 10);
2161                         while ((tgid /= 10) != 0);
2162
2163                         if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
2164                                 /* returning this tgid failed, save it as the first
2165                                  * pid for the next readir call */
2166                                 filp->f_version = tgid_array[i];
2167                                 goto out;
2168                         }
2169                         filp->f_pos++;
2170                         nr++;
2171                 }
2172         }
2173 out:
2174         return 0;
2175 }
2176
2177 /* for the /proc/TGID/task/ directories */
2178 static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
2179 {
2180         unsigned int tid_array[PROC_MAXPIDS];
2181         char buf[PROC_NUMBUF];
2182         unsigned int nr_tids, i;
2183         struct dentry *dentry = filp->f_dentry;
2184         struct inode *inode = dentry->d_inode;
2185         int retval = -ENOENT;
2186         ino_t ino;
2187         unsigned long pos = filp->f_pos;  /* avoiding "long long" filp->f_pos */
2188
2189         if (!pid_alive(proc_task(inode)))
2190                 goto out;
2191         retval = 0;
2192
2193         switch (pos) {
2194         case 0:
2195                 ino = inode->i_ino;
2196                 if (filldir(dirent, ".", 1, pos, ino, DT_DIR) < 0)
2197                         goto out;
2198                 pos++;
2199                 /* fall through */
2200         case 1:
2201                 ino = parent_ino(dentry);
2202                 if (filldir(dirent, "..", 2, pos, ino, DT_DIR) < 0)
2203                         goto out;
2204                 pos++;
2205                 /* fall through */
2206         }
2207
2208         nr_tids = get_tid_list(pos, tid_array, inode);
2209         inode->i_nlink = pos + nr_tids;
2210
2211         for (i = 0; i < nr_tids; i++) {
2212                 unsigned long j = PROC_NUMBUF;
2213                 int tid = tid_array[i];
2214
2215                 ino = fake_ino(tid,PROC_TID_INO);
2216
2217                 do
2218                         buf[--j] = '0' + (tid % 10);
2219                 while ((tid /= 10) != 0);
2220
2221                 if (filldir(dirent, buf+j, PROC_NUMBUF-j, pos, ino, DT_DIR) < 0)
2222                         break;
2223                 pos++;
2224         }
2225 out:
2226         filp->f_pos = pos;
2227         return retval;
2228 }