mm: Tighten x86 /dev/mem with zeroing reads
[pandora-kernel.git] / drivers / char / mem.c
1 /*
2  *  linux/drivers/char/mem.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  *
6  *  Added devfs support.
7  *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
8  *  Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
9  */
10
11 #include <linux/mm.h>
12 #include <linux/miscdevice.h>
13 #include <linux/slab.h>
14 #include <linux/vmalloc.h>
15 #include <linux/mman.h>
16 #include <linux/random.h>
17 #include <linux/init.h>
18 #include <linux/raw.h>
19 #include <linux/tty.h>
20 #include <linux/capability.h>
21 #include <linux/ptrace.h>
22 #include <linux/device.h>
23 #include <linux/highmem.h>
24 #include <linux/crash_dump.h>
25 #include <linux/backing-dev.h>
26 #include <linux/bootmem.h>
27 #include <linux/splice.h>
28 #include <linux/pfn.h>
29 #include <linux/export.h>
30
31 #include <asm/uaccess.h>
32 #include <asm/io.h>
33
34 #ifdef CONFIG_IA64
35 # include <linux/efi.h>
36 #endif
37
38 static inline unsigned long size_inside_page(unsigned long start,
39                                              unsigned long size)
40 {
41         unsigned long sz;
42
43         sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));
44
45         return min(sz, size);
46 }
47
48 #ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
49 static inline int valid_phys_addr_range(unsigned long addr, size_t count)
50 {
51         return addr + count <= __pa(high_memory);
52 }
53
54 static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
55 {
56         return 1;
57 }
58 #endif
59
#ifdef CONFIG_STRICT_DEVMEM
/*
 * Per-page access policy: hands back devmem_is_allowed()'s verdict for @pfn
 * unchanged.  The read/write paths in this file treat 0 as "forbidden" and
 * 2 as "restricted".
 */
static inline int page_is_allowed(unsigned long pfn)
{
        return devmem_is_allowed(pfn);
}

/*
 * Check every page of the @size byte range that starts at page frame @pfn.
 *
 * Returns 1 when devmem_is_allowed() accepts all of them.  On the first
 * rejected page the attempt is logged with the calling program's name and
 * 0 is returned.
 */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
        const u64 start = ((u64)pfn) << PAGE_SHIFT;
        const u64 end = start + size;
        u64 pos;

        for (pos = start; pos < end; pos += PAGE_SIZE, pfn++) {
                if (devmem_is_allowed(pfn))
                        continue;

                printk(KERN_INFO
                       "Program %s tried to access /dev/mem between %Lx->%Lx.\n",
                       current->comm, start, end);
                return 0;
        }

        return 1;
}
#else
/* Without CONFIG_STRICT_DEVMEM every page is accessible. */
static inline int page_is_allowed(unsigned long pfn)
{
        return 1;
}

/* Without CONFIG_STRICT_DEVMEM every range is accessible. */
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
{
        return 1;
}
#endif
93
94 void __weak unxlate_dev_mem_ptr(unsigned long phys, void *addr)
95 {
96 }
97
98 /*
99  * This funcion reads the *physical* memory. The f_pos points directly to the
100  * memory location.
101  */
102 static ssize_t read_mem(struct file *file, char __user *buf,
103                         size_t count, loff_t *ppos)
104 {
105         unsigned long p = *ppos;
106         ssize_t read, sz;
107         char *ptr;
108
109         if (!valid_phys_addr_range(p, count))
110                 return -EFAULT;
111         read = 0;
112 #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
113         /* we don't have page 0 mapped on sparc and m68k.. */
114         if (p < PAGE_SIZE) {
115                 sz = size_inside_page(p, count);
116                 if (sz > 0) {
117                         if (clear_user(buf, sz))
118                                 return -EFAULT;
119                         buf += sz;
120                         p += sz;
121                         count -= sz;
122                         read += sz;
123                 }
124         }
125 #endif
126
127         while (count > 0) {
128                 unsigned long remaining;
129                 int allowed;
130
131                 sz = size_inside_page(p, count);
132
133                 allowed = page_is_allowed(p >> PAGE_SHIFT);
134                 if (!allowed)
135                         return -EPERM;
136                 if (allowed == 2) {
137                         /* Show zeros for restricted memory. */
138                         remaining = clear_user(buf, sz);
139                 } else {
140                         /*
141                          * On ia64 if a page has been mapped somewhere as
142                          * uncached, then it must also be accessed uncached
143                          * by the kernel or data corruption may occur.
144                          */
145                         ptr = xlate_dev_mem_ptr(p);
146                         if (!ptr)
147                                 return -EFAULT;
148
149                         remaining = copy_to_user(buf, ptr, sz);
150
151                         unxlate_dev_mem_ptr(p, ptr);
152                 }
153
154                 if (remaining)
155                         return -EFAULT;
156
157                 buf += sz;
158                 p += sz;
159                 count -= sz;
160                 read += sz;
161         }
162
163         *ppos += read;
164         return read;
165 }
166
167 static ssize_t write_mem(struct file *file, const char __user *buf,
168                          size_t count, loff_t *ppos)
169 {
170         unsigned long p = *ppos;
171         ssize_t written, sz;
172         unsigned long copied;
173         void *ptr;
174
175         if (!valid_phys_addr_range(p, count))
176                 return -EFAULT;
177
178         written = 0;
179
180 #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
181         /* we don't have page 0 mapped on sparc and m68k.. */
182         if (p < PAGE_SIZE) {
183                 sz = size_inside_page(p, count);
184                 /* Hmm. Do something? */
185                 buf += sz;
186                 p += sz;
187                 count -= sz;
188                 written += sz;
189         }
190 #endif
191
192         while (count > 0) {
193                 int allowed;
194
195                 sz = size_inside_page(p, count);
196
197                 allowed = page_is_allowed(p >> PAGE_SHIFT);
198                 if (!allowed)
199                         return -EPERM;
200
201                 /* Skip actual writing when a page is marked as restricted. */
202                 if (allowed == 1) {
203                         /*
204                          * On ia64 if a page has been mapped somewhere as
205                          * uncached, then it must also be accessed uncached
206                          * by the kernel or data corruption may occur.
207                          */
208                         ptr = xlate_dev_mem_ptr(p);
209                         if (!ptr) {
210                                 if (written)
211                                         break;
212                                 return -EFAULT;
213                         }
214
215                         copied = copy_from_user(ptr, buf, sz);
216                         unxlate_dev_mem_ptr(p, ptr);
217                         if (copied) {
218                                 written += sz - copied;
219                                 if (written)
220                                         break;
221                                 return -EFAULT;
222                         }
223                 }
224
225                 buf += sz;
226                 p += sz;
227                 count -= sz;
228                 written += sz;
229         }
230
231         *ppos += written;
232         return written;
233 }
234
235 int __weak phys_mem_access_prot_allowed(struct file *file,
236         unsigned long pfn, unsigned long size, pgprot_t *vma_prot)
237 {
238         return 1;
239 }
240
241 #ifndef __HAVE_PHYS_MEM_ACCESS_PROT
242
243 /*
244  * Architectures vary in how they handle caching for addresses
245  * outside of main memory.
246  *
247  */
248 #ifdef pgprot_noncached
249 static int uncached_access(struct file *file, unsigned long addr)
250 {
251 #if defined(CONFIG_IA64)
252         /*
253          * On ia64, we ignore O_DSYNC because we cannot tolerate memory
254          * attribute aliases.
255          */
256         return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
257 #elif defined(CONFIG_MIPS)
258         {
259                 extern int __uncached_access(struct file *file,
260                                              unsigned long addr);
261
262                 return __uncached_access(file, addr);
263         }
264 #else
265         /*
266          * Accessing memory above the top the kernel knows about or through a
267          * file pointer
268          * that was marked O_DSYNC will be done non-cached.
269          */
270         if (file->f_flags & O_DSYNC)
271                 return 1;
272         return addr >= __pa(high_memory);
273 #endif
274 }
275 #endif
276
277 static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
278                                      unsigned long size, pgprot_t vma_prot)
279 {
280 #ifdef pgprot_noncached
281         unsigned long offset = pfn << PAGE_SHIFT;
282
283         if (uncached_access(file, offset))
284                 return pgprot_noncached(vma_prot);
285 #endif
286         return vma_prot;
287 }
288 #endif
289
290 #ifndef CONFIG_MMU
291 static unsigned long get_unmapped_area_mem(struct file *file,
292                                            unsigned long addr,
293                                            unsigned long len,
294                                            unsigned long pgoff,
295                                            unsigned long flags)
296 {
297         if (!valid_mmap_phys_addr_range(pgoff, len))
298                 return (unsigned long) -EINVAL;
299         return pgoff << PAGE_SHIFT;
300 }
301
302 /* can't do an in-place private mapping if there's no MMU */
303 static inline int private_mapping_ok(struct vm_area_struct *vma)
304 {
305         return vma->vm_flags & VM_MAYSHARE;
306 }
307 #else
308 #define get_unmapped_area_mem   NULL
309
310 static inline int private_mapping_ok(struct vm_area_struct *vma)
311 {
312         return 1;
313 }
314 #endif
315
316 static const struct vm_operations_struct mmap_mem_ops = {
317 #ifdef CONFIG_HAVE_IOREMAP_PROT
318         .access = generic_access_phys
319 #endif
320 };
321
322 static int mmap_mem(struct file *file, struct vm_area_struct *vma)
323 {
324         size_t size = vma->vm_end - vma->vm_start;
325
326         if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
327                 return -EINVAL;
328
329         if (!private_mapping_ok(vma))
330                 return -ENOSYS;
331
332         if (!range_is_allowed(vma->vm_pgoff, size))
333                 return -EPERM;
334
335         if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
336                                                 &vma->vm_page_prot))
337                 return -EINVAL;
338
339         vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
340                                                  size,
341                                                  vma->vm_page_prot);
342
343         vma->vm_ops = &mmap_mem_ops;
344
345         /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
346         if (remap_pfn_range(vma,
347                             vma->vm_start,
348                             vma->vm_pgoff,
349                             size,
350                             vma->vm_page_prot)) {
351                 return -EAGAIN;
352         }
353         return 0;
354 }
355
356 #ifdef CONFIG_DEVKMEM
357 static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
358 {
359         unsigned long pfn;
360
361         /* Turn a kernel-virtual address into a physical page frame */
362         pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
363
364         /*
365          * RED-PEN: on some architectures there is more mapped memory than
366          * available in mem_map which pfn_valid checks for. Perhaps should add a
367          * new macro here.
368          *
369          * RED-PEN: vmalloc is not supported right now.
370          */
371         if (!pfn_valid(pfn))
372                 return -EIO;
373
374         vma->vm_pgoff = pfn;
375         return mmap_mem(file, vma);
376 }
377 #endif
378
379 #ifdef CONFIG_CRASH_DUMP
380 /*
381  * Read memory corresponding to the old kernel.
382  */
383 static ssize_t read_oldmem(struct file *file, char __user *buf,
384                                 size_t count, loff_t *ppos)
385 {
386         unsigned long pfn, offset;
387         size_t read = 0, csize;
388         int rc = 0;
389
390         while (count) {
391                 pfn = *ppos / PAGE_SIZE;
392                 if (pfn > saved_max_pfn)
393                         return read;
394
395                 offset = (unsigned long)(*ppos % PAGE_SIZE);
396                 if (count > PAGE_SIZE - offset)
397                         csize = PAGE_SIZE - offset;
398                 else
399                         csize = count;
400
401                 rc = copy_oldmem_page(pfn, buf, csize, offset, 1);
402                 if (rc < 0)
403                         return rc;
404                 buf += csize;
405                 *ppos += csize;
406                 read += csize;
407                 count -= csize;
408         }
409         return read;
410 }
411 #endif
412
413 #ifdef CONFIG_DEVKMEM
414 /*
415  * This function reads the *virtual* memory as seen by the kernel.
416  */
417 static ssize_t read_kmem(struct file *file, char __user *buf,
418                          size_t count, loff_t *ppos)
419 {
420         unsigned long p = *ppos;
421         ssize_t low_count, read, sz;
422         char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
423         int err = 0;
424
425         read = 0;
426         if (p < (unsigned long) high_memory) {
427                 low_count = count;
428                 if (count > (unsigned long)high_memory - p)
429                         low_count = (unsigned long)high_memory - p;
430
431 #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
432                 /* we don't have page 0 mapped on sparc and m68k.. */
433                 if (p < PAGE_SIZE && low_count > 0) {
434                         sz = size_inside_page(p, low_count);
435                         if (clear_user(buf, sz))
436                                 return -EFAULT;
437                         buf += sz;
438                         p += sz;
439                         read += sz;
440                         low_count -= sz;
441                         count -= sz;
442                 }
443 #endif
444                 while (low_count > 0) {
445                         sz = size_inside_page(p, low_count);
446
447                         /*
448                          * On ia64 if a page has been mapped somewhere as
449                          * uncached, then it must also be accessed uncached
450                          * by the kernel or data corruption may occur
451                          */
452                         kbuf = xlate_dev_kmem_ptr((char *)p);
453
454                         if (copy_to_user(buf, kbuf, sz))
455                                 return -EFAULT;
456                         buf += sz;
457                         p += sz;
458                         read += sz;
459                         low_count -= sz;
460                         count -= sz;
461                 }
462         }
463
464         if (count > 0) {
465                 kbuf = (char *)__get_free_page(GFP_KERNEL);
466                 if (!kbuf)
467                         return -ENOMEM;
468                 while (count > 0) {
469                         sz = size_inside_page(p, count);
470                         if (!is_vmalloc_or_module_addr((void *)p)) {
471                                 err = -ENXIO;
472                                 break;
473                         }
474                         sz = vread(kbuf, (char *)p, sz);
475                         if (!sz)
476                                 break;
477                         if (copy_to_user(buf, kbuf, sz)) {
478                                 err = -EFAULT;
479                                 break;
480                         }
481                         count -= sz;
482                         buf += sz;
483                         read += sz;
484                         p += sz;
485                 }
486                 free_page((unsigned long)kbuf);
487         }
488         *ppos = p;
489         return read ? read : err;
490 }
491
492
493 static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
494                                 size_t count, loff_t *ppos)
495 {
496         ssize_t written, sz;
497         unsigned long copied;
498
499         written = 0;
500 #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
501         /* we don't have page 0 mapped on sparc and m68k.. */
502         if (p < PAGE_SIZE) {
503                 sz = size_inside_page(p, count);
504                 /* Hmm. Do something? */
505                 buf += sz;
506                 p += sz;
507                 count -= sz;
508                 written += sz;
509         }
510 #endif
511
512         while (count > 0) {
513                 char *ptr;
514
515                 sz = size_inside_page(p, count);
516
517                 /*
518                  * On ia64 if a page has been mapped somewhere as uncached, then
519                  * it must also be accessed uncached by the kernel or data
520                  * corruption may occur.
521                  */
522                 ptr = xlate_dev_kmem_ptr((char *)p);
523
524                 copied = copy_from_user(ptr, buf, sz);
525                 if (copied) {
526                         written += sz - copied;
527                         if (written)
528                                 break;
529                         return -EFAULT;
530                 }
531                 buf += sz;
532                 p += sz;
533                 count -= sz;
534                 written += sz;
535         }
536
537         *ppos += written;
538         return written;
539 }
540
541 /*
542  * This function writes to the *virtual* memory as seen by the kernel.
543  */
544 static ssize_t write_kmem(struct file *file, const char __user *buf,
545                           size_t count, loff_t *ppos)
546 {
547         unsigned long p = *ppos;
548         ssize_t wrote = 0;
549         ssize_t virtr = 0;
550         char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
551         int err = 0;
552
553         if (p < (unsigned long) high_memory) {
554                 unsigned long to_write = min_t(unsigned long, count,
555                                                (unsigned long)high_memory - p);
556                 wrote = do_write_kmem(p, buf, to_write, ppos);
557                 if (wrote != to_write)
558                         return wrote;
559                 p += wrote;
560                 buf += wrote;
561                 count -= wrote;
562         }
563
564         if (count > 0) {
565                 kbuf = (char *)__get_free_page(GFP_KERNEL);
566                 if (!kbuf)
567                         return wrote ? wrote : -ENOMEM;
568                 while (count > 0) {
569                         unsigned long sz = size_inside_page(p, count);
570                         unsigned long n;
571
572                         if (!is_vmalloc_or_module_addr((void *)p)) {
573                                 err = -ENXIO;
574                                 break;
575                         }
576                         n = copy_from_user(kbuf, buf, sz);
577                         if (n) {
578                                 err = -EFAULT;
579                                 break;
580                         }
581                         vwrite(kbuf, (char *)p, sz);
582                         count -= sz;
583                         buf += sz;
584                         virtr += sz;
585                         p += sz;
586                 }
587                 free_page((unsigned long)kbuf);
588         }
589
590         *ppos = p;
591         return virtr + wrote ? : err;
592 }
593 #endif
594
595 #ifdef CONFIG_DEVPORT
596 static ssize_t read_port(struct file *file, char __user *buf,
597                          size_t count, loff_t *ppos)
598 {
599         unsigned long i = *ppos;
600         char __user *tmp = buf;
601
602         if (!access_ok(VERIFY_WRITE, buf, count))
603                 return -EFAULT;
604         while (count-- > 0 && i < 65536) {
605                 if (__put_user(inb(i), tmp) < 0)
606                         return -EFAULT;
607                 i++;
608                 tmp++;
609         }
610         *ppos = i;
611         return tmp-buf;
612 }
613
614 static ssize_t write_port(struct file *file, const char __user *buf,
615                           size_t count, loff_t *ppos)
616 {
617         unsigned long i = *ppos;
618         const char __user * tmp = buf;
619
620         if (!access_ok(VERIFY_READ, buf, count))
621                 return -EFAULT;
622         while (count-- > 0 && i < 65536) {
623                 char c;
624                 if (__get_user(c, tmp)) {
625                         if (tmp > buf)
626                                 break;
627                         return -EFAULT;
628                 }
629                 outb(c, i);
630                 i++;
631                 tmp++;
632         }
633         *ppos = i;
634         return tmp-buf;
635 }
636 #endif
637
638 static ssize_t read_null(struct file *file, char __user *buf,
639                          size_t count, loff_t *ppos)
640 {
641         return 0;
642 }
643
644 static ssize_t write_null(struct file *file, const char __user *buf,
645                           size_t count, loff_t *ppos)
646 {
647         return count;
648 }
649
650 static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
651                         struct splice_desc *sd)
652 {
653         return sd->len;
654 }
655
656 static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out,
657                                  loff_t *ppos, size_t len, unsigned int flags)
658 {
659         return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
660 }
661
662 static ssize_t read_zero(struct file *file, char __user *buf,
663                          size_t count, loff_t *ppos)
664 {
665         size_t written;
666
667         if (!count)
668                 return 0;
669
670         if (!access_ok(VERIFY_WRITE, buf, count))
671                 return -EFAULT;
672
673         written = 0;
674         while (count) {
675                 unsigned long unwritten;
676                 size_t chunk = count;
677
678                 if (chunk > PAGE_SIZE)
679                         chunk = PAGE_SIZE;      /* Just for latency reasons */
680                 unwritten = __clear_user(buf, chunk);
681                 written += chunk - unwritten;
682                 if (unwritten)
683                         break;
684                 if (signal_pending(current))
685                         return written ? written : -ERESTARTSYS;
686                 buf += chunk;
687                 count -= chunk;
688                 cond_resched();
689         }
690         return written ? written : -EFAULT;
691 }
692
693 static int mmap_zero(struct file *file, struct vm_area_struct *vma)
694 {
695 #ifndef CONFIG_MMU
696         return -ENOSYS;
697 #endif
698         if (vma->vm_flags & VM_SHARED)
699                 return shmem_zero_setup(vma);
700         return 0;
701 }
702
703 static ssize_t write_full(struct file *file, const char __user *buf,
704                           size_t count, loff_t *ppos)
705 {
706         return -ENOSPC;
707 }
708
709 /*
710  * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
711  * can fopen() both devices with "a" now.  This was previously impossible.
712  * -- SRB.
713  */
714 static loff_t null_lseek(struct file *file, loff_t offset, int orig)
715 {
716         return file->f_pos = 0;
717 }
718
719 /*
720  * The memory devices use the full 32/64 bits of the offset, and so we cannot
721  * check against negative addresses: they are ok. The return value is weird,
722  * though, in that case (0).
723  *
724  * also note that seeking relative to the "end of file" isn't supported:
725  * it has no meaning, so it returns -EINVAL.
726  */
727 static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
728 {
729         loff_t ret;
730
731         mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
732         switch (orig) {
733         case SEEK_CUR:
734                 offset += file->f_pos;
735         case SEEK_SET:
736                 /* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */
737                 if ((unsigned long long)offset >= ~0xFFFULL) {
738                         ret = -EOVERFLOW;
739                         break;
740                 }
741                 file->f_pos = offset;
742                 ret = file->f_pos;
743                 force_successful_syscall_return();
744                 break;
745         default:
746                 ret = -EINVAL;
747         }
748         mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
749         return ret;
750 }
751
752 static int open_port(struct inode * inode, struct file * filp)
753 {
754         return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
755 }
756
757 #define zero_lseek      null_lseek
758 #define full_lseek      null_lseek
759 #define write_zero      write_null
760 #define read_full       read_zero
761 #define open_mem        open_port
762 #define open_kmem       open_mem
763 #define open_oldmem     open_mem
764
765 static const struct file_operations mem_fops = {
766         .llseek         = memory_lseek,
767         .read           = read_mem,
768         .write          = write_mem,
769         .mmap           = mmap_mem,
770         .open           = open_mem,
771         .get_unmapped_area = get_unmapped_area_mem,
772 };
773
774 #ifdef CONFIG_DEVKMEM
775 static const struct file_operations kmem_fops = {
776         .llseek         = memory_lseek,
777         .read           = read_kmem,
778         .write          = write_kmem,
779         .mmap           = mmap_kmem,
780         .open           = open_kmem,
781         .get_unmapped_area = get_unmapped_area_mem,
782 };
783 #endif
784
785 static const struct file_operations null_fops = {
786         .llseek         = null_lseek,
787         .read           = read_null,
788         .write          = write_null,
789         .splice_write   = splice_write_null,
790 };
791
792 #ifdef CONFIG_DEVPORT
793 static const struct file_operations port_fops = {
794         .llseek         = memory_lseek,
795         .read           = read_port,
796         .write          = write_port,
797         .open           = open_port,
798 };
799 #endif
800
801 static const struct file_operations zero_fops = {
802         .llseek         = zero_lseek,
803         .read           = read_zero,
804         .write          = write_zero,
805         .mmap           = mmap_zero,
806 };
807
808 /*
809  * capabilities for /dev/zero
810  * - permits private mappings, "copies" are taken of the source of zeros
811  * - no writeback happens
812  */
813 static struct backing_dev_info zero_bdi = {
814         .name           = "char/mem",
815         .capabilities   = BDI_CAP_MAP_COPY | BDI_CAP_NO_ACCT_AND_WRITEBACK,
816 };
817
818 static const struct file_operations full_fops = {
819         .llseek         = full_lseek,
820         .read           = read_full,
821         .write          = write_full,
822 };
823
824 #ifdef CONFIG_CRASH_DUMP
825 static const struct file_operations oldmem_fops = {
826         .read   = read_oldmem,
827         .open   = open_oldmem,
828         .llseek = default_llseek,
829 };
830 #endif
831
832 static ssize_t kmsg_writev(struct kiocb *iocb, const struct iovec *iv,
833                            unsigned long count, loff_t pos)
834 {
835         char *line, *p;
836         int i;
837         ssize_t ret = -EFAULT;
838         size_t len = iov_length(iv, count);
839
840         line = kmalloc(len + 1, GFP_KERNEL);
841         if (line == NULL)
842                 return -ENOMEM;
843
844         /*
845          * copy all vectors into a single string, to ensure we do
846          * not interleave our log line with other printk calls
847          */
848         p = line;
849         for (i = 0; i < count; i++) {
850                 if (copy_from_user(p, iv[i].iov_base, iv[i].iov_len))
851                         goto out;
852                 p += iv[i].iov_len;
853         }
854         p[0] = '\0';
855
856         ret = printk("%s", line);
857         /* printk can add a prefix */
858         if (ret > len)
859                 ret = len;
860 out:
861         kfree(line);
862         return ret;
863 }
864
865 static const struct file_operations kmsg_fops = {
866         .aio_write = kmsg_writev,
867         .llseek = noop_llseek,
868 };
869
870 static const struct memdev {
871         const char *name;
872         mode_t mode;
873         const struct file_operations *fops;
874         struct backing_dev_info *dev_info;
875 } devlist[] = {
876          [1] = { "mem", 0, &mem_fops, &directly_mappable_cdev_bdi },
877 #ifdef CONFIG_DEVKMEM
878          [2] = { "kmem", 0, &kmem_fops, &directly_mappable_cdev_bdi },
879 #endif
880          [3] = { "null", 0666, &null_fops, NULL },
881 #ifdef CONFIG_DEVPORT
882          [4] = { "port", 0, &port_fops, NULL },
883 #endif
884          [5] = { "zero", 0666, &zero_fops, &zero_bdi },
885          [7] = { "full", 0666, &full_fops, NULL },
886          [8] = { "random", 0666, &random_fops, NULL },
887          [9] = { "urandom", 0666, &urandom_fops, NULL },
888         [11] = { "kmsg", 0, &kmsg_fops, NULL },
889 #ifdef CONFIG_CRASH_DUMP
890         [12] = { "oldmem", 0, &oldmem_fops, NULL },
891 #endif
892 };
893
894 static int memory_open(struct inode *inode, struct file *filp)
895 {
896         int minor;
897         const struct memdev *dev;
898
899         minor = iminor(inode);
900         if (minor >= ARRAY_SIZE(devlist))
901                 return -ENXIO;
902
903         dev = &devlist[minor];
904         if (!dev->fops)
905                 return -ENXIO;
906
907         filp->f_op = dev->fops;
908         if (dev->dev_info)
909                 filp->f_mapping->backing_dev_info = dev->dev_info;
910
911         /* Is /dev/mem or /dev/kmem ? */
912         if (dev->dev_info == &directly_mappable_cdev_bdi)
913                 filp->f_mode |= FMODE_UNSIGNED_OFFSET;
914
915         if (dev->fops->open)
916                 return dev->fops->open(inode, filp);
917
918         return 0;
919 }
920
921 static const struct file_operations memory_fops = {
922         .open = memory_open,
923         .llseek = noop_llseek,
924 };
925
926 static char *mem_devnode(struct device *dev, mode_t *mode)
927 {
928         if (mode && devlist[MINOR(dev->devt)].mode)
929                 *mode = devlist[MINOR(dev->devt)].mode;
930         return NULL;
931 }
932
933 static struct class *mem_class;
934
935 static int __init chr_dev_init(void)
936 {
937         int minor;
938         int err;
939
940         err = bdi_init(&zero_bdi);
941         if (err)
942                 return err;
943
944         if (register_chrdev(MEM_MAJOR, "mem", &memory_fops))
945                 printk("unable to get major %d for memory devs\n", MEM_MAJOR);
946
947         mem_class = class_create(THIS_MODULE, "mem");
948         if (IS_ERR(mem_class))
949                 return PTR_ERR(mem_class);
950
951         mem_class->devnode = mem_devnode;
952         for (minor = 1; minor < ARRAY_SIZE(devlist); minor++) {
953                 if (!devlist[minor].name)
954                         continue;
955                 device_create(mem_class, NULL, MKDEV(MEM_MAJOR, minor),
956                               NULL, devlist[minor].name);
957         }
958
959         return tty_init();
960 }
961
962 fs_initcall(chr_dev_init);