xref: /linux/drivers/char/mem.c (revision 2b8232ce512105e28453f301d1510de8363bccd1)
/*
 *  linux/drivers/char/mem.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Added devfs support.
 *    Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
 *  Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
 */
10 
11 #include <linux/mm.h>
12 #include <linux/miscdevice.h>
13 #include <linux/slab.h>
14 #include <linux/vmalloc.h>
15 #include <linux/mman.h>
16 #include <linux/random.h>
17 #include <linux/init.h>
18 #include <linux/raw.h>
19 #include <linux/tty.h>
20 #include <linux/capability.h>
21 #include <linux/ptrace.h>
22 #include <linux/device.h>
23 #include <linux/highmem.h>
24 #include <linux/crash_dump.h>
25 #include <linux/backing-dev.h>
26 #include <linux/bootmem.h>
27 #include <linux/splice.h>
28 #include <linux/pfn.h>
29 
30 #include <asm/uaccess.h>
31 #include <asm/io.h>
32 
33 #ifdef CONFIG_IA64
34 # include <linux/efi.h>
35 #endif
36 
37 /*
38  * Architectures vary in how they handle caching for addresses
39  * outside of main memory.
40  *
41  */
42 static inline int uncached_access(struct file *file, unsigned long addr)
43 {
44 #if defined(__i386__)
45 	/*
46 	 * On the PPro and successors, the MTRRs are used to set
47 	 * memory types for physical addresses outside main memory,
48 	 * so blindly setting PCD or PWT on those pages is wrong.
49 	 * For Pentiums and earlier, the surround logic should disable
50 	 * caching for the high addresses through the KEN pin, but
51 	 * we maintain the tradition of paranoia in this code.
52 	 */
53 	if (file->f_flags & O_SYNC)
54 		return 1;
55  	return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
56 		  test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
57 		  test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
58 		  test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) )
59 	  && addr >= __pa(high_memory);
60 #elif defined(__x86_64__)
61 	/*
62 	 * This is broken because it can generate memory type aliases,
63 	 * which can cause cache corruptions
64 	 * But it is only available for root and we have to be bug-to-bug
65 	 * compatible with i386.
66 	 */
67 	if (file->f_flags & O_SYNC)
68 		return 1;
69 	/* same behaviour as i386. PAT always set to cached and MTRRs control the
70 	   caching behaviour.
71 	   Hopefully a full PAT implementation will fix that soon. */
72 	return 0;
73 #elif defined(CONFIG_IA64)
74 	/*
75 	 * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases.
76 	 */
77 	return !(efi_mem_attributes(addr) & EFI_MEMORY_WB);
78 #elif defined(CONFIG_MIPS)
79 	{
80 		extern int __uncached_access(struct file *file,
81 					     unsigned long addr);
82 
83 		return __uncached_access(file, addr);
84 	}
85 #else
86 	/*
87 	 * Accessing memory above the top the kernel knows about or through a file pointer
88 	 * that was marked O_SYNC will be done non-cached.
89 	 */
90 	if (file->f_flags & O_SYNC)
91 		return 1;
92 	return addr >= __pa(high_memory);
93 #endif
94 }
95 
96 #ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE
97 static inline int valid_phys_addr_range(unsigned long addr, size_t count)
98 {
99 	if (addr + count > __pa(high_memory))
100 		return 0;
101 
102 	return 1;
103 }
104 
/*
 * Default mmap range check, used when the architecture does not define
 * ARCH_HAS_VALID_PHYS_ADDR_RANGE: every (pfn, size) pair is accepted.
 *
 * Returns 1 unconditionally.
 */
static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size)
{
	return 1;
}
109 #endif
110 
111 /*
112  * This funcion reads the *physical* memory. The f_pos points directly to the
113  * memory location.
114  */
115 static ssize_t read_mem(struct file * file, char __user * buf,
116 			size_t count, loff_t *ppos)
117 {
118 	unsigned long p = *ppos;
119 	ssize_t read, sz;
120 	char *ptr;
121 
122 	if (!valid_phys_addr_range(p, count))
123 		return -EFAULT;
124 	read = 0;
125 #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
126 	/* we don't have page 0 mapped on sparc and m68k.. */
127 	if (p < PAGE_SIZE) {
128 		sz = PAGE_SIZE - p;
129 		if (sz > count)
130 			sz = count;
131 		if (sz > 0) {
132 			if (clear_user(buf, sz))
133 				return -EFAULT;
134 			buf += sz;
135 			p += sz;
136 			count -= sz;
137 			read += sz;
138 		}
139 	}
140 #endif
141 
142 	while (count > 0) {
143 		/*
144 		 * Handle first page in case it's not aligned
145 		 */
146 		if (-p & (PAGE_SIZE - 1))
147 			sz = -p & (PAGE_SIZE - 1);
148 		else
149 			sz = PAGE_SIZE;
150 
151 		sz = min_t(unsigned long, sz, count);
152 
153 		/*
154 		 * On ia64 if a page has been mapped somewhere as
155 		 * uncached, then it must also be accessed uncached
156 		 * by the kernel or data corruption may occur
157 		 */
158 		ptr = xlate_dev_mem_ptr(p);
159 
160 		if (copy_to_user(buf, ptr, sz))
161 			return -EFAULT;
162 		buf += sz;
163 		p += sz;
164 		count -= sz;
165 		read += sz;
166 	}
167 
168 	*ppos += read;
169 	return read;
170 }
171 
172 static ssize_t write_mem(struct file * file, const char __user * buf,
173 			 size_t count, loff_t *ppos)
174 {
175 	unsigned long p = *ppos;
176 	ssize_t written, sz;
177 	unsigned long copied;
178 	void *ptr;
179 
180 	if (!valid_phys_addr_range(p, count))
181 		return -EFAULT;
182 
183 	written = 0;
184 
185 #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
186 	/* we don't have page 0 mapped on sparc and m68k.. */
187 	if (p < PAGE_SIZE) {
188 		unsigned long sz = PAGE_SIZE - p;
189 		if (sz > count)
190 			sz = count;
191 		/* Hmm. Do something? */
192 		buf += sz;
193 		p += sz;
194 		count -= sz;
195 		written += sz;
196 	}
197 #endif
198 
199 	while (count > 0) {
200 		/*
201 		 * Handle first page in case it's not aligned
202 		 */
203 		if (-p & (PAGE_SIZE - 1))
204 			sz = -p & (PAGE_SIZE - 1);
205 		else
206 			sz = PAGE_SIZE;
207 
208 		sz = min_t(unsigned long, sz, count);
209 
210 		/*
211 		 * On ia64 if a page has been mapped somewhere as
212 		 * uncached, then it must also be accessed uncached
213 		 * by the kernel or data corruption may occur
214 		 */
215 		ptr = xlate_dev_mem_ptr(p);
216 
217 		copied = copy_from_user(ptr, buf, sz);
218 		if (copied) {
219 			written += sz - copied;
220 			if (written)
221 				break;
222 			return -EFAULT;
223 		}
224 		buf += sz;
225 		p += sz;
226 		count -= sz;
227 		written += sz;
228 	}
229 
230 	*ppos += written;
231 	return written;
232 }
233 
234 #ifndef __HAVE_PHYS_MEM_ACCESS_PROT
235 static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
236 				     unsigned long size, pgprot_t vma_prot)
237 {
238 #ifdef pgprot_noncached
239 	unsigned long offset = pfn << PAGE_SHIFT;
240 
241 	if (uncached_access(file, offset))
242 		return pgprot_noncached(vma_prot);
243 #endif
244 	return vma_prot;
245 }
246 #endif
247 
248 #ifndef CONFIG_MMU
249 static unsigned long get_unmapped_area_mem(struct file *file,
250 					   unsigned long addr,
251 					   unsigned long len,
252 					   unsigned long pgoff,
253 					   unsigned long flags)
254 {
255 	if (!valid_mmap_phys_addr_range(pgoff, len))
256 		return (unsigned long) -EINVAL;
257 	return pgoff << PAGE_SHIFT;
258 }
259 
260 /* can't do an in-place private mapping if there's no MMU */
261 static inline int private_mapping_ok(struct vm_area_struct *vma)
262 {
263 	return vma->vm_flags & VM_MAYSHARE;
264 }
265 #else
266 #define get_unmapped_area_mem	NULL
267 
/*
 * CONFIG_MMU variant of the private-mapping check: every mapping is
 * accepted.  Returns 1 unconditionally.
 */
static inline int private_mapping_ok(struct vm_area_struct *vma)
{
	return 1;
}
272 #endif
273 
274 static int mmap_mem(struct file * file, struct vm_area_struct * vma)
275 {
276 	size_t size = vma->vm_end - vma->vm_start;
277 
278 	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
279 		return -EINVAL;
280 
281 	if (!private_mapping_ok(vma))
282 		return -ENOSYS;
283 
284 	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
285 						 size,
286 						 vma->vm_page_prot);
287 
288 	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
289 	if (remap_pfn_range(vma,
290 			    vma->vm_start,
291 			    vma->vm_pgoff,
292 			    size,
293 			    vma->vm_page_prot))
294 		return -EAGAIN;
295 	return 0;
296 }
297 
298 static int mmap_kmem(struct file * file, struct vm_area_struct * vma)
299 {
300 	unsigned long pfn;
301 
302 	/* Turn a kernel-virtual address into a physical page frame */
303 	pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
304 
305 	/*
306 	 * RED-PEN: on some architectures there is more mapped memory
307 	 * than available in mem_map which pfn_valid checks
308 	 * for. Perhaps should add a new macro here.
309 	 *
310 	 * RED-PEN: vmalloc is not supported right now.
311 	 */
312 	if (!pfn_valid(pfn))
313 		return -EIO;
314 
315 	vma->vm_pgoff = pfn;
316 	return mmap_mem(file, vma);
317 }
318 
319 #ifdef CONFIG_CRASH_DUMP
320 /*
321  * Read memory corresponding to the old kernel.
322  */
323 static ssize_t read_oldmem(struct file *file, char __user *buf,
324 				size_t count, loff_t *ppos)
325 {
326 	unsigned long pfn, offset;
327 	size_t read = 0, csize;
328 	int rc = 0;
329 
330 	while (count) {
331 		pfn = *ppos / PAGE_SIZE;
332 		if (pfn > saved_max_pfn)
333 			return read;
334 
335 		offset = (unsigned long)(*ppos % PAGE_SIZE);
336 		if (count > PAGE_SIZE - offset)
337 			csize = PAGE_SIZE - offset;
338 		else
339 			csize = count;
340 
341 		rc = copy_oldmem_page(pfn, buf, csize, offset, 1);
342 		if (rc < 0)
343 			return rc;
344 		buf += csize;
345 		*ppos += csize;
346 		read += csize;
347 		count -= csize;
348 	}
349 	return read;
350 }
351 #endif
352 
353 extern long vread(char *buf, char *addr, unsigned long count);
354 extern long vwrite(char *buf, char *addr, unsigned long count);
355 
356 /*
357  * This function reads the *virtual* memory as seen by the kernel.
358  */
359 static ssize_t read_kmem(struct file *file, char __user *buf,
360 			 size_t count, loff_t *ppos)
361 {
362 	unsigned long p = *ppos;
363 	ssize_t low_count, read, sz;
364 	char * kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
365 
366 	read = 0;
367 	if (p < (unsigned long) high_memory) {
368 		low_count = count;
369 		if (count > (unsigned long) high_memory - p)
370 			low_count = (unsigned long) high_memory - p;
371 
372 #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
373 		/* we don't have page 0 mapped on sparc and m68k.. */
374 		if (p < PAGE_SIZE && low_count > 0) {
375 			size_t tmp = PAGE_SIZE - p;
376 			if (tmp > low_count) tmp = low_count;
377 			if (clear_user(buf, tmp))
378 				return -EFAULT;
379 			buf += tmp;
380 			p += tmp;
381 			read += tmp;
382 			low_count -= tmp;
383 			count -= tmp;
384 		}
385 #endif
386 		while (low_count > 0) {
387 			/*
388 			 * Handle first page in case it's not aligned
389 			 */
390 			if (-p & (PAGE_SIZE - 1))
391 				sz = -p & (PAGE_SIZE - 1);
392 			else
393 				sz = PAGE_SIZE;
394 
395 			sz = min_t(unsigned long, sz, low_count);
396 
397 			/*
398 			 * On ia64 if a page has been mapped somewhere as
399 			 * uncached, then it must also be accessed uncached
400 			 * by the kernel or data corruption may occur
401 			 */
402 			kbuf = xlate_dev_kmem_ptr((char *)p);
403 
404 			if (copy_to_user(buf, kbuf, sz))
405 				return -EFAULT;
406 			buf += sz;
407 			p += sz;
408 			read += sz;
409 			low_count -= sz;
410 			count -= sz;
411 		}
412 	}
413 
414 	if (count > 0) {
415 		kbuf = (char *)__get_free_page(GFP_KERNEL);
416 		if (!kbuf)
417 			return -ENOMEM;
418 		while (count > 0) {
419 			int len = count;
420 
421 			if (len > PAGE_SIZE)
422 				len = PAGE_SIZE;
423 			len = vread(kbuf, (char *)p, len);
424 			if (!len)
425 				break;
426 			if (copy_to_user(buf, kbuf, len)) {
427 				free_page((unsigned long)kbuf);
428 				return -EFAULT;
429 			}
430 			count -= len;
431 			buf += len;
432 			read += len;
433 			p += len;
434 		}
435 		free_page((unsigned long)kbuf);
436 	}
437  	*ppos = p;
438  	return read;
439 }
440 
441 
442 static inline ssize_t
443 do_write_kmem(void *p, unsigned long realp, const char __user * buf,
444 	      size_t count, loff_t *ppos)
445 {
446 	ssize_t written, sz;
447 	unsigned long copied;
448 
449 	written = 0;
450 #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
451 	/* we don't have page 0 mapped on sparc and m68k.. */
452 	if (realp < PAGE_SIZE) {
453 		unsigned long sz = PAGE_SIZE - realp;
454 		if (sz > count)
455 			sz = count;
456 		/* Hmm. Do something? */
457 		buf += sz;
458 		p += sz;
459 		realp += sz;
460 		count -= sz;
461 		written += sz;
462 	}
463 #endif
464 
465 	while (count > 0) {
466 		char *ptr;
467 		/*
468 		 * Handle first page in case it's not aligned
469 		 */
470 		if (-realp & (PAGE_SIZE - 1))
471 			sz = -realp & (PAGE_SIZE - 1);
472 		else
473 			sz = PAGE_SIZE;
474 
475 		sz = min_t(unsigned long, sz, count);
476 
477 		/*
478 		 * On ia64 if a page has been mapped somewhere as
479 		 * uncached, then it must also be accessed uncached
480 		 * by the kernel or data corruption may occur
481 		 */
482 		ptr = xlate_dev_kmem_ptr(p);
483 
484 		copied = copy_from_user(ptr, buf, sz);
485 		if (copied) {
486 			written += sz - copied;
487 			if (written)
488 				break;
489 			return -EFAULT;
490 		}
491 		buf += sz;
492 		p += sz;
493 		realp += sz;
494 		count -= sz;
495 		written += sz;
496 	}
497 
498 	*ppos += written;
499 	return written;
500 }
501 
502 
503 /*
504  * This function writes to the *virtual* memory as seen by the kernel.
505  */
506 static ssize_t write_kmem(struct file * file, const char __user * buf,
507 			  size_t count, loff_t *ppos)
508 {
509 	unsigned long p = *ppos;
510 	ssize_t wrote = 0;
511 	ssize_t virtr = 0;
512 	ssize_t written;
513 	char * kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
514 
515 	if (p < (unsigned long) high_memory) {
516 
517 		wrote = count;
518 		if (count > (unsigned long) high_memory - p)
519 			wrote = (unsigned long) high_memory - p;
520 
521 		written = do_write_kmem((void*)p, p, buf, wrote, ppos);
522 		if (written != wrote)
523 			return written;
524 		wrote = written;
525 		p += wrote;
526 		buf += wrote;
527 		count -= wrote;
528 	}
529 
530 	if (count > 0) {
531 		kbuf = (char *)__get_free_page(GFP_KERNEL);
532 		if (!kbuf)
533 			return wrote ? wrote : -ENOMEM;
534 		while (count > 0) {
535 			int len = count;
536 
537 			if (len > PAGE_SIZE)
538 				len = PAGE_SIZE;
539 			if (len) {
540 				written = copy_from_user(kbuf, buf, len);
541 				if (written) {
542 					if (wrote + virtr)
543 						break;
544 					free_page((unsigned long)kbuf);
545 					return -EFAULT;
546 				}
547 			}
548 			len = vwrite(kbuf, (char *)p, len);
549 			count -= len;
550 			buf += len;
551 			virtr += len;
552 			p += len;
553 		}
554 		free_page((unsigned long)kbuf);
555 	}
556 
557  	*ppos = p;
558  	return virtr + wrote;
559 }
560 
561 #ifdef CONFIG_DEVPORT
562 static ssize_t read_port(struct file * file, char __user * buf,
563 			 size_t count, loff_t *ppos)
564 {
565 	unsigned long i = *ppos;
566 	char __user *tmp = buf;
567 
568 	if (!access_ok(VERIFY_WRITE, buf, count))
569 		return -EFAULT;
570 	while (count-- > 0 && i < 65536) {
571 		if (__put_user(inb(i),tmp) < 0)
572 			return -EFAULT;
573 		i++;
574 		tmp++;
575 	}
576 	*ppos = i;
577 	return tmp-buf;
578 }
579 
580 static ssize_t write_port(struct file * file, const char __user * buf,
581 			  size_t count, loff_t *ppos)
582 {
583 	unsigned long i = *ppos;
584 	const char __user * tmp = buf;
585 
586 	if (!access_ok(VERIFY_READ,buf,count))
587 		return -EFAULT;
588 	while (count-- > 0 && i < 65536) {
589 		char c;
590 		if (__get_user(c, tmp)) {
591 			if (tmp > buf)
592 				break;
593 			return -EFAULT;
594 		}
595 		outb(c,i);
596 		i++;
597 		tmp++;
598 	}
599 	*ppos = i;
600 	return tmp-buf;
601 }
602 #endif
603 
604 static ssize_t read_null(struct file * file, char __user * buf,
605 			 size_t count, loff_t *ppos)
606 {
607 	return 0;
608 }
609 
610 static ssize_t write_null(struct file * file, const char __user * buf,
611 			  size_t count, loff_t *ppos)
612 {
613 	return count;
614 }
615 
616 static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
617 			struct splice_desc *sd)
618 {
619 	return sd->len;
620 }
621 
622 static ssize_t splice_write_null(struct pipe_inode_info *pipe,struct file *out,
623 				 loff_t *ppos, size_t len, unsigned int flags)
624 {
625 	return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
626 }
627 
628 #ifdef CONFIG_MMU
629 /*
630  * For fun, we are using the MMU for this.
631  */
632 static inline size_t read_zero_pagealigned(char __user * buf, size_t size)
633 {
634 	struct mm_struct *mm;
635 	struct vm_area_struct * vma;
636 	unsigned long addr=(unsigned long)buf;
637 
638 	mm = current->mm;
639 	/* Oops, this was forgotten before. -ben */
640 	down_read(&mm->mmap_sem);
641 
642 	/* For private mappings, just map in zero pages. */
643 	for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
644 		unsigned long count;
645 
646 		if (vma->vm_start > addr || (vma->vm_flags & VM_WRITE) == 0)
647 			goto out_up;
648 		if (vma->vm_flags & (VM_SHARED | VM_HUGETLB))
649 			break;
650 		count = vma->vm_end - addr;
651 		if (count > size)
652 			count = size;
653 
654 		zap_page_range(vma, addr, count, NULL);
655         	if (zeromap_page_range(vma, addr, count, PAGE_COPY))
656 			break;
657 
658 		size -= count;
659 		buf += count;
660 		addr += count;
661 		if (size == 0)
662 			goto out_up;
663 	}
664 
665 	up_read(&mm->mmap_sem);
666 
667 	/* The shared case is hard. Let's do the conventional zeroing. */
668 	do {
669 		unsigned long unwritten = clear_user(buf, PAGE_SIZE);
670 		if (unwritten)
671 			return size + unwritten - PAGE_SIZE;
672 		cond_resched();
673 		buf += PAGE_SIZE;
674 		size -= PAGE_SIZE;
675 	} while (size);
676 
677 	return size;
678 out_up:
679 	up_read(&mm->mmap_sem);
680 	return size;
681 }
682 
683 static ssize_t read_zero(struct file * file, char __user * buf,
684 			 size_t count, loff_t *ppos)
685 {
686 	unsigned long left, unwritten, written = 0;
687 
688 	if (!count)
689 		return 0;
690 
691 	if (!access_ok(VERIFY_WRITE, buf, count))
692 		return -EFAULT;
693 
694 	left = count;
695 
696 	/* do we want to be clever? Arbitrary cut-off */
697 	if (count >= PAGE_SIZE*4) {
698 		unsigned long partial;
699 
700 		/* How much left of the page? */
701 		partial = (PAGE_SIZE-1) & -(unsigned long) buf;
702 		unwritten = clear_user(buf, partial);
703 		written = partial - unwritten;
704 		if (unwritten)
705 			goto out;
706 		left -= partial;
707 		buf += partial;
708 		unwritten = read_zero_pagealigned(buf, left & PAGE_MASK);
709 		written += (left & PAGE_MASK) - unwritten;
710 		if (unwritten)
711 			goto out;
712 		buf += left & PAGE_MASK;
713 		left &= ~PAGE_MASK;
714 	}
715 	unwritten = clear_user(buf, left);
716 	written += left - unwritten;
717 out:
718 	return written ? written : -EFAULT;
719 }
720 
721 static int mmap_zero(struct file * file, struct vm_area_struct * vma)
722 {
723 	int err;
724 
725 	if (vma->vm_flags & VM_SHARED)
726 		return shmem_zero_setup(vma);
727 	err = zeromap_page_range(vma, vma->vm_start,
728 			vma->vm_end - vma->vm_start, vma->vm_page_prot);
729 	BUG_ON(err == -EEXIST);
730 	return err;
731 }
732 #else /* CONFIG_MMU */
733 static ssize_t read_zero(struct file * file, char * buf,
734 			 size_t count, loff_t *ppos)
735 {
736 	size_t todo = count;
737 
738 	while (todo) {
739 		size_t chunk = todo;
740 
741 		if (chunk > 4096)
742 			chunk = 4096;	/* Just for latency reasons */
743 		if (clear_user(buf, chunk))
744 			return -EFAULT;
745 		buf += chunk;
746 		todo -= chunk;
747 		cond_resched();
748 	}
749 	return count;
750 }
751 
752 static int mmap_zero(struct file * file, struct vm_area_struct * vma)
753 {
754 	return -ENOSYS;
755 }
756 #endif /* CONFIG_MMU */
757 
758 static ssize_t write_full(struct file * file, const char __user * buf,
759 			  size_t count, loff_t *ppos)
760 {
761 	return -ENOSPC;
762 }
763 
764 /*
765  * Special lseek() function for /dev/null and /dev/zero.  Most notably, you
766  * can fopen() both devices with "a" now.  This was previously impossible.
767  * -- SRB.
768  */
769 
770 static loff_t null_lseek(struct file * file, loff_t offset, int orig)
771 {
772 	return file->f_pos = 0;
773 }
774 
775 /*
776  * The memory devices use the full 32/64 bits of the offset, and so we cannot
777  * check against negative addresses: they are ok. The return value is weird,
778  * though, in that case (0).
779  *
780  * also note that seeking relative to the "end of file" isn't supported:
781  * it has no meaning, so it returns -EINVAL.
782  */
783 static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
784 {
785 	loff_t ret;
786 
787 	mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
788 	switch (orig) {
789 		case 0:
790 			file->f_pos = offset;
791 			ret = file->f_pos;
792 			force_successful_syscall_return();
793 			break;
794 		case 1:
795 			file->f_pos += offset;
796 			ret = file->f_pos;
797 			force_successful_syscall_return();
798 			break;
799 		default:
800 			ret = -EINVAL;
801 	}
802 	mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
803 	return ret;
804 }
805 
806 static int open_port(struct inode * inode, struct file * filp)
807 {
808 	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
809 }
810 
811 #define zero_lseek	null_lseek
812 #define full_lseek      null_lseek
813 #define write_zero	write_null
814 #define read_full       read_zero
815 #define open_mem	open_port
816 #define open_kmem	open_mem
817 #define open_oldmem	open_mem
818 
819 static const struct file_operations mem_fops = {
820 	.llseek		= memory_lseek,
821 	.read		= read_mem,
822 	.write		= write_mem,
823 	.mmap		= mmap_mem,
824 	.open		= open_mem,
825 	.get_unmapped_area = get_unmapped_area_mem,
826 };
827 
828 static const struct file_operations kmem_fops = {
829 	.llseek		= memory_lseek,
830 	.read		= read_kmem,
831 	.write		= write_kmem,
832 	.mmap		= mmap_kmem,
833 	.open		= open_kmem,
834 	.get_unmapped_area = get_unmapped_area_mem,
835 };
836 
837 static const struct file_operations null_fops = {
838 	.llseek		= null_lseek,
839 	.read		= read_null,
840 	.write		= write_null,
841 	.splice_write	= splice_write_null,
842 };
843 
844 #ifdef CONFIG_DEVPORT
845 static const struct file_operations port_fops = {
846 	.llseek		= memory_lseek,
847 	.read		= read_port,
848 	.write		= write_port,
849 	.open		= open_port,
850 };
851 #endif
852 
853 static const struct file_operations zero_fops = {
854 	.llseek		= zero_lseek,
855 	.read		= read_zero,
856 	.write		= write_zero,
857 	.mmap		= mmap_zero,
858 };
859 
860 /*
861  * capabilities for /dev/zero
862  * - permits private mappings, "copies" are taken of the source of zeros
863  */
864 static struct backing_dev_info zero_bdi = {
865 	.capabilities	= BDI_CAP_MAP_COPY,
866 };
867 
868 static const struct file_operations full_fops = {
869 	.llseek		= full_lseek,
870 	.read		= read_full,
871 	.write		= write_full,
872 };
873 
874 #ifdef CONFIG_CRASH_DUMP
875 static const struct file_operations oldmem_fops = {
876 	.read	= read_oldmem,
877 	.open	= open_oldmem,
878 };
879 #endif
880 
881 static ssize_t kmsg_write(struct file * file, const char __user * buf,
882 			  size_t count, loff_t *ppos)
883 {
884 	char *tmp;
885 	ssize_t ret;
886 
887 	tmp = kmalloc(count + 1, GFP_KERNEL);
888 	if (tmp == NULL)
889 		return -ENOMEM;
890 	ret = -EFAULT;
891 	if (!copy_from_user(tmp, buf, count)) {
892 		tmp[count] = 0;
893 		ret = printk("%s", tmp);
894 		if (ret > count)
895 			/* printk can add a prefix */
896 			ret = count;
897 	}
898 	kfree(tmp);
899 	return ret;
900 }
901 
902 static const struct file_operations kmsg_fops = {
903 	.write =	kmsg_write,
904 };
905 
906 static int memory_open(struct inode * inode, struct file * filp)
907 {
908 	switch (iminor(inode)) {
909 		case 1:
910 			filp->f_op = &mem_fops;
911 			filp->f_mapping->backing_dev_info =
912 				&directly_mappable_cdev_bdi;
913 			break;
914 		case 2:
915 			filp->f_op = &kmem_fops;
916 			filp->f_mapping->backing_dev_info =
917 				&directly_mappable_cdev_bdi;
918 			break;
919 		case 3:
920 			filp->f_op = &null_fops;
921 			break;
922 #ifdef CONFIG_DEVPORT
923 		case 4:
924 			filp->f_op = &port_fops;
925 			break;
926 #endif
927 		case 5:
928 			filp->f_mapping->backing_dev_info = &zero_bdi;
929 			filp->f_op = &zero_fops;
930 			break;
931 		case 7:
932 			filp->f_op = &full_fops;
933 			break;
934 		case 8:
935 			filp->f_op = &random_fops;
936 			break;
937 		case 9:
938 			filp->f_op = &urandom_fops;
939 			break;
940 		case 11:
941 			filp->f_op = &kmsg_fops;
942 			break;
943 #ifdef CONFIG_CRASH_DUMP
944 		case 12:
945 			filp->f_op = &oldmem_fops;
946 			break;
947 #endif
948 		default:
949 			return -ENXIO;
950 	}
951 	if (filp->f_op && filp->f_op->open)
952 		return filp->f_op->open(inode,filp);
953 	return 0;
954 }
955 
956 static const struct file_operations memory_fops = {
957 	.open		= memory_open,	/* just a selector for the real open */
958 };
959 
960 static const struct {
961 	unsigned int		minor;
962 	char			*name;
963 	umode_t			mode;
964 	const struct file_operations	*fops;
965 } devlist[] = { /* list of minor devices */
966 	{1, "mem",     S_IRUSR | S_IWUSR | S_IRGRP, &mem_fops},
967 	{2, "kmem",    S_IRUSR | S_IWUSR | S_IRGRP, &kmem_fops},
968 	{3, "null",    S_IRUGO | S_IWUGO,           &null_fops},
969 #ifdef CONFIG_DEVPORT
970 	{4, "port",    S_IRUSR | S_IWUSR | S_IRGRP, &port_fops},
971 #endif
972 	{5, "zero",    S_IRUGO | S_IWUGO,           &zero_fops},
973 	{7, "full",    S_IRUGO | S_IWUGO,           &full_fops},
974 	{8, "random",  S_IRUGO | S_IWUSR,           &random_fops},
975 	{9, "urandom", S_IRUGO | S_IWUSR,           &urandom_fops},
976 	{11,"kmsg",    S_IRUGO | S_IWUSR,           &kmsg_fops},
977 #ifdef CONFIG_CRASH_DUMP
978 	{12,"oldmem",    S_IRUSR | S_IWUSR | S_IRGRP, &oldmem_fops},
979 #endif
980 };
981 
982 static struct class *mem_class;
983 
984 static int __init chr_dev_init(void)
985 {
986 	int i;
987 
988 	if (register_chrdev(MEM_MAJOR,"mem",&memory_fops))
989 		printk("unable to get major %d for memory devs\n", MEM_MAJOR);
990 
991 	mem_class = class_create(THIS_MODULE, "mem");
992 	for (i = 0; i < ARRAY_SIZE(devlist); i++)
993 		device_create(mem_class, NULL,
994 			      MKDEV(MEM_MAJOR, devlist[i].minor),
995 			      devlist[i].name);
996 
997 	return 0;
998 }
999 
1000 fs_initcall(chr_dev_init);
1001