xref: /linux/arch/parisc/lib/io.c (revision 4413e16d9d21673bb5048a2e542f1aaa00015c2e)
1 /*
2  * arch/parisc/lib/io.c
3  *
4  * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
5  * Copyright (c) Randolph Chung 2001 <tausq@debian.org>
6  *
7  * IO accessing functions which shouldn't be inlined because they're too big
8  */
9 
10 #include <linux/kernel.h>
11 #include <linux/module.h>
12 #include <asm/io.h>
13 
14 /* Copies a block of memory to a device in an efficient manner.
15  * Assumes the device can cope with 32-bit transfers.  If it can't,
16  * don't use this function.
17  */
18 void memcpy_toio(volatile void __iomem *dst, const void *src, int count)
19 {
20 	if (((unsigned long)dst & 3) != ((unsigned long)src & 3))
21 		goto bytecopy;
22 	while ((unsigned long)dst & 3) {
23 		writeb(*(char *)src, dst++);
24 		src++;
25 		count--;
26 	}
27 	while (count > 3) {
28 		__raw_writel(*(u32 *)src, dst);
29 		src += 4;
30 		dst += 4;
31 		count -= 4;
32 	}
33  bytecopy:
34 	while (count--) {
35 		writeb(*(char *)src, dst++);
36 		src++;
37 	}
38 }
39 
40 /*
41 ** Copies a block of memory from a device in an efficient manner.
42 ** Assumes the device can cope with 32-bit transfers.  If it can't,
43 ** don't use this function.
44 **
45 ** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
46 **	27341/64    = 427 cyc per int
47 **	61311/128   = 478 cyc per short
48 **	122637/256  = 479 cyc per byte
49 ** Ergo bus latencies dominant (not transfer size).
50 **      Minimize total number of transfers at cost of CPU cycles.
51 **	TODO: only look at src alignment and adjust the stores to dest.
52 */
53 void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
54 {
55 	/* first compare alignment of src/dst */
56 	if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
57 		goto bytecopy;
58 
59 	if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
60 		goto shortcopy;
61 
62 	/* Then check for misaligned start address */
63 	if ((unsigned long)src & 1) {
64 		*(u8 *)dst = readb(src);
65 		src++;
66 		dst++;
67 		count--;
68 		if (count < 2) goto bytecopy;
69 	}
70 
71 	if ((unsigned long)src & 2) {
72 		*(u16 *)dst = __raw_readw(src);
73 		src += 2;
74 		dst += 2;
75 		count -= 2;
76 	}
77 
78 	while (count > 3) {
79 		*(u32 *)dst = __raw_readl(src);
80 		dst += 4;
81 		src += 4;
82 		count -= 4;
83 	}
84 
85  shortcopy:
86 	while (count > 1) {
87 		*(u16 *)dst = __raw_readw(src);
88 		src += 2;
89 		dst += 2;
90 		count -= 2;
91 	}
92 
93  bytecopy:
94 	while (count--) {
95 		*(char *)dst = readb(src);
96 		src++;
97 		dst++;
98 	}
99 }
100 
101 /* Sets a block of memory on a device to a given value.
102  * Assumes the device can cope with 32-bit transfers.  If it can't,
103  * don't use this function.
104  */
105 void memset_io(volatile void __iomem *addr, unsigned char val, int count)
106 {
107 	u32 val32 = (val << 24) | (val << 16) | (val << 8) | val;
108 	while ((unsigned long)addr & 3) {
109 		writeb(val, addr++);
110 		count--;
111 	}
112 	while (count > 3) {
113 		__raw_writel(val32, addr);
114 		addr += 4;
115 		count -= 4;
116 	}
117 	while (count--) {
118 		writeb(val, addr++);
119 	}
120 }
121 
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at DST.
 *
 * Bytes are stored singly until the store pointer is 32-bit aligned,
 * then four port reads at a time are packed (first byte read in the
 * most significant position) into one 32-bit store, and any remaining
 * bytes are stored singly again.
 */
void insb (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *out = (unsigned char *)dst;

	/* Head: single bytes up to the next 32-bit boundary. */
	for (; (unsigned long)out & 0x3; count--) {
		if (count == 0)
			return;
		*out++ = inb(port);
	}

	/* Body: one 32-bit store per four port reads. */
	for (; count >= 4; count -= 4) {
		unsigned int word;

		/* Separate statements keep the port reads in a defined order. */
		word = inb(port) << 24;
		word |= inb(port) << 16;
		word |= inb(port) << 8;
		word |= inb(port);
		*(unsigned int *)out = word;
		out += 4;
	}

	/* Tail: whatever is left, one byte at a time. */
	for (; count; count--)
		*out++ = inb(port);
}
157 
158 
/*
 * Read COUNT 16-bit words from port PORT into memory starting at
 * DST.  The fast paths want DST at least short aligned; an odd DST is
 * handled by splitting every word across two stores.  This is used by
 * the IDE driver to read disk sectors.  Performance is important, but
 * the interfaces seems to be slow: just using the inlined version
 * of the inw() breaks things.
 */
void insw (unsigned long port, void *dst, unsigned long count)
{
	unsigned int cur = 0, next;
	unsigned char *out = (unsigned char *)dst;

	if (count == 0)
		return;

	switch ((unsigned long)out & 0x3) {
	case 0x02:			/* Buffer 16-bit aligned */
		/* One word brings the pointer to a 32-bit boundary. */
		*(unsigned short *)out = cpu_to_le16(inw(port));
		out += 2;
		count--;
		/* fall through */

	case 0x00:			/* Buffer 32-bit aligned */
		/* Two port reads per 32-bit store, first word in the high half. */
		for (; count >= 2; count -= 2) {
			cur = cpu_to_le16(inw(port)) << 16;
			cur |= cpu_to_le16(inw(port));
			*(unsigned int *)out = cur;
			out += 4;
		}
		if (count)
			*(unsigned short *)out = cpu_to_le16(inw(port));
		break;

	case 0x01:			/* Buffer 8-bit aligned */
	case 0x03:
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */
		--count;

		/* High byte of the first word goes out alone ... */
		cur = cpu_to_le16(inw(port));
		*out = cur >> 8;
		out++;

		/* ... then each store pairs the pending low byte with the
		 * high byte of the next word ... */
		for (; count; count--) {
			next = cpu_to_le16(inw(port));
			*(unsigned short *)out = (cur & 0xff) << 8 | (next >> 8);
			out += 2;
			cur = next;
		}

		/* ... and the last low byte is stored on its own. */
		*out = cur & 0xff;
		break;
	}
}
229 
230 
231 
/*
 * Read COUNT 32-bit words from port PORT into memory starting at
 * DST. Works with any alignment of DST: for an unaligned buffer each
 * word is split so that the loop body still issues one aligned 32-bit
 * store per port read, with narrower stores at the head and tail.
 * Performance is important, but the interfaces seems to be slow: just
 * using the inlined version of the inl() breaks things.
 */
void insl (unsigned long port, void *dst, unsigned long count)
{
	unsigned int cur = 0, next;
	unsigned char *out = (unsigned char *)dst;

	if (count == 0)
		return;

	switch ((unsigned long)dst & 0x3) {
	case 0x00:			/* Buffer 32-bit aligned */
		for (; count; count--) {
			*(unsigned int *)out = cpu_to_le32(inl(port));
			out += 4;
		}
		break;

	case 0x02:			/* Buffer 16-bit aligned */
		--count;

		/* Head: upper half of the first word. */
		cur = cpu_to_le32(inl(port));
		*(unsigned short *)out = cur >> 16;
		out += 2;

		/* Body: lower half of the pending word + upper half of the next. */
		for (; count; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffff) << 16 | (next >> 16);
			out += 4;
			cur = next;
		}

		/* Tail: lower half of the last word. */
		*(unsigned short *)out = cur & 0xffff;
		break;

	case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* Head: top byte, then the middle 16 bits, of the first word. */
		cur = cpu_to_le32(inl(port));
		*out = cur >> 24;
		out++;
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;

		/* Body: pending low byte + upper three bytes of the next word. */
		for (; count; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xff) << 24 | (next >> 8);
			out += 4;
			cur = next;
		}

		/* Tail: low byte of the last word. */
		*out = cur & 0xff;
		break;

	case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* Head: top byte of the first word. */
		cur = cpu_to_le32(inl(port));
		*out = cur >> 24;
		out++;

		/* Body: pending low three bytes + top byte of the next word. */
		for (; count; count--) {
			next = cpu_to_le32(inl(port));
			*(unsigned int *)out = (cur & 0xffffff) << 8 | next >> 24;
			out += 4;
			cur = next;
		}

		/* Tail: middle 16 bits, then the low byte, of the last word. */
		*(unsigned short *)out = (cur >> 8) & 0xffff;
		out += 2;
		*out = cur & 0xff;
		break;
	}
}
310 
311 
/*
 * Like insb but in the opposite direction: write COUNT bytes from
 * memory starting at SRC to port PORT, one outb() per byte.
 * Don't worry as much about doing aligned memory transfers:
 * doing byte reads the "slow" way isn't nearly as slow as
 * doing byte writes the slow way (no r-m-w cycle).
 */
void outsb(unsigned long port, const void * src, unsigned long count)
{
	const unsigned char *in = (const unsigned char *)src;

	for (; count; count--) {
		outb(*in, port);
		in++;
	}
}
329 
/*
 * Like insw but in the opposite direction: write COUNT 16-bit words
 * from memory starting at SRC to port PORT.  Works with any alignment
 * of SRC.  This is used by the IDE driver to write disk sectors.
 * Performance is important, but the interfaces seems to be slow: just
 * using the inlined version of the outw() breaks things.
 */
void outsw (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		/* One 32-bit load feeds two port writes, high half first. */
		while (count>=2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		/* One word brings the pointer to a 32-bit boundary. */
		outw(le16_to_cpu(*(const unsigned short *)p), port);
		p += 2;
		count--;

		while (count>=2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
	 case 0x03:			/* (both odd addresses take this path) */
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */

		/* The first byte becomes the high half of the first word. */
		l  = *p << 8;
		p++;
		count--;
		while (count)
		{
			count--;
			l2 = *(const unsigned short *)p;
			p += 2;
			outw(le16_to_cpu(l | l2 >> 8), port);
			/* Carry only the low byte over as the next high half. */
			l = (l2 & 0xff) << 8;
		}
		/*
		 * The final byte is the low half of the last word; it is a
		 * plain byte and must not be shifted away.
		 */
		l2 = *p;
		outw (le16_to_cpu(l | l2), port);
		break;

	}
}
400 
401 
/*
 * Like insl but in the opposite direction: write COUNT 32-bit words
 * from memory starting at SRC to port PORT.  This is used by the IDE
 * driver to write disk sectors.  Works with any alignment in SRC: for
 * an unaligned buffer each output word is assembled from the tail of
 * one aligned load and the head of the next.
 *  Performance is important, but the interfaces seems to be slow:
 * just using the inlined version of the outl() breaks things.
 */
void outsl (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count--)
		{
			outl(le32_to_cpu(*(const unsigned int *)p), port);
			p += 4;
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		--count;

		/* Head: the first 16 bits become the upper half of word 0. */
		l = *(const unsigned short *)p;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		/* Tail: the final 16 bits complete the last word. */
		l2 = *(const unsigned short *)p;
		outl (le32_to_cpu(l << 16 | l2), port);
		break;
	 case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/* Head: one byte plus 16 bits fill bits 31..8 of word 0.
		 * The cast keeps the shift in unsigned arithmetic. */
		l = (unsigned int)*p << 24;
		p++;
		l |= *(const unsigned short *)p << 8;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		/* Tail: the final byte completes the last word. */
		l2 = *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	 case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* Head: one byte fills bits 31..24 of word 0. */
		l = (unsigned int)*p << 24;
		p++;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}
		/*
		 * Tail: bits 31..24 already hold the carried byte in l, so
		 * the trailing 16 bits belong in bits 23..8 and the final
		 * byte in bits 7..0.
		 */
		l2 = *(const unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	}
}
482 
/* Exported port string I/O routines, paired by transfer width. */
EXPORT_SYMBOL(insb);
EXPORT_SYMBOL(outsb);
EXPORT_SYMBOL(insw);
EXPORT_SYMBOL(outsw);
EXPORT_SYMBOL(insl);
EXPORT_SYMBOL(outsl);
489