xref: /freebsd/contrib/nvi/vi/v_word.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
/* SCCS identification string for this file; omitted when lint is defined. */
#ifndef lint
static const char sccsid[] = "@(#)v_word.c	10.5 (Berkeley) 3/6/96";
#endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
19 
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
24 
25 #include "../common/common.h"
26 #include "vi.h"
27 
28 /*
29  * There are two types of "words".  Bigwords are easy -- groups of anything
30  * delimited by whitespace.  Normal words are trickier.  They are either a
31  * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in whitespace,
 * it's easy, just remove the whitespace and go to the beginning or end of the
34  * word.  Otherwise, figure out if the next character is in a different group.
35  * If it is, go to the beginning or end of that group, otherwise, go to the
36  * beginning or end of the current group.  The historic version of vi didn't
37  * get this right, so, for example, there were cases where "4e" was not the
38  * same as "eeee" -- in particular, single character words, and commands that
39  * began in whitespace were almost always handled incorrectly.  To get it right
40  * you have to resolve the cursor after each search so that the look-ahead to
41  * figure out what type of "word" the cursor is in will be correct.
42  *
43  * Empty lines, and lines that consist of only white-space characters count
44  * as a single word, and the beginning and end of the file counts as an
45  * infinite number of words.
46  *
47  * Movements associated with commands are different than movement commands.
48  * For example, in "abc  def", with the cursor on the 'a', "cw" is from
49  * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
50  * space is discarded from the change movement.  Another example is that,
51  * in the same string, a "cw" on any white space character replaces that
52  * single character, and nothing else.  Ain't nothin' in here that's easy.
53  *
54  * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
55  * would treat groups of empty lines as individual words, i.e. the command
56  * would move the cursor to each new empty line.  The 'e' and 'E' commands
57  * would treat groups of empty lines as a single word, i.e. the first use
58  * would move past the group of lines.  The 'b' command would just beep at
59  * you, or, if you did it from the start of the line as part of a motion
60  * command, go absolutely nuts.  If the lines contained only white-space
61  * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
62  * 'b', 'E' and 'e' commands would treat the group as a single word, and
63  * the 'B' and 'b' commands will treat the lines as individual words.  This
64  * implementation treats all of these cases as a single white-space word.
65  */
66 
67 enum which {BIGWORD, LITTLEWORD};
68 
69 static int bword __P((SCR *, VICMD *, enum which));
70 static int eword __P((SCR *, VICMD *, enum which));
71 static int fword __P((SCR *, VICMD *, enum which));
72 
73 /*
74  * v_wordW -- [count]W
75  *	Move forward a bigword at a time.
76  *
77  * PUBLIC: int v_wordW __P((SCR *, VICMD *));
78  */
79 int
80 v_wordW(sp, vp)
81 	SCR *sp;
82 	VICMD *vp;
83 {
84 	return (fword(sp, vp, BIGWORD));
85 }
86 
87 /*
88  * v_wordw -- [count]w
89  *	Move forward a word at a time.
90  *
91  * PUBLIC: int v_wordw __P((SCR *, VICMD *));
92  */
93 int
94 v_wordw(sp, vp)
95 	SCR *sp;
96 	VICMD *vp;
97 {
98 	return (fword(sp, vp, LITTLEWORD));
99 }
100 
101 /*
102  * fword --
103  *	Move forward by words.
104  */
105 static int
106 fword(sp, vp, type)
107 	SCR *sp;
108 	VICMD *vp;
109 	enum which type;
110 {
111 	enum { INWORD, NOTWORD } state;
112 	VCS cs;
113 	u_long cnt;
114 
115 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
116 	cs.cs_lno = vp->m_start.lno;
117 	cs.cs_cno = vp->m_start.cno;
118 	if (cs_init(sp, &cs))
119 		return (1);
120 
121 	/*
122 	 * If in white-space:
123 	 *	If the count is 1, and it's a change command, we're done.
124 	 *	Else, move to the first non-white-space character, which
125 	 *	counts as a single word move.  If it's a motion command,
126 	 *	don't move off the end of the line.
127 	 */
128 	if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) {
129 		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
130 			if (ISCMD(vp->rkp, 'c'))
131 				return (0);
132 			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
133 				if (cs_fspace(sp, &cs))
134 					return (1);
135 				goto ret;
136 			}
137 		}
138 		if (cs_fblank(sp, &cs))
139 			return (1);
140 		--cnt;
141 	}
142 
143 	/*
144 	 * Cyclically move to the next word -- this involves skipping
145 	 * over word characters and then any trailing non-word characters.
146 	 * Note, for the 'w' command, the definition of a word keeps
147 	 * switching.
148 	 */
149 	if (type == BIGWORD)
150 		while (cnt--) {
151 			for (;;) {
152 				if (cs_next(sp, &cs))
153 					return (1);
154 				if (cs.cs_flags == CS_EOF)
155 					goto ret;
156 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
157 					break;
158 			}
159 			/*
160 			 * If a motion command and we're at the end of the
161 			 * last word, we're done.  Delete and yank eat any
162 			 * trailing blanks, but we don't move off the end
163 			 * of the line regardless.
164 			 */
165 			if (cnt == 0 && ISMOTION(vp)) {
166 				if ((ISCMD(vp->rkp, 'd') ||
167 				    ISCMD(vp->rkp, 'y')) &&
168 				    cs_fspace(sp, &cs))
169 					return (1);
170 				break;
171 			}
172 
173 			/* Eat whitespace characters. */
174 			if (cs_fblank(sp, &cs))
175 				return (1);
176 			if (cs.cs_flags == CS_EOF)
177 				goto ret;
178 		}
179 	else
180 		while (cnt--) {
181 			state = cs.cs_flags == 0 &&
182 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
183 			for (;;) {
184 				if (cs_next(sp, &cs))
185 					return (1);
186 				if (cs.cs_flags == CS_EOF)
187 					goto ret;
188 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
189 					break;
190 				if (state == INWORD) {
191 					if (!inword(cs.cs_ch))
192 						break;
193 				} else
194 					if (inword(cs.cs_ch))
195 						break;
196 			}
197 			/* See comment above. */
198 			if (cnt == 0 && ISMOTION(vp)) {
199 				if ((ISCMD(vp->rkp, 'd') ||
200 				    ISCMD(vp->rkp, 'y')) &&
201 				    cs_fspace(sp, &cs))
202 					return (1);
203 				break;
204 			}
205 
206 			/* Eat whitespace characters. */
207 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
208 				if (cs_fblank(sp, &cs))
209 					return (1);
210 			if (cs.cs_flags == CS_EOF)
211 				goto ret;
212 		}
213 
214 	/*
215 	 * If we didn't move, we must be at EOF.
216 	 *
217 	 * !!!
218 	 * That's okay for motion commands, however.
219 	 */
220 ret:	if (!ISMOTION(vp) &&
221 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
222 		v_eof(sp, &vp->m_start);
223 		return (1);
224 	}
225 
226 	/* Adjust the end of the range for motion commands. */
227 	vp->m_stop.lno = cs.cs_lno;
228 	vp->m_stop.cno = cs.cs_cno;
229 	if (ISMOTION(vp) && cs.cs_flags == 0)
230 		--vp->m_stop.cno;
231 
232 	/*
233 	 * Non-motion commands move to the end of the range.  Delete
234 	 * and yank stay at the start, ignore others.
235 	 */
236 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
237 	return (0);
238 }
239 
240 /*
241  * v_wordE -- [count]E
242  *	Move forward to the end of the bigword.
243  *
244  * PUBLIC: int v_wordE __P((SCR *, VICMD *));
245  */
246 int
247 v_wordE(sp, vp)
248 	SCR *sp;
249 	VICMD *vp;
250 {
251 	return (eword(sp, vp, BIGWORD));
252 }
253 
254 /*
255  * v_worde -- [count]e
256  *	Move forward to the end of the word.
257  *
258  * PUBLIC: int v_worde __P((SCR *, VICMD *));
259  */
260 int
261 v_worde(sp, vp)
262 	SCR *sp;
263 	VICMD *vp;
264 {
265 	return (eword(sp, vp, LITTLEWORD));
266 }
267 
268 /*
269  * eword --
270  *	Move forward to the end of the word.
271  */
272 static int
273 eword(sp, vp, type)
274 	SCR *sp;
275 	VICMD *vp;
276 	enum which type;
277 {
278 	enum { INWORD, NOTWORD } state;
279 	VCS cs;
280 	u_long cnt;
281 
282 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
283 	cs.cs_lno = vp->m_start.lno;
284 	cs.cs_cno = vp->m_start.cno;
285 	if (cs_init(sp, &cs))
286 		return (1);
287 
288 	/*
289 	 * !!!
290 	 * If in whitespace, or the next character is whitespace, move past
291 	 * it.  (This doesn't count as a word move.)  Stay at the character
292 	 * past the current one, it sets word "state" for the 'e' command.
293 	 */
294 	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
295 		if (cs_next(sp, &cs))
296 			return (1);
297 		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
298 			goto start;
299 	}
300 	if (cs_fblank(sp, &cs))
301 		return (1);
302 
303 	/*
304 	 * Cyclically move to the next word -- this involves skipping
305 	 * over word characters and then any trailing non-word characters.
306 	 * Note, for the 'e' command, the definition of a word keeps
307 	 * switching.
308 	 */
309 start:	if (type == BIGWORD)
310 		while (cnt--) {
311 			for (;;) {
312 				if (cs_next(sp, &cs))
313 					return (1);
314 				if (cs.cs_flags == CS_EOF)
315 					goto ret;
316 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
317 					break;
318 			}
319 			/*
320 			 * When we reach the start of the word after the last
321 			 * word, we're done.  If we changed state, back up one
322 			 * to the end of the previous word.
323 			 */
324 			if (cnt == 0) {
325 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
326 					return (1);
327 				break;
328 			}
329 
330 			/* Eat whitespace characters. */
331 			if (cs_fblank(sp, &cs))
332 				return (1);
333 			if (cs.cs_flags == CS_EOF)
334 				goto ret;
335 		}
336 	else
337 		while (cnt--) {
338 			state = cs.cs_flags == 0 &&
339 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
340 			for (;;) {
341 				if (cs_next(sp, &cs))
342 					return (1);
343 				if (cs.cs_flags == CS_EOF)
344 					goto ret;
345 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
346 					break;
347 				if (state == INWORD) {
348 					if (!inword(cs.cs_ch))
349 						break;
350 				} else
351 					if (inword(cs.cs_ch))
352 						break;
353 			}
354 			/* See comment above. */
355 			if (cnt == 0) {
356 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
357 					return (1);
358 				break;
359 			}
360 
361 			/* Eat whitespace characters. */
362 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
363 				if (cs_fblank(sp, &cs))
364 					return (1);
365 			if (cs.cs_flags == CS_EOF)
366 				goto ret;
367 		}
368 
369 	/*
370 	 * If we didn't move, we must be at EOF.
371 	 *
372 	 * !!!
373 	 * That's okay for motion commands, however.
374 	 */
375 ret:	if (!ISMOTION(vp) &&
376 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
377 		v_eof(sp, &vp->m_start);
378 		return (1);
379 	}
380 
381 	/* Set the end of the range for motion commands. */
382 	vp->m_stop.lno = cs.cs_lno;
383 	vp->m_stop.cno = cs.cs_cno;
384 
385 	/*
386 	 * Non-motion commands move to the end of the range.
387 	 * Delete and yank stay at the start, ignore others.
388 	 */
389 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
390 	return (0);
391 }
392 
393 /*
394  * v_WordB -- [count]B
395  *	Move backward a bigword at a time.
396  *
397  * PUBLIC: int v_wordB __P((SCR *, VICMD *));
398  */
399 int
400 v_wordB(sp, vp)
401 	SCR *sp;
402 	VICMD *vp;
403 {
404 	return (bword(sp, vp, BIGWORD));
405 }
406 
407 /*
408  * v_wordb -- [count]b
409  *	Move backward a word at a time.
410  *
411  * PUBLIC: int v_wordb __P((SCR *, VICMD *));
412  */
413 int
414 v_wordb(sp, vp)
415 	SCR *sp;
416 	VICMD *vp;
417 {
418 	return (bword(sp, vp, LITTLEWORD));
419 }
420 
421 /*
422  * bword --
423  *	Move backward by words.
424  */
425 static int
426 bword(sp, vp, type)
427 	SCR *sp;
428 	VICMD *vp;
429 	enum which type;
430 {
431 	enum { INWORD, NOTWORD } state;
432 	VCS cs;
433 	u_long cnt;
434 
435 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
436 	cs.cs_lno = vp->m_start.lno;
437 	cs.cs_cno = vp->m_start.cno;
438 	if (cs_init(sp, &cs))
439 		return (1);
440 
441 	/*
442 	 * !!!
443 	 * If in whitespace, or the previous character is whitespace, move
444 	 * past it.  (This doesn't count as a word move.)  Stay at the
445 	 * character before the current one, it sets word "state" for the
446 	 * 'b' command.
447 	 */
448 	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
449 		if (cs_prev(sp, &cs))
450 			return (1);
451 		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
452 			goto start;
453 	}
454 	if (cs_bblank(sp, &cs))
455 		return (1);
456 
457 	/*
458 	 * Cyclically move to the beginning of the previous word -- this
459 	 * involves skipping over word characters and then any trailing
460 	 * non-word characters.  Note, for the 'b' command, the definition
461 	 * of a word keeps switching.
462 	 */
463 start:	if (type == BIGWORD)
464 		while (cnt--) {
465 			for (;;) {
466 				if (cs_prev(sp, &cs))
467 					return (1);
468 				if (cs.cs_flags == CS_SOF)
469 					goto ret;
470 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
471 					break;
472 			}
473 			/*
474 			 * When we reach the end of the word before the last
475 			 * word, we're done.  If we changed state, move forward
476 			 * one to the end of the next word.
477 			 */
478 			if (cnt == 0) {
479 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
480 					return (1);
481 				break;
482 			}
483 
484 			/* Eat whitespace characters. */
485 			if (cs_bblank(sp, &cs))
486 				return (1);
487 			if (cs.cs_flags == CS_SOF)
488 				goto ret;
489 		}
490 	else
491 		while (cnt--) {
492 			state = cs.cs_flags == 0 &&
493 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
494 			for (;;) {
495 				if (cs_prev(sp, &cs))
496 					return (1);
497 				if (cs.cs_flags == CS_SOF)
498 					goto ret;
499 				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
500 					break;
501 				if (state == INWORD) {
502 					if (!inword(cs.cs_ch))
503 						break;
504 				} else
505 					if (inword(cs.cs_ch))
506 						break;
507 			}
508 			/* See comment above. */
509 			if (cnt == 0) {
510 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
511 					return (1);
512 				break;
513 			}
514 
515 			/* Eat whitespace characters. */
516 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
517 				if (cs_bblank(sp, &cs))
518 					return (1);
519 			if (cs.cs_flags == CS_SOF)
520 				goto ret;
521 		}
522 
523 	/* If we didn't move, we must be at SOF. */
524 ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
525 		v_sof(sp, &vp->m_start);
526 		return (1);
527 	}
528 
529 	/* Set the end of the range for motion commands. */
530 	vp->m_stop.lno = cs.cs_lno;
531 	vp->m_stop.cno = cs.cs_cno;
532 
533 	/*
534 	 * All commands move to the end of the range.  Motion commands
535 	 * adjust the starting point to the character before the current
536 	 * one.
537 	 *
538 	 * !!!
539 	 * The historic vi didn't get this right -- the `yb' command yanked
540 	 * the right stuff and even updated the cursor value, but the cursor
541 	 * was not actually updated on the screen.
542 	 */
543 	vp->m_final = vp->m_stop;
544 	if (ISMOTION(vp))
545 		--vp->m_start.cno;
546 	return (0);
547 }
548