xref: /freebsd/contrib/nvi/vi/v_word.c (revision 6683132d54bd6d589889e43dabdc53d35e38a028)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #ifndef lint
13 static const char sccsid[] = "$Id: v_word.c,v 10.7 2011/12/27 00:49:31 zy Exp $";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
19 
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
24 
25 #include "../common/common.h"
26 #include "vi.h"
27 
28 /*
29  * There are two types of "words".  Bigwords are easy -- groups of anything
30  * delimited by whitespace.  Normal words are trickier.  They are either a
31  * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in
 * whitespace, it's easy, just skip the whitespace and go to the beginning
 * or end of the word.  Otherwise, figure out if the next character is in a
 * different group.
35  * If it is, go to the beginning or end of that group, otherwise, go to the
36  * beginning or end of the current group.  The historic version of vi didn't
37  * get this right, so, for example, there were cases where "4e" was not the
38  * same as "eeee" -- in particular, single character words, and commands that
39  * began in whitespace were almost always handled incorrectly.  To get it right
40  * you have to resolve the cursor after each search so that the look-ahead to
41  * figure out what type of "word" the cursor is in will be correct.
42  *
43  * Empty lines, and lines that consist of only white-space characters count
44  * as a single word, and the beginning and end of the file counts as an
45  * infinite number of words.
46  *
47  * Movements associated with commands are different than movement commands.
48  * For example, in "abc  def", with the cursor on the 'a', "cw" is from
49  * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
50  * space is discarded from the change movement.  Another example is that,
51  * in the same string, a "cw" on any white space character replaces that
52  * single character, and nothing else.  Ain't nothin' in here that's easy.
53  *
54  * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
55  * would treat groups of empty lines as individual words, i.e. the command
56  * would move the cursor to each new empty line.  The 'e' and 'E' commands
57  * would treat groups of empty lines as a single word, i.e. the first use
58  * would move past the group of lines.  The 'b' command would just beep at
59  * you, or, if you did it from the start of the line as part of a motion
60  * command, go absolutely nuts.  If the lines contained only white-space
61  * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
62  * 'b', 'E' and 'e' commands would treat the group as a single word, and
63  * the 'B' and 'b' commands will treat the lines as individual words.  This
64  * implementation treats all of these cases as a single white-space word.
65  */
66 
67 enum which {BIGWORD, LITTLEWORD};
68 
69 static int bword(SCR *, VICMD *, enum which);
70 static int eword(SCR *, VICMD *, enum which);
71 static int fword(SCR *, VICMD *, enum which);
72 
73 /*
74  * v_wordW -- [count]W
75  *	Move forward a bigword at a time.
76  *
77  * PUBLIC: int v_wordW(SCR *, VICMD *);
78  */
79 int
80 v_wordW(SCR *sp, VICMD *vp)
81 {
82 	return (fword(sp, vp, BIGWORD));
83 }
84 
85 /*
86  * v_wordw -- [count]w
87  *	Move forward a word at a time.
88  *
89  * PUBLIC: int v_wordw(SCR *, VICMD *);
90  */
91 int
92 v_wordw(SCR *sp, VICMD *vp)
93 {
94 	return (fword(sp, vp, LITTLEWORD));
95 }
96 
97 /*
98  * fword --
99  *	Move forward by words.
100  */
101 static int
102 fword(SCR *sp, VICMD *vp, enum which type)
103 {
104 	enum { INWORD, NOTWORD } state;
105 	VCS cs;
106 	u_long cnt;
107 
108 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
109 	cs.cs_lno = vp->m_start.lno;
110 	cs.cs_cno = vp->m_start.cno;
111 	if (cs_init(sp, &cs))
112 		return (1);
113 
114 	/*
115 	 * If in white-space:
116 	 *	If the count is 1, and it's a change command, we're done.
117 	 *	Else, move to the first non-white-space character, which
118 	 *	counts as a single word move.  If it's a motion command,
119 	 *	don't move off the end of the line.
120 	 */
121 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK(cs.cs_ch))) {
122 		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
123 			if (ISCMD(vp->rkp, 'c'))
124 				return (0);
125 			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
126 				if (cs_fspace(sp, &cs))
127 					return (1);
128 				goto ret;
129 			}
130 		}
131 		if (cs_fblank(sp, &cs))
132 			return (1);
133 		--cnt;
134 	}
135 
136 	/*
137 	 * Cyclically move to the next word -- this involves skipping
138 	 * over word characters and then any trailing non-word characters.
139 	 * Note, for the 'w' command, the definition of a word keeps
140 	 * switching.
141 	 */
142 	if (type == BIGWORD)
143 		while (cnt--) {
144 			for (;;) {
145 				if (cs_next(sp, &cs))
146 					return (1);
147 				if (cs.cs_flags == CS_EOF)
148 					goto ret;
149 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
150 					break;
151 			}
152 			/*
153 			 * If a motion command and we're at the end of the
154 			 * last word, we're done.  Delete and yank eat any
155 			 * trailing blanks, but we don't move off the end
156 			 * of the line regardless.
157 			 */
158 			if (cnt == 0 && ISMOTION(vp)) {
159 				if ((ISCMD(vp->rkp, 'd') ||
160 				    ISCMD(vp->rkp, 'y')) &&
161 				    cs_fspace(sp, &cs))
162 					return (1);
163 				break;
164 			}
165 
166 			/* Eat whitespace characters. */
167 			if (cs_fblank(sp, &cs))
168 				return (1);
169 			if (cs.cs_flags == CS_EOF)
170 				goto ret;
171 		}
172 	else
173 		while (cnt--) {
174 			state = cs.cs_flags == 0 &&
175 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
176 			for (;;) {
177 				if (cs_next(sp, &cs))
178 					return (1);
179 				if (cs.cs_flags == CS_EOF)
180 					goto ret;
181 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
182 					break;
183 				if (state == INWORD) {
184 					if (!inword(cs.cs_ch))
185 						break;
186 				} else
187 					if (inword(cs.cs_ch))
188 						break;
189 			}
190 			/* See comment above. */
191 			if (cnt == 0 && ISMOTION(vp)) {
192 				if ((ISCMD(vp->rkp, 'd') ||
193 				    ISCMD(vp->rkp, 'y')) &&
194 				    cs_fspace(sp, &cs))
195 					return (1);
196 				break;
197 			}
198 
199 			/* Eat whitespace characters. */
200 			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
201 				if (cs_fblank(sp, &cs))
202 					return (1);
203 			if (cs.cs_flags == CS_EOF)
204 				goto ret;
205 		}
206 
207 	/*
208 	 * If we didn't move, we must be at EOF.
209 	 *
210 	 * !!!
211 	 * That's okay for motion commands, however.
212 	 */
213 ret:	if (!ISMOTION(vp) &&
214 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
215 		v_eof(sp, &vp->m_start);
216 		return (1);
217 	}
218 
219 	/* Adjust the end of the range for motion commands. */
220 	vp->m_stop.lno = cs.cs_lno;
221 	vp->m_stop.cno = cs.cs_cno;
222 	if (ISMOTION(vp) && cs.cs_flags == 0)
223 		--vp->m_stop.cno;
224 
225 	/*
226 	 * Non-motion commands move to the end of the range.  Delete
227 	 * and yank stay at the start, ignore others.
228 	 */
229 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
230 	return (0);
231 }
232 
233 /*
234  * v_wordE -- [count]E
235  *	Move forward to the end of the bigword.
236  *
237  * PUBLIC: int v_wordE(SCR *, VICMD *);
238  */
239 int
240 v_wordE(SCR *sp, VICMD *vp)
241 {
242 	return (eword(sp, vp, BIGWORD));
243 }
244 
245 /*
246  * v_worde -- [count]e
247  *	Move forward to the end of the word.
248  *
249  * PUBLIC: int v_worde(SCR *, VICMD *);
250  */
251 int
252 v_worde(SCR *sp, VICMD *vp)
253 {
254 	return (eword(sp, vp, LITTLEWORD));
255 }
256 
257 /*
258  * eword --
259  *	Move forward to the end of the word.
260  */
261 static int
262 eword(SCR *sp, VICMD *vp, enum which type)
263 {
264 	enum { INWORD, NOTWORD } state;
265 	VCS cs;
266 	u_long cnt;
267 
268 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
269 	cs.cs_lno = vp->m_start.lno;
270 	cs.cs_cno = vp->m_start.cno;
271 	if (cs_init(sp, &cs))
272 		return (1);
273 
274 	/*
275 	 * !!!
276 	 * If in whitespace, or the next character is whitespace, move past
277 	 * it.  (This doesn't count as a word move.)  Stay at the character
278 	 * past the current one, it sets word "state" for the 'e' command.
279 	 */
280 	if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
281 		if (cs_next(sp, &cs))
282 			return (1);
283 		if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
284 			goto start;
285 	}
286 	if (cs_fblank(sp, &cs))
287 		return (1);
288 
289 	/*
290 	 * Cyclically move to the next word -- this involves skipping
291 	 * over word characters and then any trailing non-word characters.
292 	 * Note, for the 'e' command, the definition of a word keeps
293 	 * switching.
294 	 */
295 start:	if (type == BIGWORD)
296 		while (cnt--) {
297 			for (;;) {
298 				if (cs_next(sp, &cs))
299 					return (1);
300 				if (cs.cs_flags == CS_EOF)
301 					goto ret;
302 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
303 					break;
304 			}
305 			/*
306 			 * When we reach the start of the word after the last
307 			 * word, we're done.  If we changed state, back up one
308 			 * to the end of the previous word.
309 			 */
310 			if (cnt == 0) {
311 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
312 					return (1);
313 				break;
314 			}
315 
316 			/* Eat whitespace characters. */
317 			if (cs_fblank(sp, &cs))
318 				return (1);
319 			if (cs.cs_flags == CS_EOF)
320 				goto ret;
321 		}
322 	else
323 		while (cnt--) {
324 			state = cs.cs_flags == 0 &&
325 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
326 			for (;;) {
327 				if (cs_next(sp, &cs))
328 					return (1);
329 				if (cs.cs_flags == CS_EOF)
330 					goto ret;
331 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
332 					break;
333 				if (state == INWORD) {
334 					if (!inword(cs.cs_ch))
335 						break;
336 				} else
337 					if (inword(cs.cs_ch))
338 						break;
339 			}
340 			/* See comment above. */
341 			if (cnt == 0) {
342 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
343 					return (1);
344 				break;
345 			}
346 
347 			/* Eat whitespace characters. */
348 			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
349 				if (cs_fblank(sp, &cs))
350 					return (1);
351 			if (cs.cs_flags == CS_EOF)
352 				goto ret;
353 		}
354 
355 	/*
356 	 * If we didn't move, we must be at EOF.
357 	 *
358 	 * !!!
359 	 * That's okay for motion commands, however.
360 	 */
361 ret:	if (!ISMOTION(vp) &&
362 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
363 		v_eof(sp, &vp->m_start);
364 		return (1);
365 	}
366 
367 	/* Set the end of the range for motion commands. */
368 	vp->m_stop.lno = cs.cs_lno;
369 	vp->m_stop.cno = cs.cs_cno;
370 
371 	/*
372 	 * Non-motion commands move to the end of the range.
373 	 * Delete and yank stay at the start, ignore others.
374 	 */
375 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
376 	return (0);
377 }
378 
379 /*
380  * v_WordB -- [count]B
381  *	Move backward a bigword at a time.
382  *
383  * PUBLIC: int v_wordB(SCR *, VICMD *);
384  */
385 int
386 v_wordB(SCR *sp, VICMD *vp)
387 {
388 	return (bword(sp, vp, BIGWORD));
389 }
390 
391 /*
392  * v_wordb -- [count]b
393  *	Move backward a word at a time.
394  *
395  * PUBLIC: int v_wordb(SCR *, VICMD *);
396  */
397 int
398 v_wordb(SCR *sp, VICMD *vp)
399 {
400 	return (bword(sp, vp, LITTLEWORD));
401 }
402 
403 /*
404  * bword --
405  *	Move backward by words.
406  */
407 static int
408 bword(SCR *sp, VICMD *vp, enum which type)
409 {
410 	enum { INWORD, NOTWORD } state;
411 	VCS cs;
412 	u_long cnt;
413 
414 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
415 	cs.cs_lno = vp->m_start.lno;
416 	cs.cs_cno = vp->m_start.cno;
417 	if (cs_init(sp, &cs))
418 		return (1);
419 
420 	/*
421 	 * !!!
422 	 * If in whitespace, or the previous character is whitespace, move
423 	 * past it.  (This doesn't count as a word move.)  Stay at the
424 	 * character before the current one, it sets word "state" for the
425 	 * 'b' command.
426 	 */
427 	if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
428 		if (cs_prev(sp, &cs))
429 			return (1);
430 		if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
431 			goto start;
432 	}
433 	if (cs_bblank(sp, &cs))
434 		return (1);
435 
436 	/*
437 	 * Cyclically move to the beginning of the previous word -- this
438 	 * involves skipping over word characters and then any trailing
439 	 * non-word characters.  Note, for the 'b' command, the definition
440 	 * of a word keeps switching.
441 	 */
442 start:	if (type == BIGWORD)
443 		while (cnt--) {
444 			for (;;) {
445 				if (cs_prev(sp, &cs))
446 					return (1);
447 				if (cs.cs_flags == CS_SOF)
448 					goto ret;
449 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
450 					break;
451 			}
452 			/*
453 			 * When we reach the end of the word before the last
454 			 * word, we're done.  If we changed state, move forward
455 			 * one to the end of the next word.
456 			 */
457 			if (cnt == 0) {
458 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
459 					return (1);
460 				break;
461 			}
462 
463 			/* Eat whitespace characters. */
464 			if (cs_bblank(sp, &cs))
465 				return (1);
466 			if (cs.cs_flags == CS_SOF)
467 				goto ret;
468 		}
469 	else
470 		while (cnt--) {
471 			state = cs.cs_flags == 0 &&
472 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
473 			for (;;) {
474 				if (cs_prev(sp, &cs))
475 					return (1);
476 				if (cs.cs_flags == CS_SOF)
477 					goto ret;
478 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
479 					break;
480 				if (state == INWORD) {
481 					if (!inword(cs.cs_ch))
482 						break;
483 				} else
484 					if (inword(cs.cs_ch))
485 						break;
486 			}
487 			/* See comment above. */
488 			if (cnt == 0) {
489 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
490 					return (1);
491 				break;
492 			}
493 
494 			/* Eat whitespace characters. */
495 			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
496 				if (cs_bblank(sp, &cs))
497 					return (1);
498 			if (cs.cs_flags == CS_SOF)
499 				goto ret;
500 		}
501 
502 	/* If we didn't move, we must be at SOF. */
503 ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
504 		v_sof(sp, &vp->m_start);
505 		return (1);
506 	}
507 
508 	/* Set the end of the range for motion commands. */
509 	vp->m_stop.lno = cs.cs_lno;
510 	vp->m_stop.cno = cs.cs_cno;
511 
512 	/*
513 	 * All commands move to the end of the range.  Motion commands
514 	 * adjust the starting point to the character before the current
515 	 * one.
516 	 *
517 	 * !!!
518 	 * The historic vi didn't get this right -- the `yb' command yanked
519 	 * the right stuff and even updated the cursor value, but the cursor
520 	 * was not actually updated on the screen.
521 	 */
522 	vp->m_final = vp->m_stop;
523 	if (ISMOTION(vp))
524 		--vp->m_start.cno;
525 	return (0);
526 }
527