xref: /freebsd/contrib/nvi/vi/v_word.c (revision d4eeb02986980bf33dd56c41ceb9fc5f180c0d47)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #include <sys/types.h>
13 #include <sys/queue.h>
14 #include <sys/time.h>
15 
16 #include <bitstring.h>
17 #include <ctype.h>
18 #include <limits.h>
19 #include <stdio.h>
20 
21 #include "../common/common.h"
22 #include "vi.h"
23 
24 /*
25  * There are two types of "words".  Bigwords are easy -- groups of anything
26  * delimited by whitespace.  Normal words are trickier.  They are either a
27  * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in whitespace,
 * it's easy, just remove the whitespace and go to the beginning or end of the
30  * word.  Otherwise, figure out if the next character is in a different group.
31  * If it is, go to the beginning or end of that group, otherwise, go to the
32  * beginning or end of the current group.  The historic version of vi didn't
33  * get this right, so, for example, there were cases where "4e" was not the
34  * same as "eeee" -- in particular, single character words, and commands that
35  * began in whitespace were almost always handled incorrectly.  To get it right
36  * you have to resolve the cursor after each search so that the look-ahead to
37  * figure out what type of "word" the cursor is in will be correct.
38  *
39  * Empty lines, and lines that consist of only white-space characters count
40  * as a single word, and the beginning and end of the file counts as an
41  * infinite number of words.
42  *
43  * Movements associated with commands are different than movement commands.
44  * For example, in "abc  def", with the cursor on the 'a', "cw" is from
45  * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
46  * space is discarded from the change movement.  Another example is that,
47  * in the same string, a "cw" on any white space character replaces that
48  * single character, and nothing else.  Ain't nothin' in here that's easy.
49  *
50  * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
51  * would treat groups of empty lines as individual words, i.e. the command
52  * would move the cursor to each new empty line.  The 'e' and 'E' commands
53  * would treat groups of empty lines as a single word, i.e. the first use
54  * would move past the group of lines.  The 'b' command would just beep at
55  * you, or, if you did it from the start of the line as part of a motion
56  * command, go absolutely nuts.  If the lines contained only white-space
57  * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
58  * 'b', 'E' and 'e' commands would treat the group as a single word, and
59  * the 'B' and 'b' commands will treat the lines as individual words.  This
60  * implementation treats all of these cases as a single white-space word.
61  */
62 
63 enum which {BIGWORD, LITTLEWORD};
64 
65 static int bword(SCR *, VICMD *, enum which);
66 static int eword(SCR *, VICMD *, enum which);
67 static int fword(SCR *, VICMD *, enum which);
68 
69 /*
70  * v_wordW -- [count]W
71  *	Move forward a bigword at a time.
72  *
73  * PUBLIC: int v_wordW(SCR *, VICMD *);
74  */
75 int
76 v_wordW(SCR *sp, VICMD *vp)
77 {
78 	return (fword(sp, vp, BIGWORD));
79 }
80 
81 /*
82  * v_wordw -- [count]w
83  *	Move forward a word at a time.
84  *
85  * PUBLIC: int v_wordw(SCR *, VICMD *);
86  */
87 int
88 v_wordw(SCR *sp, VICMD *vp)
89 {
90 	return (fword(sp, vp, LITTLEWORD));
91 }
92 
93 /*
94  * fword --
95  *	Move forward by words.
96  */
97 static int
98 fword(SCR *sp, VICMD *vp, enum which type)
99 {
100 	enum { INWORD, NOTWORD } state;
101 	VCS cs;
102 	u_long cnt;
103 
104 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
105 	cs.cs_lno = vp->m_start.lno;
106 	cs.cs_cno = vp->m_start.cno;
107 	if (cs_init(sp, &cs))
108 		return (1);
109 
110 	/*
111 	 * If in white-space:
112 	 *	If the count is 1, and it's a change command, we're done.
113 	 *	Else, move to the first non-white-space character, which
114 	 *	counts as a single word move.  If it's a motion command,
115 	 *	don't move off the end of the line.
116 	 */
117 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK(cs.cs_ch))) {
118 		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
119 			if (ISCMD(vp->rkp, 'c'))
120 				return (0);
121 			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
122 				if (cs_fspace(sp, &cs))
123 					return (1);
124 				goto ret;
125 			}
126 		}
127 		if (cs_fblank(sp, &cs))
128 			return (1);
129 		--cnt;
130 	}
131 
132 	/*
133 	 * Cyclically move to the next word -- this involves skipping
134 	 * over word characters and then any trailing non-word characters.
135 	 * Note, for the 'w' command, the definition of a word keeps
136 	 * switching.
137 	 */
138 	if (type == BIGWORD)
139 		while (cnt--) {
140 			for (;;) {
141 				if (cs_next(sp, &cs))
142 					return (1);
143 				if (cs.cs_flags == CS_EOF)
144 					goto ret;
145 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
146 					break;
147 			}
148 			/*
149 			 * If a motion command and we're at the end of the
150 			 * last word, we're done.  Delete and yank eat any
151 			 * trailing blanks, but we don't move off the end
152 			 * of the line regardless.
153 			 */
154 			if (cnt == 0 && ISMOTION(vp)) {
155 				if ((ISCMD(vp->rkp, 'd') ||
156 				    ISCMD(vp->rkp, 'y')) &&
157 				    cs_fspace(sp, &cs))
158 					return (1);
159 				break;
160 			}
161 
162 			/* Eat whitespace characters. */
163 			if (cs_fblank(sp, &cs))
164 				return (1);
165 			if (cs.cs_flags == CS_EOF)
166 				goto ret;
167 		}
168 	else
169 		while (cnt--) {
170 			state = cs.cs_flags == 0 &&
171 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
172 			for (;;) {
173 				if (cs_next(sp, &cs))
174 					return (1);
175 				if (cs.cs_flags == CS_EOF)
176 					goto ret;
177 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
178 					break;
179 				if (state == INWORD) {
180 					if (!inword(cs.cs_ch))
181 						break;
182 				} else
183 					if (inword(cs.cs_ch))
184 						break;
185 			}
186 			/* See comment above. */
187 			if (cnt == 0 && ISMOTION(vp)) {
188 				if ((ISCMD(vp->rkp, 'd') ||
189 				    ISCMD(vp->rkp, 'y')) &&
190 				    cs_fspace(sp, &cs))
191 					return (1);
192 				break;
193 			}
194 
195 			/* Eat whitespace characters. */
196 			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
197 				if (cs_fblank(sp, &cs))
198 					return (1);
199 			if (cs.cs_flags == CS_EOF)
200 				goto ret;
201 		}
202 
203 	/*
204 	 * If we didn't move, we must be at EOF.
205 	 *
206 	 * !!!
207 	 * That's okay for motion commands, however.
208 	 */
209 ret:	if (!ISMOTION(vp) &&
210 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
211 		v_eof(sp, &vp->m_start);
212 		return (1);
213 	}
214 
215 	/* Adjust the end of the range for motion commands. */
216 	vp->m_stop.lno = cs.cs_lno;
217 	vp->m_stop.cno = cs.cs_cno;
218 	if (ISMOTION(vp) && cs.cs_flags == 0)
219 		--vp->m_stop.cno;
220 
221 	/*
222 	 * Non-motion commands move to the end of the range.  Delete
223 	 * and yank stay at the start, ignore others.
224 	 */
225 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
226 	return (0);
227 }
228 
229 /*
230  * v_wordE -- [count]E
231  *	Move forward to the end of the bigword.
232  *
233  * PUBLIC: int v_wordE(SCR *, VICMD *);
234  */
235 int
236 v_wordE(SCR *sp, VICMD *vp)
237 {
238 	return (eword(sp, vp, BIGWORD));
239 }
240 
241 /*
242  * v_worde -- [count]e
243  *	Move forward to the end of the word.
244  *
245  * PUBLIC: int v_worde(SCR *, VICMD *);
246  */
247 int
248 v_worde(SCR *sp, VICMD *vp)
249 {
250 	return (eword(sp, vp, LITTLEWORD));
251 }
252 
253 /*
254  * eword --
255  *	Move forward to the end of the word.
256  */
257 static int
258 eword(SCR *sp, VICMD *vp, enum which type)
259 {
260 	enum { INWORD, NOTWORD } state;
261 	VCS cs;
262 	u_long cnt;
263 
264 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
265 	cs.cs_lno = vp->m_start.lno;
266 	cs.cs_cno = vp->m_start.cno;
267 	if (cs_init(sp, &cs))
268 		return (1);
269 
270 	/*
271 	 * !!!
272 	 * If in whitespace, or the next character is whitespace, move past
273 	 * it.  (This doesn't count as a word move.)  Stay at the character
274 	 * past the current one, it sets word "state" for the 'e' command.
275 	 */
276 	if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
277 		if (cs_next(sp, &cs))
278 			return (1);
279 		if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
280 			goto start;
281 	}
282 	if (cs_fblank(sp, &cs))
283 		return (1);
284 
285 	/*
286 	 * Cyclically move to the next word -- this involves skipping
287 	 * over word characters and then any trailing non-word characters.
288 	 * Note, for the 'e' command, the definition of a word keeps
289 	 * switching.
290 	 */
291 start:	if (type == BIGWORD)
292 		while (cnt--) {
293 			for (;;) {
294 				if (cs_next(sp, &cs))
295 					return (1);
296 				if (cs.cs_flags == CS_EOF)
297 					goto ret;
298 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
299 					break;
300 			}
301 			/*
302 			 * When we reach the start of the word after the last
303 			 * word, we're done.  If we changed state, back up one
304 			 * to the end of the previous word.
305 			 */
306 			if (cnt == 0) {
307 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
308 					return (1);
309 				break;
310 			}
311 
312 			/* Eat whitespace characters. */
313 			if (cs_fblank(sp, &cs))
314 				return (1);
315 			if (cs.cs_flags == CS_EOF)
316 				goto ret;
317 		}
318 	else
319 		while (cnt--) {
320 			state = cs.cs_flags == 0 &&
321 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
322 			for (;;) {
323 				if (cs_next(sp, &cs))
324 					return (1);
325 				if (cs.cs_flags == CS_EOF)
326 					goto ret;
327 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
328 					break;
329 				if (state == INWORD) {
330 					if (!inword(cs.cs_ch))
331 						break;
332 				} else
333 					if (inword(cs.cs_ch))
334 						break;
335 			}
336 			/* See comment above. */
337 			if (cnt == 0) {
338 				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
339 					return (1);
340 				break;
341 			}
342 
343 			/* Eat whitespace characters. */
344 			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
345 				if (cs_fblank(sp, &cs))
346 					return (1);
347 			if (cs.cs_flags == CS_EOF)
348 				goto ret;
349 		}
350 
351 	/*
352 	 * If we didn't move, we must be at EOF.
353 	 *
354 	 * !!!
355 	 * That's okay for motion commands, however.
356 	 */
357 ret:	if (!ISMOTION(vp) &&
358 	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
359 		v_eof(sp, &vp->m_start);
360 		return (1);
361 	}
362 
363 	/* Set the end of the range for motion commands. */
364 	vp->m_stop.lno = cs.cs_lno;
365 	vp->m_stop.cno = cs.cs_cno;
366 
367 	/*
368 	 * Non-motion commands move to the end of the range.
369 	 * Delete and yank stay at the start, ignore others.
370 	 */
371 	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
372 	return (0);
373 }
374 
375 /*
376  * v_WordB -- [count]B
377  *	Move backward a bigword at a time.
378  *
379  * PUBLIC: int v_wordB(SCR *, VICMD *);
380  */
381 int
382 v_wordB(SCR *sp, VICMD *vp)
383 {
384 	return (bword(sp, vp, BIGWORD));
385 }
386 
387 /*
388  * v_wordb -- [count]b
389  *	Move backward a word at a time.
390  *
391  * PUBLIC: int v_wordb(SCR *, VICMD *);
392  */
393 int
394 v_wordb(SCR *sp, VICMD *vp)
395 {
396 	return (bword(sp, vp, LITTLEWORD));
397 }
398 
399 /*
400  * bword --
401  *	Move backward by words.
402  */
403 static int
404 bword(SCR *sp, VICMD *vp, enum which type)
405 {
406 	enum { INWORD, NOTWORD } state;
407 	VCS cs;
408 	u_long cnt;
409 
410 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
411 	cs.cs_lno = vp->m_start.lno;
412 	cs.cs_cno = vp->m_start.cno;
413 	if (cs_init(sp, &cs))
414 		return (1);
415 
416 	/*
417 	 * !!!
418 	 * If in whitespace, or the previous character is whitespace, move
419 	 * past it.  (This doesn't count as a word move.)  Stay at the
420 	 * character before the current one, it sets word "state" for the
421 	 * 'b' command.
422 	 */
423 	if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
424 		if (cs_prev(sp, &cs))
425 			return (1);
426 		if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
427 			goto start;
428 	}
429 	if (cs_bblank(sp, &cs))
430 		return (1);
431 
432 	/*
433 	 * Cyclically move to the beginning of the previous word -- this
434 	 * involves skipping over word characters and then any trailing
435 	 * non-word characters.  Note, for the 'b' command, the definition
436 	 * of a word keeps switching.
437 	 */
438 start:	if (type == BIGWORD)
439 		while (cnt--) {
440 			for (;;) {
441 				if (cs_prev(sp, &cs))
442 					return (1);
443 				if (cs.cs_flags == CS_SOF)
444 					goto ret;
445 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
446 					break;
447 			}
448 			/*
449 			 * When we reach the end of the word before the last
450 			 * word, we're done.  If we changed state, move forward
451 			 * one to the end of the next word.
452 			 */
453 			if (cnt == 0) {
454 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
455 					return (1);
456 				break;
457 			}
458 
459 			/* Eat whitespace characters. */
460 			if (cs_bblank(sp, &cs))
461 				return (1);
462 			if (cs.cs_flags == CS_SOF)
463 				goto ret;
464 		}
465 	else
466 		while (cnt--) {
467 			state = cs.cs_flags == 0 &&
468 			    inword(cs.cs_ch) ? INWORD : NOTWORD;
469 			for (;;) {
470 				if (cs_prev(sp, &cs))
471 					return (1);
472 				if (cs.cs_flags == CS_SOF)
473 					goto ret;
474 				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
475 					break;
476 				if (state == INWORD) {
477 					if (!inword(cs.cs_ch))
478 						break;
479 				} else
480 					if (inword(cs.cs_ch))
481 						break;
482 			}
483 			/* See comment above. */
484 			if (cnt == 0) {
485 				if (cs.cs_flags == 0 && cs_next(sp, &cs))
486 					return (1);
487 				break;
488 			}
489 
490 			/* Eat whitespace characters. */
491 			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
492 				if (cs_bblank(sp, &cs))
493 					return (1);
494 			if (cs.cs_flags == CS_SOF)
495 				goto ret;
496 		}
497 
498 	/* If we didn't move, we must be at SOF. */
499 ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
500 		v_sof(sp, &vp->m_start);
501 		return (1);
502 	}
503 
504 	/* Set the end of the range for motion commands. */
505 	vp->m_stop.lno = cs.cs_lno;
506 	vp->m_stop.cno = cs.cs_cno;
507 
508 	/*
509 	 * All commands move to the end of the range.  Motion commands
510 	 * adjust the starting point to the character before the current
511 	 * one.
512 	 *
513 	 * !!!
514 	 * The historic vi didn't get this right -- the `yb' command yanked
515 	 * the right stuff and even updated the cursor value, but the cursor
516 	 * was not actually updated on the screen.
517 	 */
518 	vp->m_final = vp->m_stop;
519 	if (ISMOTION(vp))
520 		--vp->m_start.cno;
521 	return (0);
522 }
523