1 /*-
2 * Copyright (c) 1992, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 * Keith Bostic. All rights reserved.
6 *
7 * See the LICENSE file for redistribution information.
8 */
9
10 #include "config.h"
11
12 #include <sys/types.h>
13 #include <sys/queue.h>
14 #include <sys/time.h>
15
16 #include <bitstring.h>
17 #include <ctype.h>
18 #include <limits.h>
19 #include <stdio.h>
20
21 #include "../common/common.h"
22 #include "vi.h"
23
/*
 * There are two types of "words". Bigwords are easy -- groups of anything
 * delimited by whitespace. Normal words are trickier. They are either a
 * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace. When searching for a word, if you're in
 * whitespace, it's easy, just remove the whitespace and go to the beginning
 * or end of the word. Otherwise, figure out if the next character is in a
 * different group. If it is, go to the beginning or end of that group,
 * otherwise, go to the beginning or end of the current group. The historic
 * version of vi didn't get this right, so, for example, there were cases
 * where "4e" was not the same as "eeee" -- in particular, single character
 * words, and commands that began in whitespace were almost always handled
 * incorrectly. To get it right you have to resolve the cursor after each
 * search so that the look-ahead to figure out what type of "word" the
 * cursor is in will be correct.
 *
 * Empty lines, and lines that consist of only white-space characters count
 * as a single word, and the beginning and end of the file count as an
 * infinite number of words.
 *
 * Movements associated with commands are different than movement commands.
 * For example, in "abc def", with the cursor on the 'a', "cw" is from
 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white
 * space is discarded from the change movement. Another example is that,
 * in the same string, a "cw" on any white space character replaces that
 * single character, and nothing else. Ain't nothin' in here that's easy.
 *
 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
 * would treat groups of empty lines as individual words, i.e. the command
 * would move the cursor to each new empty line. The 'e' and 'E' commands
 * would treat groups of empty lines as a single word, i.e. the first use
 * would move past the group of lines. The 'b' command would just beep at
 * you, or, if you did it from the start of the line as part of a motion
 * command, go absolutely nuts. If the lines contained only white-space
 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
 * 'b', 'E' and 'e' commands would treat the group as a single word, and
 * the 'B' and 'b' commands will treat the lines as individual words. This
 * implementation treats all of these cases as a single white-space word.
 */
62
63 enum which {BIGWORD, LITTLEWORD};
64
65 static int bword(SCR *, VICMD *, enum which);
66 static int eword(SCR *, VICMD *, enum which);
67 static int fword(SCR *, VICMD *, enum which);
68
69 /*
70 * v_wordW -- [count]W
71 * Move forward a bigword at a time.
72 *
73 * PUBLIC: int v_wordW(SCR *, VICMD *);
74 */
75 int
v_wordW(SCR * sp,VICMD * vp)76 v_wordW(SCR *sp, VICMD *vp)
77 {
78 return (fword(sp, vp, BIGWORD));
79 }
80
81 /*
82 * v_wordw -- [count]w
83 * Move forward a word at a time.
84 *
85 * PUBLIC: int v_wordw(SCR *, VICMD *);
86 */
87 int
v_wordw(SCR * sp,VICMD * vp)88 v_wordw(SCR *sp, VICMD *vp)
89 {
90 return (fword(sp, vp, LITTLEWORD));
91 }
92
93 /*
94 * fword --
95 * Move forward by words.
96 */
97 static int
fword(SCR * sp,VICMD * vp,enum which type)98 fword(SCR *sp, VICMD *vp, enum which type)
99 {
100 enum { INWORD, NOTWORD } state;
101 VCS cs;
102 u_long cnt;
103
104 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
105 cs.cs_lno = vp->m_start.lno;
106 cs.cs_cno = vp->m_start.cno;
107 if (cs_init(sp, &cs))
108 return (1);
109
110 /*
111 * If in white-space:
112 * If the count is 1, and it's a change command, we're done.
113 * Else, move to the first non-white-space character, which
114 * counts as a single word move. If it's a motion command,
115 * don't move off the end of the line.
116 */
117 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK(cs.cs_ch))) {
118 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
119 if (ISCMD(vp->rkp, 'c'))
120 return (0);
121 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
122 if (cs_fspace(sp, &cs))
123 return (1);
124 goto ret;
125 }
126 }
127 if (cs_fblank(sp, &cs))
128 return (1);
129 --cnt;
130 }
131
132 /*
133 * Cyclically move to the next word -- this involves skipping
134 * over word characters and then any trailing non-word characters.
135 * Note, for the 'w' command, the definition of a word keeps
136 * switching.
137 */
138 if (type == BIGWORD)
139 while (cnt--) {
140 for (;;) {
141 if (cs_next(sp, &cs))
142 return (1);
143 if (cs.cs_flags == CS_EOF)
144 goto ret;
145 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
146 break;
147 }
148 /*
149 * If a motion command and we're at the end of the
150 * last word, we're done. Delete and yank eat any
151 * trailing blanks, but we don't move off the end
152 * of the line regardless.
153 */
154 if (cnt == 0 && ISMOTION(vp)) {
155 if ((ISCMD(vp->rkp, 'd') ||
156 ISCMD(vp->rkp, 'y')) &&
157 cs_fspace(sp, &cs))
158 return (1);
159 break;
160 }
161
162 /* Eat whitespace characters. */
163 if (cs_fblank(sp, &cs))
164 return (1);
165 if (cs.cs_flags == CS_EOF)
166 goto ret;
167 }
168 else
169 while (cnt--) {
170 state = cs.cs_flags == 0 &&
171 inword(cs.cs_ch) ? INWORD : NOTWORD;
172 for (;;) {
173 if (cs_next(sp, &cs))
174 return (1);
175 if (cs.cs_flags == CS_EOF)
176 goto ret;
177 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
178 break;
179 if (state == INWORD) {
180 if (!inword(cs.cs_ch))
181 break;
182 } else
183 if (inword(cs.cs_ch))
184 break;
185 }
186 /* See comment above. */
187 if (cnt == 0 && ISMOTION(vp)) {
188 if ((ISCMD(vp->rkp, 'd') ||
189 ISCMD(vp->rkp, 'y')) &&
190 cs_fspace(sp, &cs))
191 return (1);
192 break;
193 }
194
195 /* Eat whitespace characters. */
196 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
197 if (cs_fblank(sp, &cs))
198 return (1);
199 if (cs.cs_flags == CS_EOF)
200 goto ret;
201 }
202
203 /*
204 * If we didn't move, we must be at EOF.
205 *
206 * !!!
207 * That's okay for motion commands, however.
208 */
209 ret: if (!ISMOTION(vp) &&
210 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
211 v_eof(sp, &vp->m_start);
212 return (1);
213 }
214
215 /* Adjust the end of the range for motion commands. */
216 vp->m_stop.lno = cs.cs_lno;
217 vp->m_stop.cno = cs.cs_cno;
218 if (ISMOTION(vp) && cs.cs_flags == 0)
219 --vp->m_stop.cno;
220
221 /*
222 * Non-motion commands move to the end of the range. Delete
223 * and yank stay at the start, ignore others.
224 */
225 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
226 return (0);
227 }
228
229 /*
230 * v_wordE -- [count]E
231 * Move forward to the end of the bigword.
232 *
233 * PUBLIC: int v_wordE(SCR *, VICMD *);
234 */
235 int
v_wordE(SCR * sp,VICMD * vp)236 v_wordE(SCR *sp, VICMD *vp)
237 {
238 return (eword(sp, vp, BIGWORD));
239 }
240
241 /*
242 * v_worde -- [count]e
243 * Move forward to the end of the word.
244 *
245 * PUBLIC: int v_worde(SCR *, VICMD *);
246 */
247 int
v_worde(SCR * sp,VICMD * vp)248 v_worde(SCR *sp, VICMD *vp)
249 {
250 return (eword(sp, vp, LITTLEWORD));
251 }
252
253 /*
254 * eword --
255 * Move forward to the end of the word.
256 */
257 static int
eword(SCR * sp,VICMD * vp,enum which type)258 eword(SCR *sp, VICMD *vp, enum which type)
259 {
260 enum { INWORD, NOTWORD } state;
261 VCS cs;
262 u_long cnt;
263
264 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
265 cs.cs_lno = vp->m_start.lno;
266 cs.cs_cno = vp->m_start.cno;
267 if (cs_init(sp, &cs))
268 return (1);
269
270 /*
271 * !!!
272 * If in whitespace, or the next character is whitespace, move past
273 * it. (This doesn't count as a word move.) Stay at the character
274 * past the current one, it sets word "state" for the 'e' command.
275 */
276 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
277 if (cs_next(sp, &cs))
278 return (1);
279 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
280 goto start;
281 }
282 if (cs_fblank(sp, &cs))
283 return (1);
284
285 /*
286 * Cyclically move to the next word -- this involves skipping
287 * over word characters and then any trailing non-word characters.
288 * Note, for the 'e' command, the definition of a word keeps
289 * switching.
290 */
291 start: if (type == BIGWORD)
292 while (cnt--) {
293 for (;;) {
294 if (cs_next(sp, &cs))
295 return (1);
296 if (cs.cs_flags == CS_EOF)
297 goto ret;
298 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
299 break;
300 }
301 /*
302 * When we reach the start of the word after the last
303 * word, we're done. If we changed state, back up one
304 * to the end of the previous word.
305 */
306 if (cnt == 0) {
307 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
308 return (1);
309 break;
310 }
311
312 /* Eat whitespace characters. */
313 if (cs_fblank(sp, &cs))
314 return (1);
315 if (cs.cs_flags == CS_EOF)
316 goto ret;
317 }
318 else
319 while (cnt--) {
320 state = cs.cs_flags == 0 &&
321 inword(cs.cs_ch) ? INWORD : NOTWORD;
322 for (;;) {
323 if (cs_next(sp, &cs))
324 return (1);
325 if (cs.cs_flags == CS_EOF)
326 goto ret;
327 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
328 break;
329 if (state == INWORD) {
330 if (!inword(cs.cs_ch))
331 break;
332 } else
333 if (inword(cs.cs_ch))
334 break;
335 }
336 /* See comment above. */
337 if (cnt == 0) {
338 if (cs.cs_flags == 0 && cs_prev(sp, &cs))
339 return (1);
340 break;
341 }
342
343 /* Eat whitespace characters. */
344 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
345 if (cs_fblank(sp, &cs))
346 return (1);
347 if (cs.cs_flags == CS_EOF)
348 goto ret;
349 }
350
351 /*
352 * If we didn't move, we must be at EOF.
353 *
354 * !!!
355 * That's okay for motion commands, however.
356 */
357 ret: if (!ISMOTION(vp) &&
358 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
359 v_eof(sp, &vp->m_start);
360 return (1);
361 }
362
363 /* Set the end of the range for motion commands. */
364 vp->m_stop.lno = cs.cs_lno;
365 vp->m_stop.cno = cs.cs_cno;
366
367 /*
368 * Non-motion commands move to the end of the range.
369 * Delete and yank stay at the start, ignore others.
370 */
371 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
372 return (0);
373 }
374
375 /*
376 * v_WordB -- [count]B
377 * Move backward a bigword at a time.
378 *
379 * PUBLIC: int v_wordB(SCR *, VICMD *);
380 */
381 int
v_wordB(SCR * sp,VICMD * vp)382 v_wordB(SCR *sp, VICMD *vp)
383 {
384 return (bword(sp, vp, BIGWORD));
385 }
386
387 /*
388 * v_wordb -- [count]b
389 * Move backward a word at a time.
390 *
391 * PUBLIC: int v_wordb(SCR *, VICMD *);
392 */
393 int
v_wordb(SCR * sp,VICMD * vp)394 v_wordb(SCR *sp, VICMD *vp)
395 {
396 return (bword(sp, vp, LITTLEWORD));
397 }
398
399 /*
400 * bword --
401 * Move backward by words.
402 */
403 static int
bword(SCR * sp,VICMD * vp,enum which type)404 bword(SCR *sp, VICMD *vp, enum which type)
405 {
406 enum { INWORD, NOTWORD } state;
407 VCS cs;
408 u_long cnt;
409
410 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
411 cs.cs_lno = vp->m_start.lno;
412 cs.cs_cno = vp->m_start.cno;
413 if (cs_init(sp, &cs))
414 return (1);
415
416 /*
417 * !!!
418 * If in whitespace, or the previous character is whitespace, move
419 * past it. (This doesn't count as a word move.) Stay at the
420 * character before the current one, it sets word "state" for the
421 * 'b' command.
422 */
423 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
424 if (cs_prev(sp, &cs))
425 return (1);
426 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
427 goto start;
428 }
429 if (cs_bblank(sp, &cs))
430 return (1);
431
432 /*
433 * Cyclically move to the beginning of the previous word -- this
434 * involves skipping over word characters and then any trailing
435 * non-word characters. Note, for the 'b' command, the definition
436 * of a word keeps switching.
437 */
438 start: if (type == BIGWORD)
439 while (cnt--) {
440 for (;;) {
441 if (cs_prev(sp, &cs))
442 return (1);
443 if (cs.cs_flags == CS_SOF)
444 goto ret;
445 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
446 break;
447 }
448 /*
449 * When we reach the end of the word before the last
450 * word, we're done. If we changed state, move forward
451 * one to the end of the next word.
452 */
453 if (cnt == 0) {
454 if (cs.cs_flags == 0 && cs_next(sp, &cs))
455 return (1);
456 break;
457 }
458
459 /* Eat whitespace characters. */
460 if (cs_bblank(sp, &cs))
461 return (1);
462 if (cs.cs_flags == CS_SOF)
463 goto ret;
464 }
465 else
466 while (cnt--) {
467 state = cs.cs_flags == 0 &&
468 inword(cs.cs_ch) ? INWORD : NOTWORD;
469 for (;;) {
470 if (cs_prev(sp, &cs))
471 return (1);
472 if (cs.cs_flags == CS_SOF)
473 goto ret;
474 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
475 break;
476 if (state == INWORD) {
477 if (!inword(cs.cs_ch))
478 break;
479 } else
480 if (inword(cs.cs_ch))
481 break;
482 }
483 /* See comment above. */
484 if (cnt == 0) {
485 if (cs.cs_flags == 0 && cs_next(sp, &cs))
486 return (1);
487 break;
488 }
489
490 /* Eat whitespace characters. */
491 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
492 if (cs_bblank(sp, &cs))
493 return (1);
494 if (cs.cs_flags == CS_SOF)
495 goto ret;
496 }
497
498 /* If we didn't move, we must be at SOF. */
499 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
500 v_sof(sp, &vp->m_start);
501 return (1);
502 }
503
504 /* Set the end of the range for motion commands. */
505 vp->m_stop.lno = cs.cs_lno;
506 vp->m_stop.cno = cs.cs_cno;
507
508 /*
509 * All commands move to the end of the range. Motion commands
510 * adjust the starting point to the character before the current
511 * one.
512 *
513 * !!!
514 * The historic vi didn't get this right -- the `yb' command yanked
515 * the right stuff and even updated the cursor value, but the cursor
516 * was not actually updated on the screen.
517 */
518 vp->m_final = vp->m_stop;
519 if (ISMOTION(vp))
520 --vp->m_start.cno;
521 return (0);
522 }
523