1 /*- 2 * Copyright (c) 1992, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1992, 1993, 1994, 1995, 1996 5 * Keith Bostic. All rights reserved. 6 * 7 * See the LICENSE file for redistribution information. 8 */ 9 10 #include "config.h" 11 12 #ifndef lint 13 static const char sccsid[] = "@(#)v_word.c 10.5 (Berkeley) 3/6/96"; 14 #endif /* not lint */ 15 16 #include <sys/types.h> 17 #include <sys/queue.h> 18 #include <sys/time.h> 19 20 #include <bitstring.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stdio.h> 24 25 #include "../common/common.h" 26 #include "vi.h" 27 28 /* 29 * There are two types of "words". Bigwords are easy -- groups of anything 30 * delimited by whitespace. Normal words are trickier. They are either a 31 * group of characters, numbers and underscores, or a group of anything but, 32 * delimited by whitespace. When for a word, if you're in whitespace, it's 33 * easy, just remove the whitespace and go to the beginning or end of the 34 * word. Otherwise, figure out if the next character is in a different group. 35 * If it is, go to the beginning or end of that group, otherwise, go to the 36 * beginning or end of the current group. The historic version of vi didn't 37 * get this right, so, for example, there were cases where "4e" was not the 38 * same as "eeee" -- in particular, single character words, and commands that 39 * began in whitespace were almost always handled incorrectly. To get it right 40 * you have to resolve the cursor after each search so that the look-ahead to 41 * figure out what type of "word" the cursor is in will be correct. 42 * 43 * Empty lines, and lines that consist of only white-space characters count 44 * as a single word, and the beginning and end of the file counts as an 45 * infinite number of words. 46 * 47 * Movements associated with commands are different than movement commands. 48 * For example, in "abc def", with the cursor on the 'a', "cw" is from 49 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 50 * space is discarded from the change movement. Another example is that, 51 * in the same string, a "cw" on any white space character replaces that 52 * single character, and nothing else. Ain't nothin' in here that's easy. 53 * 54 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 55 * would treat groups of empty lines as individual words, i.e. the command 56 * would move the cursor to each new empty line. The 'e' and 'E' commands 57 * would treat groups of empty lines as a single word, i.e. the first use 58 * would move past the group of lines. The 'b' command would just beep at 59 * you, or, if you did it from the start of the line as part of a motion 60 * command, go absolutely nuts. If the lines contained only white-space 61 * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 62 * 'b', 'E' and 'e' commands would treat the group as a single word, and 63 * the 'B' and 'b' commands will treat the lines as individual words. This 64 * implementation treats all of these cases as a single white-space word. 65 */ 66 67 enum which {BIGWORD, LITTLEWORD}; 68 69 static int bword __P((SCR *, VICMD *, enum which)); 70 static int eword __P((SCR *, VICMD *, enum which)); 71 static int fword __P((SCR *, VICMD *, enum which)); 72 73 /* 74 * v_wordW -- [count]W 75 * Move forward a bigword at a time. 76 * 77 * PUBLIC: int v_wordW __P((SCR *, VICMD *)); 78 */ 79 int 80 v_wordW(sp, vp) 81 SCR *sp; 82 VICMD *vp; 83 { 84 return (fword(sp, vp, BIGWORD)); 85 } 86 87 /* 88 * v_wordw -- [count]w 89 * Move forward a word at a time. 90 * 91 * PUBLIC: int v_wordw __P((SCR *, VICMD *)); 92 */ 93 int 94 v_wordw(sp, vp) 95 SCR *sp; 96 VICMD *vp; 97 { 98 return (fword(sp, vp, LITTLEWORD)); 99 } 100 101 /* 102 * fword -- 103 * Move forward by words. 104 */ 105 static int 106 fword(sp, vp, type) 107 SCR *sp; 108 VICMD *vp; 109 enum which type; 110 { 111 enum { INWORD, NOTWORD } state; 112 VCS cs; 113 u_long cnt; 114 115 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 116 cs.cs_lno = vp->m_start.lno; 117 cs.cs_cno = vp->m_start.cno; 118 if (cs_init(sp, &cs)) 119 return (1); 120 121 /* 122 * If in white-space: 123 * If the count is 1, and it's a change command, we're done. 124 * Else, move to the first non-white-space character, which 125 * counts as a single word move. If it's a motion command, 126 * don't move off the end of the line. 127 */ 128 if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) { 129 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 130 if (ISCMD(vp->rkp, 'c')) 131 return (0); 132 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 133 if (cs_fspace(sp, &cs)) 134 return (1); 135 goto ret; 136 } 137 } 138 if (cs_fblank(sp, &cs)) 139 return (1); 140 --cnt; 141 } 142 143 /* 144 * Cyclically move to the next word -- this involves skipping 145 * over word characters and then any trailing non-word characters. 146 * Note, for the 'w' command, the definition of a word keeps 147 * switching. 148 */ 149 if (type == BIGWORD) 150 while (cnt--) { 151 for (;;) { 152 if (cs_next(sp, &cs)) 153 return (1); 154 if (cs.cs_flags == CS_EOF) 155 goto ret; 156 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 157 break; 158 } 159 /* 160 * If a motion command and we're at the end of the 161 * last word, we're done. Delete and yank eat any 162 * trailing blanks, but we don't move off the end 163 * of the line regardless. 164 */ 165 if (cnt == 0 && ISMOTION(vp)) { 166 if ((ISCMD(vp->rkp, 'd') || 167 ISCMD(vp->rkp, 'y')) && 168 cs_fspace(sp, &cs)) 169 return (1); 170 break; 171 } 172 173 /* Eat whitespace characters. */ 174 if (cs_fblank(sp, &cs)) 175 return (1); 176 if (cs.cs_flags == CS_EOF) 177 goto ret; 178 } 179 else 180 while (cnt--) { 181 state = cs.cs_flags == 0 && 182 inword(cs.cs_ch) ? INWORD : NOTWORD; 183 for (;;) { 184 if (cs_next(sp, &cs)) 185 return (1); 186 if (cs.cs_flags == CS_EOF) 187 goto ret; 188 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 189 break; 190 if (state == INWORD) { 191 if (!inword(cs.cs_ch)) 192 break; 193 } else 194 if (inword(cs.cs_ch)) 195 break; 196 } 197 /* See comment above. */ 198 if (cnt == 0 && ISMOTION(vp)) { 199 if ((ISCMD(vp->rkp, 'd') || 200 ISCMD(vp->rkp, 'y')) && 201 cs_fspace(sp, &cs)) 202 return (1); 203 break; 204 } 205 206 /* Eat whitespace characters. */ 207 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 208 if (cs_fblank(sp, &cs)) 209 return (1); 210 if (cs.cs_flags == CS_EOF) 211 goto ret; 212 } 213 214 /* 215 * If we didn't move, we must be at EOF. 216 * 217 * !!! 218 * That's okay for motion commands, however. 219 */ 220 ret: if (!ISMOTION(vp) && 221 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 222 v_eof(sp, &vp->m_start); 223 return (1); 224 } 225 226 /* Adjust the end of the range for motion commands. */ 227 vp->m_stop.lno = cs.cs_lno; 228 vp->m_stop.cno = cs.cs_cno; 229 if (ISMOTION(vp) && cs.cs_flags == 0) 230 --vp->m_stop.cno; 231 232 /* 233 * Non-motion commands move to the end of the range. Delete 234 * and yank stay at the start, ignore others. 235 */ 236 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 237 return (0); 238 } 239 240 /* 241 * v_wordE -- [count]E 242 * Move forward to the end of the bigword. 243 * 244 * PUBLIC: int v_wordE __P((SCR *, VICMD *)); 245 */ 246 int 247 v_wordE(sp, vp) 248 SCR *sp; 249 VICMD *vp; 250 { 251 return (eword(sp, vp, BIGWORD)); 252 } 253 254 /* 255 * v_worde -- [count]e 256 * Move forward to the end of the word. 257 * 258 * PUBLIC: int v_worde __P((SCR *, VICMD *)); 259 */ 260 int 261 v_worde(sp, vp) 262 SCR *sp; 263 VICMD *vp; 264 { 265 return (eword(sp, vp, LITTLEWORD)); 266 } 267 268 /* 269 * eword -- 270 * Move forward to the end of the word. 271 */ 272 static int 273 eword(sp, vp, type) 274 SCR *sp; 275 VICMD *vp; 276 enum which type; 277 { 278 enum { INWORD, NOTWORD } state; 279 VCS cs; 280 u_long cnt; 281 282 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 283 cs.cs_lno = vp->m_start.lno; 284 cs.cs_cno = vp->m_start.cno; 285 if (cs_init(sp, &cs)) 286 return (1); 287 288 /* 289 * !!! 290 * If in whitespace, or the next character is whitespace, move past 291 * it. (This doesn't count as a word move.) Stay at the character 292 * past the current one, it sets word "state" for the 'e' command. 293 */ 294 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { 295 if (cs_next(sp, &cs)) 296 return (1); 297 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) 298 goto start; 299 } 300 if (cs_fblank(sp, &cs)) 301 return (1); 302 303 /* 304 * Cyclically move to the next word -- this involves skipping 305 * over word characters and then any trailing non-word characters. 306 * Note, for the 'e' command, the definition of a word keeps 307 * switching. 308 */ 309 start: if (type == BIGWORD) 310 while (cnt--) { 311 for (;;) { 312 if (cs_next(sp, &cs)) 313 return (1); 314 if (cs.cs_flags == CS_EOF) 315 goto ret; 316 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 317 break; 318 } 319 /* 320 * When we reach the start of the word after the last 321 * word, we're done. If we changed state, back up one 322 * to the end of the previous word. 323 */ 324 if (cnt == 0) { 325 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 326 return (1); 327 break; 328 } 329 330 /* Eat whitespace characters. */ 331 if (cs_fblank(sp, &cs)) 332 return (1); 333 if (cs.cs_flags == CS_EOF) 334 goto ret; 335 } 336 else 337 while (cnt--) { 338 state = cs.cs_flags == 0 && 339 inword(cs.cs_ch) ? INWORD : NOTWORD; 340 for (;;) { 341 if (cs_next(sp, &cs)) 342 return (1); 343 if (cs.cs_flags == CS_EOF) 344 goto ret; 345 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 346 break; 347 if (state == INWORD) { 348 if (!inword(cs.cs_ch)) 349 break; 350 } else 351 if (inword(cs.cs_ch)) 352 break; 353 } 354 /* See comment above. */ 355 if (cnt == 0) { 356 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 357 return (1); 358 break; 359 } 360 361 /* Eat whitespace characters. */ 362 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 363 if (cs_fblank(sp, &cs)) 364 return (1); 365 if (cs.cs_flags == CS_EOF) 366 goto ret; 367 } 368 369 /* 370 * If we didn't move, we must be at EOF. 371 * 372 * !!! 373 * That's okay for motion commands, however. 374 */ 375 ret: if (!ISMOTION(vp) && 376 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 377 v_eof(sp, &vp->m_start); 378 return (1); 379 } 380 381 /* Set the end of the range for motion commands. */ 382 vp->m_stop.lno = cs.cs_lno; 383 vp->m_stop.cno = cs.cs_cno; 384 385 /* 386 * Non-motion commands move to the end of the range. 387 * Delete and yank stay at the start, ignore others. 388 */ 389 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 390 return (0); 391 } 392 393 /* 394 * v_WordB -- [count]B 395 * Move backward a bigword at a time. 396 * 397 * PUBLIC: int v_wordB __P((SCR *, VICMD *)); 398 */ 399 int 400 v_wordB(sp, vp) 401 SCR *sp; 402 VICMD *vp; 403 { 404 return (bword(sp, vp, BIGWORD)); 405 } 406 407 /* 408 * v_wordb -- [count]b 409 * Move backward a word at a time. 410 * 411 * PUBLIC: int v_wordb __P((SCR *, VICMD *)); 412 */ 413 int 414 v_wordb(sp, vp) 415 SCR *sp; 416 VICMD *vp; 417 { 418 return (bword(sp, vp, LITTLEWORD)); 419 } 420 421 /* 422 * bword -- 423 * Move backward by words. 424 */ 425 static int 426 bword(sp, vp, type) 427 SCR *sp; 428 VICMD *vp; 429 enum which type; 430 { 431 enum { INWORD, NOTWORD } state; 432 VCS cs; 433 u_long cnt; 434 435 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 436 cs.cs_lno = vp->m_start.lno; 437 cs.cs_cno = vp->m_start.cno; 438 if (cs_init(sp, &cs)) 439 return (1); 440 441 /* 442 * !!! 443 * If in whitespace, or the previous character is whitespace, move 444 * past it. (This doesn't count as a word move.) Stay at the 445 * character before the current one, it sets word "state" for the 446 * 'b' command. 447 */ 448 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) { 449 if (cs_prev(sp, &cs)) 450 return (1); 451 if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) 452 goto start; 453 } 454 if (cs_bblank(sp, &cs)) 455 return (1); 456 457 /* 458 * Cyclically move to the beginning of the previous word -- this 459 * involves skipping over word characters and then any trailing 460 * non-word characters. Note, for the 'b' command, the definition 461 * of a word keeps switching. 462 */ 463 start: if (type == BIGWORD) 464 while (cnt--) { 465 for (;;) { 466 if (cs_prev(sp, &cs)) 467 return (1); 468 if (cs.cs_flags == CS_SOF) 469 goto ret; 470 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 471 break; 472 } 473 /* 474 * When we reach the end of the word before the last 475 * word, we're done. If we changed state, move forward 476 * one to the end of the next word. 477 */ 478 if (cnt == 0) { 479 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 480 return (1); 481 break; 482 } 483 484 /* Eat whitespace characters. */ 485 if (cs_bblank(sp, &cs)) 486 return (1); 487 if (cs.cs_flags == CS_SOF) 488 goto ret; 489 } 490 else 491 while (cnt--) { 492 state = cs.cs_flags == 0 && 493 inword(cs.cs_ch) ? INWORD : NOTWORD; 494 for (;;) { 495 if (cs_prev(sp, &cs)) 496 return (1); 497 if (cs.cs_flags == CS_SOF) 498 goto ret; 499 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 500 break; 501 if (state == INWORD) { 502 if (!inword(cs.cs_ch)) 503 break; 504 } else 505 if (inword(cs.cs_ch)) 506 break; 507 } 508 /* See comment above. */ 509 if (cnt == 0) { 510 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 511 return (1); 512 break; 513 } 514 515 /* Eat whitespace characters. */ 516 if (cs.cs_flags != 0 || isblank(cs.cs_ch)) 517 if (cs_bblank(sp, &cs)) 518 return (1); 519 if (cs.cs_flags == CS_SOF) 520 goto ret; 521 } 522 523 /* If we didn't move, we must be at SOF. */ 524 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 525 v_sof(sp, &vp->m_start); 526 return (1); 527 } 528 529 /* Set the end of the range for motion commands. */ 530 vp->m_stop.lno = cs.cs_lno; 531 vp->m_stop.cno = cs.cs_cno; 532 533 /* 534 * All commands move to the end of the range. Motion commands 535 * adjust the starting point to the character before the current 536 * one. 537 * 538 * !!! 539 * The historic vi didn't get this right -- the `yb' command yanked 540 * the right stuff and even updated the cursor value, but the cursor 541 * was not actually updated on the screen. 542 */ 543 vp->m_final = vp->m_stop; 544 if (ISMOTION(vp)) 545 --vp->m_start.cno; 546 return (0); 547 } 548