1 /*- 2 * Copyright (c) 1992, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1992, 1993, 1994, 1995, 1996 5 * Keith Bostic. All rights reserved. 6 * 7 * See the LICENSE file for redistribution information. 8 */ 9 10 #include "config.h" 11 12 #ifndef lint 13 static const char sccsid[] = "$Id: v_word.c,v 10.7 2011/12/27 00:49:31 zy Exp $"; 14 #endif /* not lint */ 15 16 #include <sys/types.h> 17 #include <sys/queue.h> 18 #include <sys/time.h> 19 20 #include <bitstring.h> 21 #include <ctype.h> 22 #include <limits.h> 23 #include <stdio.h> 24 25 #include "../common/common.h" 26 #include "vi.h" 27 28 /* 29 * There are two types of "words". Bigwords are easy -- groups of anything 30 * delimited by whitespace. Normal words are trickier. They are either a 31 * group of characters, numbers and underscores, or a group of anything but, 32 * delimited by whitespace. When for a word, if you're in whitespace, it's 33 * easy, just remove the whitespace and go to the beginning or end of the 34 * word. Otherwise, figure out if the next character is in a different group. 35 * If it is, go to the beginning or end of that group, otherwise, go to the 36 * beginning or end of the current group. The historic version of vi didn't 37 * get this right, so, for example, there were cases where "4e" was not the 38 * same as "eeee" -- in particular, single character words, and commands that 39 * began in whitespace were almost always handled incorrectly. To get it right 40 * you have to resolve the cursor after each search so that the look-ahead to 41 * figure out what type of "word" the cursor is in will be correct. 42 * 43 * Empty lines, and lines that consist of only white-space characters count 44 * as a single word, and the beginning and end of the file counts as an 45 * infinite number of words. 46 * 47 * Movements associated with commands are different than movement commands. 48 * For example, in "abc def", with the cursor on the 'a', "cw" is from 49 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 50 * space is discarded from the change movement. Another example is that, 51 * in the same string, a "cw" on any white space character replaces that 52 * single character, and nothing else. Ain't nothin' in here that's easy. 53 * 54 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 55 * would treat groups of empty lines as individual words, i.e. the command 56 * would move the cursor to each new empty line. The 'e' and 'E' commands 57 * would treat groups of empty lines as a single word, i.e. the first use 58 * would move past the group of lines. The 'b' command would just beep at 59 * you, or, if you did it from the start of the line as part of a motion 60 * command, go absolutely nuts. If the lines contained only white-space 61 * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 62 * 'b', 'E' and 'e' commands would treat the group as a single word, and 63 * the 'B' and 'b' commands will treat the lines as individual words. This 64 * implementation treats all of these cases as a single white-space word. 65 */ 66 67 enum which {BIGWORD, LITTLEWORD}; 68 69 static int bword(SCR *, VICMD *, enum which); 70 static int eword(SCR *, VICMD *, enum which); 71 static int fword(SCR *, VICMD *, enum which); 72 73 /* 74 * v_wordW -- [count]W 75 * Move forward a bigword at a time. 76 * 77 * PUBLIC: int v_wordW(SCR *, VICMD *); 78 */ 79 int 80 v_wordW(SCR *sp, VICMD *vp) 81 { 82 return (fword(sp, vp, BIGWORD)); 83 } 84 85 /* 86 * v_wordw -- [count]w 87 * Move forward a word at a time. 88 * 89 * PUBLIC: int v_wordw(SCR *, VICMD *); 90 */ 91 int 92 v_wordw(SCR *sp, VICMD *vp) 93 { 94 return (fword(sp, vp, LITTLEWORD)); 95 } 96 97 /* 98 * fword -- 99 * Move forward by words. 100 */ 101 static int 102 fword(SCR *sp, VICMD *vp, enum which type) 103 { 104 enum { INWORD, NOTWORD } state; 105 VCS cs; 106 u_long cnt; 107 108 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 109 cs.cs_lno = vp->m_start.lno; 110 cs.cs_cno = vp->m_start.cno; 111 if (cs_init(sp, &cs)) 112 return (1); 113 114 /* 115 * If in white-space: 116 * If the count is 1, and it's a change command, we're done. 117 * Else, move to the first non-white-space character, which 118 * counts as a single word move. If it's a motion command, 119 * don't move off the end of the line. 120 */ 121 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK(cs.cs_ch))) { 122 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 123 if (ISCMD(vp->rkp, 'c')) 124 return (0); 125 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 126 if (cs_fspace(sp, &cs)) 127 return (1); 128 goto ret; 129 } 130 } 131 if (cs_fblank(sp, &cs)) 132 return (1); 133 --cnt; 134 } 135 136 /* 137 * Cyclically move to the next word -- this involves skipping 138 * over word characters and then any trailing non-word characters. 139 * Note, for the 'w' command, the definition of a word keeps 140 * switching. 141 */ 142 if (type == BIGWORD) 143 while (cnt--) { 144 for (;;) { 145 if (cs_next(sp, &cs)) 146 return (1); 147 if (cs.cs_flags == CS_EOF) 148 goto ret; 149 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 150 break; 151 } 152 /* 153 * If a motion command and we're at the end of the 154 * last word, we're done. Delete and yank eat any 155 * trailing blanks, but we don't move off the end 156 * of the line regardless. 157 */ 158 if (cnt == 0 && ISMOTION(vp)) { 159 if ((ISCMD(vp->rkp, 'd') || 160 ISCMD(vp->rkp, 'y')) && 161 cs_fspace(sp, &cs)) 162 return (1); 163 break; 164 } 165 166 /* Eat whitespace characters. */ 167 if (cs_fblank(sp, &cs)) 168 return (1); 169 if (cs.cs_flags == CS_EOF) 170 goto ret; 171 } 172 else 173 while (cnt--) { 174 state = cs.cs_flags == 0 && 175 inword(cs.cs_ch) ? INWORD : NOTWORD; 176 for (;;) { 177 if (cs_next(sp, &cs)) 178 return (1); 179 if (cs.cs_flags == CS_EOF) 180 goto ret; 181 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 182 break; 183 if (state == INWORD) { 184 if (!inword(cs.cs_ch)) 185 break; 186 } else 187 if (inword(cs.cs_ch)) 188 break; 189 } 190 /* See comment above. */ 191 if (cnt == 0 && ISMOTION(vp)) { 192 if ((ISCMD(vp->rkp, 'd') || 193 ISCMD(vp->rkp, 'y')) && 194 cs_fspace(sp, &cs)) 195 return (1); 196 break; 197 } 198 199 /* Eat whitespace characters. */ 200 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 201 if (cs_fblank(sp, &cs)) 202 return (1); 203 if (cs.cs_flags == CS_EOF) 204 goto ret; 205 } 206 207 /* 208 * If we didn't move, we must be at EOF. 209 * 210 * !!! 211 * That's okay for motion commands, however. 212 */ 213 ret: if (!ISMOTION(vp) && 214 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 215 v_eof(sp, &vp->m_start); 216 return (1); 217 } 218 219 /* Adjust the end of the range for motion commands. */ 220 vp->m_stop.lno = cs.cs_lno; 221 vp->m_stop.cno = cs.cs_cno; 222 if (ISMOTION(vp) && cs.cs_flags == 0) 223 --vp->m_stop.cno; 224 225 /* 226 * Non-motion commands move to the end of the range. Delete 227 * and yank stay at the start, ignore others. 228 */ 229 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 230 return (0); 231 } 232 233 /* 234 * v_wordE -- [count]E 235 * Move forward to the end of the bigword. 236 * 237 * PUBLIC: int v_wordE(SCR *, VICMD *); 238 */ 239 int 240 v_wordE(SCR *sp, VICMD *vp) 241 { 242 return (eword(sp, vp, BIGWORD)); 243 } 244 245 /* 246 * v_worde -- [count]e 247 * Move forward to the end of the word. 248 * 249 * PUBLIC: int v_worde(SCR *, VICMD *); 250 */ 251 int 252 v_worde(SCR *sp, VICMD *vp) 253 { 254 return (eword(sp, vp, LITTLEWORD)); 255 } 256 257 /* 258 * eword -- 259 * Move forward to the end of the word. 260 */ 261 static int 262 eword(SCR *sp, VICMD *vp, enum which type) 263 { 264 enum { INWORD, NOTWORD } state; 265 VCS cs; 266 u_long cnt; 267 268 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 269 cs.cs_lno = vp->m_start.lno; 270 cs.cs_cno = vp->m_start.cno; 271 if (cs_init(sp, &cs)) 272 return (1); 273 274 /* 275 * !!! 276 * If in whitespace, or the next character is whitespace, move past 277 * it. (This doesn't count as a word move.) Stay at the character 278 * past the current one, it sets word "state" for the 'e' command. 279 */ 280 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) { 281 if (cs_next(sp, &cs)) 282 return (1); 283 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) 284 goto start; 285 } 286 if (cs_fblank(sp, &cs)) 287 return (1); 288 289 /* 290 * Cyclically move to the next word -- this involves skipping 291 * over word characters and then any trailing non-word characters. 292 * Note, for the 'e' command, the definition of a word keeps 293 * switching. 294 */ 295 start: if (type == BIGWORD) 296 while (cnt--) { 297 for (;;) { 298 if (cs_next(sp, &cs)) 299 return (1); 300 if (cs.cs_flags == CS_EOF) 301 goto ret; 302 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 303 break; 304 } 305 /* 306 * When we reach the start of the word after the last 307 * word, we're done. If we changed state, back up one 308 * to the end of the previous word. 309 */ 310 if (cnt == 0) { 311 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 312 return (1); 313 break; 314 } 315 316 /* Eat whitespace characters. */ 317 if (cs_fblank(sp, &cs)) 318 return (1); 319 if (cs.cs_flags == CS_EOF) 320 goto ret; 321 } 322 else 323 while (cnt--) { 324 state = cs.cs_flags == 0 && 325 inword(cs.cs_ch) ? INWORD : NOTWORD; 326 for (;;) { 327 if (cs_next(sp, &cs)) 328 return (1); 329 if (cs.cs_flags == CS_EOF) 330 goto ret; 331 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 332 break; 333 if (state == INWORD) { 334 if (!inword(cs.cs_ch)) 335 break; 336 } else 337 if (inword(cs.cs_ch)) 338 break; 339 } 340 /* See comment above. */ 341 if (cnt == 0) { 342 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 343 return (1); 344 break; 345 } 346 347 /* Eat whitespace characters. */ 348 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 349 if (cs_fblank(sp, &cs)) 350 return (1); 351 if (cs.cs_flags == CS_EOF) 352 goto ret; 353 } 354 355 /* 356 * If we didn't move, we must be at EOF. 357 * 358 * !!! 359 * That's okay for motion commands, however. 360 */ 361 ret: if (!ISMOTION(vp) && 362 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 363 v_eof(sp, &vp->m_start); 364 return (1); 365 } 366 367 /* Set the end of the range for motion commands. */ 368 vp->m_stop.lno = cs.cs_lno; 369 vp->m_stop.cno = cs.cs_cno; 370 371 /* 372 * Non-motion commands move to the end of the range. 373 * Delete and yank stay at the start, ignore others. 374 */ 375 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 376 return (0); 377 } 378 379 /* 380 * v_WordB -- [count]B 381 * Move backward a bigword at a time. 382 * 383 * PUBLIC: int v_wordB(SCR *, VICMD *); 384 */ 385 int 386 v_wordB(SCR *sp, VICMD *vp) 387 { 388 return (bword(sp, vp, BIGWORD)); 389 } 390 391 /* 392 * v_wordb -- [count]b 393 * Move backward a word at a time. 394 * 395 * PUBLIC: int v_wordb(SCR *, VICMD *); 396 */ 397 int 398 v_wordb(SCR *sp, VICMD *vp) 399 { 400 return (bword(sp, vp, LITTLEWORD)); 401 } 402 403 /* 404 * bword -- 405 * Move backward by words. 406 */ 407 static int 408 bword(SCR *sp, VICMD *vp, enum which type) 409 { 410 enum { INWORD, NOTWORD } state; 411 VCS cs; 412 u_long cnt; 413 414 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 415 cs.cs_lno = vp->m_start.lno; 416 cs.cs_cno = vp->m_start.cno; 417 if (cs_init(sp, &cs)) 418 return (1); 419 420 /* 421 * !!! 422 * If in whitespace, or the previous character is whitespace, move 423 * past it. (This doesn't count as a word move.) Stay at the 424 * character before the current one, it sets word "state" for the 425 * 'b' command. 426 */ 427 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) { 428 if (cs_prev(sp, &cs)) 429 return (1); 430 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) 431 goto start; 432 } 433 if (cs_bblank(sp, &cs)) 434 return (1); 435 436 /* 437 * Cyclically move to the beginning of the previous word -- this 438 * involves skipping over word characters and then any trailing 439 * non-word characters. Note, for the 'b' command, the definition 440 * of a word keeps switching. 441 */ 442 start: if (type == BIGWORD) 443 while (cnt--) { 444 for (;;) { 445 if (cs_prev(sp, &cs)) 446 return (1); 447 if (cs.cs_flags == CS_SOF) 448 goto ret; 449 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 450 break; 451 } 452 /* 453 * When we reach the end of the word before the last 454 * word, we're done. If we changed state, move forward 455 * one to the end of the next word. 456 */ 457 if (cnt == 0) { 458 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 459 return (1); 460 break; 461 } 462 463 /* Eat whitespace characters. */ 464 if (cs_bblank(sp, &cs)) 465 return (1); 466 if (cs.cs_flags == CS_SOF) 467 goto ret; 468 } 469 else 470 while (cnt--) { 471 state = cs.cs_flags == 0 && 472 inword(cs.cs_ch) ? INWORD : NOTWORD; 473 for (;;) { 474 if (cs_prev(sp, &cs)) 475 return (1); 476 if (cs.cs_flags == CS_SOF) 477 goto ret; 478 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 479 break; 480 if (state == INWORD) { 481 if (!inword(cs.cs_ch)) 482 break; 483 } else 484 if (inword(cs.cs_ch)) 485 break; 486 } 487 /* See comment above. */ 488 if (cnt == 0) { 489 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 490 return (1); 491 break; 492 } 493 494 /* Eat whitespace characters. */ 495 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 496 if (cs_bblank(sp, &cs)) 497 return (1); 498 if (cs.cs_flags == CS_SOF) 499 goto ret; 500 } 501 502 /* If we didn't move, we must be at SOF. */ 503 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 504 v_sof(sp, &vp->m_start); 505 return (1); 506 } 507 508 /* Set the end of the range for motion commands. */ 509 vp->m_stop.lno = cs.cs_lno; 510 vp->m_stop.cno = cs.cs_cno; 511 512 /* 513 * All commands move to the end of the range. Motion commands 514 * adjust the starting point to the character before the current 515 * one. 516 * 517 * !!! 518 * The historic vi didn't get this right -- the `yb' command yanked 519 * the right stuff and even updated the cursor value, but the cursor 520 * was not actually updated on the screen. 521 */ 522 vp->m_final = vp->m_stop; 523 if (ISMOTION(vp)) 524 --vp->m_start.cno; 525 return (0); 526 } 527