1 /*- 2 * Copyright (c) 1992, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 1992, 1993, 1994, 1995, 1996 5 * Keith Bostic. All rights reserved. 6 * 7 * See the LICENSE file for redistribution information. 8 */ 9 10 #include "config.h" 11 12 #include <sys/types.h> 13 #include <sys/queue.h> 14 #include <sys/time.h> 15 16 #include <bitstring.h> 17 #include <ctype.h> 18 #include <limits.h> 19 #include <stdio.h> 20 21 #include "../common/common.h" 22 #include "vi.h" 23 24 /* 25 * There are two types of "words". Bigwords are easy -- groups of anything 26 * delimited by whitespace. Normal words are trickier. They are either a 27 * group of characters, numbers and underscores, or a group of anything but, 28 * delimited by whitespace. When for a word, if you're in whitespace, it's 29 * easy, just remove the whitespace and go to the beginning or end of the 30 * word. Otherwise, figure out if the next character is in a different group. 31 * If it is, go to the beginning or end of that group, otherwise, go to the 32 * beginning or end of the current group. The historic version of vi didn't 33 * get this right, so, for example, there were cases where "4e" was not the 34 * same as "eeee" -- in particular, single character words, and commands that 35 * began in whitespace were almost always handled incorrectly. To get it right 36 * you have to resolve the cursor after each search so that the look-ahead to 37 * figure out what type of "word" the cursor is in will be correct. 38 * 39 * Empty lines, and lines that consist of only white-space characters count 40 * as a single word, and the beginning and end of the file counts as an 41 * infinite number of words. 42 * 43 * Movements associated with commands are different than movement commands. 44 * For example, in "abc def", with the cursor on the 'a', "cw" is from 45 * 'a' to 'c', while "w" is from 'a' to 'd'. In general, trailing white 46 * space is discarded from the change movement. Another example is that, 47 * in the same string, a "cw" on any white space character replaces that 48 * single character, and nothing else. Ain't nothin' in here that's easy. 49 * 50 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands 51 * would treat groups of empty lines as individual words, i.e. the command 52 * would move the cursor to each new empty line. The 'e' and 'E' commands 53 * would treat groups of empty lines as a single word, i.e. the first use 54 * would move past the group of lines. The 'b' command would just beep at 55 * you, or, if you did it from the start of the line as part of a motion 56 * command, go absolutely nuts. If the lines contained only white-space 57 * characters, the 'w' and 'W' commands would just beep at you, and the 'B', 58 * 'b', 'E' and 'e' commands would treat the group as a single word, and 59 * the 'B' and 'b' commands will treat the lines as individual words. This 60 * implementation treats all of these cases as a single white-space word. 61 */ 62 63 enum which {BIGWORD, LITTLEWORD}; 64 65 static int bword(SCR *, VICMD *, enum which); 66 static int eword(SCR *, VICMD *, enum which); 67 static int fword(SCR *, VICMD *, enum which); 68 69 /* 70 * v_wordW -- [count]W 71 * Move forward a bigword at a time. 72 * 73 * PUBLIC: int v_wordW(SCR *, VICMD *); 74 */ 75 int 76 v_wordW(SCR *sp, VICMD *vp) 77 { 78 return (fword(sp, vp, BIGWORD)); 79 } 80 81 /* 82 * v_wordw -- [count]w 83 * Move forward a word at a time. 84 * 85 * PUBLIC: int v_wordw(SCR *, VICMD *); 86 */ 87 int 88 v_wordw(SCR *sp, VICMD *vp) 89 { 90 return (fword(sp, vp, LITTLEWORD)); 91 } 92 93 /* 94 * fword -- 95 * Move forward by words. 96 */ 97 static int 98 fword(SCR *sp, VICMD *vp, enum which type) 99 { 100 enum { INWORD, NOTWORD } state; 101 VCS cs; 102 u_long cnt; 103 104 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 105 cs.cs_lno = vp->m_start.lno; 106 cs.cs_cno = vp->m_start.cno; 107 if (cs_init(sp, &cs)) 108 return (1); 109 110 /* 111 * If in white-space: 112 * If the count is 1, and it's a change command, we're done. 113 * Else, move to the first non-white-space character, which 114 * counts as a single word move. If it's a motion command, 115 * don't move off the end of the line. 116 */ 117 if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK(cs.cs_ch))) { 118 if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) { 119 if (ISCMD(vp->rkp, 'c')) 120 return (0); 121 if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) { 122 if (cs_fspace(sp, &cs)) 123 return (1); 124 goto ret; 125 } 126 } 127 if (cs_fblank(sp, &cs)) 128 return (1); 129 --cnt; 130 } 131 132 /* 133 * Cyclically move to the next word -- this involves skipping 134 * over word characters and then any trailing non-word characters. 135 * Note, for the 'w' command, the definition of a word keeps 136 * switching. 137 */ 138 if (type == BIGWORD) 139 while (cnt--) { 140 for (;;) { 141 if (cs_next(sp, &cs)) 142 return (1); 143 if (cs.cs_flags == CS_EOF) 144 goto ret; 145 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 146 break; 147 } 148 /* 149 * If a motion command and we're at the end of the 150 * last word, we're done. Delete and yank eat any 151 * trailing blanks, but we don't move off the end 152 * of the line regardless. 153 */ 154 if (cnt == 0 && ISMOTION(vp)) { 155 if ((ISCMD(vp->rkp, 'd') || 156 ISCMD(vp->rkp, 'y')) && 157 cs_fspace(sp, &cs)) 158 return (1); 159 break; 160 } 161 162 /* Eat whitespace characters. */ 163 if (cs_fblank(sp, &cs)) 164 return (1); 165 if (cs.cs_flags == CS_EOF) 166 goto ret; 167 } 168 else 169 while (cnt--) { 170 state = cs.cs_flags == 0 && 171 inword(cs.cs_ch) ? INWORD : NOTWORD; 172 for (;;) { 173 if (cs_next(sp, &cs)) 174 return (1); 175 if (cs.cs_flags == CS_EOF) 176 goto ret; 177 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 178 break; 179 if (state == INWORD) { 180 if (!inword(cs.cs_ch)) 181 break; 182 } else 183 if (inword(cs.cs_ch)) 184 break; 185 } 186 /* See comment above. */ 187 if (cnt == 0 && ISMOTION(vp)) { 188 if ((ISCMD(vp->rkp, 'd') || 189 ISCMD(vp->rkp, 'y')) && 190 cs_fspace(sp, &cs)) 191 return (1); 192 break; 193 } 194 195 /* Eat whitespace characters. */ 196 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 197 if (cs_fblank(sp, &cs)) 198 return (1); 199 if (cs.cs_flags == CS_EOF) 200 goto ret; 201 } 202 203 /* 204 * If we didn't move, we must be at EOF. 205 * 206 * !!! 207 * That's okay for motion commands, however. 208 */ 209 ret: if (!ISMOTION(vp) && 210 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 211 v_eof(sp, &vp->m_start); 212 return (1); 213 } 214 215 /* Adjust the end of the range for motion commands. */ 216 vp->m_stop.lno = cs.cs_lno; 217 vp->m_stop.cno = cs.cs_cno; 218 if (ISMOTION(vp) && cs.cs_flags == 0) 219 --vp->m_stop.cno; 220 221 /* 222 * Non-motion commands move to the end of the range. Delete 223 * and yank stay at the start, ignore others. 224 */ 225 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 226 return (0); 227 } 228 229 /* 230 * v_wordE -- [count]E 231 * Move forward to the end of the bigword. 232 * 233 * PUBLIC: int v_wordE(SCR *, VICMD *); 234 */ 235 int 236 v_wordE(SCR *sp, VICMD *vp) 237 { 238 return (eword(sp, vp, BIGWORD)); 239 } 240 241 /* 242 * v_worde -- [count]e 243 * Move forward to the end of the word. 244 * 245 * PUBLIC: int v_worde(SCR *, VICMD *); 246 */ 247 int 248 v_worde(SCR *sp, VICMD *vp) 249 { 250 return (eword(sp, vp, LITTLEWORD)); 251 } 252 253 /* 254 * eword -- 255 * Move forward to the end of the word. 256 */ 257 static int 258 eword(SCR *sp, VICMD *vp, enum which type) 259 { 260 enum { INWORD, NOTWORD } state; 261 VCS cs; 262 u_long cnt; 263 264 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 265 cs.cs_lno = vp->m_start.lno; 266 cs.cs_cno = vp->m_start.cno; 267 if (cs_init(sp, &cs)) 268 return (1); 269 270 /* 271 * !!! 272 * If in whitespace, or the next character is whitespace, move past 273 * it. (This doesn't count as a word move.) Stay at the character 274 * past the current one, it sets word "state" for the 'e' command. 275 */ 276 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) { 277 if (cs_next(sp, &cs)) 278 return (1); 279 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) 280 goto start; 281 } 282 if (cs_fblank(sp, &cs)) 283 return (1); 284 285 /* 286 * Cyclically move to the next word -- this involves skipping 287 * over word characters and then any trailing non-word characters. 288 * Note, for the 'e' command, the definition of a word keeps 289 * switching. 290 */ 291 start: if (type == BIGWORD) 292 while (cnt--) { 293 for (;;) { 294 if (cs_next(sp, &cs)) 295 return (1); 296 if (cs.cs_flags == CS_EOF) 297 goto ret; 298 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 299 break; 300 } 301 /* 302 * When we reach the start of the word after the last 303 * word, we're done. If we changed state, back up one 304 * to the end of the previous word. 305 */ 306 if (cnt == 0) { 307 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 308 return (1); 309 break; 310 } 311 312 /* Eat whitespace characters. */ 313 if (cs_fblank(sp, &cs)) 314 return (1); 315 if (cs.cs_flags == CS_EOF) 316 goto ret; 317 } 318 else 319 while (cnt--) { 320 state = cs.cs_flags == 0 && 321 inword(cs.cs_ch) ? INWORD : NOTWORD; 322 for (;;) { 323 if (cs_next(sp, &cs)) 324 return (1); 325 if (cs.cs_flags == CS_EOF) 326 goto ret; 327 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 328 break; 329 if (state == INWORD) { 330 if (!inword(cs.cs_ch)) 331 break; 332 } else 333 if (inword(cs.cs_ch)) 334 break; 335 } 336 /* See comment above. */ 337 if (cnt == 0) { 338 if (cs.cs_flags == 0 && cs_prev(sp, &cs)) 339 return (1); 340 break; 341 } 342 343 /* Eat whitespace characters. */ 344 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 345 if (cs_fblank(sp, &cs)) 346 return (1); 347 if (cs.cs_flags == CS_EOF) 348 goto ret; 349 } 350 351 /* 352 * If we didn't move, we must be at EOF. 353 * 354 * !!! 355 * That's okay for motion commands, however. 356 */ 357 ret: if (!ISMOTION(vp) && 358 cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 359 v_eof(sp, &vp->m_start); 360 return (1); 361 } 362 363 /* Set the end of the range for motion commands. */ 364 vp->m_stop.lno = cs.cs_lno; 365 vp->m_stop.cno = cs.cs_cno; 366 367 /* 368 * Non-motion commands move to the end of the range. 369 * Delete and yank stay at the start, ignore others. 370 */ 371 vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop; 372 return (0); 373 } 374 375 /* 376 * v_WordB -- [count]B 377 * Move backward a bigword at a time. 378 * 379 * PUBLIC: int v_wordB(SCR *, VICMD *); 380 */ 381 int 382 v_wordB(SCR *sp, VICMD *vp) 383 { 384 return (bword(sp, vp, BIGWORD)); 385 } 386 387 /* 388 * v_wordb -- [count]b 389 * Move backward a word at a time. 390 * 391 * PUBLIC: int v_wordb(SCR *, VICMD *); 392 */ 393 int 394 v_wordb(SCR *sp, VICMD *vp) 395 { 396 return (bword(sp, vp, LITTLEWORD)); 397 } 398 399 /* 400 * bword -- 401 * Move backward by words. 402 */ 403 static int 404 bword(SCR *sp, VICMD *vp, enum which type) 405 { 406 enum { INWORD, NOTWORD } state; 407 VCS cs; 408 u_long cnt; 409 410 cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1; 411 cs.cs_lno = vp->m_start.lno; 412 cs.cs_cno = vp->m_start.cno; 413 if (cs_init(sp, &cs)) 414 return (1); 415 416 /* 417 * !!! 418 * If in whitespace, or the previous character is whitespace, move 419 * past it. (This doesn't count as a word move.) Stay at the 420 * character before the current one, it sets word "state" for the 421 * 'b' command. 422 */ 423 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) { 424 if (cs_prev(sp, &cs)) 425 return (1); 426 if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) 427 goto start; 428 } 429 if (cs_bblank(sp, &cs)) 430 return (1); 431 432 /* 433 * Cyclically move to the beginning of the previous word -- this 434 * involves skipping over word characters and then any trailing 435 * non-word characters. Note, for the 'b' command, the definition 436 * of a word keeps switching. 437 */ 438 start: if (type == BIGWORD) 439 while (cnt--) { 440 for (;;) { 441 if (cs_prev(sp, &cs)) 442 return (1); 443 if (cs.cs_flags == CS_SOF) 444 goto ret; 445 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 446 break; 447 } 448 /* 449 * When we reach the end of the word before the last 450 * word, we're done. If we changed state, move forward 451 * one to the end of the next word. 452 */ 453 if (cnt == 0) { 454 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 455 return (1); 456 break; 457 } 458 459 /* Eat whitespace characters. */ 460 if (cs_bblank(sp, &cs)) 461 return (1); 462 if (cs.cs_flags == CS_SOF) 463 goto ret; 464 } 465 else 466 while (cnt--) { 467 state = cs.cs_flags == 0 && 468 inword(cs.cs_ch) ? INWORD : NOTWORD; 469 for (;;) { 470 if (cs_prev(sp, &cs)) 471 return (1); 472 if (cs.cs_flags == CS_SOF) 473 goto ret; 474 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 475 break; 476 if (state == INWORD) { 477 if (!inword(cs.cs_ch)) 478 break; 479 } else 480 if (inword(cs.cs_ch)) 481 break; 482 } 483 /* See comment above. */ 484 if (cnt == 0) { 485 if (cs.cs_flags == 0 && cs_next(sp, &cs)) 486 return (1); 487 break; 488 } 489 490 /* Eat whitespace characters. */ 491 if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch)) 492 if (cs_bblank(sp, &cs)) 493 return (1); 494 if (cs.cs_flags == CS_SOF) 495 goto ret; 496 } 497 498 /* If we didn't move, we must be at SOF. */ 499 ret: if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) { 500 v_sof(sp, &vp->m_start); 501 return (1); 502 } 503 504 /* Set the end of the range for motion commands. */ 505 vp->m_stop.lno = cs.cs_lno; 506 vp->m_stop.cno = cs.cs_cno; 507 508 /* 509 * All commands move to the end of the range. Motion commands 510 * adjust the starting point to the character before the current 511 * one. 512 * 513 * !!! 514 * The historic vi didn't get this right -- the `yb' command yanked 515 * the right stuff and even updated the cursor value, but the cursor 516 * was not actually updated on the screen. 517 */ 518 vp->m_final = vp->m_stop; 519 if (ISMOTION(vp)) 520 --vp->m_start.cno; 521 return (0); 522 } 523