xref: /freebsd/contrib/nvi/vi/v_sentence.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #ifndef lint
13 static const char sccsid[] = "@(#)v_sentence.c	10.7 (Berkeley) 3/6/96";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
19 
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
24 
25 #include "../common/common.h"
26 #include "vi.h"
27 
/*
 * !!!
 * In historic vi, a sentence was delimited by a '.', '?' or '!' character
 * followed by TWO spaces or a newline.  A run of one or more empty lines
 * was also treated as a separate sentence.  The Berkeley documentation for
 * historical vi states that any number of ')', ']', '"' and '\'' characters
 * can be between the delimiter character and the spaces or end of line,
 * however, the historical implementation did not handle additional '"'
 * characters.  We follow the documentation here, not the implementation.
 *
 * Once again, historical vi didn't do sentence movements associated with
 * counts consistently, mostly in the presence of lines containing only
 * white-space characters.
 *
 * This implementation also permits a single tab to delimit sentences, and
 * treats lines containing only white-space characters as empty lines.
 * Finally, tabs are eaten (along with spaces) when skipping to the start
 * of the text following a "sentence".
 */
47 
48 /*
49  * v_sentencef -- [count])
50  *	Move forward count sentences.
51  *
52  * PUBLIC: int v_sentencef __P((SCR *, VICMD *));
53  */
54 int
55 v_sentencef(sp, vp)
56 	SCR *sp;
57 	VICMD *vp;
58 {
59 	enum { BLANK, NONE, PERIOD } state;
60 	VCS cs;
61 	size_t len;
62 	u_long cnt;
63 
64 	cs.cs_lno = vp->m_start.lno;
65 	cs.cs_cno = vp->m_start.cno;
66 	if (cs_init(sp, &cs))
67 		return (1);
68 
69 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
70 
71 	/*
72 	 * !!!
73 	 * If in white-space, the next start of sentence counts as one.
74 	 * This may not handle "  .  " correctly, but it's real unclear
75 	 * what correctly means in that case.
76 	 */
77 	if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) {
78 		if (cs_fblank(sp, &cs))
79 			return (1);
80 		if (--cnt == 0) {
81 			if (vp->m_start.lno != cs.cs_lno ||
82 			    vp->m_start.cno != cs.cs_cno)
83 				goto okret;
84 			return (1);
85 		}
86 	}
87 
88 	for (state = NONE;;) {
89 		if (cs_next(sp, &cs))
90 			return (1);
91 		if (cs.cs_flags == CS_EOF)
92 			break;
93 		if (cs.cs_flags == CS_EOL) {
94 			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
95 				if (cs_next(sp, &cs))
96 					return (1);
97 				if (cs.cs_flags == 0 &&
98 				    isblank(cs.cs_ch) && cs_fblank(sp, &cs))
99 					return (1);
100 				goto okret;
101 			}
102 			state = NONE;
103 			continue;
104 		}
105 		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
106 			if (--cnt == 0)
107 				goto okret;
108 			if (cs_fblank(sp, &cs))
109 				return (1);
110 			if (--cnt == 0)
111 				goto okret;
112 			state = NONE;
113 			continue;
114 		}
115 		switch (cs.cs_ch) {
116 		case '.':
117 		case '?':
118 		case '!':
119 			state = PERIOD;
120 			break;
121 		case ')':
122 		case ']':
123 		case '"':
124 		case '\'':
125 			if (state != PERIOD)
126 				state = NONE;
127 			break;
128 		case '\t':
129 			if (state == PERIOD)
130 				state = BLANK;
131 			/* FALLTHROUGH */
132 		case ' ':
133 			if (state == PERIOD) {
134 				state = BLANK;
135 				break;
136 			}
137 			if (state == BLANK && --cnt == 0) {
138 				if (cs_fblank(sp, &cs))
139 					return (1);
140 				goto okret;
141 			}
142 			/* FALLTHROUGH */
143 		default:
144 			state = NONE;
145 			break;
146 		}
147 	}
148 
149 	/* EOF is a movement sink, but it's an error not to have moved. */
150 	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
151 		v_eof(sp, NULL);
152 		return (1);
153 	}
154 
155 okret:	vp->m_stop.lno = cs.cs_lno;
156 	vp->m_stop.cno = cs.cs_cno;
157 
158 	/*
159 	 * !!!
160 	 * Historic, uh, features, yeah, that's right, call 'em features.
161 	 * If the starting and ending cursor positions are at the first
162 	 * column in their lines, i.e. the movement is cutting entire lines,
163 	 * the buffer is in line mode, and the ending position is the last
164 	 * character of the previous line.  Note check to make sure that
165 	 * it's not within a single line.
166 	 *
167 	 * Non-motion commands move to the end of the range.  Delete and
168 	 * yank stay at the start.  Ignore others.  Adjust the end of the
169 	 * range for motion commands.
170 	 */
171 	if (ISMOTION(vp)) {
172 		if (vp->m_start.cno == 0 &&
173 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
174 			if (vp->m_start.lno < vp->m_stop.lno) {
175 				if (db_get(sp,
176 				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
177 					return (1);
178 				vp->m_stop.cno = len ? len - 1 : 0;
179 			}
180 			F_SET(vp, VM_LMODE);
181 		} else
182 			--vp->m_stop.cno;
183 		vp->m_final = vp->m_start;
184 	} else
185 		vp->m_final = vp->m_stop;
186 	return (0);
187 }
188 
189 /*
190  * v_sentenceb -- [count](
191  *	Move backward count sentences.
192  *
193  * PUBLIC: int v_sentenceb __P((SCR *, VICMD *));
194  */
195 int
196 v_sentenceb(sp, vp)
197 	SCR *sp;
198 	VICMD *vp;
199 {
200 	VCS cs;
201 	recno_t slno;
202 	size_t len, scno;
203 	u_long cnt;
204 	int last;
205 
206 	/*
207 	 * !!!
208 	 * Historic vi permitted the user to hit SOF repeatedly.
209 	 */
210 	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
211 		return (0);
212 
213 	cs.cs_lno = vp->m_start.lno;
214 	cs.cs_cno = vp->m_start.cno;
215 	if (cs_init(sp, &cs))
216 		return (1);
217 
218 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
219 
220 	/*
221 	 * !!!
222 	 * In empty lines, skip to the previous non-white-space character.
223 	 * If in text, skip to the prevous white-space character.  Believe
224 	 * it or not, in the paragraph:
225 	 *	ab cd.
226 	 *	AB CD.
227 	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
228 	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
229 	 * Berkeley was once a major center of drug activity.
230 	 */
231 	if (cs.cs_flags == CS_EMP) {
232 		if (cs_bblank(sp, &cs))
233 			return (1);
234 		for (;;) {
235 			if (cs_prev(sp, &cs))
236 				return (1);
237 			if (cs.cs_flags != CS_EOL)
238 				break;
239 		}
240 	} else if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
241 		for (;;) {
242 			if (cs_prev(sp, &cs))
243 				return (1);
244 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
245 				break;
246 		}
247 
248 	for (last = 0;;) {
249 		if (cs_prev(sp, &cs))
250 			return (1);
251 		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
252 			break;
253 		if (cs.cs_flags == CS_EOL) {
254 			last = 1;
255 			continue;
256 		}
257 		if (cs.cs_flags == CS_EMP) {
258 			if (--cnt == 0)
259 				goto ret;
260 			if (cs_bblank(sp, &cs))
261 				return (1);
262 			last = 0;
263 			continue;
264 		}
265 		switch (cs.cs_ch) {
266 		case '.':
267 		case '?':
268 		case '!':
269 			if (!last || --cnt != 0) {
270 				last = 0;
271 				continue;
272 			}
273 
274 ret:			slno = cs.cs_lno;
275 			scno = cs.cs_cno;
276 
277 			/*
278 			 * Move to the start of the sentence, skipping blanks
279 			 * and special characters.
280 			 */
281 			do {
282 				if (cs_next(sp, &cs))
283 					return (1);
284 			} while (!cs.cs_flags &&
285 			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
286 			    cs.cs_ch == '"' || cs.cs_ch == '\''));
287 			if ((cs.cs_flags || isblank(cs.cs_ch)) &&
288 			    cs_fblank(sp, &cs))
289 				return (1);
290 
291 			/*
292 			 * If it was ".  xyz", with the cursor on the 'x', or
293 			 * "end.  ", with the cursor in the spaces, or the
294 			 * beginning of a sentence preceded by an empty line,
295 			 * we can end up where we started.  Fix it.
296 			 */
297 			if (vp->m_start.lno != cs.cs_lno ||
298 			    vp->m_start.cno != cs.cs_cno)
299 				goto okret;
300 
301 			/*
302 			 * Well, if an empty line preceded possible blanks
303 			 * and the sentence, it could be a real sentence.
304 			 */
305 			for (;;) {
306 				if (cs_prev(sp, &cs))
307 					return (1);
308 				if (cs.cs_flags == CS_EOL)
309 					continue;
310 				if (cs.cs_flags == 0 && isblank(cs.cs_ch))
311 					continue;
312 				break;
313 			}
314 			if (cs.cs_flags == CS_EMP)
315 				goto okret;
316 
317 			/* But it wasn't; try again. */
318 			++cnt;
319 			cs.cs_lno = slno;
320 			cs.cs_cno = scno;
321 			last = 0;
322 			break;
323 		case '\t':
324 			last = 1;
325 			break;
326 		default:
327 			last =
328 			    cs.cs_flags == CS_EOL || isblank(cs.cs_ch) ||
329 			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
330 			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
331 		}
332 	}
333 
334 okret:	vp->m_stop.lno = cs.cs_lno;
335 	vp->m_stop.cno = cs.cs_cno;
336 
337 	/*
338 	 * !!!
339 	 * If the starting and stopping cursor positions are at the first
340 	 * columns in the line, i.e. the movement is cutting an entire line,
341 	 * the buffer is in line mode, and the starting position is the last
342 	 * character of the previous line.
343 	 *
344 	 * All commands move to the end of the range.  Adjust the start of
345 	 * the range for motion commands.
346 	 */
347 	if (ISMOTION(vp))
348 		if (vp->m_start.cno == 0 &&
349 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
350 			if (db_get(sp,
351 			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
352 				return (1);
353 			vp->m_start.cno = len ? len - 1 : 0;
354 			F_SET(vp, VM_LMODE);
355 		} else
356 			--vp->m_start.cno;
357 	vp->m_final = vp->m_stop;
358 	return (0);
359 }
360