xref: /freebsd/contrib/nvi/vi/v_sentence.c (revision d8a0fe102c0cfdfcd5b818f850eff09d8536c9bc)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #ifndef lint
13 static const char sccsid[] = "$Id: v_sentence.c,v 10.9 2001/06/25 15:19:35 skimo Exp $";
14 #endif /* not lint */
15 
16 #include <sys/types.h>
17 #include <sys/queue.h>
18 #include <sys/time.h>
19 
20 #include <bitstring.h>
21 #include <ctype.h>
22 #include <limits.h>
23 #include <stdio.h>
24 
25 #include "../common/common.h"
26 #include "vi.h"
27 
28 /*
29  * !!!
30  * In historic vi, a sentence was delimited by a '.', '?' or '!' character
31  * followed by TWO spaces or a newline.  One or more empty lines were also
32  * treated as a separate sentence.  The Berkeley documentation for historical
33  * vi states that any number of ')', ']', '"' and '\'' characters can be
34  * between the delimiter character and the spaces or end of line, however,
35  * the historical implementation did not handle additional '"' characters.
36  * We follow the documentation here, not the implementation.
37  *
38  * Once again, historical vi didn't do sentence movements associated with
39  * counts consistently, mostly in the presence of lines containing only
40  * white-space characters.
41  *
42  * This implementation also permits a single tab to delimit sentences, and
43  * treats lines containing only white-space characters as empty lines.
44  * Finally, tabs are eaten (along with spaces) when skipping to the start
45  * of the text following a "sentence".
46  */
47 
48 /*
49  * v_sentencef -- [count])
50  *	Move forward count sentences.
51  *
52  * PUBLIC: int v_sentencef(SCR *, VICMD *);
53  */
54 int
55 v_sentencef(SCR *sp, VICMD *vp)
56 {
57 	enum { BLANK, NONE, PERIOD } state;
58 	VCS cs;
59 	size_t len;
60 	u_long cnt;
61 
62 	cs.cs_lno = vp->m_start.lno;
63 	cs.cs_cno = vp->m_start.cno;
64 	if (cs_init(sp, &cs))
65 		return (1);
66 
67 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
68 
69 	/*
70 	 * !!!
71 	 * If in white-space, the next start of sentence counts as one.
72 	 * This may not handle "  .  " correctly, but it's real unclear
73 	 * what correctly means in that case.
74 	 */
75 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
76 		if (cs_fblank(sp, &cs))
77 			return (1);
78 		if (--cnt == 0) {
79 			if (vp->m_start.lno != cs.cs_lno ||
80 			    vp->m_start.cno != cs.cs_cno)
81 				goto okret;
82 			return (1);
83 		}
84 	}
85 
86 	for (state = NONE;;) {
87 		if (cs_next(sp, &cs))
88 			return (1);
89 		if (cs.cs_flags == CS_EOF)
90 			break;
91 		if (cs.cs_flags == CS_EOL) {
92 			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
93 				if (cs_next(sp, &cs))
94 					return (1);
95 				if (cs.cs_flags == 0 &&
96 				    isblank(cs.cs_ch) && cs_fblank(sp, &cs))
97 					return (1);
98 				goto okret;
99 			}
100 			state = NONE;
101 			continue;
102 		}
103 		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
104 			if (--cnt == 0)
105 				goto okret;
106 			if (cs_fblank(sp, &cs))
107 				return (1);
108 			if (--cnt == 0)
109 				goto okret;
110 			state = NONE;
111 			continue;
112 		}
113 		switch (cs.cs_ch) {
114 		case '.':
115 		case '?':
116 		case '!':
117 			state = PERIOD;
118 			break;
119 		case ')':
120 		case ']':
121 		case '"':
122 		case '\'':
123 			if (state != PERIOD)
124 				state = NONE;
125 			break;
126 		case '\t':
127 			if (state == PERIOD)
128 				state = BLANK;
129 			/* FALLTHROUGH */
130 		case ' ':
131 			if (state == PERIOD) {
132 				state = BLANK;
133 				break;
134 			}
135 			if (state == BLANK && --cnt == 0) {
136 				if (cs_fblank(sp, &cs))
137 					return (1);
138 				goto okret;
139 			}
140 			/* FALLTHROUGH */
141 		default:
142 			state = NONE;
143 			break;
144 		}
145 	}
146 
147 	/* EOF is a movement sink, but it's an error not to have moved. */
148 	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
149 		v_eof(sp, NULL);
150 		return (1);
151 	}
152 
153 okret:	vp->m_stop.lno = cs.cs_lno;
154 	vp->m_stop.cno = cs.cs_cno;
155 
156 	/*
157 	 * !!!
158 	 * Historic, uh, features, yeah, that's right, call 'em features.
159 	 * If the starting and ending cursor positions are at the first
160 	 * column in their lines, i.e. the movement is cutting entire lines,
161 	 * the buffer is in line mode, and the ending position is the last
162 	 * character of the previous line.  Note check to make sure that
163 	 * it's not within a single line.
164 	 *
165 	 * Non-motion commands move to the end of the range.  Delete and
166 	 * yank stay at the start.  Ignore others.  Adjust the end of the
167 	 * range for motion commands.
168 	 */
169 	if (ISMOTION(vp)) {
170 		if (vp->m_start.cno == 0 &&
171 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
172 			if (vp->m_start.lno < vp->m_stop.lno) {
173 				if (db_get(sp,
174 				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
175 					return (1);
176 				vp->m_stop.cno = len ? len - 1 : 0;
177 			}
178 			F_SET(vp, VM_LMODE);
179 		} else
180 			--vp->m_stop.cno;
181 		vp->m_final = vp->m_start;
182 	} else
183 		vp->m_final = vp->m_stop;
184 	return (0);
185 }
186 
187 /*
188  * v_sentenceb -- [count](
189  *	Move backward count sentences.
190  *
191  * PUBLIC: int v_sentenceb(SCR *, VICMD *);
192  */
193 int
194 v_sentenceb(SCR *sp, VICMD *vp)
195 {
196 	VCS cs;
197 	recno_t slno;
198 	size_t len, scno;
199 	u_long cnt;
200 	int last;
201 
202 	/*
203 	 * !!!
204 	 * Historic vi permitted the user to hit SOF repeatedly.
205 	 */
206 	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
207 		return (0);
208 
209 	cs.cs_lno = vp->m_start.lno;
210 	cs.cs_cno = vp->m_start.cno;
211 	if (cs_init(sp, &cs))
212 		return (1);
213 
214 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
215 
216 	/*
217 	 * !!!
218 	 * In empty lines, skip to the previous non-white-space character.
219 	 * If in text, skip to the prevous white-space character.  Believe
220 	 * it or not, in the paragraph:
221 	 *	ab cd.
222 	 *	AB CD.
223 	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
224 	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
225 	 * Berkeley was once a major center of drug activity.
226 	 */
227 	if (cs.cs_flags == CS_EMP) {
228 		if (cs_bblank(sp, &cs))
229 			return (1);
230 		for (;;) {
231 			if (cs_prev(sp, &cs))
232 				return (1);
233 			if (cs.cs_flags != CS_EOL)
234 				break;
235 		}
236 	} else if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
237 		for (;;) {
238 			if (cs_prev(sp, &cs))
239 				return (1);
240 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
241 				break;
242 		}
243 
244 	for (last = 0;;) {
245 		if (cs_prev(sp, &cs))
246 			return (1);
247 		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
248 			break;
249 		if (cs.cs_flags == CS_EOL) {
250 			last = 1;
251 			continue;
252 		}
253 		if (cs.cs_flags == CS_EMP) {
254 			if (--cnt == 0)
255 				goto ret;
256 			if (cs_bblank(sp, &cs))
257 				return (1);
258 			last = 0;
259 			continue;
260 		}
261 		switch (cs.cs_ch) {
262 		case '.':
263 		case '?':
264 		case '!':
265 			if (!last || --cnt != 0) {
266 				last = 0;
267 				continue;
268 			}
269 
270 ret:			slno = cs.cs_lno;
271 			scno = cs.cs_cno;
272 
273 			/*
274 			 * Move to the start of the sentence, skipping blanks
275 			 * and special characters.
276 			 */
277 			do {
278 				if (cs_next(sp, &cs))
279 					return (1);
280 			} while (!cs.cs_flags &&
281 			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
282 			    cs.cs_ch == '"' || cs.cs_ch == '\''));
283 			if ((cs.cs_flags || isblank(cs.cs_ch)) &&
284 			    cs_fblank(sp, &cs))
285 				return (1);
286 
287 			/*
288 			 * If it was ".  xyz", with the cursor on the 'x', or
289 			 * "end.  ", with the cursor in the spaces, or the
290 			 * beginning of a sentence preceded by an empty line,
291 			 * we can end up where we started.  Fix it.
292 			 */
293 			if (vp->m_start.lno != cs.cs_lno ||
294 			    vp->m_start.cno != cs.cs_cno)
295 				goto okret;
296 
297 			/*
298 			 * Well, if an empty line preceded possible blanks
299 			 * and the sentence, it could be a real sentence.
300 			 */
301 			for (;;) {
302 				if (cs_prev(sp, &cs))
303 					return (1);
304 				if (cs.cs_flags == CS_EOL)
305 					continue;
306 				if (cs.cs_flags == 0 && isblank(cs.cs_ch))
307 					continue;
308 				break;
309 			}
310 			if (cs.cs_flags == CS_EMP)
311 				goto okret;
312 
313 			/* But it wasn't; try again. */
314 			++cnt;
315 			cs.cs_lno = slno;
316 			cs.cs_cno = scno;
317 			last = 0;
318 			break;
319 		case '\t':
320 			last = 1;
321 			break;
322 		default:
323 			last =
324 			    cs.cs_flags == CS_EOL || isblank(cs.cs_ch) ||
325 			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
326 			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
327 		}
328 	}
329 
330 okret:	vp->m_stop.lno = cs.cs_lno;
331 	vp->m_stop.cno = cs.cs_cno;
332 
333 	/*
334 	 * !!!
335 	 * If the starting and stopping cursor positions are at the first
336 	 * columns in the line, i.e. the movement is cutting an entire line,
337 	 * the buffer is in line mode, and the starting position is the last
338 	 * character of the previous line.
339 	 *
340 	 * All commands move to the end of the range.  Adjust the start of
341 	 * the range for motion commands.
342 	 */
343 	if (ISMOTION(vp))
344 		if (vp->m_start.cno == 0 &&
345 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
346 			if (db_get(sp,
347 			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
348 				return (1);
349 			vp->m_start.cno = len ? len - 1 : 0;
350 			F_SET(vp, VM_LMODE);
351 		} else
352 			--vp->m_start.cno;
353 	vp->m_final = vp->m_stop;
354 	return (0);
355 }
356