xref: /freebsd/contrib/nvi/vi/v_sentence.c (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1 /*-
2  * Copyright (c) 1992, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  * Copyright (c) 1992, 1993, 1994, 1995, 1996
5  *	Keith Bostic.  All rights reserved.
6  *
7  * See the LICENSE file for redistribution information.
8  */
9 
10 #include "config.h"
11 
12 #include <sys/types.h>
13 #include <sys/queue.h>
14 #include <sys/time.h>
15 
16 #include <bitstring.h>
17 #include <ctype.h>
18 #include <limits.h>
19 #include <stdio.h>
20 
21 #include "../common/common.h"
22 #include "vi.h"
23 
24 /*
25  * !!!
26  * In historic vi, a sentence was delimited by a '.', '?' or '!' character
27  * followed by TWO spaces or a newline.  One or more empty lines was also
28  * treated as a separate sentence.  The Berkeley documentation for historical
29  * vi states that any number of ')', ']', '"' and '\'' characters can be
30  * between the delimiter character and the spaces or end of line, however,
31  * the historical implementation did not handle additional '"' characters.
32  * We follow the documentation here, not the implementation.
33  *
34  * Once again, historical vi didn't do sentence movements associated with
35  * counts consistently, mostly in the presence of lines containing only
36  * white-space characters.
37  *
38  * This implementation also permits a single tab to delimit sentences, and
39  * treats lines containing only white-space characters as empty lines.
40  * Finally, tabs are eaten (along with spaces) when skipping to the start
41  * of the text following a "sentence".
42  */
43 
44 /*
45  * v_sentencef -- [count])
46  *	Move forward count sentences.
47  *
48  * PUBLIC: int v_sentencef(SCR *, VICMD *);
49  */
50 int
51 v_sentencef(SCR *sp, VICMD *vp)
52 {
53 	enum { BLANK, NONE, PERIOD } state;
54 	VCS cs;
55 	size_t len;
56 	u_long cnt;
57 
58 	cs.cs_lno = vp->m_start.lno;
59 	cs.cs_cno = vp->m_start.cno;
60 	if (cs_init(sp, &cs))
61 		return (1);
62 
63 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
64 
65 	/*
66 	 * !!!
67 	 * If in white-space, the next start of sentence counts as one.
68 	 * This may not handle "  .  " correctly, but it's real unclear
69 	 * what correctly means in that case.
70 	 */
71 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
72 		if (cs_fblank(sp, &cs))
73 			return (1);
74 		if (--cnt == 0) {
75 			if (vp->m_start.lno != cs.cs_lno ||
76 			    vp->m_start.cno != cs.cs_cno)
77 				goto okret;
78 			return (1);
79 		}
80 	}
81 
82 	for (state = NONE;;) {
83 		if (cs_next(sp, &cs))
84 			return (1);
85 		if (cs.cs_flags == CS_EOF)
86 			break;
87 		if (cs.cs_flags == CS_EOL) {
88 			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
89 				if (cs_next(sp, &cs))
90 					return (1);
91 				if (cs.cs_flags == 0 &&
92 				    isblank(cs.cs_ch) && cs_fblank(sp, &cs))
93 					return (1);
94 				goto okret;
95 			}
96 			state = NONE;
97 			continue;
98 		}
99 		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
100 			if (--cnt == 0)
101 				goto okret;
102 			if (cs_fblank(sp, &cs))
103 				return (1);
104 			if (--cnt == 0)
105 				goto okret;
106 			state = NONE;
107 			continue;
108 		}
109 		switch (cs.cs_ch) {
110 		case '.':
111 		case '?':
112 		case '!':
113 			state = PERIOD;
114 			break;
115 		case ')':
116 		case ']':
117 		case '"':
118 		case '\'':
119 			if (state != PERIOD)
120 				state = NONE;
121 			break;
122 		case '\t':
123 			if (state == PERIOD)
124 				state = BLANK;
125 			/* FALLTHROUGH */
126 		case ' ':
127 			if (state == PERIOD) {
128 				state = BLANK;
129 				break;
130 			}
131 			if (state == BLANK && --cnt == 0) {
132 				if (cs_fblank(sp, &cs))
133 					return (1);
134 				goto okret;
135 			}
136 			/* FALLTHROUGH */
137 		default:
138 			state = NONE;
139 			break;
140 		}
141 	}
142 
143 	/* EOF is a movement sink, but it's an error not to have moved. */
144 	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
145 		v_eof(sp, NULL);
146 		return (1);
147 	}
148 
149 okret:	vp->m_stop.lno = cs.cs_lno;
150 	vp->m_stop.cno = cs.cs_cno;
151 
152 	/*
153 	 * !!!
154 	 * Historic, uh, features, yeah, that's right, call 'em features.
155 	 * If the starting and ending cursor positions are at the first
156 	 * column in their lines, i.e. the movement is cutting entire lines,
157 	 * the buffer is in line mode, and the ending position is the last
158 	 * character of the previous line.  Note check to make sure that
159 	 * it's not within a single line.
160 	 *
161 	 * Non-motion commands move to the end of the range.  Delete and
162 	 * yank stay at the start.  Ignore others.  Adjust the end of the
163 	 * range for motion commands.
164 	 */
165 	if (ISMOTION(vp)) {
166 		if (vp->m_start.cno == 0 &&
167 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
168 			if (vp->m_start.lno < vp->m_stop.lno) {
169 				if (db_get(sp,
170 				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
171 					return (1);
172 				vp->m_stop.cno = len ? len - 1 : 0;
173 			}
174 			F_SET(vp, VM_LMODE);
175 		} else
176 			--vp->m_stop.cno;
177 		vp->m_final = vp->m_start;
178 	} else
179 		vp->m_final = vp->m_stop;
180 	return (0);
181 }
182 
183 /*
184  * v_sentenceb -- [count](
185  *	Move backward count sentences.
186  *
187  * PUBLIC: int v_sentenceb(SCR *, VICMD *);
188  */
189 int
190 v_sentenceb(SCR *sp, VICMD *vp)
191 {
192 	VCS cs;
193 	recno_t slno;
194 	size_t len, scno;
195 	u_long cnt;
196 	int last;
197 
198 	/*
199 	 * !!!
200 	 * Historic vi permitted the user to hit SOF repeatedly.
201 	 */
202 	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
203 		return (0);
204 
205 	cs.cs_lno = vp->m_start.lno;
206 	cs.cs_cno = vp->m_start.cno;
207 	if (cs_init(sp, &cs))
208 		return (1);
209 
210 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
211 
212 	/*
213 	 * !!!
214 	 * In empty lines, skip to the previous non-white-space character.
215 	 * If in text, skip to the prevous white-space character.  Believe
216 	 * it or not, in the paragraph:
217 	 *	ab cd.
218 	 *	AB CD.
219 	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
220 	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
221 	 * Berkeley was once a major center of drug activity.
222 	 */
223 	if (cs.cs_flags == CS_EMP) {
224 		if (cs_bblank(sp, &cs))
225 			return (1);
226 		for (;;) {
227 			if (cs_prev(sp, &cs))
228 				return (1);
229 			if (cs.cs_flags != CS_EOL)
230 				break;
231 		}
232 	} else if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
233 		for (;;) {
234 			if (cs_prev(sp, &cs))
235 				return (1);
236 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
237 				break;
238 		}
239 
240 	for (last = 0;;) {
241 		if (cs_prev(sp, &cs))
242 			return (1);
243 		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
244 			break;
245 		if (cs.cs_flags == CS_EOL) {
246 			last = 1;
247 			continue;
248 		}
249 		if (cs.cs_flags == CS_EMP) {
250 			if (--cnt == 0)
251 				goto ret;
252 			if (cs_bblank(sp, &cs))
253 				return (1);
254 			last = 0;
255 			continue;
256 		}
257 		switch (cs.cs_ch) {
258 		case '.':
259 		case '?':
260 		case '!':
261 			if (!last || --cnt != 0) {
262 				last = 0;
263 				continue;
264 			}
265 
266 ret:			slno = cs.cs_lno;
267 			scno = cs.cs_cno;
268 
269 			/*
270 			 * Move to the start of the sentence, skipping blanks
271 			 * and special characters.
272 			 */
273 			do {
274 				if (cs_next(sp, &cs))
275 					return (1);
276 			} while (!cs.cs_flags &&
277 			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
278 			    cs.cs_ch == '"' || cs.cs_ch == '\''));
279 			if ((cs.cs_flags || isblank(cs.cs_ch)) &&
280 			    cs_fblank(sp, &cs))
281 				return (1);
282 
283 			/*
284 			 * If it was ".  xyz", with the cursor on the 'x', or
285 			 * "end.  ", with the cursor in the spaces, or the
286 			 * beginning of a sentence preceded by an empty line,
287 			 * we can end up where we started.  Fix it.
288 			 */
289 			if (vp->m_start.lno != cs.cs_lno ||
290 			    vp->m_start.cno > cs.cs_cno)
291 				goto okret;
292 
293 			/*
294 			 * Well, if an empty line preceded possible blanks
295 			 * and the sentence, it could be a real sentence.
296 			 */
297 			for (;;) {
298 				if (cs_prev(sp, &cs))
299 					return (1);
300 				if (cs.cs_flags == CS_EOL)
301 					continue;
302 				if (cs.cs_flags == 0 && isblank(cs.cs_ch))
303 					continue;
304 				break;
305 			}
306 			if (cs.cs_flags == CS_EMP)
307 				goto okret;
308 
309 			/* But it wasn't; try again. */
310 			++cnt;
311 			cs.cs_lno = slno;
312 			cs.cs_cno = scno;
313 			last = 0;
314 			break;
315 		case '\t':
316 			last = 1;
317 			break;
318 		default:
319 			last =
320 			    cs.cs_flags == CS_EOL || isblank(cs.cs_ch) ||
321 			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
322 			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
323 		}
324 	}
325 
326 okret:	vp->m_stop.lno = cs.cs_lno;
327 	vp->m_stop.cno = cs.cs_cno;
328 
329 	/*
330 	 * !!!
331 	 * If the starting and stopping cursor positions are at the first
332 	 * columns in the line, i.e. the movement is cutting an entire line,
333 	 * the buffer is in line mode, and the starting position is the last
334 	 * character of the previous line.
335 	 *
336 	 * All commands move to the end of the range.  Adjust the start of
337 	 * the range for motion commands.
338 	 */
339 	if (ISMOTION(vp)) {
340 		if (vp->m_start.cno == 0 &&
341 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
342 			if (db_get(sp,
343 			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
344 				return (1);
345 			vp->m_start.cno = len ? len - 1 : 0;
346 			F_SET(vp, VM_LMODE);
347 		} else
348 			--vp->m_start.cno;
349 	}
350 	vp->m_final = vp->m_stop;
351 	return (0);
352 }
353