xref: /freebsd/contrib/nvi/vi/v_sentence.c (revision 755cc40c21ca63388c6a67ba848a908b429e9391)
/*-
 * Copyright (c) 1992, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 * Copyright (c) 1992, 1993, 1994, 1995, 1996
 *	Keith Bostic.  All rights reserved.
 *
 * See the LICENSE file for redistribution information.
 */
9b8ba871bSPeter Wemm 
10b8ba871bSPeter Wemm #include "config.h"
11b8ba871bSPeter Wemm 
12b8ba871bSPeter Wemm #include <sys/types.h>
13b8ba871bSPeter Wemm #include <sys/queue.h>
14b8ba871bSPeter Wemm #include <sys/time.h>
15b8ba871bSPeter Wemm 
16b8ba871bSPeter Wemm #include <bitstring.h>
17b8ba871bSPeter Wemm #include <ctype.h>
18b8ba871bSPeter Wemm #include <limits.h>
19b8ba871bSPeter Wemm #include <stdio.h>
20b8ba871bSPeter Wemm 
21b8ba871bSPeter Wemm #include "../common/common.h"
22b8ba871bSPeter Wemm #include "vi.h"
23b8ba871bSPeter Wemm 
/*
 * !!!
 * In historic vi, a sentence was delimited by a '.', '?' or '!' character
 * followed by TWO spaces or a newline.  One or more empty lines were also
 * treated as a separate sentence.  The Berkeley documentation for historical
 * vi states that any number of ')', ']', '"' and '\'' characters can be
 * between the delimiter character and the spaces or end of line, however,
 * the historical implementation did not handle additional '"' characters.
 * We follow the documentation here, not the implementation.
 *
 * Once again, historical vi didn't do sentence movements associated with
 * counts consistently, mostly in the presence of lines containing only
 * white-space characters.
 *
 * This implementation also permits a single tab to delimit sentences, and
 * treats lines containing only white-space characters as empty lines.
 * Finally, tabs are eaten (along with spaces) when skipping to the start
 * of the text following a "sentence".
 */
43b8ba871bSPeter Wemm 
44b8ba871bSPeter Wemm /*
45b8ba871bSPeter Wemm  * v_sentencef -- [count])
46b8ba871bSPeter Wemm  *	Move forward count sentences.
47b8ba871bSPeter Wemm  *
48c271fa92SBaptiste Daroussin  * PUBLIC: int v_sentencef(SCR *, VICMD *);
49b8ba871bSPeter Wemm  */
50b8ba871bSPeter Wemm int
v_sentencef(SCR * sp,VICMD * vp)51f0957ccaSPeter Wemm v_sentencef(SCR *sp, VICMD *vp)
52b8ba871bSPeter Wemm {
53b8ba871bSPeter Wemm 	enum { BLANK, NONE, PERIOD } state;
54b8ba871bSPeter Wemm 	VCS cs;
55b8ba871bSPeter Wemm 	size_t len;
56b8ba871bSPeter Wemm 	u_long cnt;
57b8ba871bSPeter Wemm 
58b8ba871bSPeter Wemm 	cs.cs_lno = vp->m_start.lno;
59b8ba871bSPeter Wemm 	cs.cs_cno = vp->m_start.cno;
60b8ba871bSPeter Wemm 	if (cs_init(sp, &cs))
61b8ba871bSPeter Wemm 		return (1);
62b8ba871bSPeter Wemm 
63b8ba871bSPeter Wemm 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
64b8ba871bSPeter Wemm 
65b8ba871bSPeter Wemm 	/*
66b8ba871bSPeter Wemm 	 * !!!
67b8ba871bSPeter Wemm 	 * If in white-space, the next start of sentence counts as one.
68b8ba871bSPeter Wemm 	 * This may not handle "  .  " correctly, but it's real unclear
69b8ba871bSPeter Wemm 	 * what correctly means in that case.
70b8ba871bSPeter Wemm 	 */
71f0957ccaSPeter Wemm 	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
72b8ba871bSPeter Wemm 		if (cs_fblank(sp, &cs))
73b8ba871bSPeter Wemm 			return (1);
74b8ba871bSPeter Wemm 		if (--cnt == 0) {
75b8ba871bSPeter Wemm 			if (vp->m_start.lno != cs.cs_lno ||
76b8ba871bSPeter Wemm 			    vp->m_start.cno != cs.cs_cno)
77b8ba871bSPeter Wemm 				goto okret;
78b8ba871bSPeter Wemm 			return (1);
79b8ba871bSPeter Wemm 		}
80b8ba871bSPeter Wemm 	}
81b8ba871bSPeter Wemm 
82b8ba871bSPeter Wemm 	for (state = NONE;;) {
83b8ba871bSPeter Wemm 		if (cs_next(sp, &cs))
84b8ba871bSPeter Wemm 			return (1);
85b8ba871bSPeter Wemm 		if (cs.cs_flags == CS_EOF)
86b8ba871bSPeter Wemm 			break;
87b8ba871bSPeter Wemm 		if (cs.cs_flags == CS_EOL) {
88b8ba871bSPeter Wemm 			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
89b8ba871bSPeter Wemm 				if (cs_next(sp, &cs))
90b8ba871bSPeter Wemm 					return (1);
91b8ba871bSPeter Wemm 				if (cs.cs_flags == 0 &&
92b8ba871bSPeter Wemm 				    isblank(cs.cs_ch) && cs_fblank(sp, &cs))
93b8ba871bSPeter Wemm 					return (1);
94b8ba871bSPeter Wemm 				goto okret;
95b8ba871bSPeter Wemm 			}
96b8ba871bSPeter Wemm 			state = NONE;
97b8ba871bSPeter Wemm 			continue;
98b8ba871bSPeter Wemm 		}
99b8ba871bSPeter Wemm 		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
100b8ba871bSPeter Wemm 			if (--cnt == 0)
101b8ba871bSPeter Wemm 				goto okret;
102b8ba871bSPeter Wemm 			if (cs_fblank(sp, &cs))
103b8ba871bSPeter Wemm 				return (1);
104b8ba871bSPeter Wemm 			if (--cnt == 0)
105b8ba871bSPeter Wemm 				goto okret;
106b8ba871bSPeter Wemm 			state = NONE;
107b8ba871bSPeter Wemm 			continue;
108b8ba871bSPeter Wemm 		}
109b8ba871bSPeter Wemm 		switch (cs.cs_ch) {
110b8ba871bSPeter Wemm 		case '.':
111b8ba871bSPeter Wemm 		case '?':
112b8ba871bSPeter Wemm 		case '!':
113b8ba871bSPeter Wemm 			state = PERIOD;
114b8ba871bSPeter Wemm 			break;
115b8ba871bSPeter Wemm 		case ')':
116b8ba871bSPeter Wemm 		case ']':
117b8ba871bSPeter Wemm 		case '"':
118b8ba871bSPeter Wemm 		case '\'':
119b8ba871bSPeter Wemm 			if (state != PERIOD)
120b8ba871bSPeter Wemm 				state = NONE;
121b8ba871bSPeter Wemm 			break;
122b8ba871bSPeter Wemm 		case '\t':
123b8ba871bSPeter Wemm 			if (state == PERIOD)
124b8ba871bSPeter Wemm 				state = BLANK;
125b8ba871bSPeter Wemm 			/* FALLTHROUGH */
126b8ba871bSPeter Wemm 		case ' ':
127b8ba871bSPeter Wemm 			if (state == PERIOD) {
128b8ba871bSPeter Wemm 				state = BLANK;
129b8ba871bSPeter Wemm 				break;
130b8ba871bSPeter Wemm 			}
131b8ba871bSPeter Wemm 			if (state == BLANK && --cnt == 0) {
132b8ba871bSPeter Wemm 				if (cs_fblank(sp, &cs))
133b8ba871bSPeter Wemm 					return (1);
134b8ba871bSPeter Wemm 				goto okret;
135b8ba871bSPeter Wemm 			}
136b8ba871bSPeter Wemm 			/* FALLTHROUGH */
137b8ba871bSPeter Wemm 		default:
138b8ba871bSPeter Wemm 			state = NONE;
139b8ba871bSPeter Wemm 			break;
140b8ba871bSPeter Wemm 		}
141b8ba871bSPeter Wemm 	}
142b8ba871bSPeter Wemm 
143b8ba871bSPeter Wemm 	/* EOF is a movement sink, but it's an error not to have moved. */
144b8ba871bSPeter Wemm 	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
145b8ba871bSPeter Wemm 		v_eof(sp, NULL);
146b8ba871bSPeter Wemm 		return (1);
147b8ba871bSPeter Wemm 	}
148b8ba871bSPeter Wemm 
149b8ba871bSPeter Wemm okret:	vp->m_stop.lno = cs.cs_lno;
150b8ba871bSPeter Wemm 	vp->m_stop.cno = cs.cs_cno;
151b8ba871bSPeter Wemm 
152b8ba871bSPeter Wemm 	/*
153b8ba871bSPeter Wemm 	 * !!!
154b8ba871bSPeter Wemm 	 * Historic, uh, features, yeah, that's right, call 'em features.
155b8ba871bSPeter Wemm 	 * If the starting and ending cursor positions are at the first
156b8ba871bSPeter Wemm 	 * column in their lines, i.e. the movement is cutting entire lines,
157b8ba871bSPeter Wemm 	 * the buffer is in line mode, and the ending position is the last
158b8ba871bSPeter Wemm 	 * character of the previous line.  Note check to make sure that
159b8ba871bSPeter Wemm 	 * it's not within a single line.
160b8ba871bSPeter Wemm 	 *
161b8ba871bSPeter Wemm 	 * Non-motion commands move to the end of the range.  Delete and
162b8ba871bSPeter Wemm 	 * yank stay at the start.  Ignore others.  Adjust the end of the
163b8ba871bSPeter Wemm 	 * range for motion commands.
164b8ba871bSPeter Wemm 	 */
165b8ba871bSPeter Wemm 	if (ISMOTION(vp)) {
166b8ba871bSPeter Wemm 		if (vp->m_start.cno == 0 &&
167b8ba871bSPeter Wemm 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
168b8ba871bSPeter Wemm 			if (vp->m_start.lno < vp->m_stop.lno) {
169b8ba871bSPeter Wemm 				if (db_get(sp,
170b8ba871bSPeter Wemm 				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
171b8ba871bSPeter Wemm 					return (1);
172b8ba871bSPeter Wemm 				vp->m_stop.cno = len ? len - 1 : 0;
173b8ba871bSPeter Wemm 			}
174b8ba871bSPeter Wemm 			F_SET(vp, VM_LMODE);
175b8ba871bSPeter Wemm 		} else
176b8ba871bSPeter Wemm 			--vp->m_stop.cno;
177b8ba871bSPeter Wemm 		vp->m_final = vp->m_start;
178b8ba871bSPeter Wemm 	} else
179b8ba871bSPeter Wemm 		vp->m_final = vp->m_stop;
180b8ba871bSPeter Wemm 	return (0);
181b8ba871bSPeter Wemm }
182b8ba871bSPeter Wemm 
183b8ba871bSPeter Wemm /*
184b8ba871bSPeter Wemm  * v_sentenceb -- [count](
185b8ba871bSPeter Wemm  *	Move backward count sentences.
186b8ba871bSPeter Wemm  *
187c271fa92SBaptiste Daroussin  * PUBLIC: int v_sentenceb(SCR *, VICMD *);
188b8ba871bSPeter Wemm  */
189b8ba871bSPeter Wemm int
v_sentenceb(SCR * sp,VICMD * vp)190f0957ccaSPeter Wemm v_sentenceb(SCR *sp, VICMD *vp)
191b8ba871bSPeter Wemm {
192b8ba871bSPeter Wemm 	VCS cs;
193b8ba871bSPeter Wemm 	recno_t slno;
194b8ba871bSPeter Wemm 	size_t len, scno;
195b8ba871bSPeter Wemm 	u_long cnt;
196b8ba871bSPeter Wemm 	int last;
197b8ba871bSPeter Wemm 
198b8ba871bSPeter Wemm 	/*
199b8ba871bSPeter Wemm 	 * !!!
200b8ba871bSPeter Wemm 	 * Historic vi permitted the user to hit SOF repeatedly.
201b8ba871bSPeter Wemm 	 */
202b8ba871bSPeter Wemm 	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
203b8ba871bSPeter Wemm 		return (0);
204b8ba871bSPeter Wemm 
205b8ba871bSPeter Wemm 	cs.cs_lno = vp->m_start.lno;
206b8ba871bSPeter Wemm 	cs.cs_cno = vp->m_start.cno;
207b8ba871bSPeter Wemm 	if (cs_init(sp, &cs))
208b8ba871bSPeter Wemm 		return (1);
209b8ba871bSPeter Wemm 
210b8ba871bSPeter Wemm 	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
211b8ba871bSPeter Wemm 
212b8ba871bSPeter Wemm 	/*
213b8ba871bSPeter Wemm 	 * !!!
214b8ba871bSPeter Wemm 	 * In empty lines, skip to the previous non-white-space character.
215b8ba871bSPeter Wemm 	 * If in text, skip to the prevous white-space character.  Believe
216b8ba871bSPeter Wemm 	 * it or not, in the paragraph:
217b8ba871bSPeter Wemm 	 *	ab cd.
218b8ba871bSPeter Wemm 	 *	AB CD.
219b8ba871bSPeter Wemm 	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
220b8ba871bSPeter Wemm 	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
221b8ba871bSPeter Wemm 	 * Berkeley was once a major center of drug activity.
222b8ba871bSPeter Wemm 	 */
223b8ba871bSPeter Wemm 	if (cs.cs_flags == CS_EMP) {
224b8ba871bSPeter Wemm 		if (cs_bblank(sp, &cs))
225b8ba871bSPeter Wemm 			return (1);
226b8ba871bSPeter Wemm 		for (;;) {
227b8ba871bSPeter Wemm 			if (cs_prev(sp, &cs))
228b8ba871bSPeter Wemm 				return (1);
229b8ba871bSPeter Wemm 			if (cs.cs_flags != CS_EOL)
230b8ba871bSPeter Wemm 				break;
231b8ba871bSPeter Wemm 		}
232b8ba871bSPeter Wemm 	} else if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
233b8ba871bSPeter Wemm 		for (;;) {
234b8ba871bSPeter Wemm 			if (cs_prev(sp, &cs))
235b8ba871bSPeter Wemm 				return (1);
236b8ba871bSPeter Wemm 			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
237b8ba871bSPeter Wemm 				break;
238b8ba871bSPeter Wemm 		}
239b8ba871bSPeter Wemm 
240b8ba871bSPeter Wemm 	for (last = 0;;) {
241b8ba871bSPeter Wemm 		if (cs_prev(sp, &cs))
242b8ba871bSPeter Wemm 			return (1);
243b8ba871bSPeter Wemm 		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
244b8ba871bSPeter Wemm 			break;
245b8ba871bSPeter Wemm 		if (cs.cs_flags == CS_EOL) {
246b8ba871bSPeter Wemm 			last = 1;
247b8ba871bSPeter Wemm 			continue;
248b8ba871bSPeter Wemm 		}
249b8ba871bSPeter Wemm 		if (cs.cs_flags == CS_EMP) {
250b8ba871bSPeter Wemm 			if (--cnt == 0)
251b8ba871bSPeter Wemm 				goto ret;
252b8ba871bSPeter Wemm 			if (cs_bblank(sp, &cs))
253b8ba871bSPeter Wemm 				return (1);
254b8ba871bSPeter Wemm 			last = 0;
255b8ba871bSPeter Wemm 			continue;
256b8ba871bSPeter Wemm 		}
257b8ba871bSPeter Wemm 		switch (cs.cs_ch) {
258b8ba871bSPeter Wemm 		case '.':
259b8ba871bSPeter Wemm 		case '?':
260b8ba871bSPeter Wemm 		case '!':
261b8ba871bSPeter Wemm 			if (!last || --cnt != 0) {
262b8ba871bSPeter Wemm 				last = 0;
263b8ba871bSPeter Wemm 				continue;
264b8ba871bSPeter Wemm 			}
265b8ba871bSPeter Wemm 
266b8ba871bSPeter Wemm ret:			slno = cs.cs_lno;
267b8ba871bSPeter Wemm 			scno = cs.cs_cno;
268b8ba871bSPeter Wemm 
269b8ba871bSPeter Wemm 			/*
270b8ba871bSPeter Wemm 			 * Move to the start of the sentence, skipping blanks
271b8ba871bSPeter Wemm 			 * and special characters.
272b8ba871bSPeter Wemm 			 */
273b8ba871bSPeter Wemm 			do {
274b8ba871bSPeter Wemm 				if (cs_next(sp, &cs))
275b8ba871bSPeter Wemm 					return (1);
276b8ba871bSPeter Wemm 			} while (!cs.cs_flags &&
277b8ba871bSPeter Wemm 			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
278b8ba871bSPeter Wemm 			    cs.cs_ch == '"' || cs.cs_ch == '\''));
279b8ba871bSPeter Wemm 			if ((cs.cs_flags || isblank(cs.cs_ch)) &&
280b8ba871bSPeter Wemm 			    cs_fblank(sp, &cs))
281b8ba871bSPeter Wemm 				return (1);
282b8ba871bSPeter Wemm 
283b8ba871bSPeter Wemm 			/*
284b8ba871bSPeter Wemm 			 * If it was ".  xyz", with the cursor on the 'x', or
285b8ba871bSPeter Wemm 			 * "end.  ", with the cursor in the spaces, or the
286b8ba871bSPeter Wemm 			 * beginning of a sentence preceded by an empty line,
287b8ba871bSPeter Wemm 			 * we can end up where we started.  Fix it.
288b8ba871bSPeter Wemm 			 */
289b8ba871bSPeter Wemm 			if (vp->m_start.lno != cs.cs_lno ||
290110d525eSBaptiste Daroussin 			    vp->m_start.cno > cs.cs_cno)
291b8ba871bSPeter Wemm 				goto okret;
292b8ba871bSPeter Wemm 
293b8ba871bSPeter Wemm 			/*
294b8ba871bSPeter Wemm 			 * Well, if an empty line preceded possible blanks
295b8ba871bSPeter Wemm 			 * and the sentence, it could be a real sentence.
296b8ba871bSPeter Wemm 			 */
297b8ba871bSPeter Wemm 			for (;;) {
298b8ba871bSPeter Wemm 				if (cs_prev(sp, &cs))
299b8ba871bSPeter Wemm 					return (1);
300b8ba871bSPeter Wemm 				if (cs.cs_flags == CS_EOL)
301b8ba871bSPeter Wemm 					continue;
302b8ba871bSPeter Wemm 				if (cs.cs_flags == 0 && isblank(cs.cs_ch))
303b8ba871bSPeter Wemm 					continue;
304b8ba871bSPeter Wemm 				break;
305b8ba871bSPeter Wemm 			}
306b8ba871bSPeter Wemm 			if (cs.cs_flags == CS_EMP)
307b8ba871bSPeter Wemm 				goto okret;
308b8ba871bSPeter Wemm 
309b8ba871bSPeter Wemm 			/* But it wasn't; try again. */
310b8ba871bSPeter Wemm 			++cnt;
311b8ba871bSPeter Wemm 			cs.cs_lno = slno;
312b8ba871bSPeter Wemm 			cs.cs_cno = scno;
313b8ba871bSPeter Wemm 			last = 0;
314b8ba871bSPeter Wemm 			break;
315b8ba871bSPeter Wemm 		case '\t':
316b8ba871bSPeter Wemm 			last = 1;
317b8ba871bSPeter Wemm 			break;
318b8ba871bSPeter Wemm 		default:
319b8ba871bSPeter Wemm 			last =
320b8ba871bSPeter Wemm 			    cs.cs_flags == CS_EOL || isblank(cs.cs_ch) ||
321b8ba871bSPeter Wemm 			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
322b8ba871bSPeter Wemm 			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
323b8ba871bSPeter Wemm 		}
324b8ba871bSPeter Wemm 	}
325b8ba871bSPeter Wemm 
326b8ba871bSPeter Wemm okret:	vp->m_stop.lno = cs.cs_lno;
327b8ba871bSPeter Wemm 	vp->m_stop.cno = cs.cs_cno;
328b8ba871bSPeter Wemm 
329b8ba871bSPeter Wemm 	/*
330b8ba871bSPeter Wemm 	 * !!!
331b8ba871bSPeter Wemm 	 * If the starting and stopping cursor positions are at the first
332b8ba871bSPeter Wemm 	 * columns in the line, i.e. the movement is cutting an entire line,
333b8ba871bSPeter Wemm 	 * the buffer is in line mode, and the starting position is the last
334b8ba871bSPeter Wemm 	 * character of the previous line.
335b8ba871bSPeter Wemm 	 *
336b8ba871bSPeter Wemm 	 * All commands move to the end of the range.  Adjust the start of
337b8ba871bSPeter Wemm 	 * the range for motion commands.
338b8ba871bSPeter Wemm 	 */
339*755cc40cSBaptiste Daroussin 	if (ISMOTION(vp)) {
340b8ba871bSPeter Wemm 		if (vp->m_start.cno == 0 &&
341b8ba871bSPeter Wemm 		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
342b8ba871bSPeter Wemm 			if (db_get(sp,
343b8ba871bSPeter Wemm 			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
344b8ba871bSPeter Wemm 				return (1);
345b8ba871bSPeter Wemm 			vp->m_start.cno = len ? len - 1 : 0;
346b8ba871bSPeter Wemm 			F_SET(vp, VM_LMODE);
347b8ba871bSPeter Wemm 		} else
348b8ba871bSPeter Wemm 			--vp->m_start.cno;
349*755cc40cSBaptiste Daroussin 	}
350b8ba871bSPeter Wemm 	vp->m_final = vp->m_stop;
351b8ba871bSPeter Wemm 	return (0);
352b8ba871bSPeter Wemm }
353