xref: /illumos-gate/usr/src/cmd/troff/n8.c (revision f875b4ebb1dd9fdbeb043557cab38ab3bf7f6e01)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
41 
42 #include	<ctype.h>
43 #include	"tdef.h"
44 #include "ext.h"
45 #define	HY_BIT	0200	/* stuff in here only works for ascii */
46 
47 /*
48  * troff8.c
49  *
50  * hyphenation
51  */
52 
53 char	hbuf[NHEX];
54 char	*nexth = hbuf;
55 tchar	*hyend;
56 #define THRESH 160 /*digram goodness threshold*/
57 int	thresh = THRESH;
58 
59 int
60 hyphen(wp)
61 	tchar *wp;
62 {
63 	int	j;
64 	tchar *i;
65 
66 	i = wp;
67 	while (punct(cbits(*i++)))
68 		;
69 	if (!alph(cbits(*--i)))
70 		return (0);
71 	wdstart = i++;
72 	while (alph(cbits(*i++)))
73 		;
74 	hyend = wdend = --i - 1;
75 	while (punct(cbits(*i++)))
76 		;
77 	if (*--i)
78 		return (0);
79 	if ((wdend - wdstart - 4) < 0)
80 		return (0);
81 	hyp = hyptr;
82 	*hyp = 0;
83 	hyoff = 2;
84 	if (!exword() && !suffix())
85 		digram();
86 	*hyp++ = 0;
87 	if (*hyptr)
88 		for (j = 1; j; ) {
89 			j = 0;
90 			for (hyp = hyptr + 1; *hyp != 0; hyp++) {
91 				if (*(hyp - 1) > *hyp) {
92 					j++;
93 					i = *hyp;
94 					*hyp = *(hyp - 1);
95 					*(hyp - 1) = i;
96 				}
97 			}
98 		}
99 
100 	return (0);
101 }
102 
103 
/*
 * punct - return 1 if i is a non-zero character code that is not a
 * letter (as decided by alph()), otherwise 0.
 */
int
punct(i)
int	i;
{
	if (i == 0 || alph(i))
		return (0);
	return (1);
}
112 
113 
/*
 * alph - return 1 if i is an ASCII letter ('a'-'z' or 'A'-'Z'),
 * otherwise 0.
 */
int
alph(i)
int	i;
{
	if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z'))
		return (1);
	return (0);
}
122 
123 
124 int
125 caseht()
126 {
127 	thresh = THRESH;
128 	if (skip())
129 		return (0);
130 	noscale++;
131 	thresh = atoi();
132 	noscale = 0;
133 
134 	return (0);
135 }
136 
137 
138 int
139 casehw()
140 {
141 	int	i, k;
142 	char	*j;
143 	tchar t;
144 
145 	k = 0;
146 	while (!skip()) {
147 		if ((j = nexth) >= (hbuf + NHEX - 2))
148 			goto full;
149 		for (; ; ) {
150 			if (ismot(t = getch()))
151 				continue;
152 			i = cbits(t);
153 			if (i == ' ' || i == '\n') {
154 				*j++ = 0;
155 				nexth = j;
156 				*j = 0;
157 				if (i == ' ')
158 					break;
159 				else
160 					return (0);
161 			}
162 			if (i == '-') {
163 				k = HY_BIT;
164 				continue;
165 			}
166 			*j++ = maplow(i) | k;
167 			k = 0;
168 			if (j >= (hbuf + NHEX - 2))
169 				goto full;
170 		}
171 	}
172 	return (0);
173 full:
174 	errprint(gettext("exception word list full."));
175 	*nexth = 0;
176 
177 	return (0);
178 }
179 
180 
181 int
182 exword()
183 {
184 	tchar *w;
185 	char	*e;
186 	char	*save;
187 
188 	e = hbuf;
189 	while (1) {
190 		save = e;
191 		if (*e == 0)
192 			return(0);
193 		w = wdstart;
194 		while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
195 			e++;
196 			w++;
197 		};
198 		if (!*e) {
199 			if (w-1 == hyend || (w == wdend && maplow(cbits(*w)) == 's')) {
200 				w = wdstart;
201 				for (e = save; *e; e++) {
202 					if (*e & HY_BIT)
203 						*hyp++ = w;
204 					if (hyp > (hyptr + NHYP - 1))
205 						hyp = hyptr + NHYP - 1;
206 					w++;
207 				}
208 				return(1);
209 			} else {
210 				e++;
211 				continue;
212 			}
213 		} else
214 			while (*e++)
215 				;
216 	}
217 
218 	return (0);
219 }
220 
221 
222 int
223 suffix()
224 {
225 	tchar *w;
226 	char	*s, *s0;
227 	tchar i;
228 	extern char	*suftab[];
229 	extern tchar *chkvow();
230 
231 again:
232 	if (!alph(cbits(i = cbits(*hyend))))
233 		return(0);
234 	if (i < 'a')
235 		i -= 'A' - 'a';
236 	if ((s0 = suftab[i-'a']) == 0)
237 		return(0);
238 	for (; ; ) {
239 		if ((i = *s0 & 017) == 0)
240 			return(0);
241 		s = s0 + i - 1;
242 		w = hyend - 1;
243 		while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
244 			s--;
245 			w--;
246 		}
247 		if (s == s0)
248 			break;
249 		s0 += i;
250 	}
251 	s = s0 + i - 1;
252 	w = hyend;
253 	if (*s0 & HY_BIT)
254 		goto mark;
255 	while (s > s0) {
256 		w--;
257 		if (*s-- & HY_BIT) {
258 mark:
259 			hyend = w - 1;
260 			if (*s0 & 0100)
261 				continue;
262 			if (!chkvow(w))
263 				return(0);
264 			*hyp++ = w;
265 		}
266 	}
267 	if (*s0 & 040)
268 		return(0);
269 	if (exword())
270 		return(1);
271 	goto again;
272 }
273 
274 
/*
 * maplow - return the lower-case form of i when ischar(i) holds and i
 * is an upper-case letter; any other value is returned unchanged.
 */
int
maplow(i)
int	i;
{
	return ((ischar(i) && isupper(i)) ? tolower(i) : i);
}
283 
284 
/*
 * vowel - return 1 if i, after maplow(), is one of a, e, i, o, u or y;
 * otherwise 0.
 */
int
vowel(i)
int	i;
{
	int	c;

	c = maplow(i);
	if (c == 'a' || c == 'e' || c == 'i' ||
	    c == 'o' || c == 'u' || c == 'y')
		return (1);
	return (0);
}
301 
302 
303 tchar *chkvow(w)
304 tchar *w;
305 {
306 	while (--w >= wdstart)
307 		if (vowel(cbits(*w)))
308 			return(w);
309 	return(0);
310 }
311 
312 
313 int
314 digram()
315 {
316 	tchar *w;
317 	int	val;
318 	tchar * nhyend, *maxw;
319 	int	maxval;
320 	extern char	bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
321 
322 again:
323 	if (!(w = chkvow(hyend + 1)))
324 		return (0);
325 	hyend = w;
326 	if (!(w = chkvow(hyend)))
327 		return (0);
328 	nhyend = w;
329 	maxval = 0;
330 	w--;
331 	while ((++w < hyend) && (w < (wdend - 1))) {
332 		val = 1;
333 		if (w == wdstart)
334 			val *= dilook('a', cbits(*w), bxh);
335 		else if (w == wdstart + 1)
336 			val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
337 		else
338 			val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
339 		val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
340 		val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
341 		if (val > maxval) {
342 			maxval = val;
343 			maxw = w + 1;
344 		}
345 	}
346 	hyend = nhyend;
347 	if (maxval > thresh)
348 		*hyp++ = maxw;
349 	goto again;
350 }
351 
352 
/*
 * dilook - fetch the 4-bit weight for the letter pair (a, b) from the
 * packed table t.
 *
 * The row is selected by a and the byte within the row by b / 2, both
 * taken as offsets from 'a' after maplow().  Each byte holds two
 * weights: the high nibble is used when b's offset is even, the low
 * nibble when it is odd.
 *
 * Returns a value in the range 0..15.
 */
int
dilook(a, b, t)
int	a, b;
char	t[26][13];
{
	int	row, col, packed;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	packed = t[row][col / 2];
	if ((col & 01) == 0)
		packed >>= 4;
	return (packed & 017);
}
365 
366 
367