xref: /illumos-gate/usr/src/cmd/mandoc/roff.h (revision 2ae4c13b79c29604a25f0cedb59f2acff223bd26)
1 /*	$Id: roff.h,v 1.58 2017/07/08 14:51:05 schwarze Exp $	*/
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
/*
 * Incomplete types: only pointers to these are used by the
 * declarations in this header, so no full definition is required here.
 */
struct ohash;
struct mdoc_arg;
union mdoc_data;
22 
/*
 * Kind of high-level macros a document uses; this is the type of
 * the macroset member of struct roff_man.
 */
enum	roff_macroset {
	MACROSET_NONE = 0,
	MACROSET_MDOC = 1,
	MACROSET_MAN  = 2
};
28 
/*
 * Manual page sections.  Used for the sec member of struct roff_node
 * and for the lastsec and lastnamed members of struct roff_man.
 * The numeric values are written out because they follow from the
 * order of the names; SEC__MAX is one more than the last real entry.
 */
enum	roff_sec {
	SEC_NONE           = 0,
	SEC_NAME           = 1,
	SEC_LIBRARY        = 2,
	SEC_SYNOPSIS       = 3,
	SEC_DESCRIPTION    = 4,
	SEC_CONTEXT        = 5,
	SEC_IMPLEMENTATION = 6,	/* IMPLEMENTATION NOTES */
	SEC_RETURN_VALUES  = 7,
	SEC_ENVIRONMENT    = 8,
	SEC_FILES          = 9,
	SEC_EXIT_STATUS    = 10,
	SEC_EXAMPLES       = 11,
	SEC_DIAGNOSTICS    = 12,
	SEC_COMPATIBILITY  = 13,
	SEC_ERRORS         = 14,
	SEC_SEE_ALSO       = 15,
	SEC_STANDARDS      = 16,
	SEC_HISTORY        = 17,
	SEC_AUTHORS        = 18,
	SEC_CAVEATS        = 19,
	SEC_BUGS           = 20,
	SEC_SECURITY       = 21,
	SEC_CUSTOM         = 22,
	SEC__MAX           = 23
};
55 
/*
 * Syntax tree node types; this is the type of the "type" member
 * of struct roff_node.
 */
enum	roff_type {
	ROFFT_ROOT  = 0,
	ROFFT_BLOCK = 1,
	ROFFT_HEAD  = 2,
	ROFFT_BODY  = 3,
	ROFFT_TAIL  = 4,
	ROFFT_ELEM  = 5,
	ROFFT_TEXT  = 6,
	ROFFT_TBL   = 7,
	ROFFT_EQN   = 8
};
67 
/*
 * Request and macro identifiers; this is the type of the tok member
 * of struct roff_node.  ROFF_, MDOC_ and MAN_ names share a single
 * numbering: ROFF_br is 0 and every later name is one more than the
 * name before it.  The values are implied by position only, so names
 * must never be reordered, and new ones change every later value.
 */
enum	roff_tok {
	/* Leading ROFF_ names, closed by ROFF_MAX. */
	ROFF_br = 0, ROFF_ce, ROFF_ft, ROFF_ll, ROFF_mc,
	ROFF_po, ROFF_rj, ROFF_sp, ROFF_ta, ROFF_ti,
	ROFF_MAX,

	/* Further ROFF_ names, one row group per initial letter. */
	ROFF_ab, ROFF_ad, ROFF_af, ROFF_aln, ROFF_als, ROFF_am,
	ROFF_am1, ROFF_ami, ROFF_ami1, ROFF_as, ROFF_as1, ROFF_asciify,

	ROFF_backtrace, ROFF_bd, ROFF_bleedat, ROFF_blm, ROFF_box,
	ROFF_boxa, ROFF_bp, ROFF_BP, ROFF_break, ROFF_breakchar,
	ROFF_brnl, ROFF_brp, ROFF_brpnl,

	ROFF_c2, ROFF_cc, ROFF_cf, ROFF_cflags, ROFF_ch, ROFF_char,
	ROFF_chop, ROFF_class, ROFF_close, ROFF_CL, ROFF_color,
	ROFF_composite, ROFF_continue, ROFF_cp, ROFF_cropat, ROFF_cs,
	ROFF_cu,

	ROFF_da, ROFF_dch, ROFF_Dd, ROFF_de, ROFF_de1, ROFF_defcolor,
	ROFF_dei, ROFF_dei1, ROFF_device, ROFF_devicem, ROFF_di,
	ROFF_do, ROFF_ds, ROFF_ds1, ROFF_dwh, ROFF_dt,

	ROFF_ec, ROFF_ecr, ROFF_ecs, ROFF_el, ROFF_em, ROFF_EN,
	ROFF_eo, ROFF_EP, ROFF_EQ, ROFF_errprint, ROFF_ev, ROFF_evc,
	ROFF_ex,

	ROFF_fallback, ROFF_fam, ROFF_fc, ROFF_fchar, ROFF_fcolor,
	ROFF_fdeferlig, ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern, ROFF_fl, ROFF_flig, ROFF_fp, ROFF_fps, ROFF_fschar,
	ROFF_fspacewidth, ROFF_fspecial, ROFF_ftr, ROFF_fzoom,

	ROFF_gcolor,

	ROFF_hc, ROFF_hcode, ROFF_hidechar, ROFF_hla, ROFF_hlm,
	ROFF_hpf, ROFF_hpfa, ROFF_hpfcode, ROFF_hw, ROFF_hy,
	ROFF_hylang, ROFF_hylen, ROFF_hym, ROFF_hypp, ROFF_hys,

	ROFF_ie, ROFF_if, ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index, ROFF_it, ROFF_itc, ROFF_IX,

	ROFF_kern, ROFF_kernafter, ROFF_kernbefore, ROFF_kernpair,

	ROFF_lc, ROFF_lc_ctype, ROFF_lds, ROFF_length, ROFF_letadj,
	ROFF_lf, ROFF_lg, ROFF_lhang, ROFF_linetabs, ROFF_lnr,
	ROFF_lnrf, ROFF_lpfx, ROFF_ls, ROFF_lsm, ROFF_lt,

	ROFF_mediasize, ROFF_minss, ROFF_mk, ROFF_mso,

	ROFF_na, ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh, ROFF_nhychar, ROFF_nm, ROFF_nn, ROFF_nop, ROFF_nr,
	ROFF_nrf, ROFF_nroff, ROFF_ns, ROFF_nx,

	ROFF_open, ROFF_opena, ROFF_os, ROFF_output,

	ROFF_padj, ROFF_papersize, ROFF_pc, ROFF_pev, ROFF_pi,
	ROFF_PI, ROFF_pl, ROFF_pm, ROFF_pn, ROFF_pnr, ROFF_ps,
	ROFF_psbb, ROFF_pshape, ROFF_pso, ROFF_ptr, ROFF_pvs,

	ROFF_rchar, ROFF_rd, ROFF_recursionlimit, ROFF_return,
	ROFF_rfschar, ROFF_rhang, ROFF_rm, ROFF_rn, ROFF_rnn,
	ROFF_rr, ROFF_rs, ROFF_rt,

	ROFF_schar, ROFF_sentchar, ROFF_shc, ROFF_shift, ROFF_sizes,
	ROFF_so, ROFF_spacewidth, ROFF_special, ROFF_spreadwarn,
	ROFF_ss, ROFF_sty, ROFF_substring, ROFF_sv, ROFF_sy,

	ROFF_T_, ROFF_tc, ROFF_TE, ROFF_TH, ROFF_tkf, ROFF_tl,
	ROFF_tm, ROFF_tm1, ROFF_tmc, ROFF_tr, ROFF_track,
	ROFF_transchar, ROFF_trf, ROFF_trimat, ROFF_trin, ROFF_trnt,
	ROFF_troff, ROFF_TS,

	ROFF_uf, ROFF_ul, ROFF_unformat, ROFF_unwatch, ROFF_unwatchn,

	ROFF_vpt, ROFF_vs,

	ROFF_warn, ROFF_warnscale, ROFF_watch, ROFF_watchlength,
	ROFF_watchn, ROFF_wh, ROFF_while, ROFF_write, ROFF_writec,
	ROFF_writem,

	ROFF_xflag,

	/* Names that close the ROFF_ range. */
	ROFF_cblock,
	ROFF_RENAMED,
	ROFF_USERDEF,
	TOKEN_NONE,

	/* MDOC_ names, closed by MDOC_MAX. */
	MDOC_Dd, MDOC_Dt, MDOC_Os, MDOC_Sh, MDOC_Ss, MDOC_Pp, MDOC_D1,
	MDOC_Dl, MDOC_Bd, MDOC_Ed, MDOC_Bl, MDOC_El, MDOC_It, MDOC_Ad,
	MDOC_An, MDOC_Ap, MDOC_Ar, MDOC_Cd, MDOC_Cm, MDOC_Dv, MDOC_Er,
	MDOC_Ev, MDOC_Ex, MDOC_Fa, MDOC_Fd, MDOC_Fl, MDOC_Fn, MDOC_Ft,
	MDOC_Ic, MDOC_In, MDOC_Li, MDOC_Nd, MDOC_Nm, MDOC_Op, MDOC_Ot,
	MDOC_Pa, MDOC_Rv, MDOC_St, MDOC_Va, MDOC_Vt, MDOC_Xr,
	MDOC__A, MDOC__B, MDOC__D, MDOC__I, MDOC__J, MDOC__N, MDOC__O,
	MDOC__P, MDOC__R, MDOC__T, MDOC__V,
	MDOC_Ac, MDOC_Ao, MDOC_Aq, MDOC_At, MDOC_Bc, MDOC_Bf, MDOC_Bo,
	MDOC_Bq, MDOC_Bsx, MDOC_Bx, MDOC_Db, MDOC_Dc, MDOC_Do, MDOC_Dq,
	MDOC_Ec, MDOC_Ef, MDOC_Em, MDOC_Eo, MDOC_Fx, MDOC_Ms, MDOC_No,
	MDOC_Ns, MDOC_Nx, MDOC_Ox, MDOC_Pc, MDOC_Pf, MDOC_Po, MDOC_Pq,
	MDOC_Qc, MDOC_Ql, MDOC_Qo, MDOC_Qq, MDOC_Re, MDOC_Rs, MDOC_Sc,
	MDOC_So, MDOC_Sq, MDOC_Sm, MDOC_Sx, MDOC_Sy, MDOC_Tn, MDOC_Ux,
	MDOC_Xc, MDOC_Xo, MDOC_Fo, MDOC_Fc, MDOC_Oo, MDOC_Oc, MDOC_Bk,
	MDOC_Ek, MDOC_Bt, MDOC_Hf, MDOC_Fr, MDOC_Ud, MDOC_Lb, MDOC_Lp,
	MDOC_Lk, MDOC_Mt, MDOC_Brq, MDOC_Bro, MDOC_Brc, MDOC__C,
	MDOC_Es, MDOC_En, MDOC_Dx, MDOC__Q, MDOC__U, MDOC_Ta,
	MDOC_MAX,

	/* MAN_ names, closed by MAN_MAX. */
	MAN_TH, MAN_SH, MAN_SS, MAN_TP, MAN_LP, MAN_PP, MAN_P, MAN_IP,
	MAN_HP, MAN_SM, MAN_SB, MAN_BI, MAN_IB, MAN_BR, MAN_RB, MAN_R,
	MAN_B, MAN_I, MAN_IR, MAN_RI, MAN_nf, MAN_fi, MAN_RE, MAN_RS,
	MAN_DT, MAN_UC, MAN_PD, MAN_AT, MAN_in, MAN_OP, MAN_EX, MAN_EE,
	MAN_UR, MAN_UE, MAN_MT, MAN_ME,
	MAN_MAX
};
474 
/*
 * Where to put the next node; this is the type of the
 * next member of struct roff_man.
 */
enum	roff_next {
	ROFF_NEXT_SIBLING = 0,
	ROFF_NEXT_CHILD   = 1
};
479 
/*
 * Marks a BODY whose formatting is already finished while its scope
 * remains open, which is how badly nested blocks are represented.
 * This is the type of the end member of struct roff_node.
 */
enum	mdoc_endbody {
	ENDBODY_NOT   = 0,
	ENDBODY_SPACE = 1	/* Is broken: append a space. */
};
488 
489 struct	roff_node {
490 	struct roff_node *parent;  /* Parent AST node. */
491 	struct roff_node *child;   /* First child AST node. */
492 	struct roff_node *last;    /* Last child AST node. */
493 	struct roff_node *next;    /* Sibling AST node. */
494 	struct roff_node *prev;    /* Prior sibling AST node. */
495 	struct roff_node *head;    /* BLOCK */
496 	struct roff_node *body;    /* BLOCK/ENDBODY */
497 	struct roff_node *tail;    /* BLOCK */
498 	struct mdoc_arg	 *args;    /* BLOCK/ELEM */
499 	union mdoc_data	 *norm;    /* Normalized arguments. */
500 	char		 *string;  /* TEXT */
501 	const struct tbl_span *span; /* TBL */
502 	struct eqn_box	 *eqn;     /* EQN */
503 	int		  line;    /* Input file line number. */
504 	int		  pos;     /* Input file column number. */
505 	int		  flags;
506 #define	NODE_VALID	 (1 << 0)  /* Has been validated. */
507 #define	NODE_ENDED	 (1 << 1)  /* Gone past body end mark. */
508 #define	NODE_EOS	 (1 << 2)  /* At sentence boundary. */
509 #define	NODE_LINE	 (1 << 3)  /* First macro/text on line. */
510 #define	NODE_SYNPRETTY	 (1 << 4)  /* SYNOPSIS-style formatting. */
511 #define	NODE_BROKEN	 (1 << 5)  /* Must validate parent when ending. */
512 #define	NODE_DELIMO	 (1 << 6)
513 #define	NODE_DELIMC	 (1 << 7)
514 #define	NODE_NOSRC	 (1 << 8)  /* Generated node, not in input file. */
515 #define	NODE_NOPRT	 (1 << 9)  /* Shall not print anything. */
516 	int		  prev_font; /* Before entering this node. */
517 	int		  aux;     /* Decoded node data, type-dependent. */
518 	enum roff_tok	  tok;     /* Request or macro ID. */
519 	enum roff_type	  type;    /* AST node type. */
520 	enum roff_sec	  sec;     /* Current named section. */
521 	enum mdoc_endbody end;     /* BODY */
522 };
523 
524 struct	roff_meta {
525 	char		 *msec;    /* Manual section, usually a digit. */
526 	char		 *vol;     /* Manual volume title. */
527 	char		 *os;      /* Operating system. */
528 	char		 *arch;    /* Machine architecture. */
529 	char		 *title;   /* Manual title, usually CAPS. */
530 	char		 *name;    /* Leading manual name. */
531 	char		 *date;    /* Normalized date. */
532 	int		  hasbody; /* Document is not empty. */
533 	int		  rcsids;  /* Bits indexed by enum mandoc_os. */
534 	enum mandoc_os	  os_e;    /* Operating system. */
535 };
536 
537 struct	roff_man {
538 	struct roff_meta  meta;    /* Document meta-data. */
539 	struct mparse	 *parse;   /* Parse pointer. */
540 	struct roff	 *roff;    /* Roff parser state data. */
541 	struct ohash	 *mdocmac; /* Mdoc macro lookup table. */
542 	struct ohash	 *manmac;  /* Man macro lookup table. */
543 	const char	 *os_s;    /* Default operating system. */
544 	struct roff_node *first;   /* The first node parsed. */
545 	struct roff_node *last;    /* The last node parsed. */
546 	struct roff_node *last_es; /* The most recent Es node. */
547 	int		  quick;   /* Abort parse early. */
548 	int		  flags;   /* Parse flags. */
549 #define	MDOC_LITERAL	 (1 << 1)  /* In a literal scope. */
550 #define	MDOC_PBODY	 (1 << 2)  /* In the document body. */
551 #define	MDOC_NEWLINE	 (1 << 3)  /* First macro/text in a line. */
552 #define	MDOC_PHRASE	 (1 << 4)  /* In a Bl -column phrase. */
553 #define	MDOC_PHRASELIT	 (1 << 5)  /* Literal within a phrase. */
554 #define	MDOC_FREECOL	 (1 << 6)  /* `It' invocation should close. */
555 #define	MDOC_SYNOPSIS	 (1 << 7)  /* SYNOPSIS-style formatting. */
556 #define	MDOC_KEEP	 (1 << 8)  /* In a word keep. */
557 #define	MDOC_SMOFF	 (1 << 9)  /* Spacing is off. */
558 #define	MDOC_NODELIMC	 (1 << 10) /* Disable closing delimiter handling. */
559 #define	MAN_ELINE	 (1 << 11) /* Next-line element scope. */
560 #define	MAN_BLINE	 (1 << 12) /* Next-line block scope. */
561 #define	MDOC_PHRASEQF	 (1 << 13) /* Quote first word encountered. */
562 #define	MDOC_PHRASEQL	 (1 << 14) /* Quote last word of this phrase. */
563 #define	MDOC_PHRASEQN	 (1 << 15) /* Quote first word of the next phrase. */
564 #define	MAN_LITERAL	  MDOC_LITERAL
565 #define	MAN_NEWLINE	  MDOC_NEWLINE
566 	enum roff_macroset macroset; /* Kind of high-level macros used. */
567 	enum roff_sec	  lastsec; /* Last section seen. */
568 	enum roff_sec	  lastnamed; /* Last standard section seen. */
569 	enum roff_next	  next;    /* Where to put the next node. */
570 };
571 
/*
 * Table of names, defined in another translation unit.
 * NOTE(review): presumably indexed by enum roff_tok values - confirm
 * against the definition.
 */
extern const char *const *roff_name;
573 
574 
/*
 * Functions declared here and defined elsewhere.  The remarks state
 * only what the signatures show; NOTE(review) marks guesses taken
 * from the names that still need checking against the definitions.
 */

/* Takes a pointer to a string pointer and a read-only node. */
void deroff(char **, const struct roff_node *);

/*
 * Returns a hash table handle for two given tokens.
 * NOTE(review): the two tokens presumably bound a range - confirm.
 */
struct ohash *roffhash_alloc(enum roff_tok, enum roff_tok);

/*
 * Returns a token for a hash table, a string and a size.
 * NOTE(review): the size is presumably the string length - confirm.
 */
enum roff_tok roffhash_find(struct ohash *, const char *, size_t);

/* Takes a hash table handle; NOTE(review): presumably releases it. */
void roffhash_free(struct ohash *);

/* Takes the parse state of a document. */
void roff_validate(struct roff_man *);
580