/* xref: /illumos-gate/usr/src/cmd/mandoc/roff.h (revision c5749750a3e052f1194f65a303456224c51dea63) */
/*	$Id: roff.h,v 1.59 2018/04/11 17:11:13 schwarze Exp $	*/
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2013, 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Forward declarations of types that this header only refers to
 * through pointers (see the "args" and "norm" members of struct
 * roff_node and the "mdocmac"/"manmac" members of struct roff_man).
 * Their definitions are not part of this header.
 */
struct	ohash;
struct	mdoc_arg;
union	mdoc_data;

/*
 * Kind of high-level macro set used by a document; stored in the
 * "macroset" member of struct roff_man.  MACROSET_NONE is zero, so a
 * zero-initialized parser state starts out with no macro set chosen.
 */
enum	roff_macroset {
	MACROSET_NONE = 0,
	MACROSET_MDOC,
	MACROSET_MAN
};

/*
 * Identifiers for named manual page sections.  Values of this type
 * are kept per node ("sec" in struct roff_node) and in the parser
 * state ("lastsec" and "lastnamed" in struct roff_man).
 *
 * Values are assigned implicitly, so the order of the enumerators is
 * significant and must not be changed casually.
 * NOTE(review): SEC_CUSTOM presumably stands for any section title that
 * is not one of the standard names, and SEC__MAX looks like a count
 * sentinel rather than a real section -- confirm against the users.
 */
enum	roff_sec {
	SEC_NONE = 0,
	SEC_NAME,
	SEC_LIBRARY,
	SEC_SYNOPSIS,
	SEC_DESCRIPTION,
	SEC_CONTEXT,
	SEC_IMPLEMENTATION,	/* IMPLEMENTATION NOTES */
	SEC_RETURN_VALUES,
	SEC_ENVIRONMENT,
	SEC_FILES,
	SEC_EXIT_STATUS,
	SEC_EXAMPLES,
	SEC_DIAGNOSTICS,
	SEC_COMPATIBILITY,
	SEC_ERRORS,
	SEC_SEE_ALSO,
	SEC_STANDARDS,
	SEC_HISTORY,
	SEC_AUTHORS,
	SEC_CAVEATS,
	SEC_BUGS,
	SEC_SECURITY,
	SEC_CUSTOM,
	SEC__MAX
};

/*
 * AST node type; stored in the "type" member of struct roff_node.
 * The member comments of struct roff_node tie several node fields to
 * these types: head/body/tail to BLOCK, args to BLOCK and ELEM,
 * string to TEXT, span to TBL and eqn to EQN.
 *
 * No explicit values are given, so ROFFT_ROOT is 0 and the remaining
 * enumerators follow in declaration order.
 */
enum	roff_type {
	ROFFT_ROOT,
	ROFFT_BLOCK,
	ROFFT_HEAD,
	ROFFT_BODY,
	ROFFT_TAIL,
	ROFFT_ELEM,
	ROFFT_TEXT,
	ROFFT_COMMENT,
	ROFFT_TBL,
	ROFFT_EQN
};

/*
 * Request or macro identifier; stored in the "tok" member of struct
 * roff_node and used as argument and return type of the roffhash_*()
 * functions declared at the end of this header.
 *
 * All values are assigned implicitly starting from ROFF_br = 0, so the
 * order of the enumerators is significant: inserting or reordering
 * entries renumbers everything that follows.
 *
 * The list consists of consecutive groups separated by marker
 * enumerators:
 *   ROFF_br  .. ROFF_ti       followed by the marker ROFF_MAX
 *   ROFF_ab  .. ROFF_USERDEF  followed by the marker TOKEN_NONE
 *   MDOC_Dd  .. MDOC_Ta       followed by the marker MDOC_MAX
 *   MAN_TH   .. MAN_ME        followed by the marker MAN_MAX
 * NOTE(review): judging by the prefixes, the groups are roff requests,
 * mdoc macros and man macros, and the markers presumably serve as
 * exclusive upper bounds for range checks and table sizes -- confirm
 * against the code using them.
 */
enum	roff_tok {
	ROFF_br = 0,
	ROFF_ce,
	ROFF_ft,
	ROFF_ll,
	ROFF_mc,
	ROFF_po,
	ROFF_rj,
	ROFF_sp,
	ROFF_ta,
	ROFF_ti,
	ROFF_MAX,
	ROFF_ab,
	ROFF_ad,
	ROFF_af,
	ROFF_aln,
	ROFF_als,
	ROFF_am,
	ROFF_am1,
	ROFF_ami,
	ROFF_ami1,
	ROFF_as,
	ROFF_as1,
	ROFF_asciify,
	ROFF_backtrace,
	ROFF_bd,
	ROFF_bleedat,
	ROFF_blm,
	ROFF_box,
	ROFF_boxa,
	ROFF_bp,
	ROFF_BP,
	ROFF_break,
	ROFF_breakchar,
	ROFF_brnl,
	ROFF_brp,
	ROFF_brpnl,
	ROFF_c2,
	ROFF_cc,
	ROFF_cf,
	ROFF_cflags,
	ROFF_ch,
	ROFF_char,
	ROFF_chop,
	ROFF_class,
	ROFF_close,
	ROFF_CL,
	ROFF_color,
	ROFF_composite,
	ROFF_continue,
	ROFF_cp,
	ROFF_cropat,
	ROFF_cs,
	ROFF_cu,
	ROFF_da,
	ROFF_dch,
	ROFF_Dd,
	ROFF_de,
	ROFF_de1,
	ROFF_defcolor,
	ROFF_dei,
	ROFF_dei1,
	ROFF_device,
	ROFF_devicem,
	ROFF_di,
	ROFF_do,
	ROFF_ds,
	ROFF_ds1,
	ROFF_dwh,
	ROFF_dt,
	ROFF_ec,
	ROFF_ecr,
	ROFF_ecs,
	ROFF_el,
	ROFF_em,
	ROFF_EN,
	ROFF_eo,
	ROFF_EP,
	ROFF_EQ,
	ROFF_errprint,
	ROFF_ev,
	ROFF_evc,
	ROFF_ex,
	ROFF_fallback,
	ROFF_fam,
	ROFF_fc,
	ROFF_fchar,
	ROFF_fcolor,
	ROFF_fdeferlig,
	ROFF_feature,
	/* MAN_fi; ignored in mdoc(7) */
	ROFF_fkern,
	ROFF_fl,
	ROFF_flig,
	ROFF_fp,
	ROFF_fps,
	ROFF_fschar,
	ROFF_fspacewidth,
	ROFF_fspecial,
	ROFF_ftr,
	ROFF_fzoom,
	ROFF_gcolor,
	ROFF_hc,
	ROFF_hcode,
	ROFF_hidechar,
	ROFF_hla,
	ROFF_hlm,
	ROFF_hpf,
	ROFF_hpfa,
	ROFF_hpfcode,
	ROFF_hw,
	ROFF_hy,
	ROFF_hylang,
	ROFF_hylen,
	ROFF_hym,
	ROFF_hypp,
	ROFF_hys,
	ROFF_ie,
	ROFF_if,
	ROFF_ig,
	/* MAN_in; ignored in mdoc(7) */
	ROFF_index,
	ROFF_it,
	ROFF_itc,
	ROFF_IX,
	ROFF_kern,
	ROFF_kernafter,
	ROFF_kernbefore,
	ROFF_kernpair,
	ROFF_lc,
	ROFF_lc_ctype,
	ROFF_lds,
	ROFF_length,
	ROFF_letadj,
	ROFF_lf,
	ROFF_lg,
	ROFF_lhang,
	ROFF_linetabs,
	ROFF_lnr,
	ROFF_lnrf,
	ROFF_lpfx,
	ROFF_ls,
	ROFF_lsm,
	ROFF_lt,
	ROFF_mediasize,
	ROFF_minss,
	ROFF_mk,
	ROFF_mso,
	ROFF_na,
	ROFF_ne,
	/* MAN_nf; ignored in mdoc(7) */
	ROFF_nh,
	ROFF_nhychar,
	ROFF_nm,
	ROFF_nn,
	ROFF_nop,
	ROFF_nr,
	ROFF_nrf,
	ROFF_nroff,
	ROFF_ns,
	ROFF_nx,
	ROFF_open,
	ROFF_opena,
	ROFF_os,
	ROFF_output,
	ROFF_padj,
	ROFF_papersize,
	ROFF_pc,
	ROFF_pev,
	ROFF_pi,
	ROFF_PI,
	ROFF_pl,
	ROFF_pm,
	ROFF_pn,
	ROFF_pnr,
	ROFF_ps,
	ROFF_psbb,
	ROFF_pshape,
	ROFF_pso,
	ROFF_ptr,
	ROFF_pvs,
	ROFF_rchar,
	ROFF_rd,
	ROFF_recursionlimit,
	ROFF_return,
	ROFF_rfschar,
	ROFF_rhang,
	ROFF_rm,
	ROFF_rn,
	ROFF_rnn,
	ROFF_rr,
	ROFF_rs,
	ROFF_rt,
	ROFF_schar,
	ROFF_sentchar,
	ROFF_shc,
	ROFF_shift,
	ROFF_sizes,
	ROFF_so,
	ROFF_spacewidth,
	ROFF_special,
	ROFF_spreadwarn,
	ROFF_ss,
	ROFF_sty,
	ROFF_substring,
	ROFF_sv,
	ROFF_sy,
	ROFF_T_,
	ROFF_tc,
	ROFF_TE,
	ROFF_TH,
	ROFF_tkf,
	ROFF_tl,
	ROFF_tm,
	ROFF_tm1,
	ROFF_tmc,
	ROFF_tr,
	ROFF_track,
	ROFF_transchar,
	ROFF_trf,
	ROFF_trimat,
	ROFF_trin,
	ROFF_trnt,
	ROFF_troff,
	ROFF_TS,
	ROFF_uf,
	ROFF_ul,
	ROFF_unformat,
	ROFF_unwatch,
	ROFF_unwatchn,
	ROFF_vpt,
	ROFF_vs,
	ROFF_warn,
	ROFF_warnscale,
	ROFF_watch,
	ROFF_watchlength,
	ROFF_watchn,
	ROFF_wh,
	ROFF_while,
	ROFF_write,
	ROFF_writec,
	ROFF_writem,
	ROFF_xflag,
	ROFF_cblock,
	ROFF_RENAMED,
	ROFF_USERDEF,
	TOKEN_NONE,
	MDOC_Dd,
	MDOC_Dt,
	MDOC_Os,
	MDOC_Sh,
	MDOC_Ss,
	MDOC_Pp,
	MDOC_D1,
	MDOC_Dl,
	MDOC_Bd,
	MDOC_Ed,
	MDOC_Bl,
	MDOC_El,
	MDOC_It,
	MDOC_Ad,
	MDOC_An,
	MDOC_Ap,
	MDOC_Ar,
	MDOC_Cd,
	MDOC_Cm,
	MDOC_Dv,
	MDOC_Er,
	MDOC_Ev,
	MDOC_Ex,
	MDOC_Fa,
	MDOC_Fd,
	MDOC_Fl,
	MDOC_Fn,
	MDOC_Ft,
	MDOC_Ic,
	MDOC_In,
	MDOC_Li,
	MDOC_Nd,
	MDOC_Nm,
	MDOC_Op,
	MDOC_Ot,
	MDOC_Pa,
	MDOC_Rv,
	MDOC_St,
	MDOC_Va,
	MDOC_Vt,
	MDOC_Xr,
	MDOC__A,
	MDOC__B,
	MDOC__D,
	MDOC__I,
	MDOC__J,
	MDOC__N,
	MDOC__O,
	MDOC__P,
	MDOC__R,
	MDOC__T,
	MDOC__V,
	MDOC_Ac,
	MDOC_Ao,
	MDOC_Aq,
	MDOC_At,
	MDOC_Bc,
	MDOC_Bf,
	MDOC_Bo,
	MDOC_Bq,
	MDOC_Bsx,
	MDOC_Bx,
	MDOC_Db,
	MDOC_Dc,
	MDOC_Do,
	MDOC_Dq,
	MDOC_Ec,
	MDOC_Ef,
	MDOC_Em,
	MDOC_Eo,
	MDOC_Fx,
	MDOC_Ms,
	MDOC_No,
	MDOC_Ns,
	MDOC_Nx,
	MDOC_Ox,
	MDOC_Pc,
	MDOC_Pf,
	MDOC_Po,
	MDOC_Pq,
	MDOC_Qc,
	MDOC_Ql,
	MDOC_Qo,
	MDOC_Qq,
	MDOC_Re,
	MDOC_Rs,
	MDOC_Sc,
	MDOC_So,
	MDOC_Sq,
	MDOC_Sm,
	MDOC_Sx,
	MDOC_Sy,
	MDOC_Tn,
	MDOC_Ux,
	MDOC_Xc,
	MDOC_Xo,
	MDOC_Fo,
	MDOC_Fc,
	MDOC_Oo,
	MDOC_Oc,
	MDOC_Bk,
	MDOC_Ek,
	MDOC_Bt,
	MDOC_Hf,
	MDOC_Fr,
	MDOC_Ud,
	MDOC_Lb,
	MDOC_Lp,
	MDOC_Lk,
	MDOC_Mt,
	MDOC_Brq,
	MDOC_Bro,
	MDOC_Brc,
	MDOC__C,
	MDOC_Es,
	MDOC_En,
	MDOC_Dx,
	MDOC__Q,
	MDOC__U,
	MDOC_Ta,
	MDOC_MAX,
	MAN_TH,
	MAN_SH,
	MAN_SS,
	MAN_TP,
	MAN_LP,
	MAN_PP,
	MAN_P,
	MAN_IP,
	MAN_HP,
	MAN_SM,
	MAN_SB,
	MAN_BI,
	MAN_IB,
	MAN_BR,
	MAN_RB,
	MAN_R,
	MAN_B,
	MAN_I,
	MAN_IR,
	MAN_RI,
	MAN_nf,
	MAN_fi,
	MAN_RE,
	MAN_RS,
	MAN_DT,
	MAN_UC,
	MAN_PD,
	MAN_AT,
	MAN_in,
	MAN_OP,
	MAN_EX,
	MAN_EE,
	MAN_UR,
	MAN_UE,
	MAN_MT,
	MAN_ME,
	MAN_MAX
};

/*
 * Where to put the next node relative to the most recent one; stored
 * in the "next" member of struct roff_man.  ROFF_NEXT_SIBLING is zero
 * and is therefore what a zero-initialized parser state holds.
 */
enum	roff_next {
	ROFF_NEXT_SIBLING = 0,
	ROFF_NEXT_CHILD
};

/*
 * Indicates that a BODY's formatting has ended, but
 * the scope is still open.  Used for badly nested blocks.
 * Stored in the "end" member of struct roff_node; ENDBODY_NOT is
 * zero, so zero-initialized nodes are not marked as ended.
 */
enum	mdoc_endbody {
	ENDBODY_NOT = 0,
	ENDBODY_SPACE	/* Is broken: append a space. */
};

490 struct	roff_node {
491 	struct roff_node *parent;  /* Parent AST node. */
492 	struct roff_node *child;   /* First child AST node. */
493 	struct roff_node *last;    /* Last child AST node. */
494 	struct roff_node *next;    /* Sibling AST node. */
495 	struct roff_node *prev;    /* Prior sibling AST node. */
496 	struct roff_node *head;    /* BLOCK */
497 	struct roff_node *body;    /* BLOCK/ENDBODY */
498 	struct roff_node *tail;    /* BLOCK */
499 	struct mdoc_arg	 *args;    /* BLOCK/ELEM */
500 	union mdoc_data	 *norm;    /* Normalized arguments. */
501 	char		 *string;  /* TEXT */
502 	const struct tbl_span *span; /* TBL */
503 	struct eqn_box	 *eqn;     /* EQN */
504 	int		  line;    /* Input file line number. */
505 	int		  pos;     /* Input file column number. */
506 	int		  flags;
507 #define	NODE_VALID	 (1 << 0)  /* Has been validated. */
508 #define	NODE_ENDED	 (1 << 1)  /* Gone past body end mark. */
509 #define	NODE_EOS	 (1 << 2)  /* At sentence boundary. */
510 #define	NODE_LINE	 (1 << 3)  /* First macro/text on line. */
511 #define	NODE_SYNPRETTY	 (1 << 4)  /* SYNOPSIS-style formatting. */
512 #define	NODE_BROKEN	 (1 << 5)  /* Must validate parent when ending. */
513 #define	NODE_DELIMO	 (1 << 6)
514 #define	NODE_DELIMC	 (1 << 7)
515 #define	NODE_NOSRC	 (1 << 8)  /* Generated node, not in input file. */
516 #define	NODE_NOPRT	 (1 << 9)  /* Shall not print anything. */
517 	int		  prev_font; /* Before entering this node. */
518 	int		  aux;     /* Decoded node data, type-dependent. */
519 	enum roff_tok	  tok;     /* Request or macro ID. */
520 	enum roff_type	  type;    /* AST node type. */
521 	enum roff_sec	  sec;     /* Current named section. */
522 	enum mdoc_endbody end;     /* BODY */
523 };
524 
525 struct	roff_meta {
526 	char		 *msec;    /* Manual section, usually a digit. */
527 	char		 *vol;     /* Manual volume title. */
528 	char		 *os;      /* Operating system. */
529 	char		 *arch;    /* Machine architecture. */
530 	char		 *title;   /* Manual title, usually CAPS. */
531 	char		 *name;    /* Leading manual name. */
532 	char		 *date;    /* Normalized date. */
533 	int		  hasbody; /* Document is not empty. */
534 	int		  rcsids;  /* Bits indexed by enum mandoc_os. */
535 	enum mandoc_os	  os_e;    /* Operating system. */
536 };
537 
538 struct	roff_man {
539 	struct roff_meta  meta;    /* Document meta-data. */
540 	struct mparse	 *parse;   /* Parse pointer. */
541 	struct roff	 *roff;    /* Roff parser state data. */
542 	struct ohash	 *mdocmac; /* Mdoc macro lookup table. */
543 	struct ohash	 *manmac;  /* Man macro lookup table. */
544 	const char	 *os_s;    /* Default operating system. */
545 	struct roff_node *first;   /* The first node parsed. */
546 	struct roff_node *last;    /* The last node parsed. */
547 	struct roff_node *last_es; /* The most recent Es node. */
548 	int		  quick;   /* Abort parse early. */
549 	int		  flags;   /* Parse flags. */
550 #define	MDOC_LITERAL	 (1 << 1)  /* In a literal scope. */
551 #define	MDOC_PBODY	 (1 << 2)  /* In the document body. */
552 #define	MDOC_NEWLINE	 (1 << 3)  /* First macro/text in a line. */
553 #define	MDOC_PHRASE	 (1 << 4)  /* In a Bl -column phrase. */
554 #define	MDOC_PHRASELIT	 (1 << 5)  /* Literal within a phrase. */
555 #define	MDOC_FREECOL	 (1 << 6)  /* `It' invocation should close. */
556 #define	MDOC_SYNOPSIS	 (1 << 7)  /* SYNOPSIS-style formatting. */
557 #define	MDOC_KEEP	 (1 << 8)  /* In a word keep. */
558 #define	MDOC_SMOFF	 (1 << 9)  /* Spacing is off. */
559 #define	MDOC_NODELIMC	 (1 << 10) /* Disable closing delimiter handling. */
560 #define	MAN_ELINE	 (1 << 11) /* Next-line element scope. */
561 #define	MAN_BLINE	 (1 << 12) /* Next-line block scope. */
562 #define	MDOC_PHRASEQF	 (1 << 13) /* Quote first word encountered. */
563 #define	MDOC_PHRASEQL	 (1 << 14) /* Quote last word of this phrase. */
564 #define	MDOC_PHRASEQN	 (1 << 15) /* Quote first word of the next phrase. */
565 #define	MAN_LITERAL	  MDOC_LITERAL
566 #define	MAN_NEWLINE	  MDOC_NEWLINE
567 	enum roff_macroset macroset; /* Kind of high-level macros used. */
568 	enum roff_sec	  lastsec; /* Last section seen. */
569 	enum roff_sec	  lastnamed; /* Last standard section seen. */
570 	enum roff_next	  next;    /* Where to put the next node. */
571 };
572 
/*
 * Table of names, defined in another translation unit.
 * NOTE(review): presumably indexed by enum roff_tok to obtain the
 * spelling of a request or macro -- confirm at the definition.
 */
extern	const char *const *roff_name;

/*
 * Functions implemented elsewhere; only their prototypes are visible
 * here, so the notes below are assumptions to be confirmed against
 * the implementations.
 *
 * deroff():         NOTE(review): presumably stores a plain-text
 *                   rendering of the given node through the char **
 *                   argument; ownership of the result is not visible.
 * roffhash_alloc(): NOTE(review): presumably builds a lookup table for
 *                   the range of tokens delimited by its two arguments.
 * roffhash_find():  NOTE(review): presumably looks up a name of the
 *                   given length and returns its token.
 * roffhash_free():  NOTE(review): presumably releases a table obtained
 *                   from roffhash_alloc().
 * roff_validate():  NOTE(review): presumably validates the most recent
 *                   node of the parser state.
 *
 * size_t is used below but no header is included in the visible text,
 * so the including file must provide it (for example via <stddef.h>).
 */
void		 deroff(char **, const struct roff_node *);
struct ohash	*roffhash_alloc(enum roff_tok, enum roff_tok);
enum roff_tok	 roffhash_find(struct ohash *, const char *, size_t);
void		 roffhash_free(struct ohash *);
void		 roff_validate(struct roff_man *);