1*c1c95addSBrooks Davis /* $Id: chars.c,v 1.81 2022/06/26 20:33:43 schwarze Exp $ */
261d06d6bSBaptiste Daroussin /*
361d06d6bSBaptiste Daroussin * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
46d38604fSBaptiste Daroussin * Copyright (c) 2011, 2014, 2015, 2017, 2018, 2020
56d38604fSBaptiste Daroussin * Ingo Schwarze <schwarze@openbsd.org>
661d06d6bSBaptiste Daroussin *
761d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any
861d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above
961d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies.
1061d06d6bSBaptiste Daroussin *
1161d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1261d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1361d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1461d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1561d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1661d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1761d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1861d06d6bSBaptiste Daroussin */
1961d06d6bSBaptiste Daroussin #include "config.h"
2061d06d6bSBaptiste Daroussin
2161d06d6bSBaptiste Daroussin #include <sys/types.h>
2261d06d6bSBaptiste Daroussin
2361d06d6bSBaptiste Daroussin #include <assert.h>
2461d06d6bSBaptiste Daroussin #include <ctype.h>
2561d06d6bSBaptiste Daroussin #include <stddef.h>
2661d06d6bSBaptiste Daroussin #include <stdint.h>
277295610fSBaptiste Daroussin #include <stdio.h>
2861d06d6bSBaptiste Daroussin #include <stdlib.h>
2961d06d6bSBaptiste Daroussin #include <string.h>
3061d06d6bSBaptiste Daroussin
3161d06d6bSBaptiste Daroussin #include "mandoc.h"
3261d06d6bSBaptiste Daroussin #include "mandoc_aux.h"
3361d06d6bSBaptiste Daroussin #include "mandoc_ohash.h"
3461d06d6bSBaptiste Daroussin #include "libmandoc.h"
3561d06d6bSBaptiste Daroussin
/*
 * One entry of the special-character table below.
 * The table is filled with positional initializers, so the order
 * of these members must not change.
 */
struct ln {
	const char	  roffcode[16];	/* character name; the hash key */
	const char	 *ascii;	/* string returned for ASCII output */
	int		  unicode;	/* Unicode code point of the character */
};
4161d06d6bSBaptiste Daroussin
/*
 * Special break control characters.
 * Each is a one-character, NUL-terminated string wrapping the
 * ASCII_NBRSP or ASCII_BREAK constant, so that it can serve as
 * the "ascii" member of a table entry.
 */
static const char ascii_nbrsp[2] = { ASCII_NBRSP, '\0' };
static const char ascii_break[2] = { ASCII_BREAK, '\0' };
/*
 * Table of special characters.
 * Each entry lists, in order: the character name used as the hash
 * key, the string returned for ASCII output, and the Unicode code
 * point.  mchars_uc2str() returns the first entry that matches a
 * code point, so the relative order of entries sharing a code point
 * is significant.
 */
static struct ln lines[] = {

	/* Spacing. */
	{ " ", ascii_nbrsp, 0x00a0 },
	{ "~", ascii_nbrsp, 0x00a0 },
	{ "0", ascii_nbrsp, 0x00a0 },
	{ ":", ascii_break, 0 },

	/* Lines. */
	{ "ba", "|", 0x007c },
	{ "br", "|", 0x2502 },
	{ "ul", "_", 0x005f },
	{ "_", "_", 0x005f },
	{ "ru", "_", 0x005f },
	{ "rn", "-", 0x203e },
	{ "bb", "|", 0x00a6 },
	{ "sl", "/", 0x002f },
	{ "rs", "\\", 0x005c },

	/* Text markers. */
	{ "ci", "O", 0x25cb },
	{ "bu", "+\bo", 0x2022 },
	{ "dd", "<**>", 0x2021 },
	{ "dg", "<*>", 0x2020 },
	{ "lz", "<>", 0x25ca },
	{ "sq", "[]", 0x25a1 },
	{ "ps", "<paragraph>", 0x00b6 },
	{ "sc", "<section>", 0x00a7 },
	{ "lh", "<=", 0x261c },
	{ "rh", "=>", 0x261e },
	{ "at", "@", 0x0040 },
	{ "sh", "#", 0x0023 },
	{ "CR", "<cr>", 0x21b5 },
	{ "OK", "\\/", 0x2713 },
	{ "CL", "C", 0x2663 },
	{ "SP", "S", 0x2660 },
	{ "HE", "H", 0x2665 },
	{ "DI", "D", 0x2666 },

	/* Legal symbols. */
	{ "co", "(C)", 0x00a9 },
	{ "rg", "(R)", 0x00ae },
	{ "tm", "tm", 0x2122 },

	/* Punctuation. */
	{ "em", "--", 0x2014 },
	{ "en", "-", 0x2013 },
	{ "hy", "-", 0x2010 },
	{ "e", "\\", 0x005c },
	{ "r!", "!", 0x00a1 },
	{ "r?", "?", 0x00bf },

	/* Quotes. */
	{ "Bq", ",,", 0x201e },
	{ "bq", ",", 0x201a },
	{ "lq", "\"", 0x201c },
	{ "rq", "\"", 0x201d },
	{ "Lq", "\"", 0x201c },
	{ "Rq", "\"", 0x201d },
	{ "oq", "`", 0x2018 },
	{ "cq", "\'", 0x2019 },
	{ "aq", "\'", 0x0027 },
	{ "dq", "\"", 0x0022 },
	{ "Fo", "<<", 0x00ab },
	{ "Fc", ">>", 0x00bb },
	{ "fo", "<", 0x2039 },
	{ "fc", ">", 0x203a },

	/* Brackets. */
	{ "lB", "[", 0x005b },
	{ "rB", "]", 0x005d },
	{ "lC", "{", 0x007b },
	{ "rC", "}", 0x007d },
	{ "la", "<", 0x27e8 },
	{ "ra", ">", 0x27e9 },
	{ "bv", "|", 0x23aa },
	{ "braceex", "|", 0x23aa },
	{ "bracketlefttp", "|", 0x23a1 },
	{ "bracketleftbt", "|", 0x23a3 },
	{ "bracketleftex", "|", 0x23a2 },
	{ "bracketrighttp", "|", 0x23a4 },
	{ "bracketrightbt", "|", 0x23a6 },
	{ "bracketrightex", "|", 0x23a5 },
	{ "lt", ",-", 0x23a7 },
	{ "bracelefttp", ",-", 0x23a7 },
	{ "lk", "{", 0x23a8 },
	{ "braceleftmid", "{", 0x23a8 },
	{ "lb", "`-", 0x23a9 },
	{ "braceleftbt", "`-", 0x23a9 },
	{ "braceleftex", "|", 0x23aa },
	{ "rt", "-.", 0x23ab },
	{ "bracerighttp", "-.", 0x23ab },
	{ "rk", "}", 0x23ac },
	{ "bracerightmid", "}", 0x23ac },
	{ "rb", "-\'", 0x23ad },
	{ "bracerightbt", "-\'", 0x23ad },
	{ "bracerightex", "|", 0x23aa },
	{ "parenlefttp", "/", 0x239b },
	{ "parenleftbt", "\\", 0x239d },
	{ "parenleftex", "|", 0x239c },
	{ "parenrighttp", "\\", 0x239e },
	{ "parenrightbt", "/", 0x23a0 },
	{ "parenrightex", "|", 0x239f },

	/* Arrows and lines. */
	{ "<-", "<-", 0x2190 },
	{ "->", "->", 0x2192 },
	{ "<>", "<->", 0x2194 },
	{ "da", "|\bv", 0x2193 },
	{ "ua", "|\b^", 0x2191 },
	{ "va", "^v", 0x2195 },
	{ "lA", "<=", 0x21d0 },
	{ "rA", "=>", 0x21d2 },
	{ "hA", "<=>", 0x21d4 },
	{ "uA", "=\b^", 0x21d1 },
	{ "dA", "=\bv", 0x21d3 },
	{ "vA", "^=v", 0x21d5 },
	{ "an", "-", 0x23af },

	/* Logic. */
	{ "AN", "^", 0x2227 },
	{ "OR", "v", 0x2228 },
	{ "no", "~", 0x00ac },
	{ "tno", "~", 0x00ac },
	{ "te", "<there\037exists>", 0x2203 },
	{ "fa", "<for\037all>", 0x2200 },
	{ "st", "<such\037that>", 0x220b },
	{ "tf", "<therefore>", 0x2234 },
	{ "3d", "<therefore>", 0x2234 },
	{ "or", "|", 0x007c },

	/* Mathematicals. */
	{ "pl", "+", 0x002b },
	{ "mi", "-", 0x2212 },
	{ "-", "-", 0x002d },
	{ "-+", "-+", 0x2213 },
	{ "+-", "+-", 0x00b1 },
	{ "t+-", "+-", 0x00b1 },
	{ "pc", ".", 0x00b7 },
	{ "md", ".", 0x22c5 },
	{ "mu", "x", 0x00d7 },
	{ "tmu", "x", 0x00d7 },
	{ "c*", "O\bx", 0x2297 },
	{ "c+", "O\b+", 0x2295 },
	{ "di", "/", 0x00f7 },
	{ "tdi", "/", 0x00f7 },
	{ "f/", "/", 0x2044 },
	{ "**", "*", 0x2217 },
	{ "<=", "<=", 0x2264 },
	{ ">=", ">=", 0x2265 },
	{ "<<", "<<", 0x226a },
	{ ">>", ">>", 0x226b },
	{ "eq", "=", 0x003d },
	{ "!=", "!=", 0x2260 },
	{ "==", "==", 0x2261 },
	{ "ne", "!==", 0x2262 },
	{ "ap", "~", 0x223c },
	{ "|=", "-~", 0x2243 },
	{ "=~", "=~", 0x2245 },
	{ "~~", "~~", 0x2248 },
	{ "~=", "~=", 0x2248 },
	{ "pt", "<proportional\037to>", 0x221d },
	{ "es", "{}", 0x2205 },
	{ "mo", "<element\037of>", 0x2208 },
	{ "nm", "<not\037element\037of>", 0x2209 },
	{ "sb", "<proper\037subset>", 0x2282 },
	{ "nb", "<not\037subset>", 0x2284 },
	{ "sp", "<proper\037superset>", 0x2283 },
	{ "nc", "<not\037superset>", 0x2285 },
	{ "ib", "<subset\037or\037equal>", 0x2286 },
	{ "ip", "<superset\037or\037equal>", 0x2287 },
	{ "ca", "<intersection>", 0x2229 },
	{ "cu", "<union>", 0x222a },
	{ "/_", "<angle>", 0x2220 },
	{ "pp", "<perpendicular>", 0x22a5 },
	{ "is", "<integral>", 0x222b },
	{ "integral", "<integral>", 0x222b },
	{ "sum", "<sum>", 0x2211 },
	{ "product", "<product>", 0x220f },
	{ "coproduct", "<coproduct>", 0x2210 },
	{ "gr", "<nabla>", 0x2207 },
	{ "sr", "<sqrt>", 0x221a },
	{ "sqrt", "<sqrt>", 0x221a },
	{ "lc", "|~", 0x2308 },
	{ "rc", "~|", 0x2309 },
	{ "lf", "|_", 0x230a },
	{ "rf", "_|", 0x230b },
	{ "if", "<infinity>", 0x221e },
	{ "Ah", "<Aleph>", 0x2135 },
	{ "Im", "<Im>", 0x2111 },
	{ "Re", "<Re>", 0x211c },
	{ "wp", "p", 0x2118 },
	{ "pd", "<del>", 0x2202 },
	{ "-h", "/h", 0x210f },
	{ "hbar", "/h", 0x210f },
	{ "12", "1/2", 0x00bd },
	{ "14", "1/4", 0x00bc },
	{ "34", "3/4", 0x00be },
	{ "18", "1/8", 0x215B },
	{ "38", "3/8", 0x215C },
	{ "58", "5/8", 0x215D },
	{ "78", "7/8", 0x215E },
	{ "S1", "^1", 0x00B9 },
	{ "S2", "^2", 0x00B2 },
	{ "S3", "^3", 0x00B3 },

	/* Ligatures. */
	{ "ff", "ff", 0xfb00 },
	{ "fi", "fi", 0xfb01 },
	{ "fl", "fl", 0xfb02 },
	{ "Fi", "ffi", 0xfb03 },
	{ "Fl", "ffl", 0xfb04 },
	{ "AE", "AE", 0x00c6 },
	{ "ae", "ae", 0x00e6 },
	{ "OE", "OE", 0x0152 },
	{ "oe", "oe", 0x0153 },
	{ "ss", "ss", 0x00df },
	{ "IJ", "IJ", 0x0132 },
	{ "ij", "ij", 0x0133 },

	/* Accents. */
	{ "a\"", "\"", 0x02dd },
	{ "a-", "-", 0x00af },
	{ "a.", ".", 0x02d9 },
	{ "a^", "^", 0x005e },
	{ "aa", "\'", 0x00b4 },
	{ "\'", "\'", 0x00b4 },
	{ "ga", "`", 0x0060 },
	{ "`", "`", 0x0060 },
	{ "ab", "'\b`", 0x02d8 },
	{ "ac", ",", 0x00b8 },
	{ "ad", "\"", 0x00a8 },
	{ "ah", "v", 0x02c7 },
	{ "ao", "o", 0x02da },
	{ "a~", "~", 0x007e },
	{ "ho", ",", 0x02db },
	{ "ha", "^", 0x005e },
	{ "ti", "~", 0x007e },
	{ "u02DC", "~", 0x02dc },

	/* Accented letters. */
	{ "'A", "'\bA", 0x00c1 },
	{ "'E", "'\bE", 0x00c9 },
	{ "'I", "'\bI", 0x00cd },
	{ "'O", "'\bO", 0x00d3 },
	{ "'U", "'\bU", 0x00da },
	{ "'Y", "'\bY", 0x00dd },
	{ "'a", "'\ba", 0x00e1 },
	{ "'e", "'\be", 0x00e9 },
	{ "'i", "'\bi", 0x00ed },
	{ "'o", "'\bo", 0x00f3 },
	{ "'u", "'\bu", 0x00fa },
	{ "'y", "'\by", 0x00fd },
	{ "`A", "`\bA", 0x00c0 },
	{ "`E", "`\bE", 0x00c8 },
	{ "`I", "`\bI", 0x00cc },
	{ "`O", "`\bO", 0x00d2 },
	{ "`U", "`\bU", 0x00d9 },
	{ "`a", "`\ba", 0x00e0 },
	{ "`e", "`\be", 0x00e8 },
	{ "`i", "`\bi", 0x00ec },
	{ "`o", "`\bo", 0x00f2 },
	{ "`u", "`\bu", 0x00f9 },
	{ "~A", "~\bA", 0x00c3 },
	{ "~N", "~\bN", 0x00d1 },
	{ "~O", "~\bO", 0x00d5 },
	{ "~a", "~\ba", 0x00e3 },
	{ "~n", "~\bn", 0x00f1 },
	{ "~o", "~\bo", 0x00f5 },
	{ ":A", "\"\bA", 0x00c4 },
	{ ":E", "\"\bE", 0x00cb },
	{ ":I", "\"\bI", 0x00cf },
	{ ":O", "\"\bO", 0x00d6 },
	{ ":U", "\"\bU", 0x00dc },
	{ ":a", "\"\ba", 0x00e4 },
	{ ":e", "\"\be", 0x00eb },
	{ ":i", "\"\bi", 0x00ef },
	{ ":o", "\"\bo", 0x00f6 },
	{ ":u", "\"\bu", 0x00fc },
	{ ":y", "\"\by", 0x00ff },
	{ "^A", "^\bA", 0x00c2 },
	{ "^E", "^\bE", 0x00ca },
	{ "^I", "^\bI", 0x00ce },
	{ "^O", "^\bO", 0x00d4 },
	{ "^U", "^\bU", 0x00db },
	{ "^a", "^\ba", 0x00e2 },
	{ "^e", "^\be", 0x00ea },
	{ "^i", "^\bi", 0x00ee },
	{ "^o", "^\bo", 0x00f4 },
	{ "^u", "^\bu", 0x00fb },
	{ ",C", ",\bC", 0x00c7 },
	{ ",c", ",\bc", 0x00e7 },
	{ "/L", "/\bL", 0x0141 },
	{ "/l", "/\bl", 0x0142 },
	{ "/O", "/\bO", 0x00d8 },
	{ "/o", "/\bo", 0x00f8 },
	{ "oA", "o\bA", 0x00c5 },
	{ "oa", "o\ba", 0x00e5 },

	/* Special letters. */
	{ "-D", "Dh", 0x00d0 },
	{ "Sd", "dh", 0x00f0 },
	{ "TP", "Th", 0x00de },
	{ "Tp", "th", 0x00fe },
	{ ".i", "i", 0x0131 },
	{ ".j", "j", 0x0237 },

	/* Currency. */
	{ "Do", "$", 0x0024 },
	{ "ct", "/\bc", 0x00a2 },
	{ "Eu", "EUR", 0x20ac },
	{ "eu", "EUR", 0x20ac },
	{ "Ye", "=\bY", 0x00a5 },
	{ "Po", "-\bL", 0x00a3 },
	{ "Cs", "o\bx", 0x00a4 },
	{ "Fn", ",\bf", 0x0192 },

	/* Units. */
	{ "de", "<degree>", 0x00b0 },
	{ "%0", "<permille>", 0x2030 },
	{ "fm", "\'", 0x2032 },
	{ "sd", "\"", 0x2033 },
	{ "mc", "<micro>", 0x00b5 },
	{ "Of", "_\ba", 0x00aa },
	{ "Om", "_\bo", 0x00ba },

	/* Greek characters. */
	{ "*A", "A", 0x0391 },
	{ "*B", "B", 0x0392 },
	{ "*G", "<Gamma>", 0x0393 },
	{ "*D", "<Delta>", 0x0394 },
	{ "*E", "E", 0x0395 },
	{ "*Z", "Z", 0x0396 },
	{ "*Y", "H", 0x0397 },
	{ "*H", "<Theta>", 0x0398 },
	{ "*I", "I", 0x0399 },
	{ "*K", "K", 0x039a },
	{ "*L", "<Lambda>", 0x039b },
	{ "*M", "M", 0x039c },
	{ "*N", "N", 0x039d },
	{ "*C", "<Xi>", 0x039e },
	{ "*O", "O", 0x039f },
	{ "*P", "<Pi>", 0x03a0 },
	{ "*R", "P", 0x03a1 },
	{ "*S", "<Sigma>", 0x03a3 },
	{ "*T", "T", 0x03a4 },
	{ "*U", "Y", 0x03a5 },
	{ "*F", "<Phi>", 0x03a6 },
	{ "*X", "X", 0x03a7 },
	{ "*Q", "<Psi>", 0x03a8 },
	{ "*W", "<Omega>", 0x03a9 },
	{ "*a", "<alpha>", 0x03b1 },
	{ "*b", "<beta>", 0x03b2 },
	{ "*g", "<gamma>", 0x03b3 },
	{ "*d", "<delta>", 0x03b4 },
	{ "*e", "<epsilon>", 0x03b5 },
	{ "*z", "<zeta>", 0x03b6 },
	{ "*y", "<eta>", 0x03b7 },
	{ "*h", "<theta>", 0x03b8 },
	{ "*i", "<iota>", 0x03b9 },
	{ "*k", "<kappa>", 0x03ba },
	{ "*l", "<lambda>", 0x03bb },
	{ "*m", "<mu>", 0x03bc },
	{ "*n", "<nu>", 0x03bd },
	{ "*c", "<xi>", 0x03be },
	{ "*o", "o", 0x03bf },
	{ "*p", "<pi>", 0x03c0 },
	{ "*r", "<rho>", 0x03c1 },
	{ "*s", "<sigma>", 0x03c3 },
	{ "*t", "<tau>", 0x03c4 },
	{ "*u", "<upsilon>", 0x03c5 },
	{ "*f", "<phi>", 0x03d5 },
	{ "*x", "<chi>", 0x03c7 },
	{ "*q", "<psi>", 0x03c8 },
	{ "*w", "<omega>", 0x03c9 },
	{ "+h", "<theta>", 0x03d1 },
	{ "+f", "<phi>", 0x03c6 },
	{ "+p", "<pi>", 0x03d6 },
	{ "+e", "<epsilon>", 0x03f5 },
	{ "ts", "<sigma>", 0x03c2 },
};
42761d06d6bSBaptiste Daroussin
/*
 * Hash table over the entries of lines[], keyed by roffcode.
 * Set up by mchars_alloc() and torn down by mchars_free().
 */
static struct ohash mchars;
42961d06d6bSBaptiste Daroussin
43061d06d6bSBaptiste Daroussin
43161d06d6bSBaptiste Daroussin void
mchars_free(void)43261d06d6bSBaptiste Daroussin mchars_free(void)
43361d06d6bSBaptiste Daroussin {
43461d06d6bSBaptiste Daroussin
43561d06d6bSBaptiste Daroussin ohash_delete(&mchars);
43661d06d6bSBaptiste Daroussin }
43761d06d6bSBaptiste Daroussin
43861d06d6bSBaptiste Daroussin void
mchars_alloc(void)43961d06d6bSBaptiste Daroussin mchars_alloc(void)
44061d06d6bSBaptiste Daroussin {
44161d06d6bSBaptiste Daroussin size_t i;
44261d06d6bSBaptiste Daroussin unsigned int slot;
44361d06d6bSBaptiste Daroussin
44461d06d6bSBaptiste Daroussin mandoc_ohash_init(&mchars, 9, offsetof(struct ln, roffcode));
44561d06d6bSBaptiste Daroussin for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++) {
44661d06d6bSBaptiste Daroussin slot = ohash_qlookup(&mchars, lines[i].roffcode);
44761d06d6bSBaptiste Daroussin assert(ohash_find(&mchars, slot) == NULL);
44861d06d6bSBaptiste Daroussin ohash_insert(&mchars, slot, lines + i);
44961d06d6bSBaptiste Daroussin }
45061d06d6bSBaptiste Daroussin }
45161d06d6bSBaptiste Daroussin
45261d06d6bSBaptiste Daroussin int
mchars_spec2cp(const char * p,size_t sz)45361d06d6bSBaptiste Daroussin mchars_spec2cp(const char *p, size_t sz)
45461d06d6bSBaptiste Daroussin {
45561d06d6bSBaptiste Daroussin const struct ln *ln;
45661d06d6bSBaptiste Daroussin const char *end;
45761d06d6bSBaptiste Daroussin
45861d06d6bSBaptiste Daroussin end = p + sz;
45961d06d6bSBaptiste Daroussin ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
4607295610fSBaptiste Daroussin return ln != NULL ? ln->unicode : -1;
46161d06d6bSBaptiste Daroussin }
46261d06d6bSBaptiste Daroussin
/*
 * Convert the sz bytes at p to an integer with mandoc_strntoi()
 * in base 10.  Return the value if it lies in the range 0 to 255
 * inclusive, or -1 otherwise.
 */
int
mchars_num2char(const char *p, size_t sz)
{
	const int	 val = mandoc_strntoi(p, sz, 10);

	if (val < 0 || val >= 256)
		return -1;
	return val;
}
47161d06d6bSBaptiste Daroussin
/*
 * Convert the sz bytes at p to an integer with mandoc_strntoi()
 * in base 16 and return it.  The result is asserted to lie in the
 * range 0 to 0x10FFFF inclusive; there is no error return, so the
 * input is presumably validated by the caller -- TODO confirm.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
	const int	 i = mandoc_strntoi(p, sz, 16);

	assert(i >= 0 && i <= 0x10FFFF);
	return i;
}
48161d06d6bSBaptiste Daroussin
48261d06d6bSBaptiste Daroussin const char *
mchars_spec2str(const char * p,size_t sz,size_t * rsz)48361d06d6bSBaptiste Daroussin mchars_spec2str(const char *p, size_t sz, size_t *rsz)
48461d06d6bSBaptiste Daroussin {
48561d06d6bSBaptiste Daroussin const struct ln *ln;
48661d06d6bSBaptiste Daroussin const char *end;
48761d06d6bSBaptiste Daroussin
48861d06d6bSBaptiste Daroussin end = p + sz;
48961d06d6bSBaptiste Daroussin ln = ohash_find(&mchars, ohash_qlookupi(&mchars, p, &end));
4907295610fSBaptiste Daroussin if (ln == NULL)
4917295610fSBaptiste Daroussin return NULL;
49261d06d6bSBaptiste Daroussin
49361d06d6bSBaptiste Daroussin *rsz = strlen(ln->ascii);
49461d06d6bSBaptiste Daroussin return ln->ascii;
49561d06d6bSBaptiste Daroussin }
49661d06d6bSBaptiste Daroussin
49761d06d6bSBaptiste Daroussin const char *
mchars_uc2str(int uc)49861d06d6bSBaptiste Daroussin mchars_uc2str(int uc)
49961d06d6bSBaptiste Daroussin {
50061d06d6bSBaptiste Daroussin size_t i;
50161d06d6bSBaptiste Daroussin
50261d06d6bSBaptiste Daroussin for (i = 0; i < sizeof(lines)/sizeof(lines[0]); i++)
50361d06d6bSBaptiste Daroussin if (uc == lines[i].unicode)
50461d06d6bSBaptiste Daroussin return lines[i].ascii;
50561d06d6bSBaptiste Daroussin return "<?>";
50661d06d6bSBaptiste Daroussin }
507