1*6d38604fSBaptiste Daroussin /* $Id: mandoc.c,v 1.119 2021/08/10 12:55:03 schwarze Exp $ */ 261d06d6bSBaptiste Daroussin /* 361d06d6bSBaptiste Daroussin * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4*6d38604fSBaptiste Daroussin * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org> 561d06d6bSBaptiste Daroussin * 661d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any 761d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above 861d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies. 961d06d6bSBaptiste Daroussin * 1061d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 1161d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1261d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 1361d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1461d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1561d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1661d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1761d06d6bSBaptiste Daroussin */ 1861d06d6bSBaptiste Daroussin #include "config.h" 1961d06d6bSBaptiste Daroussin 2061d06d6bSBaptiste Daroussin #include <sys/types.h> 2161d06d6bSBaptiste Daroussin 2261d06d6bSBaptiste Daroussin #include <assert.h> 2361d06d6bSBaptiste Daroussin #include <ctype.h> 2461d06d6bSBaptiste Daroussin #include <errno.h> 2561d06d6bSBaptiste Daroussin #include <limits.h> 2661d06d6bSBaptiste Daroussin #include <stdlib.h> 2761d06d6bSBaptiste Daroussin #include <stdio.h> 2861d06d6bSBaptiste Daroussin #include <string.h> 2961d06d6bSBaptiste Daroussin #include <time.h> 3061d06d6bSBaptiste Daroussin 3161d06d6bSBaptiste Daroussin #include "mandoc_aux.h" 3261d06d6bSBaptiste Daroussin #include "mandoc.h" 3361d06d6bSBaptiste Daroussin #include "roff.h" 3461d06d6bSBaptiste Daroussin #include "libmandoc.h" 357295610fSBaptiste Daroussin #include "roff_int.h" 3661d06d6bSBaptiste Daroussin 3761d06d6bSBaptiste Daroussin static int a2time(time_t *, const char *, const char *); 3861d06d6bSBaptiste Daroussin static char *time2a(time_t); 3961d06d6bSBaptiste Daroussin 4061d06d6bSBaptiste Daroussin 4161d06d6bSBaptiste Daroussin enum mandoc_esc 427295610fSBaptiste Daroussin mandoc_font(const char *cp, int sz) 437295610fSBaptiste Daroussin { 447295610fSBaptiste Daroussin switch (sz) { 457295610fSBaptiste Daroussin case 0: 467295610fSBaptiste Daroussin return ESCAPE_FONTPREV; 477295610fSBaptiste Daroussin case 1: 487295610fSBaptiste Daroussin switch (cp[0]) { 497295610fSBaptiste Daroussin case 'B': 507295610fSBaptiste Daroussin case '3': 517295610fSBaptiste Daroussin return ESCAPE_FONTBOLD; 527295610fSBaptiste Daroussin case 'I': 537295610fSBaptiste Daroussin case '2': 547295610fSBaptiste Daroussin return ESCAPE_FONTITALIC; 557295610fSBaptiste Daroussin case 'P': 567295610fSBaptiste Daroussin return ESCAPE_FONTPREV; 577295610fSBaptiste Daroussin case 'R': 587295610fSBaptiste Daroussin case '1': 597295610fSBaptiste Daroussin return ESCAPE_FONTROMAN; 607295610fSBaptiste Daroussin case '4': 617295610fSBaptiste Daroussin return ESCAPE_FONTBI; 627295610fSBaptiste Daroussin default: 637295610fSBaptiste Daroussin return ESCAPE_ERROR; 647295610fSBaptiste Daroussin } 657295610fSBaptiste Daroussin case 2: 667295610fSBaptiste Daroussin switch (cp[0]) { 677295610fSBaptiste Daroussin case 'B': 687295610fSBaptiste Daroussin switch (cp[1]) { 697295610fSBaptiste Daroussin case 'I': 707295610fSBaptiste Daroussin return ESCAPE_FONTBI; 717295610fSBaptiste Daroussin default: 727295610fSBaptiste Daroussin return ESCAPE_ERROR; 737295610fSBaptiste Daroussin } 747295610fSBaptiste Daroussin case 'C': 757295610fSBaptiste Daroussin switch (cp[1]) { 767295610fSBaptiste Daroussin case 'B': 77*6d38604fSBaptiste Daroussin return ESCAPE_FONTCB; 787295610fSBaptiste Daroussin case 'I': 79*6d38604fSBaptiste Daroussin return ESCAPE_FONTCI; 807295610fSBaptiste Daroussin case 'R': 817295610fSBaptiste Daroussin case 'W': 82*6d38604fSBaptiste Daroussin return ESCAPE_FONTCR; 837295610fSBaptiste Daroussin default: 847295610fSBaptiste Daroussin return ESCAPE_ERROR; 857295610fSBaptiste Daroussin } 867295610fSBaptiste Daroussin default: 877295610fSBaptiste Daroussin return ESCAPE_ERROR; 887295610fSBaptiste Daroussin } 897295610fSBaptiste Daroussin default: 907295610fSBaptiste Daroussin return ESCAPE_ERROR; 917295610fSBaptiste Daroussin } 927295610fSBaptiste Daroussin } 937295610fSBaptiste Daroussin 947295610fSBaptiste Daroussin enum mandoc_esc 9561d06d6bSBaptiste Daroussin mandoc_escape(const char **end, const char **start, int *sz) 9661d06d6bSBaptiste Daroussin { 9761d06d6bSBaptiste Daroussin const char *local_start; 987295610fSBaptiste Daroussin int local_sz, c, i; 9961d06d6bSBaptiste Daroussin char term; 10061d06d6bSBaptiste Daroussin enum mandoc_esc gly; 10161d06d6bSBaptiste Daroussin 10261d06d6bSBaptiste Daroussin /* 10361d06d6bSBaptiste Daroussin * When the caller doesn't provide return storage, 10461d06d6bSBaptiste Daroussin * use local storage. 10561d06d6bSBaptiste Daroussin */ 10661d06d6bSBaptiste Daroussin 10761d06d6bSBaptiste Daroussin if (NULL == start) 10861d06d6bSBaptiste Daroussin start = &local_start; 10961d06d6bSBaptiste Daroussin if (NULL == sz) 11061d06d6bSBaptiste Daroussin sz = &local_sz; 11161d06d6bSBaptiste Daroussin 11261d06d6bSBaptiste Daroussin /* 1137295610fSBaptiste Daroussin * Treat "\E" just like "\"; 1147295610fSBaptiste Daroussin * it only makes a difference in copy mode. 1157295610fSBaptiste Daroussin */ 1167295610fSBaptiste Daroussin 1177295610fSBaptiste Daroussin if (**end == 'E') 1187295610fSBaptiste Daroussin ++*end; 1197295610fSBaptiste Daroussin 1207295610fSBaptiste Daroussin /* 12161d06d6bSBaptiste Daroussin * Beyond the backslash, at least one input character 12261d06d6bSBaptiste Daroussin * is part of the escape sequence. With one exception 12361d06d6bSBaptiste Daroussin * (see below), that character won't be returned. 12461d06d6bSBaptiste Daroussin */ 12561d06d6bSBaptiste Daroussin 12661d06d6bSBaptiste Daroussin gly = ESCAPE_ERROR; 12761d06d6bSBaptiste Daroussin *start = ++*end; 12861d06d6bSBaptiste Daroussin *sz = 0; 12961d06d6bSBaptiste Daroussin term = '\0'; 13061d06d6bSBaptiste Daroussin 13161d06d6bSBaptiste Daroussin switch ((*start)[-1]) { 13261d06d6bSBaptiste Daroussin /* 13361d06d6bSBaptiste Daroussin * First the glyphs. There are several different forms of 13461d06d6bSBaptiste Daroussin * these, but each eventually returns a substring of the glyph 13561d06d6bSBaptiste Daroussin * name. 13661d06d6bSBaptiste Daroussin */ 13761d06d6bSBaptiste Daroussin case '(': 13861d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 13961d06d6bSBaptiste Daroussin *sz = 2; 14061d06d6bSBaptiste Daroussin break; 14161d06d6bSBaptiste Daroussin case '[': 1427295610fSBaptiste Daroussin if (**start == ' ') { 1437295610fSBaptiste Daroussin ++*end; 1447295610fSBaptiste Daroussin return ESCAPE_ERROR; 1457295610fSBaptiste Daroussin } 14661d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 14761d06d6bSBaptiste Daroussin term = ']'; 14861d06d6bSBaptiste Daroussin break; 14961d06d6bSBaptiste Daroussin case 'C': 15061d06d6bSBaptiste Daroussin if ('\'' != **start) 15161d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 15261d06d6bSBaptiste Daroussin *start = ++*end; 15361d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 15461d06d6bSBaptiste Daroussin term = '\''; 15561d06d6bSBaptiste Daroussin break; 15661d06d6bSBaptiste Daroussin 15761d06d6bSBaptiste Daroussin /* 15861d06d6bSBaptiste Daroussin * Escapes taking no arguments at all. 15961d06d6bSBaptiste Daroussin */ 1607295610fSBaptiste Daroussin case '!': 1617295610fSBaptiste Daroussin case '?': 1627295610fSBaptiste Daroussin return ESCAPE_UNSUPP; 1637295610fSBaptiste Daroussin case '%': 1647295610fSBaptiste Daroussin case '&': 1657295610fSBaptiste Daroussin case ')': 16661d06d6bSBaptiste Daroussin case ',': 16761d06d6bSBaptiste Daroussin case '/': 1687295610fSBaptiste Daroussin case '^': 1697295610fSBaptiste Daroussin case 'a': 1707295610fSBaptiste Daroussin case 'd': 1717295610fSBaptiste Daroussin case 'r': 1727295610fSBaptiste Daroussin case 't': 1737295610fSBaptiste Daroussin case 'u': 1747295610fSBaptiste Daroussin case '{': 1757295610fSBaptiste Daroussin case '|': 1767295610fSBaptiste Daroussin case '}': 17761d06d6bSBaptiste Daroussin return ESCAPE_IGNORE; 1787295610fSBaptiste Daroussin case 'c': 1797295610fSBaptiste Daroussin return ESCAPE_NOSPACE; 18061d06d6bSBaptiste Daroussin case 'p': 18161d06d6bSBaptiste Daroussin return ESCAPE_BREAK; 18261d06d6bSBaptiste Daroussin 18361d06d6bSBaptiste Daroussin /* 18461d06d6bSBaptiste Daroussin * The \z escape is supposed to output the following 18561d06d6bSBaptiste Daroussin * character without advancing the cursor position. 18661d06d6bSBaptiste Daroussin * Since we are mostly dealing with terminal mode, 18761d06d6bSBaptiste Daroussin * let us just skip the next character. 18861d06d6bSBaptiste Daroussin */ 18961d06d6bSBaptiste Daroussin case 'z': 19061d06d6bSBaptiste Daroussin return ESCAPE_SKIPCHAR; 19161d06d6bSBaptiste Daroussin 19261d06d6bSBaptiste Daroussin /* 19361d06d6bSBaptiste Daroussin * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where 19461d06d6bSBaptiste Daroussin * 'X' is the trigger. These have opaque sub-strings. 19561d06d6bSBaptiste Daroussin */ 19661d06d6bSBaptiste Daroussin case 'F': 1977295610fSBaptiste Daroussin case 'f': 19861d06d6bSBaptiste Daroussin case 'g': 19961d06d6bSBaptiste Daroussin case 'k': 20061d06d6bSBaptiste Daroussin case 'M': 20161d06d6bSBaptiste Daroussin case 'm': 20261d06d6bSBaptiste Daroussin case 'n': 2037295610fSBaptiste Daroussin case 'O': 20461d06d6bSBaptiste Daroussin case 'V': 20561d06d6bSBaptiste Daroussin case 'Y': 206*6d38604fSBaptiste Daroussin case '*': 207*6d38604fSBaptiste Daroussin switch ((*start)[-1]) { 208*6d38604fSBaptiste Daroussin case 'f': 209*6d38604fSBaptiste Daroussin gly = ESCAPE_FONT; 210*6d38604fSBaptiste Daroussin break; 211*6d38604fSBaptiste Daroussin case '*': 212*6d38604fSBaptiste Daroussin gly = ESCAPE_DEVICE; 213*6d38604fSBaptiste Daroussin break; 214*6d38604fSBaptiste Daroussin default: 215*6d38604fSBaptiste Daroussin gly = ESCAPE_IGNORE; 216*6d38604fSBaptiste Daroussin break; 217*6d38604fSBaptiste Daroussin } 21861d06d6bSBaptiste Daroussin switch (**start) { 21961d06d6bSBaptiste Daroussin case '(': 2207295610fSBaptiste Daroussin if ((*start)[-1] == 'O') 2217295610fSBaptiste Daroussin gly = ESCAPE_ERROR; 22261d06d6bSBaptiste Daroussin *start = ++*end; 22361d06d6bSBaptiste Daroussin *sz = 2; 22461d06d6bSBaptiste Daroussin break; 22561d06d6bSBaptiste Daroussin case '[': 2267295610fSBaptiste Daroussin if ((*start)[-1] == 'O') 2277295610fSBaptiste Daroussin gly = (*start)[1] == '5' ? 2287295610fSBaptiste Daroussin ESCAPE_UNSUPP : ESCAPE_ERROR; 22961d06d6bSBaptiste Daroussin *start = ++*end; 23061d06d6bSBaptiste Daroussin term = ']'; 23161d06d6bSBaptiste Daroussin break; 23261d06d6bSBaptiste Daroussin default: 2337295610fSBaptiste Daroussin if ((*start)[-1] == 'O') { 2347295610fSBaptiste Daroussin switch (**start) { 2357295610fSBaptiste Daroussin case '0': 2367295610fSBaptiste Daroussin gly = ESCAPE_UNSUPP; 2377295610fSBaptiste Daroussin break; 2387295610fSBaptiste Daroussin case '1': 2397295610fSBaptiste Daroussin case '2': 2407295610fSBaptiste Daroussin case '3': 2417295610fSBaptiste Daroussin case '4': 2427295610fSBaptiste Daroussin break; 2437295610fSBaptiste Daroussin default: 2447295610fSBaptiste Daroussin gly = ESCAPE_ERROR; 2457295610fSBaptiste Daroussin break; 2467295610fSBaptiste Daroussin } 2477295610fSBaptiste Daroussin } 24861d06d6bSBaptiste Daroussin *sz = 1; 24961d06d6bSBaptiste Daroussin break; 25061d06d6bSBaptiste Daroussin } 25161d06d6bSBaptiste Daroussin break; 25261d06d6bSBaptiste Daroussin 25361d06d6bSBaptiste Daroussin /* 25461d06d6bSBaptiste Daroussin * These escapes are of the form \X'Y', where 'X' is the trigger 25561d06d6bSBaptiste Daroussin * and 'Y' is any string. These have opaque sub-strings. 25661d06d6bSBaptiste Daroussin * The \B and \w escapes are handled in roff.c, roff_res(). 25761d06d6bSBaptiste Daroussin */ 25861d06d6bSBaptiste Daroussin case 'A': 25961d06d6bSBaptiste Daroussin case 'b': 26061d06d6bSBaptiste Daroussin case 'D': 26161d06d6bSBaptiste Daroussin case 'R': 26261d06d6bSBaptiste Daroussin case 'X': 26361d06d6bSBaptiste Daroussin case 'Z': 26461d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 26561d06d6bSBaptiste Daroussin /* FALLTHROUGH */ 26661d06d6bSBaptiste Daroussin case 'o': 26761d06d6bSBaptiste Daroussin if (**start == '\0') 26861d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 26961d06d6bSBaptiste Daroussin if (gly == ESCAPE_ERROR) 27061d06d6bSBaptiste Daroussin gly = ESCAPE_OVERSTRIKE; 27161d06d6bSBaptiste Daroussin term = **start; 27261d06d6bSBaptiste Daroussin *start = ++*end; 27361d06d6bSBaptiste Daroussin break; 27461d06d6bSBaptiste Daroussin 27561d06d6bSBaptiste Daroussin /* 27661d06d6bSBaptiste Daroussin * These escapes are of the form \X'N', where 'X' is the trigger 27761d06d6bSBaptiste Daroussin * and 'N' resolves to a numerical expression. 27861d06d6bSBaptiste Daroussin */ 27961d06d6bSBaptiste Daroussin case 'h': 28061d06d6bSBaptiste Daroussin case 'H': 28161d06d6bSBaptiste Daroussin case 'L': 28261d06d6bSBaptiste Daroussin case 'l': 28361d06d6bSBaptiste Daroussin case 'S': 28461d06d6bSBaptiste Daroussin case 'v': 28561d06d6bSBaptiste Daroussin case 'x': 28661d06d6bSBaptiste Daroussin if (strchr(" %&()*+-./0123456789:<=>", **start)) { 28761d06d6bSBaptiste Daroussin if ('\0' != **start) 28861d06d6bSBaptiste Daroussin ++*end; 28961d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 29061d06d6bSBaptiste Daroussin } 29161d06d6bSBaptiste Daroussin switch ((*start)[-1]) { 29261d06d6bSBaptiste Daroussin case 'h': 29361d06d6bSBaptiste Daroussin gly = ESCAPE_HORIZ; 29461d06d6bSBaptiste Daroussin break; 29561d06d6bSBaptiste Daroussin case 'l': 29661d06d6bSBaptiste Daroussin gly = ESCAPE_HLINE; 29761d06d6bSBaptiste Daroussin break; 29861d06d6bSBaptiste Daroussin default: 29961d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 30061d06d6bSBaptiste Daroussin break; 30161d06d6bSBaptiste Daroussin } 30261d06d6bSBaptiste Daroussin term = **start; 30361d06d6bSBaptiste Daroussin *start = ++*end; 30461d06d6bSBaptiste Daroussin break; 30561d06d6bSBaptiste Daroussin 30661d06d6bSBaptiste Daroussin /* 30761d06d6bSBaptiste Daroussin * Special handling for the numbered character escape. 30861d06d6bSBaptiste Daroussin * XXX Do any other escapes need similar handling? 30961d06d6bSBaptiste Daroussin */ 31061d06d6bSBaptiste Daroussin case 'N': 31161d06d6bSBaptiste Daroussin if ('\0' == **start) 31261d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 31361d06d6bSBaptiste Daroussin (*end)++; 31461d06d6bSBaptiste Daroussin if (isdigit((unsigned char)**start)) { 31561d06d6bSBaptiste Daroussin *sz = 1; 31661d06d6bSBaptiste Daroussin return ESCAPE_IGNORE; 31761d06d6bSBaptiste Daroussin } 31861d06d6bSBaptiste Daroussin (*start)++; 31961d06d6bSBaptiste Daroussin while (isdigit((unsigned char)**end)) 32061d06d6bSBaptiste Daroussin (*end)++; 32161d06d6bSBaptiste Daroussin *sz = *end - *start; 32261d06d6bSBaptiste Daroussin if ('\0' != **end) 32361d06d6bSBaptiste Daroussin (*end)++; 32461d06d6bSBaptiste Daroussin return ESCAPE_NUMBERED; 32561d06d6bSBaptiste Daroussin 32661d06d6bSBaptiste Daroussin /* 32761d06d6bSBaptiste Daroussin * Sizes get a special category of their own. 32861d06d6bSBaptiste Daroussin */ 32961d06d6bSBaptiste Daroussin case 's': 33061d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 33161d06d6bSBaptiste Daroussin 33261d06d6bSBaptiste Daroussin /* See +/- counts as a sign. */ 33361d06d6bSBaptiste Daroussin if ('+' == **end || '-' == **end || ASCII_HYPH == **end) 33461d06d6bSBaptiste Daroussin *start = ++*end; 33561d06d6bSBaptiste Daroussin 33661d06d6bSBaptiste Daroussin switch (**end) { 33761d06d6bSBaptiste Daroussin case '(': 33861d06d6bSBaptiste Daroussin *start = ++*end; 33961d06d6bSBaptiste Daroussin *sz = 2; 34061d06d6bSBaptiste Daroussin break; 34161d06d6bSBaptiste Daroussin case '[': 34261d06d6bSBaptiste Daroussin *start = ++*end; 34361d06d6bSBaptiste Daroussin term = ']'; 34461d06d6bSBaptiste Daroussin break; 34561d06d6bSBaptiste Daroussin case '\'': 34661d06d6bSBaptiste Daroussin *start = ++*end; 34761d06d6bSBaptiste Daroussin term = '\''; 34861d06d6bSBaptiste Daroussin break; 34961d06d6bSBaptiste Daroussin case '3': 35061d06d6bSBaptiste Daroussin case '2': 35161d06d6bSBaptiste Daroussin case '1': 35261d06d6bSBaptiste Daroussin *sz = (*end)[-1] == 's' && 35361d06d6bSBaptiste Daroussin isdigit((unsigned char)(*end)[1]) ? 2 : 1; 35461d06d6bSBaptiste Daroussin break; 35561d06d6bSBaptiste Daroussin default: 35661d06d6bSBaptiste Daroussin *sz = 1; 35761d06d6bSBaptiste Daroussin break; 35861d06d6bSBaptiste Daroussin } 35961d06d6bSBaptiste Daroussin 36061d06d6bSBaptiste Daroussin break; 36161d06d6bSBaptiste Daroussin 36261d06d6bSBaptiste Daroussin /* 3637295610fSBaptiste Daroussin * Several special characters can be encoded as 3647295610fSBaptiste Daroussin * one-byte escape sequences without using \[]. 36561d06d6bSBaptiste Daroussin */ 3667295610fSBaptiste Daroussin case ' ': 3677295610fSBaptiste Daroussin case '\'': 3687295610fSBaptiste Daroussin case '-': 3697295610fSBaptiste Daroussin case '.': 3707295610fSBaptiste Daroussin case '0': 3717295610fSBaptiste Daroussin case ':': 3727295610fSBaptiste Daroussin case '_': 3737295610fSBaptiste Daroussin case '`': 3747295610fSBaptiste Daroussin case 'e': 3757295610fSBaptiste Daroussin case '~': 37661d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 3777295610fSBaptiste Daroussin /* FALLTHROUGH */ 3787295610fSBaptiste Daroussin default: 3797295610fSBaptiste Daroussin if (gly == ESCAPE_ERROR) 3807295610fSBaptiste Daroussin gly = ESCAPE_UNDEF; 38161d06d6bSBaptiste Daroussin *start = --*end; 38261d06d6bSBaptiste Daroussin *sz = 1; 38361d06d6bSBaptiste Daroussin break; 38461d06d6bSBaptiste Daroussin } 38561d06d6bSBaptiste Daroussin 38661d06d6bSBaptiste Daroussin /* 38761d06d6bSBaptiste Daroussin * Read up to the terminating character, 38861d06d6bSBaptiste Daroussin * paying attention to nested escapes. 38961d06d6bSBaptiste Daroussin */ 39061d06d6bSBaptiste Daroussin 39161d06d6bSBaptiste Daroussin if ('\0' != term) { 39261d06d6bSBaptiste Daroussin while (**end != term) { 39361d06d6bSBaptiste Daroussin switch (**end) { 39461d06d6bSBaptiste Daroussin case '\0': 39561d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 39661d06d6bSBaptiste Daroussin case '\\': 39761d06d6bSBaptiste Daroussin (*end)++; 39861d06d6bSBaptiste Daroussin if (ESCAPE_ERROR == 39961d06d6bSBaptiste Daroussin mandoc_escape(end, NULL, NULL)) 40061d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 40161d06d6bSBaptiste Daroussin break; 40261d06d6bSBaptiste Daroussin default: 40361d06d6bSBaptiste Daroussin (*end)++; 40461d06d6bSBaptiste Daroussin break; 40561d06d6bSBaptiste Daroussin } 40661d06d6bSBaptiste Daroussin } 40761d06d6bSBaptiste Daroussin *sz = (*end)++ - *start; 4087295610fSBaptiste Daroussin 4097295610fSBaptiste Daroussin /* 4107295610fSBaptiste Daroussin * The file chars.c only provides one common list 4117295610fSBaptiste Daroussin * of character names, but \[-] == \- is the only 4127295610fSBaptiste Daroussin * one of the characters with one-byte names that 4137295610fSBaptiste Daroussin * allows enclosing the name in brackets. 4147295610fSBaptiste Daroussin */ 4157295610fSBaptiste Daroussin if (gly == ESCAPE_SPECIAL && *sz == 1 && **start != '-') 4167295610fSBaptiste Daroussin return ESCAPE_ERROR; 41761d06d6bSBaptiste Daroussin } else { 41861d06d6bSBaptiste Daroussin assert(*sz > 0); 41961d06d6bSBaptiste Daroussin if ((size_t)*sz > strlen(*start)) 42061d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 42161d06d6bSBaptiste Daroussin *end += *sz; 42261d06d6bSBaptiste Daroussin } 42361d06d6bSBaptiste Daroussin 42461d06d6bSBaptiste Daroussin /* Run post-processors. */ 42561d06d6bSBaptiste Daroussin 42661d06d6bSBaptiste Daroussin switch (gly) { 42761d06d6bSBaptiste Daroussin case ESCAPE_FONT: 4287295610fSBaptiste Daroussin gly = mandoc_font(*start, *sz); 42961d06d6bSBaptiste Daroussin break; 43061d06d6bSBaptiste Daroussin case ESCAPE_SPECIAL: 4317295610fSBaptiste Daroussin if (**start == 'c') { 4327295610fSBaptiste Daroussin if (*sz < 6 || *sz > 7 || 4337295610fSBaptiste Daroussin strncmp(*start, "char", 4) != 0 || 4347295610fSBaptiste Daroussin (int)strspn(*start + 4, "0123456789") + 4 < *sz) 4357295610fSBaptiste Daroussin break; 4367295610fSBaptiste Daroussin c = 0; 4377295610fSBaptiste Daroussin for (i = 4; i < *sz; i++) 4387295610fSBaptiste Daroussin c = 10 * c + ((*start)[i] - '0'); 4397295610fSBaptiste Daroussin if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) 4407295610fSBaptiste Daroussin break; 4417295610fSBaptiste Daroussin *start += 4; 4427295610fSBaptiste Daroussin *sz -= 4; 4437295610fSBaptiste Daroussin gly = ESCAPE_NUMBERED; 4447295610fSBaptiste Daroussin break; 4457295610fSBaptiste Daroussin } 4467295610fSBaptiste Daroussin 44761d06d6bSBaptiste Daroussin /* 44861d06d6bSBaptiste Daroussin * Unicode escapes are defined in groff as \[u0000] 44961d06d6bSBaptiste Daroussin * to \[u10FFFF], where the contained value must be 45061d06d6bSBaptiste Daroussin * a valid Unicode codepoint. Here, however, only 45161d06d6bSBaptiste Daroussin * check the length and range. 45261d06d6bSBaptiste Daroussin */ 45361d06d6bSBaptiste Daroussin if (**start != 'u' || *sz < 5 || *sz > 7) 45461d06d6bSBaptiste Daroussin break; 45561d06d6bSBaptiste Daroussin if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0')) 45661d06d6bSBaptiste Daroussin break; 45761d06d6bSBaptiste Daroussin if (*sz == 6 && (*start)[1] == '0') 45861d06d6bSBaptiste Daroussin break; 45961d06d6bSBaptiste Daroussin if (*sz == 5 && (*start)[1] == 'D' && 46061d06d6bSBaptiste Daroussin strchr("89ABCDEF", (*start)[2]) != NULL) 46161d06d6bSBaptiste Daroussin break; 46261d06d6bSBaptiste Daroussin if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef") 46361d06d6bSBaptiste Daroussin + 1 == *sz) 46461d06d6bSBaptiste Daroussin gly = ESCAPE_UNICODE; 46561d06d6bSBaptiste Daroussin break; 466*6d38604fSBaptiste Daroussin case ESCAPE_DEVICE: 467*6d38604fSBaptiste Daroussin assert(*sz == 2 && (*start)[0] == '.' && (*start)[1] == 'T'); 468*6d38604fSBaptiste Daroussin break; 46961d06d6bSBaptiste Daroussin default: 47061d06d6bSBaptiste Daroussin break; 47161d06d6bSBaptiste Daroussin } 47261d06d6bSBaptiste Daroussin 47361d06d6bSBaptiste Daroussin return gly; 47461d06d6bSBaptiste Daroussin } 47561d06d6bSBaptiste Daroussin 47661d06d6bSBaptiste Daroussin static int 47761d06d6bSBaptiste Daroussin a2time(time_t *t, const char *fmt, const char *p) 47861d06d6bSBaptiste Daroussin { 47961d06d6bSBaptiste Daroussin struct tm tm; 48061d06d6bSBaptiste Daroussin char *pp; 48161d06d6bSBaptiste Daroussin 48261d06d6bSBaptiste Daroussin memset(&tm, 0, sizeof(struct tm)); 48361d06d6bSBaptiste Daroussin 48461d06d6bSBaptiste Daroussin pp = NULL; 48561d06d6bSBaptiste Daroussin #if HAVE_STRPTIME 48661d06d6bSBaptiste Daroussin pp = strptime(p, fmt, &tm); 48761d06d6bSBaptiste Daroussin #endif 48861d06d6bSBaptiste Daroussin if (NULL != pp && '\0' == *pp) { 48961d06d6bSBaptiste Daroussin *t = mktime(&tm); 49061d06d6bSBaptiste Daroussin return 1; 49161d06d6bSBaptiste Daroussin } 49261d06d6bSBaptiste Daroussin 49361d06d6bSBaptiste Daroussin return 0; 49461d06d6bSBaptiste Daroussin } 49561d06d6bSBaptiste Daroussin 49661d06d6bSBaptiste Daroussin static char * 49761d06d6bSBaptiste Daroussin time2a(time_t t) 49861d06d6bSBaptiste Daroussin { 49961d06d6bSBaptiste Daroussin struct tm *tm; 50061d06d6bSBaptiste Daroussin char *buf, *p; 50161d06d6bSBaptiste Daroussin size_t ssz; 50261d06d6bSBaptiste Daroussin int isz; 50361d06d6bSBaptiste Daroussin 50445a5aec3SBaptiste Daroussin buf = NULL; 50561d06d6bSBaptiste Daroussin tm = localtime(&t); 50661d06d6bSBaptiste Daroussin if (tm == NULL) 50745a5aec3SBaptiste Daroussin goto fail; 50861d06d6bSBaptiste Daroussin 50961d06d6bSBaptiste Daroussin /* 51061d06d6bSBaptiste Daroussin * Reserve space: 51161d06d6bSBaptiste Daroussin * up to 9 characters for the month (September) + blank 51261d06d6bSBaptiste Daroussin * up to 2 characters for the day + comma + blank 51361d06d6bSBaptiste Daroussin * 4 characters for the year and a terminating '\0' 51461d06d6bSBaptiste Daroussin */ 51561d06d6bSBaptiste Daroussin 51661d06d6bSBaptiste Daroussin p = buf = mandoc_malloc(10 + 4 + 4 + 1); 51761d06d6bSBaptiste Daroussin 51861d06d6bSBaptiste Daroussin if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0) 51961d06d6bSBaptiste Daroussin goto fail; 52061d06d6bSBaptiste Daroussin p += (int)ssz; 52161d06d6bSBaptiste Daroussin 52261d06d6bSBaptiste Daroussin /* 52361d06d6bSBaptiste Daroussin * The output format is just "%d" here, not "%2d" or "%02d". 52461d06d6bSBaptiste Daroussin * That's also the reason why we can't just format the 52561d06d6bSBaptiste Daroussin * date as a whole with "%B %e, %Y" or "%B %d, %Y". 52661d06d6bSBaptiste Daroussin * Besides, the present approach is less prone to buffer 52761d06d6bSBaptiste Daroussin * overflows, in case anybody should ever introduce the bug 52861d06d6bSBaptiste Daroussin * of looking at LC_TIME. 52961d06d6bSBaptiste Daroussin */ 53061d06d6bSBaptiste Daroussin 53145a5aec3SBaptiste Daroussin isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday); 53245a5aec3SBaptiste Daroussin if (isz < 0 || isz > 4) 53361d06d6bSBaptiste Daroussin goto fail; 53461d06d6bSBaptiste Daroussin p += isz; 53561d06d6bSBaptiste Daroussin 53661d06d6bSBaptiste Daroussin if (strftime(p, 4 + 1, "%Y", tm) == 0) 53761d06d6bSBaptiste Daroussin goto fail; 53861d06d6bSBaptiste Daroussin return buf; 53961d06d6bSBaptiste Daroussin 54061d06d6bSBaptiste Daroussin fail: 54161d06d6bSBaptiste Daroussin free(buf); 54245a5aec3SBaptiste Daroussin return mandoc_strdup(""); 54361d06d6bSBaptiste Daroussin } 54461d06d6bSBaptiste Daroussin 54561d06d6bSBaptiste Daroussin char * 546*6d38604fSBaptiste Daroussin mandoc_normdate(struct roff_node *nch, struct roff_node *nbl) 54761d06d6bSBaptiste Daroussin { 54861d06d6bSBaptiste Daroussin char *cp; 54961d06d6bSBaptiste Daroussin time_t t; 55061d06d6bSBaptiste Daroussin 551*6d38604fSBaptiste Daroussin /* No date specified. */ 55245a5aec3SBaptiste Daroussin 553*6d38604fSBaptiste Daroussin if (nch == NULL) { 554*6d38604fSBaptiste Daroussin if (nbl == NULL) 555*6d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL); 556*6d38604fSBaptiste Daroussin else 557*6d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line, 558*6d38604fSBaptiste Daroussin nbl->pos, "%s", roff_name[nbl->tok]); 559*6d38604fSBaptiste Daroussin return mandoc_strdup(""); 560*6d38604fSBaptiste Daroussin } 561*6d38604fSBaptiste Daroussin if (*nch->string == '\0') { 562*6d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_MISSING, nch->line, 563*6d38604fSBaptiste Daroussin nch->pos, "%s", roff_name[nbl->tok]); 564*6d38604fSBaptiste Daroussin return mandoc_strdup(""); 565*6d38604fSBaptiste Daroussin } 566*6d38604fSBaptiste Daroussin if (strcmp(nch->string, "$" "Mdocdate$") == 0) 56761d06d6bSBaptiste Daroussin return time2a(time(NULL)); 56861d06d6bSBaptiste Daroussin 56961d06d6bSBaptiste Daroussin /* Valid mdoc(7) date format. */ 57061d06d6bSBaptiste Daroussin 571*6d38604fSBaptiste Daroussin if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) || 572*6d38604fSBaptiste Daroussin a2time(&t, "%b %d, %Y", nch->string)) { 57361d06d6bSBaptiste Daroussin cp = time2a(t); 57461d06d6bSBaptiste Daroussin if (t > time(NULL) + 86400) 575*6d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, 576*6d38604fSBaptiste Daroussin nch->pos, "%s %s", roff_name[nbl->tok], cp); 577*6d38604fSBaptiste Daroussin else if (*nch->string != '$' && 578*6d38604fSBaptiste Daroussin strcmp(nch->string, cp) != 0) 579*6d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_NORM, nch->line, 580*6d38604fSBaptiste Daroussin nch->pos, "%s %s", roff_name[nbl->tok], cp); 58161d06d6bSBaptiste Daroussin return cp; 58261d06d6bSBaptiste Daroussin } 58361d06d6bSBaptiste Daroussin 58461d06d6bSBaptiste Daroussin /* In man(7), do not warn about the legacy format. */ 58561d06d6bSBaptiste Daroussin 586*6d38604fSBaptiste Daroussin if (a2time(&t, "%Y-%m-%d", nch->string) == 0) 587*6d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos, 588*6d38604fSBaptiste Daroussin "%s %s", roff_name[nbl->tok], nch->string); 58961d06d6bSBaptiste Daroussin else if (t > time(NULL) + 86400) 590*6d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos, 591*6d38604fSBaptiste Daroussin "%s %s", roff_name[nbl->tok], nch->string); 592*6d38604fSBaptiste Daroussin else if (nbl->tok == MDOC_Dd) 593*6d38604fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos, 594*6d38604fSBaptiste Daroussin "Dd %s", nch->string); 59561d06d6bSBaptiste Daroussin 59661d06d6bSBaptiste Daroussin /* Use any non-mdoc(7) date verbatim. */ 59761d06d6bSBaptiste Daroussin 598*6d38604fSBaptiste Daroussin return mandoc_strdup(nch->string); 59961d06d6bSBaptiste Daroussin } 60061d06d6bSBaptiste Daroussin 60161d06d6bSBaptiste Daroussin int 60261d06d6bSBaptiste Daroussin mandoc_eos(const char *p, size_t sz) 60361d06d6bSBaptiste Daroussin { 60461d06d6bSBaptiste Daroussin const char *q; 60561d06d6bSBaptiste Daroussin int enclosed, found; 60661d06d6bSBaptiste Daroussin 60761d06d6bSBaptiste Daroussin if (0 == sz) 60861d06d6bSBaptiste Daroussin return 0; 60961d06d6bSBaptiste Daroussin 61061d06d6bSBaptiste Daroussin /* 61161d06d6bSBaptiste Daroussin * End-of-sentence recognition must include situations where 61261d06d6bSBaptiste Daroussin * some symbols, such as `)', allow prior EOS punctuation to 61361d06d6bSBaptiste Daroussin * propagate outward. 61461d06d6bSBaptiste Daroussin */ 61561d06d6bSBaptiste Daroussin 61661d06d6bSBaptiste Daroussin enclosed = found = 0; 61761d06d6bSBaptiste Daroussin for (q = p + (int)sz - 1; q >= p; q--) { 61861d06d6bSBaptiste Daroussin switch (*q) { 61961d06d6bSBaptiste Daroussin case '\"': 62061d06d6bSBaptiste Daroussin case '\'': 62161d06d6bSBaptiste Daroussin case ']': 62261d06d6bSBaptiste Daroussin case ')': 62361d06d6bSBaptiste Daroussin if (0 == found) 62461d06d6bSBaptiste Daroussin enclosed = 1; 62561d06d6bSBaptiste Daroussin break; 62661d06d6bSBaptiste Daroussin case '.': 62761d06d6bSBaptiste Daroussin case '!': 62861d06d6bSBaptiste Daroussin case '?': 62961d06d6bSBaptiste Daroussin found = 1; 63061d06d6bSBaptiste Daroussin break; 63161d06d6bSBaptiste Daroussin default: 63261d06d6bSBaptiste Daroussin return found && 63361d06d6bSBaptiste Daroussin (!enclosed || isalnum((unsigned char)*q)); 63461d06d6bSBaptiste Daroussin } 63561d06d6bSBaptiste Daroussin } 63661d06d6bSBaptiste Daroussin 63761d06d6bSBaptiste Daroussin return found && !enclosed; 63861d06d6bSBaptiste Daroussin } 63961d06d6bSBaptiste Daroussin 64061d06d6bSBaptiste Daroussin /* 64161d06d6bSBaptiste Daroussin * Convert a string to a long that may not be <0. 64261d06d6bSBaptiste Daroussin * If the string is invalid, or is less than 0, return -1. 64361d06d6bSBaptiste Daroussin */ 64461d06d6bSBaptiste Daroussin int 64561d06d6bSBaptiste Daroussin mandoc_strntoi(const char *p, size_t sz, int base) 64661d06d6bSBaptiste Daroussin { 64761d06d6bSBaptiste Daroussin char buf[32]; 64861d06d6bSBaptiste Daroussin char *ep; 64961d06d6bSBaptiste Daroussin long v; 65061d06d6bSBaptiste Daroussin 65161d06d6bSBaptiste Daroussin if (sz > 31) 65261d06d6bSBaptiste Daroussin return -1; 65361d06d6bSBaptiste Daroussin 65461d06d6bSBaptiste Daroussin memcpy(buf, p, sz); 65561d06d6bSBaptiste Daroussin buf[(int)sz] = '\0'; 65661d06d6bSBaptiste Daroussin 65761d06d6bSBaptiste Daroussin errno = 0; 65861d06d6bSBaptiste Daroussin v = strtol(buf, &ep, base); 65961d06d6bSBaptiste Daroussin 66061d06d6bSBaptiste Daroussin if (buf[0] == '\0' || *ep != '\0') 66161d06d6bSBaptiste Daroussin return -1; 66261d06d6bSBaptiste Daroussin 66361d06d6bSBaptiste Daroussin if (v > INT_MAX) 66461d06d6bSBaptiste Daroussin v = INT_MAX; 66561d06d6bSBaptiste Daroussin if (v < INT_MIN) 66661d06d6bSBaptiste Daroussin v = INT_MIN; 66761d06d6bSBaptiste Daroussin 66861d06d6bSBaptiste Daroussin return (int)v; 66961d06d6bSBaptiste Daroussin } 670