1*45a5aec3SBaptiste Daroussin /* $Id: mandoc.c,v 1.116 2019/06/27 15:07:30 schwarze Exp $ */ 261d06d6bSBaptiste Daroussin /* 361d06d6bSBaptiste Daroussin * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 461d06d6bSBaptiste Daroussin * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> 561d06d6bSBaptiste Daroussin * 661d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any 761d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above 861d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies. 961d06d6bSBaptiste Daroussin * 1061d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 1161d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1261d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 1361d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1461d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1561d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1661d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1761d06d6bSBaptiste Daroussin */ 1861d06d6bSBaptiste Daroussin #include "config.h" 1961d06d6bSBaptiste Daroussin 2061d06d6bSBaptiste Daroussin #include <sys/types.h> 2161d06d6bSBaptiste Daroussin 2261d06d6bSBaptiste Daroussin #include <assert.h> 2361d06d6bSBaptiste Daroussin #include <ctype.h> 2461d06d6bSBaptiste Daroussin #include <errno.h> 2561d06d6bSBaptiste Daroussin #include <limits.h> 2661d06d6bSBaptiste Daroussin #include <stdlib.h> 2761d06d6bSBaptiste Daroussin #include <stdio.h> 2861d06d6bSBaptiste Daroussin #include <string.h> 2961d06d6bSBaptiste Daroussin #include <time.h> 3061d06d6bSBaptiste Daroussin 3161d06d6bSBaptiste Daroussin #include "mandoc_aux.h" 3261d06d6bSBaptiste Daroussin #include "mandoc.h" 3361d06d6bSBaptiste Daroussin #include "roff.h" 3461d06d6bSBaptiste Daroussin #include "libmandoc.h" 357295610fSBaptiste Daroussin #include "roff_int.h" 3661d06d6bSBaptiste Daroussin 3761d06d6bSBaptiste Daroussin static int a2time(time_t *, const char *, const char *); 3861d06d6bSBaptiste Daroussin static char *time2a(time_t); 3961d06d6bSBaptiste Daroussin 4061d06d6bSBaptiste Daroussin 4161d06d6bSBaptiste Daroussin enum mandoc_esc 427295610fSBaptiste Daroussin mandoc_font(const char *cp, int sz) 437295610fSBaptiste Daroussin { 447295610fSBaptiste Daroussin switch (sz) { 457295610fSBaptiste Daroussin case 0: 467295610fSBaptiste Daroussin return ESCAPE_FONTPREV; 477295610fSBaptiste Daroussin case 1: 487295610fSBaptiste Daroussin switch (cp[0]) { 497295610fSBaptiste Daroussin case 'B': 507295610fSBaptiste Daroussin case '3': 517295610fSBaptiste Daroussin return ESCAPE_FONTBOLD; 527295610fSBaptiste Daroussin case 'I': 537295610fSBaptiste Daroussin case '2': 547295610fSBaptiste Daroussin return ESCAPE_FONTITALIC; 557295610fSBaptiste Daroussin case 'P': 567295610fSBaptiste Daroussin return ESCAPE_FONTPREV; 577295610fSBaptiste Daroussin case 'R': 587295610fSBaptiste Daroussin case '1': 597295610fSBaptiste Daroussin return ESCAPE_FONTROMAN; 607295610fSBaptiste Daroussin case '4': 617295610fSBaptiste Daroussin return ESCAPE_FONTBI; 627295610fSBaptiste Daroussin default: 637295610fSBaptiste Daroussin return ESCAPE_ERROR; 647295610fSBaptiste Daroussin } 657295610fSBaptiste Daroussin case 2: 667295610fSBaptiste Daroussin switch (cp[0]) { 677295610fSBaptiste Daroussin case 'B': 687295610fSBaptiste Daroussin switch (cp[1]) { 697295610fSBaptiste Daroussin case 'I': 707295610fSBaptiste Daroussin return ESCAPE_FONTBI; 717295610fSBaptiste Daroussin default: 727295610fSBaptiste Daroussin return ESCAPE_ERROR; 737295610fSBaptiste Daroussin } 747295610fSBaptiste Daroussin case 'C': 757295610fSBaptiste Daroussin switch (cp[1]) { 767295610fSBaptiste Daroussin case 'B': 777295610fSBaptiste Daroussin return ESCAPE_FONTBOLD; 787295610fSBaptiste Daroussin case 'I': 797295610fSBaptiste Daroussin return ESCAPE_FONTITALIC; 807295610fSBaptiste Daroussin case 'R': 817295610fSBaptiste Daroussin case 'W': 827295610fSBaptiste Daroussin return ESCAPE_FONTCW; 837295610fSBaptiste Daroussin default: 847295610fSBaptiste Daroussin return ESCAPE_ERROR; 857295610fSBaptiste Daroussin } 867295610fSBaptiste Daroussin default: 877295610fSBaptiste Daroussin return ESCAPE_ERROR; 887295610fSBaptiste Daroussin } 897295610fSBaptiste Daroussin default: 907295610fSBaptiste Daroussin return ESCAPE_ERROR; 917295610fSBaptiste Daroussin } 927295610fSBaptiste Daroussin } 937295610fSBaptiste Daroussin 947295610fSBaptiste Daroussin enum mandoc_esc 9561d06d6bSBaptiste Daroussin mandoc_escape(const char **end, const char **start, int *sz) 9661d06d6bSBaptiste Daroussin { 9761d06d6bSBaptiste Daroussin const char *local_start; 987295610fSBaptiste Daroussin int local_sz, c, i; 9961d06d6bSBaptiste Daroussin char term; 10061d06d6bSBaptiste Daroussin enum mandoc_esc gly; 10161d06d6bSBaptiste Daroussin 10261d06d6bSBaptiste Daroussin /* 10361d06d6bSBaptiste Daroussin * When the caller doesn't provide return storage, 10461d06d6bSBaptiste Daroussin * use local storage. 10561d06d6bSBaptiste Daroussin */ 10661d06d6bSBaptiste Daroussin 10761d06d6bSBaptiste Daroussin if (NULL == start) 10861d06d6bSBaptiste Daroussin start = &local_start; 10961d06d6bSBaptiste Daroussin if (NULL == sz) 11061d06d6bSBaptiste Daroussin sz = &local_sz; 11161d06d6bSBaptiste Daroussin 11261d06d6bSBaptiste Daroussin /* 1137295610fSBaptiste Daroussin * Treat "\E" just like "\"; 1147295610fSBaptiste Daroussin * it only makes a difference in copy mode. 1157295610fSBaptiste Daroussin */ 1167295610fSBaptiste Daroussin 1177295610fSBaptiste Daroussin if (**end == 'E') 1187295610fSBaptiste Daroussin ++*end; 1197295610fSBaptiste Daroussin 1207295610fSBaptiste Daroussin /* 12161d06d6bSBaptiste Daroussin * Beyond the backslash, at least one input character 12261d06d6bSBaptiste Daroussin * is part of the escape sequence. With one exception 12361d06d6bSBaptiste Daroussin * (see below), that character won't be returned. 12461d06d6bSBaptiste Daroussin */ 12561d06d6bSBaptiste Daroussin 12661d06d6bSBaptiste Daroussin gly = ESCAPE_ERROR; 12761d06d6bSBaptiste Daroussin *start = ++*end; 12861d06d6bSBaptiste Daroussin *sz = 0; 12961d06d6bSBaptiste Daroussin term = '\0'; 13061d06d6bSBaptiste Daroussin 13161d06d6bSBaptiste Daroussin switch ((*start)[-1]) { 13261d06d6bSBaptiste Daroussin /* 13361d06d6bSBaptiste Daroussin * First the glyphs. There are several different forms of 13461d06d6bSBaptiste Daroussin * these, but each eventually returns a substring of the glyph 13561d06d6bSBaptiste Daroussin * name. 13661d06d6bSBaptiste Daroussin */ 13761d06d6bSBaptiste Daroussin case '(': 13861d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 13961d06d6bSBaptiste Daroussin *sz = 2; 14061d06d6bSBaptiste Daroussin break; 14161d06d6bSBaptiste Daroussin case '[': 1427295610fSBaptiste Daroussin if (**start == ' ') { 1437295610fSBaptiste Daroussin ++*end; 1447295610fSBaptiste Daroussin return ESCAPE_ERROR; 1457295610fSBaptiste Daroussin } 14661d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 14761d06d6bSBaptiste Daroussin term = ']'; 14861d06d6bSBaptiste Daroussin break; 14961d06d6bSBaptiste Daroussin case 'C': 15061d06d6bSBaptiste Daroussin if ('\'' != **start) 15161d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 15261d06d6bSBaptiste Daroussin *start = ++*end; 15361d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 15461d06d6bSBaptiste Daroussin term = '\''; 15561d06d6bSBaptiste Daroussin break; 15661d06d6bSBaptiste Daroussin 15761d06d6bSBaptiste Daroussin /* 15861d06d6bSBaptiste Daroussin * Escapes taking no arguments at all. 15961d06d6bSBaptiste Daroussin */ 1607295610fSBaptiste Daroussin case '!': 1617295610fSBaptiste Daroussin case '?': 1627295610fSBaptiste Daroussin return ESCAPE_UNSUPP; 1637295610fSBaptiste Daroussin case '%': 1647295610fSBaptiste Daroussin case '&': 1657295610fSBaptiste Daroussin case ')': 16661d06d6bSBaptiste Daroussin case ',': 16761d06d6bSBaptiste Daroussin case '/': 1687295610fSBaptiste Daroussin case '^': 1697295610fSBaptiste Daroussin case 'a': 1707295610fSBaptiste Daroussin case 'd': 1717295610fSBaptiste Daroussin case 'r': 1727295610fSBaptiste Daroussin case 't': 1737295610fSBaptiste Daroussin case 'u': 1747295610fSBaptiste Daroussin case '{': 1757295610fSBaptiste Daroussin case '|': 1767295610fSBaptiste Daroussin case '}': 17761d06d6bSBaptiste Daroussin return ESCAPE_IGNORE; 1787295610fSBaptiste Daroussin case 'c': 1797295610fSBaptiste Daroussin return ESCAPE_NOSPACE; 18061d06d6bSBaptiste Daroussin case 'p': 18161d06d6bSBaptiste Daroussin return ESCAPE_BREAK; 18261d06d6bSBaptiste Daroussin 18361d06d6bSBaptiste Daroussin /* 18461d06d6bSBaptiste Daroussin * The \z escape is supposed to output the following 18561d06d6bSBaptiste Daroussin * character without advancing the cursor position. 18661d06d6bSBaptiste Daroussin * Since we are mostly dealing with terminal mode, 18761d06d6bSBaptiste Daroussin * let us just skip the next character. 18861d06d6bSBaptiste Daroussin */ 18961d06d6bSBaptiste Daroussin case 'z': 19061d06d6bSBaptiste Daroussin return ESCAPE_SKIPCHAR; 19161d06d6bSBaptiste Daroussin 19261d06d6bSBaptiste Daroussin /* 19361d06d6bSBaptiste Daroussin * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where 19461d06d6bSBaptiste Daroussin * 'X' is the trigger. These have opaque sub-strings. 19561d06d6bSBaptiste Daroussin */ 19661d06d6bSBaptiste Daroussin case 'F': 1977295610fSBaptiste Daroussin case 'f': 19861d06d6bSBaptiste Daroussin case 'g': 19961d06d6bSBaptiste Daroussin case 'k': 20061d06d6bSBaptiste Daroussin case 'M': 20161d06d6bSBaptiste Daroussin case 'm': 20261d06d6bSBaptiste Daroussin case 'n': 2037295610fSBaptiste Daroussin case 'O': 20461d06d6bSBaptiste Daroussin case 'V': 20561d06d6bSBaptiste Daroussin case 'Y': 2067295610fSBaptiste Daroussin gly = (*start)[-1] == 'f' ? ESCAPE_FONT : ESCAPE_IGNORE; 20761d06d6bSBaptiste Daroussin switch (**start) { 20861d06d6bSBaptiste Daroussin case '(': 2097295610fSBaptiste Daroussin if ((*start)[-1] == 'O') 2107295610fSBaptiste Daroussin gly = ESCAPE_ERROR; 21161d06d6bSBaptiste Daroussin *start = ++*end; 21261d06d6bSBaptiste Daroussin *sz = 2; 21361d06d6bSBaptiste Daroussin break; 21461d06d6bSBaptiste Daroussin case '[': 2157295610fSBaptiste Daroussin if ((*start)[-1] == 'O') 2167295610fSBaptiste Daroussin gly = (*start)[1] == '5' ? 2177295610fSBaptiste Daroussin ESCAPE_UNSUPP : ESCAPE_ERROR; 21861d06d6bSBaptiste Daroussin *start = ++*end; 21961d06d6bSBaptiste Daroussin term = ']'; 22061d06d6bSBaptiste Daroussin break; 22161d06d6bSBaptiste Daroussin default: 2227295610fSBaptiste Daroussin if ((*start)[-1] == 'O') { 2237295610fSBaptiste Daroussin switch (**start) { 2247295610fSBaptiste Daroussin case '0': 2257295610fSBaptiste Daroussin gly = ESCAPE_UNSUPP; 2267295610fSBaptiste Daroussin break; 2277295610fSBaptiste Daroussin case '1': 2287295610fSBaptiste Daroussin case '2': 2297295610fSBaptiste Daroussin case '3': 2307295610fSBaptiste Daroussin case '4': 2317295610fSBaptiste Daroussin break; 2327295610fSBaptiste Daroussin default: 2337295610fSBaptiste Daroussin gly = ESCAPE_ERROR; 2347295610fSBaptiste Daroussin break; 2357295610fSBaptiste Daroussin } 2367295610fSBaptiste Daroussin } 23761d06d6bSBaptiste Daroussin *sz = 1; 23861d06d6bSBaptiste Daroussin break; 23961d06d6bSBaptiste Daroussin } 24061d06d6bSBaptiste Daroussin break; 2417295610fSBaptiste Daroussin case '*': 2427295610fSBaptiste Daroussin if (strncmp(*start, "(.T", 3) != 0) 2437295610fSBaptiste Daroussin abort(); 2447295610fSBaptiste Daroussin gly = ESCAPE_DEVICE; 2457295610fSBaptiste Daroussin *start = ++*end; 2467295610fSBaptiste Daroussin *sz = 2; 2477295610fSBaptiste Daroussin break; 24861d06d6bSBaptiste Daroussin 24961d06d6bSBaptiste Daroussin /* 25061d06d6bSBaptiste Daroussin * These escapes are of the form \X'Y', where 'X' is the trigger 25161d06d6bSBaptiste Daroussin * and 'Y' is any string. These have opaque sub-strings. 25261d06d6bSBaptiste Daroussin * The \B and \w escapes are handled in roff.c, roff_res(). 25361d06d6bSBaptiste Daroussin */ 25461d06d6bSBaptiste Daroussin case 'A': 25561d06d6bSBaptiste Daroussin case 'b': 25661d06d6bSBaptiste Daroussin case 'D': 25761d06d6bSBaptiste Daroussin case 'R': 25861d06d6bSBaptiste Daroussin case 'X': 25961d06d6bSBaptiste Daroussin case 'Z': 26061d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 26161d06d6bSBaptiste Daroussin /* FALLTHROUGH */ 26261d06d6bSBaptiste Daroussin case 'o': 26361d06d6bSBaptiste Daroussin if (**start == '\0') 26461d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 26561d06d6bSBaptiste Daroussin if (gly == ESCAPE_ERROR) 26661d06d6bSBaptiste Daroussin gly = ESCAPE_OVERSTRIKE; 26761d06d6bSBaptiste Daroussin term = **start; 26861d06d6bSBaptiste Daroussin *start = ++*end; 26961d06d6bSBaptiste Daroussin break; 27061d06d6bSBaptiste Daroussin 27161d06d6bSBaptiste Daroussin /* 27261d06d6bSBaptiste Daroussin * These escapes are of the form \X'N', where 'X' is the trigger 27361d06d6bSBaptiste Daroussin * and 'N' resolves to a numerical expression. 27461d06d6bSBaptiste Daroussin */ 27561d06d6bSBaptiste Daroussin case 'h': 27661d06d6bSBaptiste Daroussin case 'H': 27761d06d6bSBaptiste Daroussin case 'L': 27861d06d6bSBaptiste Daroussin case 'l': 27961d06d6bSBaptiste Daroussin case 'S': 28061d06d6bSBaptiste Daroussin case 'v': 28161d06d6bSBaptiste Daroussin case 'x': 28261d06d6bSBaptiste Daroussin if (strchr(" %&()*+-./0123456789:<=>", **start)) { 28361d06d6bSBaptiste Daroussin if ('\0' != **start) 28461d06d6bSBaptiste Daroussin ++*end; 28561d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 28661d06d6bSBaptiste Daroussin } 28761d06d6bSBaptiste Daroussin switch ((*start)[-1]) { 28861d06d6bSBaptiste Daroussin case 'h': 28961d06d6bSBaptiste Daroussin gly = ESCAPE_HORIZ; 29061d06d6bSBaptiste Daroussin break; 29161d06d6bSBaptiste Daroussin case 'l': 29261d06d6bSBaptiste Daroussin gly = ESCAPE_HLINE; 29361d06d6bSBaptiste Daroussin break; 29461d06d6bSBaptiste Daroussin default: 29561d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 29661d06d6bSBaptiste Daroussin break; 29761d06d6bSBaptiste Daroussin } 29861d06d6bSBaptiste Daroussin term = **start; 29961d06d6bSBaptiste Daroussin *start = ++*end; 30061d06d6bSBaptiste Daroussin break; 30161d06d6bSBaptiste Daroussin 30261d06d6bSBaptiste Daroussin /* 30361d06d6bSBaptiste Daroussin * Special handling for the numbered character escape. 30461d06d6bSBaptiste Daroussin * XXX Do any other escapes need similar handling? 30561d06d6bSBaptiste Daroussin */ 30661d06d6bSBaptiste Daroussin case 'N': 30761d06d6bSBaptiste Daroussin if ('\0' == **start) 30861d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 30961d06d6bSBaptiste Daroussin (*end)++; 31061d06d6bSBaptiste Daroussin if (isdigit((unsigned char)**start)) { 31161d06d6bSBaptiste Daroussin *sz = 1; 31261d06d6bSBaptiste Daroussin return ESCAPE_IGNORE; 31361d06d6bSBaptiste Daroussin } 31461d06d6bSBaptiste Daroussin (*start)++; 31561d06d6bSBaptiste Daroussin while (isdigit((unsigned char)**end)) 31661d06d6bSBaptiste Daroussin (*end)++; 31761d06d6bSBaptiste Daroussin *sz = *end - *start; 31861d06d6bSBaptiste Daroussin if ('\0' != **end) 31961d06d6bSBaptiste Daroussin (*end)++; 32061d06d6bSBaptiste Daroussin return ESCAPE_NUMBERED; 32161d06d6bSBaptiste Daroussin 32261d06d6bSBaptiste Daroussin /* 32361d06d6bSBaptiste Daroussin * Sizes get a special category of their own. 32461d06d6bSBaptiste Daroussin */ 32561d06d6bSBaptiste Daroussin case 's': 32661d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 32761d06d6bSBaptiste Daroussin 32861d06d6bSBaptiste Daroussin /* See +/- counts as a sign. */ 32961d06d6bSBaptiste Daroussin if ('+' == **end || '-' == **end || ASCII_HYPH == **end) 33061d06d6bSBaptiste Daroussin *start = ++*end; 33161d06d6bSBaptiste Daroussin 33261d06d6bSBaptiste Daroussin switch (**end) { 33361d06d6bSBaptiste Daroussin case '(': 33461d06d6bSBaptiste Daroussin *start = ++*end; 33561d06d6bSBaptiste Daroussin *sz = 2; 33661d06d6bSBaptiste Daroussin break; 33761d06d6bSBaptiste Daroussin case '[': 33861d06d6bSBaptiste Daroussin *start = ++*end; 33961d06d6bSBaptiste Daroussin term = ']'; 34061d06d6bSBaptiste Daroussin break; 34161d06d6bSBaptiste Daroussin case '\'': 34261d06d6bSBaptiste Daroussin *start = ++*end; 34361d06d6bSBaptiste Daroussin term = '\''; 34461d06d6bSBaptiste Daroussin break; 34561d06d6bSBaptiste Daroussin case '3': 34661d06d6bSBaptiste Daroussin case '2': 34761d06d6bSBaptiste Daroussin case '1': 34861d06d6bSBaptiste Daroussin *sz = (*end)[-1] == 's' && 34961d06d6bSBaptiste Daroussin isdigit((unsigned char)(*end)[1]) ? 2 : 1; 35061d06d6bSBaptiste Daroussin break; 35161d06d6bSBaptiste Daroussin default: 35261d06d6bSBaptiste Daroussin *sz = 1; 35361d06d6bSBaptiste Daroussin break; 35461d06d6bSBaptiste Daroussin } 35561d06d6bSBaptiste Daroussin 35661d06d6bSBaptiste Daroussin break; 35761d06d6bSBaptiste Daroussin 35861d06d6bSBaptiste Daroussin /* 3597295610fSBaptiste Daroussin * Several special characters can be encoded as 3607295610fSBaptiste Daroussin * one-byte escape sequences without using \[]. 36161d06d6bSBaptiste Daroussin */ 3627295610fSBaptiste Daroussin case ' ': 3637295610fSBaptiste Daroussin case '\'': 3647295610fSBaptiste Daroussin case '-': 3657295610fSBaptiste Daroussin case '.': 3667295610fSBaptiste Daroussin case '0': 3677295610fSBaptiste Daroussin case ':': 3687295610fSBaptiste Daroussin case '_': 3697295610fSBaptiste Daroussin case '`': 3707295610fSBaptiste Daroussin case 'e': 3717295610fSBaptiste Daroussin case '~': 37261d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 3737295610fSBaptiste Daroussin /* FALLTHROUGH */ 3747295610fSBaptiste Daroussin default: 3757295610fSBaptiste Daroussin if (gly == ESCAPE_ERROR) 3767295610fSBaptiste Daroussin gly = ESCAPE_UNDEF; 37761d06d6bSBaptiste Daroussin *start = --*end; 37861d06d6bSBaptiste Daroussin *sz = 1; 37961d06d6bSBaptiste Daroussin break; 38061d06d6bSBaptiste Daroussin } 38161d06d6bSBaptiste Daroussin 38261d06d6bSBaptiste Daroussin /* 38361d06d6bSBaptiste Daroussin * Read up to the terminating character, 38461d06d6bSBaptiste Daroussin * paying attention to nested escapes. 38561d06d6bSBaptiste Daroussin */ 38661d06d6bSBaptiste Daroussin 38761d06d6bSBaptiste Daroussin if ('\0' != term) { 38861d06d6bSBaptiste Daroussin while (**end != term) { 38961d06d6bSBaptiste Daroussin switch (**end) { 39061d06d6bSBaptiste Daroussin case '\0': 39161d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 39261d06d6bSBaptiste Daroussin case '\\': 39361d06d6bSBaptiste Daroussin (*end)++; 39461d06d6bSBaptiste Daroussin if (ESCAPE_ERROR == 39561d06d6bSBaptiste Daroussin mandoc_escape(end, NULL, NULL)) 39661d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 39761d06d6bSBaptiste Daroussin break; 39861d06d6bSBaptiste Daroussin default: 39961d06d6bSBaptiste Daroussin (*end)++; 40061d06d6bSBaptiste Daroussin break; 40161d06d6bSBaptiste Daroussin } 40261d06d6bSBaptiste Daroussin } 40361d06d6bSBaptiste Daroussin *sz = (*end)++ - *start; 4047295610fSBaptiste Daroussin 4057295610fSBaptiste Daroussin /* 4067295610fSBaptiste Daroussin * The file chars.c only provides one common list 4077295610fSBaptiste Daroussin * of character names, but \[-] == \- is the only 4087295610fSBaptiste Daroussin * one of the characters with one-byte names that 4097295610fSBaptiste Daroussin * allows enclosing the name in brackets. 4107295610fSBaptiste Daroussin */ 4117295610fSBaptiste Daroussin if (gly == ESCAPE_SPECIAL && *sz == 1 && **start != '-') 4127295610fSBaptiste Daroussin return ESCAPE_ERROR; 41361d06d6bSBaptiste Daroussin } else { 41461d06d6bSBaptiste Daroussin assert(*sz > 0); 41561d06d6bSBaptiste Daroussin if ((size_t)*sz > strlen(*start)) 41661d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 41761d06d6bSBaptiste Daroussin *end += *sz; 41861d06d6bSBaptiste Daroussin } 41961d06d6bSBaptiste Daroussin 42061d06d6bSBaptiste Daroussin /* Run post-processors. */ 42161d06d6bSBaptiste Daroussin 42261d06d6bSBaptiste Daroussin switch (gly) { 42361d06d6bSBaptiste Daroussin case ESCAPE_FONT: 4247295610fSBaptiste Daroussin gly = mandoc_font(*start, *sz); 42561d06d6bSBaptiste Daroussin break; 42661d06d6bSBaptiste Daroussin case ESCAPE_SPECIAL: 4277295610fSBaptiste Daroussin if (**start == 'c') { 4287295610fSBaptiste Daroussin if (*sz < 6 || *sz > 7 || 4297295610fSBaptiste Daroussin strncmp(*start, "char", 4) != 0 || 4307295610fSBaptiste Daroussin (int)strspn(*start + 4, "0123456789") + 4 < *sz) 4317295610fSBaptiste Daroussin break; 4327295610fSBaptiste Daroussin c = 0; 4337295610fSBaptiste Daroussin for (i = 4; i < *sz; i++) 4347295610fSBaptiste Daroussin c = 10 * c + ((*start)[i] - '0'); 4357295610fSBaptiste Daroussin if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) 4367295610fSBaptiste Daroussin break; 4377295610fSBaptiste Daroussin *start += 4; 4387295610fSBaptiste Daroussin *sz -= 4; 4397295610fSBaptiste Daroussin gly = ESCAPE_NUMBERED; 4407295610fSBaptiste Daroussin break; 4417295610fSBaptiste Daroussin } 4427295610fSBaptiste Daroussin 44361d06d6bSBaptiste Daroussin /* 44461d06d6bSBaptiste Daroussin * Unicode escapes are defined in groff as \[u0000] 44561d06d6bSBaptiste Daroussin * to \[u10FFFF], where the contained value must be 44661d06d6bSBaptiste Daroussin * a valid Unicode codepoint. Here, however, only 44761d06d6bSBaptiste Daroussin * check the length and range. 44861d06d6bSBaptiste Daroussin */ 44961d06d6bSBaptiste Daroussin if (**start != 'u' || *sz < 5 || *sz > 7) 45061d06d6bSBaptiste Daroussin break; 45161d06d6bSBaptiste Daroussin if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0')) 45261d06d6bSBaptiste Daroussin break; 45361d06d6bSBaptiste Daroussin if (*sz == 6 && (*start)[1] == '0') 45461d06d6bSBaptiste Daroussin break; 45561d06d6bSBaptiste Daroussin if (*sz == 5 && (*start)[1] == 'D' && 45661d06d6bSBaptiste Daroussin strchr("89ABCDEF", (*start)[2]) != NULL) 45761d06d6bSBaptiste Daroussin break; 45861d06d6bSBaptiste Daroussin if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef") 45961d06d6bSBaptiste Daroussin + 1 == *sz) 46061d06d6bSBaptiste Daroussin gly = ESCAPE_UNICODE; 46161d06d6bSBaptiste Daroussin break; 46261d06d6bSBaptiste Daroussin default: 46361d06d6bSBaptiste Daroussin break; 46461d06d6bSBaptiste Daroussin } 46561d06d6bSBaptiste Daroussin 46661d06d6bSBaptiste Daroussin return gly; 46761d06d6bSBaptiste Daroussin } 46861d06d6bSBaptiste Daroussin 46961d06d6bSBaptiste Daroussin static int 47061d06d6bSBaptiste Daroussin a2time(time_t *t, const char *fmt, const char *p) 47161d06d6bSBaptiste Daroussin { 47261d06d6bSBaptiste Daroussin struct tm tm; 47361d06d6bSBaptiste Daroussin char *pp; 47461d06d6bSBaptiste Daroussin 47561d06d6bSBaptiste Daroussin memset(&tm, 0, sizeof(struct tm)); 47661d06d6bSBaptiste Daroussin 47761d06d6bSBaptiste Daroussin pp = NULL; 47861d06d6bSBaptiste Daroussin #if HAVE_STRPTIME 47961d06d6bSBaptiste Daroussin pp = strptime(p, fmt, &tm); 48061d06d6bSBaptiste Daroussin #endif 48161d06d6bSBaptiste Daroussin if (NULL != pp && '\0' == *pp) { 48261d06d6bSBaptiste Daroussin *t = mktime(&tm); 48361d06d6bSBaptiste Daroussin return 1; 48461d06d6bSBaptiste Daroussin } 48561d06d6bSBaptiste Daroussin 48661d06d6bSBaptiste Daroussin return 0; 48761d06d6bSBaptiste Daroussin } 48861d06d6bSBaptiste Daroussin 48961d06d6bSBaptiste Daroussin static char * 49061d06d6bSBaptiste Daroussin time2a(time_t t) 49161d06d6bSBaptiste Daroussin { 49261d06d6bSBaptiste Daroussin struct tm *tm; 49361d06d6bSBaptiste Daroussin char *buf, *p; 49461d06d6bSBaptiste Daroussin size_t ssz; 49561d06d6bSBaptiste Daroussin int isz; 49661d06d6bSBaptiste Daroussin 497*45a5aec3SBaptiste Daroussin buf = NULL; 49861d06d6bSBaptiste Daroussin tm = localtime(&t); 49961d06d6bSBaptiste Daroussin if (tm == NULL) 500*45a5aec3SBaptiste Daroussin goto fail; 50161d06d6bSBaptiste Daroussin 50261d06d6bSBaptiste Daroussin /* 50361d06d6bSBaptiste Daroussin * Reserve space: 50461d06d6bSBaptiste Daroussin * up to 9 characters for the month (September) + blank 50561d06d6bSBaptiste Daroussin * up to 2 characters for the day + comma + blank 50661d06d6bSBaptiste Daroussin * 4 characters for the year and a terminating '\0' 50761d06d6bSBaptiste Daroussin */ 50861d06d6bSBaptiste Daroussin 50961d06d6bSBaptiste Daroussin p = buf = mandoc_malloc(10 + 4 + 4 + 1); 51061d06d6bSBaptiste Daroussin 51161d06d6bSBaptiste Daroussin if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0) 51261d06d6bSBaptiste Daroussin goto fail; 51361d06d6bSBaptiste Daroussin p += (int)ssz; 51461d06d6bSBaptiste Daroussin 51561d06d6bSBaptiste Daroussin /* 51661d06d6bSBaptiste Daroussin * The output format is just "%d" here, not "%2d" or "%02d". 51761d06d6bSBaptiste Daroussin * That's also the reason why we can't just format the 51861d06d6bSBaptiste Daroussin * date as a whole with "%B %e, %Y" or "%B %d, %Y". 51961d06d6bSBaptiste Daroussin * Besides, the present approach is less prone to buffer 52061d06d6bSBaptiste Daroussin * overflows, in case anybody should ever introduce the bug 52161d06d6bSBaptiste Daroussin * of looking at LC_TIME. 52261d06d6bSBaptiste Daroussin */ 52361d06d6bSBaptiste Daroussin 524*45a5aec3SBaptiste Daroussin isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday); 525*45a5aec3SBaptiste Daroussin if (isz < 0 || isz > 4) 52661d06d6bSBaptiste Daroussin goto fail; 52761d06d6bSBaptiste Daroussin p += isz; 52861d06d6bSBaptiste Daroussin 52961d06d6bSBaptiste Daroussin if (strftime(p, 4 + 1, "%Y", tm) == 0) 53061d06d6bSBaptiste Daroussin goto fail; 53161d06d6bSBaptiste Daroussin return buf; 53261d06d6bSBaptiste Daroussin 53361d06d6bSBaptiste Daroussin fail: 53461d06d6bSBaptiste Daroussin free(buf); 535*45a5aec3SBaptiste Daroussin return mandoc_strdup(""); 53661d06d6bSBaptiste Daroussin } 53761d06d6bSBaptiste Daroussin 53861d06d6bSBaptiste Daroussin char * 53961d06d6bSBaptiste Daroussin mandoc_normdate(struct roff_man *man, char *in, int ln, int pos) 54061d06d6bSBaptiste Daroussin { 54161d06d6bSBaptiste Daroussin char *cp; 54261d06d6bSBaptiste Daroussin time_t t; 54361d06d6bSBaptiste Daroussin 544*45a5aec3SBaptiste Daroussin if (man->quick) 545*45a5aec3SBaptiste Daroussin return mandoc_strdup(in == NULL ? "" : in); 546*45a5aec3SBaptiste Daroussin 54761d06d6bSBaptiste Daroussin /* No date specified: use today's date. */ 54861d06d6bSBaptiste Daroussin 549*45a5aec3SBaptiste Daroussin if (in == NULL || *in == '\0') 5507295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_MISSING, ln, pos, NULL); 551*45a5aec3SBaptiste Daroussin if (in == NULL || *in == '\0' || strcmp(in, "$" "Mdocdate$") == 0) 55261d06d6bSBaptiste Daroussin return time2a(time(NULL)); 55361d06d6bSBaptiste Daroussin 55461d06d6bSBaptiste Daroussin /* Valid mdoc(7) date format. */ 55561d06d6bSBaptiste Daroussin 55661d06d6bSBaptiste Daroussin if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) || 55761d06d6bSBaptiste Daroussin a2time(&t, "%b %d, %Y", in)) { 55861d06d6bSBaptiste Daroussin cp = time2a(t); 55961d06d6bSBaptiste Daroussin if (t > time(NULL) + 86400) 5607295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_FUTURE, ln, pos, "%s", cp); 56161d06d6bSBaptiste Daroussin else if (*in != '$' && strcmp(in, cp) != 0) 5627295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_NORM, ln, pos, "%s", cp); 56361d06d6bSBaptiste Daroussin return cp; 56461d06d6bSBaptiste Daroussin } 56561d06d6bSBaptiste Daroussin 56661d06d6bSBaptiste Daroussin /* In man(7), do not warn about the legacy format. */ 56761d06d6bSBaptiste Daroussin 56861d06d6bSBaptiste Daroussin if (a2time(&t, "%Y-%m-%d", in) == 0) 5697295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_BAD, ln, pos, "%s", in); 57061d06d6bSBaptiste Daroussin else if (t > time(NULL) + 86400) 5717295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_FUTURE, ln, pos, "%s", in); 5727295610fSBaptiste Daroussin else if (man->meta.macroset == MACROSET_MDOC) 5737295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_LEGACY, ln, pos, "Dd %s", in); 57461d06d6bSBaptiste Daroussin 57561d06d6bSBaptiste Daroussin /* Use any non-mdoc(7) date verbatim. */ 57661d06d6bSBaptiste Daroussin 57761d06d6bSBaptiste Daroussin return mandoc_strdup(in); 57861d06d6bSBaptiste Daroussin } 57961d06d6bSBaptiste Daroussin 58061d06d6bSBaptiste Daroussin int 58161d06d6bSBaptiste Daroussin mandoc_eos(const char *p, size_t sz) 58261d06d6bSBaptiste Daroussin { 58361d06d6bSBaptiste Daroussin const char *q; 58461d06d6bSBaptiste Daroussin int enclosed, found; 58561d06d6bSBaptiste Daroussin 58661d06d6bSBaptiste Daroussin if (0 == sz) 58761d06d6bSBaptiste Daroussin return 0; 58861d06d6bSBaptiste Daroussin 58961d06d6bSBaptiste Daroussin /* 59061d06d6bSBaptiste Daroussin * End-of-sentence recognition must include situations where 59161d06d6bSBaptiste Daroussin * some symbols, such as `)', allow prior EOS punctuation to 59261d06d6bSBaptiste Daroussin * propagate outward. 59361d06d6bSBaptiste Daroussin */ 59461d06d6bSBaptiste Daroussin 59561d06d6bSBaptiste Daroussin enclosed = found = 0; 59661d06d6bSBaptiste Daroussin for (q = p + (int)sz - 1; q >= p; q--) { 59761d06d6bSBaptiste Daroussin switch (*q) { 59861d06d6bSBaptiste Daroussin case '\"': 59961d06d6bSBaptiste Daroussin case '\'': 60061d06d6bSBaptiste Daroussin case ']': 60161d06d6bSBaptiste Daroussin case ')': 60261d06d6bSBaptiste Daroussin if (0 == found) 60361d06d6bSBaptiste Daroussin enclosed = 1; 60461d06d6bSBaptiste Daroussin break; 60561d06d6bSBaptiste Daroussin case '.': 60661d06d6bSBaptiste Daroussin case '!': 60761d06d6bSBaptiste Daroussin case '?': 60861d06d6bSBaptiste Daroussin found = 1; 60961d06d6bSBaptiste Daroussin break; 61061d06d6bSBaptiste Daroussin default: 61161d06d6bSBaptiste Daroussin return found && 61261d06d6bSBaptiste Daroussin (!enclosed || isalnum((unsigned char)*q)); 61361d06d6bSBaptiste Daroussin } 61461d06d6bSBaptiste Daroussin } 61561d06d6bSBaptiste Daroussin 61661d06d6bSBaptiste Daroussin return found && !enclosed; 61761d06d6bSBaptiste Daroussin } 61861d06d6bSBaptiste Daroussin 61961d06d6bSBaptiste Daroussin /* 62061d06d6bSBaptiste Daroussin * Convert a string to a long that may not be <0. 62161d06d6bSBaptiste Daroussin * If the string is invalid, or is less than 0, return -1. 62261d06d6bSBaptiste Daroussin */ 62361d06d6bSBaptiste Daroussin int 62461d06d6bSBaptiste Daroussin mandoc_strntoi(const char *p, size_t sz, int base) 62561d06d6bSBaptiste Daroussin { 62661d06d6bSBaptiste Daroussin char buf[32]; 62761d06d6bSBaptiste Daroussin char *ep; 62861d06d6bSBaptiste Daroussin long v; 62961d06d6bSBaptiste Daroussin 63061d06d6bSBaptiste Daroussin if (sz > 31) 63161d06d6bSBaptiste Daroussin return -1; 63261d06d6bSBaptiste Daroussin 63361d06d6bSBaptiste Daroussin memcpy(buf, p, sz); 63461d06d6bSBaptiste Daroussin buf[(int)sz] = '\0'; 63561d06d6bSBaptiste Daroussin 63661d06d6bSBaptiste Daroussin errno = 0; 63761d06d6bSBaptiste Daroussin v = strtol(buf, &ep, base); 63861d06d6bSBaptiste Daroussin 63961d06d6bSBaptiste Daroussin if (buf[0] == '\0' || *ep != '\0') 64061d06d6bSBaptiste Daroussin return -1; 64161d06d6bSBaptiste Daroussin 64261d06d6bSBaptiste Daroussin if (v > INT_MAX) 64361d06d6bSBaptiste Daroussin v = INT_MAX; 64461d06d6bSBaptiste Daroussin if (v < INT_MIN) 64561d06d6bSBaptiste Daroussin v = INT_MIN; 64661d06d6bSBaptiste Daroussin 64761d06d6bSBaptiste Daroussin return (int)v; 64861d06d6bSBaptiste Daroussin } 649