/*
 * Copyright (C) 1984-2000  Mark Nudelman
 *
 * You may distribute under the terms of either the GNU General Public
 * License or the Less License, as specified in the README file.
 *
 * For more information about less, or for information on how to
 * contact the author, see the README file.
 */


/*
 * Functions to define the character set
 * and do things specific to the character set.
 */

#include "less.h"
#if HAVE_LOCALE
#include <locale.h>
#include <ctype.h>
#endif

/*
 * Set to 1 by icharset() when the "utf-8" charset is selected
 * (through the p_flag pointer of that entry in the table below).
 */
public int utf_mode = 0;

/*
 * Predefined character sets,
 * selected by the LESSCHARSET environment variable.
 *
 *	name	charset name; icharset() matches it exactly with strcmp().
 *	p_flag	if not NULL, an int that icharset() sets to 1 when this
 *		charset is chosen.
 *	desc	description string in the format parsed by ichardef().
 *
 * The table ends with an entry whose name is NULL.
 */
struct charset {
	char *name;
	int *p_flag;
	char *desc;
} charsets[] = {
	{ "ascii",	NULL,       "8bcccbcc18b95.b" },
	{ "dos",	NULL,       "8bcccbcc12bc5b95.b." },
	{ "ebcdic",	NULL,       "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." },
	{ "iso8859",	NULL,       "8bcccbcc18b95.33b." },
	{ "koi8-r",	NULL,       "8bcccbcc18b95.b128." },
	{ "latin1",	NULL,       "8bcccbcc18b95.33b." },
	{ "next",	NULL,       "8bcccbcc18b95.bb125.bb" },
	{ "utf-8",	&utf_mode,  "8bcccbcc18b." },
	{ NULL, NULL, NULL }
};

/*
 * Flag bits stored in chardef[].
 * binary_char() tests IS_BINARY_CHAR; control_char() tests IS_CONTROL_CHAR.
 * ichardef() and ilocale() set both bits for a "binary" character.
 */
#define	IS_BINARY_CHAR	01
#define	IS_CONTROL_CHAR	02

/* Per-character flags, indexed by character value (0..255). */
static char chardef[256];
/*
 * printf-style format installed by setbinfmt(); prchar() uses it to
 * format characters that have no shorter printable form.
 */
static char *binfmt = NULL;
/* Display attribute chosen by setbinfmt() from an optional "*x" prefix. */
public int binattr = AT_STANDOUT;


/*
 * Define a charset, given a description string.
 * The string consists of 256 letters,
 * one for each character in the charset.
 * If the string is shorter than 256 letters, missing letters
 * are taken to be identical to the last one.
 * A decimal number followed by a letter is taken to be a
 * repetition of the letter.
 *
 * Each letter is one of:
 *	.
normal character 64 * b binary character 65 * c control character 66 */ 67 static void 68 ichardef(s) 69 char *s; 70 { 71 register char *cp; 72 register int n; 73 register char v; 74 75 n = 0; 76 v = 0; 77 cp = chardef; 78 while (*s != '\0') 79 { 80 switch (*s++) 81 { 82 case '.': 83 v = 0; 84 break; 85 case 'c': 86 v = IS_CONTROL_CHAR; 87 break; 88 case 'b': 89 v = IS_BINARY_CHAR|IS_CONTROL_CHAR; 90 break; 91 92 case '0': case '1': case '2': case '3': case '4': 93 case '5': case '6': case '7': case '8': case '9': 94 n = (10 * n) + (s[-1] - '0'); 95 continue; 96 97 default: 98 error("invalid chardef", NULL_PARG); 99 quit(QUIT_ERROR); 100 /*NOTREACHED*/ 101 } 102 103 do 104 { 105 if (cp >= chardef + sizeof(chardef)) 106 { 107 error("chardef longer than 256", NULL_PARG); 108 quit(QUIT_ERROR); 109 /*NOTREACHED*/ 110 } 111 *cp++ = v; 112 } while (--n > 0); 113 n = 0; 114 } 115 116 while (cp < chardef + sizeof(chardef)) 117 *cp++ = v; 118 } 119 120 /* 121 * Define a charset, given a charset name. 122 * The valid charset names are listed in the "charsets" array. 123 */ 124 static int 125 icharset(name) 126 register char *name; 127 { 128 register struct charset *p; 129 130 if (name == NULL || *name == '\0') 131 return (0); 132 133 for (p = charsets; p->name != NULL; p++) 134 { 135 if (strcmp(name, p->name) == 0) 136 { 137 ichardef(p->desc); 138 if (p->p_flag != NULL) 139 *(p->p_flag) = 1; 140 return (1); 141 } 142 } 143 144 error("invalid charset name", NULL_PARG); 145 quit(QUIT_ERROR); 146 /*NOTREACHED*/ 147 } 148 149 #if HAVE_LOCALE 150 /* 151 * Define a charset, given a locale name. 152 */ 153 static void 154 ilocale() 155 { 156 register int c; 157 158 setlocale(LC_ALL, ""); 159 for (c = 0; c < (int) sizeof(chardef); c++) 160 { 161 if (isprint(c)) 162 chardef[c] = 0; 163 else if (iscntrl(c)) 164 chardef[c] = IS_CONTROL_CHAR; 165 else 166 chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR; 167 } 168 } 169 #endif 170 171 /* 172 * Define the printing format for control chars. 
173 */ 174 public void 175 setbinfmt(s) 176 char *s; 177 { 178 if (s == NULL || *s == '\0') 179 s = "*s<%X>"; 180 /* 181 * Select the attributes if it starts with "*". 182 */ 183 if (*s == '*') 184 { 185 switch (s[1]) 186 { 187 case 'd': binattr = AT_BOLD; break; 188 case 'k': binattr = AT_BLINK; break; 189 case 's': binattr = AT_STANDOUT; break; 190 case 'u': binattr = AT_UNDERLINE; break; 191 default: binattr = AT_NORMAL; break; 192 } 193 s += 2; 194 } 195 binfmt = s; 196 } 197 198 /* 199 * Initialize charset data structures. 200 */ 201 public void 202 init_charset() 203 { 204 register char *s; 205 206 s = lgetenv("LESSBINFMT"); 207 setbinfmt(s); 208 209 /* 210 * See if environment variable LESSCHARSET is defined. 211 */ 212 s = lgetenv("LESSCHARSET"); 213 if (icharset(s)) 214 return; 215 /* 216 * LESSCHARSET is not defined: try LESSCHARDEF. 217 */ 218 s = lgetenv("LESSCHARDEF"); 219 if (s != NULL && *s != '\0') 220 { 221 ichardef(s); 222 return; 223 } 224 225 #if HAVE_STRSTR 226 /* 227 * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used. 228 */ 229 if ((s = lgetenv("LC_ALL")) != NULL || 230 (s = lgetenv("LC_CTYPE")) != NULL || 231 (s = lgetenv("LANG")) != NULL) 232 { 233 if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL) 234 if (icharset("utf-8")) 235 return; 236 } 237 #endif 238 239 #if HAVE_LOCALE 240 /* 241 * Use setlocale. 242 */ 243 ilocale(); 244 #else 245 /* 246 * Default to "latin1". 247 */ 248 (void) icharset("latin1"); 249 #endif 250 } 251 252 /* 253 * Is a given character a "binary" character? 254 */ 255 public int 256 binary_char(c) 257 unsigned char c; 258 { 259 c &= 0377; 260 return (chardef[c] & IS_BINARY_CHAR); 261 } 262 263 /* 264 * Is a given character a "control" character? 265 */ 266 public int 267 control_char(c) 268 int c; 269 { 270 c &= 0377; 271 return (chardef[c] & IS_CONTROL_CHAR); 272 } 273 274 /* 275 * Return the printable form of a character. 276 * For example, in the "ascii" charset '\3' is printed as "^C". 
277 */ 278 public char * 279 prchar(c) 280 int c; 281 { 282 static char buf[8]; 283 284 c &= 0377; 285 if (!control_char(c)) 286 sprintf(buf, "%c", c); 287 else if (c == ESC) 288 sprintf(buf, "ESC"); 289 else if (c < 128 && !control_char(c ^ 0100)) 290 sprintf(buf, "^%c", c ^ 0100); 291 else 292 sprintf(buf, binfmt, c); 293 return (buf); 294 } 295