1 /* 2 * Copyright (C) 1984-2002 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information about less, or for information on how to 8 * contact the author, see the README file. 9 */ 10 11 12 /* 13 * Functions to define the character set 14 * and do things specific to the character set. 15 */ 16 17 #include "less.h" 18 #if HAVE_LOCALE 19 #include <locale.h> 20 #include <ctype.h> 21 #endif 22 23 public int utf_mode = 0; 24 25 /* 26 * Predefined character sets, 27 * selected by the LESSCHARSET environment variable. 28 */ 29 struct charset { 30 char *name; 31 int *p_flag; 32 char *desc; 33 } charsets[] = { 34 { "ascii", NULL, "8bcccbcc18b95.b" }, 35 { "dos", NULL, "8bcccbcc12bc5b223.b" }, 36 { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." }, 37 { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" }, 38 { "iso8859", NULL, "8bcccbcc18b95.33b." }, 39 { "koi8-r", NULL, "8bcccbcc18b95.b128." }, 40 { "next", NULL, "8bcccbcc18b95.bb125.bb" }, 41 { "utf-8", &utf_mode, "8bcccbcc18b." }, 42 { NULL, NULL, NULL } 43 }; 44 45 struct cs_alias { 46 char *name; 47 char *oname; 48 } cs_aliases[] = { 49 { "latin1", "iso8859" }, 50 { "latin9", "iso8859" }, 51 { NULL, NULL } 52 }; 53 54 #define IS_BINARY_CHAR 01 55 #define IS_CONTROL_CHAR 02 56 57 static char chardef[256]; 58 static char *binfmt = NULL; 59 public int binattr = AT_STANDOUT; 60 61 62 /* 63 * Define a charset, given a description string. 64 * The string consists of 256 letters, 65 * one for each character in the charset. 66 * If the string is shorter than 256 letters, missing letters 67 * are taken to be identical to the last one. 68 * A decimal number followed by a letter is taken to be a 69 * repetition of the letter. 70 * 71 * Each letter is one of: 72 * . normal character 73 * b binary character 74 * c control character 75 */ 76 static void 77 ichardef(s) 78 char *s; 79 { 80 register char *cp; 81 register int n; 82 register char v; 83 84 n = 0; 85 v = 0; 86 cp = chardef; 87 while (*s != '\0') 88 { 89 switch (*s++) 90 { 91 case '.': 92 v = 0; 93 break; 94 case 'c': 95 v = IS_CONTROL_CHAR; 96 break; 97 case 'b': 98 v = IS_BINARY_CHAR|IS_CONTROL_CHAR; 99 break; 100 101 case '0': case '1': case '2': case '3': case '4': 102 case '5': case '6': case '7': case '8': case '9': 103 n = (10 * n) + (s[-1] - '0'); 104 continue; 105 106 default: 107 error("invalid chardef", NULL_PARG); 108 quit(QUIT_ERROR); 109 /*NOTREACHED*/ 110 } 111 112 do 113 { 114 if (cp >= chardef + sizeof(chardef)) 115 { 116 error("chardef longer than 256", NULL_PARG); 117 quit(QUIT_ERROR); 118 /*NOTREACHED*/ 119 } 120 *cp++ = v; 121 } while (--n > 0); 122 n = 0; 123 } 124 125 while (cp < chardef + sizeof(chardef)) 126 *cp++ = v; 127 } 128 129 /* 130 * Define a charset, given a charset name. 131 * The valid charset names are listed in the "charsets" array. 132 */ 133 static int 134 icharset(name) 135 register char *name; 136 { 137 register struct charset *p; 138 register struct cs_alias *a; 139 140 if (name == NULL || *name == '\0') 141 return (0); 142 143 /* First see if the name is an alias. */ 144 for (a = cs_aliases; a->name != NULL; a++) 145 { 146 if (strcmp(name, a->name) == 0) 147 { 148 name = a->oname; 149 break; 150 } 151 } 152 153 for (p = charsets; p->name != NULL; p++) 154 { 155 if (strcmp(name, p->name) == 0) 156 { 157 ichardef(p->desc); 158 if (p->p_flag != NULL) 159 *(p->p_flag) = 1; 160 return (1); 161 } 162 } 163 164 error("invalid charset name", NULL_PARG); 165 quit(QUIT_ERROR); 166 /*NOTREACHED*/ 167 return (0); 168 } 169 170 #if HAVE_LOCALE 171 /* 172 * Define a charset, given a locale name. 173 */ 174 static void 175 ilocale() 176 { 177 register int c; 178 179 setlocale(LC_ALL, ""); 180 for (c = 0; c < (int) sizeof(chardef); c++) 181 { 182 if (isprint(c)) 183 chardef[c] = 0; 184 else if (iscntrl(c)) 185 chardef[c] = IS_CONTROL_CHAR; 186 else 187 chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR; 188 } 189 } 190 #endif 191 192 /* 193 * Define the printing format for control chars. 194 */ 195 public void 196 setbinfmt(s) 197 char *s; 198 { 199 if (s == NULL || *s == '\0') 200 s = "*s<%X>"; 201 /* 202 * Select the attributes if it starts with "*". 203 */ 204 if (*s == '*') 205 { 206 switch (s[1]) 207 { 208 case 'd': binattr = AT_BOLD; break; 209 case 'k': binattr = AT_BLINK; break; 210 case 's': binattr = AT_STANDOUT; break; 211 case 'u': binattr = AT_UNDERLINE; break; 212 default: binattr = AT_NORMAL; break; 213 } 214 s += 2; 215 } 216 binfmt = s; 217 } 218 219 /* 220 * Initialize charset data structures. 221 */ 222 public void 223 init_charset() 224 { 225 register char *s; 226 227 s = lgetenv("LESSBINFMT"); 228 setbinfmt(s); 229 230 /* 231 * See if environment variable LESSCHARSET is defined. 232 */ 233 s = lgetenv("LESSCHARSET"); 234 if (icharset(s)) 235 return; 236 /* 237 * LESSCHARSET is not defined: try LESSCHARDEF. 238 */ 239 s = lgetenv("LESSCHARDEF"); 240 if (s != NULL && *s != '\0') 241 { 242 ichardef(s); 243 return; 244 } 245 246 #if HAVE_STRSTR 247 /* 248 * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used. 249 */ 250 if ((s = lgetenv("LC_ALL")) != NULL || 251 (s = lgetenv("LC_CTYPE")) != NULL || 252 (s = lgetenv("LANG")) != NULL) 253 { 254 if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL) 255 if (icharset("utf-8")) 256 return; 257 } 258 #endif 259 260 #if HAVE_LOCALE 261 /* 262 * Use setlocale. 263 */ 264 ilocale(); 265 #else 266 #if MSDOS_COMPILER 267 /* 268 * Default to "dos". 269 */ 270 (void) icharset("dos"); 271 #else 272 /* 273 * Default to "latin1". 274 */ 275 (void) icharset("latin1"); 276 #endif 277 #endif 278 } 279 280 /* 281 * Is a given character a "binary" character? 282 */ 283 public int 284 binary_char(c) 285 unsigned char c; 286 { 287 c &= 0377; 288 return (chardef[c] & IS_BINARY_CHAR); 289 } 290 291 /* 292 * Is a given character a "control" character? 293 */ 294 public int 295 control_char(c) 296 int c; 297 { 298 c &= 0377; 299 return (chardef[c] & IS_CONTROL_CHAR); 300 } 301 302 /* 303 * Return the printable form of a character. 304 * For example, in the "ascii" charset '\3' is printed as "^C". 305 */ 306 public char * 307 prchar(c) 308 int c; 309 { 310 static char buf[8]; 311 312 c &= 0377; 313 if (!control_char(c)) 314 sprintf(buf, "%c", c); 315 else if (c == ESC) 316 sprintf(buf, "ESC"); 317 #if IS_EBCDIC_HOST 318 else if (!binary_char(c) && c < 64) 319 sprintf(buf, "^%c", 320 /* 321 * This array roughly inverts CONTROL() #defined in less.h, 322 * and should be kept in sync with CONTROL() and IBM-1047. 323 */ 324 "@ABC.I.?...KLMNO" 325 "PQRS.JH.XY.." 326 "\\]^_" 327 "......W[.....EFG" 328 "..V....D....TU.Z"[c]); 329 #else 330 else if (c < 128 && !control_char(c ^ 0100)) 331 sprintf(buf, "^%c", c ^ 0100); 332 #endif 333 else 334 sprintf(buf, binfmt, c); 335 return (buf); 336 } 337