1 /* 2 * Copyright (C) 1984-2000 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information about less, or for information on how to 8 * contact the author, see the README file. 9 */ 10 11 12 /* 13 * Functions to define the character set 14 * and do things specific to the character set. 15 */ 16 17 #include "less.h" 18 #if HAVE_LOCALE 19 #include <locale.h> 20 #include <ctype.h> 21 #endif 22 23 public int utf_mode = 0; 24 25 /* 26 * Predefined character sets, 27 * selected by the LESSCHARSET environment variable. 28 */ 29 struct charset { 30 char *name; 31 int *p_flag; 32 char *desc; 33 } charsets[] = { 34 { "ascii", NULL, "8bcccbcc18b95.b" }, 35 { "dos", NULL, "8bcccbcc12bc5b223.b" }, 36 { "ebcdic", NULL, "5bc6bcc7bcc41b.9b7.9b5.b..8b6.10b6.b9.7b9.8b8.17b3.3b9.7b9.8b8.6b10.b.b.b." }, 37 { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" }, 38 { "iso8859", NULL, "8bcccbcc18b95.33b." }, 39 { "koi8-r", NULL, "8bcccbcc18b95.b128." }, 40 { "next", NULL, "8bcccbcc18b95.bb125.bb" }, 41 { "utf-8", &utf_mode, "8bcccbcc18b." }, 42 { NULL, NULL, NULL } 43 }; 44 45 struct cs_alias { 46 char *name; 47 char *oname; 48 } cs_aliases[] = { 49 { "latin1", "iso8859" }, 50 { "latin9", "iso8859" }, 51 { NULL, NULL } 52 }; 53 54 #define IS_BINARY_CHAR 01 55 #define IS_CONTROL_CHAR 02 56 57 static char chardef[256]; 58 static char *binfmt = NULL; 59 public int binattr = AT_STANDOUT; 60 61 62 /* 63 * Define a charset, given a description string. 64 * The string consists of 256 letters, 65 * one for each character in the charset. 66 * If the string is shorter than 256 letters, missing letters 67 * are taken to be identical to the last one. 68 * A decimal number followed by a letter is taken to be a 69 * repetition of the letter. 70 * 71 * Each letter is one of: 72 * . normal character 73 * b binary character 74 * c control character 75 */ 76 static void 77 ichardef(s) 78 char *s; 79 { 80 register char *cp; 81 register int n; 82 register char v; 83 84 n = 0; 85 v = 0; 86 cp = chardef; 87 while (*s != '\0') 88 { 89 switch (*s++) 90 { 91 case '.': 92 v = 0; 93 break; 94 case 'c': 95 v = IS_CONTROL_CHAR; 96 break; 97 case 'b': 98 v = IS_BINARY_CHAR|IS_CONTROL_CHAR; 99 break; 100 101 case '0': case '1': case '2': case '3': case '4': 102 case '5': case '6': case '7': case '8': case '9': 103 n = (10 * n) + (s[-1] - '0'); 104 continue; 105 106 default: 107 error("invalid chardef", NULL_PARG); 108 quit(QUIT_ERROR); 109 /*NOTREACHED*/ 110 } 111 112 do 113 { 114 if (cp >= chardef + sizeof(chardef)) 115 { 116 error("chardef longer than 256", NULL_PARG); 117 quit(QUIT_ERROR); 118 /*NOTREACHED*/ 119 } 120 *cp++ = v; 121 } while (--n > 0); 122 n = 0; 123 } 124 125 while (cp < chardef + sizeof(chardef)) 126 *cp++ = v; 127 } 128 129 /* 130 * Define a charset, given a charset name. 131 * The valid charset names are listed in the "charsets" array. 132 */ 133 static int 134 icharset(name) 135 register char *name; 136 { 137 register struct charset *p; 138 register struct cs_alias *a; 139 140 if (name == NULL || *name == '\0') 141 return (0); 142 143 /* First see if the name is an alias. */ 144 for (a = cs_aliases; a->name != NULL; a++) 145 { 146 if (strcmp(name, a->name) == 0) 147 { 148 name = a->oname; 149 break; 150 } 151 } 152 153 for (p = charsets; p->name != NULL; p++) 154 { 155 if (strcmp(name, p->name) == 0) 156 { 157 ichardef(p->desc); 158 if (p->p_flag != NULL) 159 *(p->p_flag) = 1; 160 return (1); 161 } 162 } 163 164 error("invalid charset name", NULL_PARG); 165 quit(QUIT_ERROR); 166 /*NOTREACHED*/ 167 } 168 169 #if HAVE_LOCALE 170 /* 171 * Define a charset, given a locale name. 172 */ 173 static void 174 ilocale() 175 { 176 register int c; 177 178 setlocale(LC_ALL, ""); 179 for (c = 0; c < (int) sizeof(chardef); c++) 180 { 181 if (isprint(c)) 182 chardef[c] = 0; 183 else if (iscntrl(c)) 184 chardef[c] = IS_CONTROL_CHAR; 185 else 186 chardef[c] = IS_BINARY_CHAR|IS_CONTROL_CHAR; 187 } 188 } 189 #endif 190 191 /* 192 * Define the printing format for control chars. 193 */ 194 public void 195 setbinfmt(s) 196 char *s; 197 { 198 if (s == NULL || *s == '\0') 199 s = "*s<%X>"; 200 /* 201 * Select the attributes if it starts with "*". 202 */ 203 if (*s == '*') 204 { 205 switch (s[1]) 206 { 207 case 'd': binattr = AT_BOLD; break; 208 case 'k': binattr = AT_BLINK; break; 209 case 's': binattr = AT_STANDOUT; break; 210 case 'u': binattr = AT_UNDERLINE; break; 211 default: binattr = AT_NORMAL; break; 212 } 213 s += 2; 214 } 215 binfmt = s; 216 } 217 218 /* 219 * Initialize charset data structures. 220 */ 221 public void 222 init_charset() 223 { 224 register char *s; 225 226 s = lgetenv("LESSBINFMT"); 227 setbinfmt(s); 228 229 /* 230 * See if environment variable LESSCHARSET is defined. 231 */ 232 s = lgetenv("LESSCHARSET"); 233 if (icharset(s)) 234 return; 235 /* 236 * LESSCHARSET is not defined: try LESSCHARDEF. 237 */ 238 s = lgetenv("LESSCHARDEF"); 239 if (s != NULL && *s != '\0') 240 { 241 ichardef(s); 242 return; 243 } 244 245 #if HAVE_STRSTR 246 /* 247 * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used. 248 */ 249 if ((s = lgetenv("LC_ALL")) != NULL || 250 (s = lgetenv("LC_CTYPE")) != NULL || 251 (s = lgetenv("LANG")) != NULL) 252 { 253 if (strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL) 254 if (icharset("utf-8")) 255 return; 256 } 257 #endif 258 259 #if HAVE_LOCALE 260 /* 261 * Use setlocale. 262 */ 263 ilocale(); 264 #else 265 #if MSDOS_COMPILER 266 /* 267 * Default to "dos". 268 */ 269 (void) icharset("dos"); 270 #else 271 /* 272 * Default to "latin1". 273 */ 274 (void) icharset("latin1"); 275 #endif 276 #endif 277 } 278 279 /* 280 * Is a given character a "binary" character? 281 */ 282 public int 283 binary_char(c) 284 unsigned char c; 285 { 286 c &= 0377; 287 return (chardef[c] & IS_BINARY_CHAR); 288 } 289 290 /* 291 * Is a given character a "control" character? 292 */ 293 public int 294 control_char(c) 295 int c; 296 { 297 c &= 0377; 298 return (chardef[c] & IS_CONTROL_CHAR); 299 } 300 301 /* 302 * Return the printable form of a character. 303 * For example, in the "ascii" charset '\3' is printed as "^C". 304 */ 305 public char * 306 prchar(c) 307 int c; 308 { 309 static char buf[8]; 310 311 c &= 0377; 312 if (!control_char(c)) 313 sprintf(buf, "%c", c); 314 else if (c == ESC) 315 sprintf(buf, "ESC"); 316 #if IS_EBCDIC_HOST 317 else if (!binary_char(c) && c < 64) 318 sprintf(buf, "^%c", 319 /* 320 * This array roughly inverts CONTROL() #defined in less.h, 321 * and should be kept in sync with CONTROL() and IBM-1047. 322 */ 323 "@ABC.I.?...KLMNO" 324 "PQRS.JH.XY.." 325 "\\]^_" 326 "......W[.....EFG" 327 "..V....D....TU.Z"[c]); 328 #else 329 else if (c < 128 && !control_char(c ^ 0100)) 330 sprintf(buf, "^%c", c ^ 0100); 331 #endif 332 else 333 sprintf(buf, binfmt, c); 334 return (buf); 335 } 336