Lines Matching +full:128 +full:- +full:character
2 * Copyright (C) 1984-2025 Mark Nudelman
12 * Functions to define the character set
13 * and do things specific to the character set.
36 * Predefined character sets,
45 { "utf-8", &utf_mode, "8bcccbcc18b95.b126.bb" },
52 { "koi8-r", NULL, "8bcccbcc18b95.b." },
53 { "KOI8-T", NULL, "8bcccbcc18b95.b8.b6.b8.b.b.5b7.3b4.b4.b3.b.b.3b." },
56 { "TIS-620", NULL, "8bcccbcc18b95.b.4b.11b7.8b." },
59 { "windows-1251", NULL, "8bcccbcc12bc5b95.b24.b." },
60 { "windows-1252", NULL, "8bcccbcc12bc5b95.b.b11.b.2b12.b." },
61 { "windows-1255", NULL, "8bcccbcc12bc5b95.b.b8.b.5b9.b.4b." },
63 { "IBM-1047", NULL, "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" },
74 { "UTF-8", "utf-8" },
75 { "utf8", "utf-8" },
76 { "UTF8", "utf-8" },
77 { "ANSI_X3.4-1968", "ascii" },
78 { "US-ASCII", "ascii" },
80 { "ISO-8859-1", "iso8859" },
82 { "ISO-8859-15", "iso8859" },
84 { "ISO-8859-2", "iso8859" },
85 { "ISO-8859-3", "latin3" },
87 { "ISO-8859-4", "iso8859" },
89 { "ISO-8859-5", "iso8859" },
90 { "ISO-8859-6", "arabic" },
91 { "ISO-8859-7", "greek" },
93 { "ISO-8859-8", "hebrew" },
95 { "ISO-8859-9", "iso8859" },
97 { "ISO-8859-10", "iso8859" },
99 { "ISO-8859-13", "iso8859" },
101 { "ISO-8859-14", "iso8859" },
103 { "ISO-8859-16", "iso8859" },
105 { "EBCDIC-US", "ebcdic" },
106 { "IBM1047", "IBM-1047" },
107 { "KOI8-R", "koi8-r" },
108 { "KOI8-U", "koi8-r" },
109 { "GEORGIAN-PS", "georgianps" },
110 { "TCVN5712-1", "tcvn" },
112 { "windows", "windows-1252" }, /* backward compatibility */
113 { "CP1251", "windows-1251" },
114 { "CP1252", "windows-1252" },
115 { "CP1255", "windows-1255" },
143 tbl->table = (struct wchar_range *) arr->data; in wchar_range_table_set()
144 tbl->count = (unsigned int) (arr->end / sizeof(struct wchar_range)); in wchar_range_table_set()
158 * Parse a dash-separated range of hex values.
163 range->first = lstrtoulc(s, &s, 16); in wchar_range_get()
164 if (s[0] == '-') in wchar_range_get()
167 range->last = lstrtoulc(s, &s, 16); in wchar_range_get()
170 range->last = range->first; in wchar_range_get()
221 s--; in ichardef_utf()
224 /* Ignore unknown character attribute. */ in ichardef_utf()
240 * one for each character in the charset.
247 * . normal character
248 * b binary character
249 * c control character
276 if (ckd_mul(&n, n, 10) || ckd_add(&n, n, s[-1] - '0')) in ichardef()
296 } while (--n > 0); in ichardef()
317 for (a = cs_aliases; a->name != NULL; a++) in icharset()
319 if (strcmp(name, a->name) == 0) in icharset()
321 name = a->oname; in icharset()
326 for (p = charsets; p->name != NULL; p++) in icharset()
328 if (strcmp(name, p->name) == 0) in icharset()
330 ichardef(p->desc); in icharset()
331 if (p->p_flag != NULL) in icharset()
334 *(p->p_flag) = 1 + (GetConsoleOutputCP() != CP_UTF8); in icharset()
336 *(p->p_flag) = 1; in icharset()
440 * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used. in set_charset()
446 if ( strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL in set_charset()
448 if (icharset("utf-8", 1)) in set_charset()
455 * Get character definitions from locale functions, in set_charset()
462 (void) icharset("utf-8", 1); in set_charset()
467 (void) icharset("utf-8", 1); in set_charset()
493 * Is a given character a "binary" character?
505 * Is a given character a "control" character?
515 * Return the printable form of a character.
523 c &= 0377; /*{{type-issue}}*/ in prchar()
524 if ((c < 128 || !utf_mode) && !control_char(c)) in prchar()
533 * and should be kept in sync with CONTROL() and IBM-1047. in prchar()
541 else if (c < 128 && !control_char(c ^ 0100)) in prchar()
550 * Return the printable form of a UTF-8 character.
558 else if (ch < 128 && control_char(ch)) in prutfchar()
571 ch = 0xFFFD; /* REPLACEMENT CHARACTER */ in prutfchar()
579 * Get the length of a UTF-8 character in bytes.
597 /* Invalid UTF-8 encoding. */ in utf_len()
602 * Does the parameter point to the lead byte of a well-formed UTF-8 character?
624 unsigned char mask = (unsigned char) (~((1 << (8-len)) - 1)); in is_utf8_well_formed()
636 * Skip bytes until a UTF-8 lead byte (11xxxxxx) or ASCII byte (0xxxxxxx) is found.
647 * Get the value of a UTF-8 character.
700 * Store a character into a UTF-8 string.
749 * Step forward or backward one character in a string.
763 ch = (LWCHAR) (unsigned char) ((p > limit) ? *--p : 0); in step_charc()
782 while (p > limit && IS_UTF8_TRAIL(p[-1])) in step_charc()
783 p--; in step_charc()
788 len = utf_len(*--p); in step_charc()
791 p = *pp - 1; in step_charc()
853 if (table->table == NULL || table->count == 0 || ch < table->table[0].first) in is_in_table()
856 hi = table->count - 1; in is_in_table()
860 if (ch > table->table[mid].last) in is_in_table()
862 else if (ch < table->table[mid].first) in is_in_table()
863 hi = mid - 1; in is_in_table()
871 * Is a character in none of a set of specified user tables?
882 * Is a character a UTF-8 composing character?
883 * If a composing character follows any char, the two combine into one glyph.
895 * Should this UTF-8 character be treated as binary?
907 * Is this a double width UTF-8 character?
917 * Is this an omittable character?
927 * Is a character a UTF-8 combining character?