charset.c - OpenGrok cross reference for /freebsd/contrib/less/charset.c

Lines Matching +full:128 +full:- +full:character
2  * Copyright (C) 1984-2025  Mark Nudelman
12  * Functions to define the character set
13  * and do things specific to the character set.
36  * Predefined character sets,
45 		{ "utf-8",              &utf_mode,  "8bcccbcc18b95.b126.bb" },
52 		{ "koi8-r",             NULL,       "8bcccbcc18b95.b." },
53 		{ "KOI8-T",             NULL,       "8bcccbcc18b95.b8.b6.b8.b.b.5b7.3b4.b4.b3.b.b.3b." },
56 		{ "TIS-620",            NULL,       "8bcccbcc18b95.b.4b.11b7.8b." },
59 		{ "windows-1251",       NULL,       "8bcccbcc12bc5b95.b24.b." },
60 		{ "windows-1252",       NULL,       "8bcccbcc12bc5b95.b.b11.b.2b12.b." },
61 		{ "windows-1255",       NULL,       "8bcccbcc12bc5b95.b.b8.b.5b9.b.4b." },
63 		{ "IBM-1047",           NULL,       "4cbcbc3b9cbccbccbb4c6bcc5b3cbbc4bc4bccbc191.b" },
74 	{ "UTF-8",              "utf-8" },
75 	{ "utf8",               "utf-8" },
76 	{ "UTF8",               "utf-8" },
77 	{ "ANSI_X3.4-1968",     "ascii" },
78 	{ "US-ASCII",           "ascii" },
80 	{ "ISO-8859-1",         "iso8859" },
82 	{ "ISO-8859-15",        "iso8859" },
84 	{ "ISO-8859-2",         "iso8859" },
85 	{ "ISO-8859-3",         "latin3" },
87 	{ "ISO-8859-4",         "iso8859" },
89 	{ "ISO-8859-5",         "iso8859" },
90 	{ "ISO-8859-6",         "arabic" },
91 	{ "ISO-8859-7",         "greek" },
93 	{ "ISO-8859-8",         "hebrew" },
95 	{ "ISO-8859-9",         "iso8859" },
97 	{ "ISO-8859-10",        "iso8859" },
99 	{ "ISO-8859-13",        "iso8859" },
101 	{ "ISO-8859-14",        "iso8859" },
103 	{ "ISO-8859-16",        "iso8859" },
105 	{ "EBCDIC-US",          "ebcdic" },
106 	{ "IBM1047",            "IBM-1047" },
107 	{ "KOI8-R",             "koi8-r" },
108 	{ "KOI8-U",             "koi8-r" },
109 	{ "GEORGIAN-PS",        "georgianps" },
110 	{ "TCVN5712-1",         "tcvn" },
112 	{ "windows",            "windows-1252" }, /* backward compatibility */
113 	{ "CP1251",             "windows-1251" },
114 	{ "CP1252",             "windows-1252" },
115 	{ "CP1255",             "windows-1255" },
143 	tbl->table = (struct wchar_range *) arr->data;  in wchar_range_table_set()
144 	tbl->count = (unsigned int) (arr->end / sizeof(struct wchar_range));  in wchar_range_table_set()
158  * Parse a dash-separated range of hex values.
163 	range->first = lstrtoulc(s, &s, 16);  in wchar_range_get()
164 	if (s[0] == '-')  in wchar_range_get()
167 		range->last = lstrtoulc(s, &s, 16);  in wchar_range_get()
170 		range->last = range->first;  in wchar_range_get()
221 				s--;  in ichardef_utf()
224 				/* Ignore unknown character attribute. */  in ichardef_utf()
240  * one for each character in the charset.
247  *      . normal character
248  *      b binary character
249  *      c control character
276 			if (ckd_mul(&n, n, 10) || ckd_add(&n, n, s[-1] - '0'))  in ichardef()
296 		} while (--n > 0);  in ichardef()
317 	for (a = cs_aliases;  a->name != NULL;  a++)  in icharset()
319 		if (strcmp(name, a->name) == 0)  in icharset()
321 			name = a->oname;  in icharset()
326 	for (p = charsets;  p->name != NULL;  p++)  in icharset()
328 		if (strcmp(name, p->name) == 0)  in icharset()
330 			ichardef(p->desc);  in icharset()
331 			if (p->p_flag != NULL)  in icharset()
334 				*(p->p_flag) = 1 + (GetConsoleOutputCP() != CP_UTF8);  in icharset()
336 				*(p->p_flag) = 1;  in icharset()
440 	 * Check whether LC_ALL, LC_CTYPE or LANG look like UTF-8 is used.  in set_charset()
446 		if (   strstr(s, "UTF-8") != NULL || strstr(s, "utf-8") != NULL  in set_charset()
448 			if (icharset("utf-8", 1))  in set_charset()
455 	 * Get character definitions from locale functions,  in set_charset()
462 	(void) icharset("utf-8", 1);  in set_charset()
467 	(void) icharset("utf-8", 1);  in set_charset()
493  * Is a given character a "binary" character?
505  * Is a given character a "control" character?
515  * Return the printable form of a character.
523 	c &= 0377; /*{{type-issue}}*/  in prchar()
524 	if ((c < 128 || !utf_mode) && !control_char(c))  in prchar()
533 		 * and should be kept in sync with CONTROL() and IBM-1047.  in prchar()
541 	else if (c < 128 && !control_char(c ^ 0100))  in prchar()
550  * Return the printable form of a UTF-8 character.
558 	else if (ch < 128 && control_char(ch))  in prutfchar()
571 			ch = 0xFFFD; /* REPLACEMENT CHARACTER */  in prutfchar()
579  * Get the length of a UTF-8 character in bytes.
597 	/* Invalid UTF-8 encoding. */  in utf_len()
602  * Does the parameter point to the lead byte of a well-formed UTF-8 character?
624 		unsigned char mask = (unsigned char) (~((1 << (8-len)) - 1));  in is_utf8_well_formed()
636  * Skip bytes until a UTF-8 lead byte (11xxxxxx) or ASCII byte (0xxxxxxx) is found.
647  * Get the value of a UTF-8 character.
700  * Store a character into a UTF-8 string.
749  * Step forward or backward one character in a string.
763 			ch = (LWCHAR) (unsigned char) ((p > limit) ? *--p : 0);  in step_charc()
782 		while (p > limit && IS_UTF8_TRAIL(p[-1]))  in step_charc()
783 			p--;  in step_charc()
788 			len = utf_len(*--p);  in step_charc()
791 				p = *pp - 1;  in step_charc()
853 	if (table->table == NULL || table->count == 0 || ch < table->table[0].first)  in is_in_table()
856 	hi = table->count - 1;  in is_in_table()
860 		if (ch > table->table[mid].last)  in is_in_table()
862 		else if (ch < table->table[mid].first)  in is_in_table()
863 			hi = mid - 1;  in is_in_table()
871  * Is a character in none of a set of specified user tables?
882  * Is a character a UTF-8 composing character?
883  * If a composing character follows any char, the two combine into one glyph.
895  * Should this UTF-8 character be treated as binary?
907  * Is this a double width UTF-8 character?
917  * Is this an omittable character?
927  * Is a character a UTF-8 combining character?