1 /* 2 * Copyright (C) 1984-2011 Mark Nudelman 3 * 4 * You may distribute under the terms of either the GNU General Public 5 * License or the Less License, as specified in the README file. 6 * 7 * For more information about less, or for information on how to 8 * contact the author, see the README file. 9 */ 10 11 /* 12 * Routines to convert text in various ways. Used by search. 13 */ 14 15 #include "less.h" 16 #include "charset.h" 17 18 extern int utf_mode; 19 20 /* 21 * Get the length of a buffer needed to convert a string. 22 */ 23 public int 24 cvt_length(len, ops) 25 int len; 26 int ops; 27 { 28 if (utf_mode) 29 /* 30 * Just copying a string in UTF-8 mode can cause it to grow 31 * in length. 32 * Four output bytes for one input byte is the worst case. 33 */ 34 len *= 4; 35 return (len + 1); 36 } 37 38 /* 39 * Allocate a chpos array for use by cvt_text. 40 */ 41 public int * 42 cvt_alloc_chpos(len) 43 int len; 44 { 45 int i; 46 int *chpos = (int *) ecalloc(sizeof(int), len); 47 /* Initialize all entries to an invalid position. */ 48 for (i = 0; i < len; i++) 49 chpos[i] = -1; 50 return (chpos); 51 } 52 53 /* 54 * Convert text. Perform the transformations specified by ops. 55 * Returns converted text in odst. The original offset of each 56 * odst character (when it was in osrc) is returned in the chpos array. 57 */ 58 public void 59 cvt_text(odst, osrc, chpos, lenp, ops) 60 char *odst; 61 char *osrc; 62 int *chpos; 63 int *lenp; 64 int ops; 65 { 66 char *dst; 67 char *src; 68 register char *src_end; 69 LWCHAR ch; 70 71 if (lenp != NULL) 72 src_end = osrc + *lenp; 73 else 74 src_end = osrc + strlen(osrc); 75 76 for (src = osrc, dst = odst; src < src_end; ) 77 { 78 int src_pos = src - osrc; 79 int dst_pos = dst - odst; 80 ch = step_char(&src, +1, src_end); 81 if ((ops & CVT_BS) && ch == '\b' && dst > odst) 82 { 83 /* Delete backspace and preceding char. */ 84 do { 85 dst--; 86 } while (dst > odst && 87 !IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst)); 88 } else if ((ops & CVT_ANSI) && IS_CSI_START(ch)) 89 { 90 /* Skip to end of ANSI escape sequence. */ 91 src++; /* skip the CSI start char */ 92 while (src < src_end) 93 if (!is_ansi_middle(*src++)) 94 break; 95 } else 96 { 97 /* Just copy the char to the destination buffer. */ 98 if ((ops & CVT_TO_LC) && IS_UPPER(ch)) 99 ch = TO_LOWER(ch); 100 put_wchar(&dst, ch); 101 /* 102 * Record the original position of the char. 103 * But if we've already recorded a position 104 * for this char (due to a backspace), leave 105 * it alone; if multiple source chars map to 106 * one destination char, we want the position 107 * of the first one. 108 */ 109 if (chpos != NULL && chpos[dst_pos] < 0) 110 chpos[dst_pos] = src_pos; 111 } 112 } 113 if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r') 114 dst--; 115 *dst = '\0'; 116 if (lenp != NULL) 117 *lenp = dst - odst; 118 if (chpos != NULL) 119 chpos[dst - odst] = src - osrc; 120 } 121