1 /* $NetBSD: citrus_csmapper.c,v 1.11 2011/11/20 07:43:52 tnozaki Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2003 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/endian.h> 33 #include <sys/types.h> 34 #include <sys/queue.h> 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <limits.h> 39 #include <paths.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 #include "citrus_namespace.h" 45 #include "citrus_types.h" 46 #include "citrus_bcs.h" 47 #include "citrus_region.h" 48 #include "citrus_lock.h" 49 #include "citrus_memstream.h" 50 #include "citrus_mmap.h" 51 #include "citrus_module.h" 52 #include "citrus_hash.h" 53 #include "citrus_mapper.h" 54 #include "citrus_csmapper.h" 55 #include "citrus_pivot_file.h" 56 #include "citrus_db.h" 57 #include "citrus_db_hash.h" 58 #include "citrus_lookup.h" 59 60 static struct _citrus_mapper_area *maparea = NULL; 61 62 static pthread_rwlock_t ma_lock = PTHREAD_RWLOCK_INITIALIZER; 63 64 #define CS_ALIAS _PATH_CSMAPPER "/charset.alias" 65 #define CS_PIVOT _PATH_CSMAPPER "/charset.pivot" 66 67 68 /* ---------------------------------------------------------------------- */ 69 70 static int 71 get32(struct _region *r, uint32_t *rval) 72 { 73 74 if (_region_size(r) != 4) 75 return (EFTYPE); 76 77 memcpy(rval, _region_head(r), (size_t)4); 78 *rval = be32toh(*rval); 79 80 return (0); 81 } 82 83 static int 84 open_subdb(struct _citrus_db **subdb, struct _citrus_db *db, const char *src) 85 { 86 struct _region r; 87 int ret; 88 89 ret = _db_lookup_by_s(db, src, &r, NULL); 90 if (ret) 91 return (ret); 92 ret = _db_open(subdb, &r, _CITRUS_PIVOT_SUB_MAGIC, _db_hash_std, NULL); 93 if (ret) 94 return (ret); 95 96 return (0); 97 } 98 99 100 #define NO_SUCH_FILE EOPNOTSUPP 101 static int 102 find_best_pivot_pvdb(const char *src, const char *dst, char *pivot, 103 size_t pvlen, unsigned long *rnorm) 104 { 105 struct _citrus_db *db1, *db2, *db3; 106 struct _region fr, r1, r2; 107 char buf[LINE_MAX]; 108 uint32_t val32; 109 unsigned long norm; 110 int i, num, ret; 111 112 ret = _map_file(&fr, CS_PIVOT ".pvdb"); 113 if (ret) { 114 if (ret == ENOENT) 115 ret = NO_SUCH_FILE; 116 return (ret); 117 } 118 ret = _db_open(&db1, &fr, _CITRUS_PIVOT_MAGIC, _db_hash_std, NULL); 119 if (ret) 120 goto quit1; 121 ret = open_subdb(&db2, db1, src); 122 if (ret) 123 goto quit2; 124 125 num = _db_get_num_entries(db2); 126 *rnorm = ULONG_MAX; 127 for (i = 0; i < num; i++) { 128 /* iterate each pivot */ 129 ret = _db_get_entry(db2, i, &r1, &r2); 130 if (ret) 131 goto quit3; 132 /* r1:pivot name, r2:norm among src and pivot */ 133 ret = get32(&r2, &val32); 134 if (ret) 135 goto quit3; 136 norm = val32; 137 snprintf(buf, sizeof(buf), "%.*s", 138 (int)_region_size(&r1), (char *)_region_head(&r1)); 139 /* buf: pivot name */ 140 ret = open_subdb(&db3, db1, buf); 141 if (ret) 142 goto quit3; 143 if (_db_lookup_by_s(db3, dst, &r2, NULL) != 0) 144 /* don't break the loop, test all src/dst pairs. */ 145 goto quit4; 146 /* r2: norm among pivot and dst */ 147 ret = get32(&r2, &val32); 148 if (ret) 149 goto quit4; 150 norm += val32; 151 /* judge minimum norm */ 152 if (norm < *rnorm) { 153 *rnorm = norm; 154 strlcpy(pivot, buf, pvlen); 155 } 156 quit4: 157 _db_close(db3); 158 if (ret) 159 goto quit3; 160 } 161 quit3: 162 _db_close(db2); 163 quit2: 164 _db_close(db1); 165 quit1: 166 _unmap_file(&fr); 167 if (ret) 168 return (ret); 169 170 if (*rnorm == ULONG_MAX) 171 return (ENOENT); 172 173 return (0); 174 } 175 176 /* ---------------------------------------------------------------------- */ 177 178 struct zone { 179 const char *begin, *end; 180 }; 181 182 struct parse_arg { 183 char dst[PATH_MAX]; 184 unsigned long norm; 185 }; 186 187 static int 188 parse_line(struct parse_arg *pa, struct _region *r) 189 { 190 struct zone z1, z2; 191 char buf[20]; 192 size_t len; 193 194 len = _region_size(r); 195 z1.begin = _bcs_skip_ws_len(_region_head(r), &len); 196 if (len == 0) 197 return (EFTYPE); 198 z1.end = _bcs_skip_nonws_len(z1.begin, &len); 199 if (len == 0) 200 return (EFTYPE); 201 z2.begin = _bcs_skip_ws_len(z1.end, &len); 202 if (len == 0) 203 return (EFTYPE); 204 z2.end = _bcs_skip_nonws_len(z2.begin, &len); 205 206 /* z1 : dst name, z2 : norm */ 207 snprintf(pa->dst, sizeof(pa->dst), 208 "%.*s", (int)(z1.end-z1.begin), z1.begin); 209 snprintf(buf, sizeof(buf), 210 "%.*s", (int)(z2.end-z2.begin), z2.begin); 211 pa->norm = _bcs_strtoul(buf, NULL, 0); 212 213 return (0); 214 } 215 216 static int 217 find_dst(struct parse_arg *pasrc, const char *dst) 218 { 219 struct _lookup *cl; 220 struct parse_arg padst; 221 struct _region data; 222 int ret; 223 224 ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE); 225 if (ret) 226 return (ret); 227 228 ret = _lookup_seq_lookup(cl, pasrc->dst, &data); 229 while (ret == 0) { 230 ret = parse_line(&padst, &data); 231 if (ret) 232 break; 233 if (strcmp(dst, padst.dst) == 0) { 234 pasrc->norm += padst.norm; 235 break; 236 } 237 ret = _lookup_seq_next(cl, NULL, &data); 238 } 239 _lookup_seq_close(cl); 240 241 return (ret); 242 } 243 244 static int 245 find_best_pivot_lookup(const char *src, const char *dst, char *pivot, 246 size_t pvlen, unsigned long *rnorm) 247 { 248 struct _lookup *cl; 249 struct _region data; 250 struct parse_arg pa; 251 char pivot_min[PATH_MAX]; 252 unsigned long norm_min; 253 int ret; 254 255 ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE); 256 if (ret) 257 return (ret); 258 259 norm_min = ULONG_MAX; 260 261 /* find pivot code */ 262 ret = _lookup_seq_lookup(cl, src, &data); 263 while (ret == 0) { 264 ret = parse_line(&pa, &data); 265 if (ret) 266 break; 267 ret = find_dst(&pa, dst); 268 if (ret) 269 break; 270 if (pa.norm < norm_min) { 271 norm_min = pa.norm; 272 strlcpy(pivot_min, pa.dst, sizeof(pivot_min)); 273 } 274 ret = _lookup_seq_next(cl, NULL, &data); 275 } 276 _lookup_seq_close(cl); 277 278 if (ret != ENOENT) 279 return (ret); 280 if (norm_min == ULONG_MAX) 281 return (ENOENT); 282 strlcpy(pivot, pivot_min, pvlen); 283 if (rnorm) 284 *rnorm = norm_min; 285 286 return (0); 287 } 288 289 static int 290 find_best_pivot(const char *src, const char *dst, char *pivot, size_t pvlen, 291 unsigned long *rnorm) 292 { 293 int ret; 294 295 ret = find_best_pivot_pvdb(src, dst, pivot, pvlen, rnorm); 296 if (ret == NO_SUCH_FILE) 297 ret = find_best_pivot_lookup(src, dst, pivot, pvlen, rnorm); 298 299 return (ret); 300 } 301 302 static __inline int 303 open_serial_mapper(struct _citrus_mapper_area *__restrict ma, 304 struct _citrus_mapper * __restrict * __restrict rcm, 305 const char *src, const char *pivot, const char *dst) 306 { 307 char buf[PATH_MAX]; 308 309 snprintf(buf, sizeof(buf), "%s/%s,%s/%s", src, pivot, pivot, dst); 310 311 return (_mapper_open_direct(ma, rcm, "mapper_serial", buf)); 312 } 313 314 static struct _citrus_csmapper *csm_none = NULL; 315 static int 316 get_none(struct _citrus_mapper_area *__restrict ma, 317 struct _citrus_csmapper *__restrict *__restrict rcsm) 318 { 319 int ret; 320 321 WLOCK(&ma_lock); 322 if (csm_none) { 323 *rcsm = csm_none; 324 ret = 0; 325 goto quit; 326 } 327 328 ret = _mapper_open_direct(ma, &csm_none, "mapper_none", ""); 329 if (ret) 330 goto quit; 331 _mapper_set_persistent(csm_none); 332 333 *rcsm = csm_none; 334 ret = 0; 335 quit: 336 UNLOCK(&ma_lock); 337 return (ret); 338 } 339 340 int 341 _citrus_csmapper_open(struct _citrus_csmapper * __restrict * __restrict rcsm, 342 const char * __restrict src, const char * __restrict dst, uint32_t flags, 343 unsigned long *rnorm) 344 { 345 const char *realsrc, *realdst; 346 char buf1[PATH_MAX], buf2[PATH_MAX], key[PATH_MAX], pivot[PATH_MAX]; 347 unsigned long norm; 348 int ret; 349 350 norm = 0; 351 352 ret = _citrus_mapper_create_area(&maparea, _PATH_CSMAPPER); 353 if (ret) 354 return (ret); 355 356 realsrc = _lookup_alias(CS_ALIAS, src, buf1, sizeof(buf1), 357 _LOOKUP_CASE_IGNORE); 358 realdst = _lookup_alias(CS_ALIAS, dst, buf2, sizeof(buf2), 359 _LOOKUP_CASE_IGNORE); 360 if (!strcmp(realsrc, realdst)) { 361 ret = get_none(maparea, rcsm); 362 if (ret == 0 && rnorm != NULL) 363 *rnorm = 0; 364 return (ret); 365 } 366 367 snprintf(key, sizeof(key), "%s/%s", realsrc, realdst); 368 369 ret = _mapper_open(maparea, rcsm, key); 370 if (ret == 0) { 371 if (rnorm != NULL) 372 *rnorm = 0; 373 return (0); 374 } 375 if (ret != ENOENT || (flags & _CSMAPPER_F_PREVENT_PIVOT)!=0) 376 return (ret); 377 378 ret = find_best_pivot(realsrc, realdst, pivot, sizeof(pivot), &norm); 379 if (ret) 380 return (ret); 381 382 ret = open_serial_mapper(maparea, rcsm, realsrc, pivot, realdst); 383 if (ret == 0 && rnorm != NULL) 384 *rnorm = norm; 385 386 return (ret); 387 } 388