1 /* $FreeBSD$ */ 2 /* $NetBSD: msdosfs_conv.c,v 1.25 1997/11/17 15:36:40 ws Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-4-Clause 6 * 7 * Copyright (C) 1995, 1997 Wolfgang Solfrank. 8 * Copyright (C) 1995, 1997 TooLs GmbH. 9 * All rights reserved. 10 * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by TooLs GmbH. 23 * 4. The name of TooLs GmbH may not be used to endorse or promote products 24 * derived from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR 27 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 28 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 29 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 31 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 32 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 33 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 34 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 35 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 */ 37 /*- 38 * Written by Paul Popelka (paulp@uts.amdahl.com) 39 * 40 * You can do anything you want with this software, just don't say you wrote 41 * it, and don't remove this notice. 42 * 43 * This software is provided "as is". 44 * 45 * The author supplies this software to be publicly redistributed on the 46 * understanding that the author is not responsible for the correct 47 * functioning of this software in any circumstances and is not liable for 48 * any damages caused by this software. 49 * 50 * October 1992 51 */ 52 53 #include <sys/param.h> 54 #include <sys/systm.h> 55 #include <sys/dirent.h> 56 #include <sys/iconv.h> 57 #include <sys/mount.h> 58 59 #include <fs/msdosfs/bpb.h> 60 #include <fs/msdosfs/direntry.h> 61 #include <fs/msdosfs/msdosfsmount.h> 62 63 extern struct iconv_functions *msdosfs_iconv; 64 65 static int mbsadjpos(const char **, size_t, size_t, int, int, void *handle); 66 static u_char * dos2unixchr(u_char *, const u_char **, size_t *, int, struct msdosfsmount *); 67 static uint16_t unix2doschr(const u_char **, size_t *, struct msdosfsmount *); 68 static u_char * win2unixchr(u_char *, uint16_t, struct msdosfsmount *); 69 static uint16_t unix2winchr(const u_char **, size_t *, int, struct msdosfsmount *); 70 71 /* 72 * 0 - character disallowed in long file name. 73 * 1 - character should be replaced by '_' in DOS file name, 74 * and generation number inserted. 75 * 2 - character ('.' and ' ') should be skipped in DOS file name, 76 * and generation number inserted. 77 */ 78 static const u_char 79 unix2dos[256] = { 80 /* iso8859-1 -> cp850 */ 81 0, 0, 0, 0, 0, 0, 0, 0, /* 00-07 */ 82 0, 0, 0, 0, 0, 0, 0, 0, /* 08-0f */ 83 0, 0, 0, 0, 0, 0, 0, 0, /* 10-17 */ 84 0, 0, 0, 0, 0, 0, 0, 0, /* 18-1f */ 85 2, 0x21, 0, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */ 86 0x28, 0x29, 0, 1, 1, 0x2d, 2, 0, /* 28-2f */ 87 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */ 88 0x38, 0x39, 0, 1, 0, 1, 0, 0, /* 38-3f */ 89 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */ 90 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */ 91 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */ 92 0x58, 0x59, 0x5a, 1, 0, 1, 0x5e, 0x5f, /* 58-5f */ 93 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 60-67 */ 94 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 68-6f */ 95 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 70-77 */ 96 0x58, 0x59, 0x5a, 0x7b, 0, 0x7d, 0x7e, 0, /* 78-7f */ 97 0, 0, 0, 0, 0, 0, 0, 0, /* 80-87 */ 98 0, 0, 0, 0, 0, 0, 0, 0, /* 88-8f */ 99 0, 0, 0, 0, 0, 0, 0, 0, /* 90-97 */ 100 0, 0, 0, 0, 0, 0, 0, 0, /* 98-9f */ 101 0, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5, /* a0-a7 */ 102 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee, /* a8-af */ 103 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, /* b0-b7 */ 104 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8, /* b8-bf */ 105 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, /* c0-c7 */ 106 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, /* c8-cf */ 107 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, /* d0-d7 */ 108 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1, /* d8-df */ 109 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, /* e0-e7 */ 110 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, /* e8-ef */ 111 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0xf6, /* f0-f7 */ 112 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0x98, /* f8-ff */ 113 }; 114 115 static const u_char 116 dos2unix[256] = { 117 /* cp850 -> iso8859-1 */ 118 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 00-07 */ 119 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 08-0f */ 120 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 10-17 */ 121 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 18-1f */ 122 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */ 123 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */ 124 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */ 125 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */ 126 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */ 127 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */ 128 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */ 129 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */ 130 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */ 131 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */ 132 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */ 133 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */ 134 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, /* 80-87 */ 135 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, /* 88-8f */ 136 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, /* 90-97 */ 137 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x3f, /* 98-9f */ 138 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, /* a0-a7 */ 139 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, /* a8-af */ 140 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xc1, 0xc2, 0xc0, /* b0-b7 */ 141 0xa9, 0x3f, 0x3f, 0x3f, 0x3f, 0xa2, 0xa5, 0x3f, /* b8-bf */ 142 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xe3, 0xc3, /* c0-c7 */ 143 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xa4, /* c8-cf */ 144 0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x3f, 0xcd, 0xce, /* d0-d7 */ 145 0xcf, 0x3f, 0x3f, 0x3f, 0x3f, 0xa6, 0xcc, 0x3f, /* d8-df */ 146 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe, /* e0-e7 */ 147 0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0x3f, /* e8-ef */ 148 0xad, 0xb1, 0x3f, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, /* f0-f7 */ 149 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x3f, 0x3f, /* f8-ff */ 150 }; 151 152 static const u_char 153 u2l[256] = { 154 /* tolower */ 155 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 00-07 */ 156 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 08-0f */ 157 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 10-17 */ 158 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 18-1f */ 159 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */ 160 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */ 161 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */ 162 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */ 163 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 40-47 */ 164 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 48-4f */ 165 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 50-57 */ 166 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */ 167 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */ 168 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */ 169 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */ 170 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */ 171 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 80-87 */ 172 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 88-8f */ 173 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 90-97 */ 174 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 98-9f */ 175 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */ 176 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */ 177 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */ 178 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */ 179 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* c0-c7 */ 180 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* c8-cf */ 181 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, /* d0-d7 */ 182 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* d8-df */ 183 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* e0-e7 */ 184 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* e8-ef */ 185 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */ 186 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */ 187 }; 188 189 static const u_char 190 l2u[256] = { 191 /* toupper */ 192 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 00-07 */ 193 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 08-0f */ 194 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 10-17 */ 195 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 18-1f */ 196 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */ 197 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */ 198 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */ 199 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */ 200 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 40-47 */ 201 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 48-4f */ 202 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 50-57 */ 203 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */ 204 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */ 205 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */ 206 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */ 207 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */ 208 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 80-87 */ 209 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 88-8f */ 210 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 90-97 */ 211 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 98-9f */ 212 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */ 213 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */ 214 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */ 215 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */ 216 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* c0-c7 */ 217 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* c8-cf */ 218 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, /* d0-d7 */ 219 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* d8-df */ 220 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* e0-e7 */ 221 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* e8-ef */ 222 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */ 223 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */ 224 }; 225 226 /* 227 * DOS filenames are made of 2 parts, the name part and the extension part. 228 * The name part is 8 characters long and the extension part is 3 229 * characters long. They may contain trailing blanks if the name or 230 * extension are not long enough to fill their respective fields. 231 */ 232 233 /* 234 * Convert a DOS filename to a unix filename. And, return the number of 235 * characters in the resulting unix filename excluding the terminating 236 * null. 237 */ 238 int 239 dos2unixfn(u_char dn[11], u_char *un, int lower, struct msdosfsmount *pmp) 240 { 241 size_t i; 242 int thislong = 0; 243 u_char *c, tmpbuf[5]; 244 245 /* 246 * If first char of the filename is SLOT_E5 (0x05), then the real 247 * first char of the filename should be 0xe5. But, they couldn't 248 * just have a 0xe5 mean 0xe5 because that is used to mean a freed 249 * directory slot. Another dos quirk. 250 */ 251 if (*dn == SLOT_E5) 252 *dn = 0xe5; 253 254 /* 255 * Copy the name portion into the unix filename string. 256 */ 257 for (i = 8; i > 0 && *dn != ' ';) { 258 c = dos2unixchr(tmpbuf, __DECONST(const u_char **, &dn), &i, 259 lower & LCASE_BASE, pmp); 260 while (*c != '\0') { 261 *un++ = *c++; 262 thislong++; 263 } 264 } 265 dn += i; 266 267 /* 268 * Now, if there is an extension then put in a period and copy in 269 * the extension. 270 */ 271 if (*dn != ' ') { 272 *un++ = '.'; 273 thislong++; 274 for (i = 3; i > 0 && *dn != ' ';) { 275 c = dos2unixchr(tmpbuf, __DECONST(const u_char **, &dn), 276 &i, lower & LCASE_EXT, pmp); 277 while (*c != '\0') { 278 *un++ = *c++; 279 thislong++; 280 } 281 } 282 } 283 *un++ = 0; 284 285 return (thislong); 286 } 287 288 /* 289 * Convert a unix filename to a DOS filename according to Win95 rules. 290 * If applicable and gen is not 0, it is inserted into the converted 291 * filename as a generation number. 292 * Returns 293 * 0 if name couldn't be converted 294 * 1 if the converted name is the same as the original 295 * (no long filename entry necessary for Win95) 296 * 2 if conversion was successful 297 * 3 if conversion was successful and generation number was inserted 298 */ 299 int 300 unix2dosfn(const u_char *un, u_char dn[12], size_t unlen, u_int gen, 301 struct msdosfsmount *pmp) 302 { 303 ssize_t i, j; 304 int l; 305 int conv = 1; 306 const u_char *cp, *dp, *dp1; 307 u_char gentext[6], *wcp; 308 uint16_t c; 309 310 /* 311 * Fill the dos filename string with blanks. These are DOS's pad 312 * characters. 313 */ 314 for (i = 0; i < 11; i++) 315 dn[i] = ' '; 316 dn[11] = 0; 317 318 /* 319 * The filenames "." and ".." are handled specially, since they 320 * don't follow dos filename rules. 321 */ 322 if (un[0] == '.' && unlen == 1) { 323 dn[0] = '.'; 324 return gen <= 1; 325 } 326 if (un[0] == '.' && un[1] == '.' && unlen == 2) { 327 dn[0] = '.'; 328 dn[1] = '.'; 329 return gen <= 1; 330 } 331 332 /* 333 * Filenames with only blanks and dots are not allowed! 334 */ 335 for (cp = un, i = unlen; --i >= 0; cp++) 336 if (*cp != ' ' && *cp != '.') 337 break; 338 if (i < 0) 339 return 0; 340 341 342 /* 343 * Filenames with some characters are not allowed! 344 */ 345 for (cp = un, i = unlen; i > 0;) 346 if (unix2doschr(&cp, (size_t *)&i, pmp) == 0) 347 return 0; 348 349 /* 350 * Now find the extension 351 * Note: dot as first char doesn't start extension 352 * and trailing dots and blanks are ignored 353 * Note(2003/7): It seems recent Windows has 354 * defferent rule than this code, that Windows 355 * ignores all dots before extension, and use all 356 * chars as filename except for dots. 357 */ 358 dp = dp1 = NULL; 359 for (cp = un + 1, i = unlen - 1; --i >= 0;) { 360 switch (*cp++) { 361 case '.': 362 if (!dp1) 363 dp1 = cp; 364 break; 365 case ' ': 366 break; 367 default: 368 if (dp1) 369 dp = dp1; 370 dp1 = NULL; 371 break; 372 } 373 } 374 375 /* 376 * Now convert it (this part is for extension). 377 * As Windows XP do, if it's not ascii char, 378 * this function should return 2 or 3, so that checkng out Unicode name. 379 */ 380 if (dp) { 381 if (dp1) 382 l = dp1 - dp; 383 else 384 l = unlen - (dp - un); 385 for (cp = dp, i = l, j = 8; i > 0 && j < 11; j++) { 386 c = unix2doschr(&cp, (size_t *)&i, pmp); 387 if (c & 0xff00) { 388 dn[j] = c >> 8; 389 if (++j < 11) { 390 dn[j] = c; 391 if (conv != 3) 392 conv = 2; 393 continue; 394 } else { 395 conv = 3; 396 dn[j-1] = ' '; 397 break; 398 } 399 } else { 400 dn[j] = c; 401 } 402 if (((dn[j] & 0x80) || *(cp - 1) != dn[j]) && conv != 3) 403 conv = 2; 404 if (dn[j] == 1) { 405 conv = 3; 406 dn[j] = '_'; 407 } 408 if (dn[j] == 2) { 409 conv = 3; 410 dn[j--] = ' '; 411 } 412 } 413 if (i > 0) 414 conv = 3; 415 dp--; 416 } else { 417 for (dp = cp; *--dp == ' ' || *dp == '.';); 418 dp++; 419 } 420 421 /* 422 * Now convert the rest of the name 423 */ 424 for (i = dp - un, j = 0; un < dp && j < 8; j++) { 425 c = unix2doschr(&un, &i, pmp); 426 if (c & 0xff00) { 427 dn[j] = c >> 8; 428 if (++j < 8) { 429 dn[j] = c; 430 if (conv != 3) 431 conv = 2; 432 continue; 433 } else { 434 conv = 3; 435 dn[j-1] = ' '; 436 break; 437 } 438 } else { 439 dn[j] = c; 440 } 441 if (((dn[j] & 0x80) || *(un - 1) != dn[j]) && conv != 3) 442 conv = 2; 443 if (dn[j] == 1) { 444 conv = 3; 445 dn[j] = '_'; 446 } 447 if (dn[j] == 2) { 448 conv = 3; 449 dn[j--] = ' '; 450 } 451 } 452 if (un < dp) 453 conv = 3; 454 /* 455 * If we didn't have any chars in filename, 456 * generate a default 457 */ 458 if (!j) 459 dn[0] = '_'; 460 461 /* 462 * If there wasn't any char dropped, 463 * there is no place for generation numbers 464 */ 465 if (conv != 3) { 466 if (gen > 1) 467 conv = 0; 468 goto done; 469 } 470 471 /* 472 * Now insert the generation number into the filename part 473 */ 474 if (gen == 0) 475 goto done; 476 for (wcp = gentext + sizeof(gentext); wcp > gentext && gen; gen /= 10) 477 *--wcp = gen % 10 + '0'; 478 if (gen) { 479 conv = 0; 480 goto done; 481 } 482 for (i = 8; dn[--i] == ' ';); 483 i++; 484 if (gentext + sizeof(gentext) - wcp + 1 > 8 - i) 485 i = 8 - (gentext + sizeof(gentext) - wcp + 1); 486 /* 487 * Correct posision to where insert the generation number 488 */ 489 cp = dn; 490 i -= mbsadjpos((const char**)&cp, i, unlen, 1, pmp->pm_flags, pmp->pm_d2u); 491 492 dn[i++] = '~'; 493 while (wcp < gentext + sizeof(gentext)) 494 dn[i++] = *wcp++; 495 496 /* 497 * Tail of the filename should be space 498 */ 499 while (i < 8) 500 dn[i++] = ' '; 501 conv = 3; 502 503 done: 504 /* 505 * The first character cannot be E5, 506 * because that means a deleted entry 507 */ 508 if (dn[0] == 0xe5) 509 dn[0] = SLOT_E5; 510 511 return conv; 512 } 513 514 /* 515 * Create a Win95 long name directory entry 516 * Note: assumes that the filename is valid, 517 * i.e. doesn't consist solely of blanks and dots 518 */ 519 int 520 unix2winfn(const u_char *un, size_t unlen, struct winentry *wep, int cnt, 521 int chksum, struct msdosfsmount *pmp) 522 { 523 uint8_t *wcp; 524 int i, end; 525 uint16_t code; 526 527 /* 528 * Drop trailing blanks and dots 529 */ 530 unlen = winLenFixup(un, unlen); 531 532 /* 533 * Cut *un for this slot 534 */ 535 unlen = mbsadjpos((const char **)&un, unlen, (cnt - 1) * WIN_CHARS, 2, 536 pmp->pm_flags, pmp->pm_u2w); 537 538 /* 539 * Initialize winentry to some useful default 540 */ 541 memset(wep, 0xff, sizeof(*wep)); 542 wep->weCnt = cnt; 543 wep->weAttributes = ATTR_WIN95; 544 wep->weReserved1 = 0; 545 wep->weChksum = chksum; 546 wep->weReserved2 = 0; 547 548 /* 549 * Now convert the filename parts 550 */ 551 end = 0; 552 for (wcp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0 && !end;) { 553 code = unix2winchr(&un, &unlen, 0, pmp); 554 *wcp++ = code; 555 *wcp++ = code >> 8; 556 if (!code) 557 end = WIN_LAST; 558 } 559 for (wcp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0 && !end;) { 560 code = unix2winchr(&un, &unlen, 0, pmp); 561 *wcp++ = code; 562 *wcp++ = code >> 8; 563 if (!code) 564 end = WIN_LAST; 565 } 566 for (wcp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0 && !end;) { 567 code = unix2winchr(&un, &unlen, 0, pmp); 568 *wcp++ = code; 569 *wcp++ = code >> 8; 570 if (!code) 571 end = WIN_LAST; 572 } 573 if (*un == '\0') 574 end = WIN_LAST; 575 wep->weCnt |= end; 576 return !end; 577 } 578 579 /* 580 * Compare our filename to the one in the Win95 entry 581 * Returns the checksum or -1 if no match 582 */ 583 int 584 winChkName(struct mbnambuf *nbp, const u_char *un, size_t unlen, int chksum, 585 struct msdosfsmount *pmp) 586 { 587 size_t len; 588 uint16_t c1, c2; 589 u_char *np; 590 struct dirent dirbuf; 591 592 /* 593 * We already have winentry in *nbp. 594 */ 595 if (!mbnambuf_flush(nbp, &dirbuf) || dirbuf.d_namlen == 0) 596 return -1; 597 598 #ifdef MSDOSFS_DEBUG 599 printf("winChkName(): un=%s:%zu,d_name=%s:%d\n", un, unlen, 600 dirbuf.d_name, 601 dirbuf.d_namlen); 602 #endif 603 604 /* 605 * Compare the name parts 606 */ 607 len = dirbuf.d_namlen; 608 if (unlen != len) 609 return -2; 610 611 for (np = dirbuf.d_name; unlen > 0 && len > 0;) { 612 /* 613 * Comparison must be case insensitive, because FAT disallows 614 * to look up or create files in case sensitive even when 615 * it's a long file name. 616 */ 617 c1 = unix2winchr(__DECONST(const u_char **, &np), &len, 618 LCASE_BASE, pmp); 619 c2 = unix2winchr(&un, &unlen, LCASE_BASE, pmp); 620 if (c1 != c2) 621 return -2; 622 } 623 return chksum; 624 } 625 626 /* 627 * Convert Win95 filename to dirbuf. 628 * Returns the checksum or -1 if impossible 629 */ 630 int 631 win2unixfn(struct mbnambuf *nbp, struct winentry *wep, int chksum, 632 struct msdosfsmount *pmp) 633 { 634 u_char *c, tmpbuf[5]; 635 uint8_t *cp; 636 uint8_t *np, name[WIN_CHARS * 3 + 1]; 637 uint16_t code; 638 int i; 639 640 if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS) 641 || !(wep->weCnt&WIN_CNT)) 642 return -1; 643 644 /* 645 * First compare checksums 646 */ 647 if (wep->weCnt&WIN_LAST) { 648 chksum = wep->weChksum; 649 } else if (chksum != wep->weChksum) 650 chksum = -1; 651 if (chksum == -1) 652 return -1; 653 654 /* 655 * Convert the name parts 656 */ 657 np = name; 658 for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) { 659 code = (cp[1] << 8) | cp[0]; 660 switch (code) { 661 case 0: 662 *np = '\0'; 663 if (mbnambuf_write(nbp, name, 664 (wep->weCnt & WIN_CNT) - 1) != 0) 665 return -1; 666 return chksum; 667 case '/': 668 *np = '\0'; 669 return -1; 670 default: 671 c = win2unixchr(tmpbuf, code, pmp); 672 while (*c != '\0') 673 *np++ = *c++; 674 break; 675 } 676 cp += 2; 677 } 678 for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) { 679 code = (cp[1] << 8) | cp[0]; 680 switch (code) { 681 case 0: 682 *np = '\0'; 683 if (mbnambuf_write(nbp, name, 684 (wep->weCnt & WIN_CNT) - 1) != 0) 685 return -1; 686 return chksum; 687 case '/': 688 *np = '\0'; 689 return -1; 690 default: 691 c = win2unixchr(tmpbuf, code, pmp); 692 while (*c != '\0') 693 *np++ = *c++; 694 break; 695 } 696 cp += 2; 697 } 698 for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) { 699 code = (cp[1] << 8) | cp[0]; 700 switch (code) { 701 case 0: 702 *np = '\0'; 703 if (mbnambuf_write(nbp, name, 704 (wep->weCnt & WIN_CNT) - 1) != 0) 705 return -1; 706 return chksum; 707 case '/': 708 *np = '\0'; 709 return -1; 710 default: 711 c = win2unixchr(tmpbuf, code, pmp); 712 while (*c != '\0') 713 *np++ = *c++; 714 break; 715 } 716 cp += 2; 717 } 718 *np = '\0'; 719 if (mbnambuf_write(nbp, name, (wep->weCnt & WIN_CNT) - 1) != 0) 720 return -1; 721 return chksum; 722 } 723 724 /* 725 * Compute the unrolled checksum of a DOS filename for Win95 LFN use. 726 */ 727 uint8_t 728 winChksum(uint8_t *name) 729 { 730 int i; 731 uint8_t s; 732 733 for (s = 0, i = 11; --i >= 0; s += *name++) 734 s = (s << 7)|(s >> 1); 735 return (s); 736 } 737 738 /* 739 * Determine the number of slots necessary for Win95 names 740 */ 741 int 742 winSlotCnt(const u_char *un, size_t unlen, struct msdosfsmount *pmp) 743 { 744 size_t wlen; 745 char wn[WIN_MAXLEN * 2 + 1], *wnp; 746 747 unlen = winLenFixup(un, unlen); 748 749 if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) { 750 wlen = WIN_MAXLEN * 2; 751 wnp = wn; 752 msdosfs_iconv->conv(pmp->pm_u2w, (const char **)&un, &unlen, &wnp, &wlen); 753 if (unlen > 0) 754 return 0; 755 return howmany(WIN_MAXLEN - wlen/2, WIN_CHARS); 756 } 757 758 if (unlen > WIN_MAXLEN) 759 return 0; 760 return howmany(unlen, WIN_CHARS); 761 } 762 763 /* 764 * Determine the number of bytes necessary for Win95 names 765 */ 766 size_t 767 winLenFixup(const u_char *un, size_t unlen) 768 { 769 for (un += unlen; unlen > 0; unlen--) 770 if (*--un != ' ' && *un != '.') 771 break; 772 return unlen; 773 } 774 775 /* 776 * Store an area with multi byte string instr, and returns left 777 * byte of instr and moves pointer forward. The area's size is 778 * inlen or outlen. 779 */ 780 static int 781 mbsadjpos(const char **instr, size_t inlen, size_t outlen, int weight, int flag, void *handle) 782 { 783 char *outp, outstr[outlen * weight + 1]; 784 785 if (flag & MSDOSFSMNT_KICONV && msdosfs_iconv) { 786 outp = outstr; 787 outlen *= weight; 788 msdosfs_iconv->conv(handle, instr, &inlen, &outp, &outlen); 789 return (inlen); 790 } 791 792 (*instr) += min(inlen, outlen); 793 return (inlen - min(inlen, outlen)); 794 } 795 796 /* 797 * Convert DOS char to Local char 798 */ 799 static u_char * 800 dos2unixchr(u_char *outbuf, const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp) 801 { 802 u_char c, *outp; 803 size_t len, olen; 804 805 outp = outbuf; 806 if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) { 807 olen = len = 4; 808 809 if (lower & (LCASE_BASE | LCASE_EXT)) 810 msdosfs_iconv->convchr_case(pmp->pm_d2u, (const char **)instr, 811 ilen, (char **)&outp, &olen, KICONV_LOWER); 812 else 813 msdosfs_iconv->convchr(pmp->pm_d2u, (const char **)instr, 814 ilen, (char **)&outp, &olen); 815 len -= olen; 816 817 /* 818 * return '?' if failed to convert 819 */ 820 if (len == 0) { 821 (*ilen)--; 822 (*instr)++; 823 *outp++ = '?'; 824 } 825 } else { 826 (*ilen)--; 827 c = *(*instr)++; 828 c = dos2unix[c]; 829 if (lower & (LCASE_BASE | LCASE_EXT)) 830 c = u2l[c]; 831 *outp++ = c; 832 outbuf[1] = '\0'; 833 } 834 835 *outp = '\0'; 836 outp = outbuf; 837 return (outp); 838 } 839 840 /* 841 * Convert Local char to DOS char 842 */ 843 static uint16_t 844 unix2doschr(const u_char **instr, size_t *ilen, struct msdosfsmount *pmp) 845 { 846 u_char c; 847 char *up, *outp, unicode[3], outbuf[3]; 848 uint16_t wc; 849 size_t len, ucslen, unixlen, olen; 850 851 if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) { 852 /* 853 * to hide an invisible character, using a unicode filter 854 */ 855 ucslen = 2; 856 len = *ilen; 857 up = unicode; 858 msdosfs_iconv->convchr(pmp->pm_u2w, (const char **)instr, 859 ilen, &up, &ucslen); 860 unixlen = len - *ilen; 861 862 /* 863 * cannot be converted 864 */ 865 if (unixlen == 0) { 866 (*ilen)--; 867 (*instr)++; 868 return (0); 869 } 870 871 /* 872 * return magic number for ascii char 873 */ 874 if (unixlen == 1) { 875 c = *(*instr -1); 876 if (! (c & 0x80)) { 877 c = unix2dos[c]; 878 if (c <= 2) 879 return (c); 880 } 881 } 882 883 /* 884 * now convert using libiconv 885 */ 886 *instr -= unixlen; 887 *ilen = len; 888 889 olen = len = 2; 890 outp = outbuf; 891 msdosfs_iconv->convchr_case(pmp->pm_u2d, (const char **)instr, 892 ilen, &outp, &olen, KICONV_FROM_UPPER); 893 len -= olen; 894 895 /* 896 * cannot be converted, but has unicode char, should return magic number 897 */ 898 if (len == 0) { 899 (*ilen) -= unixlen; 900 (*instr) += unixlen; 901 return (1); 902 } 903 904 wc = 0; 905 while(len--) 906 wc |= (*(outp - len - 1) & 0xff) << (len << 3); 907 return (wc); 908 } 909 910 (*ilen)--; 911 c = *(*instr)++; 912 c = l2u[c]; 913 c = unix2dos[c]; 914 return ((uint16_t)c); 915 } 916 917 /* 918 * Convert Windows char to Local char 919 */ 920 static u_char * 921 win2unixchr(u_char *outbuf, uint16_t wc, struct msdosfsmount *pmp) 922 { 923 u_char *inp, *outp, inbuf[3]; 924 size_t ilen, olen, len; 925 926 outp = outbuf; 927 if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) { 928 inbuf[0] = (u_char)(wc>>8); 929 inbuf[1] = (u_char)wc; 930 inbuf[2] = '\0'; 931 932 ilen = 2; 933 olen = len = 4; 934 inp = inbuf; 935 msdosfs_iconv->convchr(pmp->pm_w2u, __DECONST(const char **, 936 &inp), &ilen, (char **)&outp, &olen); 937 len -= olen; 938 939 /* 940 * return '?' if failed to convert 941 */ 942 if (len == 0) 943 *outp++ = '?'; 944 } else { 945 *outp++ = (wc & 0xff00) ? '?' : (u_char)(wc & 0xff); 946 } 947 948 *outp = '\0'; 949 outp = outbuf; 950 return (outp); 951 } 952 953 /* 954 * Convert Local char to Windows char 955 */ 956 static uint16_t 957 unix2winchr(const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp) 958 { 959 u_char *outp, outbuf[3]; 960 uint16_t wc; 961 size_t olen; 962 963 if (*ilen == 0) 964 return (0); 965 966 if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) { 967 outp = outbuf; 968 olen = 2; 969 if (lower & (LCASE_BASE | LCASE_EXT)) 970 msdosfs_iconv->convchr_case(pmp->pm_u2w, (const char **)instr, 971 ilen, (char **)&outp, &olen, 972 KICONV_FROM_LOWER); 973 else 974 msdosfs_iconv->convchr(pmp->pm_u2w, (const char **)instr, 975 ilen, (char **)&outp, &olen); 976 977 /* 978 * return '0' if end of filename 979 */ 980 if (olen == 2) 981 return (0); 982 983 wc = (outbuf[0]<<8) | outbuf[1]; 984 985 return (wc); 986 } 987 988 (*ilen)--; 989 wc = (*instr)[0]; 990 if (lower & (LCASE_BASE | LCASE_EXT)) 991 wc = u2l[wc]; 992 (*instr)++; 993 return (wc); 994 } 995 996 /* 997 * Initialize the temporary concatenation buffer. 998 */ 999 void 1000 mbnambuf_init(struct mbnambuf *nbp) 1001 { 1002 1003 nbp->nb_len = 0; 1004 nbp->nb_last_id = -1; 1005 nbp->nb_buf[sizeof(nbp->nb_buf) - 1] = '\0'; 1006 } 1007 1008 /* 1009 * Fill out our concatenation buffer with the given substring, at the offset 1010 * specified by its id. Since this function must be called with ids in 1011 * descending order, we take advantage of the fact that ASCII substrings are 1012 * exactly WIN_CHARS in length. For non-ASCII substrings, we shift all 1013 * previous (i.e. higher id) substrings upwards to make room for this one. 1014 * This only penalizes portions of substrings that contain more than 1015 * WIN_CHARS bytes when they are first encountered. 1016 */ 1017 int 1018 mbnambuf_write(struct mbnambuf *nbp, char *name, int id) 1019 { 1020 char *slot; 1021 size_t count, newlen; 1022 1023 if (nbp->nb_len != 0 && id != nbp->nb_last_id - 1) { 1024 #ifdef MSDOSFS_DEBUG 1025 printf("msdosfs: non-decreasing id: id %d, last id %d\n", 1026 id, nbp->nb_last_id); 1027 #endif 1028 return (EINVAL); 1029 } 1030 1031 /* Will store this substring in a WIN_CHARS-aligned slot. */ 1032 slot = &nbp->nb_buf[id * WIN_CHARS]; 1033 count = strlen(name); 1034 newlen = nbp->nb_len + count; 1035 if (newlen > WIN_MAXLEN || newlen > MAXNAMLEN) { 1036 #ifdef MSDOSFS_DEBUG 1037 printf("msdosfs: file name length %zu too large\n", newlen); 1038 #endif 1039 return (ENAMETOOLONG); 1040 } 1041 1042 /* Shift suffix upwards by the amount length exceeds WIN_CHARS. */ 1043 if (count > WIN_CHARS && nbp->nb_len != 0) { 1044 if ((id * WIN_CHARS + count + nbp->nb_len) > 1045 sizeof(nbp->nb_buf)) 1046 return (ENAMETOOLONG); 1047 1048 memmove(slot + count, slot + WIN_CHARS, nbp->nb_len); 1049 } 1050 1051 /* Copy in the substring to its slot and update length so far. */ 1052 memcpy(slot, name, count); 1053 nbp->nb_len = newlen; 1054 nbp->nb_last_id = id; 1055 1056 return (0); 1057 } 1058 1059 /* 1060 * Take the completed string and use it to setup the struct dirent. 1061 * Be sure to always nul-terminate the d_name and then copy the string 1062 * from our buffer. Note that this function assumes the full string has 1063 * been reassembled in the buffer. If it's called before all substrings 1064 * have been written via mbnambuf_write(), the result will be incorrect. 1065 */ 1066 char * 1067 mbnambuf_flush(struct mbnambuf *nbp, struct dirent *dp) 1068 { 1069 1070 if (nbp->nb_len > sizeof(dp->d_name) - 1) { 1071 mbnambuf_init(nbp); 1072 return (NULL); 1073 } 1074 memcpy(dp->d_name, &nbp->nb_buf[0], nbp->nb_len); 1075 dp->d_name[nbp->nb_len] = '\0'; 1076 dp->d_namlen = nbp->nb_len; 1077 1078 mbnambuf_init(nbp); 1079 return (dp->d_name); 1080 } 1081