xref: /illumos-gate/usr/src/lib/libc/port/locale/strftime.c (revision 2833423dc59f4c35fe4713dbb942950c82df0437)
1 /*
2  * Copyright 2013 Garrett D'Amore <garrett@damore.org>
3  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1989 The Regents of the University of California.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms are permitted
8  * provided that the above copyright notice and this paragraph are
9  * duplicated in all such forms and that any documentation,
10  * advertising materials, and other materials related to such
11  * distribution and use acknowledge that the software was developed
12  * by the University of California, Berkeley. The name of the
13  * University may not be used to endorse or promote products derived
14  * from this software without specific prior written permission.
15  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
18  */
19 
20 #include "lint.h"
21 #include "tzfile.h"
22 #include <fcntl.h>
23 #include <stdint.h>
24 #include <sys/stat.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <locale.h>
28 #include "timelocal.h"
29 #include "localeimpl.h"
30 
/*
 * Forward declarations for the file-local helpers defined further down:
 *   _add   - append a string to the output, bounded by a limit pointer
 *   _conv  - format one int with a printf-style format and append it
 *   _fmt   - expand a strftime format string (recursive workhorse)
 *   _yconv - emit the century and/or two-digit-year part of a year
 */
static char *_add(const char *, char *, const char *);
static char *_conv(int, const char *, char *, const char *);
static char *_fmt(locale_t, const char *, const struct tm *, char *,
    const char * const);
static char *_yconv(int, int, int, int, char *, const char *);

/* Time zone abbreviations; _fmt() indexes this with (tm_isdst != 0) for %Z. */
extern char *tzname[];
38 
/*
 * NOTE(review): the IN_* constants are not referenced by any code visible
 * in this file; they are retained unchanged in case other code relies on
 * them.
 */
#define	IN_NONE	0
#define	IN_SOME	1
#define	IN_THIS	2
#define	IN_ALL	3

/*
 * Padding styles.  These are the column indices of fmt_padding[] below;
 * _fmt() selects one per conversion from the optional '-', '_' or '0'
 * flag that follows the '%'.
 */
#define	PAD_DEFAULT	0
#define	PAD_LESS	1
#define	PAD_SPACE	2
#define	PAD_ZERO	3

/*
 * printf-style formats for numeric conversions, indexed as
 * fmt_padding[PAD_FMT_xxx][PAD_yyy].  Each row is a family of fields
 * sharing the same width and default padding; each column is a padding
 * style.  Both the table and the strings it points at are read-only.
 */
static const char * const fmt_padding[][4] = {
	/* DEFAULT,	LESS,	SPACE,	ZERO */
#define	PAD_FMT_MONTHDAY	0
#define	PAD_FMT_HMS		0
#define	PAD_FMT_CENTURY		0
#define	PAD_FMT_SHORTYEAR	0
#define	PAD_FMT_MONTH		0
#define	PAD_FMT_WEEKOFYEAR	0
#define	PAD_FMT_DAYOFMONTH	0
	/* two columns wide, zero padded by default */
	{ "%02d",	"%d",	"%2d",	"%02d" },
#define	PAD_FMT_SDAYOFMONTH	1
#define	PAD_FMT_SHMS		1
	/* two columns wide, space padded by default */
	{ "%2d",	"%d",	"%2d",	"%02d" },
#define	PAD_FMT_DAYOFYEAR	2
	/* three columns wide, zero padded by default */
	{ "%03d",	"%d",	"%3d",	"%03d" },
#define	PAD_FMT_YEAR		3
	/* four columns wide, zero padded by default */
	{ "%04d",	"%d",	"%4d",	"%04d" }
};
67 
/*
 * Compute the end-of-buffer limit pointer for a buffer that starts at `s`
 * and is `maxsize` bytes long.
 *
 * If a caller (such as ascftime) passes in a very large maximum size,
 * the address computation may overflow, wrap around the end of
 * memory, and, if used as-is, would cause subsequent range checks to fail
 * spuriously, resulting in corrupted output even if there is sufficient
 * room in the buffer for the requested format.
 *
 * To prevent this, we replace an overflowed result with the largest
 * possible pointer value.
 *
 * The sum is formed in uintptr_t arithmetic rather than as `s + maxsize`:
 * forming a pointer outside the underlying object is undefined behaviour,
 * which permits a compiler to assume the wrap-around cannot happen and to
 * discard the check.  Unsigned integer wrap-around is well defined.
 */
static char *
_ptlim(char *s, size_t maxsize)
{
	uintptr_t start = (uintptr_t)s;
	uintptr_t end = start + maxsize;

	/* Wrapped past the top of the address space: clamp. */
	if (end < start)
		end = UINTPTR_MAX;

	return ((char *)end);
}
88 
89 size_t
90 strftime_l(char *_RESTRICT_KYWD s, size_t maxsize,
91     const char *_RESTRICT_KYWD format, const struct tm *_RESTRICT_KYWD t,
92     locale_t loc)
93 {
94 	char *p;
95 	char *e = _ptlim(s, maxsize);
96 
97 	tzset();
98 	p = _fmt(loc, ((format == NULL) ? "%c" : format), t, s, e);
99 	if (p == e)
100 		return (0);
101 	*p = '\0';
102 	return (p - s);
103 }
104 
105 size_t
106 strftime(char *_RESTRICT_KYWD s, size_t maxsize,
107     const char *_RESTRICT_KYWD format, const struct tm *_RESTRICT_KYWD t)
108 {
109 	return (strftime_l(s, maxsize, format, t, uselocale(NULL)));
110 }
111 
112 static char *
113 _fmt(locale_t loc, const char *format, const struct tm *t, char *pt,
114     const char * const ptlim)
115 {
116 	int Ealternative, Oalternative, PadIndex;
117 	const struct lc_time *tptr = loc->time;
118 
119 #define	PADDING(x)	fmt_padding[x][PadIndex]
120 
121 	for (; *format; ++format) {
122 		if (*format == '%') {
123 			Ealternative = 0;
124 			Oalternative = 0;
125 			PadIndex	 = PAD_DEFAULT;
126 label:
127 			switch (*++format) {
128 			case '\0':
129 				--format;
130 				break;
131 			case 'A':
132 				pt = _add((t->tm_wday < 0 ||
133 				    t->tm_wday >= DAYSPERWEEK) ?
134 				    "?" : tptr->weekday[t->tm_wday],
135 				    pt, ptlim);
136 				continue;
137 			case 'a':
138 				pt = _add((t->tm_wday < 0 ||
139 				    t->tm_wday >= DAYSPERWEEK) ?
140 				    "?" : tptr->wday[t->tm_wday],
141 				    pt, ptlim);
142 				continue;
143 			case 'B':
144 				pt = _add((t->tm_mon < 0 ||
145 				    t->tm_mon >= MONSPERYEAR) ?
146 				    "?" : (tptr->month)[t->tm_mon],
147 				    pt, ptlim);
148 				continue;
149 			case 'b':
150 			case 'h':
151 				pt = _add((t->tm_mon < 0 ||
152 				    t->tm_mon >= MONSPERYEAR) ?
153 				    "?" : tptr->mon[t->tm_mon],
154 				    pt, ptlim);
155 				continue;
156 			case 'C':
157 				/*
158 				 * %C used to do a...
159 				 *	_fmt("%a %b %e %X %Y", t);
160 				 * ...whereas now POSIX 1003.2 calls for
161 				 * something completely different.
162 				 * (ado, 1993-05-24)
163 				 */
164 				pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 0,
165 				    pt, ptlim);
166 				continue;
167 			case 'c':
168 				pt = _fmt(loc, tptr->c_fmt, t, pt, ptlim);
169 				continue;
170 			case 'D':
171 				pt = _fmt(loc, "%m/%d/%y", t, pt, ptlim);
172 				continue;
173 			case 'd':
174 				pt = _conv(t->tm_mday,
175 				    PADDING(PAD_FMT_DAYOFMONTH), pt, ptlim);
176 				continue;
177 			case 'E':
178 				if (Ealternative || Oalternative)
179 					break;
180 				Ealternative++;
181 				goto label;
182 			case 'O':
183 				/*
184 				 * C99 locale modifiers.
185 				 * The sequences
186 				 *	%Ec %EC %Ex %EX %Ey %EY
187 				 *	%Od %oe %OH %OI %Om %OM
188 				 *	%OS %Ou %OU %OV %Ow %OW %Oy
189 				 * are supposed to provide alternate
190 				 * representations.
191 				 */
192 				if (Ealternative || Oalternative)
193 					break;
194 				Oalternative++;
195 				goto label;
196 			case 'e':
197 				pt = _conv(t->tm_mday,
198 				    PADDING(PAD_FMT_SDAYOFMONTH), pt, ptlim);
199 				continue;
200 			case 'F':
201 				pt = _fmt(loc, "%Y-%m-%d", t, pt, ptlim);
202 				continue;
203 			case 'H':
204 				pt = _conv(t->tm_hour, PADDING(PAD_FMT_HMS),
205 				    pt, ptlim);
206 				continue;
207 			case 'I':
208 				pt = _conv((t->tm_hour % 12) ?
209 				    (t->tm_hour % 12) : 12,
210 				    PADDING(PAD_FMT_HMS), pt, ptlim);
211 				continue;
212 			case 'j':
213 				pt = _conv(t->tm_yday + 1,
214 				    PADDING(PAD_FMT_DAYOFYEAR), pt, ptlim);
215 				continue;
216 			case 'k':
217 				/*
218 				 * This used to be...
219 				 *	_conv(t->tm_hour % 12 ?
220 				 *		t->tm_hour % 12 : 12, 2, ' ');
221 				 * ...and has been changed to the below to
222 				 * match SunOS 4.1.1 and Arnold Robbins'
223 				 * strftime version 3.0. That is, "%k" and
224 				 * "%l" have been swapped.
225 				 * (ado, 1993-05-24)
226 				 */
227 				pt = _conv(t->tm_hour,
228 				    PADDING(PAD_FMT_SHMS), pt, ptlim);
229 				continue;
230 			case 'l':
231 				/*
232 				 * This used to be...
233 				 *	_conv(t->tm_hour, 2, ' ');
234 				 * ...and has been changed to the below to
235 				 * match SunOS 4.1.1 and Arnold Robbin's
236 				 * strftime version 3.0. That is, "%k" and
237 				 * "%l" have been swapped.
238 				 * (ado, 1993-05-24)
239 				 */
240 				pt = _conv((t->tm_hour % 12) ?
241 				    (t->tm_hour % 12) : 12,
242 				    PADDING(PAD_FMT_SHMS), pt, ptlim);
243 				continue;
244 			case 'M':
245 				pt = _conv(t->tm_min, PADDING(PAD_FMT_HMS),
246 				    pt, ptlim);
247 				continue;
248 			case 'm':
249 				pt = _conv(t->tm_mon + 1,
250 				    PADDING(PAD_FMT_MONTH),
251 				    pt, ptlim);
252 				continue;
253 			case 'n':
254 				pt = _add("\n", pt, ptlim);
255 				continue;
256 			case 'p':
257 				pt = _add((t->tm_hour >= (HOURSPERDAY / 2)) ?
258 				    tptr->pm : tptr->am, pt, ptlim);
259 				continue;
260 			case 'R':
261 				pt = _fmt(loc, "%H:%M", t, pt, ptlim);
262 				continue;
263 			case 'r':
264 				pt = _fmt(loc, tptr->ampm_fmt, t, pt, ptlim);
265 				continue;
266 			case 'S':
267 				pt = _conv(t->tm_sec, PADDING(PAD_FMT_HMS),
268 				    pt, ptlim);
269 				continue;
270 
271 			case 's':
272 			{
273 				struct tm tm;
274 				char *buf;
275 
276 				tm = *t;
277 				(void) asprintf(&buf, "%ld", mktime(&tm));
278 				pt = _add(buf, pt, ptlim);
279 				continue;
280 			}
281 
282 			case 'T':
283 				pt = _fmt(loc, "%H:%M:%S", t, pt, ptlim);
284 				continue;
285 			case 't':
286 				pt = _add("\t", pt, ptlim);
287 				continue;
288 			case 'U':
289 				pt = _conv((t->tm_yday + DAYSPERWEEK -
290 				    t->tm_wday) / DAYSPERWEEK,
291 				    PADDING(PAD_FMT_WEEKOFYEAR),
292 				    pt, ptlim);
293 				continue;
294 			case 'u':
295 				/*
296 				 * From Arnold Robbins' strftime version 3.0:
297 				 * "ISO 8601: Weekday as a decimal number
298 				 * [1 (Monday) - 7]"
299 				 * (ado, 1993-05-24)
300 				 */
301 				pt = _conv((t->tm_wday == 0) ?
302 				    DAYSPERWEEK : t->tm_wday,
303 				    "%d", pt, ptlim);
304 				continue;
305 			case 'V':	/* ISO 8601 week number */
306 			case 'G':	/* ISO 8601 year (four digits) */
307 			case 'g':	/* ISO 8601 year (two digits) */
308 /*
309  * From Arnold Robbins' strftime version 3.0: "the week number of the
310  * year (the first Monday as the first day of week 1) as a decimal number
311  * (01-53)."
312  * (ado, 1993-05-24)
313  *
314  * From "http://www.ft.uni-erlangen.de/~mskuhn/iso-time.html" by Markus Kuhn:
315  * "Week 01 of a year is per definition the first week which has the
316  * Thursday in this year, which is equivalent to the week which contains
317  * the fourth day of January. In other words, the first week of a new year
318  * is the week which has the majority of its days in the new year. Week 01
319  * might also contain days from the previous year and the week before week
320  * 01 of a year is the last week (52 or 53) of the previous year even if
321  * it contains days from the new year. A week starts with Monday (day 1)
322  * and ends with Sunday (day 7). For example, the first week of the year
323  * 1997 lasts from 1996-12-30 to 1997-01-05..."
324  * (ado, 1996-01-02)
325  */
326 			{
327 				int	year;
328 				int	base;
329 				int	yday;
330 				int	wday;
331 				int	w;
332 
333 				year = t->tm_year;
334 				base = TM_YEAR_BASE;
335 				yday = t->tm_yday;
336 				wday = t->tm_wday;
337 				for (;;) {
338 					int	len;
339 					int	bot;
340 					int	top;
341 
342 					len = isleap_sum(year, base) ?
343 					    DAYSPERLYEAR : DAYSPERNYEAR;
344 					/*
345 					 * What yday (-3 ... 3) does
346 					 * the ISO year begin on?
347 					 */
348 					bot = ((yday + 11 - wday) %
349 					    DAYSPERWEEK) - 3;
350 					/*
351 					 * What yday does the NEXT
352 					 * ISO year begin on?
353 					 */
354 					top = bot - (len % DAYSPERWEEK);
355 					if (top < -3)
356 						top += DAYSPERWEEK;
357 					top += len;
358 					if (yday >= top) {
359 						++base;
360 						w = 1;
361 						break;
362 					}
363 					if (yday >= bot) {
364 						w = 1 + ((yday - bot) /
365 						    DAYSPERWEEK);
366 						break;
367 					}
368 					--base;
369 					yday += isleap_sum(year, base) ?
370 					    DAYSPERLYEAR : DAYSPERNYEAR;
371 				}
372 #ifdef XPG4_1994_04_09
373 				if ((w == 52 && t->tm_mon == TM_JANUARY) ||
374 				    (w == 1 && t->tm_mon == TM_DECEMBER))
375 					w = 53;
376 #endif /* defined XPG4_1994_04_09 */
377 				if (*format == 'V')
378 					pt = _conv(w,
379 					    PADDING(PAD_FMT_WEEKOFYEAR),
380 					    pt, ptlim);
381 				else if (*format == 'g') {
382 					pt = _yconv(year, base, 0, 1,
383 					    pt, ptlim);
384 				} else
385 					pt = _yconv(year, base, 1, 1,
386 					    pt, ptlim);
387 			}
388 				continue;
389 			case 'v':
390 				/*
391 				 * From Arnold Robbins' strftime version 3.0:
392 				 * "date as dd-bbb-YYYY"
393 				 * (ado, 1993-05-24)
394 				 */
395 				pt = _fmt(loc, "%e-%b-%Y", t, pt, ptlim);
396 				continue;
397 			case 'W':
398 				pt = _conv((t->tm_yday + DAYSPERWEEK -
399 				    (t->tm_wday ?
400 				    (t->tm_wday - 1) :
401 				    (DAYSPERWEEK - 1))) / DAYSPERWEEK,
402 				    PADDING(PAD_FMT_WEEKOFYEAR),
403 				    pt, ptlim);
404 				continue;
405 			case 'w':
406 				pt = _conv(t->tm_wday, "%d", pt, ptlim);
407 				continue;
408 			case 'X':
409 				pt = _fmt(loc, tptr->X_fmt, t, pt, ptlim);
410 				continue;
411 			case 'x':
412 				pt = _fmt(loc, tptr->x_fmt, t, pt, ptlim);
413 				continue;
414 			case 'y':
415 				pt = _yconv(t->tm_year, TM_YEAR_BASE, 0, 1,
416 				    pt, ptlim);
417 				continue;
418 			case 'Y':
419 				pt = _yconv(t->tm_year, TM_YEAR_BASE, 1, 1,
420 				    pt, ptlim);
421 				continue;
422 			case 'Z':
423 				if (t->tm_isdst >= 0)
424 					pt = _add(tzname[t->tm_isdst != 0],
425 					    pt, ptlim);
426 				/*
427 				 * C99 says that %Z must be replaced by the
428 				 * empty string if the time zone is not
429 				 * determinable.
430 				 */
431 				continue;
432 			case 'z':
433 				{
434 				int		diff;
435 				char const *	sign;
436 
437 				if (t->tm_isdst < 0)
438 					continue;
439 				/*
440 				 * C99 says that the UTC offset must
441 				 * be computed by looking only at
442 				 * tm_isdst. This requirement is
443 				 * incorrect, since it means the code
444 				 * must rely on magic (in this case
445 				 * altzone and timezone), and the
446 				 * magic might not have the correct
447 				 * offset. Doing things correctly is
448 				 * tricky and requires disobeying C99;
449 				 * see GNU C strftime for details.
450 				 * For now, punt and conform to the
451 				 * standard, even though it's incorrect.
452 				 *
453 				 * C99 says that %z must be replaced by the
454 				 * empty string if the time zone is not
455 				 * determinable, so output nothing if the
456 				 * appropriate variables are not available.
457 				 */
458 				if (t->tm_isdst == 0)
459 					diff = -timezone;
460 				else
461 					diff = -altzone;
462 				if (diff < 0) {
463 					sign = "-";
464 					diff = -diff;
465 				} else
466 					sign = "+";
467 				pt = _add(sign, pt, ptlim);
468 				diff /= SECSPERMIN;
469 				diff = (diff / MINSPERHOUR) * 100 +
470 				    (diff % MINSPERHOUR);
471 				pt = _conv(diff, PADDING(PAD_FMT_YEAR),
472 				    pt, ptlim);
473 				}
474 				continue;
475 			case '+':
476 				pt = _fmt(loc, tptr->date_fmt, t, pt, ptlim);
477 				continue;
478 			case '-':
479 				if (PadIndex != PAD_DEFAULT)
480 					break;
481 				PadIndex = PAD_LESS;
482 				goto label;
483 			case '_':
484 				if (PadIndex != PAD_DEFAULT)
485 					break;
486 				PadIndex = PAD_SPACE;
487 				goto label;
488 			case '0':
489 				if (PadIndex != PAD_DEFAULT)
490 					break;
491 				PadIndex = PAD_ZERO;
492 				goto label;
493 			case '%':
494 			/*
495 			 * X311J/88-090 (4.12.3.5): if conversion char is
496 			 * undefined, behavior is undefined. Print out the
497 			 * character itself as printf(3) also does.
498 			 */
499 			default:
500 				break;
501 			}
502 		}
503 		if (pt == ptlim)
504 			break;
505 		*pt++ = *format;
506 	}
507 	return (pt);
508 }
509 
/*
 * Render the integer `n` with the printf-style `format` (which is expected
 * to consume exactly one int argument) and append the text at `pt`,
 * never storing at or beyond `ptlim`.  Returns the new write position.
 *
 * The scratch buffer is sized from sizeof (int): every byte of an int
 * contributes at most three decimal digits, plus one byte for a sign and
 * one for the terminating NUL.  snprintf() bounds the write, so an
 * unexpectedly wide format truncates rather than overrunning the stack.
 */
static char *
_conv(const int n, const char *format, char *const pt,
    const char *const ptlim)
{
	char	buf[sizeof (int) * 3 + 2];

	(void) snprintf(buf, sizeof (buf), format, n);
	return (_add(buf, pt, ptlim));
}
519 
/*
 * Copy the NUL-terminated string `str` to `pt`, stopping before `ptlim`.
 *
 * When the whole string fits, its terminating NUL is stored too and the
 * returned pointer addresses that NUL, so the next append overwrites it.
 * When the limit is reached first, the return value equals `ptlim` and
 * the output is not NUL-terminated.
 */
static char *
_add(const char *str, char *pt, const char *const ptlim)
{
	for (; pt < ptlim; ++pt) {
		*pt = *str++;
		if (*pt == '\0')
			break;
	}
	return (pt);
}
527 
/*
 * POSIX and the C Standard are unclear or inconsistent about
 * what %C and %y do if the year is negative or exceeds 9999.
 * Use the convention that %C concatenated with %y yields the
 * same output as %Y, and that %Y contains at least 4 bytes,
 * with more only if necessary.
 *
 * The year to print is a + b.  It is split into a "century" part
 * ((a + b) / 100) and a two-digit part ((a + b) % 100) without ever
 * forming the sum a + b itself.  `convert_top' requests the century
 * part, `convert_yy' the two-digit part; either or both may be set.
 * Output is appended at `pt', bounded by `ptlim'; the new write
 * position is returned.
 */

static char *
_yconv(const int a, const int b, const int convert_top, const int convert_yy,
    char *pt, const char * const ptlim)
{
	int	century;
	int	yy;

#define	DIVISOR	100
	/* Add the low and high parts separately, then carry. */
	yy = a % DIVISOR + b % DIVISOR;
	century = a / DIVISOR + b / DIVISOR + yy / DIVISOR;
	yy %= DIVISOR;

	/* Make both parts carry the same sign. */
	if (yy < 0 && century > 0) {
		yy += DIVISOR;
		--century;
	} else if (century < 0 && yy > 0) {
		yy -= DIVISOR;
		++century;
	}

	if (convert_top) {
		/*
		 * A negative year within the first century has a zero
		 * century part, which would lose the sign; spell it out.
		 */
		if (century == 0 && yy < 0) {
			pt = _add("-0", pt, ptlim);
		} else {
			pt = _conv(century, "%02d", pt, ptlim);
		}
	}

	if (convert_yy) {
		const int magnitude = (yy < 0) ? -yy : yy;

		pt = _conv(magnitude, "%02d", pt, ptlim);
	}

	return (pt);
}
563