From 52518704eed9929d06f3cc81eb83518ea06f17b3 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Thu, 26 Jun 2014 14:41:07 -0700 Subject: [PATCH] Avoid locale problems. * zdump.c, zic.c: Don't include , because the behavior if its macros is locale-dependent if HAVE_GETTEXT, and we want the C locale's semantics. Instead, use portable replacements for ctype.h operations, with the desired semantics. All uses of isalpha etc. replaced by calls to new functions is_alpha etc. or by inline code. (isascii): Remove. (is_alpha): New function. * zic.c (doabbr): Simplify by using is_alpha. (is_space): New function. * NEWS: Document the above. --- NEWS | 4 +++ zdump.c | 32 ++++++++++++----- zic.c | 105 +++++++++++++++++++++++++++++++++++++------------------- 3 files changed, 96 insertions(+), 45 deletions(-) diff --git a/NEWS b/NEWS index 2983cc0..d0cbd42 100644 --- a/NEWS +++ b/NEWS @@ -12,6 +12,10 @@ Unreleased, experimental changes 'localtime', 'mktime', etc. now use much less stack space if ALL_STATE is defined. (Thanks to Elliott Hughes for reporting the problem.) + 'zic' no longer mishandles input when ignoring case in locales that + are not compatible with English, e.g., unibyte Turkish locales when + compiled with HAVE_GETTEXT. + Error diagnostics of 'zic' and 'yearistype' have been reworded so that they no longer use ASCII '-' as if it were a dash. diff --git a/zdump.c b/zdump.c index f3e2a24..bde827a 100644 --- a/zdump.c +++ b/zdump.c @@ -24,10 +24,6 @@ #include "time.h" /* for struct tm */ #include "stdlib.h" /* for exit, malloc, atoi */ #include "limits.h" /* for CHAR_BIT, LLONG_MAX */ -#include "ctype.h" /* for isalpha et al. */ -#ifndef isascii -#define isascii(x) 1 -#endif /* !defined isascii */ /* ** Substitutes for pre-C99 compilers. @@ -220,6 +216,25 @@ static void show(char * zone, time_t t, int v); static const char * tformat(void); static time_t yeartot(intmax_t y) ATTRIBUTE_PURE; +/* Is A an alphabetic character in the C locale? */ +static int +is_alpha(char a) +{ + switch (a) { + default: + return 0; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + return 1; + } +} + #ifndef TYPECHECK #define my_localtime localtime #else /* !defined TYPECHECK */ @@ -266,7 +281,7 @@ abbrok(const char *const abbrp, const char *const zone) return; cp = abbrp; wp = NULL; - while (isascii((unsigned char) *cp) && isalpha((unsigned char) *cp)) + while (is_alpha(*cp)) ++cp; if (cp - abbrp == 0) wp = _("lacks alphabetic at start"); @@ -276,10 +291,9 @@ abbrok(const char *const abbrp, const char *const zone) wp = _("has more than 6 alphabetics"); if (wp == NULL && (*cp == '+' || *cp == '-')) { ++cp; - if (isascii((unsigned char) *cp) && - isdigit((unsigned char) *cp)) - if (*cp++ == '1' && *cp >= '0' && *cp <= '4') - ++cp; + if ('0' <= *cp && *cp <= '9') + if (*cp++ == '1' && '0' <= *cp && *cp <= '4') + cp++; if (*cp != '\0') wp = _("differs from POSIX standard"); } diff --git a/zic.c b/zic.c index fa25cd5..9c22e7a 100644 --- a/zic.c +++ b/zic.c @@ -31,19 +31,6 @@ typedef int_fast64_t zic_t; #define MKDIR_UMASK 0755 #endif -/* -** On some ancient hosts, predicates like 'isspace(C)' are defined -** only if isascii(C) || C == EOF. Modern hosts obey the C Standard, -** which says they are defined only if C == ((unsigned char) C) || C == EOF. -** Neither the C Standard nor Posix require that 'isascii' exist. -** For portability, we check both ancient and modern requirements. -** If isascii is not defined, the isascii check succeeds trivially. -*/ -#include "ctype.h" -#ifndef isascii -#define isascii(x) 1 -#endif - #define end(cp) (strchr((cp), '\0')) struct rule { @@ -132,7 +119,8 @@ static int inzcont(char ** fields, int nfields); static int inzone(char ** fields, int nfields); static int inzsub(char ** fields, int nfields, int iscont); static int itsdir(const char * name); -static int lowerit(int c); +static int is_alpha(char a); +static char lowerit(char); static int mkdirs(char * filename); static void newabbr(const char * abbr); static zic_t oadd(zic_t t1, zic_t t2); @@ -640,16 +628,25 @@ static void namecheck(const char *name) { register char const *cp; - static char const benign[] = ("-/_" - "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ"); + + /* Benign characters in a portable file name. */ + static char const benign[] = + "-/_" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + /* Non-control chars in the POSIX portable character set, + excluding the benign characters. */ + static char const printable_and_not_benign[] = + " !\"#$%&'()*+,.0123456789:;<=>?@[\\]^`{|}~"; + register char const *component = name; if (!noise) return; for (cp = name; *cp; cp++) { unsigned char c = *cp; if (!strchr(benign, c)) { - warning((isascii(c) && isprint(c) + warning((strchr(printable_and_not_benign, c) ? _("file name '%s' contains byte '%c'") : _("file name '%s' contains byte '\\%o'")), name, c); @@ -1862,10 +1859,8 @@ doabbr(char *const abbr, const char *const format, const char *const letters, } if (!doquotes) return; - for (cp = abbr; *cp != '\0'; ++cp) - if (strchr("ABCDEFGHIJKLMNOPQRSTUVWXYZ", *cp) == NULL && - strchr("abcdefghijklmnopqrstuvwxyz", *cp) == NULL) - break; + for (cp = abbr; is_alpha(*cp); cp++) + continue; len = strlen(abbr); if (len > 0 && *cp == '\0') return; @@ -2582,11 +2577,54 @@ yearistype(const int year, const char *const type) exit(EXIT_FAILURE); } +/* Is A a space character in the C locale? */ static int -lowerit(int a) +is_space(char a) { - a = (unsigned char) a; - return (isascii(a) && isupper(a)) ? tolower(a) : a; + switch (a) { + default: + return 0; + case ' ': case '\f': case '\n': case '\r': case '\t': case '\v': + return 1; + } +} + +/* Is A an alphabetic character in the C locale? */ +static int +is_alpha(char a) +{ + switch (a) { + default: + return 0; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': + case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': + case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': + case 'V': case 'W': case 'X': case 'Y': case 'Z': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': + case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': + case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': + case 'v': case 'w': case 'x': case 'y': case 'z': + return 1; + } +} + +/* If A is an uppercase character in the C locale, return its lowercase + counterpart. Otherwise, return A. */ +static char +lowerit(char a) +{ + switch (a) { + default: return a; + case 'A': return 'a'; case 'B': return 'b'; case 'C': return 'c'; + case 'D': return 'd'; case 'E': return 'e'; case 'F': return 'f'; + case 'G': return 'g'; case 'H': return 'h'; case 'I': return 'i'; + case 'J': return 'j'; case 'K': return 'k'; case 'L': return 'l'; + case 'M': return 'm'; case 'N': return 'n'; case 'O': return 'o'; + case 'P': return 'p'; case 'Q': return 'q'; case 'R': return 'r'; + case 'S': return 's'; case 'T': return 't'; case 'U': return 'u'; + case 'V': return 'v'; case 'W': return 'w'; case 'X': return 'x'; + case 'Y': return 'y'; case 'Z': return 'z'; + } } /* case-insensitive equality */ @@ -2653,8 +2691,7 @@ getfields(register char *cp) array = emalloc(size_product(strlen(cp) + 1, sizeof *array)); nsubs = 0; for ( ; ; ) { - while (isascii((unsigned char) *cp) && - isspace((unsigned char) *cp)) + while (is_space(*cp)) ++cp; if (*cp == '\0' || *cp == '#') break; @@ -2671,9 +2708,8 @@ getfields(register char *cp) )); exit(1); } - } while (*cp != '\0' && *cp != '#' && - (!isascii(*cp) || !isspace((unsigned char) *cp))); - if (isascii(*cp) && isspace((unsigned char) *cp)) + } while (*cp && *cp != '#' && !is_space(*cp)); + if (is_space(*cp)) ++cp; *dp = '\0'; } @@ -2806,8 +2842,7 @@ newabbr(const char *const string) */ cp = string; mp = NULL; - while (isascii((unsigned char) *cp) && - isalpha((unsigned char) *cp)) + while (is_alpha(*cp)) ++cp; if (cp - string == 0) mp = _("time zone abbreviation lacks alphabetic at start"); @@ -2817,8 +2852,7 @@ mp = _("time zone abbreviation has fewer than 3 alphabetics"); mp = _("time zone abbreviation has too many alphabetics"); if (mp == NULL && (*cp == '+' || *cp == '-')) { ++cp; - if (isascii((unsigned char) *cp) && - isdigit((unsigned char) *cp)) + if (is_digit(*cp)) if (*cp++ == '1' && *cp >= '0' && *cp <= '4') ++cp; @@ -2852,8 +2886,7 @@ mkdirs(char *argname) /* ** DOS drive specifier? */ - if (isalpha((unsigned char) name[0]) && - name[1] == ':' && name[2] == '\0') { + if (is_alpha(name[0]) && name[1] == ':' && name[2] == '\0') { *cp = '/'; continue; }