From fd53b3e112b0a174177d31ddd7fd93323268d91d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carl=20=C3=85stholm?= <[EMAIL REDACTED]>
Date: Tue, 10 Sep 2024 21:50:48 +0200
Subject: [PATCH] stdlib: Rewrite SDL_wcstol impl
SDL_wcstol should now fully adhere to the libc spec.
---
include/SDL3/SDL_stdinc.h | 17 +++----
src/stdlib/SDL_string.c | 102 +++++++++++++++++++++++---------------
2 files changed, 69 insertions(+), 50 deletions(-)
diff --git a/include/SDL3/SDL_stdinc.h b/include/SDL3/SDL_stdinc.h
index 4b7aae2ff9d1f..093b3d066fa25 100644
--- a/include/SDL3/SDL_stdinc.h
+++ b/include/SDL3/SDL_stdinc.h
@@ -1499,21 +1499,18 @@ extern SDL_DECLSPEC int SDLCALL SDL_wcsncasecmp(const wchar_t *str1, const wchar
/**
* Parse a `long` from a wide string.
*
- * This function makes fewer guarantees than the C runtime `wcstol`:
+ * If `str` starts with whitespace, then those whitespace characters are skipped before attempting to parse the number.
*
- * - Only the bases 10 and 16 are guaranteed to be supported. The behavior for
- * other bases is unspecified.
- * - It is unspecified what this function returns when the parsed integer does
- * not fit inside a `long`.
+ * If the parsed number does not fit inside a `long`, the result is clamped to the minimum and maximum representable `long` values.
*
* \param str The null-terminated wide string to read. Must not be NULL.
* \param endp If not NULL, the address of the first invalid wide character
* (i.e. the next character after the parsed number) will be
* written to this pointer.
- * \param base The base of the integer to read. The values 0, 10 and 16 are
- * supported. If 0, the base will be inferred from the integer's
- * prefix.
- * \returns The parsed `long`.
+ * \param base The base of the integer to read. Supported values are 0 and 2 to 36 inclusive.
+ * If 0, the base will be inferred from the number's
+ * prefix (0x for hexadecimal, 0 for octal, decimal otherwise).
+ * \returns The parsed `long`, or 0 if no number could be parsed.
*
* \threadsafety It is safe to call this function from any thread.
*
@@ -1826,7 +1823,7 @@ extern SDL_DECLSPEC long long SDLCALL SDL_strtoll(const char *str, char **endp,
* - Only the bases 10 and 16 are guaranteed to be supported. The behavior for
* other bases is unspecified.
* - It is unspecified what this function returns when the parsed integer does
- * not fit inside a `long long`.
+ * not fit inside an `unsigned long long`.
*
* \param str The null-terminated string to read. Must not be NULL.
* \param endp If not NULL, the address of the first invalid character (i.e.
diff --git a/src/stdlib/SDL_string.c b/src/stdlib/SDL_string.c
index 907f9537c0eaa..4006b35003784 100644
--- a/src/stdlib/SDL_string.c
+++ b/src/stdlib/SDL_string.c
@@ -371,46 +371,78 @@ static size_t SDL_ScanLong(const char *text, int count, int radix, long *valuep)
#endif
#if !defined(HAVE_WCSTOL)
+// SDL_ScanLongW assumes that wchar_t can be converted to int without truncating bits
+SDL_COMPILE_TIME_ASSERT(wchar_t_int, sizeof(wchar_t) <= sizeof(int));
+
static size_t SDL_ScanLongW(const wchar_t *text, int count, int radix, long *valuep)
{
- const wchar_t *textstart = text;
- long value = 0;
+ const wchar_t *text_start = text;
+ const wchar_t *number_start = text_start;
+ unsigned long value = 0;
bool negative = false;
- if (*text == '-') {
- negative = true;
- ++text;
- }
- if (radix == 16 && SDL_wcsncmp(text, L"0x", 2) == 0) {
- text += 2;
- }
- for (;;) {
- int v;
- if (*text >= '0' && *text <= '9') {
- v = *text - '0';
- } else if (radix == 16 && SDL_isupperhex(*text)) {
- v = 10 + (*text - 'A');
- } else if (radix == 16 && SDL_islowerhex(*text)) {
- v = 10 + (*text - 'a');
- } else {
- break;
+ if (radix == 0 || (radix >= 2 && radix <= 36)) {
+ while (SDL_isspace(*text)) {
+ ++text;
}
- value *= radix;
- value += v;
- ++text;
-
- if (count > 0 && (text - textstart) == count) {
- break;
+ if (*text == '-' || *text == '+') {
+ negative = *text == '-';
+ ++text;
}
+ if ((radix == 0 || radix == 16) && *text == '0') {
+ ++text;
+ if (*text == 'x' || *text == 'X') {
+ radix = 16;
+ ++text;
+ } else if (radix == 0) {
+ radix = 8;
+ }
+ } else if (radix == 0) {
+ radix = 10;
+ }
+ number_start = text;
+ do {
+ unsigned long digit;
+ if (*text >= '0' && *text <= '9') {
+ digit = *text - '0';
+ } else if (radix > 10) {
+ if (*text >= 'A' && *text < 'A' + (radix - 10)) {
+ digit = 10 + (*text - 'A');
+ } else if (*text >= 'a' && *text < 'a' + (radix - 10)) {
+ digit = 10 + (*text - 'a');
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ { // saturate to ULONG_MAX
+ unsigned long next_value = value * radix + digit;
+ if (next_value < value) {
+ next_value = ~0UL;
+ }
+ value = next_value;
+ }
+ ++text;
+ } while (count == 0 || (text - text_start) != count);
}
- if (valuep && text > textstart) {
- if (negative && value) {
- *valuep = -value;
+ if (text == number_start) { // no number was parsed, so no characters were consumed
+ text = text_start;
+ }
+ if (valuep) {
+ if (negative) {
+ *valuep = 0UL - value;
+ if (*valuep > 0) {
+ *valuep = ((~0UL) >> 1) + 1UL; // LONG_MIN
+ }
} else {
*valuep = value;
+ if (*valuep < 0) {
+ *valuep = (~0UL) >> 1; // LONG_MAX
+ }
}
}
- return text - textstart;
+ return text - text_start;
}
#endif
@@ -824,18 +856,8 @@ long SDL_wcstol(const wchar_t *string, wchar_t **endp, int base)
#ifdef HAVE_WCSTOL
return wcstol(string, endp, base);
#else
- size_t len;
long value = 0;
-
- if (!base) {
- if ((SDL_wcslen(string) > 2) && (SDL_wcsncmp(string, L"0x", 2) == 0)) {
- base = 16;
- } else {
- base = 10;
- }
- }
-
- len = SDL_ScanLongW(string, 0, base, &value);
+ size_t len = SDL_ScanLongW(string, 0, base, &value);
if (endp) {
*endp = (wchar_t *)string + len;
}