SDL: stdlib: Rewrite SDL_strtoull impl

From fb82772fb31cf10a345d4714f468f5672a4a2752 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carl=20=C3=85stholm?= <[EMAIL REDACTED]>
Date: Wed, 11 Sep 2024 22:32:45 +0200
Subject: [PATCH] stdlib: Rewrite SDL_strtoull impl

---
 include/SDL3/SDL_stdinc.h |  14 ++--
 src/stdlib/SDL_string.c   | 134 ++++++++++++++++++++++++++------------
 2 files changed, 99 insertions(+), 49 deletions(-)

diff --git a/include/SDL3/SDL_stdinc.h b/include/SDL3/SDL_stdinc.h
index 0f45ff24cfe0e..f444104811ed5 100644
--- a/include/SDL3/SDL_stdinc.h
+++ b/include/SDL3/SDL_stdinc.h
@@ -1819,20 +1819,18 @@ extern SDL_DECLSPEC long long SDLCALL SDL_strtoll(const char *str, char **endp,
 /**
  * Parse an `unsigned long long` from a string.
  *
- * This function makes fewer guarantees than the C runtime `strtoull`:
+ * If `str` starts with whitespace, then those whitespace characters are skipped before attempting to parse the number.
  *
- * - Only the bases 10 and 16 are guaranteed to be supported. The behavior for
- *   other bases is unspecified.
- * - It is unspecified what this function returns when the parsed integer does
- *   not fit inside an `unsigned long long`.
+ * If the parsed number does not fit inside an `unsigned long long`,
+ * the result is clamped to the maximum representable `unsigned long long` value.
  *
  * \param str The null-terminated string to read. Must not be NULL.
  * \param endp If not NULL, the address of the first invalid character (i.e.
  *             the next character after the parsed number) will be written to
  *             this pointer.
- * \param base The base of the integer to read. The values 0, 10 and 16 are
- *             supported. If 0, the base will be inferred from the integer's
- *             prefix.
+ * \param base The base of the integer to read. Supported values are 0 and 2 to 36 inclusive.
+ *             If 0, the base will be inferred from the number's
+ *             prefix (0x for hexadecimal, 0 for octal, decimal otherwise).
  * \returns The parsed `unsigned long long`.
  *
  * \threadsafety It is safe to call this function from any thread.
diff --git a/src/stdlib/SDL_string.c b/src/stdlib/SDL_string.c
index 4006b35003784..0095168623792 100644
--- a/src/stdlib/SDL_string.c
+++ b/src/stdlib/SDL_string.c
@@ -326,6 +326,91 @@ static size_t UTF8_GetTrailingBytes(unsigned char c)
     return 0;
 }
 
+#if !defined(HAVE_VSSCANF) || !defined(HAVE_STRTOL) || !defined(HAVE_STRTOUL) || !defined(HAVE_STRTOLL) || !defined(HAVE_STRTOULL) || !defined(HAVE_STRTOD)
+/**
+ * Parses an unsigned long long and returns the unsigned value and sign bit.
+ *
+ * Leading whitespace, an optional sign and (radix 0 or 16) a "0x" prefix are
+ * skipped; radix 0 infers base 16, 8 or 10. Returns the number of characters
+ * consumed: 0, with `*valuep == 0`, if nothing parsed or radix is unsupported.
+ *
+ * Positive values are clamped to ULLONG_MAX.
+ * The result `value == 0 && negative` indicates negative overflow
+ * and might need to be handled differently depending on whether a
+ * signed or unsigned integer is being parsed.
+ */
+static size_t SDL_ScanUnsignedLongLongInternal(const char *text, int count, int radix, unsigned long long *valuep, bool *negativep)
+{
+    const unsigned long long ullong_max = ~0ULL;
+
+    const char *text_start = text;
+    const char *number_start = text_start;
+    unsigned long long value = 0;
+    bool negative = false;
+    bool overflow = false;
+    bool hex_prefix = false;
+
+    if (radix == 0 || (radix >= 2 && radix <= 36)) {
+        while (SDL_isspace(*text)) {
+            ++text;
+        }
+        if (*text == '-' || *text == '+') {
+            negative = *text == '-';
+            ++text;
+        }
+        if ((radix == 0 || radix == 16) && *text == '0' && (text[1] == 'x' || text[1] == 'X')) {
+            hex_prefix = true;
+            radix = 16;
+            text += 2;
+        } else if (radix == 0) {
+            radix = (*text == '0') ? 8 : 10; // the '0' is left in place: it is itself a valid octal digit
+        }
+        number_start = text;
+        do {
+            unsigned long long digit;
+            if (*text >= '0' && *text <= '9') {
+                digit = *text - '0';
+            } else if (*text >= 'A' && *text <= 'Z') {
+                digit = 10 + (*text - 'A');
+            } else if (*text >= 'a' && *text <= 'z') {
+                digit = 10 + (*text - 'a');
+            } else {
+                break;
+            }
+            if (digit >= (unsigned long long)radix) { // e.g. '9' in base 8 ends the number
+                break;
+            }
+            if (value != 0 && radix > ullong_max / value) {
+                overflow = true;
+            } else {
+                value *= radix;
+                if (digit > ullong_max - value) {
+                    overflow = true;
+                } else {
+                    value += digit;
+                }
+            }
+            ++text;
+        } while (count == 0 || (text - text_start) != count); // count is only checked after each digit
+    }
+    if (text == number_start) { // no digits: consume nothing, except the '0' of a bare "0x" prefix
+        text = hex_prefix ? number_start - 1 : text_start;
+    }
+    if (overflow) {
+        if (negative) {
+            value = 0;
+        } else {
+            value = ullong_max;
+        }
+    } else if (value == 0) {
+        negative = false;
+    }
+    *valuep = value;
+    *negativep = negative;
+    return text - text_start;
+}
+#endif
+
 #if !defined(HAVE_VSSCANF) || !defined(HAVE_STRTOL) || !defined(HAVE_STRTOUL) || !defined(HAVE_STRTOD)
 static size_t SDL_ScanLong(const char *text, int count, int radix, long *valuep)
 {
@@ -563,39 +648,16 @@ static size_t SDL_ScanLongLong(const char *text, int count, int radix, long long
 #if !defined(HAVE_VSSCANF) || !defined(HAVE_STRTOULL)
 static size_t SDL_ScanUnsignedLongLong(const char *text, int count, int radix, unsigned long long *valuep)
 {
-    const char *textstart = text;
-    unsigned long long value = 0;
-
-    if (*text == '-') {
-        return SDL_ScanLongLong(text, count, radix, (long long *)valuep);
-    }
-
-    if (radix == 16 && SDL_strncmp(text, "0x", 2) == 0) {
-        text += 2;
-    }
-    for (;;) {
-        int v;
-        if (SDL_isdigit((unsigned char)*text)) {
-            v = *text - '0';
-        } else if (radix == 16 && SDL_isupperhex(*text)) {
-            v = 10 + (*text - 'A');
-        } else if (radix == 16 && SDL_islowerhex(*text)) {
-            v = 10 + (*text - 'a');
+    bool negative; // whether the shared helper reports a negative result
+    size_t len = SDL_ScanUnsignedLongLongInternal(text, count, radix, valuep, &negative); // len: characters consumed; *valuep: magnitude
+    if (negative) { // map the sign/magnitude pair onto a single unsigned result
+        if (*valuep == 0) { // zero magnitude with the negative flag is the helper's negative-overflow marker
+            *valuep = ~0ULL; // ULLONG_MAX: clamp on negative overflow
         } else {
-            break;
-        }
-        value *= radix;
-        value += v;
-        ++text;
-
-        if (count > 0 && (text - textstart) == count) {
-            break;
+            *valuep = 0ULL - *valuep; // negate with unsigned wraparound
         }
     }
-    if (valuep && text > textstart) {
-        *valuep = value;
-    }
-    return text - textstart;
+    return len; // number of characters the helper consumed
 }
 #endif
 
@@ -1292,18 +1354,8 @@ unsigned long long SDL_strtoull(const char *string, char **endp, int base)
 #ifdef HAVE_STRTOULL
     return strtoull(string, endp, base);
 #else
-    size_t len;
     unsigned long long value = 0;
-
-    if (!base) {
-        if ((SDL_strlen(string) > 2) && (SDL_strncasecmp(string, "0x", 2) == 0)) {
-            base = 16;
-        } else {
-            base = 10;
-        }
-    }
-
-    len = SDL_ScanUnsignedLongLong(string, 0, base, &value);
+    size_t len = SDL_ScanUnsignedLongLong(string, 0, base, &value);
     if (endp) {
         *endp = (char *)string + len;
     }