diff --git a/include/util.hpp b/include/util.hpp index 7d0254485..49970d7b2 100644 --- a/include/util.hpp +++ b/include/util.hpp @@ -29,10 +29,6 @@ bool isPrintable(int c); bool isUpper(int c); bool isLower(int c); bool isLetter(int c); -bool isDigit(int c); -bool isBinDigit(int c); -bool isOctDigit(int c); -bool isHexDigit(int c); bool isAlphanumeric(int c); // Locale-independent character transform functions @@ -42,7 +38,35 @@ char toUpper(char c); bool startsIdentifier(int c); bool continuesIdentifier(int c); -uint8_t parseHexDigit(int c); +template +bool isDigit(int c) { + static_assert(Base <= 36, "Base must be 36 or less to allow digits 0-9A-Z"); + if constexpr (Base <= 10) { + return c >= '0' && c < static_cast('0' + Base); + } else { + return isDigit<10>(c) || (c >= 'A' && c < static_cast('A' + Base - 10)) + || (c >= 'a' && c < static_cast('a' + Base - 10)); + } +} + +template +uint8_t parseDigit(int c) { + static_assert(Base <= 36, "Base must be 36 or less to allow digits 0-9A-Z"); + assume(isDigit(c)); + if constexpr (Base <= 10) { + return c - '0'; + } else { + // Check digit ranges from greatest to least ('a'-'z', then 'A'-'Z', then '0'-'9') + if (c >= 'a') { + return c - 'a' + 10; + } else if (c >= 'A') { + return c - 'A' + 10; + } else { + return parseDigit<10>(c); + } + } +} + std::optional parseNumber(char const *&str, NumberBase base = BASE_AUTO); std::optional parseWholeNumber(char const *str, NumberBase base = BASE_AUTO); diff --git a/src/asm/format.cpp b/src/asm/format.cpp index cc9cb5410..364d91fcd 100644 --- a/src/asm/format.cpp +++ b/src/asm/format.cpp @@ -47,7 +47,7 @@ size_t FormatSpec::parseSpec(char const *spec) { padZero = true; } // - if (isDigit(spec[i])) { + if (isDigit<10>(spec[i])) { width = parseSpecNumber(); } // diff --git a/src/asm/lexer.cpp b/src/asm/lexer.cpp index 07800e4ea..e5cf09bf1 100644 --- a/src/asm/lexer.cpp +++ b/src/asm/lexer.cpp @@ -526,7 +526,8 @@ static int peek(); static void shiftChar(); static int bumpChar(); static int nextChar(); -static uint32_t readDecimalNumber(int initial); +template +static uint32_t readNumber(int initial, char const *prefix); static uint32_t readBracketedMacroArgNum() { bool enableExpansions = lexerState->enableExpansions; @@ -543,8 +544,8 @@ static uint32_t readBracketedMacroArgNum() { c = nextChar(); } - if (isDigit(c)) { - uint32_t n = readDecimalNumber(bumpChar()); + if (isDigit<10>(c)) { + uint32_t n = readNumber<10>(bumpChar(), nullptr); if (n > INT32_MAX) { error("Number in bracketed macro argument is too large"); return 0; @@ -968,7 +969,7 @@ static std::tuple readFractionDigits() { if (c == '_') { checkDigitSeparator(prevWasSeparator, "fixed-point"); prevWasSeparator = true; - } else if (isDigit(c)) { + } else if (isDigit<10>(c)) { prevWasSeparator = false; int digit = c - '0'; if (dividend > (UINT32_MAX - digit) / 10 || divisor > UINT32_MAX / 10) { @@ -976,7 +977,7 @@ static std::tuple readFractionDigits() { WARNING_LARGE_CONSTANT, "Fixed-point constant has too many fractional digits" ); // Discard any additional digits - for (int d = peek(); isDigit(d) || d == '_'; c = d, d = nextChar()) {} + for (int d = peek(); isDigit<10>(d) || d == '_'; c = d, d = nextChar()) {} return {dividend, divisor, c == '_'}; } dividend = dividend * 10 + digit; @@ -998,12 +999,12 @@ static uint8_t readPrecisionSuffix() { bool empty = true; // '_' is not allowed after 'q'/'Q' - for (int c = peek(); isDigit(c); c = nextChar()) { + for (int c = peek(); isDigit<10>(c); c = nextChar()) { empty = false; int digit = c - '0'; if (precision > (UINT8_MAX - digit) / 10) { // Discard any additional digits - skipChars(isDigit); + skipChars(isDigit<10>); // Return an invalid precision to cause a subsequent error, which is checked afterwards // to cover the default `options.fixPrecision` as well, just in case return UINT8_MAX; @@ -1051,8 +1052,13 @@ static bool isValidDigit(char c) { return isAlphanumeric(c) || c == '.' || c == '#' || c == '@'; } -static bool isCustomBinDigit(int c) { - return isBinDigit(c) || c == options.binDigits[0] || c == options.binDigits[1]; +static bool isAsmBinDigit(int c) { + return isDigit<2>(c) || c == options.binDigits[0] || c == options.binDigits[1]; +} + +static uint8_t parseAsmBinDigit(int c) { + assume(isAsmBinDigit(c)); + return c == '1' || c == options.binDigits[1]; // Returns 0 or 1 } static bool checkDigitErrors(char const *digits, size_t n, char const *type) { @@ -1092,108 +1098,35 @@ void lexer_SetGfxDigits(char const digits[4]) { } } -static uint32_t readBinaryNumber(char const *prefix) { - uint32_t number = 0; - bool empty = true; - bool prevWasSeparator = false; - - for (int c = peek();; c = nextChar()) { - if (c == '_') { - checkDigitSeparator(prevWasSeparator, "integer"); - prevWasSeparator = true; - continue; - } - - int bit; - if (c == '0' || c == options.binDigits[0]) { - bit = 0; - } else if (c == '1' || c == options.binDigits[1]) { - bit = 1; +template +static uint32_t readNumber(int initial, char const *prefix) { + auto isSomeDigit = [](int c) { + if constexpr (Base == 2) { + return isAsmBinDigit(c); } else { - break; - } - empty = false; - prevWasSeparator = false; - - if (number > (UINT32_MAX - bit) / 2) { - warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); - // Discard any additional digits - skipChars([](int d) { return isCustomBinDigit(d) || d == '_'; }); - return 0; + return isDigit(c); } - number = number * 2 + bit; - } - - checkDigitsEnding(empty, prefix, prevWasSeparator, "integer"); - return number; -} - -static uint32_t readOctalNumber(char const *prefix) { - uint32_t number = 0; - bool empty = true; - bool prevWasSeparator = false; - - for (int c = peek();; c = nextChar()) { - if (c == '_') { - checkDigitSeparator(prevWasSeparator, "integer"); - prevWasSeparator = true; - continue; + }; + auto parseSomeDigit = [](int c) { + if constexpr (Base == 2) { + return parseAsmBinDigit(c); + } else { + return parseDigit(c); } + }; - if (!isOctDigit(c)) { - break; - } - int digit = c - '0'; + uint32_t number; + bool empty; + if constexpr (Base == 10) { + assume(prefix == nullptr); + number = parseSomeDigit(initial); empty = false; - prevWasSeparator = false; - - if (number > (UINT32_MAX - digit) / 8) { - warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); - // Discard any additional digits - skipChars([](int d) { return isOctDigit(d) || d == '_'; }); - return 0; - } - number = number * 8 + digit; - } - - checkDigitsEnding(empty, prefix, prevWasSeparator, "integer"); - return number; -} - -static uint32_t readDecimalNumber(int initial) { - assume(isDigit(initial)); - uint32_t number = initial - '0'; - bool prevWasSeparator = false; - - for (int c = peek();; c = nextChar()) { - if (c == '_') { - checkDigitSeparator(prevWasSeparator, "integer"); - prevWasSeparator = true; - continue; - } - - if (!isDigit(c)) { - break; - } - int digit = c - '0'; - prevWasSeparator = false; - - if (number > (UINT32_MAX - digit) / 10) { - warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); - // Discard any additional digits - skipChars([](int d) { return isDigit(d) || d == '_'; }); - return 0; - } - number = number * 10 + digit; + } else { + assume(initial == 0 && prefix != nullptr); + number = 0; + empty = true; } - checkDigitsEnding(false, nullptr, prevWasSeparator, "integer"); - return number; -} - -static uint32_t readHexNumber(char const *prefix) { - uint32_t number = 0; - bool empty = true; bool prevWasSeparator = false; for (int c = peek();; c = nextChar()) { @@ -1203,20 +1136,20 @@ static uint32_t readHexNumber(char const *prefix) { continue; } - if (!isHexDigit(c)) { + if (!isSomeDigit(c)) { break; } - int digit = parseHexDigit(c); + int digit = parseSomeDigit(c); empty = false; prevWasSeparator = false; - if (number > (UINT32_MAX - digit) / 16) { + if (number > (UINT32_MAX - digit) / Base) { warning(WARNING_LARGE_CONSTANT, "Integer constant is too large"); // Discard any additional digits - skipChars([](int d) { return isHexDigit(d) || d == '_'; }); + skipChars([&isSomeDigit](int d) { return isSomeDigit(d) || d == '_'; }); return 0; } - number = number * 16 + digit; + number = number * Base + digit; } checkDigitsEnding(empty, prefix, prevWasSeparator, "integer"); @@ -1830,15 +1763,15 @@ static Token yylex_NORMAL() { case 'x': case 'X': shiftChar(); - return Token(T_(NUMBER), readHexNumber("\"0x\"")); + return Token(T_(NUMBER), readNumber<16>(0, "\"0x\"")); case 'o': case 'O': shiftChar(); - return Token(T_(NUMBER), readOctalNumber("\"0o\"")); + return Token(T_(NUMBER), readNumber<8>(0, "\"0o\"")); case 'b': case 'B': shiftChar(); - return Token(T_(NUMBER), readBinaryNumber("\"0b\"")); + return Token(T_(NUMBER), readNumber<2>(0, "\"0b\"")); } [[fallthrough]]; @@ -1853,7 +1786,7 @@ static Token yylex_NORMAL() { case '7': case '8': case '9': { - uint32_t n = readDecimalNumber(c); + uint32_t n = readNumber<10>(c, nullptr); if (peek() == '.') { shiftChar(); @@ -1864,20 +1797,20 @@ static Token yylex_NORMAL() { case '&': // Either &=, binary AND, logical AND, or an octal constant c = peek(); - if (isOctDigit(c) || c == '_') { - return Token(T_(NUMBER), readOctalNumber("'&'")); + if (isDigit<8>(c) || c == '_') { + return Token(T_(NUMBER), readNumber<8>(0, "'&'")); } return oneOrTwo('=', T_(POP_ANDEQ), '&', T_(OP_LOGICAND), T_(OP_AND)); case '%': // Either %=, MOD, or a binary constant c = peek(); - if (isCustomBinDigit(c) || c == '_') { - return Token(T_(NUMBER), readBinaryNumber("'%'")); + if (isAsmBinDigit(c) || c == '_') { + return Token(T_(NUMBER), readNumber<2>(0, "'%'")); } return oneOrTwo('=', T_(POP_MODEQ), T_(OP_MOD)); case '$': // Hex constant - return Token(T_(NUMBER), readHexNumber("'$'")); + return Token(T_(NUMBER), readNumber<16>(0, "'$'")); case '`': // Gfx constant return Token(T_(NUMBER), readGfxConstant()); diff --git a/src/diagnostics.cpp b/src/diagnostics.cpp index e12c4004b..56c92daec 100644 --- a/src/diagnostics.cpp +++ b/src/diagnostics.cpp @@ -11,7 +11,7 @@ #include "helpers.hpp" #include "style.hpp" -#include "util.hpp" // isDigit +#include "util.hpp" // parseNumber void warnx(char const *fmt, ...) { va_list ap; diff --git a/src/fix/mbc.cpp b/src/fix/mbc.cpp index 219be9c2e..5546b8c26 100644 --- a/src/fix/mbc.cpp +++ b/src/fix/mbc.cpp @@ -128,7 +128,7 @@ MbcType mbc_ParseName(char const *name, uint8_t &tpp1Major, uint8_t &tpp1Minor) } // Parse numeric MBC and return it as-is (unless it's too large) - if (char c = *ptr; isDigit(c) || c == '$' || c == '&' || c == '%') { + if (char c = *ptr; isDigit<10>(c) || c == '$' || c == '&' || c == '%') { if (std::optional mbc = parseWholeNumber(ptr); !mbc) { fatalUnknownMBC(name); } else if (*mbc > 0xFF) { diff --git a/src/gfx/pal_spec.cpp b/src/gfx/pal_spec.cpp index 32d3cdd11..8ac6256f2 100644 --- a/src/gfx/pal_spec.cpp +++ b/src/gfx/pal_spec.cpp @@ -21,7 +21,7 @@ #include "diagnostics.hpp" #include "helpers.hpp" #include "platform.hpp" -#include "util.hpp" // UpperMap, isDigit +#include "util.hpp" // UpperMap, parseDigit #include "gfx/main.hpp" #include "gfx/png.hpp" @@ -37,7 +37,7 @@ static void skipBlankSpace(std::string_view const &str, size_t &pos) { } static uint8_t toHex(char c1, char c2) { - return parseHexDigit(c1) * 16 + parseHexDigit(c2); + return parseDigit<16>(c1) * 16 + parseDigit<16>(c2); } static uint8_t singleToHex(char c) { diff --git a/src/link/lexer.cpp b/src/link/lexer.cpp index a6f59c8da..541938660 100644 --- a/src/link/lexer.cpp +++ b/src/link/lexer.cpp @@ -94,66 +94,41 @@ static std::string readKeyword(int initial) { return keyword; } -static yy::parser::symbol_type parseDecNumber(int initial) { +template +static yy::parser::symbol_type readNumber(int initial, char const *prefix, char const *name) { LexerStackEntry &context = lexerStack.back(); - uint32_t number = initial - '0'; - for (int c = context.file.sgetc(); isDigit(c) || c == '_'; c = context.file.snextc()) { + uint32_t number; + if constexpr (Base == 10) { + assume(prefix == nullptr && name == nullptr); + number = parseDigit(initial); + } else { + assume(initial == 0 && prefix != nullptr && name != nullptr); + int c = context.file.sgetc(); + if (!isDigit(c)) { + scriptError("No %s digits found after %s", name, prefix); + return yy::parser::make_number(0); + } + number = parseDigit(c); + context.file.sbumpc(); + } + for (int c = context.file.sgetc(); isDigit(c) || c == '_'; c = context.file.snextc()) { if (c != '_') { - number = number * 10 + (c - '0'); + number = number * Base + parseDigit(c); } } return yy::parser::make_number(number); } static yy::parser::symbol_type parseBinNumber(char const *prefix) { - LexerStackEntry &context = lexerStack.back(); - int c = context.file.sgetc(); - if (!isBinDigit(c)) { - scriptError("No binary digits found after %s", prefix); - return yy::parser::make_number(0); - } - - uint32_t number = c - '0'; - for (c = context.file.snextc(); isBinDigit(c) || c == '_'; c = context.file.snextc()) { - if (c != '_') { - number = number * 2 + (c - '0'); - } - } - return yy::parser::make_number(number); + return readNumber<2>(0, prefix, "binary"); } static yy::parser::symbol_type parseOctNumber(char const *prefix) { - LexerStackEntry &context = lexerStack.back(); - int c = context.file.sgetc(); - if (!isOctDigit(c)) { - scriptError("No octal digits found after %s", prefix); - return yy::parser::make_number(0); - } - - uint32_t number = c - '0'; - for (c = context.file.snextc(); isOctDigit(c) || c == '_'; c = context.file.snextc()) { - if (c != '_') { - number = number * 8 + (c - '0'); - } - } - return yy::parser::make_number(number); + return readNumber<8>(0, prefix, "octal"); } static yy::parser::symbol_type parseHexNumber(char const *prefix) { - LexerStackEntry &context = lexerStack.back(); - int c = context.file.sgetc(); - if (!isHexDigit(c)) { - scriptError("No hexadecimal digits found after %s", prefix); - return yy::parser::make_number(0); - } - - uint32_t number = parseHexDigit(c); - for (c = context.file.snextc(); isHexDigit(c) || c == '_'; c = context.file.snextc()) { - if (c != '_') { - number = number * 16 + parseHexDigit(c); - } - } - return yy::parser::make_number(number); + return readNumber<16>(0, prefix, "hexadecimal"); } static yy::parser::symbol_type parseAnyNumber(int initial) { @@ -174,7 +149,7 @@ static yy::parser::symbol_type parseAnyNumber(int initial) { return parseBinNumber("\"0b\""); } } - return parseDecNumber(initial); + return readNumber<10>(initial, nullptr, nullptr); } static yy::parser::symbol_type parseString() { @@ -245,7 +220,7 @@ yy::parser::symbol_type yylex() { return parseBinNumber("'%'"); } else if (c == '&') { return parseOctNumber("'&'"); - } else if (isDigit(c)) { + } else if (isDigit<10>(c)) { return parseAnyNumber(c); } else if (isLetter(c)) { std::string keyword = readKeyword(c); diff --git a/src/util.cpp b/src/util.cpp index 514c172fa..e44804f18 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -38,24 +38,8 @@ bool isLetter(int c) { return isUpper(c) || isLower(c); } -bool isDigit(int c) { - return c >= '0' && c <= '9'; -} - -bool isBinDigit(int c) { - return c == '0' || c == '1'; -} - -bool isOctDigit(int c) { - return c >= '0' && c <= '7'; -} - -bool isHexDigit(int c) { - return isDigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); -} - bool isAlphanumeric(int c) { - return isLetter(c) || isDigit(c); + return isLetter(c) || isDigit<10>(c); } char toLower(char c) { @@ -73,18 +57,7 @@ bool startsIdentifier(int c) { } bool continuesIdentifier(int c) { - return startsIdentifier(c) || isDigit(c) || c == '#' || c == '$' || c == '@'; -} - -uint8_t parseHexDigit(int c) { - if (c >= 'A' && c <= 'F') { - return c - 'A' + 10; - } else if (c >= 'a' && c <= 'f') { - return c - 'a' + 10; - } else { - assume(isDigit(c)); - return c - '0'; - } + return startsIdentifier(c) || isDigit<10>(c) || c == '#' || c == '$' || c == '@'; } // Parses a number from a string, moving the pointer to skip the parsed characters. @@ -134,23 +107,23 @@ std::optional parseNumber(char const *&str, NumberBase base) { } // Get the digit-condition function corresponding to the base - bool (*canParseDigit)(int c) = base == BASE_2 ? isBinDigit - : base == BASE_8 ? isOctDigit - : base == BASE_10 ? isDigit - : base == BASE_16 ? isHexDigit - : nullptr; // LCOV_EXCL_LINE - assume(canParseDigit != nullptr); + bool (*isSomeDigit)(int c) = base == BASE_2 ? isDigit<2> + : base == BASE_8 ? isDigit<8> + : base == BASE_10 ? isDigit<10> + : base == BASE_16 ? isDigit<16> + : nullptr; // LCOV_EXCL_LINE + assume(isSomeDigit != nullptr); char const * const startDigits = str; // Parse the number one digit at a time // Does *not* support '_' digit separators uint64_t result = 0; - for (; canParseDigit(str[0]); ++str) { - uint8_t digit = parseHexDigit(str[0]); + for (; isSomeDigit(str[0]); ++str) { + uint8_t digit = parseDigit<16>(str[0]); if (result > (UINT64_MAX - digit) / base) { // Skip remaining digits and set errno = ERANGE on overflow - while (canParseDigit(str[0])) { + while (isSomeDigit(str[0])) { ++str; } result = UINT64_MAX;