* - better number building (formatting) / parsing, now it isn't
* ideal code
* - use Assert()
- * - add support for roman number to standard number conversion
* - add support for number spelling
* - add support for string to string formatting (we must be better
* than Oracle :-),
{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
/* ----------
- * Roman numbers
+ * Roman numerals
* ----------
*/
static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
+/*
+ * MACRO: Check if the current and next characters form a valid subtraction
+ * combination for roman numerals.
+ *
+ * The accepted pairs are IV, IX, XL, XC, CD and CM. Only upper-case
+ * letters are matched. Both arguments can be evaluated more than once, so
+ * they must not have side effects.
+ */
+#define IS_VALID_SUB_COMB(curr, next) \
+ (((curr) == 'I' && ((next) == 'V' || (next) == 'X')) || \
+ ((curr) == 'X' && ((next) == 'L' || (next) == 'C')) || \
+ ((curr) == 'C' && ((next) == 'D' || (next) == 'M')))
+
+/*
+ * MACRO: Roman numeral value, or 0 if character isn't a roman numeral.
+ *
+ * Maps I, V, X, L, C, D, M to 1, 5, 10, 50, 100, 500, 1000 respectively.
+ * Only upper-case letters are recognized. The argument can be evaluated
+ * several times, so it must not have side effects.
+ */
+#define ROMAN_VAL(r) \
+ ((r) == 'I' ? 1 : \
+ (r) == 'V' ? 5 : \
+ (r) == 'X' ? 10 : \
+ (r) == 'L' ? 50 : \
+ (r) == 'C' ? 100 : \
+ (r) == 'D' ? 500 : \
+ (r) == 'M' ? 1000 : 0)
+
+/*
+ * 'MMMDCCCLXXXVIII' (3888) is the longest valid roman numeral (15 characters).
+ *
+ * This is a count of numeral characters only; it does not include a
+ * terminating NUL byte.
+ */
+#define MAX_ROMAN_LEN 15
+
/* ----------
* Ordinal postfixes
* ----------
#define DCH_TIMED 0x02
#define DCH_ZONED 0x04
+/*
+ * These macros are used in NUM_processor() and its subsidiary routines.
+ * OVERLOAD_TEST: true if we've reached end of input string
+ * AMOUNT_TEST(s): true if at least s bytes remain in string
+ *
+ * Both expand to expressions that refer to variables named "Np" and
+ * "input_len", so those must be in scope wherever the macros are used.
+ */
+#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len)
+#define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s)))
+
+
/* ----------
* Functions
* ----------
static char *fill_str(char *str, int c, int max);
static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
static char *int_to_roman(int number);
+static int roman_to_int(NUMProc *Np, int input_len);
static void NUM_prepare_locale(NUMProc *Np);
static char *get_last_relevant_decnum(char *num);
static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
case NUM_rn:
case NUM_RN:
+ if (IS_ROMAN(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"RN\" twice")));
num->flag |= NUM_F_ROMAN;
break;
num->flag |= NUM_F_EEEE;
break;
}
+
+ if (IS_ROMAN(num) &&
+ (num->flag & ~(NUM_F_ROMAN | NUM_F_FILLMODE)) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("\"RN\" is incompatible with other formats"),
+ errdetail("\"RN\" may only be used together with \"FM\".")));
}
/* ----------
*result,
numstr[12];
- result = (char *) palloc(16);
+ result = (char *) palloc(MAX_ROMAN_LEN + 1);
*result = '\0';
/*
*/
if (number > 3999 || number < 1)
{
- fill_str(result, '#', 15);
+ fill_str(result, '#', MAX_ROMAN_LEN);
return result;
}
return result;
}
+/*
+ * Convert a roman numeral (standard form) to an integer.
+ * Result is an integer between 1 and 3999.
+ * Np->inout_p is advanced past the characters consumed.
+ *
+ * Parsing starts at Np->inout_p and stops at the end of the input, i.e.
+ * input_len bytes from Np->inout (see OVERLOAD_TEST). Leading whitespace
+ * is skipped, then at most MAX_ROMAN_LEN numeral characters are consumed,
+ * each passed through pg_ascii_toupper() before decoding. Collection stops
+ * at the first character that is not a roman numeral; that character is
+ * left unconsumed.
+ *
+ * If input is invalid, return -1. Note that Np->inout_p has already been
+ * advanced over the skipped whitespace and collected numerals by then.
+ */
+static int
+roman_to_int(NUMProc *Np, int input_len)
+{
+ int result = 0;
+ int len; /* number of numerals collected */
+ char romanChars[MAX_ROMAN_LEN];
+ int romanValues[MAX_ROMAN_LEN];
+ int repeatCount = 1; /* length of current run of identical numerals */
+ int vCount = 0,
+ lCount = 0,
+ dCount = 0; /* how many V, L, D were seen so far */
+ bool subtractionEncountered = false;
+ int lastSubtractedValue = 0; /* smaller numeral of the latest pair */
+
+ /*
+ * Skip any leading whitespace. Perhaps we should limit the amount of
+ * space skipped to MAX_ROMAN_LEN, but that seems unnecessarily picky.
+ */
+ while (!OVERLOAD_TEST && isspace((unsigned char) *Np->inout_p))
+ Np->inout_p++;
+
+ /*
+ * Collect and decode valid roman numerals, consuming at most
+ * MAX_ROMAN_LEN characters. We do this in a separate loop to avoid
+ * repeated decoding and because the main loop needs to know when it's at
+ * the last numeral.
+ *
+ * The upper-cased characters are stored in romanChars[] and their values
+ * in the matching slots of romanValues[].
+ */
+ for (len = 0; len < MAX_ROMAN_LEN && !OVERLOAD_TEST; len++)
+ {
+ char currChar = pg_ascii_toupper(*Np->inout_p);
+ int currValue = ROMAN_VAL(currChar);
+
+ if (currValue == 0)
+ break; /* Not a valid roman numeral. */
+ romanChars[len] = currChar;
+ romanValues[len] = currValue;
+ Np->inout_p++;
+ }
+
+ if (len == 0)
+ return -1; /* No valid roman numerals. */
+
+ /* Check for valid combinations and compute the represented value. */
+ for (int i = 0; i < len; i++)
+ {
+ char currChar = romanChars[i];
+ int currValue = romanValues[i];
+
+ /*
+ * Ensure no numeral greater than or equal to the subtracted numeral
+ * appears after a subtraction (e.g. 'IXI' or 'XCL' are rejected here).
+ */
+ if (subtractionEncountered && currValue >= lastSubtractedValue)
+ return -1;
+
+ /*
+ * V, L, and D should not appear before a larger numeral, nor should
+ * they be repeated (e.g. 'VV' is rejected here).
+ */
+ if ((vCount && currValue >= ROMAN_VAL('V')) ||
+ (lCount && currValue >= ROMAN_VAL('L')) ||
+ (dCount && currValue >= ROMAN_VAL('D')))
+ return -1;
+ if (currChar == 'V')
+ vCount++;
+ else if (currChar == 'L')
+ lCount++;
+ else if (currChar == 'D')
+ dCount++;
+
+ if (i < len - 1)
+ {
+ /* Compare current numeral to next numeral. */
+ char nextChar = romanChars[i + 1];
+ int nextValue = romanValues[i + 1];
+
+ /*
+ * If the current value is less than the next value, handle
+ * subtraction. Verify valid subtractive combinations and update
+ * the result accordingly.
+ */
+ if (currValue < nextValue)
+ {
+ if (!IS_VALID_SUB_COMB(currChar, nextChar))
+ return -1;
+
+ /*
+ * Reject cases where same numeral is repeated with
+ * subtraction (e.g. 'MCCM' or 'DCCCD').
+ */
+ if (repeatCount > 1)
+ return -1;
+
+ /*
+ * We are going to skip nextChar, so first make checks needed
+ * for V, L, and D. These are the same as we'd have applied
+ * if we reached nextChar without a subtraction.
+ */
+ if ((vCount && nextValue >= ROMAN_VAL('V')) ||
+ (lCount && nextValue >= ROMAN_VAL('L')) ||
+ (dCount && nextValue >= ROMAN_VAL('D')))
+ return -1;
+ if (nextChar == 'V')
+ vCount++;
+ else if (nextChar == 'L')
+ lCount++;
+ else if (nextChar == 'D')
+ dCount++;
+
+ /*
+ * Skip the next numeral as it is part of the subtractive
+ * combination.
+ */
+ i++;
+
+ /* Update state. */
+ repeatCount = 1;
+ subtractionEncountered = true;
+ lastSubtractedValue = currValue;
+ result += (nextValue - currValue);
+ }
+ else
+ {
+ /*
+ * For same numerals, check for repetition: at most three
+ * identical numerals in a row are allowed.
+ */
+ if (currChar == nextChar)
+ {
+ repeatCount++;
+ if (repeatCount > 3)
+ return -1;
+ }
+ else
+ repeatCount = 1;
+ result += currValue;
+ }
+ }
+ else
+ {
+ /* This is the last numeral; just add it to the result. */
+ result += currValue;
+ }
+ }
+
+ return result;
+}
/* ----------
return result;
}
-/*
- * These macros are used in NUM_processor() and its subsidiary routines.
- * OVERLOAD_TEST: true if we've reached end of input string
- * AMOUNT_TEST(s): true if at least s bytes remain in string
- */
-#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len)
-#define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s)))
-
/* ----------
* Number extraction for TO_NUMBER()
* ----------
return strcpy(inout, number);
}
- /*
- * Roman correction
- */
- if (IS_ROMAN(Np->Num))
- {
- if (!Np->is_to_char)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("\"RN\" not supported for input")));
-
- Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
- Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
-
- if (IS_FILLMODE(Np->Num))
- {
- Np->Num->flag = 0;
- Np->Num->flag |= NUM_F_FILLMODE;
- }
- else
- Np->Num->flag = 0;
- Np->Num->flag |= NUM_F_ROMAN;
- }
-
/*
* Sign
*/
break;
case NUM_RN:
- if (IS_FILLMODE(Np->Num))
- {
- strcpy(Np->inout_p, Np->number_p);
- Np->inout_p += strlen(Np->inout_p) - 1;
- }
- else
- {
- sprintf(Np->inout_p, "%15s", Np->number_p);
- Np->inout_p += strlen(Np->inout_p) - 1;
- }
- break;
-
case NUM_rn:
- if (IS_FILLMODE(Np->Num))
+ if (Np->is_to_char)
{
- strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
+ const char *number_p;
+
+ if (n->key->id == NUM_rn)
+ number_p = asc_tolower_z(Np->number_p);
+ else
+ number_p = Np->number_p;
+ if (IS_FILLMODE(Np->Num))
+ strcpy(Np->inout_p, number_p);
+ else
+ sprintf(Np->inout_p, "%15s", number_p);
Np->inout_p += strlen(Np->inout_p) - 1;
}
else
{
- sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
- Np->inout_p += strlen(Np->inout_p) - 1;
+ int roman_result = roman_to_int(Np, input_len);
+ int numlen;
+
+ if (roman_result < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid Roman numeral")));
+ numlen = sprintf(Np->number_p, "%d", roman_result);
+ Np->number_p += numlen;
+ Np->Num->pre = numlen;
+ Np->Num->post = 0;
+ continue; /* roman_to_int ate all the chars */
}
break;
1234.560000000000000000
(1 row)
+-- Test for correct conversion between numbers and Roman numerals
+WITH rows AS
+ (SELECT i, to_char(i, 'RN') AS roman FROM generate_series(1, 3999) AS i)
+SELECT
+ bool_and(to_number(roman, 'RN') = i) as valid
+FROM rows;
+ valid
+-------
+ t
+(1 row)
+
+-- Some additional tests for RN input
+SELECT to_number('CvIiI', 'rn');
+ to_number
+-----------
+ 108
+(1 row)
+
+SELECT to_number('MMXX ', 'RN');
+ to_number
+-----------
+ 2020
+(1 row)
+
+SELECT to_number(' XIV', ' RN');
+ to_number
+-----------
+ 14
+(1 row)
+
+SELECT to_number(' XIV ', ' RN');
+ to_number
+-----------
+ 14
+(1 row)
+
+SELECT to_number('M CC', 'RN');
+ to_number
+-----------
+ 1000
+(1 row)
+
+-- error cases
+SELECT to_number('viv', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('DCCCD', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('XIXL', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('MCCM', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('MMMM', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('VV', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('IL', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('VIX', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('LXC', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('DCM', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('MMMDCM', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('CLXC', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('CM', 'MIRN');
+ERROR: "RN" is incompatible with other formats
+DETAIL: "RN" may only be used together with "FM".
+SELECT to_number('CM', 'RNRN');
+ERROR: cannot use "RN" twice
+SELECT to_number('qiv', 'RN');
+ERROR: invalid Roman numeral
+SELECT to_number('', 'RN');
+ERROR: invalid input syntax for type numeric: " "
+SELECT to_number(' ', 'RN');
+ERROR: invalid Roman numeral
RESET lc_numeric;
--
-- Input syntax
SELECT to_number('1,234.56','L99,999.99');
SELECT to_number('42nd', '99th');
SELECT to_number('123456', '99999V99');
+
+-- Test for correct conversion between numbers and Roman numerals
+WITH rows AS
+ (SELECT i, to_char(i, 'RN') AS roman FROM generate_series(1, 3999) AS i)
+SELECT
+ bool_and(to_number(roman, 'RN') = i) as valid
+FROM rows;
+
+-- Some additional tests for RN input
+SELECT to_number('CvIiI', 'rn');
+SELECT to_number('MMXX ', 'RN');
+SELECT to_number(' XIV', ' RN');
+SELECT to_number(' XIV ', ' RN');
+SELECT to_number('M CC', 'RN');
+-- error cases
+SELECT to_number('viv', 'RN');
+SELECT to_number('DCCCD', 'RN');
+SELECT to_number('XIXL', 'RN');
+SELECT to_number('MCCM', 'RN');
+SELECT to_number('MMMM', 'RN');
+SELECT to_number('VV', 'RN');
+SELECT to_number('IL', 'RN');
+SELECT to_number('VIX', 'RN');
+SELECT to_number('LXC', 'RN');
+SELECT to_number('DCM', 'RN');
+SELECT to_number('MMMDCM', 'RN');
+SELECT to_number('CLXC', 'RN');
+SELECT to_number('CM', 'MIRN');
+SELECT to_number('CM', 'RNRN');
+SELECT to_number('qiv', 'RN');
+SELECT to_number('', 'RN');
+SELECT to_number(' ', 'RN');
+
RESET lc_numeric;
--