[Nickle] nickle: Branch 'master'
Bart Massey
bart at cs.pdx.edu
Thu Aug 28 14:48:09 PDT 2014
From ed7cbccbeffe531514e8c0447eb4ea52140dacbf Mon Sep 17 00:00:00 2001
From: Bart Massey <bart at cs.pdx.edu>
Date: Thu, 28 Aug 2014 13:49:01 -0700
Subject: [PATCH] added checked string-to-number conversions; fixed string-to-real conversion
The existing string_to_integer() and string_to_real()
conversions were in the style of atoi() and atof(); they
ignored leading whitespace and trailing garbage, returning 0
for non-numbers.
This patch adds string_to_integer_checked() and
string_to_real_checked(), which raise an invalid_argument
exception unless the entire string, with no leading or
trailing whitespace, is a syntactically valid Nickle number.
This patch also corrects a bug in string_to_real(), which
would not stop early on strings like "1.2xxe4" and would
instead return the obviously nonsensical 10020.
---
builtin-toplevel.c | 62 ++++++++++++++--
lex.l | 202 ++++++++++++++++++++++++++++++-----------------------
nickle.h | 4 +-
3 files changed, 173 insertions(+), 95 deletions(-)
diff --git a/builtin-toplevel.c b/builtin-toplevel.c
index 73c6573..5077e32 100644
--- a/builtin-toplevel.c
+++ b/builtin-toplevel.c
@@ -171,6 +171,11 @@ import_Toplevel_namespace()
" real string_to_real (string s)\n"
"\n"
" Parse a string representation of a numeric value.\n" },
+ { do_string_to_real_checked, "string_to_real_checked", "R", "s", "\n"
+ " real string_to_real_checked (string s)\n"
+ "\n"
+ " Parse a string representation of a numeric value.\n"
+ " Raise an invalid_argument exception on non-numeric strings.\n" },
{ do_hash, "hash", "i", "p", "\n"
" int hash (poly p)\n"
"\n"
@@ -238,6 +243,13 @@ import_Toplevel_namespace()
"\n"
" Parse 's' as an integer.\n"
" Use 'base' if supplied, else autodetect.\n" },
+ { do_string_to_integer_checked, "string_to_integer_checked", "i", "s.i", "\n"
+ " int string_to_integer_checked (string s)\n"
+ " int string_to_integer_checked (string s, int base)\n"
+ "\n"
+ " Parse a string representation of an integer.\n"
+ " Raise an invalid_argument exception on non-integer strings.\n"
+ " Use 'base' if supplied, else autodetect.\n" },
{ 0 }
};
@@ -264,8 +276,8 @@ do_time (void)
NewDoubleDigitNatural ((double_digit) time(0)))));
}
-Value
-do_string_to_integer (int n, Value *p)
+static Value
+do_string_to_integer_maybe_checked (int n, Value *p, int checked)
{
ENTER ();
char *s;
@@ -290,7 +302,9 @@ do_string_to_integer (int n, Value *p)
}
s = StringChars (&str->string);
- while (isspace ((int)(*s))) s++;
+ if (!checked)
+ while (isspace ((int)(*s)))
+ s++;
switch (*s) {
case '-':
negative = 1;
@@ -333,7 +347,14 @@ do_string_to_integer (int n, Value *p)
!strncmp (s, "0X", 2)) s += 2;
break;
}
- ret = atov (s, ibase);
+ ret = atov (s, ibase, checked);
+ if (ret == Void) {
+ RaiseStandardException(exception_invalid_argument, 3,
+ NewStrString("string_to_integer_checked: invalid integer string"),
+ NewInt(1),
+ p[0]);
+ RETURN (ret);
+ }
if (!aborting)
{
if (negative)
@@ -344,10 +365,41 @@ do_string_to_integer (int n, Value *p)
}
Value
+do_string_to_integer (int n, Value *p)
+{
+ ENTER();
+ RETURN (do_string_to_integer_maybe_checked(n, p, 0));
+}
+
+Value
+do_string_to_integer_checked (int n, Value *p)
+{
+ ENTER();
+ RETURN (do_string_to_integer_maybe_checked(n, p, 1));
+}
+
+Value
do_string_to_real (Value str)
{
ENTER ();
- RETURN (aetov (StringChars (&str->string), 10));
+ char *s = StringChars (&str->string);
+ while (isspace ((int)(*s)))
+ s++;
+ RETURN (aetov (s, 10, 0));
+}
+
+
+Value
+do_string_to_real_checked (Value str)
+{
+ ENTER ();
+ Value result = aetov (StringChars (&str->string), 10, 1);
+ if (result == Void)
+ RaiseStandardException(exception_invalid_argument, 3,
+ NewStrString ("string_to_real_checked: bad number string"),
+ NewInt(1),
+ str);
+ RETURN (result);
}
diff --git a/lex.l b/lex.l
index 91e2698..8d50dfb 100644
--- a/lex.l
+++ b/lex.l
@@ -23,7 +23,7 @@
int yyget_lineno (void);
FILE *yyget_in (void);
FILE *yyget_out (void);
-int yyget_leng (void);
+yy_size_t yyget_leng (void);
char *yyget_text (void);
void yyset_lineno (int);
void yyset_in (FILE *);
@@ -563,74 +563,74 @@ has_member { yylval.ints = HASMEMBER; return HASMEMBER; }
return STRING_CONST;
}
0[0-7]* {
- yylval.value = atov(yytext+1, 8);
+ yylval.value = atov(yytext+1, 8, 0);
if (yytext[1] == '\0')
return TEN_NUM;
else
return OCTAL0_NUM;
}
0o[0-7]+ {
- yylval.value = atov(yytext+2, 8);
+ yylval.value = atov(yytext+2, 8, 0);
return OCTAL_NUM;
}
0o[0-7]+\./\.\.\. {
- yylval.value = aetov(yytext+2, 8);
+ yylval.value = aetov(yytext+2, 8, 0);
return OCTAL_FLOAT;
}
0o[0-7]+/\.\. {
- yylval.value = atov(yytext+2, 8);
+ yylval.value = atov(yytext+2, 8, 0);
return OCTAL_NUM;
}
0o(([0-7]+((\.[0-7]*(\{[0-7]+\})?)?))|(\.[0-7]+)|(\.[0-7]*\{[0-7]+\}))(([Ee][-+]?[0-7]+)?) {
- yylval.value = aetov (yytext+2, 8);
+ yylval.value = aetov (yytext+2, 8, 0);
return OCTAL_FLOAT;
}
0b[01]+ {
- yylval.value = atov(yytext+2, 2);
+ yylval.value = atov(yytext+2, 2, 0);
return BINARY_NUM;
}
0b[0-1]+\./\.\.\. {
- yylval.value = aetov(yytext+2, 2);
+ yylval.value = aetov(yytext+2, 2, 0);
return BINARY_FLOAT;
}
0b[0-1]+/\.\. {
- yylval.value = atov(yytext+2, 2);
+ yylval.value = atov(yytext+2, 2, 0);
return BINARY_NUM;
}
0b(([0-1]+((\.[0-1]*(\{[0-1]+\})?)?))|(\.[0-1]+)|(\.[0-1]*\{[0-1]+\}))(([Ee][-+]?[0-1]+)?) {
- yylval.value = aetov (yytext+2, 2);
+ yylval.value = aetov (yytext+2, 2, 0);
return BINARY_FLOAT;
}
0x[0-9a-fA-F]+ {
- yylval.value = atov(yytext+2, 16);
+ yylval.value = atov(yytext+2, 16, 0);
return HEX_NUM;
}
0x[0-9a-fA-F]+\./\.\.\. {
- yylval.value = aetov(yytext+2, 16);
+ yylval.value = aetov(yytext+2, 16, 0);
return HEX_FLOAT;
}
0x[0-9a-fA-F]+/\.\. {
- yylval.value = atov(yytext+2, 16);
+ yylval.value = atov(yytext+2, 16, 0);
return HEX_NUM;
}
0x(([0-9a-fA-F]+((\.[0-9a-fA-F]*(\{[0-9a-fA-F]+\})?)?))|(\.[0-9a-fA-F]+)|(\.[0-9a-fA-F]*\{[0-9a-fA-F]+\}))(([Ee][-+]?[0-9a-fA-F]+)?) {
- yylval.value = aetov (yytext+2, 16);
+ yylval.value = aetov (yytext+2, 16, 0);
return HEX_FLOAT;
}
[0-9]+ {
- yylval.value = atov(yytext, 10);
+ yylval.value = atov(yytext, 10, 0);
return TEN_NUM;
}
[0-9]+\./\.\.\. {
- yylval.value = aetov(yytext, 10);
+ yylval.value = aetov(yytext, 10, 0);
return TEN_FLOAT;
}
[0-9]+/\.\. {
- yylval.value = atov(yytext, 10);
+ yylval.value = atov(yytext, 10, 0);
return TEN_NUM;
}
(([0-9]+((\.[0-9]*(\{[0-9]+\})?)?))|(\.[0-9]+)|(\.[0-9]*\{[0-9]+\}))(([Ee][-+]?[0-9]+)?) {
- yylval.value = aetov (yytext, 10);
+ yylval.value = aetov (yytext, 10, 0);
return TEN_FLOAT;
}
[a-zA-Z\200-\377_][0-9a-zA-Z\200-\377_]* {
@@ -770,115 +770,139 @@ skipline (void)
} while (c != EOF && c != '\n');
}
-Value
-atov (char *s, int base)
+static Value
+atovn (char *s, int base, int* nconverted)
{
ENTER ();
Value result;
Value b;
char c;
- int i;
+ int i, n;
+ if (*s == '\0')
+ {
+ *nconverted = 0;
+ RETURN (Zero);
+ }
+ n = 0;
b = NewInt (base);
- result = NewInt (0);
+ result = Zero;
for (;;) {
c = *s++;
if ('0' <= c && c <= '9')
i = c - '0';
- else if ('a' <= c && c <= 'z')
+ else if ('a' <= c && c <= 'z' && c < 'a' + base - 10)
i = c - 'a' + 10;
- else if ('A' <= c && c <= 'Z')
+ else if ('A' <= c && c <= 'Z' && c < 'A' + base - 10)
i = c - 'A' + 10;
- else
- break;
+ else
+ break;
if (i >= base)
break;
+ n++;
result = Plus (NewInt (i), Times (result, b));
}
+ *nconverted = n;
RETURN (result);
}
Value
-aetov (char *s, int base)
+atov(char *s, int base, int checked)
+{
+ ENTER();
+ int nconverted;
+ Value v;
+
+ if (checked && *s == '\0')
+ RETURN (Void);
+ v = atovn(s, base, &nconverted);
+ if (checked && s[nconverted] != '\0')
+ RETURN (Void);
+ RETURN (v);
+}
+
+Value
+aetov (char *s, int base, int checked)
{
ENTER ();
- char *int_part, *frac_part, *rep_part, *exp_part, *next;
- int sign, frac_len, rep_len, esign;
- Value v, sv;
+ int sign = 1;
+ int esign = 1;
+ Value int_val, frac_val, rep_val, exp_val, v;
+ int int_len = -1;
+ int frac_len = -1;
+ int rep_len = -1;
+ int exp_len = -1;
+ int rep_fail = 0;
- int_part = s;
- sign = 1;
- if (*int_part == '+')
- int_part++;
- else if (*int_part == '-') {
- int_part++;
+ /* Parse the argument. */
+ if (*s == '+')
+ s++;
+ else if (*s == '-')
+ {
+ s++;
sign = -1;
}
- next = int_part;
- frac_part = strchr (next, '.');
- frac_len = -1;
- rep_part = 0;
- rep_len = 0;
- esign = 1;
- if (frac_part) {
- frac_part++;
- next = frac_part;
- rep_part = strchr (next, '{');
- if (rep_part)
- {
- frac_len = rep_part - frac_part;
- rep_part++;
- next = strchr (rep_part, '}');
- if (!next)
- RETURN (Void); /* "can't" happen */
- rep_len = next - rep_part;
- next = next + 1;
+ int_val = atovn(s, base, &int_len);
+ s += int_len;
+ if (*s == '.') {
+ s++;
+ frac_val = atovn(s, base, &frac_len);
+ s += frac_len;
+ if (*s == '{')
+ {
+ s++;
+ rep_val = atovn(s, base, &rep_len);
+ s += rep_len;
+ /* "can't" happen in lexer */
+ if (*s != '}')
+ {
+ rep_fail = 1;
+ s -= rep_len;
+ rep_len = -1;
+ }
+ s++;
}
}
- exp_part = strchr (next, 'e');
- if (!exp_part)
- exp_part = strchr (next, 'E');
- if (exp_part) {
- if (frac_len < 0)
- frac_len = exp_part - frac_part;
- exp_part++;
- if (*exp_part == '+')
- exp_part++;
- else if (*exp_part == '-') {
- esign = -1;
- exp_part++;
- }
- } else if (frac_len < 0 && frac_part)
- frac_len = strlen(frac_part);
- v = atov (int_part, base);
- if (frac_part)
+ if (!rep_fail && (int_len > 0 || frac_len > 0) && (*s == 'e' || *s == 'E'))
{
- v = Plus (v, Divide (atov (frac_part, base),
- Pow (NewInt (base),
- NewInt (frac_len))));
+ s++;
+ if (*s == '+')
+ {
+ s++;
+ }
+ else if (*s == '-')
+ {
+ esign = -1;
+ s++;
+ }
+ exp_val = atovn(s, base, &exp_len);
+ s += exp_len;
}
- if (rep_part)
+ if (checked && (rep_fail || *s != '\0' || (int_len <= 0 && frac_len <= 0)))
+ RETURN (Void);
+ /* Construct the value to return. */
+ v = int_val;
+ if (frac_len > 0)
+ v = Plus (v, Divide (frac_val,
+ Pow (NewInt (base), NewInt (frac_len))));
+ if (rep_len > 0)
{
- Value rep;
-
- rep = Divide (atov (rep_part, base), Minus (Pow (NewInt (base),
- NewInt (rep_len)),
- One));
- if (frac_len)
- rep = Divide (rep, Pow (NewInt (base),
- NewInt (frac_len)));
+ Value rep = Divide (rep_val, Minus (Pow (NewInt (base),
+ NewInt (rep_len)),
+ One));
+ if (frac_len > 0)
+ rep = Divide (rep, Pow (NewInt (base), NewInt (frac_len)));
v = Plus (v, rep);
}
- if (exp_part)
+ if (exp_len > 0)
{
- sv = Pow (NewInt (base), atov (exp_part, base));
+ Value pow = Pow (NewInt (base), exp_val);
if (esign > 0)
- v = Times (v, sv);
+ v = Times (v, pow);
else
- v = Divide (v, sv);
+ v = Divide (v, pow);
}
if (sign == -1)
v = Negate (v);
RETURN (v);
}
-
diff --git a/nickle.h b/nickle.h
index cd2aba0..8747ed1 100644
--- a/nickle.h
+++ b/nickle.h
@@ -768,7 +768,7 @@ void yyerror (char *msg);
void ParseError (char *fmt, ...);
int yylex (void);
Bool LexFile (char *file, Bool complain, Bool after);
-Value atov (char *, int), aetov (char *, int);
+Value atov (char *, int, int), aetov (char *, int, int);
extern int ignorenl;
void skipcomment (void);
Value lexdoc (void);
@@ -882,6 +882,7 @@ Value do_Thread_trace (int, Value *);
Value do_Thread_trace (int, Value *);
Value do_History_show (int, Value *);
Value do_string_to_integer (int, Value *);
+Value do_string_to_integer_checked (int, Value *);
Value do_Semaphore_new (int, Value *);
Value do_Command_undefine (int, Value *);
Value do_Command_pretty_print (int , Value *);
@@ -907,6 +908,7 @@ Value do_dim (Value);
Value do_dims (Value);
Value do_reference (Value);
Value do_string_to_real (Value);
+Value do_string_to_real_checked (Value);
Value do_abs (Value);
Value do_floor (Value);
Value do_func_args (Value);
--
2.1.0
On Thu, Aug 28, 2014 at 1:43 PM, Bart Massey <bart at keithp.com> wrote:
> Makefile.am | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> New commits:
> commit b766c8f934e4a0dadbed59233158251ea06a7a73
> Author: Bart Massey <bart at cs.pdx.edu>
> Date: Sat Aug 23 09:38:26 2014 -0700
>
> removed gratuitous -O2 from Makefile.am to let CFLAGS handle it
>
> diff --git a/Makefile.am b/Makefile.am
> index 85a57a6..2b31c0b 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -69,7 +69,7 @@ AM_CPPFLAGS = \
> -DNICKLELIBDIR=\"@nicklelibdir@\"
>
> AM_CFLAGS = \
> - -D_FORTIFY_SOURCE=2 -O2 \
> + -D_FORTIFY_SOURCE=2 \
> -Wall -Wpointer-arith -Wstrict-prototypes \
> -Wmissing-prototypes -Wmissing-declarations \
> -Wnested-externs -fno-strict-aliasing -fwrapv
> _______________________________________________
> Nickle mailing list
> Nickle at nickle.org
> http://nickle.org/mailman/listinfo/nickle
More information about the Nickle
mailing list