[Nickle] nickle: Branch 'master'

Bart Massey bart at cs.pdx.edu
Thu Aug 28 14:48:09 PDT 2014


From ed7cbccbeffe531514e8c0447eb4ea52140dacbf Mon Sep 17 00:00:00 2001
From: Bart Massey <bart at cs.pdx.edu>
Date: Thu, 28 Aug 2014 13:49:01 -0700
Subject: [PATCH] added checked string-to-number conversions; fixed
 string-to-real conversion

The existing string_to_integer() and string_to_real()
conversions were in the style of atoi() and atof(); they
ignored leading whitespace and trailing garbage, returning 0
for non-numbers.

This patch adds string_to_integer_checked() and
string_to_real_checked() which raise an invalid_argument
exception when the string is not a syntactically valid
Nickle token with no leading or trailing whitespace.

This patch also corrects a bug in string_to_real(), which
would not stop early on strings like "1.2xxe4" and would
instead return the obviously nonsensical 10020.
---
 builtin-toplevel.c |  62 ++++++++++++++--
 lex.l              | 202 ++++++++++++++++++++++++++++++-----------------------
 nickle.h           |   4 +-
 3 files changed, 173 insertions(+), 95 deletions(-)

diff --git a/builtin-toplevel.c b/builtin-toplevel.c
index 73c6573..5077e32 100644
--- a/builtin-toplevel.c
+++ b/builtin-toplevel.c
@@ -171,6 +171,11 @@ import_Toplevel_namespace()
     " real string_to_real (string s)\n"
     "\n"
     " Parse a string representation of a numeric value.\n" },
+        { do_string_to_real_checked, "string_to_real_checked", "R", "s", "\n"
+    " real string_to_real_checked (string s)\n"
+    "\n"
+    " Parse a string representation of a numeric value.\n"
+            " Raise an invalid_argument exception on non-numeric strings.\n" },
  { do_hash, "hash", "i", "p", "\n"
     " int hash (poly p)\n"
     "\n"
@@ -238,6 +243,13 @@ import_Toplevel_namespace()
     "\n"
     " Parse 's' as an integer.\n"
     " Use 'base' if supplied, else autodetect.\n" },
+        { do_string_to_integer_checked, "string_to_integer_checked", "i", "s.i", "\n"
+    " int string_to_integer_checked (string s)\n"
+    " int string_to_integer_checked (string s, int base)\n"
+    "\n"
+    " Parse a string representation of an integer.\n"
+            " Raise an invalid_argument exception on non-integer strings.\n"
+    " Use 'base' if supplied, else autodetect.\n" },
         { 0 }
     };

@@ -264,8 +276,8 @@ do_time (void)
  NewDoubleDigitNatural ((double_digit) time(0)))));
 }

-Value
-do_string_to_integer (int n, Value *p)
+static Value
+do_string_to_integer_maybe_checked (int n, Value *p, int checked)
 {
     ENTER ();
     char    *s;
@@ -290,7 +302,9 @@ do_string_to_integer (int n, Value *p)
     }

     s = StringChars (&str->string);
-    while (isspace ((int)(*s))) s++;
+    if (!checked)
+        while (isspace ((int)(*s)))
+            s++;
     switch (*s) {
     case '-':
  negative = 1;
@@ -333,7 +347,14 @@ do_string_to_integer (int n, Value *p)
  !strncmp (s, "0X", 2)) s += 2;
     break;
  }
- ret = atov (s, ibase);
+ ret = atov (s, ibase, checked);
+        if (ret == Void) {
+            RaiseStandardException(exception_invalid_argument, 3,
+                                   NewStrString("string_to_integer_checked: invalid integer string"),
+                                   NewInt(1),
+                                   p[0]);
+            RETURN (ret);
+        }
  if (!aborting)
  {
     if (negative)
@@ -344,10 +365,41 @@ do_string_to_integer (int n, Value *p)
 }

 Value
+do_string_to_integer (int n, Value *p)
+{
+    ENTER();
+    RETURN (do_string_to_integer_maybe_checked(n, p, 0));
+}
+
+Value
+do_string_to_integer_checked (int n, Value *p)
+{
+    ENTER();
+    RETURN (do_string_to_integer_maybe_checked(n, p, 1));
+}
+
+Value
 do_string_to_real (Value str)
 {
     ENTER ();
-    RETURN (aetov (StringChars (&str->string), 10));
+    char *s = StringChars (&str->string);
+    while (isspace ((int)(*s)))
+        s++;
+    RETURN (aetov (s, 10, 0));
+}
+
+
+Value
+do_string_to_real_checked (Value str)
+{
+    ENTER ();
+    Value result = aetov (StringChars (&str->string), 10, 1);
+    if (result == Void)
+        RaiseStandardException(exception_invalid_argument, 3,
+                               NewStrString ("string_to_real_checked: bad number string"),
+                               NewInt(1),
+                               str);
+    RETURN (result);
 }


diff --git a/lex.l b/lex.l
index 91e2698..8d50dfb 100644
--- a/lex.l
+++ b/lex.l
@@ -23,7 +23,7 @@
 int yyget_lineno (void);
 FILE *yyget_in (void);
 FILE *yyget_out (void);
-int yyget_leng (void);
+yy_size_t yyget_leng (void);
 char *yyget_text (void);
 void yyset_lineno (int);
 void yyset_in (FILE *);
@@ -563,74 +563,74 @@ has_member { yylval.ints = HASMEMBER; return HASMEMBER; }
  return STRING_CONST;
  }
 0[0-7]* {
- yylval.value = atov(yytext+1, 8);
+                yylval.value = atov(yytext+1, 8, 0);
  if (yytext[1] == '\0')
     return TEN_NUM;
         else
     return OCTAL0_NUM;
  }
 0o[0-7]+ {
- yylval.value = atov(yytext+2, 8);
+ yylval.value = atov(yytext+2, 8, 0);
  return OCTAL_NUM;
  }
 0o[0-7]+\./\.\.\.   {
- yylval.value = aetov(yytext+2, 8);
+ yylval.value = aetov(yytext+2, 8, 0);
  return OCTAL_FLOAT;
  }
 0o[0-7]+/\.\. {
- yylval.value = atov(yytext+2, 8);
+ yylval.value = atov(yytext+2, 8, 0);
  return OCTAL_NUM;
  }
 0o(([0-7]+((\.[0-7]*(\{[0-7]+\})?)?))|(\.[0-7]+)|(\.[0-7]*\{[0-7]+\}))(([Ee][-+]?[0-7]+)?) {
- yylval.value = aetov (yytext+2, 8);
+ yylval.value = aetov (yytext+2, 8, 0);
  return OCTAL_FLOAT;
  }
 0b[01]+ {
- yylval.value = atov(yytext+2, 2);
+ yylval.value = atov(yytext+2, 2, 0);
  return BINARY_NUM;
  }
 0b[0-1]+\./\.\.\.   {
- yylval.value = aetov(yytext+2, 2);
+ yylval.value = aetov(yytext+2, 2, 0);
  return BINARY_FLOAT;
  }
 0b[0-1]+/\.\. {
- yylval.value = atov(yytext+2, 2);
+ yylval.value = atov(yytext+2, 2, 0);
  return BINARY_NUM;
  }
 0b(([0-1]+((\.[0-1]*(\{[0-1]+\})?)?))|(\.[0-1]+)|(\.[0-1]*\{[0-1]+\}))(([Ee][-+]?[0-1]+)?) {
- yylval.value = aetov (yytext+2, 2);
+ yylval.value = aetov (yytext+2, 2, 0);
  return BINARY_FLOAT;
  }
 0x[0-9a-fA-F]+ {
- yylval.value = atov(yytext+2, 16);
+ yylval.value = atov(yytext+2, 16, 0);
  return HEX_NUM;
  }
 0x[0-9a-fA-F]+\./\.\.\.   {
- yylval.value = aetov(yytext+2, 16);
+ yylval.value = aetov(yytext+2, 16, 0);
  return HEX_FLOAT;
  }
 0x[0-9a-fA-F]+/\.\. {
- yylval.value = atov(yytext+2, 16);
+ yylval.value = atov(yytext+2, 16, 0);
  return HEX_NUM;
  }
 0x(([0-9a-fA-F]+((\.[0-9a-fA-F]*(\{[0-9a-fA-F]+\})?)?))|(\.[0-9a-fA-F]+)|(\.[0-9a-fA-F]*\{[0-9a-fA-F]+\}))(([Ee][-+]?[0-9a-fA-F]+)?) {
- yylval.value = aetov (yytext+2, 16);
+ yylval.value = aetov (yytext+2, 16, 0);
  return HEX_FLOAT;
  }
 [0-9]+ {
- yylval.value = atov(yytext, 10);
+ yylval.value = atov(yytext, 10, 0);
  return TEN_NUM;
  }
 [0-9]+\./\.\.\. {
- yylval.value = aetov(yytext, 10);
+ yylval.value = aetov(yytext, 10, 0);
  return TEN_FLOAT;
  }
 [0-9]+/\.\. {
- yylval.value = atov(yytext, 10);
+ yylval.value = atov(yytext, 10, 0);
  return TEN_NUM;
  }
 (([0-9]+((\.[0-9]*(\{[0-9]+\})?)?))|(\.[0-9]+)|(\.[0-9]*\{[0-9]+\}))(([Ee][-+]?[0-9]+)?) {
- yylval.value = aetov (yytext, 10);
+ yylval.value = aetov (yytext, 10, 0);
  return TEN_FLOAT;
  }
 [a-zA-Z\200-\377_][0-9a-zA-Z\200-\377_]* {
@@ -770,115 +770,139 @@ skipline (void)
     } while (c != EOF && c != '\n');
 }

-Value
-atov (char *s, int base)
+static Value
+atovn (char *s, int base, int* nconverted)
 {
     ENTER ();
     Value result;
     Value b;
     char c;
-    int i;
+    int i, n;

+    if (*s == '\0')
+    {
+        *nconverted = 0;
+        RETURN (Zero);
+    }
+    n = 0;
     b = NewInt (base);
-    result = NewInt (0);
+    result = Zero;
     for (;;) {
  c = *s++;
  if ('0' <= c  && c <= '9')
     i = c - '0';
- else if ('a' <= c && c <= 'z')
+ else if ('a' <= c && c <= 'z' && c < 'a' + base - 10)
     i = c - 'a' + 10;
- else if ('A' <= c && c <= 'Z')
+ else if ('A' <= c && c <= 'Z' && c < 'A' + base - 10)
     i = c - 'A' + 10;
- else
-    break;
+        else
+            break;
  if (i >= base)
     break;
+        n++;
  result = Plus (NewInt (i), Times (result, b));
     }
+    *nconverted = n;
     RETURN (result);
 }

 Value
-aetov (char *s, int base)
+atov(char *s, int base, int checked)
+{
+    ENTER();
+    int nconverted;
+    Value v;
+
+    if (checked && *s == '\0')
+        RETURN (Void);
+    v = atovn(s, base, &nconverted);
+    if (checked && s[nconverted] != '\0')
+        RETURN (Void);
+    RETURN (v);
+}
+
+Value
+aetov (char *s, int base, int checked)
 {
     ENTER ();
-    char    *int_part, *frac_part, *rep_part, *exp_part, *next;
-    int    sign, frac_len, rep_len, esign;
-    Value   v, sv;
+    int sign = 1;
+    int esign = 1;
+    Value int_val, frac_val, rep_val, exp_val, v;
+    int int_len = -1;
+    int frac_len = -1;
+    int rep_len = -1;
+    int exp_len = -1;
+    int rep_fail = 0;

-    int_part = s;
-    sign = 1;
-    if (*int_part == '+')
- int_part++;
-    else if (*int_part == '-') {
- int_part++;
+    /* Parse the argument. */
+    if (*s == '+')
+ s++;
+    else if (*s == '-')
+    {
+ s++;
  sign = -1;
     }
-    next = int_part;
-    frac_part = strchr (next, '.');
-    frac_len = -1;
-    rep_part = 0;
-    rep_len = 0;
-    esign = 1;
-    if (frac_part) {
- frac_part++;
- next = frac_part;
- rep_part = strchr (next, '{');
- if (rep_part)
- {
-    frac_len = rep_part - frac_part;
-    rep_part++;
-    next = strchr (rep_part, '}');
-    if (!next)
- RETURN (Void);    /* "can't" happen */
-    rep_len = next - rep_part;
-    next = next + 1;
+    int_val = atovn(s, base, &int_len);
+    s += int_len;
+    if (*s == '.') {
+        s++;
+        frac_val = atovn(s, base, &frac_len);
+        s += frac_len;
+ if (*s == '{')
+        {
+            s++;
+            rep_val = atovn(s, base, &rep_len);
+    s += rep_len;
+            /* "can't" happen in lexer */
+    if (*s != '}')
+            {
+                rep_fail = 1;
+                s -= rep_len;
+                rep_len = -1;
+            }
+            s++;
  }
     }
-    exp_part = strchr (next, 'e');
-    if (!exp_part)
- exp_part = strchr (next, 'E');
-    if (exp_part) {
- if (frac_len < 0)
-    frac_len = exp_part - frac_part;
- exp_part++;
- if (*exp_part == '+')
-    exp_part++;
- else if (*exp_part == '-') {
-    esign = -1;
-    exp_part++;
- }
-    } else if (frac_len < 0 && frac_part)
- frac_len = strlen(frac_part);
-    v = atov (int_part, base);
-    if (frac_part)
+    if (!rep_fail && (int_len > 0 || frac_len > 0) && (*s == 'e' || *s == 'E'))
     {
- v = Plus (v, Divide (atov (frac_part, base),
- Pow (NewInt (base),
-       NewInt (frac_len))));
+        s++;
+        if (*s == '+')
+        {
+                s++;
+        }
+        else if (*s == '-')
+        {
+                esign = -1;
+                s++;
+        }
+        exp_val = atovn(s, base, &exp_len);
+        s += exp_len;
     }
-    if (rep_part)
+    if (checked && (rep_fail || *s != '\0' || (int_len <= 0 && frac_len <= 0)))
+        RETURN (Void);
+    /* Construct the value to return. */
+    v = int_val;
+    if (frac_len > 0)
+        v = Plus (v, Divide (frac_val,
+                             Pow (NewInt (base), NewInt (frac_len))));
+    if (rep_len > 0)
     {
- Value rep;
-
- rep = Divide (atov (rep_part, base), Minus (Pow (NewInt (base),
-       NewInt (rep_len)),
-  One));
- if (frac_len)
-    rep = Divide (rep, Pow (NewInt (base),
-    NewInt (frac_len)));
+ Value rep = Divide (rep_val, Minus (Pow (NewInt (base),
+                                            NewInt (rep_len)),
+                                            One));
+ if (frac_len > 0)
+    rep = Divide (rep, Pow (NewInt (base), NewInt (frac_len)));
  v = Plus (v, rep);
     }
-    if (exp_part)
+    if (exp_len > 0)
     {
- sv = Pow (NewInt (base), atov (exp_part, base));
+ Value pow = Pow (NewInt (base), exp_val);
  if (esign > 0)
-    v = Times (v, sv);
+    v = Times (v, pow);
  else
-    v = Divide (v, sv);
+    v = Divide (v, pow);
     }
     if (sign == -1)
  v = Negate (v);
     RETURN (v);
 }
-
diff --git a/nickle.h b/nickle.h
index cd2aba0..8747ed1 100644
--- a/nickle.h
+++ b/nickle.h
@@ -768,7 +768,7 @@ void yyerror (char *msg);
 void ParseError (char *fmt, ...);
 int yylex (void);
 Bool LexFile (char *file, Bool complain, Bool after);
-Value atov (char *, int), aetov (char *, int);
+Value atov (char *, int, int), aetov (char *, int, int);
 extern int  ignorenl;
 void skipcomment (void);
 Value lexdoc (void);
@@ -882,6 +882,7 @@ Value do_Thread_trace (int, Value *);
 Value do_Thread_trace (int, Value *);
 Value do_History_show (int, Value *);
 Value do_string_to_integer (int, Value *);
+Value do_string_to_integer_checked (int, Value *);
 Value do_Semaphore_new (int, Value *);
 Value do_Command_undefine (int, Value *);
 Value do_Command_pretty_print (int , Value *);
@@ -907,6 +908,7 @@ Value do_dim (Value);
 Value do_dims (Value);
 Value do_reference (Value);
 Value do_string_to_real (Value);
+Value do_string_to_real_checked (Value);
 Value do_abs (Value);
 Value do_floor (Value);
 Value do_func_args (Value);
-- 
2.1.0

On Thu, Aug 28, 2014 at 1:43 PM, Bart Massey <bart at keithp.com> wrote:
>  Makefile.am |    2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> New commits:
> commit b766c8f934e4a0dadbed59233158251ea06a7a73
> Author: Bart Massey <bart at cs.pdx.edu>
> Date:   Sat Aug 23 09:38:26 2014 -0700
>
>     removed gratuitous -O2 from Makefile.am to let CFLAGS handle it
>
> diff --git a/Makefile.am b/Makefile.am
> index 85a57a6..2b31c0b 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -69,7 +69,7 @@ AM_CPPFLAGS = \
>         -DNICKLELIBDIR=\"@nicklelibdir@\"
>
>  AM_CFLAGS = \
> -       -D_FORTIFY_SOURCE=2 -O2 \
> +       -D_FORTIFY_SOURCE=2 \
>         -Wall -Wpointer-arith -Wstrict-prototypes \
>         -Wmissing-prototypes -Wmissing-declarations \
>         -Wnested-externs -fno-strict-aliasing -fwrapv
> _______________________________________________
> Nickle mailing list
> Nickle at nickle.org
> http://nickle.org/mailman/listinfo/nickle


More information about the Nickle mailing list