yacc

Advanced example

This section gives an example of a grammar using some of the advanced features. The desk calculator example is modified to provide a desk calculator that does floating-point interval arithmetic. The calculator understands floating-point constants, the arithmetic operations +, -, *, /, and unary minus (-), and; registers labeled a through z. It also understands intervals written in the following format, where X is less than or equal to Y:

   (X,Y)

There are 26 interval-valued variables, A through Z, that may also be used. The usage is similar to that in the previous example: assignments return no value and print nothing, while expressions print the floating or interval value.

Intervals are represented by a structure consisting of the left and right endpoint values stored as doubles. This structure is given a type name, INTERVAL. The yacc value stack can also contain floating point scalars, and integers used to index into the arrays holding the variable values.

YYERROR is used to handle error conditions. The errors dealt with in this way are division by an interval containing 0, and an interval presented in the wrong order. The error recovery mechanism of yacc is used to discard the rest of the offending line.

In addition to the mixing of types on the value stack, this grammar also demonstrates an interesting use of syntax to keep track of the type (for example, scalar or interval) of intermediate expressions. Note that a scalar can be automatically promoted to an interval if the context demands an interval value. This causes a large number of conflicts when the grammar is processed by yacc\: 18 shift-reduce and 26 reduce-reduce. The problem can be seen by looking at the two input lines:

   2.5 + (3.5 - 4.)

and:

   2.5 + (3.5, 4)

Notice that the 2.5 is to be used in an interval value expression in the second example, but this fact is not known until the comma is read. By this time, 2.5 is finished, and the parser cannot go back and change its mind. More generally, it might be necessary to look ahead an arbitrary number of tokens to decide whether to convert a scalar to an interval. This problem is avoided by having two rules for each binary interval valued operator--one when the left operand is a scalar, and one rule when the left operand is an interval. In the second case, the right operand must be an interval, so the conversion will be applied automatically. Despite this, there are still many cases where the conversion may be applied or not, leading to the above conflicts. These are resolved by listing the rules that yield scalars first in the specification file: in this way, the conflict will be resolved in favor of keeping scalar valued expressions scalar valued, until they are forced to become intervals.

This way of handling multiple types is very instructive. If there were many kinds of expression types instead of just two, the number of rules needed would increase dramatically and the conflicts even more so. Thus, while this example is instructive, it is better practice in a more normal programming language environment to maintain the type information at the lexical level, and not as part of the grammar.

Finally, a word about the lexical analysis. The only unusual feature is the treatment of floating-point constants. The C library routine atof() is used to do the actual conversion from a character string to a double-precision value. If the lexical analyzer detects an error, it responds by returning a token that is illegal in the grammar, provoking a syntax error in the parser and then error recovery.

   %{
   
   #include <stdio.h>
   #include <ctype.h>
   

   typedef struct interval
   {
        double lo, hi;
   }  INTERVAL;
   

   INTERVAL vmul(), vdiv();
   

   double atof();
   

   double dreg[26];
   INTERVAL vreg[26];
   

   %}
   

   %start line
   

   %union
   {
     int ival;
     double dval;
     INTERVAL vval;
   }
   

   %token <ival> DREG VREG  /* indices into dreg, vreg arrays */
   

   

   %token <dval> CONST       /* floating point constant */
   

   %type <dval> dexp         /* expression */
   

   %type <vval> vexp         /* interval expression */
   

     /* precedence information about the operators */
   

   %left   ´+´ ´-´
   %left   ´*´ ´/´
   %left   UMINUS   /* precedence for unary minus */

   %%                       /* beginning of rules section */
   lines   :  /* empty */
           |  lines line
           ;
   line    :  dexp ´\n´
           {
                      (void) printf("%15.8f\n",$1);
           }
           |  vexp ´\n´
           {
                      (void) printf("(%15.8f, %15.8f)\n", $1.lo, $1.hi);

           }
           |  DREG ´=´ dexp ´\n´
           {
                    dreg[$1] = $3;
           }
           |  VREG ´=´ vexp ´\n´
           {
                    vreg[$1] = $3;
           }
           |  error ´\n´
           {
                      yyerrok;
           }
           ;

   dexp    :  CONST
           |  DREG
           {
                     $$ = dreg[$1];
           }
           |  dexp ´+´ dexp
           {
                       $$ = $1 + $3;
           }
           |  dexp ´-´ dexp
           {
                       $$ = $1 - $3;
           }
           |  dexp ´*´ dexp
           {
                       $$ = $1 * $3;
           }
           |  dexp ´/´ dexp
           {
                       $$ = $1 / $3;
           }

           |  ´-´ dexp    %prec UMINUS
           {
                      $$ = -$2;
           }
           |  ´(´ dexp´)´
           {
                       $$ = $2;
           }
           ;
   
   vexp    :  dexp
           {
                    $$.hi = $$.lo = $1;
           }
           |  ´(´ dexp ´,´ dexp ´)´
           {
                    $$.lo = $2;
                    $$.hi = $4;
                    if( $$.lo > $$.hi )
                    {
                           (void) printf("interval out of order \n");
                           YYERROR;
                    }
           }
           |  VREG
           {
                       $$ = vreg[$1];
           }
           |  vexp ´+´ vexp
           {
                   $$.hi = $1.hi + $3.hi;
                   $$.lo = $1.lo + $3.lo;
           }
           |  dexp ´+´ vexp
           {
                   $$.hi = $1 + $3.hi;
                   $$.lo = $1 + $3.lo;
           }
           |  vexp ´-´ vexp
           {
                   $$.hi = $1.hi - $3.lo;
                   $$.lo = $1.lo - $3.hi;
           }
           |  dexp ´-´ vexp
           {
                   $$.hi = $1 - $3.lo;
                   $$.lo = $1 - $3.hi;
           }

           |  vexp ´*´ vexp
           {
                   $$ = vmul( $1.lo,$1.hi,$3 );
           }
           |  dexp ´*´ vexp
           {
                   $$ = vmul( $1, $1, $3 );
           }
           |  vexp ´/´ vexp
           {
                   if( dcheck( $3 ) ) YYERROR;
                   $$ = vdiv( $1, $1, $3 );
           }
           |  dexp ´/´ vexp
           {
                   if( dcheck( $3 ) ) YYERROR;
                   $$ = vdiv( $1.lo, $1.hi, $3 );
           }
           |  ´-´ vexp    %prec UMINUS
           {
                   $$.hi = -$2.lo;$$.lo = -$2.hi;
           }
           |  ´(´ vexp ´)´
           {
                   $$ = $2;
           }
           ;
   
   %%                        /* beginning of subroutines section */
   # define BSZ 50   /* buffer size for floating point number */
   

            /* lexical analysis */
   int yylex( )
   {
           register int c;
                                /* skip over blanks */
           while ((c = getchar()) == ´ ´)
                   ;
           if (isupper(c))
           {
               yylval.ival = c - ´A´;
               return (VREG);
           }
           if (islower(c))
           {
               yylval.ival = c - ´a´;
               return(DREG);
           }

               /* gobble up digits. points, exponents */
   
           if (isdigit(c) || c == ´.´)
           {
               char buf[BSZ+1], *cp = buf;
               int dot = 0, exp = 0;
   

               for(; (cp - buf) < BSZ ; ++cp, c = getchar())
                   {
                       *cp = c;
                       if (isdigit(c))
                          continue;
                       if (c == ´.´)
                       {
                       if (dot++ || exp)
                           return (´.´);   /* will cause syntax error */
                           continue;
                       }
                       if( c == ´e´)
                       {
                       if (exp++)
                           return (´e´);   /* will cause syntax error */
                       continue;
                       }
                           /* end of number */
                       break;
                   }
   

                   *cp = ´nbsp;´;
                   if (cp - buf >= BSZ)
                       (void) printf("constant too long - truncated\n");
                   else
                       ungetc(c, stdin);   /* push back last char read */
                   yylval.dval = atof(buf);
                   return (CONST);
           }
           return (c);
   }

   INTERVAL
   hilo(a, b, c, d)
           double a, b, c, d;
   {
             /* returns the smallest interval containing a, b, c, and d */
   
             /* used by *,/ routine */
           INTERVAL v;
   

           if (a > b)
           {
                 v.hi = a;
                 v.lo = b;
           }
           else
           {
                 v.hi = b;
                 v.lo = a;
           }
           if (c > d)
           {
                 if (c > v.hi)
                     v.hi = c;
                 if (d < v.lo)
                     v.lo = d;
   

           }
           else
           {
                 if (d > v.hi)
                     v.hi = d;
                 if (c < v.lo)
                     v.lo = c;
           }
           return (v);
   }
   INTERVAL
   vmul(a, b, v)
           double a, b;
           INTERVAL v;
   {
           return (hilo(a * v.hi, a * v.lo, b * v.hi, b * v.lo));
   }

   dcheck(v)
           INTERVAL v;
   {
           if (v.hi >= 0. && v.lo <= 0.)
           {
                (void) printf("divisor interval contains 0.\n");
                return (1);
           }
           return (0);
   }
   INTERVAL
   vdiv(a, b, v)
           double a, b;
           INTERVAL v;
   {
      return (hilo(a / v.hi, a / v.lo, b / v.hi, b / v.lo));
   }