#|| 


VHDL Object Model 1.0
---------------------

Copyright (c) 1994 Ohio Board of Regents and the University of
Cincinnati.  All rights reserved.

Authors: David Benz, Phillip Baraona
E-Mail: dbenz@thor.ece.uc.edu, pbaraona@thor.ece.uc.edu


||#

#|| 

  File: lexer.re
  Contains: Functions and constants for building the VHDL object model lexer.

  VHDL lexer function hierarchy:

    lex-vhdl
      lex-token(lex-whitespace(input))
        lex-grammar-prefix (REFINE provided)
        lex-end-of-form (REFINE provided)
        lex-character-literal
        lex-keyword (REFINE provided)
        lex-abstract-literal
           lex-decimal-literal
	      lex-exponent
	   lex-based-literal
	      lex-based-decimal-part
	         lex-based-exponent
        lex-string-literal
        lex-bit-string-literal 
        lex-identifier


  Need check in lexed-based-decimal to determine if base-separators are same.

||#


!! in-package("VOM-1-0")
!! in-grammar('user)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Constants %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

constant *DIGIT*: string = "0123456789"

constant *DIGIT-PLUS-UNDERSCORE* : string = "01234567890_"

constant *LETTER*: string = "abcdefghijklmnopqrstuvwxyz"

constant *LETTER-OR-DIGIT*: string =
   "abcdefghijklmnopqrstuvwxyz0123456789"

constant *LETTER-OR-DIGIT-OR-UNDERSCORE*: string =
   "abcdefghijklmnopqrstuvwxyz0123456789_"

constant *WHITESPACE*: string = " 	
"           % i.e. blank space, tab, and newline

%% Space
constant *SPACE* : string = "	 "

%% Newline - separate from space so line numbers can be counted.
constant *NEWLINE-STRING* : string = format(false, "~%")
constant *NEWLINE* : char = *NEWLINE-STRING*(1)

constant *SINGLE-QUOTE* : char = #\'

constant *COMMENT-START*: string = "--"

constant *COMMENT-FINISH*: string = "
"

constant *QUOTE*: string = "\""

constant *PERCENT*: string = "%"

constant *BACKSLASH*: string = "\\"

constant *DECIMAL*: string = "."

constant *BASE-SEPARATOR*: string = "#:"

constant *BINARY-DIGITS-PLUS-UNDERSCORE*: string = "01_"

constant *OCTAL-DIGITS-PLUS-UNDERSCORE*: string = "01234567_"

constant *HEX-DIGITS-PLUS-UNDERSCORE*: string = "0123456789ABCDEFabcdef_"

constant *VALID-EXTENDED-DIGITS*: string = "0123456789abcdef_"

constant *UNDERSCORE*: char = #\_

constant *END-OF-LINE*: string = "
"


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  remove-underscores
%
%   Description: In a VHDL abstract literal, underscores are used solely
%   for readability. This function eliminates them so that REFINE won't
%   do anything with them.
%
%   Revision History
%   Date      Person    Description
%   8/24/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function REMOVE-UNDERSCORES(input: string) : string =
   [ x | (x) x in input & x ~= #\_ ]


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  downcase-character
%
%   Description: Used to associate a letter with it's given position in 
%   a base string.
%
%   Revision History
%   Date      Person    Description
%   8/25/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function DOWNCASE-CHARACTER (c: char): char =
   lisp::char-downcase(c)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  find-position
%
%   Description: Finds the position of a given character in a given string.
%
%   Revision History
%   Date      Person    Description
%   8/25/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function FIND-POSITION(given-char: char, given-string: string): integer =
  let (lowered-given-char = downcase-character(given-char), i: integer = 1)
  (while given-string(i) ~= lowered-given-char do
   i <- i+1
   );
  i - 1


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  convert-string-to-integer
%
%   Description: Converts a based-literal string to an integer. This 
%   assumes that the integers/characters in the string are valid.
%
%   Revision History
%   Date      Person    Description
%   8/25/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function CONVERT-STRING-TO-INTEGER(base: integer, given-string: string)
                                   : real
  computed-using

     base >= 2 &
     size(given-string) = 0
     => 
       convert-string-to-integer(base, given-string) = 0.0,

     base >= 2 
     & current-bit = find-position(first(given-string), 
				   *VALID-EXTENDED-DIGITS*)
     & current-size = size(given-string) - 1
     & reduced-string = rest(given-string)
     =>
       convert-string-to-integer(base, given-string) = 
       current-bit * lisp::expt(base, current-size) + 
       convert-string-to-integer(base, reduced-string)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  convert-string-to-fraction
%
%   Description: Converts a based-literal string to a fraction. This 
%   assumes that the integers/characters in the string are valid.
%
%   Revision History
%   Date      Person    Description
%   8/25/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function CONVERT-STRING-TO-FRACTION(base: integer, given-string: string)
                                    : real
  computed-using

     base >= 2 
     & size(given-string) = 0
     =>
       convert-string-to-fraction(base, given-string) = 0.0,

     base >= 2 
     & current-bit = integer-to-real(find-position(last(given-string), 
				   *VALID-EXTENDED-DIGITS*))
     & reduced-string = subseq(given-string, 1, size(given-string) - 1)
     =>
       convert-string-to-fraction(base, given-string) = 
       (current-bit / integer-to-real(lisp::expt(base, size(given-string)))) + 
       convert-string-to-fraction(base, reduced-string)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  find-token-end
%
%   Description: Searches through input to find the first occurrence of
%   seperator that is all alone.  For instance, "" can be embedded
%   inside of strings.  The end of the string does not occur until the
%   first " that is not followed by another ".  This routine returns the
%   string sequence that begins with the first such ".  Used in extended
%   identifiers and strings.
%
%   Revision History
%   Date      Person    Description
%   9/28/94   PWB       Original Creation
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function FIND-TOKEN-END(input: stream-sequence, seperator: string):
   stream-sequence
   computed-using

      %% Check case where there is no seperator in rest of input sequence.
      first-seperator = ss-left-trim-until(input, seperator, false)
      & undefined?(first-seperator)
      =>
        find-token-end(input, seperator) = first-seperator,

      %% Check case when first seperator is end of token.
      first-seperator = ss-left-trim-until(input, seperator, false)
      & ( undefined?(ss-rest(first-seperator)) or
          ~ ss-prefix?(ss-rest(first-seperator), seperator, false))
      =>
        find-token-end(input, seperator) = first-seperator,

      %% Check case where first seperator is first of two in a row.  Do
      %% this by stripping off the seperators and making a recursive call.
      first-seperator = ss-left-trim-until(input, seperator, false)
      & ss-prefix?(ss-rest(first-seperator), seperator, false)
      & id-end =
            find-token-end(ss-rest(ss-rest(first-seperator)), seperator)
      =>
        find-token-end(input, seperator) = id-end


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  correct-underscore-usage
%
%   Description: Checks for the occurrence of consecutive underscores and
%   also that the last character may not be an underscore.
%
%   Revision History
%   Date      Person    Description
%   10/6/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function CORRECT-UNDERSCORE-USAGE(given-string: string): boolean 
   computed-using

      size(given-string) = 1 
      & first(given-string) ~= #\_ 
      =>
        correct-underscore-usage(given-string) = true,

      %% last character is an underscore
      size(given-string) = 1 
      & first(given-string) = #\_
      =>
        correct-underscore-usage(given-string) = false,

      %% two consecutive underscores found
      size(given-string) > 1   
      & first(given-string) = #\_ 
      & first(rest(given-string)) = #\_ 
      => 
        correct-underscore-usage(given-string) = false,

      %% default case, keep going
      correct-underscore-usage(given-string) =
         correct-underscore-usage(rest(given-string))   
   

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-identifier
%
%   Description: Recognizes valid VHDL identifiers. Basic VHDL identifiers
%   are case insensitive. Extended VHDL identifiers are different from
%   both basic identifiers and keywords.
%
%   Reference: VHDL LRM (Sec. 13.3)
%
%   Revision History
%   Date      Person    Description
%   8/23/94   DB        Copied from SubPascal example.
%   8/24/94   DB        Added extended identifier assertions.
%   9/1/94    PWB       Added check to ensure ID doesn't end in underscore
%   9/28/94   PWB       Added code to allow backslash in extended ID.
%   10/6/94   DB        Added call to correct-underscore-usage.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-IDENTIFIER(input: stream-sequence): lex-value
   computed-using

      input-1 = ss-left-trim(input, *LETTER*, false)
      & ss-suffix?(input, input-1) 
      & end-id = ss-left-trim(input-1, *LETTER-OR-DIGIT-OR-UNDERSCORE*, 
			      false)
      & id-found = ss-diff(input, end-id)
      & correct-underscore-usage(id-found)
%      & last(id-found) ~= *UNDERSCORE*
      => lex-identifier(input) =
	    < input,
	      end-id,
	      undefined,
	      're::--symbol--,
	      lisp::read-from-string(id-found) >,

      %% extended identifiers
      ss-prefix?(input, *BACKSLASH*, false)
      & start-text = ss-left-trim-match(input, *BACKSLASH*, false)
      & end-text = find-token-end(start-text, *BACKSLASH*)
      & defined? (end-text) 
      & ss-prefix?(end-text, *BACKSLASH*, false) 
      & end-input = ss-left-trim-match(end-text, *BACKSLASH*, false)
      =>
        lex-identifier(input) = 
                          <input,
                           end-input,
                           undefined, 
                          're::--symbol--,
                          string-to-symbol(concat("\\",
			     ss-diff(start-text, end-text), 
			     "\\"), "VHDL") >


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-abstract-literal
%
%   Description: Handles parsing of anything termed an abstract_literal in
%   VHDL. Converts these into either real or integer values depending on
%   syntax. Abstract literals, by definition, are unsigned.
%
%   Reference: VHDL LRM (Sec. 13.4)
%
%   Revision History
%   Date      Person    Description
%   8/23/94   DB        Copied from SubPascal example.
%   8/24/94   DB        Added decimal literals.
%   8/25/94   DB        Added based literals.
%   10/6/94   DB        Changed definition of base separator to include ":" 
%                       as well as "#". (Sec. 13.10) 
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-ABSTRACT-LITERAL(input: stream-sequence) : lex-value
   computed-using
    
      ss-first(input) in *DIGIT*              %% must start w/ a digit
      & end-whole-part = ss-left-trim(input, *DIGIT-PLUS-UNDERSCORE*, false)
      & ~ss-first(end-whole-part) in *BASE-SEPARATOR*
      => lex-abstract-literal(input)
	 = lex-decimal-literal(input, end-whole-part),

      ss-first(input) in *DIGIT*              %% must start w/ a digit
      & end-based-literal = ss-left-trim(input, *DIGIT-PLUS-UNDERSCORE*, 
					 false)
      & ss-suffix?(input, end-based-literal)
      & ss-first(end-based-literal) in *BASE-SEPARATOR*
      & base-plus-underscores = ss-diff(input, end-based-literal)
      & correct-underscore-usage(base-plus-underscores)
      & base = real-to-nearest-integer(lisp::read-from-string(
             remove-underscores(base-plus-underscores)))
      & base >= 2 
      & base <= 16
      => lex-abstract-literal(input)
         = lex-based-literal(input, end-based-literal, base)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-decimal-literal
%
%   Description: Takes an unsigned integer and tests if the number is 
%   followed by either a fractional part and/or and exponent. If the 
%   unsigned integer is followed by either it is treated as a real number,
%   otherwise it is returned as an integer.
%
%   Reference: VHDL LRM (Sec. 13.4.1)
%
%   Revision History
%   Date      Person    Description
%   8/23/94   PWB       Copied from SubPascal example.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-DECIMAL-LITERAL(number-prefix: stream-sequence,
			     current-input: stream-sequence) : lex-value
   computed-using

   ss-prefix?(current-input,*DECIMAL*,false) 
   & fractional-part-begin = ss-left-trim-match(current-input, *DECIMAL*, 
						false)
   & fractional-part-end 
     = ss-left-trim(fractional-part-begin, *DIGIT-PLUS-UNDERSCORE*, false)
   & ss-suffix?(fractional-part-begin, fractional-part-end)
   =>  lex-decimal-literal(number-prefix, current-input)
       = lex-exponent(number-prefix, fractional-part-end, 're::--real--),

   lex-decimal-literal(number-prefix, current-input)
      = lex-exponent(number-prefix, current-input, 're::--integer--)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-exponent
%
%   Description: Recognizes the optional exponent and uses the lisp function
%   read-from-string to return either real or integer numbers.
%   
%   Notes: 
%    - A real literal is defined by the reference manual to be an abstract 
%    literal which does not contain a decimal point. What about negative
%    exponents? The reference manual says nothing about these 
%    (s. 13.4, p. 185). I'm assuming that these are real literals as well.
%
%   Revision History
%   Date      Person    Description
%   8/23/94   DB        Copied from SubPascal example.
%   8/23/94   DB        Changed case-sensitive? flags to false,
%                       added handling of underscore.
%   8/24/94   DB        Removed underscores, added correct conversions to
%                       integer or real.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-EXPONENT(number-prefix: stream-sequence,
		      current-input: stream-sequence,
		      token-type: symbol): lex-value
   computed-using

   %% 1st case, positive exponent, real prefix
   ss-prefix?(current-input, "E+" ,false)  
   & after-E = ss-left-trim-match(current-input, "E+", false)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & token-type = 're::--real--
   & number-found = ss-diff(number-prefix, end-exponent)
   & correct-underscore-usage(number-found)
   => lex-exponent(number-prefix, current-input, token-type)
     = (< number-prefix, 
	 end-exponent, 
	 undefined,
	 token-type,
	 lisp::read-from-string(
             remove-underscores(number-found)) >),

   %% case 1A, positive exponent, integer prefix
   ss-prefix?(current-input, "E+" ,false)  
   & after-E = ss-left-trim-match(current-input, "E+", false)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & token-type = 're::--integer--
   & number-found = ss-diff(number-prefix, end-exponent)
   & correct-underscore-usage(number-found)
   => lex-exponent(number-prefix, current-input, token-type)
     = (< number-prefix, 
	 end-exponent, 
	 undefined,
	 token-type,
	 real-to-nearest-integer(lisp::read-from-string(
             remove-underscores(number-found))) >),

   %% 2nd case, just E, real prefix
   ss-prefix?(current-input, "E", false)        
   & after-E = ss-left-trim-match(current-input, "E", false)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & token-type = 're::--real--
   & number-found = ss-diff(number-prefix, end-exponent)
   & correct-underscore-usage(number-found)
   => lex-exponent(number-prefix, current-input, token-type)
     = (< number-prefix, 
	 end-exponent, 
	 undefined,
	 token-type,
	 lisp::read-from-string(
             remove-underscores(number-found)) >),

   %% case 2A, just E, integer prefix
   ss-prefix?(current-input, "E", false)        
   & after-E = ss-left-trim-match(current-input, "E", false)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & token-type = 're::--integer--
   & number-found = ss-diff(number-prefix, end-exponent)
   & correct-underscore-usage(number-found)
   => lex-exponent(number-prefix, current-input, token-type)
     = (< number-prefix, 
	 end-exponent, 
	 undefined,
	 token-type,
	 real-to-nearest-integer(lisp::read-from-string(
             remove-underscores(number-found))) >),

   %% 3rd case, exponent and minus
   ss-prefix?(current-input, "E-", false)
   & after-E = ss-left-trim-match(current-input, "E-", false)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & number-found = ss-diff(number-prefix, end-exponent)
   & correct-underscore-usage(number-found)
   => lex-exponent(number-prefix, current-input, @@)
     = (< number-prefix, 
	 end-exponent, 
	 undefined,
	 're::--real--,
	 lisp::read-from-string(
             remove-underscores(number-found)) >),

   %% default case, return what we have
   number-found = ss-diff(number-prefix, current-input)
   & correct-underscore-usage(number-found)
   => lex-exponent(number-prefix, current-input, token-type) 
     = (< number-prefix,                                 
	 current-input,                                 
	 undefined,
	 token-type,
	 lisp::read-from-string(
	     remove-underscores(number-found)) >)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-based-literal
%
%   Description: Deals with based_literals.
%
%   Reference: VHDL LRM (Sec. 13.4.2)
%
%   Notes: One strange case encountered: 
%
%     --ERROR: No extended digit is allowed in the exponent 
%     constant a:integer:=16#54321#A;
%
%   which parses as follows:
%  
%      entity TEST is constant A: INTEGER := 344865 A;
%
%   I think this is correct according to the definitions. 
%
%   Revision History
%   Date      Person    Description
%   8/25/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-BASED-LITERAL(base-prefix: stream-sequence,
			   current-input: stream-sequence,
			   base: integer) : lex-value
   computed-using
      
      ss-first(current-input) in *BASE-SEPARATOR*
      & remove-pound = ss-left-trim(current-input,*BASE-SEPARATOR*,
					  false)
      & defined?(remove-pound)
      & valid-integers = append(subseq(*VALID-EXTENDED-DIGITS*, 1, base), 
				#\_)
      & decimal-location = ss-left-trim(remove-pound,valid-integers,false)
      =>
        lex-based-literal(base-prefix, current-input, base) =
	  lex-based-decimal-part(base-prefix, remove-pound, 
				 decimal-location, base)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-based-decimal-part
%
%   Description: Determines whether based literal is a real or an integer
%   based on whether there is a decimal present.
%
%   Notes: Needs to be adapted to deal with decimals.
%
%   Revision History
%   Date      Person    Description
%   8/25/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-BASED-DECIMAL-PART(base-prefix: stream-sequence,
				decimal-prefix: stream-sequence,
				current-input: stream-sequence,
				base: integer) : lex-value
   computed-using

      %% decimal based literal
      ss-prefix?(current-input,*DECIMAL*,false)
      & fractional-part-begin = ss-left-trim-match(current-input, *DECIMAL*, 
						false)
      & valid-integers = append(subseq(*VALID-EXTENDED-DIGITS*, 1, base), 
				#\_)
      & fractional-part-end 
        = ss-left-trim(fractional-part-begin, valid-integers, false)
      =>
        lex-based-decimal-part(base-prefix, decimal-prefix, current-input, 
			       base) 
	  = lex-based-exponent(base-prefix,
                               convert-string-to-integer(base,
	                          remove-underscores(ss-diff(
                                    decimal-prefix, current-input))) +
                               convert-string-to-fraction(base,
			          remove-underscores(ss-diff(
                                    fractional-part-begin, 
				    fractional-part-end))),
			       fractional-part-end, base, 're::--real--),

     %% default case
     lex-based-decimal-part(base-prefix, decimal-prefix, current-input, base)
        = lex-based-exponent(base-prefix, 
			     convert-string-to-integer(base,
	                        remove-underscores(ss-diff(
                                  decimal-prefix, current-input))), 
			     current-input, base, 're::--integer--)
 

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-based-exponent
%
%   Description: Chops the exponent part of a based literal.
%
%   Revision History
%   Date      Person    Description
%   8/25/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-BASED-EXPONENT(base-prefix: stream-sequence,
			    decimal-value: real,
			    current-input: stream-sequence,
			    base: integer,
			    token-type) : lex-value
   computed-using

   %% 1st case just E, integer token
   ss-prefix?(current-input, "#", false) 
   & begin-exponent = ss-left-trim-match(current-input, "#", false)
   & defined?(begin-exponent)
   & ss-prefix?(begin-exponent, "E", false)        
   & after-E = ss-left-trim-match(begin-exponent, "E", false)
   & defined?(after-E)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & token-type = 're::--integer--
   => lex-based-exponent(base-prefix, decimal-value, 
			 current-input, base, token-type)
     = (< base-prefix, 
	 end-exponent, 
	 undefined,
	 token-type,
	 lisp::expt(real-to-nearest-integer(decimal-value),
	     lisp::read-from-string(
                remove-underscores(ss-diff(after-E, end-exponent))))
	 >),

   %% case 1A, just E, real token
   ss-prefix?(current-input, "#", false) 
   & begin-exponent = ss-left-trim-match(current-input, "#", false)
   & defined?(begin-exponent)
   & ss-prefix?(begin-exponent, "E", false)        
   & after-E = ss-left-trim-match(begin-exponent, "E", false)
   & defined?(after-E)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & token-type = 're::--real--
   => lex-based-exponent(base-prefix, decimal-value, 
			 current-input, base, token-type)
     = (< base-prefix, 
	 end-exponent, 
	 undefined,
	 token-type,
	 decimal-value * lisp::expt(base, lisp::read-from-string(
                remove-underscores(ss-diff(after-E, end-exponent))))
	 >), 

   %% 2nd case, positive exponent, integer token
   ss-prefix?(current-input, "#", false) 
   & begin-exponent = ss-left-trim-match(current-input, "#", false)
   & defined?(begin-exponent)
   & ss-prefix?(begin-exponent, "E+", false)        
   & after-E = ss-left-trim-match(begin-exponent, "E+", false)
   & defined?(after-E)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & token-type = 're::--integer--
   => lex-based-exponent(base-prefix, decimal-value, 
			 current-input, base, token-type)
     = (< base-prefix, 
	 end-exponent, 
	 undefined,
	 token-type,
	 lisp::expt(real-to-nearest-integer(decimal-value),
	     lisp::read-from-string(
                remove-underscores(ss-diff(after-E, end-exponent))))
	 >),

   %% case 2A, positive exponent, real token
   ss-prefix?(current-input, "#", false) 
   & begin-exponent = ss-left-trim-match(current-input, "#", false)
   & defined?(begin-exponent)
   & ss-prefix?(begin-exponent, "E+", false)        
   & after-E = ss-left-trim-match(begin-exponent, "E+", false)
   & defined?(after-E)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & token-type = 're::--real--
   => lex-based-exponent(base-prefix, decimal-value, 
			 current-input, base, token-type)
     = (< base-prefix, 
	 end-exponent, 
	 undefined,
	 token-type,
	 decimal-value * lisp::expt(base, lisp::read-from-string(
                remove-underscores(ss-diff(after-E, end-exponent))))
	 >), 

   %% 3rd case, negative exponent, real token
   ss-prefix?(current-input, "#", false) 
   & begin-exponent = ss-left-trim-match(current-input, "#", false)
   & defined?(begin-exponent)
   & ss-prefix?(begin-exponent, "E-", false)        
   & after-E = ss-left-trim-match(begin-exponent, "E-", false)
   & defined?(after-E)
   & ss-first(after-E) in *DIGIT*
   & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
   & token-type = 're::--real--
   => lex-based-exponent(base-prefix, decimal-value, 
			 current-input, base, token-type)
     = (< base-prefix, 
	 end-exponent, 
	 undefined,
	 token-type,
	 decimal-value * lisp::expt(base, -1.0 * lisp::read-from-string(
                remove-underscores(ss-diff(after-E, end-exponent))))
	 >), 


   ss-prefix?(current-input, "#", false) 
   & end-based-literal = ss-left-trim-match(current-input, "#", false) 
   & defined?(end-based-literal) 
   =>
     lex-based-exponent(base-prefix, decimal-value, 
			 current-input, base, token-type)
        = (< base-prefix,                                 
	   end-based-literal,                                 
	   undefined,
	   token-type,
	   real-to-nearest-integer(decimal-value) >)
 
  
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-character-literal
%
%   Description:  This function checks for a character literal at the
%   beginning of the input stream passed into the routine.
%
%   Revision History
%   Date      Person    Description
%   8/24/94   PWB       Original Creation
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-CHARACTER-LITERAL(input : stream-sequence) : lex-value
   computed-using

   %% First and third characters are single quotes
   ss-first(input) = *SINGLE-QUOTE* &
   ss-first(ss-rest(ss-rest(input))) = *SINGLE-QUOTE* &
   %% end-input is from fourth character on
   end-input = ss-rest(ss-rest(ss-rest(input))) &
   second-char = ss-first(ss-rest(input))
   =>
   lex-character-literal(input) =
      < input,
        end-input,
        undefined,
        're::--char--,
        second-char >


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-string-literal
%
%   Description: Recognizes anything between two quotes as a string literal.
%
%   Revision History
%   Date      Person    Description
%   8/23/94   PWB       Copied from SubPascal example.
%   9/28/94   PWB       Modified to allow embedded quotes (i.e. "")
%   10/6/94   DB        Modified to allow replacement of " by %. (Sec. 13.10)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-STRING-LITERAL(input: stream-sequence) : lex-value

  computed-using

   ss-prefix?(input, *QUOTE*, false) &
   start-text = ss-left-trim-match(input, *QUOTE*, false) &
   end-text = find-token-end(start-text, *QUOTE*) &
   defined? (end-text) &
   ss-prefix?(end-text, *QUOTE*, false) &
   end-input = ss-left-trim-match(end-text, *QUOTE*, false)

   =>
     lex-string-literal(input) = 
                          <input,
                           end-input,
                           undefined, 
                          're::--string--,
                          ss-diff(start-text, end-text) >,

   ss-prefix?(input, *PERCENT*, false) &
   start-text = ss-left-trim-match(input, *PERCENT*, false) &
   end-text = find-token-end(start-text, *PERCENT*) &
   defined? (end-text) &
   ss-prefix?(end-text, *PERCENT*, false) &
   end-input = ss-left-trim-match(end-text, *PERCENT*, false)

   =>
     lex-string-literal(input) = 
                          <input,
                           end-input,
                           undefined, 
                          're::--string--,
                          ss-diff(start-text, end-text) >


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-bit-string-literal
%
%   Description: Recognizes strings with certain bases as base string 
%   literals.
%
%   Revision History
%   Date      Person    Description
%   8/23/94   DB        Original Creation
%   8/24/94   DB        Added assertions about the digits associated with
%                       a given base (B | O | X).
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-BIT-STRING-LITERAL(input: stream-sequence) : lex-value

  computed-using

   ss-prefix?(input,"B", false)
   & quoted-text = ss-left-trim(input,"B", false)
   & ss-prefix?(quoted-text, *QUOTE*, false) 
   & start-quoted-text = ss-left-trim-match(quoted-text, *QUOTE*, false) 
   & end-text = ss-left-trim(start-quoted-text, 
			     *BINARY-DIGITS-PLUS-UNDERSCORE*, false) 
   & ss-prefix?(end-text, *QUOTE*, false) 
   & end-input = ss-left-trim-match(end-text, *QUOTE*, false)
   =>
     lex-bit-string-literal(input) = 
                          <input,
                           end-input,
                           undefined, 
                          'vom-1-0::*bit-string-literal*,
			  ss-diff(input, end-text) >,

   ss-prefix?(input,"O", false)
   & quoted-text = ss-left-trim(input,"O", false)
   & ss-prefix?(quoted-text, *QUOTE*, false) 
   & start-quoted-text = ss-left-trim-match(quoted-text, *QUOTE*, false) 
   & end-text = ss-left-trim(start-quoted-text, 
			     *OCTAL-DIGITS-PLUS-UNDERSCORE*, false) 
   & ss-prefix?(end-text, *QUOTE*, false) 
   & end-input = ss-left-trim-match(end-text, *QUOTE*, false)
   =>
     lex-bit-string-literal(input) = 
                          <input,
                           end-input,
                           undefined, 
                          'vom-1-0::*bit-string-literal*,
			  ss-diff(input, end-text) >,

   ss-prefix?(input,"X", false)
   & quoted-text = ss-left-trim(input,"X", false)
   & ss-prefix?(quoted-text, *QUOTE*, false) 
   & start-quoted-text = ss-left-trim-match(quoted-text, *QUOTE*, false) 
   & end-text = ss-left-trim(start-quoted-text, 
			     *HEX-DIGITS-PLUS-UNDERSCORE*, false) 
   & ss-prefix?(end-text, *QUOTE*, false) 
   & end-input = ss-left-trim-match(end-text, *QUOTE*, false)
   =>
     lex-bit-string-literal(input) = 
                          <input,
                           end-input,
                           undefined, 
                          'vom-1-0::*bit-string-literal*,
			  ss-diff(input, end-text) >    


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-whitespace
%
%   Description:  Remove leading whitespace from the input.  Also,
%   increment the *LINE-NUMBER* variable for each linefeed found.
%
%   Revision History
%   Date      Person    Description
%   8/24/94   PWB       Copied & modified from SubPascal example.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-WHITESPACE(input : stream-sequence) : stream-sequence =
let (
   blanks-removed : stream-sequence = ss-left-trim(input, *SPACE*, false),
   linefeed-removed : stream-sequence = input,
   comment-removed : stream-sequence = input )

   %% blanks-removed contains input stream with leading spaces removed.
   %% Next, remove one linefeed, incrementing *LINE-NUMBER*.
   ( if ( ss-first(blanks-removed) =  *NEWLINE* ) then
      linefeed-removed <- lex-whitespace ( ss-rest(blanks-removed) );
      *LINE-NUMBER* <- *LINE-NUMBER* + 1
   else
      linefeed-removed <- blanks-removed
   );

   %% Now, remove one comment from linefeed-removed.
   ( if ( ss-prefix? ( linefeed-removed, *COMMENT-START*, false ) ) then
      let ( after-start-comment : stream-sequence = 
         ss-left-trim-match ( linefeed-removed, *COMMENT-START*, false ) )
      comment-removed <- lex-whitespace ( ss-left-trim-until-match 
         ( after-start-comment, *NEWLINE-STRING*, false ) )
   else
      comment-removed <- linefeed-removed
   );

   %% Now that all leading blanks, linefeeds and comments have been
   %% removed, return comment-removed.
   comment-removed


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  character-canonicalizer
%
%   Description: VHDL reserved words are case-insensitive. Hence, all  
%   keywords will be downcased to match their definition.
%
%   Reference:  VHDL LRM (Sec. 13.9)                                   
%
%   Revision History
%   Date      Person    Description
%   8/23/94   DB        Copied from SubPascal example.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function CHARACTER-CANONICALIZER (c: char, cf: any-type): char =
   lisp::char-downcase(c)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  accept-keyword?
%
%   Description: Determines whether to accept of reject a given keyword.
%   In the case where the keyword is part of a larger identifier the 
%   keyword will be rejected.
%
%   Revision History
%   Date      Person    Description
%   8/23/94   PWB       Copied from SubPascal example.
%   9/1/94    DB        Changed *LETTER-OR-DIGIT* to 
%                       *LETTER-OR-DIGIT-OR-UNDERSCORE*                         
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function ACCEPT-KEYWORD? (ss: stream-sequence,
		          st: string,
		          cf: any-type): boolean =
   ss-empty?(ss)
      or-else (ss-first(ss) ~in *LETTER-OR-DIGIT-OR-UNDERSCORE* &
	       lisp::char-downcase(ss-first(ss)) ~in 
                  *LETTER-OR-DIGIT-OR-UNDERSCORE*)
      or-else ex(ch) (st = [.., ch] & ch ~in *LETTER*)


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  print-syntax-error
%
%   Description: Simple syntax error handling routine.
%
%   Revision History
%   Date      Person    Description
%   10/1/94   DB        Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function print-syntax-error(input: stream-sequence,
		   error-message: string): lex-value =
   format(true, "~A : line number ~A~%", error-message, *LINE-NUMBER*);
   let (end-input = ss-left-trim-until(input, *END-OF-LINE*, false))
   < input,
     end-input,
     undefined,
     ss-diff(input, end-input),
     're::--string-- >
     

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-token
%
%   Description: Processes the input after the comments have been 
%   removed.
%
%   Revision History
%   Date      Person    Description
%   8/23/94   DB        Copied from SubPascal example.
%   9/30/94   DB        Added call to print-syntax-error. This doesn't ask
%                       user for information on what to do. If you don't
%                       like this uncomment the call to report-syntax-error 
%                       and comment out our call.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-TOKEN(input: stream-sequence,
		   pcb: parse-control-block): lex-value
   computed-using

   %% Are we at a grammar prefix?   i.e.  ##r
   lex-val = lex-grammar-prefix(input, undefined, pcb)
   & defined?(lex-val)
   => lex-token(input, pcb) = lex-val,

   %% Are we at the end of form?  i.e. end of file or end of pattern?
   lex-val = lex-end-of-form(input, undefined, pcb)
   & defined?(lex-val)
   & pseudoterminal-legal?('re::--end--, pcb, true)
   => lex-token(input, pcb) = lex-val,

   %% Are we at a character literal?
   lex-val = lex-character-literal(input)
   & defined?(lex-val)
   =>
   lex-token(input,pcb) = lex-val,

   %% Are we at a keyword in the grammar?
   lex-val = lex-keyword(input,
			 undefined,
			 pcb,
			 'character-canonicalizer,
			 'accept-keyword?)
   & defined?(lex-val)
   => lex-token(input, pcb) = lex-val,

   %% Get number if possible
   lex-val = lex-abstract-literal(input)
   & defined?(lex-val)
   => lex-token(input, pcb) = lex-val,

   %% Get string literal
   lex-val = lex-string-literal(input)
   & defined?(lex-val)
   => lex-token(input, pcb) = lex-val,

   %% Get bit-string-literal
   lex-val = lex-bit-string-literal(input)
   & defined?(lex-val)
   => lex-token(input, pcb) = lex-val,

   %% Get an identifier if possible
   lex-val = lex-identifier(input)
   & defined?(lex-val)
   => lex-token(input, pcb) = lex-val,

   %% No other cases, generate an error message
%%   lex-token(input, pcb)
%%      = print-syntax-error(input, "syntax error")

   %% Call to default refine syntax error function
     lex-token(input, pcb)
        = report-syntax-error(input,
          "unrecognizable text encountered during lexical analysis")

   
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
%   Function Name:  lex-vhdl
%
%   Description: Top-level lexing function. Called from VHDL-93 parser.
%
%   Revision History
%   Date      Person    Description
%   8/23/94   PWB       Copied from SubPascal example.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function LEX-VHDL(input: stream-sequence,
		  lexical-context-from-last-call: any-type,
		  pcb: parse-control-block): lex-value =

   %% First, strip off white space and comments, then get token.
   lex-token(lex-whitespace(input),pcb)


<div align="center"><br /><script type="text/javascript"><!--
google_ad_client = "pub-7293844627074885";
//468x60, Created at 07. 11. 25
google_ad_slot = "8619794253";
google_ad_width = 468;
google_ad_height = 60;
//--></script>
<script type="text/javascript" src="http://pagead2.googlesyndication.com/pagead/show_ads.js">
</script><br />&nbsp;</div>