#||
VHDL Object Model 1.0
---------------------

Copyright (c) 1994 Ohio Board of Regents and the University of Cincinnati.
All rights reserved.

Authors: David Benz, Phillip Baraona
E-Mail:  dbenz@thor.ece.uc.edu, pbaraona@thor.ece.uc.edu
||#

#||
File:      lexer.re

Contains:  Functions and constants for building the VHDL object model lexer.

VHDL lexer function hierarchy:

  lex-vhdl
    lex-token(lex-whitespace(input))
      lex-grammar-prefix          (REFINE provided)
      lex-end-of-form             (REFINE provided)
      lex-character-literal
      lex-keyword                 (REFINE provided)
      lex-abstract-literal
        lex-decimal-literal
          lex-exponent
        lex-based-literal
          lex-based-decimal-part
            lex-based-exponent
      lex-string-literal
      lex-bit-string-literal
      lex-identifier

Known gap: a check is still needed in the based-literal functions
(lex-based-decimal-part / lex-based-exponent) to determine whether the
opening and closing base separators of a based literal are the same.
||#

!! in-package("VOM-1-0")
!! in-grammar('user)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Constants %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Character classes.  All letter classes are lower case only; the lexer
%% functions either search case-insensitively or downcase before testing.
constant *DIGIT*: string = "0123456789"

%% Each character is listed exactly once (the digit "0" used to appear twice,
%% which was harmless for membership tests but misleading).
constant *DIGIT-PLUS-UNDERSCORE* : string = "0123456789_"

constant *LETTER*: string = "abcdefghijklmnopqrstuvwxyz"
constant *LETTER-OR-DIGIT*: string = "abcdefghijklmnopqrstuvwxyz0123456789"
constant *LETTER-OR-DIGIT-OR-UNDERSCORE*: string =
   "abcdefghijklmnopqrstuvwxyz0123456789_"

%% NOTE(review): the comment below says this string holds a blank, a tab and
%% a newline, but only a single blank is visible in this copy of the file.
%% The tab and newline were presumably lost when the file's whitespace was
%% collapsed -- confirm against a pristine copy before relying on it.
constant *WHITESPACE*: string = " "  % i.e. blank space, tab, and newline

%% Space
constant *SPACE* : string = " "

%% Newline - separate from space so line numbers can be counted.
constant *NEWLINE-STRING* : string = format(false, "~%")
constant *NEWLINE* : char = *NEWLINE-STRING*(1)

constant *SINGLE-QUOTE* : char = #\'

constant *COMMENT-START*: string = "--"

%% NOTE(review): only a single blank is visible here; judging by the name
%% this presumably held a newline before the whitespace was collapsed --
%% confirm against a pristine copy.
constant *COMMENT-FINISH*: string = " "

constant *QUOTE*: string = "\""
constant *PERCENT*: string = "%"
constant *BACKSLASH*: string = "\\"
constant *DECIMAL*: string = "."
%% Either character may delimit the digits of a based literal.
constant *BASE-SEPARATOR*: string = "#:"

%% Digit sets accepted inside B"...", O"..." and X"..." bit-string literals.
constant *BINARY-DIGITS-PLUS-UNDERSCORE*: string = "01_"
constant *OCTAL-DIGITS-PLUS-UNDERSCORE*: string = "01234567_"
constant *HEX-DIGITS-PLUS-UNDERSCORE*: string = "0123456789ABCDEFabcdef_"

%% Extended digits in value order: the digit at (1-based) index i has the
%% value i - 1.  Lower case only; see find-position.
constant *VALID-EXTENDED-DIGITS*: string = "0123456789abcdef_"

constant *UNDERSCORE*: char = #\_

%% NOTE(review): only a single blank is visible here; judging by the name
%% this presumably held a newline before the file's whitespace was
%% collapsed -- confirm against a pristine copy.
constant *END-OF-LINE*: string = " "

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: remove-underscores
%
% Description: Underscores in a VHDL abstract literal exist purely for
%   readability.  This function returns the input with every underscore
%   dropped, so that the text can be handed to the Lisp reader.
%
% Revision History
%   Date      Person  Description
%   8/24/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function REMOVE-UNDERSCORES(input: string) : string =
  [ ch | (ch) ch in input & ch ~= *UNDERSCORE* ]

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: downcase-character
%
% Description: Returns the lower-case form of a character.  Used to
%   associate a letter with its position in a base string regardless of
%   the case in which it was written.
%
% Revision History
%   Date      Person  Description
%   8/25/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function DOWNCASE-CHARACTER (c: char): char =
  lisp::char-downcase(c)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: find-position
%
% Description: Finds the position of a given character in a given string.
%
% Revision History
%   Date      Person  Description
%   8/25/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% find-position(given-char, given-string)
%%   given-char   : character to look for; it is downcased before comparing,
%%                  so given-string is expected to be lower case.
%%   given-string : string to search.
%%   Returns the ZERO-based index of the first match (i.e. the digit value
%%   when given-string is *VALID-EXTENDED-DIGITS*), or undefined when the
%%   character does not occur.  The search used to have no upper bound and
%%   ran off the end of the string for a character that was not present.
function FIND-POSITION(given-char: char, given-string: string): integer =
  let (lowered-given-char: char = downcase-character(given-char),
       string-size: integer = size(given-string),
       i: integer = 1,
       found: boolean = false)
  (while ~found & i <= string-size do
     (if given-string(i) = lowered-given-char
        then found <- true
        else i <- i + 1));
  %% strings are indexed from 1, positions are reported from 0
  (if found then i - 1 else undefined)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: convert-string-to-integer
%
% Description: Converts the whole-number digits of a based literal to
%   their value in the given base.  The first character is the most
%   significant digit; each digit's value is its position in
%   *VALID-EXTENDED-DIGITS*.  The empty string converts to 0.0.
%   The digits are assumed to be valid (the lexer only passes text it has
%   already trimmed against the digits of the base); the function is
%   undefined for a string containing any other character.
%
% Revision History
%   Date      Person  Description
%   8/25/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function CONVERT-STRING-TO-INTEGER(base: integer, given-string: string) : real
  computed-using
    %% no digits left
    base >= 2 & size(given-string) = 0
    => convert-string-to-integer(base, given-string) = 0.0,

    %% leading digit * base^(digits remaining) + value of the rest
    base >= 2
    & current-bit = find-position(first(given-string), *VALID-EXTENDED-DIGITS*)
    & defined?(current-bit)
    & current-size = size(given-string) - 1
    & reduced-string = rest(given-string)
    => convert-string-to-integer(base, given-string) =
         current-bit * lisp::expt(base, current-size) +
         convert-string-to-integer(base, reduced-string)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: convert-string-to-fraction
%
% Description: Converts a based-literal string to a fraction.  This
%   assumes that the integers/characters in the string are valid.
%
% Revision History
%   Date      Person  Description
%   8/25/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% The digit at (1-based) index k after the point contributes
%% digit-value / base^k; the recursion peels digits off the right-hand end.
function CONVERT-STRING-TO-FRACTION(base: integer, given-string: string) : real
  computed-using
    base >= 2 & size(given-string) = 0
    => convert-string-to-fraction(base, given-string) = 0.0,

    base >= 2
    & current-bit = integer-to-real(find-position(last(given-string),
                                                  *VALID-EXTENDED-DIGITS*))
    & reduced-string = subseq(given-string, 1, size(given-string) - 1)
    => convert-string-to-fraction(base, given-string) =
         (current-bit / integer-to-real(lisp::expt(base, size(given-string)))) +
         convert-string-to-fraction(base, reduced-string)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: find-token-end
%
% Description: Searches through input to find the first occurrence of
%   the separator that is all alone.  For instance, "" can be embedded
%   inside of strings.  The end of the string does not occur until the
%   first " that is not followed by another ".  This routine returns the
%   stream sequence that begins with the first such ".  Used in extended
%   identifiers and strings.  The result is undefined when the input
%   contains no such separator.
%
% Revision History
%   Date      Person  Description
%   9/28/94   PWB     Original Creation
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function FIND-TOKEN-END(input: stream-sequence,
                        seperator: string): stream-sequence
  computed-using
    %% Check case where there is no separator in rest of input sequence.
    first-seperator = ss-left-trim-until(input, seperator, false)
    & undefined?(first-seperator)
    => find-token-end(input, seperator) = first-seperator,

    %% Check case when first separator is end of token.
    first-seperator = ss-left-trim-until(input, seperator, false)
    & ( undefined?(ss-rest(first-seperator))
        or ~ ss-prefix?(ss-rest(first-seperator), seperator, false))
    => find-token-end(input, seperator) = first-seperator,

    %% Check case where first separator is first of two in a row.  Do
    %% this by stripping off the separators and making a recursive call.
    first-seperator = ss-left-trim-until(input, seperator, false)
    & ss-prefix?(ss-rest(first-seperator), seperator, false)
    & id-end = find-token-end(ss-rest(ss-rest(first-seperator)), seperator)
    => find-token-end(input, seperator) = id-end

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: correct-underscore-usage
%
% Description: Returns false when the string contains two consecutive
%   underscores or ends in an underscore, and true otherwise.  The empty
%   string contains no underscore and is therefore accepted.
%
% Revision History
%   Date      Person  Description
%   10/6/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function CORRECT-UNDERSCORE-USAGE(given-string: string): boolean
  computed-using
    %% empty string: nothing to reject.  Without this case the default
    %% case below would recurse on rest("") and never reach a base case.
    size(given-string) = 0
    => correct-underscore-usage(given-string) = true,

    size(given-string) = 1 & first(given-string) ~= #\_
    => correct-underscore-usage(given-string) = true,

    %% last character is an underscore
    size(given-string) = 1 & first(given-string) = #\_
    => correct-underscore-usage(given-string) = false,

    %% two consecutive underscores found
    size(given-string) > 1
    & first(given-string) = #\_
    & first(rest(given-string)) = #\_
    => correct-underscore-usage(given-string) = false,

    %% default case, keep going
    correct-underscore-usage(given-string) =
      correct-underscore-usage(rest(given-string))

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-identifier
%
% Description: Recognizes valid VHDL identifiers.  Basic VHDL identifiers
%   are case insensitive.  Extended VHDL identifiers are different from
%   both basic identifiers and keywords.
%
% Reference: VHDL LRM (Sec. 13.3)
%
% Revision History
%   Date      Person  Description
%   8/23/94   DB      Copied from SubPascal example.
%   8/24/94   DB      Added extended identifier assertions.
%   9/1/94    PWB     Added check to ensure ID doesn't end in underscore
%   9/28/94   PWB     Added code to allow backslash in extended ID.
%   10/6/94   DB      Added call to correct-underscore-usage.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Returns a lex-value tuple of the form
%%   < token start, token end, undefined, token type, token value >
%% or undefined when no case applies.
function LEX-IDENTIFIER(input: stream-sequence): lex-value
  computed-using
    %% basic identifiers: a run of letters followed by a run of letters,
    %% digits and underscores; the text between input and end-id is read by
    %% the Lisp reader and returned as a 're::--symbol-- token.
    input-1 = ss-left-trim(input, *LETTER*, false)
    & ss-suffix?(input, input-1)
    & end-id = ss-left-trim(input-1, *LETTER-OR-DIGIT-OR-UNDERSCORE*, false)
    & id-found = ss-diff(input, end-id)
    & correct-underscore-usage(id-found)
    % & last(id-found) ~= *UNDERSCORE*
    => lex-identifier(input) =
         < input, end-id, undefined, 're::--symbol--,
           lisp::read-from-string(id-found) >,

    %% extended identifiers
    %% Text enclosed in backslashes; find-token-end skips doubled
    %% backslashes so they can appear inside the identifier.
    ss-prefix?(input, *BACKSLASH*, false)
    & start-text = ss-left-trim-match(input, *BACKSLASH*, false)
    & end-text = find-token-end(start-text, *BACKSLASH*)
    & defined? (end-text)
    & ss-prefix?(end-text, *BACKSLASH*, false)
    & end-input = ss-left-trim-match(end-text, *BACKSLASH*, false)
    => lex-identifier(input) =
    %% NOTE(review): the result expression of this case is missing from this
    %% copy of the file.  It was presumably a lex-value tuple written as
    %% "<input, end-input, ...>" that was lost in extraction -- restore it
    %% from a pristine copy; the function is not valid REFINE as it stands.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-abstract-literal
%
% Description: Handles parsing of anything termed an abstract_literal in
%   VHDL.  Converts these into either real or integer values depending on
%   syntax.  Abstract literals, by definition, are unsigned.
%
% Reference: VHDL LRM (Sec. 13.4)
%
% Revision History
%   Date      Person  Description
%   8/23/94   DB      Copied from SubPascal example.
%   8/24/94   DB      Added decimal literals.
%   8/25/94   DB      Added based literals.
%   10/6/94   DB      Changed definition of base separator to include ":"
%                     as well as "#". (Sec.
%                     13.10)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Dispatches on the character that follows the leading run of digits and
%% underscores:
%%   - not a base separator      -> decimal literal
%%   - base separator ("#", ":") -> based literal, if the run is a legal
%%                                  base (2..16) and the rest lexes as one
%%   - otherwise                 -> decimal literal after all
%% The last case matters because ":" is also an ordinary delimiter: in
%% "7:=0" the 7 is a plain integer followed by ":=".  Previously such input
%% was committed to the based-literal path and came back undefined.
function LEX-ABSTRACT-LITERAL(input: stream-sequence) : lex-value
  computed-using
    %% decimal literal
    ss-first(input) in *DIGIT*                   %% must start w/ a digit
    & end-whole-part = ss-left-trim(input, *DIGIT-PLUS-UNDERSCORE*, false)
    & ~ss-first(end-whole-part) in *BASE-SEPARATOR*
    => lex-abstract-literal(input) =
         lex-decimal-literal(input, end-whole-part),

    %% based literal
    ss-first(input) in *DIGIT*                   %% must start w/ a digit
    & end-based-literal = ss-left-trim(input, *DIGIT-PLUS-UNDERSCORE*, false)
    & ss-suffix?(input, end-based-literal)
    & ss-first(end-based-literal) in *BASE-SEPARATOR*
    & base-plus-underscores = ss-diff(input, end-based-literal)
    & correct-underscore-usage(base-plus-underscores)
    & base = real-to-nearest-integer(lisp::read-from-string(
                 remove-underscores(base-plus-underscores)))
    & base >= 2 & base <= 16
    & based-value = lex-based-literal(input, end-based-literal, base)
    %% only commit to this case when a based literal was really found
    & defined?(based-value)
    => lex-abstract-literal(input) = based-value,

    %% digits followed by a separator character that does not start a based
    %% literal: lex the digits as a decimal literal and leave the separator
    %% for the next token
    ss-first(input) in *DIGIT*
    & end-whole-part = ss-left-trim(input, *DIGIT-PLUS-UNDERSCORE*, false)
    => lex-abstract-literal(input) =
         lex-decimal-literal(input, end-whole-part)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-decimal-literal
%
% Description: Takes an unsigned integer and tests if the number is
%   followed by either a fractional part and/or an exponent.  If the
%   unsigned integer is followed by either it is treated as a real number,
%   otherwise it is returned as an integer.
%
% Reference: VHDL LRM (Sec. 13.4.1)
%
% Revision History
%   Date      Person  Description
%   8/23/94   PWB     Copied from SubPascal example.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% number-prefix is the start of the literal and current-input the position
%% just after its whole-number digits.  The optional exponent and the final
%% conversion to a value are delegated to lex-exponent.
function LEX-DECIMAL-LITERAL(number-prefix: stream-sequence,
                             current-input: stream-sequence) : lex-value
  computed-using
    %% a point follows the whole part: step over the point and the
    %% fraction digits, then continue as a real
    ss-prefix?(current-input, *DECIMAL*, false)
    & after-point = ss-left-trim-match(current-input, *DECIMAL*, false)
    & after-fraction = ss-left-trim(after-point, *DIGIT-PLUS-UNDERSCORE*, false)
    & ss-suffix?(after-point, after-fraction)
    => lex-decimal-literal(number-prefix, current-input) =
         lex-exponent(number-prefix, after-fraction, 're::--real--),

    %% no fractional part: continue as an integer
    lex-decimal-literal(number-prefix, current-input) =
      lex-exponent(number-prefix, current-input, 're::--integer--)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-exponent
%
% Description: Recognizes the optional exponent and uses the lisp function
%   read-from-string to return either real or integer numbers.
%
% Notes:
%   - The reference manual defines an integer literal as an abstract
%     literal that does not contain a point and a real literal as one that
%     does, and it does not allow a minus sign in the exponent of an
%     integer literal (Sec. 13.4.1).  This lexer is more permissive: a
%     literal with a negative exponent is accepted whether or not it has a
%     point, and is always returned as a real.
%
% Revision History
%   Date      Person  Description
%   8/23/94   DB      Copied from SubPascal example.
%   8/23/94   DB      Changed case-sensitive? flags to false,
%                     added handling of underscore.
%   8/24/94   DB      Removed underscores, added correct conversions to
%                     integer or real.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% number-prefix is the start of the literal, current-input the position
%% just after its mantissa, and token-type the type decided so far
%% ('re::--real-- or 're::--integer--).  In every case the complete literal
%% text, with underscores removed, is handed to the Lisp reader; for integer
%% tokens carrying an exponent the reader's result is converted back to an
%% integer.
function LEX-EXPONENT(number-prefix: stream-sequence,
                      current-input: stream-sequence,
                      token-type: symbol): lex-value
  computed-using
    %% "E+" exponent on a real mantissa
    ss-prefix?(current-input, "E+", false)
    & exponent-digits = ss-left-trim-match(current-input, "E+", false)
    & ss-first(exponent-digits) in *DIGIT*
    & token-end = ss-left-trim(exponent-digits, *DIGIT-PLUS-UNDERSCORE*, false)
    & token-type = 're::--real--
    & literal-text = ss-diff(number-prefix, token-end)
    & correct-underscore-usage(literal-text)
    => lex-exponent(number-prefix, current-input, token-type) =
         (< number-prefix, token-end, undefined, token-type,
            lisp::read-from-string(remove-underscores(literal-text)) >),

    %% "E+" exponent on an integer mantissa
    ss-prefix?(current-input, "E+", false)
    & exponent-digits = ss-left-trim-match(current-input, "E+", false)
    & ss-first(exponent-digits) in *DIGIT*
    & token-end = ss-left-trim(exponent-digits, *DIGIT-PLUS-UNDERSCORE*, false)
    & token-type = 're::--integer--
    & literal-text = ss-diff(number-prefix, token-end)
    & correct-underscore-usage(literal-text)
    => lex-exponent(number-prefix, current-input, token-type) =
         (< number-prefix, token-end, undefined, token-type,
            real-to-nearest-integer(
              lisp::read-from-string(remove-underscores(literal-text))) >),

    %% bare "E" exponent on a real mantissa
    ss-prefix?(current-input, "E", false)
    & exponent-digits = ss-left-trim-match(current-input, "E", false)
    & ss-first(exponent-digits) in *DIGIT*
    & token-end = ss-left-trim(exponent-digits, *DIGIT-PLUS-UNDERSCORE*, false)
    & token-type = 're::--real--
    & literal-text = ss-diff(number-prefix, token-end)
    & correct-underscore-usage(literal-text)
    => lex-exponent(number-prefix, current-input, token-type) =
         (< number-prefix, token-end, undefined, token-type,
            lisp::read-from-string(remove-underscores(literal-text)) >),

    %% bare "E" exponent on an integer mantissa
    ss-prefix?(current-input, "E", false)
    & exponent-digits = ss-left-trim-match(current-input, "E", false)
    & ss-first(exponent-digits) in *DIGIT*
    & token-end = ss-left-trim(exponent-digits, *DIGIT-PLUS-UNDERSCORE*, false)
    & token-type = 're::--integer--
    & literal-text = ss-diff(number-prefix, token-end)
    & correct-underscore-usage(literal-text)
    => lex-exponent(number-prefix, current-input, token-type) =
         (< number-prefix, token-end, undefined, token-type,
            real-to-nearest-integer(
              lisp::read-from-string(remove-underscores(literal-text))) >),

    %% "E-" exponent: the result is a real whatever the mantissa was
    ss-prefix?(current-input, "E-", false)
    & exponent-digits = ss-left-trim-match(current-input, "E-", false)
    & ss-first(exponent-digits) in *DIGIT*
    & token-end = ss-left-trim(exponent-digits, *DIGIT-PLUS-UNDERSCORE*, false)
    & literal-text = ss-diff(number-prefix, token-end)
    & correct-underscore-usage(literal-text)
    => lex-exponent(number-prefix, current-input, @@) =
         (< number-prefix, token-end, undefined, 're::--real--,
            lisp::read-from-string(remove-underscores(literal-text)) >),

    %% no exponent: the literal ends at current-input
    literal-text = ss-diff(number-prefix, current-input)
    & correct-underscore-usage(literal-text)
    => lex-exponent(number-prefix, current-input, token-type) =
         (< number-prefix, current-input, undefined, token-type,
            lisp::read-from-string(remove-underscores(literal-text)) >)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-based-literal
%
% Description: Deals with based_literals.
%
% Reference: VHDL LRM (Sec. 13.4.2)
%
% Notes: One strange case encountered:
%
%     --ERROR: No extended digit is allowed in the exponent
%     constant a:integer:=16#54321#A;
%
%   which parses as follows:
%
%     entity TEST is constant A: INTEGER := 344865 A;
%
%   I think this is correct according to the definitions.
%
% Revision History
%   Date      Person  Description
%   8/25/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% base-prefix is the start of the literal, current-input the position of
%% the opening base separator and base the already validated base.  The
%% digits accepted are the first `base' characters of
%% *VALID-EXTENDED-DIGITS* plus the underscore.
function LEX-BASED-LITERAL(base-prefix: stream-sequence,
                           current-input: stream-sequence,
                           base: integer) : lex-value
  computed-using
    ss-first(current-input) in *BASE-SEPARATOR*
    & remove-pound = ss-left-trim(current-input,*BASE-SEPARATOR*, false)
    & defined?(remove-pound)
    & valid-integers = append(subseq(*VALID-EXTENDED-DIGITS*, 1, base), #\_)
    & decimal-location = ss-left-trim(remove-pound,valid-integers,false)
    => lex-based-literal(base-prefix, current-input, base) =
         lex-based-decimal-part(base-prefix, remove-pound,
                                decimal-location, base)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-based-decimal-part
%
% Description: Determines whether based literal is a real or an integer
%   based on whether there is a decimal present, converts the digits seen
%   so far to a value and passes it on to lex-based-exponent.
%
% Notes: Needs to be adapted to deal with decimals.
%
% Revision History
%   Date      Person  Description
%   8/25/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function LEX-BASED-DECIMAL-PART(base-prefix: stream-sequence,
                                decimal-prefix: stream-sequence,
                                current-input: stream-sequence,
                                base: integer) : lex-value
  computed-using
    %% decimal based literal
    ss-prefix?(current-input,*DECIMAL*,false)
    & fractional-part-begin = ss-left-trim-match(current-input, *DECIMAL*, false)
    & valid-integers = append(subseq(*VALID-EXTENDED-DIGITS*, 1, base), #\_)
    & fractional-part-end = ss-left-trim(fractional-part-begin,
                                         valid-integers, false)
    => lex-based-decimal-part(base-prefix, decimal-prefix,
                              current-input, base) =
         lex-based-exponent(base-prefix,
            convert-string-to-integer(base,
               remove-underscores(ss-diff(decimal-prefix, current-input)))
            + convert-string-to-fraction(base,
               remove-underscores(ss-diff(fractional-part-begin,
                                          fractional-part-end))),
            fractional-part-end, base, 're::--real--),

    %% default case
    lex-based-decimal-part(base-prefix, decimal-prefix,
                           current-input, base) =
      lex-based-exponent(base-prefix,
         convert-string-to-integer(base,
            remove-underscores(ss-diff(decimal-prefix, current-input))),
         current-input, base, 're::--integer--)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-based-exponent
%
% Description: Consumes the closing "#" and the optional exponent of a
%   based literal and produces the final token.
%
%   decimal-value is the value of the digits between the separators.  The
%   value of a based literal with an exponent is that value multiplied by
%   the base raised to the power of the exponent, for integer and real
%   tokens alike.  (The integer cases used to raise the value itself to
%   the power of the exponent, so that 16#F#E1 gave 15 instead of 240.)
%   Without an exponent a real token keeps its fractional value (it used
%   to be rounded to the nearest integer) and any other token is returned
%   as an integer.
%
% Notes: Only "#" is accepted as the closing separator here, although
%   lex-abstract-literal and lex-based-literal also accept ":" as the
%   opening one; a literal written with colons is therefore not recognized.
%
% Revision History
%   Date      Person  Description
%   8/25/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function LEX-BASED-EXPONENT(base-prefix: stream-sequence,
                            decimal-value: real,
                            current-input: stream-sequence,
                            base: integer,
                            token-type) : lex-value
  computed-using
    %% 1st case just E, integer token
    ss-prefix?(current-input, "#", false)
    & begin-exponent = ss-left-trim-match(current-input, "#", false)
    & defined?(begin-exponent)
    & ss-prefix?(begin-exponent, "E", false)
    & after-E = ss-left-trim-match(begin-exponent, "E", false)
    & defined?(after-E)
    & ss-first(after-E) in *DIGIT*
    & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
    & token-type = 're::--integer--
    => lex-based-exponent(base-prefix, decimal-value, current-input,
                          base, token-type) =
         (< base-prefix, end-exponent, undefined, token-type,
            real-to-nearest-integer(decimal-value) *
            lisp::expt(base,
               lisp::read-from-string(
                  remove-underscores(ss-diff(after-E, end-exponent)))) >),

    %% case 1A, just E, real token
    ss-prefix?(current-input, "#", false)
    & begin-exponent = ss-left-trim-match(current-input, "#", false)
    & defined?(begin-exponent)
    & ss-prefix?(begin-exponent, "E", false)
    & after-E = ss-left-trim-match(begin-exponent, "E", false)
    & defined?(after-E)
    & ss-first(after-E) in *DIGIT*
    & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
    & token-type = 're::--real--
    => lex-based-exponent(base-prefix, decimal-value, current-input,
                          base, token-type) =
         (< base-prefix, end-exponent, undefined, token-type,
            decimal-value *
            lisp::expt(base,
               lisp::read-from-string(
                  remove-underscores(ss-diff(after-E, end-exponent)))) >),

    %% 2nd case, positive exponent, integer token
    ss-prefix?(current-input, "#", false)
    & begin-exponent = ss-left-trim-match(current-input, "#", false)
    & defined?(begin-exponent)
    & ss-prefix?(begin-exponent, "E+", false)
    & after-E = ss-left-trim-match(begin-exponent, "E+", false)
    & defined?(after-E)
    & ss-first(after-E) in *DIGIT*
    & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
    & token-type = 're::--integer--
    => lex-based-exponent(base-prefix, decimal-value, current-input,
                          base, token-type) =
         (< base-prefix, end-exponent, undefined, token-type,
            real-to-nearest-integer(decimal-value) *
            lisp::expt(base,
               lisp::read-from-string(
                  remove-underscores(ss-diff(after-E, end-exponent)))) >),

    %% case 2A, positive exponent, real token
    ss-prefix?(current-input, "#", false)
    & begin-exponent = ss-left-trim-match(current-input, "#", false)
    & defined?(begin-exponent)
    & ss-prefix?(begin-exponent, "E+", false)
    & after-E = ss-left-trim-match(begin-exponent, "E+", false)
    & defined?(after-E)
    & ss-first(after-E) in *DIGIT*
    & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
    & token-type = 're::--real--
    => lex-based-exponent(base-prefix, decimal-value, current-input,
                          base, token-type) =
         (< base-prefix, end-exponent, undefined, token-type,
            decimal-value *
            lisp::expt(base,
               lisp::read-from-string(
                  remove-underscores(ss-diff(after-E, end-exponent)))) >),

    %% 3rd case, negative exponent, real token
    ss-prefix?(current-input, "#", false)
    & begin-exponent = ss-left-trim-match(current-input, "#", false)
    & defined?(begin-exponent)
    & ss-prefix?(begin-exponent, "E-", false)
    & after-E = ss-left-trim-match(begin-exponent, "E-", false)
    & defined?(after-E)
    & ss-first(after-E) in *DIGIT*
    & end-exponent = ss-left-trim(after-E, *DIGIT-PLUS-UNDERSCORE*, false)
    & token-type = 're::--real--
    => lex-based-exponent(base-prefix, decimal-value, current-input,
                          base, token-type) =
         (< base-prefix, end-exponent, undefined, token-type,
            decimal-value *
            lisp::expt(base,
               -1.0 * lisp::read-from-string(
                  remove-underscores(ss-diff(after-E, end-exponent)))) >),

    %% no exponent, real token: keep the fractional value
    ss-prefix?(current-input, "#", false)
    & end-based-literal = ss-left-trim-match(current-input, "#", false)
    & defined?(end-based-literal)
    & token-type = 're::--real--
    => lex-based-exponent(base-prefix, decimal-value, current-input,
                          base, token-type) =
         (< base-prefix, end-based-literal, undefined, token-type,
            decimal-value >),

    %% no exponent, any other token: the literal ends at the closing "#"
    ss-prefix?(current-input, "#", false)
    & end-based-literal = ss-left-trim-match(current-input, "#", false)
    & defined?(end-based-literal)
    => lex-based-exponent(base-prefix, decimal-value, current-input,
                          base, token-type) =
         (< base-prefix, end-based-literal, undefined, token-type,
            real-to-nearest-integer(decimal-value) >)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-character-literal
%
% Description: This function checks for a character literal at the
%   beginning of the input stream passed into the routine.
%
% Revision History
%   Date      Person  Description
%   8/24/94   PWB     Original Creation
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function LEX-CHARACTER-LITERAL(input : stream-sequence) : lex-value
  computed-using
    %% First and third characters are single quotes
    ss-first(input) = *SINGLE-QUOTE* &
    ss-first(ss-rest(ss-rest(input))) = *SINGLE-QUOTE* &
    %% end-input is from fourth character on
    end-input = ss-rest(ss-rest(ss-rest(input))) &
    second-char = ss-first(ss-rest(input))
    => lex-character-literal(input) =
         < input, end-input, undefined, 're::--char--, second-char >

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-string-literal
%
% Description: Recognizes anything between two quotes as a string literal.
%
% Revision History
%   Date      Person  Description
%   8/23/94   PWB     Copied from SubPascal example.
%   9/28/94   PWB     Modified to allow embedded quotes (i.e. "")
%   10/6/94   DB      Modified to allow replacement of " by %. (Sec. 13.10)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% NOTE(review): the result expressions of both cases below are missing
%% from this copy of the file.  They were presumably lex-value tuples
%% written as "<input, end-input, ...>" that were lost in extraction --
%% restore them from a pristine copy; the function is not valid REFINE as
%% it stands.
function LEX-STRING-LITERAL(input: stream-sequence) : lex-value
  computed-using
    %% string delimited by quotation marks
    ss-prefix?(input, *QUOTE*, false)
    & start-text = ss-left-trim-match(input, *QUOTE*, false)
    & end-text = find-token-end(start-text, *QUOTE*)
    & defined? (end-text)
    & ss-prefix?(end-text, *QUOTE*, false)
    & end-input = ss-left-trim-match(end-text, *QUOTE*, false)
    => lex-string-literal(input) =
         %% NOTE(review): result tuple missing here (see above)
         ,

    %% string delimited by percent signs
    ss-prefix?(input, *PERCENT*, false)
    & start-text = ss-left-trim-match(input, *PERCENT*, false)
    & end-text = find-token-end(start-text, *PERCENT*)
    & defined? (end-text)
    & ss-prefix?(end-text, *PERCENT*, false)
    & end-input = ss-left-trim-match(end-text, *PERCENT*, false)
    => lex-string-literal(input) =
         %% NOTE(review): result tuple missing here (see above)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-bit-string-literal
%
% Description: Recognizes strings with certain bases as base string
%   literals.
%
% Revision History
%   Date      Person  Description
%   8/23/94   DB      Original Creation
%   8/24/94   DB      Added assertions about the digits associated with
%                     a given base (B | O | X).
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% NOTE(review): the result expressions of all three cases below are
%% missing from this copy of the file (presumably "<input, end-input, ...>"
%% tuples lost in extraction) -- restore them from a pristine copy.
%% NOTE(review): the base letter is removed with ss-left-trim, which
%% elsewhere in this file is used to skip a whole run of characters; if so,
%% a repeated base letter such as BB"01" would be accepted.
%% ss-left-trim-match looks like the intended call -- confirm.
function LEX-BIT-STRING-LITERAL(input: stream-sequence) : lex-value
  computed-using
    %% binary
    ss-prefix?(input,"B", false)
    & quoted-text = ss-left-trim(input,"B", false)
    & ss-prefix?(quoted-text, *QUOTE*, false)
    & start-quoted-text = ss-left-trim-match(quoted-text, *QUOTE*, false)
    & end-text = ss-left-trim(start-quoted-text,
                              *BINARY-DIGITS-PLUS-UNDERSCORE*, false)
    & ss-prefix?(end-text, *QUOTE*, false)
    & end-input = ss-left-trim-match(end-text, *QUOTE*, false)
    => lex-bit-string-literal(input) =
         %% NOTE(review): result tuple missing here (see above)
         ,

    %% octal
    ss-prefix?(input,"O", false)
    & quoted-text = ss-left-trim(input,"O", false)
    & ss-prefix?(quoted-text, *QUOTE*, false)
    & start-quoted-text = ss-left-trim-match(quoted-text, *QUOTE*, false)
    & end-text = ss-left-trim(start-quoted-text,
                              *OCTAL-DIGITS-PLUS-UNDERSCORE*, false)
    & ss-prefix?(end-text, *QUOTE*, false)
    & end-input = ss-left-trim-match(end-text, *QUOTE*, false)
    => lex-bit-string-literal(input) =
         %% NOTE(review): result tuple missing here (see above)
         ,

    %% hexadecimal
    ss-prefix?(input,"X", false)
    & quoted-text = ss-left-trim(input,"X", false)
    & ss-prefix?(quoted-text, *QUOTE*, false)
    & start-quoted-text = ss-left-trim-match(quoted-text, *QUOTE*, false)
    & end-text = ss-left-trim(start-quoted-text,
                              *HEX-DIGITS-PLUS-UNDERSCORE*, false)
    & ss-prefix?(end-text, *QUOTE*, false)
    & end-input = ss-left-trim-match(end-text, *QUOTE*, false)
    => lex-bit-string-literal(input) =
         %% NOTE(review): result tuple missing here (see above)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-whitespace
%
% Description: Remove leading whitespace from the input.  Also,
%   increment the *LINE-NUMBER* variable for each linefeed found.
%
% Revision History
%   Date      Person  Description
%   8/24/94   PWB     Copied & modified from SubPascal example.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% NOTE(review): the text of a comment is skipped with
%% ss-left-trim-until-match on the newline.  If that call also consumes the
%% newline, the line that ends a comment is not counted in *LINE-NUMBER* --
%% confirm against the REFINE documentation.
function LEX-WHITESPACE(input : stream-sequence) : stream-sequence =
  let ( blanks-removed : stream-sequence =
           ss-left-trim(input, *SPACE*, false),
        linefeed-removed : stream-sequence = input,
        comment-removed : stream-sequence = input )

  %% blanks-removed contains input stream with leading spaces removed.
  %% Next, remove one linefeed, incrementing *LINE-NUMBER*.
  ( if ( ss-first(blanks-removed) = *NEWLINE* ) then
       linefeed-removed <- lex-whitespace ( ss-rest(blanks-removed) );
       *LINE-NUMBER* <- *LINE-NUMBER* + 1
    else
       linefeed-removed <- blanks-removed );

  %% Now, remove one comment from linefeed-removed.
  ( if ( ss-prefix? ( linefeed-removed, *COMMENT-START*, false ) ) then
       let ( after-start-comment : stream-sequence =
                ss-left-trim-match ( linefeed-removed,
                                     *COMMENT-START*, false ) )
       comment-removed <-
          lex-whitespace ( ss-left-trim-until-match ( after-start-comment,
                                                      *NEWLINE-STRING*,
                                                      false ) )
    else
       comment-removed <- linefeed-removed );

  %% Now that all leading blanks, linefeeds and comments have been
  %% removed, return comment-removed.
  comment-removed

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: character-canonicalizer
%
% Description: VHDL reserved words are case-insensitive.  Hence, all
%   keywords will be downcased to match their definition.
%
% Reference: VHDL LRM (Sec.
%            13.9)
%
% Revision History
%   Date      Person  Description
%   8/23/94   DB      Copied from SubPascal example.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% The second argument is not used; it is part of the signature because the
%% function is handed to lex-keyword by name (see lex-token).
function CHARACTER-CANONICALIZER (c: char, cf: any-type): char =
  lisp::char-downcase(c)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: accept-keyword?
%
% Description: Determines whether to accept or reject a given keyword.
%   In the case where the keyword is part of a larger identifier the
%   keyword will be rejected.  A keyword is accepted when the input that
%   follows it (ss) is empty, when the next character is not a letter,
%   digit or underscore in either case, or when the keyword text (st)
%   itself ends in something other than a letter.
%
% Revision History
%   Date      Person  Description
%   8/23/94   PWB     Copied from SubPascal example.
%   9/1/94    DB      Changed *LETTER-OR-DIGIT* to
%                     *LETTER-OR-DIGIT-OR-UNDERSCORE*
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function ACCEPT-KEYWORD? (ss: stream-sequence, st: string,
                          cf: any-type): boolean =
  ss-empty?(ss)
  or-else (ss-first(ss) ~in *LETTER-OR-DIGIT-OR-UNDERSCORE*
           & lisp::char-downcase(ss-first(ss))
               ~in *LETTER-OR-DIGIT-OR-UNDERSCORE*)
  or-else ex(ch) (st = [.., ch] & ch ~in *LETTER*)

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: print-syntax-error
%
% Description: Simple syntax error handling routine.  Prints the message
%   together with the current *LINE-NUMBER*, then returns the input up to
%   the next *END-OF-LINE* as a single 're::--string-- token whose value is
%   the skipped text.
%
%   The tuple follows the layout used by every other lexer function in
%   this file: < start, end, undefined, token type, token value >.  (The
%   token type and the value used to be in each other's place.)
%
% Revision History
%   Date      Person  Description
%   10/1/94   DB      Original creation.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

function print-syntax-error(input: stream-sequence,
                            error-message: string): lex-value =
  format(true, "~A : line number ~A~%", error-message, *LINE-NUMBER*);
  let (end-input = ss-left-trim-until(input, *END-OF-LINE*, false))
  < input, end-input, undefined, 're::--string--,
    ss-diff(input, end-input) >

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-token
%
% Description: Processes the input after the comments have been
%   removed.
%
% Revision History
%   Date      Person  Description
%   8/23/94   DB      Copied from SubPascal example.
%   9/30/94   DB      Added call to print-syntax-error.  This doesn't ask
%                     user for information on what to do.  If you don't
%                     like this uncomment the call to report-syntax-error
%                     and comment out our call.
%                     (As the code stands, the call to print-syntax-error
%                     is the one commented out and report-syntax-error is
%                     the one in use.)
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Tries each token class in turn and returns the first lex-value that is
%% defined.  The order matters: character literals are tried before
%% keywords, keywords before numbers and strings, and identifiers last.
function LEX-TOKEN(input: stream-sequence,
                   pcb: parse-control-block): lex-value
  computed-using
    %% grammar prefix, i.e. ##r
    prefix-token = lex-grammar-prefix(input, undefined, pcb)
    & defined?(prefix-token)
    => lex-token(input, pcb) = prefix-token,

    %% end of form, i.e. end of file or end of pattern
    end-token = lex-end-of-form(input, undefined, pcb)
    & defined?(end-token)
    & pseudoterminal-legal?('re::--end--, pcb, true)
    => lex-token(input, pcb) = end-token,

    %% character literal
    char-token = lex-character-literal(input)
    & defined?(char-token)
    => lex-token(input, pcb) = char-token,

    %% keyword of the grammar
    keyword-token = lex-keyword(input, undefined, pcb,
                                'character-canonicalizer,
                                'accept-keyword?)
    & defined?(keyword-token)
    => lex-token(input, pcb) = keyword-token,

    %% number
    number-token = lex-abstract-literal(input)
    & defined?(number-token)
    => lex-token(input, pcb) = number-token,

    %% string literal
    string-token = lex-string-literal(input)
    & defined?(string-token)
    => lex-token(input, pcb) = string-token,

    %% bit-string literal
    bit-string-token = lex-bit-string-literal(input)
    & defined?(bit-string-token)
    => lex-token(input, pcb) = bit-string-token,

    %% identifier
    identifier-token = lex-identifier(input)
    & defined?(identifier-token)
    => lex-token(input, pcb) = identifier-token,

    %% Nothing matched.  Alternative that reports the error without
    %% prompting the user:
    %% lex-token(input, pcb)
    %%   = print-syntax-error(input, "syntax error")

    %% Default REFINE syntax error function
    lex-token(input, pcb) =
      report-syntax-error(input,
        "unrecognizable text encountered during lexical analysis")

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%
% Function Name: lex-vhdl
%
% Description: Top-level lexing function.  Called from VHDL-93 parser.
%
% Revision History
%   Date      Person  Description
%   8/23/94   PWB     Copied from SubPascal example.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% lexical-context-from-last-call is accepted but not used.
function LEX-VHDL(input: stream-sequence,
                  lexical-context-from-last-call: any-type,
                  pcb: parse-control-block): lex-value =
  %% Skip leading white space and comments, then lex one token from
  %% whatever follows.
  let (token-start: stream-sequence = lex-whitespace(input))
  lex-token(token-start, pcb)