{*******************************************************}
{                                                       }
{   WHY not a compiler? by Y [05-04-00]                 }
{   Token stream, a lexical analysis engine             }
{                                                       }
{   Copyright (c) 1999-2000 CROWELL, Inc.               }
{   All Rights Reserved.                                }
{                                                       }
{*******************************************************}

unit CTokenStream;

{$I CDEFINES.PAS}

{$IFDEF TPC}
{$N+}
{$ENDIF}

interface

uses
  Objects, CConstants, CIOStream;

type
  {TTokenStream: a pull-based lexical analyser over a buffered character
   stream.  Recognised tokens can be pushed back into a small LIFO buffer
   (SecondBuffer), which lets callers look ahead and retry a read with a
   different expected token kind.  All Read* functions return True on
   success and leave the stream untouched (characters put back) on failure.}
  PTokenStream = ^TTokenStream;
  TTokenStream = object(TObject)
    InStream: TDoubleBufStream;   {underlying buffered character source}
    Status: Integer;              {0 = OK, otherwise an st* error code}
    CompilerError: TCompilerError; {lexical error detail, meaningful when Status = stCompilerError}
    TokensRemain: Integer;        {count of pushed-back tokens held in SecondBuffer}
    SecondBuffer: array [1..SecondBufferSize] of TToken; {LIFO token put-back buffer}
    InputFileName: String;        {file currently being tokenized}
    constructor Init(FileName: String);
    procedure Free;
    destructor Done; virtual;
    {private declarations}
    procedure SkipSpaces;
    procedure SkipSingleCharComment(Chars: SmallString); (* '{}' *)
    procedure SkipDoubleCharComment(Chars: SmallString); { '(**)' }
    procedure SkipSingleLineComment(Chars: SmallString); { '\' }
    procedure SkipDoubleLineComment(Chars: SmallString); {'//' or '--'}
    procedure SkipAll;
    function ReadFilteredString(Chars: CharSet): String;
    function ReadExactString(Chars: SmallString): Boolean;
    function ReadDecimal(var Token: TToken): Boolean;
    function ReadHexadecimal(var Token: TToken): Boolean;
    {public declarations}
    function ReadChar(var C: Char): Boolean;
    function Read(var Token: TToken): Boolean; {any token type}
    function ReadBigDelimiter(var Token: TToken; Chars: SmallString): Boolean;
    function ReadDelimiter(var Token: TToken): Boolean;
    function ReadFloat(var Token: TToken): Boolean;
    function ReadInteger(var Token: TToken): Boolean;
    function ReadIdentifier(var Token: TToken): Boolean;
    function ReadReservedWord(var Token: TToken): Boolean;
    function ReadString(var Token: TToken; Chars: SmallString): Boolean; {'""'}
    procedure PutBack(Token: TToken);
    procedure PutBackString(S: String);
  end;

implementation

uses
  Strings;

{TTokenStream}

{Open FileName for reading and reset all error and look-ahead state.
 After construction, Status mirrors the stream's open status, so callers
 must check Status before reading tokens.}
constructor TTokenStream.Init(FileName: String);
begin
  inherited Init;
  InStream.Init(FileName, stOpenRead, ReadBufferSize);
  Status := InStream.Status;
  CompilerError := ceNoErrors;
  TokensRemain := 0;
  InputFileName := FileName;
end; {TTokenStream.Init}

{Deliberately empty.
 NOTE(review): Objects.TObject normally provides a Free that disposes the
 instance; overriding it with a no-op looks intentional (heap management
 handled elsewhere) but should be confirmed against the Objects unit.}
procedure TTokenStream.Free;
begin
end; {TTokenStream.Free}

{Close the underlying stream and record its final status.}
destructor TTokenStream.Done;
begin
  InStream.Done;
  Status := InStream.Status;
end; {TTokenStream.Done}

{Consume consecutive characters belonging to SkipChars (whitespace set
 from CConstants).  The first non-skippable character is pushed back so
 the caller sees it.  No-op if the stream or tokenizer is already in an
 error state.}
procedure TTokenStream.SkipSpaces;
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then Exit;
  repeat
    C := InStream.GetChar;
  until not (C in SkipChars) or (InStream.Status <> 0);
  {push the first significant character back for the caller}
  if (InStream.Status = 0) then InStream.PutBack(C);
end;

{Skip one comment delimited by single characters: Chars[1] opens it and
 Chars[2] closes it (e.g. '{' and '}').  If the opening character is not
 next on the stream it is pushed back and nothing is consumed.  Hitting
 end of input before the closing character raises ceCommentNotClosed.}
procedure TTokenStream.SkipSingleCharComment(Chars: SmallString);
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then Exit;
  C := InStream.GetChar;
  if (InStream.Status <> 0) then Exit;
  if (C = Chars[1]) then
  begin
    repeat
      C := InStream.GetChar;
    until (C = Chars[2]) or (InStream.Status <> 0);
    if (InStream.Status <> 0) then
    begin
      {ran off the end of input inside the comment}
      Status := stCompilerError;
      CompilerError := ceCommentNotClosed;
    end
  end
  else
    InStream.PutBack(C);
end; {TTokenStream.SkipSingleCharComment}

{Skip one comment delimited by two-character brackets: Chars[1..2] open
 it and Chars[3..4] close it (e.g. '(*' and '*)').  If only the first
 opening character matches, both consumed characters are pushed back.}
procedure TTokenStream.SkipDoubleCharComment(Chars: SmallString);
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then Exit;
  C := InStream.GetChar;
  if (InStream.Status <> 0) then Exit;
  if C = Chars[1] then
  begin
    C := InStream.GetChar;
    if (C <> Chars[2]) or (InStream.Status <> 0) then
    begin
      {not a comment opener after all: restore both characters}
      InStream.PutBack(C);
      InStream.PutBack(Chars[1]);
    end
    else
    begin {C = '*', i.e. '(*' encountered}
      repeat
        {scan for the first closing character...}
        repeat
          C := InStream.GetChar;
        until (C = Chars[3]) or (InStream.Status <> 0);
        if (InStream.Status <> 0) then
        begin
          Status := stCompilerError;
          CompilerError := ceCommentNotClosed;
        end
        else {C = '*'}
        begin
          {...then check whether the second closing character follows;
           if not, push it back and keep scanning}
          C := InStream.GetChar;
          if (C <> Chars[4]) then
            if (InStream.Status = 0) then
              InStream.PutBack(C)
            else
            begin {InStream.Status <> 0}
              Status := stCompilerError;
              CompilerError := ceCommentNotClosed;
            end
          else {C = ')'};
        end;
      until (C = Chars[4]) or (InStream.Status <> 0);
    end;
  end
  else
    InStream.PutBack(C);
end; {TTokenStream.SkipDoubleCharComment}

{Skip a single-character line comment: if Chars[1] is next on the stream,
 consume everything up to and including the end-of-line character.}
procedure TTokenStream.SkipSingleLineComment(Chars: SmallString);
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then Exit;
  C := InStream.GetChar;
  if (InStream.Status <> 0) then Exit;
  if C = Chars[1] then
  begin
    repeat
      C := InStream.GetChar;
    until (C in [CR, LF]) or (InStream.Status <> 0);
  end
  else
    InStream.PutBack(C);
end; {TTokenStream.SkipSingleLineComment}

{Skip a two-character line comment ('//' or '--'): if both Chars[1] and
 Chars[2] are next on the stream, consume through end of line; otherwise
 restore whatever was consumed.}
procedure TTokenStream.SkipDoubleLineComment(Chars: SmallString);
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then Exit;
  C := InStream.GetChar;
  if (InStream.Status <> 0) then Exit;
  if C = Chars[1] then
  begin
    C := InStream.GetChar;
    if (C <> Chars[2]) or (InStream.Status <> 0) then
    begin
      InStream.PutBack(C);
      InStream.PutBack(Chars[1]);
    end
    else
      repeat
        C := InStream.GetChar;
      until (C in [CR, LF]) or (InStream.Status <> 0);
  end
  else
    InStream.PutBack(C);
end; {TTokenStream.SkipDoubleLineComment}

{Repeatedly strip whitespace and every supported comment form until the
 next character is significant, leaving that character on the stream.
 NOTE(review): the loop repeats only while the peeked character is in
 SkipChars, so two comments with no whitespace between them appear to
 stop the loop after the first unless comment-opening characters are
 members of SkipChars -- verify against CConstants.}
procedure TTokenStream.SkipAll;
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then Exit;
  repeat
    SkipSpaces;
    SkipSingleCharComment(sSingleComment);
    SkipDoubleCharComment(sDoubleComment);
    SkipSingleLineComment(sSingleLineComment);
    SkipDoubleLineComment(sDoubleLineComment);
    {SkipDoubleLineComment(sAdaDoubleLineComment);}
    {peek at the next character to decide whether another round is needed}
    C := InStream.GetChar;
    if (InStream.Status = 0) then InStream.PutBack(C);
  until not (C in SkipChars) or (InStream.Status <> 0) or (Status <> 0);
end; {TTokenStream.SkipAll}
{Read one raw character, bypassing all token analysis.
 Returns True when a character was delivered.}
function TTokenStream.ReadChar(var C: Char): Boolean;
begin
  C := InStream.GetChar;
  ReadChar := (InStream.Status = 0);
end; {ReadChar}

{Read the next token.  A pushed-back token, if any, is returned first.
 Otherwise dispatch depends on the kind already stored in Token on entry:
 tkUnknown tries every token class in priority order; a specific kind
 tries only that reader.  On success the token is stamped with the line
 and column where lexical analysis started.
 NOTE(review): there is no explicit tkFloat arm in the case statement, so
 a caller pre-setting Token.Kind = tkFloat always gets False -- confirm
 this is intended (floats are still found via the tkUnknown path).}
function TTokenStream.Read(var Token: TToken): Boolean;
var
  Temp: Boolean;
  Line, Position: Integer;
begin
  Temp := False;
  if (TokensRemain > 0) then
  begin
    {serve the most recently pushed-back token (LIFO)}
    Token := SecondBuffer[TokensRemain];
    Dec(TokensRemain);
    Temp := True;
  end
  else {lexical analysis comes here!}
    if (InStream.Status = 0) and (Status = 0) then
    begin
      SkipAll; {remove spaces and comments}
      Line := InStream.CurrentLine;
      Position := InStream.CurrentPos;
      case Token.Kind of
        tkUnknown:
          begin
            Temp := True; {a priori}
            {order matters: multi-character delimiters and floats must be
             tried before their single-character / integer prefixes}
            if not ReadString(Token, sStringChars) then
            if not ReadBigDelimiter(Token, sAssignment) then
            if not ReadBigDelimiter(Token, sDoublePoint) then
            if not ReadBigDelimiter(Token, sGreaterEqual) then
            if not ReadBigDelimiter(Token, sLessEqual) then
            if not ReadBigDelimiter(Token, sNotEqual) then
            if not ReadFloat(Token) then
            if not ReadInteger(Token) then
            if not ReadDelimiter(Token) then
            if not ReadReservedWord(Token) then
            if not ReadIdentifier(Token) then
              Temp := False;
          end;
        tkBigDelimiter:  Temp := ReadBigDelimiter(Token, Token.S);
        tkDelimiter:     Temp := ReadDelimiter(Token);
        tkIdentifier:    Temp := ReadIdentifier(Token);
        tkInteger:       Temp := ReadInteger(Token);
        tkReservedWord:  Temp := ReadReservedWord(Token);
        tkString:        Temp := ReadString(Token, sStringChars);
      end;
      if Temp then
      begin
        Token.Line := Line;
        Token.Position := Position;
      end;
    end;
  Read := Temp;
end; {TTokenStream.Read}

{Read a specific multi-character delimiter given in Chars (e.g. ':=').
 Serves a buffered tkBigDelimiter token first; otherwise matches Chars
 exactly against the stream via ReadExactString.}
function TTokenStream.ReadBigDelimiter(var Token: TToken; Chars: SmallString): Boolean;
var
  Temp: Boolean;
  Line, Position: Integer;
begin
  Temp := False;
  FreeToken(Token);
  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkBigDelimiter) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;
    if (InStream.Status = 0) and (Status = 0) then
    begin
      if ReadExactString(Chars) then
      begin
        {NOTE(review): Kind is assigned both before and after
         StringToToken; the first assignment looks redundant but is kept
         in case StringToToken inspects Token.Kind (e.g. to free a
         previous string payload) -- verify against its implementation.}
        with Token do
        begin
          Kind := tkBigDelimiter;
          StringToToken(Token, Chars);
          Token.Kind := tkBigDelimiter;
        end;
        Temp := True;
      end
      else
        Temp := False;
    end;
    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;
  ReadBigDelimiter := Temp;
end; {TTokenStream.ReadBigDelimiter}

{Read the longest run of characters belonging to Chars and return it;
 the first character outside the set is pushed back.  Returns '' when
 nothing matches or the stream is in an error state.}
function TTokenStream.ReadFilteredString(Chars: CharSet): String;
var
  C: Char;
  TempString: String;
begin
  TempString := '';
  if (InStream.Status = 0) and (Status = 0) then
  begin
    repeat
      C := InStream.GetChar;
      if (InStream.Status = 0) then
        if (C in Chars) then
          TempString := TempString + C
        else
          InStream.PutBack(C);
    until not (C in Chars) or (InStream.Status <> 0);
  end;
  ReadFilteredString := TempString;
end; {TTokenStream.ReadFilteredString}

{Try to consume exactly the string Chars from the stream (case-folded via
 LoCase/LowerCase unless CaseSensitive).  On a mismatch every consumed
 character is pushed back and False is returned.
 BUGFIX: Temp was previously left uninitialised when the stream or
 tokenizer was already in an error state, so the function result was
 undefined on that path; it is now initialised to False up front.}
function TTokenStream.ReadExactString(Chars: SmallString): Boolean;
var
  C, C2: Char;
  Temp: Boolean;
  CLength, i: Integer;
  Chars2: SmallString;
begin
  Temp := False;
  if not CaseSensitive then
    Chars2 := LowerCase(Chars)
  else
    Chars2 := Chars;
  CLength := Length(Chars);
  if (InStream.Status = 0) and (Status = 0) then
  begin
    Temp := True;
    i := 1;
    repeat
      C := InStream.GetChar;
      if CaseSensitive then C2 := C else C2 := LoCase(C);
      if (InStream.Status = 0) then
        if (C2 <> Chars2[i]) then
        begin
          {mismatch: restore the matched prefix plus the offending char}
          Temp := False;
          PutBackString(Copy(Chars, 1, i - 1) + C);
        end
        else
          Inc(i)
      else
        Temp := False;
    until not Temp or (i > CLength);
  end;
  ReadExactString := Temp;
end; {TTokenStream.ReadExactString}

{Read an unsigned decimal integer literal into Token (kind tkInteger).
 A buffered tkInteger token is served first.  A digit run that overflows
 Longint raises ceConstantOutOfRange.}
function TTokenStream.ReadDecimal(var Token: TToken): Boolean;
var
  Temp: Boolean;
  Error: Integer;
  TempString: String;
  TempInteger: Longint;
  Line, Position: Integer;
begin
  Temp := False;
  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkInteger) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;
    TempString := ReadFilteredString(Digits);
    if (TempString <> '') then
    begin
      Val(TempString, TempInteger, Error); {warning! standard routine used!}
      if (Error <> 0) then
      begin
        Temp := False;
        Status := stCompilerError;
        CompilerError := ceConstantOutOfRange;
      end
      else
      begin
        Temp := True;
        with Token do
        begin
          I := TempInteger;
          Kind := tkInteger;
        end;
      end;
    end;
    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;
  ReadDecimal := Temp;
end; {TTokenStream.ReadDecimal}

{Read a single-character delimiter.  On success Token.I holds the 1-based
 index of the character within the Delimiters string.}
function TTokenStream.ReadDelimiter(var Token: TToken): Boolean;
var
  C: Char;
  Index: Integer;
  Temp: Boolean;
  Line, Position: Integer;
begin
  Temp := False;
  FreeToken(Token);
  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkDelimiter) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;
    if (InStream.Status = 0) and (Status = 0) then
    begin
      C := InStream.GetChar;
      if (InStream.Status = 0) then
      begin
        Index := Pos(C, Delimiters);
        if (Index <> 0) then
        begin
          Temp := True;
          with Token do
          begin
            Kind := tkDelimiter;
            I := Index;
          end;
        end
        else
          InStream.PutBack(C);
      end;
    end;
    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;
  ReadDelimiter := Temp;
end; {TTokenStream.ReadDelimiter}

{Read a floating-point literal of the form digits '.' digits [E[+|-]digits].
 A bare integer, or digits followed by anything but '.', is fully pushed
 back so ReadInteger can claim it.
 BUGFIX: the Line/Position stamp used to sit inside the analysis branch
 guarded by 'if Temp', but Temp only becomes True later (after FloatVal),
 so float tokens never carried their source location; the stamp now
 happens in the successful-conversion branch.}
function TTokenStream.ReadFloat(var Token: TToken): Boolean;
var
  Temp: Boolean;
  TempValue: Float;
  TempString, TempString2: String;
  Error: Integer;
  C: Char;
  Line, Position: Integer;
begin
  Temp := False;
  TempString := '';
  FreeToken(Token);
  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkFloat) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;
    TempString := ReadFilteredString(Digits);
    if (InStream.Status = 0) then
      if (TempString <> '') then
      begin
        C := InStream.GetChar;
        if (InStream.Status = 0) then
          if (C = '.') then
          begin
            TempString2 := ReadFilteredString(Digits);
            if (TempString2 <> '') then
            begin
              TempString := TempString + '.' + TempString2;
              {optional exponent part}
              C := InStream.GetChar;
              if (InStream.Status = 0) then
                if (LoCase(C) = 'e') then
                begin
                  C := InStream.GetChar;
                  if (InStream.Status = 0) then
                  begin
                    TempString := TempString + 'E';
                    if (C in ['+', '-']) then
                      TempString := TempString + C
                    else
                      InStream.PutBack(C);
                    TempString := TempString + ReadFilteredString(Digits);
                  end;
                end
                else
                  InStream.PutBack(C);
            end
            else
            begin
              {'.' not followed by digits: not a float; restore everything}
              InStream.PutBack(C);
              PutBackString(TempString);
              TempString := '';
            end;
          end
          else
          begin
            {digits not followed by '.': leave them for ReadInteger}
            InStream.PutBack(C);
            PutBackString(TempString);
            TempString := '';
          end;
      end;
  end;
  {TempString is non-empty only when lexical analysis assembled a float,
   so Line/Position are always defined here}
  if (TempString <> '') then
  begin
    FloatVal(TempString, TempValue, Error);
    if (Error = 0) then
    begin
      Temp := True;
      Token.Kind := tkFloat;
      Token.F := TempValue;
      Token.Line := Line;
      Token.Position := Position;
    end
    else {never gets here due to Run-time error #205 or #206}
    begin
      Status := stCompilerError;
      CompilerError := ceErrorInRealConstant;
    end;
  end;
  ReadFloat := Temp;
end; {TTokenStream.ReadFloat}

{Read a hexadecimal integer literal in C ('0x...'), Pascal ('$...' as an
 exact string) or delimiter-table form.  On success Token is tkInteger.
 NOTE(review): the buffer check accepts any tkInteger token, not
 specifically one that originated as hexadecimal -- confirm intended.}
function TTokenStream.ReadHexadecimal(var Token: TToken): Boolean;
var
  Error: Integer;
  Temp: Boolean;
  TempString: String;
  TempValue: Longint;

  {Parse the hex digits that follow an already-consumed prefix; uses the
   outer Temp/Token.  Leaves Temp False when no digits follow.}
  function DoReadHexadecimal: Boolean;
  begin
    if (InStream.Status = 0) and (Status = 0) then
    begin
      TempString := ReadFilteredString(HexDigits);
      if (TempString <> '') then
      begin
        {'$' prefix makes Val parse the string as hexadecimal}
        TempString := '$' + TempString;
        Val(TempString, TempValue, Error); {warning! standard routine used!}
        if (Error <> 0) then
        begin
          Temp := False;
          Status := stCompilerError;
          CompilerError := ceConstantOutOfRange;
        end
        else
        begin
          Temp := True;
          with Token do
          begin
            I := TempValue;
            Kind := tkInteger;
          end;
        end;
      end;
    end;
    DoReadHexadecimal := Temp;
  end; {DoReadHexadecimal}

begin
  Temp := False;
  TempString := '';
  FreeToken(Token);
  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkInteger) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    {the bare 'else' arms below are deliberate empty statements that bind
     the dangling else chains; do not remove them}
    if ReadExactString(cCHexPrefix) then
      if not DoReadHexadecimal then PutBackString(cCHexPrefix) else
    else if ReadExactString(cPasHexPrefix) then
      if not DoReadHexadecimal then PutBackString(cPasHexPrefix) else
    else if ReadDelimiter(Token) then
      {if (Token.I = Byte(cPasHexPrefix)) then}
      if (Delimiters[Token.I] = cPasHexPrefix) then
        if not DoReadHexadecimal then PutBack(Token) {empty string after the prefix} else
      else PutBack(Token); {delimiter but not a hexadecimal prefix}
  end;
  ReadHexadecimal := Temp;
end; {TTokenStream.ReadHexadecimal}

{Read an integer literal: hexadecimal first, then decimal.
 NOTE(review): unlike the other Read* routines, SkipAll runs before the
 token-buffer check, so whitespace is consumed even when a buffered token
 is served -- kept as-is to preserve stream positioning; verify intent.}
function TTokenStream.ReadInteger(var Token: TToken): Boolean;
var
  Temp: Boolean;
  Line, Position: Integer;
begin
  Temp := False;
  SkipAll;
  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkInteger) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    Temp := True;
    if not ReadHexadecimal(Token) then
      if not ReadDecimal(Token) then
        Temp := False;
  end;
  ReadInteger := Temp;
end;

{Read an identifier: a letter followed by letters or digits.  On success
 Token carries the spelling (via StringToToken) and kind tkIdentifier.}
function TTokenStream.ReadIdentifier(var Token: TToken): Boolean;
var
  Temp: Boolean;
  TempString: String;
  Line, Position: Integer;
begin
  Temp := False;
  FreeToken(Token);
  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkIdentifier) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;
    if (InStream.Status = 0) and (Status = 0) then
    begin
      TempString := ReadFilteredString(Letters);
      if (TempString <> '') then
      begin
        TempString := TempString + ReadFilteredString(Letters + Digits);
        StringToToken(Token, TempString);
        Token.Kind := tkIdentifier;
        Temp := True;
      end
      else
        Temp := False;
    end;
    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;
  ReadIdentifier := Temp;
end; {TTokenStream.ReadIdentifier}

{Read a reserved word: reads an identifier and matches its (optionally
 lower-cased) spelling against the ReservedWords table.  On a match,
 Token becomes tkReservedWord with I = table index; otherwise the
 identifier token is pushed back and False is returned.}
function TTokenStream.ReadReservedWord(var Token: TToken): Boolean;
var
  Temp: Boolean;
  TempString: String;
  i: Integer;
begin
  Temp := False;
  FreeToken(Token);
  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkReservedWord) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    if ReadIdentifier(Token) then
    begin
      if CaseSensitive then
        TempString := Token.S
      else
        TempString := LowerCase(Token.S);
      i := 1;
      while not Temp and (i <= High(ReservedWords)) do
      begin
        if (TempString = StrPas(ReservedWords[i])) then
        begin
          Temp := True;
          FreeToken(Token); {release the identifier spelling before reuse}
          Token.Kind := tkReservedWord;
          Token.I := i;
        end;
        Inc(i);
      end;
      if not Temp then PutBack(Token);
    end;
  end;
  ReadReservedWord := Temp;
end; {TTokenStream.ReadReservedWord}

{Read a quoted string literal: Chars[1] opens, Chars[2] closes, a doubled
 closing quote embeds a literal quote, sStringEscape introduces escape
 sequences (named, decimal or hexadecimal character codes).  A bare CR/LF
 inside the literal raises ceStringConstantExceedsLine; end of input
 raises ceStringNotClosed.
 CONSISTENCY FIX: Line/Position were declared but never used, so string
 tokens (unlike every sibling Read*) carried no source location; they are
 now recorded after SkipAll and stamped on success.
 NOTE(review): the numeric-escape path calls ReadDecimal/ReadHexadecimal,
 which may serve a pushed-back token from SecondBuffer instead of reading
 digits from the stream -- verify this cannot occur mid-string.}
function TTokenStream.ReadString(var Token: TToken; Chars: SmallString): Boolean;
var
  C: Char;
  Temp: Boolean;
  TempString: String;
  Index: Integer;
  Line, Position: Integer;
begin
  Temp := False;
  TempString := '';
  FreeToken(Token);
  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkString) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;
    if (InStream.Status = 0) and (Status = 0) then
    begin
      C := InStream.GetChar;
      if (InStream.Status = 0) then
      begin
        if C = Chars[1] then
        begin
          repeat
            C := InStream.GetChar;
            if (InStream.Status = 0) then
              if (C = sStringEscape) then
              begin
                C := InStream.GetChar; {just after escape}
                if (InStream.Status = 0) then {convert}
                begin
                  Index := Pos(LoCase(C), sStringEscapeSequences);
                  if (Index <> 0) then
                    {named escape: the replacement char follows its key}
                    C := sStringEscapeSequences[Index + 1]
                  else
                  begin
                    InStream.PutBack(C);
                    if ReadDecimal(Token) then
                      C := Char(Token.I)
                    else if (C = cCHexPrefix[2]) then
                    begin
                      InStream.PutBack('0'); {for ReadHexadecimal}
                      if ReadHexadecimal(Token) then
                        C := Char(Token.I)
                      else
                      begin
                        InStream.GetChar; {pop '0' character}
                        C := '\';
                      end;
                    end;
                  end;
                  TempString := TempString + C;
                end;
              end
              else if (C = Chars[2]) then
              begin
                C := InStream.GetChar;
                if (InStream.Status = 0) then
                  if (C = Chars[2]) then
                    {doubled closing quote embeds one literal quote}
                    TempString := TempString + C
                  else
                  begin
                    {string closed; the extra char belongs to the caller}
                    InStream.PutBack(C);
                    Temp := True;
                    StringToToken(Token, TempString);
                    Token.Kind := tkString;
                    Token.Line := Line;
                    Token.Position := Position;
                  end;
              end
              else if C in [CR, LF] then
              begin
                Status := stCompilerError;
                CompilerError := ceStringConstantExceedsLine;
              end
              else
                TempString := TempString + C;
          until Temp or (InStream.Status <> 0) or (Status <> 0);
          if (InStream.Status <> 0) then
          begin
            Status := stCompilerError;
            CompilerError := ceStringNotClosed;
          end;
        end
        else
          InStream.PutBack(C);
      end;
    end;
  end;
  ReadString := Temp;
end; {TTokenStream.ReadString}

{Push a token into the LIFO buffer; overflow sets Status := stNoMorePlace.}
procedure TTokenStream.PutBack(Token: TToken);
begin
  if (TokensRemain >= SecondBufferSize) then
    Status := stNoMorePlace
  else
  begin
    Inc(TokensRemain);
    SecondBuffer[TokensRemain] := Token;
  end;
end; {TTokenStream.PutBack}

{Push a whole string back onto the character stream, last character
 first, so a subsequent GetChar sequence re-reads it in original order.}
procedure TTokenStream.PutBackString(S: String);
var
  i, SLength: Integer;
begin
  SLength := Length(S);
  for i := SLength downto 1 do
    InStream.PutBack(S[i]);
end; {TTokenStream.PutBackString}

end {CTokenStream}.