{*******************************************************}
{                                                       }
{       WHY not a compiler? by Y [05-04-00]             }
{       Token stream, a lexical analysis engine         }
{                                                       }
{       Copyright (c) 1999-2000 CROWELL, Inc.           }
{       All Rights Reserved.                            }
{                                                       }
{*******************************************************}

unit CTokenStream;

{$I CDEFINES.PAS} {$IFDEF TPC} {$N+} {$ENDIF}

interface

uses Objects, CConstants, CIOStream;

type

  {TTokenStream, a lexical analysis tool}

  PTokenStream = ^TTokenStream;
  TTokenStream = object(TObject)
    InStream: TDoubleBufStream;
    Status: Integer;
    CompilerError: TCompilerError;
    TokensRemain: Integer;
    SecondBuffer: array [1..SecondBufferSize] of TToken;
    InputFileName: String;
    constructor Init(FileName: String);
    procedure Free;
    destructor Done; virtual;
    {private declarations}
    procedure SkipSpaces;
    procedure SkipSingleCharComment(Chars: SmallString); (*   '{}'   *)
    procedure SkipDoubleCharComment(Chars: SmallString); {   '(**)'   }
    procedure SkipSingleLineComment(Chars: SmallString); {     '\'    }
    procedure SkipDoubleLineComment(Chars: SmallString); {'//' or '--'}
    procedure SkipAll;
    function ReadFilteredString(Chars: CharSet): String;
    function ReadExactString(Chars: SmallString): Boolean;
    function ReadDecimal(var Token: TToken): Boolean;
    function ReadHexadecimal(var Token: TToken): Boolean;
    {public declarations}
    function ReadChar(var C: Char): Boolean;
    function Read(var Token: TToken): Boolean; {any token type}
    function ReadBigDelimiter(var Token: TToken; Chars: SmallString): Boolean;
    function ReadDelimiter(var Token: TToken): Boolean;
    function ReadFloat(var Token: TToken): Boolean;
    function ReadInteger(var Token: TToken): Boolean;
    function ReadIdentifier(var Token: TToken): Boolean;
    function ReadReservedWord(var Token: TToken): Boolean;
    function ReadString(var Token: TToken; Chars: SmallString): Boolean; {'""'}
    procedure PutBack(Token: TToken);
    procedure PutBackString(S: String);
  end;

implementation

uses Strings;

{TTokenStream}

constructor TTokenStream.Init(FileName: String);
begin
  inherited Init;
  InStream.Init(FileName, stOpenRead, ReadBufferSize);
  Status := InStream.Status;
  CompilerError := ceNoErrors;
  TokensRemain := 0;
  InputFileName := FileName;
end; {TTokenStream.Init}

procedure TTokenStream.Free;
begin
end; {TTokenStream.Free}

destructor TTokenStream.Done;
begin
  InStream.Done;
  Status := InStream.Status;
end; {TTokenStream.Done}

procedure TTokenStream.SkipSpaces;
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then
    Exit;
  repeat
    C := InStream.GetChar;
  until not (C in SkipChars) or (InStream.Status <> 0);
  if (InStream.Status = 0) then
    InStream.PutBack(C);
end;

procedure TTokenStream.SkipSingleCharComment(Chars: SmallString);
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then
    Exit;

  C := InStream.GetChar;

  if (InStream.Status <> 0) then
    Exit;

  if (C = Chars[1]) then
  begin
    repeat
      C := InStream.GetChar;
    until (C = Chars[2]) or (InStream.Status <> 0);
    if (InStream.Status <> 0) then
    begin
      Status := stCompilerError;
      CompilerError := ceCommentNotClosed;
    end
  end
  else
    InStream.PutBack(C);
end; {TTokenStream.SkipSingleCharComment}

procedure TTokenStream.SkipDoubleCharComment(Chars: SmallString);
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then
    Exit;

  C := InStream.GetChar;

  if (InStream.Status <> 0) then
    Exit;

  if C = Chars[1] then
  begin
    C := InStream.GetChar;
    if (C <> Chars[2]) or (InStream.Status <> 0) then
    begin
      InStream.PutBack(C);
      InStream.PutBack(Chars[1]);
    end
    else
    begin {C = '*', i.e. '(*' encountered}
      repeat
        repeat
          C := InStream.GetChar;
        until (C = Chars[3]) or (InStream.Status <> 0);
        if (InStream.Status <> 0) then
        begin
          Status := stCompilerError;
          CompilerError := ceCommentNotClosed;
        end
        else {C = '*'}
        begin
          C := InStream.GetChar;
          if (C <> Chars[4]) then
            if (InStream.Status = 0) then
              InStream.PutBack(C)
            else
            begin {InStream.Status <> 0}
              Status := stCompilerError;
              CompilerError := ceCommentNotClosed;
            end
          else {C = ')'};
        end;
      until (C = Chars[4]) or (InStream.Status <> 0);
    end;
  end
  else
    InStream.PutBack(C);
end; {TTokenStream.TTokenStream.SkipDoubleCharComment}

procedure TTokenStream.SkipSingleLineComment(Chars: SmallString);
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then
    Exit;

  C := InStream.GetChar;

  if (InStream.Status <> 0) then
    Exit;

  if C = Chars[1] then
  begin
    repeat
      C := InStream.GetChar;
    until (C in [CR, LF]) or (InStream.Status <> 0);
  end
  else
    InStream.PutBack(C);
end; {TTokenStream.SkipSingleLineComment}

procedure TTokenStream.SkipDoubleLineComment(Chars: SmallString);
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then
    Exit;

  C := InStream.GetChar;

  if (InStream.Status <> 0) then
    Exit;

  if C = Chars[1] then
  begin
    C := InStream.GetChar;
    if (C <> Chars[2]) or (InStream.Status <> 0) then
    begin
      InStream.PutBack(C);
      InStream.PutBack(Chars[1]);
    end
    else
      repeat
        C := InStream.GetChar;
      until (C in [CR, LF]) or (InStream.Status <> 0);
  end
  else
    InStream.PutBack(C);
end; {TTokenStream.SkipDoubleLineComment}

procedure TTokenStream.SkipAll;
var
  C: Char;
begin
  if ((InStream.Status <> 0) or (Status <> 0)) then
    Exit;

  repeat
    SkipSpaces;
    SkipSingleCharComment(sSingleComment);
    SkipDoubleCharComment(sDoubleComment);
    SkipSingleLineComment(sSingleLineComment);
    SkipDoubleLineComment(sDoubleLineComment);
    {SkipDoubleLineComment(sAdaDoubleLineComment);}
    C := InStream.GetChar;
    if (InStream.Status = 0) then
      InStream.PutBack(C);
  until not (C in SkipChars) or (InStream.Status <> 0) or (Status <> 0);
end; {TTokenStream.SkipAll}

function TTokenStream.ReadChar(var C: Char): Boolean;
begin
  C := InStream.GetChar;
  ReadChar := (InStream.Status = 0);
end; {ReadChar}

function TTokenStream.Read(var Token: TToken): Boolean;
var
  Temp: Boolean;
  Line, Position: Integer;
begin
  Temp := False;
  if (TokensRemain > 0) then
  begin
    Token := SecondBuffer[TokensRemain];
    Dec(TokensRemain);
    Temp := True;
  end
  else {lexical analysis comes here!}
  if (InStream.Status = 0) and (Status = 0) then
  begin
    SkipAll; {remove spaces and comments}
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;

    case Token.Kind of
      tkUnknown:
      begin
        Temp := True; {a priori}
        if not ReadString(Token, sStringChars) then
          if not ReadBigDelimiter(Token, sAssignment) then
          if not ReadBigDelimiter(Token, sDoublePoint) then
          if not ReadBigDelimiter(Token, sGreaterEqual) then
          if not ReadBigDelimiter(Token, sLessEqual) then
          if not ReadBigDelimiter(Token, sNotEqual) then
            if not ReadFloat(Token) then
              if not ReadInteger(Token) then
                if not ReadDelimiter(Token) then
                  if not ReadReservedWord(Token) then
                    if not ReadIdentifier(Token) then
                      Temp := False;
      end;
      tkBigDelimiter:
        Temp := ReadBigDelimiter(Token, Token.S);
      tkDelimiter:
        Temp := ReadDelimiter(Token);
      tkIdentifier:
        Temp := ReadIdentifier(Token);
      tkInteger:
        Temp := ReadInteger(Token);
      tkReservedWord:
        Temp := ReadReservedWord(Token);
      tkString:
        Temp := ReadString(Token, sStringChars);
    end;
    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;

  Read := Temp;
end; {TTokenStream.Read}

function TTokenStream.ReadBigDelimiter(var Token: TToken; Chars: SmallString):
  Boolean;
var
  Temp: Boolean;
  Line, Position: Integer;
begin
  Temp := False;
  FreeToken(Token);

  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkBigDelimiter) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;

    if (InStream.Status = 0) and (Status = 0) then
    begin
      if ReadExactString(Chars) then
      begin
        with Token do
        begin
          Kind := tkBigDelimiter;
          StringToToken(Token, Chars);
          Token.Kind := tkBigDelimiter;
        end;
        Temp := True;
      end
      else
        Temp := False;
    end;
    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;

  ReadBigDelimiter := Temp;
end; {TTokenStream.ReadBigDelimiter}

function TTokenStream.ReadFilteredString(Chars: CharSet): String;
var
  C: Char;
  TempString: String;
begin
  TempString := '';

  if (InStream.Status = 0) and (Status = 0) then
  begin
    repeat
      C := InStream.GetChar;
      if (InStream.Status = 0) then
        if (C in Chars) then
          TempString := TempString + C
        else
          InStream.PutBack(C);
    until not (C in Chars) or (InStream.Status <> 0);
  end;

  ReadFilteredString := TempString;
end; {TTokenStream.ReadFilteredString}

function TTokenStream.ReadExactString(Chars: SmallString): Boolean;
var
  C, C2: Char;
  Temp: Boolean;
  CLength, i: Integer;
  Chars2: SmallString;
begin
  if not CaseSensitive then
    Chars2 := LowerCase(Chars)
  else
    Chars2 := Chars;
  CLength := Length(Chars);

  if (InStream.Status = 0) and (Status = 0) then
  begin
    Temp := True;
    i := 1;
    repeat
      C := InStream.GetChar;
      if CaseSensitive then
        C2 := C
      else
        C2 := LoCase(C);
      if (InStream.Status = 0) then
        if (C2 <> Chars2[i]) then
        begin
          Temp := False;
          PutBackString(Copy(Chars, 1, i - 1) + C);
        end
        else
          Inc(i)
      else
        Temp := False;
    until not Temp or (i > CLength);
  end;

  ReadExactString := Temp;
end; {TTokenStream.ReadExactString}

function TTokenStream.ReadDecimal(var Token: TToken): Boolean;
var
  Temp: Boolean;
  Error: Integer;
  TempString: String;
  TempInteger: Longint;
  Line, Position: Integer;
begin
  Temp := False;

  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkInteger) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;

    TempString := ReadFilteredString(Digits);

    if (TempString <> '') then
    begin
      Val(TempString, TempInteger, Error); {warning! standard routine used!}
      if (Error <> 0) then
      begin
        Temp := False;
        Status := stCompilerError;
        CompilerError := ceConstantOutOfRange;
      end
      else
      begin
        Temp := True;
        with Token do
        begin
          I := TempInteger;
          Kind := tkInteger;
        end;
      end;
    end;
    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;

  ReadDecimal := Temp;
end; {TTokenStream.ReadDecimal}

function TTokenStream.ReadDelimiter(var Token: TToken): Boolean;
var
  C: Char;
  Index: Integer;
  Temp: Boolean;
  Line, Position: Integer;
begin
  Temp := False;
  FreeToken(Token);

  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkDelimiter) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;

    if (InStream.Status = 0) and (Status = 0) then
    begin
      C := InStream.GetChar;
      if (InStream.Status = 0) then
      begin
        Index := Pos(C, Delimiters);
        if (Index <> 0) then
        begin
          Temp := True;
          with Token do
          begin
            Kind := tkDelimiter;
            I := Index;
          end;
        end
        else InStream.PutBack(C);
      end;
    end;
    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;

  ReadDelimiter := Temp;
end; {TTokenStream.ReadDelimiter}

function TTokenStream.ReadFloat(var Token: TToken): Boolean;
var
  Temp: Boolean;
  TempValue: Float;
  TempString, TempString2: String;
  Error: Integer;
  C: Char;
  Line, Position: Integer;
begin
  Temp := False;
  TempString := '';
  FreeToken(Token);

  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkFloat) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;

    TempString := ReadFilteredString(Digits);

    if (InStream.Status = 0) then
      if (TempString <> '') then
      begin
        C := InStream.GetChar;
        if (InStream.Status = 0) then
          if (C = '.') then
          begin
            TempString2 := ReadFilteredString(Digits);
            if (TempString2 <> '') then
            begin
              TempString := TempString + '.' + TempString2;
              C := InStream.GetChar;
              if (InStream.Status = 0) then
                if (LoCase(C) = 'e') then
                begin
                  C := InStream.GetChar;
                  if (InStream.Status = 0) then
                  begin
                    TempString := TempString + 'E';
                    if (C in ['+', '-']) then
                      TempString := TempString + C
                    else
                      InStream.PutBack(C);
                    TempString := TempString + ReadFilteredString(Digits);
                  end;
                end
              else
                InStream.PutBack(C);
            end
            else
            begin
              InStream.PutBack(C);
              PutBackString(TempString);
              TempString := '';
            end;
          end
          else
          begin
            InStream.PutBack(C);
            PutBackString(TempString);
            TempString := '';
          end;
      end;

    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;

  if (TempString <> '') then
  begin
    FloatVal(TempString, TempValue, Error);
    if (Error = 0) then
    begin
      Temp := True;
      Token.Kind := tkFloat;
      Token.F := TempValue;
    end
    else {never gets here due to Run-time error #205 or #206}
    begin
      Status := stCompilerError;
      CompilerError := ceErrorInRealConstant;
    end;
  end;

  ReadFloat := Temp;
end; {TTokenStream.ReadFloat}

function TTokenStream.ReadHexadecimal(var Token: TToken): Boolean;
var
  Error: Integer;
  Temp: Boolean;
  TempString: String;
  TempValue: Longint;

  function DoReadHexadecimal: Boolean;
  begin if (InStream.Status = 0) and (Status = 0) then
        begin
          TempString := ReadFilteredString(HexDigits);
          if (TempString <> '') then
          begin
            TempString := '$' + TempString;
            Val(TempString, TempValue, Error); {warning! standard routine used!}
            if (Error <> 0) then
            begin
              Temp := False;
              Status := stCompilerError;
              CompilerError := ceConstantOutOfRange;
            end
            else
            begin
              Temp := True;
              with Token do
              begin
                I := TempValue;
                Kind := tkInteger;
              end;
            end;
          end;
        end;
        DoReadHexadecimal := Temp;
  end; {DoReadHexadecimal}

begin
  Temp := False;
  TempString := '';
  FreeToken(Token);

  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkInteger) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    if ReadExactString(cCHexPrefix) then
      if not DoReadHexadecimal then
        PutBackString(cCHexPrefix)
      else
    else if ReadExactString(cPasHexPrefix) then
      if not DoReadHexadecimal then
        PutBackString(cPasHexPrefix)
      else
    else if ReadDelimiter(Token) then
      {if (Token.I = Byte(cPasHexPrefix)) then}
      if (Delimiters[Token.I] = cPasHexPrefix) then
         if not DoReadHexadecimal then
          PutBack(Token) {empty string after the prefix}
        else
      else
        PutBack(Token); {delimiter but not a hexadecimal prefix}
  end;

  ReadHexadecimal := Temp;
end; {TTokenStream.ReadHexadecimal}

function TTokenStream.ReadInteger(var Token: TToken): Boolean;
var
  Temp: Boolean;
  Line, Position: Integer;
begin
  Temp := False;

  SkipAll;

  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkInteger) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    Temp := True;
    if not ReadHexadecimal(Token) then
      if not ReadDecimal(Token) then
        Temp := False;
  end;

  ReadInteger := Temp;
end;

function TTokenStream.ReadIdentifier(var Token: TToken): Boolean;
var
  Temp: Boolean;
  TempString: String;
  Line, Position: Integer;
begin
  Temp := False;
  FreeToken(Token);

  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkIdentifier) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    Line := InStream.CurrentLine;
    Position := InStream.CurrentPos;

    if (InStream.Status = 0) and (Status = 0) then
    begin
      TempString := ReadFilteredString(Letters);
      if (TempString <> '') then
      begin
        TempString := TempString + ReadFilteredString(Letters + Digits);
        StringToToken(Token, TempString);
        Token.Kind := tkIdentifier;
        Temp := True;
      end
      else
        Temp := False;
    end;

    if Temp then
    begin
      Token.Line := Line;
      Token.Position := Position;
    end;
  end;

  ReadIdentifier := Temp;
end; {TTokenStream.ReadIdentifier}

function TTokenStream.ReadReservedWord(var Token: TToken): Boolean;
var
  Temp: Boolean;
  TempString: String;
  i: Integer;
begin
  Temp := False;
  FreeToken(Token);

  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkReservedWord) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    if ReadIdentifier(Token) then
    begin
      if CaseSensitive then
        TempString := Token.S
      else
        TempString := LowerCase(Token.S);
      i := 1;
      while not Temp and (i <= High(ReservedWords)) do
      begin
        if (TempString = StrPas(ReservedWords[i])) then
        begin
          Temp := True;
          FreeToken(Token);
          Token.Kind := tkReservedWord;
          Token.I := i;
        end;
        Inc(i);
      end;
      if not Temp then
        PutBack(Token);
    end;
  end;

  ReadReservedWord := Temp;
end; {TTokenStream.ReadReservedWord}

function TTokenStream.ReadString(var Token: TToken; Chars: SmallString): Boolean;
var
  C: Char;
  Temp: Boolean;
  TempString: String;
  Index: Integer;
  Line, Position: Integer;
begin
  Temp := False;
  TempString := '';
  FreeToken(Token);

  if (TokensRemain > 0) then {read a token from the token buffer}
    if (SecondBuffer[TokensRemain].Kind = tkString) then
    begin
      Token := SecondBuffer[TokensRemain];
      Dec(TokensRemain);
      Temp := True;
    end
    else
  else
  begin {perform lexical analysis}
    SkipAll;
    if (InStream.Status = 0) and (Status = 0) then
    begin
      C := InStream.GetChar;
      if (InStream.Status = 0) then
      begin
        if C = Chars[1] then
        begin
          repeat
            C := InStream.GetChar;
            if (InStream.Status = 0) then
              if (C = sStringEscape) then
              begin
                C := InStream.GetChar; {just after escape}
                if (InStream.Status = 0) then {convert}
                begin
                  Index := Pos(LoCase(C), sStringEscapeSequences);
                  if (Index <> 0) then
                    C := sStringEscapeSequences[Index + 1]
                  else
                    begin
                      InStream.PutBack(C);
                      if ReadDecimal(Token) then
                        C := Char(Token.I)
                      else if (C = cCHexPrefix[2]) then
                      begin
                        InStream.PutBack('0'); {for ReadHexadecimal}
                        if ReadHexadecimal(Token) then
                          C := Char(Token.I)
                        else
                        begin
                          InStream.GetChar; {pop '0' character}
                          C := '\';
                        end;
                      end;
                    end;
                  TempString := TempString + C;
                end;
              end
              else if (C = Chars[2]) then
              begin
                C := InStream.GetChar;
                if (InStream.Status = 0) then
                  if (C = Chars[2]) then
                    TempString := TempString + C
                  else
                  begin
                    InStream.PutBack(C);
                    Temp := True;
                    StringToToken(Token, TempString);
                    Token.Kind := tkString;
                  end;
              end
              else if C in [CR, LF] then
              begin
                Status := stCompilerError;
                CompilerError := ceStringConstantExceedsLine;
              end
              else
                TempString := TempString + C;
          until Temp or (InStream.Status <> 0) or (Status <> 0);
          if (InStream.Status <> 0) then
          begin
            Status := stCompilerError;
            CompilerError := ceStringNotClosed;
          end;
        end
        else
          InStream.PutBack(C);
      end;
    end;
  end;

  ReadString := Temp;
end; {TTokenStream.ReadString}

procedure TTokenStream.PutBack(Token: TToken);
begin
  if (TokensRemain >= SecondBufferSize) then
    Status := stNoMorePlace
  else
  begin
    Inc(TokensRemain);
    SecondBuffer[TokensRemain] := Token;
  end;
end; {TTokenStream.PutBack}

procedure TTokenStream.PutBackString(S: String);
var
  i, SLength: Integer;
begin
  SLength := Length(S);
  for i := SLength downto 1 do
    InStream.PutBack(S[i]);
end; {TTokenStream.PutBackString}

end {CTokenStream}.