unit Scaner;

interface

uses Common;

type
  TLexem = (ltEmpty, ltCRLF, ltIdentifier, ltIntConst, ltCharConst, ltStrConst,
    opAssign, opPlus, opMinus, opMul, opDiv, opMod, opLeftBracket,
    opRightBracket, opLeftParent, opRightParent, opArrow, opAt, opPeriod,
    opComma, opColon, opSemikolon, opAnd, opOr, opNot, opGreater, opLess,
    opEqual, opNotEqual, opGreaterEqual, opLessEqual, opInc, opDec,
    opAddAss, opDecAss, opMulAss, opDivAss, opModAss,
    rwVoid, rwInt, rwChar, rwWord, rwRec, rwConst, rwBegin, rwEnd, rwAsm,
    rwIf, rwElse, rwElsif, rwWhile, rwDo, rwFor, rwTo, rwStep, rwInline,
    rwProc, rwPublic, rwExtern, rwInclude, rwBreak, rwNext,
    rwRet);

const
  AllowCRLF: Boolean = False;
  SavePos: LongInt = 0;

var
  Lexem: TLexem;
  StrConst: string[250];
  Identifier: string[16];
  IntConst: Integer;
  CharConst: Char;

function ReadLexem: TLexem;
procedure PredLexem;

implementation

const
  CR = #13;
  LF = #10;
  Tab = #9;
  CRLF = #2;
  TSpecial = ['=', '+', '-', '*', '/', '%', '(', ')', '[', ']', '^', '.',
              ',', ':', ';', '@' ,'&', '|', '!', '>', '<'];
  TLetter = ['A'..'Z', 'a'..'z', '_'];
  TDigit = ['0'..'9'];
  Symbol: Char = ' ';

procedure LoCase(var Character: Char);
begin
  if Character in ['A'..'Z'] then Inc(Character, 32);
end; {LoCase}

function ReadSymbol: Char;
begin
  if not EoF(SourceFile) then begin
    Read(SourceFile, Symbol);
    if Symbol > #31 then Inc(Column)
    else if Symbol = Tab then Inc(Column, 9 - Column mod 8);
  end else Symbol := #1;
  if Symbol = CR then begin
    Symbol := ReadSymbol;
    if Symbol = LF then begin
      Column := 1;
      Inc(Line);
      Symbol := CRLF;
    end;
  end;
  ReadSymbol := Symbol;
end; {ReadSymbol}

function NextSymbol: Char;
var C: Char;
begin
  if EoF(SourceFile) then NextSymbol := #1
  else begin
    Read(SourceFile, C);
    Seek(SourceFile, FilePos(SourceFile) - 1);
    NextSymbol := C;
  end
end; {NextSymbol}

procedure CheckComments;
begin
  if Symbol = '{' then repeat
    if ReadSymbol = #1 then Error(ecOpenComment);
    if Symbol = '{' then begin
      CheckComments;
      ReadSymbol;
    end;
  until Symbol = '}';
end; {CheckComments}

procedure ReadSpecial;
const
  LongSpecial: array[opEqual..opModAss] of string[2] =
  ('==', '<>', '>=', '<=', '++', '--', '+=', '-=', '*=', '/=', '%=');
  Special: array[opAssign..opLess] of Char =
  ('=', '+', '-', '*', '/', '%', '(', ')', '[', ']', '^','@' , '.', ',',
   ':', ';', '&', '|', '!', '>', '<');
var
  Symb: opEqual..opModAss;
  S: string[2];
begin
  Identifier := Symbol;
  for Lexem := opAssign to opLess do if Symbol = Special[Lexem] then Break;
  if NextSymbol in TSpecial then begin
    S := Symbol + NextSymbol;
    for Symb := opEqual to opModAss do if S = LongSpecial[Symb] then begin
      Lexem := Symb;
      ReadSymbol;
      Identifier := S;
    end;
  end;
end; {ReadSpecial}

procedure ReadIdentifier;
const
  ResWords: array[rwVoid..High(TLexem)] of string[8] =
  ('void', 'int', 'char', 'word', 'rec', 'const', 'begin', 'end', 'asm', 'if',
   'else', 'elsif', 'while', 'do', 'for', 'to', 'step', 'inline', 'proc',
   'public', 'extern', 'include', 'break', 'next', 'ret');
var
  R: TLexem;
  I: Byte absolute R;
  S: string[16];
begin
  { read string}
  S := Symbol;
  while NextSymbol in TLetter + TDigit do S := S + ReadSymbol;
  if not CaseSensetive then for I := 1 to Length(S) do LoCase(S[I]);
  Lexem := ltIdentifier;
  Identifier := S;
  { check reserved word }
  for R := rwVoid to High(TLexem) do if ResWords[R] = S then Lexem := R;
end; { ReadIdentifier }

procedure ReadIntConst;
var
  Code: Integer;
  S: string[16];
begin
  Lexem := ltIntConst;
  if (Symbol = '0') and (NextSymbol in ['b', 'B']) then begin
    ReadSymbol;
    IntConst := 0;
    while NextSymbol in ['0', '1'] do begin
      Inc(IntConst, IntConst);
      if ReadSymbol = '1' then Inc(IntConst, 1);
    end;
    Exit;
  end;
  if (Symbol = '0') and (NextSymbol in ['x', 'X']) then begin
    ReadSymbol;
    S := '$' end
  else S := Symbol;
  while NextSymbol in ['0'..'9', 'a'..'f', 'A'..'F'] do S := S + ReadSymbol;
  Val(S, IntConst, Code);
end; {ReadInteger}

procedure ReadChar;
begin
  Lexem := ltCharConst;
  StrConst := '';
  while ReadSymbol <> '''' do begin
    if Symbol = #1 then Error(ecExceedsLine)
    else StrConst := StrConst + Symbol;
  end;
  CharConst := StrConst[1];
  IntConst := Word(StrConst[1]);
  if Length(StrConst) <> 1 then Lexem := ltStrConst;
end; {ReadChar}

procedure ReadString;
begin
  Lexem := ltStrConst;
  StrConst := '';
  while ReadSymbol <> '"' do begin
    if Symbol = #1 then Error(ecExceedsLine)
    else StrConst := StrConst + Symbol;
  end;
  StrConst := StrConst + #0;
end; {ReadString}

function ReadLexem;
label Next;
begin
Next:
  SavePos := FilePos(SourceFile);
  SaveLine := Line;
  SaveColumn := Column;
  ReadSymbol;
  CheckComments;
  if Symbol in TSpecial then ReadSpecial
  else case Symbol of
    'A'..'Z', 'a'..'z', '_': ReadIdentifier;
    '0'..'9': ReadIntConst;
    '''': ReadChar;
    '"': ReadString;
    CRLF: if AllowCRLF then Lexem := ltCRLF else goto Next;
    #1: Lexem := ltEmpty;
  else goto Next end;
  ReadLexem := Lexem;
end; { ReadLexem }

procedure PredLexem;
begin
  Seek(SourceFile, SavePos);
  Line := SaveLine;
  Column := SaveColumn;
  Lexem := ltEmpty;
end;

end.