{$MODE OBJFPC} { -*- delphi -*- } {$INCLUDE settings.inc} unit tokenutils; interface {$IFOPT C+} const kBlank = High(QWord); {$ENDIF} // XXX support TGravitationalSystemValue with explicit required units, e.g. "1.4kg/m^3" // XXX support reading these out like a stream type {$IFOPT C+} TBlank = kBlank..kBlank; {$ENDIF} TTokenType = (ttNone, ttInteger, ttString, ttComma, ttError); TToken = record case TokenType: TTokenType of ttNone: ({$IFOPT C+} Blank: TBlank {$ENDIF}); ttInteger: (IntegerValue: Integer); ttString: (StringValue: ^UTF8String); ttComma: (); ttError: (); end; TTokenArray = array of TToken; function GetLineTokens(Line: UTF8String; StopWord: UTF8String = ''): TTokenArray; procedure FreeLineTokens(var TokenArray: TTokenArray); implementation uses sysutils; function GetLineTokens(Line: UTF8String; StopWord: UTF8String = ''): TTokenArray; type TTokeniserMode = (tmStart, tmNumeric, tmWord, tmQuoted); var Mode: TTokeniserMode; StartIndex, Index: Cardinal; Token: TToken; SawStopWord: Boolean; procedure Start(NewMode: TTokeniserMode); begin Assert(Token.TokenType = ttNone); StartIndex := Index; Mode := NewMode; end; procedure PushToken(); begin Assert(Token.TokenType in [ttInteger, ttString, ttComma, ttError]); SetLength(Result, Length(Result)+1); Result[High(Result)] := Token; Token.TokenType := ttNone; {$IFOPT C+} Token.Blank := kBlank; {$ENDIF} Mode := tmStart; end; procedure Error(); begin Token.TokenType := ttError; PushToken(); Assert(Mode = tmStart); end; procedure FinishInteger(); var FailPos: Integer; begin Assert(Token.TokenType = ttNone); Token.TokenType := ttInteger; FailPos := -1; {$IFOPT R-} {$DEFINE range_checks_off} {$RANGECHECKS ON} {$ENDIF} try Val(Copy(Line, StartIndex, Index-StartIndex), Token.IntegerValue, FailPos); except on ERangeError do ; end; {$IFDEF range_checks_off} {$RANGECHECKS OFF} {$UNDEF range_checks_off} {$ENDIF} if (FailPos <> 0) then Error() else PushToken(); Assert(Mode = tmStart); end; procedure FinishWord(); begin Assert(Token.TokenType = ttNone); Token.TokenType := ttString; Assert(Index > StartIndex); New(Token.StringValue); Assert(Token.StringValue^ = ''); Token.StringValue^ := Copy(Line, StartIndex, Index-StartIndex); if (Token.StringValue^ = StopWord) then begin Assert(StopWord <> ''); SawStopWord := True; end; PushToken(); Assert(Mode = tmStart); end; procedure FinishQuotedString(); begin Assert(Token.TokenType = ttNone); Token.TokenType := ttString; Assert(Index > StartIndex); Assert(Line[StartIndex] = '"'); Assert(Line[Index] = '"'); New(Token.StringValue); Assert(Token.StringValue^ = ''); if (Index-StartIndex > 2) then Token.StringValue^ := Copy(Line, StartIndex+1, Index-StartIndex-1); PushToken(); Assert(Mode = tmStart); end; procedure Comma(); begin Assert(Token.TokenType = ttNone); Token.TokenType := ttComma; PushToken(); Assert(Mode = tmStart); end; begin SetLength(Result, 0); Token.TokenType := ttNone; {$IFOPT C+} Token.Blank := kBlank; {$ENDIF} if (Length(Line) > 0) then begin SawStopWord := False; Mode := tmStart; for Index := 1 to Length(Line) do // $R- begin case Mode of tmStart: case Line[Index] of ' ', #9: ; '0'..'9', '-': begin if (SawStopWord) then begin Start(tmWord); Break; end else Start(tmNumeric); end; '"': begin if (SawStopWord) then Error(); Start(tmQuoted); end; ',': Error(); else begin Start(tmWord); if (SawStopWord) then Break; end; end; tmNumeric: case Line[Index] of ' ', #9: FinishInteger(); '0'..'9': ; '-': begin FinishInteger(); Error(); Start(tmNumeric); end; '"': begin FinishInteger(); Error(); Start(tmQuoted); end; ',': begin FinishInteger(); Comma(); end; else begin FinishInteger(); Error(); Start(tmWord); if (SawStopWord) then Break; end; end; tmWord: case Line[Index] of ' ', #9: begin Assert(not SawStopWord); FinishWord(); end; '"': begin Assert(not SawStopWord); FinishWord(); if (SawStopWord) then Error(); Start(tmQuoted); end; ',': begin Assert(not SawStopWord); FinishWord(); if (SawStopWord) then Error(); Comma(); end; end; tmQuoted: case Line[Index] of '"': begin Assert(not SawStopWord); FinishQuotedString(); end; end; end; end; Index := Length(Line)+1; // $R- case Mode of tmStart: ; tmNumeric: FinishInteger(); tmWord: FinishWord(); tmQuoted: begin Error(); FinishWord(); end; end; end; Assert(Token.TokenType = ttNone); end; procedure FreeLineTokens(var TokenArray: TTokenArray); var Index: Cardinal; begin if (Length(TokenArray) > 0) then begin for Index := Low(TokenArray) to High(TokenArray) do // $R- begin if (TokenArray[Index].TokenType = ttString) then Dispose(TokenArray[Index].StringValue); end; SetLength(TokenArray, 0); end; end; end.