{$MODE OBJFPC} { -*- delphi -*- }
{$INCLUDE settings.inc}
program test_stringutils_utf8;

uses
   utf8;

// Runs a single validation case.
//   Name   - label printed when the case fails
//   Value  - byte sequence handed to IsValidUTF8
//   Result - the verdict IsValidUTF8 is expected to return for Value
// Prints 'Failed test <Name>' on a mismatch and nothing when the case passes.
procedure TestIsValidUTF8(Name, Value: UTF8String; Result: Boolean);
begin
   if (IsValidUTF8(Value) <> Result) then
      Writeln('Failed test ', Name);
end;

begin
   TestIsValidUTF8('Empty', '', True);

   // based on http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
   // Case labels follow the section numbers of that document.

   // 1: plain, well-formed text
   TestIsValidUTF8('1', 'Some correct UTF-8 text', True);

   // 2.1: first sequence of each length
   TestIsValidUTF8('2.1.1', #$00, True);
   TestIsValidUTF8('2.1.2', #$C2#$80, True);
   TestIsValidUTF8('2.1.3', #$E0#$A0#$80, True);
   TestIsValidUTF8('2.1.4 (control)', #$F0#$A0#$80#$80, True); // U+20000
   TestIsValidUTF8('2.1.4', #$F0#$90#$80#$80, True); // U+10000
   TestIsValidUTF8('2.1.5', #$F8#$88#$80#$80#$80, False); // beyond U+10FFFF
   // 2.1.6 skipped

   // 2.2: last sequence of each length
   TestIsValidUTF8('2.2.1', #$7F, True);
   TestIsValidUTF8('2.2.2', #$DF#$BF, True);
   TestIsValidUTF8('2.2.3', #$EF#$BF#$BF, True);
   TestIsValidUTF8('2.2.4', #$F7#$BF#$BF#$BF, False); // beyond U+10FFFF
   TestIsValidUTF8('2.2.5', #$FB#$BF#$BF#$BF#$BF, False); // beyond U+10FFFF
   TestIsValidUTF8('2.2.6', #$FD#$BF#$BF#$BF#$BF#$BF, False); // beyond U+10FFFF

   // 2.3: other boundary conditions
   TestIsValidUTF8('2.3.1', #$ED#$9F#$BF, True);
   TestIsValidUTF8('2.3.2', #$EE#$80#$80, True);
   TestIsValidUTF8('2.3.3', #$EF#$BF#$BD, True);
   TestIsValidUTF8('2.3.4', #$F4#$8F#$BF#$BF, True);
   TestIsValidUTF8('2.3.5', #$F4#$90#$80#$80, False); // beyond U+10FFFF

   // 3.1: continuation bytes ($80..$BF) with no lead byte in front of them
   TestIsValidUTF8('3.1.1', #$80, False);
   TestIsValidUTF8('3.1.2', #$BF, False);
   TestIsValidUTF8('3.1.3', #$80#$BF, False);
   TestIsValidUTF8('3.1.4', #$80#$BF#$80, False);
   TestIsValidUTF8('3.1.5', #$80#$BF#$80#$BF, False);
   TestIsValidUTF8('3.1.6', #$80#$BF#$80#$BF#$80, False);
   TestIsValidUTF8('3.1.7', #$80#$BF#$80#$BF#$80#$BF, False);
   TestIsValidUTF8('3.1.8', #$80#$BF#$80#$BF#$80#$BF#$80, False);
   TestIsValidUTF8('3.1.9', #$81#$82#$83#$84#$85#$86#$87#$88, False); // truncated compared to original

   // 3.2.1: each two-byte lead byte ($C0..$DF) followed by a space
   TestIsValidUTF8('3.2.1 c0', #$C0#$20, False);
   TestIsValidUTF8('3.2.1 c1', #$C1#$20, False);
   TestIsValidUTF8('3.2.1 c2', #$C2#$20, False);
   TestIsValidUTF8('3.2.1 c3', #$C3#$20, False);
   TestIsValidUTF8('3.2.1 c4', #$C4#$20, False);
   TestIsValidUTF8('3.2.1 c5', #$C5#$20, False);
   TestIsValidUTF8('3.2.1 c6', #$C6#$20, False);
   TestIsValidUTF8('3.2.1 c7', #$C7#$20, False);
   TestIsValidUTF8('3.2.1 c8', #$C8#$20, False);
   TestIsValidUTF8('3.2.1 c9', #$C9#$20, False);
   TestIsValidUTF8('3.2.1 ca', #$CA#$20, False);
   TestIsValidUTF8('3.2.1 cb', #$CB#$20, False);
   TestIsValidUTF8('3.2.1 cc', #$CC#$20, False);
   TestIsValidUTF8('3.2.1 cd', #$CD#$20, False); // was #$CE, which left $CD untested
   TestIsValidUTF8('3.2.1 ce', #$CE#$20, False);
   TestIsValidUTF8('3.2.1 cf', #$CF#$20, False);
   TestIsValidUTF8('3.2.1 d0', #$D0#$20, False);
   TestIsValidUTF8('3.2.1 d1', #$D1#$20, False);
   TestIsValidUTF8('3.2.1 d2', #$D2#$20, False);
   TestIsValidUTF8('3.2.1 d3', #$D3#$20, False);
   TestIsValidUTF8('3.2.1 d4', #$D4#$20, False);
   TestIsValidUTF8('3.2.1 d5', #$D5#$20, False);
   TestIsValidUTF8('3.2.1 d6', #$D6#$20, False);
   TestIsValidUTF8('3.2.1 d7', #$D7#$20, False);
   TestIsValidUTF8('3.2.1 d8', #$D8#$20, False);
   TestIsValidUTF8('3.2.1 d9', #$D9#$20, False);
   TestIsValidUTF8('3.2.1 da', #$DA#$20, False);
   TestIsValidUTF8('3.2.1 db', #$DB#$20, False);
   TestIsValidUTF8('3.2.1 dc', #$DC#$20, False);
   TestIsValidUTF8('3.2.1 dd', #$DD#$20, False);
   TestIsValidUTF8('3.2.1 de', #$DE#$20, False);
   TestIsValidUTF8('3.2.1 df', #$DF#$20, False);

   // 3.2.2: each three-byte lead byte ($E0..$EF) followed by a space
   TestIsValidUTF8('3.2.2 e0', #$E0#$20, False);
   TestIsValidUTF8('3.2.2 e1', #$E1#$20, False);
   TestIsValidUTF8('3.2.2 e2', #$E2#$20, False);
   TestIsValidUTF8('3.2.2 e3', #$E3#$20, False);
   TestIsValidUTF8('3.2.2 e4', #$E4#$20, False);
   TestIsValidUTF8('3.2.2 e5', #$E5#$20, False);
   TestIsValidUTF8('3.2.2 e6', #$E6#$20, False);
   TestIsValidUTF8('3.2.2 e7', #$E7#$20, False);
   TestIsValidUTF8('3.2.2 e8', #$E8#$20, False);
   TestIsValidUTF8('3.2.2 e9', #$E9#$20, False);
   TestIsValidUTF8('3.2.2 ea', #$EA#$20, False);
   TestIsValidUTF8('3.2.2 eb', #$EB#$20, False);
   TestIsValidUTF8('3.2.2 ec', #$EC#$20, False);
   TestIsValidUTF8('3.2.2 ed', #$ED#$20, False);
   TestIsValidUTF8('3.2.2 ee', #$EE#$20, False);
   TestIsValidUTF8('3.2.2 ef', #$EF#$20, False);

   // 3.2.3: each four-byte lead byte ($F0..$F7) followed by a space
   TestIsValidUTF8('3.2.3 f0', #$F0#$20, False);
   TestIsValidUTF8('3.2.3 f1', #$F1#$20, False);
   TestIsValidUTF8('3.2.3 f2', #$F2#$20, False);
   TestIsValidUTF8('3.2.3 f3', #$F3#$20, False);
   TestIsValidUTF8('3.2.3 f4', #$F4#$20, False);
   TestIsValidUTF8('3.2.3 f5', #$F5#$20, False);
   TestIsValidUTF8('3.2.3 f6', #$F6#$20, False);
   TestIsValidUTF8('3.2.3 f7', #$F7#$20, False);

   // 3.2.4: each five-byte lead byte ($F8..$FB) followed by a space
   TestIsValidUTF8('3.2.4 f8', #$F8#$20, False);
   TestIsValidUTF8('3.2.4 f9', #$F9#$20, False);
   TestIsValidUTF8('3.2.4 fa', #$FA#$20, False);
   TestIsValidUTF8('3.2.4 fb', #$FB#$20, False);

   // 3.2.5: each six-byte lead byte ($FC..$FD) followed by a space
   TestIsValidUTF8('3.2.5 fc', #$FC#$20, False);
   TestIsValidUTF8('3.2.5 fd', #$FD#$20, False);

   // 3.3: sequences whose final continuation byte is missing
   TestIsValidUTF8('3.3.1', #$C0, False);
   TestIsValidUTF8('3.3.2', #$E0#$80, False);
   TestIsValidUTF8('3.3.3', #$F0#$80#$80, False);
   TestIsValidUTF8('3.3.4', #$F8#$80#$80#$80, False);
   TestIsValidUTF8('3.3.5', #$FC#$80#$80#$80#$80, False);
   TestIsValidUTF8('3.3.6', #$DF, False);
   TestIsValidUTF8('3.3.7', #$EF#$BF, False);
   TestIsValidUTF8('3.3.8', #$F7#$BF#$BF, False);
   TestIsValidUTF8('3.3.9', #$FB#$BF#$BF#$BF, False);
   TestIsValidUTF8('3.3.10', #$FD#$BF#$BF#$BF#$BF, False);

   // Same truncated sequences, each followed by a space instead of end-of-string
   TestIsValidUTF8('3.3.1 with space', #$C0#$20, False);
   TestIsValidUTF8('3.3.2 with space', #$E0#$80#$20, False);
   TestIsValidUTF8('3.3.3 with space', #$F0#$80#$80#$20, False);
   TestIsValidUTF8('3.3.4 with space', #$F8#$80#$80#$80#$20, False);
   TestIsValidUTF8('3.3.5 with space', #$FC#$80#$80#$80#$80#$20, False);
   TestIsValidUTF8('3.3.6 with space', #$DF#$20, False);
   TestIsValidUTF8('3.3.7 with space', #$EF#$BF#$20, False);
   TestIsValidUTF8('3.3.8 with space', #$F7#$BF#$BF#$20, False);
   TestIsValidUTF8('3.3.9 with space', #$FB#$BF#$BF#$BF#$20, False);
   TestIsValidUTF8('3.3.10 with space', #$FD#$BF#$BF#$BF#$BF#$20, False);
   // 3.4 skipped

   // 3.5: bytes $FE and $FF, which the referenced document lists as impossible
   TestIsValidUTF8('3.5.1', #$FE, False);
   TestIsValidUTF8('3.5.2', #$FF, False); // was #$FD; the referenced 3.5.2 case is $FF
   TestIsValidUTF8('3.5.3', #$FE#$FE#$FF#$FF, False);
   // Not a numbered case in the referenced document: the lone $FD check that
   // previously ran under the '3.5.2' label, kept so no coverage is lost.
   TestIsValidUTF8('3.5 (lone fd)', #$FD, False);

   // 4.1: overlong encodings of U+002F
   TestIsValidUTF8('4.1.1 (control)', #$2F, True);
   TestIsValidUTF8('4.1.1', #$C0#$AF, False);
   TestIsValidUTF8('4.1.2', #$E0#$80#$AF, False);
   TestIsValidUTF8('4.1.3', #$F0#$80#$80#$AF, False);
   TestIsValidUTF8('4.1.4', #$F8#$80#$80#$80#$AF, False);
   TestIsValidUTF8('4.1.5', #$FC#$80#$80#$80#$80#$AF, False);

   // 4.2: highest code point that is still overlong at each length
   TestIsValidUTF8('4.2.1 (control)', #$7F, True);
   TestIsValidUTF8('4.2.1', #$C1#$BF, False);
   TestIsValidUTF8('4.2.2', #$E0#$9F#$BF, False);
   TestIsValidUTF8('4.2.3', #$F0#$8F#$BF#$BF, False);
   TestIsValidUTF8('4.2.4', #$F8#$87#$BF#$BF#$BF, False);
   TestIsValidUTF8('4.2.5', #$FC#$83#$BF#$BF#$BF#$BF, False);

   // 4.3: overlong encodings of U+0000
   TestIsValidUTF8('4.3.1 (control)', #$00, True);
   TestIsValidUTF8('4.3.1', #$C0#$80, False);
   TestIsValidUTF8('4.3.2', #$E0#$80#$80, False);
   TestIsValidUTF8('4.3.3', #$F0#$80#$80#$80, False);
   TestIsValidUTF8('4.3.4', #$F8#$80#$80#$80#$80, False);
   TestIsValidUTF8('4.3.5', #$FC#$80#$80#$80#$80#$80, False);

   // 5.1: single UTF-16 surrogates encoded as three-byte sequences
   TestIsValidUTF8('5.1.1', #$ED#$A0#$80, False);
   TestIsValidUTF8('5.1.2', #$ED#$AD#$BF, False);
   TestIsValidUTF8('5.1.3', #$ED#$AE#$80, False);
   TestIsValidUTF8('5.1.4', #$ED#$AF#$BF, False);
   TestIsValidUTF8('5.1.5', #$ED#$B0#$80, False);
   TestIsValidUTF8('5.1.6', #$ED#$BE#$80, False);
   TestIsValidUTF8('5.1.7', #$ED#$BF#$BF, False);

   // 5.2: paired UTF-16 surrogates encoded as two three-byte sequences
   TestIsValidUTF8('5.2.1', #$ED#$A0#$80#$ED#$B0#$80, False);
   TestIsValidUTF8('5.2.2', #$ED#$A0#$80#$ED#$BF#$BF, False);
   TestIsValidUTF8('5.2.3', #$ED#$AD#$BF#$ED#$B0#$80, False);
   TestIsValidUTF8('5.2.4', #$ED#$AD#$BF#$ED#$BF#$BF, False);
   TestIsValidUTF8('5.2.5', #$ED#$AE#$80#$ED#$B0#$80, False);
   TestIsValidUTF8('5.2.6', #$ED#$AE#$80#$ED#$BF#$BF, False);
   TestIsValidUTF8('5.2.7', #$ED#$AF#$BF#$ED#$B0#$80, False);
   TestIsValidUTF8('5.2.8', #$ED#$AF#$BF#$ED#$BF#$BF, False);

   // 5.3: U+FFFE and U+FFFF. These were labelled '3.5.1'/'3.5.2', which
   // collided with the impossible-byte cases above and made failures ambiguous.
   TestIsValidUTF8('5.3.1', #$EF#$BF#$BE, True); // invalid Unicode character but we don't consider it invalid UTF-8
   TestIsValidUTF8('5.3.2', #$EF#$BF#$BF, True); // invalid Unicode character but we don't consider it invalid UTF-8
end.