Unit CastleStringUtils

Description · Uses · Classes, Interfaces, Objects and Records · Functions and Procedures · Types · Constants · Variables

Description

String utilities. Also some operations on chars and PChars. And various conversions strings<->numbers.

General comments for all procedures that have parameter like IgnoreCase:

  • If such parameter has some default value, this default value should be

    True

    for procedures that only read processed string

    False

    for procedures that can modify processed string (for safety, so that accidental modification should be harder)

  • If I don't write in docs for this procedure whether this procedure takes current locale into account (as current locale can change the meaning of "ignoring case"), then it means it does take current locale into account.

Uses

Overview

Classes, Interfaces, Objects and Records

Name Description
Class TCastleStringList List of strings.
Class EDeformatError  
record TPercentReplace  
Class EUnknownPercentFormat  

Functions and Procedures

function RandomString: string;
procedure StringReplaceAllTo1st(var S: string; const FromPattern, ToPattern: string; IgnoreCase: boolean = true); overload;
function BreakLine(const s: string; MaxCol: integer; onbreakChars: TSetOfChars = WhiteSpaces): string; overload;
function SDeleteChars(const s: string; const excludedChars: TSetOfChars): string;
function SReplaceChars(const s, FromChars, ToChars: string): string; overload;
function SReplaceChars(const s: string; FromChars: TSetOfChars; ToChar: char): string; overload;
function SReplaceChars(const s: string; FromChar, ToChar: char): string; overload;
function SPad(const s: string; len: integer; c: char = ' '): string; overload;
function SZeroPad(const s: string; len: integer): string;
function LoCase(c: char): char;
function CharPos(c: char; const s: string; Offset: Integer = 1): integer;
function CharsPos(const chars: TSetOfChars; const s: string): integer;
function CharsPosEx(const chars: TSetOfChars; const s: string; Offset: Integer): integer;
function BackCharsPos(const chars: TSetOfChars; const s: string): integer;
function BackPos(const SubString, S: string): Integer; overload;
function BackPos(const SubString: char; const S: string): Integer; overload;
function FirstDelimiter(const Delimiters, S: string): Integer;
function SEnding(const s: string; P: integer): string;
function IsPrefix(const Prefix, S: string; IgnoreCase: boolean = true): boolean; overload;
function IsSuffix(const Suffix, S: string; IgnoreCase: boolean = true): boolean; overload;
function PrefixRemove(const Prefix, S: string; IgnoreCase: boolean): string;
function SuffixRemove(const Suffix, S: string; IgnoreCase: boolean): string;
procedure SAppendData(var s: string; const Data; DataSize: integer);
function SChar(const s: string; CharNum: integer): PChar;
function SCharIs(const s: string; index: integer; c: char): boolean; overload;
function SCharIs(const s: string; index: integer; const chars: TSetOfChars): boolean; overload;
function SReadableForm(const s: string): string;
function CopyPos(const s: string; StartPosition, EndPosition: integer): string;
procedure DeletePos(var S: string; StartPosition, EndPosition: Integer);
function NextToken(const S: string; var SeekPos: Integer; const TokenDelims: TSetOfChars = WhiteSpaces): string;
function NextTokenOnce(const s: string; SeekPos: integer = 1; const TokenDelims: TSetOfChars = WhiteSpaces): string; overload;
function CreateTokens(const s: string; const TokenDelims: TSetOfChars = WhiteSpaces): TCastleStringList;
function NextTokenRestr(const s: string; var SeekPos: integer; const TokenDelims: TSetOfChars = WhiteSpaces; const RestrAreas: TSetOfChars = ['''','"']): string; overload;
function FindPos(const SubText, Text: string; StartPosition, Count: integer; Options: TSearchOptions; const WordBorders: TSetOfChars = DefaultWordBorders): integer; overload;
function MatchingFind(const SubText, Text: string; MatchStart, MatchLength: integer; Options: TSearchOptions; const WordBorders: TSetOfChars): boolean; overload;
function MatchingFind(const SubText, Text: string; MatchStart, MatchLength: integer; matchCase, wholeWord: boolean; const WordBorders: TSetOfChars): boolean; overload;
function FindWordPos(const SubText, Text: string; const WordBorders: TSetOfChars = DefaultWordBorders): integer; overload;
function GetWordAtPos(const Text: string; Position: integer; const WordBorders: TSetOfChars = DefaultWordBorders): string; overload;
function SRight(const s: string; const rpart: integer): string;
function SAppendPart(const s, PartSeparator, NextPart: string): string;
function FileToString(const FileName: string; const AllowStdIn: boolean = false): string;
procedure StringToFile(const FileName, contents: string);
procedure DeFormat(Data: string; const Format: string; const args: array of pointer; const IgnoreCase: boolean = true; const RelaxedWhitespaceChecking: boolean = true); overload;
function TryDeFormat(Data: string; const Format: string; const args: array of pointer; const IgnoreCase: boolean = true; const RelaxedWhitespaceChecking: boolean = true): integer; overload;
procedure GetFileFilterExts(const FileFilter: string; Extensions: TStringList);
function GetFileFilterName(const FileFilter: string): string;
function GetFileFilterExtsStr(const FileFilter: string): string;
function SReplacePatterns(const s: string; const patterns, values: array of string; Options: TSearchOptions): string;
function SCharsCount(const s: string; c: char): Cardinal; overload;
function SCharsCount(const s: string; const Chars: TSetOfChars): Cardinal; overload;
function STruncateHash(const s: string): string;
function SUnformattable(const s: string): string;
function SAnsiCompare(const s1, s2: string; IgnoreCase: boolean): Integer;
function SAnsiSame(const s1, s2: string; IgnoreCase: boolean): boolean;
function SPercentReplace(const InitialFormat: string; const Replaces: array of TPercentReplace; out ReplacementsDone: Cardinal; ErrorOnUnknownPercentFormat: boolean = true; PercentChar: char ='%'; IgnoreCase: boolean = false): string; overload;
function SPercentReplace(const InitialFormat: string; const Replaces: array of TPercentReplace; ErrorOnUnknownPercentFormat: boolean = true; PercentChar: char ='%'; IgnoreCase: boolean = false): string; overload;
function FormatIndexedName(const NamePattern: string; const Index: Integer; out ReplacementsDone: Cardinal): string; overload;
function FormatIndexedName(const NamePattern: string; const Index: Integer): string; overload;
function AnsiUpperCaseChar(C: char): char;
function AnsiLowerCaseChar(C: char): char;
function SAnsiUpperFirstChar(const S: string): string;
function DigitAsChar(b: byte): char;
function DigitAsByte(c: char): byte;
function IntToStrZPad(n: integer; minLength: integer): string;
function IntToStrBase(const n: Int64; Base: Byte): string; overload;
function IntToStrBase( n: QWord; Base: Byte): string; overload;
function IntToStrBase(const n: Int64; Base: Byte; minLength: Cardinal): string; overload;
function IntToStrBase(const n: QWord; Base: Byte; minLength: Cardinal): string; overload;
function IntToStr2(n: Int64; const MinLength: Cardinal = 1; const ZeroDigit: char = '0'; const OneDigit: char = '1'; const MinusSign: char = '-'): string; overload;
function IntToStr16(const n: Int64; const minLength: Cardinal = 1): string; overload;
function IntToStr16(const n: QWord; const minLength: Cardinal = 1): string; overload;
function ToStr(const args: array of const): string;
function VarRecToStr(const v: TVarRec): string;
function PointerToStr(Ptr: Pointer): string;
function Str2ToInt(const s: string): integer;
function StrHexToInt(const s: string): Int64;
function StrToFloatDef(const s: string; DefValue: Extended): Extended;
function SetToStr(const SetVariable; NumStart, NumEnd: byte): string;
function PCharOrNil(const s: string): PChar;
function CharToNiceStr(c: char; BackSpaceTabEnterString: boolean = true): string;
function SCompressWhiteSpace(const S: string): string;

Types

TSearchOptions = set of (soMatchCase, soWholeWord, soBackwards);
TSetOfChars = SysUtils.TSysCharSet;

Constants

AllChars = [Low(Char) .. High(Char)];
DefaultWordBorders = AllChars - ['a'..'z', 'A'..'Z', '0'..'9', '_'];
WhiteSpaces = [' ', #9, #10, #13];
BoolToStr: array[boolean] of string=('FALSE','TRUE');
BoolToStrYesNo: array[boolean]of string = ('No','Yes');
CtrlA = Chr(Ord('a') - Ord('a') + 1);
CtrlB = Chr(Ord('b') - Ord('a') + 1);
CtrlC = Chr(Ord('c') - Ord('a') + 1);
CtrlD = Chr(Ord('d') - Ord('a') + 1);
CtrlE = Chr(Ord('e') - Ord('a') + 1);
CtrlF = Chr(Ord('f') - Ord('a') + 1);
CtrlG = Chr(Ord('g') - Ord('a') + 1);
CtrlH = Chr(Ord('h') - Ord('a') + 1);
CtrlI = Chr(Ord('i') - Ord('a') + 1);
CtrlJ = Chr(Ord('j') - Ord('a') + 1);
CtrlK = Chr(Ord('k') - Ord('a') + 1);
CtrlL = Chr(Ord('l') - Ord('a') + 1);
CtrlM = Chr(Ord('m') - Ord('a') + 1);
CtrlN = Chr(Ord('n') - Ord('a') + 1);
CtrlO = Chr(Ord('o') - Ord('a') + 1);
CtrlP = Chr(Ord('p') - Ord('a') + 1);
CtrlQ = Chr(Ord('q') - Ord('a') + 1);
CtrlR = Chr(Ord('r') - Ord('a') + 1);
CtrlS = Chr(Ord('s') - Ord('a') + 1);
CtrlT = Chr(Ord('t') - Ord('a') + 1);
CtrlU = Chr(Ord('u') - Ord('a') + 1);
CtrlV = Chr(Ord('v') - Ord('a') + 1);
CtrlW = Chr(Ord('w') - Ord('a') + 1);
CtrlX = Chr(Ord('x') - Ord('a') + 1);
CtrlY = Chr(Ord('y') - Ord('a') + 1);
CtrlZ = Chr(Ord('z') - Ord('a') + 1);
CharBackSpace = #8;
CharTab = #9;
CharEnter = #13;
CharEscape = #27;
CharDelete = #127;

Description

Functions and Procedures

function RandomString: string;
 
procedure StringReplaceAllTo1st(var S: string; const FromPattern, ToPattern: string; IgnoreCase: boolean = true); overload;

Replace all occurrences of FromPattern string to ToPattern string, within another string S.

StringReplaceAllTo1st(s, from, to) is actually equivalent to simply s := StringReplace(s, from, to, [rfReplaceAll, rfIgnoreCase]). So StringReplaceAllTo1st is just a wrapper for very common use case of StringReplace.

function BreakLine(const s: string; MaxCol: integer; onbreakChars: TSetOfChars = WhiteSpaces): string; overload;

Insert newline characters into string S, such that each line has at most MaxCol chars. The newline sequence inserted is NL.

It tries to insert NL at the last character in OnBreakChars but still before MaxCol limit, and the character in OnBreakChars is deleted in this case. In other words, in most typical situation it simply breaks the string where the whitespace is, trying to make the line as long as possible within MaxCol limit. If no such character in OnBreakChars is found (e.g., you put a long line of non-white characters), it will still break the string at MaxCol position (so in this exceptional case, it will cause a break in the middle of the word).

While breaking the string in the middle of the word is not nice, this gives us a safe feeling that this will always break the string into MaxCol chunks.

This intelligently recognizes already existing newline characters (#13, #10, #13#10 or #10#13) in the string, so e.g. it will not insert more newline characters when they are not necessary.

function SDeleteChars(const s: string; const excludedChars: TSetOfChars): string;

Returns S with all chars in ExcludedChars deleted.

function SReplaceChars(const s, FromChars, ToChars: string): string; overload;

Replace all occurrences of characters in FromChars with the new string / character. There are three overloaded versions:

  1. SReplaceChars(string, string, string) looks in S for characters within FromChars, and replaces them with characters on appropriate position in ToChars. For example, SReplaceChars(S, 'ab', 'cd') replaces all occurrences of 'a' into 'c' and all occurrences of 'b' into 'd'. It must always be Length(FromChars) <= Length(ToChars).

  2. SReplaceChars(string, TSetOfChars, char) replaces all occurrences of any character in given set with the one specified character.

  3. SReplaceChars(string, char, char) simply replaces all occurrences of one character into another.

function SReplaceChars(const s: string; FromChars: TSetOfChars; ToChar: char): string; overload;
 
function SReplaceChars(const s: string; FromChar, ToChar: char): string; overload;
 
function SPad(const s: string; len: integer; c: char = ' '): string; overload;

Pad (fill from the left with character C) string S, until length of resulting string is at least Len.

For example, SPad('29', 4, '0') gives '0029'

function SZeroPad(const s: string; len: integer): string;

Pad (fill from the left) with zeros string S, until length of resulting string is at least Len. It's actually just a shortcut for SPad with padding character set to '0'.

function LoCase(c: char): char;

Convert uppercase letters to lowercase. Analogous to UpCase. Doesn't change other characters. Just like UpCase, this doesn't take current locale into account, and works only on English A-Z -> a-z letters.

function CharPos(c: char; const s: string; Offset: Integer = 1): integer;
 
function CharsPos(const chars: TSetOfChars; const s: string): integer;

Find first occurrence of any character in Chars in string S. This is quite like FirstDelimiter but it takes parameter as TSetOfChars and has much more sensible name.

BackCharsPos does the same, but from the end of the string (i.e. finds the last occurrence).

CharsPosEx searches starting from Offset char.

They all return 0 if not found.

function CharsPosEx(const chars: TSetOfChars; const s: string; Offset: Integer): integer;
 
function BackCharsPos(const chars: TSetOfChars; const s: string): integer;
 
function BackPos(const SubString, S: string): Integer; overload;

Find last occurrence of SubString within S. 0 if not found. Overloaded version is optimized for searching for single character.

function BackPos(const SubString: char; const S: string): Integer; overload;
 
function FirstDelimiter(const Delimiters, S: string): Integer;

Find first occurrence of character in Delimiters. Name is analogous to LastDelimiter. Returns 0 if not found.

function SEnding(const s: string; P: integer): string;

Returns suffix of S starting from position P. Returns '' if P > length(S). Yes, this is simply equivalent to Copy(S, P, MaxInt).

function IsPrefix(const Prefix, S: string; IgnoreCase: boolean = true): boolean; overload;
 
function IsSuffix(const Suffix, S: string; IgnoreCase: boolean = true): boolean; overload;
 
function PrefixRemove(const Prefix, S: string; IgnoreCase: boolean): string;

Removes the prefix, if it is present. More precisely, if IsPrefix(Prefix, S, IgnoreCase) then returns S with this prefix removed. Else returns S.

function SuffixRemove(const Suffix, S: string; IgnoreCase: boolean): string;

Like PrefixRemove, but checks for and removes Suffix.

procedure SAppendData(var s: string; const Data; DataSize: integer);

Appends to a string S DataSize bytes from Data.

function SChar(const s: string; CharNum: integer): PChar;

A pointer to S[CharNum], that is just @S[CharNum], avoiding range checking.

function SCharIs(const s: string; index: integer; c: char): boolean; overload;

Check whether S[Index] = C, also checking whether Index is within S length. Return false if S is too short, or the character differs.

function SCharIs(const s: string; index: integer; const chars: TSetOfChars): boolean; overload;
 
function SReadableForm(const s: string): string;

Replace typically unreadable characters in string S with #number notation. Useful for printing strings with some unprintable chars for debugging purposes.

function CopyPos(const s: string; StartPosition, EndPosition: integer): string;

Return S[StartPosition..EndPosition]. This is similar to standard Copy procedure, but last parameter is EndPosition instead of Count, which is more comfortable sometimes.

procedure DeletePos(var S: string; StartPosition, EndPosition: Integer);

Delete from S range of characters [StartPosition..EndPosition]. Analogous to standard Delete but with EndPosition parameter (while standard Delete takes Count).

function NextToken(const S: string; var SeekPos: Integer; const TokenDelims: TSetOfChars = WhiteSpaces): string;

Find next part in the string S separated by delimiters TokenDelims. More precisely: search S, starting from position SeekPos, for the first character that is not in TokenDelims. Then, all subsequent characters that are not in TokenDelims are appended to the Result, until any character that is in TokenDelims is found. In effect, Result contains the whole part that was between TokenDelims characters.

SeekPos is advanced to the position of the next character, i.e. the character right after the ending character that was in TokenDelims. In other words, SeekPos points to the position of the next "unprocessed" character in string S. Often you will want to make another call to NextToken, passing this SeekPos, and this way you can split your string S into parts delimited by TokenDelims.

Returns '' if no more tokens available (SeekPos value at the end is unspecified).

Typical use scenario (iterate over all tokens in the string) :

  SeekPos := 1;
  repeat
    Token := NextToken(S, SeekPos);
    if Token = '' then break;
    { ... process_next_token (Token) ... }
  until false;

The above example will split the string into parts separated by whitespace.

Note: it's much easier to use CreateTokens instead of this procedure. But this procedure gives you more flexibility.

function NextTokenOnce(const s: string; SeekPos: integer = 1; const TokenDelims: TSetOfChars = WhiteSpaces): string; overload;

NextTokenOnce works just like NextToken, but doesn't advance the SeekPos position. This means that it's quite useless when you're interested in all tokens inside some string, but it's also more comfortable when you're interested in only one token inside some string. When SeekPos = 1, this is the first token.

function CreateTokens(const s: string; const TokenDelims: TSetOfChars = WhiteSpaces): TCastleStringList;

Returns TCastleStringList with tokens extracted from S. Token is something delimited by TokenDelims. TokenDelims are not contained in resulting items. E.g. CreateTokens('foo, bar', [' ', ',']) returns TCastleStringList with 2 items: 'foo' and 'bar'.

function NextTokenRestr(const s: string; var SeekPos: integer; const TokenDelims: TSetOfChars = WhiteSpaces; const RestrAreas: TSetOfChars = ['''','"']): string; overload;

Advanced version of NextToken, that avoids splitting string inside pairs of "restricted" characters, like quotes.

Basically, just like NextToken, this finds the next part in the string S separated by delimiters TokenDelims. Looks for the first character not in TokenDelims (starting from position SeekPos), and then reads the string up to the character in TokenDelims. But the characters surrounded by a pair of same characters from RestrAreas are never treated like TokenDelims.

This way you can e.g. split a string by whitespaces, but still request that whitespaces inside ' and ' or between " and " be ignored (not split on them).

For example, with default values for TokenDelims and RestrAreas, you can reliably split XML attributes like

  <foo val1='value in single quotes'
       val2="value in double quotes"
       val3='value in single quotes, double quote inside (") ignored'
       val4=value_without_the_quotes
  />

Parsing such string with NextTokenRestr will result in six tokens returned: '<foo', then one token for each valX=..., then '/>'.

function FindPos(const SubText, Text: string; StartPosition, Count: integer; Options: TSearchOptions; const WordBorders: TSetOfChars = DefaultWordBorders): integer; overload;

Find substring SubText within Text. Returns 0 if not found. Similar to a standard Pos function, with some improvements.

Parameters
StartPosition
Starts searching for SubText starting from this position. Note that the resulting position is still returned with respect to the string beginning. Just like standard PosEx.
Count
Looks only at Count characters from Text. You can say that the search is done only within Copy(Text, StartPosition, Count).
Options
Various searching options:

  • soMatchCase: makes searching case-sensitive (by default, case is ignored, taking locale into account).

  • soWholeWord: looks only for SubText occurrences surrounded by characters from WordBorders (or the beginning/end of Text).

    Note that, while the beginning/end of Text is always treated like a word border, the mere beginning/end of the searching range (StartPosition, Count) is not a word border. For example FindPos('cat', 'foocat dog', 4, MaxInt, [soWholeWord]) will answer 0 (not found), because the only 'cat' occurrence is not surrounded by default word borders.

  • soBackwards: search from the end, that is return rightmost found occurrence.

function MatchingFind(const SubText, Text: string; MatchStart, MatchLength: integer; Options: TSearchOptions; const WordBorders: TSetOfChars): boolean; overload;

Check whether the given match (MatchStart, MatchLength) is a good result of a FindPos call for the same arguments.

In other words, this checks something more than just the equality of Copy(Text, MatchStart, MatchLength) with SubText. This precisely checks if FindPos(SubText, Text, Options, WordBorders) would return given MatchStart (and MatchLength is equal to length of SubText). For example if soMatchCase in Options, then SubText is compared case-sensitive. If soWholeWord in Options, then given match must be surrounded by WordBorders in Text. And so on.

The typical usage for this is when you make an interactive text editor application, and you have some text selected, and you have to check whether this selection could have been produced by a previous successful FindPos search.

function MatchingFind(const SubText, Text: string; MatchStart, MatchLength: integer; matchCase, wholeWord: boolean; const WordBorders: TSetOfChars): boolean; overload;
 
function FindWordPos(const SubText, Text: string; const WordBorders: TSetOfChars = DefaultWordBorders): integer; overload;

Find substring SubText within Text, requiring SubText to be surrounded by WordBorders. Always case-sensitive. Returns 0 if not found.

This is equivalent to FindPos(SubText, Text, 1, Length(Text), [soWholeWord, soMatchCase], WordBorders). The only difference from the standard Pos function is that this looks only for "words" — that is, occurrences surrounded by WordBorders.

function GetWordAtPos(const Text: string; Position: integer; const WordBorders: TSetOfChars = DefaultWordBorders): string; overload;

Return word surrounding Position inside Text. A "word" is determined by looking around Text[Position], as far as you can, until the beginning/end of Text is found or character in WordBorders.

function SRight(const s: string; const rpart: integer): string;

Return rightmost RPart characters from S. If RPart > Length(S) then returns S.

function SAppendPart(const s, PartSeparator, NextPart: string): string;

If S = '' then returns NextPart, else returns S + PartSeparator + NextPart.

function FileToString(const FileName: string; const AllowStdIn: boolean = false): string;

Read whole file contents to string.

If AllowStdIn, then FileName = '-' (one dash) is treated specially: we will read the whole stdin (Pascal Input) stream. Note that the current implementation of this always changes newline into NL (current OS newline), and may add additional newline at the end of the file (this may be fixed, to return more accurately stdin contents; for usual text file reading, this doesn't matter).

procedure StringToFile(const FileName, contents: string);
 
procedure DeFormat(Data: string; const Format: string; const args: array of pointer; const IgnoreCase: boolean = true; const RelaxedWhitespaceChecking: boolean = true); overload;

Parse a string according to the given format, returning the values corresponding to placeholders %x in format string.

Format parameter is a sequence of white spaces, placeholders like %d or %f, and other characters. More precisely:

  • If RelaxedWhitespaceChecking = True (that's the default value) then 1 or more white spaces in Format must correspond to 1 or more any whitespace characters in Data. I.e., the actual number and kind of whitespace in Format and Data doesn't have to match — it's only important that some whitespace in Format correspond to some whitespace in Data.

  • %d in Format means an integer value (possibly signed) in Data. Args should have a pointer to Integer variable on the appropriate position.

  • %f in Format means a float value (possibly signed, possibly with a dot) in Data. Args should have a pointer to Float variable on the appropriate position.

  • %.single., %.double., %.extended. are like %f, but they specify appropriate variable type in Args. Since DeFormat can't check the type validity of your pointers, always be sure to pass in Args pointers to appropriate types.

  • %s in Format means a string (will end on the first whitespace) in Data. Args should contain a pointer to an AnsiString on the appropriate position. Note that I mean it — a pointer to an AnsiString, not just a string typecasted into a pointer. I.e., if S is AnsiString, Args should contain @S, not Pointer(S).

    Note that a string may be empty in some cases, e.g. Format = '%d %s' and Data = '123 ' will result in the empty string as second Args.

  • %% in Format means a single % sign in Data.

  • All the other characters (non-white, not %x sequences above) should be present in Data exactly like they are specified in Format. IgnoreCase controls whether the letter case is checked. When RelaxedWhitespaceChecking = False then white-space characters are treated just like non-white chars: they must match exactly between Format and Data.

Format must always match the whole Data — in other words, when we finished reading the Format, Data should be finished too. The exception is at the beginning and end of Data, if RelaxedWhitespaceChecking = True : then at the beginning and end of Data any number of white-space is allowed.

For DeFormat, the opposite must also be true: when we finished reading Data, Format should be finished too. However, for TryDeFormat, it's allowed for Data to end prematurely. TryDeFormat returns how many Args were initialized.

Note that while usually you will want RelaxedWhitespaceChecking = True, sometimes it can be needed to set this to False not only to get stricter checking, but also to get some things matching that otherwise wouldn't match. For example, consider Data = 'first  second apple' (note the two spaces after 'first') and Format = 'first %s second %s'. With RelaxedWhitespaceChecking these things do not match — because the 1st space character in the Format string "consumes" the 1st and 2nd space characters in the Data. Then '%s' is matched to the word 'second', and the word 'second' is compared with 'apple' and they do not match. If you want such Data and Format to match, you must pass RelaxedWhitespaceChecking = False. Then the first '%s' will be matched to '' (empty string).

This was written because both JclSscanf and scanf units were buggy. (see openGL.testy/nehe10).

Exceptions raised
EDeformatError
In case of any error — mismatch between Format and Data. Note that in case of error, some of Args may be initialized, and some not — no guarantees here, sorry.
function TryDeFormat(Data: string; const Format: string; const args: array of pointer; const IgnoreCase: boolean = true; const RelaxedWhitespaceChecking: boolean = true): integer; overload;
 
procedure GetFileFilterExts(const FileFilter: string; Extensions: TStringList);

Extract file extensions from a file filter, usually specified as a TOpenDialog.Filter value.

More precisely: expects FileFilter to be in the form of 'xxxx|name1.ext1;name2.ext2'. Where "xxxx" is just about anything (it is ignored), and in fact whole "xxxx|" (with bar) may be omitted. The rest (after "|") is treated as a filename list, separated by semicolon ";".

As Extensions contents, we set an array of all extensions extracted from these filenames. For example above, we would set Extensions to array with two items: ['.ext1', '.ext2'].

function GetFileFilterName(const FileFilter: string): string;

Extract file filter name, from a file filter usually specified as a TOpenDialog.Filter value.

More precisely: if we do not see bar "|" character, then this is the filter name. Otherwise, everything on the right of "|" is "extensions" and everything on the left is "filter name".

Additionally, if filter name ends with extensions value in parentheses, they are removed. In other words, for 'Pascal files (*.pas)|*.pas', this will return just 'Pascal files'. The '(*.pas)' was removed from the filter name, because we detected this just repeats the extensions on the right of "|". Extensions on the right of "|" must be separated by semicolons, extensions within parentheses on the left of "|" may be separated by semicolons ";" or commas ",".

function GetFileFilterExtsStr(const FileFilter: string): string;

Search in FileFilter for the bar character "|", and return everything after it. This is a simple basis for GetFileFilterExts.

If no "|" found, we return an empty string (in other words, file filter without "|" is treated as just a filter name, without any extensions).

function SReplacePatterns(const s: string; const patterns, values: array of string; Options: TSearchOptions): string;

Replace all strings in Patterns with corresponding strings in Values. This is similar to standard StringReplace, but this does many replaces at once.

Patterns and Values arrays must have equal length. Patterns[0] will be replaced with Values[0], Patterns[1] with Values[1] etc. Patterns are scanned from left to right, that is if two pattern occurrences overlap — we will detect the leftmost one. If both patterns start at the same place (this means that one pattern is a prefix of the other), we will choose the first pattern in Patterns table.

Using this avoids a common trap at repeated search-replace operations. A naive implementation of doing many search-replace over the same string is like

  Result := S;
  Result := StringReplace(Result, Patterns[0], Values[0], [rfReplaceAll]);
  Result := StringReplace(Result, Patterns[1], Values[1], [rfReplaceAll]);
  etc.

But the above fails badly when inserting some Values[] creates an occurrence of Pattern checked later. For example, when Values[0] contains inside whole Patterns[1]. More exotic situations involve when some Values[] glues with previous string contents to make a pattern detected later. This means that you could replace the same content many times, which is usually not what you want.

That's why you should instead use this function for such situations.

Options cannot contain soBackwards flag.

function SCharsCount(const s: string; c: char): Cardinal; overload;
 
function SCharsCount(const s: string; const Chars: TSetOfChars): Cardinal; overload;
 
function STruncateHash(const s: string): string;

Remove from the string S everything after the first hash "#" character. Removes also this very "#" character.

If string doesn't contain hash character, it's simply returned.

Useful for interpreting simple text files when you want to treat things after "#" like a comment.

function SUnformattable(const s: string): string;

Return the value to reproduce exactly string S by Format procedure. Saying simply, this doubles the "%" characters inside the string. The intention is to make such string that Format(SUnformattable(S), []) = S. In other words, "quote" any suspicious "%" characters in S for Format.

function SAnsiCompare(const s1, s2: string; IgnoreCase: boolean): Integer;

Compare strings, taking into account current locale. This simply does AnsiCompareStr or AnsiCompareText, depending on IgnoreCase.

Returns value < 0 when S1 < S2, returns 0 when S1 = S2 and value > 0 when S1 > S2.

function SAnsiSame(const s1, s2: string; IgnoreCase: boolean): boolean;

Check if strings are equal, taking into account current locale. Shortcut for SAnsiCompare(S1, S2, IgnoreCase) = 0.

function SPercentReplace(const InitialFormat: string; const Replaces: array of TPercentReplace; out ReplacementsDone: Cardinal; ErrorOnUnknownPercentFormat: boolean = true; PercentChar: char ='%'; IgnoreCase: boolean = false): string; overload;

Searches for %x patterns and replaces them with specified strings. Something like a more generalized Format routine.

More precisely: every two-char sequence that starts with PercentChar and then is followed by one of Replaces[I].c characters is replaced with appropriate Replaces[i].s. Moreover, a pair of two PercentChar characters is replaced with one PercentChar character.

For example, assume that Replaces contains two items: (c: 'B'; s: '<bold>'), (c: 'b'; s: '</bold>'). Then SPercentReplace('100%% of cats are %Bcute%b', Replaces) will return string '100% of cats are <bold>cute</bold>'.

EUnknownPercentFormat is raised if we encounter a two-char sequence that starts with PercentChar and is followed by a character that is neither any Replaces[i].c nor PercentChar. Also, a single PercentChar at the end of the string is an error.

For example, assume that Replaces contains the same two items as previously. Following calls will result in EUnknownPercentFormat being raised: SPercentReplace('Unknown sequence %x', Replaces), SPercentReplace('Unterminated sequence %', Replaces).

If ErrorOnUnknownPercentFormat is False, then EUnknownPercentFormat will not be raised. Instead, incorrect sequence (like %x or unterminated % in examples above) will simply be left in the string.

Of course, replacing is done intelligently. Which means that e.g. sequence of four % characters will be correctly transformed into two % characters.

Note that IgnoreCase is used to match characters for Replaces[I].c. IgnoreCase is not used when it comes to comparing with PercentChar character, i.e. even when PercentChar will be set to some letter, it will always be compared in case-sensitive manner, regardless of IgnoreCase value.

It is undefined (meaning: don't do it) what happens if Replaces array contains more than once the same character C, or if any character C in Replaces array is equal to PercentChar.

ReplacementsDone, if passed, will return how many replacements were done. Not counting "meaningless" replacements of pair of PercentChar to one PercentChar (that is, we count only actual replacements from Replaces array).

Exceptions raised
EUnknownPercentFormat
In case of error in InitialFormat string, if ErrorOnUnknownPercentFormat is True.
function SPercentReplace(const InitialFormat: string; const Replaces: array of TPercentReplace; ErrorOnUnknownPercentFormat: boolean = true; PercentChar: char ='%'; IgnoreCase: boolean = false): string; overload;
 
function FormatIndexedName(const NamePattern: string; const Index: Integer; out ReplacementsDone: Cardinal): string; overload;

Replace %d in the NamePattern with Index.

This is something like a more specialized Format (sprintf for you, C folks), working similar to SPercentReplace.

  • %d is replaced with Index.

    You can insert a non-negative number between % and d, to pad the index with zeros to the desired length. For example, with Index = 2, %d is replaced with just "2", %2d is replaced with "02", %4d is replaced with "0002".

  • %% is replaced with single percent char %.

  • Everything else is just copied to resulting string. Not recognized %-patterns are also just copied. Much like SPercentReplace with ErrorOnUnknownPercentFormat = false (since FormatIndexedName main use is to replace end-user supplied filenames on command-line, it tries to be tolerant to errors).

function FormatIndexedName(const NamePattern: string; const Index: Integer): string; overload;
 
function AnsiUpperCaseChar(C: char): char;
 
function AnsiLowerCaseChar(C: char): char;
 
function SAnsiUpperFirstChar(const S: string): string;

Returns S with S[1] character replaced with AnsiUpperCaseChar(S[1]) (unless S does not have 1st char, i.e. S = '')

function DigitAsChar(b: byte): char;

Convert digit (like number 0) to character (like '0'). Use only for arguments within 0..9 range.

function DigitAsByte(c: char): byte;

Convert digit character (like '0') to a number (like 0). Use only for characters in '0'..'9' range.

function IntToStrZPad(n: integer; minLength: integer): string;

Convert integer to string, padding string with zeros if needed.

function IntToStrBase(const n: Int64; Base: Byte): string; overload;

Convert integer to string, in base-Base (like base-16) numeral system. For digits above '9', we will use uppercase letters 'A', 'B'... etc. That's also why Base cannot be larger than 'Z'-'A' + 1 + 10, that is 36 (we would not have enough digits then).

Overloaded versions with MinLength pad result with zeros to have at least MinLength.

function IntToStrBase( n: QWord; Base: Byte): string; overload;
 
function IntToStrBase(const n: Int64; Base: Byte; minLength: Cardinal): string; overload;
 
function IntToStrBase(const n: QWord; Base: Byte; minLength: Cardinal): string; overload;
 
function IntToStr2(n: Int64; const MinLength: Cardinal = 1; const ZeroDigit: char = '0'; const OneDigit: char = '1'; const MinusSign: char = '-'): string; overload;

Convert integer to binary (base-2 numeral system). MinLength means to left-pad result with zeros if necessary.

function IntToStr16(const n: Int64; const minLength: Cardinal = 1): string; overload;

Convert integer to hexadecimal (base-16 numeral system).

function IntToStr16(const n: QWord; const minLength: Cardinal = 1): string; overload;
 
function ToStr(const args: array of const): string;
 
function VarRecToStr(const v: TVarRec): string;
 
function PointerToStr(Ptr: Pointer): string;

Returns Ptr as 0xXXX... hexadecimal value. "0x" is not a Pascal standard for coding hex values, but it's so popular that users are more likely to "get" 0x notation.

function Str2ToInt(const s: string): integer;

Convert string representing binary number to an integer. String must contain only '0', '1' (digits) and start with an optional sign (+ or -).

Exceptions raised
EConvertError
On problems with conversion.
function StrHexToInt(const s: string): Int64;

Convert string with hexadecimal number to an integer. String must contain only hexadecimal digits (0-9, a-f, A-F), optionally preceded by a sign (+ or -).

Exceptions raised
EConvertError
On problems with conversion.
function StrToFloatDef(const s: string; DefValue: Extended): Extended;
 
function SetToStr(const SetVariable; NumStart, NumEnd: byte): string;

Convert a set to a string representation, in somewhat hacky way. This assumes that given SetVariable is a set value, and the set type is "set of [NumStart .. NumEnd]".

Implementation is heavily dependent on how the sets are internally stored. For now, we depend on the assumption that a set of [NumStart .. NumEnd] behaves like a set of Byte, shifted to the left (i.e., NumStart corresponds to a 0 in set of Byte). This is not necessarily true! For example in Delphi 5 (as far as I remember — I don't have this Delphi now, and I don't remember on which Delphi version I observed this) set of 1..16 uses first three bytes, and the first bit (that would correspond to 0) is simply wasted. In fact, SizeOf such set is still 4, which means that internally sets eat 4 bytes anyway. But SizeOf set 200..216 is also 4, which means that the compiler is smart and doesn't waste too much space to store only 17 bits.

This is not a rant about internal set handling by Delphi. On the contrary, Delphi does it for speed reasons, and that's very good. This is just a warning that SetToStr is not really reliable, and you may need to experiment a little with NumStart / NumEnd values to get sensible results. Although if your set is like "set of [0 .. something]", this should usually work OK.

Still: this function should be used only for debug purposes. Don't depend on it working 100% correctly always — it can't, because we can't depend on how compiler stores sets.

function PCharOrNil(const s: string): PChar;

PCharOrNil simply returns Pointer(S); you can think of it as a no-op. If the string is empty, this returns Nil; otherwise it works just like PChar(S): it returns Pointer(S) with the appropriate type cast.

function CharToNiceStr(c: char; BackSpaceTabEnterString: boolean = true): string;

Return a nice very short description of the character.

For normal readable characters just returns them, for special characters returns short string like "Ctrl+something" or "Escape".

The returned string doesn't contain any quotes around, doesn't contain any word merely stating "character" (for example argument 'c' just generates 'c', not 'character "c"').

BackSpaceTabEnterString determines behavior on three special values: #8, #9, #13. These may be either described as Backspace/Tab/Enter (if BackSpaceTabEnterString = true) or as Ctrl+H, Ctrl+I, Ctrl+M (if BackSpaceTabEnterString = false).

function SCompressWhiteSpace(const S: string): string;

Replace any number of consecutive whitespace (including newlines) with a single whitespace. This is nice when you have a string (possibly multiline) supplied by user, and you want to use this for some UI item (like window's caption or menu item) — this "sanitizes" whitespace inside such string.

Types

TSearchOptions = set of (soMatchCase, soWholeWord, soBackwards);
 
TSetOfChars = SysUtils.TSysCharSet;

A set of chars.

Constants

AllChars = [Low(Char) .. High(Char)];
 
DefaultWordBorders = AllChars - ['a'..'z', 'A'..'Z', '0'..'9', '_'];
 
WhiteSpaces = [' ', #9, #10, #13];
 
BoolToStr: array[boolean] of string=('FALSE','TRUE');

Convert boolean to string, using a simple table lookup.

I don't use BoolToStr function from SysUtils unit, since there are differences in FPC implementations:

  • In FPC <= 2.0.4, BoolToStr takes one param and returns 'FALSE' or 'TRUE' string.

  • In FPC > 2.0.4 (trunk (2.3.1 currently), and fixes_2_2 (2.1.3)), BoolToStr was changed for Delphi compatibility. Now when passed only 1 param it returns 0 or -1 (who the hell needs such BoolToStr interpretation?).

    You have to pass 2nd param to BoolToStr as True to get strings 'False' and 'True'. But this makes it non-compilable in FPC <= 2.0.4. So to call BoolToStr like I want to, I would have to use ugly $ifdefs...

So I decided to use my BoolToStr table throughout my units. When I'll switch fully to FPC > 2.0.4, I'll drop this and use BoolToStr function from SysUtils unit.

BoolToStrYesNo: array[boolean]of string = ('No','Yes');
 
CtrlA = Chr(Ord('a') - Ord('a') + 1);
 
CtrlB = Chr(Ord('b') - Ord('a') + 1);
 
CtrlC = Chr(Ord('c') - Ord('a') + 1);
 
CtrlD = Chr(Ord('d') - Ord('a') + 1);
 
CtrlE = Chr(Ord('e') - Ord('a') + 1);
 
CtrlF = Chr(Ord('f') - Ord('a') + 1);
 
CtrlG = Chr(Ord('g') - Ord('a') + 1);
 
CtrlH = Chr(Ord('h') - Ord('a') + 1);
 
CtrlI = Chr(Ord('i') - Ord('a') + 1);
 
CtrlJ = Chr(Ord('j') - Ord('a') + 1);
 
CtrlK = Chr(Ord('k') - Ord('a') + 1);
 
CtrlL = Chr(Ord('l') - Ord('a') + 1);
 
CtrlM = Chr(Ord('m') - Ord('a') + 1);
 
CtrlN = Chr(Ord('n') - Ord('a') + 1);
 
CtrlO = Chr(Ord('o') - Ord('a') + 1);
 
CtrlP = Chr(Ord('p') - Ord('a') + 1);
 
CtrlQ = Chr(Ord('q') - Ord('a') + 1);
 
CtrlR = Chr(Ord('r') - Ord('a') + 1);
 
CtrlS = Chr(Ord('s') - Ord('a') + 1);
 
CtrlT = Chr(Ord('t') - Ord('a') + 1);
 
CtrlU = Chr(Ord('u') - Ord('a') + 1);
 
CtrlV = Chr(Ord('v') - Ord('a') + 1);
 
CtrlW = Chr(Ord('w') - Ord('a') + 1);
 
CtrlX = Chr(Ord('x') - Ord('a') + 1);
 
CtrlY = Chr(Ord('y') - Ord('a') + 1);
 
CtrlZ = Chr(Ord('z') - Ord('a') + 1);
 
CharBackSpace = #8;
 
CharTab = #9;
 
CharEnter = #13;
 
CharEscape = #27;
 
CharDelete = #127;
 

Generated by PasDoc 0.12.1 on 2013-02-04 20:26:52