+++ /dev/null
-unit RegExpr;\r
-\r
-{\r
- TRegExpr class library\r
- Delphi Regular Expressions\r
-\r
- Copyright (c) 1999-2004 Andrey V. Sorokin, St.Petersburg, Russia\r
-\r
- You may use this software in any kind of development,\r
- including comercial, redistribute, and modify it freely,\r
- under the following restrictions :\r
- 1. This software is provided as it is, without any kind of\r
- warranty given. Use it at Your own risk.The author is not\r
- responsible for any consequences of use of this software.\r
- 2. The origin of this software may not be mispresented, You\r
- must not claim that You wrote the original software. If\r
- You use this software in any kind of product, it would be\r
- appreciated that there in a information box, or in the\r
- documentation would be an acknowledgement like\r
-\r
- Partial Copyright (c) 2004 Andrey V. Sorokin\r
- http://RegExpStudio.com\r
- mailto:anso@mail.ru\r
-\r
- 3. You may not have any income from distributing this source\r
- (or altered version of it) to other developers. When You\r
- use this product in a comercial package, the source may\r
- not be charged seperatly.\r
- 4. Altered versions must be plainly marked as such, and must\r
- not be misrepresented as being the original software.\r
- 5. RegExp Studio application and all the visual components as \r
- well as documentation is not part of the TRegExpr library \r
- and is not free for usage.\r
-\r
- mailto:anso@mail.ru\r
- http://RegExpStudio.com\r
- http://anso.da.ru/\r
-}\r
-\r
-interface\r
-\r
-// ======== Determine compiler\r
-{$IFDEF VER80} Sorry, TRegExpr is for 32-bits Delphi only. Delphi 1 is not supported (and whos really care today?!). {$ENDIF}\r
-{$IFDEF VER90} {$DEFINE D2} {$ENDIF} // D2\r
-{$IFDEF VER93} {$DEFINE D2} {$ENDIF} // CPPB 1\r
-{$IFDEF VER100} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D3\r
-{$IFDEF VER110} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // CPPB 3\r
-{$IFDEF VER120} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D4\r
-{$IFDEF VER130} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D5\r
-{$IFDEF VER140} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D6\r
-{$IFDEF VER150} {$DEFINE D7} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D7\r
-\r
-// ======== Define base compiler options\r
-{$BOOLEVAL OFF}\r
-{$EXTENDEDSYNTAX ON}\r
-{$LONGSTRINGS ON}\r
-{$OPTIMIZATION ON}\r
-{$IFDEF D6}\r
- {$WARN SYMBOL_PLATFORM OFF} // Suppress .Net warnings\r
-{$ENDIF}\r
-{$IFDEF D7}\r
- {$WARN UNSAFE_CAST OFF} // Suppress .Net warnings\r
- {$WARN UNSAFE_TYPE OFF} // Suppress .Net warnings\r
- {$WARN UNSAFE_CODE OFF} // Suppress .Net warnings\r
-{$ENDIF}\r
-{$IFDEF FPC}\r
- {$MODE DELPHI} // Delphi-compatible mode in FreePascal\r
-{$ENDIF}\r
-\r
-// ======== Define options for TRegExpr engine\r
-{.$DEFINE UniCode} // Unicode support\r
-{$DEFINE RegExpPCodeDump} // p-code dumping (see Dump method)\r
-{$IFNDEF FPC} // the option is not supported in FreePascal\r
- {$DEFINE reRealExceptionAddr} // exceptions will point to appropriate source line, not to Error procedure\r
-{$ENDIF}\r
-{$DEFINE ComplexBraces} // support braces in complex cases\r
-{$IFNDEF UniCode} // the option applicable only for non-UniCode mode\r
- {$DEFINE UseSetOfChar} // Significant optimization by using set of char\r
-{$ENDIF}\r
-{$IFDEF UseSetOfChar}\r
- {$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars\r
-{$ENDIF}\r
-\r
-// ======== Define Pascal-language options\r
-// Define 'UseAsserts' option (do not edit these definitions).\r
-// Asserts are used to catch 'strange bugs' in the TRegExpr implementation (when something goes\r
-// completely wrong). You can switch asserts on/off with the {$C+}/{$C-} compiler options.\r
-{$IFDEF D3} {$DEFINE UseAsserts} {$ENDIF}\r
-{$IFDEF FPC} {$DEFINE UseAsserts} {$ENDIF}\r
-\r
-// Define 'use subroutine parameters default values' option (do not edit this definition).\r
-{$IFDEF D4} {$DEFINE DefParam} {$ENDIF}\r
-\r
-// Define 'OverMeth' options, to use method overloading (do not edit this definitions).\r
-{$IFDEF D5} {$DEFINE OverMeth} {$ENDIF}\r
-{$IFDEF FPC} {$DEFINE OverMeth} {$ENDIF}\r
-\r
-uses\r
- Classes, // TStrings in Split method\r
- SysUtils; // Exception\r
-\r
-type\r
- {$IFDEF UniCode}\r
- PRegExprChar = PWideChar;\r
- RegExprString = WideString;\r
- REChar = WideChar;\r
- {$ELSE}\r
- PRegExprChar = PChar;\r
- RegExprString = AnsiString; //###0.952 was string\r
- REChar = Char;\r
- {$ENDIF}\r
- TREOp = REChar; // internal p-code type //###0.933\r
- PREOp = ^TREOp;\r
- TRENextOff = integer; // internal Next "pointer" (offset to current p-code) //###0.933\r
- PRENextOff = ^TRENextOff; // used for extracting Next "pointers" from compiled r.e. //###0.933\r
- TREBracesArg = integer; // type of {m,n} arguments\r
- PREBracesArg = ^TREBracesArg;\r
-\r
-const\r
- REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units\r
- RENextOffSz = SizeOf (TRENextOff) div SizeOf (REChar); // size of Next 'pointer' -"-\r
- REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-\r
-\r
-type\r
- TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar\r
- of object;\r
-\r
-const\r
- EscChar = '\'; // 'Escape'-char ('\' in common r.e.) used for escaping metachars (\w, \d etc).\r
- RegExprModifierI : boolean = False; // default value for ModifierI\r
- RegExprModifierR : boolean = True; // default value for ModifierR\r
- RegExprModifierS : boolean = True; // default value for ModifierS\r
- RegExprModifierG : boolean = True; // default value for ModifierG\r
- RegExprModifierM : boolean = False; // default value for ModifierM\r
- RegExprModifierX : boolean = False; // default value for ModifierX\r
- RegExprSpaceChars : RegExprString = // default value for SpaceChars\r
- ' '#$9#$A#$D#$C;\r
- RegExprWordChars : RegExprString = // default value for WordChars\r
- '0123456789' //###0.940\r
- + 'abcdefghijklmnopqrstuvwxyz'\r
- + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_';\r
- RegExprLineSeparators : RegExprString =// default value for LineSeparators\r
- #$d#$a{$IFDEF UniCode}+#$b#$c#$2028#$2029#$85{$ENDIF}; //###0.947\r
- RegExprLinePairedSeparator : RegExprString =// default value for LinePairedSeparator\r
- #$d#$a;\r
- { if You need Unix-styled line separators (only \n), then use:\r
- RegExprLineSeparators = #$a;\r
- RegExprLinePairedSeparator = '';\r
- }\r
-\r
-\r
-const\r
- NSUBEXP = 15; // max number of subexpression //###0.929\r
- // Cannot be more than NSUBEXPMAX\r
- // Be careful - don't use values which overflow the CLOSE opcode\r
- // (in this case you'll get a compiler error).\r
- // A big NSUBEXP makes matching slower and requires more stack\r
- NSUBEXPMAX = 255; // Max possible value for NSUBEXP. //###0.945\r
- // Don't change it! It's defined by internal TRegExpr design.\r
-\r
- MaxBracesArg = $7FFFFFFF - 1; // max value for {n,m} arguments //###0.933\r
-\r
- {$IFDEF ComplexBraces}\r
- LoopStackMax = 10; // max depth of loops stack //###0.925\r
- {$ENDIF}\r
-\r
- TinySetLen = 3;\r
- // if range includes more than TinySetLen chars, //###0.934\r
- // then use full (32 bytes) ANYOFFULL instead of ANYOF[BUT]TINYSET\r
- // !!! Attention ! If you change TinySetLen, you must\r
- // change code marked as "//!!!TinySet"\r
-\r
-\r
-type\r
-\r
-{$IFDEF UseSetOfChar}\r
- PSetOfREChar = ^TSetOfREChar;\r
- TSetOfREChar = set of REChar;\r
-{$ENDIF}\r
-\r
- TRegExpr = class;\r
-\r
- TRegExprReplaceFunction = function (ARegExpr : TRegExpr): string\r
- of object;\r
-\r
- TRegExpr = class\r
- private\r
- startp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr starting points\r
- endp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr end points\r
-\r
- {$IFDEF ComplexBraces}\r
- LoopStack : array [1 .. LoopStackMax] of integer; // state before entering loop\r
- LoopStackIdx : integer; // 0 - out of all loops\r
- {$ENDIF}\r
-\r
- // The "internal use only" fields to pass info from compile\r
- // to execute that permits the execute phase to run lots faster on\r
- // simple cases.\r
- regstart : REChar; // char that must begin a match; '\0' if none obvious\r
- reganch : REChar; // is the match anchored (at beginning-of-line only)?\r
- regmust : PRegExprChar; // string (pointer into program) that match must include, or nil\r
- regmlen : integer; // length of regmust string\r
- // Regstart and reganch permit very fast decisions on suitable starting points\r
- // for a match, cutting down the work a lot. Regmust permits fast rejection\r
- // of lines that cannot possibly match. The regmust tests are costly enough\r
- // that regcomp() supplies a regmust only if the r.e. contains something\r
- // potentially expensive (at present, the only such thing detected is * or +\r
- // at the start of the r.e., which can involve a lot of backup). Regmlen is\r
- // supplied because the test in regexec() needs it and regcomp() is computing\r
- // it anyway.\r
- {$IFDEF UseFirstCharSet} //###0.929\r
- FirstCharSet : TSetOfREChar;\r
- {$ENDIF}\r
-\r
- // work variables for Exec's routines - save stack in recursion\r
- reginput : PRegExprChar; // String-input pointer.\r
- fInputStart : PRegExprChar; // Pointer to first char of input string.\r
- fInputEnd : PRegExprChar; // Pointer to char AFTER last char of input string\r
-\r
- // work variables for compiler's routines\r
- regparse : PRegExprChar; // Input-scan pointer.\r
- regnpar : integer; // count.\r
- regdummy : char;\r
- regcode : PRegExprChar; // Code-emit pointer; @regdummy = don't.\r
- regsize : integer; // Code size.\r
-\r
- regexpbeg : PRegExprChar; // only for error handling. Contains\r
- // pointer to beginning of r.e. while compiling\r
- fExprIsCompiled : boolean; // true if r.e. successfully compiled\r
-\r
- // programm is essentially a linear encoding\r
- // of a nondeterministic finite-state machine (aka syntax charts or\r
- // "railroad normal form" in parsing technology). Each node is an opcode\r
- // plus a "next" pointer, possibly plus an operand. "Next" pointers of\r
- // all nodes except BRANCH implement concatenation; a "next" pointer with\r
- // a BRANCH on both ends of it is connecting two alternatives. (Here we\r
- // have one of the subtle syntax dependencies: an individual BRANCH (as\r
- // opposed to a collection of them) is never concatenated with anything\r
- // because of operator precedence.) The operand of some types of node is\r
- // a literal string; for others, it is a node leading into a sub-FSM. In\r
- // particular, the operand of a BRANCH node is the first node of the branch.\r
- // (NB this is *not* a tree structure: the tail of the branch connects\r
- // to the thing following the set of BRANCHes.) The opcodes are:\r
- programm : PRegExprChar; // Unwarranted chumminess with compiler.\r
-\r
- fExpression : PRegExprChar; // source of compiled r.e.\r
- fInputString : PRegExprChar; // input string\r
-\r
- fLastError : integer; // see Error, LastError\r
-\r
- fModifiers : integer; // modifiers\r
- fCompModifiers : integer; // compiler's copy of modifiers\r
- fProgModifiers : integer; // modifiers values from last programm compilation\r
-\r
- fSpaceChars : RegExprString; //###0.927\r
- fWordChars : RegExprString; //###0.929\r
- fInvertCase : TRegExprInvertCaseFunction; //###0.927\r
-\r
- fLineSeparators : RegExprString; //###0.941\r
- fLinePairedSeparatorAssigned : boolean;\r
- fLinePairedSeparatorHead,\r
- fLinePairedSeparatorTail : REChar;\r
- {$IFNDEF UniCode}\r
- fLineSeparatorsSet : set of REChar;\r
- {$ENDIF}\r
-\r
- procedure InvalidateProgramm;\r
- // Mark programm as have to be [re]compiled\r
-\r
- function IsProgrammOk : boolean; //###0.941\r
- // Check if we can use precompiled r.e. or\r
- // [re]compile it if something changed\r
-\r
- function GetExpression : RegExprString;\r
- procedure SetExpression (const s : RegExprString);\r
-\r
- function GetModifierStr : RegExprString;\r
- class function ParseModifiersStr (const AModifiers : RegExprString;\r
- var AModifiersInt : integer) : boolean; //###0.941 class function now\r
- // Parse AModifiers string and return true and set AModifiersInt\r
- // if it's in format 'ismxrg-ismxrg'.\r
- procedure SetModifierStr (const AModifiers : RegExprString);\r
-\r
- function GetModifier (AIndex : integer) : boolean;\r
- procedure SetModifier (AIndex : integer; ASet : boolean);\r
-\r
- procedure Error (AErrorID : integer); virtual; // error handler.\r
- // Default handler raise exception ERegExpr with\r
- // Message = ErrorMsg (AErrorID), ErrorCode = AErrorID\r
- // and CompilerErrorPos = value of property CompilerErrorPos.\r
-\r
-\r
- {==================== Compiler section ===================}\r
- function CompileRegExpr (exp : PRegExprChar) : boolean;\r
- // compile a regular expression into internal code\r
-\r
- procedure Tail (p : PRegExprChar; val : PRegExprChar);\r
- // set the next-pointer at the end of a node chain\r
-\r
- procedure OpTail (p : PRegExprChar; val : PRegExprChar);\r
- // regoptail - regtail on operand of first argument; nop if operandless\r
-\r
- function EmitNode (op : TREOp) : PRegExprChar;\r
- // regnode - emit a node, return location\r
-\r
- procedure EmitC (b : REChar);\r
- // emit (if appropriate) a byte of code\r
-\r
- procedure InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer); //###0.90\r
- // insert an operator in front of already-emitted operand\r
- // Means relocating the operand.\r
-\r
- function ParseReg (paren : integer; var flagp : integer) : PRegExprChar;\r
- // regular expression, i.e. main body or parenthesized thing\r
-\r
- function ParseBranch (var flagp : integer) : PRegExprChar;\r
- // one alternative of an | operator\r
-\r
- function ParsePiece (var flagp : integer) : PRegExprChar;\r
- // something followed by possible [*+?]\r
-\r
- function ParseAtom (var flagp : integer) : PRegExprChar;\r
- // the lowest level\r
-\r
- function GetCompilerErrorPos : integer;\r
- // current pos in r.e. - for error handling\r
-\r
- {$IFDEF UseFirstCharSet} //###0.929\r
- procedure FillFirstCharSet (prog : PRegExprChar);\r
- {$ENDIF}\r
-\r
- {===================== Matching section ==================}\r
- function regrepeat (p : PRegExprChar; AMax : integer) : integer;\r
- // repeatedly match something simple, report how many\r
-\r
- function regnext (p : PRegExprChar) : PRegExprChar;\r
- // dig the "next" pointer out of a node\r
-\r
- function MatchPrim (prog : PRegExprChar) : boolean;\r
- // recursively matching routine\r
-\r
- function ExecPrim (AOffset: integer) : boolean;\r
- // Exec for stored InputString\r
-\r
- {$IFDEF RegExpPCodeDump}\r
- function DumpOp (op : REChar) : RegExprString;\r
- {$ENDIF}\r
-\r
- function GetSubExprMatchCount : integer;\r
- function GetMatchPos (Idx : integer) : integer;\r
- function GetMatchLen (Idx : integer) : integer;\r
- function GetMatch (Idx : integer) : RegExprString;\r
-\r
- function GetInputString : RegExprString;\r
- procedure SetInputString (const AInputString : RegExprString);\r
-\r
- {$IFNDEF UseSetOfChar}\r
- function StrScanCI (s : PRegExprChar; ch : REChar) : PRegExprChar; //###0.928\r
- {$ENDIF}\r
-\r
- procedure SetLineSeparators (const AStr : RegExprString);\r
- procedure SetLinePairedSeparator (const AStr : RegExprString);\r
- function GetLinePairedSeparator : RegExprString;\r
-\r
- public\r
- constructor Create;\r
- destructor Destroy; override;\r
-\r
- class function VersionMajor : integer; //###0.944\r
- class function VersionMinor : integer; //###0.944\r
-\r
- property Expression : RegExprString read GetExpression write SetExpression;\r
- // Regular expression.\r
- // For optimization, TRegExpr automatically compiles it into 'P-code'\r
- // (you can see it with the help of the Dump method) and stores it in internal\r
- // structures. Real [re]compilation occurs only when it is really needed -\r
- // while calling Exec[Next], Substitute, Dump, etc.\r
- // and only if Expression or other properties affecting the P-code were changed\r
- // after the last [re]compilation.\r
- // If any error occurs during [re]compilation, the Error method is called\r
- // (by default Error raises an exception - see below)\r
-\r
- property ModifierStr : RegExprString read GetModifierStr write SetModifierStr;\r
- // Set/get default values of r.e.syntax modifiers. Modifiers in\r
- // r.e. (?ismx-ismx) will replace these default values.\r
- // If you try to set an unsupported modifier, Error will be called\r
- // (by default Error raises exception ERegExpr).\r
-\r
- property ModifierI : boolean index 1 read GetModifier write SetModifier;\r
- // Modifier /i - caseinsensitive, initialized from RegExprModifierI\r
-\r
- property ModifierR : boolean index 2 read GetModifier write SetModifier;\r
- // Modifier /r - use r.e.syntax extended for Russian,\r
- // (was property ExtSyntaxEnabled in previous versions)\r
- // If true, then а-я additionally includes the Russian letter 'ё',\r
- // А-Я additionally includes 'Ё', and а-Я includes all Russian symbols.\r
- // You have to turn it off if it may interfere with your national alphabet.\r
- // , initialized from RegExprModifierR\r
-\r
- property ModifierS : boolean index 3 read GetModifier write SetModifier;\r
- // Modifier /s - '.' works as any char (else as [^\n]),\r
- // , initialized from RegExprModifierS\r
-\r
- property ModifierG : boolean index 4 read GetModifier write SetModifier;\r
- // Switching off modifier /g switches all operators to\r
- // non-greedy style, so if ModifierG = False, then\r
- // all '*' work as '*?', all '+' as '+?' and so on.\r
- // , initialized from RegExprModifierG\r
-\r
- property ModifierM : boolean index 5 read GetModifier write SetModifier;\r
- // Treat string as multiple lines. That is, change `^' and `$' from\r
- // matching at only the very start or end of the string to the start\r
- // or end of any line anywhere within the string.\r
- // , initialized from RegExprModifierM\r
-\r
- property ModifierX : boolean index 6 read GetModifier write SetModifier;\r
- // Modifier /x - eXtended syntax, allow r.e. text formatting,\r
- // see description in the help. Initialized from RegExprModifierX\r
-\r
- function Exec (const AInputString : RegExprString) : boolean; {$IFDEF OverMeth} overload;\r
- {$IFNDEF FPC} // I do not know why FreePascal cannot overload methods with empty param list\r
- function Exec : boolean; overload; //###0.949\r
- {$ENDIF}\r
- function Exec (AOffset: integer) : boolean; overload; //###0.949\r
- {$ENDIF}\r
- // match a programm against a string AInputString\r
- // !!! Exec store AInputString into InputString property\r
- // For Delphi 5 and higher available overloaded versions - first without\r
- // parameter (uses already assigned to InputString property value)\r
- // and second that has integer parameter and is same as ExecPos\r
-\r
- function ExecNext : boolean;\r
- // find next match:\r
- // ExecNext;\r
- // works the same as\r
- // if MatchLen [0] = 0 then ExecPos (MatchPos [0] + 1)\r
- // else ExecPos (MatchPos [0] + MatchLen [0]);\r
- // but it's simpler !\r
- // Raises exception if used without a preceding SUCCESSFUL call to\r
- // Exec* (Exec, ExecPos, ExecNext). So You always must use something like\r
- // if Exec (InputString) then repeat { process results} until not ExecNext;\r
-\r
- function ExecPos (AOffset: integer {$IFDEF DefParam}= 1{$ENDIF}) : boolean;\r
- // find match for InputString starting from AOffset position\r
- // (AOffset=1 - first char of InputString)\r
-\r
- property InputString : RegExprString read GetInputString write SetInputString;\r
- // returns current input string (from last Exec call or last assign\r
- // to this property).\r
- // Any assignment to this property clear Match* properties !\r
-\r
- function Substitute (const ATemplate : RegExprString) : RegExprString;\r
- // Returns ATemplate with '$&' or '$0' replaced by the whole r.e.\r
- // occurrence and '$n' replaced by the occurrence of subexpression #n.\r
- // Since v.0.929 '$' is used instead of '\' (for future extensions\r
- // and for more Perl-compatibility) and accepts more than one digit.\r
- // If you want to place a raw '$' or '\' into the template, use prefix '\'\r
- // Example: '1\$ is $2\\rub\\' -> '1$ is <Match[2]>\rub\'\r
- // If you want to place raw digit after '$n' you must delimit\r
- // n with curly braces '{}'.\r
- // Example: 'a$12bc' -> 'a<Match[12]>bc'\r
- // 'a${1}2bc' -> 'a<Match[1]>2bc'.\r
-\r
- procedure Split (AInputStr : RegExprString; APieces : TStrings);\r
- // Split AInputStr into APieces by r.e. occurencies\r
- // Internally calls Exec[Next]\r
-\r
- function Replace (AInputStr : RegExprString;\r
- const AReplaceStr : RegExprString;\r
- AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) //###0.946\r
- : RegExprString; {$IFDEF OverMeth} overload;\r
- function Replace (AInputStr : RegExprString;\r
- AReplaceFunc : TRegExprReplaceFunction)\r
- : RegExprString; overload;\r
- {$ENDIF}\r
- function ReplaceEx (AInputStr : RegExprString;\r
- AReplaceFunc : TRegExprReplaceFunction)\r
- : RegExprString;\r
- // Returns AInputStr with r.e. occurencies replaced by AReplaceStr\r
- // If AUseSubstitution is true, then AReplaceStr will be used\r
- // as template for Substitution methods.\r
- // For example:\r
- // Expression := '({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*';\r
- // Replace ('BLOCK( test1)', 'def "$1" value "$2"', True);\r
- // will return: def 'BLOCK' value 'test1'\r
- // Replace ('BLOCK( test1)', 'def "$1" value "$2"')\r
- // will return: def "$1" value "$2"\r
- // Internally calls Exec[Next]\r
- // Overloaded version and ReplaceEx operate with call-back function,\r
- // so You can implement really complex functionality.\r
-\r
- property SubExprMatchCount : integer read GetSubExprMatchCount;\r
- // Number of subexpressions has been found in last Exec* call.\r
- // If there are no subexpr. but whole expr was found (Exec* returned True),\r
- // then SubExprMatchCount=0, if no subexpressions nor whole\r
- // r.e. found (Exec* returned false) then SubExprMatchCount=-1.\r
- // Note that some subexpr. may be not found, and for such\r
- // subexpr. MatchPos=MatchLen=-1 and Match=''.\r
- // For example: Expression := '(1)?2(3)?';\r
- // Exec ('123'): SubExprMatchCount=2, Match[0]='123', [1]='1', [2]='3'\r
- // Exec ('12'): SubExprMatchCount=1, Match[0]='12', [1]='1'\r
- // Exec ('23'): SubExprMatchCount=2, Match[0]='23', [1]='', [2]='3'\r
- // Exec ('2'): SubExprMatchCount=0, Match[0]='2'\r
- // Exec ('7') - return False: SubExprMatchCount=-1\r
-\r
- property MatchPos [Idx : integer] : integer read GetMatchPos;\r
- // position of the match of subexpr. #Idx in the string tested by the\r
- // last Exec*. First subexpr. has Idx=1, last - SubExprMatchCount,\r
- // whole r.e. has Idx=0.\r
- // Returns -1 if the r.e. has no such subexpr. or this subexpr.\r
- // was not found in the input string.\r
-\r
- property MatchLen [Idx : integer] : integer read GetMatchLen;\r
- // length of the match of subexpr. #Idx in the string tested by the\r
- // last Exec*. First subexpr. has Idx=1, last - SubExprMatchCount,\r
- // whole r.e. has Idx=0.\r
- // Returns -1 if the r.e. has no such subexpr. or this subexpr.\r
- // was not found in the input string.\r
- // Remember - MatchLen may be 0 (if the r.e. matches an empty string) !\r
-\r
- property Match [Idx : integer] : RegExprString read GetMatch;\r
- // == copy (InputString, MatchPos [Idx], MatchLen [Idx])\r
- // Returns '' if in r.e. no such subexpr. or this subexpr.\r
- // not found in input string.\r
-\r
- function LastError : integer;\r
- // Returns ID of last error, 0 if no errors (unusable if\r
- // Error method raises exception) and clear internal status\r
- // into 0 (no errors).\r
-\r
- function ErrorMsg (AErrorID : integer) : RegExprString; virtual;\r
- // Returns Error message for error with ID = AErrorID.\r
-\r
- property CompilerErrorPos : integer read GetCompilerErrorPos;\r
- // Returns the position in the r.e. where the compiler stopped.\r
- // Useful for error diagnostics\r
-\r
- property SpaceChars : RegExprString read fSpaceChars write fSpaceChars; //###0.927\r
- // Contains chars, treated as /s (initially filled with RegExprSpaceChars\r
- // global constant)\r
-\r
- property WordChars : RegExprString read fWordChars write fWordChars; //###0.929\r
- // Contains chars, treated as /w (initially filled with RegExprWordChars\r
- // global constant)\r
-\r
- property LineSeparators : RegExprString read fLineSeparators write SetLineSeparators; //###0.941\r
- // line separators (like \n in Unix)\r
-\r
- property LinePairedSeparator : RegExprString read GetLinePairedSeparator write SetLinePairedSeparator; //###0.941\r
- // paired line separator (like \r\n in DOS and Windows).\r
- // must contain exactly two chars or no chars at all\r
-\r
- class function InvertCaseFunction (const Ch : REChar) : REChar;\r
- // Converts Ch into upper case if it is in lower case, or into lower\r
- // case if it is in upper case (uses current system locale settings)\r
-\r
- property InvertCase : TRegExprInvertCaseFunction read fInvertCase write fInvertCase; //##0.935\r
- // Set this property if you want to override case-insensitive functionality.\r
- // Create set it to RegExprInvertCaseFunction (InvertCaseFunction by default)\r
-\r
- procedure Compile; //###0.941\r
- // [Re]compile r.e. Useful for example for GUI r.e. editors (to check\r
- // the validity of all properties).\r
-\r
- {$IFDEF RegExpPCodeDump}\r
- function Dump : RegExprString;\r
- // dump a compiled regexp in vaguely comprehensible form\r
- {$ENDIF}\r
- end;\r
-\r
- ERegExpr = class (Exception)\r
- // Exception class that, according to the comment on TRegExpr.Error,\r
- // is raised by the default error handler.\r
- public\r
- ErrorCode : integer; // per the TRegExpr.Error comment: the AErrorID passed to Error\r
- CompilerErrorPos : integer; // per the TRegExpr.Error comment: value of the CompilerErrorPos property\r
- end;\r
-\r
-const\r
- RegExprInvertCaseFunction : TRegExprInvertCaseFunction = {$IFDEF FPC} nil {$ELSE} TRegExpr.InvertCaseFunction{$ENDIF};\r
- // default for InvertCase property\r
-\r
-function ExecRegExpr (const ARegExpr, AInputStr : RegExprString) : boolean;\r
-// true if string AInputStr matches regular expression ARegExpr\r
-// ! will raise an exception if there are syntax errors in ARegExpr\r
-\r
-procedure SplitRegExpr (const ARegExpr, AInputStr : RegExprString; APieces : TStrings);\r
-// Split AInputStr into APieces by r.e. ARegExpr occurencies\r
-\r
-function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString;\r
- AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString; //###0.947\r
-// Returns AInputStr with r.e. occurencies replaced by AReplaceStr\r
-// If AUseSubstitution is true, then AReplaceStr will be used\r
-// as template for Substitution methods.\r
-// For example:\r
-// ReplaceRegExpr ('({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*',\r
-// 'BLOCK( test1)', 'def "$1" value "$2"', True)\r
-// will return: def 'BLOCK' value 'test1'\r
-// ReplaceRegExpr ('({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*',\r
-// 'BLOCK( test1)', 'def "$1" value "$2"')\r
-// will return: def "$1" value "$2"\r
-\r
-function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;\r
-// Replaces all metachars with their safe representation,\r
-// for example 'abc$cd.(' converts into 'abc\$cd\.\('\r
-// This function is useful for r.e. autogeneration from\r
-// user input\r
-\r
-function RegExprSubExpressions (const ARegExpr : string;\r
- ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : integer;\r
-// Makes a list of the subexpressions found in ARegExpr r.e.\r
-// In ASubExprs every item represents a subexpression,\r
-// from first to last, in format:\r
-// String - subexpression text (without '()')\r
-// low word of Object - starting position in ARegExpr, including '('\r
-// if exists! (first position is 1)\r
-// high word of Object - length, including starting '(' and ending ')'\r
-// if exist!\r
-// AExtendedSyntax - must be True if modifier /x will be On while\r
-// using the r.e.\r
-// Useful for GUI editors of r.e. etc (You can find example of using\r
-// in TestRExp.dpr project)\r
-// Returns\r
-// 0 Success. No unbalanced brackets was found;\r
-// -1 There are not enough closing brackets ')';\r
-// -(n+1) At position n was found opening '[' without //###0.942\r
-// corresponding closing ']';\r
-// n At position n was found closing bracket ')' without\r
-// corresponding opening '('.\r
-// If Result <> 0, then ASubExprs can contain empty items or illegal ones\r
-\r
-\r
-implementation\r
-\r
-uses\r
- Windows; // CharUpper/Lower\r
-\r
-const\r
- TRegExprVersionMajor : integer = 0;\r
- TRegExprVersionMinor : integer = 952;\r
- // TRegExpr.VersionMajor/Minor return values of this constants\r
-\r
- MaskModI = 1; // modifier /i bit in fModifiers\r
- MaskModR = 2; // -"- /r\r
- MaskModS = 4; // -"- /s\r
- MaskModG = 8; // -"- /g\r
- MaskModM = 16; // -"- /m\r
- MaskModX = 32; // -"- /x\r
-\r
- {$IFDEF UniCode}\r
- XIgnoredChars = ' '#9#$d#$a;\r
- {$ELSE}\r
- XIgnoredChars = [' ', #9, #$d, #$a];\r
- {$ENDIF}\r
-\r
-{=============================================================}\r
-{=================== WideString functions ====================}\r
-{=============================================================}\r
-\r
-{$IFDEF UniCode}\r
-\r
-function StrPCopy (Dest: PRegExprChar; const Source: RegExprString): PRegExprChar;\r
- // Copies the chars of Source into the buffer at Dest, writes a #0 right\r
- // after them and returns Dest.\r
- // Dest [0 .. length (Source)] is written, so the buffer must hold\r
- // length (Source) + 1 chars.\r
- var\r
-  SrcLen, Idx : Integer;\r
- begin\r
-  SrcLen := length (Source);\r
-  Idx := 0;\r
-  while Idx < SrcLen do begin\r
-    Dest [Idx] := Source [Idx + 1]; // Source is 1-based, Dest is 0-based\r
-    Inc (Idx);\r
-   end;\r
-  Dest [SrcLen] := #0;\r
-  Result := Dest;\r
- end; { of function StrPCopy\r
---------------------------------------------------------------}\r
-\r
-function StrLCopy (Dest, Source: PRegExprChar; MaxLen: Cardinal): PRegExprChar;\r
- // Copies exactly MaxLen chars from Source to Dest and returns Dest.\r
- // NB: this version neither stops at a #0 inside Source nor appends a\r
- // terminating #0 to Dest - the caller is responsible for both.\r
- // MaxLen = 0 copies nothing.\r
- var i: Cardinal;\r
- begin\r
-  // Count upwards in Cardinal: a "0 to MaxLen - 1" bound would wrap around\r
-  // in unsigned arithmetic (range error under $R+) when MaxLen = 0.\r
-  i := 0;\r
-  while i < MaxLen do begin\r
-    Dest [i] := Source [i];\r
-    Inc (i);\r
-   end;\r
-  Result := Dest;\r
- end; { of function StrLCopy\r
---------------------------------------------------------------}\r
-\r
-function StrLen (Str: PRegExprChar): Cardinal;\r
- // Returns the number of chars that precede the first #0 in Str.\r
- var\r
-  Scan : PRegExprChar;\r
- begin\r
-  Result := 0;\r
-  Scan := Str;\r
-  while Scan^ <> #0 do begin\r
-    Inc (Scan);\r
-    Inc (Result);\r
-   end;\r
- end; { of function StrLen\r
---------------------------------------------------------------}\r
-\r
-function StrPos (Str1, Str2: PRegExprChar): PRegExprChar;\r
- // Returns a pointer to the first occurrence of Str2 inside Str1,\r
- // or nil if Pos does not find it.\r
- // Both arguments are converted to RegExprString for the search.\r
- var FoundAt: Integer;\r
- begin\r
-  FoundAt := Pos (RegExprString (Str2), RegExprString (Str1));\r
-  if FoundAt > 0\r
-   then Result := Str1 + (FoundAt - 1) // Pos is 1-based, the pointer offset is 0-based\r
-   else Result := nil;\r
- end; { of function StrPos\r
---------------------------------------------------------------}\r
-\r
-function StrLComp (Str1, Str2: PRegExprChar; MaxLen: Cardinal): Integer;\r
- // Compares the first MaxLen chars of Str1 and Str2.\r
- // Returns 1 if the Str1 prefix is greater, -1 if it is less, 0 if equal.\r
- var Head1, Head2: RegExprString;\r
- begin\r
-  // Take both prefixes once, then compare them as strings.\r
-  Head1 := Copy (RegExprString (Str1), 1, MaxLen);\r
-  Head2 := Copy (RegExprString (Str2), 1, MaxLen);\r
-  if Head1 > Head2\r
-   then Result := 1\r
-  else if Head1 < Head2\r
-   then Result := -1\r
-  else Result := 0;\r
- end; { function StrLComp\r
---------------------------------------------------------------}\r
-\r
-function StrScan (Str: PRegExprChar; Chr: WideChar): PRegExprChar;\r
- // WideChar counterpart of SysUtils.StrScan, compiled only when UniCode\r
- // is defined.\r
- // Returns a pointer to the first occurrence of Chr in the #0-terminated\r
- // string Str, or nil if Chr does not occur in it.\r
- // As in SysUtils.StrScan the terminator counts as part of the string:\r
- // StrScan (Str, #0) returns a pointer to the terminator, not nil.\r
- begin\r
-  Result := nil;\r
-  while (Str^ <> #0) and (Str^ <> Chr)\r
-   do Inc (Str);\r
-  // Here Str^ is either Chr or the terminator. Testing against Chr\r
-  // (rather than "<> #0") also handles Chr = #0 correctly.\r
-  if Str^ = Chr\r
-   then Result := Str;\r
- end; { of function StrScan\r
---------------------------------------------------------------}\r
-\r
-{$ENDIF}\r
-\r
-\r
-{=============================================================}\r
-{===================== Global functions ======================}\r
-{=============================================================}\r
-\r
-function ExecRegExpr (const ARegExpr, AInputStr : RegExprString) : boolean;\r
- // One-shot match: creates a temporary TRegExpr, assigns ARegExpr to its\r
- // Expression and returns the result of Exec (AInputStr).\r
- // The temporary object is freed even if an exception is raised.\r
- begin\r
-  with TRegExpr.Create do try\r
-    Expression := ARegExpr;\r
-    Result := Exec (AInputStr);\r
-   finally Free;\r
-   end;\r
- end; { of function ExecRegExpr\r
---------------------------------------------------------------}\r
-\r
-procedure SplitRegExpr (const ARegExpr, AInputStr : RegExprString; APieces : TStrings);\r
- // One-shot split: clears APieces, then lets a temporary TRegExpr with\r
- // Expression = ARegExpr split AInputStr into APieces.\r
- // The temporary object is freed even if an exception is raised.\r
- var Splitter : TRegExpr;\r
- begin\r
-  APieces.Clear;\r
-  Splitter := TRegExpr.Create;\r
-  try\r
-    Splitter.Expression := ARegExpr;\r
-    Splitter.Split (AInputStr, APieces);\r
-   finally\r
-    Splitter.Free;\r
-   end;\r
- end; { of procedure SplitRegExpr\r
---------------------------------------------------------------}\r
-\r
-function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString;\r
-      AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString;\r
- // One-shot replace: creates a temporary TRegExpr with Expression = ARegExpr\r
- // and returns the result of its Replace (AInputStr, AReplaceStr,\r
- // AUseSubstitution). The temporary object is freed even if an exception\r
- // is raised.\r
- var Replacer : TRegExpr;\r
- begin\r
-  Replacer := TRegExpr.Create;\r
-  try\r
-    Replacer.Expression := ARegExpr;\r
-    Result := Replacer.Replace (AInputStr, AReplaceStr, AUseSubstitution);\r
-   finally\r
-    Replacer.Free;\r
-   end;\r
- end; { of function ReplaceRegExpr\r
---------------------------------------------------------------}\r
-\r
-function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;\r
-// Returns a copy of AStr in which every character listed in RegExprMetaSet\r
-// is preceded by EscChar, so the result can be used as a literal pattern.\r
- const\r
-  RegExprMetaSet : RegExprString = '^$.[()|?+*'+EscChar+'{'\r
-   + ']}'; // - this last are additional to META.\r
-  // Very similar to META array, but slightly changed.\r
-  // !Any changes in META array must be synchronized with this set.\r
- var\r
-  Idx, ChunkStart : integer;\r
- begin\r
-  Result := '';\r
-  ChunkStart := 1; // first character not yet copied to Result\r
-  for Idx := 1 to length (AStr) do\r
-   if Pos (AStr [Idx], RegExprMetaSet) > 0 then begin\r
-    // flush the untouched run, then the escaped metacharacter\r
-    Result := Result + System.Copy (AStr, ChunkStart, Idx - ChunkStart)\r
-     + EscChar + AStr [Idx];\r
-    ChunkStart := Idx + 1;\r
-   end;\r
-  Result := Result + System.Copy (AStr, ChunkStart, MaxInt); // Tail\r
- end; { of function QuoteRegExprMetaChars\r
---------------------------------------------------------------}\r
-\r
-function RegExprSubExpressions (const ARegExpr : string;\r
- ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : integer;\r
-// Splits ARegExpr into its bracketed subexpressions.\r
-//  ASubExprs - cleared, then filled: item 0 is the whole r.e., following\r
-//   items are the subexpressions (without their brackets) in order of\r
-//   their opening '('. Objects[n] packs the position and length as\r
-//   StartPos or (Length shl 16).\r
-//  AExtendedSyntax - initial state of the /x modifier; inline (?x) / (?-x)\r
-//   groups change it while scanning.\r
-// Returns 0 when brackets are balanced, -1 for an unclosed '(',\r
-// the position of an unmatched ')' (> 0), or -(position + 1) of an\r
-// unclosed '['.\r
- type\r
-  TStackItemRec = record //###0.945\r
-   SubExprIdx : integer;\r
-   StartPos : integer;\r
-  end;\r
-  TStackArray = packed array [0 .. NSUBEXPMAX - 1] of TStackItemRec;\r
- var\r
-  Len, SubExprLen : integer;\r
-  i, i0 : integer;\r
-  Modif : integer;\r
-  Stack : ^TStackArray; //###0.945\r
-  StackIdx, StackSz : integer;\r
- begin\r
-  Result := 0; // no unbalanced brackets found at this very moment\r
-\r
-  ASubExprs.Clear; // I don't think that adding to non empty list\r
-  // can be useful, so I simplified algorithm to work only with empty list\r
-\r
-  Len := length (ARegExpr); // some optimization tricks\r
-\r
-  // first we have to calculate number of subexpression to reserve\r
-  // space in Stack array (may be we'll reserve more than needed, but\r
-  // it's faster than memory reallocation during parsing)\r
-  StackSz := 1; // add 1 for entire r.e.\r
-  for i := 1 to Len do\r
-   if ARegExpr [i] = '('\r
-    then inc (StackSz);\r
-// SetLength (Stack, StackSz); //###0.945\r
-  GetMem (Stack, SizeOf (TStackItemRec) * StackSz);\r
-  try\r
-\r
-   StackIdx := 0;\r
-   i := 1;\r
-   while (i <= Len) do begin\r
-    case ARegExpr [i] of\r
-     '(': begin\r
-       if (i < Len) and (ARegExpr [i + 1] = '?') then begin\r
-         // this is not subexpression, but comment or other\r
-         // Perl extension. We must check is it (?ismxrg-ismxrg)\r
-         // and change AExtendedSyntax if /x is changed.\r
-         inc (i, 2); // skip '(?'\r
-         i0 := i;\r
-         while (i <= Len) and (ARegExpr [i] <> ')')\r
-          do inc (i);\r
-         if i > Len\r
-          then Result := -1 // unbalanced '('\r
-          else begin\r
-            // ParseModifiersStr only ORs/ANDs bits into its var argument,\r
-            // so seed it with the current /x state instead of garbage.\r
-            if AExtendedSyntax\r
-             then Modif := MaskModX\r
-             else Modif := 0;\r
-            // the modifier text lies between '(?' and ')', i.e. it\r
-            // starts at i0 (not at i, which is the closing bracket)\r
-            if TRegExpr.ParseModifiersStr (System.Copy (ARegExpr, i0, i - i0), Modif)\r
-             then AExtendedSyntax := (Modif and MaskModX) <> 0;\r
-           end;\r
-        end\r
-       else begin // subexpression starts\r
-         ASubExprs.Add (''); // just reserve space\r
-         with Stack [StackIdx] do begin\r
-           SubExprIdx := ASubExprs.Count - 1;\r
-           StartPos := i;\r
-          end;\r
-         inc (StackIdx);\r
-        end;\r
-      end;\r
-     ')': begin\r
-       if StackIdx = 0\r
-        then Result := i // unbalanced ')'\r
-        else begin\r
-          dec (StackIdx);\r
-          with Stack [StackIdx] do begin\r
-            SubExprLen := i - StartPos + 1;\r
-            ASubExprs.Objects [SubExprIdx] :=\r
-             TObject (StartPos or (SubExprLen ShL 16));\r
-            ASubExprs [SubExprIdx] := System.Copy (\r
-             ARegExpr, StartPos + 1, SubExprLen - 2); // add without brackets\r
-           end;\r
-         end;\r
-      end;\r
-     EscChar: inc (i); // skip quoted symbol\r
-     '[': begin\r
-       // we have to skip character ranges at once, because they can\r
-       // contain '#', and '#' in it must NOT be recognized as eXtended\r
-       // comment beginning!\r
-       i0 := i;\r
-       inc (i);\r
-       // a ']' right after '[' is a literal, not an empty range;\r
-       // the bound check protects patterns that end with '['\r
-       if (i <= Len) and (ARegExpr [i] = ']')\r
-        then inc (i);\r
-       while (i <= Len) and (ARegExpr [i] <> ']') do\r
-        if ARegExpr [i] = EscChar //###0.942\r
-         then inc (i, 2) // skip 'escaped' char to prevent stopping at '\]'\r
-         else inc (i);\r
-       if (i > Len) or (ARegExpr [i] <> ']') //###0.942\r
-        then Result := - (i0 + 1); // unbalanced '[' //###0.942\r
-      end;\r
-     '#': if AExtendedSyntax then begin\r
-       // skip eXtended comments\r
-       while (i <= Len) and (ARegExpr [i] <> #$d) and (ARegExpr [i] <> #$a)\r
-        // do not use [#$d, #$a] due to UniCode compatibility\r
-        do inc (i);\r
-       while (i + 1 <= Len) and ((ARegExpr [i + 1] = #$d) or (ARegExpr [i + 1] = #$a))\r
-        do inc (i); // attempt to work with different kinds of line separators\r
-       // now we are at the line separator that must be skipped.\r
-      end;\r
-     // here is no 'else' clause - we simply skip ordinary chars\r
-    end; // of case\r
-    inc (i); // skip scanned char\r
-    // ! can move after Len due to skipping quoted symbol\r
-   end;\r
-\r
-   // check brackets balance\r
-   if StackIdx <> 0\r
-    then Result := -1; // unbalanced '('\r
-\r
-   // check if entire r.e. added\r
-   if (ASubExprs.Count = 0)\r
-    or ((integer (ASubExprs.Objects [0]) and $FFFF) <> 1)\r
-    or (((integer (ASubExprs.Objects [0]) ShR 16) and $FFFF) <> Len)\r
-    // whole r.e. wasn't added because it isn't bracketed\r
-    // well, we add it now:\r
-    then ASubExprs.InsertObject (0, ARegExpr, TObject ((Len ShL 16) or 1));\r
-\r
-  finally FreeMem (Stack);\r
-  end;\r
- end; { of function RegExprSubExpressions\r
---------------------------------------------------------------}\r
-\r
-\r
-\r
-const\r
-// Opcodes of the compiled p-code. Values are consecutive; OPEN and CLOSE\r
-// each start a run of NSUBEXP codes, so they must stay last.\r
- MAGIC = TREOp (216);// program signature\r
-\r
-// name opcode opnd? meaning\r
- EEND = TREOp (0); // - End of program\r
- BOL = TREOp (1); // - Match "" at beginning of line\r
- EOL = TREOp (2); // - Match "" at end of line\r
- ANY = TREOp (3); // - Match any one character\r
- ANYOF = TREOp (4); // Str Match any character in string Str\r
- ANYBUT = TREOp (5); // Str Match any char. not in string Str\r
- BRANCH = TREOp (6); // Node Match this alternative, or the next\r
- BACK = TREOp (7); // - Jump backward (Next < 0)\r
- EXACTLY = TREOp (8); // Str Match string Str\r
- NOTHING = TREOp (9); // - Match empty string\r
- STAR = TREOp (10); // Node Match this (simple) thing 0 or more times\r
- PLUS = TREOp (11); // Node Match this (simple) thing 1 or more times\r
- ANYDIGIT = TREOp (12); // - Match any digit (equiv [0-9])\r
- NOTDIGIT = TREOp (13); // - Match not digit (equiv [^0-9])\r
- ANYLETTER = TREOp (14); // - Match any letter from property WordChars\r
- NOTLETTER = TREOp (15); // - Match not letter from property WordChars\r
- ANYSPACE = TREOp (16); // - Match any space char (see property SpaceChars)\r
- NOTSPACE = TREOp (17); // - Match not space char (see property SpaceChars)\r
- BRACES = TREOp (18); // Node,Min,Max Match this (simple) thing from Min to Max times.\r
- // Min and Max are TREBracesArg\r
- COMMENT = TREOp (19); // - Comment ;)\r
- EXACTLYCI = TREOp (20); // Str Match string Str case insensitive\r
- ANYOFCI = TREOp (21); // Str Match any character in string Str, case insensitive\r
- ANYBUTCI = TREOp (22); // Str Match any char. not in string Str, case insensitive\r
- LOOPENTRY = TREOp (23); // Node Start of loop (Node - LOOP for this loop)\r
- LOOP = TREOp (24); // Node,Min,Max,LoopEntryJmp - back jump for LOOPENTRY.\r
- // Min and Max are TREBracesArg\r
- // Node - next node in sequence,\r
- // LoopEntryJmp - associated LOOPENTRY node addr\r
- ANYOFTINYSET= TREOp (25); // Chrs Match any one char from Chrs (exactly TinySetLen chars)\r
- ANYBUTTINYSET=TREOp (26); // Chrs Match any one char not in Chrs (exactly TinySetLen chars)\r
- ANYOFFULLSET= TREOp (27); // Set Match any one char from set of char\r
- // - very fast (one CPU instruction !) but takes 32 bytes of p-code\r
- BSUBEXP = TREOp (28); // Idx Match previously matched subexpression #Idx (stored as REChar) //###0.936\r
- BSUBEXPCI = TREOp (29); // Idx -"- in case-insensitive mode\r
-\r
- // Non-Greedy Style Ops //###0.940\r
- STARNG = TREOp (30); // Same as STAR but in non-greedy mode\r
- PLUSNG = TREOp (31); // Same as PLUS but in non-greedy mode\r
- BRACESNG = TREOp (32); // Same as BRACES but in non-greedy mode\r
- LOOPNG = TREOp (33); // Same as LOOP but in non-greedy mode\r
-\r
- // Multiline mode \m\r
- BOLML = TREOp (34); // - Match "" at beginning of line\r
- EOLML = TREOp (35); // - Match "" at end of line\r
- ANYML = TREOp (36); // - Match any one character\r
-\r
- // Word boundary\r
- BOUND = TREOp (37); // Match "" between words //###0.943\r
- NOTBOUND = TREOp (38); // Match "" not between words //###0.943\r
-\r
- // !!! Change OPEN value if you add new opcodes !!!\r
-\r
- OPEN = TREOp (39); // - Mark this point in input as start of \n\r
- // OPEN + 1 is \1, etc.\r
- CLOSE = TREOp (ord (OPEN) + NSUBEXP);\r
- // - Analogous to OPEN.\r
-\r
- // !!! Don't add new OpCodes after CLOSE !!!\r
-\r
-// We work with p-code thru pointers, compatible with PRegExprChar.\r
-// Note: all code components (TRENextOff, TREOp, TREBracesArg, etc)\r
-// must have lengths that can be divided by SizeOf (REChar) !\r
-// A node is a TREOp opcode followed by a Next "pointer" of TRENextOff type.\r
-// The Next is an offset from the opcode of the node containing it.\r
-// An operand, if any, simply follows the node. (Note that much of\r
-// the code generation knows about this implicit relationship!)\r
-// Using TRENextOff=integer speed up p-code processing.\r
-\r
-// Opcodes description:\r
-//\r
-// BRANCH The set of branches constituting a single choice are hooked\r
-// together with their "next" pointers, since precedence prevents\r
-// anything being concatenated to any individual branch. The\r
-// "next" pointer of the last BRANCH in a choice points to the\r
-// thing following the whole choice. This is also where the\r
-// final "next" pointer of each individual branch points; each\r
-// branch starts with the operand node of a BRANCH node.\r
-// BACK Normal "next" pointers all implicitly point forward; BACK\r
-// exists to make loop structures possible.\r
-// STAR,PLUS,BRACES '?', and complex '*' and '+', are implemented as\r
-// circular BRANCH structures using BACK. Complex '{min,max}'\r
-// - as pair LOOPENTRY-LOOP (see below). Simple cases (one\r
-// character per match) are implemented with STAR, PLUS and\r
-// BRACES for speed and to minimize recursive plunges.\r
-// LOOPENTRY,LOOP {min,max} are implemented as special pair\r
-// LOOPENTRY-LOOP. Each LOOPENTRY initialize loopstack for\r
-// current level.\r
-// OPEN,CLOSE are numbered at compile time.\r
-\r
-\r
-{=============================================================}\r
-{================== Error handling section ===================}\r
-{=============================================================}\r
-\r
-const\r
-// Error identifiers accepted by TRegExpr.ErrorMsg and returned by\r
-// TRegExpr.LastError. reeOk (0) means "no error"; the numbering is not\r
-// contiguous, so do not iterate over it.\r
- reeOk = 0;\r
- reeCompNullArgument = 100;\r
- reeCompRegexpTooBig = 101;\r
- reeCompParseRegTooManyBrackets = 102;\r
- reeCompParseRegUnmatchedBrackets = 103;\r
- reeCompParseRegUnmatchedBrackets2 = 104;\r
- reeCompParseRegJunkOnEnd = 105;\r
- reePlusStarOperandCouldBeEmpty = 106;\r
- reeNestedSQP = 107;\r
- reeBadHexDigit = 108;\r
- reeInvalidRange = 109;\r
- reeParseAtomTrailingBackSlash = 110;\r
- reeNoHexCodeAfterBSlashX = 111;\r
- reeHexCodeAfterBSlashXTooBig = 112;\r
- reeUnmatchedSqBrackets = 113;\r
- reeInternalUrp = 114;\r
- reeQPSBFollowsNothing = 115;\r
- reeTrailingBackSlash = 116;\r
- reeRarseAtomInternalDisaster = 119;\r
- reeBRACESArgTooBig = 122;\r
- reeBracesMinParamGreaterMax = 124;\r
- reeUnclosedComment = 125;\r
- reeComplexBracesNotImplemented = 126;\r
- reeUrecognizedModifier = 127;\r
- reeBadLinePairedSeparator = 128;\r
- reeRegRepeatCalledInappropriately = 1000;\r
- reeMatchPrimMemoryCorruption = 1001;\r
- reeMatchPrimCorruptedPointers = 1002;\r
- reeNoExpression = 1003;\r
- reeCorruptedProgram = 1004;\r
- reeNoInpitStringSpecified = 1005;\r
- reeOffsetMustBeGreaterThen0 = 1006;\r
- reeExecNextWithoutExec = 1007;\r
- reeGetInputStringWithoutInputString = 1008;\r
- reeDumpCorruptedOpcode = 1011;\r
- reeModifierUnsupported = 1013;\r
- reeLoopStackExceeded = 1014;\r
- reeLoopWithoutEntry = 1015;\r
- reeBadPCodeImported = 2000;\r
-\r
-function TRegExpr.ErrorMsg (AErrorID : integer) : RegExprString;\r
-// Translates an error identifier (one of the ree* constants) into a\r
-// human-readable message. Identifiers without a dedicated message give\r
-// 'Unknown error'.\r
- begin\r
-  case AErrorID of\r
-   reeOk: Result := 'No errors';\r
-   reeCompNullArgument: Result := 'TRegExpr(comp): Null Argument';\r
-   reeCompRegexpTooBig: Result := 'TRegExpr(comp): Regexp Too Big';\r
-   reeCompParseRegTooManyBrackets: Result := 'TRegExpr(comp): ParseReg Too Many ()';\r
-   reeCompParseRegUnmatchedBrackets: Result := 'TRegExpr(comp): ParseReg Unmatched ()';\r
-   reeCompParseRegUnmatchedBrackets2: Result := 'TRegExpr(comp): ParseReg Unmatched ()';\r
-   reeCompParseRegJunkOnEnd: Result := 'TRegExpr(comp): ParseReg Junk On End';\r
-   reePlusStarOperandCouldBeEmpty: Result := 'TRegExpr(comp): *+ Operand Could Be Empty';\r
-   reeNestedSQP: Result := 'TRegExpr(comp): Nested *?+';\r
-   reeBadHexDigit: Result := 'TRegExpr(comp): Bad Hex Digit';\r
-   reeInvalidRange: Result := 'TRegExpr(comp): Invalid [] Range';\r
-   reeParseAtomTrailingBackSlash: Result := 'TRegExpr(comp): Parse Atom Trailing \';\r
-   reeNoHexCodeAfterBSlashX: Result := 'TRegExpr(comp): No Hex Code After \x';\r
-   reeHexCodeAfterBSlashXTooBig: Result := 'TRegExpr(comp): Hex Code After \x Is Too Big';\r
-   reeUnmatchedSqBrackets: Result := 'TRegExpr(comp): Unmatched []';\r
-   reeInternalUrp: Result := 'TRegExpr(comp): Internal Urp';\r
-   reeQPSBFollowsNothing: Result := 'TRegExpr(comp): ?+*{ Follows Nothing';\r
-   reeTrailingBackSlash: Result := 'TRegExpr(comp): Trailing \';\r
-   reeRarseAtomInternalDisaster: Result := 'TRegExpr(comp): RarseAtom Internal Disaster';\r
-   reeBRACESArgTooBig: Result := 'TRegExpr(comp): BRACES Argument Too Big';\r
-   reeBracesMinParamGreaterMax: Result := 'TRegExpr(comp): BRACE Min Param Greater then Max';\r
-   reeUnclosedComment: Result := 'TRegExpr(comp): Unclosed (?#Comment)';\r
-   reeComplexBracesNotImplemented: Result := 'TRegExpr(comp): If you want take part in beta-testing BRACES ''{min,max}'' and non-greedy ops ''*?'', ''+?'', ''??'' for complex cases - remove ''.'' from {.$DEFINE ComplexBraces}';\r
-   reeUrecognizedModifier: Result := 'TRegExpr(comp): Urecognized Modifier';\r
-   reeBadLinePairedSeparator: Result := 'TRegExpr(comp): LinePairedSeparator must countain two different chars or no chars at all';\r
-\r
-   reeRegRepeatCalledInappropriately: Result := 'TRegExpr(exec): RegRepeat Called Inappropriately';\r
-   reeMatchPrimMemoryCorruption: Result := 'TRegExpr(exec): MatchPrim Memory Corruption';\r
-   reeMatchPrimCorruptedPointers: Result := 'TRegExpr(exec): MatchPrim Corrupted Pointers';\r
-   reeNoExpression: Result := 'TRegExpr(exec): Not Assigned Expression Property';\r
-   reeCorruptedProgram: Result := 'TRegExpr(exec): Corrupted Program';\r
-   reeNoInpitStringSpecified: Result := 'TRegExpr(exec): No Input String Specified';\r
-   reeOffsetMustBeGreaterThen0: Result := 'TRegExpr(exec): Offset Must Be Greater Then 0';\r
-   reeExecNextWithoutExec: Result := 'TRegExpr(exec): ExecNext Without Exec[Pos]';\r
-   reeGetInputStringWithoutInputString: Result := 'TRegExpr(exec): GetInputString Without InputString';\r
-   reeDumpCorruptedOpcode: Result := 'TRegExpr(dump): Corrupted Opcode';\r
-   // raised by SetModifierStr / GetModifier / SetModifier; previously it\r
-   // fell through to 'Unknown error'\r
-   reeModifierUnsupported: Result := 'TRegExpr(exec): Modifier Unsupported';\r
-   reeLoopStackExceeded: Result := 'TRegExpr(exec): Loop Stack Exceeded';\r
-   reeLoopWithoutEntry: Result := 'TRegExpr(exec): Loop Without LoopEntry !';\r
-\r
-   reeBadPCodeImported: Result := 'TRegExpr(misc): Bad p-code imported';\r
-   else Result := 'Unknown error';\r
-  end;\r
- end; { of function TRegExpr.ErrorMsg\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.LastError : integer;\r
-// Returns the stored error code and resets it to reeOk, so a second call\r
-// without a new error in between yields reeOk.\r
- begin\r
-  Result := fLastError;\r
-  fLastError := reeOk; // reading the code clears it\r
- end; { of function TRegExpr.LastError\r
---------------------------------------------------------------}\r
-\r
-\r
-{=============================================================}\r
-{===================== Common section ========================}\r
-{=============================================================}\r
-\r
-class function TRegExpr.VersionMajor : integer; //###0.944\r
-// Major part of the library version (constant TRegExprVersionMajor).\r
- begin\r
-  Result := TRegExprVersionMajor;\r
- end; { of class function TRegExpr.VersionMajor\r
---------------------------------------------------------------}\r
-\r
-class function TRegExpr.VersionMinor : integer; //###0.944\r
-// Minor part of the library version (constant TRegExprVersionMinor).\r
- begin\r
-  Result := TRegExprVersionMinor;\r
- end; { of class function TRegExpr.VersionMinor\r
---------------------------------------------------------------}\r
-\r
-constructor TRegExpr.Create;\r
-// Creates an engine with no expression, no input string and no compiled\r
-// program, and seeds every option from the unit-level RegExpr* defaults.\r
- begin\r
-  inherited;\r
-\r
-  // nothing allocated yet\r
-  programm := nil;\r
-  fExpression := nil;\r
-  fInputString := nil;\r
-  regexpbeg := nil;\r
-  fExprIsCompiled := false;\r
-\r
-  // modifiers\r
-  ModifierI := RegExprModifierI;\r
-  ModifierR := RegExprModifierR;\r
-  ModifierS := RegExprModifierS;\r
-  ModifierG := RegExprModifierG;\r
-  ModifierM := RegExprModifierM; //###0.940\r
-\r
-  // character classes and case inversion\r
-  SpaceChars := RegExprSpaceChars; //###0.927\r
-  WordChars := RegExprWordChars; //###0.929\r
-  fInvertCase := RegExprInvertCaseFunction; //###0.927\r
-\r
-  // line separators\r
-  fLineSeparators := RegExprLineSeparators; //###0.941\r
-  LinePairedSeparator := RegExprLinePairedSeparator; //###0.941\r
- end; { of constructor TRegExpr.Create\r
---------------------------------------------------------------}\r
-\r
-destructor TRegExpr.Destroy;\r
-// Releases the compiled program, the expression copy and the input string\r
-// copy, then lets the ancestor finish destruction.\r
- begin\r
-  if programm <> nil then begin\r
-   FreeMem (programm);\r
-   programm := nil;\r
-  end;\r
-  if fExpression <> nil then begin\r
-   FreeMem (fExpression);\r
-   fExpression := nil;\r
-  end;\r
-  if fInputString <> nil then begin\r
-   FreeMem (fInputString);\r
-   fInputString := nil;\r
-  end;\r
-  inherited; // ancestor clean-up was missing from the destructor chain\r
- end; { of destructor TRegExpr.Destroy\r
---------------------------------------------------------------}\r
-\r
-class function TRegExpr.InvertCaseFunction (const Ch : REChar) : REChar;\r
-// Returns Ch with its case flipped: the upper-case form when that differs\r
-// from Ch, otherwise the lower-case form. In UniCode builds characters\r
-// >= #128 are returned unchanged.\r
- begin\r
- {$IFDEF UniCode}\r
- if Ch >= #128\r
- then Result := Ch\r
- else\r
- {$ENDIF}\r
- begin\r
- // NOTE(review): the non-FPC branch casts the character itself to PChar,\r
- // presumably the single-character calling form of CharUpper/CharLower -\r
- // confirm against the platform API documentation.\r
- Result := {$IFDEF FPC}AnsiUpperCase (Ch) [1]{$ELSE} REChar (CharUpper (PChar (Ch))){$ENDIF};\r
- if Result = Ch\r
- then Result := {$IFDEF FPC}AnsiLowerCase (Ch) [1]{$ELSE} REChar (CharLower (PChar (Ch))){$ENDIF};\r
- end;\r
- end; { of function TRegExpr.InvertCaseFunction\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetExpression : RegExprString;\r
-// Returns the stored expression text, or an empty string when no\r
-// expression has been assigned.\r
- begin\r
-  if fExpression = nil\r
-   then Result := ''\r
-   else Result := fExpression;\r
- end; { of function TRegExpr.GetExpression\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.SetExpression (const s : RegExprString);\r
-// Stores a private copy of the expression text s and discards the\r
-// compiled program so that it is rebuilt from the new text.\r
-// Assigning an empty string clears the expression.\r
- var\r
-  Len : integer; //###0.950\r
- begin\r
-  if (s <> fExpression) or not fExprIsCompiled then begin\r
-   fExprIsCompiled := false;\r
-   if fExpression <> nil then begin\r
-    FreeMem (fExpression);\r
-    fExpression := nil;\r
-   end;\r
-   if s <> '' then begin\r
-    Len := length (s); //###0.950\r
-    GetMem (fExpression, (Len + 1) * SizeOf (REChar));\r
-//  StrPCopy (fExpression, s); //###0.950 replaced due to StrPCopy limitation of 255 chars\r
-    {$IFDEF UniCode}\r
-    StrPCopy (fExpression, Copy (s, 1, Len)); //###0.950\r
-    {$ELSE}\r
-    StrLCopy (fExpression, PRegExprChar (s), Len); //###0.950\r
-    {$ENDIF UniCode}\r
-   end;\r
-   // Always drop the old program, also when the expression is cleared:\r
-   // otherwise the previous pattern would silently stay in use.\r
-   InvalidateProgramm; //###0.941\r
-  end;\r
- end; { of procedure TRegExpr.SetExpression\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetSubExprMatchCount : integer;\r
-// Index of the highest subexpression that has both a start and an end\r
-// pointer recorded (0 when only the whole match may be set), or -1 when\r
-// there is no input string.\r
- begin\r
-  Result := -1;\r
-  if not Assigned (fInputString)\r
-   then EXIT;\r
-  // walk down from the last slot until a fully recorded match is found\r
-  Result := NSUBEXP - 1;\r
-  while (Result > 0)\r
-    and not ((startp [Result] <> nil) and (endp [Result] <> nil))\r
-   do dec (Result);\r
- end; { of function TRegExpr.GetSubExprMatchCount\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetMatchPos (Idx : integer) : integer;\r
-// 1-based position of match #Idx inside the input string, or -1 when Idx\r
-// is out of range, there is no input string, or the match is not recorded.\r
- begin\r
-  Result := -1;\r
-  if (Idx < 0) or (Idx >= NSUBEXP) or not Assigned (fInputString)\r
-   then EXIT;\r
-  if Assigned (startp [Idx]) and Assigned (endp [Idx])\r
-   then Result := (startp [Idx] - fInputString) + 1;\r
- end; { of function TRegExpr.GetMatchPos\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetMatchLen (Idx : integer) : integer;\r
-// Length in characters of match #Idx, or -1 when Idx is out of range,\r
-// there is no input string, or the match is not recorded.\r
- begin\r
-  Result := -1;\r
-  if (Idx < 0) or (Idx >= NSUBEXP) or not Assigned (fInputString)\r
-   then EXIT;\r
-  if Assigned (startp [Idx]) and Assigned (endp [Idx])\r
-   then Result := endp [Idx] - startp [Idx];\r
- end; { of function TRegExpr.GetMatchLen\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetMatch (Idx : integer) : RegExprString;\r
-// Text of match #Idx, or an empty string when Idx is out of range, there\r
-// is no input string, or the match is not recorded.\r
- begin\r
-  Result := '';\r
-  if (Idx < 0) or (Idx >= NSUBEXP) or not Assigned (fInputString)\r
-   then EXIT;\r
-  if Assigned (startp [Idx]) and Assigned (endp [Idx])\r
-   then SetString (Result, startp [Idx], endp [Idx] - startp [Idx]);\r
- end; { of function TRegExpr.GetMatch\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetModifierStr : RegExprString;\r
-// Renders the modifier state as '<on>-<off>': letters of enabled modifiers\r
-// come before the '-', letters of disabled ones after it. The '-' is\r
-// omitted when every modifier is enabled.\r
- var\r
-  OnPart, OffPart : RegExprString;\r
-\r
- procedure Note (IsOn : boolean; const Letter : RegExprString);\r
-  // enabled letters are prepended, disabled letters are appended\r
-  begin\r
-   if IsOn\r
-    then OnPart := Letter + OnPart\r
-    else OffPart := OffPart + Letter;\r
-  end;\r
-\r
- begin\r
-  OnPart := '';\r
-  OffPart := '';\r
-\r
-  Note (ModifierI, 'i');\r
-  Note (ModifierR, 'r');\r
-  Note (ModifierS, 's');\r
-  Note (ModifierG, 'g');\r
-  Note (ModifierM, 'm');\r
-  Note (ModifierX, 'x');\r
-\r
-  Result := OnPart;\r
-  if OffPart <> '' // no '-' if all modifiers are 'On'\r
-   then Result := Result + '-' + OffPart;\r
- end; { of function TRegExpr.GetModifierStr\r
---------------------------------------------------------------}\r
-\r
-class function TRegExpr.ParseModifiersStr (const AModifiers : RegExprString;\r
-var AModifiersInt : integer) : boolean;\r
-// Applies a modifier string such as 'is-x' to the bit mask AModifiersInt:\r
-// letters before the first '-' switch their bit on, letters after it\r
-// switch it off (letters are case-insensitive). Returns false at the first\r
-// unknown letter; bits changed before that point stay changed.\r
-// !!! Be careful - this is a class function and must not use object instance fields\r
- var\r
-  Idx : integer;\r
-  Enabling : boolean;\r
-  Bit : integer;\r
-  Ch : REChar;\r
- begin\r
-  Result := true;\r
-  Enabling := true;\r
-  Bit := 0; // prevent compiler warning\r
-  for Idx := 1 to length (AModifiers) do begin\r
-   Ch := AModifiers [Idx];\r
-   if Ch = '-' then begin\r
-    Enabling := false;\r
-    CONTINUE;\r
-   end;\r
-\r
-   if Pos (Ch, 'iI') > 0 then Bit := MaskModI\r
-   else if Pos (Ch, 'rR') > 0 then Bit := MaskModR\r
-   else if Pos (Ch, 'sS') > 0 then Bit := MaskModS\r
-   else if Pos (Ch, 'gG') > 0 then Bit := MaskModG\r
-   else if Pos (Ch, 'mM') > 0 then Bit := MaskModM\r
-   else if Pos (Ch, 'xX') > 0 then Bit := MaskModX\r
-   else begin\r
-    Result := false;\r
-    EXIT;\r
-   end;\r
-\r
-   if Enabling\r
-    then AModifiersInt := AModifiersInt or Bit\r
-    else AModifiersInt := AModifiersInt and not Bit;\r
-  end;\r
- end; { of function TRegExpr.ParseModifiersStr\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.SetModifierStr (const AModifiers : RegExprString);\r
-// Applies the modifier string to fModifiers; reports\r
-// reeModifierUnsupported when the string cannot be parsed.\r
- begin\r
-  if ParseModifiersStr (AModifiers, fModifiers)\r
-   then EXIT;\r
-  Error (reeModifierUnsupported);\r
- end; { of procedure TRegExpr.SetModifierStr\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetModifier (AIndex : integer) : boolean;\r
-// Reads one modifier bit from fModifiers. AIndex: 1=I, 2=R, 3=S, 4=G,\r
-// 5=M, 6=X. Any other index reports reeModifierUnsupported and gives false.\r
- var\r
-  Bit : integer;\r
- begin\r
-  Result := false;\r
-  case AIndex of\r
-   1: Bit := MaskModI;\r
-   2: Bit := MaskModR;\r
-   3: Bit := MaskModS;\r
-   4: Bit := MaskModG;\r
-   5: Bit := MaskModM;\r
-   6: Bit := MaskModX;\r
-   else begin\r
-    Error (reeModifierUnsupported);\r
-    EXIT;\r
-   end;\r
-  end;\r
-  if (fModifiers and Bit) <> 0\r
-   then Result := true;\r
- end; { of function TRegExpr.GetModifier\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.SetModifier (AIndex : integer; ASet : boolean);\r
-// Sets (ASet = true) or clears one modifier bit in fModifiers.\r
-// AIndex: 1=I, 2=R, 3=S, 4=G, 5=M, 6=X. Any other index reports\r
-// reeModifierUnsupported and leaves fModifiers untouched.\r
- var\r
-  Bit : integer;\r
- begin\r
-  case AIndex of\r
-   1: Bit := MaskModI;\r
-   2: Bit := MaskModR;\r
-   3: Bit := MaskModS;\r
-   4: Bit := MaskModG;\r
-   5: Bit := MaskModM;\r
-   6: Bit := MaskModX;\r
-   else begin\r
-    Error (reeModifierUnsupported);\r
-    EXIT;\r
-   end;\r
-  end;\r
-  if not ASet\r
-   then fModifiers := fModifiers and not Bit\r
-   else fModifiers := fModifiers or Bit;\r
- end; { of procedure TRegExpr.SetModifier\r
---------------------------------------------------------------}\r
-\r
-\r
-{=============================================================}\r
-{==================== Compiler section =======================}\r
-{=============================================================}\r
-\r
-procedure TRegExpr.InvalidateProgramm;\r
-// Frees the compiled program, if any, so that it is recompiled on demand.\r
- begin\r
-  if programm = nil\r
-   then EXIT; // nothing compiled\r
-  FreeMem (programm);\r
-  programm := nil;\r
- end; { of procedure TRegExpr.InvalidateProgramm\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.Compile; //###0.941\r
-// Compiles the stored expression; reports reeNoExpression when none is set.\r
- begin\r
-  if fExpression <> nil\r
-   then CompileRegExpr (fExpression)\r
-   else Error (reeNoExpression); // No Expression assigned\r
- end; { of procedure TRegExpr.Compile\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.IsProgrammOk : boolean;\r
-// Makes sure a valid compiled program exists: drops it when the modifiers\r
-// changed since compilation, (re)compiles when missing, and checks the\r
-// MAGIC signature. Returns true only when the program is usable.\r
- {$IFNDEF UniCode}\r
- var\r
-  SepIdx : integer;\r
- {$ENDIF}\r
- begin\r
-  Result := false;\r
-\r
-  // a program compiled under other modifiers is stale\r
-  if fModifiers <> fProgModifiers //###0.941\r
-   then InvalidateProgramm;\r
-\r
-  // non-Unicode builds keep the line separators in a set as well\r
-  {$IFNDEF UniCode}\r
-  fLineSeparatorsSet := [];\r
-  for SepIdx := 1 to length (fLineSeparators)\r
-   do System.Include (fLineSeparatorsSet, fLineSeparators [SepIdx]);\r
-  {$ENDIF}\r
-\r
-  // [Re]compile if needed\r
-  if programm = nil\r
-   then Compile; //###0.941\r
-\r
-  // still nothing: error was set/raised by Compile (was reeExecAfterCompErr)\r
-  if programm = nil\r
-   then EXIT;\r
-\r
-  if programm [0] = MAGIC\r
-   then Result := true\r
-   else Error (reeCorruptedProgram); // Program corrupted.\r
- end; { of function TRegExpr.IsProgrammOk\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);\r
-// Set the next-pointer at the end of a node chain: follows the chain that\r
-// starts at p to its last node and stores there the offset to val.\r
-// Does nothing when p is the dummy target used while only sizing the code.\r
- var\r
- scan : PRegExprChar;\r
- temp : PRegExprChar;\r
-// i : int64;\r
- begin\r
- if p = @regdummy\r
- then EXIT;\r
- // Find last node.\r
- scan := p;\r
- REPEAT\r
- temp := regnext (scan);\r
- if temp = nil\r
- then BREAK;\r
- scan := temp;\r
- UNTIL false;\r
- // Set Next 'pointer'. The offset is always computed as higher address\r
- // minus lower address and negated when val lies before scan.\r
- if val < scan\r
- then PRENextOff (scan + REOpSz)^ := - (scan - val) //###0.948\r
- // work around PWideChar subtraction bug (Delphi uses\r
- // shr after subtraction to calculate widechar distance %-( )\r
- // so, if difference is negative we have .. the "feature" :(\r
- // I could wrap it in $IFDEF UniCode, but I didn't because\r
- // "P - Q computes the difference between the address given\r
- // by P (the higher address) and the address given by Q (the\r
- // lower address)" - Delphi help quotation.\r
- else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933\r
- end; { of procedure TRegExpr.Tail\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.OpTail (p : PRegExprChar; val : PRegExprChar);\r
-// Tail applied to the operand of p; a no-op unless p is a real BRANCH node.\r
- begin\r
-  // "Operandless" and "op != BRANCH" are synonymous in practice.\r
-  if (p <> nil) and (p <> @regdummy) and (PREOp (p)^ = BRANCH)\r
-   then Tail (p + REOpSz + RENextOffSz, val); //###0.933\r
- end; { of procedure TRegExpr.OpTail\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933\r
-// Emits a node header (opcode + zeroed Next offset) and returns its\r
-// location. While regcode points at regdummy only regsize is increased.\r
- begin\r
-  Result := regcode;\r
-  if Result = @regdummy then begin\r
-   // sizing pass: compute code size without code generation\r
-   inc (regsize, REOpSz + RENextOffSz);\r
-   EXIT;\r
-  end;\r
-  PREOp (regcode)^ := op;\r
-  inc (regcode, REOpSz);\r
-  PRENextOff (regcode)^ := 0; // Next "pointer" := nil\r
-  inc (regcode, RENextOffSz);\r
- end; { of function TRegExpr.EmitNode\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.EmitC (b : REChar);\r
-// Emits one REChar into the code. While regcode points at regdummy only\r
-// regsize is increased.\r
- begin\r
-  if regcode = @regdummy then begin\r
-   inc (regsize); // Type of p-code pointer always is ^REChar\r
-   EXIT;\r
-  end;\r
-  regcode^ := b;\r
-  inc (regcode);\r
- end; { of procedure TRegExpr.EmitC\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer);\r
-// Inserts an operator node of sz REChars in front of the already emitted\r
-// operand at opnd: the operand is shifted up by sz, the opcode is written\r
-// at opnd and the rest of the new node is zero-filled. While regcode\r
-// points at regdummy only regsize is increased.\r
- var\r
-  ReadPtr, WritePtr, Fill : PRegExprChar;\r
-  Remaining : integer;\r
- begin\r
-  if regcode = @regdummy then begin\r
-   inc (regsize, sz);\r
-   EXIT;\r
-  end;\r
-\r
-  // relocate the operand, copying from the top down so nothing is overwritten\r
-  ReadPtr := regcode;\r
-  inc (regcode, sz);\r
-  WritePtr := regcode;\r
-  while ReadPtr > opnd do begin\r
-   dec (WritePtr);\r
-   dec (ReadPtr);\r
-   WritePtr^ := ReadPtr^;\r
-  end;\r
-\r
-  // Op node, where operand used to be.\r
-  Fill := opnd;\r
-  PREOp (Fill)^ := op;\r
-  inc (Fill, REOpSz);\r
-  Remaining := sz - REOpSz;\r
-  while Remaining > 0 do begin\r
-   Fill^ := #0;\r
-   inc (Fill);\r
-   dec (Remaining);\r
-  end;\r
- end; { of procedure TRegExpr.InsertOperator\r
---------------------------------------------------------------}\r
-\r
-function strcspn (s1 : PRegExprChar; s2 : PRegExprChar) : integer;\r
-// Length of the initial segment of s1 that contains no character from s2.\r
- var\r
-  Probe : PRegExprChar;\r
- begin\r
-  Result := 0;\r
-  while s1^ <> #0 do begin\r
-   // look for the current s1 character inside s2\r
-   Probe := s2;\r
-   while (Probe^ <> #0) and (Probe^ <> s1^)\r
-    do inc (Probe);\r
-   if Probe^ <> #0\r
-    then EXIT; // found in s2 - the segment ends here\r
-   inc (Result);\r
-   inc (s1);\r
-  end;\r
- end; { of function strcspn\r
---------------------------------------------------------------}\r
-\r
-const\r
-// Flags to be passed up and down.\r
-// They are bit values (1, 2, 4), presumably combined with "or" by the\r
-// parser routines - confirm against their callers.\r
- HASWIDTH = 01; // Known never to match nil string.\r
- SIMPLE = 02; // Simple enough to be STAR/PLUS/BRACES operand.\r
- SPSTART = 04; // Starts with * or +.\r
- WORST = 0; // Worst case.\r
- // Metacharacters of the pattern syntax, terminated by #0.\r
- META : array [0 .. 12] of REChar = (\r
- '^', '$', '.', '[', '(', ')', '|', '?', '+', '*', EscChar, '{', #0);\r
- // Any modification must be synchronized with QuoteRegExprMetaChars !!!\r
-\r
-// Russian alphabet tables: lower-case (Lo) and upper-case (Hi) letters in\r
-// the same order, plus the first/last letter of each range. UniCode builds\r
-// use code points, other builds use the single-byte characters.\r
-{$IFDEF UniCode}\r
- RusRangeLo : array [0 .. 33] of REChar =\r
- (#$430,#$431,#$432,#$433,#$434,#$435,#$451,#$436,#$437,\r
- #$438,#$439,#$43A,#$43B,#$43C,#$43D,#$43E,#$43F,\r
- #$440,#$441,#$442,#$443,#$444,#$445,#$446,#$447,\r
- #$448,#$449,#$44A,#$44B,#$44C,#$44D,#$44E,#$44F,#0);\r
- RusRangeHi : array [0 .. 33] of REChar =\r
- (#$410,#$411,#$412,#$413,#$414,#$415,#$401,#$416,#$417,\r
- #$418,#$419,#$41A,#$41B,#$41C,#$41D,#$41E,#$41F,\r
- #$420,#$421,#$422,#$423,#$424,#$425,#$426,#$427,\r
- #$428,#$429,#$42A,#$42B,#$42C,#$42D,#$42E,#$42F,#0);\r
- RusRangeLoLow = #$430{'à'};\r
- RusRangeLoHigh = #$44F{'ÿ'};\r
- RusRangeHiLow = #$410{'À'};\r
- RusRangeHiHigh = #$42F{'ß'};\r
-{$ELSE}\r
- RusRangeLo = 'àáâãäå¸æçèéêëìíîïðñòóôõö÷øùúûüýþÿ';\r
- RusRangeHi = 'ÀÁÂÃÄÅ¨ÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß';\r
- RusRangeLoLow = 'à';\r
- RusRangeLoHigh = 'ÿ';\r
- RusRangeHiLow = 'À';\r
- RusRangeHiHigh = 'ß';\r
-{$ENDIF}\r
-\r
-function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;\r
-// Translates the source expression `exp` into the internal p-code kept in\r
-// `programm`. Returns true on success.\r
-//\r
-// The size of the p-code is not known before the expression has been\r
-// parsed, and parsing needs a place to emit to. So the expression is\r
-// parsed twice: first with regcode pointing at regdummy, which only\r
-// accumulates regsize, then "for real" into a buffer of exactly that size.\r
-// Memory is therefore requested only after the first pass succeeded, and\r
-// the emitted code never has to be moved (which would invalidate pointers\r
-// into it).\r
-// Note: the optimization step at the end relies on the layout of the\r
-// compiled program.\r
-  var\r
-    node, operand, bestLiteral : PRegExprChar;\r
-    bestLen : cardinal;\r
-    flags : integer;\r
-  begin\r
-    Result := false; // pessimistic until every step has succeeded\r
-\r
-    regparse := nil; // for correct error handling (no parse position yet)\r
-    regexpbeg := exp;\r
-    try\r
-      // Drop the p-code of a previously compiled expression.\r
-      if programm <> nil then begin\r
-        FreeMem (programm);\r
-        programm := nil;\r
-      end;\r
-\r
-      if exp = nil then begin\r
-        Error (reeCompNullArgument);\r
-        EXIT;\r
-      end;\r
-\r
-      fProgModifiers := fModifiers;\r
-\r
-      // Pass 1: determine size and legality, nothing is stored.\r
-      regsize := 0;\r
-      regcode := @regdummy;\r
-      regnpar := 1;\r
-      regparse := exp;\r
-      fCompModifiers := fModifiers;\r
-      EmitC (MAGIC);\r
-      if ParseReg (0, flags) = nil\r
-        then EXIT;\r
-\r
-      // ###0.933: the p-code has no length limit any more, so the buffer\r
-      // is simply sized from what pass 1 counted.\r
-      GetMem (programm, regsize * SizeOf (REChar));\r
-\r
-      // Pass 2: the same parse again, now emitting into the buffer.\r
-      regcode := programm;\r
-      regnpar := 1;\r
-      regparse := exp;\r
-      fCompModifiers := fModifiers;\r
-      EmitC (MAGIC);\r
-      if ParseReg (0, flags) = nil\r
-        then EXIT;\r
-\r
-      // Collect information used to speed up matching.\r
-      {$IFDEF UseFirstCharSet} //###0.929\r
-      FirstCharSet := [];\r
-      FillFirstCharSet (programm + REOpSz);\r
-      {$ENDIF}\r
-      // Worst-case defaults.\r
-      regmlen := 0;\r
-      regmust := nil;\r
-      reganch := #0;\r
-      regstart := #0;\r
-\r
-      node := programm + REOpSz; // first BRANCH\r
-      if PREOp (regnext (node))^ = EEND then begin // only one top-level choice\r
-        inc (node, REOpSz + RENextOffSz); // first node inside that BRANCH\r
-\r
-        // Starting-point info.\r
-        case PREOp (node)^ of\r
-          EXACTLY: begin\r
-            operand := node + REOpSz + RENextOffSz;\r
-            regstart := operand^;\r
-          end;\r
-          BOL: inc (reganch);\r
-        end;\r
-\r
-        // If the r.e. contains something expensive, remember the longest\r
-        // literal string that has to appear (regmust). Ties go to the\r
-        // later string: regstart already covers the beginning of the\r
-        // r.e., so checking something else strengthens the test.\r
-        if (flags and SPSTART) <> 0 then begin\r
-          bestLiteral := nil;\r
-          bestLen := 0;\r
-          while node <> nil do begin\r
-            if PREOp (node)^ = EXACTLY then begin\r
-              operand := node + REOpSz + RENextOffSz;\r
-              if strlen (operand) >= bestLen then begin\r
-                bestLiteral := operand;\r
-                bestLen := strlen (bestLiteral);\r
-              end;\r
-            end;\r
-            node := regnext (node);\r
-          end;\r
-          regmust := bestLiteral;\r
-          regmlen := bestLen;\r
-        end;\r
-      end;\r
-\r
-      Result := true;\r
-\r
-    finally\r
-      // Runs on every exit path, including the EXITs above.\r
-      if not Result\r
-        then InvalidateProgramm;\r
-      regexpbeg := nil;\r
-      fExprIsCompiled := Result; //###0.944\r
-    end;\r
-\r
-  end; { of function TRegExpr.CompileRegExpr\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.ParseReg (paren : integer; var flagp : integer) : PRegExprChar;\r
-// Parses a regular expression: either the main body (paren = 0) or the\r
-// inside of a parenthesized group (paren <> 0; the caller has already\r
-// consumed the opening parenthesis).\r
-// Returns the first node of the emitted code, or nil on error. flagp\r
-// receives the HASWIDTH / SPSTART summary of the alternatives.\r
-// Group handling is combined with the top level because the tails of all\r
-// branches have to be tied to whatever follows the expression.\r
-  var\r
-    head, branch, closer : PRegExprChar;\r
-    groupNo : integer;\r
-    branchFlags : integer;\r
-    outerModifiers : integer;\r
-  begin\r
-    Result := nil;\r
-    flagp := HASWIDTH; // assumed until a branch shows otherwise\r
-    groupNo := 0; // only used when paren <> 0; avoids a compiler warning\r
-    outerModifiers := fCompModifiers;\r
-    head := nil;\r
-\r
-    // A parenthesized expression starts with its numbered OPEN node.\r
-    if paren <> 0 then begin\r
-      if regnpar >= NSUBEXP then begin\r
-        Error (reeCompParseRegTooManyBrackets);\r
-        EXIT;\r
-      end;\r
-      groupNo := regnpar;\r
-      inc (regnpar);\r
-      head := EmitNode (TREOp (ord (OPEN) + groupNo));\r
-    end;\r
-\r
-    // Pick up the '|'-separated branches, linking them together.\r
-    REPEAT\r
-      branch := ParseBranch (branchFlags);\r
-      if branch = nil\r
-        then EXIT; // Result is still nil\r
-      if head = nil\r
-        then head := branch // no OPEN node: the first branch is the head\r
-        else Tail (head, branch); // OPEN -> first, later BRANCH -> BRANCH\r
-      if (branchFlags and HASWIDTH) = 0\r
-        then flagp := flagp and not HASWIDTH;\r
-      flagp := flagp or (branchFlags and SPSTART);\r
-      if regparse^ <> '|'\r
-        then BREAK;\r
-      inc (regparse); // skip '|'\r
-    UNTIL False;\r
-\r
-    // Make the closing node and hook it on the end.\r
-    if paren = 0\r
-      then closer := EmitNode (EEND)\r
-      else closer := EmitNode (TREOp (ord (CLOSE) + groupNo));\r
-    Tail (head, closer);\r
-\r
-    // Hook the tails of the branches to the closing node.\r
-    branch := head;\r
-    while branch <> nil do begin\r
-      OpTail (branch, closer);\r
-      branch := regnext (branch);\r
-    end;\r
-\r
-    // Check for proper termination.\r
-    if paren <> 0 then begin\r
-      if regparse^ <> ')' then begin\r
-        Error (reeCompParseRegUnmatchedBrackets);\r
-        EXIT;\r
-      end;\r
-      inc (regparse); // skip trailing ')'\r
-    end\r
-    else if regparse^ <> #0 then begin\r
-      // Top level must end exactly at the end of the expression.\r
-      if regparse^ = ')'\r
-        then Error (reeCompParseRegUnmatchedBrackets2)\r
-        else Error (reeCompParseRegJunkOnEnd);\r
-      EXIT;\r
-    end;\r
-\r
-    fCompModifiers := outerModifiers; // restore modifiers of parent\r
-    Result := head;\r
-  end; { of function TRegExpr.ParseReg\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.ParseBranch (var flagp : integer) : PRegExprChar;\r
-// Parses one alternative of the '|' operator, i.e. a concatenation of\r
-// pieces. Returns the BRANCH node emitted for it, or nil if one of the\r
-// pieces could not be parsed.\r
-  var\r
-    branchNode, prevPiece, piece : PRegExprChar;\r
-    pieceFlags : integer;\r
-  begin\r
-    flagp := WORST; // nothing known yet\r
-\r
-    branchNode := EmitNode (BRANCH);\r
-    prevPiece := nil;\r
-    // A branch ends at the end of the r.e., at '|' or at ')'.\r
-    while not ((regparse^ = #0) or (regparse^ = '|') or (regparse^ = ')')) do begin\r
-      piece := ParsePiece (pieceFlags);\r
-      if piece = nil then begin\r
-        Result := nil;\r
-        EXIT;\r
-      end;\r
-      flagp := flagp or (pieceFlags and HASWIDTH);\r
-      if prevPiece <> nil\r
-        then Tail (prevPiece, piece) // append to the chain built so far\r
-        else flagp := flagp or (pieceFlags and SPSTART); // first piece only\r
-      prevPiece := piece;\r
-    end;\r
-    if prevPiece = nil // the loop ran zero times\r
-      then EmitNode (NOTHING);\r
-    Result := branchNode;\r
-  end; { of function TRegExpr.ParseBranch\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;\r
-// Parses an atom followed by an optional quantifier: '*', '+', '?' or\r
-// '{n}', '{n,}', '{n,m}', each optionally followed by '?' (non-greedy).\r
-// Returns the first node of the emitted code, or nil if the atom could\r
-// not be parsed. flagp is always assigned before a non-nil result is\r
-// returned, except on paths that call Error.\r
-//\r
-// The branching code sequences used for ? and the general cases of * and +\r
-// and { are somewhat optimized: they use the same NOTHING node as both the\r
-// endmarker for their branch list and the body of the last branch. It\r
-// might seem that this node could be dispensed with entirely, but the\r
-// endmarker role is not redundant.\r
-\r
-  // Converts the decimal digits AStart..AEnd (inclusive) to a number.\r
-  // Reports reeBRACESArgTooBig for more than 8 digits or for a value\r
-  // outside 0..MaxBracesArg.\r
-  function parsenum (AStart, AEnd : PRegExprChar) : TREBracesArg;\r
-    begin\r
-      Result := 0;\r
-      if AEnd - AStart + 1 > 8 then begin // prevent stupid scanning\r
-        Error (reeBRACESArgTooBig);\r
-        EXIT;\r
-      end;\r
-      while AStart <= AEnd do begin\r
-        Result := Result * 10 + (ord (AStart^) - ord ('0'));\r
-        inc (AStart);\r
-      end;\r
-      if (Result > MaxBracesArg) or (Result < 0) then begin\r
-        Error (reeBRACESArgTooBig);\r
-        EXIT;\r
-      end;\r
-    end;\r
-\r
-  var\r
-    op : REChar;\r
-    NonGreedyOp, NonGreedyCh : boolean; //###0.940\r
-    TheOp : TREOp; //###0.940\r
-    NextNode : PRegExprChar;\r
-    flags : integer;\r
-    BracesMin, Bracesmax : TREBracesArg;\r
-    p, savedparse : PRegExprChar;\r
-\r
-  // Wraps a non-simple atom into LOOPENTRY ... LOOP / LOOPNG with the\r
-  // given repetition bounds. Without ComplexBraces it only reports\r
-  // reeComplexBracesNotImplemented.\r
-  procedure EmitComplexBraces (ABracesMin, ABracesMax : TREBracesArg;\r
-    ANonGreedyOp : boolean); //###0.940\r
-    {$IFDEF ComplexBraces}\r
-    var\r
-      off : integer;\r
-    {$ENDIF}\r
-    begin\r
-      {$IFNDEF ComplexBraces}\r
-      Error (reeComplexBracesNotImplemented);\r
-      {$ELSE}\r
-      if ANonGreedyOp\r
-        then TheOp := LOOPNG\r
-        else TheOp := LOOP;\r
-      InsertOperator (LOOPENTRY, Result, REOpSz + RENextOffSz);\r
-      NextNode := EmitNode (TheOp);\r
-      if regcode <> @regdummy then begin\r
-        // Second pass: store min, max and the offset back to the atom.\r
-        off := (Result + REOpSz + RENextOffSz)\r
-          - (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY\r
-        PREBracesArg (regcode)^ := ABracesMin;\r
-        inc (regcode, REBracesArgSz);\r
-        PREBracesArg (regcode)^ := ABracesMax;\r
-        inc (regcode, REBracesArgSz);\r
-        PRENextOff (regcode)^ := off;\r
-        inc (regcode, RENextOffSz);\r
-      end\r
-      else inc (regsize, REBracesArgSz * 2 + RENextOffSz); // first pass: size only\r
-      Tail (Result, NextNode); // LOOPENTRY -> LOOP\r
-      if regcode <> @regdummy then\r
-        Tail (Result + REOpSz + RENextOffSz, NextNode); // Atom -> LOOP\r
-      {$ENDIF}\r
-    end;\r
-\r
-  // Puts a BRACES / BRACESNG operator with the given bounds in front of\r
-  // a simple atom.\r
-  procedure EmitSimpleBraces (ABracesMin, ABracesMax : TREBracesArg;\r
-    ANonGreedyOp : boolean); //###0.940\r
-    begin\r
-      if ANonGreedyOp //###0.940\r
-        then TheOp := BRACESNG\r
-        else TheOp := BRACES;\r
-      InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);\r
-      if regcode <> @regdummy then begin\r
-        PREBracesArg (Result + REOpSz + RENextOffSz)^ := ABracesMin;\r
-        PREBracesArg (Result + REOpSz + RENextOffSz + REBracesArgSz)^ := ABracesMax;\r
-      end;\r
-    end;\r
-\r
-  begin\r
-    Result := ParseAtom (flags);\r
-    if Result = nil\r
-      then EXIT;\r
-\r
-    op := regparse^;\r
-    if not ((op = '*') or (op = '+') or (op = '?') or (op = '{')) then begin\r
-      // No quantifier: the piece is just the atom.\r
-      flagp := flags;\r
-      EXIT;\r
-    end;\r
-    if ((flags and HASWIDTH) = 0) and (op <> '?') then begin\r
-      Error (reePlusStarOperandCouldBeEmpty);\r
-      EXIT;\r
-    end;\r
-\r
-    case op of\r
-      '*': begin\r
-        flagp := WORST or SPSTART;\r
-        NonGreedyCh := (regparse + 1)^ = '?'; //###0.940\r
-        NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940\r
-        if (flags and SIMPLE) = 0 then begin\r
-          if NonGreedyOp //###0.940\r
-            then EmitComplexBraces (0, MaxBracesArg, NonGreedyOp)\r
-            else begin // Emit x* as (x&|), where & means "self".\r
-              InsertOperator (BRANCH, Result, REOpSz + RENextOffSz); // Either x\r
-              OpTail (Result, EmitNode (BACK)); // and loop\r
-              OpTail (Result, Result); // back\r
-              Tail (Result, EmitNode (BRANCH)); // or\r
-              Tail (Result, EmitNode (NOTHING)); // nil.\r
-            end\r
-        end\r
-        else begin // Simple\r
-          if NonGreedyOp //###0.940\r
-            then TheOp := STARNG\r
-            else TheOp := STAR;\r
-          InsertOperator (TheOp, Result, REOpSz + RENextOffSz);\r
-        end;\r
-        if NonGreedyCh //###0.940\r
-          then inc (regparse); // Skip extra char ('?')\r
-      end; { of case '*'}\r
-      '+': begin\r
-        flagp := WORST or SPSTART or HASWIDTH;\r
-        NonGreedyCh := (regparse + 1)^ = '?'; //###0.940\r
-        NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940\r
-        if (flags and SIMPLE) = 0 then begin\r
-          if NonGreedyOp //###0.940\r
-            then EmitComplexBraces (1, MaxBracesArg, NonGreedyOp)\r
-            else begin // Emit x+ as x(&|), where & means "self".\r
-              NextNode := EmitNode (BRANCH); // Either\r
-              Tail (Result, NextNode);\r
-              Tail (EmitNode (BACK), Result); // loop back\r
-              Tail (NextNode, EmitNode (BRANCH)); // or\r
-              Tail (Result, EmitNode (NOTHING)); // nil.\r
-            end\r
-        end\r
-        else begin // Simple\r
-          if NonGreedyOp //###0.940\r
-            then TheOp := PLUSNG\r
-            else TheOp := PLUS;\r
-          InsertOperator (TheOp, Result, REOpSz + RENextOffSz);\r
-        end;\r
-        if NonGreedyCh //###0.940\r
-          then inc (regparse); // Skip extra char ('?')\r
-      end; { of case '+'}\r
-      '?': begin\r
-        flagp := WORST;\r
-        NonGreedyCh := (regparse + 1)^ = '?'; //###0.940\r
-        NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940\r
-        if NonGreedyOp then begin //###0.940 // We emit x?? as x{0,1}?\r
-          if (flags and SIMPLE) = 0\r
-            then EmitComplexBraces (0, 1, NonGreedyOp)\r
-            else EmitSimpleBraces (0, 1, NonGreedyOp);\r
-        end\r
-        else begin // greedy '?'\r
-          InsertOperator (BRANCH, Result, REOpSz + RENextOffSz); // Either x\r
-          Tail (Result, EmitNode (BRANCH)); // or\r
-          NextNode := EmitNode (NOTHING); // nil.\r
-          Tail (Result, NextNode);\r
-          OpTail (Result, NextNode);\r
-        end;\r
-        if NonGreedyCh //###0.940\r
-          then inc (regparse); // Skip extra char ('?')\r
-      end; { of case '?'}\r
-      '{': begin\r
-        savedparse := regparse;\r
-        // The expression is #0-terminated, so scanning for digits below\r
-        // stops at the end of the r.e. at the latest.\r
-        inc (regparse);\r
-        p := regparse;\r
-        while Pos (regparse^, '0123456789') > 0 // <min> MUST appear\r
-          do inc (regparse);\r
-        if (regparse^ <> '}') and (regparse^ <> ',') or (p = regparse) then begin\r
-          // Not a quantifier: leave '{' to be compiled as a literal.\r
-          regparse := savedparse;\r
-          flagp := flags;\r
-          EXIT;\r
-        end;\r
-        BracesMin := parsenum (p, regparse - 1);\r
-        if regparse^ = ',' then begin\r
-          inc (regparse);\r
-          p := regparse;\r
-          while Pos (regparse^, '0123456789') > 0\r
-            do inc (regparse);\r
-          if regparse^ <> '}' then begin\r
-            // Same fallback as above. flagp must be set here as well:\r
-            // it is an output parameter and the caller reads it as soon\r
-            // as a non-nil node is returned.\r
-            regparse := savedparse;\r
-            flagp := flags;\r
-            EXIT;\r
-          end;\r
-          if p = regparse\r
-            then BracesMax := MaxBracesArg // '{n,}' - no upper bound given\r
-            else BracesMax := parsenum (p, regparse - 1);\r
-        end\r
-        else BracesMax := BracesMin; // {n} == {n,n}\r
-        if BracesMin > BracesMax then begin\r
-          Error (reeBracesMinParamGreaterMax);\r
-          EXIT;\r
-        end;\r
-        // Always start from a defined value: with BracesMin = 0 nothing\r
-        // used to initialize flagp, so x{0} returned whatever the\r
-        // caller's variable happened to contain.\r
-        flagp := WORST;\r
-        if BracesMax > 0\r
-          then flagp := flagp or HASWIDTH or SPSTART;\r
-\r
-        NonGreedyCh := (regparse + 1)^ = '?'; //###0.940\r
-        NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940\r
-        if (flags and SIMPLE) <> 0\r
-          then EmitSimpleBraces (BracesMin, BracesMax, NonGreedyOp)\r
-          else EmitComplexBraces (BracesMin, BracesMax, NonGreedyOp);\r
-        if NonGreedyCh //###0.940\r
-          then inc (regparse); // Skip extra char '?'\r
-      end; { of case '{'}\r
-//    else // here we can't be\r
-    end; { of case op}\r
-\r
-    inc (regparse); // skip the quantifier char itself ('*', '+', '?' or '}')\r
-    if (regparse^ = '*') or (regparse^ = '+') or (regparse^ = '?') or (regparse^ = '{') then begin\r
-      Error (reeNestedSQP);\r
-      EXIT;\r
-    end;\r
-  end; { of function TRegExpr.ParsePiece\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;\r
-// The lowest parsing level: one anchor, '.', bracket class, parenthesized\r
-// group, inline comment / modifier block, escape sequence or run of\r
-// ordinary characters.\r
-// Returns the emitted node, or nil on error. flagp starts as WORST and\r
-// gets HASWIDTH / SIMPLE / SPSTART added as appropriate.\r
-// Optimization: gobbles an entire sequence of ordinary characters so that\r
-// it can turn them into a single node, which is smaller to store and\r
-// faster to run. Backslashed characters are exceptions, each becoming a\r
-// separate node; the code is simpler that way and it's not worth fixing.\r
-  var\r
-    ret : PRegExprChar;\r
-    flags : integer;\r
-    RangeBeg, RangeEnd : REChar;\r
-    CanBeRange : boolean;\r
-    len : integer;\r
-    ender : REChar;\r
-    begmodfs : PRegExprChar;\r
-\r
-    {$IFDEF UseSetOfChar} //###0.930\r
-    RangePCodeBeg : PRegExprChar;\r
-    RangePCodeIdx : integer;\r
-    RangeIsCI : boolean;\r
-    RangeSet : TSetOfREChar;\r
-    RangeLen : integer;\r
-    RangeChMin, RangeChMax : REChar;\r
-    {$ENDIF}\r
-\r
-  // Emits a one-character EXACTLY / EXACTLYCI node (depending on /i).\r
-  procedure EmitExactly (ch : REChar);\r
-    begin\r
-      if (fCompModifiers and MaskModI) <> 0\r
-        then ret := EmitNode (EXACTLYCI)\r
-        else ret := EmitNode (EXACTLY);\r
-      EmitC (ch);\r
-      EmitC (#0);\r
-      flagp := flagp or HASWIDTH or SIMPLE;\r
-    end;\r
-\r
-  // Emits every character of s.\r
-  procedure EmitStr (const s : RegExprString);\r
-    var i : integer;\r
-    begin\r
-      for i := 1 to length (s)\r
-        do EmitC (s [i]);\r
-    end;\r
-\r
-  // Value (0..15) of one hex digit, either case. Reports reeBadHexDigit\r
-  // for anything else.\r
-  function HexDig (ch : REChar) : integer;\r
-    begin\r
-      Result := 0;\r
-      if (ch >= 'a') and (ch <= 'f')\r
-        then ch := REChar (ord (ch) - (ord ('a') - ord ('A')));\r
-      if (ch < '0') or (ch > 'F') or ((ch > '9') and (ch < 'A')) then begin\r
-        Error (reeBadHexDigit);\r
-        EXIT;\r
-      end;\r
-      Result := ord (ch) - ord ('0');\r
-      if ch >= 'A'\r
-        then Result := Result - (ord ('A') - ord ('9') - 1);\r
-    end;\r
-\r
-  // Starts a character class node. With UseSetOfChar the members are\r
-  // first collected in RangeSet and written out by EmitRangeC (#0).\r
-  function EmitRange (AOpCode : REChar) : PRegExprChar;\r
-    begin\r
-      {$IFDEF UseSetOfChar}\r
-      case AOpCode of\r
-        ANYBUTCI, ANYBUT:\r
-          Result := EmitNode (ANYBUTTINYSET);\r
-        else // ANYOFCI, ANYOF\r
-          Result := EmitNode (ANYOFTINYSET);\r
-      end;\r
-      case AOpCode of\r
-        ANYBUTCI, ANYOFCI:\r
-          RangeIsCI := True;\r
-        else // ANYBUT, ANYOF\r
-          RangeIsCI := False;\r
-      end;\r
-      RangePCodeBeg := regcode;\r
-      RangePCodeIdx := regsize;\r
-      RangeLen := 0;\r
-      RangeSet := [];\r
-      RangeChMin := #255;\r
-      RangeChMax := #0;\r
-      {$ELSE}\r
-      Result := EmitNode (AOpCode);\r
-      // ToDo:\r
-      // !!!!!!!!!!!!! Implement ANYOF[BUT]TINYSET generation for UniCode !!!!!!!!!!\r
-      {$ENDIF}\r
-    end;\r
-\r
-{$IFDEF UseSetOfChar}\r
-  // Adds one character to the collected set, keeping count and min / max.\r
-  procedure EmitRangeCPrim (b : REChar); //###0.930\r
-    begin\r
-      if b in RangeSet\r
-        then EXIT;\r
-      inc (RangeLen);\r
-      if b < RangeChMin\r
-        then RangeChMin := b;\r
-      if b > RangeChMax\r
-        then RangeChMax := b;\r
-      Include (RangeSet, b);\r
-    end;\r
-  {$ENDIF}\r
-\r
-  // Adds b to the class being built; b = #0 finishes the class.\r
-  // Always clears CanBeRange.\r
-  procedure EmitRangeC (b : REChar);\r
-    {$IFDEF UseSetOfChar}\r
-    var\r
-      Ch : REChar;\r
-    {$ENDIF}\r
-    begin\r
-      CanBeRange := false;\r
-      {$IFDEF UseSetOfChar}\r
-      if b <> #0 then begin\r
-        EmitRangeCPrim (b); //###0.930\r
-        if RangeIsCI\r
-          then EmitRangeCPrim (InvertCase (b)); //###0.930\r
-      end\r
-      else begin\r
-        {$IFDEF UseAsserts}\r
-        Assert (RangeLen > 0, 'TRegExpr.ParseAtom(subroutine EmitRangeC): empty range'); // impossible, but who knows..\r
-        Assert (RangeChMin <= RangeChMax, 'TRegExpr.ParseAtom(subroutine EmitRangeC): RangeChMin > RangeChMax'); // impossible, but who knows..\r
-        {$ENDIF}\r
-        if RangeLen <= TinySetLen then begin // emit "tiny set"\r
-          if regcode = @regdummy then begin\r
-            // first pass: only account for the space\r
-            regsize := RangePCodeIdx + TinySetLen; // RangeChMin/Max !!!\r
-            EXIT;\r
-          end;\r
-          regcode := RangePCodeBeg;\r
-          for Ch := RangeChMin to RangeChMax do //###0.930\r
-            if Ch in RangeSet then begin\r
-              regcode^ := Ch;\r
-              inc (regcode);\r
-            end;\r
-          // fill rest:\r
-          while regcode < RangePCodeBeg + TinySetLen do begin\r
-            regcode^ := RangeChMax;\r
-            inc (regcode);\r
-          end;\r
-        end\r
-        else begin\r
-          if regcode = @regdummy then begin\r
-            regsize := RangePCodeIdx + SizeOf (TSetOfREChar);\r
-            EXIT;\r
-          end;\r
-          // Too many members for a tiny set: store the whole set. A\r
-          // negated class is stored as the complement.\r
-          if (RangePCodeBeg - REOpSz - RENextOffSz)^ = ANYBUTTINYSET\r
-            then RangeSet := [#0 .. #255] - RangeSet;\r
-          PREOp (RangePCodeBeg - REOpSz - RENextOffSz)^ := ANYOFFULLSET;\r
-          regcode := RangePCodeBeg;\r
-          Move (RangeSet, regcode^, SizeOf (TSetOfREChar));\r
-          inc (regcode, SizeOf (TSetOfREChar));\r
-        end;\r
-      end;\r
-      {$ELSE}\r
-      EmitC (b);\r
-      {$ENDIF}\r
-    end;\r
-\r
-  // Adds a single class member that may be the start of an 'a-z' range.\r
-  procedure EmitSimpleRangeC (b : REChar);\r
-    begin\r
-      RangeBeg := b;\r
-      EmitRangeC (b);\r
-      CanBeRange := true;\r
-    end;\r
-\r
-  // Adds every character of s to the class being built.\r
-  procedure EmitRangeStr (const s : RegExprString);\r
-    var i : integer;\r
-    begin\r
-      for i := 1 to length (s)\r
-        do EmitRangeC (s [i]);\r
-    end;\r
-\r
-  // Decodes the escape whose letter APtr points at (the char after the\r
-  // escape char). On return APtr points at the last char consumed.\r
-  function UnQuoteChar (var APtr : PRegExprChar) : REChar; //###0.934\r
-    begin\r
-      case APtr^ of\r
-        't': Result := #$9; // tab (HT/TAB)\r
-        'n': Result := #$a; // newline (NL)\r
-        'r': Result := #$d; // car.return (CR)\r
-        'f': Result := #$c; // form feed (FF)\r
-        'a': Result := #$7; // alarm (bell) (BEL)\r
-        'e': Result := #$1b; // escape (ESC)\r
-        'x': begin // hex char\r
-          Result := #0;\r
-          inc (APtr);\r
-          if APtr^ = #0 then begin\r
-            Error (reeNoHexCodeAfterBSlashX);\r
-            EXIT;\r
-          end;\r
-          if APtr^ = '{' then begin // \x{nnnn} //###0.936\r
-            REPEAT\r
-              inc (APtr);\r
-              if APtr^ = #0 then begin\r
-                Error (reeNoHexCodeAfterBSlashX);\r
-                EXIT;\r
-              end;\r
-              if APtr^ <> '}' then begin\r
-                // the top 4 bits must be free before shifting in a digit\r
-                if (Ord (Result)\r
-                  ShR (SizeOf (REChar) * 8 - 4)) and $F <> 0 then begin\r
-                  Error (reeHexCodeAfterBSlashXTooBig);\r
-                  EXIT;\r
-                end;\r
-                Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));\r
-                // HexDig will cause Error if bad hex digit found\r
-              end\r
-              else BREAK;\r
-            UNTIL False;\r
-          end\r
-          else begin // \xnn - exactly two digits\r
-            Result := REChar (HexDig (APtr^));\r
-            // HexDig will cause Error if bad hex digit found\r
-            inc (APtr);\r
-            if APtr^ = #0 then begin\r
-              Error (reeNoHexCodeAfterBSlashX);\r
-              EXIT;\r
-            end;\r
-            Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));\r
-            // HexDig will cause Error if bad hex digit found\r
-          end;\r
-        end;\r
-        else Result := APtr^; // any other char stands for itself\r
-      end;\r
-    end;\r
-\r
-  begin\r
-    Result := nil;\r
-    flagp := WORST; // Tentatively.\r
-\r
-    inc (regparse);\r
-    case (regparse - 1)^ of\r
-      '^': if ((fCompModifiers and MaskModM) = 0)\r
-             or ((fLineSeparators = '') and not fLinePairedSeparatorAssigned)\r
-             then ret := EmitNode (BOL)\r
-             else ret := EmitNode (BOLML);\r
-      '$': if ((fCompModifiers and MaskModM) = 0)\r
-             or ((fLineSeparators = '') and not fLinePairedSeparatorAssigned)\r
-             then ret := EmitNode (EOL)\r
-             else ret := EmitNode (EOLML);\r
-      '.':\r
-        if (fCompModifiers and MaskModS) <> 0 then begin\r
-          ret := EmitNode (ANY);\r
-          flagp := flagp or HASWIDTH or SIMPLE;\r
-        end\r
-        else begin // not /s, so emit [^:LineSeparators:]\r
-          ret := EmitNode (ANYML);\r
-          flagp := flagp or HASWIDTH; // not so simple ;)\r
-        end;\r
-      '[': begin\r
-        if regparse^ = '^' then begin // Complement of range.\r
-          if (fCompModifiers and MaskModI) <> 0\r
-            then ret := EmitRange (ANYBUTCI)\r
-            else ret := EmitRange (ANYBUT);\r
-          inc (regparse);\r
-        end\r
-        else\r
-          if (fCompModifiers and MaskModI) <> 0\r
-            then ret := EmitRange (ANYOFCI)\r
-            else ret := EmitRange (ANYOF);\r
-\r
-        CanBeRange := false;\r
-\r
-        if (regparse^ = ']') then begin\r
-          EmitSimpleRangeC (regparse^); // []-a] -> ']' .. 'a'\r
-          inc (regparse);\r
-        end;\r
-\r
-        while (regparse^ <> #0) and (regparse^ <> ']') do begin\r
-          if (regparse^ = '-')\r
-            and ((regparse + 1)^ <> #0) and ((regparse + 1)^ <> ']')\r
-            and CanBeRange then begin\r
-            inc (regparse);\r
-            RangeEnd := regparse^;\r
-            if RangeEnd = EscChar then begin\r
-              {$IFDEF UniCode} //###0.935\r
-              if (ord ((regparse + 1)^) < 256)\r
-                and (char ((regparse + 1)^)\r
-                in ['d', 'D', 's', 'S', 'w', 'W']) then begin\r
-              {$ELSE}\r
-              if (regparse + 1)^ in ['d', 'D', 's', 'S', 'w', 'W'] then begin\r
-              {$ENDIF}\r
-                // 'x-\d' is no range: '-' is a literal, the escape is\r
-                // handled by the next loop iteration\r
-                EmitRangeC ('-'); // or treat as error ?!!\r
-                CONTINUE;\r
-              end;\r
-              inc (regparse);\r
-              RangeEnd := UnQuoteChar (regparse);\r
-            end;\r
-\r
-            // r.e.ranges extension for russian\r
-            if ((fCompModifiers and MaskModR) <> 0)\r
-              and (RangeBeg = RusRangeLoLow) and (RangeEnd = RusRangeLoHigh) then begin\r
-              EmitRangeStr (RusRangeLo);\r
-            end\r
-            else if ((fCompModifiers and MaskModR) <> 0)\r
-              and (RangeBeg = RusRangeHiLow) and (RangeEnd = RusRangeHiHigh) then begin\r
-              EmitRangeStr (RusRangeHi);\r
-            end\r
-            else if ((fCompModifiers and MaskModR) <> 0)\r
-              and (RangeBeg = RusRangeLoLow) and (RangeEnd = RusRangeHiHigh) then begin\r
-              EmitRangeStr (RusRangeLo);\r
-              EmitRangeStr (RusRangeHi);\r
-            end\r
-            else begin // standard r.e. handling\r
-              if RangeBeg > RangeEnd then begin\r
-                Error (reeInvalidRange);\r
-                EXIT;\r
-              end;\r
-              // The upper bound is emitted separately so that the loop can\r
-              // use a strict '<' and needs no increment past RangeEnd.\r
-              EmitRangeC (RangeEnd);\r
-              // RangeBeg itself was already emitted by EmitSimpleRangeC.\r
-              // Step past it only if something lies in between: for a\r
-              // one-char range at the highest REChar value an\r
-              // unconditional inc would wrap to #0 and emit every char.\r
-              if RangeBeg < RangeEnd then begin\r
-                inc (RangeBeg);\r
-                while RangeBeg < RangeEnd do begin //###0.929\r
-                  EmitRangeC (RangeBeg);\r
-                  inc (RangeBeg);\r
-                end;\r
-              end;\r
-            end;\r
-            inc (regparse);\r
-          end\r
-          else begin\r
-            if regparse^ = EscChar then begin\r
-              inc (regparse);\r
-              if regparse^ = #0 then begin\r
-                Error (reeParseAtomTrailingBackSlash);\r
-                EXIT;\r
-              end;\r
-              case regparse^ of // r.e.extensions\r
-                'd': EmitRangeStr ('0123456789');\r
-                'w': EmitRangeStr (WordChars);\r
-                's': EmitRangeStr (SpaceChars);\r
-                else EmitSimpleRangeC (UnQuoteChar (regparse));\r
-              end; { of case}\r
-            end\r
-            else EmitSimpleRangeC (regparse^);\r
-            inc (regparse);\r
-          end;\r
-        end; { of while}\r
-        EmitRangeC (#0); // finish the class\r
-        if regparse^ <> ']' then begin\r
-          Error (reeUnmatchedSqBrackets);\r
-          EXIT;\r
-        end;\r
-        inc (regparse);\r
-        flagp := flagp or HASWIDTH or SIMPLE;\r
-      end;\r
-      '(': begin\r
-        if regparse^ = '?' then begin\r
-          // check for extended Perl syntax : (?..)\r
-          if (regparse + 1)^ = '#' then begin // (?#comment)\r
-            inc (regparse, 2); // find closing ')'\r
-            while (regparse^ <> #0) and (regparse^ <> ')')\r
-              do inc (regparse);\r
-            if regparse^ <> ')' then begin\r
-              Error (reeUnclosedComment);\r
-              EXIT;\r
-            end;\r
-            inc (regparse); // skip ')'\r
-            ret := EmitNode (COMMENT); // comment\r
-          end\r
-          else begin // modifiers ?\r
-            inc (regparse); // skip '?'\r
-            begmodfs := regparse;\r
-            while (regparse^ <> #0) and (regparse^ <> ')')\r
-              do inc (regparse);\r
-            if (regparse^ <> ')')\r
-              or not ParseModifiersStr (copy (begmodfs, 1, (regparse - begmodfs)), fCompModifiers) then begin\r
-              Error (reeUrecognizedModifier);\r
-              EXIT;\r
-            end;\r
-            inc (regparse); // skip ')'\r
-            ret := EmitNode (COMMENT); // comment\r
-          end;\r
-        end\r
-        else begin // ordinary group\r
-          ret := ParseReg (1, flags);\r
-          if ret = nil then begin\r
-            Result := nil;\r
-            EXIT;\r
-          end;\r
-          flagp := flagp or flags and (HASWIDTH or SPSTART);\r
-        end;\r
-      end;\r
-      #0, '|', ')': begin // Supposed to be caught earlier.\r
-        Error (reeInternalUrp);\r
-        EXIT;\r
-      end;\r
-      '?', '+', '*': begin\r
-        Error (reeQPSBFollowsNothing);\r
-        EXIT;\r
-      end;\r
-      EscChar: begin\r
-        if regparse^ = #0 then begin\r
-          Error (reeTrailingBackSlash);\r
-          EXIT;\r
-        end;\r
-        case regparse^ of // r.e.extensions\r
-          'b': ret := EmitNode (BOUND); //###0.943\r
-          'B': ret := EmitNode (NOTBOUND); //###0.943\r
-          'A': ret := EmitNode (BOL); //###0.941\r
-          'Z': ret := EmitNode (EOL); //###0.941\r
-          'd': begin // r.e.extension - any digit ('0' .. '9')\r
-            ret := EmitNode (ANYDIGIT);\r
-            flagp := flagp or HASWIDTH or SIMPLE;\r
-          end;\r
-          'D': begin // r.e.extension - not digit ('0' .. '9')\r
-            ret := EmitNode (NOTDIGIT);\r
-            flagp := flagp or HASWIDTH or SIMPLE;\r
-          end;\r
-          's': begin // r.e.extension - any space char\r
-            {$IFDEF UseSetOfChar}\r
-            ret := EmitRange (ANYOF);\r
-            EmitRangeStr (SpaceChars);\r
-            EmitRangeC (#0);\r
-            {$ELSE}\r
-            ret := EmitNode (ANYSPACE);\r
-            {$ENDIF}\r
-            flagp := flagp or HASWIDTH or SIMPLE;\r
-          end;\r
-          'S': begin // r.e.extension - not space char\r
-            {$IFDEF UseSetOfChar}\r
-            ret := EmitRange (ANYBUT);\r
-            EmitRangeStr (SpaceChars);\r
-            EmitRangeC (#0);\r
-            {$ELSE}\r
-            ret := EmitNode (NOTSPACE);\r
-            {$ENDIF}\r
-            flagp := flagp or HASWIDTH or SIMPLE;\r
-          end;\r
-          'w': begin // r.e.extension - any english char / digit / '_'\r
-            {$IFDEF UseSetOfChar}\r
-            ret := EmitRange (ANYOF);\r
-            EmitRangeStr (WordChars);\r
-            EmitRangeC (#0);\r
-            {$ELSE}\r
-            ret := EmitNode (ANYLETTER);\r
-            {$ENDIF}\r
-            flagp := flagp or HASWIDTH or SIMPLE;\r
-          end;\r
-          'W': begin // r.e.extension - not english char / digit / '_'\r
-            {$IFDEF UseSetOfChar}\r
-            ret := EmitRange (ANYBUT);\r
-            EmitRangeStr (WordChars);\r
-            EmitRangeC (#0);\r
-            {$ELSE}\r
-            ret := EmitNode (NOTLETTER);\r
-            {$ENDIF}\r
-            flagp := flagp or HASWIDTH or SIMPLE;\r
-          end;\r
-          '1' .. '9': begin //###0.936 - back reference to a subexpression\r
-            if (fCompModifiers and MaskModI) <> 0\r
-              then ret := EmitNode (BSUBEXPCI)\r
-              else ret := EmitNode (BSUBEXP);\r
-            EmitC (REChar (ord (regparse^) - ord ('0')));\r
-            flagp := flagp or HASWIDTH or SIMPLE;\r
-          end;\r
-          else EmitExactly (UnQuoteChar (regparse));\r
-        end; { of case}\r
-        inc (regparse);\r
-      end;\r
-      else begin\r
-        dec (regparse); // undo the inc above: regparse is at the first char again\r
-        if ((fCompModifiers and MaskModX) <> 0) and // check for eXtended syntax\r
-          ((regparse^ = '#')\r
-           or ({$IFDEF UniCode}StrScan (XIgnoredChars, regparse^) <> nil //###0.947\r
-               {$ELSE}regparse^ in XIgnoredChars{$ENDIF})) then begin //###0.941 \x\r
-          if regparse^ = '#' then begin // Skip eXtended comment\r
-            // find comment terminator (group of \n and/or \r)\r
-            while (regparse^ <> #0) and (regparse^ <> #$d) and (regparse^ <> #$a)\r
-              do inc (regparse);\r
-            while (regparse^ = #$d) or (regparse^ = #$a) // skip comment terminator\r
-              do inc (regparse); // attempt to support different type of line separators\r
-          end\r
-          else begin // Skip the blanks!\r
-            while {$IFDEF UniCode}StrScan (XIgnoredChars, regparse^) <> nil //###0.947\r
-                  {$ELSE}regparse^ in XIgnoredChars{$ENDIF}\r
-              do inc (regparse);\r
-          end;\r
-          ret := EmitNode (COMMENT); // comment\r
-        end\r
-        else begin\r
-          // Run of ordinary characters up to the next metacharacter.\r
-          len := strcspn (regparse, META);\r
-          if len <= 0 then\r
-            if regparse^ <> '{' then begin\r
-              Error (reeRarseAtomInternalDisaster);\r
-              EXIT;\r
-            end\r
-            else len := strcspn (regparse + 1, META) + 1; // bad {n,m} - compile as EXATLY\r
-          ender := (regparse + len)^;\r
-          if (len > 1)\r
-            and ((ender = '*') or (ender = '+') or (ender = '?') or (ender = '{'))\r
-            then dec (len); // Back off clear of ?+*{ operand.\r
-          flagp := flagp or HASWIDTH;\r
-          if len = 1\r
-            then flagp := flagp or SIMPLE;\r
-          if (fCompModifiers and MaskModI) <> 0\r
-            then ret := EmitNode (EXACTLYCI)\r
-            else ret := EmitNode (EXACTLY);\r
-          // In /x mode ignored chars are dropped and '#' ends the run.\r
-          while (len > 0)\r
-            and (((fCompModifiers and MaskModX) = 0) or (regparse^ <> '#')) do begin\r
-            if ((fCompModifiers and MaskModX) = 0) or not ( //###0.941\r
-               {$IFDEF UniCode}StrScan (XIgnoredChars, regparse^) <> nil //###0.947\r
-               {$ELSE}regparse^ in XIgnoredChars{$ENDIF} )\r
-              then EmitC (regparse^);\r
-            inc (regparse);\r
-            dec (len);\r
-          end;\r
-          EmitC (#0);\r
-        end; { of if not comment}\r
-      end; { of case else}\r
-    end; { of case}\r
-\r
-    Result := ret;\r
-  end; { of function TRegExpr.ParseAtom\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetCompilerErrorPos : integer;\r
-// Offset of the current parse position from the start of the expression\r
-// being compiled; 0 when either pointer is nil (not compiling).\r
-  begin\r
-    if (regexpbeg <> nil) and (regparse <> nil)\r
-      then Result := regparse - regexpbeg\r
-      else Result := 0; // not in compiling mode ?\r
-  end; { of function TRegExpr.GetCompilerErrorPos\r
---------------------------------------------------------------}\r
-\r
-\r
-{=============================================================}\r
-{===================== Matching section ======================}\r
-{=============================================================}\r
-\r
-{$IFNDEF UseSetOfChar}\r
-function TRegExpr.StrScanCI (s : PRegExprChar; ch : REChar) : PRegExprChar; //###0.928 - now method of TRegExpr\r
-// Finds the first char of the #0-terminated string s that equals ch or\r
-// InvertCase (ch). Returns a pointer to it, or nil if there is none.\r
-  begin\r
-    Result := nil;\r
-    while s^ <> #0 do begin\r
-      if (s^ = ch) or (s^ = InvertCase (ch)) then begin\r
-        Result := s;\r
-        EXIT;\r
-      end;\r
-      inc (s);\r
-    end;\r
-  end; { of function TRegExpr.StrScanCI\r
---------------------------------------------------------------}\r
-{$ENDIF}\r
-\r
-function TRegExpr.regrepeat (p : PRegExprChar; AMax : integer) : integer;\r
-// repeatedly match something simple, report how many\r
- var\r
- scan : PRegExprChar;\r
- opnd : PRegExprChar;\r
- TheMax : integer;\r
- {Ch,} InvCh : REChar; //###0.931\r
- sestart, seend : PRegExprChar; //###0.936\r
- begin\r
- Result := 0;\r
- scan := reginput;\r
- opnd := p + REOpSz + RENextOffSz; //OPERAND\r
- TheMax := fInputEnd - scan;\r
- if TheMax > AMax\r
- then TheMax := AMax;\r
- case PREOp (p)^ of\r
- ANY: begin\r
- // note - ANYML cannot be proceeded in regrepeat because can skip\r
- // more than one char at once\r
- Result := TheMax;\r
- inc (scan, Result);\r
- end;\r
- EXACTLY: begin // in opnd can be only ONE char !!!\r
-// Ch := opnd^; // store in register //###0.931\r
- while (Result < TheMax) and (opnd^ = scan^) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- end;\r
- EXACTLYCI: begin // in opnd can be only ONE char !!!\r
-// Ch := opnd^; // store in register //###0.931\r
- while (Result < TheMax) and (opnd^ = scan^) do begin // prevent unneeded InvertCase //###0.931\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- if Result < TheMax then begin //###0.931\r
- InvCh := InvertCase (opnd^); // store in register\r
- while (Result < TheMax) and\r
- ((opnd^ = scan^) or (InvCh = scan^)) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- end;\r
- end;\r
- BSUBEXP: begin //###0.936\r
- sestart := startp [ord (opnd^)];\r
- if sestart = nil\r
- then EXIT;\r
- seend := endp [ord (opnd^)];\r
- if seend = nil\r
- then EXIT;\r
- REPEAT\r
- opnd := sestart;\r
- while opnd < seend do begin\r
- if (scan >= fInputEnd) or (scan^ <> opnd^)\r
- then EXIT;\r
- inc (scan);\r
- inc (opnd);\r
- end;\r
- inc (Result);\r
- reginput := scan;\r
- UNTIL Result >= AMax;\r
- end;\r
- BSUBEXPCI: begin //###0.936\r
- sestart := startp [ord (opnd^)];\r
- if sestart = nil\r
- then EXIT;\r
- seend := endp [ord (opnd^)];\r
- if seend = nil\r
- then EXIT;\r
- REPEAT\r
- opnd := sestart;\r
- while opnd < seend do begin\r
- if (scan >= fInputEnd) or\r
- ((scan^ <> opnd^) and (scan^ <> InvertCase (opnd^)))\r
- then EXIT;\r
- inc (scan);\r
- inc (opnd);\r
- end;\r
- inc (Result);\r
- reginput := scan;\r
- UNTIL Result >= AMax;\r
- end;\r
- ANYDIGIT:\r
- while (Result < TheMax) and\r
- (scan^ >= '0') and (scan^ <= '9') do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- NOTDIGIT:\r
- while (Result < TheMax) and\r
- ((scan^ < '0') or (scan^ > '9')) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- {$IFNDEF UseSetOfChar} //###0.929\r
- ANYLETTER:\r
- while (Result < TheMax) and\r
- (Pos (scan^, fWordChars) > 0) //###0.940\r
- { ((scan^ >= 'a') and (scan^ <= 'z') !! I've forgotten (>='0') and (<='9')\r
- or (scan^ >= 'A') and (scan^ <= 'Z') or (scan^ = '_'))} do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- NOTLETTER:\r
- while (Result < TheMax) and\r
- (Pos (scan^, fWordChars) <= 0) //###0.940\r
- { not ((scan^ >= 'a') and (scan^ <= 'z') !! I've forgotten (>='0') and (<='9')\r
- or (scan^ >= 'A') and (scan^ <= 'Z')\r
- or (scan^ = '_'))} do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- ANYSPACE:\r
- while (Result < TheMax) and\r
- (Pos (scan^, fSpaceChars) > 0) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- NOTSPACE:\r
- while (Result < TheMax) and\r
- (Pos (scan^, fSpaceChars) <= 0) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- {$ENDIF}\r
- ANYOFTINYSET: begin\r
- while (Result < TheMax) and //!!!TinySet\r
- ((scan^ = opnd^) or (scan^ = (opnd + 1)^)\r
- or (scan^ = (opnd + 2)^)) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- end;\r
- ANYBUTTINYSET: begin\r
- while (Result < TheMax) and //!!!TinySet\r
- (scan^ <> opnd^) and (scan^ <> (opnd + 1)^)\r
- and (scan^ <> (opnd + 2)^) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- end;\r
- {$IFDEF UseSetOfChar} //###0.929\r
- ANYOFFULLSET: begin\r
- while (Result < TheMax) and\r
- (scan^ in PSetOfREChar (opnd)^) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- end;\r
- {$ELSE}\r
- ANYOF:\r
- while (Result < TheMax) and\r
- (StrScan (opnd, scan^) <> nil) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- ANYBUT:\r
- while (Result < TheMax) and\r
- (StrScan (opnd, scan^) = nil) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- ANYOFCI:\r
- while (Result < TheMax) and (StrScanCI (opnd, scan^) <> nil) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- ANYBUTCI:\r
- while (Result < TheMax) and (StrScanCI (opnd, scan^) = nil) do begin\r
- inc (Result);\r
- inc (scan);\r
- end;\r
- {$ENDIF}\r
- else begin // Oh dear. Called inappropriately.\r
- Result := 0; // Best compromise.\r
- Error (reeRegRepeatCalledInappropriately);\r
- EXIT;\r
- end;\r
- end; { of case}\r
- reginput := scan;\r
- end; { of function TRegExpr.regrepeat\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.regnext (p : PRegExprChar) : PRegExprChar;\r
-// Returns the node that follows p in the compiled program.\r
-// The result is nil when p is the regdummy placeholder or when the\r
-// next-offset stored behind the opcode of p is zero.\r
- var\r
-  Delta : TRENextOff; // offset read from the node header\r
- begin\r
-  Result := nil;\r
-  if p <> @regdummy then begin\r
-    Delta := PRENextOff (p + REOpSz)^; //###0.933 inlined NEXT\r
-    if Delta <> 0\r
-     then Result := p + Delta;\r
-   end;\r
- end; { of function TRegExpr.regnext\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;\r
-// recursively matching routine\r
-// Conceptually the strategy is simple: check to see whether the current\r
-// node matches, call self recursively to see whether the rest matches,\r
-// and then act accordingly. In practice we make some effort to avoid\r
-// recursion, in particular by going through "ordinary" nodes (that don't\r
-// need to know whether the rest of the match failed) by a loop instead of\r
-// by recursion.\r
-//\r
-// prog is the first node to execute; matching starts at the current value\r
-// of reginput. The function returns true when the EEND node is reached,\r
-// reginput then points just behind the consumed input. Every plain EXIT\r
-// below leaves Result = false, i.e. reports a mismatch.\r
- var\r
- scan : PRegExprChar; // Current node.\r
- next : PRegExprChar; // Next node.\r
- len : integer;\r
- opnd : PRegExprChar;\r
- no : integer;\r
- save : PRegExprChar;\r
- nextch : REChar;\r
- BracesMin, BracesMax : integer; // we use integer instead of TREBracesArg for better support */+\r
- {$IFDEF ComplexBraces}\r
- SavedLoopStack : array [1 .. LoopStackMax] of integer; // :(( very bad for recursion\r
- SavedLoopStackIdx : integer; //###0.925\r
- {$ENDIF}\r
- begin\r
- Result := false;\r
- scan := prog;\r
-\r
- while scan <> nil do begin\r
- // read the next-offset stored behind the opcode; 0 means "no next node"\r
- len := PRENextOff (scan + 1)^; //###0.932 inlined regnext\r
- if len = 0\r
- then next := nil\r
- else next := scan + len;\r
-\r
- case scan^ of\r
- // word boundary: BOUND fails when reginput is not on a boundary,\r
- // NOTBOUND fails when it is (hence the xor with the opcode test)\r
- NOTBOUND, //###0.943 //!!! think about UseSetOfChar !!!\r
- BOUND:\r
- if (scan^ = BOUND)\r
- xor (\r
- ((reginput = fInputStart) or (Pos ((reginput - 1)^, fWordChars) <= 0))\r
- and (reginput^ <> #0) and (Pos (reginput^, fWordChars) > 0)\r
- or\r
- (reginput <> fInputStart) and (Pos ((reginput - 1)^, fWordChars) > 0)\r
- and ((reginput^ = #0) or (Pos (reginput^, fWordChars) <= 0)))\r
- then EXIT;\r
-\r
- // BOL / EOL: only the very start of the input / the terminating #0\r
- BOL: if reginput <> fInputStart\r
- then EXIT;\r
- EOL: if reginput^ <> #0\r
- then EXIT;\r
- // BOLML / EOLML: additionally accept positions next to line separators\r
- BOLML: if reginput > fInputStart then begin\r
- nextch := (reginput - 1)^;\r
- if (nextch <> fLinePairedSeparatorTail)\r
- or ((reginput - 1) <= fInputStart)\r
- or ((reginput - 2)^ <> fLinePairedSeparatorHead)\r
- then begin\r
- if (nextch = fLinePairedSeparatorHead)\r
- and (reginput^ = fLinePairedSeparatorTail)\r
- then EXIT; // don't stop between paired separator\r
- if\r
- {$IFNDEF UniCode}\r
- not (nextch in fLineSeparatorsSet)\r
- {$ELSE}\r
- (pos (nextch, fLineSeparators) <= 0)\r
- {$ENDIF}\r
- then EXIT;\r
- end;\r
- end;\r
- EOLML: if reginput^ <> #0 then begin\r
- nextch := reginput^;\r
- if (nextch <> fLinePairedSeparatorHead)\r
- or ((reginput + 1)^ <> fLinePairedSeparatorTail)\r
- then begin\r
- if (nextch = fLinePairedSeparatorTail)\r
- and (reginput > fInputStart)\r
- and ((reginput - 1)^ = fLinePairedSeparatorHead)\r
- then EXIT; // don't stop between paired separator\r
- if\r
- {$IFNDEF UniCode}\r
- not (nextch in fLineSeparatorsSet)\r
- {$ELSE}\r
- (pos (nextch, fLineSeparators) <= 0)\r
- {$ENDIF}\r
- then EXIT;\r
- end;\r
- end;\r
- // single-character nodes: test the current char and step over it\r
- ANY: begin\r
- if reginput^ = #0\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- ANYML: begin //###0.941\r
- if (reginput^ = #0)\r
- or ((reginput^ = fLinePairedSeparatorHead)\r
- and ((reginput + 1)^ = fLinePairedSeparatorTail))\r
- or {$IFNDEF UniCode} (reginput^ in fLineSeparatorsSet)\r
- {$ELSE} (pos (reginput^, fLineSeparators) > 0) {$ENDIF}\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- ANYDIGIT: begin\r
- if (reginput^ = #0) or (reginput^ < '0') or (reginput^ > '9')\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- NOTDIGIT: begin\r
- if (reginput^ = #0) or ((reginput^ >= '0') and (reginput^ <= '9'))\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- {$IFNDEF UseSetOfChar} //###0.929\r
- ANYLETTER: begin\r
- if (reginput^ = #0) or (Pos (reginput^, fWordChars) <= 0) //###0.943\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- NOTLETTER: begin\r
- if (reginput^ = #0) or (Pos (reginput^, fWordChars) > 0) //###0.943\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- ANYSPACE: begin\r
- if (reginput^ = #0) or not (Pos (reginput^, fSpaceChars) > 0) //###0.943\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- NOTSPACE: begin\r
- if (reginput^ = #0) or (Pos (reginput^, fSpaceChars) > 0) //###0.943\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- {$ENDIF}\r
- // literal string nodes: the operand is a zero-terminated string that\r
- // must appear at reginput (EXACTLYCI also accepts InvertCase of a char)\r
- EXACTLYCI: begin\r
- opnd := scan + REOpSz + RENextOffSz; // OPERAND\r
- // Inline the first character, for speed.\r
- if (opnd^ <> reginput^)\r
- and (InvertCase (opnd^) <> reginput^)\r
- then EXIT;\r
- len := strlen (opnd);\r
- //###0.929 begin\r
- no := len;\r
- save := reginput;\r
- while no > 1 do begin\r
- inc (save);\r
- inc (opnd);\r
- if (opnd^ <> save^)\r
- and (InvertCase (opnd^) <> save^)\r
- then EXIT;\r
- dec (no);\r
- end;\r
- //###0.929 end\r
- inc (reginput, len);\r
- end;\r
- EXACTLY: begin\r
- opnd := scan + REOpSz + RENextOffSz; // OPERAND\r
- // Inline the first character, for speed.\r
- if opnd^ <> reginput^\r
- then EXIT;\r
- len := strlen (opnd);\r
- //###0.929 begin\r
- no := len;\r
- save := reginput;\r
- while no > 1 do begin\r
- inc (save);\r
- inc (opnd);\r
- if opnd^ <> save^\r
- then EXIT;\r
- dec (no);\r
- end;\r
- //###0.929 end\r
- inc (reginput, len);\r
- end;\r
- // back references: the text captured between startp [no] and endp [no]\r
- // must be repeated at reginput; an unset capture is a mismatch\r
- BSUBEXP: begin //###0.936\r
- no := ord ((scan + REOpSz + RENextOffSz)^);\r
- if startp [no] = nil\r
- then EXIT;\r
- if endp [no] = nil\r
- then EXIT;\r
- save := reginput;\r
- opnd := startp [no];\r
- while opnd < endp [no] do begin\r
- if (save >= fInputEnd) or (save^ <> opnd^)\r
- then EXIT;\r
- inc (save);\r
- inc (opnd);\r
- end;\r
- reginput := save;\r
- end;\r
- BSUBEXPCI: begin //###0.936\r
- no := ord ((scan + REOpSz + RENextOffSz)^);\r
- if startp [no] = nil\r
- then EXIT;\r
- if endp [no] = nil\r
- then EXIT;\r
- save := reginput;\r
- opnd := startp [no];\r
- while opnd < endp [no] do begin\r
- if (save >= fInputEnd) or\r
- ((save^ <> opnd^) and (save^ <> InvertCase (opnd^)))\r
- then EXIT;\r
- inc (save);\r
- inc (opnd);\r
- end;\r
- reginput := save;\r
- end;\r
- // tiny sets: the operand holds three characters that are compared directly\r
- ANYOFTINYSET: begin\r
- if (reginput^ = #0) or //!!!TinySet\r
- ((reginput^ <> (scan + REOpSz + RENextOffSz)^)\r
- and (reginput^ <> (scan + REOpSz + RENextOffSz + 1)^)\r
- and (reginput^ <> (scan + REOpSz + RENextOffSz + 2)^))\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- ANYBUTTINYSET: begin\r
- if (reginput^ = #0) or //!!!TinySet\r
- (reginput^ = (scan + REOpSz + RENextOffSz)^)\r
- or (reginput^ = (scan + REOpSz + RENextOffSz + 1)^)\r
- or (reginput^ = (scan + REOpSz + RENextOffSz + 2)^)\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- {$IFDEF UseSetOfChar} //###0.929\r
- ANYOFFULLSET: begin\r
- if (reginput^ = #0)\r
- or not (reginput^ in PSetOfREChar (scan + REOpSz + RENextOffSz)^)\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- {$ELSE}\r
- ANYOF: begin\r
- if (reginput^ = #0) or (StrScan (scan + REOpSz + RENextOffSz, reginput^) = nil)\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- ANYBUT: begin\r
- if (reginput^ = #0) or (StrScan (scan + REOpSz + RENextOffSz, reginput^) <> nil)\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- ANYOFCI: begin\r
- if (reginput^ = #0) or (StrScanCI (scan + REOpSz + RENextOffSz, reginput^) = nil)\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- ANYBUTCI: begin\r
- if (reginput^ = #0) or (StrScanCI (scan + REOpSz + RENextOffSz, reginput^) <> nil)\r
- then EXIT;\r
- inc (reginput);\r
- end;\r
- {$ENDIF}\r
- // nodes that consume nothing - just continue with the next node\r
- NOTHING: ;\r
- COMMENT: ;\r
- BACK: ;\r
- // opening parenthesis: record the start of the capture, match the rest\r
- // recursively and put the previous value back if the rest fails\r
- Succ (OPEN) .. TREOp (Ord (OPEN) + NSUBEXP - 1) : begin //###0.929\r
- no := ord (scan^) - ord (OPEN);\r
-// save := reginput;\r
- save := startp [no]; //###0.936\r
- startp [no] := reginput; //###0.936\r
- Result := MatchPrim (next);\r
- if not Result //###0.936\r
- then startp [no] := save;\r
-// if Result and (startp [no] = nil)\r
-// then startp [no] := save;\r
- // Don't set startp if some later invocation of the same\r
- // parentheses already has.\r
- EXIT;\r
- end;\r
- // closing parenthesis: same scheme for the end of the capture\r
- Succ (CLOSE) .. TREOp (Ord (CLOSE) + NSUBEXP - 1): begin //###0.929\r
- no := ord (scan^) - ord (CLOSE);\r
-// save := reginput;\r
- save := endp [no]; //###0.936\r
- endp [no] := reginput; //###0.936\r
- Result := MatchPrim (next);\r
- if not Result //###0.936\r
- then endp [no] := save;\r
-// if Result and (endp [no] = nil)\r
-// then endp [no] := save;\r
- // Don't set endp if some later invocation of the same\r
- // parentheses already has.\r
- EXIT;\r
- end;\r
- // alternation: a single branch is simply entered; several branches are\r
- // tried one after another, reginput is reset after each failed attempt\r
- BRANCH: begin\r
- if (next^ <> BRANCH) // No choice.\r
- then next := scan + REOpSz + RENextOffSz // Avoid recursion\r
- else begin\r
- REPEAT\r
- save := reginput;\r
- Result := MatchPrim (scan + REOpSz + RENextOffSz);\r
- if Result\r
- then EXIT;\r
- reginput := save;\r
- scan := regnext (scan);\r
- UNTIL (scan = nil) or (scan^ <> BRANCH);\r
- EXIT;\r
- end;\r
- end;\r
- {$IFDEF ComplexBraces}\r
- // LOOPENTRY pushes a fresh counter on LoopStack; the matching LOOP /\r
- // LOOPNG node counts the iterations and decides whether to run the\r
- // loop body (opnd) again or to go on with the node after the loop (next)\r
- LOOPENTRY: begin //###0.925\r
- no := LoopStackIdx;\r
- inc (LoopStackIdx);\r
- if LoopStackIdx > LoopStackMax then begin\r
- Error (reeLoopStackExceeded);\r
- EXIT;\r
- end;\r
- save := reginput;\r
- LoopStack [LoopStackIdx] := 0; // init loop counter\r
- Result := MatchPrim (next); // execute LOOP\r
- LoopStackIdx := no; // cleanup\r
- if Result\r
- then EXIT;\r
- reginput := save;\r
- EXIT;\r
- end;\r
- LOOP, LOOPNG: begin //###0.940\r
- if LoopStackIdx <= 0 then begin\r
- Error (reeLoopWithoutEntry);\r
- EXIT;\r
- end;\r
- opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + 2 * REBracesArgSz)^;\r
- BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;\r
- BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;\r
- save := reginput;\r
- if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work\r
- if scan^ = LOOP then begin\r
- // greedy way - first try to max deep of greed ;)\r
- if LoopStack [LoopStackIdx] < BracesMax then begin\r
- inc (LoopStack [LoopStackIdx]);\r
- no := LoopStackIdx;\r
- Result := MatchPrim (opnd);\r
- LoopStackIdx := no;\r
- if Result\r
- then EXIT;\r
- reginput := save;\r
- end;\r
- dec (LoopStackIdx); // Fail. May be we are too greedy? ;)\r
- Result := MatchPrim (next);\r
- if not Result\r
- then reginput := save;\r
- EXIT;\r
- end\r
- else begin\r
- // non-greedy - try just now\r
- Result := MatchPrim (next);\r
- if Result\r
- then EXIT\r
- else reginput := save; // failed - move next and try again\r
- if LoopStack [LoopStackIdx] < BracesMax then begin\r
- inc (LoopStack [LoopStackIdx]);\r
- no := LoopStackIdx;\r
- Result := MatchPrim (opnd);\r
- LoopStackIdx := no;\r
- if Result\r
- then EXIT;\r
- reginput := save;\r
- end;\r
- dec (LoopStackIdx); // Failed - back up\r
- EXIT;\r
- end\r
- end\r
- else begin // first match a min_cnt times\r
- inc (LoopStack [LoopStackIdx]);\r
- no := LoopStackIdx;\r
- Result := MatchPrim (opnd);\r
- LoopStackIdx := no;\r
- if Result\r
- then EXIT;\r
- dec (LoopStack [LoopStackIdx]);\r
- reginput := save;\r
- EXIT;\r
- end;\r
- end;\r
- {$ENDIF}\r
- // simple repetition of a single-character operand: regrepeat counts how\r
- // many times the operand matches, then every allowed count is tried\r
- // against the rest of the program (ascending for the *NG opcodes,\r
- // descending otherwise)\r
- STAR, PLUS, BRACES, STARNG, PLUSNG, BRACESNG: begin\r
- // Lookahead to avoid useless match attempts when we know\r
- // what character comes next.\r
- nextch := #0;\r
- if next^ = EXACTLY\r
- then nextch := (next + REOpSz + RENextOffSz)^;\r
- BracesMax := MaxInt; // infinite loop for * and + //###0.92\r
- if (scan^ = STAR) or (scan^ = STARNG)\r
- then BracesMin := 0 // STAR\r
- else if (scan^ = PLUS) or (scan^ = PLUSNG)\r
- then BracesMin := 1 // PLUS\r
- else begin // BRACES\r
- BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;\r
- BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;\r
- end;\r
- save := reginput;\r
- opnd := scan + REOpSz + RENextOffSz;\r
- if (scan^ = BRACES) or (scan^ = BRACESNG)\r
- then inc (opnd, 2 * REBracesArgSz);\r
-\r
- if (scan^ = PLUSNG) or (scan^ = STARNG) or (scan^ = BRACESNG) then begin\r
- // non-greedy mode\r
- BracesMax := regrepeat (opnd, BracesMax); // don't repeat more than BracesMax\r
- // Now we know real Max limit to move forward (for recursion 'back up')\r
- // In some cases it can be faster to check only Min positions first,\r
- // but after that we have to check every position separtely instead\r
- // of fast scannig in loop.\r
- no := BracesMin;\r
- while no <= BracesMax do begin\r
- reginput := save + no;\r
- // If it could work, try it.\r
- if (nextch = #0) or (reginput^ = nextch) then begin\r
- {$IFDEF ComplexBraces}\r
- System.Move (LoopStack, SavedLoopStack, SizeOf (LoopStack)); //###0.925\r
- SavedLoopStackIdx := LoopStackIdx;\r
- {$ENDIF}\r
- if MatchPrim (next) then begin\r
- Result := true;\r
- EXIT;\r
- end;\r
- {$IFDEF ComplexBraces}\r
- System.Move (SavedLoopStack, LoopStack, SizeOf (LoopStack));\r
- LoopStackIdx := SavedLoopStackIdx;\r
- {$ENDIF}\r
- end;\r
- inc (no); // Couldn't or didn't - move forward.\r
- end; { of while}\r
- EXIT;\r
- end\r
- else begin // greedy mode\r
- no := regrepeat (opnd, BracesMax); // don't repeat more than max_cnt\r
- while no >= BracesMin do begin\r
- // If it could work, try it.\r
- if (nextch = #0) or (reginput^ = nextch) then begin\r
- {$IFDEF ComplexBraces}\r
- System.Move (LoopStack, SavedLoopStack, SizeOf (LoopStack)); //###0.925\r
- SavedLoopStackIdx := LoopStackIdx;\r
- {$ENDIF}\r
- if MatchPrim (next) then begin\r
- Result := true;\r
- EXIT;\r
- end;\r
- {$IFDEF ComplexBraces}\r
- System.Move (SavedLoopStack, LoopStack, SizeOf (LoopStack));\r
- LoopStackIdx := SavedLoopStackIdx;\r
- {$ENDIF}\r
- end;\r
- dec (no); // Couldn't or didn't - back up.\r
- reginput := save + no;\r
- end; { of while}\r
- EXIT;\r
- end;\r
- end;\r
- EEND: begin\r
- Result := true; // Success!\r
- EXIT;\r
- end;\r
- else begin\r
- Error (reeMatchPrimMemoryCorruption);\r
- EXIT;\r
- end;\r
- end; { of case scan^}\r
- scan := next;\r
- end; { of while scan <> nil}\r
-\r
- // We get here only if there's trouble -- normally "case EEND" is the\r
- // terminating point.\r
- Error (reeMatchPrimCorruptedPointers);\r
- end; { of function TRegExpr.MatchPrim\r
---------------------------------------------------------------}\r
-\r
-{$IFDEF UseFirstCharSet} //###0.929\r
-procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);\r
-// Walks the compiled program starting at prog and adds to FirstCharSet\r
-// the characters a match may begin with. Nodes that cannot consume\r
-// input are stepped over, nodes that fix the first character end the walk\r
-// (EXIT), and alternatives / loop bodies are handled by recursion.\r
-// Whenever the first character cannot be narrowed down the set is\r
-// widened to [#0 .. #255].\r
-// NOTE(review): the set is only extended here, never cleared - presumably\r
-// the caller resets FirstCharSet beforehand; verify against the caller.\r
- var\r
- scan : PRegExprChar; // Current node.\r
- next : PRegExprChar; // Next node.\r
- opnd : PRegExprChar;\r
- min_cnt : integer;\r
- begin\r
- scan := prog;\r
- while scan <> nil do begin\r
- next := regnext (scan);\r
- case PREOp (scan)^ of\r
- BSUBEXP, BSUBEXPCI: begin //###0.938\r
- FirstCharSet := [#0 .. #255]; // :((( we cannot\r
- // optimize r.e. if it starts with back reference\r
- EXIT;\r
- end;\r
- BOL, BOLML: ; // EXIT; //###0.937\r
- // end-of-line anchors can match in front of #0 and, with ModifierM,\r
- // in front of any character listed in LineSeparators\r
- EOL, EOLML: begin //###0.948 was empty in 0.947, was EXIT in 0.937\r
- Include (FirstCharSet, #0);\r
- if ModifierM\r
- then begin\r
- opnd := PRegExprChar (LineSeparators);\r
- while opnd^ <> #0 do begin\r
- Include (FirstCharSet, opnd^);\r
- inc (opnd);\r
- end;\r
- end;\r
- EXIT;\r
- end;\r
- BOUND, NOTBOUND: ; //###0.943 ?!!\r
- ANY, ANYML: begin // we can better define ANYML !!!\r
- FirstCharSet := [#0 .. #255]; //###0.930\r
- EXIT;\r
- end;\r
- ANYDIGIT: begin\r
- FirstCharSet := FirstCharSet + ['0' .. '9'];\r
- EXIT;\r
- end;\r
- NOTDIGIT: begin\r
- FirstCharSet := FirstCharSet + ([#0 .. #255] - ['0' .. '9']); //###0.948 FirstCharSet was forgotten\r
- EXIT;\r
- end;\r
- // literal strings: only the first operand character matters\r
- EXACTLYCI: begin\r
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz)^);\r
- Include (FirstCharSet, InvertCase ((scan + REOpSz + RENextOffSz)^));\r
- EXIT;\r
- end;\r
- EXACTLY: begin\r
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz)^);\r
- EXIT;\r
- end;\r
- ANYOFFULLSET: begin\r
- FirstCharSet := FirstCharSet + PSetOfREChar (scan + REOpSz + RENextOffSz)^;\r
- EXIT;\r
- end;\r
- ANYOFTINYSET: begin\r
- //!!!TinySet\r
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz)^);\r
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz + 1)^);\r
- Include (FirstCharSet, (scan + REOpSz + RENextOffSz + 2)^);\r
- // ... // up to TinySetLen\r
- EXIT;\r
- end;\r
- ANYBUTTINYSET: begin\r
- //!!!TinySet\r
- FirstCharSet := FirstCharSet + ([#0 .. #255] - [ //###0.948 FirstCharSet was forgotten\r
- (scan + REOpSz + RENextOffSz)^,\r
- (scan + REOpSz + RENextOffSz + 1)^,\r
- (scan + REOpSz + RENextOffSz + 2)^]);\r
- // ... // up to TinySetLen\r
- EXIT;\r
- end;\r
- // nodes without input consumption - continue with the next node\r
- NOTHING: ;\r
- COMMENT: ;\r
- BACK: ;\r
- // parentheses do not consume input - analyse what follows them\r
- Succ (OPEN) .. TREOp (Ord (OPEN) + NSUBEXP - 1) : begin //###0.929\r
- FillFirstCharSet (next);\r
- EXIT;\r
- end;\r
- Succ (CLOSE) .. TREOp (Ord (CLOSE) + NSUBEXP - 1): begin //###0.929\r
- FillFirstCharSet (next);\r
- EXIT;\r
- end;\r
- // alternation: collect the first characters of every branch\r
- BRANCH: begin\r
- if (PREOp (next)^ <> BRANCH) // No choice.\r
- then next := scan + REOpSz + RENextOffSz // Avoid recursion.\r
- else begin\r
- REPEAT\r
- FillFirstCharSet (scan + REOpSz + RENextOffSz);\r
- scan := regnext (scan);\r
- UNTIL (scan = nil) or (PREOp (scan)^ <> BRANCH);\r
- EXIT;\r
- end;\r
- end;\r
- {$IFDEF ComplexBraces}\r
- LOOPENTRY: begin //###0.925\r
-// LoopStack [LoopStackIdx] := 0; //###0.940 line removed\r
- FillFirstCharSet (next); // execute LOOP\r
- EXIT;\r
- end;\r
- // loop node: the body (opnd) may start the match; when zero iterations\r
- // are allowed the node after the loop may start it as well\r
- LOOP, LOOPNG: begin //###0.940\r
- opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + REBracesArgSz * 2)^;\r
- min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^;\r
- FillFirstCharSet (opnd);\r
- if min_cnt = 0\r
- then FillFirstCharSet (next);\r
- EXIT;\r
- end;\r
- {$ENDIF}\r
- // '*' may match nothing, so the walk continues with the next node;\r
- // '+' needs at least one occurrence, so the operand decides alone\r
- STAR, STARNG: //###0.940\r
- FillFirstCharSet (scan + REOpSz + RENextOffSz);\r
- PLUS, PLUSNG: begin //###0.940\r
- FillFirstCharSet (scan + REOpSz + RENextOffSz);\r
- EXIT;\r
- end;\r
- BRACES, BRACESNG: begin //###0.940\r
- opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;\r
- min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; // BRACES\r
- FillFirstCharSet (opnd);\r
- if min_cnt > 0\r
- then EXIT;\r
- end;\r
- // end of program reached without a consuming node: any char may start\r
- EEND: begin\r
- FirstCharSet := [#0 .. #255]; //###0.948\r
- EXIT;\r
- end;\r
- else begin\r
- Error (reeMatchPrimMemoryCorruption);\r
- EXIT;\r
- end;\r
- end; { of case scan^}\r
- scan := next;\r
- end; { of while scan <> nil}\r
- end; { of procedure FillFirstCharSet\r
---------------------------------------------------------------}\r
-{$ENDIF}\r
-\r
-function TRegExpr.Exec (const AInputString : RegExprString) : boolean;\r
-// Assigns AInputString to the InputString property and then searches it\r
-// from its first character. The result of ExecPrim is returned as is.\r
- const\r
-  FirstCharPos = 1; // ExecPrim positions are 1-based\r
- begin\r
-  InputString := AInputString;\r
-  Result := ExecPrim (FirstCharPos);\r
- end; { of function TRegExpr.Exec\r
---------------------------------------------------------------}\r
-\r
-{$IFDEF OverMeth}\r
-{$IFNDEF FPC}\r
-function TRegExpr.Exec : boolean;\r
-// Searches the current input string from position 1 without assigning\r
-// a new input string; simply forwards to ExecPrim.\r
- begin\r
- Result := ExecPrim (1);\r
- end; { of function TRegExpr.Exec\r
---------------------------------------------------------------}\r
-{$ENDIF}\r
-function TRegExpr.Exec (AOffset: integer) : boolean;\r
-// Searches the current input string starting at the 1-based position\r
-// AOffset; simply forwards to ExecPrim.\r
- begin\r
- Result := ExecPrim (AOffset);\r
- end; { of function TRegExpr.Exec\r
---------------------------------------------------------------}\r
-{$ENDIF}\r
-\r
-function TRegExpr.ExecPos (AOffset: integer {$IFDEF DefParam}= 1{$ENDIF}) : boolean;\r
-// Searches the current input string starting at the 1-based position\r
-// AOffset (defaults to 1 when DefParam is defined); forwards to ExecPrim.\r
- begin\r
- Result := ExecPrim (AOffset);\r
- end; { of function TRegExpr.ExecPos\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.ExecPrim (AOffset: integer) : boolean;\r
-// Searches the current input string for the compiled expression, starting\r
-// at the 1-based character position AOffset.\r
-// Returns true when a match is found; startp [0] / endp [0] then delimit\r
-// the match. Returns false (with all startp / endp entries cleared) when\r
-// nothing matches, the program is not usable, or AOffset lies behind the\r
-// position following the last character. A missing input string or an\r
-// AOffset below 1 is reported through Error.\r
- procedure ClearMatchs;\r
- // Clears matchs array\r
- var i : integer;\r
- begin\r
- for i := 0 to NSUBEXP - 1 do begin\r
- startp [i] := nil;\r
- endp [i] := nil;\r
- end;\r
- end; { of procedure ClearMatchs;\r
-..............................................................}\r
- function RegMatch (str : PRegExprChar) : boolean;\r
- // try match at specific point\r
- // On success the whole-match bounds (index 0) are recorded: str is the\r
- // start and the final reginput is the end.\r
- begin\r
- //###0.949 removed clearing of start\endp\r
- reginput := str;\r
- Result := MatchPrim (programm + REOpSz);\r
- if Result then begin\r
- startp [0] := str;\r
- endp [0] := reginput;\r
- end;\r
- end; { of function RegMatch\r
-..............................................................}\r
- var\r
- s : PRegExprChar;\r
- StartPtr: PRegExprChar;\r
- InputLen : integer;\r
- begin\r
- Result := false; // Be paranoid...\r
-\r
- ClearMatchs; //###0.949\r
- // ensure that Match cleared either if optimization tricks or some error\r
- // will lead to leaving ExecPrim without actual search. That is\r
- // importent for ExecNext logic and so on.\r
-\r
- if not IsProgrammOk //###0.929\r
- then EXIT;\r
-\r
- // Check InputString presence\r
- if not Assigned (fInputString) then begin\r
- Error (reeNoInpitStringSpecified);\r
- EXIT;\r
- end;\r
-\r
- InputLen := length (fInputString);\r
-\r
- //Check that the start position is not negative\r
- if AOffset < 1 then begin\r
- Error (reeOffsetMustBeGreaterThen0);\r
- EXIT;\r
- end;\r
- // Check that the start position is not longer than the line\r
- // If so then exit with nothing found\r
- if AOffset > (InputLen + 1) // for matching empty string after last char.\r
- then EXIT;\r
-\r
- // convert the 1-based offset into a pointer into the input buffer\r
- StartPtr := fInputString + AOffset - 1;\r
-\r
- // If there is a "must appear" string, look for it.\r
- if regmust <> nil then begin\r
- s := StartPtr;\r
- REPEAT\r
- s := StrScan (s, regmust [0]);\r
- if s <> nil then begin\r
- if StrLComp (s, regmust, regmlen) = 0\r
- then BREAK; // Found it.\r
- inc (s);\r
- end;\r
- UNTIL s = nil;\r
- if s = nil // Not present.\r
- then EXIT;\r
- end;\r
-\r
- // Mark beginning of line for ^ .\r
- fInputStart := fInputString;\r
-\r
- // Pointer to end of input stream - for\r
- // pascal-style string processing (may include #0)\r
- fInputEnd := fInputString + InputLen;\r
-\r
- {$IFDEF ComplexBraces}\r
- // no loops started\r
- LoopStackIdx := 0; //###0.925\r
- {$ENDIF}\r
-\r
- // Simplest case: anchored match need be tried only once.\r
- if reganch <> #0 then begin\r
- Result := RegMatch (StartPtr);\r
- EXIT;\r
- end;\r
-\r
- // Messy cases: unanchored match.\r
- s := StartPtr;\r
- if regstart <> #0 then // We know what char it must start with.\r
- REPEAT\r
- // jump straight to the next occurrence of the required first char\r
- s := StrScan (s, regstart);\r
- if s <> nil then begin\r
- Result := RegMatch (s);\r
- if Result\r
- then EXIT\r
- else ClearMatchs; //###0.949\r
- inc (s);\r
- end;\r
- UNTIL s = nil\r
- else begin // We don't - general case.\r
- // try every position in turn, including the terminating #0 so that\r
- // an empty match at the end of the input is still possible\r
- repeat //###0.948\r
- {$IFDEF UseFirstCharSet}\r
- if s^ in FirstCharSet\r
- then Result := RegMatch (s);\r
- {$ELSE}\r
- Result := RegMatch (s);\r
- {$ENDIF}\r
- if Result or (s^ = #0) // Exit on a match or after testing the end-of-string.\r
- then EXIT\r
- else ClearMatchs; //###0.949\r
- inc (s);\r
- until false;\r
-(* optimized and fixed by Martin Fuller - empty strings\r
- were not allowed to pass thru in UseFirstCharSet mode\r
- {$IFDEF UseFirstCharSet} //###0.929\r
- while s^ <> #0 do begin\r
- if s^ in FirstCharSet\r
- then Result := RegMatch (s);\r
- if Result\r
- then EXIT;\r
- inc (s);\r
- end;\r
- {$ELSE}\r
- REPEAT\r
- Result := RegMatch (s);\r
- if Result\r
- then EXIT;\r
- inc (s);\r
- UNTIL s^ = #0;\r
- {$ENDIF}\r
-*)\r
- end;\r
- // Failure\r
- end; { of function TRegExpr.ExecPrim\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.ExecNext : boolean;\r
-// Continues the search behind the previous match and returns the result\r
-// of ExecPrim. When no previous match is recorded (startp [0] or endp [0]\r
-// is nil) reeExecNextWithoutExec is reported and false is returned.\r
- var\r
-  NextPos : integer; // 1-based position where the search resumes\r
- begin\r
-  Result := false;\r
-  if Assigned (startp [0]) and Assigned (endp [0]) then begin\r
-    NextPos := (endp [0] - fInputString) + 1; //###0.929\r
-    // an empty previous match would be found again at the same place,\r
-    // so step over one character to prevent infinite looping\r
-    if startp [0] = endp [0] //###0.929\r
-     then NextPos := NextPos + 1;\r
-    Result := ExecPrim (NextPos);\r
-   end\r
-  else Error (reeExecNextWithoutExec);\r
- end; { of function TRegExpr.ExecNext\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetInputString : RegExprString;\r
-// Returns the contents of the internal input buffer as a string.\r
-// When no input string has been assigned yet,\r
-// reeGetInputStringWithoutInputString is reported through Error and an\r
-// empty string is returned.\r
- begin\r
-  // give Result a defined value first: on the error path it would\r
-  // otherwise stay unassigned whenever Error returns instead of raising\r
-  Result := '';\r
-  if not Assigned (fInputString) then begin\r
-    Error (reeGetInputStringWithoutInputString);\r
-    EXIT;\r
-   end;\r
-  Result := fInputString;\r
- end; { of function TRegExpr.GetInputString\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.SetInputString (const AInputString : RegExprString);\r
-// Copies AInputString into the internal zero-terminated buffer\r
-// fInputString. The buffer is released and allocated anew whenever the\r
-// length of its current contents differs from the new length.\r
-// All recorded match positions are reset.\r
- var\r
-  NewLen : integer;\r
-  Idx : integer;\r
- begin\r
-  // clear Match* - before next Exec* call it's undefined\r
-  Idx := 0;\r
-  while Idx < NSUBEXP do begin\r
-    startp [Idx] := nil;\r
-    endp [Idx] := nil;\r
-    inc (Idx);\r
-   end;\r
-\r
-  NewLen := length (AInputString);\r
-\r
-  // an existing buffer is kept only if its contents have the same length\r
-  if Assigned (fInputString) then\r
-    if Length (fInputString) <> NewLen then begin\r
-      FreeMem (fInputString);\r
-      fInputString := nil;\r
-     end;\r
-\r
-  // room for NewLen characters plus the terminating #0\r
-  if fInputString = nil\r
-   then GetMem (fInputString, (NewLen + 1) * SizeOf (REChar));\r
-\r
-  // copy input string into buffer\r
-  {$IFDEF UniCode}\r
-  StrPCopy (fInputString, Copy (AInputString, 1, NewLen)); //###0.927\r
-  {$ELSE}\r
-  StrLCopy (fInputString, PRegExprChar (AInputString), NewLen);\r
-  {$ENDIF}\r
-\r
-  {\r
-  Idea for a string-based implementation (not used):\r
-\r
-  fInputString : string;\r
-  fInputStart, fInputEnd : PRegExprChar;\r
-\r
-  SetInputString:\r
-  fInputString := AInputString;\r
-  UniqueString (fInputString);\r
-  fInputStart := PChar (fInputString);\r
-  Len := length (fInputString);\r
-  fInputEnd := PRegExprChar (integer (fInputStart) + Len); ??\r
-  !! would startp/endp still be dangerous to use anyway ?\r
-  }\r
- end; { of procedure TRegExpr.SetInputString\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.SetLineSeparators (const AStr : RegExprString);\r
-// Stores a new list of line separator characters. InvalidateProgramm is\r
-// called only when the value really changes.\r
- begin\r
-  if AStr = fLineSeparators\r
-   then EXIT; // same value - nothing to do\r
-  fLineSeparators := AStr;\r
-  InvalidateProgramm;\r
- end; { of procedure TRegExpr.SetLineSeparators\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.SetLinePairedSeparator (const AStr : RegExprString);\r
-// Sets the two-character line separator.\r
-//  - two different characters: stored as head / tail of the pair\r
-//  - empty string: the paired separator is switched off\r
-//  - anything else (including two identical characters): reported as\r
-//    reeBadLinePairedSeparator\r
-// InvalidateProgramm is called only when the setting really changes.\r
- begin\r
-  case length (AStr) of\r
-   0: if fLinePairedSeparatorAssigned then begin\r
-        fLinePairedSeparatorAssigned := false;\r
-        InvalidateProgramm;\r
-       end;\r
-   2: begin\r
-       if AStr [1] = AStr [2] then begin\r
-         // it's impossible for our 'one-point' checking to support\r
-         // two chars separator for identical chars\r
-         Error (reeBadLinePairedSeparator);\r
-         EXIT;\r
-        end;\r
-       if not fLinePairedSeparatorAssigned\r
-          or (AStr [1] <> fLinePairedSeparatorHead)\r
-          or (AStr [2] <> fLinePairedSeparatorTail) then begin\r
-         fLinePairedSeparatorAssigned := true;\r
-         fLinePairedSeparatorHead := AStr [1];\r
-         fLinePairedSeparatorTail := AStr [2];\r
-         InvalidateProgramm;\r
-        end;\r
-      end;\r
-   else Error (reeBadLinePairedSeparator);\r
-  end;\r
- end; { of procedure TRegExpr.SetLinePairedSeparator\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.GetLinePairedSeparator : RegExprString;\r
-// Returns the paired line separator as a two-character string (head\r
-// followed by tail), or '' when no paired separator is assigned.\r
- begin\r
-  Result := '';\r
-  if not fLinePairedSeparatorAssigned\r
-   then EXIT;\r
-  {$IFDEF UniCode}\r
-  // Here is some UniCode 'magic'\r
-  // If You do know better decision to concatenate\r
-  // two WideChars, please, let me know!\r
-  Result := fLinePairedSeparatorHead; //###0.947\r
-  Result := Result + fLinePairedSeparatorTail;\r
-  {$ELSE}\r
-  Result := fLinePairedSeparatorHead + fLinePairedSeparatorTail;\r
-  {$ENDIF}\r
- end; { of function TRegExpr.GetLinePairedSeparator\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString;\r
-// perform substitutions after a regexp match\r
-// completely rewritten in 0.929\r
-//\r
-// Builds the result from ATemplate:\r
-//  - '$n' / '${n}' is replaced by the text of subexpression n,\r
-//    '$&' / '${&}' by the whole match (n = 0);\r
-//  - a character preceded by EscChar is copied literally;\r
-//  - every other character is copied as is.\r
-// A reference whose number is not below NSUBEXP, or whose subexpression\r
-// has no recorded start / end, expands to nothing.\r
-// Returns '' when the program is not usable, no input string is assigned\r
-// (also reported through Error) or the expansion is empty.\r
- var\r
-  TemplateLen : integer;\r
-  TemplateBeg, TemplateEnd : PRegExprChar;\r
-  p, p0, ResultPtr : PRegExprChar;\r
-  ResultLen : integer;\r
-  n : integer;\r
-  Ch : REChar;\r
-  function ParseVarName (var APtr : PRegExprChar) : integer;\r
-  // extract name of variable (digits, may be enclosed with\r
-  // curly braces) from APtr^, uses TemplateEnd !!!\r
-  // Returns the parsed number (0 for '&') and moves APtr behind the name.\r
-  // Returns -1 and leaves APtr untouched when nothing was consumed or\r
-  // when a left curly brace is not closed.\r
-   const\r
-    Digits = ['0' .. '9'];\r
-   var\r
-    p : PRegExprChar;\r
-    Delimited : boolean;\r
-   begin\r
-    Result := 0;\r
-    p := APtr;\r
-    Delimited := (p < TemplateEnd) and (p^ = '{');\r
-    if Delimited\r
-     then inc (p); // skip left curly brace\r
-    if (p < TemplateEnd) and (p^ = '&')\r
-     then inc (p) // this is '$&' or '${&}'\r
-     else\r
-      while (p < TemplateEnd) and\r
-       {$IFDEF UniCode} //###0.935\r
-       (ord (p^) < 256) and (char (p^) in Digits)\r
-       {$ELSE}\r
-       (p^ in Digits)\r
-       {$ENDIF}\r
-       do begin\r
-        Result := Result * 10 + (ord (p^) - ord ('0')); //###0.939\r
-        inc (p);\r
-       end;\r
-    if Delimited then\r
-     if (p < TemplateEnd) and (p^ = '}')\r
-      then inc (p) // skip right curly brace\r
-      else p := APtr; // isn't properly terminated\r
-    if p = APtr\r
-     then Result := -1; // no valid digits found or no right curly brace\r
-    APtr := p;\r
-   end;\r
- begin\r
-  // defined result for every early exit below\r
-  Result := '';\r
-  // Check programm and input string\r
-  if not IsProgrammOk\r
-   then EXIT;\r
-  if not Assigned (fInputString) then begin\r
-    Error (reeNoInpitStringSpecified);\r
-    EXIT;\r
-   end;\r
-  // Prepare for working\r
-  TemplateLen := length (ATemplate);\r
-  if TemplateLen = 0 // prevent nil pointers\r
-   then EXIT;\r
-  TemplateBeg := pointer (ATemplate);\r
-  TemplateEnd := TemplateBeg + TemplateLen;\r
-  // Count result length for speed optimization.\r
-  ResultLen := 0;\r
-  p := TemplateBeg;\r
-  while p < TemplateEnd do begin\r
-    Ch := p^;\r
-    inc (p);\r
-    if Ch = '$'\r
-     then n := ParseVarName (p)\r
-     else n := -1;\r
-    if n >= 0 then begin\r
-      if (n < NSUBEXP) and Assigned (startp [n]) and Assigned (endp [n])\r
-       then inc (ResultLen, endp [n] - startp [n]);\r
-     end\r
-    else begin\r
-      if (Ch = EscChar) and (p < TemplateEnd)\r
-       then inc (p); // quoted or special char followed\r
-      inc (ResultLen);\r
-     end;\r
-   end;\r
-  // Get memory. We do it once and it significant speed up work !\r
-  if ResultLen = 0\r
-   then EXIT;\r
-  SetString (Result, nil, ResultLen);\r
-  // Fill Result\r
-  ResultPtr := pointer (Result);\r
-  p := TemplateBeg;\r
-  while p < TemplateEnd do begin\r
-    Ch := p^;\r
-    inc (p);\r
-    if Ch = '$'\r
-     then n := ParseVarName (p)\r
-     else n := -1;\r
-    if n >= 0 then begin\r
-      // n must be checked BEFORE startp / endp are indexed: a reference\r
-      // such as '$99' would otherwise read outside the arrays\r
-      if (n < NSUBEXP) and Assigned (startp [n]) and Assigned (endp [n]) then begin\r
-        p0 := startp [n];\r
-        while p0 < endp [n] do begin\r
-          ResultPtr^ := p0^;\r
-          inc (ResultPtr);\r
-          inc (p0);\r
-         end;\r
-       end;\r
-     end\r
-    else begin\r
-      if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed\r
-        Ch := p^;\r
-        inc (p);\r
-       end;\r
-      ResultPtr^ := Ch;\r
-      inc (ResultPtr);\r
-     end;\r
-   end;\r
- end; { of function TRegExpr.Substitute\r
---------------------------------------------------------------}\r
-\r
-procedure TRegExpr.Split (AInputStr : RegExprString; APieces : TStrings);\r
-// Cuts AInputStr at every match of the expression and appends the text\r
-// between matches to APieces. The text after the last match (or the\r
-// whole string when Exec finds nothing) is appended as the final piece.\r
- var\r
-  NextStart : integer; // index in AInputStr where the next piece begins\r
-  Found : boolean;\r
- begin\r
-  NextStart := 1;\r
-  Found := Exec (AInputStr);\r
-  while Found do begin\r
-    // piece between the end of the previous match and the current match\r
-    APieces.Add (System.Copy (AInputStr, NextStart, MatchPos [0] - NextStart));\r
-    NextStart := MatchPos [0] + MatchLen [0];\r
-    Found := ExecNext;\r
-   end;\r
-  APieces.Add (System.Copy (AInputStr, NextStart, MaxInt)); // Tail\r
- end; { of procedure TRegExpr.Split\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.Replace (AInputStr : RegExprString; const AReplaceStr : RegExprString;\r
- AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString;\r
-// Returns AInputStr with every match of the expression replaced.\r
-// When AUseSubstitution is true the replacement for each match is\r
-// Substitute (AReplaceStr); otherwise AReplaceStr is inserted as is.\r
-// Text outside the matches is copied unchanged.\r
- var\r
-  CopyFrom : integer; // index of the first input char not yet copied\r
-  Matched : boolean;\r
- begin\r
-  Result := '';\r
-  CopyFrom := 1;\r
-  Matched := Exec (AInputStr);\r
-  while Matched do begin\r
-    // unmatched text in front of the current match\r
-    Result := Result + System.Copy (AInputStr, CopyFrom, MatchPos [0] - CopyFrom);\r
-    if not AUseSubstitution //###0.946\r
-     then Result := Result + AReplaceStr\r
-     else Result := Result + Substitute (AReplaceStr);\r
-    CopyFrom := MatchPos [0] + MatchLen [0];\r
-    Matched := ExecNext;\r
-   end;\r
-  Result := Result + System.Copy (AInputStr, CopyFrom, MaxInt); // Tail\r
- end; { of function TRegExpr.Replace\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.ReplaceEx (AInputStr : RegExprString;\r
- AReplaceFunc : TRegExprReplaceFunction)\r
- : RegExprString;\r
-// Returns AInputStr with every match of the expression replaced by the\r
-// string that AReplaceFunc (Self) returns for that match. Text outside\r
-// the matches is copied unchanged.\r
- var\r
-  CopyFrom : integer; // index of the first input char not yet copied\r
-  Matched : boolean;\r
- begin\r
-  Result := '';\r
-  CopyFrom := 1;\r
-  Matched := Exec (AInputStr);\r
-  while Matched do begin\r
-    // unmatched text in front of the match, then the callback's output\r
-    Result := Result + System.Copy (AInputStr, CopyFrom, MatchPos [0] - CopyFrom)\r
-      + AReplaceFunc (Self);\r
-    CopyFrom := MatchPos [0] + MatchLen [0];\r
-    Matched := ExecNext;\r
-   end;\r
-  Result := Result + System.Copy (AInputStr, CopyFrom, MaxInt); // Tail\r
- end; { of function TRegExpr.ReplaceEx\r
---------------------------------------------------------------}\r
-\r
-\r
-{$IFDEF OverMeth}\r
-function TRegExpr.Replace (AInputStr : RegExprString;\r
- AReplaceFunc : TRegExprReplaceFunction)\r
- : RegExprString;\r
-// Overloaded form of ReplaceEx: replaces every match in AInputStr with\r
-// the string returned by AReplaceFunc and returns the resulting text.\r
- begin\r
-  // the value must be handed back explicitly - calling ReplaceEx as a\r
-  // plain statement would throw its result away and leave Result unset\r
-  Result := ReplaceEx (AInputStr, AReplaceFunc);\r
- end; { of function TRegExpr.Replace\r
---------------------------------------------------------------}\r
-{$ENDIF}\r
-\r
-{=============================================================}\r
-{====================== Debug section ========================}\r
-{=============================================================}\r
-\r
-{$IFDEF RegExpPCodeDump}\r
-function TRegExpr.DumpOp (op : TREOp) : RegExprString;\r
-// printable representation of opcode\r
-// Returns ':' followed by the opcode's name; OPEN/CLOSE opcodes are shown\r
-// with their subexpression index. An unknown opcode is reported through\r
-// Error (reeDumpCorruptedOpcode).\r
- begin\r
- // String results are not cleared automatically. Start from '' so that\r
- // the ':' + Result below is well defined even on the error path.\r
- // NOTE(review): only matters if Error can return instead of raising\r
- // (e.g. through an override) - confirm.\r
- Result := '';\r
- case op of\r
- BOL: Result := 'BOL';\r
- EOL: Result := 'EOL';\r
- BOLML: Result := 'BOLML';\r
- EOLML: Result := 'EOLML';\r
- BOUND: Result := 'BOUND'; //###0.943\r
- NOTBOUND: Result := 'NOTBOUND'; //###0.943\r
- ANY: Result := 'ANY';\r
- ANYML: Result := 'ANYML'; //###0.941\r
- ANYLETTER: Result := 'ANYLETTER';\r
- NOTLETTER: Result := 'NOTLETTER';\r
- ANYDIGIT: Result := 'ANYDIGIT';\r
- NOTDIGIT: Result := 'NOTDIGIT';\r
- ANYSPACE: Result := 'ANYSPACE';\r
- NOTSPACE: Result := 'NOTSPACE';\r
- ANYOF: Result := 'ANYOF';\r
- ANYBUT: Result := 'ANYBUT';\r
- ANYOFCI: Result := 'ANYOF/CI';\r
- ANYBUTCI: Result := 'ANYBUT/CI';\r
- BRANCH: Result := 'BRANCH';\r
- EXACTLY: Result := 'EXACTLY';\r
- EXACTLYCI: Result := 'EXACTLY/CI';\r
- NOTHING: Result := 'NOTHING';\r
- COMMENT: Result := 'COMMENT';\r
- BACK: Result := 'BACK';\r
- EEND: Result := 'END';\r
- BSUBEXP: Result := 'BSUBEXP';\r
- BSUBEXPCI: Result := 'BSUBEXP/CI';\r
- // OPEN + 1 .. OPEN + NSUBEXP - 1 : index is the distance from OPEN\r
- Succ (OPEN) .. TREOp (Ord (OPEN) + NSUBEXP - 1): //###0.929\r
- Result := Format ('OPEN[%d]', [ord (op) - ord (OPEN)]);\r
- // CLOSE + 1 .. CLOSE + NSUBEXP - 1 : index is the distance from CLOSE\r
- Succ (CLOSE) .. TREOp (Ord (CLOSE) + NSUBEXP - 1): //###0.929\r
- Result := Format ('CLOSE[%d]', [ord (op) - ord (CLOSE)]);\r
- STAR: Result := 'STAR';\r
- PLUS: Result := 'PLUS';\r
- BRACES: Result := 'BRACES';\r
- {$IFDEF ComplexBraces}\r
- LOOPENTRY: Result := 'LOOPENTRY'; //###0.925\r
- LOOP: Result := 'LOOP'; //###0.925\r
- LOOPNG: Result := 'LOOPNG'; //###0.940\r
- {$ENDIF}\r
- ANYOFTINYSET: Result:= 'ANYOFTINYSET';\r
- ANYBUTTINYSET:Result:= 'ANYBUTTINYSET';\r
- {$IFDEF UseSetOfChar} //###0.929\r
- ANYOFFULLSET: Result:= 'ANYOFFULLSET';\r
- {$ENDIF}\r
- STARNG: Result := 'STARNG'; //###0.940\r
- PLUSNG: Result := 'PLUSNG'; //###0.940\r
- BRACESNG: Result := 'BRACESNG'; //###0.940\r
- else Error (reeDumpCorruptedOpcode);\r
- end; {of case op}\r
- Result := ':' + Result;\r
- end; { of function TRegExpr.DumpOp\r
---------------------------------------------------------------}\r
-\r
-function TRegExpr.Dump : RegExprString;\r
-// dump a regexp in vaguely comprehensible form\r
-// Walks the compiled program node by node, starting at programm + REOpSz\r
-// and stopping after the EEND opcode. Each node becomes one line:\r
-// its offset from programm, the opcode name from DumpOp, the offset of\r
-// the node returned by regnext in parentheses, then the node's operand\r
-// (if any). Header fields (regstart, reganch, regmust and, when compiled\r
-// in, FirstCharSet) follow the node list.\r
-// Returns '' when IsProgrammOk reports that there is no usable program.\r
- var\r
- s : PRegExprChar;\r
- op : TREOp; // Arbitrary non-END op.\r
- next : PRegExprChar;\r
- i : integer;\r
- Diff : integer;\r
-{$IFDEF UseSetOfChar} //###0.929\r
- Ch : REChar;\r
-{$ENDIF}\r
- begin\r
- // Assign the result before the early exit: a string Result is not\r
- // cleared automatically, so leaving without it returns undefined text.\r
- Result := '';\r
- if not IsProgrammOk //###0.929\r
- then EXIT;\r
-\r
- op := EXACTLY;\r
- s := programm + REOpSz;\r
- while op <> EEND do begin // While that wasn't END last time...\r
- op := s^;\r
- Result := Result + Format ('%2d%s', [s - programm, DumpOp (s^)]); // Where, what.\r
- next := regnext (s);\r
- if next = nil // Next ptr.\r
- then Result := Result + ' (0)'\r
- else begin\r
- if next > s //###0.948 PWideChar subtraction workaround (see comments in Tail method for details)\r
- then Diff := next - s\r
- else Diff := - (s - next);\r
- Result := Result + Format (' (%d) ', [(s - programm) + Diff]);\r
- end;\r
- // step over the opcode and the next-offset field to reach the operand\r
- inc (s, REOpSz + RENextOffSz);\r
- if (op = ANYOF) or (op = ANYOFCI) or (op = ANYBUT) or (op = ANYBUTCI)\r
- or (op = EXACTLY) or (op = EXACTLYCI) then begin\r
- // Literal string, where present.\r
- while s^ <> #0 do begin\r
- Result := Result + s^;\r
- inc (s);\r
- end;\r
- inc (s); // skip the terminating #0\r
- end;\r
- if (op = ANYOFTINYSET) or (op = ANYBUTTINYSET) then begin\r
- // operand is exactly TinySetLen chars\r
- for i := 1 to TinySetLen do begin\r
- Result := Result + s^;\r
- inc (s);\r
- end;\r
- end;\r
- if (op = BSUBEXP) or (op = BSUBEXPCI) then begin\r
- // operand is one char whose ordinal value is printed after '\'\r
- Result := Result + ' \' + IntToStr (Ord (s^));\r
- inc (s);\r
- end;\r
- {$IFDEF UseSetOfChar} //###0.929\r
- if op = ANYOFFULLSET then begin\r
- // operand is a whole set; chars below ' ' are printed as #code\r
- for Ch := #0 to #255 do\r
- if Ch in PSetOfREChar (s)^ then\r
- if Ch < ' '\r
- then Result := Result + '#' + IntToStr (Ord (Ch)) //###0.936\r
- else Result := Result + Ch;\r
- inc (s, SizeOf (TSetOfREChar));\r
- end;\r
- {$ENDIF}\r
- if (op = BRACES) or (op = BRACESNG) then begin //###0.941\r
- // show min/max argument of BRACES operator\r
- Result := Result + Format ('{%d,%d}', [PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);\r
- inc (s, REBracesArgSz * 2);\r
- end;\r
- {$IFDEF ComplexBraces}\r
- if (op = LOOP) or (op = LOOPNG) then begin //###0.940\r
- // operand: two braces arguments followed by one more offset field\r
- Result := Result + Format (' -> (%d) {%d,%d}', [\r
- (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (s + 2 * REBracesArgSz)^,\r
- PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);\r
- inc (s, 2 * REBracesArgSz + RENextOffSz);\r
- end;\r
- {$ENDIF}\r
- Result := Result + #$d#$a;\r
- end; { of while}\r
-\r
- // Header fields of interest.\r
-\r
- if regstart <> #0\r
- then Result := Result + 'start ' + regstart;\r
- if reganch <> #0\r
- then Result := Result + 'anchored ';\r
- if regmust <> nil\r
- then Result := Result + 'must have ' + regmust;\r
- {$IFDEF UseFirstCharSet} //###0.929\r
- Result := Result + #$d#$a'FirstCharSet:';\r
- for Ch := #0 to #255 do\r
- if Ch in FirstCharSet\r
- then begin\r
- if Ch < ' '\r
- then Result := Result + '#' + IntToStr(Ord(Ch)) //###0.948\r
- else Result := Result + Ch;\r
- end;\r
- {$ENDIF}\r
- Result := Result + #$d#$a;\r
- end; { of function TRegExpr.Dump\r
---------------------------------------------------------------}\r
-{$ENDIF}\r
-\r
-{$IFDEF reRealExceptionAddr}\r
-{$OPTIMIZATION ON}\r
-// ReturnAddr works correctly only if compiler optimization is ON.\r
-// I placed this method at the very end of the unit because there is no\r
-// way to restore the compiler optimization flag ...\r
-{$ENDIF}\r
-procedure TRegExpr.Error (AErrorID : integer);\r
-// Stores AErrorID in fLastError, then creates and raises an ERegExpr whose\r
-// message comes from ErrorMsg (AErrorID). For IDs below 1000 the message\r
-// additionally carries CompilerErrorPos. ErrorCode and CompilerErrorPos\r
-// of the exception object are filled in before it is raised.\r
-{$IFDEF reRealExceptionAddr}\r
- // Returns the dword stored at [ebp+4] (32-bit x86 registers only).\r
- // NOTE(review): presumably this is the return address of Error's caller,\r
- // so that the exception is reported at the call site - confirm; it\r
- // depends on the stack frame layout produced by the compiler.\r
- function ReturnAddr : pointer; //###0.938\r
- asm\r
- mov eax,[ebp+4]\r
- end;\r
-{$ENDIF}\r
- var\r
- e : ERegExpr;\r
- begin\r
- fLastError := AErrorID; // dummy stub - useless because will raise exception\r
- if AErrorID < 1000 // compilation error ?\r
- then e := ERegExpr.Create (ErrorMsg (AErrorID) // yes - show error pos\r
- + ' (pos ' + IntToStr (CompilerErrorPos) + ')')\r
- else e := ERegExpr.Create (ErrorMsg (AErrorID));\r
- e.ErrorCode := AErrorID;\r
- e.CompilerErrorPos := CompilerErrorPos;\r
- // the raise statement is completed by 'At ReturnAddr' only when\r
- // reRealExceptionAddr is defined; otherwise it ends at the 'end' below\r
- raise e\r
- {$IFDEF reRealExceptionAddr}\r
- At ReturnAddr; //###0.938\r
- {$ENDIF}\r
- end; { of procedure TRegExpr.Error\r
---------------------------------------------------------------}\r
-\r
-(*\r
- PCode persistence:\r
- FirstCharSet\r
- programm, regsize\r
- regstart // -> programm\r
- reganch // -> programm\r
- regmust, regmlen // -> programm\r
- fExprIsCompiled\r
-*)\r
-\r
-// be careful - code placed here will always be compiled with\r
-// the compiler optimization flag\r
-\r
-// Only when FPC is defined the unit gets an initialization section; it\r
-// assigns TRegExpr.InvertCaseFunction to RegExprInvertCaseFunction.\r
-// NOTE(review): presumably other compilers set this default elsewhere\r
-// in the unit - confirm.\r
-{$IFDEF FPC}\r
-initialization\r
- RegExprInvertCaseFunction := TRegExpr.InvertCaseFunction;\r
-\r
-{$ENDIF}\r
-end.\r
-\r