ICU 57.1  57.1
uregex.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 2004-2016, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: uregex.h
7 * encoding: US-ASCII
8 * indentation:4
9 *
10 * created on: 2004mar09
11 * created by: Andy Heninger
12 *
13 * ICU Regular Expressions, API for C
14 */
15 
23 #ifndef UREGEX_H
24 #define UREGEX_H
25 
26 #include "unicode/utext.h"
27 #include "unicode/utypes.h"
28 
29 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
30 
31 #include "unicode/localpointer.h"
32 #include "unicode/parseerr.h"
33 
34 struct URegularExpression;
41 
42 
47 typedef enum URegexpFlag{
48 
49 #ifndef U_HIDE_DRAFT_API
50 
54 #endif /* U_HIDE_DRAFT_API */
55 
57 
60 
64 
76 
82 
89 
97  UREGEX_UWORD = 256,
98 
107 
108 } URegexpFlag;
109 
132 U_STABLE URegularExpression * U_EXPORT2
133 uregex_open( const UChar *pattern,
134  int32_t patternLength,
135  uint32_t flags,
136  UParseError *pe,
137  UErrorCode *status);
138 
162 U_STABLE URegularExpression * U_EXPORT2
163 uregex_openUText(UText *pattern,
164  uint32_t flags,
165  UParseError *pe,
166  UErrorCode *status);
167 
191 #if !UCONFIG_NO_CONVERSION
192 U_STABLE URegularExpression * U_EXPORT2
193 uregex_openC( const char *pattern,
194  uint32_t flags,
195  UParseError *pe,
196  UErrorCode *status);
197 #endif
198 
199 
200 
208 U_STABLE void U_EXPORT2
210 
211 #if U_SHOW_CPLUSPLUS_API
212 
214 
225 
227 
228 #endif
229 
248 U_STABLE URegularExpression * U_EXPORT2
249 uregex_clone(const URegularExpression *regexp, UErrorCode *status);
250 
268 U_STABLE const UChar * U_EXPORT2
269 uregex_pattern(const URegularExpression *regexp,
270  int32_t *patLength,
271  UErrorCode *status);
272 
284 U_STABLE UText * U_EXPORT2
286  UErrorCode *status);
287 
296 U_STABLE int32_t U_EXPORT2
297 uregex_flags(const URegularExpression *regexp,
298  UErrorCode *status);
299 
300 
321 U_STABLE void U_EXPORT2
323  const UChar *text,
324  int32_t textLength,
325  UErrorCode *status);
326 
327 
344 U_STABLE void U_EXPORT2
346  UText *text,
347  UErrorCode *status);
348 
369 U_STABLE const UChar * U_EXPORT2
371  int32_t *textLength,
372  UErrorCode *status);
373 
390 U_STABLE UText * U_EXPORT2
392  UText *dest,
393  UErrorCode *status);
394 
420 U_STABLE void U_EXPORT2
422  UText *text,
423  UErrorCode *status);
424 
445 U_STABLE UBool U_EXPORT2
447  int32_t startIndex,
448  UErrorCode *status);
449 
471 U_STABLE UBool U_EXPORT2
473  int64_t startIndex,
474  UErrorCode *status);
475 
499 U_STABLE UBool U_EXPORT2
501  int32_t startIndex,
502  UErrorCode *status);
503 
528 U_STABLE UBool U_EXPORT2
530  int64_t startIndex,
531  UErrorCode *status);
532 
552 U_STABLE UBool U_EXPORT2
554  int32_t startIndex,
555  UErrorCode *status);
556 
577 U_STABLE UBool U_EXPORT2
579  int64_t startIndex,
580  UErrorCode *status);
581 
595 U_STABLE UBool U_EXPORT2
597  UErrorCode *status);
598 
606 U_STABLE int32_t U_EXPORT2
608  UErrorCode *status);
609 
626 U_STABLE int32_t U_EXPORT2
628  const UChar *groupName,
629  int32_t nameLength,
630  UErrorCode *status);
631 
632 
650 U_STABLE int32_t U_EXPORT2
652  const char *groupName,
653  int32_t nameLength,
654  UErrorCode *status);
655 
672 U_STABLE int32_t U_EXPORT2
674  int32_t groupNum,
675  UChar *dest,
676  int32_t destCapacity,
677  UErrorCode *status);
678 
701 U_STABLE UText * U_EXPORT2
703  int32_t groupNum,
704  UText *dest,
705  int64_t *groupLength,
706  UErrorCode *status);
707 
722 U_STABLE int32_t U_EXPORT2
724  int32_t groupNum,
725  UErrorCode *status);
726 
742 U_STABLE int64_t U_EXPORT2
744  int32_t groupNum,
745  UErrorCode *status);
746 
760 U_STABLE int32_t U_EXPORT2
762  int32_t groupNum,
763  UErrorCode *status);
764 
779 U_STABLE int64_t U_EXPORT2
781  int32_t groupNum,
782  UErrorCode *status);
783 
797 U_STABLE void U_EXPORT2
799  int32_t index,
800  UErrorCode *status);
801 
816 U_STABLE void U_EXPORT2
818  int64_t index,
819  UErrorCode *status);
820 
841 U_STABLE void U_EXPORT2
843  int32_t regionStart,
844  int32_t regionLimit,
845  UErrorCode *status);
846 
868 U_STABLE void U_EXPORT2
870  int64_t regionStart,
871  int64_t regionLimit,
872  UErrorCode *status);
873 
888 U_STABLE void U_EXPORT2
890  int64_t regionStart,
891  int64_t regionLimit,
892  int64_t startIndex,
893  UErrorCode *status);
894 
904 U_STABLE int32_t U_EXPORT2
906  UErrorCode *status);
907 
918 U_STABLE int64_t U_EXPORT2
920  UErrorCode *status);
921 
932 U_STABLE int32_t U_EXPORT2
934  UErrorCode *status);
935 
947 U_STABLE int64_t U_EXPORT2
949  UErrorCode *status);
950 
961 U_STABLE UBool U_EXPORT2
963  UErrorCode *status);
964 
965 
985 U_STABLE void U_EXPORT2
987  UBool b,
988  UErrorCode *status);
989 
990 
1000 U_STABLE UBool U_EXPORT2
1002  UErrorCode *status);
1003 
1004 
1018 U_STABLE void U_EXPORT2
1020  UBool b,
1021  UErrorCode *status);
1022 
1033 U_STABLE UBool U_EXPORT2
1034 uregex_hitEnd(const URegularExpression *regexp,
1035  UErrorCode *status);
1036 
1048 U_STABLE UBool U_EXPORT2
1050  UErrorCode *status);
1051 
1052 
1053 
1054 
1055 
1080 U_STABLE int32_t U_EXPORT2
1082  const UChar *replacementText,
1083  int32_t replacementLength,
1084  UChar *destBuf,
1085  int32_t destCapacity,
1086  UErrorCode *status);
1087 
1109 U_STABLE UText * U_EXPORT2
1111  UText *replacement,
1112  UText *dest,
1113  UErrorCode *status);
1114 
1139 U_STABLE int32_t U_EXPORT2
1141  const UChar *replacementText,
1142  int32_t replacementLength,
1143  UChar *destBuf,
1144  int32_t destCapacity,
1145  UErrorCode *status);
1146 
1168 U_STABLE UText * U_EXPORT2
1170  UText *replacement,
1171  UText *dest,
1172  UErrorCode *status);
1173 
1220 U_STABLE int32_t U_EXPORT2
1222  const UChar *replacementText,
1223  int32_t replacementLength,
1224  UChar **destBuf,
1225  int32_t *destCapacity,
1226  UErrorCode *status);
1227 
1250 U_STABLE void U_EXPORT2
1252  UText *replacementText,
1253  UText *dest,
1254  UErrorCode *status);
1255 
1280 U_STABLE int32_t U_EXPORT2
1282  UChar **destBuf,
1283  int32_t *destCapacity,
1284  UErrorCode *status);
1285 
1304 U_STABLE UText * U_EXPORT2
1306  UText *dest,
1307  UErrorCode *status);
1308 
1360 U_STABLE int32_t U_EXPORT2
1362  UChar *destBuf,
1363  int32_t destCapacity,
1364  int32_t *requiredCapacity,
1365  UChar *destFields[],
1366  int32_t destFieldsCapacity,
1367  UErrorCode *status);
1368 
1395 U_STABLE int32_t U_EXPORT2
1397  UText *destFields[],
1398  int32_t destFieldsCapacity,
1399  UErrorCode *status);
1400 
1423 U_STABLE void U_EXPORT2
1425  int32_t limit,
1426  UErrorCode *status);
1427 
1437 U_STABLE int32_t U_EXPORT2
1439  UErrorCode *status);
1440 
1461 U_STABLE void U_EXPORT2
1463  int32_t limit,
1464  UErrorCode *status);
1465 
1473 U_STABLE int32_t U_EXPORT2
1475  UErrorCode *status);
1476 
1477 
1498  const void *context,
1499  int32_t steps);
1501 
1516 U_STABLE void U_EXPORT2
1518  URegexMatchCallback *callback,
1519  const void *context,
1520  UErrorCode *status);
1521 
1522 
1534 U_STABLE void U_EXPORT2
1536  URegexMatchCallback **callback,
1537  const void **context,
1538  UErrorCode *status);
1539 
1572  const void *context,
1573  int64_t matchIndex);
1575 
1576 
1588 U_STABLE void U_EXPORT2
1590  URegexFindProgressCallback *callback,
1591  const void *context,
1592  UErrorCode *status);
1593 
1605 U_STABLE void U_EXPORT2
1607  URegexFindProgressCallback **callback,
1608  const void **context,
1609  UErrorCode *status);
1610 
1611 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
1612 #endif /* UREGEX_H */
UText * uregex_getUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
int64_t uregex_regionEnd64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionEnd.
void uregex_setText(URegularExpression *regexp, const UChar *text, int32_t textLength, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
UBool uregex_lookingAt64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_lookingAt.
int32_t uregex_groupNumberFromCName(URegularExpression *regexp, const char *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
URegularExpression * uregex_openC(const char *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
Forces normalization of pattern and strings.
Definition: uregex.h:53
int32_t uregex_getTimeLimit(const URegularExpression *regexp, UErrorCode *status)
Get the time limit for matches with this URegularExpression.
"Smart pointer" class, closes a URegularExpression via uregex_close().
int32_t uregex_groupCount(URegularExpression *regexp, UErrorCode *status)
Get the number of capturing groups in this regular expression's pattern.
int32_t uregex_splitUText(URegularExpression *regexp, UText *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
void uregex_setUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
Control behavior of "$" and "^". If set, recognize line terminators within string, otherwise...
Definition: uregex.h:81
void uregex_reset(URegularExpression *regexp, int32_t index, UErrorCode *status)
Reset any saved state from the previous match.
UBool uregex_find(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Find the first matching substring of the input string that matches the pattern.
void uregex_getMatchCallback(const URegularExpression *regexp, URegexMatchCallback **callback, const void **context, UErrorCode *status)
Get the callback function for this URegularExpression.
int32_t uregex_appendTail(URegularExpression *regexp, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string...
int32_t uregex_regionEnd(const URegularExpression *regexp, UErrorCode *status)
Reports the end index (exclusive) of the matching region for this URegularExpression.
If set, '.' matches line terminators, otherwise '.' matching stops at line end.
Definition: uregex.h:63
Unix-only line endings.
Definition: uregex.h:88
UBool uregex_requireEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent match succeeded and additional input could cause it to fail...
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:849
int32_t uregex_replaceAll(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string...
void uregex_setRegion(URegularExpression *regexp, int32_t regionStart, int32_t regionLimit, UErrorCode *status)
Sets the limits of the matching region for this URegularExpression.
int32_t uregex_split(URegularExpression *regexp, UChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UChar *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
int32_t uregex_end(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the position following the end of the text matched by the sp...
UBool uregex_matches64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_matches.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1571
UBool uregex_matches(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string against the pattern.
URegularExpression * uregex_open(const UChar *pattern, int32_t patternLength, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
void uregex_close(URegularExpression *regexp)
Close the regular expression, recovering all resources (memory) it was holding.
int32_t uregex_groupNumberFromName(URegularExpression *regexp, const UChar *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
C API: Abstract Unicode Text API.
int64_t uregex_start64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_start.
UBool uregex_findNext(URegularExpression *regexp, UErrorCode *status)
Find the next pattern match in the input string.
void uregex_setFindProgressCallback(URegularExpression *regexp, URegexFindProgressCallback *callback, const void *context, UErrorCode *status)
Set the find progress callback function for this URegularExpression.
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:82
int32_t uregex_getStackLimit(const URegularExpression *regexp, UErrorCode *status)
Get the size of the heap storage available for use by the backtracking stack.
URegularExpression * uregex_clone(const URegularExpression *regexp, UErrorCode *status)
Make a copy of a compiled regular expression.
URegexpFlag
Constants for Regular Expression Match Modes.
Definition: uregex.h:47
void uregex_setStackLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set the amount of heap storage available for use by the match backtracking stack. ...
Allow white space and comments within patterns.
Definition: uregex.h:59
void uregex_useAnchoringBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Set whether this URegularExpression is using Anchoring Bounds for its region.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
int32_t uregex_appendReplacement(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
void uregex_appendReplacementUText(URegularExpression *regexp, UText *replacementText, UText *dest, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:536
UBool uregex_hasTransparentBounds(const URegularExpression *regexp, UErrorCode *status)
Queries the transparency of region bounds for this URegularExpression.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
const UChar * uregex_pattern(const URegularExpression *regexp, int32_t *patLength, UErrorCode *status)
Returns a pointer to the source form of the pattern for this regular expression.
UBool uregex_find64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_find.
void uregex_setRegionAndStart(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode *status)
Set the matching region and the starting index for subsequent matches in a single operation...
void uregex_reset64(URegularExpression *regexp, int64_t index, UErrorCode *status)
64bit version of uregex_reset.
void uregex_setMatchCallback(URegularExpression *regexp, URegexMatchCallback *callback, const void *context, UErrorCode *status)
Set a callback function for this URegularExpression.
int32_t uregex_flags(const URegularExpression *regexp, UErrorCode *status)
Get the match mode flags that were specified when compiling this regular expression.
struct URegularExpression URegularExpression
Structure representing a compiled regular expression, plus the results of a match operation...
Definition: uregex.h:40
void uregex_getFindProgressCallback(const URegularExpression *regexp, URegexFindProgressCallback **callback, const void **context, UErrorCode *status)
Get the find progress callback function for this URegularExpression.
UText * uregex_replaceFirstUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string...
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:83
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1497
UBool uregex_hasAnchoringBounds(const URegularExpression *regexp, UErrorCode *status)
Return true if this URegularExpression is using anchoring bounds.
int32_t uregex_regionStart(const URegularExpression *regexp, UErrorCode *status)
Reports the start index of the matching region.
C API: Parse Error Information.
Unicode word boundaries.
Definition: uregex.h:97
UText * uregex_groupUText(URegularExpression *regexp, int32_t groupNum, UText *dest, int64_t *groupLength, UErrorCode *status)
Returns a shallow immutable clone of the entire input string with the current index set to the beginn...
UText * uregex_appendTailUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string...
int32_t uregex_group(URegularExpression *regexp, int32_t groupNum, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract the string for the specified matching expression or subexpression.
Error on unrecognized backslash escapes.
Definition: uregex.h:106
UText * uregex_patternUText(const URegularExpression *regexp, UErrorCode *status)
Returns the source text of the pattern for this regular expression.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
If set, treat the entire pattern as a literal string.
Definition: uregex.h:75
void uregex_useTransparentBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Sets the transparency of region bounds for this URegularExpression.
UText struct.
Definition: utext.h:1343
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Basic definitions for ICU, for both C and C++ APIs.
int32_t uregex_start(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the start of the text matched by the specified capture group...
void uregex_refreshUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression is looking for matches without changing...
int64_t uregex_regionStart64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionStart.
Enable case insensitive matching.
Definition: uregex.h:56
const UChar * uregex_getText(URegularExpression *regexp, int32_t *textLength, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
URegularExpression * uregex_openUText(UText *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
UBool uregex_hitEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent matching operation touched the end of the text being processed...
void uregex_setTimeLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set a processing time limit for match operations with this URegularExpression.
UText * uregex_replaceAllUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string...
int32_t uregex_replaceFirst(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string...
int64_t uregex_end64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_end.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
void uregex_setRegion64(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, UErrorCode *status)
64bit version of uregex_setRegion.
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
UBool uregex_lookingAt(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string, starting from the specified index, against the pattern...