ICU 4.8.1.1  4.8.1.1
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
uregex.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 2004-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: uregex.h
7 * encoding: US-ASCII
8 * indentation:4
9 *
10 * created on: 2004mar09
11 * created by: Andy Heninger
12 *
13 * ICU Regular Expressions, API for C
14 */
15 
23 #ifndef UREGEX_H
24 #define UREGEX_H
25 
26 #include "unicode/utext.h"
27 #include "unicode/utypes.h"
28 
29 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
30 
31 #include "unicode/localpointer.h"
32 #include "unicode/parseerr.h"
33 
34 struct URegularExpression;
41 
42 
47 typedef enum URegexpFlag{
48 
49 #ifndef U_HIDE_DRAFT_API
50 
54 #endif
55 
57 
60 
64 
77 
83 
90 
98  UREGEX_UWORD = 256,
99 
108 
109 } URegexpFlag;
110 
134 uregex_open( const UChar *pattern,
135  int32_t patternLength,
136  uint32_t flags,
137  UParseError *pe,
138  UErrorCode *status);
139 
164 uregex_openUText(UText *pattern,
165  uint32_t flags,
166  UParseError *pe,
167  UErrorCode *status);
168 
192 #if !UCONFIG_NO_CONVERSION
194 uregex_openC( const char *pattern,
195  uint32_t flags,
196  UParseError *pe,
197  UErrorCode *status);
198 #endif
199 
200 
201 
209 U_STABLE void U_EXPORT2
211 
212 #if U_SHOW_CPLUSPLUS_API
213 
215 
225 U_DEFINE_LOCAL_OPEN_POINTER(LocalURegularExpressionPointer, URegularExpression, uregex_close);
226 
228 
229 #endif
230 
250 uregex_clone(const URegularExpression *regexp, UErrorCode *status);
251 
269 U_STABLE const UChar * U_EXPORT2
270 uregex_pattern(const URegularExpression *regexp,
271  int32_t *patLength,
272  UErrorCode *status);
273 
287  UErrorCode *status);
288 
289 
298 U_STABLE int32_t U_EXPORT2
299 uregex_flags(const URegularExpression *regexp,
300  UErrorCode *status);
301 
302 
323 U_STABLE void U_EXPORT2
325  const UChar *text,
326  int32_t textLength,
327  UErrorCode *status);
328 
329 
346 U_DRAFT void U_EXPORT2
348  UText *text,
349  UErrorCode *status);
350 
371 U_STABLE const UChar * U_EXPORT2
373  int32_t *textLength,
374  UErrorCode *status);
375 
376 
395  UText *dest,
396  UErrorCode *status);
397 
423 U_DRAFT void U_EXPORT2
425  UText *text,
426  UErrorCode *status);
427 
450  int32_t startIndex,
451  UErrorCode *status);
452 
476  int64_t startIndex,
477  UErrorCode *status);
478 
504  int32_t startIndex,
505  UErrorCode *status);
506 
533  int64_t startIndex,
534  UErrorCode *status);
535 
557  int32_t startIndex,
558  UErrorCode *status);
559 
582  int64_t startIndex,
583  UErrorCode *status);
584 
600  UErrorCode *status);
601 
609 U_STABLE int32_t U_EXPORT2
611  UErrorCode *status);
612 
629 U_STABLE int32_t U_EXPORT2
631  int32_t groupNum,
632  UChar *dest,
633  int32_t destCapacity,
634  UErrorCode *status);
635 
636 
661  int32_t groupNum,
662  UText *dest,
663  int64_t *groupLength,
664  UErrorCode *status);
665 
666 
686  int32_t groupNum,
687  UText *dest,
688  UErrorCode *status);
689 
704 U_STABLE int32_t U_EXPORT2
706  int32_t groupNum,
707  UErrorCode *status);
708 
724 U_DRAFT int64_t U_EXPORT2
726  int32_t groupNum,
727  UErrorCode *status);
728 
742 U_STABLE int32_t U_EXPORT2
744  int32_t groupNum,
745  UErrorCode *status);
746 
761 U_DRAFT int64_t U_EXPORT2
763  int32_t groupNum,
764  UErrorCode *status);
765 
779 U_STABLE void U_EXPORT2
781  int32_t index,
782  UErrorCode *status);
783 
798 U_DRAFT void U_EXPORT2
800  int64_t index,
801  UErrorCode *status);
802 
823 U_STABLE void U_EXPORT2
825  int32_t regionStart,
826  int32_t regionLimit,
827  UErrorCode *status);
828 
850 U_DRAFT void U_EXPORT2
852  int64_t regionStart,
853  int64_t regionLimit,
854  UErrorCode *status);
855 
870 U_DRAFT void U_EXPORT2
872  int64_t regionStart,
873  int64_t regionLimit,
874  int64_t startIndex,
875  UErrorCode *status);
876 
886 U_STABLE int32_t U_EXPORT2
888  UErrorCode *status);
889 
900 U_DRAFT int64_t U_EXPORT2
902  UErrorCode *status);
903 
914 U_STABLE int32_t U_EXPORT2
916  UErrorCode *status);
917 
929 U_DRAFT int64_t U_EXPORT2
931  UErrorCode *status);
932 
945  UErrorCode *status);
946 
947 
967 U_STABLE void U_EXPORT2
969  UBool b,
970  UErrorCode *status);
971 
972 
984  UErrorCode *status);
985 
986 
1000 U_STABLE void U_EXPORT2
1002  UBool b,
1003  UErrorCode *status);
1004 
1016 uregex_hitEnd(const URegularExpression *regexp,
1017  UErrorCode *status);
1018 
1032  UErrorCode *status);
1033 
1034 
1035 
1036 
1037 
1062 U_STABLE int32_t U_EXPORT2
1064  const UChar *replacementText,
1065  int32_t replacementLength,
1066  UChar *destBuf,
1067  int32_t destCapacity,
1068  UErrorCode *status);
1069 
1093  UText *replacement,
1094  UText *dest,
1095  UErrorCode *status);
1096 
1121 U_STABLE int32_t U_EXPORT2
1123  const UChar *replacementText,
1124  int32_t replacementLength,
1125  UChar *destBuf,
1126  int32_t destCapacity,
1127  UErrorCode *status);
1128 
1152  UText *replacement,
1153  UText *dest,
1154  UErrorCode *status);
1155 
1156 
1203 U_STABLE int32_t U_EXPORT2
1205  const UChar *replacementText,
1206  int32_t replacementLength,
1207  UChar **destBuf,
1208  int32_t *destCapacity,
1209  UErrorCode *status);
1210 
1211 
1234 U_DRAFT void U_EXPORT2
1236  UText *replacementText,
1237  UText *dest,
1238  UErrorCode *status);
1239 
1240 
1265 U_STABLE int32_t U_EXPORT2
1267  UChar **destBuf,
1268  int32_t *destCapacity,
1269  UErrorCode *status);
1270 
1271 
1292  UText *dest,
1293  UErrorCode *status);
1294 
1295 
1296 
1348 U_STABLE int32_t U_EXPORT2
1350  UChar *destBuf,
1351  int32_t destCapacity,
1352  int32_t *requiredCapacity,
1353  UChar *destFields[],
1354  int32_t destFieldsCapacity,
1355  UErrorCode *status);
1356 
1357 
1384 U_DRAFT int32_t U_EXPORT2
1386  UText *destFields[],
1387  int32_t destFieldsCapacity,
1388  UErrorCode *status);
1389 
1390 
1391 
1392 
1415 U_STABLE void U_EXPORT2
1417  int32_t limit,
1418  UErrorCode *status);
1419 
1429 U_STABLE int32_t U_EXPORT2
1431  UErrorCode *status);
1432 
1453 U_STABLE void U_EXPORT2
1455  int32_t limit,
1456  UErrorCode *status);
1457 
1465 U_STABLE int32_t U_EXPORT2
1467  UErrorCode *status);
1468 
1469 
1490  const void *context,
1491  int32_t steps);
1493 
1508 U_STABLE void U_EXPORT2
1510  URegexMatchCallback *callback,
1511  const void *context,
1512  UErrorCode *status);
1513 
1514 
1526 U_STABLE void U_EXPORT2
1528  URegexMatchCallback **callback,
1529  const void **context,
1530  UErrorCode *status);
1531 
1532 
1565  const void *context,
1566  int64_t matchIndex);
1568 
1580 U_DRAFT void U_EXPORT2
1582  URegexFindProgressCallback *callback,
1583  const void *context,
1584  UErrorCode *status);
1585 
1586 
1598 U_DRAFT void U_EXPORT2
1600  URegexFindProgressCallback **callback,
1601  const void **context,
1602  UErrorCode *status);
1603 
1604 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
1605 #endif /* UREGEX_H */
UText * uregex_getUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
int64_t uregex_regionEnd64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionEnd.
void uregex_setText(URegularExpression *regexp, const UChar *text, int32_t textLength, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
UBool uregex_lookingAt64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_lookingAt.
URegularExpression * uregex_openC(const char *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
Forces normalization of pattern and strings.
Definition: uregex.h:53
int32_t uregex_getTimeLimit(const URegularExpression *regexp, UErrorCode *status)
Get the time limit for matches with this URegularExpression.
int32_t uregex_groupCount(URegularExpression *regexp, UErrorCode *status)
Get the number of capturing groups in this regular expression's pattern.
int32_t uregex_splitUText(URegularExpression *regexp, UText *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
void uregex_setUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
Control behavior of "$" and "^". If set, recognize line terminators within string, otherwise...
Definition: uregex.h:82
void uregex_reset(URegularExpression *regexp, int32_t index, UErrorCode *status)
Reset any saved state from the previous match.
UBool uregex_find(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Find the first matching substring of the input string that matches the pattern.
void uregex_getMatchCallback(const URegularExpression *regexp, URegexMatchCallback **callback, const void **context, UErrorCode *status)
Get the callback function for this URegularExpression.
int32_t uregex_appendTail(URegularExpression *regexp, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string...
int32_t uregex_regionEnd(const URegularExpression *regexp, UErrorCode *status)
Reports the end index (exclusive) of the matching region for this URegularExpression.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: utypes.h:287
If set, '.' matches line terminators, otherwise '.' matching stops at line end.
Definition: uregex.h:63
Unix-only line endings.
Definition: uregex.h:89
UBool uregex_requireEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent match succeeded and additional input could cause it to fail...
int32_t uregex_replaceAll(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string...
void uregex_setRegion(URegularExpression *regexp, int32_t regionStart, int32_t regionLimit, UErrorCode *status)
Sets the limits of the matching region for this URegularExpression.
int32_t uregex_split(URegularExpression *regexp, UChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UChar *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
int32_t uregex_end(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the position following the end of the text matched by the specified capture group...
#define U_INTERNAL
This is used to declare a function as an internal ICU C API.
Definition: umachine.h:145
UBool uregex_matches64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_matches.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1564
UBool uregex_matches(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string against the pattern.
URegularExpression * uregex_open(const UChar *pattern, int32_t patternLength, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
void uregex_close(URegularExpression *regexp)
Close the regular expression, recovering all resources (memory) it was holding.
C API: Abstract Unicode Text API.
int64_t uregex_start64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_start.
UBool uregex_findNext(URegularExpression *regexp, UErrorCode *status)
Find the next pattern match in the input string.
void uregex_setFindProgressCallback(URegularExpression *regexp, URegexFindProgressCallback *callback, const void *context, UErrorCode *status)
Set the find progress callback function for this URegularExpression.
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:110
int32_t uregex_getStackLimit(const URegularExpression *regexp, UErrorCode *status)
Get the size of the heap storage available for use by the backtracking stack.
URegularExpression * uregex_clone(const URegularExpression *regexp, UErrorCode *status)
Make a copy of a compiled regular expression.
URegexpFlag
Constants for Regular Expression Match Modes.
Definition: uregex.h:47
void uregex_setStackLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set the amount of heap storage available for use by the match backtracking stack. ...
Allow white space and comments within patterns.
Definition: uregex.h:59
void uregex_useAnchoringBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Set whether this URegularExpression is using Anchoring Bounds for its region.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:131
int32_t uregex_appendReplacement(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
void uregex_appendReplacementUText(URegularExpression *regexp, UText *replacementText, UText *dest, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
UBool uregex_hasTransparentBounds(const URegularExpression *regexp, UErrorCode *status)
Queries the transparency of region bounds for this URegularExpression.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
const UChar * uregex_pattern(const URegularExpression *regexp, int32_t *patLength, UErrorCode *status)
Returns a pointer to the source form of the pattern for this regular expression.
UBool uregex_find64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_find.
void uregex_setRegionAndStart(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode *status)
Set the matching region and the starting index for subsequent matches in a single operation...
void uregex_reset64(URegularExpression *regexp, int64_t index, UErrorCode *status)
64bit version of uregex_reset.
void uregex_setMatchCallback(URegularExpression *regexp, URegexMatchCallback *callback, const void *context, UErrorCode *status)
Set a callback function for this URegularExpression.
#define U_EXPORT2
Definition: platform.h:314
int32_t uregex_flags(const URegularExpression *regexp, UErrorCode *status)
Get the match mode flags that were specified when compiling this regular expression.
struct URegularExpression URegularExpression
Structure representing a compiled regular expression, plus the results of a match operation...
Definition: uregex.h:40
void uregex_getFindProgressCallback(const URegularExpression *regexp, URegexFindProgressCallback **callback, const void **context, UErrorCode *status)
Get the find progress callback function for this URegularExpression.
UText * uregex_replaceFirstUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string...
uint16_t UChar
Define UChar to be wchar_t if that is 16 bits wide; always assumed to be unsigned.
Definition: umachine.h:325
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:111
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:132
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1489
UBool uregex_hasAnchoringBounds(const URegularExpression *regexp, UErrorCode *status)
Return true if this URegularExpression is using anchoring bounds.
int32_t uregex_regionStart(const URegularExpression *regexp, UErrorCode *status)
Reports the start index of the matching region.
C API: Parse Error Information.
Unicode word boundaries.
Definition: uregex.h:98
UText * uregex_groupUText(URegularExpression *regexp, int32_t groupNum, UText *dest, int64_t *groupLength, UErrorCode *status)
Returns a shallow immutable clone of the entire input string.
UText * uregex_appendTailUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string...
int32_t uregex_group(URegularExpression *regexp, int32_t groupNum, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract the string for the specified matching expression or subexpression.
Error on Unrecognized backslash escapes.
Definition: uregex.h:107
UText * uregex_patternUText(const URegularExpression *regexp, UErrorCode *status)
Returns the source text of the pattern for this regular expression.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:639
If set, treat the entire pattern as a literal string.
Definition: uregex.h:76
void uregex_useTransparentBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Sets the transparency of region bounds for this URegularExpression.
UText struct.
Definition: utext.h:1482
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Basic definitions for ICU, for both C and C++ APIs.
int32_t uregex_start(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the start of the text matched by the specified capture group...
void uregex_refreshUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression is looking for matches without changing...
int64_t uregex_regionStart64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionStart.
Enable case insensitive matching.
Definition: uregex.h:56
const UChar * uregex_getText(URegularExpression *regexp, int32_t *textLength, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
URegularExpression * uregex_openUText(UText *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
UBool uregex_hitEnd(const URegularExpression *regexp, UErrorCode *status)
Return TRUE if the most recent matching operation touched the end of the text being processed...
void uregex_setTimeLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set a processing time limit for match operations with this URegularExpression.
#define U_DRAFT
This is used to declare a function as a draft public ICU C API.
Definition: umachine.h:139
UText * uregex_groupUTextDeep(URegularExpression *regexp, int32_t groupNum, UText *dest, UErrorCode *status)
Extract the string for the specified matching expression or subexpression.
UText * uregex_replaceAllUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string...
int32_t uregex_replaceFirst(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string...
int64_t uregex_end64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_end.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:137
void uregex_setRegion64(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, UErrorCode *status)
64bit version of uregex_setRegion.
int8_t UBool
The ICU boolean type.
Definition: umachine.h:228
UBool uregex_lookingAt(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string, starting from the specified index, against the pattern...