ICU 65.1  65.1
regex.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2002-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * file name: regex.h
9 * encoding: UTF-8
10 * indentation:4
11 *
12 * created on: 2002oct22
13 * created by: Andy Heninger
14 *
15 * ICU Regular Expressions, API for C++
16 */
17 
18 #ifndef REGEX_H
19 #define REGEX_H
20 
21 //#define REGEX_DEBUG
22 
45 #include "unicode/utypes.h"
46 
47 #if U_SHOW_CPLUSPLUS_API
48 
49 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
50 
51 #include "unicode/uobject.h"
52 #include "unicode/unistr.h"
53 #include "unicode/utext.h"
54 #include "unicode/parseerr.h"
55 
56 #include "unicode/uregex.h"
57 
58 // Forward Declarations
59 
60 struct UHashtable;
61 
62 U_NAMESPACE_BEGIN
63 
64 struct Regex8BitSet;
65 class RegexCImpl;
66 class RegexMatcher;
67 class RegexPattern;
68 struct REStackFrame;
69 class RuleBasedBreakIterator;
70 class UnicodeSet;
71 class UVector;
72 class UVector32;
73 class UVector64;
74 
75 
88 public:
89 
98 
105  RegexPattern(const RegexPattern &source);
106 
112  virtual ~RegexPattern();
113 
122  UBool operator==(const RegexPattern& that) const;
123 
132  inline UBool operator!=(const RegexPattern& that) const { return !this->operator==(that); } // Inequality: the logical negation of operator==.
133 
139  RegexPattern &operator =(const RegexPattern &source);
140 
148  virtual RegexPattern *clone() const;
149 
150 
175  static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
176  UParseError &pe,
177  UErrorCode &status);
178 
205  static RegexPattern * U_EXPORT2 compile( UText *regex,
206  UParseError &pe,
207  UErrorCode &status);
208 
233  static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
234  uint32_t flags,
235  UParseError &pe,
236  UErrorCode &status);
237 
264  static RegexPattern * U_EXPORT2 compile( UText *regex,
265  uint32_t flags,
266  UParseError &pe,
267  UErrorCode &status);
268 
291  static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
292  uint32_t flags,
293  UErrorCode &status);
294 
319  static RegexPattern * U_EXPORT2 compile( UText *regex,
320  uint32_t flags,
321  UErrorCode &status);
322 
328  virtual uint32_t flags() const;
329 
347  virtual RegexMatcher *matcher(const UnicodeString &input,
348  UErrorCode &status) const;
349 
350 private:
363  RegexMatcher *matcher(const char16_t *input,
364  UErrorCode &status) const;
365 public:
366 
367 
379  virtual RegexMatcher *matcher(UErrorCode &status) const;
380 
381 
396  static UBool U_EXPORT2 matches(const UnicodeString &regex,
397  const UnicodeString &input,
398  UParseError &pe,
399  UErrorCode &status);
400 
415  static UBool U_EXPORT2 matches(UText *regex,
416  UText *input,
417  UParseError &pe,
418  UErrorCode &status);
419 
428  virtual UnicodeString pattern() const;
429 
430 
441  virtual UText *patternText(UErrorCode &status) const;
442 
443 
457  virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
458 
459 
476  virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
477 
478 
517  virtual int32_t split(const UnicodeString &input,
518  UnicodeString dest[],
519  int32_t destCapacity,
520  UErrorCode &status) const;
521 
522 
561  virtual int32_t split(UText *input,
562  UText *dest[],
563  int32_t destCapacity,
564  UErrorCode &status) const;
565 
566 
572  virtual UClassID getDynamicClassID() const;
573 
579  static UClassID U_EXPORT2 getStaticClassID();
580 
581 private:
582  //
583  // Implementation Data
584  //
585  UText *fPattern; // The original pattern string.
586  UnicodeString *fPatternString; // The original pattern UnicodeString if relevant
587  uint32_t fFlags; // The flags used when compiling the pattern.
588  //
589  UVector64 *fCompiledPat; // The compiled pattern p-code.
590  UnicodeString fLiteralText; // Any literal string data from the pattern,
591  // after un-escaping, for use during the match.
592 
593  UVector *fSets; // Any UnicodeSets referenced from the pattern.
594  Regex8BitSet *fSets8; // (and fast sets for latin-1 range.)
595 
596 
597  UErrorCode fDeferredStatus; // status if some prior error has left this
598  // RegexPattern in an unusable state.
599 
600  int32_t fMinMatchLen; // Minimum Match Length. All matches will have length
601  // >= this value. For some patterns, this calculated
602  // value may be less than the true shortest
603  // possible match.
604 
605  int32_t fFrameSize; // Size of a state stack frame in the
606  // execution engine.
607 
608  int32_t fDataSize; // The size of the data needed by the pattern that
609  // does not go on the state stack, but has just
610  // a single copy per matcher.
611 
612  UVector32 *fGroupMap; // Map from capture group number to position of
613  // the group's variables in the matcher stack frame.
614 
615  UnicodeSet **fStaticSets; // Ptr to static (shared) sets for predefined
616  // regex character classes, e.g. Word.
617 
618  Regex8BitSet *fStaticSets8; // Ptr to the static (shared) latin-1 only
619  // sets for predefined regex classes.
620 
621  int32_t fStartType; // Info on how a match must start.
622  int32_t fInitialStringIdx; //
623  int32_t fInitialStringLen;
624  UnicodeSet *fInitialChars;
625  UChar32 fInitialChar;
626  Regex8BitSet *fInitialChars8;
627  UBool fNeedsAltInput;
628 
629  UHashtable *fNamedCaptureMap; // Map from capture group names to numbers.
630 
631  friend class RegexCompile;
632  friend class RegexMatcher;
633  friend class RegexCImpl;
634 
635  //
636  // Implementation Methods
637  //
638  void init(); // Common initialization, for use by constructors.
639  void zap(); // Common cleanup
640 
641  void dumpOp(int32_t index) const;
642 
643  public:
644 #ifndef U_HIDE_INTERNAL_API
649  void dumpPattern() const;
650 #endif /* U_HIDE_INTERNAL_API */
651 };
652 
653 
654 
665 public:
666 
680  RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
681 
696  RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
697 
718  RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
719  uint32_t flags, UErrorCode &status);
720 
741  RegexMatcher(UText *regexp, UText *input,
742  uint32_t flags, UErrorCode &status);
743 
744 private:
756  RegexMatcher(const UnicodeString &regexp, const char16_t *input,
757  uint32_t flags, UErrorCode &status);
758 public:
759 
760 
766  virtual ~RegexMatcher();
767 
768 
775  virtual UBool matches(UErrorCode &status);
776 
777 
788  virtual UBool matches(int64_t startIndex, UErrorCode &status);
789 
790 
804  virtual UBool lookingAt(UErrorCode &status);
805 
806 
820  virtual UBool lookingAt(int64_t startIndex, UErrorCode &status);
821 
822 
835  virtual UBool find();
836 
837 
852  virtual UBool find(UErrorCode &status);
853 
863  virtual UBool find(int64_t start, UErrorCode &status);
864 
865 
875  virtual UnicodeString group(UErrorCode &status) const;
876 
877 
895  virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
896 
902  virtual int32_t groupCount() const;
903 
904 
919  virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
920 
941  virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
942 
950  virtual int32_t start(UErrorCode &status) const;
951 
959  virtual int64_t start64(UErrorCode &status) const;
960 
961 
975  virtual int32_t start(int32_t group, UErrorCode &status) const;
976 
990  virtual int64_t start64(int32_t group, UErrorCode &status) const;
991 
1005  virtual int32_t end(UErrorCode &status) const;
1006 
1020  virtual int64_t end64(UErrorCode &status) const;
1021 
1022 
1040  virtual int32_t end(int32_t group, UErrorCode &status) const;
1041 
1059  virtual int64_t end64(int32_t group, UErrorCode &status) const;
1060 
1069  virtual RegexMatcher &reset();
1070 
1071 
1087  virtual RegexMatcher &reset(int64_t index, UErrorCode &status);
1088 
1089 
1107  virtual RegexMatcher &reset(const UnicodeString &input);
1108 
1109 
1123  virtual RegexMatcher &reset(UText *input);
1124 
1125 
1150  virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status);
1151 
1152 private:
1165  RegexMatcher &reset(const char16_t *input);
1166 public:
1167 
1175  virtual const UnicodeString &input() const;
1176 
1185  virtual UText *inputText() const;
1186 
1197  virtual UText *getInput(UText *dest, UErrorCode &status) const;
1198 
1199 
1218  virtual RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status);
1219 
1231  virtual RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
1232 
1241  virtual int32_t regionStart() const;
1242 
1251  virtual int64_t regionStart64() const;
1252 
1253 
1262  virtual int32_t regionEnd() const;
1263 
1272  virtual int64_t regionEnd64() const;
1273 
1282  virtual UBool hasTransparentBounds() const;
1283 
1303 
1304 
1312  virtual UBool hasAnchoringBounds() const;
1313 
1314 
1328 
1329 
1342  virtual UBool hitEnd() const;
1343 
1353  virtual UBool requireEnd() const;
1354 
1355 
1361  virtual const RegexPattern &pattern() const;
1362 
1363 
1380  virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
1381 
1382 
1403  virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
1404 
1405 
1426  virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
1427 
1428 
1453  virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
1454 
1455 
1484  const UnicodeString &replacement, UErrorCode &status);
1485 
1486 
1515  UText *replacement, UErrorCode &status);
1516 
1517 
1529 
1530 
1544  virtual UText *appendTail(UText *dest, UErrorCode &status);
1545 
1546 
1570  virtual int32_t split(const UnicodeString &input,
1571  UnicodeString dest[],
1572  int32_t destCapacity,
1573  UErrorCode &status);
1574 
1575 
1599  virtual int32_t split(UText *input,
1600  UText *dest[],
1601  int32_t destCapacity,
1602  UErrorCode &status);
1603 
1625  virtual void setTimeLimit(int32_t limit, UErrorCode &status);
1626 
1633  virtual int32_t getTimeLimit() const;
1634 
1656  virtual void setStackLimit(int32_t limit, UErrorCode &status);
1657 
1665  virtual int32_t getStackLimit() const;
1666 
1667 
1681  virtual void setMatchCallback(URegexMatchCallback *callback,
1682  const void *context,
1683  UErrorCode &status);
1684 
1685 
1696  virtual void getMatchCallback(URegexMatchCallback *&callback,
1697  const void *&context,
1698  UErrorCode &status);
1699 
1700 
1715  const void *context,
1716  UErrorCode &status);
1717 
1718 
1730  const void *&context,
1731  UErrorCode &status);
1732 
1733 #ifndef U_HIDE_INTERNAL_API
1739  void setTrace(UBool state);
1740 #endif /* U_HIDE_INTERNAL_API */
1741 
1747  static UClassID U_EXPORT2 getStaticClassID();
1748 
1754  virtual UClassID getDynamicClassID() const;
1755 
1756 private:
1757  // Constructors and other object boilerplate are private.
1758  // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
1759  RegexMatcher(); // default constructor not implemented
1760  RegexMatcher(const RegexPattern *pat);
1761  RegexMatcher(const RegexMatcher &other);
1762  RegexMatcher &operator =(const RegexMatcher &rhs);
1763  void init(UErrorCode &status); // Common initialization
1764  void init2(UText *t, UErrorCode &e); // Common initialization, part 2.
1765 
1766  friend class RegexPattern;
1767  friend class RegexCImpl;
1768 public:
1769 #ifndef U_HIDE_INTERNAL_API
1771  void resetPreserveRegion(); // Reset matcher state, but preserve any region.
1772 #endif /* U_HIDE_INTERNAL_API */
1773 private:
1774 
1775  //
1776  // MatchAt This is the internal interface to the match engine itself.
1777  // Match status comes back in matcher member variables.
1778  //
1779  void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status);
1780  inline void backTrack(int64_t &inputIdx, int32_t &patIdx);
1781  UBool isWordBoundary(int64_t pos); // perform Perl-like \b test
1782  UBool isUWordBoundary(int64_t pos); // perform RBBI based \b test
1783  REStackFrame *resetStack();
1784  inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
1785  void IncrementTime(UErrorCode &status);
1786 
1787  // Call user find callback function, if set. Return TRUE if operation should be interrupted.
1788  inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
1789 
1790  int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
1791 
1792  UBool findUsingChunk(UErrorCode &status);
1793  void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
1794  UBool isChunkWordBoundary(int32_t pos);
1795 
1796  const RegexPattern *fPattern;
1797  RegexPattern *fPatternOwned; // Non-NULL if this matcher owns the pattern, and
1798  // should delete it when through.
1799 
1800  const UnicodeString *fInput; // The string being matched. Only used for input()
1801  UText *fInputText; // The text being matched. Is never NULL.
1802  UText *fAltInputText; // A shallow copy of the text being matched.
1803  // Only created if the pattern contains backreferences.
1804  int64_t fInputLength; // Full length of the input text.
1805  int32_t fFrameSize; // The size of a frame in the backtrack stack.
1806 
1807  int64_t fRegionStart; // Start of the input region, default = 0.
1808  int64_t fRegionLimit; // End of input region, default to input.length.
1809 
1810  int64_t fAnchorStart; // Region bounds for anchoring operations (^ or $).
1811  int64_t fAnchorLimit; // See useAnchoringBounds
1812 
1813  int64_t fLookStart; // Region bounds for look-ahead/behind
1814  int64_t fLookLimit; // and other boundary tests. See
1815  // useTransparentBounds
1816 
1817  int64_t fActiveStart; // Currently active bounds for matching.
1818  int64_t fActiveLimit; // Usually is the same as region, but
1819  // is changed to fLookStart/Limit when
1820  // entering look around regions.
1821 
1822  UBool fTransparentBounds; // True if using transparent bounds.
1823  UBool fAnchoringBounds; // True if using anchoring bounds.
1824 
1825  UBool fMatch; // True if the last attempted match was successful.
1826  int64_t fMatchStart; // Position of the start of the most recent match
1827  int64_t fMatchEnd; // First position after the end of the most recent match
1828  // Zero if no previous match, even when a region
1829  // is active.
1830  int64_t fLastMatchEnd; // First position after the end of the previous match,
1831  // or -1 if there was no previous match.
1832  int64_t fAppendPosition; // First position after the end of the previous
1833  // appendReplacement(). As described by the
1834  // JavaDoc for Java Matcher, where it is called
1835  // "append position"
1836  UBool fHitEnd; // True if the last match touched the end of input.
1837  UBool fRequireEnd; // True if the last match required end-of-input
1838  // (matched $ or Z)
1839 
1840  UVector64 *fStack;
1841  REStackFrame *fFrame; // After finding a match, the last active stack frame,
1842  // which will contain the capture group results.
1843  // NOT valid while match engine is running.
1844 
1845  int64_t *fData; // Data area for use by the compiled pattern.
1846  int64_t fSmallData[8]; // Use this for data if it's enough.
1847 
1848  int32_t fTimeLimit; // Max time (in arbitrary steps) to let the
1849  // match engine run. Zero for unlimited.
1850 
1851  int32_t fTime; // Match time, accumulates while matching.
1852  int32_t fTickCounter; // Low bits counter for time. Counts down StateSaves.
1853  // Kept separately from fTime to keep as much
1854  // code as possible out of the inline
1855  // StateSave function.
1856 
1857  int32_t fStackLimit; // Maximum memory size to use for the backtrack
1858  // stack, in bytes. Zero for unlimited.
1859 
1860  URegexMatchCallback *fCallbackFn; // Pointer to match progress callback funct.
1861  // NULL if there is no callback.
1862  const void *fCallbackContext; // User Context ptr for callback function.
1863 
1864  URegexFindProgressCallback *fFindProgressCallbackFn; // Pointer to find progress callback funct.
1865  // NULL if there is no callback.
1866  const void *fFindProgressCallbackContext; // User Context ptr for callback function.
1867 
1868 
1869  UBool fInputUniStrMaybeMutable; // Set when fInputText wraps a UnicodeString that may be mutable - compatibility.
1870 
1871  UBool fTraceDebug; // Set true for debug tracing of match engine.
1872 
1873  UErrorCode fDeferredStatus; // Save error state that cannot be immediately
1874  // reported, or that permanently disables this matcher.
1875 
1876  RuleBasedBreakIterator *fWordBreakItr;
1877 };
1878 
1879 U_NAMESPACE_END
1880 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
1881 
1882 #endif /* U_SHOW_CPLUSPLUS_API */
1883 
1884 #endif
class RegexMatcher bundles together a regular expression pattern and input text to which the expressi...
Definition: regex.h:664
virtual int64_t end64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
virtual UBool hasTransparentBounds() const
Queries the transparency of region bounds for this matcher.
void resetPreserveRegion()
virtual UBool hasAnchoringBounds() const
Return true if this matcher is using anchoring bounds.
virtual int32_t end(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual RegexMatcher & reset(const UnicodeString &input)
Resets this matcher with a new input string.
virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
virtual void getMatchCallback(URegexMatchCallback *&callback, const void *&context, UErrorCode &status)
Get the callback function for this URegularExpression.
virtual int32_t groupCount() const
Returns the number of capturing groups in this matcher's pattern.
virtual UnicodeString group(UErrorCode &status) const
Returns a string containing the text matched by the previous match.
virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual void setFindProgressCallback(URegexFindProgressCallback *callback, const void *context, UErrorCode &status)
Set a progress callback function for use with find operations on this Matcher.
virtual RegexMatcher & reset()
Resets this matcher.
virtual UText * getInput(UText *dest, UErrorCode &status) const
Returns the input string being matched, either by copying it into the provided UText parameter or by ...
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
virtual UBool lookingAt(int64_t startIndex, UErrorCode &status)
Attempts to match the input string, starting from the specified index, against the pattern.
virtual UBool matches(int64_t startIndex, UErrorCode &status)
Resets the matcher, then attempts to match the input beginning at the specified startIndex,...
virtual int32_t start(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual UBool find()
Find the next pattern match in the input string.
RegexMatcher(UText *regexp, UText *input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual const RegexPattern & pattern() const
Returns the pattern that is interpreted by this matcher.
virtual RegexMatcher & region(int64_t start, int64_t limit, UErrorCode &status)
Sets the limits of this matcher's region.
virtual RegexMatcher & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the regular expression is looking for matches without changing...
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
RegexMatcher(const UnicodeString &regexp, const UnicodeString &input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual int32_t getTimeLimit() const
Get the time limit, if any, for match operations made with this Matcher.
virtual UText * group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual void setTimeLimit(int32_t limit, UErrorCode &status)
Set a processing time limit for match operations with this Matcher.
virtual void setMatchCallback(URegexMatchCallback *callback, const void *context, UErrorCode &status)
Set a callback function for use with this Matcher.
virtual int64_t regionStart64() const
Reports the start index of this matcher's region.
virtual int32_t start(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual UnicodeString & appendTail(UnicodeString &dest)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual RegexMatcher & reset(UText *input)
Resets this matcher with a new input string.
virtual ~RegexMatcher()
Destructor.
virtual UBool lookingAt(UErrorCode &status)
Attempts to match the input string, starting from the beginning of the region, against the pattern.
virtual const UnicodeString & input() const
Returns the input string being matched.
virtual UText * replaceAll(UText *replacement, UText *dest, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual RegexMatcher & region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status)
Identical to region(start, limit, status) but also allows a start position without resetting the regi...
virtual RegexMatcher & reset(int64_t index, UErrorCode &status)
Resets this matcher, and sets the current input position.
virtual int64_t regionEnd64() const
Reports the end (limit) index (exclusive) of this matcher's region.
virtual UBool find(UErrorCode &status)
Find the next pattern match in the input string.
RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual RegexMatcher & useAnchoringBounds(UBool b)
Set whether this matcher is using Anchoring Bounds for its region.
virtual UBool matches(UErrorCode &status)
Attempts to match the entire input region against the pattern.
virtual RegexMatcher & appendReplacement(UnicodeString &dest, const UnicodeString &replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
void setTrace(UBool state)
setTrace Debug function, enable/disable tracing of the matching engine.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual RegexMatcher & useTransparentBounds(UBool b)
Sets the transparency of region bounds for this matcher.
virtual int32_t getStackLimit() const
Get the size of the heap storage available for use by the back tracking stack.
virtual int32_t end(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
virtual UText * group(UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual int64_t start64(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual int64_t end64(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual UText * replaceFirst(UText *replacement, UText *dest, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
virtual void setStackLimit(int32_t limit, UErrorCode &status)
Set the amount of heap storage available for use by the match backtracking stack.
virtual int32_t regionEnd() const
Reports the end (limit) index (exclusive) of this matcher's region.
virtual UText * inputText() const
Returns the input string being matched.
virtual RegexMatcher & appendReplacement(UText *dest, UText *replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
virtual int64_t start64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual void getFindProgressCallback(URegexFindProgressCallback *&callback, const void *&context, UErrorCode &status)
Get the find progress callback function for this URegularExpression.
virtual UBool hitEnd() const
Return TRUE if the most recent matching operation attempted to access additional input beyond the ava...
virtual UText * appendTail(UText *dest, UErrorCode &status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual UBool requireEnd() const
Return TRUE if the most recent match succeeded and additional input could cause it to fail.
virtual UBool find(int64_t start, UErrorCode &status)
Resets this RegexMatcher and then attempts to find the next substring of the input string that matche...
virtual int32_t regionStart() const
Reports the start index of this matcher's region.
virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const
Returns a string containing the text captured by the given group during the previous match operation.
Class RegexPattern represents a compiled regular expression.
Definition: regex.h:87
static RegexPattern * compile(UText *regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
virtual ~RegexPattern()
Destructor.
virtual uint32_t flags() const
Get the URegexpFlag match mode flags that were used when compiling this pattern.
void dumpPattern() const
Dump a compiled pattern.
virtual RegexPattern * clone() const
Create an exact copy of this RegexPattern object.
static UBool matches(UText *regex, UText *input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
static RegexPattern * compile(const UnicodeString &regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
static RegexPattern * compile(UText *regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
virtual UText * patternText(UErrorCode &status) const
Returns the regular expression from which this pattern was compiled.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UBool operator!=(const RegexPattern &that) const
Comparison operator.
Definition: regex.h:132
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
virtual UClassID getDynamicClassID() const
ICU "poor man's RTTI", returns a UClassID for the actual class.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
virtual RegexMatcher * matcher(const UnicodeString &input, UErrorCode &status) const
Creates a RegexMatcher that will match the given input against this pattern.
UBool operator==(const RegexPattern &that) const
Comparison operator.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
static UBool matches(const UnicodeString &regex, const UnicodeString &input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const
Get the group number corresponding to a named capture group.
RegexPattern(const RegexPattern &source)
Copy Constructor.
static RegexPattern * compile(UText *regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
RegexPattern()
default constructor.
virtual UnicodeString pattern() const
Returns the regular expression from which this pattern was compiled.
virtual RegexMatcher * matcher(UErrorCode &status) const
Creates a RegexMatcher that will match against this pattern.
virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const
Get the group number corresponding to a named capture group.
A subclass of BreakIterator whose behavior is specified using a list of rules.
Definition: rbbi.h:55
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:281
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:295
struct UHashtable UHashtable
Definition: msgfmt.h:43
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
UText struct.
Definition: utext.h:1347
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:425
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261
#define U_FINAL
Defined to the C++11 "final" keyword if available.
Definition: umachine.h:140
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
C API: Regular Expressions.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1573
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1499
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:301