.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
.\" Copyright (c) 1992, 1993, 1994
.\" The Regents of the University of California. All rights reserved.
.\"
.\" This code is derived from software contributed to Berkeley by
.\" Henry Spencer.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\" 3. Neither the name of the University nor the names of its contributors
.\" may be used to endorse or promote products derived from this software
.\" without specific prior written permission.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd April 15, 2017
.Dt REGEX 3
.Os
.Sh NAME
.Nm regcomp ,
.Nm regexec ,
.Nm regerror ,
.Nm regfree
.Nd regular-expression library
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In regex.h
.Ft int
.Fo regcomp
.Fa "regex_t * restrict preg" "const char * restrict pattern" "int cflags"
.Fc
.Ft int
.Fo regexec
.Fa "const regex_t * restrict preg" "const char * restrict string"
.Fa "size_t nmatch" "regmatch_t pmatch[restrict]" "int eflags"
.Fc
.Ft size_t
.Fo regerror
.Fa "int errcode" "const regex_t * restrict preg"
.Fa "char * restrict errbuf" "size_t errbuf_size"
.Fc
.Ft void
.Fn regfree "regex_t *preg"
.Sh DESCRIPTION
These routines implement
.St -p1003.2
regular expressions
.Pq Do RE Dc Ns s ;
see
.Xr re_format 7 .
The
.Fn regcomp
function
compiles an RE written as a string into an internal form,
.Fn regexec
matches that internal form against a string and reports results,
.Fn regerror
transforms error codes from either into human-readable messages,
and
.Fn regfree
frees any dynamically-allocated storage used by the internal form
of an RE.
.Pp
The header
.In regex.h
declares two structure types,
.Ft regex_t
and
.Ft regmatch_t ,
the former for compiled internal forms and the latter for match reporting.
It also declares the four functions,
a type
.Ft regoff_t ,
and a number of constants with names starting with
.Dq Dv REG_ .
.Pp
The
.Fn regcomp
function
compiles the regular expression contained in the
.Fa pattern
string,
subject to the flags in
.Fa cflags ,
and places the results in the
.Ft regex_t
structure pointed to by
.Fa preg .
The
.Fa cflags
argument
is the bitwise OR of zero or more of the following flags:
.Bl -tag -width REG_EXTENDED
.It Dv REG_EXTENDED
Compile modern
.Pq Dq extended
REs,
rather than the obsolete
.Pq Dq basic
REs that
are the default.
.It Dv REG_BASIC
This is a synonym for 0,
provided as a counterpart to
.Dv REG_EXTENDED
to improve readability.
.It Dv REG_NOSPEC
Compile with recognition of all special characters turned off.
All characters are thus considered ordinary,
so the
.Dq RE
is a literal string.
This is an extension,
compatible with but not specified by
.St -p1003.2 ,
and should be used with
caution in software intended to be portable to other systems.
.Dv REG_EXTENDED
and
.Dv REG_NOSPEC
may not be used
in the same call to
.Fn regcomp .
.It Dv REG_ICASE
Compile for matching that ignores upper/lower case distinctions.
See
.Xr re_format 7 .
.It Dv REG_NOSUB
Compile for matching that need only report success or failure,
not what was matched.
.It Dv REG_NEWLINE
Compile for newline-sensitive matching.
By default, newline is a completely ordinary character with no special
meaning in either REs or strings.
With this flag,
.Ql [^
bracket expressions and
.Ql .\&
never match newline,
a
.Ql ^\&
anchor matches the null string after any newline in the string
in addition to its normal function,
and the
.Ql $\&
anchor matches the null string before any newline in the
string in addition to its normal function.
.It Dv REG_PEND
The regular expression ends,
not at the first NUL,
but just before the character pointed to by the
.Va re_endp
member of the structure pointed to by
.Fa preg .
The
.Va re_endp
member is of type
.Ft "const char *" .
This flag permits inclusion of NULs in the RE;
they are considered ordinary characters.
This is an extension,
compatible with but not specified by
.St -p1003.2 ,
and should be used with
caution in software intended to be portable to other systems.
.It Dv REG_POSIX
Compile only
.St -p1003.2
compliant expressions.
This flag has no effect unless linking against
.Nm libregex .
This is an extension,
compatible with but not specified by
.St -p1003.2 ,
and should be used with
caution in software intended to be portable to other systems.
.El
.Pp
When successful,
.Fn regcomp
returns 0 and fills in the structure pointed to by
.Fa preg .
One member of that structure
(other than
.Va re_endp )
is publicized:
.Va re_nsub ,
of type
.Ft size_t ,
contains the number of parenthesized subexpressions within the RE
(except that the value of this member is undefined if the
.Dv REG_NOSUB
flag was used).
If
.Fn regcomp
fails, it returns a non-zero error code;
see
.Sx DIAGNOSTICS .
.Pp
The
.Fn regexec
function
matches the compiled RE pointed to by
.Fa preg
against the
.Fa string ,
subject to the flags in
.Fa eflags ,
and reports results using
.Fa nmatch ,
.Fa pmatch ,
and the returned value.
The RE must have been compiled by a previous invocation of
.Fn regcomp .
The compiled form is not altered during execution of
.Fn regexec ,
so a single compiled RE can be used simultaneously by multiple threads.
.Pp
By default,
the NUL-terminated string pointed to by
.Fa string
is considered to be the text of an entire line, minus any terminating
newline.
The
.Fa eflags
argument is the bitwise OR of zero or more of the following flags:
.Bl -tag -width REG_STARTEND
.It Dv REG_NOTBOL
The first character of the string is treated as the continuation
of a line.
This means that the anchors
.Ql ^\& ,
.Ql [[:<:]] ,
and
.Ql \e<
do not match before it; but see
.Dv REG_STARTEND
below.
This does not affect the behavior of newlines under
.Dv REG_NEWLINE .
.It Dv REG_NOTEOL
The NUL terminating
the string
does not end a line, so the
.Ql $\&
anchor does not match before it.
This does not affect the behavior of newlines under
.Dv REG_NEWLINE .
.It Dv REG_STARTEND
The string is considered to start at
.Fa string No +
.Fa pmatch Ns [0]. Ns Fa rm_so
and to end before the byte located at
.Fa string No +
.Fa pmatch Ns [0]. Ns Fa rm_eo ,
regardless of the value of
.Fa nmatch .
See below for the definition of
.Fa pmatch
and
.Fa nmatch .
This is an extension,
compatible with but not specified by
.St -p1003.2 ,
and should be used with
caution in software intended to be portable to other systems.
.Pp
Without
.Dv REG_NOTBOL ,
the position
.Fa rm_so
is considered the beginning of a line, such that
.Ql ^
matches before it, and the beginning of a word if there is a word
character at this position, such that
.Ql [[:<:]]
and
.Ql \e<
match before it.
.Pp
With
.Dv REG_NOTBOL ,
the character at position
.Fa rm_so
is treated as the continuation of a line, and if
.Fa rm_so
is greater than 0, the preceding character is taken into consideration.
If the preceding character is a newline and the regular expression was compiled
with
.Dv REG_NEWLINE ,
.Ql ^
matches before the string; if the preceding character is not a word character
but the string starts with a word character,
.Ql [[:<:]]
and
.Ql \e<
match before the string.
.El
.Pp
See
.Xr re_format 7
for a discussion of what is matched in situations where an RE or a
portion thereof could match any of several substrings of
.Fa string .
.Pp
Normally,
.Fn regexec
returns 0 for success and the non-zero code
.Dv REG_NOMATCH
for failure.
Other non-zero error codes may be returned in exceptional situations;
see
.Sx DIAGNOSTICS .
.Pp
If
.Dv REG_NOSUB
was specified in the compilation of the RE,
or if
.Fa nmatch
is 0,
.Fn regexec
ignores the
.Fa pmatch
argument (but see below for the case where
.Dv REG_STARTEND
is specified).
Otherwise,
.Fa pmatch
points to an array of
.Fa nmatch
structures of type
.Ft regmatch_t .
Such a structure has at least the members
.Va rm_so
and
.Va rm_eo ,
both of type
.Ft regoff_t
(a signed arithmetic type at least as large as an
.Ft off_t
and a
.Ft ssize_t ) ,
containing respectively the offset of the first character of a substring
and the offset of the first character after the end of the substring.
Offsets are measured from the beginning of the
.Fa string
argument given to
.Fn regexec .
An empty substring is denoted by equal offsets,
both indicating the character following the empty substring.
.Pp
The 0th member of the
.Fa pmatch
array is filled in to indicate what substring of
.Fa string
was matched by the entire RE.
Remaining members report what substring was matched by parenthesized
subexpressions within the RE;
member
.Va i
reports subexpression
.Va i ,
with subexpressions counted (starting at 1) by the order of their opening
parentheses in the RE, left to right.
Unused entries in the array (corresponding either to subexpressions that
did not participate in the match at all, or to subexpressions that do not
exist in the RE (that is,
.Va i
>
.Fa preg Ns -> Ns Va re_nsub ) )
have both
.Va rm_so
and
.Va rm_eo
set to -1.
If a subexpression participated in the match several times,
the reported substring is the last one it matched.
(Note, as a particular example, that when the RE
.Ql "(b*)+"
matches
.Ql bbb ,
the parenthesized subexpression matches each of the three
.So Li b Sc Ns s
and then
an infinite number of empty strings following the last
.Ql b ,
so the reported substring is one of the empties.)
.Pp
If
.Dv REG_STARTEND
is specified,
.Fa pmatch
must point to at least one
.Ft regmatch_t
(even if
.Fa nmatch
is 0 or
.Dv REG_NOSUB
was specified),
to hold the input offsets for
.Dv REG_STARTEND .
Use for output is still entirely controlled by
.Fa nmatch ;
if
.Fa nmatch
is 0 or
.Dv REG_NOSUB
was specified,
the value of
.Fa pmatch Ns [0]
will not be changed by a successful
.Fn regexec .
.Pp
The
.Fn regerror
function
maps a non-zero
.Fa errcode
from either
.Fn regcomp
or
.Fn regexec
to a human-readable, printable message.
If
.Fa preg
is
.No non\- Ns Dv NULL ,
the error code should have arisen from use of
the
.Ft regex_t
pointed to by
.Fa preg ,
and if the error code came from
.Fn regcomp ,
it should have been the result from the most recent
.Fn regcomp
using that
.Ft regex_t .
.Po
.Fn regerror
may be able to supply a more detailed message using information
from the
.Ft regex_t .
.Pc
The
.Fn regerror
function
places the NUL-terminated message into the buffer pointed to by
.Fa errbuf ,
limiting the length (including the NUL) to at most
.Fa errbuf_size
bytes.
If the whole message will not fit,
as much of it as will fit before the terminating NUL is supplied.
In any case,
the returned value is the size of buffer needed to hold the whole
message (including terminating NUL).
If
.Fa errbuf_size
is 0,
.Fa errbuf
is ignored but the return value is still correct.
.Pp
If the
.Fa errcode
given to
.Fn regerror
is first ORed with
.Dv REG_ITOA ,
the
.Dq message
that results is the printable name of the error code,
e.g.\&
.Dq Dv REG_NOMATCH ,
rather than an explanation thereof.
If
.Fa errcode
is
.Dv REG_ATOI ,
then
.Fa preg
shall be
.No non\- Ns Dv NULL
and the
.Va re_endp
member of the structure it points to
must point to the printable name of an error code;
in this case, the result in
.Fa errbuf
is the decimal digits of
the numeric value of the error code
(0 if the name is not recognized).
.Dv REG_ITOA
and
.Dv REG_ATOI
are intended primarily as debugging facilities;
they are extensions,
compatible with but not specified by
.St -p1003.2 ,
and should be used with
caution in software intended to be portable to other systems.
Be warned also that they are considered experimental and changes are possible.
.Pp
The
.Fn regfree
function
frees any dynamically-allocated storage associated with the compiled RE
pointed to by
.Fa preg .
The remaining
.Ft regex_t
is no longer a valid compiled RE
and the effect of supplying it to
.Fn regexec
or
.Fn regerror
is undefined.
.Pp
None of these functions references global variables except for tables
of constants;
all are safe for use from multiple threads if the arguments are safe.
.Sh IMPLEMENTATION CHOICES
There are a number of decisions that
.St -p1003.2
leaves up to the implementor,
either by explicitly saying
.Dq undefined
or by virtue of them being
forbidden by the RE grammar.
This implementation treats them as follows.
.Pp
See
.Xr re_format 7
for a discussion of the definition of case-independent matching.
.Pp
There is no particular limit on the length of REs,
except insofar as memory is limited.
Memory usage is approximately linear in RE size, and largely insensitive
to RE complexity, except for bounded repetitions.
See
.Sx BUGS
for one short RE using them
that will run almost any system out of memory.
.Pp
A backslashed character other than one specifically given a magic meaning
by
.St -p1003.2
(such magic meanings occur only in obsolete
.Bq Dq basic
REs)
is taken as an ordinary character.
.Pp
Any unmatched
.Ql [\&
is a
.Dv REG_EBRACK
error.
.Pp
Equivalence classes cannot begin or end bracket-expression ranges.
The endpoint of one range cannot begin another.
.Pp
.Dv RE_DUP_MAX ,
the limit on repetition counts in bounded repetitions, is 255.
.Pp
A repetition operator
.Ql ( ?\& ,
.Ql *\& ,
.Ql +\& ,
or bounds)
cannot follow another
repetition operator.
A repetition operator cannot begin an expression or subexpression
or follow
.Ql ^\&
or
.Ql |\& .
.Pp
.Ql |\&
cannot appear first or last in a (sub)expression or after another
.Ql |\& ,
i.e., an operand of
.Ql |\&
cannot be an empty subexpression.
An empty parenthesized subexpression,
.Ql "()" ,
is legal and matches an
empty (sub)string.
An empty string is not a legal RE.
.Pp
A
.Ql {\&
followed by a digit is considered the beginning of bounds for a
bounded repetition, which must then follow the syntax for bounds.
A
.Ql {\&
.Em not
followed by a digit is considered an ordinary character.
.Pp
.Ql ^\&
and
.Ql $\&
beginning and ending subexpressions in obsolete
.Pq Dq basic
REs are anchors, not ordinary characters.
.Sh DIAGNOSTICS
Non-zero error codes from
.Fn regcomp
and
.Fn regexec
include the following:
.Pp
.Bl -tag -width REG_ECOLLATE -compact
.It Dv REG_NOMATCH
The
.Fn regexec
function
failed to match
.It Dv REG_BADPAT
invalid regular expression
.It Dv REG_ECOLLATE
invalid collating element
.It Dv REG_ECTYPE
invalid character class
.It Dv REG_EESCAPE
.Ql \e
applied to unescapable character
.It Dv REG_ESUBREG
invalid backreference number
.It Dv REG_EBRACK
brackets
.Ql "[ ]"
not balanced
.It Dv REG_EPAREN
parentheses
.Ql "( )"
not balanced
.It Dv REG_EBRACE
braces
.Ql "{ }"
Ermilovnot balanced 659794b517fSRuslan Ermilov.It Dv REG_BADBR 660794b517fSRuslan Ermilovinvalid repetition count(s) in 661794b517fSRuslan Ermilov.Ql "{ }" 662794b517fSRuslan Ermilov.It Dv REG_ERANGE 663794b517fSRuslan Ermilovinvalid character range in 664794b517fSRuslan Ermilov.Ql "[ ]" 665794b517fSRuslan Ermilov.It Dv REG_ESPACE 666794b517fSRuslan Ermilovran out of memory 667794b517fSRuslan Ermilov.It Dv REG_BADRPT 668794b517fSRuslan Ermilov.Ql ?\& , 669794b517fSRuslan Ermilov.Ql *\& , 670794b517fSRuslan Ermilovor 671794b517fSRuslan Ermilov.Ql +\& 672794b517fSRuslan Ermilovoperand invalid 673794b517fSRuslan Ermilov.It Dv REG_EMPTY 674794b517fSRuslan Ermilovempty (sub)expression 675794b517fSRuslan Ermilov.It Dv REG_ASSERT 6760227791bSRuslan Ermilovcannot happen - you found a bug 677794b517fSRuslan Ermilov.It Dv REG_INVARG 6781a0a9345SRuslan Ermilovinvalid argument, e.g.\& negative-length string 67967aff189STim J. Robbins.It Dv REG_ILLSEQ 68067aff189STim J. Robbinsillegal byte sequence (bad multibyte character) 681794b517fSRuslan Ermilov.El 68224a0682cSRuslan Ermilov.Sh SEE ALSO 68324a0682cSRuslan Ermilov.Xr grep 1 , 68424a0682cSRuslan Ermilov.Xr re_format 7 68524a0682cSRuslan Ermilov.Pp 68624a0682cSRuslan Ermilov.St -p1003.2 , 68724a0682cSRuslan Ermilovsections 2.8 (Regular Expression Notation) 68824a0682cSRuslan Ermilovand 68924a0682cSRuslan ErmilovB.5 (C Binding for Regular Expression Matching). 690794b517fSRuslan Ermilov.Sh HISTORY 691794b517fSRuslan ErmilovOriginally written by 692794b517fSRuslan Ermilov.An Henry Spencer . 6937bdf80e5SMike PritchardAltered for inclusion in the 6947bdf80e5SMike Pritchard.Bx 4.4 6957bdf80e5SMike Pritcharddistribution. 696794b517fSRuslan Ermilov.Sh BUGS 69758f0484fSRodney W. GrimesThis is an alpha release with known defects. 69858f0484fSRodney W. GrimesPlease report problems. 699794b517fSRuslan Ermilov.Pp 70058f0484fSRodney W. GrimesThe back-reference code is subtle and doubts linger about its correctness 70158f0484fSRodney W. 
Grimesin complex cases. 702794b517fSRuslan Ermilov.Pp 7031fae73b1SRuslan ErmilovThe 7041fae73b1SRuslan Ermilov.Fn regexec 7051fae73b1SRuslan Ermilovfunction 70658f0484fSRodney W. Grimesperformance is poor. 70758f0484fSRodney W. GrimesThis will improve with later releases. 7082efeeba5SRuslan ErmilovThe 7092efeeba5SRuslan Ermilov.Fa nmatch 7102efeeba5SRuslan Ermilovargument 71158f0484fSRodney W. Grimesexceeding 0 is expensive; 712794b517fSRuslan Ermilov.Fa nmatch 71358f0484fSRodney W. Grimesexceeding 1 is worse. 7141fae73b1SRuslan ErmilovThe 7151fae73b1SRuslan Ermilov.Fn regexec 7161fae73b1SRuslan Ermilovfunction 717794b517fSRuslan Ermilovis largely insensitive to RE complexity 718794b517fSRuslan Ermilov.Em except 719794b517fSRuslan Ermilovthat back 72058f0484fSRodney W. Grimesreferences are massively expensive. 72158f0484fSRodney W. GrimesRE length does matter; in particular, there is a strong speed bonus 72258f0484fSRodney W. Grimesfor keeping RE length under about 30 characters, 72358f0484fSRodney W. Grimeswith most special characters counting roughly double. 724794b517fSRuslan Ermilov.Pp 7251fae73b1SRuslan ErmilovThe 7261fae73b1SRuslan Ermilov.Fn regcomp 7271fae73b1SRuslan Ermilovfunction 72858f0484fSRodney W. Grimesimplements bounded repetitions by macro expansion, 72958f0484fSRodney W. Grimeswhich is costly in time and space if counts are large 73058f0484fSRodney W. Grimesor bounded repetitions are nested. 73158f0484fSRodney W. GrimesAn RE like, say, 732794b517fSRuslan Ermilov.Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}" 73358f0484fSRodney W. Grimeswill (eventually) run almost any existing machine out of swap space. 734794b517fSRuslan Ermilov.Pp 73558f0484fSRodney W. GrimesThere are suspected problems with response to obscure error conditions. 73658f0484fSRodney W. GrimesNotably, 73758f0484fSRodney W. Grimescertain kinds of internal overflow, 73858f0484fSRodney W. 
Grimesproduced only by truly enormous REs or by multiply nested bounded repetitions, 73958f0484fSRodney W. Grimesare probably not handled well. 740794b517fSRuslan Ermilov.Pp 741794b517fSRuslan ErmilovDue to a mistake in 742794b517fSRuslan Ermilov.St -p1003.2 , 743794b517fSRuslan Ermilovthings like 744794b517fSRuslan Ermilov.Ql "a)b" 745794b517fSRuslan Ermilovare legal REs because 746794b517fSRuslan Ermilov.Ql )\& 747794b517fSRuslan Ermilovis 748794b517fSRuslan Ermilova special character only in the presence of a previous unmatched 749794b517fSRuslan Ermilov.Ql (\& . 7500227791bSRuslan ErmilovThis cannot be fixed until the spec is fixed. 751794b517fSRuslan Ermilov.Pp 75258f0484fSRodney W. GrimesThe standard's definition of back references is vague. 75358f0484fSRodney W. GrimesFor example, does 754794b517fSRuslan Ermilov.Ql "a\e(\e(b\e)*\e2\e)*d" 755794b517fSRuslan Ermilovmatch 756794b517fSRuslan Ermilov.Ql "abbbd" ? 75758f0484fSRodney W. GrimesUntil the standard is clarified, 75858f0484fSRodney W. Grimesbehavior in such cases should not be relied on. 759794b517fSRuslan Ermilov.Pp 76058f0484fSRodney W. GrimesThe implementation of word-boundary matching is a bit of a kludge, 76158f0484fSRodney W. Grimesand bugs may lurk in combinations of word-boundary matching and anchoring. 7620eac0547STim J. Robbins.Pp 7630eac0547STim J. RobbinsWord-boundary matching does not work properly in multibyte locales. 764