1.\" Copyright (c) 1992, 1993, 1994 Henry Spencer. 2.\" Copyright (c) 1992, 1993, 1994 3.\" The Regents of the University of California. All rights reserved. 4.\" 5.\" This code is derived from software contributed to Berkeley by 6.\" Henry Spencer. 7.\" 8.\" Redistribution and use in source and binary forms, with or without 9.\" modification, are permitted provided that the following conditions 10.\" are met: 11.\" 1. Redistributions of source code must retain the above copyright 12.\" notice, this list of conditions and the following disclaimer. 13.\" 2. Redistributions in binary form must reproduce the above copyright 14.\" notice, this list of conditions and the following disclaimer in the 15.\" documentation and/or other materials provided with the distribution. 16.\" 3. Neither the name of the University nor the names of its contributors 17.\" may be used to endorse or promote products derived from this software 18.\" without specific prior written permission. 19.\" 20.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30.\" SUCH DAMAGE. 31.\" 32.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for 33.\" permission to reproduce portions of its copyrighted documentation. 
34.\" Original documentation from The Open Group can be obtained online at 35.\" http://www.opengroup.org/bookstore/. 36.\" 37.\" The Institute of Electrical and Electronics Engineers and The Open 38.\" Group, have given us permission to reprint portions of their 39.\" documentation. 40.\" 41.\" In the following statement, the phrase ``this text'' refers to portions 42.\" of the system documentation. 43.\" 44.\" Portions of this text are reprinted and reproduced in electronic form 45.\" in the SunOS Reference Manual, from IEEE Std 1003.1, 2004 Edition, 46.\" Standard for Information Technology -- Portable Operating System 47.\" Interface (POSIX), The Open Group Base Specifications Issue 6, 48.\" Copyright (C) 2001-2004 by the Institute of Electrical and Electronics 49.\" Engineers, Inc and The Open Group. In the event of any discrepancy 50.\" between these versions and the original IEEE and The Open Group 51.\" Standard, the original IEEE and The Open Group Standard is the referee 52.\" document. The original Standard can be obtained online at 53.\" http://www.opengroup.org/unix/online.html. 54.\" 55.\" This notice shall appear on any product containing this material. 56.\" 57.\" The contents of this file are subject to the terms of the 58.\" Common Development and Distribution License (the "License"). 59.\" You may not use this file except in compliance with the License. 60.\" 61.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 62.\" or http://www.opensolaris.org/os/licensing. 63.\" See the License for the specific language governing permissions 64.\" and limitations under the License. 65.\" 66.\" When distributing Covered Code, include this CDDL HEADER in each 67.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
68.\" If applicable, add the following below this CDDL HEADER, with the 69.\" fields enclosed by brackets "[]" replaced with your own identifying 70.\" information: Portions Copyright [yyyy] [name of copyright owner] 71.\" 72.\" 73.\" Copyright (c) 1992, X/Open Company Limited. All Rights Reserved. 74.\" Portions Copyright (c) 2003, Sun Microsystems, Inc. All Rights Reserved. 75.\" Copyright 2017 Nexenta Systems, Inc. 76.\" 77.Dd December 26, 2023 78.Dt REGCOMP 3C 79.Os 80.Sh NAME 81.Nm regcomp , 82.Nm regexec , 83.Nm regerror , 84.Nm regfree 85.Nd regular-expression library 86.Sh LIBRARY 87.Lb libc 88.Sh SYNOPSIS 89.In regex.h 90.Ft int 91.Fo regcomp 92.Fa "regex_t *restrict preg" "const char *restrict pattern" "int cflags" 93.Fc 94.Ft int 95.Fo regexec 96.Fa "const regex_t *restrict preg" "const char *restrict string" 97.Fa "size_t nmatch" "regmatch_t pmatch[restrict]" "int eflags" 98.Fc 99.Ft size_t 100.Fo regerror 101.Fa "int errcode" "const regex_t *restrict preg" 102.Fa "char *restrict errbuf" "size_t errbuf_size" 103.Fc 104.Ft void 105.Fn regfree "regex_t *preg" 106.Sh DESCRIPTION 107These routines implement 108.St -p1003.2 109regular expressions; see 110.Xr regex 7 . 111The 112.Fn regcomp 113function compiles an RE written as a string into an internal form, 114.Fn regexec 115matches that internal form against a string and reports results, 116.Fn regerror 117transforms error codes from either into human-readable messages, 118and 119.Fn regfree 120frees any dynamically-allocated storage used by the internal form 121of an RE. 122.Pp 123The translation of an RE into the internal form contained in a 124.Ft regex_t 125is inherently locale-specific; changes to the locale in effect between 126.Fn regcomp 127and subsequent calls to 128.Fn regexec 129may result in unexpected or undefined behavior. 
130.Pp 131The header 132.In regex.h 133declares two structure types, 134.Ft regex_t 135and 136.Ft regmatch_t , 137the former for compiled internal forms and the latter for match reporting. 138It also declares the four functions, a type 139.Ft regoff_t , 140and a number of constants with names starting with 141.Qq Dv REG_ . 142.Ss Fn regcomp 143The 144.Fn regcomp 145function compiles the regular expression contained in the 146.Fa pattern 147string, subject to the flags in 148.Fa cflags , 149and places the results in the 150.Ft regex_t 151structure pointed to by 152.Fa preg . 153The 154.Fa cflags 155argument is the bitwise OR of zero or more of the following flags: 156.Bl -tag -width REG_EXTENDED 157.It Dv REG_EXTENDED 158Compile extended regular expressions 159.Pq EREs , 160rather than the basic regular expressions 161.Pq BREs 162that are the default. 163.It Dv REG_BASIC 164This is a synonym for 0, provided as a counterpart to 165.Dv REG_EXTENDED 166to improve readability. 167.It Dv REG_NOSPEC 168Compile with recognition of all special characters turned off. 169All characters are thus considered ordinary, so the RE is a literal string. 170This is an extension, compatible with but not specified by 171.St -p1003.2 , 172and should be used with caution in software intended to be portable to other 173systems. 174.Dv REG_EXTENDED 175and 176.Dv REG_NOSPEC 177may not be used in the same call to 178.Fn regcomp . 179.It Dv REG_ICASE 180Compile for matching that ignores upper/lower case distinctions. 181See 182.Xr regex 7 . 183.It Dv REG_NOSUB 184Compile for matching that need only report success or failure, 185not what was matched. 186.It Dv REG_NEWLINE 187Compile for newline-sensitive matching. 188By default, newline is a completely ordinary character with no special 189meaning in either REs or strings. 190With this flag, 191.Qq [^ 192bracket expressions and 193.Qq \&. 
194never match newline, 195a 196.Qq \&^ 197anchor matches the null string after any newline in the string in addition to 198its normal function, and the 199.Qq \&$ 200anchor matches the null string before any newline in the string in addition to 201its normal function. 202.It Dv REG_PEND 203The regular expression ends, not at the first NUL, but just before the character 204pointed to by the 205.Va re_endp 206member of the structure pointed to by 207.Fa preg . 208The 209.Va re_endp 210member is of type 211.Vt "const char *" . 212This flag permits inclusion of NULs in the RE; they are considered ordinary 213characters. 214This is an extension, compatible with but not specified by 215.St -p1003.2 , 216and should be used with caution in software intended to be portable to other 217systems. 218.El 219.Pp 220When successful, 221.Fn regcomp 222returns 0 and fills in the structure pointed to by 223.Fa preg . 224One member of that structure 225.Po other than 226.Va re_endp 227.Pc 228is publicized: 229.Va re_nsub , 230of type 231.Ft size_t , 232contains the number of parenthesized subexpressions within the RE 233.Po except that the value of this member is undefined if the 234.Dv REG_NOSUB 235flag was used 236.Pc . 237.Ss Fn regexec 238The 239.Fn regexec 240function matches the compiled RE pointed to by 241.Fa preg 242against the 243.Fa string , 244subject to the flags in 245.Fa eflags , 246and reports results using 247.Fa nmatch , 248.Fa pmatch , 249and the returned value. 250The RE must have been compiled by a previous invocation of 251.Fn regcomp . 252The compiled form is not altered during execution of 253.Fn regexec , 254so a single compiled RE can be used simultaneously by multiple threads. 255The locale in effect at the time of 256.Fn regexec 257must be the same as the one in effect when the RE was compiled by 258.Fn regcomp . 
259.Pp 260By default, the NUL-terminated string pointed to by 261.Fa string 262is considered to be the text of an entire line, minus any terminating 263newline. 264The 265.Fa eflags 266argument is the bitwise OR of zero or more of the following flags: 267.Bl -tag -width REG_STARTEND 268.It Dv REG_NOTBOL 269The first character of the string is treated as the continuation 270of a line. 271This means that the anchors 272.Qq \&^ , 273.Qq [[:<:]] , 274and 275.Qq \e< 276do not match before it; but see 277.Dv REG_STARTEND 278below. 279This does not affect the behavior of newlines under 280.Dv REG_NEWLINE . 281.It Dv REG_NOTEOL 282The NUL terminating the string does not end a line, so the 283.Qq \&$ 284anchor does not match before it. 285This does not affect the behavior of newlines under 286.Dv REG_NEWLINE . 287.It Dv REG_STARTEND 288The string is considered to start at 289.Fa string No + 290.Fa pmatch Ns [0]. Ns Fa rm_so 291and to end before the byte located at 292.Fa string No + 293.Fa pmatch Ns [0]. Ns Fa rm_eo , 294regardless of the value of 295.Fa nmatch . 296See below for the definition of 297.Fa pmatch 298and 299.Fa nmatch . 300This is an extension, compatible with but not specified by 301.St -p1003.2 , 302and should be used with caution in software intended to be portable to other 303systems. 304.Pp 305Without 306.Dv REG_NOTBOL , 307the position 308.Fa rm_so 309is considered the beginning of a line, such that 310.Qq \&^ 311matches before it, and the beginning of a word if there is a word character at 312this position, such that 313.Qq [[:<:]] 314and 315.Qq \e< 316match before it. 317.Pp 318With 319.Dv REG_NOTBOL , 320the character at position 321.Fa rm_so 322is treated as the continuation of a line, and if 323.Fa rm_so 324is greater than 0, the preceding character is taken into consideration. 
325If the preceding character is a newline and the regular expression was compiled 326with 327.Dv REG_NEWLINE , 328.Qq ^ 329matches before the string; if the preceding character is not a word character 330but the string starts with a word character, 331.Qq [[:<:]] 332and 333.Qq \e< 334match before the string. 335.El 336.Pp 337See 338.Xr regex 7 339for a discussion of what is matched in situations where an RE or a portion 340thereof could match any of several substrings of 341.Fa string . 342.Pp 343If 344.Dv REG_NOSUB 345was specified in the compilation of the RE, or if 346.Fa nmatch 347is 0, 348.Fn regexec 349ignores the 350.Fa pmatch 351argument 352.Po but see below for the case where 353.Dv REG_STARTEND 354is specified 355.Pc . 356Otherwise, 357.Fa pmatch 358points to an array of 359.Fa nmatch 360structures of type 361.Ft regmatch_t . 362Such a structure has at least the members 363.Va rm_so 364and 365.Va rm_eo , 366both of type 367.Ft regoff_t 368.Po a signed arithmetic type at least as large as an 369.Ft off_t 370and a 371.Ft ssize_t 372.Pc , 373containing respectively the offset of the first character of a substring 374and the offset of the first character after the end of the substring. 375Offsets are measured from the beginning of the 376.Fa string 377argument given to 378.Fn regexec . 379An empty substring is denoted by equal offsets, both indicating the character 380following the empty substring. 381.Pp 382The 0th member of the 383.Fa pmatch 384array is filled in to indicate what substring of 385.Fa string 386was matched by the entire RE. 387Remaining members report what substring was matched by parenthesized 388subexpressions within the RE; member 389.Va i 390reports subexpression 391.Va i , 392with subexpressions counted 393.Pq starting at 1 394by the order of their opening parentheses in the RE, left to right. 
395Unused entries in the array 396.Po corresponding either to subexpressions that did not participate in the match 397at all, or to subexpressions that do not exist in the RE 398.Po that is, 399.Va i 400> 401.Fa preg Ns -> Ns Va re_nsub 402.Pc 403.Pc 404have both 405.Va rm_so 406and 407.Va rm_eo 408set to -1. 409If a subexpression participated in the match several times, 410the reported substring is the last one it matched. 411.Po Note, as an example in particular, that when the RE 412.Qq (b*)+ 413matches 414.Qq bbb , 415the parenthesized subexpression matches each of the three 416.So Li b Sc Ns s 417and then an infinite number of empty strings following the last 418.Qq b , 419so the reported substring is one of the empties. 420.Pc 421.Pp 422If 423.Dv REG_STARTEND 424is specified, 425.Fa pmatch 426must point to at least one 427.Ft regmatch_t 428.Po even if 429.Fa nmatch 430is 0 or 431.Dv REG_NOSUB 432was specified 433.Pc , 434to hold the input offsets for 435.Dv REG_STARTEND . 436Use for output is still entirely controlled by 437.Fa nmatch ; 438if 439.Fa nmatch 440is 0 or 441.Dv REG_NOSUB 442was specified, 443the value of 444.Fa pmatch Ns [0] 445will not be changed by a successful 446.Fn regexec . 447.Ss Fn regerror 448The 449.Fn regerror 450function maps a non-zero 451.Fa errcode 452from either 453.Fn regcomp 454or 455.Fn regexec 456to a human-readable, printable message. 457If 458.Fa preg 459is non-NULL, the error code should have arisen from use of the 460.Ft regex_t 461pointed to by 462.Fa preg , 463and if the error code came from 464.Fn regcomp , 465it should have been the result from the most recent 466.Fn regcomp 467using that 468.Ft regex_t . 469.Po The 470.Fn regerror 471function may be able to supply a more detailed message 472using information 473from the 474.Ft regex_t . 
475.Pc 476The 477.Fn regerror 478function places the NUL-terminated message into the buffer pointed to by 479.Fa errbuf , 480limiting the length 481.Pq including the NUL 482to at most 483.Fa errbuf_size 484bytes. 485If the whole message will not fit, as much of it as will fit before the 486terminating NUL is supplied. 487In any case, the returned value is the size of buffer needed to hold the whole 488message 489.Pq including terminating NUL . 490If 491.Fa errbuf_size 492is 0, 493.Fa errbuf 494is ignored but the return value is still correct. 495.Pp 496If the 497.Fa errcode 498given to 499.Fn regerror 500is first ORed with 501.Dv REG_ITOA , 502the 503.Qq message 504that results is the printable name of the error code, e.g. 505.Qq Dv REG_NOMATCH , 506rather than an explanation thereof. 507If 508.Fa errcode 509is 510.Dv REG_ATOI , 511then 512.Fa preg 513shall be non-NULL and the 514.Va re_endp 515member of the structure it points to must point to the printable name of an 516error code; in this case, the result in 517.Fa errbuf 518is the decimal digits of the numeric value of the error code 519.Pq 0 if the name is not recognized . 520.Dv REG_ITOA 521and 522.Dv REG_ATOI 523are intended primarily as debugging facilities; they are extensions, 524compatible with but not specified by 525.St -p1003.2 , 526and should be used with caution in software intended to be portable to other 527systems. 528.Ss Fn regfree 529The 530.Fn regfree 531function frees any dynamically-allocated storage associated with the compiled RE 532pointed to by 533.Fa preg . 534The remaining 535.Ft regex_t 536is no longer a valid compiled RE and the effect of supplying it to 537.Fn regexec 538or 539.Fn regerror 540is undefined. 541.Sh IMPLEMENTATION NOTES 542There are a number of decisions that 543.St -p1003.2 544leaves up to the implementor, 545either by explicitly saying 546.Qq undefined 547or by virtue of them being forbidden by the RE grammar. 548This implementation treats them as follows. 
549.Pp 550There is no particular limit on the length of REs, except insofar as memory is 551limited. 552Memory usage is approximately linear in RE size, and largely insensitive 553to RE complexity, except for bounded repetitions. 554.Pp 555A backslashed character other than one specifically given a magic meaning by 556.St -p1003.2 557.Pq such magic meanings occur only in BREs 558is taken as an ordinary character. 559.Pp 560Any unmatched 561.Qq \&[ 562is a 563.Dv REG_EBRACK 564error. 565.Pp 566Equivalence classes cannot begin or end bracket-expression ranges. 567The endpoint of one range cannot begin another. 568.Pp 569.Dv RE_DUP_MAX , 570the limit on repetition counts in bounded repetitions, is 255. 571.Pp 572A repetition operator 573.Po 574.Qq \&? , 575.Qq \&* , 576.Qq \&+ , 577or bounds 578.Pc 579cannot follow another repetition operator. 580A repetition operator cannot begin an expression or subexpression 581or follow 582.Qq \&^ 583or 584.Qq \&| . 585.Pp 586.Qq \&| 587cannot appear first or last in a (sub)expression or after another 588.Qq \&| , 589i.e., an operand of 590.Qq \&| 591cannot be an empty subexpression. 592An empty parenthesized subexpression, 593.Qq () , 594is legal and matches an empty (sub)string. 595An empty string is not a legal RE. 596.Pp 597A 598.Qq \&{ 599followed by a digit is considered the beginning of bounds for a bounded 600repetition, which must then follow the syntax for bounds. 601A 602.Qq \&{ 603.Em not 604followed by a digit is considered an ordinary character. 605.Pp 606.Qq \&^ 607and 608.Qq \&$ 609beginning and ending subexpressions in BREs are anchors, not ordinary 610characters. 611.Sh RETURN VALUES 612On successful completion, the 613.Fn regcomp 614function returns 0. 615Otherwise, it returns an integer value indicating an error as described in 616.In regex.h , 617and the content of preg is undefined. 618.Pp 619On successful completion, the 620.Fn regexec 621function returns 0. 
622Otherwise it returns 623.Dv REG_NOMATCH 624to indicate no match, or 625.Dv REG_ENOSYS 626to indicate that the function is not supported. 627.Pp 628Upon successful completion, the 629.Fn regerror 630function returns the number of bytes needed to hold the entire generated string. 631Otherwise, it returns 0 to indicate that the function is not implemented. 632.Pp 633The 634.Fn regfree 635function returns no value. 636.Pp 637The following constants are defined as error return values: 638.Pp 639.Bl -tag -width "REG_ECOLLATE" -compact 640.It Dv REG_NOMATCH 641The 642.Fn regexec 643function failed to match. 644.It Dv REG_BADPAT 645Invalid regular expression. 646.It Dv REG_ECOLLATE 647Invalid collating element referenced. 648.It Dv REG_ECTYPE 649Invalid character class type referenced. 650.It Dv REG_EESCAPE 651Trailing 652.Qq \&\e 653in pattern. 654.It Dv REG_ESUBREG 655Number in 656.Qq \&\e Ns Em digit 657invalid or in error. 658.It Dv REG_EBRACK 659.Qq [] 660imbalance. 661.It Dv REG_ENOSYS 662The function is not supported. 663.It Dv REG_EPAREN 664.Qq \e(\e) 665or 666.Qq () 667imbalance. 668.It Dv REG_EBRACE 669.Qq \e{\e} 670imbalance. 671.It Dv REG_BADBR 672Content of 673.Qq \e{\e} 674invalid: not a number, number too large, more than two 675numbers, first larger than second. 676.It Dv REG_ERANGE 677Invalid endpoint in range expression. 678.It Dv REG_ESPACE 679Out of memory. 680.It Dv REG_BADRPT 681.Qq \&? , 682.Qq * 683or 684.Qq + 685not preceded by valid regular expression. 686.El 687.Sh USAGE 688An application could use: 689.Bd -literal -offset Ds 690regerror(code, preg, (char *)NULL, (size_t)0) 691.Ed 692.Pp 693to find out how big a buffer is needed for the generated string, 694.Fn malloc 695a buffer to hold the string, and then call 696.Fn regerror 697again to get the string 698.Po see 699.Xr malloc 3C 700.Pc . 
701Alternatively, it could allocate a fixed, static buffer that is big enough to hold 702most strings, and then use 703.Fn malloc 704to allocate a larger buffer if it finds that this is too small. 705.Sh EXAMPLES 706Matching string against the extended regular expression in pattern. 707.Bd -literal -offset Ds 708#include <regex.h> 709 710/* 711* Match string against the extended regular expression in 712* pattern, treating errors as no match. 713* 714* return 1 for match, 0 for no match 715*/ 716int 717match(const char *string, char *pattern) 718{ 719 int status; 720 regex_t re; 721 722 if (regcomp(&re, pattern, REG_EXTENDED\||\|REG_NOSUB) != 0) { 723 return(0); /* report error */ 724 } 725 status = regexec(&re, string, (size_t) 0, NULL, 0); 726 regfree(&re); 727 if (status != 0) { 728 return(0); /* report error */ 729 } 730 return(1); 731} 732.Ed 733.Pp 734The following demonstrates how the 735.Dv REG_NOTBOL 736flag could be used with 737.Fn regexec 738to find all substrings in a line that match a pattern supplied by a user. 739.Pq For simplicity of the example, very little error checking is done. 740.Bd -literal -offset Ds 741(void) regcomp(&re, pattern, 0); 742/* this call to regexec() finds the first match on the line */ 743error = regexec(&re, &buffer[0], 1, &pm, 0); 744while (error == 0) { /* while matches found */ 745 /* substring found between pm.rm_so and pm.rm_eo */ 746 /* This call to regexec() finds the next match */ 747 error = regexec(&re, buffer + pm.rm_eo, 1, &pm, REG_NOTBOL); 748} 749.Ed 750.Sh ERRORS 751No errors are defined. 752.Sh CODE SET INDEPENDENCE 753.Sy Enabled 754.Sh INTERFACE STABILITY 755.Sy Standard 756.Sh MT-LEVEL 757.Sy MT-Safe with exceptions 758.Pp 759The 760.Fn regcomp 761function can be used safely in a multithreaded application as long as 762.Xr setlocale 3C 763or 764.Xr uselocale 3C 765is not being called to change the locale. 
766.Sh SEE ALSO 767.Xr attributes 7 , 768.Xr locale 7 , 769.Xr regex 7 , 770.Xr standards 7 771.Pp 772.St -p1003.2 , 773sections 2.8 774.Pq Regular Expression Notation 775and 776B.5 777.Pq C Binding for Regular Expression Matching . 778