1#! /usr/bin/env perl 2# Copyright 2018-2021 The OpenSSL Project Authors. All Rights Reserved. 3# 4# Licensed under the Apache License 2.0 (the "License"). You may not use 5# this file except in compliance with the License. You can obtain a copy 6# in the file LICENSE in the source distribution or at 7# https://www.openssl.org/source/license.html 8 9package OpenSSL::ParseC; 10 11use strict; 12use warnings; 13 14use Exporter; 15use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 16$VERSION = "0.9"; 17@ISA = qw(Exporter); 18@EXPORT = qw(parse); 19 20# Global handler data 21my @preprocessor_conds; # A list of simple preprocessor conditions, 22 # each item being a list of macros defined 23 # or not defined. 24 25# Handler helpers 26sub all_conds { 27 return map { ( @$_ ) } @preprocessor_conds; 28} 29 30# A list of handlers that will look at a "complete" string and try to 31# figure out what to make of it. 32# Each handler is a hash with the following keys: 33# 34# regexp a regexp to compare the "complete" string with. 35# checker a function that does a more complex comparison. 36# Use this instead of regexp if that isn't enough. 37# massager massages the "complete" string into an array with 38# the following elements: 39# 40# [0] String that needs further processing (this 41# applies to typedefs of structs), or empty. 42# [1] The name of what was found. 43# [2] A character that denotes what type of thing 44# this is: 'F' for function, 'S' for struct, 45# 'T' for typedef, 'M' for macro, 'V' for 46# variable. 47# [3] Return type (only for type 'F' and 'V') 48# [4] Value (for type 'M') or signature (for type 'F', 49# 'V', 'T' or 'S') 50# [5...] The list of preprocessor conditions this is 51# found in, as in checks for macro definitions 52# (stored as the macro's name) or the absence 53# of definition (stored as the macro's name 54# prefixed with a '!' 55# 56# If the massager returns an empty list, it means the 57# "complete" string has side effects but should otherwise 58# be ignored. 59# If the massager is undefined, the "complete" string 60# should be ignored. 61my @opensslcpphandlers = ( 62 ################################################################## 63 # OpenSSL CPP specials 64 # 65 # These are used to convert certain pre-precessor expressions into 66 # others that @cpphandlers have a better chance to understand. 67 68 # This changes any OPENSSL_NO_DEPRECATED_x_y[_z] check to a check of 69 # OPENSSL_NO_DEPRECATEDIN_x_y[_z]. That's due to <openssl/macros.h> 70 # creating OPENSSL_NO_DEPRECATED_x_y[_z], but the ordinals files using 71 # DEPRECATEDIN_x_y[_z]. 72 { regexp => qr/#if(def|ndef) OPENSSL_NO_DEPRECATED_(\d+_\d+(?:_\d+)?)$/, 73 massager => sub { 74 return (<<"EOF"); 75#if$1 OPENSSL_NO_DEPRECATEDIN_$2 76EOF 77 } 78 } 79); 80my @cpphandlers = ( 81 ################################################################## 82 # CPP stuff 83 84 { regexp => qr/#ifdef ?(.*)/, 85 massager => sub { 86 my %opts; 87 if (ref($_[$#_]) eq "HASH") { 88 %opts = %{$_[$#_]}; 89 pop @_; 90 } 91 push @preprocessor_conds, [ $1 ]; 92 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" 93 if $opts{debug}; 94 return (); 95 }, 96 }, 97 { regexp => qr/#ifndef ?(.*)/, 98 massager => sub { 99 my %opts; 100 if (ref($_[$#_]) eq "HASH") { 101 %opts = %{$_[$#_]}; 102 pop @_; 103 } 104 push @preprocessor_conds, [ '!'.$1 ]; 105 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" 106 if $opts{debug}; 107 return (); 108 }, 109 }, 110 { regexp => qr/#if (0|1)/, 111 massager => sub { 112 my %opts; 113 if (ref($_[$#_]) eq "HASH") { 114 %opts = %{$_[$#_]}; 115 pop @_; 116 } 117 if ($1 eq "1") { 118 push @preprocessor_conds, [ "TRUE" ]; 119 } else { 120 push @preprocessor_conds, [ "!TRUE" ]; 121 } 122 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" 123 if $opts{debug}; 124 return (); 125 }, 126 }, 127 { regexp => qr/#if ?(.*)/, 128 massager => sub { 129 my %opts; 130 if (ref($_[$#_]) eq "HASH") { 131 %opts = %{$_[$#_]}; 132 pop @_; 133 } 134 my @results = (); 135 my $conds = $1; 136 if ($conds =~ m|^defined<<<\(([^\)]*)\)>>>(.*)$|) { 137 push @results, $1; # Handle the simple case 138 my $rest = $2; 139 my $re = qr/^(?:\|\|defined<<<\([^\)]*\)>>>)*$/; 140 print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n" 141 if $opts{debug}; 142 if ($rest =~ m/$re/) { 143 my @rest = split /\|\|/, $rest; 144 shift @rest; 145 foreach (@rest) { 146 m|^defined<<<\(([^\)]*)\)>>>$|; 147 die "Something wrong...$opts{PLACE}" if $1 eq ""; 148 push @results, $1; 149 } 150 } else { 151 $conds =~ s/<<<|>>>//g; 152 warn "Warning: complicated #if expression(1): $conds$opts{PLACE}" 153 if $opts{warnings}; 154 } 155 } elsif ($conds =~ m|^!defined<<<\(([^\)]*)\)>>>(.*)$|) { 156 push @results, '!'.$1; # Handle the simple case 157 my $rest = $2; 158 my $re = qr/^(?:\&\&!defined<<<\([^\)]*\)>>>)*$/; 159 print STDERR "DEBUG[",$opts{debug_type},"]: Matching '$rest' with '$re'\n" 160 if $opts{debug}; 161 if ($rest =~ m/$re/) { 162 my @rest = split /\&\&/, $rest; 163 shift @rest; 164 foreach (@rest) { 165 m|^!defined<<<\(([^\)]*)\)>>>$|; 166 die "Something wrong...$opts{PLACE}" if $1 eq ""; 167 push @results, '!'.$1; 168 } 169 } else { 170 $conds =~ s/<<<|>>>//g; 171 warn "Warning: complicated #if expression(2): $conds$opts{PLACE}" 172 if $opts{warnings}; 173 } 174 } else { 175 $conds =~ s/<<<|>>>//g; 176 warn "Warning: complicated #if expression(3): $conds$opts{PLACE}" 177 if $opts{warnings}; 178 } 179 print STDERR "DEBUG[",$opts{debug_type},"]: Added preprocessor conds: '", join("', '", @results), "'\n" 180 if $opts{debug}; 181 push @preprocessor_conds, [ @results ]; 182 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" 183 if $opts{debug}; 184 return (); 185 }, 186 }, 187 { regexp => qr/#elif (.*)/, 188 massager => sub { 189 my %opts; 190 if (ref($_[$#_]) eq "HASH") { 191 %opts = %{$_[$#_]}; 192 pop @_; 193 } 194 die "An #elif without corresponding condition$opts{PLACE}" 195 if !@preprocessor_conds; 196 pop @preprocessor_conds; 197 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" 198 if $opts{debug}; 199 return (<<"EOF"); 200#if $1 201EOF 202 }, 203 }, 204 { regexp => qr/#else/, 205 massager => sub { 206 my %opts; 207 if (ref($_[$#_]) eq "HASH") { 208 %opts = %{$_[$#_]}; 209 pop @_; 210 } 211 die "An #else without corresponding condition$opts{PLACE}" 212 if !@preprocessor_conds; 213 # Invert all conditions on the last level 214 my $stuff = pop @preprocessor_conds; 215 push @preprocessor_conds, [ 216 map { m|^!(.*)$| ? $1 : '!'.$_ } @$stuff 217 ]; 218 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" 219 if $opts{debug}; 220 return (); 221 }, 222 }, 223 { regexp => qr/#endif ?/, 224 massager => sub { 225 my %opts; 226 if (ref($_[$#_]) eq "HASH") { 227 %opts = %{$_[$#_]}; 228 pop @_; 229 } 230 die "An #endif without corresponding condition$opts{PLACE}" 231 if !@preprocessor_conds; 232 pop @preprocessor_conds; 233 print STDERR "DEBUG[",$opts{debug_type},"]: preprocessor level: ", scalar(@preprocessor_conds), "\n" 234 if $opts{debug}; 235 return (); 236 }, 237 }, 238 { regexp => qr/#define ([[:alpha:]_]\w*)(<<<\(.*?\)>>>)?( (.*))?/, 239 massager => sub { 240 my $name = $1; 241 my $params = $2; 242 my $spaceval = $3||""; 243 my $val = $4||""; 244 return ("", 245 $1, 'M', "", $params ? "$name$params$spaceval" : $val, 246 all_conds()); } 247 }, 248 { regexp => qr/#.*/, 249 massager => sub { return (); } 250 }, 251 ); 252 253my @opensslchandlers = ( 254 ################################################################## 255 # OpenSSL C specials 256 # 257 # They are really preprocessor stuff, but they look like C stuff 258 # to this parser. All of these do replacements, anything else is 259 # an error. 260 261 ##### 262 # Deprecated stuff, by OpenSSL release. 263 264 # OSSL_DEPRECATEDIN_x_y[_z] is simply ignored. Such declarations are 265 # supposed to be guarded with an '#ifdef OPENSSL_NO_DEPRECATED_x_y[_z]' 266 { regexp => qr/OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/, 267 massager => sub { return $1; }, 268 }, 269 { regexp => qr/(.*?)\s+OSSL_DEPRECATEDIN_\d+_\d+(?:_\d+)?\s+(.*)/, 270 massager => sub { return "$1 $2"; }, 271 }, 272 273 ##### 274 # Core stuff 275 276 # OSSL_CORE_MAKE_FUNC is a macro to create the necessary data and inline 277 # function the libcrypto<->provider interface 278 { regexp => qr/OSSL_CORE_MAKE_FUNC<<<\((.*?),(.*?),(.*?)\)>>>/, 279 massager => sub { 280 return (<<"EOF"); 281typedef $1 OSSL_FUNC_$2_fn$3; 282static ossl_inline OSSL_FUNC_$2_fn *OSSL_FUNC_$2(const OSSL_DISPATCH *opf); 283EOF 284 }, 285 }, 286 287 ##### 288 # LHASH stuff 289 290 # LHASH_OF(foo) is used as a type, but the chandlers won't take it 291 # gracefully, so we expand it here. 292 { regexp => qr/(.*)\bLHASH_OF<<<\((.*?)\)>>>(.*)/, 293 massager => sub { return ("$1struct lhash_st_$2$3"); } 294 }, 295 { regexp => qr/DEFINE_LHASH_OF(?:_INTERNAL)?<<<\((.*)\)>>>/, 296 massager => sub { 297 return (<<"EOF"); 298static ossl_inline LHASH_OF($1) * lh_$1_new(unsigned long (*hfn)(const $1 *), 299 int (*cfn)(const $1 *, const $1 *)); 300static ossl_inline void lh_$1_free(LHASH_OF($1) *lh); 301static ossl_inline $1 *lh_$1_insert(LHASH_OF($1) *lh, $1 *d); 302static ossl_inline $1 *lh_$1_delete(LHASH_OF($1) *lh, const $1 *d); 303static ossl_inline $1 *lh_$1_retrieve(LHASH_OF($1) *lh, const $1 *d); 304static ossl_inline int lh_$1_error(LHASH_OF($1) *lh); 305static ossl_inline unsigned long lh_$1_num_items(LHASH_OF($1) *lh); 306static ossl_inline void lh_$1_node_stats_bio(const LHASH_OF($1) *lh, BIO *out); 307static ossl_inline void lh_$1_node_usage_stats_bio(const LHASH_OF($1) *lh, 308 BIO *out); 309static ossl_inline void lh_$1_stats_bio(const LHASH_OF($1) *lh, BIO *out); 310static ossl_inline unsigned long lh_$1_get_down_load(LHASH_OF($1) *lh); 311static ossl_inline void lh_$1_set_down_load(LHASH_OF($1) *lh, unsigned long dl); 312static ossl_inline void lh_$1_doall(LHASH_OF($1) *lh, void (*doall)($1 *)); 313LHASH_OF($1) 314EOF 315 } 316 }, 317 318 ##### 319 # STACK stuff 320 321 # STACK_OF(foo) is used as a type, but the chandlers won't take it 322 # gracefully, so we expand it here. 323 { regexp => qr/(.*)\bSTACK_OF<<<\((.*?)\)>>>(.*)/, 324 massager => sub { return ("$1struct stack_st_$2$3"); } 325 }, 326# { regexp => qr/(.*)\bSTACK_OF\((.*?)\)(.*)/, 327# massager => sub { 328# my $before = $1; 329# my $stack_of = "struct stack_st_$2"; 330# my $after = $3; 331# if ($after =~ m|^\w|) { $after = " ".$after; } 332# return ("$before$stack_of$after"); 333# } 334# }, 335 { regexp => qr/SKM_DEFINE_STACK_OF<<<\((.*),\s*(.*),\s*(.*)\)>>>/, 336 massager => sub { 337 return (<<"EOF"); 338STACK_OF($1); 339typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b); 340typedef void (*sk_$1_freefunc)($3 *a); 341typedef $3 * (*sk_$1_copyfunc)(const $3 *a); 342static ossl_inline int sk_$1_num(const STACK_OF($1) *sk); 343static ossl_inline $2 *sk_$1_value(const STACK_OF($1) *sk, int idx); 344static ossl_inline STACK_OF($1) *sk_$1_new(sk_$1_compfunc compare); 345static ossl_inline STACK_OF($1) *sk_$1_new_null(void); 346static ossl_inline STACK_OF($1) *sk_$1_new_reserve(sk_$1_compfunc compare, 347 int n); 348static ossl_inline int sk_$1_reserve(STACK_OF($1) *sk, int n); 349static ossl_inline void sk_$1_free(STACK_OF($1) *sk); 350static ossl_inline void sk_$1_zero(STACK_OF($1) *sk); 351static ossl_inline $2 *sk_$1_delete(STACK_OF($1) *sk, int i); 352static ossl_inline $2 *sk_$1_delete_ptr(STACK_OF($1) *sk, $2 *ptr); 353static ossl_inline int sk_$1_push(STACK_OF($1) *sk, $2 *ptr); 354static ossl_inline int sk_$1_unshift(STACK_OF($1) *sk, $2 *ptr); 355static ossl_inline $2 *sk_$1_pop(STACK_OF($1) *sk); 356static ossl_inline $2 *sk_$1_shift(STACK_OF($1) *sk); 357static ossl_inline void sk_$1_pop_free(STACK_OF($1) *sk, 358 sk_$1_freefunc freefunc); 359static ossl_inline int sk_$1_insert(STACK_OF($1) *sk, $2 *ptr, int idx); 360static ossl_inline $2 *sk_$1_set(STACK_OF($1) *sk, int idx, $2 *ptr); 361static ossl_inline int sk_$1_find(STACK_OF($1) *sk, $2 *ptr); 362static ossl_inline int sk_$1_find_ex(STACK_OF($1) *sk, $2 *ptr); 363static ossl_inline void sk_$1_sort(STACK_OF($1) *sk); 364static ossl_inline int sk_$1_is_sorted(const STACK_OF($1) *sk); 365static ossl_inline STACK_OF($1) * sk_$1_dup(const STACK_OF($1) *sk); 366static ossl_inline STACK_OF($1) *sk_$1_deep_copy(const STACK_OF($1) *sk, 367 sk_$1_copyfunc copyfunc, 368 sk_$1_freefunc freefunc); 369static ossl_inline sk_$1_compfunc sk_$1_set_cmp_func(STACK_OF($1) *sk, 370 sk_$1_compfunc compare); 371EOF 372 } 373 }, 374 { regexp => qr/SKM_DEFINE_STACK_OF_INTERNAL<<<\((.*),\s*(.*),\s*(.*)\)>>>/, 375 massager => sub { 376 return (<<"EOF"); 377STACK_OF($1); 378typedef int (*sk_$1_compfunc)(const $3 * const *a, const $3 *const *b); 379typedef void (*sk_$1_freefunc)($3 *a); 380typedef $3 * (*sk_$1_copyfunc)(const $3 *a); 381static ossl_unused ossl_inline $2 *ossl_check_$1_type($2 *ptr); 382static ossl_unused ossl_inline const OPENSSL_STACK *ossl_check_const_$1_sk_type(const STACK_OF($1) *sk); 383static ossl_unused ossl_inline OPENSSL_sk_compfunc ossl_check_$1_compfunc_type(sk_$1_compfunc cmp); 384static ossl_unused ossl_inline OPENSSL_sk_copyfunc ossl_check_$1_copyfunc_type(sk_$1_copyfunc cpy); 385static ossl_unused ossl_inline OPENSSL_sk_freefunc ossl_check_$1_freefunc_type(sk_$1_freefunc fr); 386EOF 387 } 388 }, 389 { regexp => qr/DEFINE_SPECIAL_STACK_OF<<<\((.*),\s*(.*)\)>>>/, 390 massager => sub { return ("SKM_DEFINE_STACK_OF($1,$2,$2)"); }, 391 }, 392 { regexp => qr/DEFINE_STACK_OF<<<\((.*)\)>>>/, 393 massager => sub { return ("SKM_DEFINE_STACK_OF($1,$1,$1)"); }, 394 }, 395 { regexp => qr/DEFINE_SPECIAL_STACK_OF_CONST<<<\((.*),\s*(.*)\)>>>/, 396 massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $2,$2)"); }, 397 }, 398 { regexp => qr/DEFINE_STACK_OF_CONST<<<\((.*)\)>>>/, 399 massager => sub { return ("SKM_DEFINE_STACK_OF($1,const $1,$1)"); }, 400 }, 401 402 ##### 403 # ASN1 stuff 404 { regexp => qr/DECLARE_ASN1_ITEM<<<\((.*)\)>>>/, 405 massager => sub { 406 return (<<"EOF"); 407const ASN1_ITEM *$1_it(void); 408EOF 409 }, 410 }, 411 { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_only<<<\((.*),\s*(.*)\)>>>/, 412 massager => sub { 413 return (<<"EOF"); 414int d2i_$2(void); 415int i2d_$2(void); 416EOF 417 }, 418 }, 419 { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS<<<\((.*),\s*(.*),\s*(.*)\)>>>/, 420 massager => sub { 421 return (<<"EOF"); 422int d2i_$3(void); 423int i2d_$3(void); 424DECLARE_ASN1_ITEM($2) 425EOF 426 }, 427 }, 428 { regexp => qr/DECLARE_ASN1_ENCODE_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/, 429 massager => sub { 430 return (<<"EOF"); 431int d2i_$2(void); 432int i2d_$2(void); 433DECLARE_ASN1_ITEM($2) 434EOF 435 }, 436 }, 437 { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/, 438 massager => sub { 439 return (<<"EOF"); 440int $2_free(void); 441int $2_new(void); 442EOF 443 }, 444 }, 445 { regexp => qr/DECLARE_ASN1_ALLOC_FUNCTIONS<<<\((.*)\)>>>/, 446 massager => sub { 447 return (<<"EOF"); 448int $1_free(void); 449int $1_new(void); 450EOF 451 }, 452 }, 453 { regexp => qr/DECLARE_ASN1_FUNCTIONS_name<<<\((.*),\s*(.*)\)>>>/, 454 massager => sub { 455 return (<<"EOF"); 456int d2i_$2(void); 457int i2d_$2(void); 458int $2_free(void); 459int $2_new(void); 460DECLARE_ASN1_ITEM($2) 461EOF 462 }, 463 }, 464 { regexp => qr/DECLARE_ASN1_FUNCTIONS<<<\((.*)\)>>>/, 465 massager => sub { return (<<"EOF"); 466int d2i_$1(void); 467int i2d_$1(void); 468int $1_free(void); 469int $1_new(void); 470DECLARE_ASN1_ITEM($1) 471EOF 472 } 473 }, 474 { regexp => qr/DECLARE_ASN1_NDEF_FUNCTION<<<\((.*)\)>>>/, 475 massager => sub { 476 return (<<"EOF"); 477int i2d_$1_NDEF(void); 478EOF 479 } 480 }, 481 { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION<<<\((.*)\)>>>/, 482 massager => sub { 483 return (<<"EOF"); 484int $1_print_ctx(void); 485EOF 486 } 487 }, 488 { regexp => qr/DECLARE_ASN1_PRINT_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/, 489 massager => sub { 490 return (<<"EOF"); 491int $2_print_ctx(void); 492EOF 493 } 494 }, 495 { regexp => qr/DECLARE_ASN1_SET_OF<<<\((.*)\)>>>/, 496 massager => sub { return (); } 497 }, 498 { regexp => qr/DECLARE_ASN1_DUP_FUNCTION<<<\((.*)\)>>>/, 499 massager => sub { 500 return (<<"EOF"); 501int $1_dup(void); 502EOF 503 } 504 }, 505 { regexp => qr/DECLARE_ASN1_DUP_FUNCTION_name<<<\((.*),\s*(.*)\)>>>/, 506 massager => sub { 507 return (<<"EOF"); 508int $2_dup(void); 509EOF 510 } 511 }, 512 # Universal translator of attributed PEM declarators 513 { regexp => qr/ 514 DECLARE_ASN1 515 (_ENCODE_FUNCTIONS_only|_ENCODE_FUNCTIONS|_ENCODE_FUNCTIONS_name 516 |_ALLOC_FUNCTIONS_name|_ALLOC_FUNCTIONS|_FUNCTIONS_name|_FUNCTIONS 517 |_NDEF_FUNCTION|_PRINT_FUNCTION|_PRINT_FUNCTION_name 518 |_DUP_FUNCTION|_DUP_FUNCTION_name) 519 _attr 520 <<<\(\s*OSSL_DEPRECATEDIN_(.*?)\s*,(.*?)\)>>> 521 /x, 522 massager => sub { return (<<"EOF"); 523DECLARE_ASN1$1($3) 524EOF 525 }, 526 }, 527 { regexp => qr/DECLARE_PKCS12_SET_OF<<<\((.*)\)>>>/, 528 massager => sub { return (); } 529 }, 530 531 ##### 532 # PEM stuff 533 { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)<<<\((.*?),.*\)>>>/, 534 massager => sub { return (<<"EOF"); 535#ifndef OPENSSL_NO_STDIO 536int PEM_read_$1(void); 537int PEM_write_$1(void); 538#endif 539int PEM_read_bio_$1(void); 540int PEM_write_bio_$1(void); 541EOF 542 }, 543 }, 544 { regexp => qr/DECLARE_PEM(?|_rw|_rw_cb|_rw_const)_ex<<<\((.*?),.*\)>>>/, 545 massager => sub { return (<<"EOF"); 546#ifndef OPENSSL_NO_STDIO 547int PEM_read_$1(void); 548int PEM_write_$1(void); 549int PEM_read_$1_ex(void); 550int PEM_write_$1_ex(void); 551#endif 552int PEM_read_bio_$1(void); 553int PEM_write_bio_$1(void); 554int PEM_read_bio_$1_ex(void); 555int PEM_write_bio_$1_ex(void); 556EOF 557 }, 558 }, 559 { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)<<<\((.*?),.*\)>>>/, 560 massager => sub { return (<<"EOF"); 561#ifndef OPENSSL_NO_STDIO 562int PEM_write_$1(void); 563#endif 564int PEM_write_bio_$1(void); 565EOF 566 }, 567 }, 568 { regexp => qr/DECLARE_PEM(?|_write|_write_cb|_write_const)_ex<<<\((.*?),.*\)>>>/, 569 massager => sub { return (<<"EOF"); 570#ifndef OPENSSL_NO_STDIO 571int PEM_write_$1(void); 572int PEM_write_$1_ex(void); 573#endif 574int PEM_write_bio_$1(void); 575int PEM_write_bio_$1_ex(void); 576EOF 577 }, 578 }, 579 { regexp => qr/DECLARE_PEM(?|_read|_read_cb)<<<\((.*?),.*\)>>>/, 580 massager => sub { return (<<"EOF"); 581#ifndef OPENSSL_NO_STDIO 582int PEM_read_$1(void); 583#endif 584int PEM_read_bio_$1(void); 585EOF 586 }, 587 }, 588 { regexp => qr/DECLARE_PEM(?|_read|_read_cb)_ex<<<\((.*?),.*\)>>>/, 589 massager => sub { return (<<"EOF"); 590#ifndef OPENSSL_NO_STDIO 591int PEM_read_$1(void); 592int PEM_read_$1_ex(void); 593#endif 594int PEM_read_bio_$1(void); 595int PEM_read_bio_$1_ex(void); 596EOF 597 }, 598 }, 599 # Universal translator of attributed PEM declarators 600 { regexp => qr/ 601 DECLARE_PEM 602 ((?:_rw|_rw_cb|_rw_const|_write|_write_cb|_write_const|_read|_read_cb) 603 (?:_ex)?) 604 _attr 605 <<<\(\s*OSSL_DEPRECATEDIN_(.*?)\s*,(.*?)\)>>> 606 /x, 607 massager => sub { return (<<"EOF"); 608DECLARE_PEM$1($3) 609EOF 610 }, 611 }, 612 613 # OpenSSL's declaration of externs with possible export linkage 614 # (really only relevant on Windows) 615 { regexp => qr/OPENSSL_(?:EXPORT|EXTERN)/, 616 massager => sub { return ("extern"); } 617 }, 618 619 # Spurious stuff found in the OpenSSL headers 620 # Usually, these are just macros that expand to, well, something 621 { regexp => qr/__NDK_FPABI__/, 622 massager => sub { return (); } 623 }, 624 ); 625 626my $anoncnt = 0; 627 628my @chandlers = ( 629 ################################################################## 630 # C stuff 631 632 # extern "C" of individual items 633 # Note that the main parse function has a special hack for 'extern "C" {' 634 # which can't be done in handlers 635 # We simply ignore it. 636 { regexp => qr/^extern "C" (.*(?:;|>>>))/, 637 massager => sub { return ($1); }, 638 }, 639 # any other extern is just ignored 640 { regexp => qr/^\s* # Any spaces before 641 extern # The keyword we look for 642 \b # word to non-word boundary 643 .* # Anything after 644 ; 645 /x, 646 massager => sub { return (); }, 647 }, 648 # union, struct and enum definitions 649 # Because this one might appear a little everywhere within type 650 # definitions, we take it out and replace it with just 651 # 'union|struct|enum name' while registering it. 652 # This makes use of the parser trick to surround the outer braces 653 # with <<< and >>> 654 { regexp => qr/(.*) # Anything before ($1) 655 \b # word to non-word boundary 656 (union|struct|enum) # The word used ($2) 657 (?:\s([[:alpha:]_]\w*))? # Struct or enum name ($3) 658 <<<(\{.*?\})>>> # Struct or enum definition ($4) 659 (.*) # Anything after ($5) 660 ; 661 /x, 662 massager => sub { 663 my $before = $1; 664 my $word = $2; 665 my $name = $3 666 || sprintf("__anon%03d", ++$anoncnt); # Anonymous struct 667 my $definition = $4; 668 my $after = $5; 669 my $type = $word eq "struct" ? 'S' : 'E'; 670 if ($before ne "" || $after ne ";") { 671 if ($after =~ m|^\w|) { $after = " ".$after; } 672 return ("$before$word $name$after;", 673 "$word $name", $type, "", "$word$definition", all_conds()); 674 } 675 # If there was no before nor after, make the return much simple 676 return ("", "$word $name", $type, "", "$word$definition", all_conds()); 677 } 678 }, 679 # Named struct and enum forward declarations 680 # We really just ignore them, but we need to parse them or the variable 681 # declaration handler further down will think it's a variable declaration. 682 { regexp => qr/^(union|struct|enum) ([[:alpha:]_]\w*);/, 683 massager => sub { return (); } 684 }, 685 # Function returning function pointer declaration 686 # This sort of declaration may have a body (inline functions, for example) 687 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) 688 ((?:\w|\*|\s)*?) # Return type ($2) 689 \s? # Possible space 690 <<<\(\* 691 ([[:alpha:]_]\w*) # Function name ($3) 692 (\(.*\)) # Parameters ($4) 693 \)>>> 694 <<<(\(.*\))>>> # F.p. parameters ($5) 695 (?:<<<\{.*\}>>>|;) # Body or semicolon 696 /x, 697 massager => sub { 698 return ("", $3, 'T', "", "$2(*$4)$5", all_conds()) 699 if defined $1; 700 return ("", $3, 'F', "$2(*)$5", "$2(*$4)$5", all_conds()); } 701 }, 702 # Function pointer declaration, or typedef thereof 703 # This sort of declaration never has a function body 704 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) 705 ((?:\w|\*|\s)*?) # Return type ($2) 706 <<<\(\*([[:alpha:]_]\w*)\)>>> # T.d. or var name ($3) 707 <<<(\(.*\))>>> # F.p. parameters ($4) 708 ; 709 /x, 710 massager => sub { 711 return ("", $3, 'T', "", "$2(*)$4", all_conds()) 712 if defined $1; 713 return ("", $3, 'V', "$2(*)$4", "$2(*)$4", all_conds()); 714 }, 715 }, 716 # Function declaration, or typedef thereof 717 # This sort of declaration may have a body (inline functions, for example) 718 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) 719 ((?:\w|\*|\s)*?) # Return type ($2) 720 \s? # Possible space 721 ([[:alpha:]_]\w*) # Function name ($3) 722 <<<(\(.*\))>>> # Parameters ($4) 723 (?:<<<\{.*\}>>>|;) # Body or semicolon 724 /x, 725 massager => sub { 726 return ("", $3, 'T', "", "$2$4", all_conds()) 727 if defined $1; 728 return ("", $3, 'F', $2, "$2$4", all_conds()); 729 }, 730 }, 731 # Variable declaration, including arrays, or typedef thereof 732 { regexp => qr/(?:(typedef)\s?)? # Possible typedef ($1) 733 ((?:\w|\*|\s)*?) # Type ($2) 734 \s? # Possible space 735 ([[:alpha:]_]\w*) # Variable name ($3) 736 ((?:<<<\[[^\]]*\]>>>)*) # Possible array declaration ($4) 737 ; 738 /x, 739 massager => sub { 740 return ("", $3, 'T', "", $2.($4||""), all_conds()) 741 if defined $1; 742 return ("", $3, 'V', $2.($4||""), $2.($4||""), all_conds()); 743 }, 744 }, 745); 746 747# End handlers are almost the same as handlers, except they are run through 748# ONCE when the input has been parsed through. These are used to check for 749# remaining stuff, such as an unfinished #ifdef and stuff like that that the 750# main parser can't check on its own. 751my @endhandlers = ( 752 { massager => sub { 753 my %opts = %{$_[0]}; 754 755 die "Unfinished preprocessor conditions levels: ",scalar(@preprocessor_conds),($opts{filename} ? " in file ".$opts{filename}: ""),$opts{PLACE} 756 if @preprocessor_conds; 757 } 758 } 759 ); 760 761# takes a list of strings that can each contain one or several lines of code 762# also takes a hash of options as last argument. 763# 764# returns a list of hashes with information: 765# 766# name name of the thing 767# type type, see the massage handler function 768# returntype return type of functions and variables 769# value value for macros, signature for functions, variables 770# and structs 771# conds preprocessor conditions (array ref) 772 773sub parse { 774 my %opts; 775 if (ref($_[$#_]) eq "HASH") { 776 %opts = %{$_[$#_]}; 777 pop @_; 778 } 779 my %state = ( 780 in_extern_C => 0, # An exception to parenthesis processing. 781 cpp_parens => [], # A list of ending parens and braces found in 782 # preprocessor directives 783 c_parens => [], # A list of ending parens and braces found in 784 # C statements 785 in_string => "", # empty string when outside a string, otherwise 786 # "'" or '"' depending on the starting quote. 787 in_comment => "", # empty string when outside a comment, otherwise 788 # "/*" or "//" depending on the type of comment 789 # found. The latter will never be multiline 790 # NOTE: in_string and in_comment will never be 791 # true (in perl semantics) at the same time. 792 current_line => 0, 793 ); 794 my @result = (); 795 my $normalized_line = ""; # $input_line, but normalized. In essence, this 796 # means that ALL whitespace is removed unless 797 # it absolutely has to be present, and in that 798 # case, there's only one space. 799 # The cases where a space needs to stay present 800 # are: 801 # 1. between words 802 # 2. between words and number 803 # 3. after the first word of a preprocessor 804 # directive. 805 # 4. for the #define directive, between the macro 806 # name/args and its value, so we end up with: 807 # #define FOO val 808 # #define BAR(x) something(x) 809 my $collected_stmt = ""; # Where we're building up a C line until it's a 810 # complete definition/declaration, as determined 811 # by any handler being capable of matching it. 812 813 # We use $_ shamelessly when looking through @lines. 814 # In case we find a \ at the end, we keep filling it up with more lines. 815 $_ = undef; 816 817 foreach my $line (@_) { 818 # split tries to be smart when a string ends with the thing we split on 819 $line .= "\n" unless $line =~ m|\R$|; 820 $line .= "#"; 821 822 # We use ¦undef¦ as a marker for a new line from the file. 823 # Since we convert one line to several and unshift that into @lines, 824 # that's the only safe way we have to track the original lines 825 my @lines = map { ( undef, $_ ) } split m|\R|, $line; 826 827 # Remember that extra # we added above? Now we remove it 828 pop @lines; 829 pop @lines; # Don't forget the undef 830 831 while (@lines) { 832 if (!defined($lines[0])) { 833 shift @lines; 834 $state{current_line}++; 835 if (!defined($_)) { 836 $opts{PLACE} = " at ".$opts{filename}." line ".$state{current_line}."\n"; 837 $opts{PLACE2} = $opts{filename}.":".$state{current_line}; 838 } 839 next; 840 } 841 842 $_ = "" unless defined $_; 843 $_ .= shift @lines; 844 845 if (m|\\$|) { 846 $_ = $`; 847 next; 848 } 849 850 if ($opts{debug}) { 851 print STDERR "DEBUG:----------------------------\n"; 852 print STDERR "DEBUG: \$_ = '$_'\n"; 853 } 854 855 ########################################################## 856 # Now that we have a full line, let's process through it 857 while(1) { 858 unless ($state{in_comment}) { 859 # Begin with checking if the current $normalized_line 860 # contains a preprocessor directive 861 # This is only done if we're not inside a comment and 862 # if it's a preprocessor directive and it's finished. 863 if ($normalized_line =~ m|^#| && $_ eq "") { 864 print STDERR "DEBUG[OPENSSL CPP]: \$normalized_line = '$normalized_line'\n" 865 if $opts{debug}; 866 $opts{debug_type} = "OPENSSL CPP"; 867 my @r = ( _run_handlers($normalized_line, 868 @opensslcpphandlers, 869 \%opts) ); 870 if (shift @r) { 871 # Checking if there are lines to inject. 872 if (@r) { 873 @r = split $/, (pop @r).$_; 874 print STDERR "DEBUG[OPENSSL CPP]: injecting '", join("', '", @r),"'\n" 875 if $opts{debug} && @r; 876 @lines = ( @r, @lines ); 877 878 $_ = ""; 879 } 880 } else { 881 print STDERR "DEBUG[CPP]: \$normalized_line = '$normalized_line'\n" 882 if $opts{debug}; 883 $opts{debug_type} = "CPP"; 884 my @r = ( _run_handlers($normalized_line, 885 @cpphandlers, 886 \%opts) ); 887 if (shift @r) { 888 if (ref($r[0]) eq "HASH") { 889 push @result, shift @r; 890 } 891 892 # Now, check if there are lines to inject. 893 # Really, this should never happen, it IS a 894 # preprocessor directive after all... 895 if (@r) { 896 @r = split $/, pop @r; 897 print STDERR "DEBUG[CPP]: injecting '", join("', '", @r),"'\n" 898 if $opts{debug} && @r; 899 @lines = ( @r, @lines ); 900 $_ = ""; 901 } 902 } 903 } 904 905 # Note: we simply ignore all directives that no 906 # handler matches 907 $normalized_line = ""; 908 } 909 910 # If the two strings end and start with a character that 911 # shouldn't get concatenated, add a space 912 my $space = 913 ($collected_stmt =~ m/(?:"|')$/ 914 || ($collected_stmt =~ m/(?:\w|\d)$/ 915 && $normalized_line =~ m/^(?:\w|\d)/)) ? " " : ""; 916 917 # Now, unless we're building up a preprocessor directive or 918 # are in the middle of a string, or the parens et al aren't 919 # balanced up yet, let's try and see if there's a OpenSSL 920 # or C handler that can make sense of what we have so far. 921 if ( $normalized_line !~ m|^#| 922 && ($collected_stmt ne "" || $normalized_line ne "") 923 && ! @{$state{c_parens}} 924 && ! $state{in_string} ) { 925 if ($opts{debug}) { 926 print STDERR "DEBUG[OPENSSL C]: \$collected_stmt = '$collected_stmt'\n"; 927 print STDERR "DEBUG[OPENSSL C]: \$normalized_line = '$normalized_line'\n"; 928 } 929 $opts{debug_type} = "OPENSSL C"; 930 my @r = ( _run_handlers($collected_stmt 931 .$space 932 .$normalized_line, 933 @opensslchandlers, 934 \%opts) ); 935 if (shift @r) { 936 # Checking if there are lines to inject. 937 if (@r) { 938 @r = split $/, (pop @r).$_; 939 print STDERR "DEBUG[OPENSSL]: injecting '", join("', '", @r),"'\n" 940 if $opts{debug} && @r; 941 @lines = ( @r, @lines ); 942 943 $_ = ""; 944 } 945 $normalized_line = ""; 946 $collected_stmt = ""; 947 } else { 948 if ($opts{debug}) { 949 print STDERR "DEBUG[C]: \$collected_stmt = '$collected_stmt'\n"; 950 print STDERR "DEBUG[C]: \$normalized_line = '$normalized_line'\n"; 951 } 952 $opts{debug_type} = "C"; 953 my @r = ( _run_handlers($collected_stmt 954 .$space 955 .$normalized_line, 956 @chandlers, 957 \%opts) ); 958 if (shift @r) { 959 if (ref($r[0]) eq "HASH") { 960 push @result, shift @r; 961 } 962 963 # Checking if there are lines to inject. 964 if (@r) { 965 @r = split $/, (pop @r).$_; 966 print STDERR "DEBUG[C]: injecting '", join("', '", @r),"'\n" 967 if $opts{debug} && @r; 968 @lines = ( @r, @lines ); 969 970 $_ = ""; 971 } 972 $normalized_line = ""; 973 $collected_stmt = ""; 974 } 975 } 976 } 977 if ($_ eq "") { 978 $collected_stmt .= $space.$normalized_line; 979 $normalized_line = ""; 980 } 981 } 982 983 if ($_ eq "") { 984 $_ = undef; 985 last; 986 } 987 988 # Take care of inside string first. 989 if ($state{in_string}) { 990 if (m/ (?:^|(?<!\\)) # Make sure it's not escaped 991 $state{in_string} # Look for matching quote 992 /x) { 993 $normalized_line .= $`.$&; 994 $state{in_string} = ""; 995 $_ = $'; 996 next; 997 } else { 998 die "Unfinished string without continuation found$opts{PLACE}\n"; 999 } 1000 } 1001 # ... or inside comments, whichever happens to apply 1002 elsif ($state{in_comment}) { 1003 1004 # This should never happen 1005 die "Something went seriously wrong, multiline //???$opts{PLACE}\n" 1006 if ($state{in_comment} eq "//"); 1007 1008 # A note: comments are simply discarded. 1009 1010 if (m/ (?:^|(?<!\\)) # Make sure it's not escaped 1011 \*\/ # Look for C comment end 1012 /x) { 1013 $state{in_comment} = ""; 1014 $_ = $'; 1015 print STDERR "DEBUG: Found end of comment, followed by '$_'\n" 1016 if $opts{debug}; 1017 next; 1018 } else { 1019 $_ = ""; 1020 next; 1021 } 1022 } 1023 1024 # At this point, it's safe to remove leading whites, but 1025 # we need to be careful with some preprocessor lines 1026 if (m|^\s+|) { 1027 my $rest = $'; 1028 my $space = ""; 1029 $space = " " 1030 if ($normalized_line =~ m/^ 1031 \#define\s\w(?:\w|\d)*(?:<<<\([^\)]*\)>>>)? 1032 | \#[a-z]+ 1033 $/x); 1034 print STDERR "DEBUG: Processing leading spaces: \$normalized_line = '$normalized_line', \$space = '$space', \$rest = '$rest'\n" 1035 if $opts{debug}; 1036 $_ = $space.$rest; 1037 } 1038 1039 my $parens = 1040 $normalized_line =~ m|^#| ? 'cpp_parens' : 'c_parens'; 1041 (my $paren_singular = $parens) =~ s|s$||; 1042 1043 # Now check for specific tokens, and if they are parens, 1044 # check them against $state{$parens}. Note that we surround 1045 # the outermost parens with extra "<<<" and ">>>". Those 1046 # are for the benefit of handlers who to need to detect 1047 # them, and they will be removed from the final output. 1048 if (m|^[\{\[\(]|) { 1049 my $body = $&; 1050 $_ = $'; 1051 if (!@{$state{$parens}}) { 1052 if ("$normalized_line$body" =~ m|^extern "C"\{$|) { 1053 $state{in_extern_C} = 1; 1054 print STDERR "DEBUG: found start of 'extern \"C\"' ($normalized_line$body)\n" 1055 if $opts{debug}; 1056 $normalized_line = ""; 1057 } else { 1058 $normalized_line .= "<<<".$body; 1059 } 1060 } else { 1061 $normalized_line .= $body; 1062 } 1063 1064 if ($normalized_line ne "") { 1065 print STDERR "DEBUG: found $paren_singular start '$body'\n" 1066 if $opts{debug}; 1067 $body =~ tr|\{\[\(|\}\]\)|; 1068 print STDERR "DEBUG: pushing $paren_singular end '$body'\n" 1069 if $opts{debug}; 1070 push @{$state{$parens}}, $body; 1071 } 1072 } elsif (m|^[\}\]\)]|) { 1073 $_ = $'; 1074 1075 if (!@{$state{$parens}} 1076 && $& eq '}' && $state{in_extern_C}) { 1077 print STDERR "DEBUG: found end of 'extern \"C\"'\n" 1078 if $opts{debug}; 1079 $state{in_extern_C} = 0; 1080 } else { 1081 print STDERR "DEBUG: Trying to match '$&' against '" 1082 ,join("', '", @{$state{$parens}}) 1083 ,"'\n" 1084 if $opts{debug}; 1085 die "Unmatched parentheses$opts{PLACE}\n" 1086 unless (@{$state{$parens}} 1087 && pop @{$state{$parens}} eq $&); 1088 if (!@{$state{$parens}}) { 1089 $normalized_line .= $&.">>>"; 1090 } else { 1091 $normalized_line .= $&; 1092 } 1093 } 1094 } elsif (m|^["']|) { # string start 1095 my $body = $&; 1096 $_ = $'; 1097 1098 # We want to separate strings from \w and \d with one space. 1099 $normalized_line .= " " if $normalized_line =~ m/(\w|\d)$/; 1100 $normalized_line .= $body; 1101 $state{in_string} = $body; 1102 } elsif (m|^\/\*|) { # C style comment 1103 print STDERR "DEBUG: found start of C style comment\n" 1104 if $opts{debug}; 1105 $state{in_comment} = $&; 1106 $_ = $'; 1107 } elsif (m|^\/\/|) { # C++ style comment 1108 print STDERR "DEBUG: found C++ style comment\n" 1109 if $opts{debug}; 1110 $_ = ""; # (just discard it entirely) 1111 } elsif (m/^ (?| (?: 0[xX][[:xdigit:]]+ | 0[bB][01]+ | [0-9]+ ) 1112 (?i: U | L | UL | LL | ULL )? 1113 | [0-9]+\.[0-9]+(?:[eE][\-\+]\d+)? (?i: F | L)? 1114 ) /x) { 1115 print STDERR "DEBUG: Processing numbers: \$normalized_line = '$normalized_line', \$& = '$&', \$' = '$''\n" 1116 if $opts{debug}; 1117 $normalized_line .= $&; 1118 $_ = $'; 1119 } elsif (m/^[[:alpha:]_]\w*/) { 1120 my $body = $&; 1121 my $rest = $'; 1122 my $space = ""; 1123 1124 # Now, only add a space if it's needed to separate 1125 # two \w characters, and we also surround strings with 1126 # a space. In this case, that's if $normalized_line ends 1127 # with a \w, \d, " or '. 1128 $space = " " 1129 if ($normalized_line =~ m/("|')$/ 1130 || ($normalized_line =~ m/(\w|\d)$/ 1131 && $body =~ m/^(\w|\d)/)); 1132 1133 print STDERR "DEBUG: Processing words: \$normalized_line = '$normalized_line', \$space = '$space', \$body = '$body', \$rest = '$rest'\n" 1134 if $opts{debug}; 1135 $normalized_line .= $space.$body; 1136 $_ = $rest; 1137 } elsif (m|^(?:\\)?.|) { # Catch-all 1138 $normalized_line .= $&; 1139 $_ = $'; 1140 } 1141 } 1142 } 1143 } 1144 foreach my $handler (@endhandlers) { 1145 if ($handler->{massager}) { 1146 $handler->{massager}->(\%opts); 1147 } 1148 } 1149 return @result; 1150} 1151 1152# arg1: line to check 1153# arg2...: handlers to check 1154# return undef when no handler matched 1155sub _run_handlers { 1156 my %opts; 1157 if (ref($_[$#_]) eq "HASH") { 1158 %opts = %{$_[$#_]}; 1159 pop @_; 1160 } 1161 my $line = shift; 1162 my @handlers = @_; 1163 1164 foreach my $handler (@handlers) { 1165 if ($handler->{regexp} 1166 && $line =~ m|^$handler->{regexp}$|) { 1167 if ($handler->{massager}) { 1168 if ($opts{debug}) { 1169 print STDERR "DEBUG[",$opts{debug_type},"]: Trying to handle '$line'\n"; 1170 print STDERR "DEBUG[",$opts{debug_type},"]: (matches /\^",$handler->{regexp},"\$/)\n"; 1171 } 1172 my $saved_line = $line; 1173 my @massaged = 1174 map { s/(<<<|>>>)//g; $_ } 1175 $handler->{massager}->($saved_line, \%opts); 1176 print STDERR "DEBUG[",$opts{debug_type},"]: Got back '" 1177 , join("', '", @massaged), "'\n" 1178 if $opts{debug}; 1179 1180 # Because we may get back new lines to be 1181 # injected before whatever else that follows, 1182 # and the injected stuff might include 1183 # preprocessor lines, we need to inject them 1184 # in @lines and set $_ to the empty string to 1185 # break out from the inner loops 1186 my $injected_lines = shift @massaged || ""; 1187 1188 if (@massaged) { 1189 return (1, 1190 { 1191 name => shift @massaged, 1192 type => shift @massaged, 1193 returntype => shift @massaged, 1194 value => shift @massaged, 1195 conds => [ @massaged ] 1196 }, 1197 $injected_lines 1198 ); 1199 } else { 1200 print STDERR "DEBUG[",$opts{debug_type},"]: (ignore, possible side effects)\n" 1201 if $opts{debug} && $injected_lines eq ""; 1202 return (1, $injected_lines); 1203 } 1204 } 1205 return (1); 1206 } 1207 } 1208 return (0); 1209} 1210