#include <cmt_regexp.h>
Collaboration diagram for cmt_regexp:
Public Member Functions | |
cmt_regexp () | |
cmt_regexp (const cmt_string &expression) | |
~cmt_regexp () | |
void | set (const cmt_string &expression) |
bool | is_valid () const |
iterator | begin (const cmt_string &text, int pos=0) |
iterator | end () |
iterator | begin (const cmt_string &text, int pos=0) const |
iterator | end () const |
bool | match (const cmt_string &text) const |
Private Attributes | |
cmt_regexp_node * | _root |
|
cmt_regexp::cmt_regexp ()
Definition at line 1156 of file cmt_regexp.cxx. References _root.
01157 { 01158 _root = 0; 01159 } |
|
cmt_regexp::cmt_regexp (const cmt_string &expression)
Definition at line 1162 of file cmt_regexp.cxx.
|
|
cmt_regexp::~cmt_regexp ()
Definition at line 1563 of file cmt_regexp.cxx. References _root.
|
|
cmt_regexp::begin (const cmt_string &text, int pos=0) [one of the two begin overloads]
Definition at line 1598 of file cmt_regexp.cxx. References _root, end(), cmt_regexp_node::match(), and cmt_string::size().
|
|
cmt_regexp::begin (const cmt_string &text, int pos=0) [one of the two begin overloads]
Definition at line 1577 of file cmt_regexp.cxx. References _root, end(), cmt_regexp_node::match(), and cmt_string::size(). Referenced by MacroBuilder::build(), SetBuilder::build(), match(), CmtModel::strict_expand(), and CmtModel::test_regexp().
|
|
cmt_regexp::end () [one of the two end overloads]
Definition at line 1614 of file cmt_regexp.cxx. References cmt_regexp::iterator::null().
01615 { 01616 return (cmt_regexp::iterator::null ()); 01617 } |
|
cmt_regexp::end () [one of the two end overloads]
Definition at line 1593 of file cmt_regexp.cxx. References cmt_regexp::iterator::null(). Referenced by begin(), MacroBuilder::build(), SetBuilder::build(), match(), CmtModel::strict_expand(), and CmtModel::test_regexp().
01594 { 01595 return (cmt_regexp::iterator::null ()); 01596 } |
|
bool cmt_regexp::is_valid () const
Definition at line 1571 of file cmt_regexp.cxx. References _root.
01572 { 01573 if (_root != 0) return (true); 01574 else return (false); 01575 } |
|
bool cmt_regexp::match (const cmt_string &text) const
Definition at line 1619 of file cmt_regexp.cxx. References begin() and end(). Referenced by PathBuilder::build(), PathBuilder::clean(), Symbol::expand(), CvsImplementation::filter_list(), CmtGenerator::get_all_files(), CvsImplementation::match_version_request(), Parser::parse_line(), Cmt::print_macros(), Cmt::print_symbol_names(), PAwk::run(), and CmtSystem::scan_dir().
|
|
void cmt_regexp::set (const cmt_string &expression)
Definition at line 1169 of file cmt_regexp.cxx. References _root, cmt_regexp_node::dump(), cmt_regexp_node_set::father(), cmt_regexp_node_set::nodes(), cmt_regexp_node_set::parentheses(), cmt_regexp_node_set::pop(), cmt_regexp_node_set::push(), cmt_regexp_node_set::reduce(), cmt_regexp_node_set::set_parentheses(), cmt_string::size(), CmtSystem::testenv(), and cmt_regexp_node_set::top(). Referenced by cmt_regexp(), and constituents_action_iterator::set().
01170 { 01171 if (_root != 0) 01172 { 01173 delete _root; 01174 _root = 0; 01175 } 01176 01177 // 01178 // The root is the cmt_or_node which will be returned. It is 01179 // the top of the hierarchy. 01180 // 01181 // top is the running cmt_and_node. 01182 // 01183 cmt_regexp_node_set* or_root = 0; 01184 cmt_regexp_node_set* top_and = 0; 01185 01186 // abcd 01187 // ab|cd 01188 // a|b|cd 01189 // a|b*|cd 01190 // a|b*|cd?e 01191 // 01192 // exp : and 01193 // | exp '|' and 01194 // 01195 // and : unary 01196 // | unary and 01197 // 01198 // unary : primary '*' 01199 // | primary '?' 01200 // 01201 // primary : '[' opt_begin opt_chars opt_end ']' 01202 // | '^' 01203 // | '$' 01204 // | char 01205 // | '(' exp ')' 01206 // 01207 01208 { 01209 // 01210 // First we build an cmt_or_node (corresponding to the 01211 // first grammatical rule) 01212 // 01213 // Then cmt_and_nodes are pushed into it. 01214 // and standard nodes are pushed into the running (top_and) cmt_and_node 01215 // 01216 or_root = new cmt_or_node (0); 01217 top_and = new cmt_and_node (or_root); 01218 } 01219 01220 int i; 01221 01222 for (i = 0; i < expression.size (); i++) 01223 { 01224 char c = expression[i]; 01225 switch (c) 01226 { 01227 case '[': 01228 { 01229 // 01230 // The case is 01231 // 01232 // exp : '[' char ... ']' 01233 // exp : '[' '^' char ... 
']' 01234 // 01235 01236 if (i >= expression.size ()) 01237 { 01238 // syntax error : unbalanced '[' 01239 delete or_root; 01240 return; 01241 } 01242 i++; 01243 01244 int i0 = i; 01245 01246 bool done = false; 01247 bool has_not = false; 01248 01249 cmt_string choices = ""; 01250 01251 for (; i < expression.size (); i++) 01252 { 01253 c = expression[i]; 01254 switch (c) 01255 { 01256 case ']': 01257 done = true; 01258 break; 01259 case '^': 01260 if (i == i0) has_not = true; 01261 else choices += c; 01262 break; 01263 case '\\': 01264 choices += c; 01265 if (i >= expression.size ()) 01266 { 01267 // syntax error : unbalanced '[' and unfinished 01268 // escape sequence 01269 delete or_root; 01270 return; 01271 } 01272 i++; 01273 c = expression[i]; 01274 choices += c; 01275 break; 01276 default: 01277 choices += c; 01278 break; 01279 } 01280 if (done) break; 01281 } 01282 01283 if (!done) 01284 { 01285 // syntax error : unbalanced '[' 01286 delete or_root; 01287 return; 01288 } 01289 if (has_not) 01290 top_and->push (new cmt_not_char_list_node (choices)); 01291 else 01292 top_and->push (new cmt_char_list_node (choices)); 01293 } 01294 break; 01295 case '*': 01296 { 01297 // 01298 // exp : exp '*' 01299 // 01300 if (top_and->nodes () == 0) 01301 { 01302 // Syntax error : '*' is not preceded by an expression 01303 delete or_root; 01304 return; 01305 } 01306 01307 cmt_regexp_node* n = top_and->pop (); 01308 top_and->push (new cmt_zero_more (n)); 01309 } 01310 break; 01311 case '+': 01312 { 01313 // 01314 // exp : exp '+' 01315 // 01316 if (top_and->nodes () == 0) 01317 { 01318 // Syntax error : '+' is not preceded by an expression 01319 delete or_root; 01320 return; 01321 } 01322 01323 cmt_regexp_node* n = top_and->pop (); 01324 top_and->push (new cmt_one_more (n)); 01325 } 01326 break; 01327 case '?': 01328 { 01329 // 01330 // exp : exp '?' 01331 // 01332 if (top_and->nodes () == 0) 01333 { 01334 // Syntax error : '?' 
is not preceded by an expression 01335 delete or_root; 01336 return; 01337 } 01338 01339 cmt_regexp_node* n = top_and->pop (); 01340 top_and->push (new cmt_zero_one (n)); 01341 } 01342 break; 01343 case '.': 01344 // 01345 // exp : '.' 01346 // 01347 top_and->push (new cmt_any_node ()); 01348 break; 01349 case '(': 01350 { 01351 // 01352 // exp : '(' exp ')' 01353 // 01354 if (top_and->parentheses ()) 01355 { 01356 // This should never happen. 01357 delete or_root; 01358 return; 01359 } 01360 01361 top_and->set_parentheses (true); 01362 01363 // 01364 // A new complete expression is started. 01365 // -> do as for top_and parsing. 01366 // 01367 01368 top_and = new cmt_and_node (new cmt_or_node (top_and)); 01369 } 01370 break; 01371 case ')': 01372 { 01373 // 01374 // exp : '(' exp ')' 01375 // 01376 01377 // top_and is the cmt_and_node into which new nodes are pushed. 01378 cmt_regexp_node_set* or_node = top_and->father (); 01379 if (or_node == 0) 01380 { 01381 // This should never happen : top_and should always be 01382 // at least an cmt_and_node hanging at an cmt_or_node 01383 delete or_root; 01384 return; 01385 } 01386 01387 // 01388 // The last cmt_and_node was empty, thus we had either '()' or '(...|)' 01389 // 01390 01391 if (top_and->nodes () == 0) 01392 { 01393 delete (or_node->pop ()); 01394 } 01395 else 01396 { 01397 top_and->reduce (); 01398 } 01399 01400 top_and = or_node->father (); 01401 01402 if (top_and == 0) 01403 { 01404 // Syntax error : too many ')' 01405 delete or_root; 01406 return; 01407 } 01408 01409 // 01410 // top_and is now the previous running cmt_and_node where the '(' 01411 // was originally met its top_and node contains the parenthesized 01412 // sub expression If this one is empty, (due to an empty '()' 01413 // expression) then it may simply be discarded. 
01414 // 01415 01416 if (!top_and->parentheses ()) 01417 { 01418 // Syntax error : too many ')' 01419 delete or_root; 01420 return; 01421 } 01422 01423 top_and->set_parentheses (false); 01424 01425 cmt_regexp_node* unique = 0; 01426 if (or_node->nodes () == 1) 01427 { 01428 cmt_regexp_node_set* and_node = (cmt_regexp_node_set*) or_node->top (); 01429 if (and_node->nodes () == 1) 01430 { 01431 unique = and_node->pop (); 01432 delete (or_node->pop ()); 01433 } 01434 else if (and_node->nodes () == 0) 01435 { 01436 delete (or_node->pop ()); 01437 } 01438 } 01439 01440 if (or_node->nodes () == 0) delete (top_and->pop ()); 01441 if (unique != 0) top_and->push (unique); 01442 } 01443 01444 break; 01445 case '|': 01446 { 01447 // 01448 // exp : exp '|' exp 01449 // 01450 01451 cmt_regexp_node_set* or_node = top_and->father (); 01452 01453 top_and->reduce (); 01454 01455 // 01456 // or is the father cmt_or_node, which only contains cmt_and_nodes 01457 // 01458 01459 const cmt_regexp_node_set* and_node = (cmt_regexp_node_set*) or_node->top (); 01460 if (and_node->nodes () == 0) 01461 { 01462 // the previous node was empty. 
01463 // we may discard it 01464 or_node->pop (); 01465 } 01466 01467 top_and = new cmt_and_node (or_node); 01468 } 01469 break; 01470 case '^': 01471 // 01472 // exp : '^' 01473 // 01474 top_and->push (new cmt_begin_node ()); 01475 break; 01476 case '$': 01477 // 01478 // exp : '$' 01479 // 01480 top_and->push (new cmt_end_node ()); 01481 break; 01482 case '\\': 01483 if (i >= expression.size ()) 01484 { 01485 delete or_root; 01486 return; 01487 } 01488 i++; 01489 c = expression[i]; 01490 switch (c) 01491 { 01492 case '[': 01493 case ']': 01494 case '(': 01495 case ')': 01496 case '.': 01497 case '*': 01498 case '?': 01499 case '^': 01500 case '$': 01501 case '\\': 01502 break; 01503 case 'r': 01504 c = '\r'; 01505 break; 01506 case 't': 01507 c = '\t'; 01508 break; 01509 case 'n': 01510 c = '\n'; 01511 break; 01512 default: 01513 break; 01514 } 01515 default: 01516 top_and->push (new cmt_char_node (c)); 01517 break; 01518 } 01519 } 01520 01521 if (or_root != 0) 01522 { 01523 cmt_regexp_node_set* and_node = (cmt_regexp_node_set*) or_root->top (); 01524 01525 if (or_root->nodes () == 1) 01526 { 01527 // 01528 // Check whether there is at least one non-empty 01529 // cmt_and_node 01530 // 01531 if (and_node->nodes () == 0) 01532 { 01533 delete or_root; 01534 return; 01535 } 01536 } 01537 01538 if (and_node != 0) 01539 { 01540 and_node->reduce (); 01541 01542 if (and_node->parentheses ()) 01543 { 01544 delete or_root; 01545 return; 01546 } 01547 } 01548 } 01549 01550 _root = or_root; 01551 01552 bool dbg = CmtSystem::testenv ("CMTTESTREGEXP"); 01553 01554 if (dbg) 01555 { 01556 if (_root != 0) 01557 { 01558 _root->dump (); 01559 } 01560 } 01561 } |
|
cmt_regexp_node * cmt_regexp::_root [private]
Definition at line 55 of file cmt_regexp.h. Referenced by begin(), cmt_regexp(), is_valid(), set(), and ~cmt_regexp(). |