Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

cmt_regexp Class Reference

#include <cmt_regexp.h>

Collaboration diagram for cmt_regexp:

Collaboration graph
[legend]
List of all members.

Public Member Functions

 cmt_regexp ()
 cmt_regexp (const cmt_string &expression)
 ~cmt_regexp ()
void set (const cmt_string &expression)
bool is_valid () const
iterator begin (const cmt_string &text, int pos=0)
iterator end ()
iterator begin (const cmt_string &text, int pos=0) const
iterator end () const
bool match (const cmt_string &text) const

Private Attributes

cmt_regexp_node* _root

Constructor & Destructor Documentation

cmt_regexp::cmt_regexp ()
 

Definition at line 1156 of file cmt_regexp.cxx.

References _root.

01157 {
01158   _root = 0;
01159 }

cmt_regexp::cmt_regexp (const cmt_string &expression)
 

Definition at line 1162 of file cmt_regexp.cxx.

References _root, and set().

01163 {
01164   _root = 0;
01165   set (expression);
01166 }

cmt_regexp::~cmt_regexp ()
 

Definition at line 1563 of file cmt_regexp.cxx.

References _root.

01564 {
01565   if (_root != 0)
01566     {
01567       delete _root;
01568     }
01569 }


Member Function Documentation

cmt_regexp::iterator cmt_regexp::begin (const cmt_string &text, int pos = 0) const
 

Definition at line 1598 of file cmt_regexp.cxx.

References _root, end(), cmt_regexp_node::match(), and cmt_string::size().

01599 {
01600   if (_root != 0)
01601     {
01602       int i;
01603       
01604       for (i = pos; i < text.size (); i++)
01605         {
01606           cmt_regexp::iterator it = _root->match (text, i);
01607           if (it != end ()) return (it);
01608         }
01609     }
01610   
01611   return (end ());
01612 }

cmt_regexp::iterator cmt_regexp::begin (const cmt_string &text, int pos = 0)
 

Definition at line 1577 of file cmt_regexp.cxx.

References _root, end(), cmt_regexp_node::match(), and cmt_string::size().

Referenced by MacroBuilder::build(), SetBuilder::build(), match(), CmtModel::strict_expand(), and CmtModel::test_regexp().

01578 {
01579   if (_root != 0)
01580     {
01581       int i;
01582       
01583       for (i = pos; i < text.size (); i++)
01584         {
01585           cmt_regexp::iterator it = _root->match (text, i);
01586           if (it != end ()) return (it);
01587         }
01588     }
01589   
01590   return (end ());
01591 }

cmt_regexp::iterator cmt_regexp::end () const
 

Definition at line 1614 of file cmt_regexp.cxx.

References cmt_regexp::iterator::null().

01615 {
01616   return (cmt_regexp::iterator::null ());
01617 }

cmt_regexp::iterator cmt_regexp::end ()
 

Definition at line 1593 of file cmt_regexp.cxx.

References cmt_regexp::iterator::null().

Referenced by begin(), MacroBuilder::build(), SetBuilder::build(), match(), CmtModel::strict_expand(), and CmtModel::test_regexp().

01594 {
01595   return (cmt_regexp::iterator::null ());
01596 }

bool cmt_regexp::is_valid () const
 

Definition at line 1571 of file cmt_regexp.cxx.

References _root.

01572 {
01573   if (_root != 0) return (true);
01574   else return (false);
01575 }

bool cmt_regexp::match (const cmt_string &text) const
 

Definition at line 1619 of file cmt_regexp.cxx.

References begin(), and end().

Referenced by PathBuilder::build(), PathBuilder::clean(), Symbol::expand(), CvsImplementation::filter_list(), CmtGenerator::get_all_files(), CvsImplementation::match_version_request(), Parser::parse_line(), Cmt::print_macros(), Cmt::print_symbol_names(), PAwk::run(), and CmtSystem::scan_dir().

01620 {
01621   iterator it = begin (text);
01622   if (it == end ()) return (false);
01623   else return (true);
01624 }

void cmt_regexp::set (const cmt_string &expression)
 

Definition at line 1169 of file cmt_regexp.cxx.

References _root, cmt_regexp_node::dump(), cmt_regexp_node_set::father(), cmt_regexp_node_set::nodes(), cmt_regexp_node_set::parentheses(), cmt_regexp_node_set::pop(), cmt_regexp_node_set::push(), cmt_regexp_node_set::reduce(), cmt_regexp_node_set::set_parentheses(), cmt_string::size(), CmtSystem::testenv(), and cmt_regexp_node_set::top().

Referenced by cmt_regexp(), and constituents_action_iterator::set().

01170 {
01171   if (_root != 0)
01172     {
01173       delete _root;
01174       _root = 0;
01175     }
01176 
01177     //
01178     // The root is the cmt_or_node which will be returned. It is
01179     // the top of the hierarchy.
01180     //
01181     //  top is the running cmt_and_node.
01182     //
01183   cmt_regexp_node_set* or_root = 0;
01184   cmt_regexp_node_set* top_and = 0;
01185   
01186     // abcd
01187     // ab|cd
01188     // a|b|cd
01189     // a|b*|cd
01190     // a|b*|cd?e
01191     //
01192     // exp     : and
01193     //         | exp '|' and
01194     //
01195     // and     : unary 
01196     //         | unary and
01197     //
01198     // unary   : primary '*'
01199     //         | primary '?'
01200     //
01201     // primary : '[' opt_begin opt_chars opt_end ']'
01202     //         | '^'
01203     //         | '$'
01204     //         | char
01205     //         | '(' exp ')'
01206     //
01207   
01208   {
01209       //
01210       // First we build an cmt_or_node (corresponding to the
01211       // first grammatical rule)
01212       //
01213       //  Then cmt_and_nodes are pushed into it.
01214       //  and standard nodes are pushed into the running (top_and) cmt_and_node
01215       //
01216     or_root = new cmt_or_node (0);
01217     top_and = new cmt_and_node (or_root);
01218   }
01219   
01220   int i;
01221   
01222   for (i = 0; i < expression.size (); i++)
01223     {
01224       char c = expression[i];
01225       switch (c)
01226         {
01227           case '[':
01228           {
01229               //
01230               // The case is 
01231               //
01232               //  exp   : '['     char ... ']'
01233               //  exp   : '[' '^' char ... ']'
01234               //
01235 
01236             if (i >= expression.size ()) 
01237               {
01238                   // syntax error : unbalanced '['
01239                 delete or_root;
01240                 return;
01241               }
01242             i++;
01243             
01244             int i0 = i;
01245             
01246             bool done = false;
01247             bool has_not = false;
01248             
01249             cmt_string choices = "";
01250             
01251             for (; i < expression.size (); i++)
01252               {
01253                 c = expression[i];
01254                 switch (c)
01255                   {
01256                     case ']':
01257                       done = true;
01258                       break;
01259                     case '^':
01260                       if (i == i0) has_not = true;
01261                       else choices += c;
01262                       break;
01263                     case '\\':
01264                       choices += c;
01265                       if (i >= expression.size ())
01266                         {
01267                             // syntax error : unbalanced '[' and unfinished
01268                             // escape sequence
01269                           delete or_root;
01270                           return;
01271                         }
01272                       i++;
01273                       c = expression[i];
01274                       choices += c;
01275                       break;
01276                     default:
01277                       choices += c;
01278                       break;
01279                   }
01280                 if (done) break;
01281               }
01282             
01283             if (!done)
01284               {
01285                   // syntax error : unbalanced '['
01286                 delete or_root;
01287                 return;
01288               }
01289             if (has_not)
01290               top_and->push (new cmt_not_char_list_node (choices));
01291             else        
01292               top_and->push (new cmt_char_list_node (choices));
01293           }
01294           break;
01295           case '*':
01296           {
01297               //
01298               //  exp : exp '*'
01299               //
01300             if (top_and->nodes () == 0)
01301               {
01302                   // Syntax error : '*' is not preceded by an expression
01303                 delete or_root;
01304                 return;
01305               }
01306             
01307             cmt_regexp_node* n = top_and->pop ();
01308             top_and->push (new cmt_zero_more (n));
01309           }
01310           break;
01311           case '+':
01312           {
01313               //
01314               //  exp : exp '+'
01315               //
01316             if (top_and->nodes () == 0)
01317               {
01318                   // Syntax error : '+' is not preceded by an expression
01319                 delete or_root;
01320                 return;
01321               }
01322             
01323             cmt_regexp_node* n = top_and->pop ();
01324             top_and->push (new cmt_one_more (n));
01325           }
01326           break;
01327           case '?':
01328           {
01329               //
01330               //  exp : exp '?'
01331               //
01332             if (top_and->nodes () == 0)
01333               {
01334                   // Syntax error : '?' is not preceded by an expression
01335                 delete or_root;
01336                 return;
01337               }
01338             
01339             cmt_regexp_node* n = top_and->pop ();
01340             top_and->push (new cmt_zero_one (n));
01341           }
01342           break;
01343           case '.':
01344               //
01345               //  exp : '.'
01346               //
01347             top_and->push (new cmt_any_node ());
01348             break;
01349           case '(':
01350           {
01351               //
01352               //  exp : '(' exp ')'
01353               //
01354             if (top_and->parentheses ())
01355               {
01356                   // This should never happen.
01357                 delete or_root;
01358                 return;
01359               }
01360             
01361             top_and->set_parentheses (true);
01362             
01363               //
01364               // A new complete expression is started.
01365               //  -> do as for top_and parsing.
01366               //
01367             
01368             top_and = new cmt_and_node (new cmt_or_node (top_and));
01369           }
01370           break;
01371           case ')':
01372           {
01373               //
01374               //  exp : '(' exp ')'
01375               //
01376             
01377               // top_and is the cmt_and_node into which new nodes are pushed.
01378             cmt_regexp_node_set* or_node = top_and->father ();
01379             if (or_node == 0) 
01380               {
01381                   // This should never happen : top_and should always be
01382                   // at least an cmt_and_node hanging at an cmt_or_node
01383                 delete or_root;
01384                 return;
01385               }
01386             
01387               //
01388               // The last cmt_and_node was empty, thus we had either '()' or '(...|)'
01389               //
01390             
01391             if (top_and->nodes () == 0) 
01392               {
01393                 delete (or_node->pop ());
01394               }
01395             else
01396               {
01397                 top_and->reduce ();
01398               }
01399             
01400             top_and = or_node->father ();
01401             
01402             if (top_and == 0)
01403               {
01404                   // Syntax error : too many ')'
01405                 delete or_root;
01406                 return;
01407               }
01408             
01409               //
01410               // top_and is now the previous running cmt_and_node where the '(' 
01411               // was originally met its top_and node contains the parenthesized 
01412               // sub expression  If this one is empty, (due to an empty '()' 
01413               // expression) then it may simply be discarded.
01414               //
01415             
01416             if (!top_and->parentheses ())
01417               {
01418                   // Syntax error : too many ')'
01419                 delete or_root;
01420                 return;
01421               }
01422             
01423             top_and->set_parentheses (false);
01424             
01425             cmt_regexp_node* unique = 0;
01426             if (or_node->nodes () == 1)
01427               {
01428                 cmt_regexp_node_set* and_node = (cmt_regexp_node_set*) or_node->top ();
01429                 if (and_node->nodes () == 1)
01430                   {
01431                     unique = and_node->pop ();
01432                     delete (or_node->pop ());
01433                   }
01434                 else if (and_node->nodes () == 0)
01435                   {
01436                     delete (or_node->pop ());
01437                   }
01438               }
01439             
01440             if (or_node->nodes () == 0) delete (top_and->pop ());
01441             if (unique != 0) top_and->push (unique);
01442           }
01443           
01444           break;
01445           case '|':
01446           {
01447               //
01448               //  exp : exp '|' exp
01449               //
01450 
01451             cmt_regexp_node_set* or_node = top_and->father ();
01452             
01453             top_and->reduce ();
01454             
01455               //
01456               // or is the father cmt_or_node, which only contains cmt_and_nodes
01457               //
01458             
01459             const cmt_regexp_node_set* and_node = (cmt_regexp_node_set*) or_node->top ();
01460             if (and_node->nodes () == 0)
01461               {
01462                   // the previous node was empty.
01463                   // we may discard it
01464                 or_node->pop ();
01465               }
01466             
01467             top_and = new cmt_and_node (or_node);
01468           }
01469           break;
01470           case '^':
01471               //
01472               //  exp : '^'
01473               //
01474             top_and->push (new cmt_begin_node ());
01475             break;
01476           case '$':
01477               //
01478               //  exp : '$'
01479               //
01480             top_and->push (new cmt_end_node ());
01481             break;
01482           case '\\':
01483             if (i >= expression.size ())
01484               {
01485                 delete or_root;
01486                 return;
01487               }
01488             i++;
01489             c = expression[i];
01490             switch (c)
01491               {
01492                 case '[':
01493                 case ']':
01494                 case '(':
01495                 case ')':
01496                 case '.':
01497                 case '*':
01498                 case '?':
01499                 case '^':
01500                 case '$':
01501                 case '\\':
01502                   break;
01503                 case 'r':
01504                   c = '\r';
01505                   break;
01506                 case 't':
01507                   c = '\t';
01508                   break;
01509                 case 'n':
01510                   c = '\n';
01511                   break;
01512                 default:
01513                   break;
01514               }
01515           default:
01516             top_and->push (new cmt_char_node (c));
01517             break;
01518         }
01519     }
01520   
01521   if (or_root != 0)
01522     {
01523       cmt_regexp_node_set* and_node = (cmt_regexp_node_set*) or_root->top ();
01524       
01525       if (or_root->nodes () == 1)
01526         {
01527             //
01528             // Check whether there is at least one non-empty
01529             // cmt_and_node
01530             //
01531           if (and_node->nodes () == 0)
01532             {
01533               delete or_root;
01534               return;
01535             }
01536         }
01537       
01538       if (and_node != 0)
01539         {
01540           and_node->reduce ();
01541           
01542           if (and_node->parentheses ())
01543             {
01544               delete or_root;
01545               return;
01546             }
01547         }
01548     }
01549   
01550   _root = or_root;
01551 
01552   bool dbg = CmtSystem::testenv ("CMTTESTREGEXP");
01553 
01554   if (dbg)
01555     {
01556       if (_root != 0)
01557         {
01558           _root->dump ();
01559         }
01560     }
01561 }


Member Data Documentation

cmt_regexp_node* cmt_regexp::_root [private]
 

Definition at line 55 of file cmt_regexp.h.

Referenced by begin(), cmt_regexp(), is_valid(), set(), and ~cmt_regexp().


The documentation for this class was generated from the following files:
- cmt_regexp.h
- cmt_regexp.cxx

Generated on Mon May 2 10:25:31 2005 for CMT by doxygen 1.3.5