ffead.server.doc
RegexUtil.cpp
1 /*
2  Copyright 2009-2012, Sumeet Chhetri
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 /*
17  * RegexUtil.cpp
18  *
19  * Created on: 14-Aug-2012
20  * Author: sumeetc
21  */
22 
23 #include "RegexUtil.h"
24 
25 map<string, regex_t> RegexUtil::patterns;
26 
27 RegexUtil::RegexUtil() {
28  // TODO Auto-generated constructor stub
29 
30 }
31 
32 RegexUtil::~RegexUtil() {
33  // TODO Auto-generated destructor stub
34 }
35 
36 vector<string> RegexUtil::search(const string& text, const string& pattern) {
37  vector<string> vec;
38  string ttext(text);
39  regex_t regex;
40  int reti;
41  if(patterns.find(pattern)!=patterns.end())
42  {
43  regex = patterns[pattern];
44  }
45  else
46  {
47  /* Compile regular expression */
48  reti = regcomp(&regex, pattern.c_str(), REG_EXTENDED);
49  if(reti)
50  {
51  cout << "Could not compile regex\n" << endl;
52  }
53  else
54  {
55  patterns[pattern] = regex;
56  }
57  }
58  regmatch_t pm;
59  reti = regexec(&regex, ttext.c_str(), 1, &pm, 0);
60  while (reti == 0) { /* while matches found */
61  /* substring found between pm.rm_so and pm.rm_eo */
62  /* This call to regexec() finds the next match */
63  if(!reti) {
64  string match;
65  match = ttext.substr(pm.rm_so, pm.rm_eo-pm.rm_so);
66  vec.push_back(match);
67  } else {
68  break;
69  }
70  ttext = ttext.substr(pm.rm_eo);
71  pm.rm_eo = -1;
72  pm.rm_so = -1;
73  reti = regexec (&regex, ttext.c_str(), 1, &pm, 0);
74  }
75  return vec;
76 }
77 
78 string RegexUtil::replace(const string& text, const string& pattern, const string& with) {
79  string ttext(text);
80  string rettxt;
81  regex_t regex;
82  int reti;
83  if(patterns.find(pattern)!=patterns.end())
84  {
85  regex = patterns[pattern];
86  }
87  else
88  {
89  /* Compile regular expression */
90  reti = regcomp(&regex, pattern.c_str(), REG_EXTENDED);
91  if(reti)
92  {
93  cout << "Could not compile regex\n" << endl;
94  }
95  else
96  {
97  patterns[pattern] = regex;
98  }
99  }
100  regmatch_t pm;
101  reti = regexec(&regex, ttext.c_str(), 1, &pm, 0);
102  while (reti == 0) { /* while matches found */
103  /* substring found between pm.rm_so and pm.rm_eo */
104  /* This call to regexec() finds the next match */
105  if(!reti) {
106  string match;
107  match = ttext.substr(pm.rm_so, pm.rm_eo-pm.rm_so);
108  rettxt += ttext.substr(0, pm.rm_so) + with;
109  } else {
110  rettxt += ttext;
111  break;
112  }
113  ttext = ttext.substr(pm.rm_eo);
114  pm.rm_eo = -1;
115  pm.rm_so = -1;
116  reti = regexec (&regex, ttext.c_str(), 1, &pm, 0);
117  }
118  if(ttext!="")rettxt += ttext;
119  return rettxt;
120 }