ffead.server.doc
XmlParser.cpp
1 /*
2  Copyright 2009-2012, Sumeet Chhetri
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 /*
17  * XmlParser.cpp
18  *
19  * Created on: Sep 11, 2009
20  * Author: sumeet
21  */
22 
23 #include "XmlParser.h"
24 
25 
26 XmlParser::XmlParser(string mode)
27 {
28  logger = Logger::getLogger("XmlParser");
29  this->mode = mode;
30 }
31 
32 XmlParser::~XmlParser() {
33  // TODO Auto-generated destructor stub
34 }
35 
36 Document XmlParser::getDocument(string xml)
37 {
38  Document doc;
39  ifstream infile(xml.c_str());
40  string temp;
41  if(infile.is_open())
42  {
43  xml = "";
44  while(getline(infile, temp))
45  {
46  if(temp.find("<?")==string::npos && temp.find("?>")==string::npos)
47  xml.append(temp+"\n");
48  }
49  }
50 
51  Element root;
52  StringUtil::trim(xml);
53  if(xml.find("<")==0 && xml.find(">")!=string::npos)
54  {
55  if(xml.find("<?")!=string::npos && xml.find("?>")!=string::npos)
56  {
57  xml = xml.substr(xml.find("?>")+2);
58  }
59  readXML(xml,"",&root);
60  }
61  /*else if(xml!="")
62  {
63  logger << "Invalid XML Document" << endl;
64  string errmsg = ("Invalid XML Document");
65  XmlParseException exception(errmsg);
66  throw exception;
67  }*/
68  doc.setRootElement(root);
69  return doc;
70 }
71 
72 void XmlParser::readXML(string xml,string parent,Element *par)
73 {
74  if(xml=="")
75  return;
76  StringUtil::trim(xml);
77  int cdt = xml.find("<![CDATA[");
78  if(cdt==0)
79  {
80  int ecdt = xml.find("]]>");
81  if(ecdt==(int)string::npos)
82  {
83  string errmsg = ("Incomplete CDATA tag\n");
84  XmlParseException exception(errmsg);
85  throw exception;
86  }
87  else
88  {
89  par->setCdata(true);
90  par->setText(xml.substr(cdt+9,ecdt-cdt-9));
91  return;
92  }
93  }
94  int cmt = xml.find("<!--");
95  if(cmt!=(int)string::npos)
96  {
97  int ecmt = xml.find("-->");
98  if(ecmt==(int)string::npos)
99  {
100  string errmsg = ("Incomplete Comment tag\n");
101  XmlParseException exception(errmsg);
102  throw exception;
103  }
104  else
105  {
106  string stx = xml.substr(0,cmt);
107  string enx = xml.substr(ecmt+3);
108  xml = stx + enx;
109  }
110  }
111  int st = xml.find("<")+1;
112  int ed = 0;
113  int ed1 = xml.find("/>");
114  int ed2 = xml.find(">");
115  if((ed2<ed1 || ed1==-1) && ed2!=-1)
116  ed = ed2;
117  else if(ed1!=-1)
118  {
119  /*if(xml[ed1+1]=='>')*/
120  ed = ed1;
121  /*else
122  ed = ed2;*/
123  }
124  string tag = xml.substr(st,ed-st);
125  int ss = tag.find_first_not_of(" ");
126  int se = tag.find_last_not_of(" ")+1;
127  tag = tag.substr(ss,se-ss);
128  Element element;
129 
130  //split the tag with a space to get all the attribute sets of the element
131  string ta;
132  if(tag.find_first_of(" ")!=string::npos)
133  {
134  ta = tag.substr(0,tag.find_first_of(" "));
135  tag = tag.substr(tag.find_first_of(" ")+1);
136  while(tag.find_first_of("=")!=string::npos && tag.find_first_of("\"")!=string::npos)
137  {
138  string atname = tag.substr(0,tag.find_first_of("="));
139  int as = atname.find_first_not_of(" ");
140  int ae = atname.find_last_not_of(" ")+1;
141  atname = atname.substr(as,ae-as);
142  int ds = tag.find_first_of("\"")+1;
143  tag = tag.substr(ds);
144  string atvalue = tag.substr(0,tag.find_first_of("\""));
145  tag = tag.substr(tag.find_first_of("\"")+1);
146  if(parent!="")
147  {
148  element.addAttribute(atname,atvalue);
149  }
150  else
151  {
152  par->addAttribute(atname,atvalue);
153  }
154  //logger << "attname = " << atname << " attvalue = " << atvalue << "\n" << flush;
155  }
156  }
157  else
158  ta = tag;
159  int initcheck = xml.find_first_of("<");
160  unsigned int someTag = (xml.substr(initcheck+1)).find("<");
161  int pndTag=0,endTag=0;
162  if(xml.find("</"+ta)!=string::npos)
163  pndTag = xml.find("</"+ta);
164  else if(xml.find("/>")!=string::npos && xml.find("/>")<someTag)
165  endTag = xml.find("/>");
166  if(xml.find("< ")!=string::npos)
167  {
168  string errmsg = ("Invalid Start Tag - at position: " + CastUtil::lexical_cast<string>((int)xml.find("< ")+1) + "\n");
169  XmlParseException exception(errmsg);
170  throw exception;
171  }
172  else if(xml.find("<\t")!=string::npos)
173  {
174  string errmsg = ("Invalid Start Tag - at position: " + CastUtil::lexical_cast<string>((int)xml.find("<\t")+1) + "\n");
175  XmlParseException exception(errmsg);
176  throw exception;
177  }
178  else if(xml.find("</ ")!=string::npos)
179  {
180  string errmsg = ("Invalid End Tag - at position: " + CastUtil::lexical_cast<string>((int)xml.find("</ ")+1) + "\n");
181  XmlParseException exception(errmsg);
182  throw exception;
183  }
184  else if(xml.find("</\t")!=string::npos)
185  {
186  string errmsg = ("Invalid End Tag - at position: " + CastUtil::lexical_cast<string>((int)xml.find("</\t")+1) + "\n");
187  XmlParseException exception(errmsg);
188  throw exception;
189  }
190  else if(xml.find("< /")!=string::npos)
191  {
192  string errmsg = ("Invalid End Tag - at position: " + CastUtil::lexical_cast<string>((int)xml.find("< /")+1) + "\n");
193  XmlParseException exception(errmsg);
194  throw exception;
195  }
196  else if(xml.find("/ >")!=string::npos)
197  {
198  string errmsg = ("Invalid End Tag - at position: " + CastUtil::lexical_cast<string>((int)xml.find("/ >")+1) + "\n");
199  XmlParseException exception(errmsg);
200  throw exception;
201  }
202  else if(xml.find("<\t/")!=string::npos)
203  {
204  string errmsg = ("Invalid End Tag - at position: " + CastUtil::lexical_cast<string>((int)xml.find("<\t/")+1) + "\n");
205  XmlParseException exception(errmsg);
206  throw exception;
207  }
208  else if(xml.find("<"+ta)==string::npos && xml.find("</"+ta)!=string::npos)
209  {
210  string errmsg = ("No Start Tag - for : " + ta + "\n");
211  XmlParseException exception(errmsg);
212  throw exception;
213  }
214  else if(xml.find("<"+ta)!=string::npos && pndTag==0 && endTag==0)
215  {
216  string errmsg = ("No End Tag - for : " + ta + "\n");
217  XmlParseException exception(errmsg);
218  throw exception;
219  }
220  if(xml.find("<"+ta)!=string::npos && (xml.find("</"+ta)!=string::npos || xml.find("/>")!=string::npos))
221  {
222  //logger << "tag = " << ta << flush;
223  //logger << " parent = " << parent << flush;
224  //logger << "\n" << flush;
225 
226  if(ed==ed1)
227  {
228  if(parent!="")
229  {
230  element.setTagName(ta);
231  par->addElement(element);
232  }
233  else
234  {
235  par->setTagName(ta);
236  }
237  xml = xml.substr(xml.find("/>")+2);
238  }
239  else if(xml.find("</"+ta)!=string::npos)
240  {
241  //split each set by = to get the pairs
242  string tagx = "</"+ta+">";
243  int end = xml.find("</"+ta+">");
244  string txml = xml.substr(ed+1,end-ed-1);
245  //logger << "temp = " << txml << flush;
246  //logger << "\n" << flush;
247  if(parent!="")
248  {
249  element.setTagName(ta);
250  if(txml.find("<")!=string::npos)
251  {
252  readXML(txml,ta,&element);
253  }
254  else
255  element.setText(txml);
256  par->addElement(element);
257  }
258  else
259  {
260  par->setTagName(ta);
261  if(txml.find("<")!=string::npos)
262  {
263  readXML(txml,ta,par);
264  }
265  else
266  par->setText(txml);
267  }
268  xml = xml.substr(end+tagx.length());
269  }
270  }
271  if(xml.find("<")!=string::npos && (xml.find("</")!=string::npos || xml.find("/>")!=string::npos))
272  {
273  //logger << "xml = " << xml << flush;
274  //logger << "\n" << flush;
275  readXML(xml,parent,par);
276  }
277  else if(xml.find("<")!=string::npos && (xml.find("</")==string::npos || xml.find("/>")==string::npos))
278  {
279  string errmsg = ("Invalid Start Tag\n");
280  XmlParseException exception(errmsg);
281  throw exception;
282  }
283  else if(xml.find("<")==string::npos && (xml.find("</")!=string::npos || xml.find("/>")!=string::npos))
284  {
285  string errmsg = ("Invalid End Tag\n");
286  XmlParseException exception(errmsg);
287  throw exception;
288  }
289 }
290