libdap Updated for version 3.18.1
D4ParserSax2.h
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2012 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26#ifndef d4_parser_sax2_h
27#define d4_parser_sax2_h
28
29#define ATTR 1
30
31#include <string.h>
32
33#include <string>
34#include <iostream>
35#include <map>
36#include <stack>
37
38#include <libxml/parserInternals.h>
39
40#define CRLF "\r\n"
41
42namespace libdap
43{
44
45class DMR;
46class BaseType;
47class D4BaseTypeFactory;
48class D4Group;
49class D4Attributes;
50class D4EnumDef;
51class D4Dimension;
52
77{
78private:
// The states the DMR SAX parser can be in. Values are assigned implicitly,
// starting at zero, in the order listed.
enum ParseState {
    parser_start,

    inside_dataset,

    // Entered just after the start of a Group element has been parsed.
    inside_group,

    // DAP attributes
    inside_attribute_container,
    inside_attribute,
    inside_attribute_value,
    inside_other_xml_attribute,

    // Enumeration definitions and their constants
    inside_enum_def,
    inside_enum_const,

    // Dimension definitions
    inside_dim_def,

    // One state for all of Byte, ..., Url, Opaque
    inside_simple_type,

    // (There is no separate inside_array state.)
    inside_dim,
    inside_map,

    inside_constructor,

    // (inside_sequence was removed from the merged code, jhrg 5/2/14.)

    // Unknown content and error conditions
    parser_unknown,
    parser_error,
    parser_fatal_error,

    parser_end
};
117
118 xmlSAXHandler ddx_sax_parser;
119
120 // The results of the parse operation are stored in these fields.
121 // This is passed into the parser using the intern() methods.
122 DMR *d_dmr; // dump DMR here
123 DMR *dmr() const { return d_dmr; }
124
125 // These stacks hold the state of the parse as it progresses.
126 stack<ParseState> s; // Current parse state
127 void push_state(D4ParserSax2::ParseState state) { s.push(state); }
128 D4ParserSax2::ParseState get_state() const { return s.top(); }
129 void pop_state() { s.pop(); }
130 bool empty_state() const { return s.empty(); }
131
132 stack<BaseType*> btp_stack; // current variable(s)
133 void push_basetype(BaseType *btp) { btp_stack.push(btp); }
134 BaseType *top_basetype() const { return btp_stack.top(); }
135 void pop_basetype() { btp_stack.pop(); }
136 bool empty_basetype() const { return btp_stack.empty(); }
137
138 stack<D4Group*> grp_stack; // current groups(s)
139 void push_group(D4Group *grp) { grp_stack.push(grp); }
140 D4Group *top_group() const { return grp_stack.top(); }
141 void pop_group() { grp_stack.pop(); }
142 bool empty_group() const { return grp_stack.empty(); }
143
144 stack<D4Attributes*> d_attrs_stack; // DAP4 Attributes
145 void push_attributes(D4Attributes *attr) { d_attrs_stack.push(attr); }
146 D4Attributes *top_attributes() const { return d_attrs_stack.top(); }
147 void pop_attributes() { d_attrs_stack.pop(); }
148 bool empty_attributes() const { return d_attrs_stack.empty(); }
149
150 D4EnumDef *d_enum_def;
151 D4EnumDef *enum_def();
152 void clear_enum_def() { d_enum_def = 0; }
153
154 D4Dimension *d_dim_def;
155 D4Dimension *dim_def();
156 void clear_dim_def() { d_dim_def = 0; }
157
158 // Accumulate stuff inside an 'OtherXML' DAP attribute here
159 string other_xml;
160
161 // When we're parsing unknown XML, how deeply is it nested? This is used
162 // for the OtherXML DAP attributes.
163 unsigned int other_xml_depth;
164 unsigned int unknown_depth;
165
166 // These are used for processing errors.
167 string error_msg; // Error message(s), if any.
168 xmlParserCtxtPtr context; // used for error message line numbers
169
170 // These hold temporary values read during the parse.
171 string dods_attr_name; // DAP4 attributes, not XML attributes
172 string dods_attr_type; // ... not XML ...
173 string char_data; // char data in value elements; null after use
174 string root_ns; // What is the namespace of the root node (Group)
175
176 bool d_debug;
177 bool debug() const { return d_debug; }
178
179 bool d_strict;
180
181 class XMLAttribute {
182 public:
183 string prefix;
184 string nsURI;
185 string value;
186
187 void clone(const XMLAttribute &src) {
188 prefix = src.prefix;
189 nsURI = src.nsURI;
190 value = src.value;
191 }
192
193 XMLAttribute() : prefix(""), nsURI(""), value("") {}
194 XMLAttribute(const string &p, const string &ns, const string &v)
195 : prefix(p), nsURI(ns), value(v) {}
196 // 'attributes' as passed from libxml2 is a five element array but this
197 // ctor gets the back four elements.
198 XMLAttribute(const xmlChar **attributes/*[4]*/) {
199 prefix = attributes[0] != 0 ? (const char *)attributes[0]: "";
200 nsURI = attributes[1] != 0 ? (const char *)attributes[1]: "";
201 value = string((const char *)attributes[2], (const char *)attributes[3]);
202 }
203 XMLAttribute(const XMLAttribute &rhs) {
204 clone(rhs);
205 }
206 XMLAttribute &operator=(const XMLAttribute &rhs) {
207 if (this == &rhs)
208 return *this;
209 clone(rhs);
210 return *this;
211 }
212 };
213
214 typedef map<string, XMLAttribute> XMLAttrMap;
215 XMLAttrMap xml_attrs; // dump XML attributes here
216
217 XMLAttrMap::iterator xml_attr_begin() { return xml_attrs.begin(); }
218
219 XMLAttrMap::iterator xml_attr_end() { return xml_attrs.end(); }
220
221 map<string, string> namespace_table;
222
223 void cleanup_parse();
224
231 void transfer_xml_attrs(const xmlChar **attrs, int nb_attributes);
232 void transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces);
// Attribute checks, defined out of line (implementations not visible here).
// NOTE(review): presumably both look 'attr' up in xml_attrs, and the
// 'required' variant additionally reports an error when it is missing --
// confirm against the implementation.
// std::string is spelled out so the header does not depend on a
// using-directive leaking in from some other include.
bool check_required_attribute(const std::string &attr);
bool check_attribute(const std::string &attr);
235 void process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes);
236
237 void process_enum_const_helper(const xmlChar **attrs, int nb_attributes);
238 void process_enum_def_helper(const xmlChar **attrs, int nb_attributes);
239
240 bool process_dimension(const char *name, const xmlChar **attrs, int nb_attrs);
241 bool process_dimension_def(const char *name, const xmlChar **attrs, int nb_attrs);
242 bool process_map(const char *name, const xmlChar **attrs, int nb_attributes);
243 bool process_attribute(const char *name, const xmlChar **attrs, int nb_attributes);
244 bool process_variable(const char *name, const xmlChar **attrs, int nb_attributes);
245 bool process_group(const char *name, const xmlChar **attrs, int nb_attributes);
246 bool process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes);
247 bool process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes);
248
249 void finish_variable(const char *tag, Type t, const char *expected);
251
252 friend class D4ParserSax2Test;
253
254public:
255 D4ParserSax2() :
256 d_dmr(0), d_enum_def(0), d_dim_def(0),
257 other_xml(""), other_xml_depth(0), unknown_depth(0),
258 error_msg(""), context(0),
259 dods_attr_name(""), dods_attr_type(""),
260 char_data(""), root_ns(""), d_debug(false), d_strict(true)
261 {
262 //xmlSAXHandler ddx_sax_parser;
263 memset(&ddx_sax_parser, 0, sizeof(xmlSAXHandler));
264
265 ddx_sax_parser.getEntity = &D4ParserSax2::dmr_get_entity;
266 ddx_sax_parser.startDocument = &D4ParserSax2::dmr_start_document;
267 ddx_sax_parser.endDocument = &D4ParserSax2::dmr_end_document;
268 ddx_sax_parser.characters = &D4ParserSax2::dmr_get_characters;
269 ddx_sax_parser.ignorableWhitespace = &D4ParserSax2::dmr_ignoreable_whitespace;
270 ddx_sax_parser.cdataBlock = &D4ParserSax2::dmr_get_cdata;
271 ddx_sax_parser.warning = &D4ParserSax2::dmr_error;
272 ddx_sax_parser.error = &D4ParserSax2::dmr_error;
273 ddx_sax_parser.fatalError = &D4ParserSax2::dmr_fatal_error;
274 ddx_sax_parser.initialized = XML_SAX2_MAGIC;
275 ddx_sax_parser.startElementNs = &D4ParserSax2::dmr_start_element;
276 ddx_sax_parser.endElementNs = &D4ParserSax2::dmr_end_element;
277 }
278
279 void intern(istream &f, DMR *dest_dmr, bool debug = false);
280 void intern(const string &document, DMR *dest_dmr, bool debug = false);
281 void intern(const char *buffer, int size, DMR *dest_dmr, bool debug = false);
282
295 void set_strict(bool s) { d_strict = s; }
299 bool get_strict() const { return d_strict; }
302 static void dmr_start_document(void *parser);
303 static void dmr_end_document(void *parser);
304
305 static void dmr_start_element(void *parser,
306 const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
307 int nb_namespaces, const xmlChar **namespaces, int nb_attributes,
308 int nb_defaulted, const xmlChar **attributes);
309 static void dmr_end_element(void *parser, const xmlChar *localname,
310 const xmlChar *prefix, const xmlChar *URI);
311
312 static void dmr_get_characters(void *parser, const xmlChar *ch, int len);
313 static void dmr_ignoreable_whitespace(void *parser,
314 const xmlChar * ch, int len);
315 static void dmr_get_cdata(void *parser, const xmlChar *value, int len);
316
317 static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name);
318 static void dmr_fatal_error(void *parser, const char *msg, ...);
319 static void dmr_error(void *parser, const char *msg, ...);
320};
321
322} // namespace libdap
323
324#endif // d4_parser_sax2_h
The basic data type for the DODS DAP types.
Definition: BaseType.h:118
bool get_strict() const
Get the setting of the 'strict' mode.
Definition: D4ParserSax2.h:299
static void dmr_start_document(void *parser)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
static void dmr_end_document(void *parser)
void set_strict(bool s)
Set the 'strict' mode to true or false.
Definition: D4ParserSax2.h:295
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_fatal_error(void *parser, const char *msg,...)
Type
Identifies the data type.
Definition: Type.h:94