libdap Updated for version 3.18.1
D4ParserSax2.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of libdap, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2012 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24
25#include "config.h"
26
27//#define DODS_DEBUG 1
28
29#include <iostream>
30#include <sstream>
31
32#include <cstring>
33#include <cstdarg>
34#include <cassert>
35
36#include <libxml/parserInternals.h>
37
38#include "DMR.h"
39
40#include "BaseType.h"
41#include "Array.h"
42#include "D4Group.h"
43#include "D4Attributes.h"
44#include "D4Maps.h"
45#include "D4Enum.h"
46
47#include "D4BaseTypeFactory.h"
48
49#include "D4ParserSax2.h"
50
51#include "util.h"
52#include "debug.h"
53
54namespace libdap {
55
// Printable names for the parser states; used when writing debugging output.
// NOTE(review): the table is indexed with the value returned by get_state(),
// so the order presumably has to mirror the ParseState enum declared in the
// header -- confirm before adding or reordering entries.
static const char *states[] = {
    "parser_start",

    "inside_dataset",

    // The state just after the start of a Group element has been parsed.
    "inside_group",

    "inside_attribute_container",
    "inside_attribute",
    "inside_attribute_value",
    "inside_other_xml_attribute",

    "inside_enum_def",
    "inside_enum_const",

    "inside_dim_def",

    // Covers every simple type: Byte, ..., Url, Opaque.
    "inside_simple_type",

    // There is intentionally no "inside_array" entry.
    "inside_dim",
    "inside_map",

    "inside_constructor",

    "parser_unknown",
    "parser_error",
    "parser_fatal_error",

    "parser_end"
};
90
/**
 * Test whether an element name differs from an expected tag.
 *
 * @param name The element name found in the document.
 * @param tag The tag name that was expected.
 * @return True when the two C strings are not identical.
 */
static bool is_not(const char *name, const char *tag)
{
    const bool same = (strcmp(name, tag) == 0);
    return !same;
}
95
104D4EnumDef *
105D4ParserSax2::enum_def()
106{
107 if (!d_enum_def) d_enum_def = new D4EnumDef;
108
109 return d_enum_def;
110}
111
118D4Dimension *
119D4ParserSax2::dim_def() {
120 if (!d_dim_def) d_dim_def = new D4Dimension;
121
122 return d_dim_def;
123}
124
130void D4ParserSax2::transfer_xml_attrs(const xmlChar **attributes, int nb_attributes)
131{
132 if (!xml_attrs.empty())
133 xml_attrs.clear(); // erase old attributes
134
135 // Make a value using the attribute name and the prefix, namespace URI
136 // and the value. The prefix might be null.
137 unsigned int index = 0;
138 for (int i = 0; i < nb_attributes; ++i, index += 5) {
139 xml_attrs.insert(map<string, XMLAttribute>::value_type(string((const char *)attributes[index]),
140 XMLAttribute(attributes + index + 1)));
141
142 DBG(cerr << "XML Attribute '" << (const char *)attributes[index] << "': "
143 << xml_attrs[(const char *)attributes[index]].value << endl);
144 }
145}
146
153void D4ParserSax2::transfer_xml_ns(const xmlChar **namespaces, int nb_namespaces)
154{
155 // make a value with the prefix and namespace URI. The prefix might be null.
156 for (int i = 0; i < nb_namespaces; ++i) {
157 namespace_table.insert(map<string, string>::value_type(namespaces[i * 2] != 0 ? (const char *)namespaces[i * 2] : "",
158 (const char *)namespaces[i * 2 + 1]));
159 }
160}
161
168bool D4ParserSax2::check_required_attribute(const string & attr)
169{
170 if (xml_attrs.find(attr) == xml_attrs.end()) {
171 dmr_error(this, "Required attribute '%s' not found.", attr.c_str());
172 return false;
173 }
174 else
175 return true;
176}
177
184bool D4ParserSax2::check_attribute(const string & attr)
185{
186 return (xml_attrs.find(attr) != xml_attrs.end());
187}
188
189bool D4ParserSax2::process_dimension_def(const char *name, const xmlChar **attrs, int nb_attributes)
190{
191 if (is_not(name, "Dimension"))
192 return false;
193
194 transfer_xml_attrs(attrs, nb_attributes);
195
196 if (!(check_required_attribute("name") && check_required_attribute("size"))) {
197 dmr_error(this, "The required attribute 'name' or 'size' was missing from a Dimension element.");
198 return false;
199 }
200
201 // This getter (dim_def) allocates a new object if needed.
202 dim_def()->set_name(xml_attrs["name"].value);
203 try {
204 dim_def()->set_size(xml_attrs["size"].value);
205 }
206 catch (Error &e) {
207 dmr_error(this, e.get_error_message().c_str());
208 return false;
209 }
210
211 return true;
212}
213
231bool D4ParserSax2::process_dimension(const char *name, const xmlChar **attrs, int nb_attributes)
232{
233 if (is_not(name, "Dim"))
234 return false;
235
236 transfer_xml_attrs(attrs, nb_attributes);
237
238 if (check_attribute("size") && check_attribute("name")) {
239 dmr_error(this, "Only one of 'size' and 'name' are allowed in a Dim element, but both were used.");
240 return false;
241 }
242 if (!(check_attribute("size") || check_attribute("name"))) {
243 dmr_error(this, "Either 'size' or 'name' must be used in a Dim element.");
244 return false;
245 }
246
247 if (!top_basetype()->is_vector_type()) {
248 // Make the top BaseType* an array
249 BaseType *b = top_basetype();
250 pop_basetype();
251
252 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
253 a->set_is_dap4(true);
254 a->add_var_nocopy(b);
255 a->set_attributes_nocopy(b->attributes());
256 // trick: instead of popping b's attributes, copying them and then pushing
257 // a's copy, just move the pointer (but make sure there's only one object that
258 // references that pointer).
259 b->set_attributes_nocopy(0);
260
261 push_basetype(a);
262 }
263
264 assert(top_basetype()->is_vector_type());
265
266 Array *a = static_cast<Array*>(top_basetype());
267 if (check_attribute("size")) {
268 a->append_dim(atoi(xml_attrs["size"].value.c_str())); // low budget code for now. jhrg 8/20/13
269 return true;
270 }
271 else if (check_attribute("name")) {
272 string name = xml_attrs["name"].value;
273
274 D4Dimension *dim = 0;
275 if (name[0] == '/') // lookup the Dimension in the root group
276 dim = dmr()->root()->find_dim(name);
277 else // get enclosing Group and lookup Dimension there
278 dim = top_group()->find_dim(name);
279
280 if (!dim)
281 throw Error("The dimension '" + name + "' was not found while parsing the variable '" + a->name() + "'.");
282 a->append_dim(dim);
283 return true;
284 }
285
286 return false;
287}
288
289bool D4ParserSax2::process_map(const char *name, const xmlChar **attrs, int nb_attributes)
290{
291 if (is_not(name, "Map"))
292 return false;
293
294 transfer_xml_attrs(attrs, nb_attributes);
295
296 if (!check_attribute("name")) {
297 dmr_error(this, "The 'name' attribute must be used in a Map element.");
298 return false;
299 }
300
301 if (!top_basetype()->is_vector_type()) {
302 // Make the top BaseType* an array
303 BaseType *b = top_basetype();
304 pop_basetype();
305
306 Array *a = static_cast<Array*>(dmr()->factory()->NewVariable(dods_array_c, b->name()));
307 a->set_is_dap4(true);
308 a->add_var_nocopy(b);
309 a->set_attributes_nocopy(b->attributes());
310 // trick: instead of popping b's attributes, copying them and then pushing
311 // a's copy, just move the pointer (but make sure there's only one object that
312 // references that pointer).
313 b->set_attributes_nocopy(0);
314
315 push_basetype(a);
316 }
317
318 assert(top_basetype()->is_vector_type());
319
320 Array *a = static_cast<Array*>(top_basetype());
321
322 string map_name = xml_attrs["name"].value;
323 if (xml_attrs["name"].value[0] != '/')
324 map_name = top_group()->FQN() + map_name;
325
326 Array *map_source = 0; // The array variable that holds the data for the Map
327
328 if (map_name[0] == '/') // lookup the Map in the root group
329 map_source = dmr()->root()->find_map_source(map_name);
330 else // get enclosing Group and lookup Map there
331 map_source = top_group()->find_map_source(map_name);
332
333 // Change: If the parser is in 'strict' mode (the default) and the Array named by
334 // the Map cannot be fond, it is an error. If 'strict' mode is false (permissive
335 // mode), then this is not an error. However, the Array referenced by the Map will
336 // be null. This is a change in the parser's behavior to accommodate requests for
337 // Arrays that include Maps that do not also include the Map(s) in the request.
338 // See https://opendap.atlassian.net/browse/HYRAX-98. jhrg 4/13/16
339 if (!map_source && d_strict)
340 throw Error("The Map '" + map_name + "' was not found while parsing the variable '" + a->name() + "'.");
341
342 a->maps()->add_map(new D4Map(map_name, map_source));
343
344 return true;
345}
346
347bool D4ParserSax2::process_group(const char *name, const xmlChar **attrs, int nb_attributes)
348{
349 if (is_not(name, "Group"))
350 return false;
351
352 transfer_xml_attrs(attrs, nb_attributes);
353
354 if (!check_required_attribute("name")) {
355 dmr_error(this, "The required attribute 'name' was missing from a Group element.");
356 return false;
357 }
358
359 BaseType *btp = dmr()->factory()->NewVariable(dods_group_c, xml_attrs["name"].value);
360 if (!btp) {
361 dmr_fatal_error(this, "Could not instantiate the Group '%s'.", xml_attrs["name"].value.c_str());
362 return false;
363 }
364
365 D4Group *grp = static_cast<D4Group*>(btp);
366
367 // Need to set this to get the D4Attribute behavior in the type classes
368 // shared between DAP2 and DAP4. jhrg 4/18/13
369 grp->set_is_dap4(true);
370
371 // link it up and change the current group
372 D4Group *parent = top_group();
373 if (!parent) {
374 dmr_fatal_error(this, "No Group on the Group stack.");
375 return false;
376 }
377
378 grp->set_parent(parent);
379 parent->add_group_nocopy(grp);
380
381 push_group(grp);
382 push_attributes(grp->attributes());
383 return true;
384}
385
392inline bool D4ParserSax2::process_attribute(const char *name, const xmlChar **attrs, int nb_attributes)
393{
394 if (is_not(name, "Attribute"))
395 return false;
396
397 // These methods set the state to parser_error if a problem is found.
398 transfer_xml_attrs(attrs, nb_attributes);
399
400 // add error
401 if (!(check_required_attribute(string("name")) && check_required_attribute(string("type")))) {
402 dmr_error(this, "The required attribute 'name' or 'type' was missing from an Attribute element.");
403 return false;
404 }
405
406 if (xml_attrs["type"].value == "Container") {
407 push_state(inside_attribute_container);
408
409 DBG(cerr << "Pushing attribute container " << xml_attrs["name"].value << endl);
410 D4Attribute *child = new D4Attribute(xml_attrs["name"].value, attr_container_c);
411
412 D4Attributes *tos = top_attributes();
413 // add return
414 if (!tos) {
415 delete child;
416 dmr_fatal_error(this, "Expected an Attribute container on the top of the attribute stack.");
417 return false;
418 }
419
420 tos->add_attribute_nocopy(child);
421 push_attributes(child->attributes());
422 }
423 else if (xml_attrs["type"].value == "OtherXML") {
424 push_state(inside_other_xml_attribute);
425
426 dods_attr_name = xml_attrs["name"].value;
427 dods_attr_type = xml_attrs["type"].value;
428 }
429 else {
430 push_state(inside_attribute);
431
432 dods_attr_name = xml_attrs["name"].value;
433 dods_attr_type = xml_attrs["type"].value;
434 }
435
436 return true;
437}
438
444inline bool D4ParserSax2::process_enum_def(const char *name, const xmlChar **attrs, int nb_attributes)
445{
446 if (is_not(name, "Enumeration"))
447 return false;
448
449 transfer_xml_attrs(attrs, nb_attributes);
450
451 if (!(check_required_attribute("name") && check_required_attribute("basetype"))) {
452 dmr_error(this, "The required attribute 'name' or 'basetype' was missing from an Enumeration element.");
453 return false;
454 }
455
456 Type t = get_type(xml_attrs["basetype"].value.c_str());
457 if (!is_integer_type(t)) {
458 dmr_error(this, "The Enumeration '%s' must have an integer type, instead the type '%s' was used.",
459 xml_attrs["name"].value.c_str(), xml_attrs["basetype"].value.c_str());
460 return false;
461 }
462
463 // This getter allocates a new object if needed.
464 string enum_def_path = xml_attrs["name"].value;
465#if 0
466 // Use FQNs when things are referenced, not when they are defined
467 if (xml_attrs["name"].value[0] != '/')
468 enum_def_path = top_group()->FQN() + enum_def_path;
469#endif
470 enum_def()->set_name(enum_def_path);
471 enum_def()->set_type(t);
472
473 return true;
474}
475
476inline bool D4ParserSax2::process_enum_const(const char *name, const xmlChar **attrs, int nb_attributes)
477{
478 if (is_not(name, "EnumConst"))
479 return false;
480
481 // These methods set the state to parser_error if a problem is found.
482 transfer_xml_attrs(attrs, nb_attributes);
483
484 if (!(check_required_attribute("name") && check_required_attribute("value"))) {
485 dmr_error(this, "The required attribute 'name' or 'value' was missing from an EnumConst element.");
486 return false;
487 }
488
489 istringstream iss(xml_attrs["value"].value);
490 long long value = 0;
491 iss >> skipws >> value;
492 if (iss.fail() || iss.bad()) {
493 dmr_error(this, "Expected an integer value for an Enumeration constant, got '%s' instead.",
494 xml_attrs["value"].value.c_str());
495 }
496 else if (!enum_def()->is_valid_enum_value(value)) {
497 dmr_error(this, "In an Enumeration constant, the value '%s' cannot fit in a variable of type '%s'.",
498 xml_attrs["value"].value.c_str(), D4type_name(d_enum_def->type()).c_str());
499 }
500 else {
501 // unfortunate choice of names... args are 'label' and 'value'
502 enum_def()->add_value(xml_attrs["name"].value, value);
503 }
504
505 return true;
506}
507
513inline bool D4ParserSax2::process_variable(const char *name, const xmlChar **attrs, int nb_attributes)
514{
515 Type t = get_type(name);
516 if (is_simple_type(t)) {
517 process_variable_helper(t, inside_simple_type, attrs, nb_attributes);
518 return true;
519 }
520 else {
521 switch(t) {
522 case dods_structure_c:
523 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
524 return true;
525
526 case dods_sequence_c:
527 process_variable_helper(t, inside_constructor, attrs, nb_attributes);
528 return true;
529
530 default:
531 return false;
532 }
533 }
534}
535
543void D4ParserSax2::process_variable_helper(Type t, ParseState s, const xmlChar **attrs, int nb_attributes)
544{
545 transfer_xml_attrs(attrs, nb_attributes);
546
547 if (check_required_attribute("name")) {
548 BaseType *btp = dmr()->factory()->NewVariable(t, xml_attrs["name"].value);
549 if (!btp) {
550 dmr_fatal_error(this, "Could not instantiate the variable '%s'.", xml_attrs["name"].value.c_str());
551 return;
552 }
553
554 if ((t == dods_enum_c) && check_required_attribute("enum")) {
555 D4EnumDef *enum_def = 0;
556 string enum_path = xml_attrs["enum"].value;
557 if (enum_path[0] == '/')
558 enum_def = dmr()->root()->find_enum_def(enum_path);
559 else
560 enum_def = top_group()->find_enum_def(enum_path);
561
562 if (!enum_def)
563 dmr_fatal_error(this, "Could not find the Enumeration definition '%s'.", enum_path.c_str());
564
565 static_cast<D4Enum*>(btp)->set_enumeration(enum_def);
566 }
567
568 btp->set_is_dap4(true); // see comment above
569 push_basetype(btp);
570
571 push_attributes(btp->attributes());
572
573 push_state(s);
574 }
575}
576
583
589{
590 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
591 parser->error_msg = "";
592 parser->char_data = "";
593
594 // Set this in intern_helper so that the loop test for the parser_end
595 // state works for the first iteration. It seems like XMLParseChunk calls this
596 // function on its first run. jhrg 9/16/13
597 // parser->push_state(parser_start);
598
599 parser->push_attributes(parser->dmr()->root()->attributes());
600
601 if (parser->debug()) cerr << "Parser start state: " << states[parser->get_state()] << endl;
602}
603
607{
608 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
609
610 if (parser->debug()) cerr << "Parser end state: " << states[parser->get_state()] << endl;
611
612 if (parser->get_state() != parser_end)
613 D4ParserSax2::dmr_error(parser, "The document contained unbalanced tags.");
614
615 // If we've found any sort of error, don't make the DMR; intern() will
616 // take care of the error.
617 if (parser->get_state() == parser_error || parser->get_state() == parser_fatal_error)
618 return;
619
620 if (!parser->empty_basetype() || parser->empty_group())
621 D4ParserSax2::dmr_error(parser, "The document did not contain a valid root Group or contained unbalanced tags.");
622
623 parser->pop_group(); // leave the stack 'clean'
624 parser->pop_attributes();
625}
626
627void D4ParserSax2::dmr_start_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI,
628 int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /*nb_defaulted*/,
629 const xmlChar **attributes)
630{
631 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
632 const char *localname = (const char *) l;
633
634 if (parser->debug()) cerr << "Start element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
635
636 switch (parser->get_state()) {
637 case parser_start:
638 if (is_not(localname, "Dataset"))
639 D4ParserSax2::dmr_error(parser, "Expected DMR to start with a Dataset element; found '%s' instead.", localname);
640
641 parser->root_ns = URI ? (const char *) URI : "";
642 parser->transfer_xml_attrs(attributes, nb_attributes);
643
644 if (parser->check_required_attribute(string("name")))
645 parser->dmr()->set_name(parser->xml_attrs["name"].value);
646
647 if (parser->check_attribute("dapVersion"))
648 parser->dmr()->set_dap_version(parser->xml_attrs["dapVersion"].value);
649
650 if (parser->check_attribute("dmrVersion"))
651 parser->dmr()->set_dmr_version(parser->xml_attrs["dmrVersion"].value);
652
653 if (parser->check_attribute("base"))
654 parser->dmr()->set_request_xml_base(parser->xml_attrs["base"].value);
655
656 if (!parser->root_ns.empty())
657 parser->dmr()->set_namespace(parser->root_ns);
658
659 // Push the root Group on the stack
660 parser->push_group(parser->dmr()->root());
661
662 parser->push_state(inside_dataset);
663
664 break;
665
666 // Both inside dataset and inside group can have the same stuff.
667 // The difference is that the Dataset holds the root group, which
668 // must be present; other groups are optional
669 case inside_dataset:
670 case inside_group:
671 if (parser->process_enum_def(localname, attributes, nb_attributes))
672 parser->push_state(inside_enum_def);
673 else if (parser->process_dimension_def(localname, attributes, nb_attributes))
674 parser->push_state(inside_dim_def);
675 else if (parser->process_group(localname, attributes, nb_attributes))
676 parser->push_state(inside_group);
677 else if (parser->process_variable(localname, attributes, nb_attributes))
678 // This will push either inside_simple_type or inside_structure
679 // onto the parser state stack.
680 break;
681 else if (parser->process_attribute(localname, attributes, nb_attributes))
682 // This will push either inside_attribute, inside_attribute_container
683 // or inside_otherxml_attribute onto the parser state stack
684 break;
685 else
686 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Enumeration, Dimension, Group or variable element; found '%s' instead.", localname);
687 break;
688
689 case inside_attribute_container:
690 if (parser->process_attribute(localname, attributes, nb_attributes))
691 break;
692 else
693 D4ParserSax2::dmr_error(parser, "Expected an Attribute element; found '%s' instead.", localname);
694 break;
695
696 case inside_attribute:
697 if (parser->process_attribute(localname, attributes, nb_attributes))
698 break;
699 else if (strcmp(localname, "Value") == 0)
700 parser->push_state(inside_attribute_value);
701 else
702 dmr_error(parser, "Expected an 'Attribute' or 'Value' element; found '%s' instead.", localname);
703 break;
704
705 case inside_attribute_value:
706 // Attribute values are processed by the end element code.
707 break;
708
709 case inside_other_xml_attribute:
710 parser->other_xml_depth++;
711
712 // Accumulate the elements here
713 parser->other_xml.append("<");
714 if (prefix) {
715 parser->other_xml.append((const char *) prefix);
716 parser->other_xml.append(":");
717 }
718 parser->other_xml.append(localname);
719
720 if (nb_namespaces != 0) {
721 parser->transfer_xml_ns(namespaces, nb_namespaces);
722
723 for (map<string, string>::iterator i = parser->namespace_table.begin();
724 i != parser->namespace_table.end(); ++i) {
725 parser->other_xml.append(" xmlns");
726 if (!i->first.empty()) {
727 parser->other_xml.append(":");
728 parser->other_xml.append(i->first);
729 }
730 parser->other_xml.append("=\"");
731 parser->other_xml.append(i->second);
732 parser->other_xml.append("\"");
733 }
734 }
735
736 if (nb_attributes != 0) {
737 parser->transfer_xml_attrs(attributes, nb_attributes);
738 for (XMLAttrMap::iterator i = parser->xml_attr_begin(); i != parser->xml_attr_end(); ++i) {
739 parser->other_xml.append(" ");
740 if (!i->second.prefix.empty()) {
741 parser->other_xml.append(i->second.prefix);
742 parser->other_xml.append(":");
743 }
744 parser->other_xml.append(i->first);
745 parser->other_xml.append("=\"");
746 parser->other_xml.append(i->second.value);
747 parser->other_xml.append("\"");
748 }
749 }
750
751 parser->other_xml.append(">");
752 break;
753
754 case inside_enum_def:
755 // process an EnumConst element
756 if (parser->process_enum_const(localname, attributes, nb_attributes))
757 parser->push_state(inside_enum_const);
758 else
759 dmr_error(parser, "Expected an 'EnumConst' element; found '%s' instead.", localname);
760 break;
761
762 case inside_enum_const:
763 // No content; nothing to do
764 break;
765
766 case inside_dim_def:
767 // No content; nothing to do
768 break;
769#if 0
770 case inside_dimension:
771 // No content.
772 break;
773#endif
774 case inside_dim:
775 // No content.
776 break;
777
778 case inside_map:
779 // No content.
780 break;
781
782 case inside_simple_type:
783 if (parser->process_attribute(localname, attributes, nb_attributes))
784 break;
785 else if (parser->process_dimension(localname, attributes, nb_attributes))
786 parser->push_state(inside_dim);
787 else if (parser->process_map(localname, attributes, nb_attributes))
788 parser->push_state(inside_map);
789 else
790 dmr_error(parser, "Expected an 'Attribute', 'Dim' or 'Map' element; found '%s' instead.", localname);
791 break;
792
793 case inside_constructor:
794 if (parser->process_variable(localname, attributes, nb_attributes))
795 // This will push either inside_simple_type or inside_structure
796 // onto the parser state stack.
797 break;
798 else if (parser->process_attribute(localname, attributes, nb_attributes))
799 break;
800 else if (parser->process_dimension(localname, attributes, nb_attributes))
801 parser->push_state(inside_dim);
802 else if (parser->process_map(localname, attributes, nb_attributes))
803 parser->push_state(inside_map);
804 else
805 D4ParserSax2::dmr_error(parser, "Expected an Attribute, Dim, Map or variable element; found '%s' instead.", localname);
806 break;
807
808 case parser_unknown:
809 // FIXME?
810 // *** Never used? If so remove/error
811 parser->push_state(parser_unknown);
812 break;
813
814 case parser_error:
815 case parser_fatal_error:
816 break;
817
818 case parser_end:
819 // FIXME Error?
820 break;
821 }
822
823 if (parser->debug()) cerr << "Start element exit state: " << states[parser->get_state()] << endl;
824}
825
826void D4ParserSax2::dmr_end_element(void *p, const xmlChar *l, const xmlChar *prefix, const xmlChar *URI)
827{
828 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
829 const char *localname = (const char *) l;
830
831 if (parser->debug())
832 cerr << "End element " << localname << " (state " << states[parser->get_state()] << ")" << endl;
833
834 switch (parser->get_state()) {
835 case parser_start:
836 dmr_fatal_error(parser, "Unexpected state, inside start state while processing element '%s'.", localname);
837 break;
838
839 case inside_dataset:
840 if (is_not(localname, "Dataset"))
841 D4ParserSax2::dmr_error(parser, "Expected an end Dataset tag; found '%s' instead.", localname);
842
843 parser->pop_state();
844 if (parser->get_state() != parser_start)
845 dmr_fatal_error(parser, "Unexpected state, expected start state.");
846 else {
847 parser->pop_state();
848 parser->push_state(parser_end);
849 }
850 break;
851
852 case inside_group: {
853 if (is_not(localname, "Group"))
854 D4ParserSax2::dmr_error(parser, "Expected an end tag for a Group; found '%s' instead.", localname);
855
856 if (!parser->empty_basetype() || parser->empty_group())
857 D4ParserSax2::dmr_error(parser,
858 "The document did not contain a valid root Group or contained unbalanced tags.");
859
860 parser->pop_group();
861 parser->pop_state();
862 break;
863 }
864
865 case inside_attribute_container:
866 if (is_not(localname, "Attribute"))
867 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
868
869 parser->pop_state();
870 parser->pop_attributes();
871 break;
872
873 case inside_attribute:
874 if (is_not(localname, "Attribute"))
875 D4ParserSax2::dmr_error(parser, "Expected an end Attribute tag; found '%s' instead.", localname);
876
877 parser->pop_state();
878 break;
879
880 case inside_attribute_value: {
881 if (is_not(localname, "Value"))
882 D4ParserSax2::dmr_error(parser, "Expected an end value tag; found '%s' instead.", localname);
883
884 parser->pop_state();
885
886 // The old code added more values using the name and type as
887 // indexes to find the correct attribute. Use get() for that
888 // now. Or fix this code to keep a pointer to the to attribute...
889 D4Attributes *attrs = parser->top_attributes();
890 D4Attribute *attr = attrs->get(parser->dods_attr_name);
891 if (!attr) {
892 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
893 attrs->add_attribute_nocopy(attr);
894 }
895 attr->add_value(parser->char_data);
896
897 parser->char_data = ""; // Null this after use.
898 break;
899 }
900
901 case inside_other_xml_attribute: {
902 if (strcmp(localname, "Attribute") == 0 && parser->root_ns == (const char *) URI) {
903 parser->pop_state();
904
905 // The old code added more values using the name and type as
906 // indexes to find the correct attribute. Use get() for that
907 // now. Or fix this code to keep a pointer to the to attribute...
908 D4Attributes *attrs = parser->top_attributes();
909 D4Attribute *attr = attrs->get(parser->dods_attr_name);
910 if (!attr) {
911 attr = new D4Attribute(parser->dods_attr_name, StringToD4AttributeType(parser->dods_attr_type));
912 attrs->add_attribute_nocopy(attr);
913 }
914 attr->add_value(parser->other_xml);
915
916 parser->other_xml = ""; // Null this after use.
917 }
918 else {
919 if (parser->other_xml_depth == 0) {
920 D4ParserSax2::dmr_error(parser, "Expected an OtherXML attribute to end! Instead I found '%s'",
921 localname);
922 break;
923 }
924 parser->other_xml_depth--;
925
926 parser->other_xml.append("</");
927 if (prefix) {
928 parser->other_xml.append((const char *) prefix);
929 parser->other_xml.append(":");
930 }
931 parser->other_xml.append(localname);
932 parser->other_xml.append(">");
933 }
934 break;
935 }
936
937 case inside_enum_def:
938 if (is_not(localname, "Enumeration"))
939 D4ParserSax2::dmr_error(parser, "Expected an end Enumeration tag; found '%s' instead.", localname);
940 if (!parser->top_group())
942 "Expected a Group to be the current item, while finishing up an Enumeration.");
943 else {
944 // copy the pointer; not a deep copy
945 parser->top_group()->enum_defs()->add_enum_nocopy(parser->enum_def());
946 // Set the enum_def to null; next call to enum_def() will
947 // allocate a new object
948 parser->clear_enum_def();
949 parser->pop_state();
950 }
951 break;
952
953 case inside_enum_const:
954 if (is_not(localname, "EnumConst"))
955 D4ParserSax2::dmr_error(parser, "Expected an end EnumConst tag; found '%s' instead.", localname);
956
957 parser->pop_state();
958 break;
959
960 case inside_dim_def: {
961 if (is_not(localname, "Dimension"))
962 D4ParserSax2::dmr_error(parser, "Expected an end Dimension tag; found '%s' instead.", localname);
963
964 if (!parser->top_group())
965 D4ParserSax2::dmr_error(parser,
966 "Expected a Group to be the current item, while finishing up an Dimension.");
967
968 // FIXME Use the Group on the top of the group stack
969 // copy the pointer; not a deep copy
970 parser->top_group()->dims()->add_dim_nocopy(parser->dim_def());
971 //parser->dmr()->root()->dims()->add_dim_nocopy(parser->dim_def());
972 // Set the dim_def to null; next call to dim_def() will
973 // allocate a new object. Calling 'clear' is important because
974 // the cleanup method will free dim_def if it's not null and
975 // we just copied the pointer in the add_dim_nocopy() call
976 // above.
977 parser->clear_dim_def();
978 parser->pop_state();
979 break;
980 }
981
982 case inside_simple_type:
983 if (is_simple_type(get_type(localname))) {
984 BaseType *btp = parser->top_basetype();
985 parser->pop_basetype();
986 parser->pop_attributes();
987
988 BaseType *parent = 0;
989 if (!parser->empty_basetype())
990 parent = parser->top_basetype();
991 else if (!parser->empty_group())
992 parent = parser->top_group();
993 else {
994 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
995 localname);
996 delete btp;
997 parser->pop_state();
998 break;
999 }
1000
1001 if (parent->type() == dods_array_c)
1002 static_cast<Array*>(parent)->prototype()->add_var_nocopy(btp);
1003 else
1004 parent->add_var_nocopy(btp);
1005 }
1006 else
1007 D4ParserSax2::dmr_error(parser, "Expected an end tag for a simple type; found '%s' instead.", localname);
1008
1009 parser->pop_state();
1010 break;
1011
1012 case inside_dim:
1013 if (is_not(localname, "Dim"))
1014 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Dim tag; found '%s' instead.", localname);
1015
1016 parser->pop_state();
1017 break;
1018
1019 case inside_map:
1020 if (is_not(localname, "Map"))
1021 D4ParserSax2::dmr_fatal_error(parser, "Expected an end Map tag; found '%s' instead.", localname);
1022
1023 parser->pop_state();
1024 break;
1025
1026 case inside_constructor: {
1027 if (strcmp(localname, "Structure") != 0 && strcmp(localname, "Sequence") != 0) {
1028 D4ParserSax2::dmr_error(parser, "Expected an end tag for a constructor; found '%s' instead.", localname);
1029 return;
1030 }
1031
1032 BaseType *btp = parser->top_basetype();
1033 parser->pop_basetype();
1034 parser->pop_attributes();
1035
1036 BaseType *parent = 0;
1037 if (!parser->empty_basetype())
1038 parent = parser->top_basetype();
1039 else if (!parser->empty_group())
1040 parent = parser->top_group();
1041 else {
1042 dmr_fatal_error(parser, "Both the Variable and Groups stacks are empty while closing a %s element.",
1043 localname);
1044 delete btp;
1045 parser->pop_state();
1046 break;
1047 }
1048
1049 // TODO Why doesn't this code mirror the simple_var case and test
1050 // for the parent being an array? jhrg 10/13/13
1051 parent->add_var_nocopy(btp);
1052 parser->pop_state();
1053 break;
1054 }
1055
1056 case parser_unknown:
1057 parser->pop_state();
1058 break;
1059
1060 case parser_error:
1061 case parser_fatal_error:
1062 break;
1063
1064 case parser_end:
1065 // FIXME Error?
1066 break;
1067 }
1068
1069 if (parser->debug()) cerr << "End element exit state: " << states[parser->get_state()] << endl;
1070}
1071
1075void D4ParserSax2::dmr_get_characters(void * p, const xmlChar * ch, int len)
1076{
1077 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1078
1079 switch (parser->get_state()) {
1080 case inside_attribute_value:
1081 parser->char_data.append((const char *) (ch), len);
1082 DBG(cerr << "Characters: '" << parser->char_data << "'" << endl);
1083 break;
1084
1085 case inside_other_xml_attribute:
1086 parser->other_xml.append((const char *) (ch), len);
1087 DBG(cerr << "Other XML Characters: '" << parser->other_xml << "'" << endl);
1088 break;
1089
1090 default:
1091 break;
1092 }
1093}
1094
1099void D4ParserSax2::dmr_ignoreable_whitespace(void *p, const xmlChar *ch, int len)
1100{
1101 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1102
1103 switch (parser->get_state()) {
1104 case inside_other_xml_attribute:
1105 parser->other_xml.append((const char *) (ch), len);
1106 break;
1107
1108 default:
1109 break;
1110 }
1111}
1112
1118void D4ParserSax2::dmr_get_cdata(void *p, const xmlChar *value, int len)
1119{
1120 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1121
1122 switch (parser->get_state()) {
1123 case inside_other_xml_attribute:
1124 parser->other_xml.append((const char *) (value), len);
1125 break;
1126
1127 case parser_unknown:
1128 break;
1129
1130 default:
1131 D4ParserSax2::dmr_error(parser, "Found a CData block but none are allowed by DAP4.");
1132
1133 break;
1134 }
1135}
1136
1141xmlEntityPtr D4ParserSax2::dmr_get_entity(void *, const xmlChar * name)
1142{
1143 return xmlGetPredefinedEntity(name);
1144}
1145
1156void D4ParserSax2::dmr_fatal_error(void * p, const char *msg, ...)
1157{
1158 va_list args;
1159 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1160
1161 parser->push_state(parser_fatal_error);
1162
1163 va_start(args, msg);
1164 char str[1024];
1165 vsnprintf(str, 1024, msg, args);
1166 va_end(args);
1167
1168 int line = xmlSAX2GetLineNumber(parser->context);
1169
1170 if (!parser->error_msg.empty()) parser->error_msg += "\n";
1171 parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1172}
1173
1174void D4ParserSax2::dmr_error(void *p, const char *msg, ...)
1175{
1176 va_list args;
1177 D4ParserSax2 *parser = static_cast<D4ParserSax2*>(p);
1178
1179 parser->push_state(parser_error);
1180
1181 va_start(args, msg);
1182 char str[1024];
1183 vsnprintf(str, 1024, msg, args);
1184 va_end(args);
1185
1186 int line = xmlSAX2GetLineNumber(parser->context);
1187
1188 if (!parser->error_msg.empty()) parser->error_msg += "\n";
1189 parser->error_msg += "At line " + long_to_string(line) + ": " + string(str);
1190}
1192
1196void D4ParserSax2::cleanup_parse()
1197{
1198 bool wellFormed = context->wellFormed;
1199 bool valid = context->valid;
1200
1201 context->sax = NULL;
1202 xmlFreeParserCtxt(context);
1203
1204 delete d_enum_def;
1205 d_enum_def = 0;
1206
1207 delete d_dim_def;
1208 d_dim_def = 0;
1209
1210 // If there's an error, there may still be items on the stack at the
1211 // end of the parse.
1212 while (!btp_stack.empty()) {
1213 delete top_basetype();
1214 pop_basetype();
1215 }
1216
1217 if (!wellFormed)
1218 throw Error("The DMR was not well formed. " + error_msg);
1219 else if (!valid)
1220 throw Error("The DMR was not valid." + error_msg);
1221 else if (get_state() == parser_error)
1222 throw Error(error_msg);
1223 else if (get_state() == parser_fatal_error)
1224 throw InternalErr(error_msg);
1225}
1226
1241void D4ParserSax2::intern(istream &f, DMR *dest_dmr, bool debug)
1242{
1243 d_debug = debug;
1244
1245 // Code example from libxml2 docs re: read from a stream.
1246
1247 if (!f.good())
1248 throw Error("Input stream not open or read error");
1249 if (!dest_dmr)
1250 throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1251
1252 d_dmr = dest_dmr; // dump values here
1253
1254 const int size = 1024;
1255 char chars[size];
1256 int line = 1;
1257
1258 f.getline(chars, size);
1259 int res = f.gcount();
1260 if (res == 0) throw Error("No input found while parsing the DMR.");
1261
1262 if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1263
1264 context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, chars, res - 1, "stream");
1265 context->validate = true;
1266 push_state(parser_start);
1267
1268 f.getline(chars, size);
1269 while ((f.gcount() > 0) && (get_state() != parser_end)) {
1270 if (debug) cerr << "line: (" << line++ << "): " << chars << endl;
1271 xmlParseChunk(context, chars, f.gcount() - 1, 0);
1272 f.getline(chars, size);
1273 }
1274
1275 // This call ends the parse.
1276 xmlParseChunk(context, chars, 0, 1/*terminate*/);
1277
1278 // This checks that the state on the parser stack is parser_end and throws
1279 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1280 cleanup_parse();
1281}
1282
1293void D4ParserSax2::intern(const string &document, DMR *dest_dmr, bool debug)
1294{
1295 intern(document.c_str(), document.length(), dest_dmr, debug);
1296}
1297
1308void D4ParserSax2::intern(const char *buffer, int size, DMR *dest_dmr, bool debug)
1309{
1310 if (!(size > 0)) return;
1311
1312 d_debug = debug;
1313
1314 // Code example from libxml2 docs re: read from a stream.
1315
1316 if (!dest_dmr) throw InternalErr(__FILE__, __LINE__, "DMR object is null");
1317 d_dmr = dest_dmr; // dump values in dest_dmr
1318
1319 push_state(parser_start);
1320 context = xmlCreatePushParserCtxt(&ddx_sax_parser, this, buffer, size, "stream");
1321 context->validate = true;
1322 //push_state(parser_start);
1323 //xmlParseChunk(context, buffer, size, 0);
1324
1325 // This call ends the parse.
1326 xmlParseChunk(context, buffer, 0, 1/*terminate*/);
1327
1328 // This checks that the state on the parser stack is parser_end and throws
1329 // an exception if it's not (i.e., the loop exited with gcount() == 0).
1330 cleanup_parse();
1331}
1332
1333} // namespace libdap
virtual D4Attributes * attributes()
Definition: BaseType.cc:544
virtual BaseType * NewVariable(Type t, const string &name) const
D4Dimension * find_dim(const string &path)
Find the dimension using a path. Using the DAP4 name syntax, lookup a dimension. The dimension must b...
Definition: D4Group.cc:268
virtual std::string FQN() const
Definition: D4Group.cc:176
void set_dap_version(const string &version_string)
Definition: DMR.cc:254
void set_request_xml_base(const string &xb)
Definition: DMR.h:152
D4Group * root()
Definition: DMR.cc:242
void set_namespace(const string &ns)
Set the namespace for this DDS/DDX object/response.
Definition: DMR.h:158
virtual D4BaseTypeFactory * factory()
Definition: DMR.h:125
A class for software fault reporting.
Definition: InternalErr.h:65
static void dmr_start_document(void *parser)
static void dmr_ignoreable_whitespace(void *parser, const xmlChar *ch, int len)
static void dmr_get_characters(void *parser, const xmlChar *ch, int len)
static void dmr_end_document(void *parser)
static void dmr_get_cdata(void *parser, const xmlChar *value, int len)
static xmlEntityPtr dmr_get_entity(void *parser, const xmlChar *name)
static void dmr_fatal_error(void *parser, const char *msg,...)
Type
Identifies the data type.
Definition: Type.h:94
bool is_simple_type(Type t)
Returns true if the instance is a numeric, string or URL type variable.
Definition: util.cc:771
string D4type_name(Type t)
Returns the type of the class instance as a string. Supports all DAP4 types and not the DAP2-only typ...
Definition: util.cc:690
bool is_vector_type(Type t)
Returns true if the instance is a vector (i.e., array) type variable.
Definition: util.cc:813
bool is_integer_type(Type t)
Definition: util.cc:902
ObjectType get_type(const string &value)
Definition: mime_util.cc:326